OR-Tools  8.0
topologicalsorter.cc
Go to the documentation of this file.
1 // Copyright 2010-2018 Google LLC
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
15 
16 #include <algorithm>
17 #include <map>
18 #include <queue>
19 #include <string>
20 #include <vector>
21 
22 #include "ortools/base/map_util.h"
23 #include "ortools/base/stl_util.h"
24 
25 namespace util {
26 namespace internal {
27 
namespace {
// Removes the next element from a FIFO-style queue (any type exposing
// front() and pop(), such as std::queue<int>) and stores it in *top.
// The queue must be non-empty: front() on an empty queue is undefined.
template <typename IntQueue>
inline void PopTop(IntQueue* q, int* top) {
  *top = q->front();
  q->pop();
}

// Overload for std::priority_queue, which exposes its next element through
// top() rather than front(). Being more specialized, it is preferred over the
// generic version above whenever the argument is a priority queue of int.
// The queue must be non-empty.
template <typename C, typename F>
void PopTop(std::priority_queue<int, C, F>* q, int* top) {
  *top = q->top();
  q->pop();
}
}  // namespace
41 
42 template <bool stable_sort>
44  CHECK(!TraversalStarted()) << "Cannot add nodes after starting traversal";
45 
46  if (node_index >= adjacency_lists_.size()) {
47  adjacency_lists_.resize(node_index + 1);
48  }
49 }
50 
51 // Up to a point, we detect duplicates up front and do not insert them.
52 // Then we switch to using RemoveDuplicates(), see below.
53 //
54 // Note(user): I did benchmarks on this in November 2011, and while
55 // 32 seemed too large, I did not see very significant performance
56 // differences with 0, 4, 8 or 16. But since larger values of this
57 // threshold mean that there will be slightly less space used up by
58 // small adjacency lists in case there are repeated edges, I picked 16.
60 
61 template <bool stable_sort>
63  CHECK(!TraversalStarted()) << "Cannot add edges after starting traversal";
64 
65  AddNode(std::max(from, to));
66 
67  AdjacencyList& adj_list = adjacency_lists_[from];
68  const uint32 adj_list_size = adj_list.size();
69  if (adj_list_size <= kLazyDuplicateDetectionSizeThreshold) {
70  for (AdjacencyList::const_iterator it = adj_list.begin();
71  it != adj_list.end(); ++it) {
72  if (*it == to) {
73  return;
74  }
75  }
76  adj_list.push_back(to);
77  ++num_edges_;
78  } else {
79  adj_list.push_back(to);
80  if (++num_edges_added_since_last_duplicate_removal_ > ++num_edges_ / 2) {
81  num_edges_added_since_last_duplicate_removal_ = 0;
82  // We remove all duplicates at once, but skip lists for which the
83  // number of duplicates can't be too large, i.e. lists smaller than
84  // kLazyDuplicateDetectionSizeThreshold * 2. The overall ratio of
85  // duplicate edges remains bounded by 2/3 in the worst case.
86  num_edges_ -= RemoveDuplicates(&adjacency_lists_,
88  }
89  }
90 }
91 
92 template <bool stable_sort>
94  int* next_node_index, bool* cyclic, std::vector<int>* output_cycle_nodes) {
95  if (!TraversalStarted()) {
96  StartTraversal();
97  }
98 
99  *cyclic = false;
100  if (num_nodes_left_ == 0) {
101  return false;
102  }
103  if (nodes_with_zero_indegree_.empty()) {
104  VLOG(2) << "Not all nodes have been visited (" << num_nodes_left_
105  << " nodes left), but there aren't any zero-indegree nodes"
106  << " available. This graph is cyclic! Use ExtractCycle() for"
107  << " more information.";
108  *cyclic = true;
109  if (output_cycle_nodes != NULL) {
110  ExtractCycle(output_cycle_nodes);
111  }
112  return false;
113  }
114 
115  // Pop one orphan node.
116  --num_nodes_left_;
117  PopTop(&nodes_with_zero_indegree_, next_node_index);
118 
119  // Swap out the adjacency list, since we won't need it afterwards,
120  // to decrease memory usage.
121  AdjacencyList adj_list;
122  adj_list.swap(adjacency_lists_[*next_node_index]);
123 
124  // Add new orphan nodes to nodes_with_zero_indegree_.
125  for (int i = 0; i < adj_list.size(); ++i) {
126  if (--indegree_[adj_list[i]] == 0) {
127  nodes_with_zero_indegree_.push(adj_list[i]);
128  }
129  }
130  return true;
131 }
132 
133 template <bool stable_sort>
135  if (TraversalStarted()) {
136  return;
137  }
138 
139  const int num_nodes = adjacency_lists_.size();
140  indegree_.assign(num_nodes, 0);
141 
142  // Iterate over all adjacency lists, and fill the indegree[] vector.
143  // Note that we don't bother removing duplicates: there can't be
144  // too many, since we removed them progressively, and it is actually
145  // cheaper to keep them at this point.
146  for (int from = 0; from < num_nodes; ++from) {
147  AdjacencyList& adj_list = adjacency_lists_[from];
148  for (AdjacencyList::const_iterator it = adj_list.begin();
149  it != adj_list.end(); ++it) {
150  ++indegree_[*it];
151  }
152  }
153 
154  // Initialize the nodes_with_zero_indegree_ vector.
155  for (int node = 0; node < num_nodes; ++node) {
156  if (indegree_[node] == 0) {
157  nodes_with_zero_indegree_.push(node);
158  }
159  }
160 
161  num_nodes_left_ = num_nodes;
162  traversal_started_ = true;
163 }
164 
165 // static
166 template <bool stable_sort>
168  std::vector<AdjacencyList>* lists, int skip_lists_smaller_than) {
169  // We can always skip lists with less than 2 elements.
170  if (skip_lists_smaller_than < 2) {
171  skip_lists_smaller_than = 2;
172  }
173  const int n = lists->size();
174  std::vector<bool> visited(n, false);
175  int num_duplicates_removed = 0;
176  for (std::vector<AdjacencyList>::iterator list = lists->begin();
177  list != lists->end(); ++list) {
178  if (list->size() < skip_lists_smaller_than) {
179  continue;
180  }
181  num_duplicates_removed += list->size();
182  // To optimize the duplicate removal loop, we split it in two:
183  // first, find the first duplicate, then copy the rest of the shifted
184  // adjacency list as we keep detecting duplicates.
185  AdjacencyList::iterator it = list->begin();
186  DCHECK(it != list->end());
187  while (!visited[*it]) {
188  visited[*(it++)] = true;
189  if (it == list->end()) {
190  break;
191  }
192  }
193  // Skip the shifted copy if there were no duplicates at all.
194  if (it != list->end()) {
195  AdjacencyList::iterator it2 = it;
196  while (++it != list->end()) {
197  if (!visited[*it]) {
198  visited[*it] = true;
199  *(it2++) = *it;
200  }
201  }
202  list->erase(it2, list->end());
203  }
204  for (it = list->begin(); it != list->end(); ++it) {
205  visited[*it] = false;
206  }
207  num_duplicates_removed -= list->size();
208  }
209  return num_duplicates_removed;
210 }
211 
212 // Note(user): as of 2012-09, this implementation works in
213 // O(number of edges + number of nodes), which is the theoretical best.
214 // It could probably be optimized to gain a significant constant speed-up;
215 // but at the cost of more code complexity.
216 template <bool stable_sort>
218  std::vector<int>* cycle_nodes) const {
219  const int num_nodes = adjacency_lists_.size();
220  cycle_nodes->clear();
221  // To find a cycle, we start a DFS from each yet-unvisited node and
222  // try to find a cycle, if we don't find it then we know for sure that
223  // no cycle is reachable from any of the explored nodes (so, we don't
224  // explore them in later DFSs).
225  std::vector<bool> no_cycle_reachable_from(num_nodes, false);
226  // The DFS stack will contain a chain of nodes, from the root of the
227  // DFS to the current leaf.
228  struct DfsState {
229  int node;
230  // Points at the first child node that we did *not* yet look at.
231  int adj_list_index;
232  explicit DfsState(int _node) : node(_node), adj_list_index(0) {}
233  };
234  std::vector<DfsState> dfs_stack;
235  std::vector<bool> in_cur_stack(num_nodes, false);
236  for (int start_node = 0; start_node < num_nodes; ++start_node) {
237  if (no_cycle_reachable_from[start_node]) {
238  continue;
239  }
240  // Start the DFS.
241  dfs_stack.push_back(DfsState(start_node));
242  in_cur_stack[start_node] = true;
243  while (!dfs_stack.empty()) {
244  DfsState* cur_state = &dfs_stack.back();
245  if (cur_state->adj_list_index >=
246  adjacency_lists_[cur_state->node].size()) {
247  no_cycle_reachable_from[cur_state->node] = true;
248  in_cur_stack[cur_state->node] = false;
249  dfs_stack.pop_back();
250  continue;
251  }
252  // Look at the current child, and increase the current state's
253  // adj_list_index.
254  const int child =
255  adjacency_lists_[cur_state->node][cur_state->adj_list_index];
256  ++(cur_state->adj_list_index);
257  if (no_cycle_reachable_from[child]) {
258  continue;
259  }
260  if (in_cur_stack[child]) {
261  // We detected a cycle! Fill it and return.
262  for (;;) {
263  cycle_nodes->push_back(dfs_stack.back().node);
264  if (dfs_stack.back().node == child) {
265  std::reverse(cycle_nodes->begin(), cycle_nodes->end());
266  return;
267  }
268  dfs_stack.pop_back();
269  }
270  }
271  // Push the child onto the stack.
272  dfs_stack.push_back(DfsState(child));
273  in_cur_stack[child] = true;
274  }
275  }
276  // If we're here, then all the DFS stopped, and they never encountered
277  // a cycle (otherwise, we would have returned). Just exit; the output
278  // vector has been cleared already.
279 }
280 
281 // Generate the templated code. Including these definitions allows us
282 // to have templated code inside the .cc file and not incur linker errors.
285 
286 } // namespace internal
287 
288 std::vector<int> FindCycleInDenseIntGraph(
289  int num_nodes, const std::vector<std::pair<int, int>>& arcs) {
290  std::vector<int> cycle;
291  if (num_nodes < 1) {
292  return cycle;
293  }
294  internal::DenseIntTopologicalSorterTpl</* stable= */ false> sorter(num_nodes);
295  for (const auto& arc : arcs) {
296  sorter.AddEdge(arc.first, arc.second);
297  }
298  sorter.ExtractCycle(&cycle);
299  return cycle;
300 }
301 } // namespace util
util::internal::DenseIntTopologicalSorterTpl::AddEdge
void AddEdge(int from, int to)
Definition: topologicalsorter.cc:62
map_util.h
max
int64 max
Definition: alldiff_cst.cc:139
util::internal::DenseIntTopologicalSorterTpl::RemoveDuplicates
static int RemoveDuplicates(std::vector< AdjacencyList > *lists, int skip_lists_smaller_than)
Definition: topologicalsorter.cc:167
util::internal::DenseIntTopologicalSorterTpl::AddNode
void AddNode(int node_index)
Definition: topologicalsorter.cc:43
util::internal::DenseIntTopologicalSorterTpl::GetNext
bool GetNext(int *next_node_index, bool *cyclic, std::vector< int > *output_cycle_nodes=NULL)
Definition: topologicalsorter.cc:93
uint32
unsigned int uint32
Definition: integral_types.h:38
util::internal::DenseIntTopologicalSorterTpl::AdjacencyList
::std::vector< int > AdjacencyList
Definition: topologicalsorter.h:144
stl_util.h
topologicalsorter.h
util::internal::DenseIntTopologicalSorterTpl::ExtractCycle
void ExtractCycle(std::vector< int > *cycle_nodes) const
Definition: topologicalsorter.cc:217
internal
Definition: bop_parameters.pb.h:40
util::internal::DenseIntTopologicalSorterTpl::StartTraversal
void StartTraversal()
Definition: topologicalsorter.cc:134
util::internal::DenseIntTopologicalSorterTpl< false >
util
Definition: status_builder.h:21
util::internal::kLazyDuplicateDetectionSizeThreshold
static const int kLazyDuplicateDetectionSizeThreshold
Definition: topologicalsorter.cc:59
util::FindCycleInDenseIntGraph
std::vector< int > FindCycleInDenseIntGraph(int num_nodes, const std::vector< std::pair< int, int >> &arcs)
Definition: topologicalsorter.cc:288