OR-Tools  9.0
topologicalsorter.cc
Go to the documentation of this file.
1 // Copyright 2010-2021 Google LLC
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
15 
16 #include <algorithm>
17 #include <cstddef>
18 #include <cstdint>
19 #include <map>
20 #include <queue>
21 #include <string>
22 #include <vector>
23 
24 #include "ortools/base/map_util.h"
25 #include "ortools/base/stl_util.h"
26 
27 namespace util {
28 namespace internal {
29 
namespace {
// Removes the next element from a FIFO-style queue and stores it in *top.
// Works with any container adaptor exposing front() and pop(), such as
// std::queue<int>. The queue must not be empty.
template <typename IntQueue>
inline void PopTop(IntQueue* q, int* top) {
  *top = q->front();
  q->pop();
}

// Overload for std::priority_queue, which exposes top() instead of front().
// Being the more specialized template, it is the one selected whenever the
// argument is a std::priority_queue<int, ...>. The queue must not be empty.
template <typename C, typename F>
inline void PopTop(std::priority_queue<int, C, F>* q, int* top) {
  *top = q->top();
  q->pop();
}
}  // namespace
43 
44 template <bool stable_sort>
46  CHECK(!TraversalStarted()) << "Cannot add nodes after starting traversal";
47  CHECK_GE(node_index, 0) << "Index must not be negative";
48 
49  if (static_cast<std::size_t>(node_index) >= adjacency_lists_.size()) {
50  adjacency_lists_.resize(node_index + 1);
51  }
52 }
53 
54 // Up to a point, we detect duplicates up front and do not insert them.
55 // Then we switch to using RemoveDuplicates(), see below.
56 //
57 // Note(user): I did benchmarks on this in November 2011, and while
58 // 32 seemed too large, I did not see very significant performance
59 // differences with 0, 4, 8 or 16. But since larger values of this
60 // threshold mean that there will be slightly less space used up by
61 // small adjacency lists in case there are repeated edges, I picked 16.
63 
64 template <bool stable_sort>
66  CHECK(!TraversalStarted()) << "Cannot add edges after starting traversal";
67 
68  AddNode(std::max(from, to));
69 
70  AdjacencyList& adj_list = adjacency_lists_[from];
71  const uint32_t adj_list_size = adj_list.size();
72  if (adj_list_size <= kLazyDuplicateDetectionSizeThreshold) {
73  for (AdjacencyList::const_iterator it = adj_list.begin();
74  it != adj_list.end(); ++it) {
75  if (*it == to) {
76  return;
77  }
78  }
79  adj_list.push_back(to);
80  ++num_edges_;
81  } else {
82  adj_list.push_back(to);
83  if (++num_edges_added_since_last_duplicate_removal_ > ++num_edges_ / 2) {
84  num_edges_added_since_last_duplicate_removal_ = 0;
85  // We remove all duplicates at once, but skip lists for which the
86  // number of duplicates can't be too large, i.e. lists smaller than
87  // kLazyDuplicateDetectionSizeThreshold * 2. The overall ratio of
88  // duplicate edges remains bounded by 2/3 in the worst case.
89  num_edges_ -= RemoveDuplicates(&adjacency_lists_,
91  }
92  }
93 }
94 
95 template <bool stable_sort>
97  int* next_node_index, bool* cyclic, std::vector<int>* output_cycle_nodes) {
98  if (!TraversalStarted()) {
99  StartTraversal();
100  }
101 
102  *cyclic = false;
103  if (num_nodes_left_ == 0) {
104  return false;
105  }
106  if (nodes_with_zero_indegree_.empty()) {
107  VLOG(2) << "Not all nodes have been visited (" << num_nodes_left_
108  << " nodes left), but there aren't any zero-indegree nodes"
109  << " available. This graph is cyclic! Use ExtractCycle() for"
110  << " more information.";
111  *cyclic = true;
112  if (output_cycle_nodes != NULL) {
113  ExtractCycle(output_cycle_nodes);
114  }
115  return false;
116  }
117 
118  // Pop one orphan node.
119  --num_nodes_left_;
120  PopTop(&nodes_with_zero_indegree_, next_node_index);
121 
122  // Swap out the adjacency list, since we won't need it afterwards,
123  // to decrease memory usage.
124  AdjacencyList adj_list;
125  adj_list.swap(adjacency_lists_[*next_node_index]);
126 
127  // Add new orphan nodes to nodes_with_zero_indegree_.
128  for (std::size_t i = 0; i < adj_list.size(); ++i) {
129  if (--indegree_[adj_list[i]] == 0) {
130  nodes_with_zero_indegree_.push(adj_list[i]);
131  }
132  }
133  return true;
134 }
135 
136 template <bool stable_sort>
138  if (TraversalStarted()) {
139  return;
140  }
141 
142  const int num_nodes = adjacency_lists_.size();
143  indegree_.assign(num_nodes, 0);
144 
145  // Iterate over all adjacency lists, and fill the indegree[] vector.
146  // Note that we don't bother removing duplicates: there can't be
147  // too many, since we removed them progressively, and it is actually
148  // cheaper to keep them at this point.
149  for (int from = 0; from < num_nodes; ++from) {
150  AdjacencyList& adj_list = adjacency_lists_[from];
151  for (AdjacencyList::const_iterator it = adj_list.begin();
152  it != adj_list.end(); ++it) {
153  ++indegree_[*it];
154  }
155  }
156 
157  // Initialize the nodes_with_zero_indegree_ vector.
158  for (int node = 0; node < num_nodes; ++node) {
159  if (indegree_[node] == 0) {
160  nodes_with_zero_indegree_.push(node);
161  }
162  }
163 
164  num_nodes_left_ = num_nodes;
165  traversal_started_ = true;
166 }
167 
168 // static
169 template <bool stable_sort>
171  std::vector<AdjacencyList>* lists, int skip_lists_smaller_than) {
172  // We can always skip lists with less than 2 elements.
173  if (skip_lists_smaller_than < 2) {
174  skip_lists_smaller_than = 2;
175  }
176  const int n = lists->size();
177  std::vector<bool> visited(n, false);
178  int num_duplicates_removed = 0;
179  for (std::vector<AdjacencyList>::iterator list = lists->begin();
180  list != lists->end(); ++list) {
181  if (list->size() < static_cast<std::size_t>(skip_lists_smaller_than)) {
182  continue;
183  }
184  num_duplicates_removed += list->size();
185  // To optimize the duplicate removal loop, we split it in two:
186  // first, find the first duplicate, then copy the rest of the shifted
187  // adjacency list as we keep detecting duplicates.
188  AdjacencyList::iterator it = list->begin();
189  DCHECK(it != list->end());
190  while (!visited[*it]) {
191  visited[*(it++)] = true;
192  if (it == list->end()) {
193  break;
194  }
195  }
196  // Skip the shifted copy if there were no duplicates at all.
197  if (it != list->end()) {
198  AdjacencyList::iterator it2 = it;
199  while (++it != list->end()) {
200  if (!visited[*it]) {
201  visited[*it] = true;
202  *(it2++) = *it;
203  }
204  }
205  list->erase(it2, list->end());
206  }
207  for (it = list->begin(); it != list->end(); ++it) {
208  visited[*it] = false;
209  }
210  num_duplicates_removed -= list->size();
211  }
212  return num_duplicates_removed;
213 }
214 
215 // Note(user): as of 2012-09, this implementation works in
216 // O(number of edges + number of nodes), which is the theoretical best.
217 // It could probably be optimized to gain a significant constant speed-up;
218 // but at the cost of more code complexity.
219 template <bool stable_sort>
221  std::vector<int>* cycle_nodes) const {
222  const int num_nodes = adjacency_lists_.size();
223  cycle_nodes->clear();
224  // To find a cycle, we start a DFS from each yet-unvisited node and
225  // try to find a cycle, if we don't find it then we know for sure that
226  // no cycle is reachable from any of the explored nodes (so, we don't
227  // explore them in later DFSs).
228  std::vector<bool> no_cycle_reachable_from(num_nodes, false);
229  // The DFS stack will contain a chain of nodes, from the root of the
230  // DFS to the current leaf.
231  struct DfsState {
232  int node;
233  // Points at the first child node that we did *not* yet look at.
234  std::size_t adj_list_index;
235  explicit DfsState(int _node) : node(_node), adj_list_index(0) {}
236  };
237  std::vector<DfsState> dfs_stack;
238  std::vector<bool> in_cur_stack(num_nodes, false);
239  for (int start_node = 0; start_node < num_nodes; ++start_node) {
240  if (no_cycle_reachable_from[start_node]) {
241  continue;
242  }
243  // Start the DFS.
244  dfs_stack.push_back(DfsState(start_node));
245  in_cur_stack[start_node] = true;
246  while (!dfs_stack.empty()) {
247  DfsState* cur_state = &dfs_stack.back();
248  if (cur_state->adj_list_index >=
249  adjacency_lists_[cur_state->node].size()) {
250  no_cycle_reachable_from[cur_state->node] = true;
251  in_cur_stack[cur_state->node] = false;
252  dfs_stack.pop_back();
253  continue;
254  }
255  // Look at the current child, and increase the current state's
256  // adj_list_index.
257  const int child =
258  adjacency_lists_[cur_state->node][cur_state->adj_list_index];
259  ++(cur_state->adj_list_index);
260  if (no_cycle_reachable_from[child]) {
261  continue;
262  }
263  if (in_cur_stack[child]) {
264  // We detected a cycle! Fill it and return.
265  for (;;) {
266  cycle_nodes->push_back(dfs_stack.back().node);
267  if (dfs_stack.back().node == child) {
268  std::reverse(cycle_nodes->begin(), cycle_nodes->end());
269  return;
270  }
271  dfs_stack.pop_back();
272  }
273  }
274  // Push the child onto the stack.
275  dfs_stack.push_back(DfsState(child));
276  in_cur_stack[child] = true;
277  }
278  }
279  // If we're here, then all the DFS stopped, and they never encountered
280  // a cycle (otherwise, we would have returned). Just exit; the output
281  // vector has been cleared already.
282 }
283 
284 // Generate the templated code. Including these definitions allows us
285 // to have templated code inside the .cc file and not incur linker errors.
288 
289 } // namespace internal
290 
291 std::vector<int> FindCycleInDenseIntGraph(
292  int num_nodes, const std::vector<std::pair<int, int>>& arcs) {
293  std::vector<int> cycle;
294  if (num_nodes < 1) {
295  return cycle;
296  }
297  internal::DenseIntTopologicalSorterTpl</* stable= */ false> sorter(num_nodes);
298  for (const auto& arc : arcs) {
299  sorter.AddEdge(arc.first, arc.second);
300  }
301  sorter.ExtractCycle(&cycle);
302  return cycle;
303 }
304 } // namespace util
int64_t max
Definition: alldiff_cst.cc:140
#define CHECK(condition)
Definition: base/logging.h:498
#define CHECK_GE(val1, val2)
Definition: base/logging.h:709
#define DCHECK(condition)
Definition: base/logging.h:892
#define VLOG(verboselevel)
Definition: base/logging.h:986
void ExtractCycle(std::vector< int > *cycle_nodes) const
bool GetNext(int *next_node_index, bool *cyclic, std::vector< int > *output_cycle_nodes=NULL)
static int RemoveDuplicates(std::vector< AdjacencyList > *lists, int skip_lists_smaller_than)
static const int kLazyDuplicateDetectionSizeThreshold
std::vector< int > FindCycleInDenseIntGraph(int num_nodes, const std::vector< std::pair< int, int >> &arcs)