OR-Tools  9.3
topologicalsorter.cc
Go to the documentation of this file.
// Copyright 2010-2021 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
13
// NOTE(review): the project-local includes were lost in the doc extraction.
// "ortools/base/logging.h" is grounded (CHECK/DCHECK/VLOG are used below and
// the cross-reference index points at base/logging.h); confirm the own-header
// path against the original repository.
#include "ortools/graph/topologicalsorter.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <map>
#include <queue>
#include <string>
#include <vector>

#include "ortools/base/logging.h"
27namespace util {
28namespace internal {
29
namespace {
// Pops the next element of a FIFO queue (std::queue names it front()) into
// *top. Generic so that it also accepts other queue-like containers that
// expose front()/pop().
template <typename IntQueue>
inline void PopTop(IntQueue* q, int* top) {
  *top = q->front();
  q->pop();
}

// Overload for std::priority_queue, whose accessor is top() rather than
// front(). More specialized than the generic version, so overload
// resolution picks it automatically for priority queues.
template <typename C, typename F>
void PopTop(std::priority_queue<int, C, F>* q, int* top) {
  *top = q->top();
  q->pop();
}
}  // namespace
43
44template <bool stable_sort>
46 CHECK(!TraversalStarted()) << "Cannot add nodes after starting traversal";
47 CHECK_GE(node_index, 0) << "Index must not be negative";
48
49 if (static_cast<std::size_t>(node_index) >= adjacency_lists_.size()) {
50 adjacency_lists_.resize(node_index + 1);
51 }
52}
53
54// Up to a point, we detect duplicates up front and do not insert them.
55// Then we switch to using RemoveDuplicates(), see below.
56//
57// Note(user): I did benchmarks on this in November 2011, and while
58// 32 seemed too large, I did not see very significant performance
59// differences with 0, 4, 8 or 16. But since larger values of this
60// threshold mean that there will be slightly less space used up by
61// small adjacency lists in case there are repeated edges, I picked 16.
63
64template <bool stable_sort>
66 CHECK(!TraversalStarted()) << "Cannot add edges after starting traversal";
67
68 AddNode(std::max(from, to));
69
70 AdjacencyList& adj_list = adjacency_lists_[from];
71 const uint32_t adj_list_size = adj_list.size();
72 if (adj_list_size <= kLazyDuplicateDetectionSizeThreshold) {
73 for (AdjacencyList::const_iterator it = adj_list.begin();
74 it != adj_list.end(); ++it) {
75 if (*it == to) {
76 return;
77 }
78 }
79 adj_list.push_back(to);
80 ++num_edges_;
81 } else {
82 adj_list.push_back(to);
83 if (++num_edges_added_since_last_duplicate_removal_ > ++num_edges_ / 2) {
84 num_edges_added_since_last_duplicate_removal_ = 0;
85 // We remove all duplicates at once, but skip lists for which the
86 // number of duplicates can't be too large, i.e. lists smaller than
87 // kLazyDuplicateDetectionSizeThreshold * 2. The overall ratio of
88 // duplicate edges remains bounded by 2/3 in the worst case.
89 num_edges_ -= RemoveDuplicates(&adjacency_lists_,
91 }
92 }
93}
94
95template <bool stable_sort>
97 int* next_node_index, bool* cyclic, std::vector<int>* output_cycle_nodes) {
98 if (!TraversalStarted()) {
99 StartTraversal();
100 }
101
102 *cyclic = false;
103 if (num_nodes_left_ == 0) {
104 return false;
105 }
106 if (nodes_with_zero_indegree_.empty()) {
107 VLOG(2) << "Not all nodes have been visited (" << num_nodes_left_
108 << " nodes left), but there aren't any zero-indegree nodes"
109 << " available. This graph is cyclic! Use ExtractCycle() for"
110 << " more information.";
111 *cyclic = true;
112 if (output_cycle_nodes != nullptr) {
113 ExtractCycle(output_cycle_nodes);
114 }
115 return false;
116 }
117
118 // Pop one orphan node.
119 --num_nodes_left_;
120 PopTop(&nodes_with_zero_indegree_, next_node_index);
121
122 // Swap out the adjacency list, since we won't need it afterwards,
123 // to decrease memory usage.
124 AdjacencyList adj_list;
125 adj_list.swap(adjacency_lists_[*next_node_index]);
126
127 // Add new orphan nodes to nodes_with_zero_indegree_.
128 for (std::size_t i = 0; i < adj_list.size(); ++i) {
129 if (--indegree_[adj_list[i]] == 0) {
130 nodes_with_zero_indegree_.push(adj_list[i]);
131 }
132 }
133 return true;
134}
135
136template <bool stable_sort>
138 if (TraversalStarted()) {
139 return;
140 }
141
142 const int num_nodes = adjacency_lists_.size();
143 indegree_.assign(num_nodes, 0);
144
145 // Iterate over all adjacency lists, and fill the indegree[] vector.
146 // Note that we don't bother removing duplicates: there can't be
147 // too many, since we removed them progressively, and it is actually
148 // cheaper to keep them at this point.
149 for (int from = 0; from < num_nodes; ++from) {
150 AdjacencyList& adj_list = adjacency_lists_[from];
151 for (AdjacencyList::const_iterator it = adj_list.begin();
152 it != adj_list.end(); ++it) {
153 ++indegree_[*it];
154 }
155 }
156
157 // Initialize the nodes_with_zero_indegree_ vector.
158 for (int node = 0; node < num_nodes; ++node) {
159 if (indegree_[node] == 0) {
160 nodes_with_zero_indegree_.push(node);
161 }
162 }
163
164 num_nodes_left_ = num_nodes;
165 traversal_started_ = true;
166}
167
168// static
169template <bool stable_sort>
171 std::vector<AdjacencyList>* lists, int skip_lists_smaller_than) {
172 // We can always skip lists with less than 2 elements.
173 if (skip_lists_smaller_than < 2) {
174 skip_lists_smaller_than = 2;
175 }
176 const int n = lists->size();
177 std::vector<bool> visited(n, false);
178 int num_duplicates_removed = 0;
179 for (std::vector<AdjacencyList>::iterator list = lists->begin();
180 list != lists->end(); ++list) {
181 if (list->size() < static_cast<std::size_t>(skip_lists_smaller_than)) {
182 continue;
183 }
184 num_duplicates_removed += list->size();
185 // To optimize the duplicate removal loop, we split it in two:
186 // first, find the first duplicate, then copy the rest of the shifted
187 // adjacency list as we keep detecting duplicates.
188 AdjacencyList::iterator it = list->begin();
189 DCHECK(it != list->end());
190 while (!visited[*it]) {
191 visited[*(it++)] = true;
192 if (it == list->end()) {
193 break;
195 }
196 // Skip the shifted copy if there were no duplicates at all.
197 if (it != list->end()) {
198 AdjacencyList::iterator it2 = it;
199 while (++it != list->end()) {
200 if (!visited[*it]) {
201 visited[*it] = true;
202 *(it2++) = *it;
203 }
204 }
205 list->erase(it2, list->end());
206 }
207 for (it = list->begin(); it != list->end(); ++it) {
208 visited[*it] = false;
209 }
210 num_duplicates_removed -= list->size();
211 }
212 return num_duplicates_removed;
213}
214
215// Note(user): as of 2012-09, this implementation works in
216// O(number of edges + number of nodes), which is the theoretical best.
217// It could probably be optimized to gain a significant constant speed-up;
218// but at the cost of more code complexity.
219template <bool stable_sort>
221 std::vector<int>* cycle_nodes) const {
222 const int num_nodes = adjacency_lists_.size();
223 cycle_nodes->clear();
224 // To find a cycle, we start a DFS from each yet-unvisited node and
225 // try to find a cycle, if we don't find it then we know for sure that
226 // no cycle is reachable from any of the explored nodes (so, we don't
227 // explore them in later DFSs).
228 std::vector<bool> no_cycle_reachable_from(num_nodes, false);
229 // The DFS stack will contain a chain of nodes, from the root of the
230 // DFS to the current leaf.
231 struct DfsState {
232 int node;
233 // Points at the first child node that we did *not* yet look at.
234 std::size_t adj_list_index;
235 explicit DfsState(int _node) : node(_node), adj_list_index(0) {}
236 };
237 std::vector<DfsState> dfs_stack;
238 std::vector<bool> in_cur_stack(num_nodes, false);
239 for (int start_node = 0; start_node < num_nodes; ++start_node) {
240 if (no_cycle_reachable_from[start_node]) {
241 continue;
242 }
243 // Start the DFS.
244 dfs_stack.push_back(DfsState(start_node));
245 in_cur_stack[start_node] = true;
246 while (!dfs_stack.empty()) {
247 DfsState* cur_state = &dfs_stack.back();
248 if (cur_state->adj_list_index >=
249 adjacency_lists_[cur_state->node].size()) {
250 no_cycle_reachable_from[cur_state->node] = true;
251 in_cur_stack[cur_state->node] = false;
252 dfs_stack.pop_back();
253 continue;
254 }
255 // Look at the current child, and increase the current state's
256 // adj_list_index.
257 const int child =
258 adjacency_lists_[cur_state->node][cur_state->adj_list_index];
259 ++(cur_state->adj_list_index);
260 if (no_cycle_reachable_from[child]) {
261 continue;
262 }
263 if (in_cur_stack[child]) {
264 // We detected a cycle! Fill it and return.
265 for (;;) {
266 cycle_nodes->push_back(dfs_stack.back().node);
267 if (dfs_stack.back().node == child) {
268 std::reverse(cycle_nodes->begin(), cycle_nodes->end());
269 return;
270 }
271 dfs_stack.pop_back();
272 }
273 }
274 // Push the child onto the stack.
275 dfs_stack.push_back(DfsState(child));
276 in_cur_stack[child] = true;
277 }
278 }
279 // If we're here, then all the DFS stopped, and they never encountered
280 // a cycle (otherwise, we would have returned). Just exit; the output
281 // vector has been cleared already.
282}
283
// Generate the templated code. Including these definitions allows us
// to have templated code inside the .cc file and not incur linker errors.
// NOTE(review): the explicit template instantiation lines that followed this
// comment (e.g. `template class DenseIntTopologicalSorterTpl<false>;` and
// the stable variant) were lost in the doc extraction -- restore them from
// the original file.
289} // namespace internal
290
292 int num_nodes, const std::vector<std::pair<int, int>>& arcs) {
293 std::vector<int> cycle;
294 if (num_nodes < 1) {
295 return cycle;
296 }
297 internal::DenseIntTopologicalSorterTpl</* stable= */ false> sorter(num_nodes);
298 for (const auto& arc : arcs) {
299 sorter.AddEdge(arc.first, arc.second);
300 }
301 sorter.ExtractCycle(&cycle);
302 return cycle;
303}
304} // namespace util
int64_t max
Definition: alldiff_cst.cc:140
#define CHECK(condition)
Definition: base/logging.h:495
#define CHECK_GE(val1, val2)
Definition: base/logging.h:707
#define DCHECK(condition)
Definition: base/logging.h:890
#define VLOG(verboselevel)
Definition: base/logging.h:984
void ExtractCycle(std::vector< int > *cycle_nodes) const
int arc
static const int kLazyDuplicateDetectionSizeThreshold
std::vector< int > FindCycleInDenseIntGraph(int num_nodes, const std::vector< std::pair< int, int > > &arcs)