C++ Reference

C++ Reference: Graph

strongly_connected_components.h
Go to the documentation of this file.
1 // Copyright 2010-2018 Google LLC
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
14 // This code computes the strongly connected components of a directed graph,
15 // and presents them sorted by reverse topological order.
16 //
17 // It implements an efficient version of Tarjan's strongly connected components
18 // algorithm published in: Tarjan, R. E. (1972), "Depth-first search and linear
19 // graph algorithms", SIAM Journal on Computing.
20 //
21 // A description can also be found here:
22 // http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
23 //
24 // SIMPLE EXAMPLE:
25 //
26 // Fill a std::vector<std::vector<int>> graph; representing your graph adjacency
27 // lists. That is, graph[i] contains the nodes adjacent to node #i. The nodes
28 // must be integers in [0, num_nodes). Then just do:
29 //
30 // std::vector<std::vector<int>> components;
31 // FindStronglyConnectedComponents(
32 // static_cast<int>(graph.size()), graph, &components);
33 //
34 // The nodes of each strongly connected component will be listed in each
35 // subvector of components. The components appear in reverse topological order:
36 // outgoing arcs from a component will only be towards earlier components.
37 //
38 // IMPORTANT: num_nodes will be the number of nodes of the graph. Its type
39 // is the type used internally by the algorithm. This is why it is better to
40 // convert it to int or even int32 rather than using size_t which takes 64 bits.
41 
42 #ifndef UTIL_GRAPH_STRONGLY_CONNECTED_COMPONENTS_H_
43 #define UTIL_GRAPH_STRONGLY_CONNECTED_COMPONENTS_H_
44 
45 #include <limits>
46 #include <vector>
47 
48 #include "ortools/base/logging.h"
49 #include "ortools/base/macros.h"
50 
51 // Finds the strongly connected components of a directed graph. It is templated
52 // so it can be used in many contexts. See the simple example above for the
53 // easiest use case.
54 //
55 // The requirements on the different types are:
56 // - The type NodeIndex must be an integer type representing a node of the
57 // graph. The nodes must be in [0, num_nodes). It can be unsigned.
58 // - The type Graph must provide a [] operator such that the following code
59 // iterates over the adjacency list of the given node:
60 // for (const NodeIndex head : graph[node]) {}
61 // - The type SccOutput must implement the function:
62 // emplace_back(NodeIndex const* begin, NodeIndex const* end);
63 // It will be called with the strongly connected components of the given graph
64 // as they are found (in reverse topological order).
65 //
66 // More practical details on the algorithm:
67 // - It deals properly with self-loops and duplicate nodes.
68 // - It is really fast and works in O(nodes + edges).
69 // - Its memory usage is also bounded by O(nodes + edges) but in practice it
70 // uses less than the input graph.
71 template <typename NodeIndex, typename Graph, typename SccOutput>
72 void FindStronglyConnectedComponents(const NodeIndex num_nodes,
73  const Graph& graph, SccOutput* components);
74 
75 // A simple custom output class that just counts the number of SCC. Not
76 // allocating many vectors can save both space and speed if your graph is large.
77 //
78 // Note: If this matters, you probably don't want to use
79 // std::vector<std::vector<int>> as an input either. See StaticGraph in
80 // ortools/graph/graph.h for an efficient graph data structure compatible with
81 // this algorithm.
82 template <typename NodeIndex>
85  void emplace_back(NodeIndex const* b, NodeIndex const* e) {
87  }
88  // This is just here so this class can transparently replace code that
89  // uses std::vector<std::vector<int>> as an SccOutput, and get its size with
90  // size().
91  int size() const { return number_of_components; }
92 };
93 
94 // This implementation is slightly different than a classical iterative version
95 // of Tarjan's strongly connected components algorithm. But basically it is
96 // still an iterative DFS. We use a class so memory can be reused if one needs
97 // to compute many SCC in a row. It also allows more complex behavior in the
98 // Graph or SccOutput class that might inspect the current state of the
99 // algorithm.
100 //
101 // TODO(user): Possible optimizations:
102 // - Try to reserve the vectors whose sizes are bounded by num_nodes.
103 // - Use an index rather than doing push_back(), pop_back() on them.
104 template <typename NodeIndex, typename Graph, typename SccOutput>
106  public:
108  const Graph& graph,
109  SccOutput* components) {
110  // Reset the class fields.
111  scc_stack_.clear();
112  scc_start_index_.clear();
113  node_index_.assign(num_nodes, 0);
114  node_to_process_.clear();
115 
116  // Optimization. This will always be equal to scc_start_index_.back() except
117  // when scc_stack_ is empty, in which case its value does not matter.
118  NodeIndex current_scc_start = 0;
119 
120  // Loop over all the nodes not yet settled and start a DFS from each of
121  // them.
122  for (NodeIndex base_node = 0; base_node < num_nodes; ++base_node) {
123  if (node_index_[base_node] != 0) continue;
124  DCHECK_EQ(0, node_to_process_.size());
125  node_to_process_.push_back(base_node);
126  do {
127  const NodeIndex node = node_to_process_.back();
128  const NodeIndex index = node_index_[node];
129  if (index == 0) {
130  // We continue the dfs from this node and set its 1-based index.
131  scc_stack_.push_back(node);
132  current_scc_start = scc_stack_.size();
133  node_index_[node] = current_scc_start;
134  scc_start_index_.push_back(current_scc_start);
135 
136  // Enqueue all its adjacent nodes.
137  NodeIndex min_head_index = kSettledIndex;
138  for (const NodeIndex head : graph[node]) {
139  const NodeIndex head_index = node_index_[head];
140  if (head_index == 0) {
141  node_to_process_.push_back(head);
142  } else {
143  // Note that if head_index == kSettledIndex, nothing happens.
144  min_head_index = std::min(min_head_index, head_index);
145  }
146  }
147 
148  // Update the start of this strongly connected component.
149  // Note that scc_start_index_ can never be empty since its first
150  // element is 1 and by definition min_head_index is 1-based and can't
151  // be 0.
152  while (current_scc_start > min_head_index) {
153  scc_start_index_.pop_back();
154  current_scc_start = scc_start_index_.back();
155  }
156  } else {
157  node_to_process_.pop_back();
158  if (current_scc_start == index) {
159  // We found a strongly connected component.
160  components->emplace_back(&scc_stack_[current_scc_start - 1],
161  &scc_stack_[0] + scc_stack_.size());
162  for (int i = current_scc_start - 1; i < scc_stack_.size(); ++i) {
163  node_index_[scc_stack_[i]] = kSettledIndex;
164  }
165  scc_stack_.resize(current_scc_start - 1);
166  scc_start_index_.pop_back();
167  current_scc_start =
168  scc_start_index_.empty() ? 0 : scc_start_index_.back();
169  }
170  }
171  } while (!node_to_process_.empty());
172  }
173  }
174 
175  // Advanced usage. This can be used in either the Graph or SccOutput template
176  // class to query the current state of the algorithm. It allows building more
177  // complex variants based on the core DFS algo.
179  return node_index_[node] > 0 && node_index_[node] < kSettledIndex;
180  }
181 
182  private:
183  static constexpr NodeIndex kSettledIndex =
184  std::numeric_limits<NodeIndex>::max();
185 
186  // Each node expanded by the DFS will be pushed on this stack. A node is only
187  // popped back when its strongly connected component has been explored and
188  // outputted.
189  std::vector<NodeIndex> scc_stack_;
190 
191  // This is equivalent to the "low link" of a node in Tarjan's algorithm.
192  // Basically, scc_start_index_.back() represents the 1-based index in
193  // scc_stack_ of the beginning of the current strongly connected component.
194  // All the nodes after this index will be on the same component.
195  std::vector<NodeIndex> scc_start_index_;
196 
197  // Each node is assigned an index which changes 2 times in the algorithm:
198  // - Everyone starts with an index of 0 which means unexplored.
199  // - The first time they are explored by the DFS and pushed on scc_stack_,
200  // they get their 1-based index on this stack.
201  // - Once they have been processed and outputted to components, they are said
202  // to be settled, and their index becomes kSettledIndex.
203  std::vector<NodeIndex> node_index_;
204 
205  // This is a well known way to do an efficient iterative DFS. Each time a node
206  // is explored, all its adjacent nodes are pushed on this stack. The iterative
207  // dfs processes the nodes one by one by popping them back from here.
208  std::vector<NodeIndex> node_to_process_;
209 };
210 
211 // Simple wrapper function for most usage.
212 template <typename NodeIndex, typename Graph, typename SccOutput>
214  const Graph& graph,
215  SccOutput* components) {
217  return helper.FindStronglyConnectedComponents(num_nodes, graph, components);
218 }
219 
220 #endif // UTIL_GRAPH_STRONGLY_CONNECTED_COMPONENTS_H_
int size() const
ListGraph Graph
Definition: graph.h:2356
void emplace_back(NodeIndex const *b, NodeIndex const *e)
void FindStronglyConnectedComponents(const NodeIndex num_nodes, const Graph &graph, SccOutput *components)
bool NodeIsInCurrentDfsPath(NodeIndex node) const
int32 NodeIndex
Definition: ebert_graph.h:192
void FindStronglyConnectedComponents(const NodeIndex num_nodes, const Graph &graph, SccOutput *components)
int number_of_components