C++ Reference

C++ Reference: Graph

cliques.h
Go to the documentation of this file.
1// Copyright 2010-2021 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14//
15// Maximal clique algorithms, based on the Bron-Kerbosch algorithm.
16// See http://en.wikipedia.org/wiki/Bron-Kerbosch_algorithm
17// and
18// C. Bron and J. Kerbosch, Joep, "Algorithm 457: finding all cliques of an
19// undirected graph", CACM 16 (9): 575-577, 1973.
20// http://dl.acm.org/citation.cfm?id=362367&bnc=1.
21//
22// Keywords: undirected graph, clique, clique cover, Bron, Kerbosch.
23
24#ifndef OR_TOOLS_GRAPH_CLIQUES_H_
25#define OR_TOOLS_GRAPH_CLIQUES_H_
26
27#include <cstdint>
28#include <functional>
29#include <limits>
30#include <numeric>
31#include <vector>
32
33#include "absl/strings/str_cat.h"
34#include "ortools/base/logging.h"
35#include "ortools/base/strong_int.h"
36#include "ortools/base/strong_vector.h"
37#include "ortools/util/time_limit.h"
38
39namespace operations_research {
40
// Finds all maximal cliques, even of size 1, in the graph described by the
// 'graph' callback: graph(i, j) indicates if there is an arc between i and j.
// Both 'graph' and 'callback' are std::function objects passed by value; no
// ownership of caller-side objects is transferred.
// If 'callback' returns true, then the search for cliques stops.
void FindCliques(std::function<bool(int, int)> graph, int node_count,
                 std::function<bool(const std::vector<int>&)> callback);
48
// Covers the maximum number of arcs of the graph with cliques. The graph is
// described by the 'graph' callback: graph(i, j) indicates if there is an arc
// between i and j.
// Both 'graph' and 'callback' are std::function objects passed by value; no
// ownership of caller-side objects is transferred.
// It calls 'callback' upon each clique.
// It ignores cliques of size 1.
void CoverArcsByCliques(std::function<bool(int, int)> graph, int node_count,
                        std::function<bool(const std::vector<int>&)> callback);
57
// Possible return values of the callback for reporting cliques. The returned
// value determines whether the algorithm will continue the search.
enum class CliqueResponse {
  // The algorithm will continue searching for other maximal cliques.
  CONTINUE,
  // The algorithm will stop the search immediately. The search can be resumed
  // by calling BronKerboschAlgorithm::Run (resp. RunIterations) again.
  STOP
};
67
// The status value returned by BronKerboschAlgorithm::Run and
// BronKerboschAlgorithm::RunIterations.
enum class BronKerboschAlgorithmStatus {
  // The algorithm has enumerated all maximal cliques.
  COMPLETED,
  // The search algorithm was interrupted either because it reached the
  // iteration limit or because the clique callback returned
  // CliqueResponse::STOP.
  INTERRUPTED
};
78
79// Implements the Bron-Kerbosch algorithm for finding maximal cliques.
80// The graph is represented as a callback that gets two nodes as its arguments
81// and it returns true if and only if there is an arc between the two nodes. The
82// cliques are reported back to the user using a second callback.
83//
84// Typical usage:
85// auto graph = [](int node1, int node2) { return true; };
86// auto on_clique = [](const std::vector<int>& clique) {
87// LOG(INFO) << "Clique!";
88// };
89//
90// BronKerboschAlgorithm<int> bron_kerbosch(graph, num_nodes, on_clique);
91// bron_kerbosch.Run();
92//
93// or:
94//
95// BronKerboschAlgorithm<int> bron_kerbosch(graph, num_nodes, on_clique);
96// bron_kerbosch.RunIterations(kMaxNumIterations);
97//
98// This is a non-recursive implementation of the Bron-Kerbosch algorithm with
99// pivots as described in the paper by Bron and Kerbosch (1973) (the version 2
100// algorithm in the paper).
101// The basic idea of the algorithm is to incrementally build the cliques using
102// depth-first search. During the search, the algorithm maintains two sets of
103// candidates (nodes that are connected to all nodes in the current clique):
104// - the "not" set - these are candidates that were already visited by the
105// search and all the maximal cliques that contain them as a part of the
106// current clique were already reported.
107// - the actual candidates - these are candidates that were not visited yet, and
108// they can be added to the clique.
109// In each iteration, the algorithm does the first of the following actions that
110// applies:
111// A. If there are no actual candidates and there are candidates in the "not"
112// set, or if all actual candidates are connected to the same node in the
113// "not" set, the current clique can't be extended to a maximal clique that
114// was not already reported. Return from the recursive call and move the
115// selected candidate to the set "not".
116// B. If there are no candidates at all, it means that the current clique can't
117// be extended and that it is in fact a maximal clique. Report it to the user
118// and return from the recursive call. Move the selected candidate to the set
119// "not".
120// C. Otherwise, there are actual candidates, extend the current clique with one
121// of these candidates and process it recursively.
122//
123// To avoid unnecessary steps, the algorithm selects a pivot at each level of
124// the recursion to guide the selection of candidates added to the current
125// clique. The pivot can be either in the "not" set or among the actual
126// candidates. The algorithm tries to move the pivot and all actual candidates
127// connected to it to the set "not" as quickly as possible. This will fulfill
128// the conditions of step A, and the search algorithm will be able to leave the
129// current branch. Selecting a pivot that has the lowest number of disconnected
130// nodes among the candidates can reduce the running time significantly.
131//
132// The worst-case maximal depth of the recursion is equal to the number of nodes
133// in the graph, which makes the natural recursive implementation impractical
134// for graphs with more than a few thousand nodes. To avoid the limitation,
135// this class simulates the recursion by maintaining a stack with the state at
136// each level of the recursion. The algorithm then runs in a loop. In each
137// iteration, the algorithm can do one or both of:
138// 1. Return to the previous recursion level (step A or B of the algorithm) by
139// removing the top state from the stack.
140// 2. Select the next candidate and enter the next recursion level (step C of
141// the algorithm) by adding a new state to the stack.
142//
143// The worst-case time complexity of the algorithm is O(3^(N/3)), and the memory
144// complexity is O(N^2), where N is the number of nodes in the graph.
145template <typename NodeIndex>
147 public:
148 // A callback called by the algorithm to test if there is an arc between a
149 // pair of nodes. The callback must return true if and only if there is an
150 // arc. Note that to function properly, the function must be symmetrical
151 // (represent an undirected graph).
152 using IsArcCallback = std::function<bool(NodeIndex, NodeIndex)>;
153 // A callback called by the algorithm to report a maximal clique to the user.
154 // The clique is returned as a list of nodes in the clique, in no particular
155 // order. The caller must make a copy of the vector if they want to keep the
156 // nodes.
157 //
158 // The return value of the callback controls how the algorithm continues after
159 // this clique. See the description of the values of 'CliqueResponse' for more
160 // details.
162 std::function<CliqueResponse(const std::vector<NodeIndex>&)>;
163
164 // Initializes the Bron-Kerbosch algorithm for the given graph and clique
165 // callback function.
167 CliqueCallback clique_callback)
168 : is_arc_(std::move(is_arc)),
169 clique_callback_(std::move(clique_callback)),
170 num_nodes_(num_nodes) {}
171
172 // Runs the Bron-Kerbosch algorithm for kint64max iterations. In practice,
173 // this is equivalent to running until completion or until the clique callback
174 // returns BronKerboschAlgorithmStatus::STOP. If the method returned because
175 // the search is finished, it will return COMPLETED; otherwise, it will return
176 // INTERRUPTED and it can be resumed by calling this method again.
178
179 // Runs at most 'max_num_iterations' iterations of the Bron-Kerbosch
180 // algorithm. When this function returns INTERRUPTED, there is still work to
181 // be done to process all the cliques in the graph. In such case the method
182 // can be called again and it will resume the work where the previous call had
183 // stopped. When it returns COMPLETED any subsequent call to the method will
184 // resume the search from the beginning.
185 BronKerboschAlgorithmStatus RunIterations(int64_t max_num_iterations);
186
187 // Runs at most 'max_num_iterations' iterations of the Bron-Kerbosch
188 // algorithm, until the time limit is exceeded or until all cliques are
189 // enumerated. When this function returns INTERRUPTED, there is still work to
190 // be done to process all the cliques in the graph. In such case the method
191 // can be called again and it will resume the work where the previous call had
192 // stopped. When it returns COMPLETED any subsequent call to the method will
193 // resume the search from the beginning.
194 BronKerboschAlgorithmStatus RunWithTimeLimit(int64_t max_num_iterations,
195 TimeLimit* time_limit);
196
197 // Runs the Bron-Kerbosch algorithm for at most kint64max iterations, until
198 // the time limit is excceded or until all cliques are enumerated. In
199 // practice, running the algorithm for kint64max iterations is equivalent to
200 // running until completion or until the other stopping conditions apply. When
201 // this function returns INTERRUPTED, there is still work to be done to
202 // process all the cliques in the graph. In such case the method can be called
203 // again and it will resume the work where the previous call had stopped. When
204 // it returns COMPLETED any subsequent call to the method will resume the
205 // search from the beginning.
207 return RunWithTimeLimit(std::numeric_limits<int64_t>::max(), time_limit);
208 }
209
210 private:
211 DEFINE_STRONG_INT_TYPE(CandidateIndex, ptrdiff_t);
212
213 // A data structure that maintains the variables of one "iteration" of the
214 // search algorithm. These are the variables that would normally be allocated
215 // on the stack in the recursive implementation.
216 //
217 // Note that most of the variables in the structure are explicitly left
218 // uninitialized by the constructor to avoid wasting resources on values that
219 // will be overwritten anyway. Most of the initialization is done in
220 // BronKerboschAlgorithm::InitializeState.
221 struct State {
222 State() {}
223 State(const State& other)
224 : pivot(other.pivot),
225 num_remaining_candidates(other.num_remaining_candidates),
226 candidates(other.candidates),
227 first_candidate_index(other.first_candidate_index),
228 candidate_for_recursion(other.candidate_for_recursion) {}
229
230 State& operator=(const State& other) {
231 pivot = other.pivot;
232 num_remaining_candidates = other.num_remaining_candidates;
233 candidates = other.candidates;
234 first_candidate_index = other.first_candidate_index;
235 candidate_for_recursion = other.candidate_for_recursion;
236 return *this;
237 }
238
239 // Moves the first candidate in the state to the "not" set. Assumes that the
240 // first candidate is also the pivot or a candidate disconnected from the
241 // pivot (as done by RunIteration).
242 inline void MoveFirstCandidateToNotSet() {
243 ++first_candidate_index;
244 --num_remaining_candidates;
245 }
246
247 // Creates a human-readable representation of the current state.
248 std::string DebugString() {
249 std::string buffer;
250 absl::StrAppend(&buffer, "pivot = ", pivot,
251 "\nnum_remaining_candidates = ", num_remaining_candidates,
252 "\ncandidates = [");
253 for (CandidateIndex i(0); i < candidates.size(); ++i) {
254 if (i > 0) buffer += ", ";
255 absl::StrAppend(&buffer, candidates[i]);
256 }
257 absl::StrAppend(
258 &buffer, "]\nfirst_candidate_index = ", first_candidate_index.value(),
259 "\ncandidate_for_recursion = ", candidate_for_recursion.value());
260 return buffer;
261 }
262
263 // The pivot node selected for the given level of the recursion.
264 NodeIndex pivot;
265 // The number of remaining candidates to be explored at the given level of
266 // the recursion; the number is computed as num_disconnected_nodes +
267 // pre_increment in the original algorithm.
268 int num_remaining_candidates;
269 // The list of nodes that are candidates for extending the current clique.
270 // This vector has the format proposed in the paper by Bron-Kerbosch; the
271 // first 'first_candidate_index' elements of the vector represent the
272 // "not" set of nodes that were already visited by the algorithm. The
273 // remaining elements are the actual candidates for extending the current
274 // clique.
275 // NOTE(user): We could store the delta between the iterations; however,
276 // we need to evaluate the impact this would have on the performance.
277 absl::StrongVector<CandidateIndex, NodeIndex> candidates;
278 // The index of the first actual candidate in 'candidates'. This number is
279 // also the number of elements of the "not" set stored at the beginning of
280 // 'candidates'.
281 CandidateIndex first_candidate_index;
282
283 // The current position in candidates when looking for the pivot and/or the
284 // next candidate disconnected from the pivot.
285 CandidateIndex candidate_for_recursion;
286 };
287
288 // The deterministic time coefficients for the push and pop operations of the
289 // Bron-Kerbosch algorithm. The coefficients are set to match approximately
290 // the running time in seconds on a recent workstation on the random graph
291 // benchmark.
292 // NOTE(user): PushState is not the only source of complexity in the
293 // algorithm, but non-negative linear least squares produced zero coefficients
294 // for all other deterministic counters tested during the benchmarking. When
295 // we optimize the algorithm, we might need to add deterministic time to the
296 // other places that may produce complexity, namely InitializeState, PopState
297 // and SelectCandidateIndexForRecursion.
298 static const double kPushStateDeterministicTimeSecondsPerCandidate;
299
300 // Initializes the root state of the algorithm.
301 void Initialize();
302
303 // Removes the top state from the state stack. This is equivalent to returning
304 // in the recursive implementation of the algorithm.
305 void PopState();
306
307 // Adds a new state to the top of the stack, adding the node 'selected' to the
308 // current clique. This is equivalent to making a recurisve call in the
309 // recursive implementation of the algorithm.
310 void PushState(NodeIndex selected);
311
312 // Initializes the given state. Runs the pivot selection algorithm in the
313 // state.
314 void InitializeState(State* state);
315
316 // Returns true if (node1, node2) is an arc in the graph or if node1 == node2.
317 inline bool IsArc(NodeIndex node1, NodeIndex node2) const {
318 return node1 == node2 || is_arc_(node1, node2);
319 }
320
321 // Selects the next node for recursion. The selected node is either the pivot
322 // (if it is not in the set "not") or a node that is disconnected from the
323 // pivot.
324 CandidateIndex SelectCandidateIndexForRecursion(State* state);
325
326 // Returns a human-readable string representation of the clique.
327 std::string CliqueDebugString(const std::vector<NodeIndex>& clique);
328
329 // The callback called when the algorithm needs to determine if (node1, node2)
330 // is an arc in the graph.
331 IsArcCallback is_arc_;
332
333 // The callback called when the algorithm discovers a maximal clique. The
334 // return value of the callback controls how the algorithm proceeds with the
335 // clique search.
336 CliqueCallback clique_callback_;
337
338 // The number of nodes in the graph.
339 const NodeIndex num_nodes_;
340
341 // Contains the state of the aglorithm. The vector serves as an external stack
342 // for the recursive part of the algorithm - instead of using the C++ stack
343 // and natural recursion, it is implemented as a loop and new states are added
344 // to the top of the stack. The algorithm ends when the stack is empty.
345 std::vector<State> states_;
346
347 // A vector that receives the current clique found by the algorithm.
348 std::vector<NodeIndex> current_clique_;
349
350 // Set to true if the algorithm is active (it was not stopped by an the clique
351 // callback).
352 int64_t num_remaining_iterations_;
353
354 // The current time limit used by the solver. The time limit is assigned by
355 // the Run methods and it can be different for each call to run.
356 TimeLimit* time_limit_;
357};
358
359template <typename NodeIndex>
360void BronKerboschAlgorithm<NodeIndex>::InitializeState(State* state) {
361 DCHECK(state != nullptr);
362 const int num_candidates = state->candidates.size();
363 int num_disconnected_candidates = num_candidates;
364 state->pivot = 0;
365 CandidateIndex pivot_index(-1);
366 for (CandidateIndex pivot_candidate_index(0);
367 pivot_candidate_index < num_candidates &&
368 num_disconnected_candidates > 0;
369 ++pivot_candidate_index) {
370 const NodeIndex pivot_candidate = state->candidates[pivot_candidate_index];
371 int count = 0;
372 for (CandidateIndex i(state->first_candidate_index); i < num_candidates;
373 ++i) {
374 if (!IsArc(pivot_candidate, state->candidates[i])) {
375 ++count;
376 }
377 }
378 if (count < num_disconnected_candidates) {
379 pivot_index = pivot_candidate_index;
380 state->pivot = pivot_candidate;
381 num_disconnected_candidates = count;
382 }
383 }
384 state->num_remaining_candidates = num_disconnected_candidates;
385 if (pivot_index >= state->first_candidate_index) {
386 std::swap(state->candidates[pivot_index],
387 state->candidates[state->first_candidate_index]);
388 ++state->num_remaining_candidates;
389 }
390}
391
392template <typename NodeIndex>
393typename BronKerboschAlgorithm<NodeIndex>::CandidateIndex
394BronKerboschAlgorithm<NodeIndex>::SelectCandidateIndexForRecursion(
395 State* state) {
396 DCHECK(state != nullptr);
397 CandidateIndex disconnected_node_index =
398 std::max(state->first_candidate_index, state->candidate_for_recursion);
399 while (disconnected_node_index < state->candidates.size() &&
400 state->candidates[disconnected_node_index] != state->pivot &&
401 IsArc(state->pivot, state->candidates[disconnected_node_index])) {
402 ++disconnected_node_index;
403 }
404 state->candidate_for_recursion = disconnected_node_index;
405 return disconnected_node_index;
406}
407
408template <typename NodeIndex>
409void BronKerboschAlgorithm<NodeIndex>::Initialize() {
410 DCHECK(states_.empty());
411 states_.reserve(num_nodes_);
412 states_.emplace_back();
413
414 State* const root_state = &states_.back();
415 root_state->first_candidate_index = 0;
416 root_state->candidate_for_recursion = 0;
417 root_state->candidates.resize(num_nodes_, 0);
418 std::iota(root_state->candidates.begin(), root_state->candidates.end(), 0);
419 root_state->num_remaining_candidates = num_nodes_;
420 InitializeState(root_state);
421
422 DVLOG(2) << "Initialized";
423}
424
425template <typename NodeIndex>
426void BronKerboschAlgorithm<NodeIndex>::PopState() {
427 DCHECK(!states_.empty());
428 states_.pop_back();
429 if (!states_.empty()) {
430 State* const state = &states_.back();
431 current_clique_.pop_back();
432 state->MoveFirstCandidateToNotSet();
433 }
434}
435
436template <typename NodeIndex>
437std::string BronKerboschAlgorithm<NodeIndex>::CliqueDebugString(
438 const std::vector<NodeIndex>& clique) {
439 std::string message = "Clique: [ ";
440 for (const NodeIndex node : clique) {
441 absl::StrAppend(&message, node, " ");
442 }
443 message += "]";
444 return message;
445}
446
447template <typename NodeIndex>
448void BronKerboschAlgorithm<NodeIndex>::PushState(NodeIndex selected) {
449 DCHECK(!states_.empty());
450 DCHECK(time_limit_ != nullptr);
451 DVLOG(2) << "PushState: New depth = " << states_.size() + 1
452 << ", selected node = " << selected;
453 absl::StrongVector<CandidateIndex, NodeIndex> new_candidates;
454
455 State* const previous_state = &states_.back();
456 const double deterministic_time =
457 kPushStateDeterministicTimeSecondsPerCandidate *
458 previous_state->candidates.size();
459 time_limit_->AdvanceDeterministicTime(deterministic_time, "PushState");
460
461 // Add all candidates from previous_state->candidates that are connected to
462 // 'selected' in the graph to the vector 'new_candidates', skipping the node
463 // 'selected'; this node is always at the position
464 // 'previous_state->first_candidate_index', so we can skip it by skipping the
465 // element at this particular index.
466 new_candidates.reserve(previous_state->candidates.size());
467 for (CandidateIndex i(0); i < previous_state->first_candidate_index; ++i) {
468 const NodeIndex candidate = previous_state->candidates[i];
469 if (IsArc(selected, candidate)) {
470 new_candidates.push_back(candidate);
471 }
472 }
473 const CandidateIndex new_first_candidate_index(new_candidates.size());
474 for (CandidateIndex i = previous_state->first_candidate_index + 1;
475 i < previous_state->candidates.size(); ++i) {
476 const NodeIndex candidate = previous_state->candidates[i];
477 if (IsArc(selected, candidate)) {
478 new_candidates.push_back(candidate);
479 }
480 }
481
482 current_clique_.push_back(selected);
483 if (new_candidates.empty()) {
484 // We've found a clique. Report it to the user, but do not push the state
485 // because it would be popped immediately anyway.
486 DVLOG(2) << CliqueDebugString(current_clique_);
487 const CliqueResponse response = clique_callback_(current_clique_);
488 if (response == CliqueResponse::STOP) {
489 // The number of remaining iterations will be decremented at the end of
490 // the loop in RunIterations; setting it to 0 here would make it -1 at
491 // the end of the main loop.
492 num_remaining_iterations_ = 1;
493 }
494 current_clique_.pop_back();
495 previous_state->MoveFirstCandidateToNotSet();
496 return;
497 }
498
499 // NOTE(user): The following line may invalidate previous_state (if the
500 // vector data was re-allocated in the process). We must avoid using
501 // previous_state below here.
502 states_.emplace_back();
503 State* const new_state = &states_.back();
504 new_state->candidates.swap(new_candidates);
505 new_state->first_candidate_index = new_first_candidate_index;
506
507 InitializeState(new_state);
508}
509
510template <typename NodeIndex>
512 int64_t max_num_iterations, TimeLimit* time_limit) {
513 CHECK(time_limit != nullptr);
514 time_limit_ = time_limit;
515 if (states_.empty()) {
516 Initialize();
517 }
518 for (num_remaining_iterations_ = max_num_iterations;
519 !states_.empty() && num_remaining_iterations_ > 0 &&
520 !time_limit->LimitReached();
521 --num_remaining_iterations_) {
522 State* const state = &states_.back();
523 DVLOG(2) << "Loop: " << states_.size() << " states, "
524 << state->num_remaining_candidates << " candidate to explore\n"
525 << state->DebugString();
526 if (state->num_remaining_candidates == 0) {
527 PopState();
528 continue;
529 }
530
531 const CandidateIndex selected_index =
532 SelectCandidateIndexForRecursion(state);
533 DVLOG(2) << "selected_index = " << selected_index;
534 const NodeIndex selected = state->candidates[selected_index];
535 DVLOG(2) << "Selected candidate = " << selected;
536
537 NodeIndex& f = state->candidates[state->first_candidate_index];
538 NodeIndex& s = state->candidates[selected_index];
539 std::swap(f, s);
540
541 PushState(selected);
542 }
543 time_limit_ = nullptr;
544 return states_.empty() ? BronKerboschAlgorithmStatus::COMPLETED
546}
547
548template <typename NodeIndex>
550 int64_t max_num_iterations) {
551 TimeLimit time_limit(std::numeric_limits<double>::infinity());
552 return RunWithTimeLimit(max_num_iterations, &time_limit);
553}
554
555template <typename NodeIndex>
557 return RunIterations(std::numeric_limits<int64_t>::max());
558}
559
560template <typename NodeIndex>
561const double BronKerboschAlgorithm<
562 NodeIndex>::kPushStateDeterministicTimeSecondsPerCandidate = 0.54663e-7;
563} // namespace operations_research
564
565#endif // OR_TOOLS_GRAPH_CLIQUES_H_
BronKerboschAlgorithmStatus Run()
Definition: cliques.h:556
BronKerboschAlgorithm(IsArcCallback is_arc, NodeIndex num_nodes, CliqueCallback clique_callback)
Definition: cliques.h:166
BronKerboschAlgorithmStatus RunIterations(int64_t max_num_iterations)
Definition: cliques.h:549
BronKerboschAlgorithmStatus RunWithTimeLimit(int64_t max_num_iterations, TimeLimit *time_limit)
Definition: cliques.h:511
BronKerboschAlgorithmStatus RunWithTimeLimit(TimeLimit *time_limit)
Definition: cliques.h:206
std::function< bool(NodeIndex, NodeIndex)> IsArcCallback
Definition: cliques.h:152
std::function< CliqueResponse(const std::vector< NodeIndex > &)> CliqueCallback
Definition: cliques.h:162
void FindCliques(std::function< bool(int, int)> graph, int node_count, std::function< bool(const std::vector< int > &)> callback)
void CoverArcsByCliques(std::function< bool(int, int)> graph, int node_count, std::function< bool(const std::vector< int > &)> callback)