OR-Tools  9.2
perfect_matching.h
Go to the documentation of this file.
1 // Copyright 2010-2021 Google LLC
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
14 // Implementation of the Blossom V min-cost perfect matching algorithm. The
15 // main source for the algo is the paper: "Blossom V: A new implementation
16 // of a minimum cost perfect matching algorithm", Vladimir Kolmogorov.
17 //
18 // The Algorithm is a primal-dual algorithm. It always maintains a dual-feasible
19 // solution. We recall some notations here, but see the paper for more details
20 // as it is well written.
21 //
22 // TODO(user): This is a work in progress. The algo is not fully implemented
23 // yet. The initial version is closer to Blossom IV since we update the dual
24 // values for all trees at once with the same delta.
25 
26 #ifndef OR_TOOLS_GRAPH_PERFECT_MATCHING_H_
27 #define OR_TOOLS_GRAPH_PERFECT_MATCHING_H_
28 
29 #include <cstdint>
30 #include <limits>
31 #include <vector>
32 
33 #include "absl/base/attributes.h"
34 #include "absl/strings/str_cat.h"
35 #include "absl/strings/str_join.h"
38 #include "ortools/base/int_type.h"
40 #include "ortools/base/logging.h"
41 #include "ortools/base/macros.h"
43 
44 namespace operations_research {
45 
46 class BlossomGraph;
47 
48 // Given an undirected graph with costs on each edges, this class allows to
49 // compute a perfect matching with minimum cost. A matching is a set of disjoint
50 // pairs of nodes connected by an edge. The matching is perfect if all nodes are
51 // matched to each others.
53  public:
54  // TODO(user): For now we ask the number of nodes at construction, but we
55  // could automatically infer it from the added edges if needed.
57  explicit MinCostPerfectMatching(int num_nodes) { Reset(num_nodes); }
58 
59  // Resets the class for a new graph.
60  //
61  // TODO(user): Eventually, we may support incremental Solves(). Or at least
62  // memory reuse if one wants to solve many problems in a row.
63  void Reset(int num_nodes);
64 
65  // Adds an undirected edges between the two given nodes.
66  //
67  // For now we only accept non-negative cost.
68  // TODO(user): We can easily shift all costs if negative costs are needed.
69  //
70  // Important: The algorithm supports multi-edges, but it will be slower. So it
71  // is better to only add one edge with a minimum cost between two nodes. In
72  // particular, do not add both AddEdge(a, b, cost) and AddEdge(b, a, cost).
73  // TODO(user): We could just presolve them away.
74  void AddEdgeWithCost(int tail, int head, int64_t cost);
75 
76  // Solves the min-cost perfect matching problem on the given graph.
77  //
78  // NOTE(user): If needed we could support a time limit. Aborting early will
79  // not provide a perfect matching, but the algorithm does maintain a valid
80  // lower bound on the optimal cost that gets better and better during
81  // execution until it reaches the optimal value. Similarly, it is easy to
82  // support an early stop if this bound crosses a preset threshold.
83  enum Status {
84  // A perfect matching with min-cost has been found.
85  OPTIMAL = 0,
86 
87  // There is no perfect matching in this graph.
89 
90  // The costs are too large and caused an overflow during the algorithm
91  // execution.
93 
94  // Advanced usage: the matching is OPTIMAL and was computed without
95  // overflow, but its OptimalCost() does not fit on an int64_t. Note that
96  // Match() still work and you can re-compute the cost in double for
97  // instance.
99  };
100  ABSL_MUST_USE_RESULT Status Solve();
101 
102  // Returns the cost of the perfect matching. Only valid when the last solve
103  // status was OPTIMAL.
104  int64_t OptimalCost() const {
105  DCHECK(optimal_solution_found_);
106  return optimal_cost_;
107  }
108 
109  // Returns the node matched to the given node. In a perfect matching all nodes
110  // have a match. Only valid when the last solve status was OPTIMAL.
111  int Match(int node) const {
112  DCHECK(optimal_solution_found_);
113  return matches_[node];
114  }
115  const std::vector<int>& Matches() const {
116  DCHECK(optimal_solution_found_);
117  return matches_;
118  }
119 
120  private:
121  std::unique_ptr<BlossomGraph> graph_;
122 
123  // Fields used to report the optimal solution. Most of it could be read on
124  // the fly from BlossomGraph, but we prefer to copy them here. This allows to
125  // reclaim the memory of graph_ early or allows to still query the last
126  // solution if we later allow re-solve with incremental changes to the graph.
127  bool optimal_solution_found_ = false;
128  int64_t optimal_cost_ = 0;
129  int64_t maximum_edge_cost_ = 0;
130  std::vector<int> matches_;
131 };
132 
133 // Class containing the main data structure used by the Blossom algorithm.
134 //
135 // At the core is the original undirected graph. During the algorithm execution
136 // we might collapse nodes into so-called Blossoms. A Blossom is a cycle of
137 // external nodes (which can be blossom nodes) of odd length (>= 3). The edges
138 // of the cycle are called blossom-forming eges and will always be tight
139 // (i.e. have a slack of zero). Once a Blossom is created, its nodes become
140 // "internal" and are basically considered merged into the blossom node for the
141 // rest of the algorithm (except if we later re-expand the blossom).
142 //
143 // Moreover, external nodes of the graph will have 3 possible types ([+], [-]
144 // and free [0]). Free nodes will always be matched together in pairs. Nodes of
145 // type [+] and [-] are arranged in a forest of alternating [+]/[-] disjoint
146 // trees. Each unmatched node is the root of a tree, and of type [+]. Nodes [-]
147 // will always have exactly one child to witch they are matched. [+] nodes can
148 // have any number of [-] children, to which they are not matched. All the edges
149 // of the trees will always be tight. Some examples below, double edges are used
150 // for matched nodes:
151 //
152 // A matched pair of free nodes: [0] === [0]
153 //
154 // A possible rooted tree: [+] -- [-] ==== [+]
155 // \
156 // [-] ==== [+] ---- [-] === [+]
157 // \
158 // [-] === [+]
159 //
160 // A single unmatched node is also a tree: [+]
161 //
162 // TODO(user): For now this class does not maintain a second graph of edges
163 // between the trees nor does it maintains priority queue of edges.
164 //
165 // TODO(user): For now we use CHECKs in many places to facilitate development.
166 // Switch them to DCHECKs for speed once the code is more stable.
168  public:
169  // Typed index used by this class.
171  DEFINE_INT_TYPE(EdgeIndex, int);
172  DEFINE_INT_TYPE(CostValue, int64_t);
173 
174  // Basic constants.
175  // NOTE(user): Those can't be constexpr because of the or-tools export,
176  // which complains for constexpr DEFINE_INT_TYPE.
177  static const NodeIndex kNoNodeIndex;
178  static const EdgeIndex kNoEdgeIndex;
179  static const CostValue kMaxCostValue;
180 
181  // Node related data.
182  // We store the edges incident to a node separately in the graph_ member.
183  struct Node {
184  explicit Node(NodeIndex n) : parent(n), match(n), root(n) {}
185 
186  // A node can be in one of these 4 exclusive states. Internal nodes are part
187  // of a Blossom and should be ignored until this Blossom is expanded. All
188  // the other nodes are "external". A free node is always matched to another
189  // free node. All the other external node are in alternating [+]/[-] trees
190  // rooted at the only unmatched node of the tree (always of type [+]).
191  bool IsInternal() const {
192  DCHECK(!is_internal || type == 0);
193  return is_internal;
194  }
195  bool IsFree() const { return type == 0 && !is_internal; }
196  bool IsPlus() const { return type == 1; }
197  bool IsMinus() const { return type == -1; }
198 
199  // Is this node a blossom? if yes, it was formed by merging the node.blossom
200  // nodes together. Note that we reuse the index of node.blossom[0] for this
201  // blossom node. A blossom node can be of any type.
202  bool IsBlossom() const { return !blossom.empty(); }
203 
204  // The type of this node. We use an int for convenience in the update
205  // formulas. This is 1 for [+] nodes, -1 for [-] nodes and 0 for all the
206  // others.
207  //
208  // Internal node also have a type of zero so the dual formula are correct.
209  int type = 0;
210 
211  // Whether this node is part of a blossom.
212  bool is_internal = false;
213 
214  // The parent of this node in its tree or itself otherwise.
215  // Unused for internal nodes.
217 
218  // Itself if not matched, or this node match otherwise.
219  // Unused for internal nodes.
221 
222  // The root of this tree which never changes until a tree is disassambled by
223  // an Augment(). Unused for internal nodes.
225 
226  // The "delta" to apply to get the dual for nodes of this tree.
227  // This is only filled for root nodes (i.e unmatched nodes).
229 
230  // See the formula in Dual() used to derive the true dual of this node.
231  // This is the equal to the "true" dual for free exterior node and internal
232  // node.
234 
235 #ifndef NDEBUG
236  // The true dual of this node. We only maintain this in debug mode.
238 #endif
239 
240  // Non-empty for Blossom only. The odd-cycle of blossom nodes that form this
241  // blossom. The first element should always be the current blossom node, and
242  // all the other nodes are internal nodes.
243  std::vector<NodeIndex> blossom;
244 
245  // This allows to store information about a new blossom node created by
246  // Shrink() so that we can properly restore it on Expand(). Note that we
247  // store the saved information on the second node of a blossom cycle (and
248  // not the blossom node itself) because that node will be "hidden" until the
249  // blossom is expanded so this way, we do not need more than one set of
250  // saved information per node.
251 #ifndef NDEBUG
253 #endif
255  std::vector<NodeIndex> saved_blossom;
256  };
257 
258  // An undirected edge between two nodes: tail <-> head.
259  struct Edge {
261  : pseudo_slack(c),
262 #ifndef NDEBUG
263  slack(c),
264 #endif
265  tail(t),
266  head(h) {
267  }
268 
269  // Returns the "other" end of this edge.
271  DCHECK(n == tail || n == head);
272  return NodeIndex(tail.value() ^ head.value() ^ n.value());
273  }
274 
275  // AdjustablePriorityQueue interface. Note that we use std::greater<> in
276  // our queues since we want the lowest pseudo_slack first.
278  int GetHeapIndex() const { return pq_position; }
279  bool operator>(const Edge& other) const {
280  return pseudo_slack > other.pseudo_slack;
281  }
282 
283  // See the formula is Slack() used to derive the true slack of this edge.
285 
286 #ifndef NDEBUG
287  // We only maintain this in debug mode.
289 #endif
290 
291  // These are the current tail/head of this edges. These are changed when
292  // creating or expanding blossoms. The order do not matter.
293  //
294  // TODO(user): Consider using node_a/node_b instead to remove the "directed"
295  // meaning. I do need to think a bit more about it though.
298 
299  // Position of this Edge in the underlying std::vector<> used to encode the
300  // heap of one priority queue. An edge can be in at most one priority queue
301  // which allow us to share this amongst queues.
302  int pq_position = -1;
303  };
304 
305  // Creates a BlossomGraph on the given number of nodes.
306  explicit BlossomGraph(int num_nodes);
307 
308  // Same comment as MinCostPerfectMatching::AddEdgeWithCost() applies.
310 
311  // Heuristic to start with a dual-feasible solution and some matched edges.
312  // To be called once all edges are added. Returns false if the problem is
313  // detected to be INFEASIBLE.
314  ABSL_MUST_USE_RESULT bool Initialize();
315 
316  // Enters a loop that perform one of Grow()/Augment()/Shrink()/Expand() until
317  // a fixed point is reached.
318  void PrimalUpdates();
319 
320  // Computes the maximum possible delta for UpdateAllTrees() that keeps the
321  // dual feasibility. Dual update approach (2) from the paper. This also fills
322  // primal_update_edge_queue_.
324 
325  // Applies the same dual delta to all trees. Dual update approach (2) from the
326  // paper.
328 
329  // Returns true iff this node is matched and is thus not a tree root.
330  // This cannot live in the Node class because we need to know the NodeIndex.
331  bool NodeIsMatched(NodeIndex n) const;
332 
333  // Returns the node matched to the given one, or n if this node is not
334  // currently matched.
335  NodeIndex Match(NodeIndex n) const;
336 
337  // Adds to the tree of tail the free matched pair(head, Match(head)).
338  // The edge is only used in DCHECKs. We duplicate tail/head because the
339  // order matter here.
340  void Grow(EdgeIndex e, NodeIndex tail, NodeIndex head);
341 
342  // Merges two tree and augment the number of matched nodes by 1. This is
343  // the only functions that change the current matching.
344  void Augment(EdgeIndex e);
345 
346  // Creates a Blossom using the given [+] -- [+] edge between two nodes of the
347  // same tree.
348  void Shrink(EdgeIndex e);
349 
350  // Expands a Blossom into its component.
351  void Expand(NodeIndex to_expand);
352 
353  // Returns the current number of matched nodes.
354  int NumMatched() const { return nodes_.size() - unmatched_nodes_.size(); }
355 
356  // Returns the current dual objective which is always a valid lower-bound on
357  // the min-cost matching. Note that this is capped to kint64max in case of
358  // overflow. Because all of our cost are positive, this starts at zero.
359  CostValue DualObjective() const;
360 
361  // This must be called at the end of the algorithm to recover the matching.
362  void ExpandAllBlossoms();
363 
364  // Return the "slack" of the given edge.
365  CostValue Slack(const Edge& edge) const;
366 
367  // Returns the dual value of the given node (which might be a pseudo-node).
368  CostValue Dual(const Node& node) const;
369 
370  // Display to VLOG(1) some statistic about the solve.
371  void DisplayStats() const;
372 
373  // Checks that there is no possible primal update in the current
374  // configuration.
376 
377  // Tests that the dual values are currently feasible.
378  // This should ALWAYS be the case.
379  bool DebugDualsAreFeasible() const;
380 
381  // In debug mode, we maintain the real slack of each edges and the real dual
382  // of each node via this function. Both Slack() and Dual() checks in debug
383  // mode that the value computed is the correct one.
385 
386  // Returns true iff this is an external edge with a slack of zero.
387  // An external edge is an edge between two external nodes.
388  bool DebugEdgeIsTightAndExternal(const Edge& edge) const;
389 
390  // Getters to access node/edges from outside the class.
391  // Only used in tests.
392  const Edge& GetEdge(int e) const { return edges_[EdgeIndex(e)]; }
393  const Node& GetNode(int n) const { return nodes_[NodeIndex(n)]; }
394 
395  // Display information for debugging.
396  std::string NodeDebugString(NodeIndex n) const;
397  std::string EdgeDebugString(EdgeIndex e) const;
398  std::string DebugString() const;
399 
400  private:
401  // Returns the index of a tight edge between the two given external nodes.
402  // Returns kNoEdgeIndex if none could be found.
403  //
404  // TODO(user): Store edges for match/parent/blossom instead and remove the
405  // need for this function that can take around 10% of the running time on
406  // some problems.
407  EdgeIndex FindTightExternalEdgeBetweenNodes(NodeIndex tail, NodeIndex head);
408 
409  // Appends the path from n to the root of its tree. Used by Augment().
410  void AppendNodePathToRoot(NodeIndex n, std::vector<NodeIndex>* path) const;
411 
412  // Returns the depth of a node in its tree. Used by Shrink().
413  int GetDepth(NodeIndex n) const;
414 
415  // Adds positive delta to dual_objective_ and cap at kint64max on overflow.
416  void AddToDualObjective(CostValue delta);
417 
418  // In the presence of blossoms, the original tail/head of an arc might not be
419  // up to date anymore. It is important to use these functions instead in all
420  // the places where this can happen. That is basically everywhere except in
421  // the initialization.
422  NodeIndex Tail(const Edge& edge) const {
423  return root_blossom_node_[edge.tail];
424  }
425  NodeIndex Head(const Edge& edge) const {
426  return root_blossom_node_[edge.head];
427  }
428 
429  // Returns the Head() or Tail() that does not correspond to node. Node that
430  // node must be one of the original index in the given edge, this is DCHECKed
431  // by edge.OtherEnd().
432  NodeIndex OtherEnd(const Edge& edge, NodeIndex node) const {
433  return root_blossom_node_[edge.OtherEnd(node)];
434  }
435 
436  // Same as OtherEnd() but the given node should either be Tail(edge) or
437  // Head(edge) and do not need to be one of the original node of this edge.
438  NodeIndex OtherEndFromExternalNode(const Edge& edge, NodeIndex node) const {
439  const NodeIndex head = Head(edge);
440  if (head != node) {
441  DCHECK_EQ(node, Tail(edge));
442  return head;
443  }
444  return Tail(edge);
445  }
446 
447  // Returns the given node and if this node is a blossom, all its internal
448  // nodes (recursively). Note that any call to SubNodes() invalidate the
449  // previously returned reference.
450  const std::vector<NodeIndex>& SubNodes(NodeIndex n);
451 
452  // Just used to check that initialized is called exactly once.
453  bool is_initialized_ = false;
454 
455  // The set of all edges/nodes of the graph.
458 
459  // Identity for a non-blossom node, and its top blossom node (in case of many
460  // nested blossom) for an internal node.
461  absl::StrongVector<NodeIndex, NodeIndex> root_blossom_node_;
462 
463  // The current graph incidence. Note that one EdgeIndex should appear in
464  // exactly two places (on its tail and head incidence list).
466 
467  // Used by SubNodes().
468  std::vector<NodeIndex> subnodes_;
469 
470  // The unmatched nodes are exactly the root of the trees. After
471  // initialization, this is only modified by Augment() which removes two nodes
472  // from this list each time. Note that during Shrink()/Expand() we never
473  // change the indexing of the root nodes.
474  std::vector<NodeIndex> unmatched_nodes_;
475 
476  // List of tight_edges and possible shrink to check in PrimalUpdates().
477  std::vector<EdgeIndex> primal_update_edge_queue_;
478  std::vector<EdgeIndex> possible_shrink_;
479 
480  // Priority queues of edges of a certain types.
483  std::vector<Edge*> tmp_all_tops_;
484 
485  // The dual objective. Increase as the algorithm progress. This is a lower
486  // bound on the min-cost of a perfect matching.
487  CostValue dual_objective_ = CostValue(0);
488 
489  // Statistics on the main operations.
490  int64_t num_grows_ = 0;
491  int64_t num_augments_ = 0;
492  int64_t num_shrinks_ = 0;
493  int64_t num_expands_ = 0;
494  int64_t num_dual_updates_ = 0;
495 };
496 
497 } // namespace operations_research
498 
499 #endif // OR_TOOLS_GRAPH_PERFECT_MATCHING_H_
int64_t head
NodeIndex Match(NodeIndex n) const
const Edge & GetEdge(int e) const
bool operator>(const Edge &other) const
CostValue Slack(const Edge &edge) const
CostValue ComputeMaxCommonTreeDualDeltaAndResetPrimalEdgeQueue()
void AddEdge(NodeIndex tail, NodeIndex head, CostValue cost)
void UpdateAllTrees(CostValue delta)
std::string EdgeDebugString(EdgeIndex e) const
void DebugUpdateNodeDual(NodeIndex n, CostValue delta)
static const EdgeIndex kNoEdgeIndex
int64_t tail
NodeIndex OtherEnd(NodeIndex n) const
CostValue Dual(const Node &node) const
bool DebugEdgeIsTightAndExternal(const Edge &edge) const
Edge(NodeIndex t, NodeIndex h, CostValue c)
void AddEdgeWithCost(int tail, int head, int64_t cost)
ABSL_MUST_USE_RESULT bool Initialize()
void Grow(EdgeIndex e, NodeIndex tail, NodeIndex head)
int index
Definition: pack.cc:509
std::string NodeDebugString(NodeIndex n) const
int64_t delta
Definition: resource.cc:1692
size_type size() const
int64_t cost
#define DCHECK(condition)
Definition: base/logging.h:889
static const NodeIndex kNoNodeIndex
#define DCHECK_EQ(val1, val2)
Definition: base/logging.h:890
const Node & GetNode(int n) const
Collection of objects used to extend the Constraint Solver library.
const std::vector< int > & Matches() const
bool NodeIsMatched(NodeIndex n) const
static const CostValue kMaxCostValue
void Expand(NodeIndex to_expand)