C++ Reference

C++ Reference: Algorithms

dynamic_partition.h
Go to the documentation of this file.
1 // Copyright 2010-2021 Google LLC
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
14 // TODO(user): refine this toplevel comment when this file settles.
15 //
16 // Two dynamic partition classes: one that incrementally splits a partition
17 // into more and more parts; one that incrementally merges a partition into less
18 // and less parts.
19 //
20 // GLOSSARY:
21 // The partition classes maintain a partition of N integers 0..N-1
22 // (aka "elements") into disjoint equivalence classes (aka "parts").
23 //
24 // SAFETY:
25 // Like vector<int> crashes when used improperly, these classes are not "safe":
26 // most of their methods may crash if called with invalid arguments. The client
27 // code is responsible for using this class properly. A few DCHECKs() will help
28 // catch bugs, though.
29 
30 #ifndef OR_TOOLS_ALGORITHMS_DYNAMIC_PARTITION_H_
31 #define OR_TOOLS_ALGORITHMS_DYNAMIC_PARTITION_H_
32 
33 #include <cstdint>
34 #include <string>
35 #include <vector>
36 
37 #include "ortools/base/logging.h"
38 
39 namespace operations_research {
40 
41 // Partition class that supports incremental splitting, with backtracking.
42 // See http://en.wikipedia.org/wiki/Partition_refinement .
43 // More precisely, the supported edit operations are:
44 // - Refine the partition so that a subset S (typically, |S| <<< N)
45 // of elements are all considered non-equivalent to any element in ¬S.
46 // Typically, this should be done in O(|S|).
47 // - Undo the above operations (backtracking).
48 //
49 // TODO(user): rename this to BacktrackableSplittingPartition.
51  public:
52  // Creates a DynamicPartition on n elements, numbered 0..n-1. Start with
53  // the trivial partition (only one subset containing all elements).
54  explicit DynamicPartition(int num_elements);
55 
56  // Ditto, but specify the initial part of each elements. Part indices must
57  // form a dense integer set starting at 0; eg. [2, 1, 0, 1, 1, 3, 0] is valid.
58  explicit DynamicPartition(const std::vector<int>& initial_part_of_element);
59 
60  // Accessors.
61  int NumElements() const { return element_.size(); }
62  const int NumParts() const { return part_.size(); }
63 
64  // To iterate over the elements in part #i:
65  // for (int element : partition.ElementsInPart(i)) { ... }
66  //
67  // ORDERING OF ELEMENTS INSIDE PARTS: the order of elements within a given
68  // part is volatile, and may change with Refine() or UndoRefine*() operations,
69  // even if the part itself doesn't change.
70  struct IterablePart;
71  IterablePart ElementsInPart(int i) const;
72 
73  int PartOf(int element) const;
74  int SizeOfPart(int part) const;
75  int ParentOfPart(int part) const;
76 
77  // A handy shortcut to ElementsInPart(PartOf(e)). The returned IterablePart
78  // will never be empty, since it contains at least i.
79  IterablePart ElementsInSamePartAs(int i) const;
80 
81  // Returns a fingerprint of the given part. While collisions are possible,
82  // their probability is quite low. Two parts that have the same size and the
83  // same fingerprint are most likely identical.
84  // Also, two parts that have the exact same set of elements will *always*
85  // have the same fingerprint.
86  uint64_t FprintOfPart(int part) const;
87 
88  // Refines the partition such that elements that are in distinguished_subset
89  // never share the same part as elements that aren't in that subset.
90  // This might be a no-op: in that case, NumParts() won't change, but the
91  // order of elements inside each part may change.
92  //
93  // ORDERING OF PARTS:
94  // For each i such that Part #i has a non-trivial intersection with
95  // "distinguished_subset" (neither empty, nor the full Part); Part #i is
96  // stripped out of all elements that are in "distinguished_subset", and
97  // those elements are sent to a newly created part, whose parent_part = i.
98  // The parts newly created by a single Refine() operations are sorted
99  // by parent_part.
100  // Example: a Refine() on a partition with 6 parts causes parts #1, #3 and
101  // #4 to be split: the partition will now contain 3 new parts: part #6 (with
102  // parent_part = 1), part #7 (with parent_part = 3) and part #8 (with
103  // parent_part = 4).
104  //
105  // TODO(user): the graph symmetry finder could probably benefit a lot from
106  // keeping track of one additional bit of information for each part that
107  // remains unchanged by a Refine() operation: was that part entirely *in*
108  // the distinguished subset or entirely *out*?
109  void Refine(const std::vector<int>& distinguished_subset);
110 
111  // Undo one or several Refine() operations, until the number of parts
112  // becomes equal to "original_num_parts".
113  // Prerequisite: NumParts() >= original_num_parts.
114  void UndoRefineUntilNumPartsEqual(int original_num_parts);
115 
116  // Dump the partition to a string. There might be different conventions for
117  // sorting the parts and the elements inside them.
119  // Elements are sorted within parts, and parts are then sorted
120  // lexicographically.
122  // Elements are sorted within parts, and parts are kept in order.
124  };
125  std::string DebugString(DebugStringSorting sorting) const;
126 
127  // ADVANCED USAGE:
128  // All elements (0..n-1) of the partition, sorted in a way that's compatible
129  // with the hierarchical partitioning:
130  // - All the elements of any given part are contiguous.
131  // - Elements of a part P are always after elements of part Parent(P).
132  // - The order remains identical (and the above property holds) after any
133  // UndoRefine*() operation.
134  // Note that the order does get changed by Refine() operations.
135  // This is a reference, so it'll only remain valid and constant until the
136  // class is destroyed or until Refine() get called.
137  const std::vector<int>& ElementsInHierarchicalOrder() const {
138  return element_;
139  }
140 
141  private:
142  // A DynamicPartition instance maintains a list of all of its elements,
143  // 'sorted' by partitions: elements of the same subset are contiguous
144  // in that list.
145  std::vector<int> element_;
146 
147  // The reverse of elements_[]: element_[index_of_[i]] = i.
148  std::vector<int> index_of_;
149 
150  // part_of_[i] is the index of the part that contains element i.
151  std::vector<int> part_of_;
152 
153  struct Part {
154  // This part holds elements[start_index .. end_index-1].
155  // INVARIANT: end_index > start_index.
156  int start_index; // Inclusive
157  int end_index; // Exclusive
158 
159  // The Part that this part was split out of. See the comment at Refine().
160  // INVARIANT: part[i].parent_part <= i, and the equality holds iff part[i]
161  // has no parent.
162  int parent_part; // Index into the part[] array.
163 
164  // The part's fingerprint is the XOR of all fingerprints of its elements.
165  // See FprintOfInt32() in the .cc.
166  uint64_t fprint;
167 
168  Part() : start_index(0), end_index(0), parent_part(0), fprint(0) {}
169  Part(int start_index, int end_index, int parent_part, uint64_t fprint)
170  : start_index(start_index),
171  end_index(end_index),
172  parent_part(parent_part),
173  fprint(fprint) {}
174  };
175  std::vector<Part> part_; // The disjoint parts.
176 
177  // Used temporarily and exclusively by Refine(). This prevents Refine()
178  // from being thread-safe.
179  // INVARIANT: tmp_counter_of_part_ contains only 0s before and after Refine().
180  std::vector<int> tmp_counter_of_part_;
181  std::vector<int> tmp_affected_parts_;
182 };
183 
185  std::vector<int>::const_iterator begin() const { return begin_; }
186  std::vector<int>::const_iterator end() const { return end_; }
187  std::vector<int>::const_iterator begin_;
188  std::vector<int>::const_iterator end_;
189 
190  int size() const { return end_ - begin_; }
191 
193  IterablePart(const std::vector<int>::const_iterator& b,
194  const std::vector<int>::const_iterator& e)
195  : begin_(b), end_(e) {}
196 
197  // These typedefs allow this iterator to be used within testing::ElementsAre.
198  typedef int value_type;
199  typedef std::vector<int>::const_iterator const_iterator;
200 };
201 
202 // Partition class that supports incremental merging, using the union-find
203 // algorithm (see http://en.wikipedia.org/wiki/Disjoint-set_data_structure).
205  public:
206  // At first, all nodes are in their own singleton part.
208  explicit MergingPartition(int num_nodes) { Reset(num_nodes); }
209  void Reset(int num_nodes);
210 
211  int NumNodes() const { return parent_.size(); }
212 
213  // Complexity: amortized O(Ackermann⁻¹(N)) -- which is essentially O(1) --
214  // where N is the number of nodes.
215  //
216  // Return value: If this merge caused a representative node (of either node1
217  // or node2) to stop being a representative (because only one can remain);
218  // this method returns that removed representative. Otherwise it returns -1.
219  //
220  // Details: a smaller part will always be merged onto a larger one.
221  // Upons ties, the smaller representative becomes the overall representative.
222  int MergePartsOf(int node1, int node2); // The 'union' of the union-find.
223 
224  // Get the representative of "node" (a node in the same equivalence class,
225  // which will also be returned for any other "node" in the same class).
226  // The complexity if the same as MergePartsOf().
227  int GetRootAndCompressPath(int node);
228 
229  // Specialized reader API: prunes "nodes" to only keep at most one node per
230  // part: any node which is in the same part as an earlier node will be pruned.
231  void KeepOnlyOneNodePerPart(std::vector<int>* nodes);
232 
233  // Output the whole partition as node equivalence classes: if there are K
234  // parts and N nodes, node_equivalence_classes[i] will contain the part index
235  // (a number in 0..K-1) of node #i. Parts will be sorted by their first node
236  // (i.e. node 0 will always be in part 0; then the next node that isn't in
237  // part 0 will be in part 1, and so on).
238  // Returns the number K of classes.
239  int FillEquivalenceClasses(std::vector<int>* node_equivalence_classes);
240 
241  // Dump all components, with nodes sorted within each part and parts
242  // sorted lexicographically. Eg. "0 1 3 4 | 2 5 | 6 7 8".
243  std::string DebugString();
244 
245  // Advanced usage: sets 'node' to be in its original singleton. All nodes
246  // who may point to 'node' as a parent will remain in an inconsistent state.
247  // This can be used to reinitialize a MergingPartition that has been sparsely
248  // modified in O(|modifications|).
249  // CRASHES IF USED INCORRECTLY.
250  void ResetNode(int node);
251 
252  int NumNodesInSamePartAs(int node) {
253  return part_size_[GetRootAndCompressPath(node)];
254  }
255 
256  // FOR DEBUGGING OR SPECIAL "CONST" ACCESS ONLY:
257  // Find the root of the union-find tree with leaf 'node', i.e. its
258  // representative node, but don't use path compression.
259  // The amortized complexity can be as bad as log(N), as opposed to the
260  // version using path compression.
261  int GetRoot(int node) const;
262 
263  private:
264  // Along the upwards path from 'node' to its root, set the parent of all
265  // nodes (including the root) to 'parent'.
266  void SetParentAlongPathToRoot(int node, int parent);
267 
268  std::vector<int> parent_;
269  std::vector<int> part_size_;
270 
271  // Used transiently by KeepOnlyOneNodePerPart().
272  std::vector<bool> tmp_part_bit_;
273 };
274 
275 // *** Implementation of inline methods of the above classes. ***
276 
278  int i) const {
279  DCHECK_GE(i, 0);
280  DCHECK_LT(i, NumParts());
281  return IterablePart(element_.begin() + part_[i].start_index,
282  element_.begin() + part_[i].end_index);
283 }
284 
285 inline int DynamicPartition::PartOf(int element) const {
286  DCHECK_GE(element, 0);
287  DCHECK_LT(element, part_of_.size());
288  return part_of_[element];
289 }
290 
291 inline int DynamicPartition::SizeOfPart(int part) const {
292  DCHECK_GE(part, 0);
293  DCHECK_LT(part, part_.size());
294  const Part& p = part_[part];
295  return p.end_index - p.start_index;
296 }
297 
298 inline int DynamicPartition::ParentOfPart(int part) const {
299  DCHECK_GE(part, 0);
300  DCHECK_LT(part, part_.size());
301  return part_[part].parent_part;
302 }
303 
305  int i) const {
306  return ElementsInPart(PartOf(i));
307 }
308 
309 inline uint64_t DynamicPartition::FprintOfPart(int part) const {
310  DCHECK_GE(part, 0);
311  DCHECK_LT(part, part_.size());
312  return part_[part].fprint;
313 }
314 
315 inline int MergingPartition::GetRoot(int node) const {
316  DCHECK_GE(node, 0);
317  DCHECK_LT(node, NumNodes());
318  int child = node;
319  while (true) {
320  const int parent = parent_[child];
321  if (parent == child) return child;
322  child = parent;
323  }
324 }
325 
326 inline void MergingPartition::SetParentAlongPathToRoot(int node, int parent) {
327  DCHECK_GE(node, 0);
328  DCHECK_LT(node, NumNodes());
329  DCHECK_GE(parent, 0);
330  DCHECK_LT(parent, NumNodes());
331  int child = node;
332  while (true) {
333  const int old_parent = parent_[child];
334  parent_[child] = parent;
335  if (old_parent == child) return;
336  child = old_parent;
337  }
338 }
339 
340 inline void MergingPartition::ResetNode(int node) {
341  DCHECK_GE(node, 0);
342  DCHECK_LT(node, NumNodes());
343  parent_[node] = node;
344  part_size_[node] = 1;
345 }
346 
347 } // namespace operations_research
348 
349 #endif // OR_TOOLS_ALGORITHMS_DYNAMIC_PARTITION_H_
IterablePart(const std::vector< int >::const_iterator &b, const std::vector< int >::const_iterator &e)
void Refine(const std::vector< int > &distinguished_subset)
const std::vector< int > & ElementsInHierarchicalOrder() const
std::vector< int >::const_iterator begin() const
std::vector< int >::const_iterator end() const
IterablePart ElementsInPart(int i) const
void UndoRefineUntilNumPartsEqual(int original_num_parts)
IterablePart ElementsInSamePartAs(int i) const
std::string DebugString(DebugStringSorting sorting) const
std::vector< int >::const_iterator const_iterator
int FillEquivalenceClasses(std::vector< int > *node_equivalence_classes)
void KeepOnlyOneNodePerPart(std::vector< int > *nodes)
int MergePartsOf(int node1, int node2)