OR-Tools  9.3
inclusion.h
Go to the documentation of this file.
1// Copyright 2010-2021 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14#ifndef OR_TOOLS_SAT_INCLUSION_H_
15#define OR_TOOLS_SAT_INCLUSION_H_
16
17#include <stddef.h>
18#include <stdint.h>
19
20#include <algorithm>
21#include <cstdint>
22#include <functional>
23#include <limits>
24#include <tuple>
25#include <utility>
26#include <vector>
27
28#include "absl/types/span.h"
30
31namespace operations_research {
32namespace sat {
33
34// Small utility class to store a vector<vector<>> where one can only append new
35// vector and never change previously added ones.
36//
37// Note that we implement a really small subset of the vector<vector<>> API.
38template <typename T>
40 public:
41 // Same as push_back().
42 // Returns the previous size() as this is convenient for how we use it.
43 int Add(absl::Span<const T> data) {
44 const int index = size();
45 starts_.push_back(buffer_.size());
46 sizes_.push_back(data.size());
47 buffer_.insert(buffer_.end(), data.begin(), data.end());
48 return index;
49 }
50
51 // Warning: this is only valid until the next clear() or Add() call.
52 absl::Span<const T> operator[](int index) const {
53 DCHECK_GE(index, 0);
54 DCHECK_LT(index, starts_.size());
55 DCHECK_LT(index, sizes_.size());
56 const size_t size = static_cast<size_t>(sizes_[index]);
57 if (size == 0) return {};
58 return {&buffer_[starts_[index]], size};
59 }
60
61 void clear() {
62 starts_.clear();
63 sizes_.clear();
64 buffer_.clear();
65 }
66
67 size_t size() const { return starts_.size(); }
68
69 private:
70 std::vector<int> starts_;
71 std::vector<int> sizes_;
72 std::vector<T> buffer_;
73};
74
// A helper class to process many sets of integers in [0, n] and detect all
// the sets included in each other. This is a common operation in presolve,
// and while it can be slow the algorithm used here is pretty efficient in
// practice.
//
// The algorithm is based on the SAT preprocessing algorithm to detect clauses
// that subsume others. It uses a one-watcher scheme where each subset
// candidate has only one element watched. To identify all potential subsets
// of a superset, one needs to inspect the watch lists of all the elements of
// the superset candidate.
//
// The number n will be detected automatically but we allocate various vectors
// of size n, so avoid having large integer values in your sets.
//
// All set contents will be accessed via storage_[index]. And of course Storage
// can be the CompactVectorVector defined above. But it can also be something
// that returns a class that supports .size() and integer range iteration over
// the elements in the set on the fly.
//
// Only a reference to the storage is kept, so it must outlive this object.
template <class Storage>
class InclusionDetector {
 public:
  explicit InclusionDetector(const Storage& storage) : storage_(storage) {}

  // Resets the class to an empty state: forgets all the candidates added so
  // far. The work limit and the work_done() counter are not modified here.
  void Reset() {
    num_potential_subsets_ = 0;
    num_potential_supersets_ = 0;
    candidates_.clear();
  }

  // Adds a candidate set to consider for the next DetectInclusions() call.
  // The argument is an index that will only be used via storage_[index] to get
  // the content of the candidate set.
  //
  // Note that sets with no element are just ignored and will never be returned
  // as part of an inclusion.
  void AddPotentialSubset(int index);
  void AddPotentialSuperset(int index);
  void AddPotentialSet(int index);

  // By default we will detect all inclusions. It is possible to make sure we
  // don't do more than O(work_limit) operations and eventually abort early by
  // setting this. Note that we don't reset it on Reset().
  //
  // This is needed, because for m candidates of size n, we can have O(m ^ 2)
  // inclusions, each requiring O(n) work to check.
  void SetWorkLimit(uint64_t work_limit) { work_limit_ = work_limit; }

  // Finds all subsets included in a superset and calls "process" on each of
  // the detected inclusions. The std::function arguments correspond to the
  // indices passed to the Add*() calls.
  //
  // The order of detection will be by increasing superset size. For supersets
  // with the same size, the order will be deterministic but not specified. And
  // similarly, for a given superset, the order of the included subsets is
  // deterministic but not specified.
  //
  // Note that only the candidates marked as such can be a subset/superset.
  // For the candidates that can be both and are duplicates (i.e. same set),
  // only one pair will be returned. We will also never return identity
  // inclusion and we always have subset != superset.
  void DetectInclusions(
      const std::function<void(int subset, int superset)>& process);

  // Function that should only be used from within "process()".
  // Returns the bitset corresponding to the elements of the current superset
  // passed to the process() function.
  //
  // This returns a reference to the internal bitset rather than a copy, so
  // querying it from the callback does not duplicate the whole vector on each
  // call. The content changes as DetectInclusions() progresses and is emptied
  // by Stop(); copy it if it must be kept.
  const std::vector<bool>& IsInSuperset() const { return is_in_superset_; }

  // Functions that should only be used from within "process()".
  // Stop will abort the current search. The other two will cause the
  // corresponding candidate set to never appear in any future inclusion.
  void StopProcessingCurrentSubset() { stop_with_current_subset_ = true; }
  void StopProcessingCurrentSuperset() { stop_with_current_superset_ = true; }
  void Stop() {
    stop_ = true;
    // Also releases the temporary data used by DetectInclusions().
    signatures_.clear();
    one_watcher_.clear();
    is_in_superset_.clear();
  }

  // The algorithm here can detect many small sets included in a big set while
  // only scanning the superset once. So if we do scan the superset in the
  // process function, we can do a lot more work. This is here to reuse the
  // deterministic limit mechanism.
  void IncreaseWorkDone(uint64_t increase) { work_done_ += increase; }

  // Stats.
  int num_potential_subsets() const { return num_potential_subsets_; }
  int num_potential_supersets() const { return num_potential_supersets_; }
  uint64_t work_done() const { return work_done_; }

 private:
  // Allows to access the elements of each candidate via storage_[index].
  const Storage& storage_;

  // List of candidates, this will be sorted.
  struct Candidate {
    int index;  // Storage index.
    int size;

    // For identical sizes, we need this order for correctness
    // 0: subset only, 1: both, 2: superset only.
    int order = 1;

    bool CanBeSubset() const { return order <= 1; }
    bool CanBeSuperset() const { return order >= 1; }

    // We use this with stable_sort, so no need to add the index.
    bool operator<(const Candidate& other) const {
      return std::tie(size, order) < std::tie(other.size, other.order);
    }
  };
  std::vector<Candidate> candidates_;

  int num_potential_subsets_ = 0;
  int num_potential_supersets_ = 0;
  uint64_t work_done_ = 0;
  uint64_t work_limit_ = std::numeric_limits<uint64_t>::max();

  // Temporary data only used by DetectInclusions().
  // The flags are given a defined initial value so that they never hold an
  // indeterminate value, even if nothing has been detected yet.
  bool stop_ = false;
  bool stop_with_current_subset_ = false;
  bool stop_with_current_superset_ = false;
  std::vector<uint64_t> signatures_;
  std::vector<std::vector<int>> one_watcher_;  // Index in candidates_.
  std::vector<bool> is_in_superset_;
};
202
203// Deduction guide.
204template <typename Storage>
206
207template <typename Storage>
209 DCHECK_GE(index, 0);
210 DCHECK_LT(index, storage_.size());
211 const int num_elements = storage_[index].size();
212 if (num_elements == 0) return;
213
214 ++num_potential_subsets_;
215 ++num_potential_supersets_;
216 candidates_.push_back({index, num_elements, /*order=*/1});
217}
218
219template <typename Storage>
221 DCHECK_GE(index, 0);
222 DCHECK_LT(index, storage_.size());
223 const int num_elements = storage_[index].size();
224 if (num_elements == 0) return;
225
226 ++num_potential_subsets_;
227 candidates_.push_back({index, num_elements, /*order=*/0});
228}
229
230template <typename Storage>
232 DCHECK_GE(index, 0);
233 DCHECK_LT(index, storage_.size());
234 const int num_elements = storage_[index].size();
235 if (num_elements == 0) return;
236
237 DCHECK_GE(index, 0);
238 DCHECK_LT(index, storage_.size());
239 ++num_potential_supersets_;
240 candidates_.push_back({index, num_elements, /*order=*/2});
241}
242
243template <typename Storage>
245 const std::function<void(int subset, int superset)>& process) {
246 // No need to do any work in these cases.
247 if (candidates_.size() <= 1) return;
248 if (num_potential_subsets_ == 0) return;
249 if (num_potential_supersets_ == 0) return;
250
251 // Temp data must be ready to use.
252 stop_ = false;
253 DCHECK(is_in_superset_.empty());
254 DCHECK(signatures_.empty());
255 DCHECK(one_watcher_.empty());
256
257 // Main algo.
258 work_done_ = 0;
259 std::stable_sort(candidates_.begin(), candidates_.end());
260 for (const Candidate& candidate : candidates_) {
261 const auto& candidate_elements = storage_[candidate.index];
262 const int candidate_index = signatures_.size();
263
264 // Compute the signature and also resize vector if needed. We want a
265 // signature that is order invariant and is compatible with inclusion.
266 uint64_t signature = 0;
267 int max_element = 0;
268 for (const int e : candidate_elements) {
269 DCHECK_GE(e, 0);
270 max_element = std::max(max_element, e);
271 signature |= (int64_t{1} << (e & 63));
272 }
273 DCHECK_EQ(is_in_superset_.size(), one_watcher_.size());
274 if (max_element >= is_in_superset_.size()) {
275 is_in_superset_.resize(max_element + 1, false);
276 one_watcher_.resize(max_element + 1);
277 }
278 signatures_.push_back(signature);
279
280 stop_with_current_superset_ = false;
281 if (candidate.CanBeSuperset()) {
282 const Candidate& superset = candidate;
283 const auto& superset_elements = candidate_elements;
284
285 // Bitset should be cleared.
286 DCHECK(std::all_of(is_in_superset_.begin(), is_in_superset_.end(),
287 [](bool b) { return !b; }));
288
289 // Find any subset included in current superset.
290 work_done_ += 2 * superset.size;
291 if (work_done_ > work_limit_) return Stop();
292 for (const int e : superset_elements) {
293 is_in_superset_[e] = true;
294 }
295
296 const uint64_t superset_signature = signatures_.back();
297 for (const int superset_e : superset_elements) {
298 for (int i = 0; i < one_watcher_[superset_e].size(); ++i) {
299 const int c_index = one_watcher_[superset_e][i];
300 const Candidate& subset = candidates_[c_index];
301 DCHECK_LE(subset.size, superset.size);
302
303 // Quick check with signature.
304 if ((signatures_[c_index] & ~superset_signature) != 0) continue;
305
306 // Long check with bitset.
307 bool is_included = true;
308 work_done_ += subset.size;
309 if (work_done_ > work_limit_) return Stop();
310 for (const int subset_e : storage_[subset.index]) {
311 if (!is_in_superset_[subset_e]) {
312 is_included = false;
313 break;
314 }
315 }
316 if (!is_included) continue;
317
318 stop_with_current_subset_ = false;
319 process(subset.index, superset.index);
320
321 if (stop_) return;
322 if (work_done_ > work_limit_) return Stop();
323
324 if (stop_with_current_subset_) {
325 // Remove from the watcher list.
326 std::swap(one_watcher_[superset_e][i],
327 one_watcher_[superset_e].back());
328 one_watcher_[superset_e].pop_back();
329 --i;
330 }
331 if (stop_with_current_superset_) break;
332 }
333 if (stop_with_current_superset_) break;
334 }
335
336 // Cleanup.
337 for (const int e : superset_elements) {
338 is_in_superset_[e] = false;
339 }
340 }
341
342 // Add new subset candidate to the watchers.
343 //
344 // Tricky: If this was also a superset and has been removed, we don't want
345 // to watch it!
346 if (candidate.CanBeSubset() && !stop_with_current_superset_) {
347 // Choose to watch the one with smallest list.
348 int best_choice = -1;
349 work_done_ += candidate.size;
350 if (work_done_ > work_limit_) return Stop();
351 for (const int e : candidate_elements) {
352 DCHECK_GE(e, 0);
353 DCHECK_LT(e, one_watcher_.size());
354 if (best_choice == -1 ||
355 one_watcher_[e].size() < one_watcher_[best_choice].size()) {
356 best_choice = e;
357 }
358 }
359 DCHECK_NE(best_choice, -1);
360 one_watcher_[best_choice].push_back(candidate_index);
361 }
362 }
363
364 // Stop also performs some cleanup.
365 Stop();
366}
367
368} // namespace sat
369} // namespace operations_research
370
371#endif // OR_TOOLS_SAT_INCLUSION_H_
int64_t max
Definition: alldiff_cst.cc:140
#define DCHECK_LE(val1, val2)
Definition: base/logging.h:893
#define DCHECK_NE(val1, val2)
Definition: base/logging.h:892
#define DCHECK_GE(val1, val2)
Definition: base/logging.h:895
#define DCHECK_LT(val1, val2)
Definition: base/logging.h:894
#define DCHECK(condition)
Definition: base/logging.h:890
#define DCHECK_EQ(val1, val2)
Definition: base/logging.h:891
absl::Span< const T > operator[](int index) const
Definition: inclusion.h:52
int Add(absl::Span< const T > data)
Definition: inclusion.h:43
void SetWorkLimit(uint64_t work_limit)
Definition: inclusion.h:120
void IncreaseWorkDone(uint64_t increase)
Definition: inclusion.h:159
const std::vector< bool > IsInSuperset() const
Definition: inclusion.h:141
InclusionDetector(const Storage &storage)
Definition: inclusion.h:95
void DetectInclusions(const std::function< void(int subset, int superset)> &process)
Definition: inclusion.h:244
int64_t b
int index
void swap(IdMap< K, V > &a, IdMap< K, V > &b)
Definition: id_map.h:262
InclusionDetector(const Storage &storage) -> InclusionDetector< Storage >
Collection of objects used to extend the Constraint Solver library.