[CP-SAT] more fuzzer bugfixes; tweaking ot the max clique heuristics

This commit is contained in:
Laurent Perron
2024-11-15 15:27:50 +01:00
parent 7549192fc9
commit 441a42bdd2
9 changed files with 196 additions and 111 deletions

View File

@@ -3640,6 +3640,7 @@ cc_library(
name = "inclusion",
hdrs = ["inclusion.h"],
deps = [
":util",
"//ortools/base",
"//ortools/util:bitset",
"//ortools/util:time_limit",

View File

@@ -1863,17 +1863,16 @@ bool BinaryImplicationGraph::MergeAtMostOnes(
// Data to detect inclusion of base amo into extend amo.
std::vector<int> detector_clique_index;
CompactVectorVector<int> storage;
InclusionDetector detector(storage, time_limit_);
detector.SetWorkLimit(max_num_explored_nodes);
// First add all clique as possible subset.
for (const auto& [index, old_size] : index_size_vector) {
std::vector<Literal>& clique = at_most_ones[index];
if (time_limit_->LimitReached()) break;
detector_clique_index.push_back(index);
detector.AddPotentialSubset(storage.AddLiterals(clique));
storage.AddLiterals(at_most_ones[index]);
}
detector.IndexAllSubsets();
// We use an higher limit here as the code is faster.
SubsetsDetector detector(storage, time_limit_);
detector.SetWorkLimit(10 * max_num_explored_nodes);
detector.IndexAllStorageAsSubsets();
// Now try to expand one by one.
//
@@ -1995,7 +1994,7 @@ bool BinaryImplicationGraph::MergeAtMostOnes(
}
if (dtime != nullptr) {
*dtime +=
2e-8 * work_done_in_mark_descendants_ + 1e-8 * detector.work_done();
1e-8 * work_done_in_mark_descendants_ + 1e-9 * detector.work_done();
}
return true;
}

View File

@@ -349,10 +349,6 @@ std::string ValidateIntProdConstraint(const CpModelProto& model,
return absl::StrCat("An int_prod constraint should have a target: ",
ProtobufShortDebugString(ct));
}
if (ct.int_prod().exprs().empty()) {
return absl::StrCat("An int_prod constraint should have some expressions: ",
ProtobufShortDebugString(ct));
}
for (const LinearExpressionProto& expr : ct.int_prod().exprs()) {
RETURN_IF_NOT_EMPTY(ValidateAffineExpression(model, expr));

View File

@@ -1540,9 +1540,15 @@ void LoadIntProdConstraint(const ConstraintProto& ct, Model* m) {
case 0: {
auto* integer_trail = m->GetOrCreate<IntegerTrail>();
auto* sat_solver = m->GetOrCreate<SatSolver>();
if (!integer_trail->Enqueue(prod.LowerOrEqual(1)) ||
!integer_trail->Enqueue(prod.GreaterOrEqual(1))) {
sat_solver->NotifyThatModelIsUnsat();
if (prod.IsConstant()) {
if (prod.constant.value() != 1) {
sat_solver->NotifyThatModelIsUnsat();
}
} else {
if (!integer_trail->Enqueue(prod.LowerOrEqual(1)) ||
!integer_trail->Enqueue(prod.GreaterOrEqual(1))) {
sat_solver->NotifyThatModelIsUnsat();
}
}
break;
}

View File

@@ -7307,11 +7307,17 @@ void CpModelPresolver::Probe() {
}
const int64_t num_old_cliques = cliques.size();
// We adapt the limit if there is a lot of literals in amo/implications.
// Usually we can have big reduction on large problem so it seems
// worthwhile.
double limit = context_->params().merge_at_most_one_work_limit();
if (num_literals_before > 1e6) {
limit *= num_literals_before / 1e6;
}
double dtime = 0.0;
implication_graph->MergeAtMostOnes(
absl::MakeSpan(cliques),
SafeDoubleToInt64(context_->params().merge_at_most_one_work_limit()),
&dtime);
implication_graph->MergeAtMostOnes(absl::MakeSpan(cliques),
SafeDoubleToInt64(limit), &dtime);
timer.AddToWork(dtime);
// Note that because TransformIntoMaxCliques() extend cliques, we are ok
@@ -7345,10 +7351,11 @@ void CpModelPresolver::Probe() {
if (num_old_cliques != num_new_cliques ||
num_literals_before != num_literals_after) {
timer.AddMessage(absl::StrCat("Merged ", num_old_cliques, "(",
num_literals_before, " literals) into ",
num_new_cliques, "(", num_literals_after,
" literals) at_most_ones. "));
timer.AddMessage(absl::StrCat(
"Merged ", FormatCounter(num_old_cliques), "(",
FormatCounter(num_literals_before), " literals) into ",
FormatCounter(num_new_cliques), "(",
FormatCounter(num_literals_after), " literals) at_most_ones. "));
}
}
}
@@ -12383,6 +12390,8 @@ bool ModelCopy::ImportAndSimplifyConstraints(
default: {
ConstraintProto* new_ct = context_->working_model->add_constraints();
*new_ct = ct;
new_ct->mutable_enforcement_literal()->Clear();
FinishEnforcementCopy(new_ct);
if (ignore_names) {
// TODO(user): find a better way than copy then clear_name()?
new_ct->clear_name();

View File

@@ -28,6 +28,7 @@
#include "absl/log/check.h"
#include "absl/types/span.h"
#include "ortools/base/logging.h"
#include "ortools/sat/util.h"
#include "ortools/util/bitset.h"
#include "ortools/util/time_limit.h"
@@ -56,7 +57,7 @@ namespace sat {
template <class Storage>
class InclusionDetector {
public:
explicit InclusionDetector(const Storage& storage, TimeLimit* time_limit)
InclusionDetector(const Storage& storage, TimeLimit* time_limit)
: storage_(storage), time_limit_(time_limit) {}
// Resets the class to an empty state.
@@ -124,24 +125,7 @@ class InclusionDetector {
uint64_t work_done() const { return work_done_; }
bool Stopped() const { return stop_; }
// Different API:
// 1/ Add all potential subset
// 2/ Call IndexAllSubsets()
// 3/ Call one or more time FindSubsets().
// - process() can call StopProcessingCurrentSuperset() to abort early
// - process() can call StopProcessingCurrentSubset() to never consider
// that subset again.
// 4/ Call Stop() to reclaim some memory.
//
// Optimization: next_index_to_try is an index in superset that can be used
// to skip some position for which we already called FindSubsets().
void IndexAllSubsets();
void FindSubsets(absl::Span<const int> superset, int* next_index_to_try,
const std::function<void(int subset)>& process);
private:
uint64_t ComputeSignatureAndResizeVectors(absl::Span<const int> elements);
// Allows to access the elements of each candidates via storage_[index];
const Storage& storage_;
@@ -171,7 +155,6 @@ class InclusionDetector {
uint64_t work_done_ = 0;
uint64_t work_limit_ = std::numeric_limits<uint64_t>::max();
// Temporary data only used by DetectInclusions().
bool stop_ = false;
bool stop_with_current_subset_ = false;
bool stop_with_current_superset_ = false;
@@ -181,10 +164,70 @@ class InclusionDetector {
Bitset64<int> is_in_superset_;
};
// Similar API and purpose to InclusionDetector. But this one is a bit simpler
// and faster if it fit your usage. This assume an initial given set of
// potential subsets, that will be queried against supersets one by one.
template <class Storage>
class SubsetsDetector {
public:
SubsetsDetector(const Storage& storage, TimeLimit* time_limit)
: storage_(storage), time_limit_(time_limit) {}
void SetWorkLimit(uint64_t work_limit) { work_limit_ = work_limit; }
void StopProcessingCurrentSubset() { stop_with_current_subset_ = true; }
void StopProcessingCurrentSuperset() { stop_with_current_superset_ = true; }
void Stop() {
stop_ = true;
one_watcher_.clear();
is_in_superset_.resize(0);
}
uint64_t work_done() const { return work_done_; }
bool Stopped() const { return stop_; }
// Different API than InclusionDetector.
// 1/ Add all potential subset to the storage_.
// 2/ Call IndexAllStorageAsSubsets()
// 3/ Call one or more time FindSubsets().
// - process() can call StopProcessingCurrentSuperset() to abort early
// - process() can call StopProcessingCurrentSubset() to never consider
// that subset again.
// 4/ Call Stop() to reclaim some memory.
//
// Optimization: next_index_to_try is an index in superset that can be used
// to skip some position for which we already called FindSubsets().
void IndexAllStorageAsSubsets();
void FindSubsets(absl::Span<const int> superset, int* next_index_to_try,
const std::function<void(int subset)>& process);
private:
// Allows to access the elements of each subsets via storage_[index];
const Storage& storage_;
TimeLimit* time_limit_;
uint64_t work_done_ = 0;
uint64_t work_limit_ = std::numeric_limits<uint64_t>::max();
struct OneWatcherData {
int index;
int other_element;
uint64_t signature;
};
bool stop_ = false;
bool stop_with_current_subset_ = false;
bool stop_with_current_superset_ = false;
CompactVectorVector<int, OneWatcherData> one_watcher_;
Bitset64<int> is_in_superset_;
};
// Deduction guide.
template <typename Storage>
InclusionDetector(const Storage& storage) -> InclusionDetector<Storage>;
template <typename Storage>
SubsetsDetector(const Storage& storage) -> SubsetsDetector<Storage>;
template <typename Storage>
inline void InclusionDetector<Storage>::AddPotentialSet(int index) {
DCHECK_GE(index, 0);
@@ -221,10 +264,9 @@ inline void InclusionDetector<Storage>::AddPotentialSuperset(int index) {
candidates_.push_back({index, num_elements, /*order=*/2});
}
// Compute the signature and also resize vectors if needed. We want a
// Compute the signature and the maximum element. We want a
// signature that is order invariant and is compatible with inclusion.
template <typename Storage>
inline uint64_t InclusionDetector<Storage>::ComputeSignatureAndResizeVectors(
inline std::pair<uint64_t, int> ComputeSignatureAndMaxElement(
absl::Span<const int> elements) {
uint64_t signature = 0;
int max_element = 0;
@@ -233,12 +275,7 @@ inline uint64_t InclusionDetector<Storage>::ComputeSignatureAndResizeVectors(
max_element = std::max(max_element, e);
signature |= (int64_t{1} << (e & 63));
}
DCHECK_EQ(is_in_superset_.size(), one_watcher_.size());
if (max_element >= is_in_superset_.size()) {
is_in_superset_.resize(max_element + 1);
one_watcher_.resize(max_element + 1);
}
return signature;
return {signature, max_element};
}
template <typename Storage>
@@ -264,7 +301,15 @@ inline void InclusionDetector<Storage>::DetectInclusions(
for (const Candidate& candidate : candidates_) {
const auto& candidate_elements = storage_[candidate.index];
const int candidate_index = signatures_.size();
signatures_.push_back(ComputeSignatureAndResizeVectors(candidate_elements));
const auto [signature, max_element] =
ComputeSignatureAndMaxElement(candidate_elements);
signatures_.push_back(signature);
DCHECK_EQ(is_in_superset_.size(), one_watcher_.size());
if (max_element >= is_in_superset_.size()) {
is_in_superset_.resize(max_element + 1);
one_watcher_.resize(max_element + 1);
}
stop_with_current_superset_ = false;
if (candidate.CanBeSuperset()) {
@@ -376,48 +421,63 @@ inline void InclusionDetector<Storage>::DetectInclusions(
Stop();
}
// TODO(user): Merge common code.
template <typename Storage>
inline void InclusionDetector<Storage>::IndexAllSubsets() {
if (num_potential_subsets_ == 0) return;
// Temp data must be ready to use.
inline void SubsetsDetector<Storage>::IndexAllStorageAsSubsets() {
stop_ = false;
DCHECK(signatures_.empty());
DCHECK(one_watcher_.empty());
// We don't really care about the order here.
// Flat representation of one_watcher_, we will fill it in one go from there.
std::vector<int> tmp_keys;
std::vector<OneWatcherData> tmp_values;
std::vector<int> element_to_num_watched;
work_done_ = 0;
for (const Candidate& candidate : candidates_) {
const auto& candidate_elements = storage_[candidate.index];
const int candidate_index = signatures_.size();
signatures_.push_back(ComputeSignatureAndResizeVectors(candidate_elements));
for (int index = 0; index < storage_.size(); ++index) {
const auto& subset = storage_[index];
CHECK_GE(subset.size(), 2);
// Add new subset candidate to the watchers.
//
// Tricky: If this was also a superset and has been removed, we don't want
// to watch it!
if (candidate.CanBeSubset()) {
// Choose to watch the one with smallest list.
int best_choice = -1;
work_done_ += candidate.size;
if (work_done_ > work_limit_) return Stop();
for (const int e : candidate_elements) {
DCHECK_GE(e, 0);
DCHECK_LT(e, one_watcher_.size());
if (best_choice == -1 ||
one_watcher_[e].size() < one_watcher_[best_choice].size()) {
best_choice = e;
}
}
DCHECK_NE(best_choice, -1);
one_watcher_[best_choice].push_back(candidate_index);
const auto [signature, max_element] = ComputeSignatureAndMaxElement(subset);
if (max_element >= is_in_superset_.size()) {
is_in_superset_.resize(max_element + 1);
}
if (max_element >= element_to_num_watched.size()) {
element_to_num_watched.resize(max_element + 1);
}
// Choose to watch the one with smallest list so far.
int best_choice = -1;
int best_value = -1;
int second_choice = -1;
int second_value = -1;
work_done_ += subset.size();
if (work_done_ > work_limit_) return Stop();
for (const int e : subset) {
DCHECK_GE(e, 0);
DCHECK_LT(e, element_to_num_watched.size());
const int value = element_to_num_watched[e];
if (value >= best_value) {
second_choice = best_choice;
second_value = best_value;
best_choice = e;
best_value = value;
} else if (value > second_value) {
second_choice = e;
second_value = value;
}
}
DCHECK_NE(best_choice, -1);
DCHECK_NE(second_choice, -1);
DCHECK_NE(best_choice, second_choice);
element_to_num_watched[best_choice]++;
tmp_keys.push_back(best_choice);
tmp_values.push_back({index, second_choice, signature});
}
one_watcher_.ResetFromFlatMapping(tmp_keys, tmp_values);
}
template <typename Storage>
inline void InclusionDetector<Storage>::FindSubsets(
inline void SubsetsDetector<Storage>::FindSubsets(
absl::Span<const int> superset, int* next_index_to_try,
const std::function<void(int subset)>& process) {
// We check each time our work_done_ has increased by more than this.
@@ -426,8 +486,11 @@ inline void InclusionDetector<Storage>::FindSubsets(
// Compute the signature and also resize vector if needed. We want a
// signature that is order invariant and is compatible with inclusion.
const uint64_t superset_signature =
ComputeSignatureAndResizeVectors(superset);
const auto [superset_signature, max_element] =
ComputeSignatureAndMaxElement(superset);
if (max_element >= is_in_superset_.size()) {
is_in_superset_.resize(max_element + 1);
}
// Find any subset included in current superset.
work_done_ += 2 * superset.size();
@@ -446,26 +509,36 @@ inline void InclusionDetector<Storage>::FindSubsets(
stop_with_current_superset_ = false;
const auto is_in_superset_view = is_in_superset_.const_view();
const auto signatures_view = signatures_.data();
for (; *next_index_to_try < superset.size(); ++*next_index_to_try) {
const int superset_e = superset[*next_index_to_try];
for (int i = 0; i < one_watcher_[superset_e].size(); ++i) {
const int c_index = one_watcher_[superset_e][i];
const Candidate& subset = candidates_[c_index];
if (superset_e >= one_watcher_.size()) continue;
auto cached_span = one_watcher_[superset_e];
for (int i = 0; i < cached_span.size(); ++i) {
++work_done_;
// Quick check with size and signature.
if (subset.size > superset.size()) continue;
if ((signatures_view[c_index] & ~superset_signature) != 0) continue;
// Do a bunch of quick checks. The second one is optimized for size 2
// which happens a lot in our usage of merging clique with implications.
const auto [subset_index, other_e, subset_signature] = cached_span[i];
if ((subset_signature & ~superset_signature) != 0) continue;
if (!is_in_superset_view[other_e]) continue;
// Long check with bitset.
const absl::Span<const int> subset = storage_[subset_index];
if (subset.size() > superset.size()) continue;
// TODO(user): Technically we do not need to check the watched position or
// the "other element" position, we could do that by permuting them first
// or last and iterating on a subspan. However, in many slow situation, we
// have millions of size 2 sets, and the time is dominated by the first
// check.
bool is_included = true;
work_done_ += subset.size;
work_done_ += subset.size();
if (work_done_ > work_limit_) return Stop();
if (work_done_ > next_time_limit_check) {
if (time_limit_->LimitReached()) return Stop();
next_time_limit_check = work_done_ + kCheckTimeLimitInterval;
}
for (const int subset_e : storage_[subset.index]) {
for (const int subset_e : subset) {
if (!is_in_superset_view[subset_e]) {
is_included = false;
break;
@@ -474,8 +547,10 @@ inline void InclusionDetector<Storage>::FindSubsets(
if (!is_included) continue;
stop_with_current_subset_ = false;
process(subset.index);
process(subset_index);
// TODO(user): Remove this and the more complex API need once we move
// class.
if (stop_) return;
if (work_done_ > work_limit_) return Stop();
if (work_done_ > next_time_limit_check) {
@@ -484,9 +559,8 @@ inline void InclusionDetector<Storage>::FindSubsets(
}
if (stop_with_current_subset_) {
// Remove from the watcher list.
std::swap(one_watcher_[superset_e][i], one_watcher_[superset_e].back());
one_watcher_[superset_e].pop_back();
one_watcher_.RemoveBySwap(superset_e, i);
cached_span.remove_suffix(1);
--i;
}
if (stop_with_current_superset_) break;

View File

@@ -178,17 +178,16 @@ TEST(InclusionDetectorTest, RandomTest) {
[&num_inclusions](int subset, int superset) { ++num_inclusions; });
}
TEST(InclusionDetectorTest, AlternativeApi) {
TEST(SubsetsDetectorTest, AlternativeApi) {
CompactVectorVector<int> storage;
TimeLimit time_limit;
InclusionDetector detector(storage, &time_limit);
storage.Add({1, 2});
storage.Add({4, 3});
storage.Add({1, 2, 3});
storage.Add({2, 3});
// Lets add some subset.
detector.AddPotentialSubset(storage.Add({1, 2}));
detector.AddPotentialSubset(storage.Add({4, 3}));
detector.AddPotentialSubset(storage.Add({1, 2, 3}));
detector.AddPotentialSubset(storage.Add({2, 3}));
detector.IndexAllSubsets();
TimeLimit time_limit;
SubsetsDetector detector(storage, &time_limit);
detector.IndexAllStorageAsSubsets();
// Now we can query any "superset".
// Note that there is no guarantee on the order of discovery.

View File

@@ -69,6 +69,7 @@ class CompactVectorVector {
// Size of the "key" space, always in [0, size()).
size_t size() const;
bool empty() const;
size_t num_entries() const { return buffer_.size(); }
// Getters, either via [] or via a wrapping to be compatible with older api.
//
@@ -85,9 +86,9 @@ class CompactVectorVector {
starts_.reserve(size);
sizes_.reserve(size);
}
void reserve(int size, int num_terms) {
void reserve(int size, int num_entries) {
reserve(size);
buffer_.reserve(num_terms);
buffer_.reserve(num_entries);
}
// Given a flat mapping (keys[i] -> values[i]) with two parallel vectors, not

View File

@@ -513,7 +513,7 @@ class Bitset64 {
void ClearBucket(IndexType i) {
DCHECK_GE(Value(i), 0);
DCHECK_LT(Value(i), Value(size_));
data_[BitOffset64(Value(i))] = 0;
data_.data()[BitOffset64(Value(i))] = 0;
}
// Clears the bits at position i and i ^ 1.