From 441a42bdd23261d757d2c22e998081a40ed1fe47 Mon Sep 17 00:00:00 2001
From: Laurent Perron <lperron@google.com>
Date: Fri, 15 Nov 2024 15:27:50 +0100
Subject: [PATCH] [CP-SAT] more fuzzer bugfixes; tweaking ot the max clique
 heuristics

---
 ortools/sat/BUILD.bazel          |   1 +
 ortools/sat/clause.cc            |  15 +-
 ortools/sat/cp_model_checker.cc  |   4 -
 ortools/sat/cp_model_loader.cc   |  12 +-
 ortools/sat/cp_model_presolve.cc |  25 ++--
 ortools/sat/inclusion.h          | 226 ++++++++++++++++++++-----------
 ortools/sat/inclusion_test.cc    |  17 ++-
 ortools/sat/util.h               |   5 +-
 ortools/util/bitset.h            |   2 +-
 9 files changed, 196 insertions(+), 111 deletions(-)
diff --git a/ortools/sat/BUILD.bazel b/ortools/sat/BUILD.bazel
index a6dabbad84..3047678851 100644
--- a/ortools/sat/BUILD.bazel
+++ b/ortools/sat/BUILD.bazel
@@ -3640,6 +3640,7 @@ cc_library(
     name = "inclusion",
     hdrs = ["inclusion.h"],
     deps = [
+        ":util",
         "//ortools/base",
         "//ortools/util:bitset",
         "//ortools/util:time_limit",
diff --git a/ortools/sat/clause.cc b/ortools/sat/clause.cc
index 176eeecb09..81398564b3 100644
--- a/ortools/sat/clause.cc
+++ b/ortools/sat/clause.cc
@@ -1863,17 +1863,16 @@ bool BinaryImplicationGraph::MergeAtMostOnes(
   // Data to detect inclusion of base amo into extend amo.
   std::vector<int> detector_clique_index;
   CompactVectorVector<int> storage;
-  InclusionDetector detector(storage, time_limit_);
-  detector.SetWorkLimit(max_num_explored_nodes);
-
-  // First add all clique as possible subset.
   for (const auto& [index, old_size] : index_size_vector) {
-    std::vector<Literal>& clique = at_most_ones[index];
     if (time_limit_->LimitReached()) break;
     detector_clique_index.push_back(index);
-    detector.AddPotentialSubset(storage.AddLiterals(clique));
+    storage.AddLiterals(at_most_ones[index]);
   }
-  detector.IndexAllSubsets();
+
+  // We use an higher limit here as the code is faster.
+  SubsetsDetector detector(storage, time_limit_);
+  detector.SetWorkLimit(10 * max_num_explored_nodes);
+  detector.IndexAllStorageAsSubsets();
 
   // Now try to expand one by one.
   //
@@ -1995,7 +1994,7 @@ bool BinaryImplicationGraph::MergeAtMostOnes(
   }
   if (dtime != nullptr) {
     *dtime +=
-        2e-8 * work_done_in_mark_descendants_ + 1e-8 * detector.work_done();
+        1e-8 * work_done_in_mark_descendants_ + 1e-9 * detector.work_done();
   }
   return true;
 }
diff --git a/ortools/sat/cp_model_checker.cc b/ortools/sat/cp_model_checker.cc
index 9d931eb19b..70ba8f009e 100644
--- a/ortools/sat/cp_model_checker.cc
+++ b/ortools/sat/cp_model_checker.cc
@@ -349,10 +349,6 @@ std::string ValidateIntProdConstraint(const CpModelProto& model,
     return absl::StrCat("An int_prod constraint should have a target: ",
                         ProtobufShortDebugString(ct));
   }
-  if (ct.int_prod().exprs().empty()) {
-    return absl::StrCat("An int_prod constraint should have some expressions: ",
-                        ProtobufShortDebugString(ct));
-  }
 
   for (const LinearExpressionProto& expr : ct.int_prod().exprs()) {
     RETURN_IF_NOT_EMPTY(ValidateAffineExpression(model, expr));
diff --git a/ortools/sat/cp_model_loader.cc b/ortools/sat/cp_model_loader.cc
index 774f4e6060..2093b0cae5 100644
--- a/ortools/sat/cp_model_loader.cc
+++ b/ortools/sat/cp_model_loader.cc
@@ -1540,9 +1540,15 @@ void LoadIntProdConstraint(const ConstraintProto& ct, Model* m) {
     case 0: {
       auto* integer_trail = m->GetOrCreate<IntegerTrail>();
       auto* sat_solver = m->GetOrCreate<SatSolver>();
-      if (!integer_trail->Enqueue(prod.LowerOrEqual(1)) ||
-          !integer_trail->Enqueue(prod.GreaterOrEqual(1))) {
-        sat_solver->NotifyThatModelIsUnsat();
+      if (prod.IsConstant()) {
+        if (prod.constant.value() != 1) {
+          sat_solver->NotifyThatModelIsUnsat();
+        }
+      } else {
+        if (!integer_trail->Enqueue(prod.LowerOrEqual(1)) ||
+            !integer_trail->Enqueue(prod.GreaterOrEqual(1))) {
+          sat_solver->NotifyThatModelIsUnsat();
+        }
       }
       break;
     }
diff --git a/ortools/sat/cp_model_presolve.cc b/ortools/sat/cp_model_presolve.cc
index 0a9b770ee4..c09ee6879a 100644
--- a/ortools/sat/cp_model_presolve.cc
+++ b/ortools/sat/cp_model_presolve.cc
@@ -7307,11 +7307,17 @@ void CpModelPresolver::Probe() {
     }
     const int64_t num_old_cliques = cliques.size();
 
+    // We adapt the limit if there is a lot of literals in amo/implications.
+    // Usually we can have big reduction on large problem so it seems
+    // worthwhile.
+    double limit = context_->params().merge_at_most_one_work_limit();
+    if (num_literals_before > 1e6) {
+      limit *= num_literals_before / 1e6;
+    }
+
     double dtime = 0.0;
-    implication_graph->MergeAtMostOnes(
-        absl::MakeSpan(cliques),
-        SafeDoubleToInt64(context_->params().merge_at_most_one_work_limit()),
-        &dtime);
+    implication_graph->MergeAtMostOnes(absl::MakeSpan(cliques),
+                                       SafeDoubleToInt64(limit), &dtime);
     timer.AddToWork(dtime);
 
     // Note that because TransformIntoMaxCliques() extend cliques, we are ok
@@ -7345,10 +7351,11 @@ void CpModelPresolver::Probe() {
 
     if (num_old_cliques != num_new_cliques ||
         num_literals_before != num_literals_after) {
-      timer.AddMessage(absl::StrCat("Merged ", num_old_cliques, "(",
-                                    num_literals_before, " literals) into ",
-                                    num_new_cliques, "(", num_literals_after,
-                                    " literals) at_most_ones. "));
+      timer.AddMessage(absl::StrCat(
+          "Merged ", FormatCounter(num_old_cliques), "(",
+          FormatCounter(num_literals_before), " literals) into ",
+          FormatCounter(num_new_cliques), "(",
+          FormatCounter(num_literals_after), " literals) at_most_ones. "));
     }
   }
 }
@@ -12383,6 +12390,8 @@ bool ModelCopy::ImportAndSimplifyConstraints(
       default: {
         ConstraintProto* new_ct = context_->working_model->add_constraints();
         *new_ct = ct;
+        new_ct->mutable_enforcement_literal()->Clear();
+        FinishEnforcementCopy(new_ct);
         if (ignore_names) {
           // TODO(user): find a better way than copy then clear_name()?
           new_ct->clear_name();
diff --git a/ortools/sat/inclusion.h b/ortools/sat/inclusion.h
index 815ce1bd98..fb354364de 100644
--- a/ortools/sat/inclusion.h
+++ b/ortools/sat/inclusion.h
@@ -28,6 +28,7 @@
 #include "absl/log/check.h"
 #include "absl/types/span.h"
 #include "ortools/base/logging.h"
+#include "ortools/sat/util.h"
 #include "ortools/util/bitset.h"
 #include "ortools/util/time_limit.h"
 
@@ -56,7 +57,7 @@ namespace sat {
 template <class Storage>
 class InclusionDetector {
  public:
-  explicit InclusionDetector(const Storage& storage, TimeLimit* time_limit)
+  InclusionDetector(const Storage& storage, TimeLimit* time_limit)
       : storage_(storage), time_limit_(time_limit) {}
 
   // Resets the class to an empty state.
@@ -124,24 +125,7 @@ class InclusionDetector {
   uint64_t work_done() const { return work_done_; }
   bool Stopped() const { return stop_; }
 
-  // Different API:
-  // 1/ Add all potential subset
-  // 2/ Call IndexAllSubsets()
-  // 3/ Call one or more time FindSubsets().
-  //    - process() can call StopProcessingCurrentSuperset() to abort early
-  //    - process() can call StopProcessingCurrentSubset() to never consider
-  //      that subset again.
-  // 4/ Call Stop() to reclaim some memory.
-  //
-  // Optimization: next_index_to_try is an index in superset that can be used
-  // to skip some position for which we already called FindSubsets().
-  void IndexAllSubsets();
-  void FindSubsets(absl::Span<const int> superset, int* next_index_to_try,
-                   const std::function<void(int subset)>& process);
-
  private:
-  uint64_t ComputeSignatureAndResizeVectors(absl::Span<const int> elements);
-
   // Allows to access the elements of each candidates via storage_[index];
   const Storage& storage_;
 
@@ -171,7 +155,6 @@ class InclusionDetector {
   uint64_t work_done_ = 0;
   uint64_t work_limit_ = std::numeric_limits<uint64_t>::max();
 
-  // Temporary data only used by DetectInclusions().
   bool stop_ = false;
   bool stop_with_current_subset_ = false;
   bool stop_with_current_superset_ = false;
@@ -181,10 +164,70 @@ class InclusionDetector {
   Bitset64<int> is_in_superset_;
 };
 
+// Similar API and purpose to InclusionDetector. But this one is a bit simpler
+// and faster if it fit your usage. This assume an initial given set of
+// potential subsets, that will be queried against supersets one by one.
+template <class Storage>
+class SubsetsDetector {
+ public:
+  SubsetsDetector(const Storage& storage, TimeLimit* time_limit)
+      : storage_(storage), time_limit_(time_limit) {}
+
+  void SetWorkLimit(uint64_t work_limit) { work_limit_ = work_limit; }
+  void StopProcessingCurrentSubset() { stop_with_current_subset_ = true; }
+  void StopProcessingCurrentSuperset() { stop_with_current_superset_ = true; }
+  void Stop() {
+    stop_ = true;
+    one_watcher_.clear();
+    is_in_superset_.resize(0);
+  }
+
+  uint64_t work_done() const { return work_done_; }
+  bool Stopped() const { return stop_; }
+
+  // Different API than InclusionDetector.
+  // 1/ Add all potential subset to the storage_.
+  // 2/ Call IndexAllStorageAsSubsets()
+  // 3/ Call one or more time FindSubsets().
+  //    - process() can call StopProcessingCurrentSuperset() to abort early
+  //    - process() can call StopProcessingCurrentSubset() to never consider
+  //      that subset again.
+  // 4/ Call Stop() to reclaim some memory.
+  //
+  // Optimization: next_index_to_try is an index in superset that can be used
+  // to skip some position for which we already called FindSubsets().
+  void IndexAllStorageAsSubsets();
+  void FindSubsets(absl::Span<const int> superset, int* next_index_to_try,
+                   const std::function<void(int subset)>& process);
+
+ private:
+  // Allows to access the elements of each subsets via storage_[index];
+  const Storage& storage_;
+
+  TimeLimit* time_limit_;
+  uint64_t work_done_ = 0;
+  uint64_t work_limit_ = std::numeric_limits<uint64_t>::max();
+
+  struct OneWatcherData {
+    int index;
+    int other_element;
+    uint64_t signature;
+  };
+
+  bool stop_ = false;
+  bool stop_with_current_subset_ = false;
+  bool stop_with_current_superset_ = false;
+  CompactVectorVector<int, OneWatcherData> one_watcher_;
+  Bitset64<int> is_in_superset_;
+};
+
 // Deduction guide.
 template <typename Storage>
 InclusionDetector(const Storage& storage) -> InclusionDetector<Storage>;
 
+template <typename Storage>
+SubsetsDetector(const Storage& storage) -> SubsetsDetector<Storage>;
+
 template <typename Storage>
 inline void InclusionDetector<Storage>::AddPotentialSet(int index) {
   DCHECK_GE(index, 0);
@@ -221,10 +264,9 @@ inline void InclusionDetector<Storage>::AddPotentialSuperset(int index) {
   candidates_.push_back({index, num_elements, /*order=*/2});
 }
 
-// Compute the signature and also resize vectors if needed. We want a
+// Compute the signature and the maximum element. We want a
 // signature that is order invariant and is compatible with inclusion.
-template <typename Storage>
-inline uint64_t InclusionDetector<Storage>::ComputeSignatureAndResizeVectors(
+inline std::pair<uint64_t, int> ComputeSignatureAndMaxElement(
     absl::Span<const int> elements) {
   uint64_t signature = 0;
   int max_element = 0;
@@ -233,12 +275,7 @@ inline uint64_t InclusionDetector<Storage>::ComputeSignatureAndResizeVectors(
     max_element = std::max(max_element, e);
     signature |= (int64_t{1} << (e & 63));
   }
-  DCHECK_EQ(is_in_superset_.size(), one_watcher_.size());
-  if (max_element >= is_in_superset_.size()) {
-    is_in_superset_.resize(max_element + 1);
-    one_watcher_.resize(max_element + 1);
-  }
-  return signature;
+  return {signature, max_element};
 }
 
 template <typename Storage>
@@ -264,7 +301,15 @@ inline void InclusionDetector<Storage>::DetectInclusions(
   for (const Candidate& candidate : candidates_) {
     const auto& candidate_elements = storage_[candidate.index];
     const int candidate_index = signatures_.size();
-    signatures_.push_back(ComputeSignatureAndResizeVectors(candidate_elements));
+
+    const auto [signature, max_element] =
+        ComputeSignatureAndMaxElement(candidate_elements);
+    signatures_.push_back(signature);
+    DCHECK_EQ(is_in_superset_.size(), one_watcher_.size());
+    if (max_element >= is_in_superset_.size()) {
+      is_in_superset_.resize(max_element + 1);
+      one_watcher_.resize(max_element + 1);
+    }
 
     stop_with_current_superset_ = false;
     if (candidate.CanBeSuperset()) {
@@ -376,48 +421,63 @@ inline void InclusionDetector<Storage>::DetectInclusions(
   Stop();
 }
 
-// TODO(user): Merge common code.
 template <typename Storage>
-inline void InclusionDetector<Storage>::IndexAllSubsets() {
-  if (num_potential_subsets_ == 0) return;
-
-  // Temp data must be ready to use.
+inline void SubsetsDetector<Storage>::IndexAllStorageAsSubsets() {
   stop_ = false;
-  DCHECK(signatures_.empty());
-  DCHECK(one_watcher_.empty());
 
-  // We don't really care about the order here.
+  // Flat representation of one_watcher_, we will fill it in one go from there.
+  std::vector<int> tmp_keys;
+  std::vector<OneWatcherData> tmp_values;
+  std::vector<int> element_to_num_watched;
+
   work_done_ = 0;
-  for (const Candidate& candidate : candidates_) {
-    const auto& candidate_elements = storage_[candidate.index];
-    const int candidate_index = signatures_.size();
-    signatures_.push_back(ComputeSignatureAndResizeVectors(candidate_elements));
+  for (int index = 0; index < storage_.size(); ++index) {
+    const auto& subset = storage_[index];
+    CHECK_GE(subset.size(), 2);
 
-    // Add new subset candidate to the watchers.
-    //
-    // Tricky: If this was also a superset and has been removed, we don't want
-    // to watch it!
-    if (candidate.CanBeSubset()) {
-      // Choose to watch the one with smallest list.
-      int best_choice = -1;
-      work_done_ += candidate.size;
-      if (work_done_ > work_limit_) return Stop();
-      for (const int e : candidate_elements) {
-        DCHECK_GE(e, 0);
-        DCHECK_LT(e, one_watcher_.size());
-        if (best_choice == -1 ||
-            one_watcher_[e].size() < one_watcher_[best_choice].size()) {
-          best_choice = e;
-        }
-      }
-      DCHECK_NE(best_choice, -1);
-      one_watcher_[best_choice].push_back(candidate_index);
+    const auto [signature, max_element] = ComputeSignatureAndMaxElement(subset);
+    if (max_element >= is_in_superset_.size()) {
+      is_in_superset_.resize(max_element + 1);
     }
+    if (max_element >= element_to_num_watched.size()) {
+      element_to_num_watched.resize(max_element + 1);
+    }
+
+    // Choose to watch the one with smallest list so far.
+    int best_choice = -1;
+    int best_value = -1;
+    int second_choice = -1;
+    int second_value = -1;
+    work_done_ += subset.size();
+    if (work_done_ > work_limit_) return Stop();
+    for (const int e : subset) {
+      DCHECK_GE(e, 0);
+      DCHECK_LT(e, element_to_num_watched.size());
+      const int value = element_to_num_watched[e];
+      if (value >= best_value) {
+        second_choice = best_choice;
+        second_value = best_value;
+        best_choice = e;
+        best_value = value;
+      } else if (value > second_value) {
+        second_choice = e;
+        second_value = value;
+      }
+    }
+    DCHECK_NE(best_choice, -1);
+    DCHECK_NE(second_choice, -1);
+    DCHECK_NE(best_choice, second_choice);
+
+    element_to_num_watched[best_choice]++;
+    tmp_keys.push_back(best_choice);
+    tmp_values.push_back({index, second_choice, signature});
   }
+
+  one_watcher_.ResetFromFlatMapping(tmp_keys, tmp_values);
 }
 
 template <typename Storage>
-inline void InclusionDetector<Storage>::FindSubsets(
+inline void SubsetsDetector<Storage>::FindSubsets(
     absl::Span<const int> superset, int* next_index_to_try,
     const std::function<void(int subset)>& process) {
   // We check each time our work_done_ has increased by more than this.
@@ -426,8 +486,11 @@ inline void InclusionDetector<Storage>::FindSubsets(
 
   // Compute the signature and also resize vector if needed. We want a
   // signature that is order invariant and is compatible with inclusion.
-  const uint64_t superset_signature =
-      ComputeSignatureAndResizeVectors(superset);
+  const auto [superset_signature, max_element] =
+      ComputeSignatureAndMaxElement(superset);
+  if (max_element >= is_in_superset_.size()) {
+    is_in_superset_.resize(max_element + 1);
+  }
 
   // Find any subset included in current superset.
   work_done_ += 2 * superset.size();
@@ -446,26 +509,36 @@ inline void InclusionDetector<Storage>::FindSubsets(
 
   stop_with_current_superset_ = false;
   const auto is_in_superset_view = is_in_superset_.const_view();
-  const auto signatures_view = signatures_.data();
   for (; *next_index_to_try < superset.size(); ++*next_index_to_try) {
     const int superset_e = superset[*next_index_to_try];
-    for (int i = 0; i < one_watcher_[superset_e].size(); ++i) {
-      const int c_index = one_watcher_[superset_e][i];
-      const Candidate& subset = candidates_[c_index];
+    if (superset_e >= one_watcher_.size()) continue;
+    auto cached_span = one_watcher_[superset_e];
+    for (int i = 0; i < cached_span.size(); ++i) {
+      ++work_done_;
 
-      // Quick check with size and signature.
-      if (subset.size > superset.size()) continue;
-      if ((signatures_view[c_index] & ~superset_signature) != 0) continue;
+      // Do a bunch of quick checks. The second one is optimized for size 2
+      // which happens a lot in our usage of merging clique with implications.
+      const auto [subset_index, other_e, subset_signature] = cached_span[i];
+      if ((subset_signature & ~superset_signature) != 0) continue;
+      if (!is_in_superset_view[other_e]) continue;
 
       // Long check with bitset.
+      const absl::Span<const int> subset = storage_[subset_index];
+      if (subset.size() > superset.size()) continue;
+
+      // TODO(user): Technically we do not need to check the watched position or
+      // the "other element" position, we could do that by permuting them first
+      // or last and iterating on a subspan. However, in many slow situation, we
+      // have millions of size 2 sets, and the time is dominated by the first
+      // check.
       bool is_included = true;
-      work_done_ += subset.size;
+      work_done_ += subset.size();
       if (work_done_ > work_limit_) return Stop();
       if (work_done_ > next_time_limit_check) {
         if (time_limit_->LimitReached()) return Stop();
         next_time_limit_check = work_done_ + kCheckTimeLimitInterval;
       }
-      for (const int subset_e : storage_[subset.index]) {
+      for (const int subset_e : subset) {
         if (!is_in_superset_view[subset_e]) {
           is_included = false;
           break;
@@ -474,8 +547,10 @@ inline void InclusionDetector<Storage>::FindSubsets(
       if (!is_included) continue;
 
       stop_with_current_subset_ = false;
-      process(subset.index);
+      process(subset_index);
 
+      // TODO(user): Remove this and the more complex API need once we move
+      // class.
       if (stop_) return;
       if (work_done_ > work_limit_) return Stop();
       if (work_done_ > next_time_limit_check) {
@@ -484,9 +559,8 @@ inline void InclusionDetector<Storage>::FindSubsets(
       }
 
       if (stop_with_current_subset_) {
-        // Remove from the watcher list.
-        std::swap(one_watcher_[superset_e][i], one_watcher_[superset_e].back());
-        one_watcher_[superset_e].pop_back();
+        one_watcher_.RemoveBySwap(superset_e, i);
+        cached_span.remove_suffix(1);
         --i;
       }
       if (stop_with_current_superset_) break;
diff --git a/ortools/sat/inclusion_test.cc b/ortools/sat/inclusion_test.cc
index a113b4d069..7faa111bd8 100644
--- a/ortools/sat/inclusion_test.cc
+++ b/ortools/sat/inclusion_test.cc
@@ -178,17 +178,16 @@ TEST(InclusionDetectorTest, RandomTest) {
       [&num_inclusions](int subset, int superset) { ++num_inclusions; });
 }
 
-TEST(InclusionDetectorTest, AlternativeApi) {
+TEST(SubsetsDetectorTest, AlternativeApi) {
   CompactVectorVector<int> storage;
-  TimeLimit time_limit;
-  InclusionDetector detector(storage, &time_limit);
+  storage.Add({1, 2});
+  storage.Add({4, 3});
+  storage.Add({1, 2, 3});
+  storage.Add({2, 3});
 
-  // Lets add some subset.
-  detector.AddPotentialSubset(storage.Add({1, 2}));
-  detector.AddPotentialSubset(storage.Add({4, 3}));
-  detector.AddPotentialSubset(storage.Add({1, 2, 3}));
-  detector.AddPotentialSubset(storage.Add({2, 3}));
-  detector.IndexAllSubsets();
+  TimeLimit time_limit;
+  SubsetsDetector detector(storage, &time_limit);
+  detector.IndexAllStorageAsSubsets();
 
   // Now we can query any "superset".
   // Note that there is no guarantee on the order of discovery.
diff --git a/ortools/sat/util.h b/ortools/sat/util.h
index 7863ceda30..29a35b7ac3 100644
--- a/ortools/sat/util.h
+++ b/ortools/sat/util.h
@@ -69,6 +69,7 @@ class CompactVectorVector {
   // Size of the "key" space, always in [0, size()).
   size_t size() const;
   bool empty() const;
+  size_t num_entries() const { return buffer_.size(); }
 
   // Getters, either via [] or via a wrapping to be compatible with older api.
   //
@@ -85,9 +86,9 @@ class CompactVectorVector {
     starts_.reserve(size);
     sizes_.reserve(size);
   }
-  void reserve(int size, int num_terms) {
+  void reserve(int size, int num_entries) {
     reserve(size);
-    buffer_.reserve(num_terms);
+    buffer_.reserve(num_entries);
   }
 
   // Given a flat mapping (keys[i] -> values[i]) with two parallel vectors, not
diff --git a/ortools/util/bitset.h b/ortools/util/bitset.h
index 9b0636a328..e257537ba5 100644
--- a/ortools/util/bitset.h
+++ b/ortools/util/bitset.h
@@ -513,7 +513,7 @@ class Bitset64 {
   void ClearBucket(IndexType i) {
     DCHECK_GE(Value(i), 0);
     DCHECK_LT(Value(i), Value(size_));
-    data_[BitOffset64(Value(i))] = 0;
+    data_.data()[BitOffset64(Value(i))] = 0;
   }
 
   // Clears the bits at position i and i ^ 1.