From 702791f2e359c220b6023e96b30adfd18e926495 Mon Sep 17 00:00:00 2001 From: Laurent Perron Date: Tue, 24 Mar 2020 18:51:13 +0100 Subject: [PATCH] experimental parallel interleaved search; speed up sorted interval lists; misc cleaning of swig files --- ortools/sat/cp_model.proto | 4 +- ortools/sat/cp_model_solver.cc | 66 +++++++++++++++------------ ortools/sat/sat_parameters.proto | 17 +++---- ortools/sat/synchronization.cc | 68 ++++++++++++++++++++-------- ortools/sat/synchronization.h | 29 ++++++++++-- ortools/util/java/proto.i | 2 - ortools/util/java/vector.i | 2 + ortools/util/sorted_interval_list.cc | 63 +++++++++++++++++++------- ortools/util/sorted_interval_list.h | 12 +++-- ortools/util/time_limit.h | 9 +--- 10 files changed, 180 insertions(+), 92 deletions(-) diff --git a/ortools/sat/cp_model.proto b/ortools/sat/cp_model.proto index 202767714f..54e871c065 100644 --- a/ortools/sat/cp_model.proto +++ b/ortools/sat/cp_model.proto @@ -321,11 +321,11 @@ message ConstraintProto { // TODO(user): Remove int_min in favor of lin_min. IntegerArgumentProto int_min = 10; - // The lin_max constraint forces the target to equal the minimum of all + // The lin_min constraint forces the target to equal the minimum of all // linear expressions. LinearArgumentProto lin_min = 28; - // The int_min constraint forces the target to equal the product of all + // The int_prod constraint forces the target to equal the product of all // variables. IntegerArgumentProto int_prod = 11; diff --git a/ortools/sat/cp_model_solver.cc b/ortools/sat/cp_model_solver.cc index 7cf2bed043..50c95400b6 100644 --- a/ortools/sat/cp_model_solver.cc +++ b/ortools/sat/cp_model_solver.cc @@ -940,8 +940,13 @@ void RegisterVariableBoundsLevelZeroExport( const WorkerInfo* const worker_info = model->GetOrCreate(); shared_bounds_manager->ReportPotentialNewBounds( - model_proto, worker_info->worker_id, worker_info->worker_name, - model_variables, new_lower_bounds, new_upper_bounds); + model_proto, worker_info->worker_name, model_variables, + new_lower_bounds, new_upper_bounds); + } + + // If we are not in interleave_search we synchronize right away. + if (!model->Get()->interleave_search()) { + shared_bounds_manager->Synchronize(); } }; @@ -1027,11 +1032,15 @@ void RegisterObjectiveBestBoundExport( std::string worker_name = model->GetOrCreate()->worker_name; auto* integer_trail = model->Get(); const auto broadcast_objective_lower_bound = - [worker_name, objective_var, integer_trail, - shared_response_manager](const std::vector& unused) { + [worker_name, objective_var, integer_trail, shared_response_manager, + model](const std::vector& unused) { shared_response_manager->UpdateInnerObjectiveBounds( worker_name, integer_trail->LevelZeroLowerBound(objective_var), integer_trail->LevelZeroUpperBound(objective_var)); + // If we are not in interleave_search we synchronize right away. + if (!model->Get()->interleave_search()) { + shared_response_manager->Synchronize(); + } }; model->GetOrCreate() ->RegisterLevelZeroModifiedVariablesCallback( @@ -1053,7 +1062,7 @@ void RegisterObjectiveBoundsImport( bool propagate = false; const IntegerValue external_lb = - shared_response_manager->GetInnerObjectiveLowerBound(); + shared_response_manager->SynchronizedInnerObjectiveLowerBound(); const IntegerValue current_lb = integer_trail->LowerBound(objective->objective_var); if (external_lb > current_lb) { @@ -1066,7 +1075,7 @@ void RegisterObjectiveBoundsImport( } const IntegerValue external_ub = - shared_response_manager->GetInnerObjectiveUpperBound(); + shared_response_manager->SynchronizedInnerObjectiveUpperBound(); const IntegerValue current_ub = integer_trail->UpperBound(objective->objective_var); if (external_ub < current_ub) { @@ -1921,18 +1930,6 @@ class FullProblemSolver : public SubSolver { bool previous_task_is_completed_ GUARDED_BY(mutex_) = true; }; -namespace { - -// Returns true if the offset and scaling factor of the given objectives are -// same and false otherwise. -bool CompareObjectiveScalingAndOffset(const CpObjectiveProto& objective1, - const CpObjectiveProto& objective2) { - if (objective1.offset() != objective2.offset()) return false; - if (objective1.scaling_factor() != objective2.scaling_factor()) return false; - return true; -} -} // namespace - // A Subsolver that generate LNS solve from a given neighborhood. class LnsSolver : public SubSolver { public: @@ -2204,21 +2201,21 @@ void SolveCpModelParallel(const CpModelProto& model_proto, CHECK(!parameters.enumerate_all_solutions()) << "Enumerating all solutions in parallel is not supported."; - // If "interleave_search" is true, then the number of strategies is - // 4 if num_search_workers = 1, or 8 otherwise. + // If "interleave_search" is true, then the number of strategies is not + // related to the number of workers. const int num_strategies = parameters.interleave_search() - ? (parameters.reduce_memory_usage_in_interleave_mode() ? 5 : 8) + ? (parameters.reduce_memory_usage_in_interleave_mode() ? 5 : 9) : num_search_workers; std::unique_ptr shared_bounds_manager; - if (global_model->GetOrCreate()->share_level_zero_bounds()) { + if (parameters.share_level_zero_bounds()) { // TODO(user): The current code is a bit brittle because we may have // more SubSolver ids than num_strategies, and each SubSolver might // need to synchronize bounds. Fix, it should be easy to make this number // adapt dynamically in the SharedBoundsManager. shared_bounds_manager = - absl::make_unique(num_strategies + 1, model_proto); + absl::make_unique(num_strategies + 2, model_proto); } std::unique_ptr shared_rins_manager; if (global_model->GetOrCreate()->use_rins_lns()) { @@ -2239,6 +2236,16 @@ void SolveCpModelParallel(const CpModelProto& model_proto, // The list of all the SubSolver that will be used in this parallel search. std::vector> subsolvers; + // Add a synchronization point for the shared classes. + subsolvers.push_back(absl::make_unique( + /*id=*/subsolvers.size(), + [shared_response_manager, &shared_bounds_manager]() { + shared_response_manager->Synchronize(); + if (shared_bounds_manager != nullptr) { + shared_bounds_manager->Synchronize(); + } + })); + if (parameters.use_lns_only()) { // Register something to find a first solution. Note that this is mainly // used for experimentation, and using no LP ususally result in a faster @@ -2333,7 +2340,10 @@ void SolveCpModelParallel(const CpModelProto& model_proto, helper, absl::StrCat("scheduling_random_lns_", strategy_name)), local_params, helper, &shared)); } - if (parameters.use_rins_lns()) { + + // TODO(user): for now this is not deterministic so we disable it on + // interleave search. Fix. + if (parameters.use_rins_lns() && !parameters.interleave_search()) { subsolvers.push_back(absl::make_unique( /*id=*/subsolvers.size(), absl::make_unique( @@ -2364,11 +2374,9 @@ void SolveCpModelParallel(const CpModelProto& model_proto, } // Launch the main search loop. - if (parameters.deterministic_parallel_search()) { - // TODO(user): Make the batch_size independent of the number of threads so - // that we have the same behavior independently of the number of workers! - const int batch_size = 4 * num_search_workers; - DeterministicLoop(subsolvers, num_search_workers, batch_size); + if (parameters.interleave_search()) { + DeterministicLoop(subsolvers, num_search_workers, + parameters.interleave_batch_size()); } else { NonDeterministicLoop(subsolvers, num_search_workers); } diff --git a/ortools/sat/sat_parameters.proto b/ortools/sat/sat_parameters.proto index 3860e8409a..ed88477f3a 100644 --- a/ortools/sat/sat_parameters.proto +++ b/ortools/sat/sat_parameters.proto @@ -756,19 +756,16 @@ message SatParameters { optional int32 num_search_workers = 100 [default = 1]; // Experimental. If this is true, then we interleave all our major search - // strategy and distribute the work amongst num_search_workers. This also - // work with just one worker, in which case the solver is deterministic even - // if deterministic_parallel_search is false. + // strategy and distribute the work amongst num_search_workers. + // + // The search is deterministic (independently of num_search_workers!), and we + // schedule and wait for interleave_batch_size task to be completed before + // synchronizing and scheduling the next batch of tasks. optional bool interleave_search = 136 [default = false]; + optional int32 interleave_batch_size = 134 [default = 1]; // Temporary parameter until the memory usage is more optimized. - optional bool reduce_memory_usage_in_interleave_mode = 141 [default = true]; - - // Make the parallelization deterministic. Currently, this only work with - // use_lns_only(). - // - // TODO(user): make it work without use_lns_only(). - optional bool deterministic_parallel_search = 134 [default = false]; + optional bool reduce_memory_usage_in_interleave_mode = 141 [default = false]; // Allows objective sharing between workers. optional bool share_objective_bounds = 113 [default = true]; diff --git a/ortools/sat/synchronization.cc b/ortools/sat/synchronization.cc index b2eb6163dc..0b02bb2450 100644 --- a/ortools/sat/synchronization.cc +++ b/ortools/sat/synchronization.cc @@ -252,6 +252,24 @@ IntegerValue SharedResponseManager::GetInnerObjectiveUpperBound() { return IntegerValue(inner_objective_upper_bound_); } +void SharedResponseManager::Synchronize() { + absl::MutexLock mutex_lock(&mutex_); + synchronized_inner_objective_lower_bound_ = + IntegerValue(inner_objective_lower_bound_); + synchronized_inner_objective_upper_bound_ = + IntegerValue(inner_objective_upper_bound_); +} + +IntegerValue SharedResponseManager::SynchronizedInnerObjectiveLowerBound() { + absl::MutexLock mutex_lock(&mutex_); + return synchronized_inner_objective_lower_bound_; +} + +IntegerValue SharedResponseManager::SynchronizedInnerObjectiveUpperBound() { + absl::MutexLock mutex_lock(&mutex_); + return synchronized_inner_objective_upper_bound_; +} + IntegerValue SharedResponseManager::BestSolutionInnerObjectiveValue() { absl::MutexLock mutex_lock(&mutex_); return IntegerValue(best_solution_objective_value_); @@ -487,9 +505,12 @@ SharedBoundsManager::SharedBoundsManager(int num_workers, const CpModelProto& model_proto) : num_workers_(num_workers), num_variables_(model_proto.variables_size()), - changed_variables_per_workers_(num_workers), lower_bounds_(num_variables_, kint64min), - upper_bounds_(num_variables_, kint64max) { + upper_bounds_(num_variables_, kint64max), + changed_variables_per_workers_(num_workers), + synchronized_lower_bounds_(num_variables_, kint64min), + synchronized_upper_bounds_(num_variables_, kint64max) { + changed_variables_since_last_synchronize_.ClearAndResize(num_variables_); for (int i = 0; i < num_workers_; ++i) { changed_variables_per_workers_[i].ClearAndResize(num_variables_); } @@ -497,12 +518,14 @@ SharedBoundsManager::SharedBoundsManager(int num_workers, lower_bounds_[i] = model_proto.variables(i).domain(0); const int domain_size = model_proto.variables(i).domain_size(); upper_bounds_[i] = model_proto.variables(i).domain(domain_size - 1); + synchronized_lower_bounds_[i] = lower_bounds_[i]; + synchronized_upper_bounds_[i] = upper_bounds_[i]; } } void SharedBoundsManager::ReportPotentialNewBounds( - const CpModelProto& model_proto, int worker_id, - const std::string& worker_name, const std::vector& variables, + const CpModelProto& model_proto, const std::string& worker_name, + const std::vector& variables, const std::vector& new_lower_bounds, const std::vector& new_upper_bounds) { CHECK_EQ(variables.size(), new_lower_bounds.size()); @@ -527,11 +550,8 @@ void SharedBoundsManager::ReportPotentialNewBounds( if (changed_ub) { upper_bounds_[var] = new_ub; } + changed_variables_since_last_synchronize_.Set(var); - for (int j = 0; j < num_workers_; ++j) { - if (worker_id == j) continue; - changed_variables_per_workers_[j].Set(var); - } if (VLOG_IS_ON(2)) { const IntegerVariableProto& var_proto = model_proto.variables(var); const std::string& var_name = @@ -544,8 +564,19 @@ void SharedBoundsManager::ReportPotentialNewBounds( } } -// When called, returns the set of bounds improvements since -// the last time this method was called by the same worker. +void SharedBoundsManager::Synchronize() { + absl::MutexLock mutex_lock(&mutex_); + for (const int var : + changed_variables_since_last_synchronize_.PositionsSetAtLeastOnce()) { + synchronized_lower_bounds_[var] = lower_bounds_[var]; + synchronized_upper_bounds_[var] = upper_bounds_[var]; + for (int j = 0; j < num_workers_; ++j) { + changed_variables_per_workers_[j].Set(var); + } + } + changed_variables_since_last_synchronize_.ClearAll(); +} + void SharedBoundsManager::GetChangedBounds( int worker_id, std::vector* variables, std::vector* new_lower_bounds, @@ -553,16 +584,15 @@ void SharedBoundsManager::GetChangedBounds( variables->clear(); new_lower_bounds->clear(); new_upper_bounds->clear(); - { - absl::MutexLock mutex_lock(&mutex_); - for (const int var : - changed_variables_per_workers_[worker_id].PositionsSetAtLeastOnce()) { - variables->push_back(var); - new_lower_bounds->push_back(lower_bounds_[var]); - new_upper_bounds->push_back(upper_bounds_[var]); - } - changed_variables_per_workers_[worker_id].ClearAll(); + + absl::MutexLock mutex_lock(&mutex_); + for (const int var : + changed_variables_per_workers_[worker_id].PositionsSetAtLeastOnce()) { + variables->push_back(var); + new_lower_bounds->push_back(synchronized_lower_bounds_[var]); + new_upper_bounds->push_back(synchronized_upper_bounds_[var]); } + changed_variables_per_workers_[worker_id].ClearAll(); } } // namespace sat diff --git a/ortools/sat/synchronization.h b/ortools/sat/synchronization.h index 453ad4a16a..af2454e7bb 100644 --- a/ortools/sat/synchronization.h +++ b/ortools/sat/synchronization.h @@ -135,6 +135,12 @@ class SharedResponseManager { IntegerValue GetInnerObjectiveLowerBound(); IntegerValue GetInnerObjectiveUpperBound(); + // These functions return the same as the non-synchronized() version but + // only the values at the last time Synchronize() was called. + void Synchronize(); + IntegerValue SynchronizedInnerObjectiveLowerBound(); + IntegerValue SynchronizedInnerObjectiveUpperBound(); + // Returns the current best solution inner objective value or kInt64Max if // there is no solution. IntegerValue BestSolutionInnerObjectiveValue(); @@ -222,6 +228,11 @@ class SharedResponseManager { int64 inner_objective_upper_bound_ GUARDED_BY(mutex_) = kint64max; int64 best_solution_objective_value_ GUARDED_BY(mutex_) = kint64max; + IntegerValue synchronized_inner_objective_lower_bound_ GUARDED_BY(mutex_) = + IntegerValue(kint64min); + IntegerValue synchronized_inner_objective_upper_bound_ GUARDED_BY(mutex_) = + IntegerValue(kint64max); + double primal_integral_ GUARDED_BY(mutex_) = 0.0; double last_primal_integral_time_stamp_ GUARDED_BY(mutex_) = 0.0; @@ -239,7 +250,7 @@ class SharedBoundsManager { // Reports a set of locally improved variable bounds to the shared bounds // manager. The manager will compare these bounds changes against its // global state, and incorporate the improving ones. - void ReportPotentialNewBounds(const CpModelProto& model_proto, int worker_id, + void ReportPotentialNewBounds(const CpModelProto& model_proto, const std::string& worker_name, const std::vector& variables, const std::vector& new_lower_bounds, @@ -251,16 +262,28 @@ class SharedBoundsManager { std::vector* new_lower_bounds, std::vector* new_upper_bounds); + // Publishes any new bounds so that GetChangedBounds() will reflect the latest + // state. + void Synchronize(); + private: const int num_workers_; const int num_variables_; absl::Mutex mutex_; - std::vector> changed_variables_per_workers_ - GUARDED_BY(mutex_); + // These are always up to date. std::vector lower_bounds_ GUARDED_BY(mutex_); std::vector upper_bounds_ GUARDED_BY(mutex_); + + SparseBitset changed_variables_since_last_synchronize_ + GUARDED_BY(mutex_); + + // These are only updated on Synchronize(). + std::vector> changed_variables_per_workers_ + GUARDED_BY(mutex_); + std::vector synchronized_lower_bounds_ GUARDED_BY(mutex_); + std::vector synchronized_upper_bounds_ GUARDED_BY(mutex_); }; // Stores information on the worker in the parallel context. diff --git a/ortools/util/java/proto.i b/ortools/util/java/proto.i index e2746ceed3..42cd6f83c9 100644 --- a/ortools/util/java/proto.i +++ b/ortools/util/java/proto.i @@ -38,8 +38,6 @@ // // TODO(user): move this file to base/swig/java -%include "ortools/base/base.i" - %{ #include "ortools/base/integral_types.h" %} diff --git a/ortools/util/java/vector.i b/ortools/util/java/vector.i index a96ad6838c..af79859fcd 100644 --- a/ortools/util/java/vector.i +++ b/ortools/util/java/vector.i @@ -20,6 +20,8 @@ // // TODO(user): move to base/swig/java. +%include "stdint.i" + %include "ortools/base/base.i" %{ diff --git a/ortools/util/sorted_interval_list.cc b/ortools/util/sorted_interval_list.cc index 54b063a703..5fc40d4460 100644 --- a/ortools/util/sorted_interval_list.cc +++ b/ortools/util/sorted_interval_list.cc @@ -79,6 +79,7 @@ void UnionOfSortedIntervals(absl::InlinedVector* intervals) { } // namespace +// TODO(user): Use MathUtil::CeilOfRatio / FloorOfRatio instead. int64 CeilRatio(int64 value, int64 positive_coeff) { DCHECK_GT(positive_coeff, 0); const int64 result = value / positive_coeff; @@ -108,7 +109,23 @@ std::ostream& operator<<(std::ostream& out, const Domain& domain) { Domain::Domain(int64 value) : intervals_({{value, value}}) {} -Domain::Domain(int64 left, int64 right) : intervals_({{left, right}}) { +// HACK(user): We spare significant time if we use an initializer here, because +// InlineVector<1> is able to recognize the fast path or "exactly one element". +// I was unable to obtain the same performance with any other recipe, I always +// had at least 1 more cycle. See BM_SingleIntervalDomainConstructor. +// Since the constructor takes very few cycles (most likely less than 10), +// that's quite significant. +namespace { +inline ClosedInterval UncheckedClosedInterval(int64 s, int64 e) { + ClosedInterval i; + i.start = s; + i.end = e; + return i; +} +} // namespace + +Domain::Domain(int64 left, int64 right) + : intervals_({UncheckedClosedInterval(left, right)}) { if (left > right) intervals_.clear(); } @@ -136,6 +153,7 @@ Domain Domain::FromIntervals(absl::Span intervals) { } Domain Domain::FromFlatSpanOfIntervals(absl::Span flat_intervals) { + DCHECK(flat_intervals.size() % 2 == 0) << flat_intervals.size(); Domain result; result.intervals_.reserve(flat_intervals.size() / 2); for (int i = 0; i < flat_intervals.size(); i += 2) { @@ -157,6 +175,7 @@ Domain Domain::FromVectorIntervals( if (interval.size() == 1) { result.intervals_.push_back({interval[0], interval[0]}); } else { + DCHECK_EQ(interval.size(), 2); result.intervals_.push_back({interval[0], interval[1]}); } } @@ -257,23 +276,33 @@ Domain Domain::IntersectionWith(const Domain& domain) const { const auto& a = intervals_; const auto& b = domain.intervals_; for (int i = 0, j = 0; i < a.size() && j < b.size();) { - const ClosedInterval intersection{std::max(a[i].start, b[j].start), - std::min(a[i].end, b[j].end)}; - if (intersection.start > intersection.end) { - // Intersection is empty, we advance past the first interval of the two. - if (a[i].start < b[j].start) { - i++; - } else { - j++; + if (a[i].start <= b[j].start) { + if (a[i].end < b[j].start) { + // Empty intersection. We advance past the first interval. + ++i; + } else { // a[i].end >= b[j].start + // Non-empty intersection: push back the intersection of these two, and + // advance past the first interval to finish. + if (a[i].end <= b[j].end) { + result.intervals_.push_back({b[j].start, a[i].end}); + ++i; + } else { // a[i].end > b[j].end. + result.intervals_.push_back({b[j].start, b[j].end}); + ++j; + } } - } else { - // Intersection is non-empty, we add it to the result and advance past - // the first interval to finish. - result.intervals_.push_back(intersection); - if (a[i].end < b[j].end) { - i++; - } else { - j++; + } else { // a[i].start > b[i].start. + // We do the exact same thing as above, but swapping a and b. + if (b[j].end < a[i].start) { + ++j; + } else { // b[j].end >= a[i].start + if (b[j].end <= a[i].end) { + result.intervals_.push_back({a[i].start, b[j].end}); + ++j; + } else { // a[i].end > b[j].end. + result.intervals_.push_back({a[i].start, a[i].end}); + ++i; + } } } } diff --git a/ortools/util/sorted_interval_list.h b/ortools/util/sorted_interval_list.h index 0d6f13a4dc..c41bafa4ee 100644 --- a/ortools/util/sorted_interval_list.h +++ b/ortools/util/sorted_interval_list.h @@ -22,6 +22,7 @@ #include "absl/container/inlined_vector.h" #include "absl/types/span.h" #include "ortools/base/integral_types.h" +#include "ortools/base/logging.h" namespace operations_research { @@ -30,7 +31,10 @@ namespace operations_research { */ struct ClosedInterval { ClosedInterval() {} - ClosedInterval(int64 s, int64 e) : start(s), end(e) {} + ClosedInterval(int64 s, int64 e) : start(s), end(e) { + DLOG_IF(DFATAL, s > e) << "Invalid ClosedInterval(" << s << ", " << e + << ")"; + } std::string DebugString() const; bool operator==(const ClosedInterval& other) const { @@ -56,6 +60,8 @@ std::ostream& operator<<(std::ostream& out, * Returns true iff we have: * - The intervals appear in increasing order. * - for all i: intervals[i].start <= intervals[i].end + * (should always be true, by construction, but bad intervals can in practice + * escape detection in opt mode). * - for all i but the last: intervals[i].end + 1 < intervals[i+1].start */ bool IntervalsAreSortedAndNonAdjacent( @@ -348,8 +354,8 @@ class Domain { // Invariant: will always satisfy IntervalsAreSortedAndNonAdjacent(). // - // Note that we use InlinedVector for the common case of single interal - // domains. This provide a good performance boost when working with a + // Note that we use InlinedVector for the common case of single internal + // interval. This provide a good performance boost when working with a // std::vector. absl::InlinedVector intervals_; }; diff --git a/ortools/util/time_limit.h b/ortools/util/time_limit.h index 74cb9ce3a7..b3d2df6394 100644 --- a/ortools/util/time_limit.h +++ b/ortools/util/time_limit.h @@ -267,9 +267,7 @@ class TimeLimit { * i.e. \c LimitReached() returns true when the value of * external_boolean_as_limit is true whatever the time limits are. * - * Note 1: The external_boolean_as_limit can be modified during solve. - * - * Note 2: \c ResetLimitFromParameters() will set this Boolean to false. + * Note : The external_boolean_as_limit can be modified during solve. */ void RegisterExternalBooleanAsLimit( std::atomic* external_boolean_as_limit) { @@ -285,7 +283,7 @@ class TimeLimit { /** * Sets new time limits. Note that this does not reset the running max nor - * any registered external boolean or calls to RegisterSigintHandler(). + * any registered external Boolean. */ template void ResetLimitFromParameters(const Parameters& parameters); @@ -481,9 +479,6 @@ inline TimeLimit::TimeLimit(double limit_in_seconds, double deterministic_limit, inline void TimeLimit::ResetTimers(double limit_in_seconds, double deterministic_limit, double instruction_limit) { - if (external_boolean_as_limit_ != nullptr) { - *external_boolean_as_limit_ = false; - } elapsed_deterministic_time_ = 0.0; deterministic_limit_ = deterministic_limit; instruction_limit_ = instruction_limit;