experimental parallel interleaved search; speed up sorted interval lists; misc cleaning of swig files

This commit is contained in:
Laurent Perron
2020-03-24 18:51:13 +01:00
parent 4ba76781a1
commit 702791f2e3
10 changed files with 180 additions and 92 deletions

View File

@@ -321,11 +321,11 @@ message ConstraintProto {
// TODO(user): Remove int_min in favor of lin_min.
IntegerArgumentProto int_min = 10;
// The lin_max constraint forces the target to equal the minimum of all
// The lin_min constraint forces the target to equal the minimum of all
// linear expressions.
LinearArgumentProto lin_min = 28;
// The int_min constraint forces the target to equal the product of all
// The int_prod constraint forces the target to equal the product of all
// variables.
IntegerArgumentProto int_prod = 11;

View File

@@ -940,8 +940,13 @@ void RegisterVariableBoundsLevelZeroExport(
const WorkerInfo* const worker_info =
model->GetOrCreate<WorkerInfo>();
shared_bounds_manager->ReportPotentialNewBounds(
model_proto, worker_info->worker_id, worker_info->worker_name,
model_variables, new_lower_bounds, new_upper_bounds);
model_proto, worker_info->worker_name, model_variables,
new_lower_bounds, new_upper_bounds);
}
// If we are not in interleave_search we synchronize right away.
if (!model->Get<SatParameters>()->interleave_search()) {
shared_bounds_manager->Synchronize();
}
};
@@ -1027,11 +1032,15 @@ void RegisterObjectiveBestBoundExport(
std::string worker_name = model->GetOrCreate<WorkerInfo>()->worker_name;
auto* integer_trail = model->Get<IntegerTrail>();
const auto broadcast_objective_lower_bound =
[worker_name, objective_var, integer_trail,
shared_response_manager](const std::vector<IntegerVariable>& unused) {
[worker_name, objective_var, integer_trail, shared_response_manager,
model](const std::vector<IntegerVariable>& unused) {
shared_response_manager->UpdateInnerObjectiveBounds(
worker_name, integer_trail->LevelZeroLowerBound(objective_var),
integer_trail->LevelZeroUpperBound(objective_var));
// If we are not in interleave_search we synchronize right away.
if (!model->Get<SatParameters>()->interleave_search()) {
shared_response_manager->Synchronize();
}
};
model->GetOrCreate<GenericLiteralWatcher>()
->RegisterLevelZeroModifiedVariablesCallback(
@@ -1053,7 +1062,7 @@ void RegisterObjectiveBoundsImport(
bool propagate = false;
const IntegerValue external_lb =
shared_response_manager->GetInnerObjectiveLowerBound();
shared_response_manager->SynchronizedInnerObjectiveLowerBound();
const IntegerValue current_lb =
integer_trail->LowerBound(objective->objective_var);
if (external_lb > current_lb) {
@@ -1066,7 +1075,7 @@ void RegisterObjectiveBoundsImport(
}
const IntegerValue external_ub =
shared_response_manager->GetInnerObjectiveUpperBound();
shared_response_manager->SynchronizedInnerObjectiveUpperBound();
const IntegerValue current_ub =
integer_trail->UpperBound(objective->objective_var);
if (external_ub < current_ub) {
@@ -1921,18 +1930,6 @@ class FullProblemSolver : public SubSolver {
bool previous_task_is_completed_ GUARDED_BY(mutex_) = true;
};
namespace {
// Returns true if the offset and scaling factor of the given objectives are
// same and false otherwise.
bool CompareObjectiveScalingAndOffset(const CpObjectiveProto& objective1,
const CpObjectiveProto& objective2) {
if (objective1.offset() != objective2.offset()) return false;
if (objective1.scaling_factor() != objective2.scaling_factor()) return false;
return true;
}
} // namespace
// A SubSolver that generates an LNS solve from a given neighborhood.
class LnsSolver : public SubSolver {
public:
@@ -2204,21 +2201,21 @@ void SolveCpModelParallel(const CpModelProto& model_proto,
CHECK(!parameters.enumerate_all_solutions())
<< "Enumerating all solutions in parallel is not supported.";
// If "interleave_search" is true, then the number of strategies is
// 4 if num_search_workers = 1, or 8 otherwise.
// If "interleave_search" is true, then the number of strategies is not
// related to the number of workers.
const int num_strategies =
parameters.interleave_search()
? (parameters.reduce_memory_usage_in_interleave_mode() ? 5 : 8)
? (parameters.reduce_memory_usage_in_interleave_mode() ? 5 : 9)
: num_search_workers;
std::unique_ptr<SharedBoundsManager> shared_bounds_manager;
if (global_model->GetOrCreate<SatParameters>()->share_level_zero_bounds()) {
if (parameters.share_level_zero_bounds()) {
// TODO(user): The current code is a bit brittle because we may have
// more SubSolver ids than num_strategies, and each SubSolver might
// need to synchronize bounds. Fix: it should be easy to make this number
// adapt dynamically in the SharedBoundsManager.
shared_bounds_manager =
absl::make_unique<SharedBoundsManager>(num_strategies + 1, model_proto);
absl::make_unique<SharedBoundsManager>(num_strategies + 2, model_proto);
}
std::unique_ptr<SharedRINSNeighborhoodManager> shared_rins_manager;
if (global_model->GetOrCreate<SatParameters>()->use_rins_lns()) {
@@ -2239,6 +2236,16 @@ void SolveCpModelParallel(const CpModelProto& model_proto,
// The list of all the SubSolver that will be used in this parallel search.
std::vector<std::unique_ptr<SubSolver>> subsolvers;
// Add a synchronization point for the shared classes.
subsolvers.push_back(absl::make_unique<SynchronizationPoint>(
/*id=*/subsolvers.size(),
[shared_response_manager, &shared_bounds_manager]() {
shared_response_manager->Synchronize();
if (shared_bounds_manager != nullptr) {
shared_bounds_manager->Synchronize();
}
}));
if (parameters.use_lns_only()) {
// Register something to find a first solution. Note that this is mainly
// used for experimentation, and using no LP usually results in a faster
@@ -2333,7 +2340,10 @@ void SolveCpModelParallel(const CpModelProto& model_proto,
helper, absl::StrCat("scheduling_random_lns_", strategy_name)),
local_params, helper, &shared));
}
if (parameters.use_rins_lns()) {
// TODO(user): for now this is not deterministic so we disable it on
// interleave search. Fix.
if (parameters.use_rins_lns() && !parameters.interleave_search()) {
subsolvers.push_back(absl::make_unique<LnsSolver>(
/*id=*/subsolvers.size(),
absl::make_unique<RelaxationInducedNeighborhoodGenerator>(
@@ -2364,11 +2374,9 @@ void SolveCpModelParallel(const CpModelProto& model_proto,
}
// Launch the main search loop.
if (parameters.deterministic_parallel_search()) {
// TODO(user): Make the batch_size independent of the number of threads so
// that we have the same behavior independently of the number of workers!
const int batch_size = 4 * num_search_workers;
DeterministicLoop(subsolvers, num_search_workers, batch_size);
if (parameters.interleave_search()) {
DeterministicLoop(subsolvers, num_search_workers,
parameters.interleave_batch_size());
} else {
NonDeterministicLoop(subsolvers, num_search_workers);
}

View File

@@ -756,19 +756,16 @@ message SatParameters {
optional int32 num_search_workers = 100 [default = 1];
// Experimental. If this is true, then we interleave all our major search
// strategy and distribute the work amongst num_search_workers. This also
// work with just one worker, in which case the solver is deterministic even
// if deterministic_parallel_search is false.
// strategy and distribute the work amongst num_search_workers.
//
// The search is deterministic (independently of num_search_workers!), and we
// schedule and wait for interleave_batch_size tasks to be completed before
// synchronizing and scheduling the next batch of tasks.
optional bool interleave_search = 136 [default = false];
optional int32 interleave_batch_size = 134 [default = 1];
// Temporary parameter until the memory usage is more optimized.
optional bool reduce_memory_usage_in_interleave_mode = 141 [default = true];
// Make the parallelization deterministic. Currently, this only work with
// use_lns_only().
//
// TODO(user): make it work without use_lns_only().
optional bool deterministic_parallel_search = 134 [default = false];
optional bool reduce_memory_usage_in_interleave_mode = 141 [default = false];
// Allows objective sharing between workers.
optional bool share_objective_bounds = 113 [default = true];

View File

@@ -252,6 +252,24 @@ IntegerValue SharedResponseManager::GetInnerObjectiveUpperBound() {
return IntegerValue(inner_objective_upper_bound_);
}
void SharedResponseManager::Synchronize() {
absl::MutexLock mutex_lock(&mutex_);
synchronized_inner_objective_lower_bound_ =
IntegerValue(inner_objective_lower_bound_);
synchronized_inner_objective_upper_bound_ =
IntegerValue(inner_objective_upper_bound_);
}
IntegerValue SharedResponseManager::SynchronizedInnerObjectiveLowerBound() {
absl::MutexLock mutex_lock(&mutex_);
return synchronized_inner_objective_lower_bound_;
}
IntegerValue SharedResponseManager::SynchronizedInnerObjectiveUpperBound() {
absl::MutexLock mutex_lock(&mutex_);
return synchronized_inner_objective_upper_bound_;
}
IntegerValue SharedResponseManager::BestSolutionInnerObjectiveValue() {
absl::MutexLock mutex_lock(&mutex_);
return IntegerValue(best_solution_objective_value_);
@@ -487,9 +505,12 @@ SharedBoundsManager::SharedBoundsManager(int num_workers,
const CpModelProto& model_proto)
: num_workers_(num_workers),
num_variables_(model_proto.variables_size()),
changed_variables_per_workers_(num_workers),
lower_bounds_(num_variables_, kint64min),
upper_bounds_(num_variables_, kint64max) {
upper_bounds_(num_variables_, kint64max),
changed_variables_per_workers_(num_workers),
synchronized_lower_bounds_(num_variables_, kint64min),
synchronized_upper_bounds_(num_variables_, kint64max) {
changed_variables_since_last_synchronize_.ClearAndResize(num_variables_);
for (int i = 0; i < num_workers_; ++i) {
changed_variables_per_workers_[i].ClearAndResize(num_variables_);
}
@@ -497,12 +518,14 @@ SharedBoundsManager::SharedBoundsManager(int num_workers,
lower_bounds_[i] = model_proto.variables(i).domain(0);
const int domain_size = model_proto.variables(i).domain_size();
upper_bounds_[i] = model_proto.variables(i).domain(domain_size - 1);
synchronized_lower_bounds_[i] = lower_bounds_[i];
synchronized_upper_bounds_[i] = upper_bounds_[i];
}
}
void SharedBoundsManager::ReportPotentialNewBounds(
const CpModelProto& model_proto, int worker_id,
const std::string& worker_name, const std::vector<int>& variables,
const CpModelProto& model_proto, const std::string& worker_name,
const std::vector<int>& variables,
const std::vector<int64>& new_lower_bounds,
const std::vector<int64>& new_upper_bounds) {
CHECK_EQ(variables.size(), new_lower_bounds.size());
@@ -527,11 +550,8 @@ void SharedBoundsManager::ReportPotentialNewBounds(
if (changed_ub) {
upper_bounds_[var] = new_ub;
}
changed_variables_since_last_synchronize_.Set(var);
for (int j = 0; j < num_workers_; ++j) {
if (worker_id == j) continue;
changed_variables_per_workers_[j].Set(var);
}
if (VLOG_IS_ON(2)) {
const IntegerVariableProto& var_proto = model_proto.variables(var);
const std::string& var_name =
@@ -544,8 +564,19 @@ void SharedBoundsManager::ReportPotentialNewBounds(
}
}
// When called, returns the set of bounds improvements since
// the last time this method was called by the same worker.
void SharedBoundsManager::Synchronize() {
absl::MutexLock mutex_lock(&mutex_);
for (const int var :
changed_variables_since_last_synchronize_.PositionsSetAtLeastOnce()) {
synchronized_lower_bounds_[var] = lower_bounds_[var];
synchronized_upper_bounds_[var] = upper_bounds_[var];
for (int j = 0; j < num_workers_; ++j) {
changed_variables_per_workers_[j].Set(var);
}
}
changed_variables_since_last_synchronize_.ClearAll();
}
void SharedBoundsManager::GetChangedBounds(
int worker_id, std::vector<int>* variables,
std::vector<int64>* new_lower_bounds,
@@ -553,16 +584,15 @@ void SharedBoundsManager::GetChangedBounds(
variables->clear();
new_lower_bounds->clear();
new_upper_bounds->clear();
{
absl::MutexLock mutex_lock(&mutex_);
for (const int var :
changed_variables_per_workers_[worker_id].PositionsSetAtLeastOnce()) {
variables->push_back(var);
new_lower_bounds->push_back(lower_bounds_[var]);
new_upper_bounds->push_back(upper_bounds_[var]);
}
changed_variables_per_workers_[worker_id].ClearAll();
absl::MutexLock mutex_lock(&mutex_);
for (const int var :
changed_variables_per_workers_[worker_id].PositionsSetAtLeastOnce()) {
variables->push_back(var);
new_lower_bounds->push_back(synchronized_lower_bounds_[var]);
new_upper_bounds->push_back(synchronized_upper_bounds_[var]);
}
changed_variables_per_workers_[worker_id].ClearAll();
}
} // namespace sat

View File

@@ -135,6 +135,12 @@ class SharedResponseManager {
IntegerValue GetInnerObjectiveLowerBound();
IntegerValue GetInnerObjectiveUpperBound();
// These functions are similar to their non-synchronized counterparts above,
// but they return the values as of the last time Synchronize() was called.
void Synchronize();
IntegerValue SynchronizedInnerObjectiveLowerBound();
IntegerValue SynchronizedInnerObjectiveUpperBound();
// Returns the current best solution inner objective value or kInt64Max if
// there is no solution.
IntegerValue BestSolutionInnerObjectiveValue();
@@ -222,6 +228,11 @@ class SharedResponseManager {
int64 inner_objective_upper_bound_ GUARDED_BY(mutex_) = kint64max;
int64 best_solution_objective_value_ GUARDED_BY(mutex_) = kint64max;
IntegerValue synchronized_inner_objective_lower_bound_ GUARDED_BY(mutex_) =
IntegerValue(kint64min);
IntegerValue synchronized_inner_objective_upper_bound_ GUARDED_BY(mutex_) =
IntegerValue(kint64max);
double primal_integral_ GUARDED_BY(mutex_) = 0.0;
double last_primal_integral_time_stamp_ GUARDED_BY(mutex_) = 0.0;
@@ -239,7 +250,7 @@ class SharedBoundsManager {
// Reports a set of locally improved variable bounds to the shared bounds
// manager. The manager will compare these bounds changes against its
// global state, and incorporate the improving ones.
void ReportPotentialNewBounds(const CpModelProto& model_proto, int worker_id,
void ReportPotentialNewBounds(const CpModelProto& model_proto,
const std::string& worker_name,
const std::vector<int>& variables,
const std::vector<int64>& new_lower_bounds,
@@ -251,16 +262,28 @@ class SharedBoundsManager {
std::vector<int64>* new_lower_bounds,
std::vector<int64>* new_upper_bounds);
// Publishes any new bounds so that GetChangedBounds() will reflect the latest
// state.
void Synchronize();
private:
const int num_workers_;
const int num_variables_;
absl::Mutex mutex_;
std::vector<SparseBitset<int64>> changed_variables_per_workers_
GUARDED_BY(mutex_);
// These are always up to date.
std::vector<int64> lower_bounds_ GUARDED_BY(mutex_);
std::vector<int64> upper_bounds_ GUARDED_BY(mutex_);
SparseBitset<int64> changed_variables_since_last_synchronize_
GUARDED_BY(mutex_);
// These are only updated on Synchronize().
std::vector<SparseBitset<int64>> changed_variables_per_workers_
GUARDED_BY(mutex_);
std::vector<int64> synchronized_lower_bounds_ GUARDED_BY(mutex_);
std::vector<int64> synchronized_upper_bounds_ GUARDED_BY(mutex_);
};
// Stores information on the worker in the parallel context.

View File

@@ -38,8 +38,6 @@
//
// TODO(user): move this file to base/swig/java
%include "ortools/base/base.i"
%{
#include "ortools/base/integral_types.h"
%}

View File

@@ -20,6 +20,8 @@
//
// TODO(user): move to base/swig/java.
%include "stdint.i"
%include "ortools/base/base.i"
%{

View File

@@ -79,6 +79,7 @@ void UnionOfSortedIntervals(absl::InlinedVector<ClosedInterval, 1>* intervals) {
} // namespace
// TODO(user): Use MathUtil::CeilOfRatio / FloorOfRatio instead.
int64 CeilRatio(int64 value, int64 positive_coeff) {
DCHECK_GT(positive_coeff, 0);
const int64 result = value / positive_coeff;
@@ -108,7 +109,23 @@ std::ostream& operator<<(std::ostream& out, const Domain& domain) {
Domain::Domain(int64 value) : intervals_({{value, value}}) {}
Domain::Domain(int64 left, int64 right) : intervals_({{left, right}}) {
// HACK(user): We spare significant time if we use an initializer here, because
// InlinedVector<1> is able to recognize the fast path of "exactly one element".
// I was unable to obtain the same performance with any other recipe, I always
// had at least 1 more cycle. See BM_SingleIntervalDomainConstructor.
// Since the constructor takes very few cycles (most likely less than 10),
// that's quite significant.
namespace {
inline ClosedInterval UncheckedClosedInterval(int64 s, int64 e) {
ClosedInterval i;
i.start = s;
i.end = e;
return i;
}
} // namespace
Domain::Domain(int64 left, int64 right)
: intervals_({UncheckedClosedInterval(left, right)}) {
if (left > right) intervals_.clear();
}
@@ -136,6 +153,7 @@ Domain Domain::FromIntervals(absl::Span<const ClosedInterval> intervals) {
}
Domain Domain::FromFlatSpanOfIntervals(absl::Span<const int64> flat_intervals) {
DCHECK(flat_intervals.size() % 2 == 0) << flat_intervals.size();
Domain result;
result.intervals_.reserve(flat_intervals.size() / 2);
for (int i = 0; i < flat_intervals.size(); i += 2) {
@@ -157,6 +175,7 @@ Domain Domain::FromVectorIntervals(
if (interval.size() == 1) {
result.intervals_.push_back({interval[0], interval[0]});
} else {
DCHECK_EQ(interval.size(), 2);
result.intervals_.push_back({interval[0], interval[1]});
}
}
@@ -257,23 +276,33 @@ Domain Domain::IntersectionWith(const Domain& domain) const {
const auto& a = intervals_;
const auto& b = domain.intervals_;
for (int i = 0, j = 0; i < a.size() && j < b.size();) {
const ClosedInterval intersection{std::max(a[i].start, b[j].start),
std::min(a[i].end, b[j].end)};
if (intersection.start > intersection.end) {
// Intersection is empty, we advance past the first interval of the two.
if (a[i].start < b[j].start) {
i++;
} else {
j++;
if (a[i].start <= b[j].start) {
if (a[i].end < b[j].start) {
// Empty intersection. We advance past the first interval.
++i;
} else { // a[i].end >= b[j].start
// Non-empty intersection: push back the intersection of these two, and
// advance past the first interval to finish.
if (a[i].end <= b[j].end) {
result.intervals_.push_back({b[j].start, a[i].end});
++i;
} else { // a[i].end > b[j].end.
result.intervals_.push_back({b[j].start, b[j].end});
++j;
}
}
} else {
// Intersection is non-empty, we add it to the result and advance past
// the first interval to finish.
result.intervals_.push_back(intersection);
if (a[i].end < b[j].end) {
i++;
} else {
j++;
} else {  // a[i].start > b[j].start.
// We do the exact same thing as above, but swapping a and b.
if (b[j].end < a[i].start) {
++j;
} else { // b[j].end >= a[i].start
if (b[j].end <= a[i].end) {
result.intervals_.push_back({a[i].start, b[j].end});
++j;
} else {  // b[j].end > a[i].end.
result.intervals_.push_back({a[i].start, a[i].end});
++i;
}
}
}
}

View File

@@ -22,6 +22,7 @@
#include "absl/container/inlined_vector.h"
#include "absl/types/span.h"
#include "ortools/base/integral_types.h"
#include "ortools/base/logging.h"
namespace operations_research {
@@ -30,7 +31,10 @@ namespace operations_research {
*/
struct ClosedInterval {
ClosedInterval() {}
ClosedInterval(int64 s, int64 e) : start(s), end(e) {}
ClosedInterval(int64 s, int64 e) : start(s), end(e) {
DLOG_IF(DFATAL, s > e) << "Invalid ClosedInterval(" << s << ", " << e
<< ")";
}
std::string DebugString() const;
bool operator==(const ClosedInterval& other) const {
@@ -56,6 +60,8 @@ std::ostream& operator<<(std::ostream& out,
* Returns true iff we have:
* - The intervals appear in increasing order.
* - for all i: intervals[i].start <= intervals[i].end
* (should always be true, by construction, but bad intervals can in practice
* escape detection in opt mode).
* - for all i but the last: intervals[i].end + 1 < intervals[i+1].start
*/
bool IntervalsAreSortedAndNonAdjacent(
@@ -348,8 +354,8 @@ class Domain {
// Invariant: will always satisfy IntervalsAreSortedAndNonAdjacent().
//
// Note that we use InlinedVector for the common case of single interal
// domains. This provide a good performance boost when working with a
// Note that we use InlinedVector for the common case of single internal
// interval. This provides a good performance boost when working with a
// std::vector<Domain>.
absl::InlinedVector<ClosedInterval, 1> intervals_;
};

View File

@@ -267,9 +267,7 @@ class TimeLimit {
* i.e. \c LimitReached() returns true when the value of
* external_boolean_as_limit is true whatever the time limits are.
*
* Note 1: The external_boolean_as_limit can be modified during solve.
*
* Note 2: \c ResetLimitFromParameters() will set this Boolean to false.
* Note: The external_boolean_as_limit can be modified during solve.
*/
void RegisterExternalBooleanAsLimit(
std::atomic<bool>* external_boolean_as_limit) {
@@ -285,7 +283,7 @@ class TimeLimit {
/**
* Sets new time limits. Note that this does not reset the running max nor
* any registered external boolean or calls to RegisterSigintHandler().
* any registered external Boolean.
*/
template <typename Parameters>
void ResetLimitFromParameters(const Parameters& parameters);
@@ -481,9 +479,6 @@ inline TimeLimit::TimeLimit(double limit_in_seconds, double deterministic_limit,
inline void TimeLimit::ResetTimers(double limit_in_seconds,
double deterministic_limit,
double instruction_limit) {
if (external_boolean_as_limit_ != nullptr) {
*external_boolean_as_limit_ = false;
}
elapsed_deterministic_time_ = 0.0;
deterministic_limit_ = deterministic_limit;
instruction_limit_ = instruction_limit;