// File: ortools-clone/ortools/pdlp/sharder.cc
// Snapshot: 2025-03-13 15:56:35 +01:00 (335 lines, 12 KiB, C++)
// Copyright 2010-2025 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ortools/pdlp/sharder.h"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <functional>
#include <vector>
#include "Eigen/Core"
#include "Eigen/SparseCore"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/log/vlog_is_on.h"
#include "absl/synchronization/blocking_counter.h"
#include "absl/time/time.h"
#include "ortools/base/mathutil.h"
#include "ortools/base/timer.h"
#include "ortools/pdlp/scheduler.h"
namespace operations_research::pdlp {
using ::Eigen::VectorXd;
// Partitions `[0, num_elements)` into at most `num_shards` contiguous shards
// whose total `element_mass` is approximately balanced. `scheduler` may be
// null (shards are then processed sequentially; see `ParallelForEachShard`).
// `element_mass(i)` gives the relative work of element `i`; it is called once
// per element in the sizing pass and once more per element while splitting.
Sharder::Sharder(const int64_t num_elements, const int num_shards,
                 Scheduler* const scheduler,
                 const std::function<int64_t(int64_t)>& element_mass)
    : scheduler_(scheduler) {
  CHECK_GE(num_elements, 0);
  if (num_elements == 0) {
    // Degenerate case: a single sentinel start and zero shards.
    shard_starts_.push_back(0);
    return;
  }
  CHECK_GE(num_shards, 1);
  // There can be at most `min(num_shards, num_elements)` non-empty shards,
  // plus one sentinel entry in `shard_starts_`.
  shard_starts_.reserve(
      std::min(static_cast<int64_t>(num_shards), num_elements) + 1);
  shard_masses_.reserve(
      std::min(static_cast<int64_t>(num_shards), num_elements));
  // First pass: total mass, needed for the per-shard target below.
  int64_t overall_mass = 0;
  for (int64_t elem = 0; elem < num_elements; ++elem) {
    overall_mass += element_mass(elem);
  }
  // Second pass: greedily grow the current shard; start a new shard once the
  // running mass (counting half of the next element, to round to the nearer
  // boundary) reaches the average target `overall_mass / num_shards`.
  shard_starts_.push_back(0);
  int64_t this_shard_mass = element_mass(0);
  for (int64_t elem = 1; elem < num_elements; ++elem) {
    int64_t this_elem_mass = element_mass(elem);
    if (this_shard_mass + (this_elem_mass / 2) >= overall_mass / num_shards) {
      // `elem` starts a new shard.
      shard_masses_.push_back(this_shard_mass);
      shard_starts_.push_back(elem);
      this_shard_mass = this_elem_mass;
    } else {
      this_shard_mass += this_elem_mass;
    }
  }
  // Close the final shard with the sentinel `num_elements`.
  shard_starts_.push_back(num_elements);
  shard_masses_.push_back(this_shard_mass);
  CHECK_EQ(NumShards(), shard_masses_.size());
}
// Partitions `[0, num_elements)` into at most `num_shards` contiguous shards
// of (nearly) equal size, treating every element as having unit mass.
// `scheduler` may be null (shards are then processed sequentially).
Sharder::Sharder(const int64_t num_elements, const int num_shards,
                 Scheduler* const scheduler)
    : scheduler_(scheduler) {
  CHECK_GE(num_elements, 0);
  if (num_elements == 0) {
    // Degenerate case: a single sentinel start and zero shards.
    shard_starts_.push_back(0);
    return;
  }
  CHECK_GE(num_shards, 1);
  const int64_t max_nonempty_shards = std::min(int64_t{num_shards}, num_elements);
  shard_starts_.reserve(max_nonempty_shards + 1);
  shard_masses_.reserve(max_nonempty_shards);
  if (num_shards >= num_elements) {
    // More shards than elements: one singleton shard per element.
    for (int64_t element = 0; element < num_elements; ++element) {
      shard_starts_.push_back(static_cast<int>(element));
      shard_masses_.push_back(1);
    }
  } else {
    // Even split via integer division; shards that would be empty are skipped.
    for (int shard = 0; shard < num_shards; ++shard) {
      const int64_t begin = (num_elements * shard) / num_shards;
      const int64_t end = (num_elements * (shard + 1)) / num_shards;
      if (end > begin) {
        shard_starts_.push_back(begin);
        shard_masses_.push_back(end - begin);
      }
    }
  }
  // Sentinel marking the end of the last shard.
  shard_starts_.push_back(num_elements);
  CHECK_EQ(NumShards(), shard_masses_.size());
}
// Builds a `Sharder` for `num_elements` unit-mass elements, reusing
// `other_sharder`'s shard count and scheduler.
Sharder::Sharder(const Sharder& other_sharder, const int64_t num_elements)
    // The `std::max()` protects against `other_sharder.NumShards() == 0`, which
    // will happen if `other_sharder` had `num_elements == 0`.
    : Sharder(num_elements, std::max(1, other_sharder.NumShards()),
              other_sharder.scheduler_) {}
// Invokes `func` once for each shard: in parallel via `scheduler_` when one
// was supplied, otherwise sequentially in shard order.
// Fix: removed an `absl::BlockingCounter` that was constructed with
// `NumShards()` but never decremented or waited on — dead code, presumably a
// leftover from an older thread-pool-based implementation (TODO confirm).
void Sharder::ParallelForEachShard(
    const std::function<void(const Shard&)>& func) const {
  if (scheduler_) {
    VLOG(2) << "Starting ParallelForEachShard()";
    scheduler_->ParallelFor(0, NumShards(), [&](int shard_num) {
      WallTimer timer;
      // Only pay the timing cost when verbose logging is enabled.
      if (VLOG_IS_ON(2)) {
        timer.Start();
      }
      func(Shard(shard_num, this));
      if (VLOG_IS_ON(2)) {
        timer.Stop();
        // `std::max(1, ...)` guards against division by zero for very fast
        // shards (duration rounds down to 0 microseconds).
        VLOG(2) << "Shard " << shard_num << " with " << ShardSize(shard_num)
                << " elements and " << ShardMass(shard_num)
                << " mass finished with "
                << ShardMass(shard_num) /
                       std::max(int64_t{1},
                                absl::ToInt64Microseconds(timer.GetDuration()))
                << " mass/usec.";
      }
    });
    VLOG(2) << "Done ParallelForEachShard()";
  } else {
    for (int shard_num = 0; shard_num < NumShards(); ++shard_num) {
      func(Shard(shard_num, this));
    }
  }
}
double Sharder::ParallelSumOverShards(
const std::function<double(const Shard&)>& func) const {
VectorXd local_sums(NumShards());
ParallelForEachShard([&](const Sharder::Shard& shard) {
local_sums[shard.Index()] = func(shard);
});
return local_sums.sum();
}
// Evaluates `func` on every shard (possibly in parallel) and returns true iff
// it returned true on all of them. Note: `func` runs on every shard; there is
// no short-circuiting.
bool Sharder::ParallelTrueForAllShards(
    const std::function<bool(const Shard&)>& func) const {
  // `std::vector<bool>` packs bits and is not safe for concurrent element
  // writes, so store plain ints instead.
  std::vector<int> shard_results(NumShards());
  ParallelForEachShard([&](const Sharder::Shard& shard) {
    shard_results[shard.Index()] = func(shard) ? 1 : 0;
  });
  for (const int result : shard_results) {
    if (result == 0) return false;
  }
  return true;
}
// Returns `matrix.transpose() * vector`, sharded over the columns of the
// column-major `matrix` (each shard computes the entries of the result
// corresponding to its columns).
VectorXd TransposedMatrixVectorProduct(
    const Eigen::SparseMatrix<double, Eigen::ColMajor, int64_t>& matrix,
    const VectorXd& vector, const Sharder& sharder) {
  CHECK_EQ(vector.size(), matrix.rows());
  VectorXd answer(matrix.cols());
  sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
    // NOTE: For very sparse columns, assignment to `shard(answer)` incurs a
    // measurable overhead compared to using a constructor
    // (i.e. `VectorXd temp = ...`). It is not clear why this is the case, nor
    // how to avoid it.
    shard(answer) = shard(matrix).transpose() * vector;
  });
  return answer;
}
// Resizes `dest` to the sharder's element count and zeroes it, one shard at a
// time (possibly in parallel).
void SetZero(const Sharder& sharder, VectorXd& dest) {
  dest.resize(sharder.NumElements());
  sharder.ParallelForEachShard([&dest](const Sharder::Shard& shard) {
    shard(dest).setZero();
  });
}
// Returns a zero vector sized to the sharder's element count.
VectorXd ZeroVector(const Sharder& sharder) {
  VectorXd zeros(sharder.NumElements());
  SetZero(sharder, zeros);
  return zeros;
}
// Returns an all-ones vector sized to the sharder's element count, filled one
// shard at a time (possibly in parallel).
VectorXd OnesVector(const Sharder& sharder) {
  VectorXd ones(sharder.NumElements());
  sharder.ParallelForEachShard(
      [&ones](const Sharder::Shard& shard) { shard(ones).setOnes(); });
  return ones;
}
// Computes `dest += scale * increment`, one shard at a time (possibly in
// parallel). `dest` and `increment` must have the sharder's element count.
void AddScaledVector(const double scale, const VectorXd& increment,
                     const Sharder& sharder, VectorXd& dest) {
  sharder.ParallelForEachShard(
      [scale, &increment, &dest](const Sharder::Shard& shard) {
        shard(dest) += shard(increment) * scale;
      });
}
// Copies `vec` into `dest` (resizing `dest` to match), one shard at a time
// (possibly in parallel).
void AssignVector(const VectorXd& vec, const Sharder& sharder, VectorXd& dest) {
  dest.resize(vec.size());
  sharder.ParallelForEachShard([&vec, &dest](const Sharder::Shard& shard) {
    shard(dest) = shard(vec);
  });
}
// Returns a sharded copy of `vec`.
VectorXd CloneVector(const VectorXd& vec, const Sharder& sharder) {
  VectorXd copy;
  AssignVector(vec, sharder, copy);
  return copy;
}
// Computes `dest[i] *= scale[i]` element-wise, one shard at a time (possibly
// in parallel).
void CoefficientWiseProductInPlace(const VectorXd& scale,
                                   const Sharder& sharder, VectorXd& dest) {
  sharder.ParallelForEachShard([&scale, &dest](const Sharder::Shard& shard) {
    shard(dest).array() *= shard(scale).array();
  });
}
// Computes `dest[i] /= scale[i]` element-wise, one shard at a time (possibly
// in parallel).
void CoefficientWiseQuotientInPlace(const VectorXd& scale,
                                    const Sharder& sharder, VectorXd& dest) {
  sharder.ParallelForEachShard([&scale, &dest](const Sharder::Shard& shard) {
    shard(dest).array() /= shard(scale).array();
  });
}
// Returns the dot product of `v1` and `v2`, summed across shards.
double Dot(const VectorXd& v1, const VectorXd& v2, const Sharder& sharder) {
  const auto shard_dot = [&v1, &v2](const Sharder::Shard& shard) -> double {
    return shard(v1).dot(shard(v2));
  };
  return sharder.ParallelSumOverShards(shard_dot);
}
double LInfNorm(const VectorXd& vector, const Sharder& sharder) {
VectorXd local_max(sharder.NumShards());
sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
local_max[shard.Index()] = shard(vector).lpNorm<Eigen::Infinity>();
});
return local_max.lpNorm<Eigen::Infinity>();
}
// Returns the L1 norm of `vector`, summed across shards.
double L1Norm(const VectorXd& vector, const Sharder& sharder) {
  const auto shard_l1 = [&vector](const Sharder::Shard& shard) -> double {
    return shard(vector).lpNorm<1>();
  };
  return sharder.ParallelSumOverShards(shard_l1);
}
// Returns the squared Euclidean norm of `vector`, summed across shards.
double SquaredNorm(const VectorXd& vector, const Sharder& sharder) {
  const auto shard_sq = [&vector](const Sharder::Shard& shard) -> double {
    return shard(vector).squaredNorm();
  };
  return sharder.ParallelSumOverShards(shard_sq);
}
// Returns the Euclidean norm of `vector`.
double Norm(const VectorXd& vector, const Sharder& sharder) {
  const double squared = SquaredNorm(vector, sharder);
  return std::sqrt(squared);
}
// Returns the squared Euclidean distance between `vector1` and `vector2`,
// summed across shards.
double SquaredDistance(const VectorXd& vector1, const VectorXd& vector2,
                       const Sharder& sharder) {
  const auto shard_sq_dist = [&](const Sharder::Shard& shard) -> double {
    return (shard(vector1) - shard(vector2)).squaredNorm();
  };
  return sharder.ParallelSumOverShards(shard_sq_dist);
}
// Returns the Euclidean distance between `vector1` and `vector2`.
double Distance(const VectorXd& vector1, const VectorXd& vector2,
                const Sharder& sharder) {
  const double squared = SquaredDistance(vector1, vector2, sharder);
  return std::sqrt(squared);
}
double ScaledLInfNorm(const VectorXd& vector, const VectorXd& scale,
const Sharder& sharder) {
VectorXd local_max(sharder.NumShards());
sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
local_max[shard.Index()] =
shard(vector).cwiseProduct(shard(scale)).lpNorm<Eigen::Infinity>();
});
return local_max.lpNorm<Eigen::Infinity>();
}
// Returns the squared Euclidean norm of the element-wise product of `vector`
// and `scale`, summed across shards.
double ScaledSquaredNorm(const VectorXd& vector, const VectorXd& scale,
                         const Sharder& sharder) {
  const auto shard_sq = [&vector, &scale](const Sharder::Shard& shard) -> double {
    return shard(vector).cwiseProduct(shard(scale)).squaredNorm();
  };
  return sharder.ParallelSumOverShards(shard_sq);
}
// Returns the Euclidean norm of the element-wise product of `vector` and
// `scale`.
double ScaledNorm(const VectorXd& vector, const VectorXd& scale,
                  const Sharder& sharder) {
  const double squared = ScaledSquaredNorm(vector, scale, sharder);
  return std::sqrt(squared);
}
// Returns, for each column j of `matrix`,
//   |col_scaling_vec[j]| * max_i |row_scaling_vec[i] * matrix(i, j)|
// (0 for an empty column). Sharding is over the columns of the column-major
// matrix.
// Fix: hoisted `shard(matrix)` out of the loop bound and `shard(answer)` out
// of the loop body — both were re-evaluated on every iteration even though
// `matrix_shard` was already computed for the inner iterator.
VectorXd ScaledColLInfNorm(
    const Eigen::SparseMatrix<double, Eigen::ColMajor, int64_t>& matrix,
    const VectorXd& row_scaling_vec, const VectorXd& col_scaling_vec,
    const Sharder& sharder) {
  CHECK_EQ(matrix.cols(), col_scaling_vec.size());
  CHECK_EQ(matrix.rows(), row_scaling_vec.size());
  VectorXd answer(matrix.cols());
  sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
    auto matrix_shard = shard(matrix);
    auto col_scaling_shard = shard(col_scaling_vec);
    auto answer_shard = shard(answer);
    for (int64_t col_num = 0; col_num < matrix_shard.outerSize(); ++col_num) {
      double max = 0.0;
      for (decltype(matrix_shard)::InnerIterator it(matrix_shard, col_num); it;
           ++it) {
        // `it.row()` indexes the full matrix, hence the unsharded
        // `row_scaling_vec`.
        max = std::max(max, std::abs(it.value() * row_scaling_vec[it.row()]));
      }
      answer_shard[col_num] = max * std::abs(col_scaling_shard[col_num]);
    }
  });
  return answer;
}
// Returns, for each column j of `matrix`,
//   |col_scaling_vec[j]| * sqrt(sum_i (row_scaling_vec[i] * matrix(i, j))^2)
// (0 for an empty column). Sharding is over the columns of the column-major
// matrix.
// Fix: hoisted `shard(matrix)` out of the loop bound and `shard(answer)` out
// of the loop body — both were re-evaluated on every iteration even though
// `matrix_shard` was already computed for the inner iterator.
VectorXd ScaledColL2Norm(
    const Eigen::SparseMatrix<double, Eigen::ColMajor, int64_t>& matrix,
    const VectorXd& row_scaling_vec, const VectorXd& col_scaling_vec,
    const Sharder& sharder) {
  CHECK_EQ(matrix.cols(), col_scaling_vec.size());
  CHECK_EQ(matrix.rows(), row_scaling_vec.size());
  VectorXd answer(matrix.cols());
  sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
    auto matrix_shard = shard(matrix);
    auto col_scaling_shard = shard(col_scaling_vec);
    auto answer_shard = shard(answer);
    for (int64_t col_num = 0; col_num < matrix_shard.outerSize(); ++col_num) {
      double sum_of_squares = 0.0;
      for (decltype(matrix_shard)::InnerIterator it(matrix_shard, col_num); it;
           ++it) {
        // `it.row()` indexes the full matrix, hence the unsharded
        // `row_scaling_vec`.
        sum_of_squares +=
            MathUtil::Square(it.value() * row_scaling_vec[it.row()]);
      }
      answer_shard[col_num] =
          std::sqrt(sum_of_squares) * std::abs(col_scaling_shard[col_num]);
    }
  });
  return answer;
}
} // namespace operations_research::pdlp