Files
ortools-clone/ortools/pdlp/trust_region.h
Corentin Le Molgat a66a6daac7 Bump Copyright to 2025
2025-01-10 11:35:44 +01:00

424 lines
18 KiB
C++

// Copyright 2010-2025 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PDLP_TRUST_REGION_H_
#define PDLP_TRUST_REGION_H_
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <vector>
#include "Eigen/Core"
#include "absl/algorithm/container.h"
#include "absl/log/check.h"
#include "ortools/base/mathutil.h"
#include "ortools/pdlp/sharded_quadratic_program.h"
#include "ortools/pdlp/sharder.h"
namespace operations_research::pdlp {
struct TrustRegionResult {
// The step_size of the solution.
double solution_step_size;
// The value objective_vector^T * (solution - center_point)
// when using the linear-time solver for LPs and QPs with objective matrix not
// treated in the prox term. When using the approximate solver for QPs, this
// field contains the value
// 0.5 * (solution - center_point)^T * objective_matrix * (
// solution - center_point)
// + objective_vector^T * (solution - center_point)
// instead.
double objective_value;
// The solution.
Eigen::VectorXd solution;
};
// Solves the following trust-region problem with bound constraints:
// min_x `objective_vector`^T * (x - `center_point`)
// s.t. `variable_lower_bounds` <= x <= `variable_upper_bounds`
// || x - `center_point` ||_W <= `target_radius`
// where ||y||_W = sqrt(sum_i `norm_weights`[i] * y[i]^2)
// using an exact linear-time method.
// `sharder` should have the same size as the number of variables in the
// problem.
// Assumes that there is always a feasible solution, that is, that
// `variable_lower_bounds` <= `center_point` <= `variable_upper_bounds`, and
// that `norm_weights` > 0, for 0 <= i < `sharder.NumElements()`.
TrustRegionResult SolveTrustRegion(const Eigen::VectorXd& objective_vector,
const Eigen::VectorXd& variable_lower_bounds,
const Eigen::VectorXd& variable_upper_bounds,
const Eigen::VectorXd& center_point,
const Eigen::VectorXd& norm_weights,
double target_radius,
const Sharder& sharder);
// Solves the following trust-region problem with bound constraints:
// min_x (1/2) * (x - `center_point`)^T * Q * (x - `center_point`)
// + `objective_vector`^T * (x - `center_point`)
// s.t. `variable_lower_bounds` <= x <= `variable_upper_bounds`
// || x - `center_point` ||_W <= `target_radius`
// where ||y||_W = sqrt(sum_i `norm_weights`[i] * y[i]^2).
// It replaces the ball constraint || x - `center_point` ||_W <= `target_radius`
// with the equivalent constraint 0.5 * || x - `center_point` ||_W^2 <= 0.5 *
// `target_radius`^2 and does a binary search for a Lagrange multiplier for the
// latter constraint that is at most (`solve_tolerance` * max(1, lambda*)) away
// from the optimum Lagrange multiplier lambda*.
// `sharder` should have the same size as the number of variables in the
// problem.
// Assumes that there is always a feasible solution, that is, that
// `variable_lower_bounds` <= `center_point` <= `variable_upper_bounds`, and
// that `norm_weights` > 0, for 0 <= i < `sharder.NumElements()`.
TrustRegionResult SolveDiagonalTrustRegion(
const Eigen::VectorXd& objective_vector,
const Eigen::VectorXd& objective_matrix_diagonal,
const Eigen::VectorXd& variable_lower_bounds,
const Eigen::VectorXd& variable_upper_bounds,
const Eigen::VectorXd& center_point, const Eigen::VectorXd& norm_weights,
double target_radius, const Sharder& sharder, double solve_tolerance);
// Like `SolveDiagonalTrustRegion`, but extracts the problem data from a
// `ShardedQuadraticProgram` and implicitly concatenates the primal and dual
// parts before solving the trust-region subproblem.
TrustRegionResult SolveDiagonalQpTrustRegion(
const ShardedQuadraticProgram& sharded_qp,
const Eigen::VectorXd& primal_solution,
const Eigen::VectorXd& dual_solution,
const Eigen::VectorXd& primal_gradient,
const Eigen::VectorXd& dual_gradient, const double primal_weight,
double target_radius, double solve_tolerance);
struct LocalizedLagrangianBounds {
// The value of the Lagrangian function L(x, y) at the given solution.
double lagrangian_value;
// A lower bound on the Lagrangian value, valid for the given radius.
double lower_bound;
// An upper bound on the Lagrangian value, valid for the given radius.
double upper_bound;
// The radius used when computing the bounds.
double radius;
};
inline double BoundGap(const LocalizedLagrangianBounds& bounds) {
return bounds.upper_bound - bounds.lower_bound;
}
// Defines a norm on a vector partitioned as (x, y) where x is the primal and y
// is the dual. The enum values define a joint norm as a function of ||x||_P and
// ||y||_D, whose definition depends on the context.
enum class PrimalDualNorm {
// The joint norm ||(x,y)||_PD = max{||x||_P, ||y||_D}.
kMaxNorm,
// The joint norm (||(x,y)||_PD)^2 = (||x||_P)^2 + (||y||_D)^2.
kEuclideanNorm
};
// Recall the saddle-point formulation OPT = min_x max_y L(x, y) defined at
// https://developers.google.com/optimization/lp/pdlp_math#saddle-point_formulation.
// This function computes lower and upper bounds on OPT with an additional ball
// or "trust-region" constraint on the domains of x and y.
//
// The bounds are derived from the solution of the following problem:
// min_{x,y}
// ∇_x L(`primal_solution`, `dual_solution`)^T (x - `primal_solution`)
// - ∇_y L(`primal_solution`, `dual_solution`)^T (y - `dual_solution`)
// subject to
// ||(x - `primal_solution`, y - `dual_solution`)||_PD <= `radius`,
// where x and y are constrained to their respective bounds and ||(x,y)||_PD is
// defined by `primal_dual_norm`.
// When `use_diagonal_qp_trust_region_solver` is true, the solver instead solves
// the following problem:
// min_{x,y}
// ∇_x L(`primal_solution`, `dual_solution`)^T (x - `primal_solution`)
// - ∇_y L(`primal_solution`, `dual_solution`)^T (y - `dual_solution`)
// + (1 / 2) * (x - `primal_solution`)^T * `objective_matrix`
// * (x - `primal_solution`),
// subject to
// ||(x - `primal_solution`, y - `dual_solution`)||_PD <= `radius`.
// `use_diagonal_qp_trust_region_solver` == true assumes that `primal_dual_norm`
// is the Euclidean norm and the objective matrix is diagonal.
// See `SolveDiagonalTrustRegion()` above for the meaning of
// `diagonal_qp_trust_region_solver_tolerance`.
//
// In the context of primal_dual_norm, the primal norm ||.||_P is defined as
// (||x||_P)^2 = (1 / 2) * `primal_weight` * ||x||_2^2, and the dual norm
// ||.||_D is defined as
// (||y||_D)^2 = (1 / 2) * (1 / `primal_weight`) * ||y||_2^2.
//
// Given an optimal solution (x, y) to the above problem, the lower bound is
// computed as L(`primal_solution`, `dual_solution`) +
// ∇_x L(`primal_solution`, `dual_solution`)^T (x - `primal_solution`)
// and the upper bound is computed as L(`primal_solution`, `dual_solution`) +
// ∇_y L(`primal_solution`, `dual_solution`)^T (y - `dual_solution`).
//
// The bounds are "localized" because they are guaranteed to bound OPT only if
// the ||.||_PD ball contains an optimal solution.
// `primal_product` and `dual_product` optionally specify the values of
// `constraint_matrix` * `primal_solution` and `constraint_matrix.transpose()` *
// `dual_solution`, respectively. If set to nullptr, they will be computed.
LocalizedLagrangianBounds ComputeLocalizedLagrangianBounds(
const ShardedQuadraticProgram& sharded_qp,
const Eigen::VectorXd& primal_solution,
const Eigen::VectorXd& dual_solution, PrimalDualNorm primal_dual_norm,
double primal_weight, double radius, const Eigen::VectorXd* primal_product,
const Eigen::VectorXd* dual_product,
bool use_diagonal_qp_trust_region_solver,
double diagonal_qp_trust_region_solver_tolerance);
namespace internal {
// These functions templated on `TrustRegionProblem` compute values useful to
// the trust region solve. The templated `TrustRegionProblem` type should
// provide methods:
// `double Objective(int64_t index) const;`
// `double LowerBound(int64_t index) const;`
// `double UpperBound(int64_t index) const;`
// `double CenterPoint(int64_t index) const;`
// `double NormWeight(int64_t index) const;`
// See trust_region.cc for more details and several implementations.
// The distance (in the indexed element) from the center point to the bound, in
// the direction that reduces the objective.
template <typename TrustRegionProblem>
double DistanceAtCriticalStepSize(const TrustRegionProblem& problem,
const int64_t index) {
if (problem.Objective(index) == 0.0) {
return 0.0;
}
if (problem.Objective(index) > 0.0) {
return problem.LowerBound(index) - problem.CenterPoint(index);
} else {
return problem.UpperBound(index) - problem.CenterPoint(index);
}
}
// The critical step size is the step size at which the indexed element hits its
// bound (or infinity if that doesn't happen).
template <typename TrustRegionProblem>
double CriticalStepSize(const TrustRegionProblem& problem,
const int64_t index) {
if (problem.Objective(index) == 0.0) {
return std::numeric_limits<double>::infinity();
}
return -problem.NormWeight(index) *
DistanceAtCriticalStepSize(problem, index) / problem.Objective(index);
}
// The value of the indexed element at the given step_size, projected onto the
// bounds.
template <typename TrustRegionProblem>
double ProjectedValue(const TrustRegionProblem& problem, const int64_t index,
const double step_size) {
const double full_step =
problem.CenterPoint(index) -
step_size * problem.Objective(index) / problem.NormWeight(index);
return std::min(std::max(full_step, problem.LowerBound(index)),
problem.UpperBound(index));
}
// An easy way of computing medians that's slightly off when the length of the
// array is even. `array` is intentionally passed by value.
// `value_function` maps an element of `array` to its (double) value. Returns
// the value of the median element.
template <typename ArrayType, typename ValueFunction>
double EasyMedian(ArrayType array, ValueFunction value_function) {
CHECK_GT(array.size(), 0);
auto middle = array.begin() + (array.size() / 2);
absl::c_nth_element(array, middle,
[&](typename ArrayType::value_type lhs,
typename ArrayType::value_type rhs) {
return value_function(lhs) < value_function(rhs);
});
return value_function(*middle);
}
// Lists the undecided components (from [`start_index`, `end_index`) as those
// with finite critical step sizes. The components with infinite critical step
// sizes will never hit their bounds, so returns their contribution to square of
// the radius.
template <typename TrustRegionProblem>
double ComputeInitialUndecidedComponents(
const TrustRegionProblem& problem, int64_t start_index, int64_t end_index,
std::vector<int64_t>& undecided_components) {
// TODO(user): Evaluate dropping this `reserve()`, since it wastes space
// if many components are decided.
undecided_components.clear();
undecided_components.reserve(end_index - start_index);
double radius_coefficient = 0.0;
for (int64_t index = start_index; index < end_index; ++index) {
if (std::isfinite(internal::CriticalStepSize(problem, index))) {
undecided_components.push_back(index);
} else {
// Simplified from norm_weight * (objective / norm_weight)^2.
radius_coefficient += MathUtil::Square(problem.Objective(index)) /
problem.NormWeight(index);
}
}
return radius_coefficient;
}
template <typename TrustRegionProblem>
double RadiusSquaredOfUndecidedComponents(
const TrustRegionProblem& problem, const double step_size,
const std::vector<int64_t>& undecided_components) {
return absl::c_accumulate(
undecided_components, 0.0, [&](double sum, int64_t index) {
const double distance_at_projected_value =
internal::ProjectedValue(problem, index, step_size) -
problem.CenterPoint(index);
return sum + problem.NormWeight(index) *
MathUtil::Square(distance_at_projected_value);
});
}
// Points whose critical step sizes are greater than or equal to
// `step_size_threshold` are eliminated from the undecided components (we know
// they'll be determined by center_point - step_size * objective /
// norm_weights). Returns the coefficient of step_size^2 that accounts of the
// contribution of the removed variables to the radius squared.
template <typename TrustRegionProblem>
double RemoveCriticalStepsAboveThreshold(
const TrustRegionProblem& problem, const double step_size_threshold,
std::vector<int64_t>& undecided_components) {
double variable_radius_coefficient = 0.0;
for (const int64_t index : undecided_components) {
if (internal::CriticalStepSize(problem, index) >= step_size_threshold) {
// Simplified from norm_weight * (objective / norm_weight)^2.
variable_radius_coefficient +=
MathUtil::Square(problem.Objective(index)) /
problem.NormWeight(index);
}
}
auto result =
std::remove_if(undecided_components.begin(), undecided_components.end(),
[&](const int64_t index) {
return internal::CriticalStepSize(problem, index) >=
step_size_threshold;
});
undecided_components.erase(result, undecided_components.end());
return variable_radius_coefficient;
}
// Points whose critical step sizes are smaller than or equal to
// `step_size_threshold` are eliminated from the undecided components (we know
// they'll always be at their bounds). Returns the weighted distance squared
// from the center point for the removed components.
template <typename TrustRegionProblem>
double RemoveCriticalStepsBelowThreshold(
const TrustRegionProblem& problem, const double step_size_threshold,
std::vector<int64_t>& undecided_components) {
double radius_sq = 0.0;
for (const int64_t index : undecided_components) {
if (internal::CriticalStepSize(problem, index) <= step_size_threshold) {
radius_sq += problem.NormWeight(index) *
MathUtil::Square(
internal::DistanceAtCriticalStepSize(problem, index));
}
}
auto result =
std::remove_if(undecided_components.begin(), undecided_components.end(),
[&](const int64_t index) {
return internal::CriticalStepSize(problem, index) <=
step_size_threshold;
});
undecided_components.erase(result, undecided_components.end());
return radius_sq;
}
// `PrimalTrustRegionProblem` defines the primal trust region problem given a
// `QuadraticProgram`, `primal_solution`, and `primal_gradient`. It captures
// const references to the constructor arguments, which should outlive the class
// instance.
// The corresponding trust region problem is
// min_x `primal_gradient`^T * (x - `primal_solution`)
// s.t. `qp.variable_lower_bounds` <= x <= `qp.variable_upper_bounds`
// || x - `primal_solution` ||_2 <= target_radius
class PrimalTrustRegionProblem {
public:
PrimalTrustRegionProblem(const QuadraticProgram* qp,
const Eigen::VectorXd* primal_solution,
const Eigen::VectorXd* primal_gradient,
const double norm_weight = 1.0)
: qp_(*qp),
primal_solution_(*primal_solution),
primal_gradient_(*primal_gradient),
norm_weight_(norm_weight) {}
double Objective(int64_t index) const { return primal_gradient_[index]; }
double LowerBound(int64_t index) const {
return qp_.variable_lower_bounds[index];
}
double UpperBound(int64_t index) const {
return qp_.variable_upper_bounds[index];
}
double CenterPoint(int64_t index) const { return primal_solution_[index]; }
double NormWeight(int64_t index) const { return norm_weight_; }
private:
const QuadraticProgram& qp_;
const Eigen::VectorXd& primal_solution_;
const Eigen::VectorXd& primal_gradient_;
const double norm_weight_;
};
// `DualTrustRegionProblem` defines the dual trust region problem given a
// `QuadraticProgram`, `dual_solution`, and `dual_gradient`. It captures const
// references to the constructor arguments, which should outlive the class
// instance.
// The corresponding trust region problem is
// max_y `dual_gradient`^T * (y - `dual_solution`)
// s.t. `qp.implicit_dual_lower_bounds` <= y <= `qp.implicit_dual_upper_bounds`
// || y - `dual_solution` ||_2 <= target_radius
// where the implicit dual bounds are those given in
// https://developers.google.com/optimization/lp/pdlp_math#dual_variable_bounds
class DualTrustRegionProblem {
public:
DualTrustRegionProblem(const QuadraticProgram* qp,
const Eigen::VectorXd* dual_solution,
const Eigen::VectorXd* dual_gradient,
const double norm_weight = 1.0)
: qp_(*qp),
dual_solution_(*dual_solution),
dual_gradient_(*dual_gradient),
norm_weight_(norm_weight) {}
double Objective(int64_t index) const {
// The objective is negated because the trust region problem objective is
// minimize, but for the dual problem we want to maximize the gradient.
return -dual_gradient_[index];
}
double LowerBound(int64_t index) const {
return std::isfinite(qp_.constraint_upper_bounds[index])
? -std::numeric_limits<double>::infinity()
: 0.0;
}
double UpperBound(int64_t index) const {
return std::isfinite(qp_.constraint_lower_bounds[index])
? std::numeric_limits<double>::infinity()
: 0.0;
}
double CenterPoint(int64_t index) const { return dual_solution_[index]; }
double NormWeight(int64_t index) const { return norm_weight_; }
private:
const QuadraticProgram& qp_;
const Eigen::VectorXd& dual_solution_;
const Eigen::VectorXd& dual_gradient_;
const double norm_weight_;
};
} // namespace internal
} // namespace operations_research::pdlp
#endif // PDLP_TRUST_REGION_H_