OR-Tools  9.3
trust_region.h
Go to the documentation of this file.
1// Copyright 2010-2021 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14#ifndef PDLP_TRUST_REGION_H_
15#define PDLP_TRUST_REGION_H_
16
17#include <algorithm>
18#include <cmath>
19#include <cstdint>
20#include <limits>
21#include <vector>
22
23#include "Eigen/Core"
24#include "absl/algorithm/container.h"
29
31
33 // The step_size of the solution.
35 // The value objective_vector' * (solution - center_point)
36 // when using the linear-time solver for LPs and QPs with objective matrix not
37 // treated in the prox term. When using the approximate solver for QPs, this
38 // field contains the value
39 // 0.5 * (solution - center_point)' * objective_matrix * (
40 // solution - center_point)
41 // + objective_vector' * (solution - center_point)
42 // instead.
44 // The solution.
45 Eigen::VectorXd solution;
46};
47
48// Solves the following trust-region problem with bound constraints:
49// min_x objective_vector' * (x - center_point)
50// s.t. variable_lower_bounds <= x <= variable_upper_bounds
51// || x - center_point ||_W <= target_radius
52// where ||y||_W = sqrt(sum_i norm_weights[i] * y[i]^2)
53// using an exact linear-time method.
54// The sharder should have the same size as the number of variables in the
55// problem.
56// Assumes that there is always a feasible solution, that is, that
57// variable_lower_bounds <= center_point <= variable_upper_bounds, and that
58// norm_weights > 0, for 0 <= i < sharder.NumElements().
60 const Eigen::VectorXd& variable_lower_bounds,
61 const Eigen::VectorXd& variable_upper_bounds,
62 const Eigen::VectorXd& center_point,
63 const Eigen::VectorXd& norm_weights,
64 double target_radius,
65 const Sharder& sharder);
66
67// Solves the following trust-region problem with bound constraints:
68// min_x (1/2) * (x - center_point)' * Q * (x - center_point)
69// + objective_vector' * (x - center_point)
70// s.t. variable_lower_bounds <= x <= variable_upper_bounds
71// || x - center_point ||_W <= target_radius
72// where ||y||_W = sqrt(sum_i norm_weights[i] * y[i]^2).
73// It replaces the ball constraint || x - center_point ||_W <= target_radius
74// with the equivalent constraint 0.5 * || x - center_point ||_W^2 <= 0.5 *
75// target_radius^2 and does a binary search for a Lagrange multiplier for the
76// latter constraint that is at most `solve_tolerance * max(1, lambda*)` away
77// from the optimum Lagrange multiplier lambda*.
78// The sharder should have the same size as the number of variables in the
79// problem.
80// Assumes that there is always a feasible solution, that is, that
81// variable_lower_bounds <= center_point <= variable_upper_bounds, and that
82// norm_weights > 0, for 0 <= i < sharder.NumElements().
84 const Eigen::VectorXd& objective_vector,
85 const Eigen::VectorXd& objective_matrix_diagonal,
86 const Eigen::VectorXd& variable_lower_bounds,
87 const Eigen::VectorXd& variable_upper_bounds,
88 const Eigen::VectorXd& center_point, const Eigen::VectorXd& norm_weights,
89 double target_radius, const Sharder& sharder, double solve_tolerance);
90
91// Like SolveDiagonalTrustRegion, but extracts the problem data from a
92// ShardedQuadraticProgram and implicitly concatenates the primal and dual parts
93// before solving the trust-region subproblem.
95 const ShardedQuadraticProgram& sharded_qp,
96 const Eigen::VectorXd& primal_solution,
97 const Eigen::VectorXd& dual_solution,
98 const Eigen::VectorXd& primal_gradient,
99 const Eigen::VectorXd& dual_gradient, const double primal_weight,
100 double target_radius, double solve_tolerance);
101
103 // The value of the Lagrangian function L(x, y) at the given solution.
105 // A lower bound on the Lagrangian value, valid for the given radius.
107 // An upper bound on the Lagrangian value, valid for the given radius.
109 // The radius used when computing the bounds.
110 double radius;
111};
112
114 return bounds.upper_bound - bounds.lower_bound;
115}
116
117// Defines a norm on a vector partitioned as (x, y) where x is the primal and y
118// is the dual. The enum values define a joint norm as a function of ||x||_P and
119// ||y||_D, whose definition depends on the context.
120enum class PrimalDualNorm {
121 // The joint norm ||(x,y)||_PD = max{||x||_P, ||y||_D}.
122 kMaxNorm,
123 // The joint norm (||(x,y)||_PD)^2 = (||x||_P)^2 + (||y||_D)^2.
125};
126
127// Recall the saddle-point formulation OPT = min_x max_y L(x, y) defined at
128// https://developers.google.com/optimization/lp/pdlp_math#saddle-point_formulation.
129// This function computes lower and upper bounds on OPT with an additional ball
130// or "trust- region" constraint on the domains of x and y.
131//
132// The bounds are derived from the solution of the following problem:
133// min_{x,y} ∇_x L(primal_solution, dual_solution)^T (x - primal_solution) -
134// ∇_y L(primal_solution, dual_solution)^T (y - dual_solution)
135// subject to ||(x - primal_solution, y - dual_solution)||_PD <= radius,
136// where x and y are constrained to their respective bounds and ||(x,y)||_PD is
137// defined by primal_dual_norm.
138// When use_diagonal_qp_trust_region_solver is true, the solver instead solves
139// the following problem:
140// min_{x,y} ∇_x L(primal_solution, dual_solution)^T (x - primal_solution) -
141// ∇_y L(primal_solution, dual_solution)^T (y - dual_solution) +
142// (1 / 2) * (x - primal_solution)^T * objective_matrix * (x - primal_solution),
143// subject to ||(x - primal_solution, y - dual_solution)||_PD <= radius.
144// use_diagonal_qp_trust_region_solver == true assumes that primal_dual_norm
145// is the Euclidean norm and the objective matrix is diagonal.
146// See SolveDiagonalTrustRegion() above for the meaning of
147// diagonal_qp_trust_region_solver_tolerance.
148//
149// In the context of primal_dual_norm, the primal norm ||.||_P is defined as
150// (||x||_P)^2 = (1 / 2) * primal_weight * ||x||_2^2, and the dual norm ||.||_D
151// is defined as
152// (||y||_D)^2 = (1 / 2) * (1 / primal_weight) * ||y||_2^2.
153//
154// Given an optimal solution (x, y) to the above problem, the lower bound is
155// computed as L(primal_solution, dual_solution) +
156// ∇_x L(primal_solution, dual_solution)^T (x - primal_solution)
157// and the upper bound is computed as L(primal_solution, dual_solution) +
158// ∇_y L(primal_solution, dual_solution)^T (y - dual_solution).
159//
160// The bounds are "localized" because they are guaranteed to bound OPT only if
161// the ||.||_PD ball contains an optimal solution.
162// The primal_product and dual_product arguments optionally specify the values
163// of constraint_matrix * primal_solution and constraint_matrix.transpose() *
164// dual_solution, respectively. If set to nullptr, they will be computed.
166 const ShardedQuadraticProgram& sharded_qp,
167 const Eigen::VectorXd& primal_solution,
168 const Eigen::VectorXd& dual_solution, PrimalDualNorm primal_dual_norm,
169 double primal_weight, double radius, const Eigen::VectorXd* primal_product,
170 const Eigen::VectorXd* dual_product,
171 bool use_diagonal_qp_trust_region_solver,
172 double diagonal_qp_trust_region_solver_tolerance);
173
174namespace internal {
175
176// These functions templated on TrustRegionProblem compute values useful to the
177// trust region solve. The templated TrustRegionProblem type should provide
178// methods:
179// double Objective(int64_t index) const;
180// double LowerBound(int64_t index) const;
181// double UpperBound(int64_t index) const;
182// double CenterPoint(int64_t index) const;
183// double NormWeight(int64_t index) const;
184// See trust_region.cc for more details and several implementations.
185
186// The distance (in the indexed element) from the center point to the bound, in
187// the direction that reduces the objective.
188template <typename TrustRegionProblem>
189double DistanceAtCriticalStepSize(const TrustRegionProblem& problem,
190 const int64_t index) {
191 if (problem.Objective(index) == 0.0) {
192 return 0.0;
193 }
194 if (problem.Objective(index) > 0.0) {
195 return problem.LowerBound(index) - problem.CenterPoint(index);
196 } else {
197 return problem.UpperBound(index) - problem.CenterPoint(index);
198 }
199}
200
201// The critical step size is the step size at which the indexed element hits its
202// bound (or infinity if that doesn't happen).
203template <typename TrustRegionProblem>
204double CriticalStepSize(const TrustRegionProblem& problem,
205 const int64_t index) {
206 if (problem.Objective(index) == 0.0) {
207 return std::numeric_limits<double>::infinity();
208 }
209 return -problem.NormWeight(index) *
210 DistanceAtCriticalStepSize(problem, index) / problem.Objective(index);
211}
212
213// The value of the indexed element at the given step_size, projected onto the
214// bounds.
215template <typename TrustRegionProblem>
216double ProjectedValue(const TrustRegionProblem& problem, const int64_t index,
217 const double step_size) {
218 const double full_step =
219 problem.CenterPoint(index) -
220 step_size * problem.Objective(index) / problem.NormWeight(index);
221 return std::min(std::max(full_step, problem.LowerBound(index)),
222 problem.UpperBound(index));
223}
224
225// An easy way of computing medians that's slightly off when the length of the
226// array is even. "array" is intentionally passed by copy.
227// "value_function" maps an element of "array" to its (double) value. Returns
228// the value of the median element.
229template <typename ArrayType, typename ValueFunction>
230double EasyMedian(ArrayType array, ValueFunction value_function) {
231 CHECK_GT(array.size(), 0);
232 auto middle = array.begin() + (array.size() / 2);
233 absl::c_nth_element(array, middle,
234 [&](typename ArrayType::value_type lhs,
235 typename ArrayType::value_type rhs) {
236 return value_function(lhs) < value_function(rhs);
237 });
238 return value_function(*middle);
239}
240
241// Lists the undecided components (from [start_index, end_index) as those with
242// finite critical step sizes. The components with infinite critical step sizes
243// will never hit their bounds, so returns their contribution to square of the
244// radius.
245template <typename TrustRegionProblem>
247 const TrustRegionProblem& problem, int64_t start_index, int64_t end_index,
248 std::vector<int64_t>& undecided_components) {
249 // TODO(user): Evaluate dropping this reserve(), since it wastes space
250 // if many components are decided.
251 undecided_components.clear();
252 undecided_components.reserve(end_index - start_index);
253 double radius_coefficient = 0.0;
254 for (int64_t index = start_index; index < end_index; ++index) {
255 if (std::isfinite(internal::CriticalStepSize(problem, index))) {
256 undecided_components.push_back(index);
257 } else {
258 // Simplified from norm_weight * (objective / norm_weight)^2.
259 radius_coefficient += MathUtil::Square(problem.Objective(index)) /
260 problem.NormWeight(index);
261 }
262 }
263 return radius_coefficient;
264}
265
266template <typename TrustRegionProblem>
268 const TrustRegionProblem& problem, const double step_size,
269 const std::vector<int64_t>& undecided_components) {
270 return absl::c_accumulate(
271 undecided_components, 0.0, [&](double sum, int64_t index) {
272 const double distance_at_projected_value =
273 internal::ProjectedValue(problem, index, step_size) -
274 problem.CenterPoint(index);
275 return sum + problem.NormWeight(index) *
276 MathUtil::Square(distance_at_projected_value);
277 });
278}
279
280// Points whose critical step-sizes are greater than or equal to
281// step_size_threshold are eliminated from the undecided components (we know
282// they'll be determined by center_point - step_size * objective /
283// norm_weights). Returns the coefficient of step_size^2 that accounts of the
284// contribution of the removed variables to the radius squared.
285template <typename TrustRegionProblem>
287 const TrustRegionProblem& problem, const double step_size_threshold,
288 std::vector<int64_t>& undecided_components) {
289 double variable_radius_coefficient = 0.0;
290 for (const int64_t index : undecided_components) {
291 if (internal::CriticalStepSize(problem, index) >= step_size_threshold) {
292 // Simplified from norm_weight * (objective / norm_weight)^2.
293 variable_radius_coefficient +=
294 MathUtil::Square(problem.Objective(index)) /
295 problem.NormWeight(index);
296 }
297 }
298 auto result =
299 std::remove_if(undecided_components.begin(), undecided_components.end(),
300 [&](const int64_t index) {
301 return internal::CriticalStepSize(problem, index) >=
302 step_size_threshold;
303 });
304 undecided_components.erase(result, undecided_components.end());
305 return variable_radius_coefficient;
306}
307
308// Points whose critical step-sizes are smaller than or equal to
309// step_size_threshold are eliminated from the undecided components (we know
310// they'll always be at their bounds). Returns the weighted distance squared
311// from the center point for the removed components.
312template <typename TrustRegionProblem>
314 const TrustRegionProblem& problem, const double step_size_threshold,
315 std::vector<int64_t>& undecided_components) {
316 double radius_sq = 0.0;
317 for (const int64_t index : undecided_components) {
318 if (internal::CriticalStepSize(problem, index) <= step_size_threshold) {
319 radius_sq += problem.NormWeight(index) *
322 }
323 }
324 auto result =
325 std::remove_if(undecided_components.begin(), undecided_components.end(),
326 [&](const int64_t index) {
327 return internal::CriticalStepSize(problem, index) <=
328 step_size_threshold;
329 });
330 undecided_components.erase(result, undecided_components.end());
331 return radius_sq;
332}
333
334// PrimalTrustRegionProblem defines the primal trust region problem given a
335// QuadraticProgram, primal solution, and primal gradient. It captures const
336// references to the constructor arguments, which should outlive the class
337// instance.
338// The corresponding trust region problem is
339// min_x primal_gradient' * (x - primal_solution)
340// s.t. qp.variable_lower_bounds <= x <= qp.variable_upper_bounds
341// || x - primal_solution ||_2 <= target_radius
343 public:
345 const Eigen::VectorXd* primal_solution,
346 const Eigen::VectorXd* primal_gradient,
347 const double norm_weight = 1.0)
348 : qp_(*qp),
349 primal_solution_(*primal_solution),
350 primal_gradient_(*primal_gradient),
351 norm_weight_(norm_weight) {}
352 double Objective(int64_t index) const { return primal_gradient_[index]; }
353 double LowerBound(int64_t index) const {
354 return qp_.variable_lower_bounds[index];
355 }
356 double UpperBound(int64_t index) const {
357 return qp_.variable_upper_bounds[index];
358 }
359 double CenterPoint(int64_t index) const { return primal_solution_[index]; }
360 double NormWeight(int64_t index) const { return norm_weight_; }
361
362 private:
363 const QuadraticProgram& qp_;
364 const Eigen::VectorXd& primal_solution_;
365 const Eigen::VectorXd& primal_gradient_;
366 const double norm_weight_;
367};
368
369// DualTrustRegionProblem defines the dual trust region problem given a
370// QuadraticProgram, dual solution, and dual gradient. It captures const
371// references to the constructor arguments, which should outlive the class
372// instance.
373// The corresponding trust region problem is
374// max_y dual_gradient' * (y - dual_solution)
375// s.t. qp.implicit_dual_lower_bounds <= y <= qp.implicit_dual_upper_bounds
376// || y - dual_solution ||_2 <= target_radius
377// where the implicit dual bounds are those given in
378// https://developers.google.com/optimization/lp/pdlp_math#dual_variable_bounds
380 public:
382 const Eigen::VectorXd* dual_solution,
383 const Eigen::VectorXd* dual_gradient,
384 const double norm_weight = 1.0)
385 : qp_(*qp),
386 dual_solution_(*dual_solution),
387 dual_gradient_(*dual_gradient),
388 norm_weight_(norm_weight) {}
389 double Objective(int64_t index) const {
390 // The objective is negated because the trust region problem objective is
391 // minimize, but for the dual problem we want to maximize the gradient.
392 return -dual_gradient_[index];
393 }
394 double LowerBound(int64_t index) const {
395 return std::isfinite(qp_.constraint_upper_bounds[index])
396 ? -std::numeric_limits<double>::infinity()
397 : 0.0;
398 }
399 double UpperBound(int64_t index) const {
400 return std::isfinite(qp_.constraint_lower_bounds[index])
401 ? std::numeric_limits<double>::infinity()
402 : 0.0;
403 }
404 double CenterPoint(int64_t index) const { return dual_solution_[index]; }
405 double NormWeight(int64_t index) const { return norm_weight_; }
406
407 private:
408 const QuadraticProgram& qp_;
409 const Eigen::VectorXd& dual_solution_;
410 const Eigen::VectorXd& dual_gradient_;
411 const double norm_weight_;
412};
413
414} // namespace internal
415
416} // namespace operations_research::pdlp
417
418#endif // PDLP_TRUST_REGION_H_
int64_t max
Definition: alldiff_cst.cc:140
int64_t min
Definition: alldiff_cst.cc:139
#define CHECK_GT(val1, val2)
Definition: base/logging.h:708
static T Square(const T x)
Definition: mathutil.h:101
DualTrustRegionProblem(const QuadraticProgram *qp, const Eigen::VectorXd *dual_solution, const Eigen::VectorXd *dual_gradient, const double norm_weight=1.0)
Definition: trust_region.h:381
PrimalTrustRegionProblem(const QuadraticProgram *qp, const Eigen::VectorXd *primal_solution, const Eigen::VectorXd *primal_gradient, const double norm_weight=1.0)
Definition: trust_region.h:344
SharedBoundsManager * bounds
int index
double ComputeInitialUndecidedComponents(const TrustRegionProblem &problem, int64_t start_index, int64_t end_index, std::vector< int64_t > &undecided_components)
Definition: trust_region.h:246
double ProjectedValue(const TrustRegionProblem &problem, const int64_t index, const double step_size)
Definition: trust_region.h:216
double RemoveCriticalStepsAboveThreshold(const TrustRegionProblem &problem, const double step_size_threshold, std::vector< int64_t > &undecided_components)
Definition: trust_region.h:286
double DistanceAtCriticalStepSize(const TrustRegionProblem &problem, const int64_t index)
Definition: trust_region.h:189
double CriticalStepSize(const TrustRegionProblem &problem, const int64_t index)
Definition: trust_region.h:204
double RemoveCriticalStepsBelowThreshold(const TrustRegionProblem &problem, const double step_size_threshold, std::vector< int64_t > &undecided_components)
Definition: trust_region.h:313
double EasyMedian(ArrayType array, ValueFunction value_function)
Definition: trust_region.h:230
double RadiusSquaredOfUndecidedComponents(const TrustRegionProblem &problem, const double step_size, const std::vector< int64_t > &undecided_components)
Definition: trust_region.h:267
TrustRegionResult SolveDiagonalTrustRegion(const VectorXd &objective_vector, const VectorXd &objective_matrix_diagonal, const VectorXd &variable_lower_bounds, const VectorXd &variable_upper_bounds, const VectorXd &center_point, const VectorXd &norm_weights, const double target_radius, const Sharder &sharder, const double solve_tolerance)
TrustRegionResult SolveDiagonalQpTrustRegion(const ShardedQuadraticProgram &sharded_qp, const VectorXd &primal_solution, const VectorXd &dual_solution, const VectorXd &primal_gradient, const VectorXd &dual_gradient, const double primal_weight, double target_radius, const double solve_tolerance)
TrustRegionResult SolveTrustRegion(const VectorXd &objective_vector, const VectorXd &variable_lower_bounds, const VectorXd &variable_upper_bounds, const VectorXd &center_point, const VectorXd &norm_weights, const double target_radius, const Sharder &sharder)
LocalizedLagrangianBounds ComputeLocalizedLagrangianBounds(const ShardedQuadraticProgram &sharded_qp, const VectorXd &primal_solution, const VectorXd &dual_solution, const PrimalDualNorm primal_dual_norm, const double primal_weight, const double radius, const VectorXd *primal_product, const VectorXd *dual_product, const bool use_diagonal_qp_trust_region_solver, const double diagonal_qp_trust_region_solver_tolerance)
double BoundGap(const LocalizedLagrangianBounds &bounds)
Definition: trust_region.h:113
VectorXd variable_lower_bounds
VectorXd center_point
VectorXd variable_upper_bounds
VectorXd objective_vector
VectorXd norm_weights
VectorXd objective_matrix_diagonal