OR-Tools  9.3
trust_region.cc
Go to the documentation of this file.
1// Copyright 2010-2021 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
15
16#include <algorithm>
17#include <cmath>
18#include <cstdint>
19#include <limits>
20#include <utility>
21#include <vector>
22
23#include "Eigen/Core"
24#include "absl/types/optional.h"
31
33
34using ::Eigen::VectorXd;
35
36namespace {
37
38// The functions in this file that are templated on a TrustRegionProblem use the
39// templated class to specify the following trust-region problem with bound
40// constraints:
41// min_x Objective' * (x - CenterPoint)
42// s.t. LowerBound <= x <= UpperBound
// || x - CenterPoint ||_W <= target_radius
44// where ||y||_W = sqrt(sum_i NormWeight[i] * y[i]^2)
45// The templated TrustRegionProblem type should provide methods:
46// double Objective(int64_t index) const;
47// double LowerBound(int64_t index) const;
48// double UpperBound(int64_t index) const;
49// double CenterPoint(int64_t index) const;
50// double NormWeight(int64_t index) const;
51// which give the values of the corresponding terms in the problem
52// specification. See VectorTrustRegionProblem for an example. The
53// *TrustRegionProblem classes below implement several instances of
54// TrustRegionProblem.
55// On the other hand, the functions that are templated on a
56// DiagonalTrustRegionProblem use the templated class to specify the following
57// trust-region problem with bound constraints:
58// min_x (1 / 2) * (x - CenterPoint)' ObjectiveMatrix * (x - CenterPoint)
59// + Objective' * (x - CenterPoint)
60// s.t. LowerBound <= x <= UpperBound
61// || x - CenterPoint ||_W <= target_radius,
62// where ||y||_W = sqrt(sum_i NormWeight[i] * y[i]^2) and ObjectiveMatrix is
63// assumed to be a diagonal matrix with nonnegative entries. Templated
64// DiagonalTrustRegionProblem types should provide all the methods provided by
65// templated TrustRegionProblem types, as well as:
66// double ObjectiveMatrixDiagonalAt(int64_t index) const;
67// which gives the value of the objective matrix diagonal at a specified index.
68// See DiagonalTrustRegionProblemFromQp for an example that sets up the diagonal
69// trust region problem from an existing ShardedQuadraticProgram.
70
71// VectorTrustRegionProblem uses explicit vectors to define the trust region
72// problem. It captures const references to the vectors used in the constrcutor,
73// which should outlive the class instance.
74class VectorTrustRegionProblem {
75 public:
76 VectorTrustRegionProblem(const VectorXd* objective,
77 const VectorXd* lower_bound,
78 const VectorXd* upper_bound,
79 const VectorXd* center_point,
80 const VectorXd* norm_weight)
81 : objective_(*objective),
82 lower_bound_(*lower_bound),
83 upper_bound_(*upper_bound),
84 center_point_(*center_point),
85 norm_weight_(*norm_weight) {}
86 double Objective(int64_t index) const { return objective_(index); }
87 double LowerBound(int64_t index) const { return lower_bound_(index); }
88 double UpperBound(int64_t index) const { return upper_bound_(index); }
89 double CenterPoint(int64_t index) const { return center_point_(index); }
90 double NormWeight(int64_t index) const { return norm_weight_(index); }
91
92 private:
93 const VectorXd& objective_;
94 const VectorXd& lower_bound_;
95 const VectorXd& upper_bound_;
96 const VectorXd& center_point_;
97 const VectorXd& norm_weight_;
98};
99
100// JointTrustRegionProblem defines the joint primal/dual trust region problem
101// given a QuadraticProgram, primal and dual solutions, primal and dual
102// gradients, and the primal weight. The joint problem (implicitly) concatenates
103// the primal and dual vectors. The class captures const references to the
104// constructor arguments (except primal_weight), which should outlive the class
105// instance.
106// The corresponding trust region problem is
107// min primal_gradient' * (x - primal_solution)
108// - dual_gradient' * (y - dual_solution)
109// s.t. qp.variable_lower_bounds <= x <= qp.variable_upper_bounds
110// qp.implicit_dual_lower_bounds <= y <= qp.implicit_dual_upper_bounds
111// || (x, y) - (primal_solution, dual_solution) ||_2 <= target_radius
112// where the implicit dual bounds are those given in
113// https://developers.google.com/optimization/lp/pdlp_math#dual_variable_bounds
114class JointTrustRegionProblem {
115 public:
116 JointTrustRegionProblem(const QuadraticProgram* qp,
117 const VectorXd* primal_solution,
118 const VectorXd* dual_solution,
119 const VectorXd* primal_gradient,
120 const VectorXd* dual_gradient,
121 const double primal_weight)
122 : qp_(*qp),
123 primal_size_(qp_.variable_lower_bounds.size()),
124 primal_solution_(*primal_solution),
125 dual_solution_(*dual_solution),
126 primal_gradient_(*primal_gradient),
127 dual_gradient_(*dual_gradient),
128 primal_weight_(primal_weight) {}
129 double Objective(int64_t index) const {
130 return index < primal_size_ ? primal_gradient_[index]
131 : -dual_gradient_[index - primal_size_];
132 }
133 double LowerBound(int64_t index) const {
134 return index < primal_size_ ? qp_.variable_lower_bounds[index]
135 : std::isfinite(qp_.constraint_upper_bounds[index - primal_size_])
136 ? -std::numeric_limits<double>::infinity()
137 : 0.0;
138 }
139 double UpperBound(int64_t index) const {
140 return index < primal_size_ ? qp_.variable_upper_bounds[index]
141 : std::isfinite(qp_.constraint_lower_bounds[index - primal_size_])
142 ? std::numeric_limits<double>::infinity()
143 : 0.0;
144 }
145 double CenterPoint(int64_t index) const {
146 return index < primal_size_ ? primal_solution_[index]
147 : dual_solution_[index - primal_size_];
148 }
149 double NormWeight(int64_t index) const {
150 return index < primal_size_ ? 0.5 * primal_weight_ : 0.5 / primal_weight_;
151 }
152
153 private:
154 const QuadraticProgram& qp_;
155 const int64_t primal_size_;
156 const VectorXd& primal_solution_;
157 const VectorXd& dual_solution_;
158 const VectorXd& primal_gradient_;
159 const VectorXd& dual_gradient_;
160 const double primal_weight_;
161};
162
163struct TrustRegionResultStepSize {
164 // The step_size of the solution.
166 // The value objective_vector' * (solution - center_point).
168};
169
170// "problem" is sharded according to the given sharder. Within each shard,
171// this function selects the subset of elements corresponding to
172// indexed_components_by_shard, and takes the median of the critical step sizes
173// of these elements, producing an array A of shard medians. Then returns the
174// median of the array A. CHECK-fails if indexed_components_by_shard is empty
175// for all shards.
176template <typename TrustRegionProblem>
177double MedianOfShardMedians(
178 const TrustRegionProblem& problem,
179 const std::vector<std::vector<int64_t>>& indexed_components_by_shard,
180 const Sharder& sharder) {
181 std::vector<absl::optional<double>> shard_medians(sharder.NumShards(),
182 absl::nullopt);
183 sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
184 const auto& indexed_shard_components =
185 indexed_components_by_shard[shard.Index()];
186 if (!indexed_shard_components.empty()) {
187 shard_medians[shard.Index()] = internal::EasyMedian(
188 indexed_shard_components, [&](const int64_t index) {
189 return internal::CriticalStepSize(problem, index);
190 });
191 }
192 });
193 std::vector<double> non_empty_medians;
194 for (const auto& median : shard_medians) {
195 if (median.has_value()) {
196 non_empty_medians.push_back(*median);
197 }
198 }
199 CHECK(!non_empty_medians.empty());
200 return internal::EasyMedian(non_empty_medians,
201 [](const double x) { return x; });
202}
203
204struct InitialState {
205 std::vector<std::vector<int64_t>> undecided_components_by_shard;
207};
208
209template <typename TrustRegionProblem>
210InitialState ComputeInitialState(const TrustRegionProblem& problem,
211 const Sharder& sharder) {
212 InitialState result;
213 result.undecided_components_by_shard.resize(sharder.NumShards());
214 result.radius_coefficient_of_decided_components =
215 sharder.ParallelSumOverShards([&](const Sharder::Shard& shard) {
216 const int64_t shard_start = sharder.ShardStart(shard.Index());
217 const int64_t shard_size = sharder.ShardSize(shard.Index());
219 problem, shard_start, shard_start + shard_size,
220 result.undecided_components_by_shard[shard.Index()]);
221 });
222 return result;
223}
224
225template <typename TrustRegionProblem>
227 const TrustRegionProblem& problem, const double step_size,
228 const Sharder& sharder,
229 const std::vector<std::vector<int64_t>>& undecided_components_by_shard) {
230 return sharder.ParallelSumOverShards([&](const Sharder::Shard& shard) {
232 problem, step_size, undecided_components_by_shard[shard.Index()]);
233 });
234}
235
236template <typename TrustRegionProblem>
238 const TrustRegionProblem& problem, const double step_size_threshold,
239 const Sharder& sharder,
240 std::vector<std::vector<int64_t>>& undecided_components_by_shard) {
241 return sharder.ParallelSumOverShards([&](const Sharder::Shard& shard) {
243 problem, step_size_threshold,
244 undecided_components_by_shard[shard.Index()]);
245 });
246}
247
248template <typename TrustRegionProblem>
250 const TrustRegionProblem& problem, const double step_size_threshold,
251 const Sharder& sharder,
252 std::vector<std::vector<int64_t>>& undecided_components_by_shard) {
253 return sharder.ParallelSumOverShards([&](const Sharder::Shard& shard) {
255 problem, step_size_threshold,
256 undecided_components_by_shard[shard.Index()]);
257 });
258}
259
// Returns the total number of component indices stored across all shards,
// i.e., the sum of the sizes of the inner vectors. Returns 0 when the outer
// vector is empty.
int64_t NumUndecidedComponents(
    const std::vector<std::vector<int64_t>>& undecided_components_by_shard) {
  int64_t total = 0;
  for (const std::vector<int64_t>& shard_components :
       undecided_components_by_shard) {
    total += static_cast<int64_t>(shard_components.size());
  }
  return total;
}
268
// Returns the size of the largest inner vector, i.e., the largest number of
// component indices held by any single shard. Returns 0 when the outer vector
// is empty or every shard is empty.
int64_t MaxUndecidedComponentsInAnyShard(
    const std::vector<std::vector<int64_t>>& undecided_components_by_shard) {
  int64_t largest = 0;
  for (const std::vector<int64_t>& shard_components :
       undecided_components_by_shard) {
    largest =
        std::max(largest, static_cast<int64_t>(shard_components.size()));
  }
  return largest;
}
277
278template <typename TrustRegionProblem>
279VectorXd ComputeSolution(const TrustRegionProblem& problem,
280 const double step_size, const Sharder& sharder) {
281 VectorXd solution(sharder.NumElements());
282 sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
283 const int64_t shard_start = sharder.ShardStart(shard.Index());
284 const int64_t shard_size = sharder.ShardSize(shard.Index());
285 for (int64_t index = shard_start; index < shard_start + shard_size;
286 ++index) {
287 solution[index] = internal::ProjectedValue(problem, index, step_size);
288 }
289 });
290 return solution;
291}
292
293template <typename TrustRegionProblem>
294double ComputeObjectiveValue(const TrustRegionProblem& problem,
295 const double step_size, const Sharder& sharder) {
296 return sharder.ParallelSumOverShards([&](const Sharder::Shard& shard) {
297 const int64_t shard_start = sharder.ShardStart(shard.Index());
298 const int64_t shard_size = sharder.ShardSize(shard.Index());
299 double shard_value = 0.0;
300 for (int64_t index = shard_start; index < shard_start + shard_size;
301 ++index) {
302 shard_value += problem.Objective(index) *
303 (internal::ProjectedValue(problem, index, step_size) -
304 problem.CenterPoint(index));
305 }
306 return shard_value;
307 });
308}
309
310// Solves the following trust-region problem with bound constraints:
311// min_x Objective' * (x - CenterPoint)
312// s.t. LowerBound <= x <= UpperBound
// || x - CenterPoint ||_W <= target_radius
314// where ||y||_W = sqrt(sum_i NormWeight[i] * y[i]^2)
315// given by a TrustRegionProblem (see description at the top of this file),
316// using an exact linear-time method. The sharder's size is used to determine
317// the size of the problem. Assumes that there is always a feasible solution,
318// that is, that problem.LowerBound(i) <= problem.CenterPoint(i) <=
319// problem.UpperBound(i), and that problem.NormWeight(i) > 0, for
320// 0 <= i < sharder.NumElements().
321//
322// The linear-time method is based on the observation that the optimal x will be
323// of the form x(delta) =
324// proj(center_point - delta * objective_vector / norm_weights, bounds)
325// for some delta such that || x(delta) - center_point ||_W = target_radius
326// (except for corner cases where the radius constraint is inactive) and the
327// vector division is element-wise. Therefore we find the critical threshold for
328// each coordinate, and repeatedly: (1) take the median delta, (2) check the
329// corresponding radius, and (3) eliminate half of the data points from
330// consideration.
331template <typename TrustRegionProblem>
332TrustRegionResultStepSize SolveTrustRegionStepSize(
333 const TrustRegionProblem& problem, const double target_radius,
334 const Sharder& sharder) {
335 CHECK_GE(target_radius, 0.0);
336
337 const bool norm_weights_are_positive =
338 sharder.ParallelTrueForAllShards([&](const Sharder::Shard& shard) {
339 const int64_t shard_start = sharder.ShardStart(shard.Index());
340 const int64_t shard_size = sharder.ShardSize(shard.Index());
341 for (int64_t index = shard_start; index < shard_start + shard_size;
342 ++index) {
343 if (problem.NormWeight(index) <= 0.0) return false;
344 }
345 return true;
346 });
347 CHECK(norm_weights_are_positive);
348
349 if (target_radius == 0.0) {
350 return {.solution_step_size = 0.0, .objective_value = 0.0};
351 }
352
353 const bool objective_is_all_zeros =
354 sharder.ParallelTrueForAllShards([&](const Sharder::Shard& shard) {
355 const int64_t shard_start = sharder.ShardStart(shard.Index());
356 const int64_t shard_size = sharder.ShardSize(shard.Index());
357 for (int64_t index = shard_start; index < shard_start + shard_size;
358 ++index) {
359 if (problem.Objective(index) != 0.0) return false;
360 }
361 return true;
362 });
363 if (objective_is_all_zeros) {
364 return {.solution_step_size = 0.0, .objective_value = 0.0};
365 }
366
367 InitialState initial_state = ComputeInitialState(problem, sharder);
368
369 // The contribution to the weighted radius squared from the variables that we
370 // know are at their bounds in the solution.
371 double fixed_radius_squared = 0.0;
372
373 // This value times step_size^2 gives the contribution to the weighted radius
374 // squared from the variables determined in the solution by the formula
375 // center_point - step_size * objective / norm_weights. These variables are
376 // not at their bounds in the solution, except in degenerate cases.
377 double variable_radius_coefficient =
378 initial_state.radius_coefficient_of_decided_components;
379
380 // For each shard, the components of the variables that aren't accounted for
381 // in fixed_radius_squared or variable_radius_coefficient, i.e., we don't know
382 // if they're at their bounds in the solution.
383 std::vector<std::vector<int64_t>> undecided_components_by_shard(
384 std::move(initial_state.undecided_components_by_shard));
385
386 // These are counters for the number of variables we inspect overall during
387 // the solve, including in the initialization. The "worst case" accounts for
388 // imbalance across the shards by charging each round for the maximum number
389 // of elements in a shard, because shards with fewer elements may correspond
390 // to idle threads.
391 int64_t actual_elements_seen = sharder.NumElements();
392 int64_t worst_case_elements_seen = sharder.NumElements();
393
394 while (NumUndecidedComponents(undecided_components_by_shard) > 0) {
395 worst_case_elements_seen +=
396 MaxUndecidedComponentsInAnyShard(undecided_components_by_shard) *
397 sharder.NumShards();
398 actual_elements_seen +=
399 NumUndecidedComponents(undecided_components_by_shard);
400
401 const double step_size_threshold =
402 MedianOfShardMedians(problem, undecided_components_by_shard, sharder);
403 const double radius_squared_of_undecided_components =
405 problem, /*step_size=*/step_size_threshold, sharder,
407
408 const double radius_squared_at_threshold =
409 radius_squared_of_undecided_components + fixed_radius_squared +
410 variable_radius_coefficient * MathUtil::Square(step_size_threshold);
411
412 if (radius_squared_at_threshold > MathUtil::Square(target_radius)) {
413 variable_radius_coefficient += RemoveCriticalStepsAboveThreshold(
414 problem, step_size_threshold, sharder, undecided_components_by_shard);
415 } else {
416 fixed_radius_squared += RemoveCriticalStepsBelowThreshold(
417 problem, step_size_threshold, sharder, undecided_components_by_shard);
418 }
419 }
420 VLOG(1) << "Total passes through variables: "
421 << actual_elements_seen / static_cast<double>(sharder.NumElements());
422 VLOG(1) << "Theoretical slowdown because of shard imbalance: "
423 << static_cast<double>(worst_case_elements_seen) /
424 actual_elements_seen -
425 1.0;
426
427 // Now that we know exactly which variables are fixed at their bounds,
428 // compute the step size that will give us the exact target radius.
429 // This is the solution to: fixed_radius_squared +
430 // variable_radius_coefficient * step_size^2 == target_radius^2.
431 double step_size = 0.0;
432 if (variable_radius_coefficient > 0.0) {
433 step_size =
434 std::sqrt((MathUtil::Square(target_radius) - fixed_radius_squared) /
435 variable_radius_coefficient);
436 } else {
437 // All variables are fixed at their bounds. So we can take a very large
438 // finite step. We don't use infinity as the step in order to avoid 0 *
439 // infinity = NaN when zeros are present in the objective vector. It's ok if
440 // the result of step_size * objective_vector has infinity components
441 // because these are projected correctly to bounds.
443 }
444
445 return {
446 .solution_step_size = step_size,
447 .objective_value = ComputeObjectiveValue(problem, step_size, sharder)};
448}
449
450} // namespace
451
453 const VectorXd& variable_lower_bounds,
454 const VectorXd& variable_upper_bounds,
455 const VectorXd& center_point,
456 const VectorXd& norm_weights,
457 const double target_radius,
458 const Sharder& sharder) {
459 VectorTrustRegionProblem problem(&objective_vector, &variable_lower_bounds,
461 &norm_weights);
462 TrustRegionResultStepSize solution =
463 SolveTrustRegionStepSize(problem, target_radius, sharder);
464 return TrustRegionResult{
465 .solution_step_size = solution.solution_step_size,
466 .objective_value = solution.objective_value,
467 .solution =
468 ComputeSolution(problem, solution.solution_step_size, sharder),
469 };
470}
471
472// A generic trust region problem of the form:
473// min_{x} (1 / 2) * (x - center_point)'Q(x - center_point)
474// + c'(x - center_point)
// s.t. l <= x <= u
476// ||x - center_point||_W <= radius
477// where ||z||_W = sqrt(sum_i w_i z_i^2) is a weighted Euclidean norm.
478// It is assumed that the objective matrix Q is a nonnegative diagonal matrix.
480 public:
481 // A reference to the objects passed in the constructor is kept, so they must
482 // outlive the DiagonalTrustRegionProblem instance.
484 const VectorXd* objective_matrix_diagonal,
485 const VectorXd* lower_bounds,
486 const VectorXd* upper_bounds,
487 const VectorXd* center_point,
488 const VectorXd* norm_weights)
489 : objective_vector_(*objective_vector),
490 objective_matrix_diagonal_(*objective_matrix_diagonal),
491 variable_lower_bounds_(*lower_bounds),
492 variable_upper_bounds_(*upper_bounds),
493 center_point_(*center_point),
494 norm_weight_(*norm_weights) {}
495
496 double CenterPoint(int64_t index) const { return center_point_[index]; }
497
498 double NormWeight(int64_t index) const { return norm_weight_[index]; }
499
500 double LowerBound(int64_t index) const {
501 return variable_lower_bounds_[index];
502 }
503
504 double UpperBound(int64_t index) const {
505 return variable_upper_bounds_[index];
506 }
507
508 double Objective(int64_t index) const { return objective_vector_[index]; }
509
510 double ObjectiveMatrixDiagonalAt(int64_t index) const {
511 return objective_matrix_diagonal_[index];
512 }
513
514 private:
515 const VectorXd& objective_vector_;
516 const VectorXd& objective_matrix_diagonal_;
517 const VectorXd& variable_lower_bounds_;
518 const VectorXd& variable_upper_bounds_;
519 const VectorXd& center_point_;
520 const VectorXd& norm_weight_;
521};
522
523// DiagonalTrustRegionProblemFromQp accepts a diagonal quadratic program and
524// information about the current solution and gradient and sets up the following
525// trust-region subproblem:
// min_{x, y} (1 / 2) * (x - primal_solution)'Q(x - primal_solution)
// + primal_gradient'(x - primal_solution)
// - dual_gradient'(y - dual_solution)
// s.t. l <= x <= u
// l_implicit <= y <= u_implicit
531// ||(x, y) - (primal_solution, dual_solution)||_W <= r,
532// where
533// ||(x, y)||_W = sqrt(0.5 * primal_weight ||x||^2 +
534// (0.5 / primal_weight) ||y||^2).
535// This class implements the same methods as DiagonalTrustRegionProblem, but
536// without the need to explicitly copy vectors.
538 public:
539 // A reference to the objects passed in the constructor is kept, so they must
540 // outlive the DiagonalTrustRegionProblemFromQp instance.
542 const VectorXd* primal_solution,
543 const VectorXd* dual_solution,
544 const VectorXd* primal_gradient,
545 const VectorXd* dual_gradient,
546 const double primal_weight)
547 : qp_(*qp),
548 primal_solution_(*primal_solution),
549 dual_solution_(*dual_solution),
550 primal_gradient_(*primal_gradient),
551 dual_gradient_(*dual_gradient),
552 primal_size_(primal_solution->size()),
553 primal_weight_(primal_weight) {}
554
555 double CenterPoint(int64_t index) const {
556 return (index < primal_size_) ? primal_solution_[index]
557 : dual_solution_[index - primal_size_];
558 }
559
560 double NormWeight(int64_t index) const {
561 return (index < primal_size_) ? 0.5 * primal_weight_ : 0.5 / primal_weight_;
562 }
563
564 double LowerBound(int64_t index) const {
565 if (index < primal_size_) {
566 return qp_.variable_lower_bounds[index];
567 } else {
568 return std::isfinite(qp_.constraint_upper_bounds[index - primal_size_])
569 ? -std::numeric_limits<double>::infinity()
570 : 0.0;
571 }
572 }
573
574 double UpperBound(int64_t index) const {
575 if (index < primal_size_) {
576 return qp_.variable_upper_bounds[index];
577 } else {
578 return std::isfinite(qp_.constraint_lower_bounds[index - primal_size_])
579 ? std::numeric_limits<double>::infinity()
580 : 0.0;
581 }
582 }
583
584 double Objective(int64_t index) const {
585 return (index < primal_size_) ? primal_gradient_[index]
586 : -dual_gradient_[index - primal_size_];
587 }
588
589 double ObjectiveMatrixDiagonalAt(int64_t index) const {
590 if (qp_.objective_matrix.has_value()) {
591 return (index < primal_size_) ? qp_.objective_matrix->diagonal()[index]
592 : 0.0;
593 } else {
594 return 0.0;
595 }
596 }
597
598 private:
599 const QuadraticProgram& qp_;
600 const VectorXd& primal_solution_;
601 const VectorXd& dual_solution_;
602 const VectorXd& primal_gradient_;
603 const VectorXd& dual_gradient_;
604 const int64_t primal_size_;
605 const double primal_weight_;
606};
607
608// Computes a single coordinate projection of the scaled difference,
609// sqrt(NormWeight(i)) * (x[i] - CenterPoint(i)), to the corresponding box
610// constraints. As a function of scaling_factor, the difference is equal to
// ((Q[i, i] / NormWeight(i)) + scaling_factor)^{-1} *
612// (-c[i] / sqrt(NormWeight(i))),
613// where Q, c are the objective matrix and vector, respectively.
614template <typename DiagonalTrustRegionProblem>
616 const DiagonalTrustRegionProblem& problem, const int64_t index,
617 const double scaling_factor) {
618 const double weight = problem.NormWeight(index);
619 return std::min(
620 std::max((-problem.Objective(index) / std::sqrt(weight)) /
622 scaling_factor),
623 std::sqrt(weight) *
624 (problem.LowerBound(index) - problem.CenterPoint(index))),
625 std::sqrt(weight) *
626 (problem.UpperBound(index) - problem.CenterPoint(index)));
627}
628
629// Computes the norm of the projection of the difference vector,
630// x - center_point, to the corresponding box constraints. We are using the
631// standard Euclidean norm (instead of the weighted norm) because the solver
632// implicitly reformulates the problem to one with a Euclidean ball constraint
633// first.
634template <typename DiagonalTrustRegionProblem>
636 const Sharder& sharder,
637 const double scaling_factor) {
638 const double squared_norm =
639 sharder.ParallelSumOverShards([&](const Sharder::Shard& shard) {
640 const int64_t shard_start = sharder.ShardStart(shard.Index());
641 const int64_t shard_end =
642 shard_start + sharder.ShardSize(shard.Index());
643 double sum = 0.0;
644 for (int64_t i = shard_start; i < shard_end; ++i) {
645 const double projected_coordinate =
646 ProjectedValueOfScaledDifference(problem, i, scaling_factor);
647 sum += MathUtil::Square(projected_coordinate);
648 }
649 return sum;
650 });
651 return std::sqrt(squared_norm);
652}
653
// Finds an approximately optimal scaling factor for the solution of the trust
// region subproblem, which can be passed on to
// ProjectedValueOfScaledDifference() to find an approximately optimal solution
// to the trust region subproblem. The value returned is guaranteed to be within
// `solve_tol * max(1, s*)` of the optimal scaling `s*`.
659// TODO(user): figure out what accuracy is useful to callers and redo the
660// stopping criterion accordingly.
661template <typename DiagonalTrustRegionProblem>
663 const Sharder& sharder, const double target_radius,
664 const double solve_tol) {
665 // Determine a search interval using monotonicity of the squared norm of the
666 // candidate solution with respect to the scaling factor.
667 double scaling_factor_lower_bound = 0.0;
668 double scaling_factor_upper_bound = 1.0;
669 while (NormOfDeltaProjection(problem, sharder, scaling_factor_upper_bound) >=
670 target_radius) {
671 scaling_factor_lower_bound = scaling_factor_upper_bound;
672 scaling_factor_upper_bound *= 2;
673 }
  // Invariant: scaling_factor_upper_bound >= scaling_factor_lower_bound.
675 while ((scaling_factor_upper_bound - scaling_factor_lower_bound) >=
676 solve_tol * std::max(1.0, scaling_factor_lower_bound)) {
677 const double middle =
678 (scaling_factor_lower_bound + scaling_factor_upper_bound) / 2.0;
679 // Norm is monotonically non-increasing as a function of scaling_factor.
680 if (NormOfDeltaProjection(problem, sharder, middle) <= target_radius) {
681 scaling_factor_upper_bound = middle;
682 } else {
683 scaling_factor_lower_bound = middle;
684 }
685 }
686 return (scaling_factor_upper_bound + scaling_factor_lower_bound) / 2.0;
687}
688
689// Solves the diagonal trust region problem using a binary search algorithm.
690// See comment above SolveDiagonalTrustRegion() in trust_region.h for the
691// meaning of solve_tol.
692template <typename DiagonalTrustRegionProblem>
694 const DiagonalTrustRegionProblem& problem, const Sharder& sharder,
695 const double target_radius, const double solve_tol) {
696 CHECK_GE(target_radius, 0.0);
697 const bool norm_weights_are_positive =
698 sharder.ParallelTrueForAllShards([&](const Sharder::Shard& shard) {
699 const int64_t shard_start = sharder.ShardStart(shard.Index());
700 for (int64_t i = shard_start;
701 i < shard_start + sharder.ShardSize(shard.Index()); ++i) {
702 if (problem.NormWeight(i) <= 0) {
703 return false;
704 }
705 }
706 return true;
707 });
708 CHECK(norm_weights_are_positive);
709 const double optimal_scaling =
710 FindScalingFactor(problem, sharder, target_radius, solve_tol);
711 VectorXd solution(sharder.NumElements());
712 sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
713 const int64_t shard_start = sharder.ShardStart(shard.Index());
714 const int64_t shard_size = sharder.ShardSize(shard.Index());
715 for (int64_t i = shard_start; i < shard_start + shard_size; ++i) {
716 const double weight = problem.NormWeight(i);
717 const double projected_value =
718 ProjectedValueOfScaledDifference(problem, i, optimal_scaling);
719 solution[i] =
720 problem.CenterPoint(i) + std::sqrt(1 / weight) * projected_value;
721 }
722 });
723 const double final_objective_value =
724 sharder.ParallelSumOverShards([&](const Sharder::Shard& shard) {
725 double local_sum = 0.0;
726 const int64_t shard_start = sharder.ShardStart(shard.Index());
727 for (int64_t i = shard_start;
728 i < shard_start + sharder.ShardSize(shard.Index()); ++i) {
729 const double diff = solution[i] - problem.CenterPoint(i);
730 local_sum +=
731 0.5 * diff * problem.ObjectiveMatrixDiagonalAt(i) * diff +
732 diff * problem.Objective(i);
733 }
734 return local_sum;
735 });
736 return {.solution_step_size = optimal_scaling,
737 .objective_value = final_objective_value,
738 .solution = solution};
739}
740
742 const VectorXd& objective_vector, const VectorXd& objective_matrix_diagonal,
743 const VectorXd& variable_lower_bounds,
744 const VectorXd& variable_upper_bounds, const VectorXd& center_point,
745 const VectorXd& norm_weights, const double target_radius,
746 const Sharder& sharder, const double solve_tolerance) {
750 return SolveDiagonalTrustRegionProblem(problem, sharder, target_radius,
751 solve_tolerance);
752}
753
755 const ShardedQuadraticProgram& sharded_qp, const VectorXd& primal_solution,
756 const VectorXd& dual_solution, const VectorXd& primal_gradient,
757 const VectorXd& dual_gradient, const double primal_weight,
758 double target_radius, const double solve_tolerance) {
759 const int64_t problem_size = sharded_qp.PrimalSize() + sharded_qp.DualSize();
760 DiagonalTrustRegionProblemFromQp problem(&sharded_qp.Qp(), &primal_solution,
761 &dual_solution, &primal_gradient,
762 &dual_gradient, primal_weight);
763
764 const Sharder joint_sharder(sharded_qp.PrimalSharder(), problem_size);
765 const bool norm_weights_are_positive =
766 joint_sharder.ParallelTrueForAllShards([&](const Sharder::Shard& shard) {
767 const int64_t shard_start = joint_sharder.ShardStart(shard.Index());
768 for (int64_t i = shard_start;
769 i < shard_start + joint_sharder.ShardSize(shard.Index()); ++i) {
770 if (problem.NormWeight(i) <= 0) {
771 return false;
772 }
773 }
774 return true;
775 });
776 CHECK(norm_weights_are_positive);
777 return SolveDiagonalTrustRegionProblem(problem, joint_sharder, target_radius,
778 solve_tolerance);
779}
780
781namespace {
782
783struct MaxNormBoundResult {
784 // LagrangianPart.value from ComputePrimalGradient and ComputeDualGradient,
785 // respectively.
787 // For the primal, the value
788 // ∇_x L(primal_solution, dual_solution)^T (x^* - primal_solution) where
789 // x^* is the solution of the primal trust region subproblem.
790 // For the dual, the value
791 // ∇_y L(primal_solution, dual_solution)^T (y^* - dual_solution) where
792 // y^* is the solution of the dual trust region subproblem.
793 // This will be a non-positive value for the primal and a non-negative
794 // value for the dual.
796};
797
798MaxNormBoundResult ComputeMaxNormPrimalTrustRegionBound(
799 const ShardedQuadraticProgram& sharded_qp, const VectorXd& primal_solution,
800 const double primal_radius, const VectorXd& dual_product) {
801 LagrangianPart primal_part =
802 ComputePrimalGradient(sharded_qp, primal_solution, dual_product);
803 internal::PrimalTrustRegionProblem primal_problem(
804 &sharded_qp.Qp(), &primal_solution, &primal_part.gradient);
805 TrustRegionResultStepSize trust_region_result = SolveTrustRegionStepSize(
806 primal_problem, primal_radius, sharded_qp.PrimalSharder());
807 return {.part_of_lagrangian_value = primal_part.value,
808 .trust_region_objective_delta = trust_region_result.objective_value};
809}
810
811MaxNormBoundResult ComputeMaxNormDualTrustRegionBound(
812 const ShardedQuadraticProgram& sharded_qp, const VectorXd& dual_solution,
813 const double dual_radius, const VectorXd& primal_product) {
814 LagrangianPart dual_part =
815 ComputeDualGradient(sharded_qp, dual_solution, primal_product);
816 internal::DualTrustRegionProblem dual_problem(
817 &sharded_qp.Qp(), &dual_solution, &dual_part.gradient);
818 TrustRegionResultStepSize trust_region_result = SolveTrustRegionStepSize(
819 dual_problem, dual_radius, sharded_qp.DualSharder());
820 return {.part_of_lagrangian_value = dual_part.value,
821 .trust_region_objective_delta = -trust_region_result.objective_value};
822}
823
// Largest Euclidean distance the primal may move while staying within
// weighted_distance: the maximum of ||x||_2 over all x for which some y
// satisfies max{||x||_P, ||y||_D} <= weighted_distance. Evaluates to
// sqrt(2) * weighted_distance / sqrt(primal_weight).
double MaximumPrimalDistanceGivenWeightedDistance(
    const double weighted_distance, const double primal_weight) {
  const double scaled_distance = std::sqrt(2.0) * weighted_distance;
  return scaled_distance / std::sqrt(primal_weight);
}
831
// Largest Euclidean distance the dual may move while staying within
// weighted_distance: the maximum of ||y||_2 over all y for which some x
// satisfies max{||x||_P, ||y||_D} <= weighted_distance. Evaluates to
// sqrt(2) * weighted_distance * sqrt(primal_weight).
double MaximumDualDistanceGivenWeightedDistance(const double weighted_distance,
                                                const double primal_weight) {
  const double scaled_distance = std::sqrt(2.0) * weighted_distance;
  return scaled_distance * std::sqrt(primal_weight);
}
839
840LocalizedLagrangianBounds ComputeMaxNormLocalizedLagrangianBounds(
841 const ShardedQuadraticProgram& sharded_qp, const VectorXd& primal_solution,
842 const VectorXd& dual_solution, const double primal_weight,
843 const double radius, const Eigen::VectorXd& primal_product,
844 const Eigen::VectorXd& dual_product) {
845 const double primal_radius =
846 MaximumPrimalDistanceGivenWeightedDistance(radius, primal_weight);
847 const double dual_radius =
848 MaximumDualDistanceGivenWeightedDistance(radius, primal_weight);
849
850 // The max norm means that the optimization over the primal and the dual can
851 // be done independently.
852
853 MaxNormBoundResult primal_result = ComputeMaxNormPrimalTrustRegionBound(
854 sharded_qp, primal_solution, primal_radius, dual_product);
855
856 MaxNormBoundResult dual_result = ComputeMaxNormDualTrustRegionBound(
857 sharded_qp, dual_solution, dual_radius, primal_product);
858
859 const double lagrangian_value = primal_result.part_of_lagrangian_value +
860 dual_result.part_of_lagrangian_value;
861
862 return LocalizedLagrangianBounds{
863 .lagrangian_value = lagrangian_value,
864 .lower_bound =
865 lagrangian_value + primal_result.trust_region_objective_delta,
866 .upper_bound =
867 lagrangian_value + dual_result.trust_region_objective_delta,
868 .radius = radius};
869}
870
871LocalizedLagrangianBounds ComputeEuclideanNormLocalizedLagrangianBounds(
872 const ShardedQuadraticProgram& sharded_qp, const VectorXd& primal_solution,
873 const VectorXd& dual_solution, const double primal_weight,
874 const double radius, const Eigen::VectorXd& primal_product,
875 const Eigen::VectorXd& dual_product,
876 const bool use_diagonal_qp_trust_region_solver,
877 const double diagonal_qp_trust_region_solver_tolerance) {
878 const QuadraticProgram& qp = sharded_qp.Qp();
879 const LagrangianPart primal_part =
880 ComputePrimalGradient(sharded_qp, primal_solution, dual_product);
881 const LagrangianPart dual_part =
882 ComputeDualGradient(sharded_qp, dual_solution, primal_product);
883
884 VectorXd trust_region_solution;
885 const double lagrangian_value = primal_part.value + dual_part.value;
886
887 Sharder joint_sharder(
888 sharded_qp.PrimalSharder(),
889 /*num_elements=*/sharded_qp.PrimalSize() + sharded_qp.DualSize());
890
891 if (use_diagonal_qp_trust_region_solver) {
892 DiagonalTrustRegionProblemFromQp problem(
893 &qp, &primal_solution, &dual_solution, &primal_part.gradient,
894 &dual_part.gradient, primal_weight);
895
896 trust_region_solution = SolveDiagonalTrustRegionProblem(
897 problem, joint_sharder, radius,
898 diagonal_qp_trust_region_solver_tolerance)
899 .solution;
900 } else {
901 JointTrustRegionProblem joint_problem(&qp, &primal_solution, &dual_solution,
902 &primal_part.gradient,
903 &dual_part.gradient, primal_weight);
904
905 TrustRegionResultStepSize trust_region_result =
906 SolveTrustRegionStepSize(joint_problem, radius, joint_sharder);
907
908 trust_region_solution = ComputeSolution(
909 joint_problem, trust_region_result.solution_step_size, joint_sharder);
910 }
911
912 auto primal_trust_region_solution =
913 trust_region_solution.segment(0, sharded_qp.PrimalSize());
914 auto dual_trust_region_solution = trust_region_solution.segment(
915 sharded_qp.PrimalSize(), sharded_qp.DualSize());
916
917 // ∇_x L(primal_solution, dual_solution)^T (x - primal_solution)
918 double primal_objective_delta =
919 sharded_qp.PrimalSharder().ParallelSumOverShards(
920 [&](const Sharder::Shard& shard) {
921 return shard(primal_part.gradient)
922 .dot(shard(primal_trust_region_solution) -
923 shard(primal_solution));
924 });
925
926 // Take into account the quadratic's contribution if the diagonal QP solver
927 // is enabled.
928 if (use_diagonal_qp_trust_region_solver &&
929 sharded_qp.Qp().objective_matrix.has_value()) {
930 primal_objective_delta += sharded_qp.PrimalSharder().ParallelSumOverShards(
931 [&](const Sharder::Shard& shard) {
932 const int shard_start =
933 sharded_qp.PrimalSharder().ShardStart(shard.Index());
934 const int shard_size =
935 sharded_qp.PrimalSharder().ShardSize(shard.Index());
936 double sum = 0.0;
937 for (int i = shard_start; i < shard_start + shard_size; ++i) {
938 sum += 0.5 * sharded_qp.Qp().objective_matrix->diagonal()[i] *
939 MathUtil::Square(primal_trust_region_solution[i] -
940 primal_solution[i]);
941 }
942 return sum;
943 });
944 }
945
946 // ∇_y L(primal_solution, dual_solution)^T (y - dual_solution)
947 const double dual_objective_delta =
948 sharded_qp.DualSharder().ParallelSumOverShards(
949 [&](const Sharder::Shard& shard) {
950 return shard(dual_part.gradient)
951 .dot(shard(dual_trust_region_solution) - shard(dual_solution));
952 });
953
954 return LocalizedLagrangianBounds{
955 .lagrangian_value = lagrangian_value,
956 .lower_bound = lagrangian_value + primal_objective_delta,
957 .upper_bound = lagrangian_value + dual_objective_delta,
958 .radius = radius};
959}
960
961} // namespace
962
964 const ShardedQuadraticProgram& sharded_qp, const VectorXd& primal_solution,
965 const VectorXd& dual_solution, const PrimalDualNorm primal_dual_norm,
966 const double primal_weight, const double radius,
967 const VectorXd* primal_product, const VectorXd* dual_product,
968 const bool use_diagonal_qp_trust_region_solver,
969 const double diagonal_qp_trust_region_solver_tolerance) {
970 const QuadraticProgram& qp = sharded_qp.Qp();
971 VectorXd primal_product_storage;
972 VectorXd dual_product_storage;
973
974 if (primal_product == nullptr) {
975 primal_product_storage = TransposedMatrixVectorProduct(
976 sharded_qp.TransposedConstraintMatrix(), primal_solution,
978 primal_product = &primal_product_storage;
979 }
980 if (dual_product == nullptr) {
981 dual_product_storage =
983 sharded_qp.ConstraintMatrixSharder());
984 dual_product = &dual_product_storage;
985 }
986
987 switch (primal_dual_norm) {
988 case PrimalDualNorm::kMaxNorm:
989 return ComputeMaxNormLocalizedLagrangianBounds(
990 sharded_qp, primal_solution, dual_solution, primal_weight, radius,
991 *primal_product, *dual_product);
992 case PrimalDualNorm::kEuclideanNorm:
993 return ComputeEuclideanNormLocalizedLagrangianBounds(
994 sharded_qp, primal_solution, dual_solution, primal_weight, radius,
995 *primal_product, *dual_product, use_diagonal_qp_trust_region_solver,
996 diagonal_qp_trust_region_solver_tolerance);
997 }
998 LOG(FATAL) << "Unrecognized primal dual norm";
999
1001}
1002
1003} // namespace operations_research::pdlp
int64_t max
Definition: alldiff_cst.cc:140
int64_t min
Definition: alldiff_cst.cc:139
#define CHECK(condition)
Definition: base/logging.h:495
#define CHECK_GE(val1, val2)
Definition: base/logging.h:707
#define LOG(severity)
Definition: base/logging.h:420
#define VLOG(verboselevel)
Definition: base/logging.h:984
static T Square(const T x)
Definition: mathutil.h:101
DiagonalTrustRegionProblemFromQp(const QuadraticProgram *qp, const VectorXd *primal_solution, const VectorXd *dual_solution, const VectorXd *primal_gradient, const VectorXd *dual_gradient, const double primal_weight)
DiagonalTrustRegionProblem(const VectorXd *objective_vector, const VectorXd *objective_matrix_diagonal, const VectorXd *lower_bounds, const VectorXd *upper_bounds, const VectorXd *center_point, const VectorXd *norm_weights)
const Eigen::SparseMatrix< double, Eigen::ColMajor, int64_t > & TransposedConstraintMatrix() const
double ParallelSumOverShards(const std::function< double(const Shard &)> &func) const
Definition: sharder.cc:131
bool ParallelTrueForAllShards(const std::function< bool(const Shard &)> &func) const
Definition: sharder.cc:140
int64_t ShardSize(int shard) const
Definition: sharder.h:186
int64_t ShardStart(int shard) const
Definition: sharder.h:192
double upper_bound
double lower_bound
int index
const int FATAL
Definition: log_severity.h:32
Fractional Square(Fractional f)
double ComputeInitialUndecidedComponents(const TrustRegionProblem &problem, int64_t start_index, int64_t end_index, std::vector< int64_t > &undecided_components)
Definition: trust_region.h:246
double ProjectedValue(const TrustRegionProblem &problem, const int64_t index, const double step_size)
Definition: trust_region.h:216
double RemoveCriticalStepsAboveThreshold(const TrustRegionProblem &problem, const double step_size_threshold, std::vector< int64_t > &undecided_components)
Definition: trust_region.h:286
double RemoveCriticalStepsBelowThreshold(const TrustRegionProblem &problem, const double step_size_threshold, std::vector< int64_t > &undecided_components)
Definition: trust_region.h:313
double EasyMedian(ArrayType array, ValueFunction value_function)
Definition: trust_region.h:230
double RadiusSquaredOfUndecidedComponents(const TrustRegionProblem &problem, const double step_size, const std::vector< int64_t > &undecided_components)
Definition: trust_region.h:267
LagrangianPart ComputePrimalGradient(const ShardedQuadraticProgram &sharded_qp, const VectorXd &primal_solution, const VectorXd &dual_product)
TrustRegionResult SolveDiagonalTrustRegionProblem(const DiagonalTrustRegionProblem &problem, const Sharder &sharder, const double target_radius, const double solve_tol)
double NormOfDeltaProjection(const DiagonalTrustRegionProblem &problem, const Sharder &sharder, const double scaling_factor)
VectorXd TransposedMatrixVectorProduct(const Eigen::SparseMatrix< double, Eigen::ColMajor, int64_t > &matrix, const VectorXd &vector, const Sharder &sharder)
Definition: sharder.cc:151
TrustRegionResult SolveDiagonalTrustRegion(const VectorXd &objective_vector, const VectorXd &objective_matrix_diagonal, const VectorXd &variable_lower_bounds, const VectorXd &variable_upper_bounds, const VectorXd &center_point, const VectorXd &norm_weights, const double target_radius, const Sharder &sharder, const double solve_tolerance)
LagrangianPart ComputeDualGradient(const ShardedQuadraticProgram &sharded_qp, const Eigen::VectorXd &dual_solution, const Eigen::VectorXd &primal_product)
TrustRegionResult SolveDiagonalQpTrustRegion(const ShardedQuadraticProgram &sharded_qp, const VectorXd &primal_solution, const VectorXd &dual_solution, const VectorXd &primal_gradient, const VectorXd &dual_gradient, const double primal_weight, double target_radius, const double solve_tolerance)
double FindScalingFactor(const DiagonalTrustRegionProblem &problem, const Sharder &sharder, const double target_radius, const double solve_tol)
TrustRegionResult SolveTrustRegion(const VectorXd &objective_vector, const VectorXd &variable_lower_bounds, const VectorXd &variable_upper_bounds, const VectorXd &center_point, const VectorXd &norm_weights, const double target_radius, const Sharder &sharder)
double ProjectedValueOfScaledDifference(const DiagonalTrustRegionProblem &problem, const int64_t index, const double scaling_factor)
LocalizedLagrangianBounds ComputeLocalizedLagrangianBounds(const ShardedQuadraticProgram &sharded_qp, const VectorXd &primal_solution, const VectorXd &dual_solution, const PrimalDualNorm primal_dual_norm, const double primal_weight, const double radius, const VectorXd *primal_product, const VectorXd *dual_product, const bool use_diagonal_qp_trust_region_solver, const double diagonal_qp_trust_region_solver_tolerance)
std::function< int64_t(const Model &)> LowerBound(IntegerVariable v)
Definition: integer.h:1663
std::function< int64_t(const Model &)> UpperBound(IntegerVariable v)
Definition: integer.h:1669
Coefficient ComputeObjectiveValue(const LinearBooleanProblem &problem, const std::vector< bool > &assignment)
int64_t weight
Definition: pack.cc:510
std::vector< double > lower_bounds
std::vector< double > upper_bounds
IntVar *const objective_
Definition: search.cc:3017
Eigen::SparseMatrix< double, Eigen::ColMajor, int64_t > constraint_matrix
double trust_region_objective_delta
double part_of_lagrangian_value
double objective_value
std::vector< std::vector< int64_t > > undecided_components_by_shard
double solution_step_size
double radius_coefficient_of_decided_components
VectorXd variable_lower_bounds
VectorXd center_point
VectorXd variable_upper_bounds
VectorXd objective_vector
VectorXd norm_weights
VectorXd objective_matrix_diagonal