// OR-Tools 9.3 — sharded_optimization_utils.cc (source listing).
1// Copyright 2010-2021 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
15
16#include <algorithm>
17#include <cmath>
18#include <cstdint>
19#include <cstdlib>
20#include <limits>
21#include <numeric>
22#include <random>
23#include <utility>
24#include <vector>
25
26#include "Eigen/Core"
27#include "Eigen/SparseCore"
28#include "absl/algorithm/container.h"
29#include "absl/random/distributions.h"
30#include "absl/types/optional.h"
36
38
39constexpr double kInfinity = std::numeric_limits<double>::infinity();
40using ::Eigen::ColMajor;
41using ::Eigen::SparseMatrix;
42using ::Eigen::VectorXd;
43using ::Eigen::VectorXi;
44
46 : sharder_(sharder) {
47 average_ = VectorXd::Zero(sharder->NumElements());
48}
49
50// We considered the five averaging algorithms M_* listed on the first page of
51// https://www.jstor.org/stable/2286154 and the Kahan summation algorithm
52// (https://en.wikipedia.org/wiki/Kahan_summation_algorithm). Of these only M_14
53// satisfies our desired property that a constant sequence is averaged without
54// roundoff while requiring only a single vector be stored. We therefore use
55// M_14 (actually a natural weighted generalization, see below).
56void ShardedWeightedAverage::Add(const VectorXd& datapoint, double weight) {
57 CHECK_GE(weight, 0.0);
58 CHECK_EQ(datapoint.size(), average_.size());
59 const double weight_ratio = weight / (sum_weights_ + weight);
60 sharder_->ParallelForEachShard([&](const Sharder::Shard& shard) {
61 shard(average_) += weight_ratio * (shard(datapoint) - shard(average_));
62 });
63 sum_weights_ += weight;
64 ++num_terms_;
65}
66
68 // TODO(user): There may be a performance gain from using the sharder to
69 // zero-out the vectors.
70 average_.setZero();
71 sum_weights_ = 0.0;
72 num_terms_ = 0;
73}
74
76 VectorXd result;
77 // TODO(user): consider returning a reference to avoid this copy.
78 AssignVector(average_, *sharder_, result);
79 return result;
80}
81
82namespace {
83
// Returns the larger of |v1| and |v2|, considering only values whose
// magnitude is strictly below `infinite_bound_threshold`. Returns 0.0 when
// neither qualifies.
double CombineBounds(const double v1, const double v2,
                     const double infinite_bound_threshold) {
  const double abs1 = std::abs(v1);
  const double abs2 = std::abs(v2);
  double combined = 0.0;
  if (abs1 < infinite_bound_threshold) combined = abs1;
  if (abs2 < infinite_bound_threshold) combined = std::max(combined, abs2);
  return combined;
}
95
// Aggregate statistics describing the entries of a vector. Which entries are
// counted (e.g. all of them, only finite ones, absolute values) is determined
// by the producing function.
struct VectorInfo {
  int64_t num_finite = 0;   // Count of entries included in the statistics.
  int64_t num_nonzero = 0;  // Count of included entries that are nonzero.
  double largest = 0.0;     // Maximum included value (0.0 if none).
  double smallest = 0.0;    // Minimum included value (0.0 if none).
  double average = 0.0;     // Mean of the included entries.
  double l2_norm = 0.0;     // Euclidean norm of the included entries.
};
104
// Max/min L_inf norms of the rows and columns of a matrix. Members restored
// from their uses in ConstraintMatrixRowColInfo() below (the declarations
// were lost in extraction); declaration order matches the designated
// initializers there.
struct InfNormInfo {
  double max_col_norm = 0.0;
  double min_col_norm = 0.0;
  double max_row_norm = 0.0;
  double min_row_norm = 0.0;
};
111
112// The functions below are used to generate default values for
113// QuadraticProgramStats when the underlying program is empty or has no
114// constraints.
115
116double MaxOrZero(const VectorXd& vec) {
117 if (vec.size() == 0) {
118 return 0.0;
119 } else if (std::isinf(vec.maxCoeff())) {
120 return 0.0;
121 } else {
122 return vec.maxCoeff();
123 }
124}
125
126double MinOrZero(const VectorXd& vec) {
127 if (vec.size() == 0) {
128 return 0.0;
129 } else if (std::isinf(vec.minCoeff())) {
130 return 0.0;
131 } else {
132 return vec.minCoeff();
133 }
134}
135
136VectorInfo ComputeVectorInfo(const Eigen::Ref<const VectorXd>& vec,
137 const Sharder& sharder) {
138 VectorXd local_max(sharder.NumShards());
139 VectorXd local_min(sharder.NumShards());
140 VectorXd local_sum(sharder.NumShards());
141 VectorXd local_sum_squared(sharder.NumShards());
142 std::vector<int64_t> local_num_nonzero(sharder.NumShards());
143 sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
144 const VectorXd shard_abs = shard(vec).cwiseAbs();
145 local_max[shard.Index()] = shard_abs.maxCoeff();
146 local_min[shard.Index()] = shard_abs.minCoeff();
147 local_sum[shard.Index()] = shard_abs.sum();
148 local_sum_squared[shard.Index()] = shard_abs.squaredNorm();
149 for (double element : shard_abs) {
150 if (element != 0) {
151 local_num_nonzero[shard.Index()] += 1;
152 }
153 }
154 });
155 const int64_t num_elements = vec.size();
156 return VectorInfo{
157 .num_finite = num_elements,
158 .num_nonzero = std::accumulate(local_num_nonzero.begin(),
159 local_num_nonzero.end(), int64_t{0}),
160 .largest = MaxOrZero(local_max),
161 .smallest = MinOrZero(local_min),
162 .average = (num_elements > 0) ? local_sum.sum() / num_elements : NAN,
163 .l2_norm = (num_elements > 0) ? std::sqrt(local_sum_squared.sum()) : 0.0};
164}
165
166VectorInfo VariableBoundGapInfo(const VectorXd& lower_bounds,
167 const VectorXd& upper_bounds,
168 const Sharder& sharder) {
169 VectorXd local_max(sharder.NumShards());
170 VectorXd local_min(sharder.NumShards());
171 VectorXd local_sum(sharder.NumShards());
172 std::vector<int64_t> local_num_finite(sharder.NumShards());
173 std::vector<int64_t> local_num_nonzero(sharder.NumShards());
174 sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
175 const auto gap_shard = shard(upper_bounds) - shard(lower_bounds);
176 double max = -kInfinity;
177 double min = kInfinity;
178 double sum = 0.0;
179 int64_t num_finite = 0;
180 int64_t num_nonzero = 0;
181 for (int64_t i = 0; i < gap_shard.size(); ++i) {
182 if (std::isfinite(gap_shard[i])) {
183 max = std::max(max, gap_shard[i]);
184 min = std::min(min, gap_shard[i]);
185 sum += gap_shard[i];
186 num_finite += 1;
187 if (gap_shard[i] != 0) {
188 num_nonzero += 1;
189 }
190 }
191 }
192 local_max[shard.Index()] = max;
193 local_min[shard.Index()] = min;
194 local_sum[shard.Index()] = sum;
195 local_num_finite[shard.Index()] = num_finite;
196 local_num_nonzero[shard.Index()] = num_nonzero;
197 });
198 // If an empty model was given, local_sum could be an empty vector,
199 // in which case calling .sum() directly would crash.
200 const int64_t num_finite = std::accumulate(
201 local_num_finite.begin(), local_num_finite.end(), int64_t{0});
202 return VectorInfo{
203 .num_finite = num_finite,
204 .num_nonzero = std::accumulate(local_num_nonzero.begin(),
205 local_num_nonzero.end(), int64_t{0}),
206 .largest = MaxOrZero(local_max),
207 .smallest = MinOrZero(local_min),
208 .average = (num_finite > 0) ? local_sum.sum() / num_finite : NAN};
209}
210
211VectorInfo MatrixAbsElementInfo(
212 const SparseMatrix<double, ColMajor, int64_t>& matrix,
213 const Sharder& sharder) {
214 VectorXd local_max(sharder.NumShards());
215 VectorXd local_min(sharder.NumShards());
216 VectorXd local_sum(sharder.NumShards());
217 sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
218 const auto matrix_shard = shard(matrix);
219 double max = -kInfinity;
220 double min = kInfinity;
221 double sum = 0.0;
222 for (int64_t col_idx = 0; col_idx < matrix_shard.outerSize(); ++col_idx) {
223 for (decltype(matrix_shard)::InnerIterator it(matrix_shard, col_idx); it;
224 ++it) {
225 max = std::max(max, std::abs(it.value()));
226 min = std::min(min, std::abs(it.value()));
227 sum += std::abs(it.value());
228 }
229 }
230 local_max[shard.Index()] = max;
231 local_min[shard.Index()] = min;
232 local_sum[shard.Index()] = sum;
233 });
234 const int64_t num_nonzeros = matrix.nonZeros();
235 return VectorInfo{
236 .num_finite = num_nonzeros,
237 .largest = MaxOrZero(local_max),
238 .smallest = MinOrZero(local_min),
239 .average = (num_nonzeros > 0) ? local_sum.sum() / num_nonzeros : NAN};
240}
241
242VectorInfo CombinedBoundsInfo(const VectorXd& rhs_upper_bounds,
243 const VectorXd& rhs_lower_bounds,
244 const Sharder& sharder,
245 const double infinite_bound_threshold =
246 std::numeric_limits<double>::infinity()) {
247 VectorXd local_max(sharder.NumShards());
248 VectorXd local_min(sharder.NumShards());
249 VectorXd local_sum(sharder.NumShards());
250 VectorXd local_sum_squared(sharder.NumShards());
251 sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
252 const auto lb_shard = shard(rhs_lower_bounds);
253 const auto ub_shard = shard(rhs_upper_bounds);
254 double max = -kInfinity;
255 double min = kInfinity;
256 double sum = 0.0;
257 double sum_squared = 0.0;
258 for (int64_t i = 0; i < lb_shard.size(); ++i) {
259 const double combined =
260 CombineBounds(ub_shard[i], lb_shard[i], infinite_bound_threshold);
261 max = std::max(max, combined);
262 min = std::min(min, combined);
263 sum += combined;
264 sum_squared += combined * combined;
265 }
266 local_max[shard.Index()] = max;
267 local_min[shard.Index()] = min;
268 local_sum[shard.Index()] = sum;
269 local_sum_squared[shard.Index()] = sum_squared;
270 });
271 const int num_constraints = rhs_lower_bounds.size();
272 return VectorInfo{
273 .num_finite = num_constraints,
274 .largest = MaxOrZero(local_max),
275 .smallest = MinOrZero(local_min),
276 .average =
277 (num_constraints > 0) ? local_sum.sum() / num_constraints : NAN,
278 .l2_norm =
279 (num_constraints > 0) ? std::sqrt(local_sum_squared.sum()) : 0.0};
280}
281
282InfNormInfo ConstraintMatrixRowColInfo(
283 const SparseMatrix<double, ColMajor, int64_t>& constraint_matrix,
284 const SparseMatrix<double, ColMajor, int64_t>& constraint_matrix_transpose,
285 const Sharder& matrix_sharder, const Sharder& matrix_transpose_sharder) {
286 VectorXd col_norms = ScaledColLInfNorm(
287 constraint_matrix,
288 /*row_scaling_vec=*/VectorXd::Ones(constraint_matrix.rows()),
289 /*col_scaling_vec=*/VectorXd::Ones(constraint_matrix.cols()),
290 matrix_sharder);
291 VectorXd row_norms =
292 ScaledColLInfNorm(constraint_matrix_transpose,
293 VectorXd::Ones(constraint_matrix_transpose.rows()),
294 VectorXd::Ones(constraint_matrix_transpose.cols()),
295 matrix_transpose_sharder);
296 return InfNormInfo{.max_col_norm = MaxOrZero(col_norms),
297 .min_col_norm = MinOrZero(col_norms),
298 .max_row_norm = MaxOrZero(row_norms),
299 .min_row_norm = MinOrZero(row_norms)};
300}
301} // namespace
302
303QuadraticProgramStats ComputeStats(
304 const ShardedQuadraticProgram& qp,
305 const double infinite_constraint_bound_threshold) {
306 // Caution: if the constraint matrix is empty, elementwise operations
307 // (like .coeffs().maxCoeff() or .minCoeff()) will fail.
308 InfNormInfo cons_matrix_norm_info = ConstraintMatrixRowColInfo(
311 VectorInfo cons_matrix_info = MatrixAbsElementInfo(
313 VectorInfo combined_bounds_info = CombinedBoundsInfo(
315 qp.DualSharder(), infinite_constraint_bound_threshold);
316 VectorInfo obj_vec_info =
317 ComputeVectorInfo(qp.Qp().objective_vector, qp.PrimalSharder());
318 VectorInfo gaps_info =
319 VariableBoundGapInfo(qp.Qp().variable_lower_bounds,
321 QuadraticProgramStats program_stats;
322 program_stats.set_num_variables(qp.PrimalSize());
323 program_stats.set_num_constraints(qp.DualSize());
324 program_stats.set_constraint_matrix_col_min_l_inf_norm(
325 cons_matrix_norm_info.min_col_norm);
326 program_stats.set_constraint_matrix_row_min_l_inf_norm(
327 cons_matrix_norm_info.min_row_norm);
328 program_stats.set_constraint_matrix_num_nonzeros(cons_matrix_info.num_finite);
329 program_stats.set_constraint_matrix_abs_max(cons_matrix_info.largest);
330 program_stats.set_constraint_matrix_abs_min(cons_matrix_info.smallest);
331 program_stats.set_constraint_matrix_abs_avg(cons_matrix_info.average);
332 program_stats.set_combined_bounds_max(combined_bounds_info.largest);
333 program_stats.set_combined_bounds_min(combined_bounds_info.smallest);
334 program_stats.set_combined_bounds_avg(combined_bounds_info.average);
335 program_stats.set_combined_bounds_l2_norm(combined_bounds_info.l2_norm);
336 program_stats.set_variable_bound_gaps_num_finite(gaps_info.num_finite);
337 program_stats.set_variable_bound_gaps_max(gaps_info.largest);
338 program_stats.set_variable_bound_gaps_min(gaps_info.smallest);
339 program_stats.set_variable_bound_gaps_avg(gaps_info.average);
340 program_stats.set_objective_vector_abs_max(obj_vec_info.largest);
341 program_stats.set_objective_vector_abs_min(obj_vec_info.smallest);
342 program_stats.set_objective_vector_abs_avg(obj_vec_info.average);
343 program_stats.set_objective_vector_l2_norm(obj_vec_info.l2_norm);
344 if (IsLinearProgram(qp.Qp())) {
345 program_stats.set_objective_matrix_num_nonzeros(0);
346 program_stats.set_objective_matrix_abs_max(0);
347 program_stats.set_objective_matrix_abs_min(0);
348 program_stats.set_objective_matrix_abs_avg(NAN);
349 } else {
350 VectorInfo obj_matrix_info = ComputeVectorInfo(
351 qp.Qp().objective_matrix->diagonal(), qp.PrimalSharder());
352 program_stats.set_objective_matrix_num_nonzeros(
353 obj_matrix_info.num_nonzero);
354 program_stats.set_objective_matrix_abs_max(obj_matrix_info.largest);
355 program_stats.set_objective_matrix_abs_min(obj_matrix_info.smallest);
356 program_stats.set_objective_matrix_abs_avg(obj_matrix_info.average);
357 }
358 return program_stats;
359}
360
361namespace {
362
// Which norm the rescaling iterations equalize.
enum class ScalingNorm { kL2, kLInf };
364
365// Divides the vector (componentwise) by the square root of the divisor,
366// updating the vector in-place. If a component of the divisor is equal to zero,
367// leaves the component of the vector unchanged. The Sharder should have the
368// same size as the vector. For best performance the Sharder should have been
369// created with the Sharder(int64_t, int, ThreadPool*) constructor.
370void DivideBySquareRootOfDivisor(const VectorXd& divisor,
371 const Sharder& sharder, VectorXd& vector) {
372 sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
373 auto vec_shard = shard(vector);
374 auto divisor_shard = shard(divisor);
375 for (int64_t index = 0; index < vec_shard.size(); ++index) {
376 if (divisor_shard[index] != 0) {
377 vec_shard[index] /= std::sqrt(divisor_shard[index]);
378 }
379 }
380 });
381}
382
383void ApplyScalingIterationsForNorm(const ShardedQuadraticProgram& sharded_qp,
384 const int num_iterations,
385 const ScalingNorm norm,
386 VectorXd& row_scaling_vec,
387 VectorXd& col_scaling_vec) {
388 const QuadraticProgram& qp = sharded_qp.Qp();
389 const int64_t num_col = qp.constraint_matrix.cols();
390 const int64_t num_row = qp.constraint_matrix.rows();
391 CHECK_EQ(num_col, col_scaling_vec.size());
392 CHECK_EQ(num_row, row_scaling_vec.size());
393 for (int i = 0; i < num_iterations; ++i) {
394 VectorXd col_norm;
395 VectorXd row_norm;
396 switch (norm) {
397 case ScalingNorm::kL2: {
398 col_norm = ScaledColL2Norm(qp.constraint_matrix, row_scaling_vec,
399 col_scaling_vec,
400 sharded_qp.ConstraintMatrixSharder());
401 row_norm = ScaledColL2Norm(
402 sharded_qp.TransposedConstraintMatrix(), col_scaling_vec,
403 row_scaling_vec, sharded_qp.TransposedConstraintMatrixSharder());
404 break;
405 }
406 case ScalingNorm::kLInf: {
407 col_norm = ScaledColLInfNorm(qp.constraint_matrix, row_scaling_vec,
408 col_scaling_vec,
409 sharded_qp.ConstraintMatrixSharder());
410 row_norm = ScaledColLInfNorm(
411 sharded_qp.TransposedConstraintMatrix(), col_scaling_vec,
412 row_scaling_vec, sharded_qp.TransposedConstraintMatrixSharder());
413 break;
414 }
415 }
416 DivideBySquareRootOfDivisor(col_norm, sharded_qp.PrimalSharder(),
417 col_scaling_vec);
418 DivideBySquareRootOfDivisor(row_norm, sharded_qp.DualSharder(),
419 row_scaling_vec);
420 }
421}
422
423} // namespace
424
426 const int num_iterations, VectorXd& row_scaling_vec,
427 VectorXd& col_scaling_vec) {
428 ApplyScalingIterationsForNorm(sharded_qp, num_iterations, ScalingNorm::kLInf,
429 row_scaling_vec, col_scaling_vec);
430}
431
433 VectorXd& row_scaling_vec, VectorXd& col_scaling_vec) {
434 ApplyScalingIterationsForNorm(sharded_qp, /*num_iterations=*/1,
435 ScalingNorm::kL2, row_scaling_vec,
436 col_scaling_vec);
437}
438
440 ShardedQuadraticProgram& sharded_qp) {
441 ScalingVectors scaling{
442 .row_scaling_vec = VectorXd::Ones(sharded_qp.DualSize()),
443 .col_scaling_vec = VectorXd::Ones(sharded_qp.PrimalSize())};
444 bool do_rescale = false;
445 if (rescaling_options.l_inf_ruiz_iterations > 0) {
446 do_rescale = true;
447 LInfRuizRescaling(sharded_qp, rescaling_options.l_inf_ruiz_iterations,
448 scaling.row_scaling_vec, scaling.col_scaling_vec);
449 }
450 if (rescaling_options.l2_norm_rescaling) {
451 do_rescale = true;
452 L2NormRescaling(sharded_qp, scaling.row_scaling_vec,
453 scaling.col_scaling_vec);
454 }
455 if (do_rescale) {
456 sharded_qp.RescaleQuadraticProgram(scaling.col_scaling_vec,
457 scaling.row_scaling_vec);
458 }
459 return scaling;
460}
461
463 const VectorXd& primal_solution,
464 const VectorXd& dual_product) {
465 LagrangianPart result{.gradient = VectorXd(sharded_qp.PrimalSize())};
466 const QuadraticProgram& qp = sharded_qp.Qp();
467 VectorXd value_parts(sharded_qp.PrimalSharder().NumShards());
469 [&](const Sharder::Shard& shard) {
470 if (IsLinearProgram(qp)) {
471 shard(result.gradient) =
472 shard(qp.objective_vector) - shard(dual_product);
473 value_parts[shard.Index()] =
474 shard(primal_solution).dot(shard(result.gradient));
475 } else {
476 // Note: using auto instead of VectorXd for the type of
477 // objective_product causes eigen to defer the matrix product until it
478 // is used (twice).
479 const VectorXd objective_product =
480 shard(*qp.objective_matrix) * shard(primal_solution);
481 shard(result.gradient) = shard(qp.objective_vector) +
482 objective_product - shard(dual_product);
483 value_parts[shard.Index()] =
484 shard(primal_solution)
485 .dot(shard(result.gradient) - 0.5 * objective_product);
486 }
487 });
488 result.value = value_parts.sum();
489 return result;
490}
491
// Returns the constraint bound selected by the sign of `dual`: the upper
// bound for negative duals, the lower bound for positive duals. For a zero
// dual, returns `primal_product` projected onto [lower, upper] when both
// bounds are finite, the single finite bound when only one is finite, and
// 0.0 when neither is.
double DualSubgradientCoefficient(const double constraint_lower_bound,
                                  const double constraint_upper_bound,
                                  const double dual,
                                  const double primal_product) {
  if (dual < 0.0) return constraint_upper_bound;
  if (dual > 0.0) return constraint_lower_bound;
  // dual == 0.0 from here on.
  const bool lower_finite = std::isfinite(constraint_lower_bound);
  const bool upper_finite = std::isfinite(constraint_upper_bound);
  if (lower_finite && upper_finite) {
    if (primal_product < constraint_lower_bound) return constraint_lower_bound;
    if (primal_product > constraint_upper_bound) return constraint_upper_bound;
    return primal_product;
  }
  if (lower_finite) return constraint_lower_bound;
  if (upper_finite) return constraint_upper_bound;
  return 0.0;
}
517
519 const Eigen::VectorXd& dual_solution,
520 const Eigen::VectorXd& primal_product) {
521 LagrangianPart result{.gradient = VectorXd(sharded_qp.DualSize())};
522 const QuadraticProgram& qp = sharded_qp.Qp();
523 VectorXd value_parts(sharded_qp.DualSharder().NumShards());
525 [&](const Sharder::Shard& shard) {
526 auto constraint_lower_bounds = shard(qp.constraint_lower_bounds);
527 auto constraint_upper_bounds = shard(qp.constraint_upper_bounds);
528 auto dual_solution_shard = shard(dual_solution);
529 auto dual_gradient_shard = shard(result.gradient);
530 auto primal_product_shard = shard(primal_product);
531 double value_sum = 0.0;
532 for (int64_t i = 0; i < dual_gradient_shard.size(); ++i) {
533 dual_gradient_shard[i] = DualSubgradientCoefficient(
534 constraint_lower_bounds[i], constraint_upper_bounds[i],
535 dual_solution_shard[i], primal_product_shard[i]);
536 value_sum += dual_gradient_shard[i] * dual_solution_shard[i];
537 }
538 value_parts[shard.Index()] = value_sum;
539 dual_gradient_shard -= primal_product_shard;
540 });
541 result.value = value_parts.sum();
542 return result;
543}
544
545namespace {
546
547using ::Eigen::ColMajor;
548using ::Eigen::SparseMatrix;
549
550// Scales a vector (in-place) to have norm 1, unless it has norm 0 (in which
551// case it is left unscaled). Returns the norm of the input vector.
552double NormalizeVector(const Sharder& sharder, VectorXd& vector) {
553 const double norm = Norm(vector, sharder);
554 if (norm != 0.0) {
555 sharder.ParallelForEachShard(
556 [&](const Sharder::Shard& shard) { shard(vector) /= norm; });
557 }
558 return norm;
559}
560
// Estimates the probability that the power method, after `k` iterations, has
// relative error > `epsilon`, for a matrix of size `dimension`. Based on
// Theorem 4.1(a) (page 13) of "Estimating the Largest Eigenvalue by the Power
// and Lanczos Algorithms with a Random Start",
// https://pdfs.semanticscholar.org/2b2e/a941e55e5fa2ee9d8f4ff393c14482051143.pdf
double PowerMethodFailureProbability(int64_t dimension, double epsilon, int k) {
  // The theorem requires epsilon > 0 and k >= 2; report certain failure
  // otherwise.
  if (k < 2 || epsilon <= 0.0) {
    return 1.0;
  }
  const double coefficient = std::min(0.824, 0.354 / (epsilon * (k - 1)));
  return coefficient * std::sqrt(dimension) * std::pow(1.0 - epsilon, k - 0.5);
}
574
575SingularValueAndIterations EstimateMaximumSingularValue(
576 const SparseMatrix<double, ColMajor, int64_t>& matrix,
577 const SparseMatrix<double, ColMajor, int64_t>& matrix_transpose,
578 const absl::optional<VectorXd>& active_set_indicator,
579 const absl::optional<VectorXd>& transpose_active_set_indicator,
580 const Sharder& matrix_sharder, const Sharder& matrix_transpose_sharder,
581 const Sharder& primal_vector_sharder, const Sharder& dual_vector_sharder,
582 const double desired_relative_error, const double failure_probability,
583 std::mt19937& mt_generator) {
584 // Easy case: matrix is diagonal.
585 if (IsDiagonal(matrix, matrix_sharder)) {
586 VectorXd local_max(matrix_sharder.NumShards());
587 matrix_sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
588 const auto matrix_shard = shard(matrix);
589 local_max[shard.Index()] =
590 (matrix_shard *
591 VectorXd::Ones(matrix_sharder.ShardSize(shard.Index())))
592 .lpNorm<Eigen::Infinity>();
593 });
594 return {.singular_value = local_max.lpNorm<Eigen::Infinity>(),
595 .num_iterations = 0,
596 .estimated_relative_error = 0.0};
597 }
598 const int64_t dimension = matrix.cols();
599 VectorXd eigenvector(dimension);
600 // Even though it will be slower, we initialize eigenvector sequentially so
601 // that the result doesn't depend on the number of threads.
602 for (double& entry : eigenvector) {
603 entry = absl::Gaussian<double>(mt_generator);
604 }
605 if (active_set_indicator.has_value()) {
606 CoefficientWiseProductInPlace(*active_set_indicator, primal_vector_sharder,
607 eigenvector);
608 }
609 NormalizeVector(primal_vector_sharder, eigenvector);
610 double eigenvalue_estimate = 0.0;
611
612 int num_iterations = 0;
613 // The maximum singular value of A is the square root of the maximum
614 // eigenvalue of A^T A. epsilon is the relative error needed for the maximum
615 // eigenvalue of A^T A that gives desired_relative_error for the maximum
616 // singular value of A.
617 const double epsilon = 1.0 - MathUtil::Square(1.0 - desired_relative_error);
618 while (PowerMethodFailureProbability(dimension, epsilon, num_iterations) >
619 failure_probability) {
620 VectorXd dual_eigenvector = TransposedMatrixVectorProduct(
621 matrix_transpose, eigenvector, matrix_transpose_sharder);
622 if (transpose_active_set_indicator.has_value()) {
623 CoefficientWiseProductInPlace(*transpose_active_set_indicator,
624 dual_vector_sharder, dual_eigenvector);
625 }
626 VectorXd next_eigenvector =
627 TransposedMatrixVectorProduct(matrix, dual_eigenvector, matrix_sharder);
628 if (active_set_indicator.has_value()) {
629 CoefficientWiseProductInPlace(*active_set_indicator,
630 primal_vector_sharder, next_eigenvector);
631 }
632 eigenvalue_estimate =
633 Dot(eigenvector, next_eigenvector, primal_vector_sharder);
634 eigenvector = std::move(next_eigenvector);
635 ++num_iterations;
636 const double primal_norm =
637 NormalizeVector(primal_vector_sharder, eigenvector);
638
639 VLOG(1) << "Iteration " << num_iterations << " singular value estimate "
640 << std::sqrt(eigenvalue_estimate) << " primal norm " << primal_norm;
641 }
642 return SingularValueAndIterations{
643 .singular_value = std::sqrt(eigenvalue_estimate),
644 .num_iterations = num_iterations,
645 .estimated_relative_error = desired_relative_error};
646}
647
648// Given a primal solution, compute a {0, 1}-valued vector that is nonzero in
649// all the coordinates that are not saturating the primal variable bounds.
650VectorXd ComputePrimalActiveSetIndicator(
651 const ShardedQuadraticProgram& sharded_qp,
652 const VectorXd& primal_solution) {
653 VectorXd indicator(sharded_qp.PrimalSize());
654 sharded_qp.PrimalSharder().ParallelForEachShard(
655 [&](const Sharder::Shard& shard) {
656 const auto lower_bound_shard =
657 shard(sharded_qp.Qp().variable_lower_bounds);
658 const auto upper_bound_shard =
659 shard(sharded_qp.Qp().variable_upper_bounds);
660 const auto primal_solution_shard = shard(primal_solution);
661 auto indicator_shard = shard(indicator);
662 const int64_t shard_size =
663 sharded_qp.PrimalSharder().ShardSize(shard.Index());
664 for (int64_t i = 0; i < shard_size; ++i) {
665 if ((primal_solution_shard[i] == lower_bound_shard[i]) ||
666 (primal_solution_shard[i] == upper_bound_shard[i])) {
667 indicator_shard[i] = 0.0;
668 } else {
669 indicator_shard[i] = 1.0;
670 }
671 }
672 });
673 return indicator;
674}
675
676// Like ComputePrimalActiveSetIndicator(sharded_qp, primal_solution), but this
677// time using the implicit bounds on the dual variable.
678VectorXd ComputeDualActiveSetIndicator(
679 const ShardedQuadraticProgram& sharded_qp, const VectorXd& dual_solution) {
680 VectorXd indicator(sharded_qp.DualSize());
681 sharded_qp.DualSharder().ParallelForEachShard(
682 [&](const Sharder::Shard& shard) {
683 const auto lower_bound_shard =
684 shard(sharded_qp.Qp().constraint_lower_bounds);
685 const auto upper_bound_shard =
686 shard(sharded_qp.Qp().constraint_upper_bounds);
687 const auto dual_solution_shard = shard(dual_solution);
688 auto indicator_shard = shard(indicator);
689 const int64_t shard_size =
690 sharded_qp.DualSharder().ShardSize(shard.Index());
691 for (int64_t i = 0; i < shard_size; ++i) {
692 if (dual_solution_shard[i] == 0.0 &&
693 (std::isinf(lower_bound_shard[i]) ||
694 std::isinf(upper_bound_shard[i]))) {
695 indicator_shard[i] = 0.0;
696 } else {
697 indicator_shard[i] = 1.0;
698 }
699 }
700 });
701 return indicator;
702}
703
704} // namespace
705
707 const ShardedQuadraticProgram& sharded_qp,
708 const absl::optional<VectorXd>& primal_solution,
709 const absl::optional<VectorXd>& dual_solution,
710 const double desired_relative_error, const double failure_probability,
711 std::mt19937& mt_generator) {
712 absl::optional<VectorXd> primal_active_set_indicator;
713 absl::optional<VectorXd> dual_active_set_indicator;
714 if (primal_solution.has_value()) {
715 primal_active_set_indicator =
716 ComputePrimalActiveSetIndicator(sharded_qp, *primal_solution);
717 }
718 if (dual_solution.has_value()) {
719 dual_active_set_indicator =
720 ComputeDualActiveSetIndicator(sharded_qp, *dual_solution);
721 }
722 return EstimateMaximumSingularValue(
723 sharded_qp.Qp().constraint_matrix,
724 sharded_qp.TransposedConstraintMatrix(), primal_active_set_indicator,
725 dual_active_set_indicator, sharded_qp.ConstraintMatrixSharder(),
727 sharded_qp.PrimalSharder(), sharded_qp.DualSharder(),
728 desired_relative_error, failure_probability, mt_generator);
729}
730
732 const QuadraticProgram& qp = sharded_qp.Qp();
733 const bool constraint_bounds_valid =
735 [&](const Sharder::Shard& shard) {
736 return (shard(qp.constraint_lower_bounds).array() <=
737 shard(qp.constraint_upper_bounds).array())
738 .all();
739 });
740 const bool variable_bounds_valid =
742 [&](const Sharder::Shard& shard) {
743 return (shard(qp.variable_lower_bounds).array() <=
744 shard(qp.variable_upper_bounds).array())
745 .all();
746 });
747
748 return constraint_bounds_valid && variable_bounds_valid;
749}
750
752 VectorXd& primal) {
754 [&](const Sharder::Shard& shard) {
755 const QuadraticProgram& qp = sharded_qp.Qp();
756 shard(primal) = shard(primal)
757 .cwiseMin(shard(qp.variable_upper_bounds))
758 .cwiseMax(shard(qp.variable_lower_bounds));
759 });
760}
761
763 VectorXd& dual) {
764 const QuadraticProgram& qp = sharded_qp.Qp();
766 [&](const Sharder::Shard& shard) {
767 const auto lower_bound_shard = shard(qp.constraint_lower_bounds);
768 const auto upper_bound_shard = shard(qp.constraint_upper_bounds);
769 auto dual_shard = shard(dual);
770
771 for (int64_t i = 0; i < dual_shard.size(); ++i) {
772 if (!std::isfinite(upper_bound_shard[i])) {
773 dual_shard[i] = std::max(dual_shard[i], 0.0);
774 }
775 if (!std::isfinite(lower_bound_shard[i])) {
776 dual_shard[i] = std::min(dual_shard[i], 0.0);
777 }
778 }
779 });
780}
781
782} // namespace operations_research::pdlp
int64_t max
Definition: alldiff_cst.cc:140
int64_t min
Definition: alldiff_cst.cc:139
#define CHECK_EQ(val1, val2)
Definition: base/logging.h:703
#define CHECK_GE(val1, val2)
Definition: base/logging.h:707
#define VLOG(verboselevel)
Definition: base/logging.h:984
static T Square(const T x)
Definition: mathutil.h:101
void RescaleQuadraticProgram(const Eigen::VectorXd &col_scaling_vec, const Eigen::VectorXd &row_scaling_vec)
const Eigen::SparseMatrix< double, Eigen::ColMajor, int64_t > & TransposedConstraintMatrix() const
void Add(const Eigen::VectorXd &datapoint, double weight)
void ParallelForEachShard(const std::function< void(const Shard &)> &func) const
Definition: sharder.cc:97
bool ParallelTrueForAllShards(const std::function< bool(const Shard &)> &func) const
Definition: sharder.cc:140
int index
double Dot(const VectorXd &v1, const VectorXd &v2, const Sharder &sharder)
Definition: sharder.cc:197
LagrangianPart ComputePrimalGradient(const ShardedQuadraticProgram &sharded_qp, const VectorXd &primal_solution, const VectorXd &dual_product)
VectorXd TransposedMatrixVectorProduct(const Eigen::SparseMatrix< double, Eigen::ColMajor, int64_t > &matrix, const VectorXd &vector, const Sharder &sharder)
Definition: sharder.cc:151
void LInfRuizRescaling(const ShardedQuadraticProgram &sharded_qp, const int num_iterations, VectorXd &row_scaling_vec, VectorXd &col_scaling_vec)
LagrangianPart ComputeDualGradient(const ShardedQuadraticProgram &sharded_qp, const Eigen::VectorXd &dual_solution, const Eigen::VectorXd &primal_product)
VectorXd ScaledColLInfNorm(const Eigen::SparseMatrix< double, Eigen::ColMajor, int64_t > &matrix, const VectorXd &row_scaling_vec, const VectorXd &col_scaling_vec, const Sharder &sharder)
Definition: sharder.cc:258
double DualSubgradientCoefficient(const double constraint_lower_bound, const double constraint_upper_bound, const double dual, const double primal_product)
bool HasValidBounds(const QuadraticProgram &qp)
bool IsLinearProgram(const QuadraticProgram &qp)
SingularValueAndIterations EstimateMaximumSingularValueOfConstraintMatrix(const ShardedQuadraticProgram &sharded_qp, const absl::optional< VectorXd > &primal_solution, const absl::optional< VectorXd > &dual_solution, const double desired_relative_error, const double failure_probability, std::mt19937 &mt_generator)
void ProjectToDualVariableBounds(const ShardedQuadraticProgram &sharded_qp, VectorXd &dual)
void CoefficientWiseProductInPlace(const VectorXd &scale, const Sharder &sharder, VectorXd &dest)
Definition: sharder.cc:182
void L2NormRescaling(const ShardedQuadraticProgram &sharded_qp, VectorXd &row_scaling_vec, VectorXd &col_scaling_vec)
VectorXd ScaledColL2Norm(const Eigen::SparseMatrix< double, Eigen::ColMajor, int64_t > &matrix, const VectorXd &row_scaling_vec, const VectorXd &col_scaling_vec, const Sharder &sharder)
Definition: sharder.cc:280
ScalingVectors ApplyRescaling(const RescalingOptions &rescaling_options, ShardedQuadraticProgram &sharded_qp)
QuadraticProgramStats ComputeStats(const ShardedQuadraticProgram &qp, const double infinite_constraint_bound_threshold)
bool IsDiagonal(const Eigen::SparseMatrix< double, Eigen::ColMajor, int64_t > &matrix, const Sharder &sharder)
Definition: sharder.cc:304
void ProjectToPrimalVariableBounds(const ShardedQuadraticProgram &sharded_qp, VectorXd &primal)
double Norm(const VectorXd &vector, const Sharder &sharder)
Definition: sharder.cc:220
void AssignVector(const VectorXd &vec, const Sharder &sharder, VectorXd &dest)
Definition: sharder.cc:169
int64_t Zero()
NOLINT.
int64_t weight
Definition: pack.cc:510
std::vector< double > lower_bounds
std::vector< double > upper_bounds
double min_row_norm
int64_t num_nonzero
double max_row_norm
int64_t num_finite
double largest
double max_col_norm
double min_col_norm
Eigen::SparseMatrix< double, Eigen::ColMajor, int64_t > constraint_matrix
std::optional< Eigen::DiagonalMatrix< double, Eigen::Dynamic > > objective_matrix