diff --git a/ortools/glop/basis_representation.cc b/ortools/glop/basis_representation.cc index 88c9bd3ee0..bd0017a0c3 100644 --- a/ortools/glop/basis_representation.cc +++ b/ortools/glop/basis_representation.cc @@ -272,9 +272,10 @@ Status BasisFactorization::MiddleProductFormUpdate( // Initialize scratchpad_ with the right update vector. DCHECK(IsAllZero(scratchpad_)); scratchpad_.resize(right_storage_.num_rows(), 0.0); - for (const EntryIndex i : right_storage_.Column(right_index)) { - const RowIndex row = right_storage_.EntryRow(i); - scratchpad_[row] = right_storage_.EntryCoefficient(i); + const auto view = right_storage_.view(); + for (const EntryIndex i : view.Column(right_index)) { + const RowIndex row = view.EntryRow(i); + scratchpad_[row] = view.EntryCoefficient(i); scratchpad_non_zeros_.push_back(row); } // Subtract the column of U from scratchpad_. diff --git a/ortools/glop/dual_edge_norms.cc b/ortools/glop/dual_edge_norms.cc index 8d2ac77d25..96af1711ce 100644 --- a/ortools/glop/dual_edge_norms.cc +++ b/ortools/glop/dual_edge_norms.cc @@ -91,10 +91,11 @@ void DualEdgeNorms::UpdateBeforeBasisPivot( // Update the norm. int stat_lower_bounded_norms = 0; + auto output = edge_squared_norms_.view(); for (const auto e : direction) { // Note that the update formula used is important to maximize the precision. // See Koberstein's PhD section 8.2.2.1. - edge_squared_norms_[e.row()] += + output[e.row()] += e.coefficient() * (e.coefficient() * new_leaving_squared_norm - 2.0 / pivot * tau[e.row()]); @@ -103,13 +104,13 @@ void DualEdgeNorms::UpdateBeforeBasisPivot( // We can do that with Cauchy-Swartz inequality: // (edge . leaving_column)^2 = 1.0 < ||edge||^2 * ||leaving_column||^2 const Fractional kLowerBound = 1e-4; - if (edge_squared_norms_[e.row()] < kLowerBound) { + if (output[e.row()] < kLowerBound) { if (e.row() == leaving_row) continue; - edge_squared_norms_[e.row()] = kLowerBound; + output[e.row()] = kLowerBound; ++stat_lower_bounded_norms; } } - edge_squared_norms_[leaving_row] = new_leaving_squared_norm; + output[leaving_row] = new_leaving_squared_norm; IF_STATS_ENABLED(stats_.lower_bounded_norms.Add(stat_lower_bounded_norms)); } diff --git a/ortools/glop/entering_variable.cc b/ortools/glop/entering_variable.cc index 31a70be7d0..4f90c2fa2e 100644 --- a/ortools/glop/entering_variable.cc +++ b/ortools/glop/entering_variable.cc @@ -38,8 +38,8 @@ Status EnteringVariable::DualChooseEnteringColumn( Fractional cost_variation, std::vector* bound_flip_candidates, ColIndex* entering_col) { GLOP_RETURN_ERROR_IF_NULL(entering_col); - const DenseRow& update_coefficient = update_row.GetCoefficients(); - const DenseRow& reduced_costs = reduced_costs_->GetReducedCosts(); + const auto update_coefficients = update_row.GetCoefficients().const_view(); + const auto reduced_costs = reduced_costs_->GetReducedCosts().const_view(); SCOPED_TIME_STAT(&stats_); breakpoints_.clear(); @@ -81,8 +81,8 @@ Status EnteringVariable::DualChooseEnteringColumn( // We will add ratio * coeff to this column with a ratio positive or zero. // cost_variation makes sure the leaving variable will be dual-feasible // (its update coeff is sign(cost_variation) * 1.0). - const Fractional coeff = (cost_variation > 0.0) ? update_coefficient[col] - : -update_coefficient[col]; + const Fractional coeff = (cost_variation > 0.0) ? update_coefficients[col] + : -update_coefficients[col]; ColWithRatio entry; if (can_decrease.IsSet(col) && coeff > threshold) { @@ -225,10 +225,10 @@ Status EnteringVariable::DualChooseEnteringColumn( // returned bound_flip_candidates vector. for (int i = bound_flip_candidates->size() - 1; i >= 0; --i) { const ColIndex col = (*bound_flip_candidates)[i]; - if (std::abs(update_coefficient[col]) < pivot_limit) continue; + if (std::abs(update_coefficients[col]) < pivot_limit) continue; VLOG(1) << "Used bound flip to avoid bad pivot. Before: " << best_coeff - << " now: " << std::abs(update_coefficient[col]); + << " now: " << std::abs(update_coefficients[col]); *entering_col = col; break; } @@ -241,8 +241,8 @@ Status EnteringVariable::DualPhaseIChooseEnteringColumn( bool nothing_to_recompute, const UpdateRow& update_row, Fractional cost_variation, ColIndex* entering_col) { GLOP_RETURN_ERROR_IF_NULL(entering_col); - const DenseRow& update_coefficient = update_row.GetCoefficients(); - const DenseRow& reduced_costs = reduced_costs_->GetReducedCosts(); + const auto update_coefficients = update_row.GetCoefficients().const_view(); + const auto reduced_costs = reduced_costs_->GetReducedCosts().const_view(); SCOPED_TIME_STAT(&stats_); // List of breakpoints where a variable change from feasibility to @@ -273,7 +273,7 @@ Status EnteringVariable::DualPhaseIChooseEnteringColumn( DCHECK_NE(variable_type[col], VariableType::FIXED_VARIABLE); // Skip if the coeff is too small to be a numerically stable pivot. - if (std::abs(update_coefficient[col]) < threshold) continue; + if (std::abs(update_coefficients[col]) < threshold) continue; // We will add ratio * coeff to this column. cost_variation makes sure // the leaving variable will be dual-feasible (its update coeff is @@ -281,8 +281,8 @@ Status EnteringVariable::DualPhaseIChooseEnteringColumn( // // TODO(user): This is the same in DualChooseEnteringColumn(), remove // duplication? - const Fractional coeff = (cost_variation > 0.0) ? update_coefficient[col] - : -update_coefficient[col]; + const Fractional coeff = (cost_variation > 0.0) ? update_coefficients[col] + : -update_coefficients[col]; // Only proceed if there is a transition, note that if reduced_costs[col] // is close to zero, then the variable is counted as dual-feasible. diff --git a/ortools/glop/primal_edge_norms.cc b/ortools/glop/primal_edge_norms.cc index adcb2ee881..0c54245faf 100644 --- a/ortools/glop/primal_edge_norms.cc +++ b/ortools/glop/primal_edge_norms.cc @@ -225,16 +225,20 @@ void PrimalEdgeNorms::UpdateEdgeSquaredNorms(ColIndex entering_col, int stat_lower_bounded_norms = 0; const Fractional factor = 2.0 / pivot; + const auto view = compact_matrix_.view(); + auto output = edge_squared_norms_.view(); + const auto direction_left_inverse = + direction_left_inverse_.values.const_view(); for (const ColIndex col : update_row.GetNonZeroPositions()) { const Fractional coeff = update_row.GetCoefficient(col); - const Fractional scalar_product = compact_matrix_.ColumnScalarProduct( - col, direction_left_inverse_.values); - num_operations_ += compact_matrix_.column(col).num_entries().value(); + const Fractional scalar_product = + view.ColumnScalarProduct(col, direction_left_inverse); + num_operations_ += view.ColumnNumEntries(col).value(); // Update the edge squared norm of this column. Note that the update // formula used is important to maximize the precision. See an explanation // in the dual context in Koberstein's PhD thesis, section 8.2.2.1. - edge_squared_norms_[col] += + output[col] += coeff * (coeff * leaving_squared_norm + factor * scalar_product); // Make sure it doesn't go under a known lower bound (TODO(user): ref?). @@ -243,12 +247,12 @@ void PrimalEdgeNorms::UpdateEdgeSquaredNorms(ColIndex entering_col, // slightly faster, but may introduce numerical issues. More generally, // this test is only needed in a few cases, so is it worth it? const Fractional lower_bound = 1.0 + Square(coeff / pivot); - if (edge_squared_norms_[col] < lower_bound) { - edge_squared_norms_[col] = lower_bound; + if (output[col] < lower_bound) { + output[col] = lower_bound; ++stat_lower_bounded_norms; } } - edge_squared_norms_[leaving_col] = leaving_squared_norm; + output[leaving_col] = leaving_squared_norm; stats_.lower_bounded_norms.Add(stat_lower_bounded_norms); } diff --git a/ortools/glop/update_row.cc b/ortools/glop/update_row.cc index 3d494db842..8b8fe8a2b1 100644 --- a/ortools/glop/update_row.cc +++ b/ortools/glop/update_row.cc @@ -95,20 +95,20 @@ void UpdateRow::ComputeUpdateRow(RowIndex leaving_row) { // small entries (no complexity changes). const Fractional drop_tolerance = parameters_.drop_tolerance(); unit_row_left_inverse_filtered_non_zeros_.clear(); + const auto view = transposed_matrix_.view(); if (unit_row_left_inverse_.non_zeros.empty()) { const ColIndex size = unit_row_left_inverse_.values.size(); for (ColIndex col(0); col < size; ++col) { if (std::abs(unit_row_left_inverse_.values[col]) > drop_tolerance) { unit_row_left_inverse_filtered_non_zeros_.push_back(col); - num_row_wise_entries += transposed_matrix_.ColumnNumEntries(col); + num_row_wise_entries += view.ColumnNumEntries(col); } } } else { for (const auto e : unit_row_left_inverse_) { if (std::abs(e.coefficient()) > drop_tolerance) { unit_row_left_inverse_filtered_non_zeros_.push_back(e.column()); - num_row_wise_entries += - transposed_matrix_.ColumnNumEntries(e.column()); + num_row_wise_entries += view.ColumnNumEntries(e.column()); } } } @@ -194,20 +194,21 @@ void UpdateRow::SetParameters(const GlopParameters& parameters) { // the same as, or greater than, the number of columns. void UpdateRow::ComputeUpdatesRowWise() { SCOPED_TIME_STAT(&stats_); - const ColIndex num_cols = matrix_.num_cols(); - coefficient_.AssignToZero(num_cols); + coefficient_.AssignToZero(matrix_.num_cols()); + const auto output_coeffs = coefficient_.view(); + const auto view = transposed_matrix_.view(); for (ColIndex col : unit_row_left_inverse_filtered_non_zeros_) { const Fractional multiplier = unit_row_left_inverse_[col]; - for (const EntryIndex i : transposed_matrix_.Column(col)) { - const ColIndex pos = RowToColIndex(transposed_matrix_.EntryRow(i)); - coefficient_[pos] += multiplier * transposed_matrix_.EntryCoefficient(i); + for (const EntryIndex i : view.Column(col)) { + const ColIndex pos = RowToColIndex(view.EntryRow(i)); + output_coeffs[pos] += multiplier * view.EntryCoefficient(i); } } non_zero_position_list_.clear(); const Fractional drop_tolerance = parameters_.drop_tolerance(); for (const ColIndex col : variables_info_.GetIsRelevantBitRow()) { - if (std::abs(coefficient_[col]) > drop_tolerance) { + if (std::abs(output_coeffs[col]) > drop_tolerance) { non_zero_position_list_.push_back(col); } } @@ -220,20 +221,23 @@ void UpdateRow::ComputeUpdatesRowWiseHypersparse() { const ColIndex num_cols = matrix_.num_cols(); non_zero_position_set_.ClearAndResize(num_cols); coefficient_.resize(num_cols, 0.0); + + const auto output_coeffs = coefficient_.view(); + const auto view = transposed_matrix_.view(); for (ColIndex col : unit_row_left_inverse_filtered_non_zeros_) { const Fractional multiplier = unit_row_left_inverse_[col]; - for (const EntryIndex i : transposed_matrix_.Column(col)) { - const ColIndex pos = RowToColIndex(transposed_matrix_.EntryRow(i)); - const Fractional v = multiplier * transposed_matrix_.EntryCoefficient(i); + for (const EntryIndex i : view.Column(col)) { + const ColIndex pos = RowToColIndex(view.EntryRow(i)); + const Fractional v = multiplier * view.EntryCoefficient(i); if (!non_zero_position_set_.IsSet(pos)) { // Note that we could create the non_zero_position_list_ here, but we // prefer to keep the non-zero positions sorted, so using the bitset is // a good alernative. Of course if the solution is really really sparse, // then sorting non_zero_position_list_ will be faster. - coefficient_[pos] = v; + output_coeffs[pos] = v; non_zero_position_set_.Set(pos); } else { - coefficient_[pos] += v; + output_coeffs[pos] += v; } } } @@ -247,27 +251,28 @@ void UpdateRow::ComputeUpdatesRowWiseHypersparse() { // non-zero coefficients contiguously in a vector is better than keeping // them as they are. Note however that we will iterate only twice on the // update row coefficients during an iteration. - if (std::abs(coefficient_[col]) > drop_tolerance) { + if (std::abs(output_coeffs[col]) > drop_tolerance) { non_zero_position_list_.push_back(col); } } } void UpdateRow::ComputeUpdatesForSingleRow(ColIndex row_as_col) { - const ColIndex num_cols = matrix_.num_cols(); - coefficient_.resize(num_cols, 0.0); - + coefficient_.resize(matrix_.num_cols(), 0.0); non_zero_position_list_.clear(); + const DenseBitRow& is_relevant = variables_info_.GetIsRelevantBitRow(); const Fractional drop_tolerance = parameters_.drop_tolerance(); const Fractional multiplier = unit_row_left_inverse_[row_as_col]; - for (const EntryIndex i : transposed_matrix_.Column(row_as_col)) { - const ColIndex pos = RowToColIndex(transposed_matrix_.EntryRow(i)); + const auto output_coeffs = coefficient_.view(); + const auto view = transposed_matrix_.view(); + for (const EntryIndex i : view.Column(row_as_col)) { + const ColIndex pos = RowToColIndex(view.EntryRow(i)); if (!is_relevant[pos]) continue; - const Fractional v = multiplier * transposed_matrix_.EntryCoefficient(i); + const Fractional v = multiplier * view.EntryCoefficient(i); if (std::abs(v) > drop_tolerance) { - coefficient_[pos] = v; + output_coeffs[pos] = v; non_zero_position_list_.push_back(pos); } } @@ -276,14 +281,17 @@ void UpdateRow::ComputeUpdatesForSingleRow(ColIndex row_as_col) { void UpdateRow::ComputeUpdatesColumnWise() { SCOPED_TIME_STAT(&stats_); - const ColIndex num_cols = matrix_.num_cols(); - const Fractional drop_tolerance = parameters_.drop_tolerance(); - coefficient_.resize(num_cols, 0.0); + coefficient_.resize(matrix_.num_cols(), 0.0); non_zero_position_list_.clear(); + + const Fractional drop_tolerance = parameters_.drop_tolerance(); + const auto output_coeffs = coefficient_.view(); + const auto view = matrix_.view(); + const auto unit_row_left_inverse = unit_row_left_inverse_.values.const_view(); for (const ColIndex col : variables_info_.GetIsRelevantBitRow()) { // Coefficient of the column right inverse on the 'leaving_row'. const Fractional coeff = - matrix_.ColumnScalarProduct(col, unit_row_left_inverse_.values); + view.ColumnScalarProduct(col, unit_row_left_inverse); // Nothing to do if 'coeff' is (almost) zero which does happen due to // sparsity. Note that it shouldn't be too bad to use a non-zero drop @@ -291,7 +299,7 @@ void UpdateRow::ComputeUpdatesColumnWise() { // quantities updated by this update row will eventually be recomputed. if (std::abs(coeff) > drop_tolerance) { non_zero_position_list_.push_back(col); - coefficient_[col] = coeff; + output_coeffs[col] = coeff; } } } @@ -311,9 +319,11 @@ void UpdateRow::ComputeFullUpdateRow(RowIndex leaving_row, // Fills the non-basic column. const Fractional drop_tolerance = parameters_.drop_tolerance(); + const auto view = matrix_.view(); + const auto unit_row_left_inverse = unit_row_left_inverse_.values.const_view(); for (const ColIndex col : variables_info_.GetNotBasicBitRow()) { const Fractional coeff = - matrix_.ColumnScalarProduct(col, unit_row_left_inverse_.values); + view.ColumnScalarProduct(col, unit_row_left_inverse); if (std::abs(coeff) > drop_tolerance) { (*output)[col] = coeff; } diff --git a/ortools/lp_data/lp_types.h b/ortools/lp_data/lp_types.h index 0af2143fba..0f0883c163 100644 --- a/ortools/lp_data/lp_types.h +++ b/ortools/lp_data/lp_types.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "ortools/base/basictypes.h" @@ -250,6 +251,33 @@ inline std::ostream& operator<<(std::ostream& os, ConstraintStatus status) { // Returns the ConstraintStatus corresponding to a given VariableStatus. ConstraintStatus VariableToConstraintStatus(VariableStatus status); +// A span of `T`, indexed by a strict int type `IntType`. Intended to be passed +// by value. See b/259677543. +template +class StrictITISpan { + public: + using IndexType = IntType; + using reference = T&; + using value_type = T; + + StrictITISpan(T* data, IntType size) : data_(data), size_(size) {} + + reference operator[](IntType i) const { + return data_[static_cast(i.value())]; + } + + IntType size() const { return size_; } + + // TODO(user): This should probably be a strictly typed iterator too, but + // `StrongVector::begin()` already suffers from this problem. + auto begin() const { return data_; } + auto end() const { return data_ + static_cast(size_.value()); } + + private: + T* const data_; + const IntType size_; +}; + // Wrapper around an ITIVector to allow (and enforce) creation/resize/assign // to use the index type for the size. // @@ -260,6 +288,9 @@ class StrictITIVector : public absl::StrongVector { public: typedef IntType IndexType; typedef absl::StrongVector ParentType; + using View = StrictITISpan; + using ConstView = StrictITISpan; + // This allows for brace initialization, which is really useful in tests. // It is not 'explicit' by design, so one can do vector = {...}; #if !defined(__ANDROID__) && (!defined(_MSC_VER) || (_MSC_VER >= 1800)) @@ -284,6 +315,10 @@ class StrictITIVector : public absl::StrongVector { IntType capacity() const { return IntType(ParentType::capacity()); } + View view() { return View(ParentType::data(), size()); } + ConstView const_view() const { return ConstView(ParentType::data(), size()); } + ConstView view() const { return const_view(); } + // Since calls to resize() must use a default value, we introduce a new // function for convenience to reduce the size of a vector. void resize_down(IntType size) { diff --git a/ortools/lp_data/sparse.cc b/ortools/lp_data/sparse.cc index 1c2a633b29..339f8ece2d 100644 --- a/ortools/lp_data/sparse.cc +++ b/ortools/lp_data/sparse.cc @@ -506,13 +506,19 @@ void CompactSparseMatrix::PopulateFromTranspose( // Use starts_ to fill the matrix. Note that starts_ is modified so that at // the end it has its final values. - for (ColIndex col(0); col < input.num_cols(); ++col) { + const auto entry_rows = rows_.view(); + const auto input_entry_rows = input.rows_.view(); + const auto entry_coefficients = coefficients_.view(); + const auto input_entry_coefficients = input.coefficients_.view(); + const auto num_cols = input.num_cols(); + const auto starts = starts_.view(); + for (ColIndex col(0); col < num_cols; ++col) { const RowIndex transposed_row = ColToRowIndex(col); for (const EntryIndex i : input.Column(col)) { - const ColIndex transposed_col = RowToColIndex(input.EntryRow(i)); - const EntryIndex index = starts_[transposed_col + 1]++; - coefficients_[index] = input.EntryCoefficient(i); - rows_[index] = transposed_row; + const ColIndex transposed_col = RowToColIndex(input_entry_rows[i]); + const EntryIndex index = starts[transposed_col + 1]++; + entry_coefficients[index] = input_entry_coefficients[i]; + entry_rows[index] = transposed_row; } } @@ -725,7 +731,7 @@ bool TriangularMatrix::IsLowerTriangular() const { for (ColIndex col(0); col < num_cols_; ++col) { if (diagonal_coefficients_[col] == 0.0) return false; for (EntryIndex i : Column(col)) { - if (EntryRow(i) <= ColToRowIndex(col)) return false; + if (rows_[i] <= ColToRowIndex(col)) return false; } } return true; @@ -735,7 +741,7 @@ bool TriangularMatrix::IsUpperTriangular() const { for (ColIndex col(0); col < num_cols_; ++col) { if (diagonal_coefficients_[col] == 0.0) return false; for (EntryIndex i : Column(col)) { - if (EntryRow(i) >= ColToRowIndex(col)) return false; + if (rows_[i] >= ColToRowIndex(col)) return false; } } return true; @@ -752,8 +758,10 @@ void TriangularMatrix::ApplyRowPermutationToNonDiagonalEntries( void TriangularMatrix::CopyColumnToSparseColumn(ColIndex col, SparseColumn* output) const { output->Clear(); + const auto entry_rows = rows_.view(); + const auto entry_coefficients = coefficients_.view(); for (const EntryIndex i : Column(col)) { - output->SetCoefficient(EntryRow(i), EntryCoefficient(i)); + output->SetCoefficient(entry_rows[i], entry_coefficients[i]); } output->SetCoefficient(ColToRowIndex(col), diagonal_coefficients_[col]); output->CleanUp(); @@ -772,52 +780,58 @@ void TriangularMatrix::LowerSolve(DenseColumn* rhs) const { void TriangularMatrix::LowerSolveStartingAt(ColIndex start, DenseColumn* rhs) const { + RETURN_IF_NULL(rhs); if (all_diagonal_coefficients_are_one_) { - LowerSolveStartingAtInternal(start, rhs); + LowerSolveStartingAtInternal(start, rhs->view()); } else { - LowerSolveStartingAtInternal(start, rhs); + LowerSolveStartingAtInternal(start, rhs->view()); } } template -void TriangularMatrix::LowerSolveStartingAtInternal(ColIndex start, - DenseColumn* rhs) const { - RETURN_IF_NULL(rhs); +void TriangularMatrix::LowerSolveStartingAtInternal( + ColIndex start, DenseColumn::View rhs) const { const ColIndex begin = std::max(start, first_non_identity_column_); - const ColIndex end = diagonal_coefficients_.size(); + const auto entry_rows = rows_.view(); + const auto entry_coefficients = coefficients_.view(); + const auto diagonal_coefficients = diagonal_coefficients_.view(); + const ColIndex end = diagonal_coefficients.size(); for (ColIndex col(begin); col < end; ++col) { - const Fractional value = (*rhs)[ColToRowIndex(col)]; + const Fractional value = rhs[ColToRowIndex(col)]; if (value == 0.0) continue; const Fractional coeff = - diagonal_of_ones ? value : value / diagonal_coefficients_[col]; + diagonal_of_ones ? value : value / diagonal_coefficients[col]; if (!diagonal_of_ones) { - (*rhs)[ColToRowIndex(col)] = coeff; + rhs[ColToRowIndex(col)] = coeff; } for (const EntryIndex i : Column(col)) { - (*rhs)[EntryRow(i)] -= coeff * EntryCoefficient(i); + rhs[entry_rows[i]] -= coeff * entry_coefficients[i]; } } } void TriangularMatrix::UpperSolve(DenseColumn* rhs) const { + RETURN_IF_NULL(rhs); if (all_diagonal_coefficients_are_one_) { - UpperSolveInternal(rhs); + UpperSolveInternal(rhs->view()); } else { - UpperSolveInternal(rhs); + UpperSolveInternal(rhs->view()); } } template -void TriangularMatrix::UpperSolveInternal(DenseColumn* rhs) const { - RETURN_IF_NULL(rhs); +void TriangularMatrix::UpperSolveInternal(DenseColumn::View rhs) const { const ColIndex end = first_non_identity_column_; - for (ColIndex col(diagonal_coefficients_.size() - 1); col >= end; --col) { - const Fractional value = (*rhs)[ColToRowIndex(col)]; + const auto entry_rows = rows_.view(); + const auto entry_coefficients = coefficients_.view(); + const auto diagonal_coefficients = diagonal_coefficients_.view(); + for (ColIndex col(diagonal_coefficients.size() - 1); col >= end; --col) { + const Fractional value = rhs[ColToRowIndex(col)]; if (value == 0.0) continue; const Fractional coeff = - diagonal_of_ones ? value : value / diagonal_coefficients_[col]; + diagonal_of_ones ? value : value / diagonal_coefficients[col]; if (!diagonal_of_ones) { - (*rhs)[ColToRowIndex(col)] = coeff; + rhs[ColToRowIndex(col)] = coeff; } // It is faster to iterate this way (instead of i : Column(col)) because of @@ -825,78 +839,89 @@ void TriangularMatrix::UpperSolveInternal(DenseColumn* rhs) const { // same in both cases. const EntryIndex i_end = starts_[col]; for (EntryIndex i(starts_[col + 1] - 1); i >= i_end; --i) { - (*rhs)[EntryRow(i)] -= coeff * EntryCoefficient(i); + rhs[entry_rows[i]] -= coeff * entry_coefficients[i]; } } } void TriangularMatrix::TransposeUpperSolve(DenseColumn* rhs) const { + RETURN_IF_NULL(rhs); if (all_diagonal_coefficients_are_one_) { - TransposeUpperSolveInternal(rhs); + TransposeUpperSolveInternal(rhs->view()); } else { - TransposeUpperSolveInternal(rhs); + TransposeUpperSolveInternal(rhs->view()); } } template -void TriangularMatrix::TransposeUpperSolveInternal(DenseColumn* rhs) const { - RETURN_IF_NULL(rhs); +void TriangularMatrix::TransposeUpperSolveInternal( + DenseColumn::View rhs) const { const ColIndex end = num_cols_; + const auto starts = starts_.view(); + const auto entry_rows = rows_.view(); + const auto entry_coefficients = coefficients_.view(); + const auto diagonal_coefficients = diagonal_coefficients_.view(); + EntryIndex i = starts_[first_non_identity_column_]; for (ColIndex col(first_non_identity_column_); col < end; ++col) { - Fractional sum = (*rhs)[ColToRowIndex(col)]; + Fractional sum = rhs[ColToRowIndex(col)]; // Note that this is a bit faster than the simpler // for (const EntryIndex i : Column(col)) { // EntryIndex i is explicitly not modified in outer iterations, since // the last entry in column col is stored contiguously just before the // first entry in column col+1. - const EntryIndex i_end = starts_[col + 1]; + const EntryIndex i_end = starts[col + 1]; const EntryIndex shifted_end = i_end - 3; for (; i < shifted_end; i += 4) { - sum -= EntryCoefficient(i) * (*rhs)[EntryRow(i)] + - EntryCoefficient(i + 1) * (*rhs)[EntryRow(i + 1)] + - EntryCoefficient(i + 2) * (*rhs)[EntryRow(i + 2)] + - EntryCoefficient(i + 3) * (*rhs)[EntryRow(i + 3)]; + sum -= entry_coefficients[i] * rhs[entry_rows[i]] + + entry_coefficients[i + 1] * rhs[entry_rows[i + 1]] + + entry_coefficients[i + 2] * rhs[entry_rows[i + 2]] + + entry_coefficients[i + 3] * rhs[entry_rows[i + 3]]; } if (i < i_end) { - sum -= EntryCoefficient(i) * (*rhs)[EntryRow(i)]; + sum -= entry_coefficients[i] * rhs[entry_rows[i]]; if (i + 1 < i_end) { - sum -= EntryCoefficient(i + 1) * (*rhs)[EntryRow(i + 1)]; + sum -= entry_coefficients[i + 1] * rhs[entry_rows[i + 1]]; if (i + 2 < i_end) { - sum -= EntryCoefficient(i + 2) * (*rhs)[EntryRow(i + 2)]; + sum -= entry_coefficients[i + 2] * rhs[entry_rows[i + 2]]; } } i = i_end; } - (*rhs)[ColToRowIndex(col)] = - diagonal_of_ones ? sum : sum / diagonal_coefficients_[col]; + rhs[ColToRowIndex(col)] = + diagonal_of_ones ? sum : sum / diagonal_coefficients[col]; } } void TriangularMatrix::TransposeLowerSolve(DenseColumn* rhs) const { + RETURN_IF_NULL(rhs); if (all_diagonal_coefficients_are_one_) { - TransposeLowerSolveInternal(rhs); + TransposeLowerSolveInternal(rhs->view()); } else { - TransposeLowerSolveInternal(rhs); + TransposeLowerSolveInternal(rhs->view()); } } template -void TriangularMatrix::TransposeLowerSolveInternal(DenseColumn* rhs) const { - RETURN_IF_NULL(rhs); +void TriangularMatrix::TransposeLowerSolveInternal( + DenseColumn::View rhs) const { const ColIndex end = first_non_identity_column_; // We optimize a bit the solve by skipping the last 0.0 positions. ColIndex col = num_cols_ - 1; - while (col >= end && (*rhs)[ColToRowIndex(col)] == 0.0) { + while (col >= end && rhs[ColToRowIndex(col)] == 0.0) { --col; } - EntryIndex i = starts_[col + 1] - 1; + const auto starts = starts_.view(); + const auto diagonal_coeffs = diagonal_coefficients_.view(); + const auto entry_rows = rows_.view(); + const auto entry_coefficients = coefficients_.view(); + EntryIndex i = starts[col + 1] - 1; for (; col >= end; --col) { - Fractional sum = (*rhs)[ColToRowIndex(col)]; + Fractional sum = rhs[ColToRowIndex(col)]; // Note that this is a bit faster than the simpler // for (const EntryIndex i : Column(col)) { @@ -904,53 +929,55 @@ void TriangularMatrix::TransposeLowerSolveInternal(DenseColumn* rhs) const { // EntryIndex i is explicitly not modified in outer iterations, since // the last entry in column col is stored contiguously just before the // first entry in column col+1. - const EntryIndex i_end = starts_[col]; + const EntryIndex i_end = starts[col]; const EntryIndex shifted_end = i_end + 3; for (; i >= shifted_end; i -= 4) { - sum -= EntryCoefficient(i) * (*rhs)[EntryRow(i)] + - EntryCoefficient(i - 1) * (*rhs)[EntryRow(i - 1)] + - EntryCoefficient(i - 2) * (*rhs)[EntryRow(i - 2)] + - EntryCoefficient(i - 3) * (*rhs)[EntryRow(i - 3)]; + sum -= entry_coefficients[i] * rhs[entry_rows[i]] + + entry_coefficients[i - 1] * rhs[entry_rows[i - 1]] + + entry_coefficients[i - 2] * rhs[entry_rows[i - 2]] + + entry_coefficients[i - 3] * rhs[entry_rows[i - 3]]; } if (i >= i_end) { - sum -= EntryCoefficient(i) * (*rhs)[EntryRow(i)]; + sum -= entry_coefficients[i] * rhs[entry_rows[i]]; if (i >= i_end + 1) { - sum -= EntryCoefficient(i - 1) * (*rhs)[EntryRow(i - 1)]; + sum -= entry_coefficients[i - 1] * rhs[entry_rows[i - 1]]; if (i >= i_end + 2) { - sum -= EntryCoefficient(i - 2) * (*rhs)[EntryRow(i - 2)]; + sum -= entry_coefficients[i - 2] * rhs[entry_rows[i - 2]]; } } i = i_end - 1; } - (*rhs)[ColToRowIndex(col)] = - diagonal_of_ones ? sum : sum / diagonal_coefficients_[col]; + rhs[ColToRowIndex(col)] = + diagonal_of_ones ? sum : sum / diagonal_coeffs[col]; } } void TriangularMatrix::HyperSparseSolve(DenseColumn* rhs, RowIndexVector* non_zero_rows) const { + RETURN_IF_NULL(rhs); if (all_diagonal_coefficients_are_one_) { - HyperSparseSolveInternal(rhs, non_zero_rows); + HyperSparseSolveInternal(rhs->view(), non_zero_rows); } else { - HyperSparseSolveInternal(rhs, non_zero_rows); + HyperSparseSolveInternal(rhs->view(), non_zero_rows); } } template void TriangularMatrix::HyperSparseSolveInternal( - DenseColumn* rhs, RowIndexVector* non_zero_rows) const { - RETURN_IF_NULL(rhs); + DenseColumn::View rhs, RowIndexVector* non_zero_rows) const { int new_size = 0; + const auto entry_rows = rows_.view(); + const auto entry_coefficients = coefficients_.view(); for (const RowIndex row : *non_zero_rows) { - if ((*rhs)[row] == 0.0) continue; + if (rhs[row] == 0.0) continue; const ColIndex row_as_col = RowToColIndex(row); const Fractional coeff = - diagonal_of_ones ? (*rhs)[row] - : (*rhs)[row] / diagonal_coefficients_[row_as_col]; - (*rhs)[row] = coeff; + diagonal_of_ones ? rhs[row] + : rhs[row] / diagonal_coefficients_[row_as_col]; + rhs[row] = coeff; for (const EntryIndex i : Column(row_as_col)) { - (*rhs)[EntryRow(i)] -= coeff * EntryCoefficient(i); + rhs[entry_rows[i]] -= coeff * entry_coefficients[i]; } (*non_zero_rows)[new_size] = row; ++new_size; @@ -960,27 +987,31 @@ void TriangularMatrix::HyperSparseSolveInternal( void TriangularMatrix::HyperSparseSolveWithReversedNonZeros( DenseColumn* rhs, RowIndexVector* non_zero_rows) const { + RETURN_IF_NULL(rhs); if (all_diagonal_coefficients_are_one_) { - HyperSparseSolveWithReversedNonZerosInternal(rhs, non_zero_rows); + HyperSparseSolveWithReversedNonZerosInternal(rhs->view(), + non_zero_rows); } else { - HyperSparseSolveWithReversedNonZerosInternal(rhs, non_zero_rows); + HyperSparseSolveWithReversedNonZerosInternal(rhs->view(), + non_zero_rows); } } template void TriangularMatrix::HyperSparseSolveWithReversedNonZerosInternal( - DenseColumn* rhs, RowIndexVector* non_zero_rows) const { - RETURN_IF_NULL(rhs); + DenseColumn::View rhs, RowIndexVector* non_zero_rows) const { int new_start = non_zero_rows->size(); + const auto entry_rows = rows_.view(); + const auto entry_coefficients = coefficients_.view(); for (const RowIndex row : Reverse(*non_zero_rows)) { - if ((*rhs)[row] == 0.0) continue; + if (rhs[row] == 0.0) continue; const ColIndex row_as_col = RowToColIndex(row); const Fractional coeff = - diagonal_of_ones ? (*rhs)[row] - : (*rhs)[row] / diagonal_coefficients_[row_as_col]; - (*rhs)[row] = coeff; + diagonal_of_ones ? rhs[row] + : rhs[row] / diagonal_coefficients_[row_as_col]; + rhs[row] = coeff; for (const EntryIndex i : Column(row_as_col)) { - (*rhs)[EntryRow(i)] -= coeff * EntryCoefficient(i); + rhs[entry_rows[i]] -= coeff * entry_coefficients[i]; } --new_start; (*non_zero_rows)[new_start] = row; @@ -991,20 +1022,23 @@ void TriangularMatrix::HyperSparseSolveWithReversedNonZerosInternal( void TriangularMatrix::TransposeHyperSparseSolve( DenseColumn* rhs, RowIndexVector* non_zero_rows) const { + RETURN_IF_NULL(rhs); if (all_diagonal_coefficients_are_one_) { - TransposeHyperSparseSolveInternal(rhs, non_zero_rows); + TransposeHyperSparseSolveInternal(rhs->view(), non_zero_rows); } else { - TransposeHyperSparseSolveInternal(rhs, non_zero_rows); + TransposeHyperSparseSolveInternal(rhs->view(), non_zero_rows); } } template void TriangularMatrix::TransposeHyperSparseSolveInternal( - DenseColumn* rhs, RowIndexVector* non_zero_rows) const { - RETURN_IF_NULL(rhs); + DenseColumn::View rhs, RowIndexVector* non_zero_rows) const { int new_size = 0; + + const auto entry_rows = rows_.view(); + const auto entry_coefficients = coefficients_.view(); for (const RowIndex row : *non_zero_rows) { - Fractional sum = (*rhs)[row]; + Fractional sum = rhs[row]; const ColIndex row_as_col = RowToColIndex(row); // Note that we do the loop in exactly the same way as @@ -1013,22 +1047,22 @@ void TriangularMatrix::TransposeHyperSparseSolveInternal( const EntryIndex i_end = starts_[row_as_col + 1]; const EntryIndex shifted_end = i_end - 3; for (; i < shifted_end; i += 4) { - sum -= EntryCoefficient(i) * (*rhs)[EntryRow(i)] + - EntryCoefficient(i + 1) * (*rhs)[EntryRow(i + 1)] + - EntryCoefficient(i + 2) * (*rhs)[EntryRow(i + 2)] + - EntryCoefficient(i + 3) * (*rhs)[EntryRow(i + 3)]; + sum -= entry_coefficients[i] * rhs[entry_rows[i]] + + entry_coefficients[i + 1] * rhs[entry_rows[i + 1]] + + entry_coefficients[i + 2] * rhs[entry_rows[i + 2]] + + entry_coefficients[i + 3] * rhs[entry_rows[i + 3]]; } if (i < i_end) { - sum -= EntryCoefficient(i) * (*rhs)[EntryRow(i)]; + sum -= entry_coefficients[i] * rhs[entry_rows[i]]; if (i + 1 < i_end) { - sum -= EntryCoefficient(i + 1) * (*rhs)[EntryRow(i + 1)]; + sum -= entry_coefficients[i + 1] * rhs[entry_rows[i + 1]]; if (i + 2 < i_end) { - sum -= EntryCoefficient(i + 2) * (*rhs)[EntryRow(i + 2)]; + sum -= entry_coefficients[i + 2] * rhs[entry_rows[i + 2]]; } } } - (*rhs)[row] = + rhs[row] = diagonal_of_ones ? sum : sum / diagonal_coefficients_[row_as_col]; if (sum != 0.0) { (*non_zero_rows)[new_size] = row; @@ -1040,22 +1074,24 @@ void TriangularMatrix::TransposeHyperSparseSolveInternal( void TriangularMatrix::TransposeHyperSparseSolveWithReversedNonZeros( DenseColumn* rhs, RowIndexVector* non_zero_rows) const { + RETURN_IF_NULL(rhs); if (all_diagonal_coefficients_are_one_) { - TransposeHyperSparseSolveWithReversedNonZerosInternal(rhs, + TransposeHyperSparseSolveWithReversedNonZerosInternal(rhs->view(), non_zero_rows); } else { - TransposeHyperSparseSolveWithReversedNonZerosInternal(rhs, + TransposeHyperSparseSolveWithReversedNonZerosInternal(rhs->view(), non_zero_rows); } } template void TriangularMatrix::TransposeHyperSparseSolveWithReversedNonZerosInternal( - DenseColumn* rhs, RowIndexVector* non_zero_rows) const { - RETURN_IF_NULL(rhs); + DenseColumn::View rhs, RowIndexVector* non_zero_rows) const { int new_start = non_zero_rows->size(); + const auto entry_rows = rows_.view(); + const auto entry_coefficients = coefficients_.view(); for (const RowIndex row : Reverse(*non_zero_rows)) { - Fractional sum = (*rhs)[row]; + Fractional sum = rhs[row]; const ColIndex row_as_col = RowToColIndex(row); // We do the loop this way so that the floating point operations are exactly @@ -1064,22 +1100,22 @@ void TriangularMatrix::TransposeHyperSparseSolveWithReversedNonZerosInternal( const EntryIndex i_end = starts_[row_as_col]; const EntryIndex shifted_end = i_end + 3; for (; i >= shifted_end; i -= 4) { - sum -= EntryCoefficient(i) * (*rhs)[EntryRow(i)] + - EntryCoefficient(i - 1) * (*rhs)[EntryRow(i - 1)] + - EntryCoefficient(i - 2) * (*rhs)[EntryRow(i - 2)] + - EntryCoefficient(i - 3) * (*rhs)[EntryRow(i - 3)]; + sum -= entry_coefficients[i] * rhs[entry_rows[i]] + + entry_coefficients[i - 1] * rhs[entry_rows[i - 1]] + + entry_coefficients[i - 2] * rhs[entry_rows[i - 2]] + + entry_coefficients[i - 3] * rhs[entry_rows[i - 3]]; } if (i >= i_end) { - sum -= EntryCoefficient(i) * (*rhs)[EntryRow(i)]; + sum -= entry_coefficients[i] * rhs[entry_rows[i]]; if (i >= i_end + 1) { - sum -= EntryCoefficient(i - 1) * (*rhs)[EntryRow(i - 1)]; + sum -= entry_coefficients[i - 1] * rhs[entry_rows[i - 1]]; if (i >= i_end + 2) { - sum -= EntryCoefficient(i - 2) * (*rhs)[EntryRow(i - 2)]; + sum -= entry_coefficients[i - 2] * rhs[entry_rows[i - 2]]; } } } - (*rhs)[row] = + rhs[row] = diagonal_of_ones ? sum : sum / diagonal_coefficients_[row_as_col]; if (sum != 0.0) { --new_start; @@ -1103,15 +1139,18 @@ void TriangularMatrix::PermutedLowerSolve( initially_all_zero_scratchpad_[e.row()] = e.coefficient(); } + const auto entry_rows = rows_.view(); + const auto entry_coefficients = coefficients_.view(); const RowIndex end_row(partial_inverse_row_perm.size()); for (RowIndex row(ColToRowIndex(first_non_identity_column_)); row < end_row; ++row) { const RowIndex permuted_row = partial_inverse_row_perm[row]; const Fractional pivot = initially_all_zero_scratchpad_[permuted_row]; if (pivot == 0.0) continue; + for (EntryIndex i : Column(RowToColIndex(row))) { - initially_all_zero_scratchpad_[EntryRow(i)] -= - EntryCoefficient(i) * pivot; + initially_all_zero_scratchpad_[entry_rows[i]] -= + entry_coefficients[i] * pivot; } } @@ -1239,6 +1278,7 @@ void TriangularMatrix::PermutedComputeRowsToConsider( // the call stack). This is faster than an alternate implementation that // uses another Boolean array to detect when we go back in the // depth-first search. + const auto entry_rows = rows_.view(); while (!nodes_to_explore_.empty()) { const RowIndex row = nodes_to_explore_.back(); @@ -1264,7 +1304,7 @@ void TriangularMatrix::PermutedComputeRowsToConsider( EntryIndex i = starts_[col]; EntryIndex end = pruned_ends_[col]; while (i < end) { - const RowIndex entry_row = EntryRow(i); + const RowIndex entry_row = entry_rows[i]; if (!marked_[entry_row]) { --end; @@ -1303,7 +1343,7 @@ void TriangularMatrix::PermutedComputeRowsToConsider( nodes_to_explore_.push_back(kInvalidRow); const EntryIndex end = pruned_ends_[col]; for (EntryIndex i = starts_[col]; i < end; ++i) { - const RowIndex entry_row = EntryRow(i); + const RowIndex entry_row = entry_rows[i]; if (!stored_[entry_row]) { nodes_to_explore_.push_back(entry_row); } @@ -1352,6 +1392,7 @@ void TriangularMatrix::ComputeRowsToConsiderWithDfs( // Topological sort based on Depth-First-Search. // Same remarks as the version implemented in PermutedComputeRowsToConsider(). + const auto entry_rows = rows_.view(); while (!nodes_to_explore_.empty()) { const RowIndex row = nodes_to_explore_.back(); @@ -1379,7 +1420,7 @@ void TriangularMatrix::ComputeRowsToConsiderWithDfs( nodes_to_explore_.back() = -row - 1; for (const EntryIndex i : Column(RowToColIndex(row))) { ++num_ops; - const RowIndex entry_row = EntryRow(i); + const RowIndex entry_row = entry_rows[i]; if (!stored_[entry_row]) { nodes_to_explore_.push_back(entry_row); } @@ -1426,11 +1467,13 @@ void TriangularMatrix::ComputeRowsToConsiderInSortedOrder( stored_.resize(num_rows_, false); for (const RowIndex row : *non_zero_rows) stored_[row] = true; + + const auto entry_rows = rows_.view(); for (int i = 0; i < non_zero_rows->size(); ++i) { const RowIndex row = (*non_zero_rows)[i]; for (const EntryIndex i : Column(RowToColIndex(row))) { ++num_ops; - const RowIndex entry_row = EntryRow(i); + const RowIndex entry_row = entry_rows[i]; if (!stored_[entry_row]) { non_zero_rows->push_back(entry_row); stored_[entry_row] = true; @@ -1461,6 +1504,8 @@ Fractional TriangularMatrix::ComputeInverseInfinityNormUpperBound() const { DenseColumn row_norm_estimate(num_rows_, 1.0); const int num_cols = num_cols_.value(); + const auto entry_rows = rows_.view(); + const auto entry_coefficients = coefficients_.view(); for (int i = 0; i < num_cols; ++i) { const ColIndex col(is_upper ? num_cols - 1 - i : i); DCHECK_NE(diagonal_coefficients_[col], 0.0); @@ -1469,7 +1514,8 @@ Fractional TriangularMatrix::ComputeInverseInfinityNormUpperBound() const { row_norm_estimate[ColToRowIndex(col)] = coeff; for (const EntryIndex i : Column(col)) { - row_norm_estimate[EntryRow(i)] += coeff * std::abs(EntryCoefficient(i)); + row_norm_estimate[entry_rows[i]] += + coeff * std::abs(entry_coefficients[i]); } } diff --git a/ortools/lp_data/sparse.h b/ortools/lp_data/sparse.h index 557602efc7..065d7549df 100644 --- a/ortools/lp_data/sparse.h +++ b/ortools/lp_data/sparse.h @@ -290,7 +290,48 @@ SparseMatrix::PopulateFromPermutedMatrix( // of the matrix columns. class CompactSparseMatrix { public: + // When iteration performance matter, getting a ConstView allows the compiler + // to do better aliasing analysis and not re-read vectors address all the + // time. + class ConstView { + public: + explicit ConstView(const CompactSparseMatrix* matrix) + : coefficients_(matrix->coefficients_.data()), + rows_(matrix->rows_.data()), + starts_(matrix->starts_.data()) {} + + // Functions to iterate on the entries of a given column: + // const auto view = compact_matrix.view(); + // for (const EntryIndex i : view.Column(col)) { + // const RowIndex row = view.EntryRow(i); + // const Fractional coefficient = view.EntryCoefficient(i); + // } + ::util::IntegerRange Column(ColIndex col) const { + return ::util::IntegerRange(starts_[col.value()], + starts_[col.value() + 1]); + } + Fractional EntryCoefficient(EntryIndex i) const { + return coefficients_[i.value()]; + } + RowIndex EntryRow(EntryIndex i) const { return rows_[i.value()]; } + + EntryIndex ColumnNumEntries(ColIndex col) const { + return starts_[col.value() + 1] - starts_[col.value()]; + } + + // Returns the scalar product of the given row vector with the column of + // index col of this matrix. + Fractional ColumnScalarProduct(ColIndex col, + DenseRow::ConstView vector) const; + + private: + const Fractional* const coefficients_; + const RowIndex* const rows_; + const EntryIndex* const starts_; + }; + CompactSparseMatrix() {} + ConstView view() const { return ConstView(this); } // Convenient constructors for tests. // TODO(user): If this is needed in production code, it can be done faster. @@ -357,17 +398,8 @@ class CompactSparseMatrix { return coefficients_.empty(); } - // Functions to iterate on the entries of a given column: - // for (const EntryIndex i : compact_matrix_.Column(col)) { - // const RowIndex row = compact_matrix_.EntryRow(i); - // const Fractional coefficient = compact_matrix_.EntryCoefficient(i); - // } - ::util::IntegerRange Column(ColIndex col) const { - return ::util::IntegerRange(starts_[col], starts_[col + 1]); - } - Fractional EntryCoefficient(EntryIndex i) const { return coefficients_[i]; } - RowIndex EntryRow(EntryIndex i) const { return rows_[i]; } - + // Alternative iteration API compatible with the one from SparseMatrix. + // The ConstView alternative should be faster. ColumnView column(ColIndex col) const { DCHECK_LT(col, num_cols_); @@ -385,31 +417,9 @@ class CompactSparseMatrix { } // Returns the scalar product of the given row vector with the column of index - // col of this matrix. This function is declared in the .h for efficiency. + // col of this matrix. Fractional ColumnScalarProduct(ColIndex col, const DenseRow& vector) const { - // We expand ourselves since we don't really care about the floating - // point order of operation and this seems faster. - EntryIndex i = starts_[col]; - const EntryIndex end = starts_[col + 1]; - const EntryIndex shifted_end = end - 3; - Fractional result = 0.0; - for (; i < shifted_end; i += 4) { - result += coefficients_[i] * vector[RowToColIndex(EntryRow(i))] + - coefficients_[i + 1] * vector[RowToColIndex(EntryRow(i + 1))] + - coefficients_[i + 2] * vector[RowToColIndex(EntryRow(i + 2))] + - coefficients_[i + 3] * vector[RowToColIndex(EntryRow(i + 3))]; - } - if (i < end) { - result += coefficients_[i] * vector[RowToColIndex(EntryRow(i))]; - if (i + 1 < end) { - result += coefficients_[i + 1] * vector[RowToColIndex(EntryRow(i + 1))]; - if (i + 2 < end) { - result += - coefficients_[i + 2] * vector[RowToColIndex(EntryRow(i + 2))]; - } - } - } - return result; + return view().ColumnScalarProduct(col, vector.const_view()); } // Adds a multiple of the given column of this matrix to the given @@ -417,10 +427,12 @@ class CompactSparseMatrix { // function is declared in the .h for efficiency. void ColumnAddMultipleToDenseColumn(ColIndex col, Fractional multiplier, DenseColumn* dense_column) const { - if (multiplier == 0.0) return; RETURN_IF_NULL(dense_column); + if (multiplier == 0.0) return; + const auto entry_rows = rows_.view(); + const auto entry_coeffs = coefficients_.view(); for (const EntryIndex i : Column(col)) { - (*dense_column)[EntryRow(i)] += multiplier * EntryCoefficient(i); + (*dense_column)[entry_rows[i]] += multiplier * entry_coeffs[i]; } } @@ -430,11 +442,12 @@ class CompactSparseMatrix { void ColumnAddMultipleToSparseScatteredColumn(ColIndex col, Fractional multiplier, ScatteredColumn* column) const { - if (multiplier == 0.0) return; RETURN_IF_NULL(column); + if (multiplier == 0.0) return; + const auto entry_rows = rows_.view(); + const auto entry_coeffs = coefficients_.view(); for (const EntryIndex i : Column(col)) { - const RowIndex row = EntryRow(i); - column->Add(row, multiplier * EntryCoefficient(i)); + column->Add(entry_rows[i], multiplier * entry_coeffs[i]); } } @@ -452,8 +465,10 @@ class CompactSparseMatrix { DenseColumn* dense_column) const { RETURN_IF_NULL(dense_column); dense_column->resize(num_rows_, 0.0); + const auto entry_rows = rows_.view(); + const auto entry_coeffs = coefficients_.view(); for (const EntryIndex i : Column(col)) { - (*dense_column)[EntryRow(i)] = EntryCoefficient(i); + (*dense_column)[entry_rows[i]] = entry_coeffs[i]; } } @@ -464,9 +479,11 @@ class CompactSparseMatrix { RETURN_IF_NULL(dense_column); dense_column->resize(num_rows_, 0.0); non_zeros->clear(); + const auto entry_rows = rows_.view(); + const auto entry_coeffs = coefficients_.view(); for (const EntryIndex i : Column(col)) { - const RowIndex row = EntryRow(i); - (*dense_column)[row] = EntryCoefficient(i); + const RowIndex row = entry_rows[i]; + (*dense_column)[row] = entry_coeffs[i]; non_zeros->push_back(row); } } @@ -474,6 +491,11 @@ class CompactSparseMatrix { void Swap(CompactSparseMatrix* other); protected: + // Functions to iterate on the entries of a given column. + ::util::IntegerRange Column(ColIndex col) const { + return ::util::IntegerRange(starts_[col], starts_[col + 1]); + } + // The matrix dimensions, properly updated by full and incremental builders. RowIndex num_rows_; ColIndex num_cols_; @@ -489,6 +511,32 @@ class CompactSparseMatrix { DISALLOW_COPY_AND_ASSIGN(CompactSparseMatrix); }; +inline Fractional CompactSparseMatrix::ConstView::ColumnScalarProduct( + ColIndex col, DenseRow::ConstView vector) const { + // We expand ourselves since we don't really care about the floating + // point order of operation and this seems faster. + int i = starts_[col.value()].value(); + const int end = starts_[col.value() + 1].value(); + const int shifted_end = end - 3; + Fractional result = 0.0; + for (; i < shifted_end; i += 4) { + result += coefficients_[i] * vector[RowToColIndex(rows_[i])] + + coefficients_[i + 1] * vector[RowToColIndex(rows_[i + 1])] + + coefficients_[i + 2] * vector[RowToColIndex(rows_[i + 2])] + + coefficients_[i + 3] * vector[RowToColIndex(rows_[i + 3])]; + } + if (i < end) { + result += coefficients_[i] * vector[RowToColIndex(rows_[i])]; + if (i + 1 < end) { + result += coefficients_[i + 1] * vector[RowToColIndex(rows_[i + 1])]; + if (i + 2 < end) { + result += coefficients_[i + 2] * vector[RowToColIndex(rows_[i + 2])]; + } + } + } + return result; +} + // A matrix view of the basis columns of a CompactSparseMatrix, with basis // specified as a RowToColMapping. This class does not take ownership of the // underlying matrix or basis, and thus they must outlive this class (and keep @@ -766,25 +814,26 @@ class TriangularMatrix : private CompactSparseMatrix { private: // Internal versions of some Solve() functions to avoid code duplication. template - void LowerSolveStartingAtInternal(ColIndex start, DenseColumn* rhs) const; + void LowerSolveStartingAtInternal(ColIndex start, + DenseColumn::View rhs) const; template - void UpperSolveInternal(DenseColumn* rhs) const; + void UpperSolveInternal(DenseColumn::View rhs) const; template - void TransposeLowerSolveInternal(DenseColumn* rhs) const; + void TransposeLowerSolveInternal(DenseColumn::View rhs) const; template - void TransposeUpperSolveInternal(DenseColumn* rhs) const; + void TransposeUpperSolveInternal(DenseColumn::View rhs) const; template - void HyperSparseSolveInternal(DenseColumn* rhs, + void HyperSparseSolveInternal(DenseColumn::View rhs, RowIndexVector* non_zero_rows) const; template void HyperSparseSolveWithReversedNonZerosInternal( - DenseColumn* rhs, RowIndexVector* non_zero_rows) const; + DenseColumn::View rhs, RowIndexVector* non_zero_rows) const; template - void TransposeHyperSparseSolveInternal(DenseColumn* rhs, + void TransposeHyperSparseSolveInternal(DenseColumn::View rhs, RowIndexVector* non_zero_rows) const; template void TransposeHyperSparseSolveWithReversedNonZerosInternal( - DenseColumn* rhs, RowIndexVector* non_zero_rows) const; + DenseColumn::View rhs, RowIndexVector* non_zero_rows) const; // Internal function used by the Add*() functions to finish adding // a new column to a triangular matrix.