[GLOP] Speedups

This commit is contained in:
Laurent Perron
2022-11-22 11:04:33 +01:00
parent 39360d8f0c
commit 8cea0ef3b9
8 changed files with 360 additions and 214 deletions

View File

@@ -272,9 +272,10 @@ Status BasisFactorization::MiddleProductFormUpdate(
// Initialize scratchpad_ with the right update vector.
DCHECK(IsAllZero(scratchpad_));
scratchpad_.resize(right_storage_.num_rows(), 0.0);
for (const EntryIndex i : right_storage_.Column(right_index)) {
const RowIndex row = right_storage_.EntryRow(i);
scratchpad_[row] = right_storage_.EntryCoefficient(i);
const auto view = right_storage_.view();
for (const EntryIndex i : view.Column(right_index)) {
const RowIndex row = view.EntryRow(i);
scratchpad_[row] = view.EntryCoefficient(i);
scratchpad_non_zeros_.push_back(row);
}
// Subtract the column of U from scratchpad_.

View File

@@ -91,10 +91,11 @@ void DualEdgeNorms::UpdateBeforeBasisPivot(
// Update the norm.
int stat_lower_bounded_norms = 0;
auto output = edge_squared_norms_.view();
for (const auto e : direction) {
// Note that the update formula used is important to maximize the precision.
// See Koberstein's PhD section 8.2.2.1.
edge_squared_norms_[e.row()] +=
output[e.row()] +=
e.coefficient() * (e.coefficient() * new_leaving_squared_norm -
2.0 / pivot * tau[e.row()]);
@@ -103,13 +104,13 @@ void DualEdgeNorms::UpdateBeforeBasisPivot(
// We can do that with the Cauchy-Schwarz inequality:
// (edge . leaving_column)^2 = 1.0 < ||edge||^2 * ||leaving_column||^2
const Fractional kLowerBound = 1e-4;
if (edge_squared_norms_[e.row()] < kLowerBound) {
if (output[e.row()] < kLowerBound) {
if (e.row() == leaving_row) continue;
edge_squared_norms_[e.row()] = kLowerBound;
output[e.row()] = kLowerBound;
++stat_lower_bounded_norms;
}
}
edge_squared_norms_[leaving_row] = new_leaving_squared_norm;
output[leaving_row] = new_leaving_squared_norm;
IF_STATS_ENABLED(stats_.lower_bounded_norms.Add(stat_lower_bounded_norms));
}

View File

@@ -38,8 +38,8 @@ Status EnteringVariable::DualChooseEnteringColumn(
Fractional cost_variation, std::vector<ColIndex>* bound_flip_candidates,
ColIndex* entering_col) {
GLOP_RETURN_ERROR_IF_NULL(entering_col);
const DenseRow& update_coefficient = update_row.GetCoefficients();
const DenseRow& reduced_costs = reduced_costs_->GetReducedCosts();
const auto update_coefficients = update_row.GetCoefficients().const_view();
const auto reduced_costs = reduced_costs_->GetReducedCosts().const_view();
SCOPED_TIME_STAT(&stats_);
breakpoints_.clear();
@@ -81,8 +81,8 @@ Status EnteringVariable::DualChooseEnteringColumn(
// We will add ratio * coeff to this column with a ratio positive or zero.
// cost_variation makes sure the leaving variable will be dual-feasible
// (its update coeff is sign(cost_variation) * 1.0).
const Fractional coeff = (cost_variation > 0.0) ? update_coefficient[col]
: -update_coefficient[col];
const Fractional coeff = (cost_variation > 0.0) ? update_coefficients[col]
: -update_coefficients[col];
ColWithRatio entry;
if (can_decrease.IsSet(col) && coeff > threshold) {
@@ -225,10 +225,10 @@ Status EnteringVariable::DualChooseEnteringColumn(
// returned bound_flip_candidates vector.
for (int i = bound_flip_candidates->size() - 1; i >= 0; --i) {
const ColIndex col = (*bound_flip_candidates)[i];
if (std::abs(update_coefficient[col]) < pivot_limit) continue;
if (std::abs(update_coefficients[col]) < pivot_limit) continue;
VLOG(1) << "Used bound flip to avoid bad pivot. Before: " << best_coeff
<< " now: " << std::abs(update_coefficient[col]);
<< " now: " << std::abs(update_coefficients[col]);
*entering_col = col;
break;
}
@@ -241,8 +241,8 @@ Status EnteringVariable::DualPhaseIChooseEnteringColumn(
bool nothing_to_recompute, const UpdateRow& update_row,
Fractional cost_variation, ColIndex* entering_col) {
GLOP_RETURN_ERROR_IF_NULL(entering_col);
const DenseRow& update_coefficient = update_row.GetCoefficients();
const DenseRow& reduced_costs = reduced_costs_->GetReducedCosts();
const auto update_coefficients = update_row.GetCoefficients().const_view();
const auto reduced_costs = reduced_costs_->GetReducedCosts().const_view();
SCOPED_TIME_STAT(&stats_);
// List of breakpoints where a variable change from feasibility to
@@ -273,7 +273,7 @@ Status EnteringVariable::DualPhaseIChooseEnteringColumn(
DCHECK_NE(variable_type[col], VariableType::FIXED_VARIABLE);
// Skip if the coeff is too small to be a numerically stable pivot.
if (std::abs(update_coefficient[col]) < threshold) continue;
if (std::abs(update_coefficients[col]) < threshold) continue;
// We will add ratio * coeff to this column. cost_variation makes sure
// the leaving variable will be dual-feasible (its update coeff is
@@ -281,8 +281,8 @@ Status EnteringVariable::DualPhaseIChooseEnteringColumn(
//
// TODO(user): This is the same in DualChooseEnteringColumn(), remove
// duplication?
const Fractional coeff = (cost_variation > 0.0) ? update_coefficient[col]
: -update_coefficient[col];
const Fractional coeff = (cost_variation > 0.0) ? update_coefficients[col]
: -update_coefficients[col];
// Only proceed if there is a transition, note that if reduced_costs[col]
// is close to zero, then the variable is counted as dual-feasible.

View File

@@ -225,16 +225,20 @@ void PrimalEdgeNorms::UpdateEdgeSquaredNorms(ColIndex entering_col,
int stat_lower_bounded_norms = 0;
const Fractional factor = 2.0 / pivot;
const auto view = compact_matrix_.view();
auto output = edge_squared_norms_.view();
const auto direction_left_inverse =
direction_left_inverse_.values.const_view();
for (const ColIndex col : update_row.GetNonZeroPositions()) {
const Fractional coeff = update_row.GetCoefficient(col);
const Fractional scalar_product = compact_matrix_.ColumnScalarProduct(
col, direction_left_inverse_.values);
num_operations_ += compact_matrix_.column(col).num_entries().value();
const Fractional scalar_product =
view.ColumnScalarProduct(col, direction_left_inverse);
num_operations_ += view.ColumnNumEntries(col).value();
// Update the edge squared norm of this column. Note that the update
// formula used is important to maximize the precision. See an explanation
// in the dual context in Koberstein's PhD thesis, section 8.2.2.1.
edge_squared_norms_[col] +=
output[col] +=
coeff * (coeff * leaving_squared_norm + factor * scalar_product);
// Make sure it doesn't go under a known lower bound (TODO(user): ref?).
@@ -243,12 +247,12 @@ void PrimalEdgeNorms::UpdateEdgeSquaredNorms(ColIndex entering_col,
// slightly faster, but may introduce numerical issues. More generally,
// this test is only needed in a few cases, so is it worth it?
const Fractional lower_bound = 1.0 + Square(coeff / pivot);
if (edge_squared_norms_[col] < lower_bound) {
edge_squared_norms_[col] = lower_bound;
if (output[col] < lower_bound) {
output[col] = lower_bound;
++stat_lower_bounded_norms;
}
}
edge_squared_norms_[leaving_col] = leaving_squared_norm;
output[leaving_col] = leaving_squared_norm;
stats_.lower_bounded_norms.Add(stat_lower_bounded_norms);
}

View File

@@ -95,20 +95,20 @@ void UpdateRow::ComputeUpdateRow(RowIndex leaving_row) {
// small entries (no complexity changes).
const Fractional drop_tolerance = parameters_.drop_tolerance();
unit_row_left_inverse_filtered_non_zeros_.clear();
const auto view = transposed_matrix_.view();
if (unit_row_left_inverse_.non_zeros.empty()) {
const ColIndex size = unit_row_left_inverse_.values.size();
for (ColIndex col(0); col < size; ++col) {
if (std::abs(unit_row_left_inverse_.values[col]) > drop_tolerance) {
unit_row_left_inverse_filtered_non_zeros_.push_back(col);
num_row_wise_entries += transposed_matrix_.ColumnNumEntries(col);
num_row_wise_entries += view.ColumnNumEntries(col);
}
}
} else {
for (const auto e : unit_row_left_inverse_) {
if (std::abs(e.coefficient()) > drop_tolerance) {
unit_row_left_inverse_filtered_non_zeros_.push_back(e.column());
num_row_wise_entries +=
transposed_matrix_.ColumnNumEntries(e.column());
num_row_wise_entries += view.ColumnNumEntries(e.column());
}
}
}
@@ -194,20 +194,21 @@ void UpdateRow::SetParameters(const GlopParameters& parameters) {
// the same as, or greater than, the number of columns.
void UpdateRow::ComputeUpdatesRowWise() {
SCOPED_TIME_STAT(&stats_);
const ColIndex num_cols = matrix_.num_cols();
coefficient_.AssignToZero(num_cols);
coefficient_.AssignToZero(matrix_.num_cols());
const auto output_coeffs = coefficient_.view();
const auto view = transposed_matrix_.view();
for (ColIndex col : unit_row_left_inverse_filtered_non_zeros_) {
const Fractional multiplier = unit_row_left_inverse_[col];
for (const EntryIndex i : transposed_matrix_.Column(col)) {
const ColIndex pos = RowToColIndex(transposed_matrix_.EntryRow(i));
coefficient_[pos] += multiplier * transposed_matrix_.EntryCoefficient(i);
for (const EntryIndex i : view.Column(col)) {
const ColIndex pos = RowToColIndex(view.EntryRow(i));
output_coeffs[pos] += multiplier * view.EntryCoefficient(i);
}
}
non_zero_position_list_.clear();
const Fractional drop_tolerance = parameters_.drop_tolerance();
for (const ColIndex col : variables_info_.GetIsRelevantBitRow()) {
if (std::abs(coefficient_[col]) > drop_tolerance) {
if (std::abs(output_coeffs[col]) > drop_tolerance) {
non_zero_position_list_.push_back(col);
}
}
@@ -220,20 +221,23 @@ void UpdateRow::ComputeUpdatesRowWiseHypersparse() {
const ColIndex num_cols = matrix_.num_cols();
non_zero_position_set_.ClearAndResize(num_cols);
coefficient_.resize(num_cols, 0.0);
const auto output_coeffs = coefficient_.view();
const auto view = transposed_matrix_.view();
for (ColIndex col : unit_row_left_inverse_filtered_non_zeros_) {
const Fractional multiplier = unit_row_left_inverse_[col];
for (const EntryIndex i : transposed_matrix_.Column(col)) {
const ColIndex pos = RowToColIndex(transposed_matrix_.EntryRow(i));
const Fractional v = multiplier * transposed_matrix_.EntryCoefficient(i);
for (const EntryIndex i : view.Column(col)) {
const ColIndex pos = RowToColIndex(view.EntryRow(i));
const Fractional v = multiplier * view.EntryCoefficient(i);
if (!non_zero_position_set_.IsSet(pos)) {
// Note that we could create the non_zero_position_list_ here, but we
// prefer to keep the non-zero positions sorted, so using the bitset is
// a good alternative. Of course if the solution is really really sparse,
// then sorting non_zero_position_list_ will be faster.
coefficient_[pos] = v;
output_coeffs[pos] = v;
non_zero_position_set_.Set(pos);
} else {
coefficient_[pos] += v;
output_coeffs[pos] += v;
}
}
}
@@ -247,27 +251,28 @@ void UpdateRow::ComputeUpdatesRowWiseHypersparse() {
// non-zero coefficients contiguously in a vector is better than keeping
// them as they are. Note however that we will iterate only twice on the
// update row coefficients during an iteration.
if (std::abs(coefficient_[col]) > drop_tolerance) {
if (std::abs(output_coeffs[col]) > drop_tolerance) {
non_zero_position_list_.push_back(col);
}
}
}
void UpdateRow::ComputeUpdatesForSingleRow(ColIndex row_as_col) {
const ColIndex num_cols = matrix_.num_cols();
coefficient_.resize(num_cols, 0.0);
coefficient_.resize(matrix_.num_cols(), 0.0);
non_zero_position_list_.clear();
const DenseBitRow& is_relevant = variables_info_.GetIsRelevantBitRow();
const Fractional drop_tolerance = parameters_.drop_tolerance();
const Fractional multiplier = unit_row_left_inverse_[row_as_col];
for (const EntryIndex i : transposed_matrix_.Column(row_as_col)) {
const ColIndex pos = RowToColIndex(transposed_matrix_.EntryRow(i));
const auto output_coeffs = coefficient_.view();
const auto view = transposed_matrix_.view();
for (const EntryIndex i : view.Column(row_as_col)) {
const ColIndex pos = RowToColIndex(view.EntryRow(i));
if (!is_relevant[pos]) continue;
const Fractional v = multiplier * transposed_matrix_.EntryCoefficient(i);
const Fractional v = multiplier * view.EntryCoefficient(i);
if (std::abs(v) > drop_tolerance) {
coefficient_[pos] = v;
output_coeffs[pos] = v;
non_zero_position_list_.push_back(pos);
}
}
@@ -276,14 +281,17 @@ void UpdateRow::ComputeUpdatesForSingleRow(ColIndex row_as_col) {
void UpdateRow::ComputeUpdatesColumnWise() {
SCOPED_TIME_STAT(&stats_);
const ColIndex num_cols = matrix_.num_cols();
const Fractional drop_tolerance = parameters_.drop_tolerance();
coefficient_.resize(num_cols, 0.0);
coefficient_.resize(matrix_.num_cols(), 0.0);
non_zero_position_list_.clear();
const Fractional drop_tolerance = parameters_.drop_tolerance();
const auto output_coeffs = coefficient_.view();
const auto view = matrix_.view();
const auto unit_row_left_inverse = unit_row_left_inverse_.values.const_view();
for (const ColIndex col : variables_info_.GetIsRelevantBitRow()) {
// Coefficient of the column right inverse on the 'leaving_row'.
const Fractional coeff =
matrix_.ColumnScalarProduct(col, unit_row_left_inverse_.values);
view.ColumnScalarProduct(col, unit_row_left_inverse);
// Nothing to do if 'coeff' is (almost) zero which does happen due to
// sparsity. Note that it shouldn't be too bad to use a non-zero drop
@@ -291,7 +299,7 @@ void UpdateRow::ComputeUpdatesColumnWise() {
// quantities updated by this update row will eventually be recomputed.
if (std::abs(coeff) > drop_tolerance) {
non_zero_position_list_.push_back(col);
coefficient_[col] = coeff;
output_coeffs[col] = coeff;
}
}
}
@@ -311,9 +319,11 @@ void UpdateRow::ComputeFullUpdateRow(RowIndex leaving_row,
// Fills the non-basic column.
const Fractional drop_tolerance = parameters_.drop_tolerance();
const auto view = matrix_.view();
const auto unit_row_left_inverse = unit_row_left_inverse_.values.const_view();
for (const ColIndex col : variables_info_.GetNotBasicBitRow()) {
const Fractional coeff =
matrix_.ColumnScalarProduct(col, unit_row_left_inverse_.values);
view.ColumnScalarProduct(col, unit_row_left_inverse);
if (std::abs(coeff) > drop_tolerance) {
(*output)[col] = coeff;
}

View File

@@ -21,6 +21,7 @@
#include <limits>
#include <ostream>
#include <string>
#include <type_traits>
#include <vector>
#include "ortools/base/basictypes.h"
@@ -250,6 +251,33 @@ inline std::ostream& operator<<(std::ostream& os, ConstraintStatus status) {
// Returns the ConstraintStatus corresponding to a given VariableStatus.
ConstraintStatus VariableToConstraintStatus(VariableStatus status);
// A span of `T`, indexed by a strict int type `IntType`. Intended to be passed
// by value. See b/259677543.
template <typename IntType, typename T>
class StrictITISpan {
public:
using IndexType = IntType;
using reference = T&;
using value_type = T;
StrictITISpan(T* data, IntType size) : data_(data), size_(size) {}
reference operator[](IntType i) const {
return data_[static_cast<size_t>(i.value())];
}
IntType size() const { return size_; }
// TODO(user): This should probably be a strictly typed iterator too, but
// `StrongVector::begin()` already suffers from this problem.
auto begin() const { return data_; }
auto end() const { return data_ + static_cast<size_t>(size_.value()); }
private:
T* const data_;
const IntType size_;
};
// Wrapper around an ITIVector to allow (and enforce) creation/resize/assign
// to use the index type for the size.
//
@@ -260,6 +288,9 @@ class StrictITIVector : public absl::StrongVector<IntType, T> {
public:
typedef IntType IndexType;
typedef absl::StrongVector<IntType, T> ParentType;
using View = StrictITISpan<IntType, T>;
using ConstView = StrictITISpan<IntType, const T>;
// This allows for brace initialization, which is really useful in tests.
// It is not 'explicit' by design, so one can do vector = {...};
#if !defined(__ANDROID__) && (!defined(_MSC_VER) || (_MSC_VER >= 1800))
@@ -284,6 +315,10 @@ class StrictITIVector : public absl::StrongVector<IntType, T> {
IntType capacity() const { return IntType(ParentType::capacity()); }
View view() { return View(ParentType::data(), size()); }
ConstView const_view() const { return ConstView(ParentType::data(), size()); }
ConstView view() const { return const_view(); }
// Since calls to resize() must use a default value, we introduce a new
// function for convenience to reduce the size of a vector.
void resize_down(IntType size) {

View File

@@ -506,13 +506,19 @@ void CompactSparseMatrix::PopulateFromTranspose(
// Use starts_ to fill the matrix. Note that starts_ is modified so that at
// the end it has its final values.
for (ColIndex col(0); col < input.num_cols(); ++col) {
const auto entry_rows = rows_.view();
const auto input_entry_rows = input.rows_.view();
const auto entry_coefficients = coefficients_.view();
const auto input_entry_coefficients = input.coefficients_.view();
const auto num_cols = input.num_cols();
const auto starts = starts_.view();
for (ColIndex col(0); col < num_cols; ++col) {
const RowIndex transposed_row = ColToRowIndex(col);
for (const EntryIndex i : input.Column(col)) {
const ColIndex transposed_col = RowToColIndex(input.EntryRow(i));
const EntryIndex index = starts_[transposed_col + 1]++;
coefficients_[index] = input.EntryCoefficient(i);
rows_[index] = transposed_row;
const ColIndex transposed_col = RowToColIndex(input_entry_rows[i]);
const EntryIndex index = starts[transposed_col + 1]++;
entry_coefficients[index] = input_entry_coefficients[i];
entry_rows[index] = transposed_row;
}
}
@@ -725,7 +731,7 @@ bool TriangularMatrix::IsLowerTriangular() const {
for (ColIndex col(0); col < num_cols_; ++col) {
if (diagonal_coefficients_[col] == 0.0) return false;
for (EntryIndex i : Column(col)) {
if (EntryRow(i) <= ColToRowIndex(col)) return false;
if (rows_[i] <= ColToRowIndex(col)) return false;
}
}
return true;
@@ -735,7 +741,7 @@ bool TriangularMatrix::IsUpperTriangular() const {
for (ColIndex col(0); col < num_cols_; ++col) {
if (diagonal_coefficients_[col] == 0.0) return false;
for (EntryIndex i : Column(col)) {
if (EntryRow(i) >= ColToRowIndex(col)) return false;
if (rows_[i] >= ColToRowIndex(col)) return false;
}
}
return true;
@@ -752,8 +758,10 @@ void TriangularMatrix::ApplyRowPermutationToNonDiagonalEntries(
void TriangularMatrix::CopyColumnToSparseColumn(ColIndex col,
SparseColumn* output) const {
output->Clear();
const auto entry_rows = rows_.view();
const auto entry_coefficients = coefficients_.view();
for (const EntryIndex i : Column(col)) {
output->SetCoefficient(EntryRow(i), EntryCoefficient(i));
output->SetCoefficient(entry_rows[i], entry_coefficients[i]);
}
output->SetCoefficient(ColToRowIndex(col), diagonal_coefficients_[col]);
output->CleanUp();
@@ -772,52 +780,58 @@ void TriangularMatrix::LowerSolve(DenseColumn* rhs) const {
void TriangularMatrix::LowerSolveStartingAt(ColIndex start,
DenseColumn* rhs) const {
RETURN_IF_NULL(rhs);
if (all_diagonal_coefficients_are_one_) {
LowerSolveStartingAtInternal<true>(start, rhs);
LowerSolveStartingAtInternal<true>(start, rhs->view());
} else {
LowerSolveStartingAtInternal<false>(start, rhs);
LowerSolveStartingAtInternal<false>(start, rhs->view());
}
}
template <bool diagonal_of_ones>
void TriangularMatrix::LowerSolveStartingAtInternal(ColIndex start,
DenseColumn* rhs) const {
RETURN_IF_NULL(rhs);
void TriangularMatrix::LowerSolveStartingAtInternal(
ColIndex start, DenseColumn::View rhs) const {
const ColIndex begin = std::max(start, first_non_identity_column_);
const ColIndex end = diagonal_coefficients_.size();
const auto entry_rows = rows_.view();
const auto entry_coefficients = coefficients_.view();
const auto diagonal_coefficients = diagonal_coefficients_.view();
const ColIndex end = diagonal_coefficients.size();
for (ColIndex col(begin); col < end; ++col) {
const Fractional value = (*rhs)[ColToRowIndex(col)];
const Fractional value = rhs[ColToRowIndex(col)];
if (value == 0.0) continue;
const Fractional coeff =
diagonal_of_ones ? value : value / diagonal_coefficients_[col];
diagonal_of_ones ? value : value / diagonal_coefficients[col];
if (!diagonal_of_ones) {
(*rhs)[ColToRowIndex(col)] = coeff;
rhs[ColToRowIndex(col)] = coeff;
}
for (const EntryIndex i : Column(col)) {
(*rhs)[EntryRow(i)] -= coeff * EntryCoefficient(i);
rhs[entry_rows[i]] -= coeff * entry_coefficients[i];
}
}
}
void TriangularMatrix::UpperSolve(DenseColumn* rhs) const {
RETURN_IF_NULL(rhs);
if (all_diagonal_coefficients_are_one_) {
UpperSolveInternal<true>(rhs);
UpperSolveInternal<true>(rhs->view());
} else {
UpperSolveInternal<false>(rhs);
UpperSolveInternal<false>(rhs->view());
}
}
template <bool diagonal_of_ones>
void TriangularMatrix::UpperSolveInternal(DenseColumn* rhs) const {
RETURN_IF_NULL(rhs);
void TriangularMatrix::UpperSolveInternal(DenseColumn::View rhs) const {
const ColIndex end = first_non_identity_column_;
for (ColIndex col(diagonal_coefficients_.size() - 1); col >= end; --col) {
const Fractional value = (*rhs)[ColToRowIndex(col)];
const auto entry_rows = rows_.view();
const auto entry_coefficients = coefficients_.view();
const auto diagonal_coefficients = diagonal_coefficients_.view();
for (ColIndex col(diagonal_coefficients.size() - 1); col >= end; --col) {
const Fractional value = rhs[ColToRowIndex(col)];
if (value == 0.0) continue;
const Fractional coeff =
diagonal_of_ones ? value : value / diagonal_coefficients_[col];
diagonal_of_ones ? value : value / diagonal_coefficients[col];
if (!diagonal_of_ones) {
(*rhs)[ColToRowIndex(col)] = coeff;
rhs[ColToRowIndex(col)] = coeff;
}
// It is faster to iterate this way (instead of i : Column(col)) because of
@@ -825,78 +839,89 @@ void TriangularMatrix::UpperSolveInternal(DenseColumn* rhs) const {
// same in both cases.
const EntryIndex i_end = starts_[col];
for (EntryIndex i(starts_[col + 1] - 1); i >= i_end; --i) {
(*rhs)[EntryRow(i)] -= coeff * EntryCoefficient(i);
rhs[entry_rows[i]] -= coeff * entry_coefficients[i];
}
}
}
void TriangularMatrix::TransposeUpperSolve(DenseColumn* rhs) const {
RETURN_IF_NULL(rhs);
if (all_diagonal_coefficients_are_one_) {
TransposeUpperSolveInternal<true>(rhs);
TransposeUpperSolveInternal<true>(rhs->view());
} else {
TransposeUpperSolveInternal<false>(rhs);
TransposeUpperSolveInternal<false>(rhs->view());
}
}
template <bool diagonal_of_ones>
void TriangularMatrix::TransposeUpperSolveInternal(DenseColumn* rhs) const {
RETURN_IF_NULL(rhs);
void TriangularMatrix::TransposeUpperSolveInternal(
DenseColumn::View rhs) const {
const ColIndex end = num_cols_;
const auto starts = starts_.view();
const auto entry_rows = rows_.view();
const auto entry_coefficients = coefficients_.view();
const auto diagonal_coefficients = diagonal_coefficients_.view();
EntryIndex i = starts_[first_non_identity_column_];
for (ColIndex col(first_non_identity_column_); col < end; ++col) {
Fractional sum = (*rhs)[ColToRowIndex(col)];
Fractional sum = rhs[ColToRowIndex(col)];
// Note that this is a bit faster than the simpler
// for (const EntryIndex i : Column(col)) {
// EntryIndex i is explicitly not modified in outer iterations, since
// the last entry in column col is stored contiguously just before the
// first entry in column col+1.
const EntryIndex i_end = starts_[col + 1];
const EntryIndex i_end = starts[col + 1];
const EntryIndex shifted_end = i_end - 3;
for (; i < shifted_end; i += 4) {
sum -= EntryCoefficient(i) * (*rhs)[EntryRow(i)] +
EntryCoefficient(i + 1) * (*rhs)[EntryRow(i + 1)] +
EntryCoefficient(i + 2) * (*rhs)[EntryRow(i + 2)] +
EntryCoefficient(i + 3) * (*rhs)[EntryRow(i + 3)];
sum -= entry_coefficients[i] * rhs[entry_rows[i]] +
entry_coefficients[i + 1] * rhs[entry_rows[i + 1]] +
entry_coefficients[i + 2] * rhs[entry_rows[i + 2]] +
entry_coefficients[i + 3] * rhs[entry_rows[i + 3]];
}
if (i < i_end) {
sum -= EntryCoefficient(i) * (*rhs)[EntryRow(i)];
sum -= entry_coefficients[i] * rhs[entry_rows[i]];
if (i + 1 < i_end) {
sum -= EntryCoefficient(i + 1) * (*rhs)[EntryRow(i + 1)];
sum -= entry_coefficients[i + 1] * rhs[entry_rows[i + 1]];
if (i + 2 < i_end) {
sum -= EntryCoefficient(i + 2) * (*rhs)[EntryRow(i + 2)];
sum -= entry_coefficients[i + 2] * rhs[entry_rows[i + 2]];
}
}
i = i_end;
}
(*rhs)[ColToRowIndex(col)] =
diagonal_of_ones ? sum : sum / diagonal_coefficients_[col];
rhs[ColToRowIndex(col)] =
diagonal_of_ones ? sum : sum / diagonal_coefficients[col];
}
}
void TriangularMatrix::TransposeLowerSolve(DenseColumn* rhs) const {
RETURN_IF_NULL(rhs);
if (all_diagonal_coefficients_are_one_) {
TransposeLowerSolveInternal<true>(rhs);
TransposeLowerSolveInternal<true>(rhs->view());
} else {
TransposeLowerSolveInternal<false>(rhs);
TransposeLowerSolveInternal<false>(rhs->view());
}
}
template <bool diagonal_of_ones>
void TriangularMatrix::TransposeLowerSolveInternal(DenseColumn* rhs) const {
RETURN_IF_NULL(rhs);
void TriangularMatrix::TransposeLowerSolveInternal(
DenseColumn::View rhs) const {
const ColIndex end = first_non_identity_column_;
// We optimize a bit the solve by skipping the last 0.0 positions.
ColIndex col = num_cols_ - 1;
while (col >= end && (*rhs)[ColToRowIndex(col)] == 0.0) {
while (col >= end && rhs[ColToRowIndex(col)] == 0.0) {
--col;
}
EntryIndex i = starts_[col + 1] - 1;
const auto starts = starts_.view();
const auto diagonal_coeffs = diagonal_coefficients_.view();
const auto entry_rows = rows_.view();
const auto entry_coefficients = coefficients_.view();
EntryIndex i = starts[col + 1] - 1;
for (; col >= end; --col) {
Fractional sum = (*rhs)[ColToRowIndex(col)];
Fractional sum = rhs[ColToRowIndex(col)];
// Note that this is a bit faster than the simpler
// for (const EntryIndex i : Column(col)) {
@@ -904,53 +929,55 @@ void TriangularMatrix::TransposeLowerSolveInternal(DenseColumn* rhs) const {
// EntryIndex i is explicitly not modified in outer iterations, since
// the last entry in column col is stored contiguously just before the
// first entry in column col+1.
const EntryIndex i_end = starts_[col];
const EntryIndex i_end = starts[col];
const EntryIndex shifted_end = i_end + 3;
for (; i >= shifted_end; i -= 4) {
sum -= EntryCoefficient(i) * (*rhs)[EntryRow(i)] +
EntryCoefficient(i - 1) * (*rhs)[EntryRow(i - 1)] +
EntryCoefficient(i - 2) * (*rhs)[EntryRow(i - 2)] +
EntryCoefficient(i - 3) * (*rhs)[EntryRow(i - 3)];
sum -= entry_coefficients[i] * rhs[entry_rows[i]] +
entry_coefficients[i - 1] * rhs[entry_rows[i - 1]] +
entry_coefficients[i - 2] * rhs[entry_rows[i - 2]] +
entry_coefficients[i - 3] * rhs[entry_rows[i - 3]];
}
if (i >= i_end) {
sum -= EntryCoefficient(i) * (*rhs)[EntryRow(i)];
sum -= entry_coefficients[i] * rhs[entry_rows[i]];
if (i >= i_end + 1) {
sum -= EntryCoefficient(i - 1) * (*rhs)[EntryRow(i - 1)];
sum -= entry_coefficients[i - 1] * rhs[entry_rows[i - 1]];
if (i >= i_end + 2) {
sum -= EntryCoefficient(i - 2) * (*rhs)[EntryRow(i - 2)];
sum -= entry_coefficients[i - 2] * rhs[entry_rows[i - 2]];
}
}
i = i_end - 1;
}
(*rhs)[ColToRowIndex(col)] =
diagonal_of_ones ? sum : sum / diagonal_coefficients_[col];
rhs[ColToRowIndex(col)] =
diagonal_of_ones ? sum : sum / diagonal_coeffs[col];
}
}
void TriangularMatrix::HyperSparseSolve(DenseColumn* rhs,
RowIndexVector* non_zero_rows) const {
RETURN_IF_NULL(rhs);
if (all_diagonal_coefficients_are_one_) {
HyperSparseSolveInternal<true>(rhs, non_zero_rows);
HyperSparseSolveInternal<true>(rhs->view(), non_zero_rows);
} else {
HyperSparseSolveInternal<false>(rhs, non_zero_rows);
HyperSparseSolveInternal<false>(rhs->view(), non_zero_rows);
}
}
template <bool diagonal_of_ones>
void TriangularMatrix::HyperSparseSolveInternal(
DenseColumn* rhs, RowIndexVector* non_zero_rows) const {
RETURN_IF_NULL(rhs);
DenseColumn::View rhs, RowIndexVector* non_zero_rows) const {
int new_size = 0;
const auto entry_rows = rows_.view();
const auto entry_coefficients = coefficients_.view();
for (const RowIndex row : *non_zero_rows) {
if ((*rhs)[row] == 0.0) continue;
if (rhs[row] == 0.0) continue;
const ColIndex row_as_col = RowToColIndex(row);
const Fractional coeff =
diagonal_of_ones ? (*rhs)[row]
: (*rhs)[row] / diagonal_coefficients_[row_as_col];
(*rhs)[row] = coeff;
diagonal_of_ones ? rhs[row]
: rhs[row] / diagonal_coefficients_[row_as_col];
rhs[row] = coeff;
for (const EntryIndex i : Column(row_as_col)) {
(*rhs)[EntryRow(i)] -= coeff * EntryCoefficient(i);
rhs[entry_rows[i]] -= coeff * entry_coefficients[i];
}
(*non_zero_rows)[new_size] = row;
++new_size;
@@ -960,27 +987,31 @@ void TriangularMatrix::HyperSparseSolveInternal(
void TriangularMatrix::HyperSparseSolveWithReversedNonZeros(
DenseColumn* rhs, RowIndexVector* non_zero_rows) const {
RETURN_IF_NULL(rhs);
if (all_diagonal_coefficients_are_one_) {
HyperSparseSolveWithReversedNonZerosInternal<true>(rhs, non_zero_rows);
HyperSparseSolveWithReversedNonZerosInternal<true>(rhs->view(),
non_zero_rows);
} else {
HyperSparseSolveWithReversedNonZerosInternal<false>(rhs, non_zero_rows);
HyperSparseSolveWithReversedNonZerosInternal<false>(rhs->view(),
non_zero_rows);
}
}
template <bool diagonal_of_ones>
void TriangularMatrix::HyperSparseSolveWithReversedNonZerosInternal(
DenseColumn* rhs, RowIndexVector* non_zero_rows) const {
RETURN_IF_NULL(rhs);
DenseColumn::View rhs, RowIndexVector* non_zero_rows) const {
int new_start = non_zero_rows->size();
const auto entry_rows = rows_.view();
const auto entry_coefficients = coefficients_.view();
for (const RowIndex row : Reverse(*non_zero_rows)) {
if ((*rhs)[row] == 0.0) continue;
if (rhs[row] == 0.0) continue;
const ColIndex row_as_col = RowToColIndex(row);
const Fractional coeff =
diagonal_of_ones ? (*rhs)[row]
: (*rhs)[row] / diagonal_coefficients_[row_as_col];
(*rhs)[row] = coeff;
diagonal_of_ones ? rhs[row]
: rhs[row] / diagonal_coefficients_[row_as_col];
rhs[row] = coeff;
for (const EntryIndex i : Column(row_as_col)) {
(*rhs)[EntryRow(i)] -= coeff * EntryCoefficient(i);
rhs[entry_rows[i]] -= coeff * entry_coefficients[i];
}
--new_start;
(*non_zero_rows)[new_start] = row;
@@ -991,20 +1022,23 @@ void TriangularMatrix::HyperSparseSolveWithReversedNonZerosInternal(
void TriangularMatrix::TransposeHyperSparseSolve(
DenseColumn* rhs, RowIndexVector* non_zero_rows) const {
RETURN_IF_NULL(rhs);
if (all_diagonal_coefficients_are_one_) {
TransposeHyperSparseSolveInternal<true>(rhs, non_zero_rows);
TransposeHyperSparseSolveInternal<true>(rhs->view(), non_zero_rows);
} else {
TransposeHyperSparseSolveInternal<false>(rhs, non_zero_rows);
TransposeHyperSparseSolveInternal<false>(rhs->view(), non_zero_rows);
}
}
template <bool diagonal_of_ones>
void TriangularMatrix::TransposeHyperSparseSolveInternal(
DenseColumn* rhs, RowIndexVector* non_zero_rows) const {
RETURN_IF_NULL(rhs);
DenseColumn::View rhs, RowIndexVector* non_zero_rows) const {
int new_size = 0;
const auto entry_rows = rows_.view();
const auto entry_coefficients = coefficients_.view();
for (const RowIndex row : *non_zero_rows) {
Fractional sum = (*rhs)[row];
Fractional sum = rhs[row];
const ColIndex row_as_col = RowToColIndex(row);
// Note that we do the loop in exactly the same way as
@@ -1013,22 +1047,22 @@ void TriangularMatrix::TransposeHyperSparseSolveInternal(
const EntryIndex i_end = starts_[row_as_col + 1];
const EntryIndex shifted_end = i_end - 3;
for (; i < shifted_end; i += 4) {
sum -= EntryCoefficient(i) * (*rhs)[EntryRow(i)] +
EntryCoefficient(i + 1) * (*rhs)[EntryRow(i + 1)] +
EntryCoefficient(i + 2) * (*rhs)[EntryRow(i + 2)] +
EntryCoefficient(i + 3) * (*rhs)[EntryRow(i + 3)];
sum -= entry_coefficients[i] * rhs[entry_rows[i]] +
entry_coefficients[i + 1] * rhs[entry_rows[i + 1]] +
entry_coefficients[i + 2] * rhs[entry_rows[i + 2]] +
entry_coefficients[i + 3] * rhs[entry_rows[i + 3]];
}
if (i < i_end) {
sum -= EntryCoefficient(i) * (*rhs)[EntryRow(i)];
sum -= entry_coefficients[i] * rhs[entry_rows[i]];
if (i + 1 < i_end) {
sum -= EntryCoefficient(i + 1) * (*rhs)[EntryRow(i + 1)];
sum -= entry_coefficients[i + 1] * rhs[entry_rows[i + 1]];
if (i + 2 < i_end) {
sum -= EntryCoefficient(i + 2) * (*rhs)[EntryRow(i + 2)];
sum -= entry_coefficients[i + 2] * rhs[entry_rows[i + 2]];
}
}
}
(*rhs)[row] =
rhs[row] =
diagonal_of_ones ? sum : sum / diagonal_coefficients_[row_as_col];
if (sum != 0.0) {
(*non_zero_rows)[new_size] = row;
@@ -1040,22 +1074,24 @@ void TriangularMatrix::TransposeHyperSparseSolveInternal(
// Hyper-sparse transposed solve visiting the non-zero rows in reversed order.
// Dispatches to the specialization for an all-ones diagonal (which skips the
// division by the diagonal coefficient in the inner loop).
void TriangularMatrix::TransposeHyperSparseSolveWithReversedNonZeros(
    DenseColumn* rhs, RowIndexVector* non_zero_rows) const {
  RETURN_IF_NULL(rhs);
  if (all_diagonal_coefficients_are_one_) {
    TransposeHyperSparseSolveWithReversedNonZerosInternal<true>(rhs->view(),
                                                                non_zero_rows);
  } else {
    TransposeHyperSparseSolveWithReversedNonZerosInternal<false>(rhs->view(),
                                                                 non_zero_rows);
  }
}
template <bool diagonal_of_ones>
void TriangularMatrix::TransposeHyperSparseSolveWithReversedNonZerosInternal(
DenseColumn* rhs, RowIndexVector* non_zero_rows) const {
RETURN_IF_NULL(rhs);
DenseColumn::View rhs, RowIndexVector* non_zero_rows) const {
int new_start = non_zero_rows->size();
const auto entry_rows = rows_.view();
const auto entry_coefficients = coefficients_.view();
for (const RowIndex row : Reverse(*non_zero_rows)) {
Fractional sum = (*rhs)[row];
Fractional sum = rhs[row];
const ColIndex row_as_col = RowToColIndex(row);
// We do the loop this way so that the floating point operations are exactly
@@ -1064,22 +1100,22 @@ void TriangularMatrix::TransposeHyperSparseSolveWithReversedNonZerosInternal(
const EntryIndex i_end = starts_[row_as_col];
const EntryIndex shifted_end = i_end + 3;
for (; i >= shifted_end; i -= 4) {
sum -= EntryCoefficient(i) * (*rhs)[EntryRow(i)] +
EntryCoefficient(i - 1) * (*rhs)[EntryRow(i - 1)] +
EntryCoefficient(i - 2) * (*rhs)[EntryRow(i - 2)] +
EntryCoefficient(i - 3) * (*rhs)[EntryRow(i - 3)];
sum -= entry_coefficients[i] * rhs[entry_rows[i]] +
entry_coefficients[i - 1] * rhs[entry_rows[i - 1]] +
entry_coefficients[i - 2] * rhs[entry_rows[i - 2]] +
entry_coefficients[i - 3] * rhs[entry_rows[i - 3]];
}
if (i >= i_end) {
sum -= EntryCoefficient(i) * (*rhs)[EntryRow(i)];
sum -= entry_coefficients[i] * rhs[entry_rows[i]];
if (i >= i_end + 1) {
sum -= EntryCoefficient(i - 1) * (*rhs)[EntryRow(i - 1)];
sum -= entry_coefficients[i - 1] * rhs[entry_rows[i - 1]];
if (i >= i_end + 2) {
sum -= EntryCoefficient(i - 2) * (*rhs)[EntryRow(i - 2)];
sum -= entry_coefficients[i - 2] * rhs[entry_rows[i - 2]];
}
}
}
(*rhs)[row] =
rhs[row] =
diagonal_of_ones ? sum : sum / diagonal_coefficients_[row_as_col];
if (sum != 0.0) {
--new_start;
@@ -1103,15 +1139,18 @@ void TriangularMatrix::PermutedLowerSolve(
initially_all_zero_scratchpad_[e.row()] = e.coefficient();
}
const auto entry_rows = rows_.view();
const auto entry_coefficients = coefficients_.view();
const RowIndex end_row(partial_inverse_row_perm.size());
for (RowIndex row(ColToRowIndex(first_non_identity_column_)); row < end_row;
++row) {
const RowIndex permuted_row = partial_inverse_row_perm[row];
const Fractional pivot = initially_all_zero_scratchpad_[permuted_row];
if (pivot == 0.0) continue;
for (EntryIndex i : Column(RowToColIndex(row))) {
initially_all_zero_scratchpad_[EntryRow(i)] -=
EntryCoefficient(i) * pivot;
initially_all_zero_scratchpad_[entry_rows[i]] -=
entry_coefficients[i] * pivot;
}
}
@@ -1239,6 +1278,7 @@ void TriangularMatrix::PermutedComputeRowsToConsider(
// the call stack). This is faster than an alternate implementation that
// uses another Boolean array to detect when we go back in the
// depth-first search.
const auto entry_rows = rows_.view();
while (!nodes_to_explore_.empty()) {
const RowIndex row = nodes_to_explore_.back();
@@ -1264,7 +1304,7 @@ void TriangularMatrix::PermutedComputeRowsToConsider(
EntryIndex i = starts_[col];
EntryIndex end = pruned_ends_[col];
while (i < end) {
const RowIndex entry_row = EntryRow(i);
const RowIndex entry_row = entry_rows[i];
if (!marked_[entry_row]) {
--end;
@@ -1303,7 +1343,7 @@ void TriangularMatrix::PermutedComputeRowsToConsider(
nodes_to_explore_.push_back(kInvalidRow);
const EntryIndex end = pruned_ends_[col];
for (EntryIndex i = starts_[col]; i < end; ++i) {
const RowIndex entry_row = EntryRow(i);
const RowIndex entry_row = entry_rows[i];
if (!stored_[entry_row]) {
nodes_to_explore_.push_back(entry_row);
}
@@ -1352,6 +1392,7 @@ void TriangularMatrix::ComputeRowsToConsiderWithDfs(
// Topological sort based on Depth-First-Search.
// Same remarks as the version implemented in PermutedComputeRowsToConsider().
const auto entry_rows = rows_.view();
while (!nodes_to_explore_.empty()) {
const RowIndex row = nodes_to_explore_.back();
@@ -1379,7 +1420,7 @@ void TriangularMatrix::ComputeRowsToConsiderWithDfs(
nodes_to_explore_.back() = -row - 1;
for (const EntryIndex i : Column(RowToColIndex(row))) {
++num_ops;
const RowIndex entry_row = EntryRow(i);
const RowIndex entry_row = entry_rows[i];
if (!stored_[entry_row]) {
nodes_to_explore_.push_back(entry_row);
}
@@ -1426,11 +1467,13 @@ void TriangularMatrix::ComputeRowsToConsiderInSortedOrder(
stored_.resize(num_rows_, false);
for (const RowIndex row : *non_zero_rows) stored_[row] = true;
const auto entry_rows = rows_.view();
for (int i = 0; i < non_zero_rows->size(); ++i) {
const RowIndex row = (*non_zero_rows)[i];
for (const EntryIndex i : Column(RowToColIndex(row))) {
++num_ops;
const RowIndex entry_row = EntryRow(i);
const RowIndex entry_row = entry_rows[i];
if (!stored_[entry_row]) {
non_zero_rows->push_back(entry_row);
stored_[entry_row] = true;
@@ -1461,6 +1504,8 @@ Fractional TriangularMatrix::ComputeInverseInfinityNormUpperBound() const {
DenseColumn row_norm_estimate(num_rows_, 1.0);
const int num_cols = num_cols_.value();
const auto entry_rows = rows_.view();
const auto entry_coefficients = coefficients_.view();
for (int i = 0; i < num_cols; ++i) {
const ColIndex col(is_upper ? num_cols - 1 - i : i);
DCHECK_NE(diagonal_coefficients_[col], 0.0);
@@ -1469,7 +1514,8 @@ Fractional TriangularMatrix::ComputeInverseInfinityNormUpperBound() const {
row_norm_estimate[ColToRowIndex(col)] = coeff;
for (const EntryIndex i : Column(col)) {
row_norm_estimate[EntryRow(i)] += coeff * std::abs(EntryCoefficient(i));
row_norm_estimate[entry_rows[i]] +=
coeff * std::abs(entry_coefficients[i]);
}
}

View File

@@ -290,7 +290,48 @@ SparseMatrix::PopulateFromPermutedMatrix<CompactSparseMatrixView>(
// of the matrix columns.
class CompactSparseMatrix {
public:
// When iteration performance matters, getting a ConstView allows the compiler
// to do better aliasing analysis and not re-read vectors address all the
// time.
class ConstView {
 public:
  // Caches the raw data pointers of the given matrix. The matrix must
  // outlive this view and must not be modified while the view is in use.
  explicit ConstView(const CompactSparseMatrix* matrix)
      : coefficients_(matrix->coefficients_.data()),
        rows_(matrix->rows_.data()),
        starts_(matrix->starts_.data()) {}

  // Functions to iterate on the entries of a given column:
  //   const auto view = compact_matrix.view();
  //   for (const EntryIndex i : view.Column(col)) {
  //     const RowIndex row = view.EntryRow(i);
  //     const Fractional coefficient = view.EntryCoefficient(i);
  //   }
  ::util::IntegerRange<EntryIndex> Column(ColIndex col) const {
    return ::util::IntegerRange<EntryIndex>(starts_[col.value()],
                                            starts_[col.value() + 1]);
  }
  Fractional EntryCoefficient(EntryIndex i) const {
    return coefficients_[i.value()];
  }
  RowIndex EntryRow(EntryIndex i) const { return rows_[i.value()]; }

  // Number of entries stored in the given column.
  EntryIndex ColumnNumEntries(ColIndex col) const {
    return starts_[col.value() + 1] - starts_[col.value()];
  }

  // Returns the scalar product of the given row vector with the column of
  // index col of this matrix.
  Fractional ColumnScalarProduct(ColIndex col,
                                 DenseRow::ConstView vector) const;

 private:
  // Raw pointers into the owning matrix's entry storage; see the
  // corresponding members of CompactSparseMatrix.
  const Fractional* const coefficients_;
  const RowIndex* const rows_;
  const EntryIndex* const starts_;
};
CompactSparseMatrix() {}
ConstView view() const { return ConstView(this); }
// Convenient constructors for tests.
// TODO(user): If this is needed in production code, it can be done faster.
@@ -357,17 +398,8 @@ class CompactSparseMatrix {
return coefficients_.empty();
}
// Functions to iterate on the entries of a given column:
// for (const EntryIndex i : compact_matrix_.Column(col)) {
// const RowIndex row = compact_matrix_.EntryRow(i);
// const Fractional coefficient = compact_matrix_.EntryCoefficient(i);
// }
::util::IntegerRange<EntryIndex> Column(ColIndex col) const {
return ::util::IntegerRange<EntryIndex>(starts_[col], starts_[col + 1]);
}
Fractional EntryCoefficient(EntryIndex i) const { return coefficients_[i]; }
RowIndex EntryRow(EntryIndex i) const { return rows_[i]; }
// Alternative iteration API compatible with the one from SparseMatrix.
// The ConstView alternative should be faster.
ColumnView column(ColIndex col) const {
DCHECK_LT(col, num_cols_);
@@ -385,31 +417,9 @@ class CompactSparseMatrix {
}
// Returns the scalar product of the given row vector with the column of index
// col of this matrix. This function is declared in the .h for efficiency.
Fractional ColumnScalarProduct(ColIndex col, const DenseRow& vector) const {
  // Delegate to the ConstView implementation, which is manually unrolled.
  return view().ColumnScalarProduct(col, vector.const_view());
}
// Adds a multiple of the given column of this matrix to the given
@@ -417,10 +427,12 @@ class CompactSparseMatrix {
// Adds multiplier times the column of index col of this matrix to the given
// dense_column. This function is declared in the .h for efficiency.
void ColumnAddMultipleToDenseColumn(ColIndex col, Fractional multiplier,
                                    DenseColumn* dense_column) const {
  RETURN_IF_NULL(dense_column);
  if (multiplier == 0.0) return;
  // Local views let the compiler avoid re-reading the vector addresses.
  const auto entry_rows = rows_.view();
  const auto entry_coeffs = coefficients_.view();
  for (const EntryIndex i : Column(col)) {
    (*dense_column)[entry_rows[i]] += multiplier * entry_coeffs[i];
  }
}
@@ -430,11 +442,12 @@ class CompactSparseMatrix {
// Same as ColumnAddMultipleToDenseColumn() but acting on a ScatteredColumn,
// adding each entry through column->Add().
void ColumnAddMultipleToSparseScatteredColumn(ColIndex col,
                                              Fractional multiplier,
                                              ScatteredColumn* column) const {
  RETURN_IF_NULL(column);
  if (multiplier == 0.0) return;
  // Local views let the compiler avoid re-reading the vector addresses.
  const auto entry_rows = rows_.view();
  const auto entry_coeffs = coefficients_.view();
  for (const EntryIndex i : Column(col)) {
    column->Add(entry_rows[i], multiplier * entry_coeffs[i]);
  }
}
@@ -452,8 +465,10 @@ class CompactSparseMatrix {
DenseColumn* dense_column) const {
RETURN_IF_NULL(dense_column);
dense_column->resize(num_rows_, 0.0);
const auto entry_rows = rows_.view();
const auto entry_coeffs = coefficients_.view();
for (const EntryIndex i : Column(col)) {
(*dense_column)[EntryRow(i)] = EntryCoefficient(i);
(*dense_column)[entry_rows[i]] = entry_coeffs[i];
}
}
@@ -464,9 +479,11 @@ class CompactSparseMatrix {
RETURN_IF_NULL(dense_column);
dense_column->resize(num_rows_, 0.0);
non_zeros->clear();
const auto entry_rows = rows_.view();
const auto entry_coeffs = coefficients_.view();
for (const EntryIndex i : Column(col)) {
const RowIndex row = EntryRow(i);
(*dense_column)[row] = EntryCoefficient(i);
const RowIndex row = entry_rows[i];
(*dense_column)[row] = entry_coeffs[i];
non_zeros->push_back(row);
}
}
@@ -474,6 +491,11 @@ class CompactSparseMatrix {
void Swap(CompactSparseMatrix* other);
protected:
// Functions to iterate on the entries of a given column.
// Returns the half-open entry-index range [starts_[col], starts_[col + 1]).
::util::IntegerRange<EntryIndex> Column(ColIndex col) const {
  return ::util::IntegerRange<EntryIndex>(starts_[col], starts_[col + 1]);
}
// The matrix dimensions, properly updated by full and incremental builders.
RowIndex num_rows_;
ColIndex num_cols_;
@@ -489,6 +511,32 @@ class CompactSparseMatrix {
DISALLOW_COPY_AND_ASSIGN(CompactSparseMatrix);
};
// Dot product of column `col` with the given dense row vector, using raw
// pointers cached in the view. Kept inline (and in the .h) for speed.
inline Fractional CompactSparseMatrix::ConstView::ColumnScalarProduct(
    ColIndex col, DenseRow::ConstView vector) const {
  // We expand ourselves since we don't really care about the floating
  // point order of operation and this seems faster.
  int i = starts_[col.value()].value();
  const int end = starts_[col.value() + 1].value();
  const int shifted_end = end - 3;
  Fractional result = 0.0;
  // Main loop, unrolled by four to expose instruction-level parallelism.
  for (; i < shifted_end; i += 4) {
    result += coefficients_[i] * vector[RowToColIndex(rows_[i])] +
              coefficients_[i + 1] * vector[RowToColIndex(rows_[i + 1])] +
              coefficients_[i + 2] * vector[RowToColIndex(rows_[i + 2])] +
              coefficients_[i + 3] * vector[RowToColIndex(rows_[i + 3])];
  }
  // Handle the at most three remaining entries.
  if (i < end) {
    result += coefficients_[i] * vector[RowToColIndex(rows_[i])];
    if (i + 1 < end) {
      result += coefficients_[i + 1] * vector[RowToColIndex(rows_[i + 1])];
      if (i + 2 < end) {
        result += coefficients_[i + 2] * vector[RowToColIndex(rows_[i + 2])];
      }
    }
  }
  return result;
}
// A matrix view of the basis columns of a CompactSparseMatrix, with basis
// specified as a RowToColMapping. This class does not take ownership of the
// underlying matrix or basis, and thus they must outlive this class (and keep
@@ -766,25 +814,26 @@ class TriangularMatrix : private CompactSparseMatrix {
private:
// Internal versions of some Solve() functions to avoid code duplication.
template <bool diagonal_of_ones>
void LowerSolveStartingAtInternal(ColIndex start, DenseColumn* rhs) const;
void LowerSolveStartingAtInternal(ColIndex start,
DenseColumn::View rhs) const;
template <bool diagonal_of_ones>
void UpperSolveInternal(DenseColumn* rhs) const;
void UpperSolveInternal(DenseColumn::View rhs) const;
template <bool diagonal_of_ones>
void TransposeLowerSolveInternal(DenseColumn* rhs) const;
void TransposeLowerSolveInternal(DenseColumn::View rhs) const;
template <bool diagonal_of_ones>
void TransposeUpperSolveInternal(DenseColumn* rhs) const;
void TransposeUpperSolveInternal(DenseColumn::View rhs) const;
template <bool diagonal_of_ones>
void HyperSparseSolveInternal(DenseColumn* rhs,
void HyperSparseSolveInternal(DenseColumn::View rhs,
RowIndexVector* non_zero_rows) const;
template <bool diagonal_of_ones>
void HyperSparseSolveWithReversedNonZerosInternal(
DenseColumn* rhs, RowIndexVector* non_zero_rows) const;
DenseColumn::View rhs, RowIndexVector* non_zero_rows) const;
template <bool diagonal_of_ones>
void TransposeHyperSparseSolveInternal(DenseColumn* rhs,
void TransposeHyperSparseSolveInternal(DenseColumn::View rhs,
RowIndexVector* non_zero_rows) const;
template <bool diagonal_of_ones>
void TransposeHyperSparseSolveWithReversedNonZerosInternal(
DenseColumn* rhs, RowIndexVector* non_zero_rows) const;
DenseColumn::View rhs, RowIndexVector* non_zero_rows) const;
// Internal function used by the Add*() functions to finish adding
// a new column to a triangular matrix.