minor optim to glop
This commit is contained in:
@@ -19,6 +19,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "absl/log/check.h"
|
||||
#include "absl/types/span.h"
|
||||
#include "ortools/lp_data/lp_types.h"
|
||||
#include "ortools/lp_data/lp_utils.h"
|
||||
|
||||
@@ -132,15 +133,14 @@ namespace {
|
||||
// norm of the given column, otherwise do the same with a sparse version. In
|
||||
// both cases column is cleared.
|
||||
Fractional ComputeSquaredNormAndResetToZero(
|
||||
const std::vector<RowIndex>& non_zeros, DenseColumn* column) {
|
||||
const std::vector<RowIndex>& non_zeros, absl::Span<Fractional> column) {
|
||||
Fractional sum = 0.0;
|
||||
if (non_zeros.empty()) {
|
||||
sum = SquaredNorm(*column);
|
||||
column->clear();
|
||||
sum = SquaredNormAndResetToZero(column);
|
||||
} else {
|
||||
for (const RowIndex row : non_zeros) {
|
||||
sum += Square((*column)[row]);
|
||||
(*column)[row] = 0.0;
|
||||
sum += Square(column[row.value()]);
|
||||
(column)[row.value()] = 0.0;
|
||||
}
|
||||
}
|
||||
return sum;
|
||||
@@ -152,7 +152,8 @@ Fractional LuFactorization::RightSolveSquaredNorm(const ColumnView& a) const {
|
||||
if (is_identity_factorization_) return SquaredNorm(a);
|
||||
|
||||
non_zero_rows_.clear();
|
||||
dense_zero_scratchpad_.resize(lower_.num_rows(), 0.0);
|
||||
const RowIndex num_rows = lower_.num_rows();
|
||||
dense_zero_scratchpad_.resize(num_rows, 0.0);
|
||||
DCHECK(IsAllZero(dense_zero_scratchpad_));
|
||||
|
||||
for (const SparseColumn::Entry e : a) {
|
||||
@@ -174,8 +175,9 @@ Fractional LuFactorization::RightSolveSquaredNorm(const ColumnView& a) const {
|
||||
upper_.HyperSparseSolveWithReversedNonZeros(&dense_zero_scratchpad_,
|
||||
&non_zero_rows_);
|
||||
}
|
||||
return ComputeSquaredNormAndResetToZero(non_zero_rows_,
|
||||
&dense_zero_scratchpad_);
|
||||
return ComputeSquaredNormAndResetToZero(
|
||||
non_zero_rows_,
|
||||
absl::MakeSpan(dense_zero_scratchpad_.data(), num_rows.value()));
|
||||
}
|
||||
|
||||
Fractional LuFactorization::DualEdgeSquaredNorm(RowIndex row) const {
|
||||
@@ -185,7 +187,8 @@ Fractional LuFactorization::DualEdgeSquaredNorm(RowIndex row) const {
|
||||
col_perm_.empty() ? row : ColToRowIndex(col_perm_[RowToColIndex(row)]);
|
||||
|
||||
non_zero_rows_.clear();
|
||||
dense_zero_scratchpad_.resize(lower_.num_rows(), 0.0);
|
||||
const RowIndex num_rows = lower_.num_rows();
|
||||
dense_zero_scratchpad_.resize(num_rows, 0.0);
|
||||
DCHECK(IsAllZero(dense_zero_scratchpad_));
|
||||
dense_zero_scratchpad_[permuted_row] = 1.0;
|
||||
non_zero_rows_.push_back(permuted_row);
|
||||
@@ -204,8 +207,9 @@ Fractional LuFactorization::DualEdgeSquaredNorm(RowIndex row) const {
|
||||
transpose_lower_.HyperSparseSolveWithReversedNonZeros(
|
||||
&dense_zero_scratchpad_, &non_zero_rows_);
|
||||
}
|
||||
return ComputeSquaredNormAndResetToZero(non_zero_rows_,
|
||||
&dense_zero_scratchpad_);
|
||||
return ComputeSquaredNormAndResetToZero(
|
||||
non_zero_rows_,
|
||||
absl::MakeSpan(dense_zero_scratchpad_.data(), num_rows.value()));
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include <algorithm>
|
||||
|
||||
#include "absl/log/check.h"
|
||||
#include "absl/types/span.h"
|
||||
#include "ortools/lp_data/lp_types.h"
|
||||
#include "ortools/lp_data/scattered_vector.h"
|
||||
#include "ortools/lp_data/sparse_column.h"
|
||||
@@ -70,22 +71,75 @@ Fractional PreciseSquaredNorm(const ScatteredColumn& v) {
|
||||
return sum.Value();
|
||||
}
|
||||
|
||||
Fractional SquaredNorm(const DenseColumn& column) {
|
||||
Fractional sum(0.0);
|
||||
RowIndex row(0);
|
||||
const size_t num_blocks = column.size().value() / 4;
|
||||
for (size_t i = 0; i < num_blocks; ++i) {
|
||||
// See the comment in ScalarProduct in the header for some notes about the
|
||||
// effect of adding up several squares at a time.
|
||||
sum += Square(column[row++]) + Square(column[row++]) +
|
||||
Square(column[row++]) + Square(column[row++]);
|
||||
Fractional SquaredNorm(absl::Span<const Fractional> data) {
|
||||
// We expand ourselves since we don't really care about the floating
|
||||
// point order of operation and this seems faster.
|
||||
int i = 0;
|
||||
const int end = data.size();
|
||||
const int shifted_end = end - 3;
|
||||
Fractional sum1 = 0.0;
|
||||
Fractional sum2 = 0.0;
|
||||
Fractional sum3 = 0.0;
|
||||
Fractional sum4 = 0.0;
|
||||
for (; i < shifted_end; i += 4) {
|
||||
sum1 += data[i] * data[i];
|
||||
sum2 += data[i + 1] * data[i + 1];
|
||||
sum3 += data[i + 2] * data[i + 2];
|
||||
sum4 += data[i + 3] * data[i + 3];
|
||||
}
|
||||
while (row < column.size()) {
|
||||
sum += Square(column[row++]);
|
||||
Fractional sum = sum1 + sum2 + sum3 + sum4;
|
||||
if (i < end) {
|
||||
sum += data[i] * data[i];
|
||||
if (i + 1 < end) {
|
||||
sum += data[i + 1] * data[i + 1];
|
||||
if (i + 2 < end) {
|
||||
sum += data[i + 2] * data[i + 2];
|
||||
}
|
||||
}
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
Fractional SquaredNormAndResetToZero(absl::Span<Fractional> data) {
|
||||
// We expand ourselves since we don't really care about the floating
|
||||
// point order of operation and this seems faster.
|
||||
int i = 0;
|
||||
const int end = data.size();
|
||||
const int shifted_end = end - 3;
|
||||
Fractional sum1 = 0.0;
|
||||
Fractional sum2 = 0.0;
|
||||
Fractional sum3 = 0.0;
|
||||
Fractional sum4 = 0.0;
|
||||
for (; i < shifted_end; i += 4) {
|
||||
sum1 += data[i] * data[i];
|
||||
sum2 += data[i + 1] * data[i + 1];
|
||||
sum3 += data[i + 2] * data[i + 2];
|
||||
sum4 += data[i + 3] * data[i + 3];
|
||||
data[i] = 0.0;
|
||||
data[i + 1] = 0.0;
|
||||
data[i + 2] = 0.0;
|
||||
data[i + 3] = 0.0;
|
||||
}
|
||||
Fractional sum = sum1 + sum2 + sum3 + sum4;
|
||||
if (i < end) {
|
||||
sum += data[i] * data[i];
|
||||
data[i] = 0.0;
|
||||
if (i + 1 < end) {
|
||||
sum += data[i + 1] * data[i + 1];
|
||||
data[i + 1] = 0.0;
|
||||
if (i + 2 < end) {
|
||||
sum += data[i + 2] * data[i + 2];
|
||||
data[i + 2] = 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
Fractional SquaredNorm(const DenseColumn& column) {
|
||||
return SquaredNorm(absl::MakeSpan(column.data(), column.size().value()));
|
||||
}
|
||||
|
||||
Fractional PreciseSquaredNorm(const DenseColumn& column) {
|
||||
KahanSum sum;
|
||||
for (RowIndex row(0); row < column.size(); ++row) {
|
||||
|
||||
@@ -147,6 +147,8 @@ Fractional PartialScalarProduct(const DenseRowOrColumn& u,
|
||||
// Returns the norm^2 (sum of the square of the entries) of the given column.
|
||||
// The precise version uses KahanSum and are about two times slower.
|
||||
Fractional SquaredNorm(const SparseColumn& v);
|
||||
Fractional SquaredNorm(absl::Span<const Fractional> data);
|
||||
Fractional SquaredNormAndResetToZero(absl::Span<Fractional> data);
|
||||
Fractional SquaredNorm(const DenseColumn& column);
|
||||
Fractional SquaredNorm(const ColumnView& v);
|
||||
Fractional SquaredNorm(const ScatteredColumn& v);
|
||||
|
||||
Reference in New Issue
Block a user