diff --git a/ortools/glop/basis_representation.cc b/ortools/glop/basis_representation.cc
index 18552a7f82..bede7d6116 100644
--- a/ortools/glop/basis_representation.cc
+++ b/ortools/glop/basis_representation.cc
@@ -302,21 +302,7 @@ Status BasisFactorization::Update(ColIndex entering_col,
   return ForceRefactorization();
 }
 
-void BasisFactorization::LeftSolve(DenseRow* y) const {
-  SCOPED_TIME_STAT(&stats_);
-  RETURN_IF_NULL(y);
-  BumpDeterministicTimeForSolve(matrix_.num_rows().value());
-  if (use_middle_product_form_update_) {
-    lu_factorization_.LeftSolveU(y);
-    rank_one_factorization_.LeftSolve(y);
-    lu_factorization_.LeftSolveL(y);
-  } else {
-    eta_factorization_.LeftSolve(y);
-    lu_factorization_.LeftSolve(y);
-  }
-}
-
-void BasisFactorization::LeftSolveWithNonZeros(ScatteredRow* y) const {
+void BasisFactorization::LeftSolve(ScatteredRow* y) const {
   SCOPED_TIME_STAT(&stats_);
   RETURN_IF_NULL(y);
   BumpDeterministicTimeForSolve(matrix_.num_rows().value());
@@ -324,49 +310,31 @@ void BasisFactorization::LeftSolveWithNonZeros(ScatteredRow* y) const {
     lu_factorization_.LeftSolveUWithNonZeros(y);
     rank_one_factorization_.LeftSolveWithNonZeros(y);
     lu_factorization_.LeftSolveLWithNonZeros(y, nullptr);
+    y->SortNonZerosIfNeeded();
   } else {
+    y->non_zeros.clear();
     eta_factorization_.LeftSolve(&y->values);
     lu_factorization_.LeftSolve(&y->values);
-    ComputeNonZeros(y->values, &y->non_zeros);
   }
 }
 
-void BasisFactorization::RightSolve(DenseColumn* d) const {
-  SCOPED_TIME_STAT(&stats_);
-  RETURN_IF_NULL(d);
-  BumpDeterministicTimeForSolve(matrix_.num_rows().value());
-  if (use_middle_product_form_update_) {
-    lu_factorization_.RightSolveL(d);
-    rank_one_factorization_.RightSolve(d);
-    lu_factorization_.RightSolveU(d);
-  } else {
-    lu_factorization_.RightSolve(d);
-    eta_factorization_.RightSolve(d);
-  }
-}
-
-void BasisFactorization::RightSolveWithNonZeros(ScatteredColumn* d) const {
+void BasisFactorization::RightSolve(ScatteredColumn* d) const {
   SCOPED_TIME_STAT(&stats_);
   RETURN_IF_NULL(d);
   BumpDeterministicTimeForSolve(d->non_zeros.size());
   if (use_middle_product_form_update_) {
-    lu_factorization_.RightSolveL(&d->values);
-    rank_one_factorization_.RightSolve(&d->values);
-
-    // We need to clear non-zeros because at this point in the code, they don't
-    // correspond to the zeros of d. An empty non_zeros means that
-    // RightSolveWithNonZeros() will recompute them.
-    d->non_zeros.clear();
+    lu_factorization_.RightSolveLWithNonZeros(d);
+    rank_one_factorization_.RightSolveWithNonZeros(d);
     lu_factorization_.RightSolveUWithNonZeros(d);
+    d->SortNonZerosIfNeeded();
   } else {
+    d->non_zeros.clear();
     lu_factorization_.RightSolve(&d->values);
     eta_factorization_.RightSolve(&d->values);
-    ComputeNonZeros(d->values, &d->non_zeros);
-    return;
   }
 }
 
-DenseColumn* BasisFactorization::RightSolveForTau(
+const DenseColumn& BasisFactorization::RightSolveForTau(
     const ScatteredColumn& a) const {
   SCOPED_TIME_STAT(&stats_);
   BumpDeterministicTimeForSolve(matrix_.num_rows().value());
@@ -378,26 +346,19 @@ DenseColumn* BasisFactorization::RightSolveForTau(
       lu_factorization_.RightSolveLWithPermutedInput(a.values, &tau_.values);
       tau_.non_zeros.clear();
     } else {
-      // TODO(user): Extract function.
-      if (tau_.non_zeros.empty()) {
-        tau_.values.assign(matrix_.num_rows(), 0);
-      } else {
-        tau_.values.resize(matrix_.num_rows(), 0);
-        for (const RowIndex row : tau_.non_zeros) {
-          tau_[row] = 0.0;
-        }
-      }
+      ClearAndResizeVectorWithNonZeros(matrix_.num_rows(), &tau_);
       lu_factorization_.RightSolveLForScatteredColumn(a, &tau_);
     }
     rank_one_factorization_.RightSolveWithNonZeros(&tau_);
     lu_factorization_.RightSolveUWithNonZeros(&tau_);
   } else {
+    tau_.non_zeros.clear();
     tau_.values = a.values;
     lu_factorization_.RightSolve(&tau_.values);
     eta_factorization_.RightSolve(&tau_.values);
   }
   tau_is_computed_ = true;
-  return &tau_.values;
+  return tau_.values;
 }
 
 void BasisFactorization::LeftSolveForUnitRow(ColIndex j,
@@ -442,12 +403,13 @@ void BasisFactorization::LeftSolveForUnitRow(ColIndex j,
   if (tau_is_computed_) {
     tau_is_computed_ = false;
     tau_computation_can_be_optimized_ =
-        lu_factorization_.LeftSolveLWithNonZeros(y, &tau_.values);
+        lu_factorization_.LeftSolveLWithNonZeros(y, &tau_);
     tau_.non_zeros.clear();
   } else {
     tau_computation_can_be_optimized_ = false;
     lu_factorization_.LeftSolveLWithNonZeros(y, nullptr);
   }
+  y->SortNonZerosIfNeeded();
 }
 
 void BasisFactorization::RightSolveForProblemColumn(ColIndex col,
@@ -456,10 +418,10 @@ void BasisFactorization::RightSolveForProblemColumn(ColIndex col,
   RETURN_IF_NULL(d);
   BumpDeterministicTimeForSolve(matrix_.column(col).num_entries().value());
   if (!use_middle_product_form_update_) {
+    d->non_zeros.clear();
     lu_factorization_.SparseRightSolve(matrix_.column(col), matrix_.num_rows(),
                                        &d->values);
     eta_factorization_.RightSolve(&d->values);
-    ComputeNonZeros(d->values, &d->non_zeros);
     return;
   }
 
@@ -484,6 +446,7 @@ void BasisFactorization::RightSolveForProblemColumn(ColIndex col,
         right_storage_.AddDenseColumnWithNonZeros(d->values, d->non_zeros);
   }
   lu_factorization_.RightSolveUWithNonZeros(d);
+  d->SortNonZerosIfNeeded();
 }
 
 Fractional BasisFactorization::RightSolveSquaredNorm(const SparseColumn& a)
@@ -536,7 +499,8 @@ Fractional BasisFactorization::ComputeInverseOneNorm() const {
   const ColIndex num_cols = RowToColIndex(num_rows);
   Fractional norm = 0.0;
   for (ColIndex col(0); col < num_cols; ++col) {
-    DenseColumn right_hand_side(num_rows, 0.0);
+    ScatteredColumn right_hand_side;
+    right_hand_side.values.AssignToZero(num_rows);
     right_hand_side[ColToRowIndex(col)] = 1.0;
     // Get a column of the matrix inverse.
     RightSolve(&right_hand_side);
@@ -557,7 +521,8 @@ Fractional BasisFactorization::ComputeInverseInfinityNorm() const {
   const ColIndex num_cols = RowToColIndex(num_rows);
   DenseColumn row_sum(num_rows, 0.0);
   for (ColIndex col(0); col < num_cols; ++col) {
-    DenseColumn right_hand_side(num_rows, 0.0);
+    ScatteredColumn right_hand_side;
+    right_hand_side.values.AssignToZero(num_rows);
     right_hand_side[ColToRowIndex(col)] = 1.0;
     // Get a column of the matrix inverse.
     RightSolve(&right_hand_side);
diff --git a/ortools/glop/basis_representation.h b/ortools/glop/basis_representation.h
index f9b34801e4..bbeaf840c1 100644
--- a/ortools/glop/basis_representation.h
+++ b/ortools/glop/basis_representation.h
@@ -205,28 +205,25 @@ class BasisFactorization {
                 const ScatteredColumn& direction) MUST_USE_RESULT;
 
   // Left solves the system y.B = rhs, where y initialy contains rhs.
-  // The second version also computes the non-zero positions of the result.
-  void LeftSolve(DenseRow* y) const;
-  void LeftSolveWithNonZeros(ScatteredRow* y) const;
+  void LeftSolve(ScatteredRow* y) const;
 
   // Left solves the system y.B = e_j, where e_j has only 1 non-zero
   // coefficient of value 1.0 at position 'j'.
   void LeftSolveForUnitRow(ColIndex j, ScatteredRow* y) const;
 
-  // Same as RightSolve() for matrix.column(col).
-  // This also exploits its sparsity.
-  void RightSolveForProblemColumn(ColIndex col, ScatteredColumn* d) const;
-
   // Right solves the system B.d = a where the input is the initial value of d.
-  void RightSolve(DenseColumn* d) const;
-  void RightSolveWithNonZeros(ScatteredColumn* d) const;
+  void RightSolve(ScatteredColumn* d) const;
+
+  // Same as RightSolve() for matrix.column(col). This also exploits its
+  // sparsity.
+  void RightSolveForProblemColumn(ColIndex col, ScatteredColumn* d) const;
 
   // Specialized version for ComputeTau() in DualEdgeNorms. This reuses an
   // intermediate result of the last LeftSolveForUnitRow() in order to save a
   // permutation if it is available. Note that the input 'a' should always be
   // equal to the last result of LeftSolveForUnitRow() and will be used for a
   // DCHECK() or if the intermediate result wasn't kept.
-  DenseColumn* RightSolveForTau(const ScatteredColumn& a) const;
+  const DenseColumn& RightSolveForTau(const ScatteredColumn& a) const;
 
   // Returns the norm of B^{-1}.a, this is a specific function because
   // it is a bit faster and it avoids polluting the stats of RightSolve().
diff --git a/ortools/glop/dual_edge_norms.cc b/ortools/glop/dual_edge_norms.cc
index a642647388..4afc0dc19b 100644
--- a/ortools/glop/dual_edge_norms.cc
+++ b/ortools/glop/dual_edge_norms.cc
@@ -45,7 +45,7 @@ void DualEdgeNorms::UpdateBeforeBasisPivot(
     const ScatteredRow& unit_row_left_inverse) {
   // No need to update if we will recompute it from scratch later.
   if (recompute_edge_squared_norms_) return;
-  DenseColumn* tau = ComputeTau(TransposedView(unit_row_left_inverse));
+  const DenseColumn& tau = ComputeTau(TransposedView(unit_row_left_inverse));
   SCOPED_TIME_STAT(&stats_);
 
   // ||unit_row_left_inverse||^2 is the same as
@@ -80,7 +80,7 @@ void DualEdgeNorms::UpdateBeforeBasisPivot(
     // See Koberstein's PhD section 8.2.2.1.
     edge_squared_norms_[row] +=
         direction[row] *
-        (direction[row] * new_leaving_squared_norm - 2.0 / pivot * (*tau)[row]);
+        (direction[row] * new_leaving_squared_norm - 2.0 / pivot * tau[row]);
 
     // Avoid 0.0 norms (The 1e-4 is the value used by Koberstein).
     // TODO(user): use a more precise lower bound depending on the column norm?
@@ -111,12 +111,12 @@ void DualEdgeNorms::ComputeEdgeSquaredNorms() {
   recompute_edge_squared_norms_ = false;
 }
 
-DenseColumn* DualEdgeNorms::ComputeTau(
+const DenseColumn& DualEdgeNorms::ComputeTau(
     const ScatteredColumn& unit_row_left_inverse) {
   SCOPED_TIME_STAT(&stats_);
-  DenseColumn* result =
+  const DenseColumn& result =
       basis_factorization_.RightSolveForTau(unit_row_left_inverse);
-  IF_STATS_ENABLED(stats_.tau_density.Add(Density(Transpose(*result))));
+  IF_STATS_ENABLED(stats_.tau_density.Add(Density(Transpose(result))));
   return result;
 }
 
diff --git a/ortools/glop/dual_edge_norms.h b/ortools/glop/dual_edge_norms.h
index 1d389461f9..0efd24dd87 100644
--- a/ortools/glop/dual_edge_norms.h
+++ b/ortools/glop/dual_edge_norms.h
@@ -91,7 +91,7 @@ class DualEdgeNorms {
 
   // Computes the vector tau needed to update the norms using a right solve:
   //     B.tau = (u_i)^T, u_i.B = e_i for i = leaving_row.
-  DenseColumn* ComputeTau(const ScatteredColumn& unit_row_left_inverse);
+  const DenseColumn& ComputeTau(const ScatteredColumn& unit_row_left_inverse);
 
   // Statistics.
   struct Stats : public StatsGroup {
diff --git a/ortools/glop/lu_factorization.cc b/ortools/glop/lu_factorization.cc
index 3b452689e1..77009547aa 100644
--- a/ortools/glop/lu_factorization.cc
+++ b/ortools/glop/lu_factorization.cc
@@ -195,8 +195,7 @@ DenseColumn* DenseRowAsColumn(DenseRow* y) {
 void LuFactorization::RightSolveL(DenseColumn* x) const {
   SCOPED_TIME_STAT(&stats_);
   if (!is_identity_factorization_) {
-    ApplyPermutationWhenInputIsProbablySparse(row_perm_,
-                                              &dense_zero_scratchpad_, x);
+    PermuteWithScratchpad(row_perm_, &dense_zero_scratchpad_, x);
     lower_.LowerSolve(x);
   }
 }
@@ -295,6 +294,7 @@ void LuFactorization::RightSolveLForSparseColumn(const SparseColumn& b,
   }
 
   lower_.ComputeRowsToConsiderInSortedOrder(&x->non_zeros);
+  x->non_zeros_are_sorted = true;
   if (x->non_zeros.empty()) {
     lower_.LowerSolveStartingAt(first_column_to_consider, &x->values);
   } else {
@@ -302,19 +302,37 @@ void LuFactorization::RightSolveLForSparseColumn(const SparseColumn& b,
   }
 }
 
+void LuFactorization::RightSolveLWithNonZeros(ScatteredColumn* x) const {
+  if (is_identity_factorization_) return;
+  if (x->non_zeros.empty()) {
+    return RightSolveL(&x->values);
+  }
+  PermuteWithKnownNonZeros(row_perm_, &dense_zero_scratchpad_, &x->values,
+                           &x->non_zeros);
+  lower_.ComputeRowsToConsiderInSortedOrder(&x->non_zeros);
+  x->non_zeros_are_sorted = true;
+  if (x->non_zeros.empty()) {
+    lower_.LowerSolve(&x->values);
+  } else {
+    lower_.HyperSparseSolve(&x->values, &x->non_zeros);
+  }
+}
+
 // TODO(user): This code is almost the same a RightSolveLForSparseColumn()
 // except that the API to iterate on the input is different. Find a way to
 // deduplicate the code.
 void LuFactorization::RightSolveLForScatteredColumn(const ScatteredColumn& b,
                                                     ScatteredColumn* x) const {
+  if (b.non_zeros.empty()) {
+    *x = b;
+    x->non_zeros.clear();
+    return RightSolveL(&x->values);
+  }
   SCOPED_TIME_STAT(&stats_);
   DCHECK(IsAllZero(x->values));
   x->non_zeros.clear();
   if (is_identity_factorization_) {
-    for (const RowIndex row : b.non_zeros) {
-      (*x)[row] = b[row];
-      x->non_zeros.push_back(row);
-    }
+    *x = b;
     return;
   }
 
@@ -340,6 +358,7 @@ void LuFactorization::RightSolveLForScatteredColumn(const ScatteredColumn& b,
   }
 
   lower_.ComputeRowsToConsiderInSortedOrder(&x->non_zeros);
+  x->non_zeros_are_sorted = true;
   if (x->non_zeros.empty()) {
     lower_.LowerSolveStartingAt(first_column_to_consider, &x->values);
   } else {
@@ -350,11 +369,18 @@ void LuFactorization::RightSolveLForScatteredColumn(const ScatteredColumn& b,
 void LuFactorization::LeftSolveUWithNonZeros(ScatteredRow* y) const {
   SCOPED_TIME_STAT(&stats_);
   if (is_identity_factorization_) return;
+  if (!col_perm_.empty()) {
+    // TODO(user): This is just used in the test, in a real solve we never
+    // have a column permutation because we absorb it in the basis.
+    y->non_zeros.clear();
+    LeftSolveU(&y->values);
+    return;
+  }
 
-  DCHECK(col_perm_.empty());
   DenseColumn* const x = DenseRowAsColumn(&y->values);
   RowIndexVector* const nz = reinterpret_cast<RowIndexVector*>(&y->non_zeros);
   transpose_upper_.ComputeRowsToConsiderInSortedOrder(nz);
+  y->non_zeros_are_sorted = true;
   if (nz->empty()) {
     upper_.TransposeUpperSolve(x);
   } else {
@@ -365,7 +391,6 @@ void LuFactorization::LeftSolveUWithNonZeros(ScatteredRow* y) const {
 void LuFactorization::RightSolveUWithNonZeros(ScatteredColumn* x) const {
   SCOPED_TIME_STAT(&stats_);
   if (is_identity_factorization_) {
-    if (x->non_zeros.empty()) ComputeNonZeros(x->values, &x->non_zeros);
     return;
   }
 
@@ -373,8 +398,9 @@ void LuFactorization::RightSolveUWithNonZeros(ScatteredColumn* x) const {
   // non_zeros starts to be too big, we clear it and thus switch back to a
   // normal sparse solve.
   upper_.ComputeRowsToConsiderInSortedOrder(&x->non_zeros);
+  x->non_zeros_are_sorted = true;
   if (x->non_zeros.empty()) {
-    upper_.UpperSolveWithNonZeros(&x->values, &x->non_zeros);
+    upper_.UpperSolve(&x->values);
   } else {
     upper_.HyperSparseSolveWithReversedNonZeros(&x->values, &x->non_zeros);
   }
@@ -383,16 +409,16 @@ void LuFactorization::RightSolveUWithNonZeros(ScatteredColumn* x) const {
     // Note(user): This is only needed for the test because in Glop the
     // col_perm_ is always "absorbed" by permuting the basis. So it is
     // fine to do it in a non hyper-sparse way.
-    PermuteAndComputeNonZeros(inverse_col_perm_, &dense_zero_scratchpad_,
-                              &x->values, &x->non_zeros);
+    x->non_zeros.clear();
+    PermuteWithScratchpad(inverse_col_perm_, &dense_zero_scratchpad_,
+                          &x->values);
   }
 }
 
 bool LuFactorization::LeftSolveLWithNonZeros(
-    ScatteredRow* y, DenseColumn* result_before_permutation) const {
+    ScatteredRow* y, ScatteredColumn* result_before_permutation) const {
   SCOPED_TIME_STAT(&stats_);
   if (is_identity_factorization_) {
-    if (y->non_zeros.empty()) ComputeNonZeros(y->values, &y->non_zeros);
     // It is not advantageous to fill result_before_permutation in this case.
     return false;
   }
@@ -402,6 +428,7 @@ bool LuFactorization::LeftSolveLWithNonZeros(
   // Hypersparse?
   RowIndex last_non_zero_row = ColToRowIndex(y->values.size() - 1);
   transpose_lower_.ComputeRowsToConsiderInSortedOrder(nz);
+  y->non_zeros_are_sorted = true;
   if (nz->empty()) {
     lower_.TransposeLowerSolve(x, &last_non_zero_row);
   } else {
@@ -414,8 +441,7 @@ bool LuFactorization::LeftSolveLWithNonZeros(
     // should be the case because the hyper-sparse functions makes sure of that.
     // We also DCHECK() this below.
     if (nz->empty()) {
-      PermuteAndComputeNonZeros(inverse_row_perm_, &dense_zero_scratchpad_, x,
-                                nz);
+      PermuteWithScratchpad(inverse_row_perm_, &dense_zero_scratchpad_, x);
     } else {
       PermuteWithKnownNonZeros(inverse_row_perm_, &dense_zero_scratchpad_, x,
                                nz);
@@ -430,7 +456,8 @@ bool LuFactorization::LeftSolveLWithNonZeros(
     // This computes the same thing as in the other branch but also keeps the
     // original x in result_before_permutation. Because of this, it is faster to
     // use a different algorithm.
-    x->swap(*result_before_permutation);
+    result_before_permutation->non_zeros.clear();
+    x->swap(result_before_permutation->values);
     x->AssignToZero(inverse_row_perm_.size());
     y->non_zeros.clear();
     for (RowIndex row(0); row <= last_non_zero_row; ++row) {
@@ -438,7 +465,6 @@ bool LuFactorization::LeftSolveLWithNonZeros(
       if (value != 0.0) {
         const RowIndex permuted_row = inverse_row_perm_[row];
         (*x)[permuted_row] = value;
-        y->non_zeros.push_back(RowToColIndex(permuted_row));
       }
     }
     return true;
@@ -468,6 +494,7 @@ ColIndex LuFactorization::LeftSolveUForUnitRow(ColIndex col,
     RowIndexVector* const nz = reinterpret_cast<RowIndexVector*>(&y->non_zeros);
     DenseColumn* const x = reinterpret_cast<DenseColumn*>(&y->values);
     transpose_upper_.ComputeRowsToConsiderInSortedOrder(nz);
+    y->non_zeros_are_sorted = true;
     if (y->non_zeros.empty()) {
       transpose_upper_.LowerSolveStartingAt(permuted_col, x);
     } else {
diff --git a/ortools/glop/lu_factorization.h b/ortools/glop/lu_factorization.h
index 24745a54df..c087ad068c 100644
--- a/ortools/glop/lu_factorization.h
+++ b/ortools/glop/lu_factorization.h
@@ -124,19 +124,19 @@ class LuFactorization {
   // Important: the output y must be of the correct size and all zero.
   ColIndex LeftSolveUForUnitRow(ColIndex col, ScatteredRow* y) const;
 
-  // Specialized version of RightSolveU() and LeftSolveU() that may exploit the
-  // initial non-zeros if it is non-empty. In addition,
-  // RightSolveUWithNonZeros() always return the non-zeros of the output.
+  // Specialized version that may exploit the initial non-zeros if it is
+  // non-empty.
+  void RightSolveLWithNonZeros(ScatteredColumn* x) const;
   void RightSolveUWithNonZeros(ScatteredColumn* x) const;
   void LeftSolveUWithNonZeros(ScatteredRow* y) const;
 
-  // Specialized version of LeftSolveL() that also computes the non-zero
-  // pattern of the output. Moreover, if result_before_permutation is not NULL,
-  // it is filled with the result just before row_perm_ is applied to it and
-  // true is returned. If result_before_permutation is not filled, then false is
-  // returned.
+  // Specialized version of LeftSolveL() that may exploit the initial non_zeros
+  // of y if it is non empty. Moreover, if result_before_permutation is not
+  // NULL, it might be filled with the result just before row_perm_ is applied
+  // to it and true is returned. If result_before_permutation is not filled,
+  // then false is returned.
   bool LeftSolveLWithNonZeros(ScatteredRow* y,
-                              DenseColumn* result_before_permutation) const;
+                              ScatteredColumn* result_before_permutation) const;
 
   // Returns the given column of U.
   // It will only be valid until the next call to GetColumnOfU().
diff --git a/ortools/glop/primal_edge_norms.cc b/ortools/glop/primal_edge_norms.cc
index 95479899e7..d781de373e 100644
--- a/ortools/glop/primal_edge_norms.cc
+++ b/ortools/glop/primal_edge_norms.cc
@@ -142,13 +142,16 @@ void PrimalEdgeNorms::ComputeEdgeSquaredNorms() {
   recompute_edge_squared_norms_ = false;
 }
 
+// TODO(user): It should be possible to reorganize the code and call this when
+// the value of direction is no longer needed. This will simplify the code and
+// avoid a copy here.
 void PrimalEdgeNorms::ComputeDirectionLeftInverse(
     ColIndex entering_col, const ScatteredColumn& direction) {
   SCOPED_TIME_STAT(&stats_);
 
-  // Initialize direction_left_inverse_ to direction.values .
-  // Note the special case when the non-zero vector is empty which means we
-  // don't know and need to use the dense version.
+  // Initialize direction_left_inverse_ to direction. Note the special case when
+  // the non-zero vector is empty which means we don't know and need to use the
+  // dense version.
   const ColIndex size = RowToColIndex(direction.values.size());
   const double kThreshold = 0.05 * size.value();
   if (!direction_left_inverse_.non_zeros.empty() &&
@@ -163,14 +166,10 @@ void PrimalEdgeNorms::ComputeDirectionLeftInverse(
     direction_left_inverse_.non_zeros.clear();
   }
 
-  // Depending on the sparsity of the input, we decide which version to use.
   if (direction.non_zeros.size() < kThreshold) {
-    direction_left_inverse_.non_zeros =
-        *reinterpret_cast<ColIndexVector const*>(&direction.non_zeros);
-    basis_factorization_.LeftSolveWithNonZeros(&direction_left_inverse_);
-  } else {
-    basis_factorization_.LeftSolve(&direction_left_inverse_.values);
+    direction_left_inverse_.non_zeros = TransposedView(direction).non_zeros;
   }
+  basis_factorization_.LeftSolve(&direction_left_inverse_);
 
   // TODO(user): Refactorize if estimated accuracy above a threshold.
   IF_STATS_ENABLED(stats_.direction_left_inverse_accuracy.Add(
diff --git a/ortools/glop/rank_one_update.h b/ortools/glop/rank_one_update.h
index f91544540d..deaae30871 100644
--- a/ortools/glop/rank_one_update.h
+++ b/ortools/glop/rank_one_update.h
@@ -63,14 +63,13 @@ class RankOneUpdateElementaryMatrix {
         -storage_->ColumnScalarProduct(v_index_, Transpose(*x)) / mu_;
     storage_->ColumnAddMultipleToDenseColumn(u_index_, multiplier, x);
   }
-  void RightSolveWithNonZeros(
-      ScatteredColumn* x, StrictITIVector<RowIndex, bool>* is_non_zero) const {
+  void RightSolveWithNonZeros(ScatteredColumn* x) const {
     DCHECK(!IsSingular());
     const Fractional multiplier =
         -storage_->ColumnScalarProduct(v_index_, Transpose(x->values)) / mu_;
     if (multiplier != 0.0) {
-      storage_->ColumnAddMultipleToDenseColumnAndUpdateNonZeros(
-          u_index_, multiplier, &x->values, is_non_zero, &x->non_zeros);
+      storage_->ColumnAddMultipleToSparseScatteredColumn(u_index_, multiplier,
+                                                         x);
     }
   }
 
@@ -83,16 +82,13 @@ class RankOneUpdateElementaryMatrix {
     storage_->ColumnAddMultipleToDenseColumn(v_index_, multiplier,
                                              reinterpret_cast<DenseColumn*>(y));
   }
-  void LeftSolveWithNonZeros(
-      ScatteredRow* y, StrictITIVector<ColIndex, bool>* is_non_zero) const {
+  void LeftSolveWithNonZeros(ScatteredRow* y) const {
     DCHECK(!IsSingular());
     const Fractional multiplier =
         -storage_->ColumnScalarProduct(u_index_, y->values) / mu_;
     if (multiplier != 0.0) {
-      storage_->ColumnAddMultipleToDenseColumnAndUpdateNonZeros(
-          v_index_, multiplier, reinterpret_cast<DenseColumn*>(&y->values),
-          reinterpret_cast<StrictITIVector<RowIndex, bool>*>(is_non_zero),
-          reinterpret_cast<std::vector<RowIndex>*>(&y->non_zeros));
+      storage_->ColumnAddMultipleToSparseScatteredColumn(
+          v_index_, multiplier, reinterpret_cast<ScatteredColumn*>(y));
     }
   }
 
@@ -172,20 +168,19 @@ class RankOneUpdateFactorization {
     }
 
     // tmp_row_is_non_zero_ is always all false before and after this code.
-    tmp_col_is_non_zero_.resize(y->values.size(), false);
-    DCHECK(std::all_of(tmp_col_is_non_zero_.begin(), tmp_col_is_non_zero_.end(),
-                       [](bool v) { return !v; }));
-    for (const ColIndex col : y->non_zeros) tmp_col_is_non_zero_[col] = true;
+    y->is_non_zero.resize(y->values.size(), false);
+    DCHECK(IsAllFalse(y->is_non_zero));
+    for (const ColIndex col : y->non_zeros) y->is_non_zero[col] = true;
     const int hypersparse_threshold = static_cast<int>(
         hypersparse_ratio_ * static_cast<double>(y->values.size().value()));
     for (int i = elementary_matrices_.size() - 1; i >= 0; --i) {
       if (y->non_zeros.size() < hypersparse_threshold) {
-        elementary_matrices_[i].LeftSolveWithNonZeros(y, &tmp_col_is_non_zero_);
+        elementary_matrices_[i].LeftSolveWithNonZeros(y);
       } else {
         elementary_matrices_[i].LeftSolve(&y->values);
       }
     }
-    for (const ColIndex col : y->non_zeros) tmp_col_is_non_zero_[col] = false;
+    for (const ColIndex col : y->non_zeros) y->is_non_zero[col] = false;
     if (y->non_zeros.size() >= hypersparse_threshold) y->non_zeros.clear();
   }
 
@@ -207,23 +202,21 @@ class RankOneUpdateFactorization {
       return;
     }
 
-    // tmp_row_is_non_zero_ is always all false before and after this code.
-    tmp_row_is_non_zero_.resize(d->values.size(), false);
-    DCHECK(std::all_of(tmp_row_is_non_zero_.begin(), tmp_row_is_non_zero_.end(),
-                       [](bool v) { return !v; }));
-    for (const RowIndex row : d->non_zeros) tmp_row_is_non_zero_[row] = true;
+    // d->is_non_zero is always all false before and after this code.
+    d->is_non_zero.resize(d->values.size(), false);
+    DCHECK(IsAllFalse(d->is_non_zero));
+    for (const RowIndex row : d->non_zeros) d->is_non_zero[row] = true;
     const size_t end = elementary_matrices_.size();
     const int hypersparse_threshold = static_cast<int>(
         hypersparse_ratio_ * static_cast<double>(d->values.size().value()));
     for (int i = 0; i < end; ++i) {
       if (d->non_zeros.size() < hypersparse_threshold) {
-        elementary_matrices_[i].RightSolveWithNonZeros(d,
-                                                       &tmp_row_is_non_zero_);
+        elementary_matrices_[i].RightSolveWithNonZeros(d);
       } else {
         elementary_matrices_[i].RightSolve(&d->values);
       }
     }
-    for (const RowIndex row : d->non_zeros) tmp_row_is_non_zero_[row] = false;
+    for (const RowIndex row : d->non_zeros) d->is_non_zero[row] = false;
     if (d->non_zeros.size() >= hypersparse_threshold) d->non_zeros.clear();
   }
 
@@ -233,8 +226,6 @@ class RankOneUpdateFactorization {
   double hypersparse_ratio_;
   EntryIndex num_entries_;
   std::vector<RankOneUpdateElementaryMatrix> elementary_matrices_;
-  mutable StrictITIVector<RowIndex, bool> tmp_row_is_non_zero_;
-  mutable StrictITIVector<ColIndex, bool> tmp_col_is_non_zero_;
   DISALLOW_COPY_AND_ASSIGN(RankOneUpdateFactorization);
 };
 
diff --git a/ortools/glop/reduced_costs.cc b/ortools/glop/reduced_costs.cc
index f4e934014a..21375e6bea 100644
--- a/ortools/glop/reduced_costs.cc
+++ b/ortools/glop/reduced_costs.cc
@@ -245,7 +245,7 @@ void ReducedCosts::PerturbCosts() {
         std::max(max_cost_magnitude, std::abs(objective_[col]));
   }
 
-  objective_perturbation_.assign(matrix_.num_cols(), 0.0);
+  objective_perturbation_.AssignToZero(matrix_.num_cols());
   for (ColIndex col(0); col < structural_size; ++col) {
     const Fractional objective = objective_[col];
     const Fractional magnitude =
@@ -297,7 +297,7 @@ void ReducedCosts::ShiftCost(ColIndex col) {
 
 void ReducedCosts::ClearAndRemoveCostShifts() {
   SCOPED_TIME_STAT(&stats_);
-  objective_perturbation_.assign(matrix_.num_cols(), 0.0);
+  objective_perturbation_.AssignToZero(matrix_.num_cols());
   recompute_basic_objective_ = true;
   recompute_basic_objective_left_inverse_ = true;
   recompute_reduced_costs_ = true;
@@ -320,7 +320,7 @@ const DenseRow& ReducedCosts::GetReducedCosts() {
 const DenseColumn& ReducedCosts::GetDualValues() {
   SCOPED_TIME_STAT(&stats_);
   ComputeBasicObjectiveLeftInverse();
-  return Transpose(basic_objective_left_inverse_);
+  return Transpose(basic_objective_left_inverse_.values);
 }
 
 void ReducedCosts::RecomputeReducedCostsAndPrimalEnteringCandidatesIfNeeded() {
@@ -366,9 +366,9 @@ void ReducedCosts::ComputeReducedCosts() {
 #endif
   if (num_omp_threads == 1) {
     for (ColIndex col(0); col < num_cols; ++col) {
-      reduced_costs_[col] =
-          objective_[col] + objective_perturbation_[col] -
-          matrix_.ColumnScalarProduct(col, basic_objective_left_inverse_);
+      reduced_costs_[col] = objective_[col] + objective_perturbation_[col] -
+                            matrix_.ColumnScalarProduct(
+                                col, basic_objective_left_inverse_.values);
 
       // We also compute the dual residual error y.B - c_B.
       if (is_basic.IsSet(col)) {
@@ -386,9 +386,9 @@ void ReducedCosts::ComputeReducedCosts() {
 #pragma omp parallel for num_threads(num_omp_threads)
     for (int i = 0; i < parallel_loop_size; i++) {
       const ColIndex col(i);
-      reduced_costs_[col] =
-          objective_[col] + objective_perturbation_[col] -
-          matrix_.ColumnScalarProduct(col, basic_objective_left_inverse_);
+      reduced_costs_[col] = objective_[col] + objective_perturbation_[col] -
+                            matrix_.ColumnScalarProduct(
+                                col, basic_objective_left_inverse_.values);
 
       if (is_basic.IsSet(col)) {
         thread_local_dual_residual_error[omp_get_thread_num()] =
@@ -422,15 +422,15 @@ void ReducedCosts::ComputeReducedCosts() {
 
 void ReducedCosts::ComputeBasicObjectiveLeftInverse() {
   SCOPED_TIME_STAT(&stats_);
-  basic_objective_left_inverse_.resize(RowToColIndex(matrix_.num_rows()), 0.0);
   if (recompute_basic_objective_) {
     ComputeBasicObjective();
   }
-  basic_objective_left_inverse_ = basic_objective_;
+  basic_objective_left_inverse_.values = basic_objective_;
+  basic_objective_left_inverse_.non_zeros.clear();
   basis_factorization_.LeftSolve(&basic_objective_left_inverse_);
   recompute_basic_objective_left_inverse_ = false;
   IF_STATS_ENABLED(stats_.basic_objective_left_inverse_density.Add(
-      Density(basic_objective_left_inverse_)));
+      Density(basic_objective_left_inverse_.values)));
 
   // TODO(user): Estimate its accuracy by a few scalar products, and refactorize
   // if it is above a threshold?
@@ -487,10 +487,19 @@ void ReducedCosts::UpdateReducedCosts(ColIndex entering_col,
   // we can use them in ComputeCurrentDualResidualError().
   const ScatteredRow& unit_row_left_inverse =
       update_row->GetUnitRowLeftInverse();
-  for (const ColIndex col : unit_row_left_inverse.non_zeros) {
-    const ColIndex slack_col = first_slack_col + col;
-    const Fractional coeff = unit_row_left_inverse[col];
-    reduced_costs_[slack_col] += new_leaving_reduced_cost * coeff;
+  if (unit_row_left_inverse.non_zeros.empty()) {
+    const ColIndex num_cols = unit_row_left_inverse.values.size();
+    for (ColIndex col(0); col < num_cols; ++col) {
+      const ColIndex slack_col = first_slack_col + col;
+      const Fractional coeff = unit_row_left_inverse[col];
+      reduced_costs_[slack_col] += new_leaving_reduced_cost * coeff;
+    }
+  } else {
+    for (const ColIndex col : unit_row_left_inverse.non_zeros) {
+      const ColIndex slack_col = first_slack_col + col;
+      const Fractional coeff = unit_row_left_inverse[col];
+      reduced_costs_[slack_col] += new_leaving_reduced_cost * coeff;
+    }
   }
   reduced_costs_[leaving_col] = new_leaving_reduced_cost;
 
diff --git a/ortools/glop/reduced_costs.h b/ortools/glop/reduced_costs.h
index d1f8320a02..b15e6a4b48 100644
--- a/ortools/glop/reduced_costs.h
+++ b/ortools/glop/reduced_costs.h
@@ -273,7 +273,7 @@ class ReducedCosts {
   // algorithm, but may gives us a good idea of the current precision of our
   // estimates. It is also faster to compute the unit_row_left_inverse_ because
   // of sparsity.
-  DenseRow basic_objective_left_inverse_;
+  ScatteredRow basic_objective_left_inverse_;
 
   // This is usually parameters_.dual_feasibility_tolerance() except when the
   // dual residual error |y.B - c_B| is higher than it and we have to increase
diff --git a/ortools/glop/revised_simplex.cc b/ortools/glop/revised_simplex.cc
index 8c5d59422b..3e2e565c64 100644
--- a/ortools/glop/revised_simplex.cc
+++ b/ortools/glop/revised_simplex.cc
@@ -781,7 +781,7 @@ bool RevisedSimplex::InitializeBoundsAndTestIfUnchanged(
   SCOPED_TIME_STAT(&function_stats_);
   lower_bound_.resize(num_cols_, 0.0);
   upper_bound_.resize(num_cols_, 0.0);
-  bound_perturbation_.assign(num_cols_, 0.0);
+  bound_perturbation_.AssignToZero(num_cols_);
 
   // Variable bounds, for both non-slack and slack variables.
   bool bounds_are_unchanged = true;
@@ -1325,7 +1325,7 @@ void RevisedSimplex::CorrectErrorsOnVariableValues() {
 
 void RevisedSimplex::ComputeVariableValuesError() {
   SCOPED_TIME_STAT(&function_stats_);
-  error_.assign(num_rows_, 0.0);
+  error_.AssignToZero(num_rows_);
   const DenseRow& variable_values = variable_values_.GetDenseRow();
   for (ColIndex col(0); col < num_cols_; ++col) {
     const Fractional value = variable_values[col];
@@ -1338,9 +1338,21 @@ void RevisedSimplex::ComputeDirection(ColIndex col) {
   DCHECK_COL_BOUNDS(col);
   basis_factorization_.RightSolveForProblemColumn(col, &direction_);
   direction_infinity_norm_ = 0.0;
-  for (const RowIndex row : direction_.non_zeros) {
-    direction_infinity_norm_ =
-        std::max(direction_infinity_norm_, std::abs(direction_[row]));
+  if (direction_.non_zeros.empty()) {
+    // We still compute the direction non-zeros because our code relies on it.
+    for (RowIndex row(0); row < num_rows_; ++row) {
+      const Fractional value = direction_[row];
+      if (value != 0.0) {
+        direction_.non_zeros.push_back(row);
+        direction_infinity_norm_ =
+            std::max(direction_infinity_norm_, std::abs(value));
+      }
+    }
+  } else {
+    for (const RowIndex row : direction_.non_zeros) {
+      direction_infinity_norm_ =
+          std::max(direction_infinity_norm_, std::abs(direction_[row]));
+    }
   }
   IF_STATS_ENABLED(ratio_test_stats_.direction_density.Add(
       num_rows_ == 0 ? 0.0
@@ -1952,27 +1964,50 @@ void RevisedSimplex::DualPhaseIUpdatePriceOnReducedCostChange(
         ++num_dual_infeasible_positions_;
       }
       if (!something_to_do) {
+        initially_all_zero_scratchpad_.is_non_zero.resize(num_rows_, false);
         initially_all_zero_scratchpad_.values.resize(num_rows_, 0.0);
         something_to_do = true;
       }
-      compact_matrix_.ColumnAddMultipleToDenseColumn(
+      compact_matrix_.ColumnAddMultipleToSparseScatteredColumn(
           col, sign - dual_infeasibility_improvement_direction_[col],
-          &initially_all_zero_scratchpad_.values);
+          &initially_all_zero_scratchpad_);
       dual_infeasibility_improvement_direction_[col] = sign;
     }
   }
   if (something_to_do) {
+    // TODO(user): This code is duplicated with UpdateGivenNonBasicVariables()
+    // and more generally with the one in RankOneUpdateFactorization. Fix.
+    if (ShouldUseDenseIteration(initially_all_zero_scratchpad_)) {
+      initially_all_zero_scratchpad_.non_zeros.clear();
+      initially_all_zero_scratchpad_.is_non_zero.assign(num_rows_, false);
+    } else {
+      for (const RowIndex row : initially_all_zero_scratchpad_.non_zeros) {
+        initially_all_zero_scratchpad_.is_non_zero[row] = false;
+      }
+    }
+
     const VariableTypeRow& variable_type = variables_info_.GetTypeRow();
     const Fractional threshold = parameters_.ratio_test_zero_threshold();
-    basis_factorization_.RightSolveWithNonZeros(
-        &initially_all_zero_scratchpad_);
-    for (const RowIndex row : initially_all_zero_scratchpad_.non_zeros) {
-      dual_pricing_vector_[row] += initially_all_zero_scratchpad_[row];
-      initially_all_zero_scratchpad_[row] = 0.0;
-      is_dual_entering_candidate_.Set(
-          row,
-          IsDualPhaseILeavingCandidate(dual_pricing_vector_[row],
-                                       variable_type[basis_[row]], threshold));
+    basis_factorization_.RightSolve(&initially_all_zero_scratchpad_);
+    if (initially_all_zero_scratchpad_.non_zeros.empty()) {
+      for (RowIndex row(0); row < num_rows_; ++row) {
+        if (initially_all_zero_scratchpad_[row] == 0.0) continue;
+        dual_pricing_vector_[row] += initially_all_zero_scratchpad_[row];
+        is_dual_entering_candidate_.Set(
+            row, IsDualPhaseILeavingCandidate(dual_pricing_vector_[row],
+                                              variable_type[basis_[row]],
+                                              threshold));
+      }
+      initially_all_zero_scratchpad_.values.AssignToZero(num_rows_);
+    } else {
+      for (const RowIndex row : initially_all_zero_scratchpad_.non_zeros) {
+        dual_pricing_vector_[row] += initially_all_zero_scratchpad_[row];
+        initially_all_zero_scratchpad_[row] = 0.0;
+        is_dual_entering_candidate_.Set(
+            row, IsDualPhaseILeavingCandidate(dual_pricing_vector_[row],
+                                              variable_type[basis_[row]],
+                                              threshold));
+      }
     }
     initially_all_zero_scratchpad_.non_zeros.clear();
   }
@@ -1999,9 +2034,9 @@ Status RevisedSimplex::DualPhaseIChooseLeavingVariableRow(
       dual_pricing_vector_.empty()) {
     // Recompute everything from scratch.
     num_dual_infeasible_positions_ = 0;
-    dual_pricing_vector_.assign(num_rows_, 0.0);
+    dual_pricing_vector_.AssignToZero(num_rows_);
     is_dual_entering_candidate_.ClearAndResize(num_rows_);
-    dual_infeasibility_improvement_direction_.assign(num_cols_, 0.0);
+    dual_infeasibility_improvement_direction_.AssignToZero(num_cols_);
     DualPhaseIUpdatePriceOnReducedCostChange(
         variables_info_.GetIsRelevantBitRow());
   } else {
@@ -2247,7 +2282,7 @@ Status RevisedSimplex::Minimize(TimeLimit* time_limit) {
   if (feasibility_phase_) {
     // Initialize the primal phase-I objective.
     // Note that this temporarily erases the problem objective.
-    objective_.assign(num_cols_, 0.0);
+    objective_.AssignToZero(num_cols_);
     UpdatePrimalPhaseICosts(
         util::IntegerRange<RowIndex>(RowIndex(0), num_rows_));
   }
@@ -2368,7 +2403,7 @@ Status RevisedSimplex::Minimize(TimeLimit* time_limit) {
       } else {
         VLOG(1) << "Unbounded problem.";
         problem_status_ = ProblemStatus::PRIMAL_UNBOUNDED;
-        solution_primal_ray_.assign(num_cols_, 0.0);
+        solution_primal_ray_.AssignToZero(num_cols_);
         for (RowIndex row(0); row < num_rows_; ++row) {
           const ColIndex col = basis_[row];
           solution_primal_ray_[col] = -direction_[row];
@@ -2650,7 +2685,7 @@ Status RevisedSimplex::DualMinimize(TimeLimit* time_limit) {
         solution_dual_ray_ =
             Transpose(update_row_.GetUnitRowLeftInverse().values);
         update_row_.RecomputeFullUpdateRow(leaving_row);
-        solution_dual_ray_row_combination_.assign(num_cols_, 0.0);
+        solution_dual_ray_row_combination_.AssignToZero(num_cols_);
         for (const ColIndex col : update_row_.GetNonZeroPositions()) {
           solution_dual_ray_row_combination_[col] =
               update_row_.GetCoefficient(col);
diff --git a/ortools/glop/update_row.cc b/ortools/glop/update_row.cc
index 6aa92275eb..27c7dff130 100644
--- a/ortools/glop/update_row.cc
+++ b/ortools/glop/update_row.cc
@@ -68,8 +68,7 @@ void UpdateRow::ComputeUnitRowLeftInverse(RowIndex leaving_row) {
                                   unit_row_left_inverse_.values) -
       1.0));
   IF_STATS_ENABLED(stats_.unit_row_left_inverse_density.Add(
-      static_cast<double>(unit_row_left_inverse_.non_zeros.size()) /
-      static_cast<double>(matrix_.num_rows().value())));
+      Density(unit_row_left_inverse_.values())));
 }
 
 void UpdateRow::ComputeUpdateRow(RowIndex leaving_row) {
@@ -78,7 +77,8 @@ void UpdateRow::ComputeUpdateRow(RowIndex leaving_row) {
   ComputeUnitRowLeftInverse(leaving_row);
   SCOPED_TIME_STAT(&stats_);
 
-  if (parameters_.use_transposed_matrix()) {
+  if (parameters_.use_transposed_matrix() &&
+      !unit_row_left_inverse_.non_zeros.empty()) {
     // Number of entries that ComputeUpdatesRowWise() will need to look at.
     EntryIndex num_row_wise_entries(0);
     for (const ColIndex col : unit_row_left_inverse_.non_zeros) {
diff --git a/ortools/glop/variable_values.cc b/ortools/glop/variable_values.cc
index ba2682158a..582849cf42 100644
--- a/ortools/glop/variable_values.cc
+++ b/ortools/glop/variable_values.cc
@@ -66,10 +66,11 @@ void VariableValues::RecomputeBasicVariableValues() {
   SCOPED_TIME_STAT(&stats_);
   DCHECK(basis_factorization_.IsRefactorized());
   const RowIndex num_rows = matrix_.num_rows();
-  scratchpad_.assign(num_rows, 0.0);
+  scratchpad_.non_zeros.clear();
+  scratchpad_.values.AssignToZero(num_rows);
   for (const ColIndex col : variables_info_.GetNotBasicBitRow()) {
     const Fractional value = variable_values_[col];
-    matrix_.ColumnAddMultipleToDenseColumn(col, -value, &scratchpad_);
+    matrix_.ColumnAddMultipleToDenseColumn(col, -value, &scratchpad_.values);
   }
   basis_factorization_.RightSolve(&scratchpad_);
   for (RowIndex row(0); row < num_rows; ++row) {
@@ -79,13 +80,14 @@ void VariableValues::RecomputeBasicVariableValues() {
 
 Fractional VariableValues::ComputeMaximumPrimalResidual() const {
   SCOPED_TIME_STAT(&stats_);
-  scratchpad_.assign(matrix_.num_rows(), 0.0);
+  scratchpad_.non_zeros.clear();
+  scratchpad_.values.AssignToZero(matrix_.num_rows());
   const ColIndex num_cols = matrix_.num_cols();
   for (ColIndex col(0); col < num_cols; ++col) {
     const Fractional value = variable_values_[col];
-    matrix_.ColumnAddMultipleToDenseColumn(col, value, &scratchpad_);
+    matrix_.ColumnAddMultipleToDenseColumn(col, value, &scratchpad_.values);
   }
-  return InfinityNorm(scratchpad_);
+  return InfinityNorm(scratchpad_.values);
 }
 
 Fractional VariableValues::ComputeMaximumPrimalInfeasibility() const {
@@ -144,24 +146,45 @@ void VariableValues::UpdateGivenNonBasicVariables(
     const std::vector<ColIndex>& cols_to_update, bool update_basic_variables) {
   SCOPED_TIME_STAT(&stats_);
   if (update_basic_variables) {
-    initially_all_zero_scratchpad_.values.resize(matrix_.num_rows(), 0.0);
+    const RowIndex num_rows = matrix_.num_rows();
+    initially_all_zero_scratchpad_.values.resize(num_rows, 0.0);
+    initially_all_zero_scratchpad_.is_non_zero.resize(num_rows, false);
     DCHECK(IsAllZero(initially_all_zero_scratchpad_.values));
+    DCHECK(IsAllFalse(initially_all_zero_scratchpad_.is_non_zero));
+
+    // TODO(user): Abort the non-zeros computation earlier if dense.
     for (ColIndex col : cols_to_update) {
       const Fractional old_value = variable_values_[col];
       SetNonBasicVariableValueFromStatus(col);
-      matrix_.ColumnAddMultipleToDenseColumn(
+      matrix_.ColumnAddMultipleToSparseScatteredColumn(
           col, variable_values_[col] - old_value,
-          &initially_all_zero_scratchpad_.values);
+          &initially_all_zero_scratchpad_);
     }
-    basis_factorization_.RightSolveWithNonZeros(
-        &initially_all_zero_scratchpad_);
-    for (const RowIndex row : initially_all_zero_scratchpad_.non_zeros) {
-      variable_values_[basis_[row]] -= initially_all_zero_scratchpad_[row];
-      initially_all_zero_scratchpad_[row] = 0.0;
+    if (ShouldUseDenseIteration(initially_all_zero_scratchpad_)) {
+      initially_all_zero_scratchpad_.non_zeros.clear();
+      initially_all_zero_scratchpad_.is_non_zero.assign(num_rows, false);
+    } else {
+      for (const RowIndex row : initially_all_zero_scratchpad_.non_zeros) {
+        initially_all_zero_scratchpad_.is_non_zero[row] = false;
+      }
+    }
+
+    basis_factorization_.RightSolve(&initially_all_zero_scratchpad_);
+    if (initially_all_zero_scratchpad_.non_zeros.empty()) {
+      for (RowIndex row(0); row < num_rows; ++row) {
+        variable_values_[basis_[row]] -= initially_all_zero_scratchpad_[row];
+      }
+      initially_all_zero_scratchpad_.values.AssignToZero(num_rows);
+      ResetPrimalInfeasibilityInformation();
+    } else {
+      for (const RowIndex row : initially_all_zero_scratchpad_.non_zeros) {
+        variable_values_[basis_[row]] -= initially_all_zero_scratchpad_[row];
+        initially_all_zero_scratchpad_[row] = 0.0;
+      }
+      UpdatePrimalInfeasibilityInformation(
+          initially_all_zero_scratchpad_.non_zeros);
+      initially_all_zero_scratchpad_.non_zeros.clear();
     }
-    UpdatePrimalInfeasibilityInformation(
-        initially_all_zero_scratchpad_.non_zeros);
-    initially_all_zero_scratchpad_.non_zeros.clear();
   } else {
     for (ColIndex col : cols_to_update) {
       SetNonBasicVariableValueFromStatus(col);
@@ -182,22 +205,31 @@ void VariableValues::ResetPrimalInfeasibilityInformation() {
   const RowIndex num_rows = matrix_.num_rows();
   primal_squared_infeasibilities_.resize(num_rows, 0.0);
   primal_infeasible_positions_.ClearAndResize(num_rows);
-  UpdatePrimalInfeasibilities(
-      util::IntegerRange<RowIndex>(RowIndex(0), num_rows));
+
+  const DenseRow& lower_bounds = variables_info_.GetVariableLowerBounds();
+  const DenseRow& upper_bounds = variables_info_.GetVariableUpperBounds();
+  for (RowIndex row(0); row < num_rows; ++row) {
+    const ColIndex col = basis_[row];
+    const Fractional value = variable_values_[col];
+    const Fractional magnitude =
+        std::max(value - upper_bounds[col], lower_bounds[col] - value);
+    if (magnitude > tolerance_) {
+      primal_squared_infeasibilities_[row] = Square(magnitude);
+      primal_infeasible_positions_.Set(row);
+    }
+  }
 }
 
 void VariableValues::UpdatePrimalInfeasibilityInformation(
     const std::vector<RowIndex>& rows) {
   if (primal_squared_infeasibilities_.size() != matrix_.num_rows()) {
     ResetPrimalInfeasibilityInformation();
-  } else {
-    SCOPED_TIME_STAT(&stats_);
-    UpdatePrimalInfeasibilities(rows);
+    return;
   }
-}
 
-template <typename Rows>
-void VariableValues::UpdatePrimalInfeasibilities(const Rows& rows) {
+  // Note(user): this is the same as the code in
+  // ResetPrimalInfeasibilityInformation(), but we do need the clear part.
+  SCOPED_TIME_STAT(&stats_);
   const DenseRow& lower_bounds = variables_info_.GetVariableLowerBounds();
   const DenseRow& upper_bounds = variables_info_.GetVariableUpperBounds();
   for (const RowIndex row : rows) {
diff --git a/ortools/glop/variable_values.h b/ortools/glop/variable_values.h
index 21f07f5b77..c30da3a766 100644
--- a/ortools/glop/variable_values.h
+++ b/ortools/glop/variable_values.h
@@ -104,10 +104,6 @@ class VariableValues {
   std::string StatString() const { return stats_.StatString(); }
 
  private:
-  // Internal version of UpdatePrimalInfeasibilityInformation().
-  template <typename Rows>
-  void UpdatePrimalInfeasibilities(const Rows& rows);
-
   // Input problem data.
   const CompactSparseMatrix& matrix_;
   const RowToColMapping& basis_;
@@ -123,7 +119,7 @@ class VariableValues {
   DenseBitColumn primal_infeasible_positions_;
 
   mutable StatsGroup stats_;
-  mutable DenseColumn scratchpad_;
+  mutable ScatteredColumn scratchpad_;
 
   // A temporary scattered column that is always reset to all zero after use.
   ScatteredColumn initially_all_zero_scratchpad_;
diff --git a/ortools/lp_data/lp_types.h b/ortools/lp_data/lp_types.h
index 809e43f882..711a3cc6b9 100644
--- a/ortools/lp_data/lp_types.h
+++ b/ortools/lp_data/lp_types.h
@@ -343,38 +343,6 @@ typedef StrictITIVector<RowIndex, ColIndex> RowToColMapping;
 // Column of constraints (slack variables) statuses.
 typedef StrictITIVector<RowIndex, ConstraintStatus> ConstraintStatusColumn;
 
-// A simple struct that contains a DenseColumn and its non-zeros indices.
-struct ScatteredColumn {
-  DenseColumn values;
-
-  // This can be left empty in which case we just have the dense representation
-  // above. Otherwise, it should always be a subset of the actual non-zeros.
-  //
-  // Note that the non-zeros should always be sorted by increasing index. This
-  // allows to have exactly the same iteration behavior if we iterate on the
-  // non-zeros or directly in a dense-way on the values.
-  RowIndexVector non_zeros;
-
-  Fractional operator[](RowIndex row) const { return values[row]; }
-  Fractional& operator[](RowIndex row) { return values[row]; }
-};
-
-// Same as ScatteredColumn for a row.
-struct ScatteredRow {
-  DenseRow values;
-  ColIndexVector non_zeros;
-
-  Fractional operator[](ColIndex col) const { return values[col]; }
-  Fractional& operator[](ColIndex col) { return values[col]; }
-};
-
-inline const ScatteredRow& TransposedView(const ScatteredColumn& c) {
-  return reinterpret_cast<const ScatteredRow&>(c);
-}
-inline const ScatteredColumn& TransposedView(const ScatteredRow& r) {
-  return reinterpret_cast<const ScatteredColumn&>(r);
-}
-
 // Returns true if it is more advantageous to use a dense iteration rather than
 // using the non-zeros positions.
 //
@@ -391,6 +359,46 @@ bool ShouldUseDenseIteration(const ScatteredRowOrCol& v) {
              static_cast<double>(v.values.size().value());
 }
 
+// A simple struct that contains a DenseVector and its non-zeros indices.
+template <typename Index>
+struct ScatteredVector {
+  StrictITIVector<Index, Fractional> values;
+
+  // This can be left empty in which case we just have the dense representation
+  // above. Otherwise, it should always be a subset of the actual non-zeros.
+  bool non_zeros_are_sorted = false;
+  std::vector<Index> non_zeros;
+
+  // Temporary vector used in some sparse computation on the ScatteredColumn.
+  // True indicate a possible non-zero value. Note that its state is not always
+  // consistent.
+  StrictITIVector<Index, bool> is_non_zero;
+
+  Fractional operator[](Index index) const { return values[index]; }
+  Fractional& operator[](Index index) { return values[index]; }
+
+  // Sorting the non-zeros is not always needed, but it allows us to have
+  // exactly the same behavior while using a sparse iteration or a dense one. So
+  // we always do it after a Solve().
+  void SortNonZerosIfNeeded() {
+    if (!non_zeros_are_sorted) {
+      std::sort(non_zeros.begin(), non_zeros.end());
+      non_zeros_are_sorted = true;
+    }
+  }
+};
+
+// Specialization used in the code.
+struct ScatteredColumn : public ScatteredVector<RowIndex> {};
+struct ScatteredRow : public ScatteredVector<ColIndex> {};
+
+inline const ScatteredRow& TransposedView(const ScatteredColumn& c) {
+  return reinterpret_cast<const ScatteredRow&>(c);
+}
+inline const ScatteredColumn& TransposedView(const ScatteredRow& r) {
+  return reinterpret_cast<const ScatteredColumn&>(r);
+}
+
 // This is used during the deterministic time computation to convert a given
 // number of floating-point operations to something in the same order of
 // magnitude as a second (on a 2014 desktop).
diff --git a/ortools/lp_data/lp_utils.h b/ortools/lp_data/lp_utils.h
index 2b5a45484e..4df7a8b0d6 100644
--- a/ortools/lp_data/lp_utils.h
+++ b/ortools/lp_data/lp_utils.h
@@ -137,6 +137,9 @@ Fractional InfinityNorm(const DenseColumn& v);
 Fractional InfinityNorm(const SparseColumn& v);
 
 // Returns the fraction of non-zero entries of the given row.
+//
+// TODO(user): Take a Scattered row/col instead. This is only used to report
+// stats, but we should still have a sparse version to do it faster.
 double Density(const DenseRow& row);
 
 // Sets to 0.0 all entries of the given row whose fabs() is lower than the given
@@ -205,35 +208,31 @@ inline bool IsAllZero(const Container& input) {
   return true;
 }
 
-// Permutes the given dense vector and computes the positions of its non-zeros.
-// It uses for this an all zero dense vector (zero_scratchpad).
-// This operation is efficient for a sparse vector because:
-// - It combines two iterations in one.
-// - It avoids cache pollution by not looking at unecessary permuted locations.
-// Note that the produced non_zeros is not ordered which may be a drawback.
-// TODO(user): Investigate alternatives with an ordered non_zeros.
-template <typename IndexType, typename PermutationIndexType,
-          typename NonZeroIndexType>
-inline void PermuteAndComputeNonZeros(
+// Returns true if the given vector of bool is all false.
+template <typename BoolVector>
+bool IsAllFalse(const BoolVector& v) {
+  return std::all_of(v.begin(), v.end(), [](bool value) { return !value; });
+}
+
+// Permutes the given dense vector. It uses for this an all zero scratchpad.
+template <typename IndexType, typename PermutationIndexType>
+inline void PermuteWithScratchpad(
     const Permutation<PermutationIndexType>& permutation,
     StrictITIVector<IndexType, Fractional>* zero_scratchpad,
-    StrictITIVector<IndexType, Fractional>* output,
-    std::vector<NonZeroIndexType>* non_zeros) {
-  non_zeros->clear();
+    StrictITIVector<IndexType, Fractional>* input_output) {
   DCHECK(IsAllZero(*zero_scratchpad));
-  zero_scratchpad->swap(*output);
-  output->resize(zero_scratchpad->size(), 0.0);
-  const IndexType end = zero_scratchpad->size();
-  for (IndexType index(0); index < end; ++index) {
+  const IndexType size = input_output->size();
+  zero_scratchpad->swap(*input_output);
+  input_output->resize(size, 0.0);
+  for (IndexType index(0); index < size; ++index) {
     const Fractional value = (*zero_scratchpad)[index];
     if (value != 0.0) {
-      (*zero_scratchpad)[index] = 0.0;
       const IndexType permuted_index(
           permutation[PermutationIndexType(index.value())].value());
-      (*output)[permuted_index] = value;
-      non_zeros->push_back(NonZeroIndexType(permuted_index.value()));
+      (*input_output)[permuted_index] = value;
     }
   }
+  zero_scratchpad->assign(size, 0.0);
 }
 
 // Same as PermuteAndComputeNonZeros() except that we assume that the given
@@ -249,7 +248,6 @@ inline void PermuteWithKnownNonZeros(
   output->resize(zero_scratchpad->size(), 0.0);
   for (IndexType& index_ref : *non_zeros) {
     const Fractional value = (*zero_scratchpad)[index_ref];
-    DCHECK_NE(value, 0.0);
     (*zero_scratchpad)[index_ref] = 0.0;
     const IndexType permuted_index(permutation[index_ref]);
     (*output)[permuted_index] = value;
@@ -257,25 +255,6 @@ inline void PermuteWithKnownNonZeros(
   }
 }
 
-// Same algorithm as PermuteAndComputeNonZeros() above when the non-zeros are
-// not needed. This should be faster than a simple ApplyPermutation() if the
-// input vector is relatively sparse. The input is the initial value of output.
-inline void ApplyPermutationWhenInputIsProbablySparse(
-    const Permutation<RowIndex>& permutation, DenseColumn* zero_scratchpad,
-    DenseColumn* output) {
-  const RowIndex num_rows(permutation.size());
-  DCHECK(IsAllZero(*zero_scratchpad));
-  zero_scratchpad->swap(*output);
-  output->resize(num_rows, 0.0);
-  for (RowIndex row(0); row < num_rows; ++row) {
-    const Fractional value = (*zero_scratchpad)[row];
-    if (value != 0.0) {
-      (*zero_scratchpad)[row] = 0.0;
-      (*output)[permutation[row]] = value;
-    }
-  }
-}
-
 // Sets a dense vector for which the non zeros are known to be non_zeros.
 template <typename IndexType, typename ScatteredRowOrCol>
 inline void ClearAndResizeVectorWithNonZeros(IndexType size,
@@ -284,7 +263,8 @@ inline void ClearAndResizeVectorWithNonZeros(IndexType size,
   // compared to the wanted size. Note that in most cases the vector will
   // already be of the correct size.
   const double kSparseThreshold = 0.05;
-  if (v->non_zeros.size() < kSparseThreshold * size.value()) {
+  if (!v->non_zeros.empty() &&
+      v->non_zeros.size() < kSparseThreshold * size.value()) {
     for (const IndexType index : v->non_zeros) {
       DCHECK_LT(index, v->values.size());
       (*v)[index] = 0.0;
diff --git a/ortools/lp_data/sparse.cc b/ortools/lp_data/sparse.cc
index 1957d920ac..fdad0bed25 100644
--- a/ortools/lp_data/sparse.cc
+++ b/ortools/lp_data/sparse.cc
@@ -756,36 +756,19 @@ void TriangularMatrix::LowerSolveStartingAtInternal(ColIndex start,
 
 void TriangularMatrix::UpperSolve(DenseColumn* rhs) const {
   if (all_diagonal_coefficients_are_one_) {
-    UpperSolveWithNonZerosInternal<true, false>(rhs, nullptr);
+    UpperSolveInternal<true>(rhs);
   } else {
-    UpperSolveWithNonZerosInternal<false, false>(rhs, nullptr);
+    UpperSolveInternal<false>(rhs);
   }
 }
 
-void TriangularMatrix::UpperSolveWithNonZeros(
-    DenseColumn* rhs, RowIndexVector* non_zero_rows) const {
-  if (all_diagonal_coefficients_are_one_) {
-    UpperSolveWithNonZerosInternal<true, true>(rhs, non_zero_rows);
-  } else {
-    UpperSolveWithNonZerosInternal<false, true>(rhs, non_zero_rows);
-  }
-}
-
-template <bool diagonal_of_ones, bool with_non_zeros>
-void TriangularMatrix::UpperSolveWithNonZerosInternal(
-    DenseColumn* rhs, RowIndexVector* non_zero_rows) const {
+template <bool diagonal_of_ones>
+void TriangularMatrix::UpperSolveInternal(DenseColumn* rhs) const {
   RETURN_IF_NULL(rhs);
-  if (with_non_zeros) {
-    RETURN_IF_NULL(non_zero_rows);
-    non_zero_rows->clear();
-  }
   const ColIndex end = first_non_identity_column_;
   for (ColIndex col(diagonal_coefficients_.size() - 1); col >= end; --col) {
     const Fractional value = (*rhs)[ColToRowIndex(col)];
     if (value == 0.0) continue;
-    if (with_non_zeros) {
-      non_zero_rows->push_back(ColToRowIndex(col));
-    }
     const Fractional coeff =
         diagonal_of_ones ? value : value / diagonal_coefficients_[col];
     if (!diagonal_of_ones) {
@@ -800,18 +783,6 @@ void TriangularMatrix::UpperSolveWithNonZerosInternal(
       (*rhs)[EntryRow(i)] -= coeff * EntryCoefficient(i);
     }
   }
-
-  // Finish filling the non_zero_rows vector if needed.
-  if (with_non_zeros) {
-    for (RowIndex row(end.value() - 1); row >= 0; --row) {
-      if ((*rhs)[row] != 0.0) {
-        non_zero_rows->push_back(row);
-      }
-    }
-
-    // Sorting the non-zero positions gives better performance later.
-    std::reverse(non_zero_rows->begin(), non_zero_rows->end());
-  }
 }
 
 void TriangularMatrix::TransposeUpperSolve(DenseColumn* rhs) const {
diff --git a/ortools/lp_data/sparse.h b/ortools/lp_data/sparse.h
index 2a5cb67b2f..26b7dbe6f1 100644
--- a/ortools/lp_data/sparse.h
+++ b/ortools/lp_data/sparse.h
@@ -418,20 +418,20 @@ class CompactSparseMatrix {
   // Same as ColumnAddMultipleToDenseColumn() but also adds the new non-zeros to
   // the non_zeros vector. A non-zero is "new" if is_non_zero[row] was false,
   // and we update dense_column[row]. This functions also updates is_non_zero.
-  void ColumnAddMultipleToDenseColumnAndUpdateNonZeros(
-      ColIndex col, Fractional multiplier, DenseColumn* dense_column,
-      StrictITIVector<RowIndex, bool>* is_non_zero,
-      std::vector<RowIndex>* non_zeros) const {
+  void ColumnAddMultipleToSparseScatteredColumn(ColIndex col,
+                                                Fractional multiplier,
+                                                ScatteredColumn* column) const {
     if (multiplier == 0.0) return;
-    RETURN_IF_NULL(dense_column);
+    RETURN_IF_NULL(column);
     for (const EntryIndex i : Column(col)) {
       const RowIndex row = EntryRow(i);
-      (*dense_column)[row] += multiplier * EntryCoefficient(i);
-      if (!(*is_non_zero)[row]) {
-        (*is_non_zero)[row] = true;
-        non_zeros->push_back(row);
+      (*column)[row] += multiplier * EntryCoefficient(i);
+      if (!column->is_non_zero[row]) {
+        column->is_non_zero[row] = true;
+        column->non_zeros.push_back(row);
       }
     }
+    column->non_zeros_are_sorted = false;
   }
 
   // Copies the given column of this matrix into the given dense_column.
@@ -588,10 +588,6 @@ class TriangularMatrix : private CompactSparseMatrix {
   // This also computes the last non-zero row position (if not nullptr).
   void TransposeLowerSolve(DenseColumn* rhs, RowIndex* last_non_zero_row) const;
 
-  // This also computes the non-zero row positions (if not nullptr).
-  void UpperSolveWithNonZeros(DenseColumn* rhs,
-                              RowIndexVector* non_zero_rows) const;
-
   // Hyper-sparse version of the triangular solve functions. The passed
   // non_zero_rows should contain the positions of the symbolic non-zeros of the
   // result in the order in which they need to be accessed (or in the reverse
@@ -714,9 +710,8 @@ class TriangularMatrix : private CompactSparseMatrix {
   // Internal versions of some Solve() functions to avoid code duplication.
   template <bool diagonal_of_ones>
   void LowerSolveStartingAtInternal(ColIndex start, DenseColumn* rhs) const;
-  template <bool diagonal_of_ones, bool with_non_zeros>
-  void UpperSolveWithNonZerosInternal(DenseColumn* rhs,
-                                      RowIndexVector* non_zero_rows) const;
+  template <bool diagonal_of_ones>
+  void UpperSolveInternal(DenseColumn* rhs) const;
   template <bool diagonal_of_ones>
   void TransposeLowerSolveInternal(DenseColumn* rhs,
                                    RowIndex* last_non_zero_row) const;