OR-Tools  9.1
fp_utils.cc
Go to the documentation of this file.
1 // Copyright 2010-2021 Google LLC
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
14 #include "ortools/util/fp_utils.h"
15 
16 #include <limits.h>
17 #include <stdint.h>
18 
19 #include <algorithm>
20 #include <cmath>
21 #include <limits>
22 #include <utility>
23 #include <vector>
24 
25 #include "absl/base/casts.h"
26 #include "absl/base/internal/endian.h"
28 #include "ortools/base/logging.h"
29 #include "ortools/util/bitset.h"
30 
31 namespace operations_research {
32 
33 namespace {
34 
35 void ReorderAndCapTerms(double* min, double* max) {
36  if (*min > *max) std::swap(*min, *max);
37  if (*min > 0.0) *min = 0.0;
38  if (*max < 0.0) *max = 0.0;
39 }
40 
41 template <bool use_bounds>
42 void ComputeScalingErrors(const std::vector<double>& input,
43  const std::vector<double>& lb,
44  const std::vector<double>& ub, double scaling_factor,
46  double* max_scaled_sum_error) {
47  double max_error = 0.0;
48  double min_error = 0.0;
50  const int size = input.size();
51  for (int i = 0; i < size; ++i) {
52  const double x = input[i];
53  if (x == 0.0) continue;
54  const double scaled = x * scaling_factor;
55 
56  if (scaled == 0.0) {
57  *max_relative_coeff_error = std::numeric_limits<double>::infinity();
58  } else {
60  *max_relative_coeff_error, std::abs(std::round(scaled) / scaled - 1));
61  }
62 
63  const double error = std::round(scaled) - scaled;
64  const double error_lb = (use_bounds ? error * lb[i] : -error);
65  const double error_ub = (use_bounds ? error * ub[i] : error);
66  max_error += std::max(error_lb, error_ub);
67  min_error += std::min(error_lb, error_ub);
68  }
69  *max_scaled_sum_error = std::max(std::abs(max_error), std::abs(min_error));
70 }
71 
72 template <bool use_bounds>
73 void GetBestScalingOfDoublesToInt64(const std::vector<double>& input,
74  const std::vector<double>& lb,
75  const std::vector<double>& ub,
76  int64_t max_absolute_sum,
77  double* scaling_factor) {
78  const double kInfinity = std::numeric_limits<double>::infinity();
79 
80  // We start by initializing the returns value to the "error" state.
81  *scaling_factor = 0;
82 
83  // Abort in the "error" state if max_absolute_sum doesn't make sense.
84  if (max_absolute_sum < 0) return;
85 
86  // Our scaling scaling_factor will be 2^factor_exponent.
87  //
88  // TODO(user): Consider using a non-power of two factor if the error can't be
89  // zero? Note however that using power of two has the extra advantage that
90  // subsequent int64_t -> double -> scaled back to int64_t will loose no extra
91  // information.
92  int factor_exponent = 0;
93  uint64_t sum_min = 0; // negated.
94  uint64_t sum_max = 0;
95  bool recompute_sum = false;
96  bool is_first_value = true;
97  const int msb = MostSignificantBitPosition64(max_absolute_sum);
98  const int size = input.size();
99  for (int i = 0; i < size; ++i) {
100  const double x = input[i];
101  double min_term = use_bounds ? x * lb[i] : -x;
102  double max_term = use_bounds ? x * ub[i] : x;
103  ReorderAndCapTerms(&min_term, &max_term);
104 
105  // If min_term or max_term is not finite, then abort in the "error" state.
106  if (!(min_term > -kInfinity && max_term < kInfinity)) return;
107 
108  // A value of zero can just be skipped (and needs to because the code below
109  // doesn't handle it correctly).
110  if (min_term == 0.0 && max_term == 0.0) continue;
111 
112  // Compute the greatest candidate such that
113  // round(fabs(c).2^candidate) <= max_absolute_sum.
114  const double c = std::max(-min_term, max_term);
115  int candidate = msb - ilogb(c);
116  if (std::round(ldexp(std::abs(c), candidate)) > max_absolute_sum) {
117  --candidate;
118  }
119  DCHECK_LE(std::abs(static_cast<int64_t>(round(ldexp(c, candidate)))),
120  max_absolute_sum);
121 
122  // Update factor_exponent which is the min of all the candidates.
123  if (is_first_value || candidate < factor_exponent) {
124  is_first_value = false;
125  factor_exponent = candidate;
126  recompute_sum = true;
127  } else {
128  // Update the sum of absolute values of the numbers seen so far.
129  sum_min -=
130  static_cast<int64_t>(std::round(ldexp(min_term, factor_exponent)));
131  sum_max +=
132  static_cast<int64_t>(std::round(ldexp(max_term, factor_exponent)));
133  if (sum_min > max_absolute_sum || sum_max > max_absolute_sum) {
134  factor_exponent--;
135  recompute_sum = true;
136  }
137  }
138 
139  // TODO(user): This is not super efficient, but note that in practice we
140  // will only scan the vector of values about log(size) times. It is possible
141  // to maintain an upper bound on the abs_sum in linear time instead, but the
142  // code and corner cases are a lot more involved. Also, we currently only
143  // use this code in situations where its run-time is negligeable compared to
144  // the rest.
145  while (recompute_sum) {
146  sum_min = 0;
147  sum_max = 0;
148  for (int j = 0; j <= i; ++j) {
149  const double x = input[j];
150  double min_term = use_bounds ? x * lb[j] : -x;
151  double max_term = use_bounds ? x * ub[j] : x;
152  ReorderAndCapTerms(&min_term, &max_term);
153  sum_min -=
154  static_cast<int64_t>(std::round(ldexp(min_term, factor_exponent)));
155  sum_max +=
156  static_cast<int64_t>(std::round(ldexp(max_term, factor_exponent)));
157  }
158  if (sum_min > max_absolute_sum || sum_max > max_absolute_sum) {
159  factor_exponent--;
160  } else {
161  recompute_sum = false;
162  }
163  }
164  }
165  *scaling_factor = ldexp(1.0, factor_exponent);
166 }
167 
168 } // namespace
169 
170 void ComputeScalingErrors(const std::vector<double>& input,
171  const std::vector<double>& lb,
172  const std::vector<double>& ub, double scaling_factor,
173  double* max_relative_coeff_error,
174  double* max_scaled_sum_error) {
175  ComputeScalingErrors<true>(input, lb, ub, scaling_factor,
176  max_relative_coeff_error, max_scaled_sum_error);
177 }
178 
179 double GetBestScalingOfDoublesToInt64(const std::vector<double>& input,
180  const std::vector<double>& lb,
181  const std::vector<double>& ub,
182  int64_t max_absolute_sum) {
183  double scaling_factor;
184  GetBestScalingOfDoublesToInt64<true>(input, lb, ub, max_absolute_sum,
185  &scaling_factor);
186  return scaling_factor;
187 }
188 
189 void GetBestScalingOfDoublesToInt64(const std::vector<double>& input,
190  int64_t max_absolute_sum,
191  double* scaling_factor,
192  double* max_relative_coeff_error) {
193  double max_scaled_sum_error;
194  GetBestScalingOfDoublesToInt64<false>(input, {}, {}, max_absolute_sum,
195  scaling_factor);
196  ComputeScalingErrors<false>(input, {}, {}, *scaling_factor,
197  max_relative_coeff_error, &max_scaled_sum_error);
198 }
199 
200 int64_t ComputeGcdOfRoundedDoubles(const std::vector<double>& x,
201  double scaling_factor) {
202  int64_t gcd = 0;
203  for (int i = 0; i < x.size() && gcd != 1; ++i) {
204  int64_t value = std::abs(std::round(x[i] * scaling_factor));
205  DCHECK_GE(value, 0);
206  if (value == 0) continue;
207  if (gcd == 0) {
208  gcd = value;
209  continue;
210  }
211  // GCD(gcd, value) = GCD(value, gcd % value);
212  while (value != 0) {
213  const int64_t r = gcd % value;
214  gcd = value;
215  value = r;
216  }
217  }
218  DCHECK_GE(gcd, 0);
219  return gcd > 0 ? gcd : 1;
220 }
221 
222 int fast_ilogb(double value) {
223  static_assert(CHAR_BIT == 8);
224  static_assert(sizeof(double) == 8);
225  // Get little-endian bit-representation of the floating point value.
226  const uint64_t bit_rep =
227  absl::little_endian::FromHost64(absl::bit_cast<uint64_t>(value));
228  return static_cast<int>((bit_rep >> 52) & 0x7FF) - 1023;
229 }
230 
231 void fast_scalbn_inplace(double& mutable_value, int exponent) {
232  mutable_value = fast_scalbn(mutable_value, exponent);
233 }
234 
235 double fast_scalbn(double value, int exponent) {
236  if (value == 0.0) return 0.0;
237  uint64_t bit_rep =
238  absl::little_endian::FromHost64(absl::bit_cast<uint64_t>(value));
239  // Binary representation is: (sign-bit)(11 exponent bits)(52 mantissa bits)
240  constexpr uint64_t kExponentMask(0x7FF0000000000000);
241  // This addition relies on the fact that signed numbers are written in
242  // two-s complement, and is correct as long as the sum does not
243  // overflow/underflow the result.
244  const uint64_t value_exponent =
245  (bit_rep + (static_cast<uint64_t>(exponent) << 52)) & kExponentMask;
246  bit_rep &= ~kExponentMask;
247  bit_rep |= value_exponent;
248  return absl::bit_cast<double>(absl::little_endian::ToHost64(bit_rep));
249 }
250 
251 } // namespace operations_research
void fast_scalbn_inplace(double &mutable_value, int exponent)
Definition: fp_utils.cc:231
int64_t min
Definition: alldiff_cst.cc:139
void swap(IdMap< K, V > &a, IdMap< K, V > &b)
Definition: id_map.h:263
void ComputeScalingErrors(const std::vector< double > &input, const std::vector< double > &lb, const std::vector< double > &ub, double scaling_factor, double *max_relative_coeff_error, double *max_scaled_sum_error)
Definition: fp_utils.cc:170
double GetBestScalingOfDoublesToInt64(const std::vector< double > &input, const std::vector< double > &lb, const std::vector< double > &ub, int64_t max_absolute_sum)
Definition: fp_utils.cc:179
double max_relative_coeff_error
double fast_scalbn(double value, int exponent)
Definition: fp_utils.cc:235
int64_t max
Definition: alldiff_cst.cc:140
int MostSignificantBitPosition64(uint64_t n)
Definition: bitset.h:231
int64_t ComputeGcdOfRoundedDoubles(const std::vector< double > &x, double scaling_factor)
Definition: fp_utils.cc:200
const double kInfinity
Definition: lp_types.h:84
static int input(yyscan_t yyscanner)
#define DCHECK_GE(val1, val2)
Definition: base/logging.h:890
#define DCHECK_LE(val1, val2)
Definition: base/logging.h:888
Collection of objects used to extend the Constraint Solver library.
int fast_ilogb(double value)
Definition: fp_utils.cc:222
int64_t value