// Copyright 2010-2022 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Solve a linear regression problem on random data.
//
// Problem data:
//   There are num_features features, indexed by j.
//   There are num_examples training examples, indexed by i.
//   x_ij: the feature value for example i and feature j in the training data.
//   y_i: the label for example i in the training data.
//
// Decision variables:
//   beta_j: the coefficient to learn for each feature j.
//   z_i: the prediction error for example i.
//
// Optimization problem:
//   min  sum_i z_i^2
//   s.t. y_i - sum_j beta_j * x_ij = z_i  for all i
//
// This is the unregularized linear regression problem.
//
// This example solves the problem on randomly generated (x, y) data. The data
// is generated by assuming some true values for beta (generated at random,
// i.i.d. N(0, 1)), then drawing each x_ij as N(0, 1), and then computing
//   y_i = beta * x_i + N(0, noise)
// where noise is a command line flag.
//
// After solving the optimization problem above to recover values for beta, the
// in-sample and out-of-sample loss (average squared prediction error) of the
// learned model are printed.
//
// For an advanced version, see:
//   ortools/math_opt/codelabs/regression/
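//
// Note: because this is unregularized least squares, beta could also be
// recovered directly from the normal equations, (X^T X) beta = X^T y. Solving
// the equivalent quadratic program instead keeps the example focused on the
// MathOpt API.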

#include <iostream>
#include <ostream>
#include <utility>
#include <vector>

#include "absl/algorithm/container.h"
#include "absl/flags/flag.h"
#include "absl/random/random.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_cat.h"
#include "ortools/base/init_google.h"
#include "ortools/base/logging.h"
#include "ortools/base/status_builder.h"
#include "ortools/base/status_macros.h"
#include "ortools/math_opt/cpp/math_opt.h"

ABSL_FLAG(operations_research::math_opt::SolverType, solver_type,
          operations_research::math_opt::SolverType::kGurobi,
          "The solver needs to support quadratic objectives, e.g. pdlp, "
          "gurobi, or osqp.");

ABSL_FLAG(int, num_features, 10,
          "The number of features in the linear regression model.");
ABSL_FLAG(int, num_examples, 100,
          "The number of examples to use in the train and test sets.");
ABSL_FLAG(double, noise, 3.0,
          "The standard deviation of the noise on the labels.");

namespace {

namespace math_opt = operations_research::math_opt;

// Data for a learned function f(x) = sum_j beta_j * x_j.
struct LinearModel {
  // Has size equal to the number of features.
  std::vector<double> betas;
};

// Creates a random linear function where each beta is i.i.d. Normal(0, 1).
LinearModel RandomLinearModel(const int num_features) {
  LinearModel result;
  absl::BitGen bit_gen;
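  // Note: absl::BitGen seeds itself nondeterministically, so every run of the
  // binary draws a different ground-truth model.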
  for (int j = 0; j < num_features; ++j) {
    result.betas.push_back(absl::Gaussian<double>(bit_gen));
  }
  return result;
}

// A training example associating the label `y` with the features `xs`.
struct LabeledExample {
  // Has size equal to the number of features.
  std::vector<double> xs;
  double y = 0.0;
};

// Creates num_examples random examples where y approx ground_truth(xs).
//
// Specifically, y = ground_truth(xs) + N(0, noise_stddev), where the xs are
// drawn i.i.d. as N(0, 1), independently of the noise.
std::vector<LabeledExample> RandomData(const LinearModel& ground_truth,
                                       const int num_examples,
                                       const double noise_stddev) {
  const int num_features = static_cast<int>(ground_truth.betas.size());
  absl::BitGen bit_gen;
  std::vector<LabeledExample> examples;
  for (int i = 0; i < num_examples; ++i) {
    LabeledExample example;
    for (int j = 0; j < num_features; ++j) {
      example.xs.push_back(absl::Gaussian<double>(bit_gen));
    }
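    // The third argument to c_inner_product is the initial value of the
    // accumulator, so seeding it with a Gaussian draw computes
    // x_i . beta + N(0, noise_stddev) in one expression.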
    example.y =
        absl::c_inner_product(example.xs, ground_truth.betas,
                              absl::Gaussian(bit_gen, 0.0, noise_stddev));
    examples.push_back(std::move(example));
  }
  return examples;
}

// Computes the average squared error between model(example.xs) and example.y.
double L2Loss(const LinearModel& model,
              const std::vector<LabeledExample>& examples) {
  double result = 0.0;
  for (const LabeledExample& example : examples) {
    CHECK_EQ(example.xs.size(), model.betas.size());
    const double error =
        example.y - absl::c_inner_product(example.xs, model.betas, 0.0);
    result += error * error;
  }
  return examples.empty() ? 0.0 : result / examples.size();
}

// Computes and returns the linear function minimizing L2Loss on train_data by
// solving a quadratic optimization problem, or returns a Status error if
// train_data is empty or the solver fails to find an optimal solution.
absl::StatusOr<LinearModel> Train(
    const std::vector<LabeledExample>& train_data) {
  if (train_data.empty()) {
    return absl::InvalidArgumentError("train_data must be nonempty");
  }
  const int num_features = static_cast<int>(train_data[0].xs.size());
  const int num_train = static_cast<int>(train_data.size());
  math_opt::Model model("linear_regression");

  // Create the decision variables: beta and z.
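  // Note: AddVariable(name) creates a continuous variable with default bounds
  // (-infinity, +infinity), which is what unconstrained regression needs.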
  std::vector<math_opt::Variable> betas;
  for (int j = 0; j < num_features; ++j) {
    betas.push_back(model.AddVariable(absl::StrCat("beta_", j)));
  }
  std::vector<math_opt::Variable> zs;
  for (int i = 0; i < num_train; ++i) {
    zs.push_back(model.AddVariable(absl::StrCat("z_", i)));
  }

  // Set the objective function:
  //   minimize sum_i z_i^2
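  // math_opt::InnerProduct(zs, zs) builds the quadratic expression
  //   z_0*z_0 + z_1*z_1 + ... in one call.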
  model.Minimize(math_opt::InnerProduct(zs, zs));
  // Add the constraints:
  //   z_i == y_i - x_i * beta
  for (int i = 0; i < num_train; ++i) {
    const LabeledExample& example = train_data[i];
    model.AddLinearConstraint(
        zs[i] == example.y - math_opt::InnerProduct(example.xs, betas));
  }

  // Done building the model, now solve.
  math_opt::SolveArguments args;
  args.parameters.enable_output = true;
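  // enable_output streams the underlying solver's own logs to standard output;
  // it is useful for debugging but off by default.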
  ASSIGN_OR_RETURN(const math_opt::SolveResult result,
                   Solve(model, absl::GetFlag(FLAGS_solver_type), args));
  RETURN_IF_ERROR(result.termination.EnsureIsOptimal());
  std::cout << "Training time: " << result.solve_time() << std::endl;
  return LinearModel{.betas = Values(result.variable_values(), betas)};
}

absl::Status Main() {
  const int num_features = absl::GetFlag(FLAGS_num_features);
  const int num_train = absl::GetFlag(FLAGS_num_examples);
  const double noise_stddev = absl::GetFlag(FLAGS_noise);
  const int num_test = num_train;

  // Generate random training and test data.
  const LinearModel ground_truth = RandomLinearModel(num_features);
  const std::vector<LabeledExample> train_data =
      RandomData(ground_truth, num_train, noise_stddev);
  const std::vector<LabeledExample> test_data =
      RandomData(ground_truth, num_test, noise_stddev);

  // Solve the regression problem.
  ASSIGN_OR_RETURN(const LinearModel learned_model, Train(train_data));

  // Evaluate the solution.
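  // With num_examples well above num_features, both losses should be close to
  // noise^2 (the irreducible error); the in-sample loss is typically slightly
  // below it and the out-of-sample loss slightly above.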
  std::cout << "In sample loss: " << L2Loss(learned_model, train_data)
            << std::endl;
  std::cout << "Out of sample loss: " << L2Loss(learned_model, test_data)
            << std::endl;
  return absl::OkStatus();
}

}  // namespace

int main(int argc, char** argv) {
  InitGoogle(argv[0], &argc, &argv, true);
  const absl::Status status = Main();
  if (!status.ok()) {
    LOG(QFATAL) << status;
  }
  return 0;
}