Files
ortools-clone/ortools/math_opt/samples/cpp/linear_regression.cc
2023-11-17 16:25:02 +01:00

205 lines
7.5 KiB
C++

// Copyright 2010-2022 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Solve a linear regression problem on random data.
//
// Problem data:
// There are num_features features, indexed by j.
// There are num_examples training examples indexed by i.
// x_ij: The feature value for example i and feature j in the training data.
// y_i: The label for example i in the training data.
//
// Decision variables:
// beta_j: the coefficient to learn for each feature j.
// z_i: the prediction error for example i.
//
// Optimization problem:
// min sum_i z_i^2
// s.t. y_i - sum_j beta_j * x_ij = z_i
//
// This is the unregularized linear regression problem.
//
// This example solves the problem on randomly generated (x, y) data. The data
// is generated by assuming some true values for beta (generated at random,
// i.i.d. N(0, 1)), then drawing each x_ij as N(0, 1) and then computing
// y_i = beta * x_i + N(0, noise)
// where noise is a command line flag.
//
// After solving the optimization problem above to recover values for beta, the
// in sample and out of sample loss (average squared prediction error) for the
// learned model are printed.
//
// For an advanced version, see:
// ortools/math_opt/codelabs/regression/
#include <iostream>
#include <ostream>
#include <utility>
#include <vector>
#include "absl/algorithm/container.h"
#include "absl/random/random.h"
#include "absl/status/status.h"
#include "ortools/base/init_google.h"
#include "ortools/base/logging.h"
#include "ortools/base/status_builder.h"
#include "ortools/base/status_macros.h"
#include "ortools/math_opt/cpp/math_opt.h"
// Which MathOpt solver to use. The model below has a quadratic objective, so
// the solver must support quadratic optimization (e.g. Gurobi, PDLP, or OSQP).
ABSL_FLAG(operations_research::math_opt::SolverType, solver_type,
          operations_research::math_opt::SolverType::kGurobi,
          "The solver needs to support quadratic objectives, e.g. pdlp, "
          "gurobi, or osqp.");
// Problem-size flags: number of features (columns of x) and examples (rows).
ABSL_FLAG(int, num_features, 10,
          "The number of features in the linear regression model.");
ABSL_FLAG(int, num_examples, 100,
          "The number of examples to use in the train and test sets.");
// Standard deviation of the i.i.d. Gaussian noise added to each label y_i.
ABSL_FLAG(double, noise, 3.0,
          "The standard deviation of the noise on the labels.");
namespace {
namespace math_opt = operations_research::math_opt;
// Data for a learned function f(x) = sum_j beta_j * x_j.
struct LinearModel {
  // betas[j] is the coefficient for feature j; the prediction for a feature
  // vector x is sum_j betas[j] * x[j]. Has size equal to the number of
  // features.
  std::vector<double> betas;
};
// Creates a random linear function where each beta is i.i.d. Normal(0,1).
// Builds a linear model with num_features coefficients, each drawn
// independently from the standard normal distribution N(0, 1).
LinearModel RandomLinearModel(const int num_features) {
  absl::BitGen gen;
  LinearModel model;
  model.betas.reserve(num_features);
  for (int feature = 0; feature < num_features; ++feature) {
    model.betas.push_back(absl::Gaussian<double>(gen));
  }
  return model;
}
// A training example associating the label `y` to features `xs`.
struct LabeledExample {
  // Feature values x_j for this example. Has size equal to the number of
  // features.
  std::vector<double> xs;
  // The label (target value) y for this example.
  double y = 0.0;
};
// Creates num_examples random examples where y approx ground_truth(xs).
//
// Specifically, y = ground_truth(xs) + N(0, noise_stddev), where the xs are
// generated randomly as N(0,1) and are jointly independent with the noise.
std::vector<LabeledExample> RandomData(const LinearModel& ground_truth,
const int num_examples,
const double noise_stddev) {
const int num_features = static_cast<int>(ground_truth.betas.size());
absl::BitGen bit_gen;
std::vector<LabeledExample> examples;
for (int i = 0; i < num_examples; ++i) {
LabeledExample example;
for (int j = 0; j < num_features; ++j) {
example.xs.push_back(absl::Gaussian<double>(bit_gen));
}
example.y =
absl::c_inner_product(example.xs, ground_truth.betas,
absl::Gaussian(bit_gen, 0.0, noise_stddev));
examples.push_back(std::move(example));
}
return examples;
}
// Computes the average squared error between model(example.xs) and example.y.
// Returns the mean squared prediction error of `model` over `examples`, or
// 0.0 when `examples` is empty. CHECK-fails if an example's feature count
// does not match the model's coefficient count.
double L2Loss(const LinearModel& model,
              const std::vector<LabeledExample>& examples) {
  if (examples.empty()) return 0.0;
  double total_squared_error = 0.0;
  for (const LabeledExample& example : examples) {
    CHECK_EQ(example.xs.size(), model.betas.size());
    const double residual =
        example.y - absl::c_inner_product(example.xs, model.betas, 0.0);
    total_squared_error += residual * residual;
  }
  return total_squared_error / examples.size();
}
// Computes and returns the linear function minimizing L2Loss on train_data by
// solving a quadratic optimization problem, or returns a Status error if the
// solver fails to find an optimal solution.
// Computes and returns the linear function minimizing L2Loss on train_data by
// solving the quadratic optimization problem
//   min  sum_i z_i^2
//   s.t. z_i = y_i - x_i * beta,
// or returns a Status error if train_data is empty or the solver fails to
// find an optimal solution.
absl::StatusOr<LinearModel> Train(
    const std::vector<LabeledExample>& train_data) {
  // Guard: train_data[0] below would be undefined behavior on empty input.
  if (train_data.empty()) {
    return absl::InvalidArgumentError("train_data must not be empty");
  }
  const int num_features = static_cast<int>(train_data[0].xs.size());
  const int num_train = static_cast<int>(train_data.size());
  math_opt::Model model("linear_regression");
  // Create the decision variables: beta, and z. Both are free (unbounded)
  // continuous variables.
  std::vector<math_opt::Variable> betas;
  for (int j = 0; j < num_features; ++j) {
    betas.push_back(model.AddVariable(absl::StrCat("beta_", j)));
  }
  std::vector<math_opt::Variable> zs;
  for (int i = 0; i < num_train; ++i) {
    zs.push_back(model.AddVariable(absl::StrCat("z_", i)));
  }
  // Set the objective function:
  //   minimize sum_i z_i^2
  model.Minimize(math_opt::QuadraticExpression::InnerProduct(zs, zs));
  // Add the constraints:
  //   z_i = y_i - x_i * beta
  for (int i = 0; i < num_train; ++i) {
    const LabeledExample& example = train_data[i];
    model.AddLinearConstraint(
        zs[i] == example.y - math_opt::InnerProduct(example.xs, betas));
  }
  // Done building the model, now solve.
  math_opt::SolveArguments args;
  args.parameters.enable_output = true;
  ASSIGN_OR_RETURN(const math_opt::SolveResult result,
                   Solve(model, absl::GetFlag(FLAGS_solver_type), args));
  // Propagate an error unless the solver proved optimality.
  RETURN_IF_ERROR(result.termination.IsOptimal());
  std::cout << "Training time: " << result.solve_time() << std::endl;
  // Extract the optimal beta values from the solution.
  return LinearModel{.betas = Values(result.variable_values(), betas)};
}
// Generates random train/test data, fits a linear model to the training set
// by solving the optimization problem, and prints the in-sample and
// out-of-sample losses. Returns an error Status if training fails.
absl::Status Main() {
  const int num_features = absl::GetFlag(FLAGS_num_features);
  const int num_examples = absl::GetFlag(FLAGS_num_examples);
  const double noise_stddev = absl::GetFlag(FLAGS_noise);
  // Draw two independent datasets of equal size from the same random
  // ground-truth model: one for training and one for evaluation.
  const LinearModel ground_truth = RandomLinearModel(num_features);
  const std::vector<LabeledExample> train_data =
      RandomData(ground_truth, num_examples, noise_stddev);
  const std::vector<LabeledExample> test_data =
      RandomData(ground_truth, num_examples, noise_stddev);
  // Solve the regression problem.
  ASSIGN_OR_RETURN(const LinearModel learned_model, Train(train_data));
  // Evaluate the solution.
  std::cout << "In sample loss: " << L2Loss(learned_model, train_data)
            << std::endl;
  std::cout << "Out of sample loss: " << L2Loss(learned_model, test_data)
            << std::endl;
  return absl::OkStatus();
}
} // namespace
int main(int argc, char** argv) {
InitGoogle(argv[0], &argc, &argv, true);
const absl::Status status = Main();
if (!status.ok()) {
LOG(QFATAL) << status;
}
return 0;
}