// Copyright 2010-2022 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Solve a linear regression problem on random data.
//
// Problem data:
//   There are num_features features, indexed by j.
//   There are num_examples training examples, indexed by i.
//   x_ij: the feature value for example i and feature j in the training data.
//   y_i: the label for example i in the training data.
//
// Decision variables:
//   beta_j: the coefficient to learn for each feature j.
//   z_i: the prediction error for example i.
//
// Optimization problem:
//   min  sum_i z_i^2
//   s.t. y_i - sum_j beta_j * x_ij = z_i  for all i
//
// This is the unregularized linear regression problem.
//
// This example solves the problem on randomly generated (x, y) data. The data
// is generated by assuming some true values for beta (generated at random,
// i.i.d. N(0, 1)), then drawing each x_ij as N(0, 1), and then computing
//   y_i = beta * x_i + N(0, noise)
// where noise is a command line flag.
//
// After solving the optimization problem above to recover values for beta, the
// in-sample and out-of-sample loss (average squared prediction error) of the
// learned model are printed.
//
// For an advanced version, see:
//   ortools/math_opt/codelabs/regression/
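//
// Note: because this is unregularized least squares, beta could also be
// recovered directly from the normal equations, (X^T X) beta = X^T y. Solving
// the equivalent quadratic program instead keeps the example focused on the
// MathOpt API.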

#include <iostream>
#include <ostream>
#include <utility>
#include <vector>

#include "absl/algorithm/container.h"
#include "absl/flags/flag.h"
#include "absl/random/random.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_cat.h"
#include "ortools/base/init_google.h"
#include "ortools/base/logging.h"
#include "ortools/base/status_builder.h"
#include "ortools/base/status_macros.h"
#include "ortools/math_opt/cpp/math_opt.h"

ABSL_FLAG(operations_research::math_opt::SolverType, solver_type,
          operations_research::math_opt::SolverType::kGurobi,
          "The solver needs to support quadratic objectives, e.g. pdlp, "
          "gurobi, or osqp.");

ABSL_FLAG(int, num_features, 10,
          "The number of features in the linear regression model.");
ABSL_FLAG(int, num_examples, 100,
          "The number of examples to use in the train and test sets.");
ABSL_FLAG(double, noise, 3.0,
          "The standard deviation of the noise on the labels.");

namespace {

namespace math_opt = operations_research::math_opt;

// Data for a learned function f(x) = sum_j beta_j * x_j.
struct LinearModel {
  // Has size equal to the number of features.
  std::vector<double> betas;
};

// Creates a random linear function where each beta is i.i.d. Normal(0, 1).
LinearModel RandomLinearModel(const int num_features) {
  LinearModel result;
  absl::BitGen bit_gen;
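  // Note: absl::BitGen seeds itself nondeterministically, so every run of the
  // binary draws a different ground-truth model.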
  for (int j = 0; j < num_features; ++j) {
    result.betas.push_back(absl::Gaussian<double>(bit_gen));
  }
  return result;
}

// A training example associating the label `y` with the features `xs`.
struct LabeledExample {
  // Has size equal to the number of features.
  std::vector<double> xs;
  double y = 0.0;
};

// Creates num_examples random examples where y approx ground_truth(xs).
//
// Specifically, y = ground_truth(xs) + N(0, noise_stddev), where the xs are
// drawn i.i.d. as N(0, 1), independently of the noise.
std::vector<LabeledExample> RandomData(const LinearModel& ground_truth,
                                       const int num_examples,
                                       const double noise_stddev) {
  const int num_features = static_cast<int>(ground_truth.betas.size());
  absl::BitGen bit_gen;
  std::vector<LabeledExample> examples;
  for (int i = 0; i < num_examples; ++i) {
    LabeledExample example;
    for (int j = 0; j < num_features; ++j) {
      example.xs.push_back(absl::Gaussian<double>(bit_gen));
    }
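    // The third argument to c_inner_product is the initial value of the
    // accumulator, so seeding it with a Gaussian draw computes
    // x_i . beta + N(0, noise_stddev) in one expression.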
    example.y =
        absl::c_inner_product(example.xs, ground_truth.betas,
                              absl::Gaussian(bit_gen, 0.0, noise_stddev));
    examples.push_back(std::move(example));
  }
  return examples;
}

// Computes the average squared error between model(example.xs) and example.y.
double L2Loss(const LinearModel& model,
              const std::vector<LabeledExample>& examples) {
  double result = 0.0;
  for (const LabeledExample& example : examples) {
    CHECK_EQ(example.xs.size(), model.betas.size());
    const double error =
        example.y - absl::c_inner_product(example.xs, model.betas, 0.0);
    result += error * error;
  }
  return examples.empty() ? 0.0 : result / examples.size();
}

// Computes and returns the linear function minimizing L2Loss on train_data by
// solving a quadratic optimization problem, or returns a Status error if
// train_data is empty or the solver fails to find an optimal solution.
absl::StatusOr<LinearModel> Train(
    const std::vector<LabeledExample>& train_data) {
  if (train_data.empty()) {
    return absl::InvalidArgumentError("train_data must be nonempty");
  }
  const int num_features = static_cast<int>(train_data[0].xs.size());
  const int num_train = static_cast<int>(train_data.size());
  math_opt::Model model("linear_regression");

  // Create the decision variables: beta and z.
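  // Note: AddVariable(name) creates a continuous variable with default bounds
  // (-infinity, +infinity), which is what unconstrained regression needs.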
  std::vector<math_opt::Variable> betas;
  for (int j = 0; j < num_features; ++j) {
    betas.push_back(model.AddVariable(absl::StrCat("beta_", j)));
  }
  std::vector<math_opt::Variable> zs;
  for (int i = 0; i < num_train; ++i) {
    zs.push_back(model.AddVariable(absl::StrCat("z_", i)));
  }

  // Set the objective function:
  //   minimize sum_i z_i^2
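  // math_opt::InnerProduct(zs, zs) builds the quadratic expression
  //   z_0*z_0 + z_1*z_1 + ... in one call.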
  model.Minimize(math_opt::InnerProduct(zs, zs));
  // Add the constraints:
  //   z_i == y_i - x_i * beta
  for (int i = 0; i < num_train; ++i) {
    const LabeledExample& example = train_data[i];
    model.AddLinearConstraint(
        zs[i] == example.y - math_opt::InnerProduct(example.xs, betas));
  }

  // Done building the model, now solve.
  math_opt::SolveArguments args;
  args.parameters.enable_output = true;
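  // enable_output streams the underlying solver's own logs to standard output;
  // it is useful for debugging but off by default.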
  ASSIGN_OR_RETURN(const math_opt::SolveResult result,
                   Solve(model, absl::GetFlag(FLAGS_solver_type), args));
  RETURN_IF_ERROR(result.termination.EnsureIsOptimal());
  std::cout << "Training time: " << result.solve_time() << std::endl;
  return LinearModel{.betas = Values(result.variable_values(), betas)};
}

absl::Status Main() {
  const int num_features = absl::GetFlag(FLAGS_num_features);
  const int num_train = absl::GetFlag(FLAGS_num_examples);
  const double noise_stddev = absl::GetFlag(FLAGS_noise);
  const int num_test = num_train;

  // Generate random training and test data.
  const LinearModel ground_truth = RandomLinearModel(num_features);
  const std::vector<LabeledExample> train_data =
      RandomData(ground_truth, num_train, noise_stddev);
  const std::vector<LabeledExample> test_data =
      RandomData(ground_truth, num_test, noise_stddev);

  // Solve the regression problem.
  ASSIGN_OR_RETURN(const LinearModel learned_model, Train(train_data));

  // Evaluate the solution.
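  // With num_examples well above num_features, both losses should be close to
  // noise^2 (the irreducible error); the in-sample loss is typically slightly
  // below it and the out-of-sample loss slightly above.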
  std::cout << "In sample loss: " << L2Loss(learned_model, train_data)
            << std::endl;
  std::cout << "Out of sample loss: " << L2Loss(learned_model, test_data)
            << std::endl;
  return absl::OkStatus();
}

}  // namespace

int main(int argc, char** argv) {
  InitGoogle(argv[0], &argc, &argv, true);
  const absl::Status status = Main();
  if (!status.ok()) {
    LOG(QFATAL) << status;
  }
  return 0;
}