sync set cover

This commit is contained in:
Laurent Perron
2025-01-27 13:48:59 +01:00
parent d04d3798f1
commit 240b86f7ff
7 changed files with 137 additions and 81 deletions

View File

@@ -120,11 +120,14 @@ cc_test(
"//ortools/base:dump_vars",
"//ortools/base:gmock_main",
"//ortools/base:mathutil",
"//ortools/base:timer",
"@com_google_absl//absl/algorithm:container",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/random",
"@com_google_absl//absl/random:bit_gen_ref",
"@com_google_absl//absl/random:distributions",
"@com_google_absl//absl/time",
"@com_google_absl//absl/types:span",
"@com_google_benchmark//:benchmark",
],
@@ -286,6 +289,7 @@ cc_library(
srcs = ["set_cover_model.cc"],
hdrs = ["set_cover_model.h"],
deps = [
":radix_sort",
":set_cover_cc_proto",
"//ortools/base:intops",
"//ortools/base:strong_vector",
@@ -296,6 +300,7 @@ cc_library(
"@com_google_absl//absl/random:distributions",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
"@com_google_absl//absl/types:span",
],
)
@@ -495,7 +500,6 @@ cc_library(
":dynamic_partition",
":dynamic_permutation",
":sparse_permutation",
"//ortools/base:dump_vars",
"//ortools/base:murmur",
"//ortools/graph",
"//ortools/graph:iterators",
@@ -503,9 +507,12 @@ cc_library(
"//ortools/util:stats",
"//ortools/util:time_limit",
"@com_google_absl//absl/algorithm:container",
"@com_google_absl//absl/base:log_severity",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/flags:flag",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/numeric:int128",
"@com_google_absl//absl/status",
"@com_google_absl//absl/strings",

View File

@@ -14,8 +14,8 @@
#include "ortools/algorithms/find_graph_symmetries.h"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <memory>
#include <numeric>
#include <string>
@@ -23,9 +23,12 @@
#include <vector>
#include "absl/algorithm/container.h"
#include "absl/base/log_severity.h"
#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/flags/flag.h"
#include "absl/memory/memory.h"
#include "absl/log/check.h"
#include "absl/numeric/int128.h"
#include "absl/status/status.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
@@ -36,6 +39,8 @@
#include "ortools/algorithms/dynamic_partition.h"
#include "ortools/algorithms/dynamic_permutation.h"
#include "ortools/algorithms/sparse_permutation.h"
#include "ortools/base/logging.h"
#include "ortools/graph/graph.h"
#include "ortools/graph/iterators.h"
#include "ortools/graph/util.h"
@@ -699,9 +704,17 @@ GraphSymmetryFinder::FindOneSuitablePermutation(
// DCHECKs() and statistics.
ScopedTimeDistributionUpdater search_time_updater(&stats_.search_time);
DCHECK_EQ("", tmp_dynamic_permutation_.DebugString());
DCHECK_EQ(
base_partition->DebugString(/*sort_parts_lexicographically=*/false),
image_partition->DebugString(/*sort_parts_lexicographically=*/false));
DCHECK_EQ(base_partition->NumParts(), image_partition->NumParts());
if (DEBUG_MODE) {
for (int i = 0; i < base_partition->NumParts(); ++i) {
DCHECK_EQ(base_partition->FprintOfPart(i),
image_partition->FprintOfPart(i))
<< base_partition->DebugString(/*sort_parts_lexicographically=*/false)
<< " "
<< image_partition->DebugString(
/*sort_parts_lexicographically=*/false);
}
}
DCHECK(search_states_.empty());
// These will be used during the search. See their usage.

View File

@@ -44,16 +44,21 @@ message SetCoverSolutionResponse {
// Possible outcomes of a solve, as carried by the `status` field of the
// enclosing response message.
enum Status {
// Undefined. As the zero value, this is presumably what an unset status reads
// as — TODO confirm.
UNDEFINED = 0;
// The solver found the proven optimal solution.
OPTIMAL = 1;
// The solver had enough time to find some solution that satisfied all
// constraints, but it did not reach the optimum.
FEASIBLE = 2;
// The model does not have any solution.
INFEASIBLE = 3;
// The model is invalid.
INVALID = 4;
}
// For future use. TODO(user): Implement.
optional Status status = 1;

View File

@@ -289,10 +289,9 @@ void IncreasingCountingSort(uint32_t radix, int shift, std::vector<Key>& keys,
const auto num_keys = keys.size();
// In this order for stability.
for (int64_t i = num_keys - 1; i >= 0; --i) {
Counter& c = counts[Bucket(keys[i], shift, radix)];
scratch_keys[c - 1] = keys[i];
scratch_payloads[c - 1] = payloads[i];
--c;
Counter c = --counts[Bucket(keys[i], shift, radix)];
scratch_keys[c] = keys[i];
scratch_payloads[c] = payloads[i];
}
std::swap(keys, scratch_keys);
std::swap(payloads, scratch_payloads);
@@ -301,14 +300,14 @@ void IncreasingCountingSort(uint32_t radix, int shift, std::vector<Key>& keys,
template <typename Key, typename Payload>
void RadixSort(int radix_log, std::vector<Key>& keys,
std::vector<Payload>& payloads, Key min_key, Key max_key) {
std::vector<Payload>& payloads, Key /*min_key*/, Key max_key) {
// range_log is the number of bits necessary to represent max_key.
// We could as well use max_key - min_key, but it is more expensive to
// compute.
const int range_log = internal::NumBitsToRepresent(max_key);
DCHECK_EQ(internal::NumBitsToRepresent(0), 0);
DCHECK_LE(internal::NumBitsToRepresent(std::numeric_limits<Key>::max()),
sizeof(Key) * CHAR_BIT);
std::numeric_limits<Key>::digits);
const int radix = 1 << radix_log; // By definition.
std::vector<uint32_t> counters(radix, 0);
std::vector<Key> scratch_keys(keys.size());
@@ -330,7 +329,7 @@ std::vector<ElementIndex> GetUncoveredElementsSortedByDegree(
keys.reserve(num_elements);
const SparseRowView& rows = inv->model()->rows();
BaseInt max_degree = 0;
for (ElementIndex element : inv->model()->ElementRange()) {
for (const ElementIndex element : inv->model()->ElementRange()) {
// Already covered elements should not be considered.
if (inv->coverage()[element] != 0) continue;
degree_sorted_elements.push_back(element);
@@ -729,8 +728,8 @@ bool GuidedLocalSearch::NextSolution(int num_iterations) {
}
Cost GuidedLocalSearch::ComputeDelta(SubsetIndex subset) const {
float delta = (penalization_factor_ * penalties_[subset] +
inv_->model()->subset_costs()[subset]);
const float delta = (penalization_factor_ * penalties_[subset] +
inv_->model()->subset_costs()[subset]);
if (inv_->is_selected()[subset] && inv_->ComputeIsRedundant(subset)) {
return delta;
} else if (!inv_->is_selected()[subset]) {

View File

@@ -29,6 +29,8 @@
#include "absl/random/distributions.h"
#include "absl/random/random.h"
#include "absl/strings/str_format.h"
#include "absl/types/span.h"
#include "ortools/algorithms/radix_sort.h"
#include "ortools/algorithms/set_cover.pb.h"
#include "ortools/base/logging.h"
@@ -322,7 +324,9 @@ void SetCoverModel::ReserveNumElementsInSubset(ElementIndex num_elements,
void SetCoverModel::SortElementsInSubsets() {
for (const SubsetIndex subset : SubsetRange()) {
std::sort(columns_[subset].begin(), columns_[subset].end());
// std::sort(columns_[subset].begin(), columns_[subset].end());
BaseInt* data = reinterpret_cast<BaseInt*>(columns_[subset].data());
RadixSort(absl::MakeSpan(data, columns_[subset].size()));
}
elements_in_subsets_are_sorted_ = true;
}
@@ -336,7 +340,10 @@ void SetCoverModel::CreateSparseRowView() {
for (const SubsetIndex subset : SubsetRange()) {
// Sort the columns. It's not super-critical to improve performance here
// as this needs to be done only once.
std::sort(columns_[subset].begin(), columns_[subset].end());
// std::sort(columns_[subset].begin(), columns_[subset].end());
BaseInt* data = reinterpret_cast<BaseInt*>(columns_[subset].data());
RadixSort(absl::MakeSpan(data, columns_[subset].size()));
for (const ElementIndex element : columns_[subset]) {
++row_sizes[element];
}
@@ -346,7 +353,7 @@ void SetCoverModel::CreateSparseRowView() {
}
for (const SubsetIndex subset : SubsetRange()) {
for (const ElementIndex element : columns_[subset]) {
rows_[element].push_back(subset);
rows_[element].emplace_back(subset);
}
}
row_view_is_valid_ = true;
@@ -392,8 +399,10 @@ SetCoverProto SetCoverModel::ExportModelAsProto() const {
100.0 * subset.value() / num_subsets());
SetCoverProto::Subset* subset_proto = message.add_subset();
subset_proto->set_cost(subset_costs_[subset]);
SparseColumn column = columns_[subset];
std::sort(column.begin(), column.end());
SparseColumn column = columns_[subset]; // Copy is intentional.
// std::sort(column.begin(), column.end());
BaseInt* data = reinterpret_cast<BaseInt*>(column.data());
RadixSort(absl::MakeSpan(data, column.size()));
for (const ElementIndex element : column) {
subset_proto->add_element(element.value());
}

View File

@@ -13,9 +13,12 @@
#include "ortools/algorithms/set_cover_reader.h"
#include <sys/types.h>
#include <cctype>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <string>
#include <vector>
@@ -38,8 +41,6 @@ namespace operations_research {
class SetCoverReader {
public:
explicit SetCoverReader(File* file);
absl::string_view GetLine() { return line_; }
void Advance() { ++line_iter_; }
absl::string_view GetNextToken();
double ParseNextDouble();
int64_t ParseNextInteger();
@@ -63,6 +64,11 @@ SetCoverReader::SetCoverReader(File* file)
size_t SetCoverReader::SkipBlanks(size_t pos) const {
const size_t size = line_.size();
// As it is expected that the blanks will be spaces, we can skip them faster
// by checking for spaces only.
for (; pos < size && line_[pos] == ' '; ++pos) {
}
// We skip all forms of blanks to be on the safe side.
for (; pos < size && std::isspace(line_[pos]); ++pos) {
}
return pos;
@@ -195,66 +201,66 @@ SetCoverModel ReadSetCoverProto(absl::string_view filename, bool binary) {
}
namespace {
// A class to write a line of text to a file.
// The line is written in chunks of at most max_cols characters.
class LineWriter {
// A class to format data and write it to a file.
// Text is formatted in chunks of at most max_cols characters.
// Text is actually written to the file when the current chunk is full or when
// FlushLine() is called.
// FlushLine() must be called before closing the file.
class LineFormatter {
public:
LineWriter(File* file, int max_cols)
explicit LineFormatter(File* file)
: LineFormatter(file, std::numeric_limits<int64_t>::max()) {}
LineFormatter(File* file, int64_t max_cols)
: num_cols_(0), max_cols_(max_cols), line_(), file_(file) {}
~LineWriter() { Close(); }
~LineFormatter() { CHECK(line_.empty()); }
void Write(absl::string_view text) {
void Append(absl::string_view text) {
const int text_size = text.size();
if (!text.empty() && text_size + num_cols_ > max_cols_) {
CHECK_OK(file::WriteString(file_, absl::StrCat(line_, "\n"),
file::Defaults()));
line_.clear();
num_cols_ = 0;
FlushLine();
}
absl::StrAppend(&line_, text);
num_cols_ += text_size;
}
void Write(BaseInt value) { Write(absl::StrCat(value, " ")); }
void Append(BaseInt value) { Append(absl::StrCat(value, " ")); }
void Write(double value) { Write(absl::StrFormat("%.17g ", value)); }
void Append(double value) { Append(absl::StrFormat("%.17g ", value)); }
void Close() {
void FlushLine() {
CHECK_OK(
file::WriteString(file_, absl::StrCat(line_, "\n"), file::Defaults()));
line_.clear();
num_cols_ = 0;
}
private:
int num_cols_;
int max_cols_;
int64_t num_cols_;
int64_t max_cols_;
std::string line_;
File* file_;
};
} // namespace
void WriteOrlibScp(const SetCoverModel& model, absl::string_view filename) {
const int kMaxCols = 80;
File* file(file::OpenOrDie(filename, "w", file::Defaults()));
CHECK_OK(file::WriteString(
file, absl::StrCat(model.num_elements(), " ", model.num_subsets(), "\n"),
file::Defaults()));
{ // RAII for the file writer.
LineWriter cost_writer(file, kMaxCols);
for (const SubsetIndex subset : model.SubsetRange()) {
cost_writer.Write(model.subset_costs()[subset]);
}
for (const ElementIndex element : model.ElementRange()) {
LOG_EVERY_N_SEC(INFO, 5)
<< absl::StrFormat("Writing element %d (%.1f%%)", element.value(),
100.0 * element.value() / model.num_elements());
CHECK_OK(file::WriteString(
file, absl::StrCat(model.rows()[element].size(), "\n"),
file::Defaults()));
LineWriter row_writer(file, kMaxCols);
for (const SubsetIndex subset : model.rows()[element]) {
row_writer.Write(subset.value() + 1);
}
LineFormatter formatter(file);
formatter.Append(model.num_elements());
formatter.Append(model.num_subsets());
formatter.FlushLine();
for (const SubsetIndex subset : model.SubsetRange()) {
formatter.Append(model.subset_costs()[subset]);
}
formatter.FlushLine();
for (const ElementIndex element : model.ElementRange()) {
LOG_EVERY_N_SEC(INFO, 5)
<< absl::StrFormat("Writing element %d (%.1f%%)", element.value(),
100.0 * element.value() / model.num_elements());
formatter.Append(absl::StrCat(model.rows()[element].size(), "\n"));
for (const SubsetIndex subset : model.rows()[element]) {
formatter.Append(subset.value() + 1);
}
formatter.FlushLine();
}
LOG(INFO) << "Finished writing the model.";
file->Close(file::Defaults()).IgnoreError();
@@ -262,24 +268,21 @@ void WriteOrlibScp(const SetCoverModel& model, absl::string_view filename) {
// Beware: element indices are converted to 1-based numbering when written.
void WriteOrlibRail(const SetCoverModel& model, absl::string_view filename) {
const int kMaxCols = 80;
File* file(file::OpenOrDie(filename, "w", file::Defaults()));
CHECK_OK(file::WriteString(
file, absl::StrCat(model.num_elements(), " ", model.num_subsets(), "\n"),
file::Defaults()));
LineFormatter formatter(file);
for (const SubsetIndex subset : model.SubsetRange()) {
LOG_EVERY_N_SEC(INFO, 5)
<< absl::StrFormat("Writing subset %d (%.1f%%)", subset.value(),
100.0 * subset.value() / model.num_subsets());
CHECK_OK(
file::WriteString(file,
absl::StrCat(model.subset_costs()[subset], " ",
model.columns()[subset].size(), "\n"),
file::Defaults()));
LineWriter writer(file, kMaxCols);
formatter.Append(model.subset_costs()[subset]);
formatter.Append(static_cast<BaseInt>(model.columns()[subset].size()));
for (const ElementIndex element : model.columns()[subset]) {
writer.Write(element.value() + 1);
formatter.Append(element.value() + 1);
}
formatter.FlushLine();
}
LOG(INFO) << "Finished writing the model.";
file->Close(file::Defaults()).IgnoreError();
@@ -343,13 +346,14 @@ void WriteSetCoverSolutionText(const SetCoverModel& model,
CHECK_OK(file::WriteString(
file, absl::StrCat(solution.size(), " ", cardinality, " ", cost, "\n"),
file::Defaults()));
const int kMaxCols = 80;
LineWriter writer(file, kMaxCols);
LineFormatter formatter(file);
for (BaseInt subset(0); subset < solution.size(); ++subset) {
if (solution[SubsetIndex(subset)]) {
writer.Write(subset);
formatter.Append(subset);
}
}
formatter.FlushLine();
file->Close(file::Defaults()).IgnoreError();
}
void WriteSetCoverSolutionProto(const SetCoverModel& model,

View File

@@ -18,7 +18,6 @@
#include "absl/log/check.h"
#include "absl/strings/match.h"
#include "absl/strings/str_join.h"
#include "absl/strings/string_view.h"
#include "absl/time/time.h"
#include "ortools/algorithms/set_cover_heuristics.h"
#include "ortools/algorithms/set_cover_invariant.h"
@@ -42,9 +41,6 @@ ABSL_FLAG(std::string, output_fmt, "",
"If out is non-empty, use the given format for the output.");
ABSL_FLAG(std::string, output_model, "",
"If non-empty, write the set cover model to the given file. ");
ABSL_FLAG(std::string, output_model_fmt, "",
"If output_model is non-empty, use the given format for the output "
"model file. Can be proto, proto_bin, OrlibRail, OrlibScp.");
ABSL_FLAG(bool, generate, false, "Generate a new model from the input model.");
ABSL_FLAG(int, num_elements_wanted, 0,
@@ -61,6 +57,9 @@ ABSL_FLAG(std::string, improvement, "", "Solution improvement method.");
ABSL_FLAG(int, num_threads, 1,
"Number of threads to use by the underlying solver.");
ABSL_FLAG(bool, solve, false, "Solve the model.");
ABSL_FLAG(bool, stats, false, "Log stats about the model.");
namespace operations_research {
using CL = SetCoverInvariant::ConsistencyLevel;
@@ -142,7 +141,8 @@ FileFormat ParseFileFormat(const std::string& format_name) {
}
}
SetCoverModel ReadModel(absl::string_view input_file, FileFormat input_format) {
SetCoverModel ReadModel(const std::string& input_file,
FileFormat input_format) {
switch (input_format) {
case FileFormat::ORLIB_SCP:
return ReadOrlibScp(input_file);
@@ -160,7 +160,7 @@ SetCoverModel ReadModel(absl::string_view input_file, FileFormat input_format) {
}
}
SubsetBoolVector ReadSolution(absl::string_view input_file,
SubsetBoolVector ReadSolution(const std::string& input_file,
FileFormat input_format) {
switch (input_format) {
case FileFormat::TXT:
@@ -175,8 +175,9 @@ SubsetBoolVector ReadSolution(absl::string_view input_file,
}
}
void WriteModel(const SetCoverModel& model, absl::string_view output_file,
void WriteModel(const SetCoverModel& model, const std::string& output_file,
FileFormat output_format) {
LOG(INFO) << "Writing model to " << output_file;
switch (output_format) {
case FileFormat::ORLIB_SCP:
WriteOrlibScp(model, output_file);
@@ -197,7 +198,7 @@ void WriteModel(const SetCoverModel& model, absl::string_view output_file,
}
void WriteSolution(const SetCoverModel& model, const SubsetBoolVector& solution,
absl::string_view output_file, FileFormat output_format) {
const std::string& output_file, FileFormat output_format) {
switch (output_format) {
case FileFormat::TXT:
WriteSetCoverSolutionText(model, solution, output_file);
@@ -233,20 +234,38 @@ void Run() {
const auto& input_format = ParseFileFormat(absl::GetFlag(FLAGS_input_fmt));
const auto& output = absl::GetFlag(FLAGS_output);
const auto& output_format = ParseFileFormat(absl::GetFlag(FLAGS_output_fmt));
if (input.empty()) {
LOG(FATAL) << "No input file specified.";
}
if (!input.empty() && input_format == FileFormat::EMPTY) {
LOG(FATAL) << "Input format cannot be empty.";
}
if (!output.empty() && output_format == FileFormat::EMPTY) {
LOG(FATAL) << "Output format cannot be empty.";
}
SetCoverModel model = ReadModel(input, input_format);
model.CreateSparseRowView();
if (absl::GetFlag(FLAGS_generate)) {
model.CreateSparseRowView();
model = SetCoverModel::GenerateRandomModelFrom(
model, absl::GetFlag(FLAGS_num_elements_wanted),
absl::GetFlag(FLAGS_num_subsets_wanted), absl::GetFlag(FLAGS_row_scale),
absl::GetFlag(FLAGS_column_scale), absl::GetFlag(FLAGS_cost_scale));
}
if (!output.empty()) {
CHECK(output_format != FileFormat::EMPTY);
if (output_format == FileFormat::ORLIB_SCP) {
model.CreateSparseRowView();
}
WriteModel(model, output, output_format);
}
LogStats(input, &model);
SetCoverInvariant inv = RunLazyElementDegree(input, &model);
auto problem = output.empty() ? input : output;
if (absl::GetFlag(FLAGS_stats)) {
LogStats(problem, &model);
}
if (absl::GetFlag(FLAGS_solve)) {
LOG(INFO) << "Solving " << problem;
model.CreateSparseRowView();
SetCoverInvariant inv = RunLazyElementDegree(problem, &model);
}
}
} // namespace operations_research