433 lines
15 KiB
C++
433 lines
15 KiB
C++
// Copyright 2010-2025 Google LLC
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include "ortools/set_cover/set_cover_reader.h"
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <algorithm>
|
|
#include <cctype>
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
#include <limits>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#include "absl/log/check.h"
|
|
#include "absl/log/log.h"
|
|
#include "absl/strings/numbers.h"
|
|
#include "absl/strings/str_cat.h"
|
|
#include "absl/strings/str_format.h"
|
|
#include "absl/strings/str_split.h"
|
|
#include "absl/strings/string_view.h"
|
|
#include "ortools/base/file.h"
|
|
#include "ortools/base/filesystem.h"
|
|
#include "ortools/base/helpers.h"
|
|
#include "ortools/base/options.h"
|
|
#include "ortools/set_cover/base_types.h"
|
|
#include "ortools/set_cover/set_cover.pb.h"
|
|
#include "ortools/set_cover/set_cover_model.h"
|
|
#include "ortools/util/filelineiter.h"
|
|
|
|
namespace operations_research {
|
|
|
|
class SetCoverReader {
|
|
public:
|
|
explicit SetCoverReader(File* file);
|
|
absl::string_view GetNextToken();
|
|
double ParseNextDouble();
|
|
int64_t ParseNextInteger();
|
|
|
|
private:
|
|
size_t SkipBlanks(size_t pos) const;
|
|
size_t SkipNonBlanks(size_t pos) const;
|
|
FileLineIterator line_iter_;
|
|
absl::string_view line_;
|
|
int start_pos_;
|
|
int end_pos_;
|
|
};
|
|
|
|
SetCoverReader::SetCoverReader(File* file)
|
|
: line_iter_(file, FileLineIterator::REMOVE_INLINE_CR |
|
|
FileLineIterator::REMOVE_BLANK_LINES),
|
|
start_pos_(0),
|
|
end_pos_(0) {
|
|
line_ = *line_iter_;
|
|
}
|
|
|
|
size_t SetCoverReader::SkipBlanks(size_t pos) const {
|
|
const size_t size = line_.size();
|
|
// As it is expected that the blanks will be spaces, we can skip them faster
|
|
// by checking for spaces only.
|
|
for (; pos < size && line_[pos] == ' '; ++pos) {
|
|
}
|
|
// We skip all forms of blanks to be on the safe side.
|
|
for (; pos < size && std::isspace(line_[pos]); ++pos) {
|
|
}
|
|
return pos;
|
|
}
|
|
|
|
size_t SetCoverReader::SkipNonBlanks(size_t pos) const {
|
|
const size_t size = line_.size();
|
|
for (; pos < size && !std::isspace(line_[pos]); ++pos) {
|
|
}
|
|
return pos;
|
|
}
|
|
|
|
absl::string_view SetCoverReader::GetNextToken() {
|
|
const size_t size = line_.size();
|
|
start_pos_ = SkipBlanks(end_pos_);
|
|
if (start_pos_ >= size) {
|
|
++line_iter_;
|
|
line_ = *line_iter_;
|
|
start_pos_ = SkipBlanks(0);
|
|
}
|
|
end_pos_ = SkipNonBlanks(start_pos_);
|
|
return line_.substr(start_pos_, end_pos_ - start_pos_);
|
|
}
|
|
|
|
double SetCoverReader::ParseNextDouble() {
|
|
double value = 0;
|
|
CHECK(absl::SimpleAtod(GetNextToken(), &value));
|
|
return value;
|
|
}
|
|
|
|
int64_t SetCoverReader::ParseNextInteger() {
|
|
int64_t value = 0;
|
|
CHECK(absl::SimpleAtoi(GetNextToken(), &value));
|
|
return value;
|
|
}
|
|
|
|
namespace {
|
|
double Percent(SubsetIndex subset, BaseInt total) {
|
|
return subset.value() == 0 ? 0 : 100.0 * subset.value() / total;
|
|
}
|
|
|
|
double Percent(ElementIndex element, BaseInt total) {
|
|
return element.value() == 0 ? 0 : 100.0 * element.value() / total;
|
|
}
|
|
} // namespace
|
|
|
|
// This is a row-based format where the elements are 1-indexed.
|
|
SetCoverModel ReadOrlibScp(absl::string_view filename) {
|
|
CHECK_OK(file::Exists(filename, file::Defaults()));
|
|
SetCoverModel model;
|
|
File* file(file::OpenOrDie(filename, "r", file::Defaults()));
|
|
SetCoverReader reader(file);
|
|
const ElementIndex num_rows(reader.ParseNextInteger());
|
|
const SubsetIndex num_cols(reader.ParseNextInteger());
|
|
model.ResizeNumSubsets(num_cols);
|
|
for (SubsetIndex subset : SubsetRange(num_cols)) {
|
|
const double cost(reader.ParseNextDouble());
|
|
model.SetSubsetCost(subset, cost);
|
|
}
|
|
for (ElementIndex element : ElementRange(num_rows)) {
|
|
LOG_EVERY_N_SEC(INFO, 5)
|
|
<< absl::StrFormat("Reading element %d (%.1f%%)", element.value(),
|
|
Percent(element, model.num_elements()));
|
|
const RowEntryIndex row_size(reader.ParseNextInteger());
|
|
for (RowEntryIndex entry(0); entry < row_size; ++entry) {
|
|
// Correct the 1-indexing.
|
|
const SubsetIndex subset(reader.ParseNextInteger() - 1);
|
|
model.AddElementToSubset(element, subset);
|
|
}
|
|
}
|
|
LOG(INFO) << "Read " << model.num_subsets() << " subsets, "
|
|
<< model.num_elements() << " elements";
|
|
file->Close(file::Defaults()).IgnoreError();
|
|
model.CreateSparseRowView();
|
|
return model;
|
|
}
|
|
|
|
// This is a column-based format where the elements are 1-indexed.
|
|
SetCoverModel ReadOrlibRail(absl::string_view filename) {
|
|
CHECK_OK(file::Exists(filename, file::Defaults()));
|
|
SetCoverModel model;
|
|
File* file(file::OpenOrDie(filename, "r", file::Defaults()));
|
|
SetCoverReader reader(file);
|
|
const ElementIndex num_rows(reader.ParseNextInteger());
|
|
const SubsetIndex num_cols(reader.ParseNextInteger());
|
|
model.ResizeNumSubsets(num_cols);
|
|
for (SubsetIndex subset : SubsetRange(num_cols)) {
|
|
LOG_EVERY_N_SEC(INFO, 5)
|
|
<< absl::StrFormat("Reading subset %d (%.1f%%)", subset.value(),
|
|
Percent(subset, model.num_subsets()));
|
|
const double cost(reader.ParseNextDouble());
|
|
model.SetSubsetCost(subset, cost);
|
|
const ColumnEntryIndex column_size(reader.ParseNextInteger());
|
|
model.ReserveNumElementsInSubset(column_size.value(), subset.value());
|
|
for (const ColumnEntryIndex _ : ColumnEntryRange(column_size)) {
|
|
// Correct the 1-indexing.
|
|
const ElementIndex element(reader.ParseNextInteger() - 1);
|
|
model.AddElementToSubset(element, subset);
|
|
}
|
|
}
|
|
LOG(INFO) << "Read " << model.num_subsets() << " subsets, "
|
|
<< model.num_elements() << " elements";
|
|
file->Close(file::Defaults()).IgnoreError();
|
|
model.CreateSparseRowView();
|
|
return model;
|
|
}
|
|
|
|
SetCoverModel ReadFimiDat(absl::string_view filename) {
|
|
CHECK_OK(file::Exists(filename, file::Defaults()));
|
|
SetCoverModel model;
|
|
SubsetIndex subset(0);
|
|
// Read the file once to discover the smallest element index.
|
|
BaseInt smallest_element = std::numeric_limits<BaseInt>::max();
|
|
BaseInt largest_element = 0;
|
|
for (const std::string& line : FileLines(filename)) {
|
|
std::vector<std::string> elements = absl::StrSplit(line, ' ');
|
|
if (elements.back().empty() || elements.back()[0] == '\0') {
|
|
elements.pop_back();
|
|
}
|
|
for (const std::string& number_str : elements) {
|
|
BaseInt element;
|
|
CHECK(absl::SimpleAtoi(number_str, &element));
|
|
smallest_element = std::min(smallest_element, element);
|
|
largest_element = std::max(largest_element, element);
|
|
}
|
|
}
|
|
DLOG(INFO) << "Smallest element: " << smallest_element
|
|
<< ", Largest element: " << largest_element;
|
|
ElementBoolVector element_seen(largest_element + 1, false);
|
|
for (const std::string& line : FileLines(filename)) {
|
|
LOG_EVERY_N_SEC(INFO, 5)
|
|
<< absl::StrFormat("Reading subset %d", subset.value());
|
|
std::vector<std::string> elements = absl::StrSplit(line, ' ');
|
|
if (elements.back().empty() || elements.back()[0] == '\0') {
|
|
elements.pop_back();
|
|
}
|
|
model.AddEmptySubset(1);
|
|
// As there can be repetitions in the data, we need to keep track of the
|
|
// elements already added to the subset.
|
|
std::vector<ElementIndex> elements_list;
|
|
for (const std::string& number_str : elements) {
|
|
BaseInt raw_element;
|
|
CHECK(absl::SimpleAtoi(number_str, &raw_element));
|
|
// Re-index the elements starting from 0.
|
|
ElementIndex element(raw_element - smallest_element);
|
|
if (element_seen[element]) {
|
|
DLOG(INFO) << "Element " << element << " already in subset "
|
|
<< subset.value();
|
|
continue;
|
|
}
|
|
element_seen[element] = true;
|
|
elements_list.push_back(element);
|
|
CHECK_GE(element.value(), 0);
|
|
model.AddElementToLastSubset(element);
|
|
}
|
|
// Clean up the list of elements.
|
|
for (const ElementIndex element : elements_list) {
|
|
element_seen[element] = false;
|
|
}
|
|
++subset;
|
|
}
|
|
LOG(INFO) << "Read " << model.num_subsets() << " subsets, "
|
|
<< model.num_elements() << " elements";
|
|
model.CreateSparseRowView();
|
|
return model;
|
|
}
|
|
|
|
SetCoverModel ReadSetCoverProto(absl::string_view filename, bool binary) {
|
|
CHECK_OK(file::Exists(filename, file::Defaults()));
|
|
SetCoverModel model;
|
|
SetCoverProto message;
|
|
if (binary) {
|
|
CHECK_OK(file::GetBinaryProto(filename, &message, file::Defaults()));
|
|
} else {
|
|
CHECK_OK(file::GetTextProto(filename, &message, file::Defaults()));
|
|
}
|
|
model.ImportModelFromProto(message);
|
|
return model;
|
|
}
|
|
|
|
namespace {
|
|
// A class to format data and write it to a file.
|
|
// Text is formatted in chunks of at most max_cols characters.
|
|
// Text is actually written to the file when the current chunk is full or when
|
|
// FlushLine() is called.
|
|
// FlushLine() must be called before closing the file.
|
|
class LineFormatter {
|
|
public:
|
|
explicit LineFormatter(File* file)
|
|
: LineFormatter(file, std::numeric_limits<int64_t>::max()) {}
|
|
LineFormatter(File* file, int64_t max_cols)
|
|
: num_cols_(0), max_cols_(max_cols), line_(), file_(file) {}
|
|
~LineFormatter() { CHECK(line_.empty()); }
|
|
|
|
void Append(absl::string_view text) {
|
|
const int text_size = text.size();
|
|
if (!text.empty() && text_size + num_cols_ > max_cols_) {
|
|
FlushLine();
|
|
}
|
|
absl::StrAppend(&line_, text);
|
|
num_cols_ += text_size;
|
|
}
|
|
|
|
void Append(BaseInt value) { Append(absl::StrCat(value, " ")); }
|
|
|
|
void Append(double value) { Append(absl::StrFormat("%.17g ", value)); }
|
|
|
|
void FlushLine() {
|
|
CHECK_OK(
|
|
file::WriteString(file_, absl::StrCat(line_, "\n"), file::Defaults()));
|
|
line_.clear();
|
|
num_cols_ = 0;
|
|
}
|
|
|
|
private:
|
|
int64_t num_cols_;
|
|
int64_t max_cols_;
|
|
std::string line_;
|
|
File* file_;
|
|
};
|
|
} // namespace
|
|
|
|
void WriteOrlibScp(const SetCoverModel& model, absl::string_view filename) {
|
|
File* file(file::OpenOrDie(filename, "w", file::Defaults()));
|
|
LineFormatter formatter(file);
|
|
formatter.Append(model.num_elements());
|
|
formatter.Append(model.num_subsets());
|
|
formatter.FlushLine();
|
|
for (const SubsetIndex subset : model.SubsetRange()) {
|
|
formatter.Append(model.subset_costs()[subset]);
|
|
}
|
|
formatter.FlushLine();
|
|
for (const ElementIndex element : model.ElementRange()) {
|
|
LOG_EVERY_N_SEC(INFO, 5)
|
|
<< absl::StrFormat("Writing element %d (%.1f%%)", element.value(),
|
|
Percent(element, model.num_elements()));
|
|
formatter.Append(absl::StrCat(model.rows()[element].size(), "\n"));
|
|
for (const SubsetIndex subset : model.rows()[element]) {
|
|
formatter.Append(subset.value() + 1);
|
|
}
|
|
formatter.FlushLine();
|
|
}
|
|
LOG(INFO) << "Finished writing the model.";
|
|
file->Close(file::Defaults()).IgnoreError();
|
|
}
|
|
|
|
// Beware the fact that elements written are converted to 1-indexed.
|
|
void WriteOrlibRail(const SetCoverModel& model, absl::string_view filename) {
|
|
File* file(file::OpenOrDie(filename, "w", file::Defaults()));
|
|
CHECK_OK(file::WriteString(
|
|
file, absl::StrCat(model.num_elements(), " ", model.num_subsets(), "\n"),
|
|
file::Defaults()));
|
|
LineFormatter formatter(file);
|
|
for (const SubsetIndex subset : model.SubsetRange()) {
|
|
LOG_EVERY_N_SEC(INFO, 5)
|
|
<< absl::StrFormat("Writing subset %d (%.1f%%)", subset.value(),
|
|
Percent(subset, model.num_subsets()));
|
|
formatter.Append(model.subset_costs()[subset]);
|
|
formatter.Append(static_cast<BaseInt>(model.columns()[subset].size()));
|
|
for (const ElementIndex element : model.columns()[subset]) {
|
|
formatter.Append(element.value() + 1);
|
|
}
|
|
formatter.FlushLine();
|
|
}
|
|
LOG(INFO) << "Finished writing the model.";
|
|
file->Close(file::Defaults()).IgnoreError();
|
|
}
|
|
|
|
void WriteSetCoverProto(const SetCoverModel& model, absl::string_view filename,
|
|
bool binary) {
|
|
const SetCoverProto message = model.ExportModelAsProto();
|
|
if (binary) {
|
|
CHECK_OK(file::SetBinaryProto(filename, message, file::Defaults()));
|
|
} else {
|
|
CHECK_OK(file::SetTextProto(filename, message, file::Defaults()));
|
|
}
|
|
}
|
|
|
|
SubsetBoolVector ReadSetCoverSolutionText(absl::string_view filename) {
|
|
CHECK_OK(file::Exists(filename, file::Defaults()));
|
|
SubsetBoolVector solution;
|
|
File* file(file::OpenOrDie(filename, "r", file::Defaults()));
|
|
SetCoverReader reader(file);
|
|
const BaseInt num_cols(reader.ParseNextInteger());
|
|
solution.resize(num_cols, false);
|
|
const BaseInt cardinality(reader.ParseNextInteger());
|
|
for (int i = 0; i < cardinality; ++i) {
|
|
// NOTE(user): The solution is 0-indexed.
|
|
const SubsetIndex subset(reader.ParseNextInteger());
|
|
solution[subset] = true;
|
|
}
|
|
file->Close(file::Defaults()).IgnoreError();
|
|
return solution;
|
|
}
|
|
|
|
SubsetBoolVector ReadSetCoverSolutionProto(absl::string_view filename,
|
|
bool binary) {
|
|
CHECK_OK(file::Exists(filename, file::Defaults()));
|
|
SubsetBoolVector solution;
|
|
SetCoverSolutionResponse message;
|
|
if (binary) {
|
|
CHECK_OK(file::GetBinaryProto(filename, &message, file::Defaults()));
|
|
} else {
|
|
CHECK_OK(file::GetTextProto(filename, &message, file::Defaults()));
|
|
}
|
|
solution.resize(message.num_subsets(), false);
|
|
// NOTE(user): The solution is 0-indexed.
|
|
for (const BaseInt subset : message.subset()) {
|
|
solution[SubsetIndex(subset)] = true;
|
|
}
|
|
return solution;
|
|
}
|
|
|
|
void WriteSetCoverSolutionText(const SetCoverModel& model,
|
|
const SubsetBoolVector& solution,
|
|
absl::string_view filename) {
|
|
File* file(file::OpenOrDie(filename, "w", file::Defaults()));
|
|
BaseInt cardinality(0);
|
|
Cost cost(0);
|
|
for (SubsetIndex subset(0); subset.value() < solution.size(); ++subset) {
|
|
if (solution[subset]) {
|
|
++cardinality;
|
|
cost += model.subset_costs()[subset];
|
|
}
|
|
}
|
|
CHECK_OK(file::WriteString(
|
|
file, absl::StrCat(solution.size(), " ", cardinality, " ", cost, "\n"),
|
|
file::Defaults()));
|
|
LineFormatter formatter(file);
|
|
for (BaseInt subset(0); subset < solution.size(); ++subset) {
|
|
if (solution[SubsetIndex(subset)]) {
|
|
formatter.Append(subset);
|
|
}
|
|
}
|
|
formatter.FlushLine();
|
|
file->Close(file::Defaults()).IgnoreError();
|
|
}
|
|
|
|
void WriteSetCoverSolutionProto(const SetCoverModel& model,
|
|
const SubsetBoolVector& solution,
|
|
absl::string_view filename, bool binary) {
|
|
SetCoverSolutionResponse message;
|
|
message.set_num_subsets(solution.size());
|
|
Cost cost(0);
|
|
for (SubsetIndex subset(0); subset.value() < solution.size(); ++subset) {
|
|
if (solution[subset]) {
|
|
message.add_subset(subset.value());
|
|
cost += model.subset_costs()[subset];
|
|
}
|
|
}
|
|
message.set_cost(cost);
|
|
if (binary) {
|
|
CHECK_OK(file::SetBinaryProto(filename, message, file::Defaults()));
|
|
} else {
|
|
CHECK_OK(file::SetTextProto(filename, message, file::Defaults()));
|
|
}
|
|
}
|
|
} // namespace operations_research
|