// Copyright 2010-2025 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
// Set of integer tuples (fixed-size arrays, all of the same size) with
// a basic API.
// It supports several types of integer arrays transparently, with an
// inherent storage based on int64_t arrays.
//
// The key feature is the "lazy" copy:
// - Copying an IntTupleSet won't actually copy the data right away; we
//   will just have several IntTupleSet pointing at the same data.
// - Modifying an IntTupleSet which shares its data with others
//   will create a new, modified instance of the data payload, and make
//   the IntTupleSet point to that new data.
// - Modifying an IntTupleSet that doesn't share its data with any other
//   IntTupleSet will modify the data directly.
// Therefore, you don't need to use const IntTupleSet& in methods. Just do:
// void MyMethod(IntTupleSet tuple_set) { ... }
//
// This class is thread hostile as the copy and reference counter are
// not protected by a mutex.
|
|
|
|
#ifndef ORTOOLS_UTIL_TUPLE_SET_H_
|
|
#define ORTOOLS_UTIL_TUPLE_SET_H_
|
|
|
|
#include <algorithm>
|
|
#include <cstdint>
|
|
#include <vector>
|
|
|
|
#include "absl/container/flat_hash_map.h"
|
|
#include "absl/container/flat_hash_set.h"
|
|
#include "ortools/base/hash.h"
|
|
#include "ortools/base/logging.h"
|
|
|
|
namespace operations_research {
|
|
// ----- Main IntTupleSet class -----
|
|
class IntTupleSet {
|
|
public:
|
|
// Creates an empty tuple set with a fixed length for all tuples.
|
|
explicit IntTupleSet(int arity);
|
|
// Copy constructor (it actually does a lazy copy, see toplevel comment).
|
|
IntTupleSet(const IntTupleSet& set); // NOLINT
|
|
~IntTupleSet();
|
|
|
|
// Clears data.
|
|
void Clear();
|
|
|
|
// Inserts the tuple to the set. It does nothing if the tuple is
|
|
// already in the set. The size of the tuple must be equal to the
|
|
// arity of the set. It returns the index at which the tuple was
|
|
// inserted (-1 if it was already present).
|
|
int Insert(const std::vector<int>& tuple);
|
|
int Insert(const std::vector<int64_t>& tuple);
|
|
// Arity fixed version of Insert removing the need for a vector for the user.
|
|
int Insert2(int64_t v0, int64_t v1);
|
|
int Insert3(int64_t v0, int64_t v1, int64_t v2);
|
|
int Insert4(int64_t v0, int64_t v1, int64_t v2, int64_t v3);
|
|
// Inserts the tuples.
|
|
void InsertAll(const std::vector<std::vector<int64_t> >& tuples);
|
|
void InsertAll(const std::vector<std::vector<int> >& tuples);
|
|
|
|
// Checks if the tuple is in the set.
|
|
bool Contains(const std::vector<int>& tuple) const;
|
|
bool Contains(const std::vector<int64_t>& tuple) const;
|
|
|
|
// Returns the number of tuples.
|
|
int NumTuples() const;
|
|
// Get the given tuple's value at the given position. The indices
|
|
// of the tuples correspond to the order in which they were
|
|
// inserted.
|
|
int64_t Value(int tuple_index, int pos_in_tuple) const;
|
|
// Returns the arity of the set.
|
|
int Arity() const;
|
|
// Access the raw data, see IntTupleSet::Data::flat_tuples_.
|
|
const int64_t* RawData() const;
|
|
// Returns the number of different values in the given column.
|
|
int NumDifferentValuesInColumn(int col) const;
|
|
// Return a copy of the set, sorted by the "col"-th value of each
|
|
// tuples. The sort is stable.
|
|
IntTupleSet SortedByColumn(int col) const;
|
|
// Returns a copy of the tuple set lexicographically sorted.
|
|
IntTupleSet SortedLexicographically() const;
|
|
|
|
private:
|
|
// Class that holds the actual data of an IntTupleSet. It handles
|
|
// the reference counters, etc.
|
|
class Data {
|
|
public:
|
|
explicit Data(int arity);
|
|
Data(const Data& data);
|
|
~Data();
|
|
void AddSharedOwner();
|
|
bool RemovedSharedOwner();
|
|
Data* CopyIfShared();
|
|
template <class T>
|
|
int Insert(const std::vector<T>& tuple);
|
|
template <class T>
|
|
bool Contains(const std::vector<T>& candidate) const;
|
|
template <class T>
|
|
int64_t Fingerprint(const std::vector<T>& tuple) const;
|
|
int NumTuples() const;
|
|
int64_t Value(int index, int pos) const;
|
|
int Arity() const;
|
|
const int64_t* RawData() const;
|
|
void Clear();
|
|
|
|
private:
|
|
const int arity_;
|
|
int num_owners_;
|
|
// Concatenation of all tuples ever added.
|
|
std::vector<int64_t> flat_tuples_;
|
|
// Maps a tuple's fingerprint to the list of tuples with this
|
|
// fingerprint, represented by their start index in the
|
|
// flat_tuples_ vector.
|
|
absl::flat_hash_map<int64_t, std::vector<int> > tuple_fprint_to_index_;
|
|
};
|
|
|
|
// Used to represent a light representation of a tuple.
|
|
struct IndexData {
|
|
int index;
|
|
IntTupleSet::Data* data;
|
|
IndexData(int i, IntTupleSet::Data* const d) : index(i), data(d) {}
|
|
static bool Compare(const IndexData& a, const IndexData& b);
|
|
};
|
|
|
|
struct IndexValue {
|
|
int index;
|
|
int64_t value;
|
|
IndexValue(int i, int64_t v) : index(i), value(v) {}
|
|
static bool Compare(const IndexValue& a, const IndexValue& b);
|
|
};
|
|
|
|
mutable Data* data_;
|
|
};
|
|
|
|
// ----- Data -----
|
|
// Builds an empty payload for tuples of length `arity`. The owner count
// starts at zero; owners are registered through AddSharedOwner().
inline IntTupleSet::Data::Data(int arity)
    : arity_(arity),
      num_owners_(0),
      flat_tuples_(),
      tuple_fprint_to_index_() {}
|
|
|
|
// Deep-copies the arity, the flat tuple storage and the fingerprint index of
// `data`. The owner count is NOT copied: the new payload starts with zero
// owners.
inline IntTupleSet::Data::Data(const Data& data)
    : arity_(data.arity_),
      num_owners_(0),
      flat_tuples_(data.flat_tuples_),
      tuple_fprint_to_index_(data.tuple_fprint_to_index_) {}
|
|
|
|
// Nothing to release by hand: the members clean up after themselves.
inline IntTupleSet::Data::~Data() = default;
|
|
|
|
// Records that one more IntTupleSet points at this payload.
inline void IntTupleSet::Data::AddSharedOwner() {
  ++num_owners_;
}
|
|
|
|
inline bool IntTupleSet::Data::RemovedSharedOwner() {
|
|
return (--num_owners_ == 0);
|
|
}
|
|
|
|
// Returns a payload the caller may modify. With at most one owner that is
// `this`. Otherwise (copy on write) the caller's ownership is moved from this
// payload to a newly allocated copy, which is returned with exactly one owner.
inline IntTupleSet::Data* IntTupleSet::Data::CopyIfShared() {
  if (num_owners_ <= 1) {
    return this;
  }
  Data* const private_copy = new Data(*this);
  RemovedSharedOwner();
  private_copy->AddSharedOwner();
  return private_copy;
}
|
|
|
|
template <class T>
|
|
int IntTupleSet::Data::Insert(const std::vector<T>& tuple) {
|
|
DCHECK(arity_ == 0 || flat_tuples_.size() % arity_ == 0);
|
|
CHECK_EQ(arity_, tuple.size());
|
|
DCHECK_EQ(1, num_owners_);
|
|
if (!Contains(tuple)) {
|
|
const int index = NumTuples();
|
|
const int offset = flat_tuples_.size();
|
|
flat_tuples_.resize(offset + arity_);
|
|
// On mac os X, using this instead of push_back gives a 10x speedup!
|
|
for (int i = 0; i < arity_; ++i) {
|
|
flat_tuples_[offset + i] = tuple[i];
|
|
}
|
|
const int64_t fingerprint = Fingerprint(tuple);
|
|
tuple_fprint_to_index_[fingerprint].push_back(index);
|
|
return index;
|
|
} else {
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
template <class T>
|
|
bool IntTupleSet::Data::Contains(const std::vector<T>& candidate) const {
|
|
if (candidate.size() != arity_) {
|
|
return false;
|
|
}
|
|
const int64_t fingerprint = Fingerprint(candidate);
|
|
if (tuple_fprint_to_index_.contains(fingerprint)) {
|
|
const std::vector<int>& indices = tuple_fprint_to_index_.at(fingerprint);
|
|
for (int i = 0; i < indices.size(); ++i) {
|
|
const int tuple_index = indices[i];
|
|
for (int j = 0; j < arity_; ++j) {
|
|
if (candidate[j] != flat_tuples_[tuple_index * arity_ + j]) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
template <class T>
|
|
int64_t IntTupleSet::Data::Fingerprint(const std::vector<T>& tuple) const {
|
|
switch (arity_) {
|
|
case 0:
|
|
return 0;
|
|
case 1:
|
|
return tuple[0];
|
|
case 2: {
|
|
uint64_t x = tuple[0];
|
|
uint64_t y = uint64_t{0xe08c1d668b756f82};
|
|
uint64_t z = tuple[1];
|
|
mix(x, y, z);
|
|
return z;
|
|
}
|
|
default: {
|
|
uint64_t x = tuple[0];
|
|
uint64_t y = uint64_t{0xe08c1d668b756f82};
|
|
for (int i = 1; i < tuple.size(); ++i) {
|
|
uint64_t z = tuple[i];
|
|
mix(x, y, z);
|
|
x = z;
|
|
}
|
|
return x;
|
|
}
|
|
}
|
|
}
|
|
|
|
inline int IntTupleSet::Data::NumTuples() const {
|
|
return tuple_fprint_to_index_.size();
|
|
}
|
|
|
|
// Returns the value at position `pos` of the tuple number `index`. Both
// arguments are range-checked in debug builds only.
inline int64_t IntTupleSet::Data::Value(int index, int pos) const {
  DCHECK_GE(index, 0);
  DCHECK_LT(index, flat_tuples_.size() / arity_);
  DCHECK_GE(pos, 0);
  DCHECK_LT(pos, arity_);
  // Tuples are stored back to back, arity_ values each.
  const int flat_position = index * arity_ + pos;
  return flat_tuples_[flat_position];
}
|
|
|
|
// Returns the fixed length of every tuple in this payload.
inline int IntTupleSet::Data::Arity() const {
  return arity_;
}
|
|
|
|
inline const int64_t* IntTupleSet::Data::RawData() const {
|
|
return flat_tuples_.data();
|
|
}
|
|
|
|
// Removes every tuple: empties both the fingerprint index and the flat
// storage. The arity and the owner count are left untouched.
inline void IntTupleSet::Data::Clear() {
  tuple_fprint_to_index_.clear();
  flat_tuples_.clear();
}
|
|
|
|
// Creates an empty set of tuples of length `arity` (CHECKed to be >= 0),
// backed by a fresh payload of which this set is the only owner.
inline IntTupleSet::IntTupleSet(int arity)
    : data_(new Data(arity)) {
  CHECK_GE(arity, 0);
  data_->AddSharedOwner();
}
|
|
|
|
// Lazy copy: no tuple is duplicated. The new set points at the payload of
// `set` and registers itself as an additional owner of it.
inline IntTupleSet::IntTupleSet(const IntTupleSet& set)
    : data_(set.data_) {
  data_->AddSharedOwner();
}
|
|
|
|
// Gives up this set's ownership of the payload, and deletes the payload when
// this set was its last owner.
inline IntTupleSet::~IntTupleSet() {
  CHECK(data_ != nullptr);
  const bool was_last_owner = data_->RemovedSharedOwner();
  if (was_last_owner) {
    delete data_;
  }
}
|
|
|
|
inline void IntTupleSet::Clear() {
|
|
data_ = data_->CopyIfShared();
|
|
data_->Clear();
|
|
}
|
|
|
|
// Inserts a tuple of ints. Detaches a shared payload first, then returns what
// the payload's Insert() returns: the new tuple's index, or -1 if the tuple
// was already present.
inline int IntTupleSet::Insert(const std::vector<int>& tuple) {
  data_ = data_->CopyIfShared();
  const int inserted_at = data_->Insert(tuple);
  return inserted_at;
}
|
|
|
|
// Inserts a tuple of int64_t. Detaches a shared payload first, then returns
// what the payload's Insert() returns: the new tuple's index, or -1 if the
// tuple was already present.
inline int IntTupleSet::Insert(const std::vector<int64_t>& tuple) {
  data_ = data_->CopyIfShared();
  const int inserted_at = data_->Insert(tuple);
  return inserted_at;
}
|
|
|
|
inline int IntTupleSet::Insert2(int64_t v0, int64_t v1) {
|
|
std::vector<int64_t> tuple(2);
|
|
tuple[0] = v0;
|
|
tuple[1] = v1;
|
|
return Insert(tuple);
|
|
}
|
|
|
|
inline int IntTupleSet::Insert3(int64_t v0, int64_t v1, int64_t v2) {
|
|
std::vector<int64_t> tuple(3);
|
|
tuple[0] = v0;
|
|
tuple[1] = v1;
|
|
tuple[2] = v2;
|
|
return Insert(tuple);
|
|
}
|
|
|
|
inline int IntTupleSet::Insert4(int64_t v0, int64_t v1, int64_t v2,
|
|
int64_t v3) {
|
|
std::vector<int64_t> tuple(4);
|
|
tuple[0] = v0;
|
|
tuple[1] = v1;
|
|
tuple[2] = v2;
|
|
tuple[3] = v3;
|
|
return Insert(tuple);
|
|
}
|
|
|
|
inline bool IntTupleSet::Contains(const std::vector<int>& tuple) const {
|
|
return data_->Contains(tuple);
|
|
}
|
|
|
|
inline bool IntTupleSet::Contains(const std::vector<int64_t>& tuple) const {
|
|
return data_->Contains(tuple);
|
|
}
|
|
|
|
inline void IntTupleSet::InsertAll(
|
|
const std::vector<std::vector<int> >& tuples) {
|
|
data_ = data_->CopyIfShared();
|
|
for (int i = 0; i < tuples.size(); ++i) {
|
|
Insert(tuples[i]);
|
|
}
|
|
}
|
|
|
|
inline void IntTupleSet::InsertAll(
|
|
const std::vector<std::vector<int64_t> >& tuples) {
|
|
data_ = data_->CopyIfShared();
|
|
for (int i = 0; i < tuples.size(); ++i) {
|
|
Insert(tuples[i]);
|
|
}
|
|
}
|
|
|
|
// Returns the tuple count reported by the payload.
inline int IntTupleSet::NumTuples() const {
  return data_->NumTuples();
}
|
|
|
|
inline int64_t IntTupleSet::Value(int index, int pos) const {
|
|
return data_->Value(index, pos);
|
|
}
|
|
|
|
// Returns the fixed tuple length of this set.
inline int IntTupleSet::Arity() const {
  return data_->Arity();
}
|
|
|
|
// Returns the payload's pointer to the flat tuple storage.
inline const int64_t* IntTupleSet::RawData() const {
  return data_->RawData();
}
|
|
|
|
inline int IntTupleSet::NumDifferentValuesInColumn(int col) const {
|
|
if (col < 0 || col >= data_->Arity()) {
|
|
return 0;
|
|
}
|
|
absl::flat_hash_set<int64_t> values;
|
|
for (int i = 0; i < data_->NumTuples(); ++i) {
|
|
values.insert(data_->Value(i, col));
|
|
}
|
|
return values.size();
|
|
}
|
|
|
|
inline bool IntTupleSet::IndexValue::Compare(const IndexValue& a,
|
|
const IndexValue& b) {
|
|
return a.value < b.value || (a.value == b.value && a.index < b.index);
|
|
}
|
|
|
|
// Returns a new set holding the same tuples ordered by their value at
// position `col`. Tuples with equal values keep their relative order because
// the sort key also carries the original index.
inline IntTupleSet IntTupleSet::SortedByColumn(int col) const {
  const int num_tuples = data_->NumTuples();
  std::vector<IndexValue> keys;
  keys.reserve(num_tuples);
  for (int row = 0; row < num_tuples; ++row) {
    keys.emplace_back(row, data_->Value(row, col));
  }
  std::sort(keys.begin(), keys.end(), IntTupleSet::IndexValue::Compare);

  const int arity = data_->Arity();
  IntTupleSet sorted(arity);
  for (const IndexValue& key : keys) {
    // Copy the tuple straight out of the flat storage.
    const int64_t* const first = data_->RawData() + key.index * arity;
    sorted.Insert(std::vector<int64_t>(first, first + arity));
  }
  return sorted;
}
|
|
|
|
inline bool IntTupleSet::IndexData::Compare(const IndexData& a,
|
|
const IndexData& b) {
|
|
const IntTupleSet::Data* const data = a.data;
|
|
const int arity = data->Arity();
|
|
for (int i = 0; i < arity; ++i) {
|
|
const int64_t value1 = data->Value(a.index, i);
|
|
const int64_t value2 = data->Value(b.index, i);
|
|
if (value1 < value2) {
|
|
return true;
|
|
}
|
|
if (value1 > value2) {
|
|
return false;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Returns a new set holding the same tuples in lexicographic order.
//
// Tuple indices are sorted with IndexData::Compare, then each tuple is copied
// from the flat storage into the result in that order.
inline IntTupleSet IntTupleSet::SortedLexicographically() const {
  const int num_tuples = data_->NumTuples();
  std::vector<IndexData> keys;
  keys.reserve(num_tuples);
  for (int index = 0; index < num_tuples; ++index) {
    keys.push_back(IndexData(index, data_));
  }
  std::sort(keys.begin(), keys.end(), IntTupleSet::IndexData::Compare);

  const int arity = data_->Arity();
  IntTupleSet sorted(arity);
  for (const IndexData& key : keys) {
    // Build the tuple directly from the flat storage. No scratch vector is
    // needed.
    const int64_t* const tuple_ptr = data_->RawData() + key.index * arity;
    sorted.Insert(std::vector<int64_t>(tuple_ptr, tuple_ptr + arity));
  }
  return sorted;
}
|
|
} // namespace operations_research
|
|
|
|
#endif // ORTOOLS_UTIL_TUPLE_SET_H_
|