NOTE(review): the header names of the #include lines in the first hunk of
ortools/base/file.cc were lost before this patch was reviewed and are kept as
seen; restore them (and double-check context indentation) before applying.

diff --git a/MODULE.bazel b/MODULE.bazel
index 38e6907d7a..f25fdcf1b5 100644
--- a/MODULE.bazel
+++ b/MODULE.bazel
@@ -15,6 +15,7 @@ module(
 # see https://registry.bazel.build/
 bazel_dep(name = "abseil-cpp", version = "20250127.1")
 bazel_dep(name = "bazel_skylib", version = "1.7.1")
+bazel_dep(name = "bzip2", version = "1.0.8.bcr.2")
 bazel_dep(name = "contrib_rules_jvm", version = "0.28.0")
 bazel_dep(name = "eigen", version = "4.0.0-20241125.bcr.1")
 bazel_dep(name = "fuzztest", version = "20250214.0", repo_name = "com_google_fuzztest")
diff --git a/ortools/base/BUILD.bazel b/ortools/base/BUILD.bazel
index 2601fe0de0..7cd3f37a0c 100644
--- a/ortools/base/BUILD.bazel
+++ b/ortools/base/BUILD.bazel
@@ -216,12 +216,14 @@ cc_library(
         "helpers.h",
         "options.h",
     ],
+    defines = ["USE_BZIP2"],
     deps = [
         ":status_macros",
         "@abseil-cpp//absl/log",
         "@abseil-cpp//absl/log:check",
         "@abseil-cpp//absl/status",
         "@abseil-cpp//absl/strings",
+        "@bzip2//:bz2",
         "@com_google_protobuf//:protobuf",
         "@zlib",
     ],
diff --git a/ortools/base/file.cc b/ortools/base/file.cc
index 247301e20b..e79194f959 100644
--- a/ortools/base/file.cc
+++ b/ortools/base/file.cc
@@ -15,7 +15,11 @@
 
 #include
 #include
+
+#include
+#if defined(USE_BZIP2)
 #include
+#endif
 #include
 
 #if defined(_MSC_VER)
@@ -46,12 +50,15 @@ namespace {
 enum class Format {
   NORMAL_FILE,
   GZIP_FILE,
+  BZIP2_FILE,
 };
 
 static Format GetFormatFromName(absl::string_view name) {
   const int size = name.size();
   if (size > 4 && name.substr(size - 3) == ".gz") {
     return Format::GZIP_FILE;
+  } else if (size > 5 && name.substr(size - 4) == ".bz2") {
+    return Format::BZIP2_FILE;
   } else {
     return Format::NORMAL_FILE;
   }
@@ -183,6 +190,86 @@ class GzFile : public File {
  private:
   gzFile f_;
 };
+
+#if defined(USE_BZIP2)
+// File implementation backed by the stdio-like BZFILE API of libbzip2.
+class Bz2File : public File {
+ public:
+  // Wraps "bz_file", an already opened bzip2 stream; "name" is the path it was
+  // opened from and is what Size() reopens.
+  Bz2File(BZFILE* bz_file, absl::string_view name) : File(name), f_(bz_file) {}
+  virtual ~Bz2File() = default;
+
+  // Reads up to "size" bytes to buf from file, buf should be pre-allocated.
+  // Returns the number of bytes read, or 0 on end of stream or error.
+  size_t Read(void* buf, size_t size) override {
+    // BZ2_bzread() returns -1 on error; do not let it wrap around to a huge
+    // size_t that callers would take for a byte count.
+    const int bytes_read = BZ2_bzread(f_, buf, static_cast<int>(size));
+    return bytes_read > 0 ? static_cast<size_t>(bytes_read) : 0;
+  }
+
+  // Writes "size" bytes of buf to file, buf should be pre-allocated.
+  // Returns the number of bytes written, or 0 on error.
+  size_t Write(const void* buf, size_t size) override {
+    // BZ2_bzwrite() is declared with a non-const buffer, hence the const_cast.
+    const int bytes_written =
+        BZ2_bzwrite(f_, const_cast<void*>(buf), static_cast<int>(size));
+    return bytes_written > 0 ? static_cast<size_t>(bytes_written) : 0;
+  }
+
+  // Closes the bzip2 stream and deletes this object, which must not be used
+  // afterwards. Always returns OkStatus(): BZ2_bzclose() reports no error.
+  absl::Status Close(int flags) override {
+    if (f_ == nullptr) {
+      return absl::OkStatus();
+    }
+    BZ2_bzclose(f_);
+    f_ = nullptr;
+    delete this;
+    return absl::OkStatus();
+  }
+
+  // Flushes buffer.
+  bool Flush() override { return BZ2_bzflush(f_) == 0; }
+
+  // Returns the uncompressed size in bytes, computed by reopening the file by
+  // name and decompressing it to the end, so the cost grows with the data.
+  size_t Size() override {
+    const std::string null_terminated_name = std::string(name_);
+#if defined(_MSC_VER)
+    BZFILE* file = BZ2_bzopen(null_terminated_name.c_str(), "rb");
+#else
+    BZFILE* file = BZ2_bzopen(null_terminated_name.c_str(), "r");
+#endif
+    if (!file) {
+      LOG(FATAL) << "Cannot get the size of '" << name_
+                 << "': " << strerror(errno);
+    }
+
+    constexpr int kLength = 5 * 1024;
+    char buffer[kLength];
+    size_t uncompressed_size = 0;
+    while (true) {
+      const int bytes_read = BZ2_bzread(file, buffer, kLength);
+      if (bytes_read < 0) {
+        // Never add the -1 error code to the unsigned total.
+        LOG(ERROR) << "Cannot decompress '" << name_ << "'";
+        break;
+      }
+      if (bytes_read == 0) break;  // End of the compressed stream.
+      uncompressed_size += static_cast<size_t>(bytes_read);
+    }
+    BZ2_bzclose(file);
+    return uncompressed_size;
+  }
+
+  bool Open() const override { return f_ != nullptr; }
+
+ private:
+  BZFILE* f_;
+};
+#endif  // USE_BZIP2
 
 }  // namespace
 
@@ -215,13 +302,23 @@ File* File::Open(absl::string_view file_name, absl::string_view mode) {
     case Format::GZIP_FILE: {
       gzFile gz_file =
           gzopen(null_terminated_name.c_str(), null_terminated_mode.c_str());
-      if (!gz_file) {
-        return nullptr;
-      }
+      if (!gz_file) return nullptr;
       return new GzFile(gz_file, file_name);
     }
+    case Format::BZIP2_FILE: {
+#if defined(USE_BZIP2)
+      BZFILE* bz_file =
+          BZ2_bzopen(null_terminated_name.c_str(), null_terminated_mode.c_str());
+      if (!bz_file) return nullptr;
+      return new Bz2File(bz_file, file_name);
+#else
+      LOG(ERROR) << "Using bzip2 files is not supported";
+      return nullptr;
+#endif
+    }
   }
+  // Fallback so that no path can fall off the end of this non-void function.
   return nullptr;
 }
 
 int64_t File::ReadToString(std::string* line, uint64_t max_length) {