Files
ortools-clone/ortools/util/filelineiter.h
Corentin Le Molgat b4b226801b update include guards
2025-11-05 11:54:02 +01:00

177 lines
5.5 KiB
C++

// Copyright 2010-2025 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Allows reading a text file line by line with:
// for (const std::string& line : FileLines("myfile.txt")) { ... }
//
// More details:
// * The lines are separated by '\n' (which is removed by default) and have no
// size limits.
// * Consecutive '\n' result in empty lines being produced.
// * If not empty, the string after the last '\n' is produced as the last line.
// * Options are available to keep the trailing '\n' for each line, to remove
// carriage-return characters ('\r'), and to remove blank lines.
//
#ifndef ORTOOLS_UTIL_FILELINEITER_H_
#define ORTOOLS_UTIL_FILELINEITER_H_
#include <algorithm>
#include <cstdint>
#include <string>
#include "ortools/base/file.h"
#include "ortools/base/logging.h"
// Implements the minimum interface for a range-based for loop iterator
// (operator*, operator!= and operator++) over the lines of a File.
//
// The file is read in chunks of kBufferSize bytes; a line may span any number
// of chunks. The iterator does not close the file. Once the end of the file is
// reached (or a read error is reported), the internal file pointer is reset to
// nullptr, which makes the iterator compare equal to an "end" iterator built
// from a null file.
class FileLineIterator {
 public:
  // Bit flags that can be OR-ed together and passed as `options`.
  enum : int {
    DEFAULT = 0x0000,
    REMOVE_LINEFEED = DEFAULT,
    KEEP_LINEFEED = 0x0001,       // Terminating \n in result.
    REMOVE_INLINE_CR = 0x0002,    // Remove \r characters.
    REMOVE_BLANK_LINES = 0x0004,  // Remove empty or \n-only lines.
  };

  // Starts iterating over `file` and immediately reads the first line.
  // A null `file` produces an iterator that is already at the end.
  FileLineIterator(File* file, int options)
      : next_position_after_eol_(0),
        buffer_size_(0),
        file_(file),
        options_(options) {
    ReadNextLine();
  }

  // Returns the current line (post-processed according to the options).
  const std::string& operator*() const { return line_; }

  // Two iterators differ when they do not refer to the same file. Since the
  // file pointer is reset to nullptr at end of file, this is sufficient to
  // terminate a range-based for loop.
  bool operator!=(const FileLineIterator& other) const {
    return file_ != other.file_;
  }

  // Advances to the next line.
  void operator++() { ReadNextLine(); }

 private:
  bool HasOption(int option) const { return (options_ & option) != 0; }

  // Loads the next line into line_, skipping blank lines when
  // REMOVE_BLANK_LINES is set. Sets file_ to nullptr when there is no more
  // line to produce.
  void ReadNextLine() {
    if (file_ == nullptr) {
      line_.clear();
      return;
    }
    do {
      // Must be cleared on every pass, not just once: with KEEP_LINEFEED a
      // skipped blank line is "\n" rather than "". If it were left in place it
      // would be glued in front of the next line, and at end of file it would
      // prevent the "nothing left" detection below, looping forever.
      line_.clear();
      ReadRawLine();
      PostProcessLine();
    } while (file_ != nullptr && HasOption(REMOVE_BLANK_LINES) &&
             (line_.empty() || line_ == "\n"));
  }

  // Appends to line_ the bytes up to and including the next '\n', refilling
  // the buffer from the file as many times as needed. Stops early at end of
  // file or on a read error. Requires file_ != nullptr.
  void ReadRawLine() {
    while (true) {
      const char* const begin = buffer_ + next_position_after_eol_;
      const char* const end = buffer_ + buffer_size_;
      const char* const eol = std::find(begin, end, '\n');
      if (eol != end) {
        // Found the end of the line inside the current buffer: take it,
        // linefeed included, and remember where the next line starts.
        line_.append(begin, static_cast<std::string::size_type>(eol + 1 - begin));
        next_position_after_eol_ = static_cast<int>(eol + 1 - buffer_);
        return;
      }
      // No linefeed in what is left of the buffer: keep that tail and refill.
      line_.append(begin, static_cast<std::string::size_type>(end - begin));
      buffer_size_ = file_->Read(&buffer_, kBufferSize);
      if (buffer_size_ < 0) {
        LOG(WARNING) << "Error while reading file.";
        file_ = nullptr;
        return;
      }
      next_position_after_eol_ = 0;
      if (buffer_size_ == 0) {
        // End of file. A non-empty line_ is the final line without a trailing
        // '\n' and is still produced; the end is signalled on the next call.
        if (line_.empty()) {
          file_ = nullptr;
        }
        return;
      }
    }
  }

  // Applies REMOVE_INLINE_CR and (unless KEEP_LINEFEED is set) strips the
  // terminating '\n' from line_.
  void PostProcessLine() {
    if (HasOption(REMOVE_INLINE_CR)) {
      line_.erase(std::remove(line_.begin(), line_.end(), '\r'), line_.end());
    }
    // ReadRawLine() stops at the first '\n', so a linefeed can only be the
    // last character of the line.
    if (!HasOption(KEEP_LINEFEED) && !line_.empty() && line_.back() == '\n') {
      line_.pop_back();
    }
  }

  static constexpr int kBufferSize = 5 * 1024;
  char buffer_[kBufferSize];
  // Index in buffer_ of the first byte that has not been consumed yet.
  int next_position_after_eol_;
  // Number of valid bytes in buffer_, as returned by the last Read().
  int64_t buffer_size_;
  // Not closed by this class. nullptr once the iteration is over.
  File* file_;
  std::string line_;
  const int options_;
};
// Makes the lines of a file usable as the range of a range-based for loop.
// Not copyable.
class FileLines {
 public:
  // Wraps an already opened file and takes ownership of it: the file is closed
  // when this object is destroyed.
  //
  // A null `file` is accepted, in which case this class behaves as if the file
  // was empty. `filename` is not used by this constructor.
  //
  // Usage:
  //
  //   File* file = nullptr;
  //   RETURN_IF_ERROR(file::Open(filename, "r", &file, file::Defaults()));
  //   for (const absl::string_view line : FileLines(filename, file)) {
  //     ...
  //   }
  //
  FileLines(absl::string_view filename, File* const file,
            const int options = FileLineIterator::DEFAULT)
      : file_(file), options_(options) {}

  // Opens `filename` for reading, ignoring errors.
  //
  // Please prefer the other constructor combined with file::Open() in new code
  // so that missing files are properly detected. This version only logs a
  // warning when the file cannot be opened, and then acts as if the file was
  // empty.
  explicit FileLines(absl::string_view filename,
                     int options = FileLineIterator::DEFAULT)
      : FileLines(filename, OpenOrWarn(filename), options) {}

  FileLines(const FileLines&) = delete;
  FileLines& operator=(const FileLines&) = delete;

  // Closes the file, if there is one. Errors reported by Close() are ignored.
  ~FileLines() {
    if (file_ == nullptr) return;
    file_->Close(file::Defaults()).IgnoreError();
  }

  // Returns a new iterator on the underlying file. Building the iterator
  // already reads one line from the file.
  FileLineIterator begin() { return FileLineIterator(file_, options_); }

  // Returns the past-the-end iterator (an iterator on a null file).
  FileLineIterator end() const { return FileLineIterator(nullptr, options_); }

 private:
  // Tries to open `filename` in "r" mode and returns the resulting File*.
  // Logs a warning if the open call does not succeed.
  static File* OpenOrWarn(absl::string_view filename) {
    File* opened = nullptr;
    const bool ok =
        file::Open(filename, "r", &opened, file::Defaults()).ok();
    if (!ok) {
      LOG(WARNING) << "Could not open: " << filename;
    }
    return opened;
  }

  // Owned. Can be nullptr (null file passed in, or file that could not be
  // opened), in which case no line is produced.
  File* file_;
  const int options_;
};
#endif // ORTOOLS_UTIL_FILELINEITER_H_