diff --git a/CMakeLists.txt b/CMakeLists.txt
index 48ba1b8602..eea50649d5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -127,6 +127,10 @@ CMAKE_DEPENDENT_OPTION(BUILD_Protobuf "Build the Protobuf dependency Library" OF
   "NOT BUILD_DEPS" ON)
 message(STATUS "Build protobuf: ${BUILD_Protobuf}")
 
+CMAKE_DEPENDENT_OPTION(BUILD_re2 "Build the re2 dependency Library" OFF
+  "NOT BUILD_DEPS" ON)
+message(STATUS "Build re2: ${BUILD_re2}")
+
 # Optional third party solvers (enabled by default)
 CMAKE_DEPENDENT_OPTION(USE_SCIP "Use the Scip solver" ON "BUILD_CXX" OFF)
 message(STATUS "SCIP support: ${USE_SCIP}")
diff --git a/WORKSPACE b/WORKSPACE
index 04ab7f7287..f305ffbacf 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -46,6 +46,12 @@ git_repository(
     remote = "https://github.com/abseil/abseil-cpp.git",
 )
 
+git_repository(
+    name = "com_google_re2",
+    commit = "0dade9f", # release 2021-11-01
+    remote = "https://github.com/google/re2.git",
+)
+
 git_repository(
     name = "com_google_googletest",
     commit = "703bd9c", # release-1.10.0
diff --git a/cmake/cpp.cmake b/cmake/cpp.cmake
index fde0011493..57e142386d 100644
--- a/cmake/cpp.cmake
+++ b/cmake/cpp.cmake
@@ -124,6 +124,7 @@ target_link_libraries(${PROJECT_NAME} PUBLIC
   ZLIB::ZLIB
   ${ABSL_DEPS}
   protobuf::libprotobuf
+  re2::re2
   ${COINOR_DEPS}
   $<$<BOOL:${USE_SCIP}>:libscip>
   $<$<BOOL:${USE_GLPK}>:GLPK::GLPK>
diff --git a/cmake/dependencies/CMakeLists.txt b/cmake/dependencies/CMakeLists.txt
index 486c7dc961..6858d959ca 100644
--- a/cmake/dependencies/CMakeLists.txt
+++ b/cmake/dependencies/CMakeLists.txt
@@ -73,6 +73,56 @@ if(BUILD_absl)
   message(CHECK_PASS "fetched")
 endif()
 
+# ##############################################################################
+# Protobuf
+# ##############################################################################
+if(BUILD_Protobuf)
+  message(CHECK_START "Fetching Protobuf")
+  list(APPEND CMAKE_MESSAGE_INDENT " ")
+  set(protobuf_BUILD_TESTS OFF)
+  set(protobuf_BUILD_EXPORT OFF)
+  set(protobuf_MSVC_STATIC_RUNTIME OFF)
+  # FetchContent_Declare(SOURCE_SUBDIR) was introduced in 3.18
+  if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.18")
+    FetchContent_Declare(
+      protobuf
+      GIT_REPOSITORY "https://github.com/protocolbuffers/protobuf.git"
+      GIT_TAG "v3.18.0"
+      GIT_SUBMODULES ""
+      PATCH_COMMAND git apply --ignore-whitespace "${CMAKE_CURRENT_LIST_DIR}/../../patches/protobuf-v3.18.0.patch"
+      SOURCE_SUBDIR cmake)
+    FetchContent_MakeAvailable(protobuf)
+  else()
+    fetch_git_dependency(
+      NAME Protobuf
+      REPOSITORY "https://github.com/protocolbuffers/protobuf.git"
+      TAG "v3.18.0"
+      PATCH_COMMAND "git apply --ignore-whitespace \"${CMAKE_CURRENT_LIST_DIR}/../../patches/protobuf-v3.18.0.patch\""
+      SOURCE_SUBDIR cmake
+    )
+  endif()
+  list(POP_BACK CMAKE_MESSAGE_INDENT)
+  message(CHECK_PASS "fetched")
+endif()
+
+# ##############################################################################
+# RE2
+# ##############################################################################
+if(BUILD_re2)
+  message(CHECK_START "Fetching re2")
+  list(APPEND CMAKE_MESSAGE_INDENT " ")
+  set(RE2_BUILD_TESTING OFF)  # only the library is linked; skip re2's own tests
+  FetchContent_Declare(
+    re2
+    GIT_REPOSITORY "https://github.com/google/re2.git"
+    GIT_TAG "2021-11-01"
+    #PATCH_COMMAND git apply --ignore-whitespace "${CMAKE_CURRENT_LIST_DIR}/../../patches/re2-2021-11-01.patch"
+  )
+  FetchContent_MakeAvailable(re2)
+  list(POP_BACK CMAKE_MESSAGE_INDENT)
+  message(CHECK_PASS "fetched")
+endif()
+
 # ##############################################################################
 # SCIP
 # ##############################################################################
@@ -124,43 +174,11 @@ if(BUILD_GLPK)
 endif()
 
 # ##############################################################################
-# Protobuf
+# Coinutils
 # ##############################################################################
-if(BUILD_Protobuf)
-  message(CHECK_START "Fetching Protobuf")
-  list(APPEND CMAKE_MESSAGE_INDENT " ")
-  set(protobuf_BUILD_TESTS OFF)
-  set(protobuf_BUILD_EXPORT OFF)
-  set(protobuf_MSVC_STATIC_RUNTIME OFF)
-  # FetchContent_Declare(SOURCE_SUBDIR) was introduced in 3.18
-  if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.18")
-    FetchContent_Declare(
-      protobuf
-      GIT_REPOSITORY "https://github.com/protocolbuffers/protobuf.git"
-      GIT_TAG "v3.18.0"
-      GIT_SUBMODULES ""
-      PATCH_COMMAND git apply --ignore-whitespace "${CMAKE_CURRENT_LIST_DIR}/../../patches/protobuf-v3.18.0.patch"
-      SOURCE_SUBDIR cmake)
-    FetchContent_MakeAvailable(protobuf)
-  else()
-    fetch_git_dependency(
-      NAME Protobuf
-      REPOSITORY "https://github.com/protocolbuffers/protobuf.git"
-      TAG "v3.18.0"
-      PATCH_COMMAND "git apply --ignore-whitespace \"${CMAKE_CURRENT_LIST_DIR}/../../patches/protobuf-v3.18.0.patch\""
-      SOURCE_SUBDIR cmake
-    )
-  endif()
-  list(POP_BACK CMAKE_MESSAGE_INDENT)
-  message(CHECK_PASS "fetched")
-endif()
-
 # Coin-OR does not support C++17 (use of 'register' storage class specifier)
 set(CMAKE_CXX_STANDARD 11)
 
-# ##############################################################################
-# Coinutils
-# ##############################################################################
 if(BUILD_CoinUtils)
   message(CHECK_START "Fetching CoinUtils")
   list(APPEND CMAKE_MESSAGE_INDENT " ")
diff --git a/cmake/deps.cmake b/cmake/deps.cmake
index bf1eef5a68..9677d71639 100644
--- a/cmake/deps.cmake
+++ b/cmake/deps.cmake
@@ -48,6 +48,13 @@ if(NOT TARGET protobuf::libprotobuf)
   message(FATAL_ERROR "Target protobuf::libprotobuf not available.")
 endif()
 
+if(NOT BUILD_re2)
+  find_package(re2 REQUIRED)
+endif()
+if(NOT TARGET re2::re2)
+  message(FATAL_ERROR "Target re2::re2 not available.")
+endif()
+
 if(USE_SCIP)
   if(NOT BUILD_SCIP)
     find_package(SCIP REQUIRED)
diff --git a/ortools/base/BUILD b/ortools/base/BUILD
index 3717ade6da..83b9749ed7 100644
--- a/ortools/base/BUILD
+++ b/ortools/base/BUILD
@@ -63,6 +63,19 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "case",
+    srcs = [
+        "case.cc",
+    ],
+    hdrs = [
+        "case.h",
+    ],
+    deps = [
+        ":base",
+    ],
+)
+
 cc_library(
     name = "container_logging",
     hdrs = [
diff --git a/ortools/base/case.cc b/ortools/base/case.cc
new file mode 100644
index 0000000000..375efbebc2
--- /dev/null
+++ b/ortools/base/case.cc
@@ -0,0 +1,150 @@
+// Copyright 2010-2021 Google LLC
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file contains string processing functions related to
+// uppercase, lowercase, etc.
+
+#include "ortools/base/case.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <functional>
+#include <ostream>
+#include <string>
+
+//#include "base/port.h"
+#include "absl/hash/hash.h"
+#include "absl/strings/ascii.h"
+#include "absl/strings/match.h"
+#include "absl/strings/string_view.h"
+
+namespace strings {
+
+namespace {
+
+// Three-way comparison of the first `n` bytes of `a` and `b` after folding
+// ASCII letters to lowercase; bytes are ordered as unsigned values. Written by
+// hand instead of calling strncasecmp() because that function is missing on
+// MSVC, depends on the C locale and stops at the first NUL byte, whereas a
+// string_view may legitimately contain NULs.
+int CompareAsciiLowercased(const char* a, const char* b, size_t n) {
+  for (size_t i = 0; i < n; ++i) {
+    const auto ca = static_cast<unsigned char>(
+        absl::ascii_tolower(static_cast<unsigned char>(a[i])));
+    const auto cb = static_cast<unsigned char>(
+        absl::ascii_tolower(static_cast<unsigned char>(b[i])));
+    if (ca != cb) return ca < cb ? -1 : 1;
+  }
+  return 0;
+}
+
+}  // namespace
+
+// Streams the enumerator's name; values outside the enum print "INVALID".
+std::ostream& operator<<(std::ostream& os,
+                         const AsciiCapitalizationType& type) {
+  switch (type) {
+    case AsciiCapitalizationType::kLower:
+      return os << "kLower";
+    case AsciiCapitalizationType::kUpper:
+      return os << "kUpper";
+    case AsciiCapitalizationType::kFirst:
+      return os << "kFirst";
+    case AsciiCapitalizationType::kMixed:
+      return os << "kMixed";
+    case AsciiCapitalizationType::kNoAlpha:
+      return os << "kNoAlpha";
+    default:
+      return os << "INVALID";
+  }
+}
+
+// Classifies `input` by looking only at its ASCII letters; see case.h.
+AsciiCapitalizationType GetAsciiCapitalization(const absl::string_view input) {
+  const char* s = input.data();
+  const char* const end = s + input.size();
+  // find the caps type of the first alpha char
+  for (; s != end && !(absl::ascii_isupper(*s) || absl::ascii_islower(*s));
+       ++s) {
+  }
+  if (s == end) return AsciiCapitalizationType::kNoAlpha;
+  const AsciiCapitalizationType firstcapstype =
+      (absl::ascii_islower(*s)) ? AsciiCapitalizationType::kLower
+                                : AsciiCapitalizationType::kUpper;
+
+  // skip ahead to the next alpha char
+  for (++s; s != end && !(absl::ascii_isupper(*s) || absl::ascii_islower(*s));
+       ++s) {
+  }
+  if (s == end) return firstcapstype;
+  const AsciiCapitalizationType capstype =
+      (absl::ascii_islower(*s)) ? AsciiCapitalizationType::kLower
+                                : AsciiCapitalizationType::kUpper;
+
+  if (firstcapstype == AsciiCapitalizationType::kLower &&
+      capstype == AsciiCapitalizationType::kUpper)
+    return AsciiCapitalizationType::kMixed;
+
+  for (; s != end; ++s)
+    if ((absl::ascii_isupper(*s) &&
+         capstype != AsciiCapitalizationType::kUpper) ||
+        (absl::ascii_islower(*s) &&
+         capstype != AsciiCapitalizationType::kLower))
+      return AsciiCapitalizationType::kMixed;
+
+  if (firstcapstype == AsciiCapitalizationType::kUpper &&
+      capstype == AsciiCapitalizationType::kLower)
+    return AsciiCapitalizationType::kFirst;
+  return capstype;
+}
+
+// Orders by the case-folded common prefix first; if that ties, the shorter
+// string sorts first. Only the sign of the result is meaningful.
+int AsciiCaseInsensitiveCompare(absl::string_view s1, absl::string_view s2) {
+  const size_t common_size = std::min(s1.size(), s2.size());
+  const int res = CompareAsciiLowercased(s1.data(), s2.data(), common_size);
+  if (res != 0 || s1.size() == s2.size()) return res;
+  return s1.size() < s2.size() ? -1 : 1;
+}
+
+// Hashes the lowercased copy, so keys differing only in ASCII case hash alike.
+size_t AsciiCaseInsensitiveHash::operator()(absl::string_view s) const {
+  //return absl::HashOf(absl::AsciiStrToLower(s));
+  return std::hash<std::string>{}(absl::AsciiStrToLower(s));
+}
+
+bool AsciiCaseInsensitiveEq::operator()(absl::string_view s1,
+                                        absl::string_view s2) const {
+  return absl::EqualsIgnoreCase(s1, s2);
+}
+
+// In-place variant: uppercases the first character and every character that
+// directly follows a delimiter; all other characters are left untouched.
+void MakeAsciiTitlecase(std::string* s, absl::string_view delimiters) {
+  bool upper = true;
+  for (auto& ch : *s) {
+    if (upper) {
+      ch = absl::ascii_toupper(ch);
+    }
+    upper = (absl::StrContains(delimiters, ch));
+  }
+}
+
+std::string MakeAsciiTitlecase(absl::string_view s,
+                               absl::string_view delimiters) {
+  std::string result(s);
+  MakeAsciiTitlecase(&result, delimiters);
+  return result;
+}
+
+}  // namespace strings
diff --git a/ortools/base/case.h b/ortools/base/case.h
new file mode 100644
index 0000000000..aa8cc80f58
--- /dev/null
+++ b/ortools/base/case.h
@@ -0,0 +1,132 @@
+// Copyright 2010-2021 Google LLC
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This package contains character classification functions for evaluating
+// the case state of strings, and converting strings to uppercase, lowercase,
+// etc.
+//
+// Unlike <ctype.h> (or absl/strings/ascii.h), the functions in this file
+// are designed to operate on strings, not single characters.
+//
+// If you need to process UTF8 strings, take a look at files in i18n/utf8.
+//
+// Except for those marked as "using the C/POSIX locale", these functions are
+// for ASCII strings only.
+
+#ifndef OR_TOOLS_BASE_CASE_H_
+#define OR_TOOLS_BASE_CASE_H_
+
+#ifndef _MSC_VER
+#include <strings.h>  // for strcasecmp, but msvc does not have this header
+#endif
+
+#include <cstddef>
+#include <cstring>
+#include <functional>
+#include <ostream>
+#include <string>
+
+#include "absl/base/attributes.h"
+#include "absl/base/macros.h"
+#include "absl/base/port.h"  // disable some warnings on Windows
+#include "absl/strings/ascii.h"
+#include "absl/strings/string_view.h"
+
+namespace strings {
+
+// Enum values returned by GetAsciiCapitalization().
+enum class AsciiCapitalizationType {
+  kLower,   // Entirely lowercase
+  kUpper,   // Entirely uppercase
+  kFirst,   // First letter uppercase
+  kMixed,   // Mixed case
+  kNoAlpha  // Not an alphabetic string
+};
+
+// Prints the name of an enum value.
+std::ostream& operator<<(std::ostream& os, const AsciiCapitalizationType& type);
+
+// GetAsciiCapitalization()
+//
+// Returns a value indicating whether an ASCII string is entirely lowercase,
+// entirely uppercase, first letter uppercase, or mixed case, as returned by
+// `absl::ascii_islower()` and `absl::ascii_isupper()`.
+AsciiCapitalizationType GetAsciiCapitalization(absl::string_view input);
+
+// AsciiCaseInsensitiveCompare()
+//
+// Performs a case-insensitive absl::string_view comparison.
+// Returns:
+//    less than 0:    if s1 < s2
+//    equal to 0:     if s1 == s2
+//    greater than 0: if s1 > s2
+int AsciiCaseInsensitiveCompare(absl::string_view s1, absl::string_view s2);
+
+// AsciiCaseInsensitiveLess()
+//
+// Performs a case-insensitive less-than absl::string_view comparison. This
+// function object is useful as a template parameter for set/map of
+// absl::string_view-compatible types, if uniqueness of keys is
+// case-insensitive.
+// Can be used for heterogeneous lookups in associative containers. Example:
+//
+//   absl::btree_map<std::string, std::string, AsciiCaseInsensitiveLess> map;
+//   absl::string_view key = ...;
+//   auto it = map.find(key);
+struct AsciiCaseInsensitiveLess {
+  // Enable heterogeneous lookup.
+  using is_transparent = void;
+  bool operator()(absl::string_view s1, absl::string_view s2) const {
+    return AsciiCaseInsensitiveCompare(s1, s2) < 0;
+  }
+};
+
+// AsciiCaseInsensitiveHash and AsciiCaseInsensitiveEq
+//
+// Perform case-insensitive hash/eq absl::string_view operations. These
+// function objects are useful as template parameters for hash set/map of
+// absl::string_view-compatible types, if uniqueness of keys is
+// case-insensitive.
+// Can be used for heterogeneous lookups in absl associative containers.
+// Example:
+//
+//   absl::flat_hash_map<std::string, std::string,
+//                       AsciiCaseInsensitiveHash,
+//                       AsciiCaseInsensitiveEq>
+//       map;
+//   absl::string_view key = ...;
+//   auto it = map.find(key);
+struct AsciiCaseInsensitiveHash {
+  using is_transparent = void;
+  size_t operator()(absl::string_view s) const;
+};
+struct AsciiCaseInsensitiveEq {
+  using is_transparent = void;
+  bool operator()(absl::string_view s1, absl::string_view s2) const;
+};
+
+// MakeAsciiTitlecase()
+//
+// Capitalizes the first character of each word in a string, using the set of
+// characters in `delimiters` to use as word boundaries. This function can be
+// implemented using a regular expression, but this version should be more
+// efficient.
+void MakeAsciiTitlecase(std::string* s, absl::string_view delimiters);
+
+// As above but with string_view as input
+std::string MakeAsciiTitlecase(absl::string_view s,
+                               absl::string_view delimiters);
+
+}  // namespace strings
+
+#endif  // OR_TOOLS_BASE_CASE_H_
diff --git a/ortools/lp_data/BUILD b/ortools/lp_data/BUILD
index 5b8d4c63ff..56ba5c9537 100644
--- a/ortools/lp_data/BUILD
+++ b/ortools/lp_data/BUILD
@@ -201,20 +201,24 @@ cc_library(
     ],
 )
 
-#cc_library(
-#    name = "lp_parser",
-#    testonly = 1,
-#    srcs = ["lp_parser.cc"],
-#    hdrs = ["lp_parser.h"],
-#    copts = SAFE_FP_CODE,
-#    deps = [
-#        ":base",
-#        ":lp_data",
-#        "//ortools/base",
-#        "@com_google_absl//absl/strings",
-#        "//ortools/util/regexp/re2",
-#    ],
-#)
+cc_library(
+    name = "lp_parser",
+    srcs = ["lp_parser.cc"],
+    hdrs = ["lp_parser.h"],
+    copts = SAFE_FP_CODE,
+    deps = [
+        ":base",
+        ":lp_data",
+        ":proto_utils",
+        "//ortools/base",
+        "//ortools/base:case",
+        "//ortools/base:map_util",
+        "//ortools/linear_solver:linear_solver_cc_proto",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/status:statusor",
+        "@com_google_re2//:re2",
+    ],
+)
 
 #cc_library(
 #    name = "lp_constraint_classifier",
diff --git a/ortools/lp_data/CMakeLists.txt b/ortools/lp_data/CMakeLists.txt
index 9be192ea43..ff1dbc8f1e 100644
--- a/ortools/lp_data/CMakeLists.txt
+++ b/ortools/lp_data/CMakeLists.txt
@@ -18,5 +18,6 @@ target_link_libraries(${NAME} PRIVATE
   absl::strings
   absl::str_format
   protobuf::libprotobuf
+  re2::re2
   ${PROJECT_NAME}::proto)
 #add_library(${PROJECT_NAME}::lp_data ALIAS ${NAME})
diff --git a/ortools/lp_data/lp_parser.cc b/ortools/lp_data/lp_parser.cc
new file mode 100644
index 0000000000..e03ce1e27f
--- /dev/null
+++ b/ortools/lp_data/lp_parser.cc
@@ -0,0 +1,481 @@
+// Copyright 2010-2021 Google LLC
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ortools/lp_data/lp_parser.h"
+
+#include <algorithm>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "absl/container/flat_hash_set.h"
+#include "absl/status/status.h"
+#include "absl/status/statusor.h"
+#include "absl/strings/match.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_split.h"
+#include "absl/strings/string_view.h"
+#include "absl/strings/strip.h"
+#include "ortools/base/case.h"
+#include "ortools/base/map_util.h"
+#include "ortools/linear_solver/linear_solver.pb.h"
+#include "ortools/lp_data/proto_utils.h"
+#include "re2/re2.h"
+#include "re2/stringpiece.h"
+
+namespace operations_research {
+namespace glop {
+
+namespace {
+
+using ::absl::StatusOr;
+
+enum class TokenType {
+  ERROR,
+  END,
+  ADDAND,
+  VALUE,
+  INF,
+  NAME,
+  SIGN_LE,
+  SIGN_EQ,
+  SIGN_GE,
+  COMA,
+};
+
+bool TokenIsBound(TokenType token_type) {
+  if (token_type == TokenType::VALUE || token_type == TokenType::INF) {
+    return true;
+  }
+  return false;
+}
+
+// Not thread safe.
+class LPParser {
+ public:
+  // Accepts the string in LP file format (used by LinearProgram::Dump()).
+  // On success, populates the linear program *lp and returns true. Otherwise,
+  // returns false and leaves *lp in an unspecified state.
+  ABSL_MUST_USE_RESULT bool Parse(absl::string_view model, LinearProgram* lp);
+
+ private:
+  bool ParseEmptyLine(absl::string_view line);
+  bool ParseObjective(absl::string_view objective);
+  bool ParseIntegerVariablesList(absl::string_view line);
+  bool ParseConstraint(absl::string_view constraint);
+  TokenType ConsumeToken(absl::string_view* sp);
+  bool SetVariableBounds(ColIndex col, Fractional lb, Fractional ub);
+
+  // Linear program populated by the Parse() method. Not owned.
+  LinearProgram* lp_;
+
+  // Contains the last consumed coefficient and name. The name can be the
+  // optimization direction, a constraint name, or a variable name.
+  Fractional consumed_coeff_;
+  std::string consumed_name_;
+
+  // To remember whether the variable bounds had already been set.
+  std::set<ColIndex> bounded_variables_;
+};
+
+bool LPParser::Parse(absl::string_view model, LinearProgram* lp) {
+  lp_ = lp;
+  bounded_variables_.clear();
+  lp_->Clear();
+
+  std::vector<absl::string_view> lines =
+      absl::StrSplit(model, ';', absl::SkipEmpty());
+  bool has_objective = false;
+
+  for (absl::string_view line : lines) {
+    if (!has_objective && ParseObjective(line)) {
+      has_objective = true;
+    } else if (!ParseConstraint(line) && !ParseIntegerVariablesList(line) &&
+               !ParseEmptyLine(line)) {
+      LOG(INFO) << "Error in line: " << line;
+      return false;
+    }
+  }
+
+  // Bound the non-bounded variables between -inf and +inf. We need to do this,
+  // as glop bounds a variable by default between 0 and +inf.
+  for (ColIndex col(0); col < lp_->num_variables(); ++col) {
+    if (bounded_variables_.find(col) == bounded_variables_.end()) {
+      lp_->SetVariableBounds(col, -kInfinity, +kInfinity);
+    }
+  }
+
+  lp_->CleanUp();
+  return true;
+}
+
+bool LPParser::ParseEmptyLine(absl::string_view line) {
+  if (ConsumeToken(&line) == TokenType::END) return true;
+  return false;
+}
+
+bool LPParser::ParseObjective(absl::string_view objective) {
+  // Get the required optimization direction.
+  if (ConsumeToken(&objective) != TokenType::NAME) return false;
+  if (absl::EqualsIgnoreCase(consumed_name_, "min")) {
+    lp_->SetMaximizationProblem(false);
+  } else if (absl::EqualsIgnoreCase(consumed_name_, "max")) {
+    lp_->SetMaximizationProblem(true);
+  } else {
+    return false;
+  }
+
+  // Get the optional offset.
+  TokenType token_type = ConsumeToken(&objective);
+  if (token_type == TokenType::VALUE) {
+    lp_->SetObjectiveOffset(consumed_coeff_);
+    token_type = ConsumeToken(&objective);
+  } else {
+    lp_->SetObjectiveOffset(0.0);
+  }
+
+  // Get the addands.
+  while (token_type == TokenType::ADDAND) {
+    const ColIndex col = lp_->FindOrCreateVariable(consumed_name_);
+    if (lp_->objective_coefficients()[col] != 0.0) return false;
+    lp_->SetObjectiveCoefficient(col, consumed_coeff_);
+    token_type = ConsumeToken(&objective);
+  }
+  return token_type == TokenType::END;
+}
+
+bool LPParser::ParseIntegerVariablesList(absl::string_view line) {
+  // Get the required "int" or "bin" keyword.
+  bool binary_list = false;
+  if (ConsumeToken(&line) != TokenType::NAME) return false;
+  if (absl::EqualsIgnoreCase(consumed_name_, "bin")) {
+    binary_list = true;
+  } else if (!absl::EqualsIgnoreCase(consumed_name_, "int")) {
+    return false;
+  }
+
+  // Get the list of integer variables, separated by optional commas.
+  TokenType token_type = ConsumeToken(&line);
+  while (token_type == TokenType::ADDAND) {
+    if (consumed_coeff_ != 1.0) return false;
+    const ColIndex col = lp_->FindOrCreateVariable(consumed_name_);
+    lp_->SetVariableType(col, LinearProgram::VariableType::INTEGER);
+    if (binary_list && !SetVariableBounds(col, 0.0, 1.0)) return false;
+    token_type = ConsumeToken(&line);
+    if (token_type == TokenType::COMA) {
+      token_type = ConsumeToken(&line);
+    }
+  }
+
+  // The last token must be END.
+  if (token_type != TokenType::END) return false;
+  return true;
+}
+
+bool LPParser::ParseConstraint(absl::string_view constraint) {
+  const StatusOr<ParsedConstraint> parsed_constraint_or_status =
+      ::operations_research::glop::ParseConstraint(constraint);
+  if (!parsed_constraint_or_status.ok()) return false;
+  const ParsedConstraint& parsed_constraint =
+      parsed_constraint_or_status.value();
+
+  // Set the variables bounds without creating new constraints.
+  if (parsed_constraint.name.empty() &&
+      parsed_constraint.coefficients.size() == 1 &&
+      parsed_constraint.coefficients[0] == 1.0) {
+    const ColIndex col =
+        lp_->FindOrCreateVariable(parsed_constraint.variable_names[0]);
+    if (!SetVariableBounds(col, parsed_constraint.lower_bound,
+                           parsed_constraint.upper_bound)) {
+      return false;
+    }
+  } else {
+    const RowIndex num_constraints_before_adding_variable =
+        lp_->num_constraints();
+    // The constraint has a name, or there is more than one variable, or the
+    // coefficient is not 1. Thus, create and fill a new constraint.
+    // We don't use SetConstraintName() because constraints named that way
+    // cannot be found via FindOrCreateConstraint() (see comment on
+    // SetConstraintName()), which can be useful for tests using ParseLP.
+    const RowIndex row =
+        parsed_constraint.name.empty()
+            ? lp_->CreateNewConstraint()
+            : lp_->FindOrCreateConstraint(parsed_constraint.name);
+    if (lp_->num_constraints() == num_constraints_before_adding_variable) {
+      // No constraints were added, meaning we found one.
+      LOG(INFO) << "Two constraints with the same name: "
+                << parsed_constraint.name;
+      return false;
+    }
+    if (!AreBoundsValid(parsed_constraint.lower_bound,
+                        parsed_constraint.upper_bound)) {
+      return false;
+    }
+    lp_->SetConstraintBounds(row, parsed_constraint.lower_bound,
+                             parsed_constraint.upper_bound);
+    for (int i = 0; i < parsed_constraint.variable_names.size(); ++i) {
+      const ColIndex variable =
+          lp_->FindOrCreateVariable(parsed_constraint.variable_names[i]);
+      lp_->SetCoefficient(row, variable, parsed_constraint.coefficients[i]);
+    }
+  }
+  return true;
+}
+
+bool LPParser::SetVariableBounds(ColIndex col, Fractional lb, Fractional ub) {
+  if (bounded_variables_.find(col) == bounded_variables_.end()) {
+    // The variable was not bounded yet, thus reset its bounds.
+    bounded_variables_.insert(col);
+    lp_->SetVariableBounds(col, -kInfinity, kInfinity);
+  }
+  // Set the bounds only if they are stricter and valid.
+  lb = std::max(lb, lp_->variable_lower_bounds()[col]);
+  ub = std::min(ub, lp_->variable_upper_bounds()[col]);
+  if (!AreBoundsValid(lb, ub)) return false;
+  lp_->SetVariableBounds(col, lb, ub);
+  return true;
+}
+
+// Tokenizer core. Strips one token off the front of *sp and reports its type;
+// the payload, if any, is stored in *consumed_name and/or *consumed_coeff.
+// Nothing is consumed when END is returned.
+TokenType ConsumeTokenImpl(re2::StringPiece* sp, std::string* consumed_name,
+                           double* consumed_coeff) {
+  DCHECK(consumed_name != nullptr);
+  DCHECK(consumed_coeff != nullptr);
+  // We use LazyRE2 everywhere so that all the patterns are just compiled once
+  // when they are needed for the first time. This speeds up the code
+  // significantly. Note that the use of LazyRE2 is thread safe.
+  static const LazyRE2 kEndPattern = {R"(\s*)"};
+
+  // There is nothing more to consume.
+  if (sp->empty() || RE2::FullMatch(*sp, *kEndPattern)) {
+    return TokenType::END;
+  }
+
+  // Return NAME if the next token is a line name, or integer variable list
+  // indicator.
+  static const LazyRE2 kNamePattern1 = {R"(\s*(\w[\w[\]]*):)"};
+  static const LazyRE2 kNamePattern2 = {R"((?i)\s*(int)\s*:?)"};
+  static const LazyRE2 kNamePattern3 = {R"((?i)\s*(bin)\s*:?)"};
+  if (RE2::Consume(sp, *kNamePattern1, consumed_name)) return TokenType::NAME;
+  if (RE2::Consume(sp, *kNamePattern2, consumed_name)) return TokenType::NAME;
+  if (RE2::Consume(sp, *kNamePattern3, consumed_name)) return TokenType::NAME;
+
+  // Return SIGN_* if the next token is a relation sign.
+  static const LazyRE2 kLePattern = {R"(\s*<=?)"};
+  if (RE2::Consume(sp, *kLePattern)) return TokenType::SIGN_LE;
+  static const LazyRE2 kEqPattern = {R"(\s*=)"};
+  if (RE2::Consume(sp, *kEqPattern)) return TokenType::SIGN_EQ;
+  static const LazyRE2 kGePattern = {R"(\s*>=?)"};
+  if (RE2::Consume(sp, *kGePattern)) return TokenType::SIGN_GE;
+
+  // Return COMA if the next token is a comma.
+  static const LazyRE2 kComaPattern = {R"(\s*\,)"};
+  if (RE2::Consume(sp, *kComaPattern)) return TokenType::COMA;
+
+  // Consume all plus and minus signs.
+  std::string sign;
+  int minus_count = 0;
+  static const LazyRE2 kSignPattern = {R"(\s*([-+]{1}))"};
+  while (RE2::Consume(sp, *kSignPattern, &sign)) {
+    if (sign == "-") minus_count++;
+  }
+
+  // Return INF if the next token is an infinite value.
+  static const LazyRE2 kInfPattern = {R"((?i)\s*inf)"};
+  if (RE2::Consume(sp, *kInfPattern)) {
+    *consumed_coeff = minus_count % 2 == 0 ? kInfinity : -kInfinity;
+    return TokenType::INF;
+  }
+
+  // Check if the next token is a value. If it is infinite return INF.
+  std::string coeff;
+  bool has_value = false;
+  static const LazyRE2 kValuePattern = {
+      R"(\s*([0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?))"};
+  if (RE2::Consume(sp, *kValuePattern, &coeff)) {
+    if (!absl::SimpleAtod(coeff, consumed_coeff)) {
+      // Note: If SimpleAtod(), Consume(), and kValuePattern are correct, this
+      // should never happen.
+      LOG(ERROR) << "Text: " << coeff << " was matched by RE2 to be "
+                 << "a floating point number, but absl::SimpleAtod() failed.";
+      return TokenType::ERROR;
+    }
+    if (!IsFinite(*consumed_coeff)) {
+      VLOG(1) << "Value " << coeff << " treated as infinite.";
+      return TokenType::INF;
+    }
+    has_value = true;
+  } else {
+    *consumed_coeff = 1.0;
+  }
+  if (minus_count % 2 == 1) *consumed_coeff *= -1.0;
+
+  // Return ADDAND (coefficient and name) if the next token is a variable name.
+  // Otherwise, if we found a finite value previously, return VALUE.
+  // Otherwise, return ERROR.
+  std::string multiplication;
+  static const LazyRE2 kAddandPattern = {R"(\s*(\*?)\s*([a-zA-Z_)][\w[\])]*))"};
+  if (RE2::Consume(sp, *kAddandPattern, &multiplication, consumed_name)) {
+    if (!multiplication.empty() && !has_value) return TokenType::ERROR;
+    return TokenType::ADDAND;
+  } else if (has_value) {
+    return TokenType::VALUE;
+  }
+
+  return TokenType::ERROR;
+}
+
+// Consumes the next token from the front of *sv, advancing *sv past it, and
+// returns the token type. The matched name and/or coefficient are written to
+// *consumed_name and *consumed_coeff. *sv is also advanced past whatever was
+// matched before an ERROR was detected.
+TokenType ConsumeToken(absl::string_view* sv, std::string* consumed_name,
+                       double* consumed_coeff) {
+  DCHECK(sv != nullptr);
+  // RE2::Consume() advances a re2::StringPiece, so tokenize a local piece that
+  // aliases the same bytes and then publish how far it advanced.
+  re2::StringPiece remaining(sv->data(), sv->size());
+  const TokenType token_type =
+      ConsumeTokenImpl(&remaining, consumed_name, consumed_coeff);
+  *sv = absl::string_view(remaining.data(), remaining.size());
+  return token_type;
+}
+
+TokenType LPParser::ConsumeToken(absl::string_view* sp) {
+  using ::operations_research::glop::ConsumeToken;
+  return ConsumeToken(sp, &consumed_name_, &consumed_coeff_);
+}
+
+}  // namespace
+
+StatusOr<ParsedConstraint> ParseConstraint(absl::string_view constraint) {
+  ParsedConstraint parsed_constraint;
+  // Get the name, if present.
+  absl::string_view constraint_copy(constraint);
+  std::string consumed_name;
+  Fractional consumed_coeff;
+  if (ConsumeToken(&constraint_copy, &consumed_name, &consumed_coeff) ==
+      TokenType::NAME) {
+    parsed_constraint.name = consumed_name;
+    constraint = constraint_copy;
+  }
+
+  Fractional left_bound;
+  Fractional right_bound;
+  TokenType left_sign(TokenType::END);
+  TokenType right_sign(TokenType::END);
+  absl::flat_hash_set<std::string> used_variables;
+
+  // Get the left bound and the relation sign, if present.
+  TokenType token_type =
+      ConsumeToken(&constraint, &consumed_name, &consumed_coeff);
+  if (TokenIsBound(token_type)) {
+    left_bound = consumed_coeff;
+    left_sign = ConsumeToken(&constraint, &consumed_name, &consumed_coeff);
+    if (left_sign != TokenType::SIGN_LE && left_sign != TokenType::SIGN_EQ &&
+        left_sign != TokenType::SIGN_GE) {
+      return absl::InvalidArgumentError(
+          "Expected an equality/inequality sign for the left bound.");
+    }
+    token_type = ConsumeToken(&constraint, &consumed_name, &consumed_coeff);
+  }
+
+  // Get the addands, if present.
+  while (token_type == TokenType::ADDAND) {
+    if (used_variables.contains(consumed_name)) {
+      return absl::InvalidArgumentError(
+          absl::StrCat("Duplicate variable name: ", consumed_name));
+    }
+    used_variables.insert(consumed_name);
+    parsed_constraint.variable_names.push_back(consumed_name);
+    parsed_constraint.coefficients.push_back(consumed_coeff);
+    token_type = ConsumeToken(&constraint, &consumed_name, &consumed_coeff);
+  }
+
+  // If the left sign was EQ there can be no right side.
+  if (left_sign == TokenType::SIGN_EQ && token_type != TokenType::END) {
+    return absl::InvalidArgumentError(
+        "Equality constraints can have only one bound.");
+  }
+
+  // Get the right sign and the right bound, if present.
+  if (token_type != TokenType::END) {
+    right_sign = token_type;
+    if (right_sign != TokenType::SIGN_LE && right_sign != TokenType::SIGN_EQ &&
+        right_sign != TokenType::SIGN_GE) {
+      return absl::InvalidArgumentError(
+          "Expected an equality/inequality sign for the right bound.");
+    }
+    // If the right sign is EQ, there can be no left side.
+    if (left_sign != TokenType::END && right_sign == TokenType::SIGN_EQ) {
+      return absl::InvalidArgumentError(
+          "Equality constraints can have only one bound.");
+    }
+    if (!TokenIsBound(
+            ConsumeToken(&constraint, &consumed_name, &consumed_coeff))) {
+      return absl::InvalidArgumentError("Bound value was expected.");
+    }
+    right_bound = consumed_coeff;
+    if (ConsumeToken(&constraint, &consumed_name, &consumed_coeff) !=
+        TokenType::END) {
+      return absl::InvalidArgumentError(
+          absl::StrCat("End of input was expected, found: ", constraint));
+    }
+  }
+
+  // There was no constraint!
+  if (left_sign == TokenType::END && right_sign == TokenType::END) {
+    return absl::InvalidArgumentError("The input constraint was empty.");
+  }
+
+  // Calculate bounds to set.
+  parsed_constraint.lower_bound = -kInfinity;
+  parsed_constraint.upper_bound = kInfinity;
+  if (left_sign == TokenType::SIGN_LE || left_sign == TokenType::SIGN_EQ) {
+    parsed_constraint.lower_bound = left_bound;
+  }
+  if (left_sign == TokenType::SIGN_GE || left_sign == TokenType::SIGN_EQ) {
+    parsed_constraint.upper_bound = left_bound;
+  }
+  if (right_sign == TokenType::SIGN_LE || right_sign == TokenType::SIGN_EQ) {
+    parsed_constraint.upper_bound =
+        std::min(parsed_constraint.upper_bound, right_bound);
+  }
+  if (right_sign == TokenType::SIGN_GE || right_sign == TokenType::SIGN_EQ) {
+    parsed_constraint.lower_bound =
+        std::max(parsed_constraint.lower_bound, right_bound);
+  }
+  return parsed_constraint;
+}
+
+bool ParseLp(absl::string_view model, LinearProgram* lp) {
+  LPParser parser;
+  return parser.Parse(model, lp);
+}
+
+}  // namespace glop
+
+absl::StatusOr<MPModelProto> ModelProtoFromLpFormat(absl::string_view model) {
+  glop::LinearProgram lp;
+  if (!ParseLp(model, &lp)) {
+    return absl::InvalidArgumentError("Parsing error, see LOGs for details.");
+  }
+  MPModelProto model_proto;
+  LinearProgramToMPModelProto(lp, &model_proto);
+  return model_proto;
+}
+
+}  // namespace operations_research
diff --git
a/ortools/lp_data/lp_parser.h b/ortools/lp_data/lp_parser.h new file mode 100644 index 0000000000..b6b8fb0aee --- /dev/null +++ b/ortools/lp_data/lp_parser.h @@ -0,0 +1,123 @@ +// Copyright 2010-2021 Google LLC +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// A simple parser of a linear program from string. +// +// We accept a format produced by LinearProgram::Dump(), which is similar to +// LP file used by lp_solve (see http://lpsolve.sourceforge.net/5.1/index.htm). +// Example: +// 1: min: 1 + x1 + 2 * x2; +// 2: 0 <= x1 <= 1; +// 3: x2 >= 2; +// 4: r1: 1 <= x1 - x2 <= 2; +// 5: 0 <= x1 + x2 <= inf; +// 6: int x1, x3; +// 7: bin x2; +// +// Line 1 is the objective, line 2 and 3 define variable bounds, line 4 is a +// named constraint, line 5 is an unnamed constraint. Line 6 is the list of +// integer variables. Line 7 is the list of binary variables. The lines can be +// in any order, the line numbers do _not_ belong to the string being parsed. +// +// Caveats: +// 1. Plus sign and multiplication sign are optional. Thus, "min: 1 x1 x2" is +// the same as "min: 1*x1 + x2". All consecutive signs will be compacted into +// one sign using mathematical rules (i.e., the parity of minus sign). +// E.g., "min: ++---+ - +x1" is the same as "min: x1". +// 2. A constraint consists of two or three parts. A two part constraint has +// a bound on the left (resp. right) side and variables on the right +// (resp. 
left) side, with the two parts being separated by any of the +// relation signs "<", "<=", "=", ">=", ">". +// 3. A three part constraint has the variables in the middle part, and two +// bounds on the left and right side, with all three parts being separated by +// any of "<", "<=", ">=", ">". +// 4. "<" means "<=", and ">" means ">=". +// 5. An unnamed constraint involving exactly one variable with coefficient +// equal to 1, defines the variable bound(s). Otherwise, the constraint +// defines a new constraint. +// 6. If there is no bound defined for a variable, it will be assumed to be +// unbounded (i.e., from -inf to +inf). +// 7. A bound must be a number or "inf". A coefficient must be finite and +// cannot overflow. A number can be represented in scientific notation, +// e.g., +1.2E-2. Consequently, +// "min: 1e2" means minimization of 100, +// "min: 1 e2" means minimization of 1*e2, where e2 is a variable, +// "min: 1 + e2" means minimization of 1 + e2, where e2 is a variable, +// "min: 1 1*e2" means minimization of 1 + e2, where e2 is a variable. +// "min: 1 1e2" is invalid as it would mean minimization of 1 + 100. +// 8. In a constraint, in the part with variables, all elements must be +// variables with optional coefficients and signs (i.e., no offset is +// allowed). +// 9. Variables in the objective and in each of the constraints cannot repeat. +// E.g., this is invalid: "min: x + x". +// 10. The offset in the objective must be specified at the beginning, i.e., +// after min: or max: and before any variables. +// 11. The parsing will fail if due to bounding of a variable the lower bound +// becomes strictly greater than the upper bound. E.g., these fail to +// parse: "min x; 1 <= x <= 0;", "min x; 0 <= x <= 1; 2 <= x <= 3". On the +// other hand the parser does _not_ attempt to "round" the bounds for integer +// variables.
E.g., "min x; 0.5 <= x <= 0.8; int x" results in bounding the x +// variable between 0.5 and 0.8, despite there is no integer value it can +// take. Similarly, "min x; bin x; x <= 0.5" results in bounding the x +// variable between 0.0 and 0.5, despite the only value it can take is 0. + +#ifndef OR_TOOLS_LP_DATA_LP_PARSER_H_ +#define OR_TOOLS_LP_DATA_LP_PARSER_H_ + +#include +#include + +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" +#include "ortools/linear_solver/linear_solver.pb.h" +#include "ortools/lp_data/lp_data.h" + +namespace operations_research { + +// This calls ParseLp() under the hood. See below. +absl::StatusOr ModelProtoFromLpFormat(absl::string_view model); + +namespace glop { + +// Like ModelProtoFromLpFormat(), but outputs a glop::LinearProgram. +ABSL_MUST_USE_RESULT bool ParseLp(absl::string_view model, LinearProgram* lp); + +// Represents a constraint parsed from the LP file format (used by +// LinearProgram::Dump()). +struct ParsedConstraint { + // The name of the constraint. Empty if the constraint has no name. + std::string name; + // Contains the names of the variables used in the constraint, in the order in + // which they appear in the string representation. + std::vector variable_names; + // Contains the coefficients of the variables used in the constraint. Note + // that variable_names and coefficients are parallel arrays, i.e. + // coefficients[i] is the coefficient for variable_names[i]. + std::vector coefficients; + // The lower bound of the constraint. Set to -infinity when the constraint has + // no lower bound. + Fractional lower_bound; + // The upper bound of the constraint. Set to +infinity when the constraint has + // no upper bound. + Fractional upper_bound; +}; + +// Parses a constraint in the format used by LinearProgram::Dump(). Returns an +// InvalidArgumentError with an appropriate error message when the parsing +// fails. 
+absl::StatusOr<ParsedConstraint> ParseConstraint(absl::string_view constraint); + +} // namespace glop +} // namespace operations_research + +#endif // OR_TOOLS_LP_DATA_LP_PARSER_H_