diff --git a/.travis.yml b/.travis.yml
index 361501ea8..f9c567360 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,6 +7,7 @@ matrix:
         apt:
           packages:
             - g++-4.9
+            - libre2-dev
           sources: &sources
             - llvm-toolchain-precise-3.8
             - ubuntu-toolchain-r-test
@@ -16,6 +17,7 @@ matrix:
         apt:
           packages:
             - clang-3.8
+            - libre2-dev
           sources: *sources
     - os: osx
       osx_image: xcode8
@@ -49,4 +51,4 @@ notifications:
     channels:
       - "chat.freenode.net#jsonnet"
     template:
-      - "%{repository}/%{branch} (%{commit} - %{author}): %{message}"
\ No newline at end of file
+      - "%{repository}/%{branch} (%{commit} - %{author}): %{message}"
diff --git a/CMakeLists.txt b/CMakeLists.txt
index cdd5367eb..c195c09f9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -29,8 +29,50 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${GLOBAL_OUTPUT_PATH})
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${GLOBAL_OUTPUT_PATH})
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${GLOBAL_OUTPUT_PATH})
 
+# Include external RE2 project. This runs a CMake sub-script
+# (RE2CMakeLists.txt.in) that downloads the RE2 source. It's then built as part
+# of the jsonnet project. The conventional way of handling CMake dependencies is
+# to use a find_package script, which finds and installs the library from
+# known locations on the local machine. Downloading the library ourselves
+# allows us to pin to a specific version and makes things easier for users
+# who don't have package managers.
+
+# Generate and download RE2 project.
+set(RE2_DIR ${GLOBAL_OUTPUT_PATH}/re2-download)
+configure_file(RE2CMakeLists.txt.in ${RE2_DIR}/CMakeLists.txt)
+execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
+    RESULT_VARIABLE result
+    WORKING_DIRECTORY ${RE2_DIR}
+)
+if(result)
+    message(FATAL_ERROR "RE2 download failed: ${result}")
+endif()
+
+# Build RE2.
+execute_process(COMMAND ${CMAKE_COMMAND} --build .
+    RESULT_VARIABLE result
+    WORKING_DIRECTORY ${RE2_DIR})
+if(result)
+    message(FATAL_ERROR "Build step for re2 failed: ${result}")
+endif()
+
+# Add RE2 directly to our build. This defines
+# the re2 target.
+add_subdirectory(${GLOBAL_OUTPUT_PATH}/re2-src
+    ${GLOBAL_OUTPUT_PATH}/re2-build)
+
+# Include RE2 headers (re2/re2.h lives directly under the RE2 source root).
+include_directories("${RE2_SOURCE_DIR}")
+
+# Allow linking into a shared library.
+set_property(TARGET re2 PROPERTY POSITION_INDEPENDENT_CODE ON)
+
+# RE2 requires pthreads on UNIX; $<BOOL:...> keeps the expression valid when UNIX is unset.
+set_property(TARGET re2 APPEND PROPERTY INTERFACE_COMPILE_OPTIONS $<$<BOOL:${UNIX}>:-pthread>)
+set_property(TARGET re2 APPEND PROPERTY INTERFACE_LINK_LIBRARIES $<$<BOOL:${UNIX}>:-pthread>)
+
 # Include external googletest project. This runs a CMake sub-script
-# (CMakeLists.txt.in) that downloads googletest source. It's then built as part
+# (GoogleTestCMakeLists.txt.in) that downloads googletest source. It's then built as part
 # of the jsonnet project. The conventional way of handling CMake dependencies is
 # to use a find_package script, which finds and installs the library from
 # known locations on the local machine. Downloading the library ourselves
@@ -41,7 +83,7 @@ if (BUILD_TESTS AND NOT USE_SYSTEM_GTEST)
 
     # Generate and download googletest project.
     set(GOOGLETEST_DIR ${GLOBAL_OUTPUT_PATH}/googletest-download)
-    configure_file(CMakeLists.txt.in ${GOOGLETEST_DIR}/CMakeLists.txt)
+    configure_file(GoogleTestCMakeLists.txt.in ${GOOGLETEST_DIR}/CMakeLists.txt)
     execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
         RESULT_VARIABLE result
         WORKING_DIRECTORY ${GOOGLETEST_DIR}
diff --git a/CMakeLists.txt.in b/GoogleTestCMakeLists.txt.in
similarity index 100%
rename from CMakeLists.txt.in
rename to GoogleTestCMakeLists.txt.in
diff --git a/Makefile b/Makefile
index 97702eb5a..3db6e4542 100644
--- a/Makefile
+++ b/Makefile
@@ -34,7 +34,7 @@ CFLAGS ?= -g $(OPT) -Wall -Wextra -pedantic -std=c99 -fPIC -Iinclude
 MAKEDEPENDFLAGS ?= -Iinclude -Ithird_party/md5 -Ithird_party/json
 EMCXXFLAGS = $(CXXFLAGS) -g0 -Os --memory-init-file 0 -s DISABLE_EXCEPTION_CATCHING=0 -s OUTLINING_LIMIT=10000 -s RESERVED_FUNCTION_POINTERS=20 -s ASSERTIONS=1 -s ALLOW_MEMORY_GROWTH=1
 EMCFLAGS = $(CFLAGS) --memory-init-file 0 -s DISABLE_EXCEPTION_CATCHING=0 -s ASSERTIONS=1 -s ALLOW_MEMORY_GROWTH=1
-LDFLAGS ?=
+LDFLAGS ?= -lre2
 
 SHARED_LDFLAGS ?= -shared
 
@@ -121,11 +121,11 @@ core/desugarer.cpp: core/std.jsonnet.h
 
 # Commandline executable.
 jsonnet: cmd/jsonnet.cpp cmd/utils.cpp $(LIB_OBJ)
-	$(CXX) $(CXXFLAGS) $(LDFLAGS) $< cmd/utils.cpp $(LIB_SRC:.cpp=.o) -o $@
+	$(CXX) $(CXXFLAGS) $< cmd/utils.cpp $(LIB_SRC:.cpp=.o) -o $@ $(LDFLAGS)
 
 # Commandline executable (reformatter).
 jsonnetfmt: cmd/jsonnetfmt.cpp cmd/utils.cpp $(LIB_OBJ)
-	$(CXX) $(CXXFLAGS) $(LDFLAGS) $< cmd/utils.cpp $(LIB_SRC:.cpp=.o) -o $@
+	$(CXX) $(CXXFLAGS) $< cmd/utils.cpp $(LIB_SRC:.cpp=.o) -o $@ $(LDFLAGS)
 
 # C binding.
 libjsonnet.so: $(LIB_OBJ)
diff --git a/RE2CMakeLists.txt.in b/RE2CMakeLists.txt.in
new file mode 100644
index 000000000..808b92359
--- /dev/null
+++ b/RE2CMakeLists.txt.in
@@ -0,0 +1,18 @@
+# CMake script run at generation time. This must be separate from the main
+# CMakeLists.txt file to allow downloading and building RE2 at generation
+# time.
+cmake_minimum_required(VERSION 2.8.2)
+
+project(re2-download NONE)
+
+include(ExternalProject)
+ExternalProject_Add(re2
+    GIT_REPOSITORY    https://github.com/google/re2.git
+    GIT_TAG           2019-06-01
+    SOURCE_DIR        "${GLOBAL_OUTPUT_PATH}/re2-src"
+    BINARY_DIR        "${GLOBAL_OUTPUT_PATH}/re2-build"
+    CONFIGURE_COMMAND ""
+    BUILD_COMMAND     ""
+    INSTALL_COMMAND   ""
+    TEST_COMMAND      ""
+)
diff --git a/WORKSPACE b/WORKSPACE
index c4dc885b4..36653bbba 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -12,11 +12,19 @@ git_repository(
 git_repository(
     name = "com_google_googletest",
     remote = "https://github.com/google/googletest.git",
-    # If updating googletest version, also update CMakeLists.txt.in.
+    # If updating googletest version, also update GoogleTestCMakeLists.txt.in.
     commit = "2fe3bd994b3189899d93f1d5a881e725e046fdc2",  # release: release-1.8.1
     shallow_since = "1535728917 -0400",
 )
 
+git_repository(
+    name = "com_googlesource_code_re2",
+    remote = "https://github.com/google/re2.git",
+    # If updating RE2 version, also update RE2CMakeLists.txt.in.
+    commit = "0c95bcce2f1f0f071a786ca2c42384b211b8caba",  # release: 2019-06-01
+    shallow_since = "1558525654 +0000",
+)
+
 load("//tools/build_defs:python_repo.bzl", "python_interpreter")
 
 python_interpreter(name = "default_python")
diff --git a/core/BUILD b/core/BUILD
index 6a0e9cb50..b76feb953 100644
--- a/core/BUILD
+++ b/core/BUILD
@@ -36,6 +36,7 @@ cc_library(
         "//stdlib:std",
         "//third_party/json",
         "//third_party/md5:libmd5",
+        "@com_googlesource_code_re2//:re2",
     ],
 )
 
diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index e877015cc..fa9bdcf13 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -29,8 +29,8 @@ set(LIBJSONNET_SOURCE
     vm.cpp)
 
 add_library(libjsonnet SHARED ${LIBJSONNET_HEADERS} ${LIBJSONNET_SOURCE})
-add_dependencies(libjsonnet md5 stdlib)
-target_link_libraries(libjsonnet md5)
+add_dependencies(libjsonnet md5 re2 stdlib)
+target_link_libraries(libjsonnet md5 re2)
 
 # CMake prepends CMAKE_SHARED_LIBRARY_PREFIX to shared libraries, so without
 # this step the output would be |liblibjsonnet|.
@@ -45,8 +45,8 @@ install(TARGETS libjsonnet
 
 # Static library for jsonnet command-line tool.
 add_library(libjsonnet_static STATIC ${LIBJSONNET_SOURCE})
-add_dependencies(libjsonnet_static md5 stdlib)
-target_link_libraries(libjsonnet_static md5)
+add_dependencies(libjsonnet_static md5 re2 stdlib)
+target_link_libraries(libjsonnet_static md5 re2)
 set_target_properties(libjsonnet_static PROPERTIES OUTPUT_NAME jsonnet)
 install(TARGETS libjsonnet_static DESTINATION "${CMAKE_INSTALL_LIBDIR}")
 
diff --git a/core/desugarer.cpp b/core/desugarer.cpp
index d49a73b05..37eab5de0 100644
--- a/core/desugarer.cpp
+++ b/core/desugarer.cpp
@@ -34,7 +34,7 @@ struct BuiltinDecl {
     std::vector<UString> params;
 };
 
-static unsigned long max_builtin = 37;
+static unsigned long max_builtin = 42;
 BuiltinDecl jsonnet_builtin_decl(unsigned long builtin)
 {
     switch (builtin) {
@@ -76,6 +76,11 @@ BuiltinDecl jsonnet_builtin_decl(unsigned long builtin)
         case 35: return {U"parseJson", {U"str"}};
         case 36: return {U"encodeUTF8", {U"str"}};
         case 37: return {U"decodeUTF8", {U"arr"}};
+        case 38: return {U"regexFullMatch", {U"pattern", U"str"}};
+        case 39: return {U"regexPartialMatch", {U"pattern", U"str"}};
+        case 40: return {U"regexQuoteMeta", {U"str"}};
+        case 41: return {U"regexReplace", {U"str", U"pattern", U"to"}};
+        case 42: return {U"regexGlobalReplace", {U"str", U"pattern", U"to"}};
         default:
             std::cerr << "INTERNAL ERROR: Unrecognized builtin function: " << builtin << std::endl;
             std::abort();
diff --git a/core/vm.cpp b/core/vm.cpp
index 0cf06fa94..94cda61af 100644
--- a/core/vm.cpp
+++ b/core/vm.cpp
@@ -26,6 +26,7 @@ limitations under the License.
 #include "json.hpp"
 #include "md5.h"
 #include "parser.h"
+#include "re2/re2.h"
 #include "state.h"
 #include "static_analysis.h"
 #include "string_utils.h"
@@ -35,6 +36,10 @@ using json = nlohmann::json;
 
 namespace {
 
+static const Fodder EF;  // Empty fodder.
+
+static const LocationRange E;  // Empty.
+
 /** Turn a path e.g. "/a/b/c" into a dir, e.g. "/a/b/".  If there is no path returns "".
  */
 std::string dir_name(const std::string &path)
@@ -881,6 +886,11 @@ class Interpreter {
         builtins["parseJson"] = &Interpreter::builtinParseJson;
         builtins["encodeUTF8"] = &Interpreter::builtinEncodeUTF8;
         builtins["decodeUTF8"] = &Interpreter::builtinDecodeUTF8;
+        builtins["regexFullMatch"] = &Interpreter::builtinRegexFullMatch;
+        builtins["regexPartialMatch"] = &Interpreter::builtinRegexPartialMatch;
+        builtins["regexQuoteMeta"] = &Interpreter::builtinRegexQuoteMeta;
+        builtins["regexReplace"] = &Interpreter::builtinRegexReplace;
+        builtins["regexGlobalReplace"] = &Interpreter::builtinRegexGlobalReplace;
     }
 
     /** Clean up the heap, stack, stash, and builtin function ASTs. */
@@ -1373,6 +1383,129 @@ class Interpreter {
         return decodeUTF8();
     }
 
+    const AST *regexMatch(const std::string &pattern, const std::string &string, bool full)
+    {
+        RE2 re(pattern, RE2::CannedOptions::Quiet);
+        if (!re.ok()) {
+            std::stringstream ss;
+            ss << "Invalid regex '" << re.pattern() << "': " << re.error();
+            throw makeError(stack.top().location, ss.str());
+        }
+
+        int num_groups = re.NumberOfCapturingGroups();
+
+        std::vector<std::string> rcaptures(num_groups);
+        std::vector<RE2::Arg> rargv(num_groups);
+        std::vector<const RE2::Arg *> rargs(num_groups);
+        for (int i = 0; i < num_groups; ++i) {
+            rargs[i] = &rargv[i];
+            rargv[i] = &rcaptures[i];
+        }
+
+        if (full ? RE2::FullMatchN(string, re, rargs.data(), num_groups)
+                 : RE2::PartialMatchN(string, re, rargs.data(), num_groups)) {
+            std::map<const Identifier *, HeapSimpleObject::Field> fields;
+
+            const Identifier *fid = alloc->makeIdentifier(U"string");
+            fields[fid].hide = ObjectField::VISIBLE;
+            fields[fid].body = alloc->make<LiteralString>(E, EF, decode_utf8(string), LiteralString::DOUBLE, "", "");
+
+            fid = alloc->makeIdentifier(U"captures");
+            fields[fid].hide = ObjectField::VISIBLE;
+            std::vector<Array::Element> captures;
+            for (int i = 0; i < num_groups; ++i) {
+                captures.push_back(Array::Element(
+                    alloc->make<LiteralString>(E, EF, decode_utf8(rcaptures[i]), LiteralString::DOUBLE, "", ""),
+                    EF));
+            }
+            fields[fid].body = alloc->make<Array>(E, EF, captures, false, EF);
+
+            fid = alloc->makeIdentifier(U"namedCaptures");
+            fields[fid].hide = ObjectField::VISIBLE;
+            DesugaredObject::Fields named_captures;
+            const std::map<std::string, int> &named_groups = re.NamedCapturingGroups();
+            for (auto it = named_groups.cbegin(); it != named_groups.cend(); ++it) {
+                named_captures.push_back(DesugaredObject::Field(
+                    ObjectField::VISIBLE,
+                    alloc->make<LiteralString>(E, EF, decode_utf8(it->first), LiteralString::DOUBLE, "", ""),
+                    alloc->make<LiteralString>(E, EF, decode_utf8(rcaptures[it->second-1]), LiteralString::DOUBLE, "", "")));
+            }
+            fields[fid].body = alloc->make<DesugaredObject>(E, ASTs{}, named_captures);
+
+            scratch = makeObject<HeapSimpleObject>(BindingFrame{}, fields, ASTs{});
+        } else {
+            scratch = makeNull();
+        }
+        return nullptr;
+    }
+
+    const AST *builtinRegexFullMatch(const LocationRange &loc, const std::vector<Value> &args)
+    {
+        validateBuiltinArgs(loc, "regexFullMatch", args, {Value::STRING, Value::STRING});
+
+        std::string pattern = encode_utf8(static_cast<HeapString *>(args[0].v.h)->value);
+        std::string string = encode_utf8(static_cast<HeapString *>(args[1].v.h)->value);
+
+        return regexMatch(pattern, string, true);
+    }
+
+    const AST *builtinRegexPartialMatch(const LocationRange &loc, const std::vector<Value> &args)
+    {
+        validateBuiltinArgs(loc, "regexPartialMatch", args, {Value::STRING, Value::STRING});
+
+        std::string pattern = encode_utf8(static_cast<HeapString *>(args[0].v.h)->value);
+        std::string string = encode_utf8(static_cast<HeapString *>(args[1].v.h)->value);
+
+        return regexMatch(pattern, string, false);
+    }
+
+    const AST *builtinRegexQuoteMeta(const LocationRange &loc, const std::vector<Value> &args)
+    {
+        validateBuiltinArgs(loc, "regexQuoteMeta", args, {Value::STRING});
+        scratch = makeString(decode_utf8(RE2::QuoteMeta(encode_utf8(static_cast<HeapString *>(args[0].v.h)->value))));
+        return nullptr;
+    }
+
+    const AST *builtinRegexReplace(const LocationRange &loc, const std::vector<Value> &args)
+    {
+        validateBuiltinArgs(loc, "regexReplace", args, {Value::STRING, Value::STRING, Value::STRING});
+
+        std::string string = encode_utf8(static_cast<HeapString *>(args[0].v.h)->value);
+        std::string pattern = encode_utf8(static_cast<HeapString *>(args[1].v.h)->value);
+        std::string replace = encode_utf8(static_cast<HeapString *>(args[2].v.h)->value);
+
+        RE2 re(pattern, RE2::CannedOptions::Quiet);
+        if(!re.ok()) {
+            std::stringstream ss;
+            ss << "Invalid regex '" << re.pattern() << "': " << re.error();
+            throw makeError(stack.top().location, ss.str());
+        }
+
+        RE2::Replace(&string, re, replace);
+        scratch = makeString(decode_utf8(string));
+        return nullptr;
+    }
+
+    const AST *builtinRegexGlobalReplace(const LocationRange &loc, const std::vector<Value> &args)
+    {
+        validateBuiltinArgs(loc, "regexGlobalReplace", args, {Value::STRING, Value::STRING, Value::STRING});
+
+        std::string string = encode_utf8(static_cast<HeapString *>(args[0].v.h)->value);
+        std::string pattern = encode_utf8(static_cast<HeapString *>(args[1].v.h)->value);
+        std::string replace = encode_utf8(static_cast<HeapString *>(args[2].v.h)->value);
+
+        RE2 re(pattern, RE2::CannedOptions::Quiet);
+        if(!re.ok()) {
+            std::stringstream ss;
+            ss << "Invalid regex '" << re.pattern() << "': " << re.error();
+            throw makeError(stack.top().location, ss.str());
+        }
+
+        RE2::GlobalReplace(&string, re, replace);
+        scratch = makeString(decode_utf8(string));
+        return nullptr;
+    }
+
     const AST *builtinTrace(const LocationRange &loc, const std::vector<Value> &args)
     {
         if(args[0].t != Value::STRING) {
diff --git a/test_suite/stdlib.jsonnet b/test_suite/stdlib.jsonnet
index 7ba684790..e2f6ed812 100644
--- a/test_suite/stdlib.jsonnet
+++ b/test_suite/stdlib.jsonnet
@@ -925,4 +925,74 @@ std.assertEqual(std.decodeUTF8([65 + 1 - 1]), 'A') &&
 std.assertEqual(std.decodeUTF8([90, 97, 197, 188, 195, 179, 197, 130, 196, 135, 32, 103, 196, 153, 197, 155, 108, 196, 133, 32, 106, 97, 197, 186, 197, 132]), 'Zażółć gęślą jaźń') &&
 std.assertEqual(std.decodeUTF8([240, 159, 152, 131]), '😃') &&
 
+std.assertEqual(std.regexFullMatch(@'e', 'hello'), null) &&
+
+std.assertEqual(
+  std.regexFullMatch(@'h.*o', 'hello'),
+  {
+    string: 'hello',
+    captures: [],
+    namedCaptures: {},
+  }
+) &&
+
+std.assertEqual(
+  std.regexFullMatch(@'h(.*)o', 'hello'),
+  {
+    string: 'hello',
+    captures: ['ell'],
+    namedCaptures: {},
+  }
+) &&
+
+std.assertEqual(
+  std.regexFullMatch(@'h(?P<mid>.*)o', 'hello'),
+  {
+    string: 'hello',
+    captures: ['ell'],
+    namedCaptures: {
+      mid: 'ell',
+    },
+  }
+) &&
+
+std.assertEqual(std.regexPartialMatch(@'world', 'hello'), null) &&
+
+std.assertEqual(
+  std.regexPartialMatch(@'e', 'hello'),
+  {
+    string: 'hello',
+    captures: [],
+    namedCaptures: {},
+  }
+) &&
+
+std.assertEqual(
+  std.regexPartialMatch(@'e(.*)o', 'hello'),
+  {
+    string: 'hello',
+    captures: ['ll'],
+    namedCaptures: {},
+  }
+) &&
+
+std.assertEqual(
+  std.regexPartialMatch(@'e(?P<mid>.*)o', 'hello'),
+  {
+    string: 'hello',
+    captures: ['ll'],
+    namedCaptures: {
+      mid: 'll',
+    },
+  }
+) &&
+
+std.assertEqual(std.regexQuoteMeta(@'1.5-2.0?'), '1\\.5\\-2\\.0\\?') &&
+
+std.assertEqual(std.regexReplace('wishyfishyisishy', @'ish', 'and'), 'wandyfishyisishy') &&
+std.assertEqual(std.regexReplace('yabba dabba doo', @'b+', 'd'), 'yada dabba doo') &&
+
+std.assertEqual(std.regexGlobalReplace('wishyfishyisishy', @'ish', 'and'), 'wandyfandyisandy') &&
+std.assertEqual(std.regexGlobalReplace('yabba dabba doo', @'b+', 'd'), 'yada dada doo') &&
+
 true