diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1c4fbe494..9ebe1ec42 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,9 +2,52 @@
 This is a list of notable changes to Hyperscan, in reverse chronological order.
 
+## [4.5.0] 2017-06-09
+- New API feature: approximate matching using the "edit distance" extended
+  parameter. This allows the user to request all matches that are a given edit
+  distance from an exact match for a pattern.
+- Initial support for Intel(R) Advanced Vector Extensions 512 (Intel(R)
+  AVX-512), disabled by default. To enable it, pass `-DBUILD_AVX512=1` to
+  `cmake`.
+- Major compile time improvements in many subsystems, reducing compile time
+  significantly for many large pattern sets.
+- Internal reworking of literal matchers to operate on literals of at
+  most eight characters, with subsequent confirmation done in the Rose
+  interpreter. This reduces complexity and bytecode size and improves
+  performance for many pattern sets.
+- Improve performance of the FDR literal matcher front end.
+- Improve bucket assignment and other heuristics governing the FDR literal
+  matcher.
+- Improve optimisation passes that take advantage of extended parameter
+  constraints (`min_offset`, etc).
+- Introduce further lookaround specialisations to improve scanning performance.
+- Optimise Rose interpreter construction to reduce the length of programs
+  generated in some situations.
+- Remove the old "Rose" pattern decomposition analysis pass in favour of the
+  new "Violet" pass introduced in Hyperscan 4.3.0.
+- In streaming mode, allow exhaustion (where the stream can no longer produce
+  matches) to be detected in more situations, improving scanning performance.
+- Improve parsing of control verbs (such as `(*UTF8)`) that can only occur at
+  the beginning of the pattern. Combinations of supported verbs in any order
+  are now permitted.
+- Update version of PCRE used by testing tools as a syntax and semantic
+  reference to PCRE 8.40.
+- Tuning support for Intel(R) microarchitecture code names Skylake, Skylake
+  Server, Goldmont.
+- CMake: when building a native build with a version of GCC that doesn't
+  recognise the host CPU, tune for the microarch selected by
+  `-march=native`.
+- CMake: don't fail if SQLite (which is only required to build the `hsbench`
+  tool) is not present.
+- CMake: detect libc++ directly and use that to inform the Boost version
+  requirement.
+- Bugfix for issue #51: make the fat runtime build wrapper less fragile.
+- Bugfix for issues #46, #52: use `sqlite3_errmsg()` to allow SQLite 3.6.x to
+  be used. Thanks to @EaseTheWorld for the PR.
+
 ## [4.4.1] 2017-02-28
 - Bugfixes to fix issues where stale data was being referenced in scratch
-  memory. In particular this may have resulted in hs_close_stream()
+  memory. In particular this may have resulted in `hs_close_stream()`
   referencing data from other previously scanned streams. This may have
   resulted in incorrect matches being reported.
@@ -142,9 +185,7 @@ This is a list of notable changes to Hyperscan, in reverse chronological order.
   supplied with a NULL scratch pointer if no matches are required. This is in
   line with the behaviour of `hs_close_stream()`.
 - Disallow bounded repeats with a very large minimum repeat but no maximum,
-  i.e. {
-  N,
-} for very large N.
+  i.e. {N,} for very large N.
 - Reduce compile memory usage in literal set expansion for some large cases.
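To make the 4.5.0 approximate matching entry above concrete, here is a minimal sketch of the new extended parameter in use. The `HS_EXT_FLAG_EDIT_DISTANCE` flag and `edit_distance` field are the additions from this release; `hs_compile_ext_multi()` and the scratch/scan calls are the existing public API, and the pattern and corpus below are illustrative only:

```c
#include <stdio.h>
#include <string.h>
#include <hs/hs.h>

/* Report each match produced by approximate matching. */
static int on_match(unsigned int id, unsigned long long from,
                    unsigned long long to, unsigned int flags, void *ctx) {
    printf("match for pattern %u ending at offset %llu\n", id, to);
    return 0; /* continue scanning */
}

int main(void) {
    /* Request matches within Levenshtein distance 1 of /foobar/. */
    hs_expr_ext_t ext;
    memset(&ext, 0, sizeof(ext));
    ext.flags = HS_EXT_FLAG_EDIT_DISTANCE;
    ext.edit_distance = 1;

    const char *expr = "foobar";
    const unsigned int flags = 0;
    const unsigned int id = 1;
    const hs_expr_ext_t *ext_ptr = &ext;

    hs_database_t *db = NULL;
    hs_compile_error_t *err = NULL;
    if (hs_compile_ext_multi(&expr, &flags, &id, &ext_ptr, 1, HS_MODE_BLOCK,
                             NULL, &db, &err) != HS_SUCCESS) {
        fprintf(stderr, "compile failed: %s\n", err->message);
        hs_free_compile_error(err);
        return 1;
    }

    hs_scratch_t *scratch = NULL;
    if (hs_alloc_scratch(db, &scratch) != HS_SUCCESS) {
        hs_free_database(db);
        return 1;
    }

    const char data[] = "xxx fo0bar xxx"; /* one substitution away */
    hs_scan(db, data, sizeof(data) - 1, 0, scratch, on_match, NULL);

    hs_free_scratch(scratch);
    hs_free_database(db);
    return 0;
}
```

Scanning `"xxx fo0bar xxx"` reports a match for pattern 1, since `fo0bar` is a single substitution away from an exact match for `foobar`.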
## [4.0.0] 2015-10-20 diff --git a/CMakeLists.txt b/CMakeLists.txt index 7ede52b45..7f452696a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,20 +1,22 @@ cmake_minimum_required (VERSION 2.8.11) -project (Hyperscan C CXX) +project (hyperscan C CXX) set (HS_MAJOR_VERSION 4) -set (HS_MINOR_VERSION 4) -set (HS_PATCH_VERSION 1) +set (HS_MINOR_VERSION 5) +set (HS_PATCH_VERSION 0) set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION}) set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) +include(CheckCXXSymbolExists) INCLUDE (CheckFunctionExists) INCLUDE (CheckIncludeFiles) INCLUDE (CheckIncludeFileCXX) INCLUDE (CheckLibraryExists) INCLUDE (CheckSymbolExists) include (CMakeDependentOption) +include (GNUInstallDirs) include (${CMAKE_MODULE_PATH}/platform.cmake) include (${CMAKE_MODULE_PATH}/ragel.cmake) @@ -36,6 +38,7 @@ endif() set(BINDIR "${PROJECT_BINARY_DIR}/bin") set(LIBDIR "${PROJECT_BINARY_DIR}/lib") +set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR}) # First for the generic no-config case set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${BINDIR}") @@ -59,31 +62,6 @@ include_directories(${PROJECT_SOURCE_DIR}/src) include_directories(${PROJECT_BINARY_DIR}) include_directories(SYSTEM include) -set(BOOST_USE_STATIC_LIBS OFF) -set(BOOST_USE_MULTITHREADED OFF) -set(BOOST_USE_STATIC_RUNTIME OFF) -if (CMAKE_SYSTEM_NAME MATCHES "Darwin" - OR (CMAKE_SYSTEM_NAME MATCHES "FreeBSD" - AND CMAKE_C_COMPILER_ID MATCHES "Clang")) - # we need a more recent boost for libc++ used by clang on OSX and FreeBSD - set(BOOST_MINVERSION 1.61.0) -else () - set(BOOST_MINVERSION 1.57.0) -endif () -set(BOOST_NO_BOOST_CMAKE ON) - -# first check for Boost installed on the system -find_package(Boost ${BOOST_MINVERSION}) -if(NOT Boost_FOUND) - # we might have boost in tree, so provide a hint and try again - message(STATUS "trying include dir for boost") - set(BOOST_INCLUDEDIR "${PROJECT_SOURCE_DIR}/include") - find_package(Boost ${BOOST_MINVERSION}) - if(NOT Boost_FOUND) - message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. Either install system packages if available, extract Boost headers to ${CMAKE_SOURCE_DIR}/include, or set the CMake BOOST_ROOT variable.") - endif() -endif() - include (${CMAKE_MODULE_PATH}/boost.cmake) # -- make this work? set(python_ADDITIONAL_VERSIONS 2.7 2.6) @@ -132,6 +110,12 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) endif() endif() +if (NOT BUILD_SHARED_LIBS) + # build static libs + set(BUILD_STATIC_LIBS ON) + mark_as_advanced(BUILD_STATIC_LIBS) +endif () + #for config if (OPTIMISE) set(HS_OPTIMIZE ON) @@ -141,6 +125,9 @@ CMAKE_DEPENDENT_OPTION(DUMP_SUPPORT "Dump code support; normally on, except in r CMAKE_DEPENDENT_OPTION(DISABLE_ASSERTS "Disable assert(); Asserts are enabled in debug builds, disabled in release builds" OFF "NOT RELEASE_BUILD" ON) +option(BUILD_AVX512 "Experimental: support avx512 in the fat runtime" + OFF) + option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC to be set in project" OFF) # TODO: per platform config files? 
@@ -148,16 +135,21 @@ option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC # TODO: windows generator on cmake always uses msvc, even if we plan to build with icc if(MSVC OR MSVC_IDE) message(STATUS "Building for Windows") + if (MSVC_VERSION LESS 1700) message(FATAL_ERROR "The project requires C++11 features.") else() if (WINDOWS_ICC) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O3 /Qstd=c99 /Qrestrict /QxHost /wd4267 /Qdiag-disable:remark") + set(ARCH_C_FLAGS "/QxHost") + set(ARCH_CXX_FLAGS "/QxHost") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O3 /Qstd=c99 /Qrestrict /wd4267 /Qdiag-disable:remark") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /Qstd=c++11 /Qrestrict /QxHost /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS") else() - #TODO: don't hardcode arch - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 /arch:AVX /wd4267") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /arch:AVX /wd4244 /wd4267 /wd4800 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS") + # todo: change these as required + set(ARCH_C_FLAGS "/arch:AVX2") + set(ARCH_CXX_FLAGS "/arch:AVX2") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 /wd4244 /wd4267") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /wd4244 /wd4267 /wd4800 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS") endif() string(REPLACE "/RTC1" "" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") string(REPLACE "/RTC1" "" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}") @@ -166,32 +158,58 @@ if(MSVC OR MSVC_IDE) set(CMAKE_C_FLAGS_DEBUG "/DNDEBUG ${CMAKE_C_FLAGS_DEBUG}") set(CMAKE_CXX_FLAGS_DEBUG "/DNDEBUG ${CMAKE_CXX_FLAGS_DEBUG}") endif () + + # flags only used to build hs libs + set(HS_C_FLAGS "/Gv") + set(HS_CXX_FLAGS "/Gv") endif() else() - # compiler version checks TODO: test more compilers - if (CMAKE_COMPILER_IS_GNUCXX) - set (GNUCXX_MINVER "4.8.1") - exec_program(${CMAKE_CXX_COMPILER} - ARGS ${CMAKE_CXX_COMPILER_ARG1} --version - OUTPUT_VARIABLE _GXX_OUTPUT) - # is the following too fragile? - string(REGEX REPLACE ".* ([0-9]\\.[0-9](\\.[0-9])?)( |\n).*" "\\1" - GNUCXX_VERSION "${_GXX_OUTPUT}") - message(STATUS "g++ version ${GNUCXX_VERSION}") - if (GNUCXX_VERSION VERSION_LESS ${GNUCXX_MINVER}) - message(FATAL_ERROR "A minimum of g++ ${GNUCXX_MINVER} is required for C++11 support") - endif() - unset(_GXX_OUTPUT) - endif() - # remove CMake's idea of optimisation foreach (CONFIG ${CMAKE_BUILD_TYPE} ${CMAKE_CONFIGURATION_TYPES}) string(REGEX REPLACE "-O[^ ]*" "" CMAKE_C_FLAGS_${CONFIG} "${CMAKE_C_FLAGS_${CONFIG}}") string(REGEX REPLACE "-O[^ ]*" "" CMAKE_CXX_FLAGS_${CONFIG} "${CMAKE_CXX_FLAGS_${CONFIG}}") endforeach () + if (CMAKE_COMPILER_IS_GNUCC) + message(STATUS "gcc version ${CMAKE_C_COMPILER_VERSION}") + # If gcc doesn't recognise the host cpu, then mtune=native becomes + # generic, which isn't very good in some cases. march=native looks at + # cpuid info and then chooses the best microarch it can (and replaces + # the flag), so use that for tune. 
+ + # arg1 might exist if using ccache + string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1) + set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=native -mtune=native) + execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} + OUTPUT_VARIABLE _GCC_OUTPUT) + string(REGEX REPLACE ".*march=[ \t]*([^ \n]*)[ \n].*" "\\1" + GNUCC_ARCH "${_GCC_OUTPUT}") + + # test the parsed flag + set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH}) + execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} + OUTPUT_QUIET ERROR_QUIET + INPUT_FILE /dev/null + RESULT_VARIABLE GNUCC_TUNE_TEST) + if (NOT GNUCC_TUNE_TEST EQUAL 0) + message(SEND_ERROR "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid") + endif() + set(TUNE_FLAG ${GNUCC_ARCH}) + else () + set(TUNE_FLAG native) + endif() + + # compiler version checks TODO: test more compilers + if (CMAKE_COMPILER_IS_GNUCXX) + set(GNUCXX_MINVER "4.8.1") + message(STATUS "g++ version ${CMAKE_CXX_COMPILER_VERSION}") + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS GNUCXX_MINVER) + message(FATAL_ERROR "A minimum of g++ ${GNUCXX_MINVER} is required for C++11 support") + endif() + endif() + if(OPTIMISE) set(OPT_C_FLAG "-O3") set(OPT_CXX_FLAG "-O2") @@ -216,12 +234,12 @@ else() set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -DNDEBUG") endif() - if (NOT CMAKE_C_FLAGS MATCHES .*march.*) - set(ARCH_C_FLAGS "${ARCH_C_FLAGS} -march=native -mtune=native") + if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*) + set(ARCH_C_FLAGS "-march=native -mtune=${TUNE_FLAG}") endif() - if (NOT CMAKE_CXX_FLAGS MATCHES .*march.*) - set(ARCH_CXX_FLAGS "${ARCH_CXX_FLAGS} -march=native -mtune=native") + if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*) + set(ARCH_CXX_FLAGS "-march=native -mtune=${TUNE_FLAG}") endif() if(CMAKE_COMPILER_IS_GNUCC) @@ -244,6 +262,11 @@ else() set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-abi") endif () + if (CMAKE_C_COMPILER_ID MATCHES "Intel") + set(SKYLAKE_FLAG "-xCORE-AVX512") + else () + set(SKYLAKE_FLAG "-march=skylake-avx512") + endif () endif() CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H) @@ -259,6 +282,9 @@ CHECK_FUNCTION_EXISTS(_aligned_malloc HAVE__ALIGNED_MALLOC) CHECK_C_COMPILER_FLAG(-fvisibility=hidden HAS_C_HIDDEN) CHECK_CXX_COMPILER_FLAG(-fvisibility=hidden HAS_CXX_HIDDEN) +# are we using libc++ +CHECK_CXX_SYMBOL_EXISTS(_LIBCPP_VERSION ciso646 HAVE_LIBCPP) + if (RELEASE_BUILD) if (HAS_C_HIDDEN) set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fvisibility=hidden") @@ -294,13 +320,10 @@ endif () include (${CMAKE_MODULE_PATH}/arch.cmake) -if (NOT FAT_RUNTIME AND NOT HAVE_SSSE3) - message(FATAL_ERROR "A minimum of SSSE3 compiler support is required") -endif () - # testing a builtin takes a little more work CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED) CHECK_CXX_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED) +CHECK_C_SOURCE_COMPILES("int main(void) { __builtin_constant_p(0); }" HAVE__BUILTIN_CONSTANT_P) if (NOT WIN32) set(C_FLAGS_TO_CHECK @@ -404,13 +427,13 @@ endif() endif() if (NOT FAT_RUNTIME) -message(STATUS "Building for current host CPU") -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS}") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS}") + message(STATUS "Building for current host CPU: ${ARCH_C_FLAGS}") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS}") + 
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS}") else() -message(STATUS "Building runtime for multiple microarchitectures") -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + message(STATUS "Building runtime for multiple microarchitectures") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") endif() add_subdirectory(util) @@ -435,19 +458,18 @@ if (NOT WIN32) configure_file(libhs.pc.in libhs.pc @ONLY) # only replace @ quoted vars install(FILES ${CMAKE_BINARY_DIR}/libhs.pc - DESTINATION "${CMAKE_INSTALL_PREFIX}/lib/pkgconfig") + DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig") endif() # only set these after all tests are done if (NOT FAT_RUNTIME) -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${HS_C_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS} ${HS_CXX_FLAGS}") else() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") endif() - if(NOT WIN32) set(RAGEL_C_FLAGS "-Wno-unused") endif() @@ -459,13 +481,20 @@ set_source_files_properties( ragelmaker(src/parser/Parser.rl) +set_source_files_properties( + ${CMAKE_BINARY_DIR}/src/parser/control_verbs.cpp + PROPERTIES + COMPILE_FLAGS "${RAGEL_C_FLAGS}") + +ragelmaker(src/parser/control_verbs.rl) + SET(hs_HEADERS src/hs.h src/hs_common.h src/hs_compile.h src/hs_runtime.h ) -install(FILES ${hs_HEADERS} DESTINATION include/hs) +install(FILES ${hs_HEADERS} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/hs") set (hs_exec_common_SRCS src/alloc.c @@ -541,25 +570,6 @@ set (hs_exec_SRCS src/nfa/mpv.h src/nfa/mpv.c src/nfa/mpv_internal.h - src/nfa/multiaccel_common.h - src/nfa/multiaccel_doubleshift.h - src/nfa/multiaccel_doubleshiftgrab.h - src/nfa/multiaccel_long.h - src/nfa/multiaccel_longgrab.h - src/nfa/multiaccel_shift.h - src/nfa/multiaccel_shiftgrab.h - src/nfa/multishufti.c - src/nfa/multishufti_avx2.h - src/nfa/multishufti_sse.h - src/nfa/multishufti.h - src/nfa/multitruffle.c - src/nfa/multitruffle_avx2.h - src/nfa/multitruffle_sse.h - src/nfa/multitruffle.h - src/nfa/multivermicelli.c - src/nfa/multivermicelli.h - src/nfa/multivermicelli_sse.h - src/nfa/multivermicelli_avx2.h src/nfa/nfa_api.h src/nfa/nfa_api_dispatch.c src/nfa/nfa_internal.h @@ -573,13 +583,11 @@ set (hs_exec_SRCS src/nfa/sheng_impl.h src/nfa/sheng_impl4.h src/nfa/sheng_internal.h - src/nfa/shufti_common.h src/nfa/shufti.c src/nfa/shufti.h src/nfa/tamarama.c src/nfa/tamarama.h src/nfa/tamarama_internal.h - src/nfa/truffle_common.h src/nfa/truffle.c src/nfa/truffle.h src/nfa/vermicelli.h @@ -662,6 +670,7 @@ SET (hs_SRCS src/compiler/compiler.h src/compiler/error.cpp src/compiler/error.h + src/compiler/expression_info.h src/fdr/engine_description.cpp src/fdr/engine_description.h src/fdr/fdr_compile.cpp @@ -719,8 +728,6 @@ SET (hs_SRCS src/nfa/mpv_internal.h src/nfa/mpvcompile.cpp src/nfa/mpvcompile.h - src/nfa/multiaccel_compilehelper.cpp - src/nfa/multiaccel_compilehelper.h src/nfa/nfa_api.h src/nfa/nfa_api_queue.h src/nfa/nfa_api_util.h @@ -775,6 +782,8 @@ SET (hs_SRCS src/nfagraph/ng_extparam.h src/nfagraph/ng_fixed_width.cpp src/nfagraph/ng_fixed_width.h + src/nfagraph/ng_fuzzy.cpp + src/nfagraph/ng_fuzzy.h src/nfagraph/ng_haig.cpp src/nfagraph/ng_haig.h src/nfagraph/ng_holder.cpp @@ -820,8 +829,6 @@ SET (hs_SRCS src/nfagraph/ng_restructuring.h src/nfagraph/ng_revacc.cpp src/nfagraph/ng_revacc.h - 
src/nfagraph/ng_rose.cpp - src/nfagraph/ng_rose.h src/nfagraph/ng_sep.cpp src/nfagraph/ng_sep.h src/nfagraph/ng_small_literal_set.cpp @@ -893,6 +900,8 @@ SET (hs_SRCS src/parser/buildstate.h src/parser/check_refs.cpp src/parser/check_refs.h + src/parser/control_verbs.cpp + src/parser/control_verbs.h src/parser/parse_error.cpp src/parser/parse_error.h src/parser/parser_util.cpp @@ -928,6 +937,8 @@ SET (hs_SRCS src/rose/rose_build_compile.cpp src/rose/rose_build_convert.cpp src/rose/rose_build_convert.h + src/rose/rose_build_dedupe.cpp + src/rose/rose_build_engine_blob.cpp src/rose/rose_build_engine_blob.h src/rose/rose_build_exclusive.cpp src/rose/rose_build_exclusive.h @@ -936,6 +947,10 @@ SET (hs_SRCS src/rose/rose_build_impl.h src/rose/rose_build_infix.cpp src/rose/rose_build_infix.h + src/rose/rose_build_instructions.cpp + src/rose/rose_build_instructions.h + src/rose/rose_build_lit_accel.cpp + src/rose/rose_build_lit_accel.h src/rose/rose_build_long_lit.cpp src/rose/rose_build_long_lit.h src/rose/rose_build_lookaround.cpp @@ -947,6 +962,7 @@ SET (hs_SRCS src/rose/rose_build_misc.cpp src/rose/rose_build_program.cpp src/rose/rose_build_program.h + src/rose/rose_build_resources.h src/rose/rose_build_role_aliasing.cpp src/rose/rose_build_scatter.cpp src/rose/rose_build_scatter.h @@ -982,8 +998,12 @@ SET (hs_SRCS src/util/fatbit_build.h src/util/graph.h src/util/hash.h + src/util/hash_dynamic_bitset.h + src/util/math.h src/util/multibit_build.cpp src/util/multibit_build.h + src/util/noncopyable.h + src/util/operators.h src/util/order_check.h src/util/partial_store.h src/util/partitioned_set.h @@ -993,6 +1013,7 @@ SET (hs_SRCS src/util/report_manager.cpp src/util/report_manager.h src/util/simd_utils.h + src/util/small_vector.h src/util/target_info.cpp src/util/target_info.h src/util/ue2_containers.h @@ -1048,8 +1069,6 @@ set(hs_dump_SRCS src/rose/rose_build_dump.h src/rose/rose_in_dump.cpp src/rose/rose_in_dump.h - src/rose/rose_dump.cpp - src/rose/rose_dump.h src/util/dump_charclass.cpp src/util/dump_charclass.h src/util/dump_util.cpp @@ -1074,10 +1093,14 @@ if (NOT FAT_RUNTIME) set(hs_exec_SRCS ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) endif() - add_library(hs_exec OBJECT ${hs_exec_SRCS}) + if (BUILD_STATIC_LIBS) + add_library(hs_exec OBJECT ${hs_exec_SRCS}) + + add_library(hs_runtime STATIC src/hs_version.c src/hs_valid_platform.c $) + set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) - add_library(hs_runtime STATIC src/hs_version.c src/hs_valid_platform.c $) - set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) + add_library(hs STATIC ${hs_SRCS} src/hs_valid_platform.c $) + endif (BUILD_STATIC_LIBS) if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) add_library(hs_exec_shared OBJECT ${hs_exec_SRCS}) @@ -1085,51 +1108,98 @@ if (NOT FAT_RUNTIME) endif() else (FAT_RUNTIME) - set(BUILD_WRAPPER "${PROJECT_SOURCE_DIR}/cmake/build_wrapper.sh") - add_library(hs_exec_core2 OBJECT ${hs_exec_SRCS}) - set_target_properties(hs_exec_core2 PROPERTIES - COMPILE_FLAGS "-march=core2" - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" - ) - - add_library(hs_exec_corei7 OBJECT ${hs_exec_SRCS}) - set_target_properties(hs_exec_corei7 PROPERTIES - COMPILE_FLAGS "-march=corei7" - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" - ) - add_library(hs_exec_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) - set_target_properties(hs_exec_avx2 PROPERTIES - COMPILE_FLAGS "-march=core-avx2" - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 
${CMAKE_MODULE_PATH}/keep.syms.in" - ) - - add_library(hs_exec_common OBJECT - ${hs_exec_common_SRCS} - src/dispatcher.c - ) + set(BUILD_WRAPPER "${PROJECT_SOURCE_DIR}/cmake/build_wrapper.sh") + if (NOT BUILD_AVX512) + set (DISPATCHER_DEFINE "-DDISABLE_AVX512_DISPATCH") + endif (NOT BUILD_AVX512) set_source_files_properties(src/dispatcher.c PROPERTIES - COMPILE_FLAGS "-Wno-unused-parameter -Wno-unused-function") + COMPILE_FLAGS "-Wno-unused-parameter -Wno-unused-function ${DISPATCHER_DEFINE}") + + if (BUILD_STATIC_LIBS) + add_library(hs_exec_core2 OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_core2 PROPERTIES + COMPILE_FLAGS "-march=core2" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + + add_library(hs_exec_corei7 OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_corei7 PROPERTIES + COMPILE_FLAGS "-march=corei7" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + + add_library(hs_exec_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_avx2 PROPERTIES + COMPILE_FLAGS "-march=core-avx2" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + if (BUILD_AVX512) + add_library(hs_exec_avx512 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_avx512 PROPERTIES + COMPILE_FLAGS "${SKYLAKE_FLAG}" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_AVX512) + + add_library(hs_exec_common OBJECT + ${hs_exec_common_SRCS} + src/dispatcher.c + ) + + # hs_version.c is added explicitly to avoid some build systems that refuse to + # create a lib without any src (I'm looking at you Xcode) + + add_library(hs_runtime STATIC src/hs_version.c + $ + ${RUNTIME_LIBS}) + set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) + + # we want the static lib for testing + add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c + ${hs_SRCS} + $ + ${RUNTIME_LIBS}) + + endif (BUILD_STATIC_LIBS) if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) + # build shared libs add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_SHLIBS $) set_target_properties(hs_exec_shared_core2 PROPERTIES COMPILE_FLAGS "-march=core2" POSITION_INDEPENDENT_CODE TRUE RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" ) add_library(hs_exec_shared_corei7 OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_SHLIBS $) set_target_properties(hs_exec_shared_corei7 PROPERTIES COMPILE_FLAGS "-march=corei7" POSITION_INDEPENDENT_CODE TRUE RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" ) add_library(hs_exec_shared_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_SHLIBS $) set_target_properties(hs_exec_shared_avx2 PROPERTIES COMPILE_FLAGS "-march=core-avx2" POSITION_INDEPENDENT_CODE TRUE RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in" ) + + if (BUILD_AVX512) + add_library(hs_exec_shared_avx512 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_SHLIBS $) + set_target_properties(hs_exec_shared_avx512 PROPERTIES + COMPILE_FLAGS "${SKYLAKE_FLAG}" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_AVX512) add_library(hs_exec_common_shared OBJECT ${hs_exec_common_SRCS} src/dispatcher.c @@ 
-1140,31 +1210,21 @@ else (FAT_RUNTIME) endif() # SHARED -# hs_version.c is added explicitly to avoid some build systems that refuse to -# create a lib without any src (I'm looking at you Xcode) - - add_library(hs_runtime STATIC src/hs_version.c - $ $ - $ $) endif (NOT FAT_RUNTIME) - -set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) if (NOT BUILD_SHARED_LIBS) - install(TARGETS hs_runtime DESTINATION lib) + install(TARGETS hs_runtime DESTINATION ${CMAKE_INSTALL_LIBDIR}) endif() if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) if (NOT FAT_RUNTIME) - add_library(hs_runtime_shared SHARED src/hs_version.c src/hs_valid_platform.c -$) - else() + add_library(hs_runtime_shared SHARED src/hs_version.c + src/hs_valid_platform.c $) + else() add_library(hs_runtime_shared SHARED src/hs_version.c src/hs_valid_platform.c $ - $ - $ - $) + ${RUNTIME_SHLIBS}) endif() set_target_properties(hs_runtime_shared PROPERTIES VERSION ${LIB_VERSION} @@ -1173,24 +1233,17 @@ $) MACOSX_RPATH ON LINKER_LANGUAGE C) install(TARGETS hs_runtime_shared - RUNTIME DESTINATION bin - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib) + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) endif() -if (NOT FAT_RUNTIME) - add_library(hs STATIC ${hs_SRCS} src/hs_valid_platform.c $) -else() - # we want the static lib for testing - add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c - ${hs_SRCS} $ $ - $ $) -endif() - -add_dependencies(hs ragel_Parser) +if (BUILD_STATIC_LIBS) + add_dependencies(hs ragel_Parser) +endif () if (NOT BUILD_SHARED_LIBS) -install(TARGETS hs DESTINATION lib) + install(TARGETS hs DESTINATION ${CMAKE_INSTALL_LIBDIR}) endif() if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) @@ -1200,9 +1253,7 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) else() add_library(hs_shared SHARED src/hs_version.c src/hs_valid_platform.c ${hs_SRCS} $ - $ - $ - $) + ${RUNTIME_SHLIBS}) endif() add_dependencies(hs_shared ragel_Parser) @@ -1212,11 +1263,18 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) SOVERSION ${LIB_SOVERSION} MACOSX_RPATH ON) install(TARGETS hs_shared - RUNTIME DESTINATION bin - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib) + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) endif() +# used by tools and other targets +if (NOT BUILD_STATIC_LIBS) + # use shared lib without having to change all the targets + add_library(hs ALIAS hs_shared) +endif () + + if(NOT WIN32) add_subdirectory(examples) endif() diff --git a/cmake/arch.cmake b/cmake/arch.cmake index e98fbf227..0519b2e5a 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -10,8 +10,24 @@ else () message (FATAL_ERROR "No intrinsics header found") endif () +if (BUILD_AVX512) + CHECK_C_COMPILER_FLAG(${SKYLAKE_FLAG} HAS_ARCH_SKYLAKE) + if (NOT HAS_ARCH_SKYLAKE) + message (FATAL_ERROR "AVX512 not supported by compiler") + endif () +endif () -set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${ARCH_C_FLAGS}") +if (FAT_RUNTIME) + # test the highest level microarch to make sure everything works + if (BUILD_AVX512) + set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${SKYLAKE_FLAG}") + else () + set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-avx2") + endif () +else (NOT FAT_RUNTIME) + # if not fat runtime, then test given cflags + set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${ARCH_C_FLAGS}") +endif () 
# ensure we have the minimum of SSSE3 - call a SSSE3 intrinsic
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
@@ -31,5 +47,38 @@ int main(){
     (void)_mm256_xor_si256(z, z);
 }" HAVE_AVX2)
 
+# and now for AVX512
+CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
+#if !defined(__AVX512BW__)
+#error no avx512bw
+#endif
+
+int main(){
+    __m512i z = _mm512_setzero_si512();
+    (void)_mm512_abs_epi8(z);
+}" HAVE_AVX512)
+
+if (FAT_RUNTIME)
+    if (NOT HAVE_SSSE3)
+        message(FATAL_ERROR "SSSE3 support required to build fat runtime")
+    endif ()
+    if (NOT HAVE_AVX2)
+        message(FATAL_ERROR "AVX2 support required to build fat runtime")
+    endif ()
+    if (BUILD_AVX512 AND NOT HAVE_AVX512)
+        message(FATAL_ERROR "AVX512 support requested but not supported")
+    endif ()
+else (NOT FAT_RUNTIME)
+    if (NOT HAVE_AVX2)
+        message(STATUS "Building without AVX2 support")
+    endif ()
+    if (NOT HAVE_AVX512)
+        message(STATUS "Building without AVX512 support")
+    endif ()
+    if (NOT HAVE_SSSE3)
+        message(FATAL_ERROR "A minimum of SSSE3 compiler support is required")
+    endif ()
+endif ()
+
 unset (CMAKE_REQUIRED_FLAGS)
 unset (INTRIN_INC_H)
diff --git a/cmake/backtrace.cmake b/cmake/backtrace.cmake
index b8ad79f63..5a446e894 100644
--- a/cmake/backtrace.cmake
+++ b/cmake/backtrace.cmake
@@ -45,10 +45,12 @@ if(HAVE_BACKTRACE)
     if(HAS_RDYNAMIC)
         list(INSERT BACKTRACE_LDFLAGS 0 -rdynamic)
     endif()
-    # cmake scope fun
-    set(HAVE_BACKTRACE ${HAVE_BACKTRACE} PARENT_SCOPE)
 else()
     set(BACKTRACE_CFLAGS "")
     set(BACKTRACE_LDFLAGS "")
 endif()
+# cmake scope fun
+set(HAVE_BACKTRACE ${HAVE_BACKTRACE} CACHE BOOL INTERNAL)
+set(BACKTRACE_CFLAGS ${BACKTRACE_CFLAGS} CACHE STRING INTERNAL)
+set(BACKTRACE_LDFLAGS ${BACKTRACE_LDFLAGS} CACHE STRING INTERNAL)
diff --git a/cmake/boost.cmake b/cmake/boost.cmake
index 3d513deb6..44b4e8ba6 100644
--- a/cmake/boost.cmake
+++ b/cmake/boost.cmake
@@ -1,3 +1,31 @@
+# Various checks related to Boost
+
+set(BOOST_USE_STATIC_LIBS OFF)
+set(BOOST_USE_MULTITHREADED OFF)
+set(BOOST_USE_STATIC_RUNTIME OFF)
+if (HAVE_LIBCPP)
+    # we need a more recent boost for libc++
+    set(BOOST_MINVERSION 1.61.0)
+else ()
+    set(BOOST_MINVERSION 1.57.0)
+endif ()
+set(BOOST_NO_BOOST_CMAKE ON)
+
+unset(Boost_INCLUDE_DIR CACHE)
+# we might have boost in tree, so provide a hint and try again
+set(BOOST_INCLUDEDIR "${PROJECT_SOURCE_DIR}/include")
+find_package(Boost ${BOOST_MINVERSION} QUIET)
+if(NOT Boost_FOUND)
+    # otherwise check for Boost installed on the system
+    unset(BOOST_INCLUDEDIR)
+    find_package(Boost ${BOOST_MINVERSION} QUIET)
+    if(NOT Boost_FOUND)
+        message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. 
Either install system packages if available, extract Boost headers to ${CMAKE_SOURCE_DIR}/include, or set the CMake BOOST_ROOT variable.") + endif() +endif() + +message(STATUS "Boost version: ${Boost_MAJOR_VERSION}.${Boost_MINOR_VERSION}.${Boost_SUBMINOR_VERSION}") + # Boost 1.62 has a bug that we've patched around, check if it is required if (Boost_VERSION EQUAL 106200) set (CMAKE_REQUIRED_INCLUDES ${BOOST_INCLUDEDIR} "${PROJECT_SOURCE_DIR}/include") @@ -38,4 +66,7 @@ ${BOOST_REV_TEST}" BOOST_REVGRAPH_PATCH) endif() unset (CMAKE_REQUIRED_INCLUDES) +else () + unset(BOOST_REVGRAPH_OK CACHE) + unset(BOOST_REVGRAPH_PATCH CACHE) endif () # Boost 1.62.0 diff --git a/cmake/build_wrapper.sh b/cmake/build_wrapper.sh index 5baf209b3..70392229c 100755 --- a/cmake/build_wrapper.sh +++ b/cmake/build_wrapper.sh @@ -1,27 +1,28 @@ #!/bin/sh -e # This is used for renaming symbols for the fat runtime, don't call directly # TODO: make this a lot less fragile! +cleanup () { + rm -f ${SYMSFILE} ${KEEPSYMS} +} + PREFIX=$1 KEEPSYMS_IN=$2 shift 2 -BUILD=$@ -OUT=$(echo $BUILD | sed 's/.* -o \(.*\.o\).*/\1/') -SYMSFILE=/tmp/${PREFIX}_rename.syms.$$ -KEEPSYMS=/tmp/keep.syms.$$ -# grab the command without the target obj or src file flags -# we don't just call gcc directly as there may be flags modifying the arch -CC_CMD=$(echo $BUILD | sed 's/ -o .*\.o//;s/ -c //;s/ .[^ ]*\.c//;') -# find me a libc -LIBC_SO=$(${CC_CMD} --print-file-name=libc.so.6) +# $@ contains the actual build command +OUT=$(echo "$@" | sed 's/.* -o \(.*\.o\).*/\1/') +trap cleanup INT QUIT EXIT +SYMSFILE=$(mktemp --tmpdir ${PREFIX}_rename.syms.XXXXX) +KEEPSYMS=$(mktemp --tmpdir keep.syms.XXXXX) +# find the libc used by gcc +LIBC_SO=$("$@" --print-file-name=libc.so.6) cp ${KEEPSYMS_IN} ${KEEPSYMS} # get all symbols from libc and turn them into patterns nm -f p -g -D ${LIBC_SO} | sed -s 's/\([^ ]*\).*/^\1$/' >> ${KEEPSYMS} # build the object -${BUILD} +"$@" # rename the symbols in the object nm -f p -g ${OUT} | cut -f1 -d' ' | grep -v -f ${KEEPSYMS} | sed -e "s/\(.*\)/\1\ ${PREFIX}_\1/" >> ${SYMSFILE} if test -s ${SYMSFILE} then objcopy --redefine-syms=${SYMSFILE} ${OUT} fi -rm -f ${SYMSFILE} ${KEEPSYMS} diff --git a/cmake/config.h.in b/cmake/config.h.in index c7b577c22..9c250b4c7 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -1,5 +1,8 @@ /* used by cmake */ +#ifndef CONFIG_H_ +#define CONFIG_H_ + /* "Define if the build is 32 bit" */ #cmakedefine ARCH_32_BIT @@ -43,6 +46,8 @@ 0 if you don't. */ #cmakedefine HAVE_DECL_PTHREAD_SETAFFINITY_NP +#cmakedefine HAVE_PTHREAD_NP_H + /* Define to 1 if you have the `malloc_info' function. */ #cmakedefine HAVE_MALLOC_INFO @@ -76,6 +81,9 @@ /* Define to 1 if you have the `_aligned_malloc' function. 
*/
 #cmakedefine HAVE__ALIGNED_MALLOC
 
+/* Define if compiler has __builtin_constant_p */
+#cmakedefine HAVE__BUILTIN_CONSTANT_P
+
 /* Optimize, inline critical functions */
 #cmakedefine HS_OPTIMIZE
 
@@ -91,3 +99,5 @@
 
 /* define if reverse_graph requires patch for boost 1.62.0 */
 #cmakedefine BOOST_REVGRAPH_PATCH
+
+#endif /* CONFIG_H_ */
diff --git a/cmake/sqlite3.cmake b/cmake/sqlite3.cmake
index c07f1161e..cbe17c6d4 100644
--- a/cmake/sqlite3.cmake
+++ b/cmake/sqlite3.cmake
@@ -22,7 +22,7 @@ if (NOT SQLITE3_FOUND)
         set(SQLITE3_INCLUDE_DIRS "${PROJECT_SOURCE_DIR}/sqlite3")
         set(SQLITE3_LDFLAGS sqlite3_static)
     else()
-        message(FATAL_ERROR " no sqlite3 in source tree")
+        message(STATUS " no sqlite3 in source tree")
     endif()
 endif()
diff --git a/doc/dev-reference/compilation.rst b/doc/dev-reference/compilation.rst
index 8f44c15c1..02b5c3f38 100644
--- a/doc/dev-reference/compilation.rst
+++ b/doc/dev-reference/compilation.rst
@@ -64,7 +64,7 @@ libpcre are supported. The use of unsupported constructs will result in
 compilation errors.
 
 The version of PCRE used to validate Hyperscan's interpretation of this syntax
-is 8.38.
+is 8.40.
 
 ====================
 Supported Constructs
@@ -171,6 +171,8 @@ The following regex constructs are not supported by Hyperscan:
 * Callouts and embedded code.
 * Atomic grouping and possessive quantifiers.
 
+.. _semantics:
+
 *********
 Semantics
 *********
@@ -284,16 +286,24 @@ which provides the following fields:
   expression should match successfully.
 * ``min_length``: The minimum match length (from start to end) required to
   successfully match this expression.
+* ``edit_distance``: Match this expression within a given Levenshtein distance.
 
-These parameters allow the set of matches produced by a pattern to be
-constrained at compile time, rather than relying on the application to process
-unwanted matches at runtime.
+These parameters either allow the set of matches produced by a pattern to be
+constrained at compile time (rather than relying on the application to process
+unwanted matches at runtime), or allow matching a pattern approximately (within
+a given edit distance) to produce more matches.
 
 For example, the pattern :regexp:`/foo.*bar/` when given a ``min_offset`` of
 10 and a ``max_offset`` of 15 will not produce matches when scanned against
 ``foobar`` or ``foo0123456789bar`` but will produce a match against the data
 streams ``foo0123bar`` or ``foo0123456bar``.
 
+Similarly, the pattern :regexp:`/foobar/` when given an ``edit_distance`` of 2
+will produce matches when scanned against ``foobar``, ``fooba``, ``fobr``,
+``fo_baz``, ``foooobar``, and anything else that lies within edit distance of 2
+(as defined by Levenshtein distance). For more details, see the
+:ref:`approximate_matching` section.
+
 =================
 Prefiltering Mode
 =================
@@ -375,3 +385,74 @@ An :c:type:`hs_platform_info_t` structure targeted at the current host can be
 built with the :c:func:`hs_populate_platform` function.
 
 See :ref:`api_constants` for the full list of CPU tuning and feature flags.
+
+.. _approximate_matching:
+
+********************
+Approximate matching
+********************
+
+Hyperscan provides an experimental approximate matching mode, which will match
+patterns within a given edit distance. The precise matching behavior is defined
+as follows:
+
+#. **Edit distance** is defined as Levenshtein distance. That is, there are
+   three possible edit types considered: insertion, removal and substitution.
+   A more formal description can be found on
+   `Wikipedia <https://en.wikipedia.org/wiki/Levenshtein_distance>`_.
+
+#. 
**Approximate matching** will match all *corpora* within a given edit
+   distance. That is, given a pattern, approximate matching will match anything
+   that can be edited to arrive at a corpus that exactly matches the original
+   pattern.
+
+#. **Matching semantics** are exactly the same as described in :ref:`semantics`.
+
+Here are a few examples of approximate matching:
+
+* Pattern :regexp:`/foo/` can match ``foo`` when using regular Hyperscan
+  matching behavior. With approximate matching within edit distance 2, the
+  pattern will produce matches when scanned against ``foo``, ``foooo``, ``f00``,
+  ``f``, and anything else that lies within edit distance 2 of matching corpora
+  for the original pattern (``foo`` in this case).
+
+* Pattern :regexp:`/foo(bar)+/` with edit distance 1 will match ``foobarbar``,
+  ``foobarb0r``, ``fooarbar``, ``foobarba``, ``f0obarbar``, ``fobarbar`` and
+  anything else that lies within edit distance 1 of matching corpora for the
+  original pattern (``foobarbar`` in this case).
+
+* Pattern :regexp:`/foob?ar/` with edit distance 2 will match ``fooar``,
+  ``foo``, ``fabar``, ``oar`` and anything else that lies within edit distance 2
+  of matching corpora for the original pattern (``fooar`` in this case).
+
+Currently, there are trade-offs and limitations that come with approximate
+matching support. Here they are, in a nutshell:
+
+* Reduced pattern support:
+
+  * For many patterns, approximate matching is complex and can result in
+    Hyperscan failing to compile a pattern with a "Pattern too large" error,
+    even if the pattern is supported in normal operation.
+  * Additionally, some patterns cannot be approximately matched because they
+    reduce to so-called "vacuous" patterns (patterns that match everything). For
+    example, pattern :regexp:`/foo/` with edit distance 3, if implemented,
+    would reduce to matching zero-length buffers. Such patterns will result in a
+    "Pattern cannot be approximately matched" compile error.
+  * Finally, due to the inherent complexities of defining matching behavior,
+    approximate matching implements a reduced subset of regular expression
+    syntax. Approximate matching does not support UTF-8 (and other
+    multibyte character encodings), or word boundaries (that is, ``\b``, ``\B``
+    and other equivalent constructs). Patterns containing unsupported constructs
+    will result in a "Pattern cannot be approximately matched" compile error.
+  * When using approximate matching in conjunction with SOM, all of the
+    restrictions of SOM also apply. See :ref:`som` for more
+    details.
+* Increased stream state/byte code size requirements: due to approximate
+  matching byte code being inherently larger and more complex than exact
+  matching, the corresponding requirements also increase.
+* Performance overhead: similarly, there is generally a performance cost
+  associated with approximate matching, both due to increased matching
+  complexity, and due to the fact that it will produce more matches.
+
+Approximate matching is always disabled by default, and can be enabled on a
+per-pattern basis by using an extended parameter described in :ref:`extparam`.
diff --git a/doc/dev-reference/conf.py.in b/doc/dev-reference/conf.py.in
index 9f089883e..2daab3696 100644
--- a/doc/dev-reference/conf.py.in
+++ b/doc/dev-reference/conf.py.in
@@ -44,7 +44,7 @@ master_doc = 'index'
 
 # General information about the project.
project = u'Hyperscan'
-copyright = u'2015-2016, Intel Corporation'
+copyright = u'2015-2017, Intel Corporation'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
diff --git a/doc/dev-reference/copyright.rst b/doc/dev-reference/copyright.rst
index 737b160f5..9464382e6 100644
--- a/doc/dev-reference/copyright.rst
+++ b/doc/dev-reference/copyright.rst
@@ -30,4 +30,4 @@ and/or other countries.
 
 \*Other names and brands may be claimed as the property of others.
 
-Copyright |copy| 2015-2016, Intel Corporation. All rights reserved.
+Copyright |copy| 2015-2017, Intel Corporation. All rights reserved.
diff --git a/doc/dev-reference/getting_started.rst b/doc/dev-reference/getting_started.rst
index 1794f3e9a..1d44705b4 100644
--- a/doc/dev-reference/getting_started.rst
+++ b/doc/dev-reference/getting_started.rst
@@ -254,18 +254,32 @@ the current platform is supported by Hyperscan. As of this release, the
 variants of the runtime that are built, and the CPU capability that is
 required, are the following:
 
-+----------+-------------------------------+---------------------+
-| Variant  | CPU Feature Flag(s) Required  | gcc arch flag       |
-+==========+===============================+=====================+
-| Core 2   | ``SSSE3``                     | ``-march=core2``    |
-+----------+-------------------------------+---------------------+
-| Core i7  | ``SSE4_2`` and ``POPCNT``     | ``-march=corei7``   |
-+----------+-------------------------------+---------------------+
-| AVX 2    | ``AVX2``                      | ``-march=avx2``     |
-+----------+-------------------------------+---------------------+
-
-As this requires compiler, libc, and binutils support, at this time the fat
-runtime will only be enabled for Linux builds where the compiler supports the
++----------+-------------------------------+---------------------------+
+| Variant  | CPU Feature Flag(s) Required  | gcc arch flag             |
++==========+===============================+===========================+
+| Core 2   | ``SSSE3``                     | ``-march=core2``          |
++----------+-------------------------------+---------------------------+
+| Core i7  | ``SSE4_2`` and ``POPCNT``     | ``-march=corei7``         |
++----------+-------------------------------+---------------------------+
+| AVX 2    | ``AVX2``                      | ``-march=core-avx2``      |
++----------+-------------------------------+---------------------------+
+| AVX 512  | ``AVX512BW`` (see note below) | ``-march=skylake-avx512`` |
++----------+-------------------------------+---------------------------+
+
+.. note::
+
+   Hyperscan v4.5 adds support for AVX-512 instructions - in particular the
+   ``AVX-512BW`` instruction set that was introduced on Intel "Skylake" Xeon
+   processors - however the AVX-512 runtime variant is **not** enabled by
+   default in fat runtime builds as not all toolchains support AVX-512
+   instruction sets. To build an AVX-512 runtime, the CMake variable
+   ``BUILD_AVX512`` must be enabled manually during configuration. For
+   example: ::
+
+       cmake -DBUILD_AVX512=on <...>
+
+As the fat runtime requires compiler, libc, and binutils support, at this time
+it will only be enabled for Linux builds where the compiler supports the
 `indirect function "ifunc" function attribute `_.
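The table and note above describe CPU requirements that the fat runtime resolves automatically at dispatch time. An application can also verify host support itself before compiling any database; a minimal sketch using the public `hs_valid_platform()` call (the same check backing the `hs_valid_platform.c` source added to the library targets in the build changes above):

```c
#include <stdio.h>
#include <hs/hs.h>

int main(void) {
    /* HS_SUCCESS means this CPU provides the minimum (SSSE3) support the
     * library requires; any other value means scanning would fail here. */
    if (hs_valid_platform() != HS_SUCCESS) {
        fprintf(stderr, "this host's CPU is not supported by Hyperscan\n");
        return 1;
    }
    printf("Hyperscan is supported on this host\n");
    return 0;
}
```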
diff --git a/doc/dev-reference/index.rst b/doc/dev-reference/index.rst index df4f89161..32f188dd4 100644 --- a/doc/dev-reference/index.rst +++ b/doc/dev-reference/index.rst @@ -17,5 +17,6 @@ Hyperscan |version| Developer's Reference Guide runtime serialization performance + tools api_constants api_files diff --git a/doc/dev-reference/intro.rst b/doc/dev-reference/intro.rst index 5f0cc113d..58879aef1 100644 --- a/doc/dev-reference/intro.rst +++ b/doc/dev-reference/intro.rst @@ -70,6 +70,13 @@ For a given database, Hyperscan provides several guarantees: See :ref:`runtime` for more detail. +***** +Tools +***** + +Some utilities for testing and benchmarking Hyperscan are included with the +library. See :ref:`tools` for more information. + ************ Example Code ************ diff --git a/doc/dev-reference/performance.rst b/doc/dev-reference/performance.rst index 8cc0b6758..23781bd61 100644 --- a/doc/dev-reference/performance.rst +++ b/doc/dev-reference/performance.rst @@ -333,3 +333,13 @@ Similarly, the :c:member:`hs_expr_ext::min_length` extended parameter can be used to specify a lower bound on the length of the matches for a pattern. Using this facility may be more lightweight in some circumstances than using the SOM flag and post-confirming match length in the calling application. + +******************** +Approximate matching +******************** + +.. tip:: Approximate matching is an experimental feature. + +There is generally a performance impact associated with approximate matching due +to the reduced specificity of the matches. This impact may vary significantly +depending on the pattern and edit distance. diff --git a/doc/dev-reference/tools.rst b/doc/dev-reference/tools.rst new file mode 100644 index 000000000..d2e7a06e0 --- /dev/null +++ b/doc/dev-reference/tools.rst @@ -0,0 +1,116 @@ +.. _tools: + +##### +Tools +##### + +This section describes the set of utilities included with the Hyperscan library. + +******************** +Benchmarker: hsbench +******************** + +The ``hsbench`` tool provides an easy way to measure Hyperscan's performance +for a particular set of patterns and corpus of data to be scanned. + +Patterns are supplied in the format described below in +:ref:`tools_pattern_format`, while the corpus must be provided in the form of a +`corpus database`: this is a simple SQLite database format intended to allow for +easy control of how a corpus is broken into blocks and streams. + +.. note:: A group of Python scripts for constructing corpora databases from + various input types, such as PCAP network traffic captures or text files, can + be found in the Hyperscan source tree in ``tools/hsbench/scripts``. 
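``hsbench`` is the supported way to produce the throughput figures described in this section, but the quantities it reports (bytes scanned, repeat count, elapsed time) can be illustrated with a toy harness against the public scanning API. A rough single-pattern sketch, not a substitute for ``hsbench``; the pattern, the synthetic zero-filled corpus, and the POSIX `clock_gettime()` timer are illustrative assumptions:

```c
#include <stdio.h>
#include <time.h>
#include <hs/hs.h>

/* Count matches; the benchmark itself only cares about elapsed time. */
static int on_match(unsigned int id, unsigned long long from,
                    unsigned long long to, unsigned int flags, void *ctx) {
    ++*(unsigned long long *)ctx;
    return 0;
}

int main(void) {
    hs_database_t *db = NULL;
    hs_compile_error_t *err = NULL;
    if (hs_compile("hatstand.*teakettle", HS_FLAG_DOTALL, HS_MODE_BLOCK,
                   NULL, &db, &err) != HS_SUCCESS) {
        fprintf(stderr, "compile failed: %s\n", err->message);
        hs_free_compile_error(err);
        return 1;
    }
    hs_scratch_t *scratch = NULL;
    if (hs_alloc_scratch(db, &scratch) != HS_SUCCESS) {
        hs_free_database(db);
        return 1;
    }

    static char corpus[1 << 20]; /* 1 MB of zero bytes as a stand-in corpus */
    const int repeats = 20;      /* mirrors hsbench's default repeat count */
    unsigned long long matches = 0;

    struct timespec t0, t1;
    clock_gettime(CLOCK_MONOTONIC, &t0);
    for (int i = 0; i < repeats; i++) {
        hs_scan(db, corpus, sizeof(corpus), 0, scratch, on_match, &matches);
    }
    clock_gettime(CLOCK_MONOTONIC, &t1);

    double secs = (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) * 1e-9;
    double mbit = (double)sizeof(corpus) * repeats * 8 / secs / 1e6;
    printf("%llu matches, %.2f Mbit/sec\n", matches, mbit);

    hs_free_scratch(scratch);
    hs_free_database(db);
    return 0;
}
```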
+
+Running hsbench
+===============
+
+Given a file full of patterns specified with ``-e`` and a corpus database
+specified with ``-c``, ``hsbench`` will perform a single-threaded benchmark and
+produce output like this::
+
+    $ hsbench -e /tmp/patterns -c /tmp/corpus.db
+
+    Signatures:        /tmp/patterns
+    Hyperscan info:    Version: 4.3.1 Features: AVX2 Mode: STREAM
+    Expression count:  200
+    Bytecode size:     342,540 bytes
+    Database CRC:      0x6cd6b67c
+    Stream state size: 252 bytes
+    Scratch size:      18,406 bytes
+    Compile time:      0.153 seconds
+    Peak heap usage:   78,073,856 bytes
+
+    Time spent scanning:  0.600 seconds
+    Corpus size:          72,138,183 bytes (63,946 blocks in 8,891 streams)
+    Scan matches:         81 (0.001 matches/kilobyte)
+    Overall block rate:   2,132,004.45 blocks/sec
+    Overall throughput:   19,241.10 Mbit/sec
+
+By default, the corpus is scanned twenty times, and the overall performance
+reported is computed based on the total number of bytes scanned in the time it
+takes to perform all twenty scans. The number of repeats can be changed with the
+``-n`` argument, and the results of each scan will be displayed if the
+``--per-scan`` argument is specified.
+
+To benchmark Hyperscan on more than one core, you can supply a list of cores
+with the ``-T`` argument, which will instruct ``hsbench`` to start one
+benchmark thread per core given and compute the throughput from the time taken
+to complete all of them.
+
+.. tip:: For single-threaded benchmarks on multi-processor systems, we recommend
+   using a utility like ``taskset`` to lock the hsbench process to one core and
+   minimize jitter due to the operating system's scheduler.
+
+.. _tools_pattern_format:
+
+**************
+Pattern Format
+**************
+
+All of the Hyperscan tools accept patterns in the same format, read from plain
+text files with one pattern per line. Each line looks like this:
+
+* ``<id>:/<regex>/<flags>``
+
+For example::
+
+  1:/hatstand.*teakettle/s
+  2:/(hatstand|teakettle)/iH
+  3:/^.{10,20}hatstand/m
+
+The integer ID is the value that will be reported when a match is found by
+Hyperscan and must be unique.
+
+The pattern itself is a regular expression in PCRE syntax; see
+:ref:`compilation` for more information on supported features.
+
+The flags are single characters that map to Hyperscan flags as follows:
+
+========= ================================= ===========
+Character API Flag                          Description
+========= ================================= ===========
+``i``     :c:member:`HS_FLAG_CASELESS`      Case-insensitive matching
+``s``     :c:member:`HS_FLAG_DOTALL`        Dot (``.``) will match newlines
+``m``     :c:member:`HS_FLAG_MULTILINE`     Multi-line anchoring
+``H``     :c:member:`HS_FLAG_SINGLEMATCH`   Report match ID at most once
+``V``     :c:member:`HS_FLAG_ALLOWEMPTY`    Allow patterns that can match against empty buffers
+``8``     :c:member:`HS_FLAG_UTF8`          UTF-8 mode
+``W``     :c:member:`HS_FLAG_UCP`           Unicode property support
+``P``     :c:member:`HS_FLAG_PREFILTER`     Prefiltering mode
+``L``     :c:member:`HS_FLAG_SOM_LEFTMOST`  Leftmost start of match reporting
+========= ================================= ===========
+
+In addition to the set of flags above, :ref:`extparam` can be supplied
+for each pattern. These are supplied after the flags as ``key=value`` pairs
+between braces, separated by commas. For example::
+
+  1:/hatstand.*teakettle/s{min_offset=50,max_offset=100}
+
+All Hyperscan tools will accept a pattern file (or a directory containing
+pattern files) with the ``-e`` argument. If no further arguments constraining
+the pattern set are given, all patterns in those files are used.
+ +To select a subset of the patterns, a single ID can be supplied with the ``-z`` +argument, or a file containing a set of IDs can be supplied with the ``-s`` +argument. diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index b52bbdfaf..c252c9ace 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -22,3 +22,6 @@ set_source_files_properties(patbench.cc PROPERTIES COMPILE_FLAGS "-Wall -Wno-unused-parameter") target_link_libraries(patbench hs pcap) endif() + +install(FILES simplegrep.c pcapscan.cc patbench.cc README.md + DESTINATION ${CMAKE_INSTALL_DOCDIR}/examples) diff --git a/src/alloc.c b/src/alloc.c index aa7638e77..e27649bce 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -67,7 +67,7 @@ hs_free_t normalise_free(hs_free_t f) { } HS_PUBLIC_API -hs_error_t hs_set_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) { +hs_error_t HS_CDECL hs_set_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) { hs_set_database_allocator(allocfunc, freefunc); hs_set_misc_allocator(allocfunc, freefunc); hs_set_stream_allocator(allocfunc, freefunc); @@ -77,7 +77,8 @@ hs_error_t hs_set_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) { } HS_PUBLIC_API -hs_error_t hs_set_database_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) { +hs_error_t HS_CDECL hs_set_database_allocator(hs_alloc_t allocfunc, + hs_free_t freefunc) { hs_database_alloc = normalise_alloc(allocfunc); hs_database_free = normalise_free(freefunc); @@ -85,7 +86,8 @@ hs_error_t hs_set_database_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) { } HS_PUBLIC_API -hs_error_t hs_set_misc_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) { +hs_error_t HS_CDECL hs_set_misc_allocator(hs_alloc_t allocfunc, + hs_free_t freefunc) { hs_misc_alloc = normalise_alloc(allocfunc); hs_misc_free = normalise_free(freefunc); @@ -93,7 +95,8 @@ hs_error_t hs_set_misc_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) { } HS_PUBLIC_API -hs_error_t hs_set_scratch_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) { +hs_error_t HS_CDECL hs_set_scratch_allocator(hs_alloc_t allocfunc, + hs_free_t freefunc) { hs_scratch_alloc = normalise_alloc(allocfunc); hs_scratch_free = normalise_free(freefunc); @@ -101,7 +104,8 @@ hs_error_t hs_set_scratch_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) { } HS_PUBLIC_API -hs_error_t hs_set_stream_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) { +hs_error_t HS_CDECL hs_set_stream_allocator(hs_alloc_t allocfunc, + hs_free_t freefunc) { hs_stream_alloc = normalise_alloc(allocfunc); hs_stream_free = normalise_free(freefunc); diff --git a/src/compiler/asserts.cpp b/src/compiler/asserts.cpp index be836b06d..444422260 100644 --- a/src/compiler/asserts.cpp +++ b/src/compiler/asserts.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,6 +42,8 @@ * word-to-word and word-to-nonword) are dropped. 
*/ #include "asserts.h" + +#include "compiler/compiler.h" #include "nfagraph/ng.h" #include "nfagraph/ng_prune.h" #include "nfagraph/ng_redundancy.h" @@ -115,8 +117,8 @@ u32 conjunct(u32 flags1, u32 flags2) { typedef map, NFAEdge> edge_cache_t; static -void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache, - u32 &assert_edge_count) { +void replaceAssertVertex(NGHolder &g, NFAVertex t, const ExpressionInfo &expr, + edge_cache_t &edge_cache, u32 &assert_edge_count) { DEBUG_PRINTF("replacing assert vertex %zu\n", g[t].index); const u32 flags = g[t].assert_flags; @@ -178,8 +180,7 @@ void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache, edge_cache.emplace(cache_key, e); g[e].assert_flags = flags; if (++assert_edge_count > MAX_ASSERT_EDGES) { - throw CompileError(g.expressionIndex, - "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } } else { NFAEdge e = ecit->second; @@ -200,21 +201,23 @@ void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache, } static -void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) { +void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, + NFAVertex v, s32 adj) { // Don't try and set the report ID of a special vertex. assert(!is_special(v, g)); // There should be no reports set already. assert(g[v].reports.empty()); - Report r = rm.getBasicInternalReport(g, adj); + Report r = rm.getBasicInternalReport(expr, adj); g[v].reports.insert(rm.getInternalId(r)); DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj); } static -void checkForMultilineStart(ReportManager &rm, NGWrapper &g) { +void checkForMultilineStart(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr) { vector dead; for (auto v : adjacent_vertices_range(g.start, g)) { if (!(g[v].assert_flags & POS_FLAG_MULTILINE_START)) { @@ -238,7 +241,7 @@ void checkForMultilineStart(ReportManager &rm, NGWrapper &g) { for (const auto &e : dead) { NFAVertex dummy = add_vertex(g); g[dummy].char_reach.setall(); - setReportId(rm, g, dummy, -1); + setReportId(rm, g, expr, dummy, -1); add_edge(source(e, g), dummy, g[e], g); add_edge(dummy, g.accept, g); } @@ -263,7 +266,8 @@ bool hasAssertVertices(const NGHolder &g) { * Remove the horrors that are the temporary assert vertices which arise from * our construction method. Allows the rest of our code base to live in * blissful ignorance of their existence. 
*/ -void removeAssertVertices(ReportManager &rm, NGWrapper &g) { +void removeAssertVertices(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr) { size_t num = 0; DEBUG_PRINTF("before: graph has %zu vertices\n", num_vertices(g)); @@ -285,12 +289,12 @@ void removeAssertVertices(ReportManager &rm, NGWrapper &g) { for (auto v : vertices_range(g)) { if (g[v].assert_flags & WORDBOUNDARY_FLAGS) { - replaceAssertVertex(g, v, edge_cache, assert_edge_count); + replaceAssertVertex(g, v, expr, edge_cache, assert_edge_count); num++; } } - checkForMultilineStart(rm, g); + checkForMultilineStart(rm, g, expr); if (num) { DEBUG_PRINTF("resolved %zu assert vertices\n", num); diff --git a/src/compiler/asserts.h b/src/compiler/asserts.h index b9ec80c7c..b4d64c6c9 100644 --- a/src/compiler/asserts.h +++ b/src/compiler/asserts.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,8 +35,9 @@ namespace ue2 { +class ExpressionInfo; class ReportManager; -class NGWrapper; +class NGHolder; /** \brief Convert temporary assert vertices (from construction method) to * edge-based flags. @@ -44,7 +45,8 @@ class NGWrapper; * Remove the horrors that are the temporary assert vertices which arise from * our construction method. Allows the rest of our code base to live in * blissful ignorance of their existence. */ -void removeAssertVertices(ReportManager &rm, NGWrapper &g); +void removeAssertVertices(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr); } // namespace ue2 diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 4a4afc64e..cce89e408 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -55,9 +55,8 @@ #include "parser/unsupported.h" #include "parser/utf8_validate.h" #include "rose/rose_build.h" -#include "rose/rose_build_dump.h" #include "som/slot_manager_dump.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/compile_error.h" #include "util/target_info.h" #include "util/verify_types.h" @@ -74,12 +73,12 @@ using namespace std; namespace ue2 { - static void validateExt(const hs_expr_ext &ext) { static const unsigned long long ALL_EXT_FLAGS = HS_EXT_FLAG_MIN_OFFSET | HS_EXT_FLAG_MAX_OFFSET | - HS_EXT_FLAG_MIN_LENGTH; + HS_EXT_FLAG_MIN_LENGTH | + HS_EXT_FLAG_EDIT_DISTANCE; if (ext.flags & ~ALL_EXT_FLAGS) { throw CompileError("Invalid hs_expr_ext flag set."); } @@ -100,25 +99,18 @@ void validateExt(const hs_expr_ext &ext) { } ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, - unsigned flags, ReportID actionId, + unsigned flags, ReportID report, const hs_expr_ext *ext) - : utf8(false), - allow_vacuous(flags & HS_FLAG_ALLOWEMPTY), - highlander(flags & HS_FLAG_SINGLEMATCH), - prefilter(flags & HS_FLAG_PREFILTER), - som(SOM_NONE), - index(index_in), - id(actionId), - min_offset(0), - max_offset(MAX_OFFSET), - min_length(0) { + : expr(index_in, flags & HS_FLAG_ALLOWEMPTY, flags & HS_FLAG_SINGLEMATCH, + false, flags & HS_FLAG_PREFILTER, SOM_NONE, report, 0, MAX_OFFSET, + 0, 0) { ParseMode mode(flags); component = parse(expression, mode); - utf8 = 
mode.utf8; /* utf8 may be set by parse() */ + expr.utf8 = mode.utf8; /* utf8 may be set by parse() */ - if (utf8 && !isValidUtf8(expression)) { + if (expr.utf8 && !isValidUtf8(expression)) { throw ParseError("Expression is not valid UTF-8."); } @@ -146,7 +138,7 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, // Set SOM type. if (flags & HS_FLAG_SOM_LEFTMOST) { - som = SOM_LEFT; + expr.som = SOM_LEFT; } // Set extended parameters, if we have them. @@ -155,26 +147,29 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, validateExt(*ext); if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) { - min_offset = ext->min_offset; + expr.min_offset = ext->min_offset; } if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) { - max_offset = ext->max_offset; + expr.max_offset = ext->max_offset; } if (ext->flags & HS_EXT_FLAG_MIN_LENGTH) { - min_length = ext->min_length; + expr.min_length = ext->min_length; + } + if (ext->flags & HS_EXT_FLAG_EDIT_DISTANCE) { + expr.edit_distance = ext->edit_distance; } } // These are validated in validateExt, so an error will already have been // thrown if these conditions don't hold. - assert(max_offset >= min_offset); - assert(max_offset >= min_length); + assert(expr.max_offset >= expr.min_offset); + assert(expr.max_offset >= expr.min_length); // Since prefiltering and SOM aren't supported together, we must squash any // min_length constraint as well. - if (flags & HS_FLAG_PREFILTER && min_length) { + if (flags & HS_FLAG_PREFILTER && expr.min_length) { DEBUG_PRINTF("prefiltering mode: squashing min_length constraint\n"); - min_length = 0; + expr.min_length = 0; } } @@ -183,25 +178,25 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, * \brief Dumps the parse tree to screen in debug mode and to disk in dump * mode. */ -void dumpExpression(UNUSED const ParsedExpression &expr, +void dumpExpression(UNUSED const ParsedExpression &pe, UNUSED const char *stage, UNUSED const Grey &grey) { #if defined(DEBUG) - DEBUG_PRINTF("===== Rule ID: %u (internalID: %u) =====\n", expr.id, - expr.index); + DEBUG_PRINTF("===== Rule ID: %u (expression index: %u) =====\n", + pe.expr.report, pe.expr.index); ostringstream debug_tree; - dumpTree(debug_tree, expr.component.get()); + dumpTree(debug_tree, pe.component.get()); printf("%s\n", debug_tree.str().c_str()); #endif // DEBUG #if defined(DUMP_SUPPORT) if (grey.dumpFlags & Grey::DUMP_PARSE) { stringstream ss; - ss << grey.dumpPath << "Expr_" << expr.index << "_componenttree_" + ss << grey.dumpPath << "Expr_" << pe.expr.index << "_componenttree_" << stage << ".txt"; ofstream out(ss.str().c_str()); - out << "Component Tree for " << expr.id << endl; - dumpTree(out, expr.component.get()); - if (expr.utf8) { + out << "Component Tree for " << pe.expr.report << endl; + dumpTree(out, pe.component.get()); + if (pe.expr.utf8) { out << "UTF8 mode" << endl; } } @@ -211,13 +206,13 @@ void dumpExpression(UNUSED const ParsedExpression &expr, /** \brief Run Component tree optimisations on \a expr. 
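The HS_EXT_FLAG_EDIT_DISTANCE plumbing above is driven from the public compile API. A minimal sketch of how a caller requests approximate matching — hs_compile_ext_multi() and the hs_expr_ext fields are the public API; the pattern, distance and ID values here are purely illustrative:

```cpp
#include <cstring>
#include <hs/hs.h>

// Sketch: compile one pattern with an edit-distance extended parameter.
// The edit_distance value travels through ParsedExpression into the new
// ExpressionInfo, as in the constructor above.
int compile_approx(hs_database_t **db) {
    hs_expr_ext_t ext;
    std::memset(&ext, 0, sizeof(ext));
    ext.flags = HS_EXT_FLAG_EDIT_DISTANCE;
    ext.edit_distance = 2; // report matches within edit distance 2

    const char *pattern = "foobar";
    const unsigned flags = 0;
    const unsigned id = 1;
    const hs_expr_ext_t *ext_ptr = &ext;

    hs_compile_error_t *err = nullptr;
    if (hs_compile_ext_multi(&pattern, &flags, &id, &ext_ptr, 1,
                             HS_MODE_BLOCK, nullptr, db, &err) != HS_SUCCESS) {
        hs_free_compile_error(err);
        return -1;
    }
    return 0;
}
```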
*/ static -void optimise(ParsedExpression &expr) { - if (expr.min_length || expr.som) { +void optimise(ParsedExpression &pe) { + if (pe.expr.min_length || pe.expr.som) { return; } DEBUG_PRINTF("optimising\n"); - expr.component->optimise(true /* root is connected to sds */); + pe.component->optimise(true /* root is connected to sds */); } void addExpression(NG &ng, unsigned index, const char *expression, @@ -234,34 +229,34 @@ void addExpression(NG &ng, unsigned index, const char *expression, // Do per-expression processing: errors here will result in an exception // being thrown up to our caller - ParsedExpression expr(index, expression, flags, id, ext); - dumpExpression(expr, "orig", cc.grey); + ParsedExpression pe(index, expression, flags, id, ext); + dumpExpression(pe, "orig", cc.grey); // Apply prefiltering transformations if desired. - if (expr.prefilter) { - prefilterTree(expr.component, ParseMode(flags)); - dumpExpression(expr, "prefiltered", cc.grey); + if (pe.expr.prefilter) { + prefilterTree(pe.component, ParseMode(flags)); + dumpExpression(pe, "prefiltered", cc.grey); } // Expressions containing zero-width assertions and other extended pcre // types aren't supported yet. This call will throw a ParseError exception // if the component tree contains such a construct. - checkUnsupported(*expr.component); + checkUnsupported(*pe.component); - expr.component->checkEmbeddedStartAnchor(true); - expr.component->checkEmbeddedEndAnchor(true); + pe.component->checkEmbeddedStartAnchor(true); + pe.component->checkEmbeddedEndAnchor(true); if (cc.grey.optimiseComponentTree) { - optimise(expr); - dumpExpression(expr, "opt", cc.grey); + optimise(pe); + dumpExpression(pe, "opt", cc.grey); } DEBUG_PRINTF("component=%p, nfaId=%u, reportId=%u\n", - expr.component.get(), expr.index, expr.id); + pe.component.get(), pe.expr.index, pe.expr.report); // You can only use the SOM flags if you've also specified an SOM // precision mode. - if (expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) { + if (pe.expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) { throw CompileError("To use a SOM expression flag in streaming mode, " "an SOM precision mode (e.g. " "HS_MODE_SOM_HORIZON_LARGE) must be specified."); @@ -269,32 +264,31 @@ void addExpression(NG &ng, unsigned index, const char *expression, // If this expression is a literal, we can feed it directly to Rose rather // than building the NFA graph. - if (shortcutLiteral(ng, expr)) { + if (shortcutLiteral(ng, pe)) { DEBUG_PRINTF("took literal short cut\n"); return; } - unique_ptr<NGWrapper> g = buildWrapper(ng.rm, cc, expr); - - if (!g) { + auto built_expr = buildGraph(ng.rm, cc, pe); + if (!built_expr.g) { DEBUG_PRINTF("NFA build failed on ID %u, but no exception was " - "thrown.\n", expr.id); + "thrown.\n", pe.expr.report); throw CompileError("Internal error."); } - if (!expr.allow_vacuous && matches_everywhere(*g)) { + if (!pe.expr.allow_vacuous && matches_everywhere(*built_expr.g)) { throw CompileError("Pattern matches empty buffer; use " "HS_FLAG_ALLOWEMPTY to enable support."); } - if (!ng.addGraph(*g)) { - DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", expr.id); + if (!ng.addGraph(built_expr.expr, std::move(built_expr.g))) { + DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", pe.expr.report); throw CompileError("Error compiling expression."); } } static -aligned_unique_ptr<RoseEngine> generateRoseEngine(NG &ng) { +bytecode_ptr<RoseEngine> generateRoseEngine(NG &ng) { const u32 minWidth = ng.minWidth.is_finite() ?
verify_u32(ng.minWidth) : ROSE_BOUND_INF; auto rose = ng.rose->buildRose(minWidth); @@ -305,7 +299,6 @@ aligned_unique_ptr<RoseEngine> generateRoseEngine(NG &ng) { return nullptr; } - dumpRose(*ng.rose, rose.get(), ng.cc.grey); dumpReportManager(ng.rm, ng.cc.grey); dumpSomSlotManager(ng.ssm, ng.cc.grey); dumpSmallWrite(rose.get(), ng.cc.grey); @@ -320,6 +313,9 @@ platform_t target_to_platform(const target_t &target_info) { if (!target_info.has_avx2()) { p |= HS_PLATFORM_NOAVX2; } + if (!target_info.has_avx512()) { + p |= HS_PLATFORM_NOAVX512; + } return p; } @@ -369,7 +365,7 @@ struct hs_database *build(NG &ng, unsigned int *length) { if (!rose) { throw CompileError("Unable to generate bytecode."); } - *length = roseSize(rose.get()); + *length = rose.size(); if (!*length) { DEBUG_PRINTF("RoseEngine has zero length\n"); assert(0); @@ -450,41 +446,42 @@ bool isSupported(const Component &c) { } #endif -unique_ptr<NGWrapper> buildWrapper(ReportManager &rm, const CompileContext &cc, - const ParsedExpression &expr) { - assert(isSupported(*expr.component)); +BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc, + const ParsedExpression &pe) { + assert(isSupported(*pe.component)); - const unique_ptr<NFABuilder> builder = makeNFABuilder(rm, cc, expr); + const auto builder = makeNFABuilder(rm, cc, pe); assert(builder); // Set up START and ACCEPT states; retrieve the special states - const auto bs = makeGlushkovBuildState(*builder, expr.prefilter); + const auto bs = makeGlushkovBuildState(*builder, pe.expr.prefilter); // Map position IDs to characters/components - expr.component->notePositions(*bs); + pe.component->notePositions(*bs); // Wire the start dotstar state to the firsts - connectInitialStates(*bs, expr); + connectInitialStates(*bs, pe); DEBUG_PRINTF("wire up body of expr\n"); // Build the rest of the FOLLOW set vector<PositionInfo> initials = {builder->getStartDotStar(), builder->getStart()}; - expr.component->buildFollowSet(*bs, initials); + pe.component->buildFollowSet(*bs, initials); // Wire the lasts to the accept state - connectFinalStates(*bs, expr); + connectFinalStates(*bs, pe); // Create our edges bs->buildEdges(); - auto g = builder->getGraph(); - assert(g); + BuiltExpression built_expr = builder->getGraph(); + assert(built_expr.g); - dumpDotWrapper(*g, "00_before_asserts", cc.grey); - removeAssertVertices(rm, *g); + dumpDotWrapper(*built_expr.g, built_expr.expr, "00_before_asserts", + cc.grey); + removeAssertVertices(rm, *built_expr.g, built_expr.expr); - return g; + return built_expr; } } // namespace ue2 diff --git a/src/compiler/compiler.h b/src/compiler/compiler.h index 1d7d6536d..60d7ca33c 100644 --- a/src/compiler/compiler.h +++ b/src/compiler/compiler.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,11 +35,11 @@ #include "ue2common.h" #include "database.h" +#include "compiler/expression_info.h" #include "parser/Component.h" -#include "som/som.h" +#include "util/noncopyable.h" #include <memory> -#include <boost/core/noncopyable.hpp> struct hs_database; struct hs_expr_ext; @@ -50,34 +50,32 @@ struct CompileContext; struct Grey; struct target_t; class NG; +class NGHolder; class ReportManager; -class NGWrapper; -/** Class gathering together the pieces of a parsed expression. - * Note: Owns the provided component.
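With buildRose() now handing back a bytecode_ptr, the engine's size travels with the allocation itself, which is what lets build() ask rose.size() above instead of the old roseSize() query. A minimal sketch of the shape such a pointer needs — this is not the real util/bytecode_ptr.h, just the idea, under C++17:

```cpp
#include <cstddef>
#include <cstring>
#include <new>

// Sketch: an owning pointer to engine bytecode that remembers its own size
// and alignment, so callers no longer need a separate size query.
template <typename T>
class bytecode_ptr_sketch {
public:
    bytecode_ptr_sketch(std::size_t bytes_in, std::size_t align_in)
        : bytes(bytes_in), align(align_in),
          ptr(static_cast<T *>(::operator new(bytes_in,
                                              std::align_val_t(align_in)))) {
        std::memset(ptr, 0, bytes); // zero-filled, like make_zeroed_bytecode_ptr
    }
    ~bytecode_ptr_sketch() { ::operator delete(ptr, std::align_val_t(align)); }
    bytecode_ptr_sketch(const bytecode_ptr_sketch &) = delete;
    bytecode_ptr_sketch &operator=(const bytecode_ptr_sketch &) = delete;

    T *get() const { return ptr; }
    std::size_t size() const { return bytes; } // replaces roseSize(rose.get())

private:
    std::size_t bytes;
    std::size_t align;
    T *ptr;
};
```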
- */ -class ParsedExpression : boost::noncopyable { +/** \brief Class gathering together the pieces of a parsed expression. */ +class ParsedExpression : noncopyable { public: ParsedExpression(unsigned index, const char *expression, unsigned flags, - ReportID actionId, const hs_expr_ext *ext = nullptr); - - bool utf8; //!< UTF-8 mode flag specified + ReportID report, const hs_expr_ext *ext = nullptr); - /** \brief root node of parsed component tree. */ - std::unique_ptr<Component> component; + /** \brief Expression information (from flags, extparam etc) */ + ExpressionInfo expr; - const bool allow_vacuous; //!< HS_FLAG_ALLOWEMPTY specified - const bool highlander; //!< HS_FLAG_SINGLEMATCH specified - const bool prefilter; //!< HS_FLAG_PREFILTER specified - som_type som; //!< chosen SOM mode, or SOM_NONE + /** \brief Root node of parsed component tree. */ + std::unique_ptr<Component> component; +}; - /** \brief index in expressions array passed to \ref hs_compile_multi */ - const unsigned index; +/** + * \brief Class gathering together the pieces of an expression that has been + * built into an NFA graph. + */ +struct BuiltExpression { + /** \brief Expression information (from flags, extparam etc) */ + ExpressionInfo expr; - const ReportID id; //!< user-specified pattern ID - u64a min_offset; //!< 0 if not used - u64a max_offset; //!< MAX_OFFSET if not used - u64a min_length; //!< 0 if not used + /** \brief Built Glushkov NFA graph. */ + std::unique_ptr<NGHolder> g; }; /** @@ -94,12 +92,12 @@ class ParsedExpression : boost::noncopyable { * @param ext * Struct containing extra parameters for this expression, or NULL if * none. - * @param actionId + * @param report * The identifier to associate with the expression; returned by engine on * match. */ void addExpression(NG &ng, unsigned index, const char *expression, - unsigned flags, const hs_expr_ext *ext, ReportID actionId); + unsigned flags, const hs_expr_ext *ext, ReportID report); /** * Build a Hyperscan database out of the expressions we've been given. A @@ -127,9 +125,8 @@ struct hs_database *build(NG &ng, unsigned int *length); * @return * nullptr on error. */ -std::unique_ptr<NGWrapper> buildWrapper(ReportManager &rm, - const CompileContext &cc, - const ParsedExpression &expr); +BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc, + const ParsedExpression &expr); /** * Build a platform_t out of a target_t. diff --git a/src/compiler/expression_info.h b/src/compiler/expression_info.h new file mode 100644 index 000000000..7775f59e7 --- /dev/null +++ b/src/compiler/expression_info.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief ExpressionInfo class for storing the properties of an expression. + */ + +#ifndef COMPILER_EXPRESSION_INFO_H +#define COMPILER_EXPRESSION_INFO_H + +#include "ue2common.h" +#include "som/som.h" + +namespace ue2 { + +/** \brief Properties of an expression. */ +class ExpressionInfo { +public: + ExpressionInfo(unsigned int index_in, bool allow_vacuous_in, + bool highlander_in, bool utf8_in, bool prefilter_in, + som_type som_in, ReportID report_in, u64a min_offset_in, + u64a max_offset_in, u64a min_length_in, u32 edit_distance_in) + : index(index_in), report(report_in), allow_vacuous(allow_vacuous_in), + highlander(highlander_in), utf8(utf8_in), prefilter(prefilter_in), + som(som_in), min_offset(min_offset_in), max_offset(max_offset_in), + min_length(min_length_in), edit_distance(edit_distance_in) {} + + /** + * \brief Index of the expression represented by this graph. + * + * Used: + * - down the track in error handling; + * - for identifying parts of an expression in highlander mode. + */ + unsigned int index; + + /** \brief Report ID specified by the user. */ + ReportID report; + + /** \brief Vacuous pattern is allowed. (HS_FLAG_ALLOWEMPTY) */ + bool allow_vacuous; + + /** \brief "Highlander" (single match) pattern. (HS_FLAG_SINGLEMATCH) */ + bool highlander; + + /** \brief UTF-8 pattern. (HS_FLAG_UTF8) */ + bool utf8; + + /** \brief Prefiltering pattern. (HS_FLAG_PREFILTER) */ + bool prefilter; + + /** \brief Start-of-match type requested, or SOM_NONE. */ + som_type som; + + /** \brief Minimum match offset extended parameter. 0 if not used. */ + u64a min_offset; + + /** + * \brief Maximum match offset extended parameter. + * MAX_OFFSET if not used. + */ + u64a max_offset; + + /** \brief Minimum match length extended parameter. 0 if not used. */ + u64a min_length; + + /** + * \brief Approximate matching edit distance extended parameter. + * 0 if not used. 
+ */ + u32 edit_distance; +}; + +} + +#endif // COMPILER_EXPRESSION_INFO_H diff --git a/src/crc32.c b/src/crc32.c index b85acc7f5..1dae47b4e 100644 --- a/src/crc32.c +++ b/src/crc32.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,14 +29,10 @@ #include "crc32.h" #include "config.h" #include "ue2common.h" +#include "util/arch.h" +#include "util/intrinsics.h" -#if defined(HAVE_C_X86INTRIN_H) -#include <x86intrin.h> -#elif defined(HAVE_C_INTRIN_H) -#include <intrin.h> -#endif - -#ifndef __SSE4_2__ /*** *** What follows is derived from Intel's Slicing-by-8 CRC32 impl, which is BSD @@ -582,7 +578,7 @@ u32 crc32c_sb8_64_bit(u32 running_crc, const unsigned char* p_buf, return crc; } -#else // __SSE4_2__ +#else // HAVE_SSE42 #ifdef ARCH_64_BIT #define CRC_WORD 8 @@ -638,7 +634,7 @@ u32 crc32c_sse42(u32 running_crc, const unsigned char* p_buf, // Externally visible function u32 Crc32c_ComputeBuf(u32 inCrc32, const void *buf, size_t bufLen) { -#ifdef __SSE4_2__ +#if defined(HAVE_SSE42) u32 crc = crc32c_sse42(inCrc32, (const unsigned char *)buf, bufLen); #else u32 crc = crc32c_sb8_64_bit(inCrc32, (const unsigned char *)buf, bufLen); diff --git a/src/database.c b/src/database.c index 61eb021fa..dc03bf1fb 100644 --- a/src/database.c +++ b/src/database.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -49,7 +49,7 @@ int db_correctly_aligned(const void *db) { } HS_PUBLIC_API -hs_error_t hs_free_database(hs_database_t *db) { +hs_error_t HS_CDECL hs_free_database(hs_database_t *db) { if (db && db->magic != HS_DB_MAGIC) { return HS_INVALID; } @@ -59,8 +59,8 @@ hs_error_t hs_free_database(hs_database_t *db) { } HS_PUBLIC_API -hs_error_t hs_serialize_database(const hs_database_t *db, char **bytes, - size_t *serialized_length) { +hs_error_t HS_CDECL hs_serialize_database(const hs_database_t *db, char **bytes, + size_t *serialized_length) { if (!db || !bytes || !serialized_length) { return HS_INVALID; } @@ -114,7 +114,8 @@ hs_error_t hs_serialize_database(const hs_database_t *db, char **bytes, static hs_error_t db_check_platform(const u64a p) { if (p != hs_current_platform - && p != hs_current_platform_no_avx2) { + && p != hs_current_platform_no_avx2 + && p != hs_current_platform_no_avx512) { return HS_DB_PLATFORM_ERROR; } // passed all checks @@ -195,8 +196,9 @@ void db_copy_bytecode(const char *serialized, hs_database_t *db) { } HS_PUBLIC_API -hs_error_t hs_deserialize_database_at(const char *bytes, const size_t length, - hs_database_t *db) { +hs_error_t HS_CDECL hs_deserialize_database_at(const char *bytes, + const size_t length, + hs_database_t *db) { if (!bytes || !db) { return HS_INVALID; } @@ -237,8 +239,9 @@ hs_error_t hs_deserialize_database_at(const char *bytes, const size_t length, } HS_PUBLIC_API -hs_error_t hs_deserialize_database(const char *bytes, const size_t length, - hs_database_t **db) { +hs_error_t HS_CDECL hs_deserialize_database(const char *bytes, + const size_t length, + hs_database_t **db) { if (!bytes || !db) { return HS_INVALID; } @@ -286,7 +289,7 @@ hs_error_t hs_deserialize_database(const char *bytes, const size_t length, } HS_PUBLIC_API -hs_error_t 
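The HAVE_SSE42 path selected in the crc32.c hunk above uses the hardware CRC32C instruction. A scalar sketch of that approach — the shipping crc32c_sse42() adds alignment handling and a CRC_WORD-sized main loop; build with -msse4.2:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <nmmintrin.h> // SSE4.2 CRC32 intrinsics

// Fold 8 bytes per hardware CRC32C instruction, then finish byte-wise.
static uint32_t crc32c_hw_sketch(uint32_t crc, const unsigned char *buf,
                                 size_t len) {
    uint64_t c = crc;
    while (len >= sizeof(uint64_t)) {
        uint64_t v;
        std::memcpy(&v, buf, sizeof(v));
        c = _mm_crc32_u64(c, v);
        buf += sizeof(uint64_t);
        len -= sizeof(uint64_t);
    }
    while (len--) {
        c = _mm_crc32_u8(static_cast<uint32_t>(c), *buf++);
    }
    return static_cast<uint32_t>(c);
}
```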
hs_database_size(const hs_database_t *db, size_t *size) { +hs_error_t HS_CDECL hs_database_size(const hs_database_t *db, size_t *size) { if (!size) { return HS_INVALID; } @@ -301,8 +304,9 @@ hs_error_t hs_database_size(const hs_database_t *db, size_t *size) { } HS_PUBLIC_API -hs_error_t hs_serialized_database_size(const char *bytes, const size_t length, - size_t *size) { +hs_error_t HS_CDECL hs_serialized_database_size(const char *bytes, + const size_t length, + size_t *size) { // Decode and check the header hs_database_t header; hs_error_t ret = db_decode_header(&bytes, length, &header); @@ -366,7 +370,9 @@ hs_error_t print_database_string(char **s, u32 version, const platform_t plat, u8 minor = (version >> 16) & 0xff; u8 major = (version >> 24) & 0xff; - const char *avx2 = (plat & HS_PLATFORM_NOAVX2) ? "NOAVX2" : " AVX2"; + const char *features = (plat & HS_PLATFORM_NOAVX512) + ? (plat & HS_PLATFORM_NOAVX2) ? "" : "AVX2" + : "AVX512"; const char *mode = NULL; @@ -395,7 +401,7 @@ hs_error_t print_database_string(char **s, u32 version, const platform_t plat, // that don't have snprintf but have a workalike. int p_len = SNPRINTF_COMPAT( buf, len, "Version: %u.%u.%u Features: %s Mode: %s", - major, minor, release, avx2, mode); + major, minor, release, features, mode); if (p_len < 0) { DEBUG_PRINTF("snprintf output error, returned %d\n", p_len); hs_misc_free(buf); @@ -414,8 +420,8 @@ hs_error_t print_database_string(char **s, u32 version, const platform_t plat, } HS_PUBLIC_API -hs_error_t hs_serialized_database_info(const char *bytes, size_t length, - char **info) { +hs_error_t HS_CDECL hs_serialized_database_info(const char *bytes, + size_t length, char **info) { if (!info) { return HS_INVALID; } @@ -434,7 +440,7 @@ hs_error_t hs_serialized_database_info(const char *bytes, size_t length, } HS_PUBLIC_API -hs_error_t hs_database_info(const hs_database_t *db, char **info) { +hs_error_t HS_CDECL hs_database_info(const hs_database_t *db, char **info) { if (!info) { return HS_INVALID; } diff --git a/src/database.h b/src/database.h index 399513fc2..5715ed677 100644 --- a/src/database.h +++ b/src/database.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -41,6 +41,7 @@ extern "C" #include "hs_compile.h" // for HS_MODE_ flags #include "hs_version.h" #include "ue2common.h" +#include "util/arch.h" #define HS_DB_VERSION HS_VERSION_32BIT #define HS_DB_MAGIC (0xdbdbdbdbU) @@ -53,14 +54,18 @@ extern "C" #define HS_PLATFORM_CPU_MASK 0x3F #define HS_PLATFORM_NOAVX2 (4<<13) +#define HS_PLATFORM_NOAVX512 (8<<13) /** \brief Platform features bitmask. 
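The feature string in print_database_string() above is now derived from the widest ISA downwards, since a database built for AVX512 implies AVX2 support as well. The nested conditional, unrolled for clarity (using the constants defined in database.h):

```cpp
#include <cstdint>
#include <cstdio>

#define HS_PLATFORM_NOAVX2 (4 << 13)
#define HS_PLATFORM_NOAVX512 (8 << 13)

// Absence bits are tested widest-first: no NOAVX512 bit means AVX512,
// otherwise no NOAVX2 bit means AVX2, otherwise no SIMD feature string.
static const char *features(uint64_t plat) {
    if (!(plat & HS_PLATFORM_NOAVX512)) {
        return "AVX512";
    }
    if (!(plat & HS_PLATFORM_NOAVX2)) {
        return "AVX2";
    }
    return "";
}

int main() {
    std::printf("%s\n", features(0));                    // "AVX512"
    std::printf("%s\n", features(HS_PLATFORM_NOAVX512)); // "AVX2"
    std::printf("%s\n", features(HS_PLATFORM_NOAVX512 |
                                 HS_PLATFORM_NOAVX2));   // ""
    return 0;
}
```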
*/ typedef u64a platform_t; static UNUSED const platform_t hs_current_platform = { -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) HS_PLATFORM_NOAVX2 | +#endif +#if !defined(HAVE_AVX512) + HS_PLATFORM_NOAVX512 | #endif 0, }; @@ -68,6 +73,13 @@ const platform_t hs_current_platform = { static UNUSED const platform_t hs_current_platform_no_avx2 = { HS_PLATFORM_NOAVX2 | + HS_PLATFORM_NOAVX512 | + 0, +}; + +static UNUSED +const platform_t hs_current_platform_no_avx512 = { + HS_PLATFORM_NOAVX512 | 0, }; diff --git a/src/dispatcher.c b/src/dispatcher.c index fb2f4f02a..5ae46b56f 100644 --- a/src/dispatcher.c +++ b/src/dispatcher.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,8 +33,14 @@ #include "util/cpuid_flags.h" #include "util/join.h" +#if defined(DISABLE_AVX512_DISPATCH) +#define avx512_ disabled_ +#define check_avx512() (0) +#endif + #define CREATE_DISPATCH(RTYPE, NAME, ...) \ /* create defns */ \ + RTYPE JOIN(avx512_, NAME)(__VA_ARGS__); \ RTYPE JOIN(avx2_, NAME)(__VA_ARGS__); \ RTYPE JOIN(corei7_, NAME)(__VA_ARGS__); \ RTYPE JOIN(core2_, NAME)(__VA_ARGS__); \ @@ -46,6 +52,9 @@ \ /* resolver */ \ static void(*JOIN(resolve_, NAME)(void)) { \ + if (check_avx512()) { \ + return JOIN(avx512_, NAME); \ + } \ if (check_avx2()) { \ return JOIN(avx2_, NAME); \ } \ diff --git a/src/fdr/engine_description.h b/src/fdr/engine_description.h index 09b161796..b545e6474 100644 --- a/src/fdr/engine_description.h +++ b/src/fdr/engine_description.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,29 +38,19 @@ class EngineDescription { u32 id; target_t code_target; // the target that we built this code for u32 numBuckets; - u32 confirmPullBackDistance; - u32 confirmTopLevelSplit; public: EngineDescription(u32 id_in, const target_t &code_target_in, - u32 numBuckets_in, u32 confirmPullBackDistance_in, - u32 confirmTopLevelSplit_in) - : id(id_in), code_target(code_target_in), numBuckets(numBuckets_in), - confirmPullBackDistance(confirmPullBackDistance_in), - confirmTopLevelSplit(confirmTopLevelSplit_in) {} + u32 numBuckets_in) + : id(id_in), code_target(code_target_in), numBuckets(numBuckets_in) {} virtual ~EngineDescription(); u32 getID() const { return id; } u32 getNumBuckets() const { return numBuckets; } - u32 getConfirmPullBackDistance() const { return confirmPullBackDistance; } - u32 getConfirmTopLevelSplit() const { return confirmTopLevelSplit; } - void setConfirmTopLevelSplit(u32 split) { confirmTopLevelSplit = split; } bool isValidOnTarget(const target_t &target_in) const; virtual u32 getDefaultFloodSuffixLength() const = 0; - - virtual bool typicallyHoldsOneCharLits() const { return true; } }; /** Returns a target given a CPU feature set value. 
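The dispatcher.c change above extends the existing resolve-on-first-call scheme with an AVX512 branch; when DISABLE_AVX512_DISPATCH is defined, check_avx512() is compiled to (0) and that branch folds away. Stripped of the CREATE_DISPATCH/JOIN macros, the pattern looks roughly like this — function names and return values here are hypothetical stand-ins:

```cpp
#include <cstdio>

// Hypothetical stand-ins for the per-ISA builds of one API entry point.
static int avx512_scan(const char *, int) { return 3; }
static int avx2_scan(const char *, int) { return 2; }
static int core2_scan(const char *, int) { return 1; }

// Stand-ins for the CPUID-based feature tests from util/cpuid_flags.h.
static bool check_avx512() { return false; }
static bool check_avx2() { return true; }

using scan_fn = int (*)(const char *, int);

// Resolver: most capable ISA wins, which is why the new check_avx512()
// test sits ahead of the existing check_avx2() one.
static scan_fn resolve_scan() {
    if (check_avx512()) {
        return avx512_scan;
    }
    if (check_avx2()) {
        return avx2_scan;
    }
    return core2_scan;
}

int main() {
    scan_fn scan = resolve_scan();
    std::printf("selected implementation: %d\n", scan("data", 4));
    return 0;
}
```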
*/ diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index 23416c707..92e75aaa8 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,7 +34,9 @@ #include "flood_runtime.h" #include "teddy.h" #include "teddy_internal.h" +#include "util/arch.h" #include "util/simd_utils.h" +#include "util/uniform_ops.h" /** \brief number of bytes processed in each iteration */ #define ITER_BYTES 16 @@ -51,7 +53,7 @@ * * The incoming buffer is to split in multiple zones to ensure two properties: * 1: that we can read 8? bytes behind to generate a hash safely - * 2: that we can read the byte after the current byte (domain > 8) + * 2: that we can read the 3 bytes after the current byte (domain > 8) */ struct zone { /** \brief copied buffer, used only when it is a boundary zone. */ @@ -116,20 +118,34 @@ const ALIGN_CL_DIRECTIVE u8 zone_or_mask[ITER_BYTES+1][ITER_BYTES] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }; +/* compilers don't reliably synthesize the 32-bit ANDN instruction here, + * so we force its generation. + */ +static really_inline +u64a andn(const u32 a, const u8 *b) { + u64a r; +#if defined(HAVE_BMI) && !defined(NO_ASM) + __asm__ ("andn\t%2,%1,%k0" : "=r"(r) : "r"(a), "m"(*(const u32 *)b)); +#else + r = unaligned_load_u32(b) & ~a; +#endif + return r; +} + /* generates an initial state mask based on the last byte-ish of history rather * than being all accepting. If there is no history to consider, the state is * generated based on the minimum length of each bucket in order to prevent * confirms. */ static really_inline -m128 getInitState(const struct FDR *fdr, u8 len_history, const u8 *ft, +m128 getInitState(const struct FDR *fdr, u8 len_history, const u64a *ft, const struct zone *z) { m128 s; if (len_history) { /* +1: the zones ensure that we can read the byte at z->end */ u32 tmp = lv_u16(z->start + z->shift - 1, z->buf, z->end + 1); tmp &= fdr->domainMask; - s = *((const m128 *)ft + tmp); + s = load_m128_from_u64a(ft + tmp); s = rshiftbyte_m128(s, 1); } else { s = fdr->start; @@ -138,51 +154,30 @@ m128 getInitState(const struct FDR *fdr, u8 len_history, const u8 *ft, } static really_inline -void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr, - u64a domain_mask_adjusted, const u8 *ft, u64a *conf0, - u64a *conf8, m128 *s) { +void get_conf_stride_1(const u8 *itPtr, UNUSED const u8 *start_ptr, + UNUSED const u8 *end_ptr, u32 domain_mask_flipped, + const u64a *ft, u64a *conf0, u64a *conf8, m128 *s) { /* +1: the zones ensure that we can read the byte at z->end */ - - u64a current_data_0; - u64a current_data_8; - - current_data_0 = lv_u64a(itPtr + 0, start_ptr, end_ptr); - u64a v7 = (lv_u16(itPtr + 7, start_ptr, end_ptr + 1) << 1) & - domain_mask_adjusted; - u64a v0 = (current_data_0 << 1) & domain_mask_adjusted; - u64a v1 = (current_data_0 >> 7) & domain_mask_adjusted; - u64a v2 = (current_data_0 >> 15) & domain_mask_adjusted; - u64a v3 = (current_data_0 >> 23) & domain_mask_adjusted; - u64a v4 = (current_data_0 >> 31) & domain_mask_adjusted; - u64a v5 = (current_data_0 >> 39) & domain_mask_adjusted; - u64a v6 = (current_data_0 >> 47) & domain_mask_adjusted; - current_data_8 = lv_u64a(itPtr + 8, start_ptr, end_ptr); - u64a v15 = (lv_u16(itPtr + 15, start_ptr, end_ptr + 1) << 1) & - domain_mask_adjusted; - u64a v8 = 
(current_data_8 << 1) & domain_mask_adjusted; - u64a v9 = (current_data_8 >> 7) & domain_mask_adjusted; - u64a v10 = (current_data_8 >> 15) & domain_mask_adjusted; - u64a v11 = (current_data_8 >> 23) & domain_mask_adjusted; - u64a v12 = (current_data_8 >> 31) & domain_mask_adjusted; - u64a v13 = (current_data_8 >> 39) & domain_mask_adjusted; - u64a v14 = (current_data_8 >> 47) & domain_mask_adjusted; - - m128 st0 = *(const m128 *)(ft + v0*8); - m128 st1 = *(const m128 *)(ft + v1*8); - m128 st2 = *(const m128 *)(ft + v2*8); - m128 st3 = *(const m128 *)(ft + v3*8); - m128 st4 = *(const m128 *)(ft + v4*8); - m128 st5 = *(const m128 *)(ft + v5*8); - m128 st6 = *(const m128 *)(ft + v6*8); - m128 st7 = *(const m128 *)(ft + v7*8); - m128 st8 = *(const m128 *)(ft + v8*8); - m128 st9 = *(const m128 *)(ft + v9*8); - m128 st10 = *(const m128 *)(ft + v10*8); - m128 st11 = *(const m128 *)(ft + v11*8); - m128 st12 = *(const m128 *)(ft + v12*8); - m128 st13 = *(const m128 *)(ft + v13*8); - m128 st14 = *(const m128 *)(ft + v14*8); - m128 st15 = *(const m128 *)(ft + v15*8); + assert(itPtr >= start_ptr && itPtr + ITER_BYTES <= end_ptr); + u64a reach0 = andn(domain_mask_flipped, itPtr); + u64a reach1 = andn(domain_mask_flipped, itPtr + 1); + u64a reach2 = andn(domain_mask_flipped, itPtr + 2); + u64a reach3 = andn(domain_mask_flipped, itPtr + 3); + + m128 st0 = load_m128_from_u64a(ft + reach0); + m128 st1 = load_m128_from_u64a(ft + reach1); + m128 st2 = load_m128_from_u64a(ft + reach2); + m128 st3 = load_m128_from_u64a(ft + reach3); + + u64a reach4 = andn(domain_mask_flipped, itPtr + 4); + u64a reach5 = andn(domain_mask_flipped, itPtr + 5); + u64a reach6 = andn(domain_mask_flipped, itPtr + 6); + u64a reach7 = andn(domain_mask_flipped, itPtr + 7); + + m128 st4 = load_m128_from_u64a(ft + reach4); + m128 st5 = load_m128_from_u64a(ft + reach5); + m128 st6 = load_m128_from_u64a(ft + reach6); + m128 st7 = load_m128_from_u64a(ft + reach7); st1 = lshiftbyte_m128(st1, 1); st2 = lshiftbyte_m128(st2, 2); @@ -191,6 +186,40 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr, st5 = lshiftbyte_m128(st5, 5); st6 = lshiftbyte_m128(st6, 6); st7 = lshiftbyte_m128(st7, 7); + + st0 = or128(st0, st1); + st2 = or128(st2, st3); + st4 = or128(st4, st5); + st6 = or128(st6, st7); + st0 = or128(st0, st2); + st4 = or128(st4, st6); + st0 = or128(st0, st4); + *s = or128(*s, st0); + + *conf0 = movq(*s); + *s = rshiftbyte_m128(*s, 8); + *conf0 ^= ~0ULL; + + u64a reach8 = andn(domain_mask_flipped, itPtr + 8); + u64a reach9 = andn(domain_mask_flipped, itPtr + 9); + u64a reach10 = andn(domain_mask_flipped, itPtr + 10); + u64a reach11 = andn(domain_mask_flipped, itPtr + 11); + + m128 st8 = load_m128_from_u64a(ft + reach8); + m128 st9 = load_m128_from_u64a(ft + reach9); + m128 st10 = load_m128_from_u64a(ft + reach10); + m128 st11 = load_m128_from_u64a(ft + reach11); + + u64a reach12 = andn(domain_mask_flipped, itPtr + 12); + u64a reach13 = andn(domain_mask_flipped, itPtr + 13); + u64a reach14 = andn(domain_mask_flipped, itPtr + 14); + u64a reach15 = andn(domain_mask_flipped, itPtr + 15); + + m128 st12 = load_m128_from_u64a(ft + reach12); + m128 st13 = load_m128_from_u64a(ft + reach13); + m128 st14 = load_m128_from_u64a(ft + reach14); + m128 st15 = load_m128_from_u64a(ft + reach15); + st9 = lshiftbyte_m128(st9, 1); st10 = lshiftbyte_m128(st10, 2); st11 = lshiftbyte_m128(st11, 3); @@ -199,100 +228,86 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr, st14 = lshiftbyte_m128(st14, 6); 
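Each reach value above is one ANDN: with domain_mask_flipped = ~fdr->domainMask, andn() yields unaligned_load_u32(itPtr + i) & fdr->domainMask, i.e. the table index for the domain bits starting at that byte. A scalar model of the step — illustrative only; the real kernel does sixteen of these per iteration and ORs the fetched m128 states together:

```cpp
#include <cstdint>
#include <cstring>

// Scalar model of the new index computation: one unaligned 32-bit load
// masked down to the FDR "domain" bits, which the ANDN instruction
// computes in a single operation.
static inline uint64_t domain_index(uint32_t domain_mask_flipped,
                                    const uint8_t *p) {
    uint32_t v;
    std::memcpy(&v, p, sizeof(v));   // unaligned load, as unaligned_load_u32()
    return v & ~domain_mask_flipped; // == v & domainMask
}
```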
st15 = lshiftbyte_m128(st15, 7); - *s = or128(*s, st0); - *s = or128(*s, st1); - *s = or128(*s, st2); - *s = or128(*s, st3); - *s = or128(*s, st4); - *s = or128(*s, st5); - *s = or128(*s, st6); - *s = or128(*s, st7); - *conf0 = movq(*s); - *s = rshiftbyte_m128(*s, 8); - *conf0 ^= ~0ULL; - + st8 = or128(st8, st9); + st10 = or128(st10, st11); + st12 = or128(st12, st13); + st14 = or128(st14, st15); + st8 = or128(st8, st10); + st12 = or128(st12, st14); + st8 = or128(st8, st12); *s = or128(*s, st8); - *s = or128(*s, st9); - *s = or128(*s, st10); - *s = or128(*s, st11); - *s = or128(*s, st12); - *s = or128(*s, st13); - *s = or128(*s, st14); - *s = or128(*s, st15); + *conf8 = movq(*s); *s = rshiftbyte_m128(*s, 8); *conf8 ^= ~0ULL; } static really_inline -void get_conf_stride_2(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr, - u64a domain_mask_adjusted, const u8 *ft, u64a *conf0, - u64a *conf8, m128 *s) { - u64a current_data_0; - u64a current_data_8; - - current_data_0 = lv_u64a(itPtr + 0, start_ptr, end_ptr); - u64a v0 = (current_data_0 << 1) & domain_mask_adjusted; - u64a v2 = (current_data_0 >> 15) & domain_mask_adjusted; - u64a v4 = (current_data_0 >> 31) & domain_mask_adjusted; - u64a v6 = (current_data_0 >> 47) & domain_mask_adjusted; - current_data_8 = lv_u64a(itPtr + 8, start_ptr, end_ptr); - u64a v8 = (current_data_8 << 1) & domain_mask_adjusted; - u64a v10 = (current_data_8 >> 15) & domain_mask_adjusted; - u64a v12 = (current_data_8 >> 31) & domain_mask_adjusted; - u64a v14 = (current_data_8 >> 47) & domain_mask_adjusted; - - m128 st0 = *(const m128 *)(ft + v0*8); - m128 st2 = *(const m128 *)(ft + v2*8); - m128 st4 = *(const m128 *)(ft + v4*8); - m128 st6 = *(const m128 *)(ft + v6*8); - m128 st8 = *(const m128 *)(ft + v8*8); - m128 st10 = *(const m128 *)(ft + v10*8); - m128 st12 = *(const m128 *)(ft + v12*8); - m128 st14 = *(const m128 *)(ft + v14*8); +void get_conf_stride_2(const u8 *itPtr, UNUSED const u8 *start_ptr, + UNUSED const u8 *end_ptr, u32 domain_mask_flipped, + const u64a *ft, u64a *conf0, u64a *conf8, m128 *s) { + assert(itPtr >= start_ptr && itPtr + ITER_BYTES <= end_ptr); + u64a reach0 = andn(domain_mask_flipped, itPtr); + u64a reach2 = andn(domain_mask_flipped, itPtr + 2); + u64a reach4 = andn(domain_mask_flipped, itPtr + 4); + u64a reach6 = andn(domain_mask_flipped, itPtr + 6); + + m128 st0 = load_m128_from_u64a(ft + reach0); + m128 st2 = load_m128_from_u64a(ft + reach2); + m128 st4 = load_m128_from_u64a(ft + reach4); + m128 st6 = load_m128_from_u64a(ft + reach6); + + u64a reach8 = andn(domain_mask_flipped, itPtr + 8); + u64a reach10 = andn(domain_mask_flipped, itPtr + 10); + u64a reach12 = andn(domain_mask_flipped, itPtr + 12); + u64a reach14 = andn(domain_mask_flipped, itPtr + 14); + + m128 st8 = load_m128_from_u64a(ft + reach8); + m128 st10 = load_m128_from_u64a(ft + reach10); + m128 st12 = load_m128_from_u64a(ft + reach12); + m128 st14 = load_m128_from_u64a(ft + reach14); st2 = lshiftbyte_m128(st2, 2); st4 = lshiftbyte_m128(st4, 4); st6 = lshiftbyte_m128(st6, 6); - st10 = lshiftbyte_m128(st10, 2); - st12 = lshiftbyte_m128(st12, 4); - st14 = lshiftbyte_m128(st14, 6); *s = or128(*s, st0); *s = or128(*s, st2); *s = or128(*s, st4); *s = or128(*s, st6); + *conf0 = movq(*s); *s = rshiftbyte_m128(*s, 8); *conf0 ^= ~0ULL; + st10 = lshiftbyte_m128(st10, 2); + st12 = lshiftbyte_m128(st12, 4); + st14 = lshiftbyte_m128(st14, 6); + *s = or128(*s, st8); *s = or128(*s, st10); *s = or128(*s, st12); *s = or128(*s, st14); + *conf8 = movq(*s); *s = rshiftbyte_m128(*s, 8); 
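The same hunks also replace the old serial chain of eight or128() calls with pairwise combining, cutting the dependency chain from seven sequential ORs to three. The shape of that reduction, in scalar form:

```cpp
#include <cstdint>

// Pairwise OR tree over eight state words: log2(8) = 3 dependent steps,
// where the removed code needed 7 (each OR waited on the previous one).
static inline uint64_t or_tree8(const uint64_t st[8]) {
    uint64_t a = st[0] | st[1];
    uint64_t b = st[2] | st[3];
    uint64_t c = st[4] | st[5];
    uint64_t d = st[6] | st[7];
    return (a | b) | (c | d);
}
```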
*conf8 ^= ~0ULL; } static really_inline -void get_conf_stride_4(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr, - u64a domain_mask_adjusted, const u8 *ft, u64a *conf0, - u64a *conf8, m128 *s) { - u64a current_data_0; - u64a current_data_8; - - current_data_0 = lv_u64a(itPtr + 0, start_ptr, end_ptr); - u64a v0 = (current_data_0 << 1) & domain_mask_adjusted; - u64a v4 = (current_data_0 >> 31) & domain_mask_adjusted; - current_data_8 = lv_u64a(itPtr + 8, start_ptr, end_ptr); - u64a v8 = (current_data_8 << 1) & domain_mask_adjusted; - u64a v12 = (current_data_8 >> 31) & domain_mask_adjusted; - - m128 st0 = *(const m128 *)(ft + v0*8); - m128 st4 = *(const m128 *)(ft + v4*8); - m128 st8 = *(const m128 *)(ft + v8*8); - m128 st12 = *(const m128 *)(ft + v12*8); +void get_conf_stride_4(const u8 *itPtr, UNUSED const u8 *start_ptr, + UNUSED const u8 *end_ptr, u32 domain_mask_flipped, + const u64a *ft, u64a *conf0, u64a *conf8, m128 *s) { + assert(itPtr >= start_ptr && itPtr + ITER_BYTES <= end_ptr); + u64a reach0 = andn(domain_mask_flipped, itPtr); + u64a reach4 = andn(domain_mask_flipped, itPtr + 4); + u64a reach8 = andn(domain_mask_flipped, itPtr + 8); + u64a reach12 = andn(domain_mask_flipped, itPtr + 12); + + m128 st0 = load_m128_from_u64a(ft + reach0); + m128 st4 = load_m128_from_u64a(ft + reach4); + m128 st8 = load_m128_from_u64a(ft + reach8); + m128 st12 = load_m128_from_u64a(ft + reach12); st4 = lshiftbyte_m128(st4, 4); st12 = lshiftbyte_m128(st12, 4); @@ -315,7 +330,6 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control, const u32 *confBase, const struct FDR_Runtime_Args *a, const u8 *ptr, u32 *last_match_id, struct zone *z) { const u8 bucket = 8; - const u8 pullback = 1; if (likely(!*conf)) { return; @@ -332,8 +346,7 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control, u32 bit = findAndClearLSB_64(conf); u32 byte = bit / bucket + offset; u32 bitRem = bit % bucket; - u32 confSplit = *(ptr + byte); - u32 idx = confSplit * bucket + bitRem; + u32 idx = bitRem; u32 cf = confBase[idx]; if (!cf) { continue; @@ -343,18 +356,8 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control, if (!(fdrc->groups & *control)) { continue; } - if (!fdrc->mult) { - u32 id = fdrc->nBitsOrSoleID; - if ((*last_match_id == id) && (fdrc->flags & NoRepeat)) { - continue; - } - *last_match_id = id; - *control = a->cb(ptr_main + byte - a->buf, ptr_main + byte - a->buf, - id, a->ctxt); - continue; - } - u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a)); - confWithBit(fdrc, a, ptr_main - a->buf + byte, pullback, control, + u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a) + 1); + confWithBit(fdrc, a, ptr_main - a->buf + byte, control, last_match_id, confVal); } while (unlikely(!!*conf)); } @@ -496,6 +499,7 @@ void createShortZone(const u8 *buf, const u8 *hend, const u8 *begin, /* copy the post-padding byte; this is required for domain > 8 due to * overhang */ + assert(ZONE_SHORT_DATA_OFFSET + copy_len + 3 < 64); *z_end = 0; z->end = z_end; @@ -566,15 +570,19 @@ void createStartZone(const u8 *buf, const u8 *hend, const u8 *begin, storeu128(z_end - sizeof(m128), loadu128(end - sizeof(m128))); z->zone_pointer_adjust = (ptrdiff_t)((uintptr_t)end - (uintptr_t)z_end); + + assert(ZONE_START_BEGIN + copy_len + 3 < 64); } /** * \brief Create a zone for the end region. * * This function requires that there is > ITER_BYTES of data in the buffer to - * scan. The end zone, however, is only responsible for a scanning the <= - * ITER_BYTES rump of data. 
The end zone is required to handle a full ITER_BYTES - iteration as the main loop cannot handle the last byte of the buffer. + * scan. The end zone is responsible for scanning the <= ITER_BYTES rump of + * data, plus an optional extra full ITER_BYTES. The main zone cannot handle + * the last 3 bytes of the buffer. The end zone is required to handle an + * optional full ITER_BYTES block from the main zone when fewer than 3 bytes + * remain to scan. The main zone size is reduced by ITER_BYTES in this case. * * This zone ensures that the byte at z->end can be read by filling it with a * padding character. @@ -592,31 +600,45 @@ void createEndZone(const u8 *buf, const u8 *begin, const u8 *end, ptrdiff_t z_len = end - begin; assert(z_len > 0); - assert(z_len <= ITER_BYTES); + size_t iter_bytes_second = 0; + size_t z_len_first = z_len; + if (z_len > ITER_BYTES) { + z_len_first = z_len - ITER_BYTES; + iter_bytes_second = ITER_BYTES; + } + z->shift = ITER_BYTES - z_len_first; - z->shift = ITER_BYTES - z_len; + const u8 *end_first = end - iter_bytes_second; + /* The amount of data we have to copy from main buffer for the + * first iteration. */ + size_t copy_len_first = MIN((size_t)(end_first - buf), + ITER_BYTES + sizeof(CONF_TYPE)); + assert(copy_len_first >= 16); - /* The amount of data we have to copy from main buffer. */ - size_t copy_len = MIN((size_t)(end - buf), - ITER_BYTES + sizeof(CONF_TYPE)); - assert(copy_len >= 16); + size_t total_copy_len = copy_len_first + iter_bytes_second; + assert(total_copy_len + 3 < 64); /* copy the post-padding byte; this is required for domain > 8 due to * overhang */ - z->buf[copy_len] = 0; + z->buf[total_copy_len] = 0; /* set the start and end location of the zone buf * to be scanned */ - u8 *z_end = z->buf + copy_len; + u8 *z_end = z->buf + total_copy_len; z->end = z_end; - z->start = z_end - ITER_BYTES; + z->start = z_end - ITER_BYTES - iter_bytes_second; assert(z->start + z->shift == z_end - z_len); + u8 *z_end_first = z_end - iter_bytes_second; /* copy the first 8 bytes of the valid region */ - unaligned_store_u64a(z->buf, unaligned_load_u64a(end - copy_len)); + unaligned_store_u64a(z->buf, + unaligned_load_u64a(end_first - copy_len_first)); /* copy the last 16 bytes, may overlap with the previous 8 byte write */ - storeu128(z_end - sizeof(m128), loadu128(end - sizeof(m128))); + storeu128(z_end_first - sizeof(m128), loadu128(end_first - sizeof(m128))); + if (iter_bytes_second) { + storeu128(z_end - sizeof(m128), loadu128(end - sizeof(m128))); + } z->zone_pointer_adjust = (ptrdiff_t)((uintptr_t)end - (uintptr_t)z_end); } @@ -651,13 +673,13 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend, /* find maximum buffer location that the main zone can scan * - must be a multiple of ITER_BYTES, and - * - cannot contain the last byte (due to overhang) + * - cannot contain the last 3 bytes (due to the 3 bytes read past the + end of the buffer by the FDR main loop) */ - const u8 *main_end = buf + start + ROUNDDOWN_N(len - start - 1, ITER_BYTES); - assert(main_end >= ptr); + const u8 *main_end = buf + start + ROUNDDOWN_N(len - start - 3, ITER_BYTES); /* create a zone if multiple of ITER_BYTES are found */ - if (main_end != ptr) { + if (main_end > ptr) { createMainZone(flood, ptr, main_end, &zoneArr[numZone++]); ptr = main_end; } @@ -684,10 +706,10 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend, return HWLM_TERMINATED; \ } \ } \ - __builtin_prefetch(itPtr + (ITER_BYTES*4)); \ + __builtin_prefetch(itPtr + ITER_BYTES); \ u64a conf0; \ u64a conf8; \ - 
get_conf_fn(itPtr, start_ptr, end_ptr, domain_mask_adjusted, \ + get_conf_fn(itPtr, start_ptr, end_ptr, domain_mask_flipped, \ ft, &conf0, &conf8, &s); \ do_confirm_fdr(&conf0, 0, &control, confBase, a, itPtr, \ &last_match_id, zz); \ @@ -705,10 +727,11 @@ hwlm_error_t fdr_engine_exec(const struct FDR *fdr, hwlm_group_t control) { u32 floodBackoff = FLOOD_BACKOFF_START; u32 last_match_id = INVALID_MATCH_ID; - u64a domain_mask_adjusted = fdr->domainMask << 1; + u32 domain_mask_flipped = ~fdr->domainMask; u8 stride = fdr->stride; - const u8 *ft = (const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR)); - const u32 *confBase = (const u32 *)(ft + fdr->tabSize); + const u64a *ft = + (const u64a *)((const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR))); + const u32 *confBase = (const u32 *)((const u8 *)ft + fdr->tabSize); struct zone zones[ZONE_MAX]; assert(fdr->domain > 8 && fdr->domain < 16); @@ -761,7 +784,7 @@ hwlm_error_t fdr_engine_exec(const struct FDR *fdr, return HWLM_SUCCESS; } -#if defined(__AVX2__) +#if defined(HAVE_AVX2) #define ONLY_AVX2(func) func #else #define ONLY_AVX2(func) NULL @@ -773,8 +796,8 @@ typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr, static const FDRFUNCTYPE funcs[] = { fdr_engine_exec, - ONLY_AVX2(fdr_exec_teddy_avx2_msks1_fast), - ONLY_AVX2(fdr_exec_teddy_avx2_msks1_pck_fast), + NULL, /* old: fast teddy */ + NULL, /* old: fast teddy */ ONLY_AVX2(fdr_exec_teddy_avx2_msks1_fat), ONLY_AVX2(fdr_exec_teddy_avx2_msks1_pck_fat), ONLY_AVX2(fdr_exec_teddy_avx2_msks2_fat), diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 937513a85..c4ea50f27 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,8 +30,9 @@ * \brief FDR literal matcher: build API. 
*/ -#include "fdr_internal.h" #include "fdr_compile.h" + +#include "fdr_internal.h" #include "fdr_confirm.h" #include "fdr_compile_internal.h" #include "fdr_engine_description.h" @@ -40,9 +41,10 @@ #include "grey.h" #include "ue2common.h" #include "hwlm/hwlm_build.h" -#include "util/alloc.h" #include "util/compare.h" #include "util/dump_mask.h" +#include "util/math.h" +#include "util/noncopyable.h" #include "util/target_info.h" #include "util/ue2string.h" #include "util/verify_types.h" @@ -53,13 +55,15 @@ #include #include #include +#include #include #include +#include #include #include #include -#include +#include using namespace std; @@ -67,31 +71,31 @@ namespace ue2 { namespace { -class FDRCompiler : boost::noncopyable { +class FDRCompiler : noncopyable { private: const FDREngineDescription ŋ + const Grey &grey; vector tab; - const vector &lits; + vector lits; map > bucketToLits; bool make_small; u8 *tabIndexToMask(u32 indexInTable); - void assignStringToBucket(LiteralIndex l, BucketIndex b); void assignStringsToBuckets(); #ifdef DEBUG void dumpMasks(const u8 *defaultMask); #endif void setupTab(); - aligned_unique_ptr setupFDR(pair, size_t> &link); + bytecode_ptr setupFDR(); void createInitialState(FDR *fdr); public: - FDRCompiler(const vector &lits_in, - const FDREngineDescription &eng_in, bool make_small_in) - : eng(eng_in), tab(eng_in.getTabSizeBytes()), lits(lits_in), - make_small(make_small_in) {} + FDRCompiler(vector lits_in, const FDREngineDescription &eng_in, + bool make_small_in, const Grey &grey_in) + : eng(eng_in), grey(grey_in), tab(eng_in.getTabSizeBytes()), + lits(move(lits_in)), make_small(make_small_in) {} - aligned_unique_ptr build(pair, size_t> &link); + bytecode_ptr build(); }; u8 *FDRCompiler::tabIndexToMask(u32 indexInTable) { @@ -140,27 +144,25 @@ void FDRCompiler::createInitialState(FDR *fdr) { } } -aligned_unique_ptr -FDRCompiler::setupFDR(pair, size_t> &link) { +bytecode_ptr FDRCompiler::setupFDR() { size_t tabSize = eng.getTabSizeBytes(); - auto floodControlTmp = setupFDRFloodControl(lits, eng); - auto confirmTmp = setupFullMultiConfs(lits, eng, bucketToLits, make_small); + auto floodControlTmp = setupFDRFloodControl(lits, eng, grey); + auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small); assert(ISALIGNED_16(tabSize)); - assert(ISALIGNED_16(confirmTmp.second)); - assert(ISALIGNED_16(floodControlTmp.second)); - assert(ISALIGNED_16(link.second)); + assert(ISALIGNED_16(confirmTmp.size())); + assert(ISALIGNED_16(floodControlTmp.size())); size_t headerSize = ROUNDUP_16(sizeof(FDR)); - size_t size = ROUNDUP_16(headerSize + tabSize + confirmTmp.second + - floodControlTmp.second + link.second); + size_t size = ROUNDUP_16(headerSize + tabSize + confirmTmp.size() + + floodControlTmp.size()); DEBUG_PRINTF("sizes base=%zu tabSize=%zu confirm=%zu floodControl=%zu " "total=%zu\n", - headerSize, tabSize, confirmTmp.second, floodControlTmp.second, + headerSize, tabSize, confirmTmp.size(), floodControlTmp.size(), size); - aligned_unique_ptr fdr = aligned_zmalloc_unique(size); + auto fdr = make_zeroed_bytecode_ptr(size, 64); assert(fdr); // otherwise would have thrown std::bad_alloc fdr->size = size; @@ -169,16 +171,16 @@ FDRCompiler::setupFDR(pair, size_t> &link) { createInitialState(fdr.get()); u8 *fdr_base = (u8 *)fdr.get(); - u8 * ptr = fdr_base + ROUNDUP_16(sizeof(FDR)); + u8 *ptr = fdr_base + ROUNDUP_16(sizeof(FDR)); copy(tab.begin(), tab.end(), ptr); ptr += tabSize; - memcpy(ptr, confirmTmp.first.get(), confirmTmp.second); - ptr += confirmTmp.second; + 
memcpy(ptr, confirmTmp.get(), confirmTmp.size()); + ptr += confirmTmp.size(); fdr->floodOffset = verify_u32(ptr - fdr_base); - memcpy(ptr, floodControlTmp.first.get(), floodControlTmp.second); - ptr += floodControlTmp.second; + memcpy(ptr, floodControlTmp.get(), floodControlTmp.size()); + ptr += floodControlTmp.size(); /* we are allowing domains 9 to 15 only */ assert(eng.bits > 8 && eng.bits < 16); @@ -187,76 +189,124 @@ fdr->tabSize = (1 << eng.bits) * (eng.schemeWidth / 8); fdr->stride = eng.stride; - if (link.first) { - fdr->link = verify_u32(ptr - fdr_base); - memcpy(ptr, link.first.get(), link.second); - } else { - fdr->link = 0; - } - return fdr; } -void FDRCompiler::assignStringToBucket(LiteralIndex l, BucketIndex b) { - bucketToLits[b].push_back(l); +//#define DEBUG_ASSIGNMENT + +static +double getScoreUtil(u32 len, u32 count) { + return len == 0 ? numeric_limits<double>::max() + : our_pow(count, 1.05) * our_pow(len, -3.0); } -struct LitOrder { - explicit LitOrder(const vector<hwlmLiteral> &vl_) : vl(vl_) {} - bool operator()(const u32 &i1, const u32 &i2) const { - const string &i1s = vl[i1].s; - const string &i2s = vl[i2].s; +/** + * Returns true if the two given literals should be placed in the same chunk as + * they are identical except for a difference in caselessness. + */ +static +bool isEquivLit(const hwlmLiteral &a, const hwlmLiteral &b, + const hwlmLiteral *last_nocase_lit) { + const size_t a_len = a.s.size(); + const size_t b_len = b.s.size(); - size_t len1 = i1s.size(), len2 = i2s.size(); + if (a_len != b_len) { + return false; + } - if (len1 != len2) { - return len1 < len2; - } else { - auto p = std::mismatch(i1s.rbegin(), i1s.rend(), i2s.rbegin()); - if (p.first == i1s.rend()) { - return false; + bool nocase = last_nocase_lit && a_len == last_nocase_lit->s.size() && + !cmp(a.s.c_str(), last_nocase_lit->s.c_str(), a_len, true); + return !cmp(a.s.c_str(), b.s.c_str(), a.s.size(), nocase); +} + +struct Chunk { + Chunk(u32 first_id_in, u32 count_in, u32 length_in) + : first_id(first_id_in), count(count_in), length(length_in) {} + u32 first_id; //!< first id in this chunk + u32 count; //!< how many are in this chunk + u32 length; //!< how long things in the chunk are +}; + +static +vector<Chunk> assignChunks(const vector<hwlmLiteral> &lits, + const map<u32, u32> &lenCounts) { + const u32 CHUNK_MAX = 512; + const u32 MAX_CONSIDERED_LENGTH = 16; + + // TODO: detailed early stage literal analysis for v. small cases (actually + // look at lits) yes - after we factor this out and merge in the Teddy + // style of building we can look at this, although the teddy merge + // modelling is quite different. It's still probably adaptable to some + // extent for this class of problem. + + vector<Chunk> chunks; + chunks.reserve(CHUNK_MAX); + + const u32 maxPerChunk = lits.size() / + (CHUNK_MAX - MIN(MAX_CONSIDERED_LENGTH, lenCounts.size())) + 1; + + u32 currentSize = 0; + u32 chunkStartID = 0; + const hwlmLiteral *last_nocase_lit = nullptr; + + for (u32 i = 0; i < lits.size() && chunks.size() < CHUNK_MAX - 1; i++) { + const auto &lit = lits[i]; + + DEBUG_PRINTF("i=%u, lit=%s%s\n", i, escapeString(lit.s).c_str(), + lit.nocase ? " (nocase)" : ""); + + // If this literal is identical to the last one (aside from differences + // in caselessness), keep going even if we will "overfill" a chunk; we + // don't want to split identical literals into different buckets.
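getScoreUtil() above prices a bucket: cost grows slightly super-linearly with the number of literals sharing it and falls with the cube of their guaranteed (shortest) length, so crowding short literals together is what the assignment tries hardest to avoid. A standalone illustration, with std::pow standing in for our_pow:

```cpp
#include <cmath>
#include <cstdio>
#include <limits>

// Mirror of the bucket cost heuristic: count^1.05 * len^-3, with empty
// literals priced as unplaceable.
static double score(unsigned len, unsigned count) {
    return len == 0 ? std::numeric_limits<double>::max()
                    : std::pow(count, 1.05) * std::pow(len, -3.0);
}

int main() {
    std::printf("len 2, 100 lits: %f\n", score(2, 100)); // ~15.7: expensive
    std::printf("len 8, 100 lits: %f\n", score(8, 100)); // ~0.25: far cheaper
    return 0;
}
```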
+ if (i != 0 && isEquivLit(lit, lits[i - 1], last_nocase_lit)) { + DEBUG_PRINTF("identical lit\n"); + goto next_literal; + } + + if ((currentSize < MAX_CONSIDERED_LENGTH && + (lit.s.size() != currentSize)) || + (currentSize != 1 && ((i - chunkStartID) >= maxPerChunk))) { + currentSize = lit.s.size(); + if (!chunks.empty()) { + chunks.back().count = i - chunkStartID; } - return *p.first < *p.second; + chunkStartID = i; + chunks.emplace_back(i, 0, currentSize); + } +next_literal: + if (lit.nocase) { + last_nocase_lit = &lit; } } -private: - const vector<hwlmLiteral> &vl; -}; + assert(!chunks.empty()); + chunks.back().count = lits.size() - chunkStartID; + // close off chunks with an empty row + chunks.emplace_back(lits.size(), 0, 0); -static u64a getScoreUtil(u32 len, u32 count) { - if (len == 0) { - return (u64a)-1; +#ifdef DEBUG_ASSIGNMENT + for (size_t j = 0; j < chunks.size(); j++) { + const auto &chunk = chunks[j]; + printf("chunk %zu first_id=%u count=%u length=%u\n", j, chunk.first_id, + chunk.count, chunk.length); } - const u32 LEN_THRESH = 128; - const u32 elen = (len > LEN_THRESH) ? LEN_THRESH : len; - const u64a lenScore = - (LEN_THRESH * LEN_THRESH * LEN_THRESH) / (elen * elen * elen); - return count * lenScore; // deemphasize count - possibly more than needed - // this might be overkill in the other direction +#endif + + DEBUG_PRINTF("built %zu chunks (%zu lits)\n", chunks.size(), lits.size()); + assert(chunks.size() <= CHUNK_MAX); + return chunks; } -//#define DEBUG_ASSIGNMENT void FDRCompiler::assignStringsToBuckets() { - typedef u64a SCORE; // 'Score' type - const SCORE MAX_SCORE = (SCORE)-1; - const u32 CHUNK_MAX = 512; - const u32 BUCKET_MAX = 16; - typedef pair<SCORE, u32> SCORE_INDEX_PAIR; + const double MAX_SCORE = numeric_limits<double>::max(); - u32 ls = verify_u32(lits.size()); - assert(ls); // Shouldn't be called with no literals. + assert(!lits.empty()); // Shouldn't be called with no literals. - // make a vector that contains our literals as pointers or u32 LiteralIndex values - vector<LiteralIndex> vli; - vli.resize(ls); + // Count the number of literals for each length. map<u32, u32> lenCounts; - for (LiteralIndex l = 0; l < ls; l++) { - vli[l] = l; - lenCounts[lits[l].s.size()]++; + for (const auto &lit : lits) { + lenCounts[lit.s.size()]++; } - // sort vector by literal length + if tied on length, 'magic' criteria of some kind (tbd) - stable_sort(vli.begin(), vli.end(), LitOrder(lits)); #ifdef DEBUG_ASSIGNMENT for (const auto &m : lenCounts) { @@ -265,103 +315,94 @@ void FDRCompiler::assignStringsToBuckets() { printf("\n"); #endif - // TODO: detailed early stage literal analysis for v. small cases (actually look at lits) - // yes - after we factor this out and merge in the Teddy style of building we can look - // at this, although the teddy merge modelling is quite different.
It's still probably - adaptable to some extent for this class of problem - - u32 firstIds[CHUNK_MAX]; // how many are in this chunk (CHUNK_MAX - 1 contains 'last' bound) - u32 count[CHUNK_MAX]; // how many are in this chunk - u32 length[CHUNK_MAX]; // how long things in the chunk are - - const u32 MAX_CONSIDERED_LENGTH = 16; - u32 currentChunk = 0; - u32 currentSize = 0; - u32 chunkStartID = 0; - u32 maxPerChunk = ls/(CHUNK_MAX - MIN(MAX_CONSIDERED_LENGTH, lenCounts.size())) + 1; - - for (u32 i = 0; i < ls && currentChunk < CHUNK_MAX - 1; i++) { - LiteralIndex l = vli[i]; - if ((currentSize < MAX_CONSIDERED_LENGTH && (lits[l].s.size() != currentSize)) || - (currentSize != 1 && ((i - chunkStartID) >= maxPerChunk))) { - currentSize = lits[l].s.size(); - if (currentChunk) { - count[currentChunk - 1 ] = i - chunkStartID; - } - chunkStartID = firstIds[currentChunk] = i; - length[currentChunk] = currentSize; - currentChunk++; - } - } + // Sort literals by literal length. If tied on length, use lexicographic + // ordering (of the reversed literals). + stable_sort(lits.begin(), lits.end(), + [](const hwlmLiteral &a, const hwlmLiteral &b) { + if (a.s.size() != b.s.size()) { + return a.s.size() < b.s.size(); + } + auto p = mismatch(a.s.rbegin(), a.s.rend(), b.s.rbegin()); + if (p.first != a.s.rend()) { + return *p.first < *p.second; + } + // Sort caseless variants first. + return a.nocase > b.nocase; + }); - assert(currentChunk > 0); - count[currentChunk - 1] = ls - chunkStartID; - // close off chunks with an empty row - firstIds[currentChunk] = ls; - length[currentChunk] = 0; - count[currentChunk] = 0; - u32 nChunks = currentChunk + 1; + vector<Chunk> chunks = assignChunks(lits, lenCounts); -#ifdef DEBUG_ASSIGNMENT - for (u32 j = 0; j < nChunks; j++) { - printf("%d %d %d %d\n", j, firstIds[j], count[j], length[j]); - } -#endif + const u32 numChunks = chunks.size(); + const u32 numBuckets = eng.getNumBuckets(); - SCORE_INDEX_PAIR t[CHUNK_MAX][BUCKET_MAX]; // pair of score, index - u32 nb = eng.getNumBuckets(); + // 2D array of (score, chunk index) pairs, indexed by + // [chunk_index][bucket_index].
+ boost::multi_array, 2> t( + boost::extents[numChunks][numBuckets]); - for (u32 j = 0; j < nChunks; j++) { + for (u32 j = 0; j < numChunks; j++) { u32 cnt = 0; - for (u32 k = j; k < nChunks; ++k) { - cnt += count[k]; + for (u32 k = j; k < numChunks; ++k) { + cnt += chunks[k].count; } - t[j][0] = {getScoreUtil(length[j], cnt), 0}; + t[j][0] = {getScoreUtil(chunks[j].length, cnt), 0}; } - for (u32 i = 1; i < nb; i++) { - for (u32 j = 0; j < nChunks - 1; j++) { // don't process last, empty row - SCORE_INDEX_PAIR best = {MAX_SCORE, 0}; - u32 cnt = count[j]; - for (u32 k = j + 1; k < nChunks - 1; k++, cnt += count[k]) { - SCORE score = getScoreUtil(length[j], cnt); + for (u32 i = 1; i < numBuckets; i++) { + for (u32 j = 0; j < numChunks - 1; j++) { // don't do last, empty row + pair best = {MAX_SCORE, 0}; + u32 cnt = chunks[j].count; + for (u32 k = j + 1; k < numChunks - 1; k++) { + auto score = getScoreUtil(chunks[j].length, cnt); if (score > best.first) { - break; // if we're now worse locally than our best score, give up + break; // now worse locally than our best score, give up } score += t[k][i-1].first; if (score < best.first) { best = {score, k}; } + cnt += chunks[k].count; } t[j][i] = best; } - t[nChunks - 1][i] = {0,0}; // fill in empty final row for next iteration + t[numChunks - 1][i] = {0,0}; // fill in empty final row for next iter } #ifdef DEBUG_ASSIGNMENT - for (u32 j = 0; j < nChunks; j++) { - for (u32 i = 0; i < nb; i++) { - SCORE_INDEX_PAIR v = t[j][i]; - printf("<%7lld,%3d>", v.first, v.second); + for (u32 j = 0; j < numChunks; j++) { + printf("%03u: ", j); + for (u32 i = 0; i < numBuckets; i++) { + const auto &v = t[j][i]; + printf("<%0.3f,%3d> ", v.first, v.second); } printf("\n"); } #endif - // our best score is in best[0][N_BUCKETS-1] and we can follow the links + // our best score is in t[0][N_BUCKETS-1] and we can follow the links // to find where our buckets should start and what goes into them - for (u32 i = 0, n = nb; n && (i != nChunks - 1); n--) { + for (u32 i = 0, n = numBuckets; n && (i != numChunks - 1); n--) { u32 j = t[i][n - 1].second; if (j == 0) { - j = nChunks - 1; + j = numChunks - 1; } - // put chunks between i - j into bucket (NBUCKETS-1) - n -#ifdef DEBUG_ASSIGNMENT - printf("placing from %d to %d in bucket %d\n", firstIds[i], firstIds[j], - nb - n); -#endif - for (u32 k = firstIds[i]; k < firstIds[j]; k++) { - assignStringToBucket((LiteralIndex)vli[k], nb - n); + + // put chunks between i - j into bucket (numBuckets - n). 
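    // (Traceback sketch: t[i][n - 1].second holds the start of the next
    // group chosen when that minimum was computed, so hopping i -> j -> ...
    // walks the optimal chunk boundaries in bucket order; a stored 0 means
    // no further split was profitable, so everything up to the final empty
    // row falls into this last bucket.)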
+ u32 first_id = chunks[i].first_id; + u32 last_id = chunks[j].first_id; + assert(first_id < last_id); + u32 bucket = numBuckets - n; + UNUSED const auto &first_lit = lits[first_id]; + UNUSED const auto &last_lit = lits[last_id - 1]; + DEBUG_PRINTF("placing [%u-%u) in bucket %u (%u lits, len %zu-%zu, " + "score %0.4f)\n", + first_id, last_id, bucket, last_id - first_id, + first_lit.s.length(), last_lit.s.length(), + getScoreUtil(first_lit.s.length(), last_id - first_id)); + + auto &bucket_lits = bucketToLits[bucket]; + for (u32 k = first_id; k < last_id; k++) { + bucket_lits.push_back(k); } i = j; } @@ -487,49 +528,22 @@ void FDRCompiler::setupTab() { #endif } -aligned_unique_ptr -FDRCompiler::build(pair, size_t> &link) { +bytecode_ptr FDRCompiler::build() { assignStringsToBuckets(); setupTab(); - return setupFDR(link); + return setupFDR(); } } // namespace static -size_t maxMaskLen(const vector &lits) { - size_t rv = 0; - for (const auto &lit : lits) { - rv = max(rv, lit.msk.size()); - } - return rv; -} - -static -void setHistoryRequired(hwlmStreamingControl &stream_ctl, - const vector &lits) { - size_t max_mask_len = maxMaskLen(lits); - - // we want enough history to manage the longest literal and the longest - // mask. - stream_ctl.literal_history_required = max(maxLen(lits), max_mask_len) - 1; -} - -static -aligned_unique_ptr -fdrBuildTableInternal(const vector &lits, bool make_small, - const target_t &target, const Grey &grey, u32 hint, - hwlmStreamingControl *stream_control) { - pair, size_t> link(nullptr, 0); - - if (stream_control) { - setHistoryRequired(*stream_control, lits); - } - +bytecode_ptr fdrBuildTableInternal(const vector &lits, + bool make_small, const target_t &target, + const Grey &grey, u32 hint) { DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2"); if (grey.fdrAllowTeddy) { - auto fdr = teddyBuildTableHinted(lits, make_small, hint, target, link); + auto fdr = teddyBuildTableHinted(lits, make_small, hint, target, grey); if (fdr) { DEBUG_PRINTF("build with teddy succeeded\n"); return fdr; @@ -538,10 +552,8 @@ fdrBuildTableInternal(const vector &lits, bool make_small, } } - const unique_ptr des = - (hint == HINT_INVALID) ? chooseEngine(target, lits, make_small) - : getFdrDescription(hint); - + auto des = (hint == HINT_INVALID) ? 
chooseEngine(target, lits, make_small) + : getFdrDescription(hint); if (!des) { return nullptr; } @@ -552,27 +564,23 @@ fdrBuildTableInternal(const vector &lits, bool make_small, des->stride = 1; } - FDRCompiler fc(lits, *des, make_small); - return fc.build(link); + FDRCompiler fc(lits, *des, make_small, grey); + return fc.build(); } -aligned_unique_ptr fdrBuildTable(const vector &lits, - bool make_small, const target_t &target, - const Grey &grey, - hwlmStreamingControl *stream_control) { - return fdrBuildTableInternal(lits, make_small, target, grey, HINT_INVALID, - stream_control); +bytecode_ptr fdrBuildTable(const vector &lits, + bool make_small, const target_t &target, + const Grey &grey) { + return fdrBuildTableInternal(lits, make_small, target, grey, HINT_INVALID); } #if !defined(RELEASE_BUILD) -aligned_unique_ptr -fdrBuildTableHinted(const vector &lits, bool make_small, u32 hint, - const target_t &target, const Grey &grey, - hwlmStreamingControl *stream_control) { - pair link(nullptr, 0); - return fdrBuildTableInternal(lits, make_small, target, grey, hint, - stream_control); +bytecode_ptr fdrBuildTableHinted(const vector &lits, + bool make_small, u32 hint, + const target_t &target, + const Grey &grey) { + return fdrBuildTableInternal(lits, make_small, target, grey, hint); } #endif diff --git a/src/fdr/fdr_compile.h b/src/fdr/fdr_compile.h index c12e00714..58047600f 100644 --- a/src/fdr/fdr_compile.h +++ b/src/fdr/fdr_compile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,7 +34,7 @@ #define FDR_COMPILE_H #include "ue2common.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include @@ -43,21 +43,18 @@ struct FDR; namespace ue2 { struct hwlmLiteral; -struct hwlmStreamingControl; struct Grey; struct target_t; -ue2::aligned_unique_ptr -fdrBuildTable(const std::vector &lits, bool make_small, - const target_t &target, const Grey &grey, - hwlmStreamingControl *stream_control = nullptr); +bytecode_ptr fdrBuildTable(const std::vector &lits, + bool make_small, const target_t &target, + const Grey &grey); #if !defined(RELEASE_BUILD) -ue2::aligned_unique_ptr -fdrBuildTableHinted(const std::vector &lits, bool make_small, - u32 hint, const target_t &target, const Grey &grey, - hwlmStreamingControl *stream_control = nullptr); +bytecode_ptr fdrBuildTableHinted(const std::vector &lits, + bool make_small, u32 hint, + const target_t &target, const Grey &grey); #endif diff --git a/src/fdr/fdr_compile_internal.h b/src/fdr/fdr_compile_internal.h index 48e2ed6f3..756fe8e70 100644 --- a/src/fdr/fdr_compile_internal.h +++ b/src/fdr/fdr_compile_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,7 +31,7 @@ #include "ue2common.h" #include "hwlm/hwlm_literal.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include #include @@ -55,21 +55,22 @@ typedef u32 PositionInBucket; // zero is 'we are matching right now!", class EngineDescription; class FDREngineDescription; struct hwlmStreamingControl; +struct Grey; -std::pair, size_t> setupFullMultiConfs( - const std::vector &lits, const EngineDescription &eng, - std::map> &bucketToLits, - bool 
make_small);
+bytecode_ptr<u8> setupFullConfs(const std::vector<hwlmLiteral> &lits,
+                                const EngineDescription &eng,
+                                std::map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits,
+                                bool make_small);
 
 // all suffixes include an implicit max_bucket_width suffix to ensure that
 // we always read a full-scale flood "behind" us in terms of what's in our
 // state; if we don't have a flood that's long enough we won't be in the
 // right state yet to allow blindly advancing
-std::pair<aligned_unique_ptr<u8>, size_t>
-setupFDRFloodControl(const std::vector<hwlmLiteral> &lits,
-                     const EngineDescription &eng);
+bytecode_ptr<u8> setupFDRFloodControl(const std::vector<hwlmLiteral> &lits,
+                                      const EngineDescription &eng,
+                                      const Grey &grey);
 
-std::pair<aligned_unique_ptr<u8>, size_t>
+bytecode_ptr<u8>
 fdrBuildTableStreaming(const std::vector<hwlmLiteral> &lits,
                        hwlmStreamingControl &stream_control);
 
diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp
index e77c46d1f..319141c4d 100644
--- a/src/fdr/fdr_confirm_compile.cpp
+++ b/src/fdr/fdr_confirm_compile.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -45,10 +45,7 @@ using namespace std;
 
 namespace ue2 {
 
-using ConfSplitType = u8;
-using BucketSplitPair = pair<BucketIndex, ConfSplitType>;
-using BC2CONF = map<BucketSplitPair,
-                    pair<aligned_unique_ptr<FDRConfirm>, size_t>>;
+using BC2CONF = map<BucketIndex, bytecode_ptr<FDRConfirm>>;
 
 // return the number of bytes beyond a length threshold in all strings in lits
 static
@@ -150,9 +147,9 @@ void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
 
 //#define FDR_CONFIRM_DUMP 1
 
-static pair<aligned_unique_ptr<FDRConfirm>, size_t>
-getFDRConfirm(const vector<hwlmLiteral> &lits, bool applyOneCharOpt,
-              bool make_small, bool make_confirm) {
+static
+bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
+                                       bool make_small, bool make_confirm) {
     vector<LitInfo> tmpLitInfo(lits.size());
     CONF_TYPE andmsk;
     fillLitInfo(lits, tmpLitInfo, andmsk);
@@ -166,7 +163,7 @@
     if (make_small) {
         nBits = min(10U, lg2(lits.size()) + 1);
     } else {
-        nBits = min(13U, lg2(lits.size()) + 4);
+        nBits = lg2(lits.size()) + 4;
     }
 
     CONF_TYPE mult = (CONF_TYPE)0x0b4e0ef37bc32127ULL;
@@ -177,8 +174,7 @@
     u32 soleLitCmp = 0;
     u32 soleLitMsk = 0;
 
-    if ((applyOneCharOpt && lits.size() == 1 && lits[0].s.size() == 0 &&
-        lits[0].msk.empty()) || make_confirm == false) {
+    if (!make_confirm) {
         flags = FDRC_FLAG_NO_CONFIRM;
         if (lits[0].noruns) {
             flags |= NoRepeat; // messy - need to clean this up later as flags is sorta kinda obsoleted
@@ -288,7 +284,7 @@
            sizeof(LitInfo) * lits.size() + totalLitSize;
     size = ROUNDUP_N(size, alignof(FDRConfirm));
 
-    auto fdrc = aligned_zmalloc_unique<FDRConfirm>(size);
+    auto fdrc = make_zeroed_bytecode_ptr<FDRConfirm>(size);
     assert(fdrc); // otherwise would have thrown std::bad_alloc
 
     fdrc->andmsk = andmsk;
@@ -322,32 +318,15 @@
         LiteralIndex litIdx = *i;
 
         // Write LitInfo header.
-        u8 *oldPtr = ptr;
         LitInfo &finalLI = *(LitInfo *)ptr;
         finalLI = tmpLitInfo[litIdx];
 
         ptr += sizeof(LitInfo); // String starts directly after LitInfo.
-
-        // Write literal prefix (everything before the last N characters,
-        // as the last N are already confirmed).
- const string &t = lits[litIdx].s; - if (t.size() > sizeof(CONF_TYPE)) { - size_t prefix_len = t.size() - sizeof(CONF_TYPE); - memcpy(ptr, t.c_str(), prefix_len); - ptr += prefix_len; - } - - ptr = ROUNDUP_PTR(ptr, alignof(LitInfo)); + assert(lits[litIdx].s.size() <= sizeof(CONF_TYPE)); if (next(i) == e) { finalLI.next = 0; } else { - // our next field represents an adjustment on top of - // current address + the actual size of the literal - // so we track any rounding up done for alignment and - // add this in - that way we don't have to use bigger - // than a u8 (for now) - assert((size_t)(ptr - oldPtr) > t.size()); - finalLI.next = verify_u8(ptr - oldPtr - t.size()); + finalLI.next = 1; } } assert((size_t)(ptr - fdrc_base) <= size); @@ -358,19 +337,16 @@ getFDRConfirm(const vector &lits, bool applyOneCharOpt, size_t actual_size = ROUNDUP_N((size_t)(ptr - fdrc_base), alignof(FDRConfirm)); assert(actual_size <= size); + fdrc.shrink(actual_size); - return {move(fdrc), actual_size}; + return fdrc; } -static -u32 setupMultiConfirms(const vector &lits, - const EngineDescription &eng, BC2CONF &bc2Conf, - map > &bucketToLits, - bool make_small) { - u32 pullBack = eng.getConfirmPullBackDistance(); - u32 splitMask = eng.getConfirmTopLevelSplit() - 1; - bool splitHasCase = splitMask & 0x20; - +bytecode_ptr +setupFullConfs(const vector &lits, + const EngineDescription &eng, + map> &bucketToLits, + bool make_small) { bool makeConfirm = true; unique_ptr teddyDescr = getTeddyDescription(eng.getID()); @@ -378,101 +354,43 @@ u32 setupMultiConfirms(const vector &lits, makeConfirm = teddyDescr->needConfirm(lits); } + BC2CONF bc2Conf; u32 totalConfirmSize = 0; for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) { if (!bucketToLits[b].empty()) { - vector> vl(eng.getConfirmTopLevelSplit()); + vector vl; for (const LiteralIndex &lit_idx : bucketToLits[b]) { - hwlmLiteral lit = lits[lit_idx]; // copy - // c is last char of this literal - u8 c = *(lit.s.rbegin()); - - bool suppressSplit = false; - if (pullBack) { - // make a shorter string to work over if we're pulling back - // getFDRConfirm doesn't know about that stuff - assert(lit.s.size() >= pullBack); - lit.s.resize(lit.s.size() - pullBack); - - u8 c_sub, c_sub_msk; - if (lit.msk.empty()) { - c_sub = 0; - c_sub_msk = 0; - } else { - c_sub = *(lit.cmp.rbegin()); - c_sub_msk = *(lit.msk.rbegin()); - size_t len = lit.msk.size() - - min(lit.msk.size(), (size_t)pullBack); - lit.msk.resize(len); - lit.cmp.resize(len); - } - - // if c_sub_msk is 0xff and lit.nocase - // resteer 'c' to an exact value and set suppressSplit - if ((c_sub_msk == 0xff) && (lit.nocase)) { - suppressSplit = true; - c = c_sub; - } - } - - if (!suppressSplit && splitHasCase && lit.nocase && - ourisalpha(c)) { - vl[(u8)(mytoupper(c) & splitMask)].push_back(lit); - vl[(u8)(mytolower(c) & splitMask)].push_back(lit); - } else { - vl[c & splitMask].push_back(lit); - } + vl.push_back(lits[lit_idx]); } - for (u32 c = 0; c < eng.getConfirmTopLevelSplit(); c++) { - if (vl[c].empty()) { - continue; - } - DEBUG_PRINTF("b %d c %02x sz %zu\n", b, c, vl[c].size()); - auto key = make_pair(b, c); - auto fc = getFDRConfirm(vl[c], eng.typicallyHoldsOneCharLits(), - make_small, makeConfirm); - totalConfirmSize += fc.second; - assert(bc2Conf.find(key) == end(bc2Conf)); - bc2Conf.emplace(key, move(fc)); - } + DEBUG_PRINTF("b %d sz %zu\n", b, vl.size()); + auto fc = getFDRConfirm(vl, make_small, makeConfirm); + totalConfirmSize += fc.size(); + bc2Conf.emplace(b, move(fc)); } } - return totalConfirmSize; -} 
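// (For context on what getFDRConfirm above builds: at scan time FDR's first
// stage hands back a 64-bit conf value; a multiplicative hash over the
// andmsk/mult/nBits fields set up above selects a LitInfo chain, and since
// every literal now fits in sizeof(CONF_TYPE) == 8 bytes, chain entries are
// fixed-size and can be walked with li++ rather than variable strides. A
// sketch of the hash, assuming the CONF_HASH_CALL form in fdr_confirm.h:
//
//   static inline u32 confSlot(CONF_TYPE v, CONF_TYPE andmsk,
//                              CONF_TYPE mult, u32 nBits) {
//       return (u32)(((v & andmsk) * mult) >> (64 - nBits));
//   }
//
// Sizing the table with nBits = lg2(lits.size()) + 4 gives roughly 16
// slots per literal, keeping the chains short.)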
- -pair, size_t> -setupFullMultiConfs(const vector &lits, - const EngineDescription &eng, - map> &bucketToLits, - bool make_small) { - BC2CONF bc2Conf; - u32 totalConfirmSize = setupMultiConfirms(lits, eng, bc2Conf, bucketToLits, - make_small); - u32 primarySwitch = eng.getConfirmTopLevelSplit(); u32 nBuckets = eng.getNumBuckets(); - u32 totalConfSwitchSize = primarySwitch * nBuckets * sizeof(u32); + u32 totalConfSwitchSize = nBuckets * sizeof(u32); u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize); - auto buf = aligned_zmalloc_unique(totalSize); + auto buf = make_zeroed_bytecode_ptr(totalSize, 16); assert(buf); // otherwise would have thrown std::bad_alloc u32 *confBase = (u32 *)buf.get(); u8 *ptr = buf.get() + totalConfSwitchSize; for (const auto &m : bc2Conf) { - const BucketIndex &b = m.first.first; - const u8 &c = m.first.second; - const pair, size_t> &p = m.second; + const BucketIndex &idx = m.first; + const bytecode_ptr &p = m.second; // confirm offset is relative to the base of this structure, now u32 confirm_offset = verify_u32(ptr - buf.get()); - memcpy(ptr, p.first.get(), p.second); - ptr += p.second; - u32 idx = c * nBuckets + b; + memcpy(ptr, p.get(), p.size()); + ptr += p.size(); confBase[idx] = confirm_offset; } - return {move(buf), totalSize}; + + return buf; } } // namespace ue2 diff --git a/src/fdr/fdr_confirm_runtime.h b/src/fdr/fdr_confirm_runtime.h index 87ade9fea..a0603c929 100644 --- a/src/fdr/fdr_confirm_runtime.h +++ b/src/fdr/fdr_confirm_runtime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,8 +40,8 @@ // the whole confirmation procedure static really_inline void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a, - size_t i, u32 pullBackAmount, hwlmcb_rv_t *control, - u32 *last_match, u64a conf_key) { + size_t i, hwlmcb_rv_t *control, u32 *last_match, + u64a conf_key) { assert(i < a->len); assert(ISALIGNED(fdrc)); @@ -68,13 +68,10 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a goto out; } - const u8 *loc = buf + i - li->size + 1 - pullBackAmount; + const u8 *loc = buf + i - li->size + 1; - u8 caseless = li->flags & Caseless; if (loc < buf) { u32 full_overhang = buf - loc; - - const u8 *history = a->buf_history; size_t len_history = a->len_history; // can't do a vectored confirm either if we don't have @@ -82,44 +79,15 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a if (full_overhang > len_history) { goto out; } - - // as for the regular case, no need to do a full confirm if - // we're a short literal - if (unlikely(li->size > sizeof(CONF_TYPE))) { - const u8 *s1 = (const u8 *)li + sizeof(*li); - const u8 *s2 = s1 + full_overhang; - const u8 *loc1 = history + len_history - full_overhang; - const u8 *loc2 = buf; - size_t size1 = MIN(full_overhang, li->size - sizeof(CONF_TYPE)); - size_t wind_size2_back = sizeof(CONF_TYPE) + full_overhang; - size_t size2 = wind_size2_back > li->size ? 
- 0 : li->size - wind_size2_back; - - if (cmpForward(loc1, s1, size1, caseless)) { - goto out; - } - if (cmpForward(loc2, s2, size2, caseless)) { - goto out; - } - } - } else { // NON-VECTORING PATH - - // if string < conf_type we don't need regular string cmp - if (unlikely(li->size > sizeof(CONF_TYPE))) { - const u8 *s = (const u8 *)li + sizeof(*li); - if (cmpForward(loc, s, li->size - sizeof(CONF_TYPE), - caseless)) { - goto out; - } - } } + assert(li->size <= sizeof(CONF_TYPE)); if (unlikely(!(li->groups & *control))) { goto out; } if (unlikely(li->flags & ComplexConfirm)) { - const u8 *loc2 = buf + i - li->extended_size + 1 - pullBackAmount; + const u8 *loc2 = buf + i - li->extended_size + 1; if (loc2 < buf) { u32 full_overhang = buf - loc2; size_t len_history = a->len_history; @@ -133,7 +101,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a *control = a->cb(loc - buf, i, li->id, a->ctxt); out: oldNext = li->next; // oldNext is either 0 or an 'adjust' value - li = (const struct LitInfo *)((const u8 *)li + oldNext + li->size); + li++; } while (oldNext); } @@ -148,7 +116,7 @@ void confWithBit1(const struct FDRConfirm *fdrc, assert(ISALIGNED(fdrc)); if (unlikely(fdrc->mult)) { - confWithBit(fdrc, a, i, 0, control, last_match, conf_key); + confWithBit(fdrc, a, i, control, last_match, conf_key); return; } else { u32 id = fdrc->nBitsOrSoleID; @@ -176,7 +144,7 @@ void confWithBitMany(const struct FDRConfirm *fdrc, } if (unlikely(fdrc->mult)) { - confWithBit(fdrc, a, i, 0, control, last_match, conf_key); + confWithBit(fdrc, a, i, control, last_match, conf_key); return; } else { const u32 id = fdrc->nBitsOrSoleID; diff --git a/src/fdr/fdr_engine_description.cpp b/src/fdr/fdr_engine_description.cpp index 5e923b08f..2f9ba420c 100644 --- a/src/fdr/fdr_engine_description.cpp +++ b/src/fdr/fdr_engine_description.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -44,8 +44,7 @@ namespace ue2 { FDREngineDescription::FDREngineDescription(const FDREngineDef &def) : EngineDescription(def.id, targetByArchFeatures(def.cpu_features), - def.numBuckets, def.confirmPullBackDistance, - def.confirmTopLevelSplit), + def.numBuckets), schemeWidth(def.schemeWidth), stride(0), bits(0) {} u32 FDREngineDescription::getDefaultFloodSuffixLength() const { @@ -55,7 +54,7 @@ u32 FDREngineDescription::getDefaultFloodSuffixLength() const { } void getFdrDescriptions(vector *out) { - static const FDREngineDef def = {0, 128, 8, 0, 1, 256}; + static const FDREngineDef def = {0, 64, 8, 0}; out->clear(); out->emplace_back(def); } diff --git a/src/fdr/fdr_engine_description.h b/src/fdr/fdr_engine_description.h index d4e70d4b1..09c5ce867 100644 --- a/src/fdr/fdr_engine_description.h +++ b/src/fdr/fdr_engine_description.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -43,8 +43,6 @@ struct FDREngineDef { u32 schemeWidth; u32 numBuckets; u64a cpu_features; - u32 confirmPullBackDistance; - u32 confirmTopLevelSplit; }; class FDREngineDescription : public EngineDescription { @@ -64,7 +62,6 @@ class FDREngineDescription : public EngineDescription { explicit 
FDREngineDescription(const FDREngineDef &def); u32 getDefaultFloodSuffixLength() const override; - bool typicallyHoldsOneCharLits() const override { return stride == 1; } }; std::unique_ptr diff --git a/src/fdr/fdr_internal.h b/src/fdr/fdr_internal.h index 3bf828377..a425d78c8 100644 --- a/src/fdr/fdr_internal.h +++ b/src/fdr/fdr_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -71,11 +71,6 @@ struct FDR { u32 maxStringLen; u32 floodOffset; - /** link is the relative offset of a secondary included FDR table for - * stream handling if we're a primary FDR table or the subsidiary tertiary - * structures (spillover strings and hash table) if we're a secondary - * structure. */ - u32 link; u8 stride; /* stride - how frequeuntly the data is consulted by the first * stage matcher */ u8 domain; /* number of bits used to index into main FDR table. This value diff --git a/src/fdr/flood_compile.cpp b/src/fdr/flood_compile.cpp index 62693c300..7dcc17d18 100644 --- a/src/fdr/flood_compile.cpp +++ b/src/fdr/flood_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,6 +30,7 @@ #include "fdr_confirm.h" #include "fdr_compile_internal.h" #include "fdr_engine_description.h" +#include "grey.h" #include "ue2common.h" #include "util/alloc.h" #include "util/bitutils.h" @@ -90,9 +91,9 @@ void addFlood(vector &tmpFlood, u8 c, const hwlmLiteral &lit, } } -pair, size_t> -setupFDRFloodControl(const vector &lits, - const EngineDescription &eng) { +bytecode_ptr setupFDRFloodControl(const vector &lits, + const EngineDescription &eng, + const Grey &grey) { vector tmpFlood(N_CHARS); u32 default_suffix = eng.getDefaultFloodSuffixLength(); @@ -187,6 +188,14 @@ setupFDRFloodControl(const vector &lits, } #endif + // If flood detection has been switched off in the grey box, we comply by + // setting idCount too high for all floods. 
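    // (Presumably the runtime refuses to fast-path any flood whose idCount
    // exceeds the limit it can service, so saturating every entry simply
    // makes floodDetect fall through to the ordinary scan loop: match
    // results are unchanged, only the flood shortcut is disabled.)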
+ if (!grey.fdrAllowFlood) { + for (auto &fl : tmpFlood) { + fl.idCount = FDR_FLOOD_MAX_IDS; + } + } + map flood2chars; for (u32 i = 0; i < N_CHARS; i++) { FDRFlood fl = tmpFlood[i]; @@ -198,7 +207,7 @@ setupFDRFloodControl(const vector &lits, size_t floodStructSize = sizeof(FDRFlood) * nDistinctFloods; size_t totalSize = ROUNDUP_16(floodHeaderSize + floodStructSize); - auto buf = aligned_zmalloc_unique(totalSize); + auto buf = make_zeroed_bytecode_ptr(totalSize, 16); assert(buf); // otherwise would have thrown std::bad_alloc u32 *floodHeader = (u32 *)buf.get(); @@ -218,7 +227,7 @@ setupFDRFloodControl(const vector &lits, DEBUG_PRINTF("made a flood structure with %zu + %zu = %zu\n", floodHeaderSize, floodStructSize, totalSize); - return {move(buf), totalSize}; + return buf; } } // namespace ue2 diff --git a/src/fdr/flood_runtime.h b/src/fdr/flood_runtime.h index 97723be54..d3f6b3b29 100644 --- a/src/fdr/flood_runtime.h +++ b/src/fdr/flood_runtime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -100,7 +100,7 @@ const u8 * floodDetect(const struct FDR * fdr, // tryFloodDetect is never put in places where unconditional // reads a short distance forward or backward here // TODO: rationale for this line needs to be rediscovered!! - size_t mainLoopLen = len > iterBytes ? len - iterBytes : 0; + size_t mainLoopLen = len > 2 * iterBytes ? len - 2 * iterBytes : 0; const u32 i = ptr - buf; u32 j = i; diff --git a/src/fdr/teddy.c b/src/fdr/teddy.c index e7a0fccde..a3f7cfaf4 100644 --- a/src/fdr/teddy.c +++ b/src/fdr/teddy.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -129,7 +129,8 @@ m128 prep_conf_teddy_m1(const m128 *maskBase, m128 val) { m128 mask = set16x8(0xf); m128 lo = and128(val, mask); m128 hi = and128(rshift64_m128(val, 4), mask); - return and128(pshufb(maskBase[0*2], lo), pshufb(maskBase[0*2+1], hi)); + return and128(pshufb_m128(maskBase[0 * 2], lo), + pshufb_m128(maskBase[0 * 2 + 1], hi)); } static really_inline @@ -139,8 +140,8 @@ m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 val) { m128 hi = and128(rshift64_m128(val, 4), mask); m128 r = prep_conf_teddy_m1(maskBase, val); - m128 res_1 = and128(pshufb(maskBase[1*2], lo), - pshufb(maskBase[1*2+1], hi)); + m128 res_1 = and128(pshufb_m128(maskBase[1*2], lo), + pshufb_m128(maskBase[1*2+1], hi)); m128 res_shifted_1 = palignr(res_1, *old_1, 16-1); *old_1 = res_1; return and128(r, res_shifted_1); @@ -154,8 +155,8 @@ m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2, m128 hi = and128(rshift64_m128(val, 4), mask); m128 r = prep_conf_teddy_m2(maskBase, old_1, val); - m128 res_2 = and128(pshufb(maskBase[2*2], lo), - pshufb(maskBase[2*2+1], hi)); + m128 res_2 = and128(pshufb_m128(maskBase[2*2], lo), + pshufb_m128(maskBase[2*2+1], hi)); m128 res_shifted_2 = palignr(res_2, *old_2, 16-2); *old_2 = res_2; return and128(r, res_shifted_2); @@ -169,8 +170,8 @@ m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2, m128 hi = and128(rshift64_m128(val, 4), mask); m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, val); - m128 res_3 = and128(pshufb(maskBase[3*2], lo), - 
pshufb(maskBase[3*2+1], hi)); + m128 res_3 = and128(pshufb_m128(maskBase[3*2], lo), + pshufb_m128(maskBase[3*2+1], hi)); m128 res_shifted_3 = palignr(res_3, *old_3, 16-3); *old_3 = res_3; return and128(r, res_shifted_3); diff --git a/src/fdr/teddy.h b/src/fdr/teddy.h index e2936723a..35756c530 100644 --- a/src/fdr/teddy.h +++ b/src/fdr/teddy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,6 +34,7 @@ #define TEDDY_H_ #include "hwlm/hwlm.h" // for hwlm_group_t +#include "util/arch.h" struct FDR; // forward declaration from fdr_internal.h struct FDR_Runtime_Args; @@ -70,7 +71,7 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control); -#if defined(__AVX2__) +#if defined(HAVE_AVX2) hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr, const struct FDR_Runtime_Args *a, @@ -104,15 +105,6 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control); -hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control); - -hwlm_error_t -fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control); - -#endif /* __AVX2__ */ +#endif /* HAVE_AVX2 */ #endif /* TEDDY_H_ */ diff --git a/src/fdr/teddy_avx2.c b/src/fdr/teddy_avx2.c index e4a836d47..299825cc4 100644 --- a/src/fdr/teddy_avx2.c +++ b/src/fdr/teddy_avx2.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,78 +35,10 @@ #include "teddy.h" #include "teddy_internal.h" #include "teddy_runtime_common.h" +#include "util/arch.h" #include "util/simd_utils.h" -#if defined(__AVX2__) - -static const u8 ALIGN_AVX_DIRECTIVE p_mask_arr256[33][64] = { - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, - 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff} -}; +#if defined(HAVE_AVX2) #ifdef ARCH_64_BIT #define CONFIRM_FAT_TEDDY(var, bucket, offset, reason, conf_fn) \ @@ -199,22 +131,6 @@ do { \ } while (0); #endif -#define CONFIRM_FAST_TEDDY(var, offset, reason, conf_fn) \ -do { \ - if (unlikely(isnonzero256(var))) { \ - u32 arrCnt = 0; \ - m128 lo = cast256to128(var); \ - m128 hi = movdq_hi(var); \ - bit_array_fast_teddy(lo, bitArr, &arrCnt, offset); \ - bit_array_fast_teddy(hi, bitArr, &arrCnt, offset + 2); \ - for (u32 i = 0; i < arrCnt; i++) { \ - conf_fn(bitArr[i], confBase, reason, a, ptr, &control, \ - &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - } \ -} while (0); - static really_inline m256 vectoredLoad2x128(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi, const u8 *buf_history, size_t len_history, @@ -226,193 +142,13 @@ m256 vectoredLoad2x128(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi, return ret; } -/* - * \brief Copy a block of [0,31] bytes efficiently. - * - * This function is a workaround intended to stop some compilers from - * synthesizing a memcpy function call out of the copy of a small number of - * bytes that we do in vectoredLoad128. - */ -static really_inline -void copyRuntBlock256(u8 *dst, const u8 *src, size_t len) { - switch (len) { - case 0: - break; - case 1: - *dst = *src; - break; - case 2: - unaligned_store_u16(dst, unaligned_load_u16(src)); - break; - case 3: - unaligned_store_u16(dst, unaligned_load_u16(src)); - dst[2] = src[2]; - break; - case 4: - unaligned_store_u32(dst, unaligned_load_u32(src)); - break; - case 5: - case 6: - case 7: - /* Perform copy with two overlapping 4-byte chunks. */ - unaligned_store_u32(dst + len - 4, unaligned_load_u32(src + len - 4)); - unaligned_store_u32(dst, unaligned_load_u32(src)); - break; - case 8: - unaligned_store_u64a(dst, unaligned_load_u64a(src)); - break; - case 9: - case 10: - case 11: - case 12: - case 13: - case 14: - case 15: - /* Perform copy with two overlapping 8-byte chunks. */ - unaligned_store_u64a(dst + len - 8, unaligned_load_u64a(src + len - 8)); - unaligned_store_u64a(dst, unaligned_load_u64a(src)); - break; - case 16: - storeu128(dst, loadu128(src)); - break; - default: - /* Perform copy with two overlapping 16-byte chunks. 
*/ - assert(len < 32); - storeu128(dst + len - 16, loadu128(src + len - 16)); - storeu128(dst, loadu128(src)); - break; - } -} - -static really_inline -m256 vectoredLoad256(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi, - const u8 *buf_history, size_t len_history) { - union { - u8 val8[32]; - m256 val256; - } u; - - uintptr_t copy_start; - uintptr_t copy_len; - - if (ptr >= lo) { - uintptr_t avail = (uintptr_t)(hi - ptr); - if (avail >= 32) { - *p_mask = load256(p_mask_arr256[32] + 32); - return loadu256(ptr); - } - *p_mask = load256(p_mask_arr256[avail] + 32); - copy_start = 0; - copy_len = avail; - } else { - // need contains "how many chars to pull from history" - // calculate based on what we need, what we have in the buffer - // and only what we need to make primary confirm work - uintptr_t start = (uintptr_t)(lo - ptr); - uintptr_t i; - for (i = start; ptr + i < lo; i++) { - u.val8[i] = buf_history[len_history - (lo - (ptr + i))]; - } - uintptr_t end = MIN(32, (uintptr_t)(hi - ptr)); - *p_mask = loadu256(p_mask_arr256[end - start] + 32 - start); - copy_start = i; - copy_len = end - i; - } - - // Runt block from the buffer. - copyRuntBlock256(&u.val8[copy_start], &ptr[copy_start], copy_len); - - return u.val256; -} - -static really_inline -void do_confWithBit1_fast_teddy(u16 bits, const u32 *confBase, - CautionReason reason, - const struct FDR_Runtime_Args *a, - const u8 *ptr, hwlmcb_rv_t *control, - u32 *last_match) { - u32 byte = bits / 8; - u32 cf = confBase[bits % 8]; - const struct FDRConfirm *fdrc = (const struct FDRConfirm *) - ((const u8 *)confBase + cf); - u64a confVal = getConfVal(a, ptr, byte, reason); - confWithBit1(fdrc, a, ptr - a->buf + byte, control, last_match, confVal); -} - -static really_inline -void do_confWithBit_fast_teddy(u16 bits, const u32 *confBase, - CautionReason reason, - const struct FDR_Runtime_Args *a, const u8 *ptr, - hwlmcb_rv_t *control, u32 *last_match) { - u32 byte = bits / 8; - u32 bitRem = bits % 8; - u32 confSplit = *(ptr+byte) & 0x1f; - u32 idx = confSplit * 8 + bitRem; - u32 cf = confBase[idx]; - if (!cf) { - return; - } - const struct FDRConfirm *fdrc = (const struct FDRConfirm *) - ((const u8 *)confBase + cf); - if (!(fdrc->groups & *control)) { - return; - } - u64a confVal = getConfVal(a, ptr, byte, reason); - confWithBit(fdrc, a, ptr - a->buf + byte, 0, control, last_match, confVal); -} - -static really_inline -void bit_array_fast_teddy(m128 var, u16 *bitArr, u32 *arrCnt, u32 offset) { - if (unlikely(isnonzero128(var))) { -#ifdef ARCH_64_BIT - u64a part_0 = movq(var); - while (unlikely(part_0)) { - bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_0) + - 64 * (offset); - *arrCnt += 1; - } - u64a part_1 = movq(rshiftbyte_m128(var, 8)); - while (unlikely(part_1)) { - bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) + - 64 * (offset + 1); - *arrCnt += 1; - } -#else - u32 part_0 = movd(var); - while (unlikely(part_0)) { - bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_0) + - 32 * (offset * 2); - *arrCnt += 1; - } - u32 part_1 = movd(rshiftbyte_m128(var, 4)); - while (unlikely(part_1)) { - bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) + - 32 * (offset * 2 + 1); - *arrCnt += 1; - } - u32 part_2 = movd(rshiftbyte_m128(var, 8)); - while (unlikely(part_2)) { - bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_2) + - 32 * (offset * 2 + 2); - *arrCnt += 1; - } - u32 part_3 = movd(rshiftbyte_m128(var, 12)); - while (unlikely(part_3)) { - bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_3) + - 32 
* (offset * 2 + 3); - *arrCnt += 1; - } -#endif - } -} - static really_inline m256 prep_conf_fat_teddy_m1(const m256 *maskBase, m256 val) { m256 mask = set32x8(0xf); m256 lo = and256(val, mask); m256 hi = and256(rshift64_m256(val, 4), mask); - return and256(vpshufb(maskBase[0*2], lo), - vpshufb(maskBase[0*2+1], hi)); + return and256(pshufb_m256(maskBase[0*2], lo), + pshufb_m256(maskBase[0*2+1], hi)); } static really_inline @@ -422,8 +158,8 @@ m256 prep_conf_fat_teddy_m2(const m256 *maskBase, m256 *old_1, m256 val) { m256 hi = and256(rshift64_m256(val, 4), mask); m256 r = prep_conf_fat_teddy_m1(maskBase, val); - m256 res_1 = and256(vpshufb(maskBase[1*2], lo), - vpshufb(maskBase[1*2+1], hi)); + m256 res_1 = and256(pshufb_m256(maskBase[1*2], lo), + pshufb_m256(maskBase[1*2+1], hi)); m256 res_shifted_1 = vpalignr(res_1, *old_1, 16-1); *old_1 = res_1; return and256(r, res_shifted_1); @@ -437,8 +173,8 @@ m256 prep_conf_fat_teddy_m3(const m256 *maskBase, m256 *old_1, m256 *old_2, m256 hi = and256(rshift64_m256(val, 4), mask); m256 r = prep_conf_fat_teddy_m2(maskBase, old_1, val); - m256 res_2 = and256(vpshufb(maskBase[2*2], lo), - vpshufb(maskBase[2*2+1], hi)); + m256 res_2 = and256(pshufb_m256(maskBase[2*2], lo), + pshufb_m256(maskBase[2*2+1], hi)); m256 res_shifted_2 = vpalignr(res_2, *old_2, 16-2); *old_2 = res_2; return and256(r, res_shifted_2); @@ -452,20 +188,13 @@ m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2, m256 hi = and256(rshift64_m256(val, 4), mask); m256 r = prep_conf_fat_teddy_m3(maskBase, old_1, old_2, val); - m256 res_3 = and256(vpshufb(maskBase[3*2], lo), - vpshufb(maskBase[3*2+1], hi)); + m256 res_3 = and256(pshufb_m256(maskBase[3*2], lo), + pshufb_m256(maskBase[3*2+1], hi)); m256 res_shifted_3 = vpalignr(res_3, *old_3, 16-3); *old_3 = res_3; return and256(r, res_shifted_3); } -static really_inline -m256 prep_conf_fast_teddy_m1(m256 val, m256 mask, m256 maskLo, m256 maskHi) { - m256 lo = and256(val, mask); - m256 hi = and256(rshift64_m256(val, 4), mask); - return and256(vpshufb(maskLo, lo), vpshufb(maskHi, hi)); -} - static really_inline const m256 * getMaskBase_avx2(const struct Teddy *teddy) { return (const m256 *)((const u8 *)teddy + sizeof(struct Teddy)); @@ -959,136 +688,4 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr, return HWLM_SUCCESS; } -hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control) { - const u8 *buf_end = a->buf + a->len; - const u8 *ptr = a->buf + a->start_offset; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 *tryFloodDetect = a->firstFloodDetect; - u32 last_match = (u32)-1; - const struct Teddy *teddy = (const struct Teddy *)fdr; - const size_t iterBytes = 64; - DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", - a->buf, a->len, a->start_offset); - - const m128 *maskBase = getMaskBase(teddy); - const u32 *confBase = getConfBase(teddy, 1); - - const m256 maskLo = set2x128(maskBase[0]); - const m256 maskHi = set2x128(maskBase[1]); - const m256 mask = set32x8(0xf); - u16 bitArr[512]; - - const u8 *mainStart = ROUNDUP_PTR(ptr, 32); - DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); - if (ptr < mainStart) { - ptr = mainStart - 32; - m256 p_mask; - m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset, - buf_end, a->buf_history, a->len_history); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); - res_0 = and256(res_0, p_mask); - CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, 
do_confWithBit1_fast_teddy); - ptr += 32; - } - - if (ptr + 32 < buf_end) { - m256 val_0 = load256(ptr + 0); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); - CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy); - ptr += 32; - } - - for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { - __builtin_prefetch(ptr + (iterBytes*4)); - CHECK_FLOOD; - - m256 val_0 = load256(ptr + 0); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); - CONFIRM_FAST_TEDDY(res_0, 0, NOT_CAUTIOUS, do_confWithBit1_fast_teddy); - - m256 val_1 = load256(ptr + 32); - m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi); - CONFIRM_FAST_TEDDY(res_1, 4, NOT_CAUTIOUS, do_confWithBit1_fast_teddy); - } - - for (; ptr < buf_end; ptr += 32) { - m256 p_mask; - m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset, - buf_end, a->buf_history, a->len_history); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); - res_0 = and256(res_0, p_mask); - CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy); - } - - return HWLM_SUCCESS; -} - -hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control) { - const u8 *buf_end = a->buf + a->len; - const u8 *ptr = a->buf + a->start_offset; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 *tryFloodDetect = a->firstFloodDetect; - u32 last_match = (u32)-1; - const struct Teddy *teddy = (const struct Teddy *)fdr; - const size_t iterBytes = 64; - DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", - a->buf, a->len, a->start_offset); - - const m128 *maskBase = getMaskBase(teddy); - const u32 *confBase = getConfBase(teddy, 1); - - const m256 maskLo = set2x128(maskBase[0]); - const m256 maskHi = set2x128(maskBase[1]); - const m256 mask = set32x8(0xf); - u16 bitArr[512]; - - const u8 *mainStart = ROUNDUP_PTR(ptr, 32); - DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); - if (ptr < mainStart) { - ptr = mainStart - 32; - m256 p_mask; - m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset, - buf_end, a->buf_history, a->len_history); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); - res_0 = and256(res_0, p_mask); - CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy); - ptr += 32; - } - - if (ptr + 32 < buf_end) { - m256 val_0 = load256(ptr + 0); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); - CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy); - ptr += 32; - } - - for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { - __builtin_prefetch(ptr + (iterBytes*4)); - CHECK_FLOOD; - - m256 val_0 = load256(ptr + 0); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); - CONFIRM_FAST_TEDDY(res_0, 0, NOT_CAUTIOUS, do_confWithBit_fast_teddy); - - m256 val_1 = load256(ptr + 32); - m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi); - CONFIRM_FAST_TEDDY(res_1, 4, NOT_CAUTIOUS, do_confWithBit_fast_teddy); - } - - for (; ptr < buf_end; ptr += 32) { - m256 p_mask; - m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset, - buf_end, a->buf_history, a->len_history); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); - res_0 = and256(res_0, p_mask); - CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy); - } - - return HWLM_SUCCESS; -} - -#endif // __AVX2__ +#endif // HAVE_AVX2 diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index 
15b9665bb..6f956e8cb 100644
--- a/src/fdr/teddy_compile.cpp
+++ b/src/fdr/teddy_compile.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -26,22 +26,29 @@
 * POSSIBILITY OF SUCH DAMAGE.
 */
 
+/**
+ * \file
+ * \brief FDR literal matcher: Teddy build code.
+ */
+
+#include "teddy_compile.h"
+
 #include "fdr.h"
 #include "fdr_internal.h"
 #include "fdr_compile_internal.h"
 #include "fdr_confirm.h"
 #include "fdr_engine_description.h"
+#include "teddy_internal.h"
+#include "teddy_engine_description.h"
+#include "grey.h"
 #include "ue2common.h"
 #include "util/alloc.h"
 #include "util/compare.h"
+#include "util/noncopyable.h"
 #include "util/popcount.h"
 #include "util/target_info.h"
 #include "util/verify_types.h"
 
-#include "teddy_compile.h"
-#include "teddy_internal.h"
-#include "teddy_engine_description.h"
-
 #include
 #include
 #include
@@ -54,8 +61,6 @@
 #include
 #include
 
-#include <boost/core/noncopyable.hpp>
-
 using namespace std;
 
 namespace ue2 {
@@ -64,17 +69,20 @@ namespace {
 
 //#define TEDDY_DEBUG
 
-class TeddyCompiler : boost::noncopyable {
+class TeddyCompiler : noncopyable {
     const TeddyEngineDescription &eng;
+    const Grey &grey;
     const vector<hwlmLiteral> &lits;
     bool make_small;
 
public:
     TeddyCompiler(const vector<hwlmLiteral> &lits_in,
-                  const TeddyEngineDescription &eng_in, bool make_small_in)
-        : eng(eng_in), lits(lits_in), make_small(make_small_in) {}
+                  const TeddyEngineDescription &eng_in, bool make_small_in,
+                  const Grey &grey_in)
+        : eng(eng_in), grey(grey_in), lits(lits_in), make_small(make_small_in) {
+    }
 
-    aligned_unique_ptr<FDR> build(pair<aligned_unique_ptr<u8>, size_t> &link);
+    bytecode_ptr<FDR> build();
     bool pack(map<BucketIndex, vector<LiteralIndex> > &bucketToLits);
 };
 
@@ -274,8 +282,7 @@ bool TeddyCompiler::pack(map<BucketIndex,
 
-aligned_unique_ptr<FDR>
-TeddyCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) {
+bytecode_ptr<FDR> TeddyCompiler::build() {
     if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
         DEBUG_PRINTF("too many literals: %zu\n", lits.size());
         return nullptr;
@@ -308,16 +315,16 @@ TeddyCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) {
 
     size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;
 
-    auto floodControlTmp = setupFDRFloodControl(lits, eng);
-    auto confirmTmp = setupFullMultiConfs(lits, eng, bucketToLits, make_small);
+    auto floodControlTmp = setupFDRFloodControl(lits, eng, grey);
+    auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small);
 
     size_t size = ROUNDUP_N(sizeof(Teddy) +
-                            maskLen +
-                            confirmTmp.second +
-                            floodControlTmp.second +
-                            link.second, 16 * maskWidth);
+                            maskLen +
+                            confirmTmp.size() +
+                            floodControlTmp.size(),
+                            16 * maskWidth);
 
-    aligned_unique_ptr<FDR> fdr = aligned_zmalloc_unique<FDR>(size);
+    auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64);
     assert(fdr); // otherwise would have thrown std::bad_alloc
     Teddy *teddy = (Teddy *)fdr.get(); // ugly
     u8 *teddy_base = (u8 *)teddy;
@@ -327,19 +334,12 @@ TeddyCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) {
     teddy->maxStringLen = verify_u32(maxLen(lits));
 
     u8 *ptr = teddy_base + sizeof(Teddy) + maskLen;
-    memcpy(ptr, confirmTmp.first.get(), confirmTmp.second);
-    ptr += confirmTmp.second;
+    memcpy(ptr, confirmTmp.get(), confirmTmp.size());
+    ptr += confirmTmp.size();
 
     teddy->floodOffset = verify_u32(ptr - teddy_base);
-    memcpy(ptr, floodControlTmp.first.get(), floodControlTmp.second);
-    ptr += floodControlTmp.second;
-
-    if (link.first) {
-        teddy->link = verify_u32(ptr - teddy_base);
-        memcpy(ptr, link.first.get(), link.second);
-    } else {
-        teddy->link = 0;
-    }
+    memcpy(ptr,
floodControlTmp.get(), floodControlTmp.size()); + ptr += floodControlTmp.size(); u8 *baseMsk = teddy_base + sizeof(Teddy); @@ -423,10 +423,10 @@ TeddyCompiler::build(pair, size_t> &link) { } // namespace -aligned_unique_ptr -teddyBuildTableHinted(const vector &lits, bool make_small, - u32 hint, const target_t &target, - pair, size_t> &link) { +bytecode_ptr teddyBuildTableHinted(const vector &lits, + bool make_small, u32 hint, + const target_t &target, + const Grey &grey) { unique_ptr des; if (hint == HINT_INVALID) { des = chooseTeddyEngine(target, lits); @@ -436,8 +436,8 @@ teddyBuildTableHinted(const vector &lits, bool make_small, if (!des) { return nullptr; } - TeddyCompiler tc(lits, *des, make_small); - return tc.build(link); + TeddyCompiler tc(lits, *des, make_small, grey); + return tc.build(); } } // namespace ue2 diff --git a/src/fdr/teddy_compile.h b/src/fdr/teddy_compile.h index 276c1347b..5ff4d8394 100644 --- a/src/fdr/teddy_compile.h +++ b/src/fdr/teddy_compile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief FDR literal matcher: Teddy build API. */ @@ -34,22 +35,22 @@ #define TEDDY_COMPILE_H #include "ue2common.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include -#include // std::pair struct FDR; -struct target_t; namespace ue2 { +struct Grey; struct hwlmLiteral; +struct target_t; -ue2::aligned_unique_ptr -teddyBuildTableHinted(const std::vector &lits, bool make_small, - u32 hint, const target_t &target, - std::pair, size_t> &link); +bytecode_ptr teddyBuildTableHinted(const std::vector &lits, + bool make_small, u32 hint, + const target_t &target, + const Grey &grey); } // namespace ue2 diff --git a/src/fdr/teddy_engine_description.cpp b/src/fdr/teddy_engine_description.cpp index d95f4937a..f7559b13f 100644 --- a/src/fdr/teddy_engine_description.cpp +++ b/src/fdr/teddy_engine_description.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -44,8 +44,7 @@ namespace ue2 { TeddyEngineDescription::TeddyEngineDescription(const TeddyEngineDef &def) : EngineDescription(def.id, targetByArchFeatures(def.cpu_features), - def.numBuckets, def.confirmPullBackDistance, - def.confirmTopLevelSplit), + def.numBuckets), numMasks(def.numMasks), packed(def.packed) {} u32 TeddyEngineDescription::getDefaultFloodSuffixLength() const { @@ -66,24 +65,22 @@ bool TeddyEngineDescription::needConfirm(const vector &lits) const void getTeddyDescriptions(vector *out) { static const TeddyEngineDef defns[] = { - { 1, 0 | HS_CPU_FEATURES_AVX2, 1, 8, false, 0, 1 }, - { 2, 0 | HS_CPU_FEATURES_AVX2, 1, 8, true, 0, 32 }, - { 3, 0 | HS_CPU_FEATURES_AVX2, 1, 16, false, 0, 1 }, - { 4, 0 | HS_CPU_FEATURES_AVX2, 1, 16, true, 0, 32 }, - { 5, 0 | HS_CPU_FEATURES_AVX2, 2, 16, false, 0, 1 }, - { 6, 0 | HS_CPU_FEATURES_AVX2, 2, 16, true, 0, 32 }, - { 7, 0 | HS_CPU_FEATURES_AVX2, 3, 16, false, 0, 1 }, - { 8, 0 | HS_CPU_FEATURES_AVX2, 3, 16, true, 0, 32 }, - { 9, 0 | HS_CPU_FEATURES_AVX2, 4, 16, false, 0, 1 }, - { 10, 0 | HS_CPU_FEATURES_AVX2, 4, 16, true, 0, 32 }, - { 11, 0, 1, 8, false, 0, 1 }, - { 12, 
0, 1, 8, true, 0, 32 }, - { 13, 0, 2, 8, false, 0, 1 }, - { 14, 0, 2, 8, true, 0, 32 }, - { 15, 0, 3, 8, false, 0, 1 }, - { 16, 0, 3, 8, true, 0, 32 }, - { 17, 0, 4, 8, false, 0, 1 }, - { 18, 0, 4, 8, true, 0, 32 }, + { 3, 0 | HS_CPU_FEATURES_AVX2, 1, 16, false }, + { 4, 0 | HS_CPU_FEATURES_AVX2, 1, 16, true }, + { 5, 0 | HS_CPU_FEATURES_AVX2, 2, 16, false }, + { 6, 0 | HS_CPU_FEATURES_AVX2, 2, 16, true }, + { 7, 0 | HS_CPU_FEATURES_AVX2, 3, 16, false }, + { 8, 0 | HS_CPU_FEATURES_AVX2, 3, 16, true }, + { 9, 0 | HS_CPU_FEATURES_AVX2, 4, 16, false }, + { 10, 0 | HS_CPU_FEATURES_AVX2, 4, 16, true }, + { 11, 0, 1, 8, false }, + { 12, 0, 1, 8, true }, + { 13, 0, 2, 8, false }, + { 14, 0, 2, 8, true }, + { 15, 0, 3, 8, false }, + { 16, 0, 3, 8, true }, + { 17, 0, 4, 8, false }, + { 18, 0, 4, 8, true }, }; out->clear(); for (const auto &def : defns) { diff --git a/src/fdr/teddy_engine_description.h b/src/fdr/teddy_engine_description.h index 88d201394..3979a5d32 100644 --- a/src/fdr/teddy_engine_description.h +++ b/src/fdr/teddy_engine_description.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -45,8 +45,6 @@ struct TeddyEngineDef { u32 numMasks; u32 numBuckets; bool packed; - u32 confirmPullBackDistance; - u32 confirmTopLevelSplit; }; class TeddyEngineDescription : public EngineDescription { diff --git a/src/fdr/teddy_runtime_common.h b/src/fdr/teddy_runtime_common.h index dc65c70a6..c5f0885f6 100644 --- a/src/fdr/teddy_runtime_common.h +++ b/src/fdr/teddy_runtime_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -180,9 +180,7 @@ void do_confWithBit_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset, do { u32 bit = TEDDY_FIND_AND_CLEAR_LSB(conf); u32 byte = bit / bucket + offset; - u32 bitRem = bit % bucket; - u32 confSplit = *(ptr+byte) & 0x1f; - u32 idx = confSplit * bucket + bitRem; + u32 idx = bit % bucket; u32 cf = confBase[idx]; if (!cf) { continue; @@ -193,7 +191,7 @@ void do_confWithBit_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset, continue; } u64a confVal = getConfVal(a, ptr, byte, reason); - confWithBit(fdrc, a, ptr - a->buf + byte, 0, control, + confWithBit(fdrc, a, ptr - a->buf + byte, control, last_match, confVal); } while (unlikely(*conf)); } diff --git a/src/grey.cpp b/src/grey.cpp index 340a34bf6..24140c05b 100644 --- a/src/grey.cpp +++ b/src/grey.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,6 +42,7 @@ namespace ue2 { Grey::Grey(void) : optimiseComponentTree(true), + calcComponents(true), performGraphSimplification(true), prefilterReductions(true), removeEdgeRedundancy(true), @@ -54,7 +55,6 @@ Grey::Grey(void) : allowMcSheng(true), allowPuff(true), allowLiteral(true), - allowRose(true), allowViolet(true), allowExtendedNFA(true), /* bounded repeats of course */ allowLimExNFA(true), @@ -62,8 +62,10 @@ Grey::Grey(void) : allowSmallLiteralSet(true), allowCastle(true), allowDecoratedLiteral(true), + allowApproximateMatching(true), 
allowNoodle(true), fdrAllowTeddy(true), + fdrAllowFlood(true), violetAvoidSuffixes(true), violetAvoidWeakInfixes(true), violetDoubleCut(true), @@ -98,6 +100,7 @@ Grey::Grey(void) : minRoseLiteralLength(3), minRoseNetflowLiteralLength(2), maxRoseNetflowEdges(50000), /* otherwise no netflow pass. */ + maxEditDistance(16), minExtBoundedRepeatSize(32), goughCopyPropagate(true), goughRegisterAllocate(true), @@ -105,8 +108,6 @@ Grey::Grey(void) : roseGraphReduction(true), roseRoleAliasing(true), roseMasks(true), - roseMaxBadLeafLength(5), - roseConvertInfBadLeaves(true), roseConvertFloodProneSuffixes(true), roseMergeRosesDuringAliasing(true), roseMultiTopRoses(true), @@ -116,7 +117,6 @@ Grey::Grey(void) : roseMcClellanSuffix(1), roseMcClellanOutfix(2), roseTransformDelay(true), - roseDesiredSplit(4), earlyMcClellanPrefix(true), earlyMcClellanInfix(true), earlyMcClellanSuffix(true), @@ -157,7 +157,8 @@ Grey::Grey(void) : limitEngineSize(1073741824), // 1 GB limitDFASize(1073741824), // 1 GB limitNFASize(1048576), // 1 MB - limitLBRSize(1048576) // 1 MB + limitLBRSize(1048576), // 1 MB + limitApproxMatchingVertices(5000) { assert(maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */ } @@ -209,6 +210,7 @@ void applyGreyOverrides(Grey *g, const string &s) { } while (0) G_UPDATE(optimiseComponentTree); + G_UPDATE(calcComponents); G_UPDATE(performGraphSimplification); G_UPDATE(prefilterReductions); G_UPDATE(removeEdgeRedundancy); @@ -221,7 +223,6 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(allowMcSheng); G_UPDATE(allowPuff); G_UPDATE(allowLiteral); - G_UPDATE(allowRose); G_UPDATE(allowViolet); G_UPDATE(allowExtendedNFA); G_UPDATE(allowLimExNFA); @@ -230,7 +231,9 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(allowCastle); G_UPDATE(allowDecoratedLiteral); G_UPDATE(allowNoodle); + G_UPDATE(allowApproximateMatching); G_UPDATE(fdrAllowTeddy); + G_UPDATE(fdrAllowFlood); G_UPDATE(violetAvoidSuffixes); G_UPDATE(violetAvoidWeakInfixes); G_UPDATE(violetDoubleCut); @@ -265,6 +268,7 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(minRoseLiteralLength); G_UPDATE(minRoseNetflowLiteralLength); G_UPDATE(maxRoseNetflowEdges); + G_UPDATE(maxEditDistance); G_UPDATE(minExtBoundedRepeatSize); G_UPDATE(goughCopyPropagate); G_UPDATE(goughRegisterAllocate); @@ -272,8 +276,6 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(roseGraphReduction); G_UPDATE(roseRoleAliasing); G_UPDATE(roseMasks); - G_UPDATE(roseMaxBadLeafLength); - G_UPDATE(roseConvertInfBadLeaves); G_UPDATE(roseConvertFloodProneSuffixes); G_UPDATE(roseMergeRosesDuringAliasing); G_UPDATE(roseMultiTopRoses); @@ -283,7 +285,6 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(roseMcClellanSuffix); G_UPDATE(roseMcClellanOutfix); G_UPDATE(roseTransformDelay); - G_UPDATE(roseDesiredSplit); G_UPDATE(earlyMcClellanPrefix); G_UPDATE(earlyMcClellanInfix); G_UPDATE(earlyMcClellanSuffix); @@ -319,6 +320,7 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(limitDFASize); G_UPDATE(limitNFASize); G_UPDATE(limitLBRSize); + G_UPDATE(limitApproxMatchingVertices); #undef G_UPDATE if (key == "simple_som") { @@ -340,7 +342,6 @@ void applyGreyOverrides(Grey *g, const string &s) { g->allowMcClellan = false; g->allowPuff = false; g->allowLiteral = false; - g->allowRose = false; g->allowViolet = false; g->allowSmallLiteralSet = false; g->roseMasks = false; @@ -358,7 +359,6 @@ void applyGreyOverrides(Grey *g, const string &s) { g->allowMcClellan = true; g->allowPuff = false; 
g->allowLiteral = false; - g->allowRose = false; g->allowViolet = false; g->allowSmallLiteralSet = false; g->roseMasks = false; @@ -376,7 +376,6 @@ void applyGreyOverrides(Grey *g, const string &s) { g->allowMcClellan = true; g->allowPuff = false; g->allowLiteral = false; - g->allowRose = false; g->allowViolet = false; g->allowSmallLiteralSet = false; g->roseMasks = false; diff --git a/src/grey.h b/src/grey.h index 4882af7d0..505194181 100644 --- a/src/grey.h +++ b/src/grey.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -41,6 +41,7 @@ struct Grey { bool optimiseComponentTree; + bool calcComponents; bool performGraphSimplification; bool prefilterReductions; bool removeEdgeRedundancy; @@ -54,7 +55,6 @@ struct Grey { bool allowMcSheng; bool allowPuff; bool allowLiteral; - bool allowRose; bool allowViolet; bool allowExtendedNFA; bool allowLimExNFA; @@ -62,9 +62,11 @@ struct Grey { bool allowSmallLiteralSet; bool allowCastle; bool allowDecoratedLiteral; + bool allowApproximateMatching; bool allowNoodle; bool fdrAllowTeddy; + bool fdrAllowFlood; u32 violetAvoidSuffixes; /* 0=never, 1=sometimes, 2=always */ bool violetAvoidWeakInfixes; @@ -107,6 +109,7 @@ struct Grey { u32 minRoseLiteralLength; u32 minRoseNetflowLiteralLength; u32 maxRoseNetflowEdges; + u32 maxEditDistance; u32 minExtBoundedRepeatSize; /* to be considered for ng_repeat */ @@ -118,8 +121,6 @@ struct Grey { bool roseGraphReduction; bool roseRoleAliasing; bool roseMasks; - u32 roseMaxBadLeafLength; - bool roseConvertInfBadLeaves; bool roseConvertFloodProneSuffixes; bool roseMergeRosesDuringAliasing; bool roseMultiTopRoses; @@ -130,7 +131,6 @@ struct Grey { * always */ u32 roseMcClellanOutfix; /* 0 = off, 1 = sometimes, 2 = almost always */ bool roseTransformDelay; - u32 roseDesiredSplit; bool earlyMcClellanPrefix; bool earlyMcClellanInfix; @@ -202,6 +202,9 @@ struct Grey { u32 limitDFASize; //!< max size of a DFA (in bytes) u32 limitNFASize; //!< max size of an NFA (in bytes) u32 limitLBRSize; //!< max size of an LBR engine (in bytes) + + // Approximate matching limits. 
+ u32 limitApproxMatchingVertices; //!< max number of vertices per graph }; #ifndef RELEASE_BUILD diff --git a/src/hs.cpp b/src/hs.cpp index f64e867a2..e3c1f811c 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,10 +39,10 @@ #include "compiler/error.h" #include "nfagraph/ng.h" #include "nfagraph/ng_expr_info.h" -#include "nfagraph/ng_extparam.h" -#include "parser/parse_error.h" #include "parser/Parser.h" +#include "parser/parse_error.h" #include "parser/prefilter.h" +#include "parser/unsupported.h" #include "util/compile_error.h" #include "util/cpuid_flags.h" #include "util/depth.h" @@ -119,8 +119,9 @@ bool checkMode(unsigned int mode, hs_compile_error **comp_error) { static bool checkPlatform(const hs_platform_info *p, hs_compile_error **comp_error) { -#define HS_TUNE_LAST HS_TUNE_FAMILY_BDW -#define HS_CPU_FEATURES_ALL (HS_CPU_FEATURES_AVX2) + static constexpr u32 HS_TUNE_LAST = HS_TUNE_FAMILY_GLM; + static constexpr u32 HS_CPU_FEATURES_ALL = + HS_CPU_FEATURES_AVX2 | HS_CPU_FEATURES_AVX512; if (!p) { return true; @@ -277,9 +278,10 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags, } // namespace ue2 extern "C" HS_PUBLIC_API -hs_error_t hs_compile(const char *expression, unsigned flags, unsigned mode, - const hs_platform_info_t *platform, hs_database_t **db, - hs_compile_error_t **error) { +hs_error_t HS_CDECL hs_compile(const char *expression, unsigned flags, + unsigned mode, + const hs_platform_info_t *platform, + hs_database_t **db, hs_compile_error_t **error) { if (expression == nullptr) { *db = nullptr; *error = generateCompileError("Invalid parameter: expression is NULL", @@ -295,24 +297,25 @@ hs_error_t hs_compile(const char *expression, unsigned flags, unsigned mode, } extern "C" HS_PUBLIC_API -hs_error_t hs_compile_multi(const char * const *expressions, - const unsigned *flags, const unsigned *ids, - unsigned elements, unsigned mode, - const hs_platform_info_t *platform, - hs_database_t **db, hs_compile_error_t **error) { +hs_error_t HS_CDECL hs_compile_multi(const char *const *expressions, + const unsigned *flags, const unsigned *ids, + unsigned elements, unsigned mode, + const hs_platform_info_t *platform, + hs_database_t **db, + hs_compile_error_t **error) { const hs_expr_ext * const *ext = nullptr; // unused for this call. return hs_compile_multi_int(expressions, flags, ids, ext, elements, mode, platform, db, error, Grey()); } extern "C" HS_PUBLIC_API -hs_error_t hs_compile_ext_multi(const char * const *expressions, - const unsigned *flags, const unsigned *ids, - const hs_expr_ext * const *ext, - unsigned elements, unsigned mode, - const hs_platform_info_t *platform, - hs_database_t **db, - hs_compile_error_t **error) { +hs_error_t HS_CDECL hs_compile_ext_multi(const char * const *expressions, + const unsigned *flags, const unsigned *ids, + const hs_expr_ext * const *ext, + unsigned elements, unsigned mode, + const hs_platform_info_t *platform, + hs_database_t **db, + hs_compile_error_t **error) { return hs_compile_multi_int(expressions, flags, ids, ext, elements, mode, platform, db, error, Grey()); } @@ -368,19 +371,28 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, assert(pe.component); // Apply prefiltering transformations if desired. 
- if (pe.prefilter) { + if (pe.expr.prefilter) { prefilterTree(pe.component, ParseMode(flags)); } - unique_ptr g = buildWrapper(rm, cc, pe); + // Expressions containing zero-width assertions and other extended pcre + // types aren't supported yet. This call will throw a ParseError + // exception if the component tree contains such a construct. + checkUnsupported(*pe.component); + + pe.component->checkEmbeddedStartAnchor(true); + pe.component->checkEmbeddedEndAnchor(true); + + auto built_expr = buildGraph(rm, cc, pe); + unique_ptr &g = built_expr.g; + ExpressionInfo &expr = built_expr.expr; if (!g) { DEBUG_PRINTF("NFA build failed, but no exception was thrown.\n"); throw ParseError("Internal error."); } - handleExtendedParams(rm, *g, cc); - fillExpressionInfo(rm, *g, &local_info); + fillExpressionInfo(rm, cc, *g, expr, &local_info); } catch (const CompileError &e) { // Compiler error occurred @@ -409,24 +421,26 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, } extern "C" HS_PUBLIC_API -hs_error_t hs_expression_info(const char *expression, unsigned int flags, - hs_expr_info_t **info, - hs_compile_error_t **error) { +hs_error_t HS_CDECL hs_expression_info(const char *expression, + unsigned int flags, + hs_expr_info_t **info, + hs_compile_error_t **error) { return hs_expression_info_int(expression, flags, nullptr, HS_MODE_BLOCK, info, error); } extern "C" HS_PUBLIC_API -hs_error_t hs_expression_ext_info(const char *expression, unsigned int flags, - const hs_expr_ext_t *ext, - hs_expr_info_t **info, - hs_compile_error_t **error) { +hs_error_t HS_CDECL hs_expression_ext_info(const char *expression, + unsigned int flags, + const hs_expr_ext_t *ext, + hs_expr_info_t **info, + hs_compile_error_t **error) { return hs_expression_info_int(expression, flags, ext, HS_MODE_BLOCK, info, error); } extern "C" HS_PUBLIC_API -hs_error_t hs_populate_platform(hs_platform_info_t *platform) { +hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform) { if (!platform) { return HS_INVALID; } @@ -440,7 +454,7 @@ hs_error_t hs_populate_platform(hs_platform_info_t *platform) { } extern "C" HS_PUBLIC_API -hs_error_t hs_free_compile_error(hs_compile_error_t *error) { +hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error) { #if defined(FAT_RUNTIME) if (!check_ssse3()) { return HS_ARCH_ERROR; diff --git a/src/hs_common.h b/src/hs_common.h index b25b18423..ffea397e4 100644 --- a/src/hs_common.h +++ b/src/hs_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,6 +29,11 @@ #ifndef HS_COMMON_H_ #define HS_COMMON_H_ +#if defined(_WIN32) +#define HS_CDECL __cdecl +#else +#define HS_CDECL +#endif #include /** @@ -76,7 +81,7 @@ typedef int hs_error_t; * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_free_database(hs_database_t *db); +hs_error_t HS_CDECL hs_free_database(hs_database_t *db); /** * Serialize a pattern database to a stream of bytes. @@ -100,8 +105,8 @@ hs_error_t hs_free_database(hs_database_t *db); * @ref HS_SUCCESS on success, @ref HS_NOMEM if the byte array cannot be * allocated, other values may be returned if errors are detected. 
*/ -hs_error_t hs_serialize_database(const hs_database_t *db, char **bytes, - size_t *length); +hs_error_t HS_CDECL hs_serialize_database(const hs_database_t *db, char **bytes, + size_t *length); /** * Reconstruct a pattern database from a stream of bytes previously generated @@ -129,8 +134,9 @@ hs_error_t hs_serialize_database(const hs_database_t *db, char **bytes, * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_deserialize_database(const char *bytes, const size_t length, - hs_database_t **db); +hs_error_t HS_CDECL hs_deserialize_database(const char *bytes, + const size_t length, + hs_database_t **db); /** * Reconstruct a pattern database from a stream of bytes previously generated @@ -160,8 +166,9 @@ hs_error_t hs_deserialize_database(const char *bytes, const size_t length, * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_deserialize_database_at(const char *bytes, const size_t length, - hs_database_t *db); +hs_error_t HS_CDECL hs_deserialize_database_at(const char *bytes, + const size_t length, + hs_database_t *db); /** * Provides the size of the stream state allocated by a single stream opened @@ -177,7 +184,8 @@ hs_error_t hs_deserialize_database_at(const char *bytes, const size_t length, * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_stream_size(const hs_database_t *database, size_t *stream_size); +hs_error_t HS_CDECL hs_stream_size(const hs_database_t *database, + size_t *stream_size); /** * Provides the size of the given database in bytes. @@ -192,8 +200,8 @@ hs_error_t hs_stream_size(const hs_database_t *database, size_t *stream_size); * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_database_size(const hs_database_t *database, - size_t *database_size); +hs_error_t HS_CDECL hs_database_size(const hs_database_t *database, + size_t *database_size); /** * Utility function for reporting the size that would be required by a @@ -219,8 +227,9 @@ hs_error_t hs_database_size(const hs_database_t *database, * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_serialized_database_size(const char *bytes, const size_t length, - size_t *deserialized_size); +hs_error_t HS_CDECL hs_serialized_database_size(const char *bytes, + const size_t length, + size_t *deserialized_size); /** * Utility function providing information about a database. @@ -237,7 +246,8 @@ hs_error_t hs_serialized_database_size(const char *bytes, const size_t length, * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_database_info(const hs_database_t *database, char **info); +hs_error_t HS_CDECL hs_database_info(const hs_database_t *database, + char **info); /** * Utility function providing information about a serialized database. @@ -258,8 +268,8 @@ hs_error_t hs_database_info(const hs_database_t *database, char **info); * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_serialized_database_info(const char *bytes, size_t length, - char **info); +hs_error_t HS_CDECL hs_serialized_database_info(const char *bytes, + size_t length, char **info); /** * The type of the callback function that will be used by Hyperscan to allocate @@ -275,7 +285,7 @@ hs_error_t hs_serialized_database_info(const char *bytes, size_t length, * @return * A pointer to the region of memory allocated, or NULL on error. 
*/ -typedef void *(*hs_alloc_t)(size_t size); +typedef void *(HS_CDECL *hs_alloc_t)(size_t size); /** * The type of the callback function that will be used by Hyperscan to free @@ -284,7 +294,7 @@ typedef void *(*hs_alloc_t)(size_t size); * @param ptr * The region of memory to be freed. */ -typedef void (*hs_free_t)(void *ptr); +typedef void (HS_CDECL *hs_free_t)(void *ptr); /** * Set the allocate and free functions used by Hyperscan for allocating @@ -312,7 +322,8 @@ typedef void (*hs_free_t)(void *ptr); * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_set_allocator(hs_alloc_t alloc_func, hs_free_t free_func); +hs_error_t HS_CDECL hs_set_allocator(hs_alloc_t alloc_func, + hs_free_t free_func); /** * Set the allocate and free functions used by Hyperscan for allocating memory @@ -344,8 +355,8 @@ hs_error_t hs_set_allocator(hs_alloc_t alloc_func, hs_free_t free_func); * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_set_database_allocator(hs_alloc_t alloc_func, - hs_free_t free_func); +hs_error_t HS_CDECL hs_set_database_allocator(hs_alloc_t alloc_func, + hs_free_t free_func); /** * Set the allocate and free functions used by Hyperscan for allocating memory @@ -371,7 +382,8 @@ hs_error_t hs_set_database_allocator(hs_alloc_t alloc_func, * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_set_misc_allocator(hs_alloc_t alloc_func, hs_free_t free_func); +hs_error_t HS_CDECL hs_set_misc_allocator(hs_alloc_t alloc_func, + hs_free_t free_func); /** * Set the allocate and free functions used by Hyperscan for allocating memory @@ -397,7 +409,8 @@ hs_error_t hs_set_misc_allocator(hs_alloc_t alloc_func, hs_free_t free_func); * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_set_scratch_allocator(hs_alloc_t alloc_func, hs_free_t free_func); +hs_error_t HS_CDECL hs_set_scratch_allocator(hs_alloc_t alloc_func, + hs_free_t free_func); /** * Set the allocate and free functions used by Hyperscan for allocating memory @@ -423,7 +436,8 @@ hs_error_t hs_set_scratch_allocator(hs_alloc_t alloc_func, hs_free_t free_func); * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_set_stream_allocator(hs_alloc_t alloc_func, hs_free_t free_func); +hs_error_t HS_CDECL hs_set_stream_allocator(hs_alloc_t alloc_func, + hs_free_t free_func); /** * Utility function for identifying this release version. @@ -433,7 +447,7 @@ hs_error_t hs_set_stream_allocator(hs_alloc_t alloc_func, hs_free_t free_func); * date of the build. It is allocated statically, so it does not need to * be freed by the caller. */ -const char *hs_version(void); +const char * HS_CDECL hs_version(void); /** * Utility function to test the current system architecture. @@ -450,7 +464,7 @@ const char *hs_version(void); * @ref HS_SUCCESS on success, @ref HS_ARCH_ERROR if system does not * support Hyperscan. */ -hs_error_t hs_valid_platform(void); +hs_error_t HS_CDECL hs_valid_platform(void); /** * @defgroup HS_ERROR hs_error_t values @@ -545,7 +559,7 @@ hs_error_t hs_valid_platform(void); * At a minimum, Hyperscan requires Supplemental Streaming SIMD Extensions 3 * (SSSE3). 
*/ -#define HS_ARCH_ERROR (-11) +#define HS_ARCH_ERROR (-11) /** @} */ diff --git a/src/hs_compile.h b/src/hs_compile.h index c5212cbe1..3d5270443 100644 --- a/src/hs_compile.h +++ b/src/hs_compile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -169,13 +169,23 @@ typedef struct hs_platform_info { typedef struct hs_expr_info { /** * The minimum length in bytes of a match for the pattern. + * + * Note: in some cases when using advanced features to suppress matches + * (such as extended parameters or the @ref HS_FLAG_SINGLEMATCH flag) this + * may represent a conservative lower bound for the true minimum length of + * a match. */ unsigned int min_width; /** * The maximum length in bytes of a match for the pattern. If the pattern - * has an unbounded maximum width, this will be set to the maximum value of - * an unsigned int (UINT_MAX). + * has an unbounded maximum length, this will be set to the maximum value + * of an unsigned int (UINT_MAX). + * + * Note: in some cases when using advanced features to suppress matches + * (such as extended parameters or the @ref HS_FLAG_SINGLEMATCH flag) this + * may represent a conservative upper bound for the true maximum length of + * a match. */ unsigned int max_width; @@ -241,6 +251,13 @@ typedef struct hs_expr_ext { * @ref HS_EXT_FLAG_MIN_LENGTH flag in the hs_expr_ext::flags field. */ unsigned long long min_length; + + /** + * Allow patterns to approximately match within this edit distance. To use + * this parameter, set the @ref HS_EXT_FLAG_EDIT_DISTANCE flag in the + * hs_expr_ext::flags field. + */ + unsigned edit_distance; } hs_expr_ext_t; /** @@ -261,6 +278,9 @@ typedef struct hs_expr_ext { /** Flag indicating that the hs_expr_ext::min_length field is used. */ #define HS_EXT_FLAG_MIN_LENGTH 4ULL +/** Flag indicating that the hs_expr_ext::edit_distance field is used. */ +#define HS_EXT_FLAG_EDIT_DISTANCE 8ULL + /** @} */ /** @@ -323,9 +343,10 @@ typedef struct hs_expr_ext { * HS_COMPILER_ERROR on failure, with details provided in the error * parameter. */ -hs_error_t hs_compile(const char *expression, unsigned int flags, - unsigned int mode, const hs_platform_info_t *platform, - hs_database_t **db, hs_compile_error_t **error); +hs_error_t HS_CDECL hs_compile(const char *expression, unsigned int flags, + unsigned int mode, + const hs_platform_info_t *platform, + hs_database_t **db, hs_compile_error_t **error); /** * The multiple regular expression compiler. @@ -401,11 +422,13 @@ hs_error_t hs_compile(const char *expression, unsigned int flags, * parameter. * */ -hs_error_t hs_compile_multi(const char *const *expressions, - const unsigned int *flags, const unsigned int *ids, - unsigned int elements, unsigned int mode, - const hs_platform_info_t *platform, - hs_database_t **db, hs_compile_error_t **error); +hs_error_t HS_CDECL hs_compile_multi(const char *const *expressions, + const unsigned int *flags, + const unsigned int *ids, + unsigned int elements, unsigned int mode, + const hs_platform_info_t *platform, + hs_database_t **db, + hs_compile_error_t **error); /** * The multiple regular expression compiler with extended parameter support. @@ -486,7 +509,7 @@ hs_error_t hs_compile_multi(const char *const *expressions, * parameter. 
* */ -hs_error_t hs_compile_ext_multi(const char *const *expressions, +hs_error_t HS_CDECL hs_compile_ext_multi(const char *const *expressions, const unsigned int *flags, const unsigned int *ids, const hs_expr_ext_t *const *ext, @@ -505,13 +528,24 @@ hs_error_t hs_compile_ext_multi(const char *const *expressions, * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_free_compile_error(hs_compile_error_t *error); +hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error); /** * Utility function providing information about a regular expression. The * information provided in @ref hs_expr_info_t includes the minimum and maximum * width of a pattern match. * + * Note: successful analysis of an expression with this function does not imply + * that compilation of the same expression (via @ref hs_compile(), @ref + * hs_compile_multi() or @ref hs_compile_ext_multi()) would succeed. This + * function may return @ref HS_SUCCESS for regular expressions that Hyperscan + * cannot compile. + * + * Note: some per-pattern flags (such as @ref HS_FLAG_ALLOWEMPTY, @ref + * HS_FLAG_SOM_LEFTMOST) are accepted by this call, but as they do not affect + * the properties returned in the @ref hs_expr_info_t structure, they will not + * affect the outcome of this function. + * * @param expression * The NULL-terminated expression to parse. Note that this string must * represent ONLY the pattern to be matched, with no delimiters or flags; @@ -553,15 +587,27 @@ hs_error_t hs_free_compile_error(hs_compile_error_t *error); * HS_COMPILER_ERROR on failure, with details provided in the error * parameter. */ -hs_error_t hs_expression_info(const char *expression, unsigned int flags, - hs_expr_info_t **info, - hs_compile_error_t **error); +hs_error_t HS_CDECL hs_expression_info(const char *expression, + unsigned int flags, + hs_expr_info_t **info, + hs_compile_error_t **error); /** * Utility function providing information about a regular expression, with * extended parameter support. The information provided in @ref hs_expr_info_t * includes the minimum and maximum width of a pattern match. * + * Note: successful analysis of an expression with this function does not imply + * that compilation of the same expression (via @ref hs_compile(), @ref + * hs_compile_multi() or @ref hs_compile_ext_multi()) would succeed. This + * function may return @ref HS_SUCCESS for regular expressions that Hyperscan + * cannot compile. + * + * Note: some per-pattern flags (such as @ref HS_FLAG_ALLOWEMPTY, @ref + * HS_FLAG_SOM_LEFTMOST) are accepted by this call, but as they do not affect + * the properties returned in the @ref hs_expr_info_t structure, they will not + * affect the outcome of this function. + * * @param expression * The NULL-terminated expression to parse. Note that this string must * represent ONLY the pattern to be matched, with no delimiters or flags; @@ -608,10 +654,11 @@ hs_error_t hs_expression_info(const char *expression, unsigned int flags, * HS_COMPILER_ERROR on failure, with details provided in the error * parameter. */ -hs_error_t hs_expression_ext_info(const char *expression, unsigned int flags, - const hs_expr_ext_t *ext, - hs_expr_info_t **info, - hs_compile_error_t **error); +hs_error_t HS_CDECL hs_expression_ext_info(const char *expression, + unsigned int flags, + const hs_expr_ext_t *ext, + hs_expr_info_t **info, + hs_compile_error_t **error); /** * Populates the platform information based on the current host. 
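For illustration, a minimal sketch of how a caller might exercise the new edit_distance extended parameter declared above, via hs_compile_ext_multi(). The pattern, the distance value of 2, and the abbreviated error handling are example assumptions, not part of this patch:

    // Compile one literal-ish pattern that also matches within edit distance 2.
    #include <hs.h>      // public Hyperscan API header
    #include <cstdio>
    #include <cstring>

    int main() {
        const char *exprs[] = {"hyperscan"};
        unsigned flags[] = {0};
        unsigned ids[] = {1};

        hs_expr_ext_t ext;
        std::memset(&ext, 0, sizeof(ext));
        ext.flags = HS_EXT_FLAG_EDIT_DISTANCE; // mark edit_distance as in use
        ext.edit_distance = 2;                 // allow up to 2 edits

        const hs_expr_ext_t *ext_ptrs[] = {&ext};
        hs_database_t *db = nullptr;
        hs_compile_error_t *err = nullptr;

        if (hs_compile_ext_multi(exprs, flags, ids, ext_ptrs, 1, HS_MODE_BLOCK,
                                 nullptr, &db, &err) != HS_SUCCESS) {
            std::fprintf(stderr, "compile failed: %s\n", err->message);
            hs_free_compile_error(err);
            return 1;
        }
        hs_free_database(db);
        return 0;
    }

As the new hs_expr_info notes state, approximate matching widens what counts as a match, so the reported min/max widths become conservative bounds.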
@@ -623,7 +670,7 @@ hs_error_t hs_expression_ext_info(const char *expression, unsigned int flags, * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_populate_platform(hs_platform_info_t *platform); +hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform); /** * @defgroup HS_PATTERN_FLAG Pattern flags @@ -770,6 +817,14 @@ hs_error_t hs_populate_platform(hs_platform_info_t *platform); */ #define HS_CPU_FEATURES_AVX2 (1ULL << 2) +/** + * CPU features flag - Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX512) + * + * Setting this flag indicates that the target platform supports AVX512 + * instructions, specifically AVX-512BW. Using AVX512 implies the use of AVX2. + */ +#define HS_CPU_FEATURES_AVX512 (1ULL << 3) + /** @} */ /** @@ -826,6 +881,30 @@ hs_error_t hs_populate_platform(hs_platform_info_t *platform); */ #define HS_TUNE_FAMILY_BDW 5 +/** + * Tuning Parameter - Intel(R) microarchitecture code name Skylake + * + * This indicates that the compiled database should be tuned for the + * Skylake microarchitecture. + */ +#define HS_TUNE_FAMILY_SKL 6 + +/** + * Tuning Parameter - Intel(R) microarchitecture code name Skylake Server + * + * This indicates that the compiled database should be tuned for the + * Skylake Server microarchitecture. + */ +#define HS_TUNE_FAMILY_SKX 7 + +/** + * Tuning Parameter - Intel(R) microarchitecture code name Goldmont + * + * This indicates that the compiled database should be tuned for the + * Goldmont microarchitecture. + */ +#define HS_TUNE_FAMILY_GLM 8 + /** @} */ /** diff --git a/src/hs_runtime.h b/src/hs_runtime.h index db52f4f50..ecd97ca52 100644 --- a/src/hs_runtime.h +++ b/src/hs_runtime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -145,8 +145,8 @@ typedef int (*match_event_handler)(unsigned int id, * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_open_stream(const hs_database_t *db, unsigned int flags, - hs_stream_t **stream); +hs_error_t HS_CDECL hs_open_stream(const hs_database_t *db, unsigned int flags, + hs_stream_t **stream); /** * Write data to be scanned to the opened stream. @@ -185,10 +185,10 @@ hs_error_t hs_open_stream(const hs_database_t *db, unsigned int flags, * match callback indicated that scanning should stop; other values on * error. */ -hs_error_t hs_scan_stream(hs_stream_t *id, const char *data, - unsigned int length, unsigned int flags, - hs_scratch_t *scratch, match_event_handler onEvent, - void *ctxt); +hs_error_t HS_CDECL hs_scan_stream(hs_stream_t *id, const char *data, + unsigned int length, unsigned int flags, + hs_scratch_t *scratch, + match_event_handler onEvent, void *ctxt); /** * Close a stream. @@ -223,8 +223,8 @@ hs_error_t hs_scan_stream(hs_stream_t *id, const char *data, * @return * Returns @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch, - match_event_handler onEvent, void *ctxt); +hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch, + match_event_handler onEvent, void *ctxt); /** * Reset a stream to an initial state. @@ -264,9 +264,9 @@ hs_error_t hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch, * @return * @ref HS_SUCCESS on success, other values on failure. 
*/ -hs_error_t hs_reset_stream(hs_stream_t *id, unsigned int flags, - hs_scratch_t *scratch, match_event_handler onEvent, - void *context); +hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, unsigned int flags, + hs_scratch_t *scratch, + match_event_handler onEvent, void *context); /** * Duplicate the given stream. The new stream will have the same state as the @@ -282,7 +282,8 @@ hs_error_t hs_reset_stream(hs_stream_t *id, unsigned int flags, * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_copy_stream(hs_stream_t **to_id, const hs_stream_t *from_id); +hs_error_t HS_CDECL hs_copy_stream(hs_stream_t **to_id, + const hs_stream_t *from_id); /** * Duplicate the given 'from' stream state onto the 'to' stream. The 'to' stream @@ -314,11 +315,11 @@ hs_error_t hs_copy_stream(hs_stream_t **to_id, const hs_stream_t *from_id); * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_reset_and_copy_stream(hs_stream_t *to_id, - const hs_stream_t *from_id, - hs_scratch_t *scratch, - match_event_handler onEvent, - void *context); +hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id, + const hs_stream_t *from_id, + hs_scratch_t *scratch, + match_event_handler onEvent, + void *context); /** * The block (non-streaming) regular expression scanner. @@ -355,10 +356,10 @@ hs_error_t hs_reset_and_copy_stream(hs_stream_t *to_id, * match callback indicated that scanning should stop; other values on * error. */ -hs_error_t hs_scan(const hs_database_t *db, const char *data, - unsigned int length, unsigned int flags, - hs_scratch_t *scratch, match_event_handler onEvent, - void *context); +hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data, + unsigned int length, unsigned int flags, + hs_scratch_t *scratch, match_event_handler onEvent, + void *context); /** * The vectored regular expression scanner. @@ -398,10 +399,12 @@ hs_error_t hs_scan(const hs_database_t *db, const char *data, * Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the match * callback indicated that scanning should stop; other values on error. */ -hs_error_t hs_scan_vector(const hs_database_t *db, const char *const *data, - const unsigned int *length, unsigned int count, - unsigned int flags, hs_scratch_t *scratch, - match_event_handler onEvent, void *context); +hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db, + const char *const *data, + const unsigned int *length, + unsigned int count, unsigned int flags, + hs_scratch_t *scratch, + match_event_handler onEvent, void *context); /** * Allocate a "scratch" space for use by Hyperscan. @@ -429,7 +432,8 @@ hs_error_t hs_scan_vector(const hs_database_t *db, const char *const *data, * allocation fails. Other errors may be returned if invalid parameters * are specified. */ -hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch); +hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db, + hs_scratch_t **scratch); /** * Allocate a scratch space that is a clone of an existing scratch space. @@ -449,7 +453,8 @@ hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch); * @ref HS_SUCCESS on success; @ref HS_NOMEM if the allocation fails. * Other errors may be returned if invalid parameters are specified. */ -hs_error_t hs_clone_scratch(const hs_scratch_t *src, hs_scratch_t **dest); +hs_error_t HS_CDECL hs_clone_scratch(const hs_scratch_t *src, + hs_scratch_t **dest); /** * Provides the size of the given scratch space. 
@@ -465,7 +470,8 @@ hs_error_t hs_clone_scratch(const hs_scratch_t *src, hs_scratch_t **dest); * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_scratch_size(const hs_scratch_t *scratch, size_t *scratch_size); +hs_error_t HS_CDECL hs_scratch_size(const hs_scratch_t *scratch, + size_t *scratch_size); /** * Free a scratch block previously allocated by @ref hs_alloc_scratch() or @ref @@ -480,7 +486,7 @@ hs_error_t hs_scratch_size(const hs_scratch_t *scratch, size_t *scratch_size); * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_free_scratch(hs_scratch_t *scratch); +hs_error_t HS_CDECL hs_free_scratch(hs_scratch_t *scratch); /** * Callback 'from' return value, indicating that the start of this match was diff --git a/src/hs_valid_platform.c b/src/hs_valid_platform.c index 939cde1f6..128ac04fd 100644 --- a/src/hs_valid_platform.c +++ b/src/hs_valid_platform.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,7 +30,7 @@ #include "util/cpuid_flags.h" HS_PUBLIC_API -hs_error_t hs_valid_platform(void) { +hs_error_t HS_CDECL hs_valid_platform(void) { /* Hyperscan requires SSSE3, anything else is a bonus */ if (check_ssse3()) { return HS_SUCCESS; diff --git a/src/hs_version.c b/src/hs_version.c index 45e23c3b5..04cf46f3f 100644 --- a/src/hs_version.c +++ b/src/hs_version.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,6 +31,6 @@ #include "hs_version.h" HS_PUBLIC_API -const char *hs_version(void) { +const char * HS_CDECL hs_version(void) { return HS_VERSION_STRING; } diff --git a/src/hwlm/hwlm.c b/src/hwlm/hwlm.c index 3c7615a7b..6eaa7ed15 100644 --- a/src/hwlm/hwlm.c +++ b/src/hwlm/hwlm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -172,6 +172,8 @@ void do_accel_streaming(const union AccelAux *aux, const u8 *hbuf, size_t hlen, hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len, size_t start, HWLMCallback cb, void *ctxt, hwlm_group_t groups) { + assert(t); + DEBUG_PRINTF("buf len=%zu, start=%zu, groups=%llx\n", len, start, groups); if (!groups) { DEBUG_PRINTF("groups all off\n"); @@ -201,6 +203,9 @@ hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len, hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch, size_t len, size_t start, HWLMCallback cb, void *ctxt, hwlm_group_t groups) { + assert(t); + assert(scratch); + const u8 *hbuf = scratch->core_info.hbuf; const size_t hlen = scratch->core_info.hlen; const u8 *buf = scratch->core_info.buf; diff --git a/src/hwlm/hwlm_build.cpp b/src/hwlm/hwlm_build.cpp index fa6335c94..2f61ea6d6 100644 --- a/src/hwlm/hwlm_build.cpp +++ b/src/hwlm/hwlm_build.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following 
conditions are met: @@ -29,31 +29,23 @@ /** \file * \brief Hamster Wheel Literal Matcher: build code. */ + +#include "hwlm_build.h" + #include "grey.h" #include "hwlm.h" -#include "hwlm_build.h" #include "hwlm_internal.h" +#include "hwlm_literal.h" #include "noodle_engine.h" #include "noodle_build.h" #include "scratch.h" #include "ue2common.h" #include "fdr/fdr_compile.h" -#include "nfa/shufticompile.h" -#include "nfa/trufflecompile.h" -#include "util/alloc.h" -#include "util/bitutils.h" -#include "util/charreach.h" -#include "util/compare.h" #include "util/compile_context.h" #include "util/compile_error.h" -#include "util/dump_charclass.h" -#include "util/target_info.h" #include "util/ue2string.h" -#include "util/verify_types.h" #include -#include -#include #include #include @@ -61,431 +53,6 @@ using namespace std; namespace ue2 { -static const unsigned int MAX_ACCEL_OFFSET = 16; -static const unsigned int MAX_SHUFTI_WIDTH = 240; - -static -size_t mask_overhang(const hwlmLiteral &lit) { - size_t msk_true_size = lit.msk.size(); - assert(msk_true_size <= HWLM_MASKLEN); - assert(HWLM_MASKLEN <= MAX_ACCEL_OFFSET); - for (u8 c : lit.msk) { - if (!c) { - msk_true_size--; - } else { - break; - } - } - - if (lit.s.length() >= msk_true_size) { - return 0; - } - - /* only short literals should be able to have a mask which overhangs */ - assert(lit.s.length() < MAX_ACCEL_OFFSET); - return msk_true_size - lit.s.length(); -} - -static -bool findDVerm(const vector &lits, AccelAux *aux) { - const hwlmLiteral &first = *lits.front(); - - struct candidate { - candidate(void) - : c1(0), c2(0), max_offset(0), b5insens(false), valid(false) {} - candidate(const hwlmLiteral &base, u32 offset) - : c1(base.s[offset]), c2(base.s[offset + 1]), max_offset(0), - b5insens(false), valid(true) {} - char c1; - char c2; - u32 max_offset; - bool b5insens; - bool valid; - - bool operator>(const candidate &other) const { - if (!valid) { - return false; - } - - if (!other.valid) { - return true; - } - - if (other.cdiffers() && !cdiffers()) { - return false; - } - - if (!other.cdiffers() && cdiffers()) { - return true; - } - - if (!other.b5insens && b5insens) { - return false; - } - - if (other.b5insens && !b5insens) { - return true; - } - - if (max_offset > other.max_offset) { - return false; - } - - return true; - } - - bool cdiffers(void) const { - if (!b5insens) { - return c1 != c2; - } - return (c1 & CASE_CLEAR) != (c2 & CASE_CLEAR); - } - }; - - candidate best; - - for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()) - 1; i++) { - candidate curr(first, i); - - /* check to see if this pair appears in each string */ - for (const auto &lit_ptr : lits) { - const hwlmLiteral &lit = *lit_ptr; - if (lit.nocase && (ourisalpha(curr.c1) || ourisalpha(curr.c2))) { - curr.b5insens = true; /* no choice but to be case insensitive */ - } - - bool found = false; - bool found_nc = false; - for (u32 j = 0; - !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; j++) { - found |= curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1]; - found_nc |= (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR) - && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR); - - if (curr.b5insens) { - found = found_nc; - } - } - - if (!curr.b5insens && !found && found_nc) { - curr.b5insens = true; - found = true; - } - - if (!found) { - goto next_candidate; - } - } - - /* check to find the max offset where this appears */ - for (const auto &lit_ptr : lits) { - const hwlmLiteral &lit = *lit_ptr; - for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 
1; - j++) { - bool found = false; - if (curr.b5insens) { - found = (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR) - && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR); - } else { - found = curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1]; - } - - if (found) { - assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET); - ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit)); - break; - } - } - } - - if (curr > best) { - best = curr; - } - - next_candidate:; - } - - if (!best.valid) { - return false; - } - - aux->dverm.offset = verify_u8(best.max_offset); - - if (!best.b5insens) { - aux->dverm.accel_type = ACCEL_DVERM; - aux->dverm.c1 = best.c1; - aux->dverm.c2 = best.c2; - DEBUG_PRINTF("built dverm for %02hhx%02hhx\n", - aux->dverm.c1, aux->dverm.c2); - } else { - aux->dverm.accel_type = ACCEL_DVERM_NOCASE; - aux->dverm.c1 = best.c1 & CASE_CLEAR; - aux->dverm.c2 = best.c2 & CASE_CLEAR; - DEBUG_PRINTF("built dverm nc for %02hhx%02hhx\n", - aux->dverm.c1, aux->dverm.c2); - } - return true; -} - -static -bool findSVerm(const vector &lits, AccelAux *aux) { - const hwlmLiteral &first = *lits.front(); - - struct candidate { - candidate(void) - : c(0), max_offset(0), b5insens(false), valid(false) {} - candidate(const hwlmLiteral &base, u32 offset) - : c(base.s[offset]), max_offset(0), - b5insens(false), valid(true) {} - char c; - u32 max_offset; - bool b5insens; - bool valid; - - bool operator>(const candidate &other) const { - if (!valid) { - return false; - } - - if (!other.valid) { - return true; - } - - if (!other.b5insens && b5insens) { - return false; - } - - if (other.b5insens && !b5insens) { - return true; - } - - if (max_offset > other.max_offset) { - return false; - } - - return true; - } - }; - - candidate best; - - for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()); i++) { - candidate curr(first, i); - - /* check to see if this pair appears in each string */ - for (const auto &lit_ptr : lits) { - const hwlmLiteral &lit = *lit_ptr; - if (lit.nocase && ourisalpha(curr.c)) { - curr.b5insens = true; /* no choice but to be case insensitive */ - } - - bool found = false; - bool found_nc = false; - for (u32 j = 0; - !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) { - found |= curr.c == lit.s[j]; - found_nc |= (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR); - - if (curr.b5insens) { - found = found_nc; - } - } - - if (!curr.b5insens && !found && found_nc) { - curr.b5insens = true; - found = true; - } - - if (!found) { - goto next_candidate; - } - } - - /* check to find the max offset where this appears */ - for (const auto &lit_ptr : lits) { - const hwlmLiteral &lit = *lit_ptr; - for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) { - bool found = false; - if (curr.b5insens) { - found = (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR); - } else { - found = curr.c == lit.s[j]; - } - - if (found) { - assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET); - ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit)); - } - } - } - - if (curr > best) { - best = curr; - } - - next_candidate:; - } - - if (!best.valid) { - return false; - } - - if (!best.b5insens) { - aux->verm.accel_type = ACCEL_VERM; - aux->verm.c = best.c; - DEBUG_PRINTF("built verm for %02hhx\n", aux->verm.c); - } else { - aux->verm.accel_type = ACCEL_VERM_NOCASE; - aux->verm.c = best.c & CASE_CLEAR; - DEBUG_PRINTF("built verm nc for %02hhx\n", aux->verm.c); - } - aux->verm.offset = verify_u8(best.max_offset); - - return true; -} - -static -void filterLits(const vector &lits, hwlm_group_t 
expected_groups, - vector *filtered_lits, u32 *min_len) { - *min_len = MAX_ACCEL_OFFSET; - - for (const auto &lit : lits) { - if (!(lit.groups & expected_groups)) { - continue; - } - - const size_t lit_len = lit.s.length(); - if (lit_len < *min_len) { - *min_len = verify_u32(lit_len); - } - - filtered_lits->push_back(&lit); - -#ifdef DEBUG - DEBUG_PRINTF("lit:"); - for (u32 i = 0; i < lit.s.length(); i++) { - printf("%02hhx", lit.s[i]); - } - printf("\n"); -#endif - } -} - -static -bool litGuardedByCharReach(const CharReach &cr, const hwlmLiteral &lit, - u32 max_offset) { - for (u32 i = 0; i <= max_offset && i < lit.s.length(); i++) { - unsigned char c = lit.s[i]; - if (lit.nocase) { - if (cr.test(mytoupper(c)) && cr.test(mytolower(c))) { - return true; - } - } else { - if (cr.test(c)) { - return true; - } - } - } - - return false; -} - -static -void findForwardAccelScheme(const vector &lits, - hwlm_group_t expected_groups, AccelAux *aux) { - DEBUG_PRINTF("building accel expected=%016llx\n", expected_groups); - u32 min_len = MAX_ACCEL_OFFSET; - vector filtered_lits; - - filterLits(lits, expected_groups, &filtered_lits, &min_len); - if (filtered_lits.empty()) { - return; - } - - if (findDVerm(filtered_lits, aux) - || findSVerm(filtered_lits, aux)) { - return; - } - - /* look for shufti/truffle */ - - vector reach(MAX_ACCEL_OFFSET, CharReach()); - for (const auto &lit : lits) { - if (!(lit.groups & expected_groups)) { - continue; - } - - u32 overhang = mask_overhang(lit); - for (u32 i = 0; i < overhang; i++) { - /* this offset overhangs the start of the real literal; look at the - * msk/cmp */ - for (u32 j = 0; j < N_CHARS; j++) { - if ((j & lit.msk[i]) == lit.cmp[i]) { - reach[i].set(j); - } - } - } - for (u32 i = overhang; i < MAX_ACCEL_OFFSET; i++) { - CharReach &reach_i = reach[i]; - u32 i_effective = i - overhang; - - if (litGuardedByCharReach(reach_i, lit, i_effective)) { - continue; - } - unsigned char c = i_effective < lit.s.length() ? 
lit.s[i_effective] - : lit.s.back(); - if (lit.nocase) { - reach_i.set(mytoupper(c)); - reach_i.set(mytolower(c)); - } else { - reach_i.set(c); - } - } - } - - u32 min_count = ~0U; - u32 min_offset = ~0U; - for (u32 i = 0; i < MAX_ACCEL_OFFSET; i++) { - size_t count = reach[i].count(); - DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i, - describeClass(reach[i]).c_str(), count); - if (count < min_count) { - min_count = (u32)count; - min_offset = i; - } - } - - if (min_count > MAX_SHUFTI_WIDTH) { - DEBUG_PRINTF("FAIL: min shufti with %u chars is too wide\n", min_count); - return; - } - - const CharReach &cr = reach[min_offset]; - if (-1 != - shuftiBuildMasks(cr, (u8 *)&aux->shufti.lo, (u8 *)&aux->shufti.hi)) { - DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n", - describeClass(cr).c_str(), cr.count(), min_offset); - aux->shufti.accel_type = ACCEL_SHUFTI; - aux->shufti.offset = verify_u8(min_offset); - return; - } - - truffleBuildMasks(cr, (u8 *)&aux->truffle.mask1, (u8 *)&aux->truffle.mask2); - DEBUG_PRINTF("built truffle for %s (%zu chars, offset %u)\n", - describeClass(cr).c_str(), cr.count(), min_offset); - aux->truffle.accel_type = ACCEL_TRUFFLE; - aux->truffle.offset = verify_u8(min_offset); -} - -static -void buildForwardAccel(HWLM *h, const vector &lits, - hwlm_group_t expected_groups) { - findForwardAccelScheme(lits, expected_groups, &h->accel1); - findForwardAccelScheme(lits, HWLM_ALL_GROUPS, &h->accel0); - - h->accel1_groups = expected_groups; -} - static void dumpLits(UNUSED const vector &lits) { #ifdef DEBUG @@ -512,7 +79,6 @@ bool everyoneHasGroups(const vector &lits) { static bool isNoodleable(const vector &lits, - const hwlmStreamingControl *stream_control, const CompileContext &cc) { if (!cc.grey.allowNoodle) { return false; @@ -523,19 +89,6 @@ bool isNoodleable(const vector &lits, return false; } - if (stream_control) { // nullptr if in block mode - if (lits.front().s.length() > stream_control->history_max + 1) { - DEBUG_PRINTF("length of %zu too long for history max %zu\n", - lits.front().s.length(), - stream_control->history_max); - return false; - } - if (2 * lits.front().s.length() - 2 > FDR_TEMP_BUF_SIZE) { - assert(0); - return false; - } - } - if (!lits.front().msk.empty()) { DEBUG_PRINTF("noodle can't handle supplementary masks\n"); return false; @@ -544,23 +97,12 @@ bool isNoodleable(const vector &lits, return true; } -aligned_unique_ptr hwlmBuild(const vector &lits, - hwlmStreamingControl *stream_control, - bool make_small, const CompileContext &cc, - hwlm_group_t expected_groups) { +bytecode_ptr hwlmBuild(const vector &lits, bool make_small, + const CompileContext &cc, + UNUSED hwlm_group_t expected_groups) { assert(!lits.empty()); dumpLits(lits); - if (stream_control) { - assert(stream_control->history_min <= stream_control->history_max); - - // We should not have been passed any literals that are too long to - // match with a maximally-sized history buffer. - assert(all_of(begin(lits), end(lits), [&](const hwlmLiteral &lit) { - return lit.s.length() <= stream_control->history_max + 1; - })); - } - // Check that we haven't exceeded the maximum number of literals. 
if (lits.size() > cc.grey.limitLiteralCount) { throw ResourceLimitError(); @@ -595,29 +137,21 @@ aligned_unique_ptr hwlmBuild(const vector &lits, assert(everyoneHasGroups(lits)); - if (isNoodleable(lits, stream_control, cc)) { + if (isNoodleable(lits, cc)) { DEBUG_PRINTF("build noodle table\n"); engType = HWLM_ENGINE_NOOD; const hwlmLiteral &lit = lits.front(); auto noodle = noodBuildTable(lit); if (noodle) { - engSize = noodSize(noodle.get()); - } - if (stream_control) { - // For now, a single literal still goes to noodle and asks - // for a great big history - stream_control->literal_history_required = lit.s.length() - 1; - assert(stream_control->literal_history_required - <= stream_control->history_max); + engSize = noodle.size(); } eng = move(noodle); } else { DEBUG_PRINTF("building a new deal\n"); engType = HWLM_ENGINE_FDR; - auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey, - stream_control); + auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey); if (fdr) { - engSize = fdrSize(fdr.get()); + engSize = fdr.size(); } eng = move(fdr); } @@ -631,23 +165,12 @@ aligned_unique_ptr hwlmBuild(const vector &lits, throw ResourceLimitError(); } - auto h = aligned_zmalloc_unique(ROUNDUP_CL(sizeof(HWLM)) + engSize); + const size_t hwlm_len = ROUNDUP_CL(sizeof(HWLM)) + engSize; + auto h = make_zeroed_bytecode_ptr(hwlm_len, 64); h->type = engType; memcpy(HWLM_DATA(h.get()), eng.get(), engSize); - if (engType == HWLM_ENGINE_FDR && cc.grey.hamsterAccelForward) { - buildForwardAccel(h.get(), lits, expected_groups); - } - - if (stream_control) { - DEBUG_PRINTF("requires %zu (of max %zu) bytes of history\n", - stream_control->literal_history_required, - stream_control->history_max); - assert(stream_control->literal_history_required - <= stream_control->history_max); - } - return h; } diff --git a/src/hwlm/hwlm_build.h b/src/hwlm/hwlm_build.h index fbf359e60..f2691496e 100644 --- a/src/hwlm/hwlm_build.h +++ b/src/hwlm/hwlm_build.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,9 +34,8 @@ #define HWLM_BUILD_H #include "hwlm.h" -#include "hwlm_literal.h" #include "ue2common.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include #include @@ -47,30 +46,12 @@ namespace ue2 { struct CompileContext; struct Grey; -struct target_t; - -/** \brief Structure gathering together the input/output parameters related to - * streaming mode operation. */ -struct hwlmStreamingControl { - /** \brief IN parameter: Upper limit on the amount of history that can be - * requested. */ - size_t history_max; - - /** \brief IN parameter: History already known to be used before literal - * analysis. */ - size_t history_min; - - /** \brief OUT parameter: History required by the literal matcher to - * correctly match all literals. */ - size_t literal_history_required; -}; +struct hwlmLiteral; /** \brief Build an \ref HWLM literal matcher runtime structure for a group of * literals. * * \param lits The group of literals. - * \param stream_control Streaming control parameters. If the matcher will - * operate in non-streaming (block) mode, this pointer should be NULL. * \param make_small Optimise matcher for small size. * \param cc Compile context. * \param expected_groups FIXME: document me! 
@@ -79,11 +60,9 @@ struct hwlmStreamingControl { * may result in a nullptr return value, or a std::bad_alloc exception being * thrown. */ -aligned_unique_ptr<HWLM> -hwlmBuild(const std::vector<hwlmLiteral> &lits, - hwlmStreamingControl *stream_control, bool make_small, - const CompileContext &cc, - hwlm_group_t expected_groups = HWLM_ALL_GROUPS); +bytecode_ptr<HWLM> hwlmBuild(const std::vector<hwlmLiteral> &lits, + bool make_small, const CompileContext &cc, + hwlm_group_t expected_groups = HWLM_ALL_GROUPS); /** * Returns an estimate of the number of repeated characters on the end of a diff --git a/src/hwlm/hwlm_literal.h b/src/hwlm/hwlm_literal.h index b7af99d32..0e2a1ea5d 100644 --- a/src/hwlm/hwlm_literal.h +++ b/src/hwlm/hwlm_literal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,12 +37,13 @@ #include "ue2common.h" #include <string> +#include <tuple> #include <vector> namespace ue2 { /** \brief Max length of the literal passed to HWLM. */ -#define HWLM_LITERAL_MAX_LEN 255 +#define HWLM_LITERAL_MAX_LEN 8 /** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */ #define HWLM_MASKLEN 8 @@ -111,6 +112,19 @@ struct hwlmLiteral { : hwlmLiteral(s_in, nocase_in, false, id_in, HWLM_ALL_GROUPS, {}, {}) {} }; +inline +bool operator<(const hwlmLiteral &a, const hwlmLiteral &b) { + return std::tie(a.id, a.s, a.nocase, a.noruns, a.groups, a.msk, a.cmp) < + std::tie(b.id, b.s, b.nocase, b.noruns, b.groups, b.msk, b.cmp); +} + +inline +bool operator==(const hwlmLiteral &a, const hwlmLiteral &b) { + return a.id == b.id && a.s == b.s && a.nocase == b.nocase && + a.noruns == b.noruns && a.groups == b.groups && a.msk == b.msk && + a.cmp == b.cmp; +} + /** * Consistency test; returns false if the given msk/cmp test can never match * the literal string s.
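
The new `operator<` above is the std::tie idiom: build a tuple of references to the members, and let tuple's lexicographic `operator<` define the ordering, keeping it trivially consistent with `operator==` as long as both list the same members. A minimal standalone sketch of the idiom on a cut-down stand-in struct (`Lit` is illustrative, not the real hwlmLiteral):

```cpp
#include <cassert>
#include <string>
#include <tuple>
#include <vector>

struct Lit {
    unsigned id;
    std::string s;
    bool nocase;
    std::vector<unsigned char> msk;
};

// std::tie builds a tuple of references; tuple's operator< compares
// members lexicographically in the order listed.
inline bool operator<(const Lit &a, const Lit &b) {
    return std::tie(a.id, a.s, a.nocase, a.msk) <
           std::tie(b.id, b.s, b.nocase, b.msk);
}

inline bool operator==(const Lit &a, const Lit &b) {
    return std::tie(a.id, a.s, a.nocase, a.msk) ==
           std::tie(b.id, b.s, b.nocase, b.msk);
}

int main() {
    Lit a{1, "abc", false, {}};
    Lit b{1, "abd", false, {}};
    assert(a < b && !(a == b)); // "abc" sorts before "abd"
    return 0;
}
```
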
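The reworked hwlmBuild() shown earlier in this diff now returns a bytecode_ptr holding one cache-line-aligned, zeroed allocation: the HWLM header, rounded up to a cache line, followed by the engine bytecode copied in with memcpy. A rough standalone sketch of that single-allocation layout idiom, under stated assumptions: `Header`, `Blob`, `buildMatcher`, and `engineData` are hypothetical stand-ins, not Hyperscan API, and `std::aligned_alloc` needs C++17 on a non-Windows toolchain.

```cpp
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <memory>

namespace {

constexpr size_t CACHE_LINE = 64;

constexpr size_t roundUpCL(size_t n) {
    return (n + CACHE_LINE - 1) & ~(CACHE_LINE - 1);
}

// Stand-in for struct HWLM: a small header in front of engine bytecode.
struct Header {
    unsigned char type; // engine type tag, e.g. noodle vs. FDR
};

struct FreeDeleter {
    void operator()(Header *p) const { std::free(p); }
};
using Blob = std::unique_ptr<Header, FreeDeleter>;

// Engine bytecode lives on the first cache line after the header,
// mirroring HWLM_DATA(h): (char *)h + ROUNDUP_CL(sizeof(HWLM)).
unsigned char *engineData(Header *h) {
    return reinterpret_cast<unsigned char *>(h) + roundUpCL(sizeof(Header));
}

// One zeroed, cache-line-aligned allocation holding header + engine,
// analogous to make_zeroed_bytecode_ptr<HWLM>(len, 64) in the diff.
Blob buildMatcher(const void *engine, size_t engSize, unsigned char type) {
    // aligned_alloc requires the size to be a multiple of the alignment.
    const size_t len = roundUpCL(roundUpCL(sizeof(Header)) + engSize);
    void *mem = std::aligned_alloc(CACHE_LINE, len);
    if (!mem) {
        return nullptr;
    }
    std::memset(mem, 0, len);
    Blob h(static_cast<Header *>(mem));
    h->type = type;
    std::memcpy(engineData(h.get()), engine, engSize);
    return h;
}

} // namespace

int main() {
    const unsigned char fakeEngine[16] = {0x42};
    Blob m = buildMatcher(fakeEngine, sizeof(fakeEngine), 1);
    assert(m && m->type == 1 && engineData(m.get())[0] == 0x42);
    return 0;
}
```
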
diff --git a/src/hwlm/noodle_build.cpp b/src/hwlm/noodle_build.cpp index d2b4e3f20..63fdf0728 100644 --- a/src/hwlm/noodle_build.cpp +++ b/src/hwlm/noodle_build.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,7 +35,6 @@ #include "hwlm_literal.h" #include "noodle_internal.h" -#include "util/alloc.h" #include "util/compare.h" #include "util/verify_types.h" #include "ue2common.h" @@ -67,7 +66,7 @@ size_t findNoodFragOffset(const hwlmLiteral &lit) { return offset; } -aligned_unique_ptr<noodTable> noodBuildTable(const hwlmLiteral &lit) { +bytecode_ptr<noodTable> noodBuildTable(const hwlmLiteral &lit) { if (!lit.msk.empty()) { DEBUG_PRINTF("noodle can't handle supplementary masks\n"); return nullptr; @@ -75,7 +74,7 @@ aligned_unique_ptr<noodTable> noodBuildTable(const hwlmLiteral &lit) { const auto &s = lit.s; size_t noodle_len = sizeof(noodTable) + s.length(); - auto n = aligned_zmalloc_unique<noodTable>(noodle_len); + auto n = make_zeroed_bytecode_ptr<noodTable>(noodle_len); assert(n); size_t key_offset = findNoodFragOffset(lit); diff --git a/src/hwlm/noodle_build.h b/src/hwlm/noodle_build.h index 1a41695f7..b5725f082 100644 --- a/src/hwlm/noodle_build.h +++ b/src/hwlm/noodle_build.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,11 +30,11 @@ * \brief Noodle literal matcher: build code. */ -#ifndef NOODLE_BUILD_H_048A1A6D585A9A -#define NOODLE_BUILD_H_048A1A6D585A9A +#ifndef NOODLE_BUILD_H +#define NOODLE_BUILD_H #include "ue2common.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" struct noodTable; @@ -43,7 +43,7 @@ namespace ue2 { struct hwlmLiteral; /** \brief Construct a Noodle matcher for the given literal.
*/ -ue2::aligned_unique_ptr noodBuildTable(const hwlmLiteral &lit); +bytecode_ptr noodBuildTable(const hwlmLiteral &lit); size_t noodSize(const noodTable *n); @@ -61,5 +61,5 @@ void noodPrintStats(const noodTable *n, FILE *f); #endif // DUMP_SUPPORT -#endif /* NOODLE_BUILD_H_048A1A6D585A9A */ +#endif /* NOODLE_BUILD_H */ diff --git a/src/hwlm/noodle_engine.c b/src/hwlm/noodle_engine.c index 1d1ab4e68..9758f42b2 100644 --- a/src/hwlm/noodle_engine.c +++ b/src/hwlm/noodle_engine.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,8 +33,11 @@ #include "noodle_engine.h" #include "noodle_internal.h" #include "ue2common.h" +#include "util/arch.h" #include "util/bitutils.h" #include "util/compare.h" +#include "util/intrinsics.h" +#include "util/join.h" #include "util/masked_move.h" #include "util/simd_utils.h" @@ -50,6 +53,24 @@ struct cb_info { size_t offsetAdj; //!< used in streaming mode }; +#if defined(HAVE_AVX512) +#define CHUNKSIZE 64 +#define MASK_TYPE m512 +#define Z_BITS 64 +#define Z_TYPE u64a +#elif defined(HAVE_AVX2) +#define CHUNKSIZE 32 +#define MASK_TYPE m256 +#define Z_BITS 32 +#define Z_TYPE u32 +#else +#define CHUNKSIZE 16 +#define MASK_TYPE m128 +#define Z_BITS 32 +#define Z_TYPE u32 +#endif + + #define RETURN_IF_TERMINATED(x) \ { \ if ((x) == HWLM_TERMINATED) { \ @@ -60,8 +81,9 @@ struct cb_info { #define SINGLE_ZSCAN() \ do { \ while (unlikely(z)) { \ - u32 pos = findAndClearLSB_32(&z); \ + Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \ size_t matchPos = d - buf + pos; \ + DEBUG_PRINTF("match pos %zu\n", matchPos); \ hwlmcb_rv_t rv = final(buf, len, key, 1, 0, 0, noCase, cbi, \ matchPos); \ RETURN_IF_TERMINATED(rv); \ @@ -71,8 +93,9 @@ struct cb_info { #define DOUBLE_ZSCAN() \ do { \ while (unlikely(z)) { \ - u32 pos = findAndClearLSB_32(&z); \ + Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \ size_t matchPos = d - buf + pos - 1; \ + DEBUG_PRINTF("match pos %zu\n", matchPos); \ hwlmcb_rv_t rv = final(buf, len, key, keyLen, keyOffset, 1, \ noCase, cbi, matchPos); \ RETURN_IF_TERMINATED(rv); \ @@ -109,7 +132,11 @@ hwlm_error_t final(const u8 *buf, size_t len, const u8 *key, size_t keyLen, return HWLM_SUCCESS; } -#if defined(__AVX2__) +#if defined(HAVE_AVX512) +#define CHUNKSIZE 64 +#define MASK_TYPE m512 +#include "noodle_engine_avx512.c" +#elif defined(HAVE_AVX2) #define CHUNKSIZE 32 #define MASK_TYPE m256 #include "noodle_engine_avx2.c" @@ -122,12 +149,14 @@ hwlm_error_t final(const u8 *buf, size_t len, const u8 *key, size_t keyLen, static really_inline hwlm_error_t scanSingleMain(const u8 *buf, size_t len, const u8 *key, bool noCase, const struct cb_info *cbi) { - hwlm_error_t rv; - size_t end = len; const MASK_TYPE mask1 = getMask(key[0], noCase); const MASK_TYPE caseMask = getCaseMask(); +#if !defined(HAVE_AVX512) + hwlm_error_t rv; + size_t end = len; + if (len < CHUNKSIZE) { rv = scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, 0, len); return rv; @@ -172,13 +201,15 @@ hwlm_error_t scanSingleMain(const u8 *buf, size_t len, const u8 *key, cbi, s2End, end); return rv; +#else // HAVE_AVX512 + return scanSingle512(buf, len, key, noCase, caseMask, mask1, cbi); +#endif } static really_inline hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key, size_t keyLen, size_t keyOffset, bool noCase, const struct cb_info *cbi) { - 
hwlm_error_t rv; // we stop scanning for the key-fragment when the rest of the key can't // possibly fit in the remaining buffer size_t end = len - keyLen + keyOffset + 2; @@ -187,6 +218,9 @@ hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key, const MASK_TYPE mask1 = getMask(key[keyOffset + 0], noCase); const MASK_TYPE mask2 = getMask(key[keyOffset + 1], noCase); +#if !defined(HAVE_AVX512) + hwlm_error_t rv; + if (end - keyOffset < CHUNKSIZE) { rv = scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask, mask1, mask2, cbi, keyOffset, end); @@ -243,6 +277,10 @@ hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key, caseMask, mask1, mask2, cbi, off, end); return rv; +#else // AVX512 + return scanDouble512(buf, len, key, keyLen, keyOffset, noCase, caseMask, + mask1, mask2, cbi, keyOffset, end); +#endif // AVX512 } diff --git a/src/hwlm/noodle_engine_avx2.c b/src/hwlm/noodle_engine_avx2.c index 14d0eab54..a3f46047e 100644 --- a/src/hwlm/noodle_engine_avx2.c +++ b/src/hwlm/noodle_engine_avx2.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -117,9 +117,9 @@ hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key, if (l < 4) { u8 *vp = (u8*)&v; switch (l) { - case 3: vp[2] = d[2]; - case 2: vp[1] = d[1]; - case 1: vp[0] = d[0]; + case 3: vp[2] = d[2]; // fallthrough + case 2: vp[1] = d[1]; // fallthrough + case 1: vp[0] = d[0]; // fallthrough } } else { v = masked_move256_len(d, l); @@ -157,9 +157,9 @@ hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key, if (l < 4) { u8 *vp = (u8*)&v; switch (l) { - case 3: vp[2] = d[2]; - case 2: vp[1] = d[1]; - case 1: vp[0] = d[0]; + case 3: vp[2] = d[2]; // fallthrough + case 2: vp[1] = d[1]; // fallthrough + case 1: vp[0] = d[0]; // fallthrough } } else { v = masked_move256_len(d, l); diff --git a/src/hwlm/noodle_engine_avx512.c b/src/hwlm/noodle_engine_avx512.c new file mode 100644 index 000000000..d4e6527f8 --- /dev/null +++ b/src/hwlm/noodle_engine_avx512.c @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* noodle scan parts for AVX512 */ + +static really_inline +m512 getMask(u8 c, bool noCase) { + u8 k = caseClear8(c, noCase); + return set64x8(k); +} + +static really_inline +m512 getCaseMask(void) { + return set64x8(CASE_CLEAR); +} + +// The short scan routine. It is used both to scan data up to an +// alignment boundary if needed and to finish off data that the aligned scan +// function can't handle (due to small/unaligned chunk at end) +static really_inline +hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key, + bool noCase, m512 caseMask, m512 mask1, + const struct cb_info *cbi, size_t start, + size_t end) { + const u8 *d = buf + start; + ptrdiff_t scan_len = end - start; + DEBUG_PRINTF("scan_len %zu\n", scan_len); + assert(scan_len <= 64); + if (!scan_len) { + return HWLM_SUCCESS; + } + + __mmask64 k = (~0ULL) >> (64 - scan_len); + DEBUG_PRINTF("load mask 0x%016llx\n", k); + + m512 v = loadu_maskz_m512(k, d); + + if (noCase) { + v = and512(v, caseMask); + } + + // reuse the load mask to indicate valid bytes + u64a z = masked_eq512mask(k, mask1, v); + + SINGLE_ZSCAN(); + + return HWLM_SUCCESS; +} + +static really_inline +hwlm_error_t scanSingle512(const u8 *buf, size_t len, const u8 *key, + bool noCase, m512 caseMask, m512 mask1, + const struct cb_info *cbi) { + const u8 *d = buf; + const u8 *e = buf + len; + DEBUG_PRINTF("start %p end %p \n", d, e); + assert(d < e); + if (d + 64 >= e) { + goto tail; + } + + // peel off first part to cacheline boundary + const u8 *d1 = ROUNDUP_PTR(d, 64); + if (scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, 0, + d1 - d) == HWLM_TERMINATED) { + return HWLM_TERMINATED; + } + d = d1; + + for (; d + 64 < e; d += 64) { + DEBUG_PRINTF("d %p e %p \n", d, e); + m512 v = noCase ? 
and512(load512(d), caseMask) : load512(d); + + u64a z = eq512mask(mask1, v); + __builtin_prefetch(d + 128); + + SINGLE_ZSCAN(); + } + +tail: + DEBUG_PRINTF("d %p e %p \n", d, e); + // finish off tail + + return scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, d - buf, + e - buf); +} + +static really_inline +hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key, + size_t keyLen, size_t keyOffset, bool noCase, + m512 caseMask, m512 mask1, m512 mask2, + const struct cb_info *cbi, u64a *lastz0, + size_t start, size_t end) { + DEBUG_PRINTF("start %zu end %zu last 0x%016llx\n", start, end, *lastz0); + const u8 *d = buf + start; + ptrdiff_t scan_len = end - start; + if (!scan_len) { + return HWLM_SUCCESS; + } + assert(scan_len <= 64); + __mmask64 k = (~0ULL) >> (64 - scan_len); + DEBUG_PRINTF("load mask 0x%016llx scan_len %zu\n", k, scan_len); + + m512 v = loadu_maskz_m512(k, d); + if (noCase) { + v = and512(v, caseMask); + } + + u64a z0 = masked_eq512mask(k, mask1, v); + u64a z1 = masked_eq512mask(k, mask2, v); + u64a z = (*lastz0 | (z0 << 1)) & z1; + DEBUG_PRINTF("z 0x%016llx\n", z); + + DOUBLE_ZSCAN(); + *lastz0 = z0 >> (scan_len - 1); + return HWLM_SUCCESS; +} + +static really_inline +hwlm_error_t scanDouble512(const u8 *buf, size_t len, const u8 *key, + size_t keyLen, size_t keyOffset, bool noCase, + m512 caseMask, m512 mask1, m512 mask2, + const struct cb_info *cbi, size_t start, + size_t end) { + const u8 *d = buf + start; + const u8 *e = buf + end; + u64a lastz0 = 0; + DEBUG_PRINTF("start %zu end %zu \n", start, end); + assert(d < e); + if (d + 64 >= e) { + goto tail; + } + + // peel off first part to cacheline boundary + const u8 *d1 = ROUNDUP_PTR(d, 64); + if (scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask, + mask1, mask2, cbi, &lastz0, start, + d1 - buf) == HWLM_TERMINATED) { + return HWLM_TERMINATED; + } + d = d1; + + for (; d + 64 < e; d += 64) { + DEBUG_PRINTF("d %p e %p 0x%016llx\n", d, e, lastz0); + m512 v = noCase ? and512(load512(d), caseMask) : load512(d); + + /* we have to pull the masks out of the AVX registers because we can't + byte shift between the lanes */ + u64a z0 = eq512mask(mask1, v); + u64a z1 = eq512mask(mask2, v); + u64a z = (lastz0 | (z0 << 1)) & z1; + lastz0 = z0 >> 63; + + // On large packet buffers, this prefetch appears to get us about 2%. 
+ __builtin_prefetch(d + 256); + + DEBUG_PRINTF("z 0x%016llx\n", z); + + DOUBLE_ZSCAN(); + } + +tail: + DEBUG_PRINTF("d %p e %p off %zu \n", d, e, d - buf); + // finish off tail + + return scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask, + mask1, mask2, cbi, &lastz0, d - buf, end); +} diff --git a/src/nfa/accel.c b/src/nfa/accel.c index 99eab11dc..2bc60945f 100644 --- a/src/nfa/accel.c +++ b/src/nfa/accel.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,9 +30,6 @@ #include "shufti.h" #include "truffle.h" #include "vermicelli.h" -#include "multishufti.h" -#include "multitruffle.h" -#include "multivermicelli.h" #include "ue2common.h" const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) { @@ -132,220 +129,6 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) { rv = c_end; break; - /* multibyte matchers */ - case ACCEL_MLVERM: - DEBUG_PRINTF("accel mlverm %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = long_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len); - break; - case ACCEL_MLVERM_NOCASE: - DEBUG_PRINTF("accel mlverm nc %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = long_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len); - break; - case ACCEL_MLGVERM: - DEBUG_PRINTF("accel mlgverm %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = longgrab_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len); - break; - case ACCEL_MLGVERM_NOCASE: - DEBUG_PRINTF("accel mlgverm nc %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = longgrab_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len); - break; - case ACCEL_MSVERM: - DEBUG_PRINTF("accel msverm %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shift_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len); - break; - case ACCEL_MSVERM_NOCASE: - DEBUG_PRINTF("accel msverm nc %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shift_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len); - break; - case ACCEL_MSGVERM: - DEBUG_PRINTF("accel msgverm %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shiftgrab_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len); - break; - case ACCEL_MSGVERM_NOCASE: - DEBUG_PRINTF("accel msgverm nc %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shiftgrab_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len); - break; - case ACCEL_MDSVERM: - DEBUG_PRINTF("accel mdsverm %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshift_vermicelliExec(accel->mdverm.c, 0, c, c_end, - accel->mdverm.len1, accel->mdverm.len2); - break; - case ACCEL_MDSVERM_NOCASE: - DEBUG_PRINTF("accel mdsverm nc %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshift_vermicelliExec(accel->mdverm.c, 1, c, c_end, - accel->mdverm.len1, accel->mdverm.len2); - break; - case ACCEL_MDSGVERM: - DEBUG_PRINTF("accel mdsgverm %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 0, c, c_end, - accel->mdverm.len1, accel->mdverm.len2); - break; - case ACCEL_MDSGVERM_NOCASE: - DEBUG_PRINTF("accel mdsgverm nc %p %p\n", c, 
c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 1, c, c_end, - accel->mdverm.len1, accel->mdverm.len2); - break; - case ACCEL_MLSHUFTI: - DEBUG_PRINTF("accel mlshufti %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = long_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end, - accel->mshufti.len); - break; - case ACCEL_MLGSHUFTI: - DEBUG_PRINTF("accel mlgshufti %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = longgrab_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end, - accel->mshufti.len); - break; - case ACCEL_MSSHUFTI: - DEBUG_PRINTF("accel msshufti %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shift_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end, - accel->mshufti.len); - break; - case ACCEL_MSGSHUFTI: - DEBUG_PRINTF("accel msgshufti %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shiftgrab_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end, - accel->mshufti.len); - break; - case ACCEL_MDSSHUFTI: - DEBUG_PRINTF("accel mdsshufti %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshift_shuftiExec(accel->mdshufti.lo, accel->mdshufti.hi, c, c_end, - accel->mdshufti.len1, accel->mdshufti.len2); - break; - case ACCEL_MDSGSHUFTI: - DEBUG_PRINTF("accel msgshufti %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshiftgrab_shuftiExec(accel->mdshufti.lo, accel->mdshufti.hi, c, c_end, - accel->mdshufti.len1, accel->mdshufti.len2); - break; - case ACCEL_MLTRUFFLE: - DEBUG_PRINTF("accel mltruffle %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = long_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2, - c, c_end, accel->mtruffle.len); - break; - case ACCEL_MLGTRUFFLE: - DEBUG_PRINTF("accel mlgtruffle %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = longgrab_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2, - c, c_end, accel->mtruffle.len); - break; - case ACCEL_MSTRUFFLE: - DEBUG_PRINTF("accel mstruffle %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shift_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2, - c, c_end, accel->mtruffle.len); - break; - case ACCEL_MSGTRUFFLE: - DEBUG_PRINTF("accel msgtruffle %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shiftgrab_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2, - c, c_end, accel->mtruffle.len); - break; - case ACCEL_MDSTRUFFLE: - DEBUG_PRINTF("accel mdstruffle %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshift_truffleExec(accel->mdtruffle.mask1, - accel->mdtruffle.mask2, c, c_end, - accel->mdtruffle.len1, - accel->mdtruffle.len2); - break; - case ACCEL_MDSGTRUFFLE: - DEBUG_PRINTF("accel mdsgtruffle %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshiftgrab_truffleExec(accel->mdtruffle.mask1, - accel->mdtruffle.mask2, c, c_end, - accel->mdtruffle.len1, - accel->mdtruffle.len2); - break; - default: assert(!"not here"); diff --git a/src/nfa/accel.h b/src/nfa/accel.h index a13563b68..3a03d0596 100644 --- a/src/nfa/accel.h +++ b/src/nfa/accel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -61,36 +61,7 @@ enum AccelType { ACCEL_DSHUFTI, 
ACCEL_TRUFFLE, ACCEL_RED_TAPE, - /* multibyte vermicellis */ - ACCEL_MLVERM, - ACCEL_MLVERM_NOCASE, - ACCEL_MLGVERM, - ACCEL_MLGVERM_NOCASE, - ACCEL_MSVERM, - ACCEL_MSVERM_NOCASE, - ACCEL_MSGVERM, - ACCEL_MSGVERM_NOCASE, - ACCEL_MDSVERM, - ACCEL_MDSVERM_NOCASE, - ACCEL_MDSGVERM, - ACCEL_MDSGVERM_NOCASE, - /* multibyte shuftis */ - ACCEL_MLSHUFTI, - ACCEL_MLGSHUFTI, - ACCEL_MSSHUFTI, - ACCEL_MSGSHUFTI, - ACCEL_MDSSHUFTI, - ACCEL_MDSGSHUFTI, - /* multibyte truffles */ - ACCEL_MLTRUFFLE, - ACCEL_MLGTRUFFLE, - ACCEL_MSTRUFFLE, - ACCEL_MSGTRUFFLE, - ACCEL_MDSTRUFFLE, - ACCEL_MDSGTRUFFLE, - /* masked dverm */ ACCEL_DVERM_MASKED, - }; /** \brief Structure for accel framework. */ @@ -140,42 +111,12 @@ union AccelAux { m128 lo2; m128 hi2; } dshufti; - struct { - u8 accel_type; - u8 offset; - m128 lo; - m128 hi; - u8 len; - } mshufti; - struct { - u8 accel_type; - u8 offset; - m128 lo; - m128 hi; - u8 len1; - u8 len2; - } mdshufti; struct { u8 accel_type; u8 offset; m128 mask1; m128 mask2; } truffle; - struct { - u8 accel_type; - u8 offset; - m128 mask1; - m128 mask2; - u8 len; - } mtruffle; - struct { - u8 accel_type; - u8 offset; - m128 mask1; - m128 mask2; - u8 len1; - u8 len2; - } mdtruffle; }; /** diff --git a/src/nfa/accel_dfa_build_strat.cpp b/src/nfa/accel_dfa_build_strat.cpp index d257b530b..7c56ba723 100644 --- a/src/nfa/accel_dfa_build_strat.cpp +++ b/src/nfa/accel_dfa_build_strat.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,9 +33,11 @@ #include "nfagraph/ng_limex_accel.h" #include "shufticompile.h" #include "trufflecompile.h" +#include "util/accel_scheme.h" #include "util/charreach.h" #include "util/container.h" #include "util/dump_charclass.h" +#include "util/small_vector.h" #include "util/verify_types.h" #include @@ -49,16 +51,15 @@ namespace ue2 { namespace { struct path { - vector reach; + small_vector reach; dstate_id_t dest = DEAD_STATE; - explicit path(dstate_id_t base) : dest(base) { - } + explicit path(dstate_id_t base) : dest(base) {} }; }; -static -void dump_paths(const vector &paths) { - for (UNUSED const auto &p : paths) { +template +void dump_paths(const Container &paths) { + for (UNUSED const path &p : paths) { DEBUG_PRINTF("[%s] -> %u\n", describeClasses(p.reach).c_str(), p.dest); } DEBUG_PRINTF("%zu paths\n", paths.size()); @@ -113,17 +114,17 @@ void extend(const raw_dfa &rdfa, const path &p, } else { path pp = append(p, CharReach(), p.dest); all[p.dest].push_back(pp); - out.push_back(pp); + out.push_back(move(pp)); } } if (!s.reports_eod.empty()) { path pp = append(p, CharReach(), p.dest); all[p.dest].push_back(pp); - out.push_back(pp); + out.push_back(move(pp)); } - map dest; + flat_map dest; for (unsigned i = 0; i < N_CHARS; i++) { u32 succ = s.next[rdfa.alpha_remap[i]]; dest[succ].set(i); @@ -140,7 +141,7 @@ void extend(const raw_dfa &rdfa, const path &p, DEBUG_PRINTF("----good: [%s] -> %u\n", describeClasses(pp.reach).c_str(), pp.dest); all[e.first].push_back(pp); - out.push_back(pp); + out.push_back(move(pp)); } } @@ -162,8 +163,10 @@ vector> generate_paths(const raw_dfa &rdfa, dump_paths(paths); vector> rv; + rv.reserve(paths.size()); for (auto &p : paths) { - rv.push_back(move(p.reach)); + rv.push_back(vector(std::make_move_iterator(p.reach.begin()), + std::make_move_iterator(p.reach.end()))); } return rv; } @@ -327,7 +330,7 @@ 
accel_dfa_build_strat::find_escape_strings(dstate_id_t this_idx) const { const dstate &raw = rdfa.states[this_idx]; const vector rev_map = reverse_alpha_remapping(rdfa); bool outs2_broken = false; - map succs; + flat_map succs; for (u32 i = 0; i < rev_map.size(); i++) { if (raw.next[i] == this_idx) { @@ -379,16 +382,18 @@ accel_dfa_build_strat::find_escape_strings(dstate_id_t this_idx) const { for (auto jj = cr_all_j.find_first(); jj != CharReach::npos; jj = cr_all_j.find_next(jj)) { rv.double_byte.emplace((u8)ii, (u8)jj); + if (rv.double_byte.size() > 8) { + DEBUG_PRINTF("outs2 too big\n"); + outs2_broken = true; + goto done; + } } } } } - if (rv.double_byte.size() > 8) { - DEBUG_PRINTF("outs2 too big\n"); - outs2_broken = true; - } - + done: + assert(outs2_broken || rv.double_byte.size() <= 8); if (outs2_broken) { rv.double_byte.clear(); } @@ -536,17 +541,17 @@ accel_dfa_build_strat::getAccelInfo(const Grey &grey) { dstate_id_t sds_proxy = get_sds_or_proxy(rdfa); DEBUG_PRINTF("sds %hu\n", sds_proxy); - for (size_t i = 0; i < rdfa.states.size(); i++) { + /* Find accel info for a single state. */ + auto do_state = [&](size_t i) { if (i == DEAD_STATE) { - continue; + return; } /* Note on report acceleration states: While we can't accelerate while - * we - * are spamming out callbacks, the QR code paths don't raise reports + * we are spamming out callbacks, the QR code paths don't raise reports * during scanning so they can accelerate report states. */ if (generates_callbacks(rdfa.kind) && !rdfa.states[i].reports.empty()) { - continue; + return; } size_t single_limit = @@ -557,15 +562,28 @@ accel_dfa_build_strat::getAccelInfo(const Grey &grey) { if (ei.cr.count() > single_limit) { DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i, ei.cr.count()); - continue; + return; } DEBUG_PRINTF("state %zu should be accelerable %zu\n", i, ei.cr.count()); rv[i] = ei; + }; + + if (only_accel_init) { + DEBUG_PRINTF("only computing accel for init states\n"); + do_state(rdfa.start_anchored); + if (rdfa.start_floating != rdfa.start_anchored) { + do_state(rdfa.start_floating); + } + } else { + DEBUG_PRINTF("computing accel for all states\n"); + for (size_t i = 0; i < rdfa.states.size(); i++) { + do_state(i); + } } - /* provide accleration states to states in the region of sds */ + /* provide acceleration states to states in the region of sds */ if (contains(rv, sds_proxy)) { AccelScheme sds_ei = rv[sds_proxy]; sds_ei.double_byte.clear(); /* region based on single byte scheme diff --git a/src/nfa/accel_dfa_build_strat.h b/src/nfa/accel_dfa_build_strat.h index 3cfaf2725..881892ed4 100644 --- a/src/nfa/accel_dfa_build_strat.h +++ b/src/nfa/accel_dfa_build_strat.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -43,8 +43,8 @@ struct Grey; class accel_dfa_build_strat : public dfa_build_strat { public: - explicit accel_dfa_build_strat(const ReportManager &rm_in) - : dfa_build_strat(rm_in) {} + accel_dfa_build_strat(const ReportManager &rm_in, bool only_accel_init_in) + : dfa_build_strat(rm_in), only_accel_init(only_accel_init_in) {} virtual AccelScheme find_escape_strings(dstate_id_t this_idx) const; virtual size_t accelSize(void) const = 0; virtual u32 max_allowed_offset_accel() const = 0; @@ -53,6 +53,8 @@ class accel_dfa_build_strat : public dfa_build_strat { virtual void buildAccel(dstate_id_t 
this_idx, const AccelScheme &info, void *accel_out); virtual std::map getAccelInfo(const Grey &grey); +private: + bool only_accel_init; }; } // namespace ue2 diff --git a/src/nfa/accel_dump.cpp b/src/nfa/accel_dump.cpp index e99e71a59..0d19fa8c6 100644 --- a/src/nfa/accel_dump.cpp +++ b/src/nfa/accel_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -93,54 +93,6 @@ const char *accelName(u8 accel_type) { return "truffle"; case ACCEL_RED_TAPE: return "red tape"; - case ACCEL_MLVERM: - return "multibyte long vermicelli"; - case ACCEL_MLVERM_NOCASE: - return "multibyte long vermicelli nocase"; - case ACCEL_MLGVERM: - return "multibyte long-grab vermicelli"; - case ACCEL_MLGVERM_NOCASE: - return "multibyte long-grab vermicelli nocase"; - case ACCEL_MSVERM: - return "multibyte shift vermicelli"; - case ACCEL_MSVERM_NOCASE: - return "multibyte shift vermicelli nocase"; - case ACCEL_MSGVERM: - return "multibyte shift-grab vermicelli"; - case ACCEL_MSGVERM_NOCASE: - return "multibyte shift-grab vermicelli nocase"; - case ACCEL_MDSVERM: - return "multibyte doubleshift vermicelli"; - case ACCEL_MDSVERM_NOCASE: - return "multibyte doubleshift vermicelli nocase"; - case ACCEL_MDSGVERM: - return "multibyte doubleshift-grab vermicelli"; - case ACCEL_MDSGVERM_NOCASE: - return "multibyte doubleshift-grab vermicelli nocase"; - case ACCEL_MLSHUFTI: - return "multibyte long shufti"; - case ACCEL_MLGSHUFTI: - return "multibyte long-grab shufti"; - case ACCEL_MSSHUFTI: - return "multibyte shift shufti"; - case ACCEL_MSGSHUFTI: - return "multibyte shift-grab shufti"; - case ACCEL_MDSSHUFTI: - return "multibyte doubleshift shufti"; - case ACCEL_MDSGSHUFTI: - return "multibyte doubleshift-grab shufti"; - case ACCEL_MLTRUFFLE: - return "multibyte long truffle"; - case ACCEL_MLGTRUFFLE: - return "multibyte long-grab truffle"; - case ACCEL_MSTRUFFLE: - return "multibyte shift truffle"; - case ACCEL_MSGTRUFFLE: - return "multibyte shift-grab truffle"; - case ACCEL_MDSTRUFFLE: - return "multibyte doubleshift truffle"; - case ACCEL_MDSGTRUFFLE: - return "multibyte doubleshift-grab truffle"; default: return "unknown!"; } @@ -283,59 +235,6 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) { (const u8 *)&accel.truffle.mask2); break; } - case ACCEL_MLVERM: - case ACCEL_MLVERM_NOCASE: - case ACCEL_MLGVERM: - case ACCEL_MLGVERM_NOCASE: - case ACCEL_MSVERM: - case ACCEL_MSVERM_NOCASE: - case ACCEL_MSGVERM: - case ACCEL_MSGVERM_NOCASE: - fprintf(f, " [\\x%02hhx] len:%u\n", accel.mverm.c, accel.mverm.len); - break; - case ACCEL_MDSVERM: - case ACCEL_MDSVERM_NOCASE: - case ACCEL_MDSGVERM: - case ACCEL_MDSGVERM_NOCASE: - fprintf(f, " [\\x%02hhx] len1:%u len2:%u\n", accel.mdverm.c, accel.mdverm.len1, - accel.mdverm.len2); - break; - case ACCEL_MLSHUFTI: - case ACCEL_MLGSHUFTI: - case ACCEL_MSSHUFTI: - case ACCEL_MSGSHUFTI: - fprintf(f, " len:%u\n", accel.mshufti.len); - dumpShuftiMasks(f, (const u8 *)&accel.mshufti.lo, - (const u8 *)&accel.mshufti.hi); - dumpShuftiCharReach(f, (const u8 *)&accel.mshufti.lo, - (const u8 *)&accel.mshufti.hi); - break; - case ACCEL_MDSSHUFTI: - case ACCEL_MDSGSHUFTI: - fprintf(f, " len1:%u len2:%u\n", accel.mdshufti.len1, accel.mdshufti.len2); - dumpShuftiMasks(f, (const u8 *)&accel.mdshufti.lo, - (const u8 *)&accel.mdshufti.hi); - dumpShuftiCharReach(f, (const u8 
*)&accel.mdshufti.lo, - (const u8 *)&accel.mdshufti.hi); - break; - case ACCEL_MLTRUFFLE: - case ACCEL_MLGTRUFFLE: - case ACCEL_MSTRUFFLE: - case ACCEL_MSGTRUFFLE: - fprintf(f, " len:%u\n", accel.mtruffle.len); - dumpTruffleMasks(f, (const u8 *)&accel.mtruffle.mask1, - (const u8 *)&accel.mtruffle.mask2); - dumpTruffleCharReach(f, (const u8 *)&accel.mtruffle.mask1, - (const u8 *)&accel.mtruffle.mask2); - break; - case ACCEL_MDSTRUFFLE: - case ACCEL_MDSGTRUFFLE: - fprintf(f, " len1:%u len2:%u\n", accel.mdtruffle.len1, accel.mdtruffle.len2); - dumpTruffleMasks(f, (const u8 *)&accel.mdtruffle.mask1, - (const u8 *)&accel.mdtruffle.mask2); - dumpTruffleCharReach(f, (const u8 *)&accel.mdtruffle.mask1, - (const u8 *)&accel.mdtruffle.mask2); - break; default: fprintf(f, "\n"); break; diff --git a/src/nfa/accelcompile.cpp b/src/nfa/accelcompile.cpp index 32e569ba9..a224410dc 100644 --- a/src/nfa/accelcompile.cpp +++ b/src/nfa/accelcompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -225,274 +225,6 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) { aux->accel_type = ACCEL_NONE; } -static -void buildAccelMulti(const AccelInfo &info, AccelAux *aux) { - if (info.ma_type == MultibyteAccelInfo::MAT_NONE) { - DEBUG_PRINTF("no multimatch for us :("); - return; - } - - u32 offset = info.multiaccel_offset; - const CharReach &stops = info.multiaccel_stops; - - assert(aux->accel_type == ACCEL_NONE); - if (stops.all()) { - return; - } - - size_t outs = stops.count(); - DEBUG_PRINTF("%zu outs\n", outs); - assert(outs && outs < 256); - - switch (info.ma_type) { - case MultibyteAccelInfo::MAT_LONG: - if (outs == 1) { - aux->accel_type = ACCEL_MLVERM; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first(); - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); - return; - } - if (outs == 2 && stops.isCaselessChar()) { - aux->accel_type = ACCEL_MLVERM_NOCASE; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first() & CASE_CLEAR; - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", - aux->verm.c); - return; - } - break; - case MultibyteAccelInfo::MAT_LONGGRAB: - if (outs == 1) { - aux->accel_type = ACCEL_MLGVERM; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first(); - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); - return; - } - if (outs == 2 && stops.isCaselessChar()) { - aux->accel_type = ACCEL_MLGVERM_NOCASE; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first() & CASE_CLEAR; - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", - aux->verm.c); - return; - } - break; - case MultibyteAccelInfo::MAT_SHIFT: - if (outs == 1) { - aux->accel_type = ACCEL_MSVERM; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first(); - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); - return; - } - if (outs == 2 && stops.isCaselessChar()) { - aux->accel_type = ACCEL_MSVERM_NOCASE; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first() & CASE_CLEAR; - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", - aux->verm.c); - return; - } - break; - case 
MultibyteAccelInfo::MAT_SHIFTGRAB: - if (outs == 1) { - aux->accel_type = ACCEL_MSGVERM; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first(); - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); - return; - } - if (outs == 2 && stops.isCaselessChar()) { - aux->accel_type = ACCEL_MSGVERM_NOCASE; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first() & CASE_CLEAR; - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", - aux->verm.c); - return; - } - break; - case MultibyteAccelInfo::MAT_DSHIFT: - if (outs == 1) { - aux->accel_type = ACCEL_MDSVERM; - aux->mdverm.offset = offset; - aux->mdverm.c = stops.find_first(); - aux->mdverm.len1 = info.ma_len1; - aux->mdverm.len2 = info.ma_len2; - DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); - return; - } - if (outs == 2 && stops.isCaselessChar()) { - aux->accel_type = ACCEL_MDSVERM_NOCASE; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first() & CASE_CLEAR; - aux->mdverm.len1 = info.ma_len1; - aux->mdverm.len2 = info.ma_len2; - DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", - aux->verm.c); - return; - } - break; - case MultibyteAccelInfo::MAT_DSHIFTGRAB: - if (outs == 1) { - aux->accel_type = ACCEL_MDSGVERM; - aux->mdverm.offset = offset; - aux->mdverm.c = stops.find_first(); - aux->mdverm.len1 = info.ma_len1; - aux->mdverm.len2 = info.ma_len2; - DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); - return; - } - if (outs == 2 && stops.isCaselessChar()) { - aux->accel_type = ACCEL_MDSGVERM_NOCASE; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first() & CASE_CLEAR; - aux->mdverm.len1 = info.ma_len1; - aux->mdverm.len2 = info.ma_len2; - DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", - aux->verm.c); - return; - } - break; - default: - // shouldn't happen - assert(0); - return; - } - - DEBUG_PRINTF("attempting shufti for %zu chars\n", outs); - - switch (info.ma_type) { - case MultibyteAccelInfo::MAT_LONG: - if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo, - (u8 *)&aux->mshufti.hi) == -1) { - break; - } - aux->accel_type = ACCEL_MLSHUFTI; - aux->mshufti.offset = offset; - aux->mshufti.len = info.ma_len1; - return; - case MultibyteAccelInfo::MAT_LONGGRAB: - if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo, - (u8 *)&aux->mshufti.hi) == -1) { - break; - } - aux->accel_type = ACCEL_MLGSHUFTI; - aux->mshufti.offset = offset; - aux->mshufti.len = info.ma_len1; - return; - case MultibyteAccelInfo::MAT_SHIFT: - if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo, - (u8 *)&aux->mshufti.hi) == -1) { - break; - } - aux->accel_type = ACCEL_MSSHUFTI; - aux->mshufti.offset = offset; - aux->mshufti.len = info.ma_len1; - return; - case MultibyteAccelInfo::MAT_SHIFTGRAB: - if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo, - (u8 *)&aux->mshufti.hi) == -1) { - break; - } - aux->accel_type = ACCEL_MSGSHUFTI; - aux->mshufti.offset = offset; - aux->mshufti.len = info.ma_len1; - return; - case MultibyteAccelInfo::MAT_DSHIFT: - if (shuftiBuildMasks(stops, (u8 *)&aux->mdshufti.lo, - (u8 *)&aux->mdshufti.hi) == -1) { - break; - } - aux->accel_type = ACCEL_MDSSHUFTI; - aux->mdshufti.offset = offset; - aux->mdshufti.len1 = info.ma_len1; - aux->mdshufti.len2 = info.ma_len2; - return; - case MultibyteAccelInfo::MAT_DSHIFTGRAB: - if (shuftiBuildMasks(stops, (u8 *)&aux->mdshufti.lo, - (u8 *)&aux->mdshufti.hi) == -1) { - break; - } - aux->accel_type = ACCEL_MDSGSHUFTI; 
- aux->mdshufti.offset = offset; - aux->mdshufti.len1 = info.ma_len1; - aux->mdshufti.len2 = info.ma_len2; - return; - default: - // shouldn't happen - assert(0); - return; - } - DEBUG_PRINTF("shufti build failed, falling through\n"); - - if (outs <= ACCEL_MAX_STOP_CHAR) { - DEBUG_PRINTF("building Truffle for %zu chars\n", outs); - switch (info.ma_type) { - case MultibyteAccelInfo::MAT_LONG: - aux->accel_type = ACCEL_MLTRUFFLE; - aux->mtruffle.offset = offset; - aux->mtruffle.len = info.ma_len1; - truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, - (u8 *)&aux->mtruffle.mask2); - break; - case MultibyteAccelInfo::MAT_LONGGRAB: - aux->accel_type = ACCEL_MLGTRUFFLE; - aux->mtruffle.offset = offset; - aux->mtruffle.len = info.ma_len1; - truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, - (u8 *)&aux->mtruffle.mask2); - break; - case MultibyteAccelInfo::MAT_SHIFT: - aux->accel_type = ACCEL_MSTRUFFLE; - aux->mtruffle.offset = offset; - aux->mtruffle.len = info.ma_len1; - truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, - (u8 *)&aux->mtruffle.mask2); - break; - case MultibyteAccelInfo::MAT_SHIFTGRAB: - aux->accel_type = ACCEL_MSGTRUFFLE; - aux->mtruffle.offset = offset; - aux->mtruffle.len = info.ma_len1; - truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, - (u8 *)&aux->mtruffle.mask2); - break; - case MultibyteAccelInfo::MAT_DSHIFT: - aux->accel_type = ACCEL_MDSTRUFFLE; - aux->mdtruffle.offset = offset; - aux->mdtruffle.len1 = info.ma_len1; - aux->mdtruffle.len2 = info.ma_len2; - truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, - (u8 *)&aux->mdtruffle.mask2); - break; - case MultibyteAccelInfo::MAT_DSHIFTGRAB: - aux->accel_type = ACCEL_MDSGTRUFFLE; - aux->mdtruffle.offset = offset; - aux->mdtruffle.len1 = info.ma_len1; - aux->mdtruffle.len2 = info.ma_len2; - truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, - (u8 *)&aux->mdtruffle.mask2); - break; - default: - // shouldn't happen - assert(0); - return; - } - return; - } - - DEBUG_PRINTF("unable to accelerate multibyte case with %zu outs\n", outs); -} - bool buildAccelAux(const AccelInfo &info, AccelAux *aux) { assert(aux->accel_type == ACCEL_NONE); if (info.single_stops.none()) { @@ -500,9 +232,6 @@ bool buildAccelAux(const AccelInfo &info, AccelAux *aux) { aux->accel_type = ACCEL_RED_TAPE; aux->generic.offset = info.single_offset; } - if (aux->accel_type == ACCEL_NONE) { - buildAccelMulti(info, aux); - } if (aux->accel_type == ACCEL_NONE) { buildAccelDouble(info, aux); } diff --git a/src/nfa/accelcompile.h b/src/nfa/accelcompile.h index 9b30146cd..9bd4ff18d 100644 --- a/src/nfa/accelcompile.h +++ b/src/nfa/accelcompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,30 +37,9 @@ union AccelAux; namespace ue2 { -struct MultibyteAccelInfo { - /* multibyte accel schemes, ordered by strength */ - enum multiaccel_type { - MAT_SHIFT, - MAT_SHIFTGRAB, - MAT_DSHIFT, - MAT_DSHIFTGRAB, - MAT_LONG, - MAT_LONGGRAB, - MAT_MAX, - MAT_NONE = MAT_MAX - }; - CharReach cr; - u32 offset = 0; - u32 len1 = 0; - u32 len2 = 0; - multiaccel_type type = MAT_NONE; -}; - struct AccelInfo { AccelInfo() : single_offset(0U), double_offset(0U), - single_stops(CharReach::dot()), - multiaccel_offset(0), ma_len1(0), ma_len2(0), - ma_type(MultibyteAccelInfo::MAT_NONE) {} + single_stops(CharReach::dot()) {} u32 single_offset; /**< offset 
correction to apply to single schemes */ u32 double_offset; /**< offset correction to apply to double schemes */ CharReach double_stop1; /**< single-byte accel stop literals for double @@ -68,11 +47,6 @@ struct AccelInfo { flat_set> double_stop2; /**< double-byte accel stop * literals */ CharReach single_stops; /**< escapes for single byte acceleration */ - u32 multiaccel_offset; /**< offset correction to apply to multibyte schemes */ - CharReach multiaccel_stops; /**< escapes for multibyte acceleration */ - u32 ma_len1; /**< multiaccel len1 */ - u32 ma_len2; /**< multiaccel len2 */ - MultibyteAccelInfo::multiaccel_type ma_type; /**< multiaccel type */ }; bool buildAccelAux(const AccelInfo &info, AccelAux *aux); diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 3b40ab9a8..40fbc18cb 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,9 +26,11 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Castle: multi-tenant repeat engine, compiler code. */ + #include "castlecompile.h" #include "castle_internal.h" @@ -439,7 +441,7 @@ void buildSubcastles(const CastleProto &proto, vector &subs, } } -aligned_unique_ptr +bytecode_ptr buildCastle(const CastleProto &proto, const map>> &triggers, const CompileContext &cc, const ReportManager &rm) { @@ -501,7 +503,7 @@ buildCastle(const CastleProto &proto, // possibly means that we've got a repeat that we can't trigger. We do // need to cope with it though. if (contains(triggers, top)) { - min_period = minPeriod(triggers.at(top), cr, &is_reset); + min_period = depth(minPeriod(triggers.at(top), cr, &is_reset)); } if (min_period > pr.bounds.max) { @@ -560,7 +562,7 @@ buildCastle(const CastleProto &proto, DEBUG_PRINTF("%zu subcastles may go stale\n", may_stale.size()); vector stale_iter; if (!may_stale.empty()) { - mmbBuildSparseIterator(stale_iter, may_stale, numRepeats); + stale_iter = mmbBuildSparseIterator(may_stale, numRepeats); } @@ -577,7 +579,7 @@ buildCastle(const CastleProto &proto, total_size = ROUNDUP_N(total_size, alignof(mmbit_sparse_iter)); total_size += byte_length(stale_iter); // stale sparse iter - aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size); + auto nfa = make_zeroed_bytecode_ptr(total_size); nfa->type = verify_u8(CASTLE_NFA); nfa->length = verify_u32(total_size); nfa->nPositions = verify_u32(subs.size()); diff --git a/src/nfa/castlecompile.h b/src/nfa/castlecompile.h index 938e57c4d..9f44692d4 100644 --- a/src/nfa/castlecompile.h +++ b/src/nfa/castlecompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Castle: multi-tenant repeat engine, compiler code. */ @@ -36,7 +37,7 @@ #include "nfa_kind.h" #include "ue2common.h" #include "nfagraph/ng_repeat.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/depth.h" #include "util/ue2_containers.h" @@ -120,7 +121,7 @@ void remapCastleTops(CastleProto &proto, std::map &top_map); * NOTE: Tops must be contiguous, i.e. 
\ref remapCastleTops must have been run * first. */ -ue2::aligned_unique_ptr +bytecode_ptr buildCastle(const CastleProto &proto, const std::map>> &triggers, const CompileContext &cc, const ReportManager &rm); diff --git a/src/nfa/dfa_min.cpp b/src/nfa/dfa_min.cpp index 0d3bca114..f309cc535 100644 --- a/src/nfa/dfa_min.cpp +++ b/src/nfa/dfa_min.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,12 +26,14 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file -* \brief Build code for DFA minimization -*/ +/** + * \file + * \brief Build code for DFA minimization. + */ /** - * /Summary of the Hopcrofts algorithm/ + * /Summary of the Hopcroft minimisation algorithm/ + * * partition := {F, Q \ F}; * work_queue := {F}; * while (work_queue is not empty) do @@ -57,22 +59,20 @@ #include "dfa_min.h" #include "grey.h" -#include "nfa/rdfa.h" -#include "nfagraph/ng_mcclellan.h" +#include "rdfa.h" #include "ue2common.h" -#include "util/partitioned_set.h" #include "util/container.h" +#include "util/noncopyable.h" +#include "util/partitioned_set.h" #include "util/ue2_containers.h" #include #include +#include #include +#include #include #include -#include - -#include -#include using namespace std; @@ -81,118 +81,81 @@ namespace ue2 { namespace { struct hopcroft_state_info { - vector > prev; + explicit hopcroft_state_info(size_t alpha_size) : prev(alpha_size) {} + + /** \brief Mapping from symbol to a list of predecessors that transition to + * this state on that symbol. */ + vector> prev; }; -struct DFA_components : boost::noncopyable { - dstate_id_t nstates; - size_t inp_size; - set work_queue; - /*Partition contains reduced states*/ - partitioned_set partition; - vector states; +struct HopcroftInfo : noncopyable { + size_t alpha_size; //!< Size of DFA alphabet. + queue work_queue; //!< Hopcroft work queue of partition indices. + partitioned_set partition; //!< Partition set of DFA states. + vector states; //!< Pre-calculated state info (preds) - explicit DFA_components(const raw_dfa &rdfa); + explicit HopcroftInfo(const raw_dfa &rdfa); }; -} //namespace +} // namespace /** - * create_map: - * Creates an initial partitioning and work_queue. - * Initial partition contains {accepting states..., Non-accepting states} - * Initial work_queue contains accepting state subsets + * \brief Create an initial partitioning and work_queue. * - * The initial partitioning needs to distinguish between the different - * reporting behaviours (unlike standard hopcroft) --> more than one subset - * possible for the accepting states. + * Initial partition contains {accepting states..., Non-accepting states} + * Initial work_queue contains accepting state subsets * - * Look for accepting states in both reports and reports_eod. - * Creates a map with a key(reports, reports_eod) and an id. - * Reports of each state are searched against the map and - * added to the corresponding id -> partition[id] and work_queue[id]. - * Non Accept states are added to partition[id+1]. + * The initial partitioning needs to distinguish between the different + * reporting behaviours (unlike standard Hopcroft) --> more than one subset + * possible for the accepting states. + * + * Look for accepting states in both reports and reports_eod. + * Creates a map with a key(reports, reports_eod) and an id. 
+ * Reports of each state are searched against the map and + * added to the corresponding id -> partition[id] and work_queue[id]. + * Non Accept states are added to partition[id+1]. */ static -vector create_map(const raw_dfa &rdfa, set &work_queue) { +vector create_map(const raw_dfa &rdfa, queue &work_queue) { using ReportKey = pair, flat_set>; map subset_map; vector state_to_subset(rdfa.states.size(), INVALID_SUBSET); for (size_t i = 0; i < rdfa.states.size(); i++) { - if (!rdfa.states[i].reports.empty() || - !rdfa.states[i].reports_eod.empty()) { - ReportKey key(rdfa.states[i].reports, rdfa.states[i].reports_eod); + const auto &ds = rdfa.states[i]; + if (!ds.reports.empty() || !ds.reports_eod.empty()) { + ReportKey key(ds.reports, ds.reports_eod); if (contains(subset_map, key)) { state_to_subset[i] = subset_map[key]; } else { size_t sub = subset_map.size(); - subset_map[key] = sub; + subset_map.emplace(std::move(key), sub); state_to_subset[i] = sub; - work_queue.insert(sub); + work_queue.push(sub); } } } - /* handle non accepts */ + /* Give non-accept states their own subset. */ size_t non_accept_sub = subset_map.size(); - for (size_t i = 0; i < state_to_subset.size(); i++) { - if (state_to_subset[i] == INVALID_SUBSET) { - state_to_subset[i] = non_accept_sub; - } - } + replace(state_to_subset.begin(), state_to_subset.end(), INVALID_SUBSET, + non_accept_sub); return state_to_subset; } -DFA_components::DFA_components(const raw_dfa &rdfa) - : nstates(rdfa.states.size()), - inp_size(rdfa.states[nstates - 1].next.size()), - partition(create_map(rdfa, work_queue)) { - /* initializing states */ - for (size_t i = 0; i < nstates; i++) { - states.push_back(hopcroft_state_info()); - states.back().prev.resize(inp_size); - } - - for (size_t i = 0; i < nstates; i++) { // i is the previous state - for (size_t j = 0; j < inp_size; j++) { - /* Creating X_table */ - dstate_id_t present_state = rdfa.states[i].next[j]; - states[present_state].prev[j].push_back(i); - - DEBUG_PRINTF("rdfa.states[%zu].next[%zu] %hu \n", i, j, - rdfa.states[i].next[j]); +HopcroftInfo::HopcroftInfo(const raw_dfa &rdfa) + : alpha_size(rdfa.alpha_size), partition(create_map(rdfa, work_queue)), + states(rdfa.states.size(), hopcroft_state_info(alpha_size)) { + /* Construct predecessor lists for each state, indexed by symbol. */ + for (size_t i = 0; i < states.size(); i++) { // i is the previous state + for (size_t sym = 0; sym < alpha_size; sym++) { + dstate_id_t present_state = rdfa.states[i].next[sym]; + states[present_state].prev[sym].push_back(i); } } } -/** - * choose and remove a set A from work_queue. - */ -static -void get_work_item(DFA_components &mdfa, ue2::flat_set &A) { - A.clear(); - assert(!mdfa.work_queue.empty()); - set::iterator pt = mdfa.work_queue.begin(); - insert(&A, mdfa.partition[*pt]); - mdfa.work_queue.erase(pt); -} - -/** - * X is the set of states for which a transition on the input leads to a state - * in A. - */ -static -void create_X(const DFA_components &mdfa, const ue2::flat_set &A, - size_t inp, ue2::flat_set &X) { - X.clear(); - - for (dstate_id_t id : A) { - insert(&X, mdfa.states[id].prev[inp]); - } -} - /** * For a split set X, each subset S (given by part_index) in the partition, two * sets are created: v_inter (X intersection S) and v_sub (S - X). @@ -206,14 +169,14 @@ void create_X(const DFA_components &mdfa, const ue2::flat_set &A, * - replace S in work_queue by the smaller of the two sets. 
*/ static -void split_and_replace_set(const size_t part_index, DFA_components &mdfa, - const ue2::flat_set &splitter) { +void split_and_replace_set(const size_t part_index, HopcroftInfo &info, + const flat_set &splitter) { /* singleton sets cannot be split */ - if (mdfa.partition[part_index].size() == 1) { + if (info.partition[part_index].size() == 1) { return; } - size_t small_index = mdfa.partition.split(part_index, splitter); + size_t small_index = info.partition.split(part_index, splitter); if (small_index == INVALID_SUBSET) { /* the set could not be split */ @@ -223,54 +186,56 @@ void split_and_replace_set(const size_t part_index, DFA_components &mdfa, /* larger subset remains at the input subset index, if the input subset was * already in the work queue then the larger subset will remain there. */ - mdfa.work_queue.insert(small_index); + info.work_queue.push(small_index); } /** - * The complete Hopcrofts algorithm is implemented in this function. - * Choose and remove a set tray from work_queue - * For each input- X is created. - * For each subset in the partition, split_and_replace_sets are called with the - * split set. + * \brief Core of the Hopcroft minimisation algorithm. */ static -void dfa_min(DFA_components &mdfa) { - ue2::flat_set A, X; +void dfa_min(HopcroftInfo &info) { + flat_set curr, sym_preds; vector cand_subsets; - while (!mdfa.work_queue.empty()) { - get_work_item(mdfa, A); + while (!info.work_queue.empty()) { + /* Choose and remove a set of states (curr, or A in the description + * above) from the work queue. Note that we copy the set because the + * partition may be split by the loop below. */ + curr.clear(); + insert(&curr, info.partition[info.work_queue.front()]); + info.work_queue.pop(); + + for (size_t sym = 0; sym < info.alpha_size; sym++) { + /* Find the set of states sym_preds for which a transition on the + * given symbol leads to a state in curr. */ + sym_preds.clear(); + for (dstate_id_t s : curr) { + insert(&sym_preds, info.states[s].prev[sym]); + } - for (size_t inp = 0; inp < mdfa.inp_size; inp++) { - create_X(mdfa, A, inp, X); - if (X.empty()) { + if (sym_preds.empty()) { continue; } - /* we only need to consider subsets with at least one member in X for - * splitting */ + /* we only need to consider subsets with at least one member in + * sym_preds for splitting */ cand_subsets.clear(); - mdfa.partition.find_overlapping(X, &cand_subsets); + info.partition.find_overlapping(sym_preds, &cand_subsets); for (size_t sub : cand_subsets) { - split_and_replace_set(sub, mdfa, X); + split_and_replace_set(sub, info, sym_preds); } } } } /** - * Creating new dfa table - * Map ordering contains key being an equivalence classes first state - * and the value being the equivalence class index. - * Eq_state[i] tells us new state id the equivalence class located at - * partition[i]. + * \brief Build the new DFA state table. */ static -void mapping_new_states(const DFA_components &mdfa, - vector &old_to_new, - raw_dfa &rdfa) { - const size_t num_partitions = mdfa.partition.size(); +void mapping_new_states(const HopcroftInfo &info, + vector &old_to_new, raw_dfa &rdfa) { + const size_t num_partitions = info.partition.size(); // Mapping from equiv class's first state to equiv class index. 
    map<dstate_id_t, size_t> ordering;
@@ -279,7 +244,7 @@ void mapping_new_states(const DFA_components &mdfa,
     vector<dstate_id_t> eq_state(num_partitions);
 
     for (size_t i = 0; i < num_partitions; i++) {
-        ordering[*mdfa.partition[i].begin()] = i;
+        ordering[*info.partition[i].begin()] = i;
     }
 
     dstate_id_t new_id = 0;
@@ -287,30 +252,28 @@ void mapping_new_states(const DFA_components &mdfa,
         eq_state[m.second] = new_id++;
     }
 
-    for (size_t t = 0; t < mdfa.partition.size(); t++) {
-        for (dstate_id_t id : mdfa.partition[t]) {
+    for (size_t t = 0; t < info.partition.size(); t++) {
+        for (dstate_id_t id : info.partition[t]) {
             old_to_new[id] = eq_state[t];
         }
     }
 
     vector<dstate> new_states;
     new_states.reserve(num_partitions);
-    for (size_t i = 0; i < mdfa.nstates; i++) {
-        if (contains(ordering, i)) {
-            new_states.push_back(rdfa.states[i]);
-        }
+
+    for (const auto &m : ordering) {
+        new_states.push_back(rdfa.states[m.first]);
     }
-    rdfa.states.swap(new_states);
+    rdfa.states = std::move(new_states);
 }
 
 static
-void renumber_new_states(const DFA_components &mdfa,
-                         const vector<dstate_id_t> &old_to_new,
-                         raw_dfa &rdfa) {
-    for (size_t i = 0; i < mdfa.partition.size(); i++) {
-        for (size_t j = 0; j < mdfa.inp_size; j++) {
-            dstate_id_t output = rdfa.states[i].next[j];
-            rdfa.states[i].next[j] = old_to_new[output];
+void renumber_new_states(const HopcroftInfo &info,
+                         const vector<dstate_id_t> &old_to_new, raw_dfa &rdfa) {
+    for (size_t i = 0; i < info.partition.size(); i++) {
+        for (size_t sym = 0; sym < info.alpha_size; sym++) {
+            dstate_id_t output = rdfa.states[i].next[sym];
+            rdfa.states[i].next[sym] = old_to_new[output];
         }
         dstate_id_t dad = rdfa.states[i].daddy;
         rdfa.states[i].daddy = old_to_new[dad];
@@ -321,17 +284,16 @@ void renumber_new_states(const DFA_components &mdfa,
 }
 
 static
-void new_dfa(raw_dfa &rdfa, const DFA_components &mdfa) {
-    if (mdfa.partition.size() != mdfa.nstates) {
-        vector<dstate_id_t> old_to_new(mdfa.nstates);
-        mapping_new_states(mdfa, old_to_new, rdfa);
-        renumber_new_states(mdfa, old_to_new, rdfa);
+void new_dfa(raw_dfa &rdfa, const HopcroftInfo &info) {
+    if (info.partition.size() == info.states.size()) {
+        return;
     }
+
+    vector<dstate_id_t> old_to_new(info.states.size());
+    mapping_new_states(info, old_to_new, rdfa);
+    renumber_new_states(info, old_to_new, rdfa);
 }
 
-/**
- * MAIN FUNCTION
- */
 void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) {
     if (!grey.minimizeDFA) {
         return;
@@ -339,10 +301,10 @@ void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) {
 
     UNUSED const size_t states_before = rdfa.states.size();
 
-    DFA_components mdfa(rdfa);
+    HopcroftInfo info(rdfa);
 
-    dfa_min(mdfa);
-    new_dfa(rdfa, mdfa);
+    dfa_min(info);
+    new_dfa(rdfa, info);
 
    DEBUG_PRINTF("reduced from %zu to %zu states\n", states_before,
                 rdfa.states.size());
diff --git a/src/nfa/dfa_min.h b/src/nfa/dfa_min.h
index 8277a4ba0..61ca6c21a 100644
--- a/src/nfa/dfa_min.h
+++ b/src/nfa/dfa_min.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -26,8 +26,9 @@
 * POSSIBILITY OF SUCH DAMAGE.
 */
 
-/** \file
- * \brief Build code for McClellan DFA.
+/**
+ * \file
+ * \brief Build code for DFA minimization.
*/ #ifndef DFA_MIN_H diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp index 314b6fd02..58b05d3d1 100644 --- a/src/nfa/goughcompile.cpp +++ b/src/nfa/goughcompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,7 +35,6 @@ #include "grey.h" #include "mcclellancompile.h" #include "nfa_internal.h" -#include "util/alloc.h" #include "util/compile_context.h" #include "util/container.h" #include "util/graph_range.h" @@ -81,7 +80,7 @@ class gough_build_strat : public mcclellan_build_strat { gough_build_strat( raw_som_dfa &r, const GoughGraph &g, const ReportManager &rm_in, const map &accel_info) - : mcclellan_build_strat(r, rm_in), rdfa(r), gg(g), + : mcclellan_build_strat(r, rm_in, false), rdfa(r), gg(g), accel_gough_info(accel_info) {} unique_ptr gatherReports(vector &reports /* out */, vector &reports_eod /* out */, @@ -1036,9 +1035,9 @@ void update_accel_prog_offset(const gough_build_strat &gbs, } } -aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, - const CompileContext &cc, - const ReportManager &rm) { +bytecode_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, + const CompileContext &cc, + const ReportManager &rm) { assert(somPrecision == 2 || somPrecision == 4 || somPrecision == 8 || !cc.streaming); @@ -1071,7 +1070,7 @@ aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, map accel_allowed; find_allowed_accel_states(*cfg, blocks, &accel_allowed); gough_build_strat gbs(raw, *cfg, rm, accel_allowed); - aligned_unique_ptr basic_dfa = mcclellanCompile_i(raw, gbs, cc); + auto basic_dfa = mcclellanCompile_i(raw, gbs, cc); assert(basic_dfa); if (!basic_dfa) { return nullptr; @@ -1117,7 +1116,7 @@ aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, gi.stream_som_loc_width = somPrecision; u32 gough_size = ROUNDUP_N(curr_offset, 16); - aligned_unique_ptr gough_dfa = aligned_zmalloc_unique(gough_size); + auto gough_dfa = make_zeroed_bytecode_ptr(gough_size); memcpy(gough_dfa.get(), basic_dfa.get(), basic_dfa->length); memcpy((char *)gough_dfa.get() + haig_offset, &gi, sizeof(gi)); diff --git a/src/nfa/goughcompile.h b/src/nfa/goughcompile.h index 54f98cef2..72469f3ca 100644 --- a/src/nfa/goughcompile.h +++ b/src/nfa/goughcompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,7 +32,7 @@ #include "mcclellancompile.h" #include "nfa_kind.h" #include "ue2common.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/ue2_containers.h" #include "util/order_check.h" @@ -88,10 +88,10 @@ struct raw_som_dfa : public raw_dfa { * som */ }; -aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, - const CompileContext &cc, - const ReportManager &rm); +bytecode_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, + const CompileContext &cc, + const ReportManager &rm); } // namespace ue2 -#endif +#endif // GOUGHCOMPILE_H diff --git a/src/nfa/goughcompile_internal.h b/src/nfa/goughcompile_internal.h index 52e65f15f..a6ba0d1b8 100644 --- a/src/nfa/goughcompile_internal.h +++ b/src/nfa/goughcompile_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 
2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +33,7 @@ #include "mcclellancompile.h" #include "ue2common.h" #include "util/charreach.h" +#include "util/noncopyable.h" #include "util/order_check.h" #include "util/ue2_containers.h" @@ -41,7 +42,6 @@ #include #include -#include #include namespace ue2 { @@ -103,7 +103,7 @@ struct GoughSSAVarWithInputs; struct GoughSSAVarMin; struct GoughSSAVarJoin; -struct GoughSSAVar : boost::noncopyable { +struct GoughSSAVar : noncopyable { GoughSSAVar(void) : seen(false), slot(INVALID_SLOT) {} virtual ~GoughSSAVar(); const ue2::flat_set &get_inputs() const { diff --git a/src/nfa/limex_accel.c b/src/nfa/limex_accel.c index c74c7079d..4834b6a54 100644 --- a/src/nfa/limex_accel.c +++ b/src/nfa/limex_accel.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,11 +39,9 @@ #include "nfa_internal.h" #include "shufti.h" #include "truffle.h" -#include "multishufti.h" -#include "multitruffle.h" -#include "multivermicelli.h" #include "ue2common.h" #include "vermicelli.h" +#include "util/arch.h" #include "util/bitutils.h" #include "util/simd_utils.h" @@ -118,7 +116,7 @@ size_t doAccel256(const m256 *state, const struct LimExNFA256 *limex, DEBUG_PRINTF("using PSHUFB for 256-bit shuffle\n"); m256 accelPerm = limex->accelPermute; m256 accelComp = limex->accelCompare; -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) u32 idx1 = packedExtract128(s.lo, accelPerm.lo, accelComp.lo); u32 idx2 = packedExtract128(s.hi, accelPerm.hi, accelComp.hi); assert((idx1 & idx2) == 0); // should be no shared bits @@ -153,18 +151,20 @@ size_t doAccel512(const m512 *state, const struct LimExNFA512 *limex, DEBUG_PRINTF("using PSHUFB for 512-bit shuffle\n"); m512 accelPerm = limex->accelPermute; m512 accelComp = limex->accelCompare; -#if !defined(__AVX2__) +#if defined(HAVE_AVX512) + idx = packedExtract512(s, accelPerm, accelComp); +#elif defined(HAVE_AVX2) + u32 idx1 = packedExtract256(s.lo, accelPerm.lo, accelComp.lo); + u32 idx2 = packedExtract256(s.hi, accelPerm.hi, accelComp.hi); + assert((idx1 & idx2) == 0); // should be no shared bits + idx = idx1 | idx2; +#else u32 idx1 = packedExtract128(s.lo.lo, accelPerm.lo.lo, accelComp.lo.lo); u32 idx2 = packedExtract128(s.lo.hi, accelPerm.lo.hi, accelComp.lo.hi); u32 idx3 = packedExtract128(s.hi.lo, accelPerm.hi.lo, accelComp.hi.lo); u32 idx4 = packedExtract128(s.hi.hi, accelPerm.hi.hi, accelComp.hi.hi); assert((idx1 & idx2 & idx3 & idx4) == 0); // should be no shared bits idx = idx1 | idx2 | idx3 | idx4; -#else - u32 idx1 = packedExtract256(s.lo, accelPerm.lo, accelComp.lo); - u32 idx2 = packedExtract256(s.hi, accelPerm.hi, accelComp.hi); - assert((idx1 & idx2) == 0); // should be no shared bits - idx = idx1 | idx2; #endif return accelScanWrapper(accelTable, aux, input, idx, i, end); } diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index c75eae597..7183d4b79 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -26,9 +26,11 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Main NFA build code. 
*/ + #include "limex_compile.h" #include "accel.h" @@ -47,6 +49,7 @@ #include "repeatcompile.h" #include "util/alloc.h" #include "util/bitutils.h" +#include "util/bytecode_ptr.h" #include "util/charreach.h" #include "util/compile_context.h" #include "util/container.h" @@ -66,6 +69,7 @@ #include #include +#include #include using namespace std; @@ -89,8 +93,6 @@ struct precalcAccel { CharReach double_cr; flat_set> double_lits; /* double-byte accel stop literals */ u32 double_offset; - - MultibyteAccelInfo ma_info; }; struct limex_accel_info { @@ -354,16 +356,12 @@ void buildReachMapping(const build_info &args, vector &reach, } struct AccelBuild { - AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0), ma_len1(0), - ma_len2(0), ma_type(MultibyteAccelInfo::MAT_NONE) {} + AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0) {} NFAVertex v; u32 state; u32 offset; // offset correction to apply CharReach stop1; // single-byte accel stop literals flat_set> stop2; // double-byte accel stop literals - u32 ma_len1; // multiaccel len1 - u32 ma_len2; // multiaccel len2 - MultibyteAccelInfo::multiaccel_type ma_type; // multiaccel type }; static @@ -378,12 +376,7 @@ void findStopLiterals(const build_info &bi, NFAVertex v, AccelBuild &build) { build.stop1 = CharReach::dot(); } else { const precalcAccel &precalc = bi.accel.precalc.at(ss); - unsigned ma_len = precalc.ma_info.len1 + precalc.ma_info.len2; - if (ma_len >= MULTIACCEL_MIN_LEN) { - build.ma_len1 = precalc.ma_info.len1; - build.stop1 = precalc.ma_info.cr; - build.offset = precalc.ma_info.offset; - } else if (precalc.double_lits.empty()) { + if (precalc.double_lits.empty()) { build.stop1 = precalc.single_cr; build.offset = precalc.single_offset; } else { @@ -602,7 +595,6 @@ void fillAccelInfo(build_info &bi) { limex_accel_info &accel = bi.accel; unordered_map &accel_map = accel.accel_map; const map &br_cyclic = bi.br_cyclic; - const CompileContext &cc = bi.cc; const unordered_map &state_ids = bi.state_ids; const u32 num_states = bi.num_states; @@ -659,27 +651,17 @@ void fillAccelInfo(build_info &bi) { DEBUG_PRINTF("accel %u ok with offset s%u, d%u\n", i, as.offset, as.double_offset); - // try multibyte acceleration first - MultibyteAccelInfo mai = nfaCheckMultiAccel(g, states, cc); - precalcAccel &pa = accel.precalc[state_set]; - useful |= state_set; - - // if we successfully built a multibyte accel scheme, use that - if (mai.type != MultibyteAccelInfo::MAT_NONE) { - pa.ma_info = mai; - - DEBUG_PRINTF("multibyte acceleration!\n"); - continue; - } - pa.single_offset = as.offset; pa.single_cr = as.cr; + if (as.double_byte.size() != 0) { pa.double_offset = as.double_offset; pa.double_lits = as.double_byte; pa.double_cr = as.double_cr; - }; + } + + useful |= state_set; } for (const auto &m : accel_map) { @@ -696,19 +678,8 @@ void fillAccelInfo(build_info &bi) { state_set.reset(); state_set.set(state_id); - bool is_multi = false; - auto p_it = accel.precalc.find(state_set); - if (p_it != accel.precalc.end()) { - const precalcAccel &pa = p_it->second; - offset = max(pa.double_offset, pa.single_offset); - is_multi = pa.ma_info.type != MultibyteAccelInfo::MAT_NONE; - assert(offset <= MAX_ACCEL_DEPTH); - } - accel.accelerable.insert(v); - if (!is_multi) { - findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]); - } + findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]); } } @@ -721,6 +692,7 @@ typedef vector> static u32 getEffectiveAccelStates(const build_info &args, + const unordered_map &dom_map, u32 active_accel_mask, 
const vector &accelStates) { /* accelStates is indexed by the acceleration bit index and contains a @@ -756,7 +728,6 @@ u32 getEffectiveAccelStates(const build_info &args, * so we may still require on earlier states to be accurately modelled. */ const NGHolder &h = args.h; - auto dom_map = findDominators(h); /* map from accel_id to mask of accel_ids that it is dominated by */ vector dominated_by(accelStates.size()); @@ -773,8 +744,8 @@ u32 getEffectiveAccelStates(const build_info &args, u32 accel_id = findAndClearLSB_32(&local_accel_mask); assert(accel_id < accelStates.size()); NFAVertex v = accelStates[accel_id].v; - while (dom_map[v]) { - v = dom_map[v]; + while (contains(dom_map, v) && dom_map.at(v)) { + v = dom_map.at(v); if (contains(accel_id_map, v)) { dominated_by[accel_id] |= 1U << accel_id_map[v]; } @@ -887,6 +858,8 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask, return; } + const auto dom_map = findDominators(args.h); + // We have 2^n different accel entries, one for each possible // combination of accelerable states. assert(accelStates.size() < 32); @@ -900,7 +873,8 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask, effective_accel_set.push_back(0); /* empty is effectively empty */ for (u32 i = 1; i < accelCount; i++) { - u32 effective_i = getEffectiveAccelStates(args, i, accelStates); + u32 effective_i = getEffectiveAccelStates(args, dom_map, i, + accelStates); effective_accel_set.push_back(effective_i); if (effective_i == IMPOSSIBLE_ACCEL_MASK) { @@ -947,16 +921,8 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask, if (contains(accel.precalc, effective_states)) { const auto &precalc = accel.precalc.at(effective_states); - if (precalc.ma_info.type != MultibyteAccelInfo::MAT_NONE) { - ainfo.ma_len1 = precalc.ma_info.len1; - ainfo.ma_len2 = precalc.ma_info.len2; - ainfo.multiaccel_offset = precalc.ma_info.offset; - ainfo.multiaccel_stops = precalc.ma_info.cr; - ainfo.ma_type = precalc.ma_info.type; - } else { - ainfo.single_offset = precalc.single_offset; - ainfo.single_stops = precalc.single_cr; - } + ainfo.single_offset = precalc.single_offset; + ainfo.single_stops = precalc.single_cr; } } @@ -1637,6 +1603,84 @@ u32 findBestNumOfVarShifts(const build_info &args, return bestNumOfVarShifts; } +static +bool cannotDie(const build_info &args, const set &tops) { + const auto &h = args.h; + + // When this top is activated, all of the vertices in 'tops' are switched + // on. If any of those lead to a graph that cannot die, then this top + // cannot die. + + // For each top, we use a depth-first search to traverse the graph from the + // top, looking for a cyclic path consisting of vertices of dot reach. If + // one exists, than the NFA cannot die after this top is triggered. + + vector colours(num_vertices(h)); + auto colour_map = boost::make_iterator_property_map(colours.begin(), + get(vertex_index, h)); + + struct CycleFound {}; + struct CannotDieVisitor : public boost::default_dfs_visitor { + void back_edge(const NFAEdge &e, const NGHolder &g) const { + DEBUG_PRINTF("back-edge %zu,%zu\n", g[source(e, g)].index, + g[target(e, g)].index); + if (g[target(e, g)].char_reach.all()) { + assert(g[source(e, g)].char_reach.all()); + throw CycleFound(); + } + } + }; + + try { + for (const auto &top : tops) { + DEBUG_PRINTF("checking top vertex %zu\n", h[top].index); + + // Constrain the search to the top vertices and any dot vertices it + // can reach. 
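+            // (Sketch of the mechanism: boost::depth_first_visit stops
+            // expanding any vertex for which term_func returns true, so the
+            // traversal below is confined to the top vertex plus dot-reach
+            // vertices that can never be switched off; any back edge found
+            // inside that subgraph is an undying cycle.)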
+ auto term_func = [&](NFAVertex v, const NGHolder &g) { + if (v == top) { + return false; + } + if (!g[v].char_reach.all()) { + return true; + } + if (contains(args.br_cyclic, v) && + args.br_cyclic.at(v).repeatMax != depth::infinity()) { + // Bounded repeat vertices without inf max can be turned + // off. + return true; + } + return false; + }; + + boost::depth_first_visit(h, top, CannotDieVisitor(), colour_map, + term_func); + } + } catch (const CycleFound &) { + DEBUG_PRINTF("cycle found\n"); + return true; + } + + return false; +} + +/** \brief True if this NFA cannot ever be in no states at all. */ +static +bool cannotDie(const build_info &args) { + const auto &h = args.h; + const auto &state_ids = args.state_ids; + + // If we have a startDs we're actually using, we can't die. + if (state_ids.at(h.startDs) != NO_STATE) { + DEBUG_PRINTF("is using startDs\n"); + return true; + } + + return all_of_in(args.tops | map_values, [&](const set &verts) { + return cannotDie(args, verts); + }); +} + template struct Factory { // typedefs for readability, for types derived from traits @@ -1700,8 +1744,8 @@ struct Factory { static void buildRepeats(const build_info &args, - vector, size_t>> &out, - u32 *scratchStateSize, u32 *streamState) { + vector> &out, + u32 *scratchStateSize, u32 *streamState) { out.reserve(args.repeats.size()); u32 repeat_idx = 0; @@ -1712,7 +1756,7 @@ struct Factory { u32 tableOffset, tugMaskOffset; size_t len = repeatAllocSize(br, &tableOffset, &tugMaskOffset); - auto info = aligned_zmalloc_unique(len); + auto info = make_zeroed_bytecode_ptr(len); char *info_ptr = (char *)info.get(); // Collect state space info. @@ -1766,7 +1810,7 @@ struct Factory { *streamState += streamStateLen; *scratchStateSize += sizeof(RepeatControl); - out.emplace_back(move(info), len); + out.emplace_back(move(info)); } } @@ -2074,8 +2118,7 @@ struct Factory { } static - void writeRepeats(const vector, - size_t>> &repeats, + void writeRepeats(const vector> &repeats, vector &repeatOffsets, implNFA_t *limex, const u32 repeatOffsetsOffset, const u32 repeatOffset) { const u32 num_repeats = verify_u32(repeats.size()); @@ -2088,10 +2131,9 @@ struct Factory { for (u32 i = 0; i < num_repeats; i++) { repeatOffsets[i] = offset; - assert(repeats[i].first); - memcpy((char *)limex + offset, repeats[i].first.get(), - repeats[i].second); - offset += repeats[i].second; + assert(repeats[i]); + memcpy((char *)limex + offset, repeats[i].get(), repeats[i].size()); + offset += repeats[i].size(); } // Write repeat offset lookup table. @@ -2112,19 +2154,19 @@ struct Factory { } static - aligned_unique_ptr generateNfa(const build_info &args) { + bytecode_ptr generateNfa(const build_info &args) { if (args.num_states > NFATraits::maxStates) { return nullptr; } // Build bounded repeat structures. 
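        // (Note: a bytecode_ptr carries the size of its own allocation, so
        // each repeat structure's length is recovered below via
        // repeats[i].size() instead of being tracked in a separate pair
        // element.)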
- vector, size_t>> repeats; + vector> repeats; u32 repeats_full_state = 0; u32 repeats_stream_state = 0; buildRepeats(args, repeats, &repeats_full_state, &repeats_stream_state); size_t repeatSize = 0; for (size_t i = 0; i < repeats.size(); i++) { - repeatSize += repeats[i].second; + repeatSize += repeats[i].size(); } // We track report lists that have already been written into the global @@ -2214,7 +2256,7 @@ struct Factory { size_t nfaSize = sizeof(NFA) + offset; DEBUG_PRINTF("nfa size %zu\n", nfaSize); - auto nfa = aligned_zmalloc_unique(nfaSize); + auto nfa = make_zeroed_bytecode_ptr(nfaSize); assert(nfa); // otherwise we would have thrown std::bad_alloc implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa.get()); @@ -2234,6 +2276,11 @@ struct Factory { limex->shiftCount = shiftCount; writeShiftMasks(args, limex); + if (cannotDie(args)) { + DEBUG_PRINTF("nfa cannot die\n"); + setLimexFlag(limex, LIMEX_FLAG_CANNOT_DIE); + } + // Determine the state required for our state vector. findStateSize(args, limex); @@ -2295,7 +2342,7 @@ struct Factory { template struct generateNfa { - static aligned_unique_ptr call(const build_info &args) { + static bytecode_ptr call(const build_info &args) { return Factory::generateNfa(args); } }; @@ -2392,17 +2439,15 @@ u32 max_state(const ue2::unordered_map &state_ids) { return rv; } -aligned_unique_ptr generate(NGHolder &h, - const ue2::unordered_map &states, - const vector &repeats, - const map &reportSquashMap, - const map &squashMap, - const map> &tops, - const set &zombies, - bool do_accel, - bool stateCompression, - u32 hint, - const CompileContext &cc) { +bytecode_ptr generate(NGHolder &h, + const ue2::unordered_map &states, + const vector &repeats, + const map &reportSquashMap, + const map &squashMap, + const map> &tops, + const set &zombies, bool do_accel, + bool stateCompression, u32 hint, + const CompileContext &cc) { const u32 num_states = max_state(states) + 1; DEBUG_PRINTF("total states: %u\n", num_states); diff --git a/src/nfa/limex_compile.h b/src/nfa/limex_compile.h index 21cb76087..a12ae9f6e 100644 --- a/src/nfa/limex_compile.h +++ b/src/nfa/limex_compile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Main NFA build code. */ @@ -37,10 +38,10 @@ #include #include -#include "ue2common.h" #include "nfagraph/ng_holder.h" #include "nfagraph/ng_squash.h" // for NFAStateSet -#include "util/alloc.h" +#include "ue2common.h" +#include "util/bytecode_ptr.h" #include "util/ue2_containers.h" struct NFA; @@ -50,7 +51,8 @@ namespace ue2 { struct BoundedRepeatData; struct CompileContext; -/** \brief Construct a LimEx NFA from an NGHolder. +/** + * \brief Construct a LimEx NFA from an NGHolder. * * \param g Input NFA graph. Must have state IDs assigned. * \param repeats Bounded repeat information, if any. @@ -66,7 +68,7 @@ struct CompileContext; * \return a built NFA, or nullptr if no NFA could be constructed for this * graph. 
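 *
 * (Usage note, an observation rather than a guarantee of this interface:
 * callers generally probe candidate LimEx model sizes in increasing order
 * and treat a nullptr return as "try the next size" rather than as a hard
 * error.)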
 */
-aligned_unique_ptr<NFA> generate(NGHolder &g,
+bytecode_ptr<NFA> generate(NGHolder &g,
                 const ue2::unordered_map<NFAVertex, u32> &states,
                 const std::vector<BoundedRepeatData> &repeats,
                 const std::map<NFAVertex, NFAStateSet> &reportSquashMap,
diff --git a/src/nfa/limex_dump.cpp b/src/nfa/limex_dump.cpp
index 852639ea3..797e87ba2 100644
--- a/src/nfa/limex_dump.cpp
+++ b/src/nfa/limex_dump.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -290,6 +290,20 @@ static
 void dumpLimexText(const limex_type *limex, FILE *f) {
     u32 size = limex_traits<limex_type>::size;
 
+    fprintf(f, "%u-bit LimEx NFA (%u shifts, %u exceptions)\n", size,
+            limex->shiftCount, limex->exceptionCount);
+    fprintf(f, "flags: ");
+    if (limex->flags & LIMEX_FLAG_COMPRESS_STATE) {
+        fprintf(f, "COMPRESS_STATE ");
+    }
+    if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) {
+        fprintf(f, "COMPRESS_MASKED ");
+    }
+    if (limex->flags & LIMEX_FLAG_CANNOT_DIE) {
+        fprintf(f, "CANNOT_DIE ");
+    }
+    fprintf(f, "\n\n");
+
     dumpMask(f, "init", (const u8 *)&limex->init, size);
     dumpMask(f, "init_dot_star", (const u8 *)&limex->initDS, size);
     dumpMask(f, "accept", (const u8 *)&limex->accept, size);
diff --git a/src/nfa/limex_internal.h b/src/nfa/limex_internal.h
index ccbf34223..db703f039 100644
--- a/src/nfa/limex_internal.h
+++ b/src/nfa/limex_internal.h
@@ -85,6 +85,7 @@
 #define LIMEX_FLAG_COMPRESS_STATE   1 /**< pack state into stream state */
 #define LIMEX_FLAG_COMPRESS_MASKED  2 /**< use reach mask-based compression */
+#define LIMEX_FLAG_CANNOT_DIE       4 /**< limex cannot have no states on */
 
 enum LimExTrigger {
     LIMEX_TRIGGER_NONE = 0,
diff --git a/src/nfa/limex_runtime_impl.h b/src/nfa/limex_runtime_impl.h
index 016d1f924..7b89182be 100644
--- a/src/nfa/limex_runtime_impl.h
+++ b/src/nfa/limex_runtime_impl.h
@@ -60,6 +60,7 @@
 #define RUN_ACCEL_FN JOIN(LIMEX_API_ROOT, _Run_Accel)
 #define RUN_EXCEPTIONS_FN JOIN(LIMEX_API_ROOT, _Run_Exceptions)
 #define REV_STREAM_FN JOIN(LIMEX_API_ROOT, _Rev_Stream)
+#define LOOP_NOACCEL_FN JOIN(LIMEX_API_ROOT, _Loop_No_Accel)
 #define STREAM_FN JOIN(LIMEX_API_ROOT, _Stream)
 #define STREAMCB_FN JOIN(LIMEX_API_ROOT, _Stream_CB)
 #define STREAMFIRST_FN JOIN(LIMEX_API_ROOT, _Stream_First)
@@ -172,24 +173,75 @@ size_t RUN_ACCEL_FN(const STATE_T s, UNUSED const STATE_T accelMask,
     switch (limex_m->shiftCount) {                                          \
     case 8:                                                                 \
         succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 7));  \
+        /* fallthrough */                                                   \
     case 7:                                                                 \
         succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 6));  \
+        /* fallthrough */                                                   \
     case 6:                                                                 \
         succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 5));  \
+        /* fallthrough */                                                   \
     case 5:                                                                 \
         succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 4));  \
+        /* fallthrough */                                                   \
     case 4:                                                                 \
         succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 3));  \
+        /* fallthrough */                                                   \
     case 3:                                                                 \
         succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 2));  \
+        /* fallthrough */                                                   \
     case 2:                                                                 \
         succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 1));  \
+        /* fallthrough */                                                   \
     case 1:                                                                 \
+        /* fallthrough */                                                   \
     case 0:                                                                 \
         ;                                                                   \
     }                                                                       \
 } while (0)
 
+/**
+ * \brief LimEx NFA inner loop without acceleration.
+ *
+ * Note that the "all zeroes" early death check is only performed if can_die is
+ * true.
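+ *
+ * can_die is expected to be a compile-time constant at each call site (see
+ * STREAM_FN below, which calls this once with 0 and once with 1), so with
+ * the function forced inline the compiler can specialise the loop and fold
+ * the ISZERO_STATE check away for NFAs flagged LIMEX_FLAG_CANNOT_DIE.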
+ * + */ +static really_inline +char LOOP_NOACCEL_FN(const IMPL_NFA_T *limex, const u8 *input, size_t *loc, + size_t length, STATE_T *s_ptr, struct CONTEXT_T *ctx, + u64a offset, const char flags, u64a *final_loc, + const char first_match, const char can_die) { + const ENG_STATE_T *reach = get_reach_table(limex); +#if SIZE < 256 + const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask); +#endif + const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); + STATE_T s = *s_ptr; + + size_t i = *loc; + for (; i != length; i++) { + DUMP_INPUT(i); + if (can_die && ISZERO_STATE(s)) { + DEBUG_PRINTF("no states are switched on, early exit\n"); + break; + } + + STATE_T succ; + NFA_EXEC_GET_LIM_SUCC(limex, s, succ); + + if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset, + &succ, final_loc, ctx, flags, 0, first_match)) { + return MO_HALT_MATCHING; + } + + u8 c = input[i]; + s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); + } + + *loc = i; + *s_ptr = s; + return MO_CONTINUE_MATCHING; +} static really_inline char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, @@ -202,7 +254,8 @@ char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, = LOAD_FROM_ENG(&limex->accel_and_friends); const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask); #endif - const u8 *accelTable = (const u8 *)((const char *)limex + limex->accelTableOffset); + const u8 *accelTable = + (const u8 *)((const char *)limex + limex->accelTableOffset); const union AccelAux *accelAux = (const union AccelAux *)((const char *)limex + limex->accelAuxOffset); const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); @@ -221,24 +274,20 @@ char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, } without_accel: - for (; i != min_accel_offset; i++) { - DUMP_INPUT(i); - if (ISZERO_STATE(s)) { - DEBUG_PRINTF("no states are switched on, early exit\n"); - ctx->s = s; - return MO_CONTINUE_MATCHING; + if (limex->flags & LIMEX_FLAG_CANNOT_DIE) { + const char can_die = 0; + if (LOOP_NOACCEL_FN(limex, input, &i, min_accel_offset, &s, ctx, offset, + flags, final_loc, first_match, + can_die) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; } - - u8 c = input[i]; - STATE_T succ; - NFA_EXEC_GET_LIM_SUCC(limex, s, succ); - - if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset, - &succ, final_loc, ctx, flags, 0, first_match)) { + } else { + const char can_die = 1; + if (LOOP_NOACCEL_FN(limex, input, &i, min_accel_offset, &s, ctx, offset, + flags, final_loc, first_match, + can_die) == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } - - s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); } with_accel: @@ -279,7 +328,6 @@ char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, goto without_accel; } - u8 c = input[i]; STATE_T succ; NFA_EXEC_GET_LIM_SUCC(limex, s, succ); @@ -288,6 +336,7 @@ char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, return MO_HALT_MATCHING; } + u8 c = input[i]; s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); } @@ -333,14 +382,13 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, u64a *final_loc = NULL; for (size_t i = length; i != 0; i--) { - DUMP_INPUT(i-1); + DUMP_INPUT(i - 1); if (ISZERO_STATE(s)) { DEBUG_PRINTF("no states are switched on, early exit\n"); ctx->s = s; return MO_CONTINUE_MATCHING; } - u8 c = input[i-1]; STATE_T succ; NFA_EXEC_GET_LIM_SUCC(limex, s, succ); @@ -349,6 +397,7 @@ char 
REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, return MO_HALT_MATCHING; } + u8 c = input[i - 1]; s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); } @@ -999,6 +1048,7 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( #undef RUN_ACCEL_FN #undef RUN_EXCEPTIONS_FN #undef REV_STREAM_FN +#undef LOOP_NOACCEL_FN #undef STREAM_FN #undef STREAMCB_FN #undef STREAMFIRST_FN diff --git a/src/nfa/limex_shuffle.h b/src/nfa/limex_shuffle.h index 5ca8fce09..365d47296 100644 --- a/src/nfa/limex_shuffle.h +++ b/src/nfa/limex_shuffle.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,22 +38,23 @@ #define LIMEX_SHUFFLE_H #include "ue2common.h" +#include "util/arch.h" #include "util/bitutils.h" #include "util/simd_utils.h" static really_inline u32 packedExtract128(m128 s, const m128 permute, const m128 compare) { - m128 shuffled = pshufb(s, permute); + m128 shuffled = pshufb_m128(s, permute); m128 compared = and128(shuffled, compare); u16 rv = ~movemask128(eq128(compared, shuffled)); return (u32)rv; } -#if defined(__AVX2__) +#if defined(HAVE_AVX2) static really_inline u32 packedExtract256(m256 s, const m256 permute, const m256 compare) { // vpshufb doesn't cross lanes, so this is a bit of a cheat - m256 shuffled = vpshufb(s, permute); + m256 shuffled = pshufb_m256(s, permute); m256 compared = and256(shuffled, compare); u32 rv = ~movemask256(eq256(compared, shuffled)); // stitch the lane-wise results back together @@ -61,4 +62,17 @@ u32 packedExtract256(m256 s, const m256 permute, const m256 compare) { } #endif // AVX2 +#if defined(HAVE_AVX512) +static really_inline +u32 packedExtract512(m512 s, const m512 permute, const m512 compare) { + // vpshufb doesn't cross lanes, so this is a bit of a cheat + m512 shuffled = pshufb_m512(s, permute); + m512 compared = and512(shuffled, compare); + u64a rv = ~eq512mask(compared, shuffled); + // stitch the lane-wise results back together + rv = rv >> 32 | rv; + return (u32)(((rv >> 16) | rv) & 0xffffU); +} +#endif // AVX512 + #endif // LIMEX_SHUFFLE_H diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 7a73c9d42..e875477b1 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -456,9 +456,8 @@ bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) { } static -aligned_unique_ptr mcclellanCompile16(dfa_info &info, - const CompileContext &cc, - set *accel_states) { +bytecode_ptr mcclellanCompile16(dfa_info &info, const CompileContext &cc, + set *accel_states) { DEBUG_PRINTF("building mcclellan 16\n"); vector reports; /* index in ri for the appropriate report list */ @@ -497,7 +496,7 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size); + auto nfa = make_zeroed_bytecode_ptr(total_size); char *nfa_base = (char *)nfa.get(); populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset, @@ 
-685,9 +684,8 @@ void allocateFSN8(dfa_info &info, } static -aligned_unique_ptr mcclellanCompile8(dfa_info &info, - const CompileContext &cc, - set *accel_states) { +bytecode_ptr mcclellanCompile8(dfa_info &info, const CompileContext &cc, + set *accel_states) { DEBUG_PRINTF("building mcclellan 8\n"); vector reports; @@ -717,12 +715,13 @@ aligned_unique_ptr mcclellanCompile8(dfa_info &info, accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size); + auto nfa = make_zeroed_bytecode_ptr(total_size); char *nfa_base = (char *)nfa.get(); mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get()); - allocateFSN8(info, accel_escape_info, &m->accel_limit_8, &m->accept_limit_8); + allocateFSN8(info, accel_escape_info, &m->accel_limit_8, + &m->accept_limit_8); populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset, accel_escape_info.size(), arb, single, nfa.get()); @@ -763,7 +762,7 @@ aligned_unique_ptr mcclellanCompile8(dfa_info &info, #define MAX_SHERMAN_LIST_LEN 8 static -void addIfEarlier(set &dest, dstate_id_t candidate, +void addIfEarlier(flat_set &dest, dstate_id_t candidate, dstate_id_t max) { if (candidate < max) { dest.insert(candidate); @@ -771,19 +770,41 @@ void addIfEarlier(set &dest, dstate_id_t candidate, } static -void addSuccessors(set &dest, const dstate &source, +void addSuccessors(flat_set &dest, const dstate &source, u16 alphasize, dstate_id_t curr_id) { for (symbol_t s = 0; s < alphasize; s++) { addIfEarlier(dest, source.next[s], curr_id); } } +/* \brief Returns a set of states to search for a better daddy. */ +static +flat_set find_daddy_candidates(const dfa_info &info, + dstate_id_t curr_id) { + flat_set hinted; + + addIfEarlier(hinted, 0, curr_id); + addIfEarlier(hinted, info.raw.start_anchored, curr_id); + addIfEarlier(hinted, info.raw.start_floating, curr_id); + + // Add existing daddy and his successors, then search back one generation. 
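+    // (Illustrative: for a state with daddy d and granddaddy g, the loop
+    // below collects {d, successors of d, g, successors of g}, always
+    // filtered to ids lower than curr_id; "successors" is shorthand in this
+    // sketch, not a helper in the code.)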
+ const u16 alphasize = info.impl_alpha_size; + dstate_id_t daddy = info.states[curr_id].daddy; + for (u32 level = 0; daddy && level < 2; level++) { + addIfEarlier(hinted, daddy, curr_id); + addSuccessors(hinted, info.states[daddy], alphasize, curr_id); + daddy = info.states[daddy].daddy; + } + + return hinted; +} + #define MAX_SHERMAN_SELF_LOOP 20 static -void find_better_daddy(dfa_info &info, dstate_id_t curr_id, - bool using8bit, bool any_cyclic_near_anchored_state, - const Grey &grey) { +void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, + bool any_cyclic_near_anchored_state, + bool trust_daddy_states, const Grey &grey) { if (!grey.allowShermanStates) { return; } @@ -818,21 +839,21 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, dstate_id_t best_daddy = 0; dstate &currState = info.states[curr_id]; - set hinted; /* set of states to search for a better daddy */ - addIfEarlier(hinted, 0, curr_id); - addIfEarlier(hinted, info.raw.start_anchored, curr_id); - addIfEarlier(hinted, info.raw.start_floating, curr_id); - - dstate_id_t mydaddy = currState.daddy; - if (mydaddy) { - addIfEarlier(hinted, mydaddy, curr_id); - addSuccessors(hinted, info.states[mydaddy], alphasize, curr_id); - dstate_id_t mygranddaddy = info.states[mydaddy].daddy; - if (mygranddaddy) { - addIfEarlier(hinted, mygranddaddy, curr_id); - addSuccessors(hinted, info.states[mygranddaddy], alphasize, - curr_id); + flat_set hinted; + if (trust_daddy_states) { + // Use the daddy already set for this state so long as it isn't already + // a Sherman state. + if (!info.is_sherman(currState.daddy)) { + hinted.insert(currState.daddy); + } else { + // Fall back to granddaddy, which has already been processed (due + // to BFS ordering) and cannot be a Sherman state. 
+ dstate_id_t granddaddy = info.states[currState.daddy].daddy; + assert(!info.is_sherman(granddaddy)); + hinted.insert(granddaddy); } + } else { + hinted = find_daddy_candidates(info, curr_id); } for (const dstate_id_t &donor : hinted) { @@ -885,7 +906,7 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, if (self_loop_width > MAX_SHERMAN_SELF_LOOP) { DEBUG_PRINTF("%hu is banned wide self loop (%u)\n", curr_id, - self_loop_width); + self_loop_width); return; } @@ -939,9 +960,10 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { return false; } -aligned_unique_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, - const CompileContext &cc, - set *accel_states) { +bytecode_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, + const CompileContext &cc, + bool trust_daddy_states, + set *accel_states) { u16 total_daddy = 0; dfa_info info(strat); bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256; @@ -957,7 +979,7 @@ aligned_unique_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat & for (u32 i = 0; i < info.size(); i++) { find_better_daddy(info, i, using8bit, any_cyclic_near_anchored_state, - cc.grey); + trust_daddy_states, cc.grey); total_daddy += info.extra[i].daddytaken; } @@ -965,7 +987,7 @@ aligned_unique_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat & info.size() * info.impl_alpha_size, info.size(), info.impl_alpha_size); - aligned_unique_ptr nfa; + bytecode_ptr nfa; if (!using8bit) { nfa = mcclellanCompile16(info, cc, accel_states); } else { @@ -980,11 +1002,13 @@ aligned_unique_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat & return nfa; } -aligned_unique_ptr mcclellanCompile(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm, - set *accel_states) { - mcclellan_build_strat mbs(raw, rm); - return mcclellanCompile_i(raw, mbs, cc, accel_states); +bytecode_ptr mcclellanCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm, + bool only_accel_init, + bool trust_daddy_states, + set *accel_states) { + mcclellan_build_strat mbs(raw, rm, only_accel_init); + return mcclellanCompile_i(raw, mbs, cc, trust_daddy_states, accel_states); } size_t mcclellan_build_strat::accelSize(void) const { diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h index 8d8dfb196..baf72d9ce 100644 --- a/src/nfa/mcclellancompile.h +++ b/src/nfa/mcclellancompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,7 +32,7 @@ #include "accel_dfa_build_strat.h" #include "rdfa.h" #include "ue2common.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/ue2_containers.h" #include @@ -48,14 +48,15 @@ struct CompileContext; class mcclellan_build_strat : public accel_dfa_build_strat { public: - mcclellan_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in) - : accel_dfa_build_strat(rm_in), rdfa(rdfa_in) {} + mcclellan_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in, + bool only_accel_init_in) + : accel_dfa_build_strat(rm_in, only_accel_init_in), rdfa(rdfa_in) {} raw_dfa &get_raw() const override { return rdfa; } std::unique_ptr gatherReports( std::vector &reports /* out */, std::vector &reports_eod /* out */, u8 *isSingleReport /* out */, - ReportID *arbReport /* out */) const override; + ReportID *arbReport /* out */) const override; 
     size_t accelSize(void) const override;
     u32 max_allowed_offset_accel() const override;
     u32 max_stop_char() const override;
@@ -65,17 +66,30 @@ class mcclellan_build_strat : public accel_dfa_build_strat {
     raw_dfa &rdfa;
 };
 
-/* accel_states: (optional) on success, is filled with the set of accelerable
- * states */
-ue2::aligned_unique_ptr<NFA>
+/**
+ * \brief Construct an implementation DFA.
+ *
+ * \param raw the raw dfa to construct from
+ * \param cc compile context
+ * \param rm report manager
+ * \param only_accel_init if true, only the init states will be examined for
+ *        acceleration opportunities
+ * \param trust_daddy_states if true, trust the daddy state set in the raw dfa
+ *        rather than conducting a search for a better daddy (for Sherman
+ *        states)
+ * \param accel_states (optional) on success, filled with the set of
+ *        accelerable states
+ */
+bytecode_ptr<NFA>
 mcclellanCompile(raw_dfa &raw, const CompileContext &cc,
-                 const ReportManager &rm,
+                 const ReportManager &rm, bool only_accel_init,
+                 bool trust_daddy_states = false,
                  std::set<dstate_id_t> *accel_states = nullptr);
 
 /* used internally by mcclellan/haig/gough compile process */
-ue2::aligned_unique_ptr<NFA>
+bytecode_ptr<NFA>
 mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
-                   const CompileContext &cc,
+                   const CompileContext &cc, bool trust_daddy_states = false,
                    std::set<dstate_id_t> *accel_states = nullptr);
 
 /**
@@ -89,4 +103,4 @@ bool has_accel_mcclellan(const NFA *nfa);
 
 } // namespace ue2
 
-#endif
+#endif // MCCLELLANCOMPILE_H
diff --git a/src/nfa/mcclellancompile_util.cpp b/src/nfa/mcclellancompile_util.cpp
index a61a19ab7..17e022fe6 100644
--- a/src/nfa/mcclellancompile_util.cpp
+++ b/src/nfa/mcclellancompile_util.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -43,6 +43,12 @@ namespace ue2 {
 
 #define INIT_STATE 1
 
+static
+bool state_has_reports(const raw_dfa &raw, dstate_id_t s) {
+    const auto &ds = raw.states[s];
+    return !ds.reports.empty() || !ds.reports_eod.empty();
+}
+
 static
 u32 count_dots(const raw_dfa &raw) {
     assert(raw.start_anchored == INIT_STATE);
@@ -60,8 +66,7 @@ u32 count_dots(const raw_dfa &raw) {
             }
         }
 
-        if (!raw.states[raw.states[i].next[0]].reports.empty()
-            || !raw.states[raw.states[i].next[0]].reports_eod.empty()) {
+        if (state_has_reports(raw, raw.states[i].next[0])) {
             goto validate;
         }
 
@@ -162,74 +167,8 @@ u32 calc_min_dist_from_bob(raw_dfa &raw, vector<u32> *dist_in) {
     return last_d;
 }
 
-static
-void find_in_edges(const raw_dfa &raw, vector<vector<dstate_id_t>> *in_edges) {
-    in_edges->clear();
-    in_edges->resize(raw.states.size());
-    ue2::unordered_set<dstate_id_t> seen;
-
-    for (u32 s = 1; s < raw.states.size(); s++) {
-        seen.clear();
-        for (u32 j = 0; j < raw.alpha_size; j++) {
-            dstate_id_t t = raw.states[s].next[j];
-            if (contains(seen, t)) {
-                continue;
-            }
-            seen.insert(t);
-            (*in_edges)[t].push_back(s);
-        }
-    }
-}
-
-static
-void calc_min_dist_to_accept(const raw_dfa &raw,
-                             const vector<vector<dstate_id_t>> &in_edges,
-                             vector<u32> *accept_dist) {
-    vector<u32> &dist = *accept_dist;
-    dist.clear();
-    dist.resize(raw.states.size(), ~0U);
-
-    /* for reporting states to start from */
-    deque<dstate_id_t> to_visit;
-    for (u32 s = 0; s < raw.states.size(); s++) {
-        if (!raw.states[s].reports.empty()
-            || !raw.states[s].reports_eod.empty()) {
-            to_visit.push_back(s);
-            dist[s] = 0;
-        }
-    }
-
-    /* bfs */
-    UNUSED u32 last_d = 0;
-    while (!to_visit.empty()) {
-        dstate_id_t s =
to_visit.front(); - to_visit.pop_front(); - assert(s != DEAD_STATE); - - u32 d = dist[s]; - assert(d >= last_d); - assert(d != ~0U); - - for (vector::const_iterator it = in_edges[s].begin(); - it != in_edges[s].end(); ++it) { - dstate_id_t t = *it; - if (t == DEAD_STATE) { - continue; - } - if (dist[t] == ~0U) { - to_visit.push_back(t); - dist[t] = d + 1; - } else { - assert(dist[t] <= d + 1); - } - } - - last_d = d; - } -} - -bool prune_overlong(raw_dfa &raw, u32 max_offset) { - DEBUG_PRINTF("pruning to at most %u\n", max_offset); +bool clear_deeper_reports(raw_dfa &raw, u32 max_offset) { + DEBUG_PRINTF("clearing reports on states deeper than %u\n", max_offset); vector bob_dist; u32 max_min_dist_bob = calc_min_dist_from_bob(raw, &bob_dist); @@ -237,53 +176,18 @@ bool prune_overlong(raw_dfa &raw, u32 max_offset) { return false; } - vector > in_edges; - find_in_edges(raw, &in_edges); - - vector accept_dist; - calc_min_dist_to_accept(raw, in_edges, &accept_dist); - - in_edges.clear(); - - /* look over the states and filter out any which cannot reach a report - * states before max_offset */ - vector new_ids(raw.states.size()); - vector new_states; - u32 count = 1; - new_states.push_back(raw.states[DEAD_STATE]); - - for (u32 s = DEAD_STATE + 1; s < raw.states.size(); s++) { - if (bob_dist[s] + accept_dist[s] > max_offset) { - DEBUG_PRINTF("pruned %u: bob %u, report %u\n", s, bob_dist[s], - accept_dist[s]); - new_ids[s] = DEAD_STATE; - } else { - new_ids[s] = count++; - new_states.push_back(raw.states[s]); - assert(new_states.size() == count); - assert(new_ids[s] <= s); - } - } - - /* swap states */ - DEBUG_PRINTF("pruned %zu -> %u\n", raw.states.size(), count); - raw.states.swap(new_states); - new_states.clear(); - - /* update edges and daddys to refer to the new ids */ + bool changed = false; for (u32 s = DEAD_STATE + 1; s < raw.states.size(); s++) { - for (u32 j = 0; j < raw.alpha_size; j++) { - dstate_id_t old_t = raw.states[s].next[j]; - raw.states[s].next[j] = new_ids[old_t]; + if (bob_dist[s] > max_offset && state_has_reports(raw, s)) { + DEBUG_PRINTF("clearing reports on %u (depth %u)\n", s, bob_dist[s]); + auto &ds = raw.states[s]; + ds.reports.clear(); + ds.reports_eod.clear(); + changed = true; } - raw.states[s].daddy = new_ids[raw.states[s].daddy]; } - /* update specials */ - raw.start_floating = new_ids[raw.start_floating]; - raw.start_anchored = new_ids[raw.start_anchored]; - - return true; + return changed; } set all_reports(const raw_dfa &rdfa) { diff --git a/src/nfa/mcclellancompile_util.h b/src/nfa/mcclellancompile_util.h index 554c1efdd..d681e06b1 100644 --- a/src/nfa/mcclellancompile_util.h +++ b/src/nfa/mcclellancompile_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,10 +39,12 @@ namespace ue2 { u32 remove_leading_dots(raw_dfa &raw); /** - * Prunes any states which cannot be reached within max_offset from start of - * stream. Returns false if no changes are made to the rdfa + * \brief Clear reports on any states that are deeper than \a max_offset from + * start of stream. + * + * Returns false if no changes are made to the DFA. 
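+ *
+ * (Worked sketch: with max_offset = 4, a state whose minimum distance from
+ * start of stream is 6 keeps all of its transitions, but its reports and
+ * reports_eod sets are emptied; clearing reports rather than pruning whole
+ * states leaves the state numbering intact and lets later passes discard
+ * any states that become useless.)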
*/ -bool prune_overlong(raw_dfa &raw, u32 max_offset); +bool clear_deeper_reports(raw_dfa &raw, u32 max_offset); std::set all_reports(const raw_dfa &rdfa); bool has_eod_accepts(const raw_dfa &rdfa); diff --git a/src/nfa/mcsheng.c b/src/nfa/mcsheng.c index 98db3f0a1..9722fd676 100644 --- a/src/nfa/mcsheng.c +++ b/src/nfa/mcsheng.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +33,7 @@ #include "nfa_api.h" #include "nfa_api_queue.h" #include "nfa_internal.h" +#include "util/arch.h" #include "util/bitutils.h" #include "util/compare.h" #include "util/simd_utils.h" @@ -168,7 +169,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, * extract a single copy of the state from the u32 for checking. */ u32 sheng_stop_limit_x4 = sheng_stop_limit * 0x01010101; -#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) +#if defined(HAVE_BMI2) && defined(ARCH_64_BIT) u32 sheng_limit_x4 = sheng_limit * 0x01010101; m128 simd_stop_limit = set4x32(sheng_stop_limit_x4); m128 accel_delta = set16x8(sheng_limit - sheng_stop_limit); @@ -176,20 +177,20 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, m->sheng_accel_limit, sheng_stop_limit); #endif -#define SHENG_SINGLE_ITER do { \ - m128 shuffle_mask = masks[*(c++)]; \ - s = pshufb(shuffle_mask, s); \ - u32 s_gpr_x4 = movd(s); /* convert to u8 */ \ - DEBUG_PRINTF("c %hhu (%c) --> s %hhu\n", c[-1], c[-1], s_gpr); \ - if (s_gpr_x4 >= sheng_stop_limit_x4) { \ - s_gpr = s_gpr_x4; \ - goto exit; \ - } \ +#define SHENG_SINGLE_ITER do { \ + m128 shuffle_mask = masks[*(c++)]; \ + s = pshufb_m128(shuffle_mask, s); \ + u32 s_gpr_x4 = movd(s); /* convert to u8 */ \ + DEBUG_PRINTF("c %hhu (%c) --> s %hhu\n", c[-1], c[-1], s_gpr_x4); \ + if (s_gpr_x4 >= sheng_stop_limit_x4) { \ + s_gpr = s_gpr_x4; \ + goto exit; \ + } \ } while (0) u8 s_gpr; while (c < c_end) { -#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) +#if defined(HAVE_BMI2) && defined(ARCH_64_BIT) /* This version uses pext for efficently bitbashing out scaled * versions of the bytes to process from a u64a */ @@ -197,7 +198,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, u64a cc0 = pdep64(data_bytes, 0xff0); /* extract scaled low byte */ data_bytes &= ~0xffULL; /* clear low bits for scale space */ m128 shuffle_mask0 = load128((const char *)masks + cc0); - s = pshufb(shuffle_mask0, s); + s = pshufb_m128(shuffle_mask0, s); m128 s_max = s; m128 s_max0 = s_max; DEBUG_PRINTF("c %02llx --> s %hhu\n", cc0 >> 4, movd(s)); @@ -207,7 +208,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, u64a cc##iter = pext64(data_bytes, mcsheng_pext_mask[iter]); \ assert(cc##iter == (u64a)c[iter] << 4); \ m128 shuffle_mask##iter = load128((const char *)masks + cc##iter); \ - s = pshufb(shuffle_mask##iter, s); \ + s = pshufb_m128(shuffle_mask##iter, s); \ if (do_accel && iter == 7) { \ /* in the final iteration we also have to check against accel */ \ m128 s_temp = sadd_u8_m128(s, accel_delta); \ @@ -287,19 +288,19 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, assert(soft_c_end - c < SHENG_CHUNK); switch (soft_c_end - c) { case 7: - SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; // fallthrough case 6: - SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; // fallthrough case 5: - 
SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; // fallthrough case 4: - SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; // fallthrough case 3: - SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; // fallthrough case 2: - SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; // fallthrough case 1: - SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; // fallthrough } } diff --git a/src/nfa/mcsheng_compile.cpp b/src/nfa/mcsheng_compile.cpp index 7b4e58ab1..2049fee03 100644 --- a/src/nfa/mcsheng_compile.cpp +++ b/src/nfa/mcsheng_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -617,7 +617,7 @@ void fill_in_succ_table_16(NFA *nfa, const dfa_info &info, #define MAX_SHERMAN_LIST_LEN 8 static -void addIfEarlier(set &dest, dstate_id_t candidate, +void addIfEarlier(flat_set &dest, dstate_id_t candidate, dstate_id_t max) { if (candidate < max) { dest.insert(candidate); @@ -625,13 +625,35 @@ void addIfEarlier(set &dest, dstate_id_t candidate, } static -void addSuccessors(set &dest, const dstate &source, +void addSuccessors(flat_set &dest, const dstate &source, u16 alphasize, dstate_id_t curr_id) { for (symbol_t s = 0; s < alphasize; s++) { addIfEarlier(dest, source.next[s], curr_id); } } +/* \brief Returns a set of states to search for a better daddy. */ +static +flat_set find_daddy_candidates(const dfa_info &info, + dstate_id_t curr_id) { + flat_set hinted; + + addIfEarlier(hinted, 0, curr_id); + addIfEarlier(hinted, info.raw.start_anchored, curr_id); + addIfEarlier(hinted, info.raw.start_floating, curr_id); + + // Add existing daddy and his successors, then search back one generation. + const u16 alphasize = info.impl_alpha_size; + dstate_id_t daddy = info.states[curr_id].daddy; + for (u32 level = 0; daddy && level < 2; level++) { + addIfEarlier(hinted, daddy, curr_id); + addSuccessors(hinted, info.states[daddy], alphasize, curr_id); + daddy = info.states[daddy].daddy; + } + + return hinted; +} + #define MAX_SHERMAN_SELF_LOOP 20 static @@ -671,22 +693,7 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, dstate_id_t best_daddy = 0; dstate &currState = info.states[curr_id]; - set hinted; /* set of states to search for a better daddy */ - addIfEarlier(hinted, 0, curr_id); - addIfEarlier(hinted, info.raw.start_anchored, curr_id); - addIfEarlier(hinted, info.raw.start_floating, curr_id); - - dstate_id_t mydaddy = currState.daddy; - if (mydaddy) { - addIfEarlier(hinted, mydaddy, curr_id); - addSuccessors(hinted, info.states[mydaddy], alphasize, curr_id); - dstate_id_t mygranddaddy = info.states[mydaddy].daddy; - if (mygranddaddy) { - addIfEarlier(hinted, mygranddaddy, curr_id); - addSuccessors(hinted, info.states[mygranddaddy], alphasize, - curr_id); - } - } + flat_set hinted = find_daddy_candidates(info, curr_id); for (const dstate_id_t &donor : hinted) { assert(donor < curr_id); @@ -821,7 +828,7 @@ void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) { } static -aligned_unique_ptr mcshengCompile16(dfa_info &info, dstate_id_t sheng_end, +bytecode_ptr mcshengCompile16(dfa_info &info, dstate_id_t sheng_end, const map &accel_escape_info, const Grey &grey) { DEBUG_PRINTF("building mcsheng 16\n"); @@ -872,7 +879,7 @@ aligned_unique_ptr mcshengCompile16(dfa_info &info, dstate_id_t sheng_end, accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ assert(ISALIGNED_N(accel_offset, alignof(union 
AccelAux))); - aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size); + auto nfa = make_zeroed_bytecode_ptr(total_size); mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get()); populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset, @@ -967,7 +974,7 @@ void allocateImplId8(dfa_info &info, dstate_id_t sheng_end, } static -aligned_unique_ptr mcshengCompile8(dfa_info &info, dstate_id_t sheng_end, +bytecode_ptr mcshengCompile8(dfa_info &info, dstate_id_t sheng_end, const map &accel_escape_info) { DEBUG_PRINTF("building mcsheng 8\n"); @@ -998,7 +1005,7 @@ aligned_unique_ptr mcshengCompile8(dfa_info &info, dstate_id_t sheng_end, accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size); + auto nfa = make_zeroed_bytecode_ptr(total_size); mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get()); allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8, @@ -1019,13 +1026,13 @@ aligned_unique_ptr mcshengCompile8(dfa_info &info, dstate_id_t sheng_end, return nfa; } -aligned_unique_ptr mcshengCompile(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm) { +bytecode_ptr mcshengCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm) { if (!cc.grey.allowMcSheng) { return nullptr; } - mcclellan_build_strat mbs(raw, rm); + mcclellan_build_strat mbs(raw, rm, false); dfa_info info(mbs); bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256; @@ -1044,7 +1051,7 @@ aligned_unique_ptr mcshengCompile(raw_dfa &raw, const CompileContext &cc, return nullptr; } - aligned_unique_ptr nfa; + bytecode_ptr nfa; if (!using8bit) { nfa = mcshengCompile16(info, sheng_end, accel_escape_info, cc.grey); } else { diff --git a/src/nfa/mcsheng_compile.h b/src/nfa/mcsheng_compile.h index d1ae1e323..487ab45f4 100644 --- a/src/nfa/mcsheng_compile.h +++ b/src/nfa/mcsheng_compile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,13 +29,8 @@ #ifndef MCSHENGCOMPILE_H #define MCSHENGCOMPILE_H -#include "accel_dfa_build_strat.h" -#include "rdfa.h" #include "ue2common.h" -#include "util/alloc.h" -#include "util/ue2_containers.h" - -#include +#include "util/bytecode_ptr.h" struct NFA; @@ -43,10 +38,10 @@ namespace ue2 { class ReportManager; struct CompileContext; +struct raw_dfa; -ue2::aligned_unique_ptr -mcshengCompile(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm); +bytecode_ptr mcshengCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm); bool has_accel_mcsheng(const NFA *nfa); diff --git a/src/nfa/mpvcompile.cpp b/src/nfa/mpvcompile.cpp index 87fb462e5..8497c6487 100644 --- a/src/nfa/mpvcompile.cpp +++ b/src/nfa/mpvcompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -309,9 +309,9 @@ const mpv_counter_info &findCounter(const vector &counters, return counters.front(); } -aligned_unique_ptr mpvCompile(const vector &puffs_in, - const vector &triggered_puffs, - const ReportManager &rm) { +bytecode_ptr mpvCompile(const vector &puffs_in, + const vector 
&triggered_puffs, + const ReportManager &rm) { assert(!puffs_in.empty() || !triggered_puffs.empty()); u32 puffette_count = puffs_in.size() + triggered_puffs.size(); @@ -343,7 +343,7 @@ aligned_unique_ptr<NFA> mpvCompile(const vector<raw_puff> &puffs_in, DEBUG_PRINTF("%u puffs, len = %u\n", puffette_count, len); - aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(len); + auto nfa = make_zeroed_bytecode_ptr<NFA>(len); mpv_puffette *pa_base = (mpv_puffette *) ((char *)nfa.get() + sizeof(NFA) + sizeof(mpv) diff --git a/src/nfa/mpvcompile.h b/src/nfa/mpvcompile.h index fb91ac64e..4f820e436 100644 --- a/src/nfa/mpvcompile.h +++ b/src/nfa/mpvcompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,7 +30,7 @@ #define MPV_COMPILE_H #include "ue2common.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/charreach.h" #include @@ -61,9 +61,9 @@ struct raw_puff { * puffs in the triggered_puffs vector are enabled when a TOP_N event is * delivered corresponding to their index in the vector */ -aligned_unique_ptr<NFA> mpvCompile(const std::vector<raw_puff> &puffs, - const std::vector<raw_puff> &triggered_puffs, - const ReportManager &rm); +bytecode_ptr<NFA> mpvCompile(const std::vector<raw_puff> &puffs, + const std::vector<raw_puff> &triggered_puffs, + const ReportManager &rm); } // namespace ue2 diff --git a/src/nfa/multiaccel_common.h b/src/nfa/multiaccel_common.h deleted file mode 100644 index 1a13c3b6d..000000000 --- a/src/nfa/multiaccel_common.h +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef MULTIACCEL_COMMON_H_ -#define MULTIACCEL_COMMON_H_ - -#include "config.h" -#include "ue2common.h" -#include "util/join.h" -#include "util/bitutils.h" - -/* - * When doing shifting, remember that the total number of shifts should be n-1 - */ -#define VARISHIFT(src, dst, len) \ - do { \ - (dst) &= (src) >> (len); \ - } while (0) -#define STATIC_SHIFT1(x) \ - do { \ - (x) &= (x) >> 1; \ - } while (0) -#define STATIC_SHIFT2(x) \ - do { \ - (x) &= (x) >> 2;\ - } while (0) -#define STATIC_SHIFT4(x) \ - do { \ - (x) &= (x) >> 4; \ - } while (0) -#define STATIC_SHIFT8(x) \ - do { \ - (x) &= (x) >> 8; \ - } while (0) -#define SHIFT1(x) \ - do {} while (0) -#define SHIFT2(x) \ - do { \ - STATIC_SHIFT1(x); \ - } while (0) -#define SHIFT3(x) \ - do { \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT1(x); \ - } while (0) -#define SHIFT4(x) \ - do { \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT5(x) \ - do { \ - SHIFT4(x); \ - STATIC_SHIFT1(x); \ - } while (0) -#define SHIFT6(x) \ - do { \ - SHIFT4(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT7(x) \ - do { \ - SHIFT4(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT8(x) \ - do { \ - SHIFT4(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT9(x) \ - do { \ - SHIFT8(x); \ - STATIC_SHIFT1(x); \ - } while (0) -#define SHIFT10(x) \ - do { \ - SHIFT8(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT11(x) \ - do { \ - SHIFT8(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT12(x); \ - do { \ - SHIFT8(x);\ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT13(x); \ - do { \ - SHIFT8(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT14(x) \ - do { \ - SHIFT8(x); \ - STATIC_SHIFT2(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT15(x) \ - do { \ - SHIFT8(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT16(x) \ - do { \ - SHIFT8(x); \ - STATIC_SHIFT8(x); \ - } while (0) -#define SHIFT17(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT1(x); \ - } while (0) -#define SHIFT18(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT19(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT20(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT21(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT22(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT2(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT23(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT24(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT8(x); \ - } while (0) -#define SHIFT25(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT1(x); \ - } while (0) -#define SHIFT26(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT27(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT28(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT29(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT30(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT2(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT31(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT32(x) \ - do { \ - SHIFT24(x); \ - 
STATIC_SHIFT8(x); \ - } while (0) - -/* - * this function is used by 32-bit multiaccel matchers. 32-bit matchers accept - * a 32-bit integer as a buffer, where low 16 bits is movemask result and - * high 16 bits are "don't care" values. this function is not expected to return - * a result higher than 16. - */ -static really_inline -const u8 *match32(const u8 *buf, const u32 z) { - if (unlikely(z != 0)) { - u32 pos = ctz32(z); - assert(pos < 16); - return buf + pos; - } - return NULL; -} - -/* - * this function is used by 64-bit multiaccel matchers. 64-bit matchers accept - * a 64-bit integer as a buffer, where low 32 bits is movemask result and - * high 32 bits are "don't care" values. this function is not expected to return - * a result higher than 32. - */ -static really_inline -const u8 *match64(const u8 *buf, const u64a z) { - if (unlikely(z != 0)) { - u32 pos = ctz64(z); - assert(pos < 32); - return buf + pos; - } - return NULL; -} - -#endif /* MULTIACCEL_COMMON_H_ */ diff --git a/src/nfa/multiaccel_compilehelper.cpp b/src/nfa/multiaccel_compilehelper.cpp deleted file mode 100644 index 4c1f81018..000000000 --- a/src/nfa/multiaccel_compilehelper.cpp +++ /dev/null @@ -1,439 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "multiaccel_compilehelper.h" - -using namespace std; -using namespace ue2; - -#ifdef DEBUG -static const char* state_to_str[] = { - "FIRST_RUN", - "SECOND_RUN", - "WAITING_FOR_GRAB", - "FIRST_TAIL", - "SECOND_TAIL", - "STOPPED", - "INVALID" -}; -static const char* type_to_str[] = { - "SHIFT", - "SHIFTGRAB", - "DOUBLESHIFT", - "DOUBLESHIFTGRAB", - "LONG", - "LONGGRAB", - "NONE" -}; - -static -void dumpMultiaccelState(const accel_data &d) { - DEBUG_PRINTF("type: %s state: %s len1: %u tlen1: %u len2: %u tlen2: %u\n", - type_to_str[(unsigned) d.type], - state_to_str[(unsigned) d.state], - d.len1, d.tlen1, d.len2, d.tlen2); -} -#endif - -/* stop all the matching. this may render most schemes invalid. 
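 * called from getBestScheme() on each candidate scheme once scanning of the pattern's vertices ends, just before validate() trims the result.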
*/ -static -void stop(accel_data &d) { - switch (d.state) { - case STATE_STOPPED: - case STATE_INVALID: - break; - case STATE_FIRST_TAIL: - case STATE_SECOND_RUN: - /* - * Shift matchers are special case, because they have "tails". - * When shift matcher reaches a mid/endpoint, tail mode is - * activated, which looks for more matches to extend the match. - * - * For example, consider pattern /a{5}ba{3}/. Under normal circumstances, - * long-grab matcher will be picked for this pattern (matching a run of a's, - * followed by a not-a), because doubleshift matcher would be confused by - * consecutive a's and would parse the pattern as a.{0}a.{0}a (two shifts - * by 1) and throw out the rest of the pattern. - * - * With tails, we defer ending the run until we actually run out of - * matching characters, so the above pattern will now be parsed by - * doubleshift matcher as /a.{3}a.{3}a/ (two shifts by 4). - * - * So if we are stopping shift matchers, we should check if we aren't in - * the process of matching first tail or second run. If we are, we can't - * finish the second run as we are stopping, but we can try and split - * the first tail instead to obtain a valid second run. - */ - if ((d.type == MultibyteAccelInfo::MAT_DSHIFT || - d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) && d.tlen1 == 0) { - // can't split an empty void... - d.state = STATE_INVALID; - break; - } - d.len2 = 0; - d.state = STATE_STOPPED; - break; - case STATE_SECOND_TAIL: - d.state = STATE_STOPPED; - break; - case STATE_WAITING_FOR_GRAB: - case STATE_FIRST_RUN: - if (d.type == MultibyteAccelInfo::MAT_LONG) { - d.state = STATE_STOPPED; - } else { - d.state = STATE_INVALID; - } - break; - } -} - -static -void validate(accel_data &d, unsigned max_len) { - // try and fit in all our tails - if (d.len1 + d.tlen1 + d.len2 + d.tlen2 < max_len && d.len2 > 0) { - // case 1: everything fits in - d.len1 += d.tlen1; - d.len2 += d.tlen2; - d.tlen1 = 0; - d.tlen2 = 0; - } else if (d.len1 + d.tlen1 + d.len2 < max_len && d.len2 > 0) { - // case 2: everything but the second tail fits in - d.len1 += d.tlen1; - d.tlen1 = 0; - // try going for a partial tail - if (d.tlen2 != 0) { - int new_tlen2 = max_len - 1 - d.len1 - d.len2; - if (new_tlen2 > 0) { - d.len2 += new_tlen2; - } - d.tlen2 = 0; - } - } else if (d.len1 + d.tlen1 < max_len) { - // case 3: first run and its tail fits in - if (d.type == MultibyteAccelInfo::MAT_DSHIFT || - d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) { - // split the tail into a second run - d.len2 = d.tlen1; - } else { - d.len1 += d.tlen1; - d.len2 = 0; - } - d.tlen1 = 0; - d.tlen2 = 0; - } else if (d.len1 < max_len) { - // case 4: nothing but the first run fits in - // try going for a partial tail - if (d.tlen1 != 0) { - int new_tlen1 = max_len - 1 - d.len1; - if (new_tlen1 > 0) { - d.len1 += new_tlen1; - } - d.tlen1 = 0; - } - d.len2 = 0; - d.tlen2 = 0; - } - // if we removed our second run, doubleshift matchers are no longer valid - if ((d.type == MultibyteAccelInfo::MAT_DSHIFT || - d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) && d.len2 == 0) { - d.state = STATE_INVALID; - } else if ((d.type == MultibyteAccelInfo::MAT_LONG) && d.len1 >= max_len) { - // long matchers can just stop whenever they want to - d.len1 = max_len - 1; - } - - // now, general sanity checks - if ((d.len1 + d.tlen1 + d.len2 + d.tlen2) >= max_len) { - d.state = STATE_INVALID; - } - if ((d.len1 + d.tlen1 + d.len2 + d.tlen2) < MULTIACCEL_MIN_LEN) { - d.state = STATE_INVALID; - } -} - -static -void match(accel_data &d, const CharReach 
&ref_cr, const CharReach &cur_cr) { - switch (d.type) { - case MultibyteAccelInfo::MAT_LONG: - { - /* - * For long matcher, we want lots of consecutive same-or-subset - * char-reaches - */ - if ((ref_cr & cur_cr) == cur_cr) { - d.len1++; - } else { - d.state = STATE_STOPPED; - } - } - break; - - case MultibyteAccelInfo::MAT_LONGGRAB: - { - /* - * For long-grab matcher, we want lots of consecutive same-or-subset - * char-reaches with a negative match in the end. - */ - if ((ref_cr & cur_cr) == cur_cr) { - d.len1++; - } else if (!(ref_cr & cur_cr).any()) { - /* we grabbed, stop immediately */ - d.state = STATE_STOPPED; - } else { - /* our run-n-grab was interrupted; mark as invalid */ - d.state = STATE_INVALID; - } - } - break; - - case MultibyteAccelInfo::MAT_SHIFTGRAB: - { - /* - * For shift-grab matcher, we want two matches separated by anything; - * however the second vertex *must* be a negative (non-overlapping) match. - * - * Shiftgrab matcher is identical to shift except for presence of grab. - */ - if (d.state == STATE_WAITING_FOR_GRAB) { - if ((ref_cr & cur_cr).any()) { - d.state = STATE_INVALID; - } else { - d.state = STATE_FIRST_RUN; - d.len1++; - } - return; - } - } - /* no break, falling through */ - case MultibyteAccelInfo::MAT_SHIFT: - { - /* - * For shift-matcher, we want two matches separated by anything. - */ - if (ref_cr == cur_cr) { - // keep matching tail - switch (d.state) { - case STATE_FIRST_RUN: - d.state = STATE_FIRST_TAIL; - break; - case STATE_FIRST_TAIL: - d.tlen1++; - break; - default: - // shouldn't happen - assert(0); - } - } else { - switch (d.state) { - case STATE_FIRST_RUN: - // simply advance - d.len1++; - break; - case STATE_FIRST_TAIL: - // we found a non-matching char after tail, so stop - d.state = STATE_STOPPED; - break; - default: - // shouldn't happen - assert(0); - } - } - } - break; - - case MultibyteAccelInfo::MAT_DSHIFTGRAB: - { - /* - * For double shift-grab matcher, we want two matches separated by - * either negative matches or dots; however the second vertex *must* - * be a negative match. - * - * Doubleshiftgrab matcher is identical to doubleshift except for - * presence of grab. - */ - if (d.state == STATE_WAITING_FOR_GRAB) { - if ((ref_cr & cur_cr).any()) { - d.state = STATE_INVALID; - } else { - d.state = STATE_FIRST_RUN; - d.len1++; - } - return; - } - } - /* no break, falling through */ - case MultibyteAccelInfo::MAT_DSHIFT: - { - /* - * For double shift matcher, we want three matches, each separated - * by a lot of anything. - * - * Doubleshift matcher is complicated by presence of tails. 
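 * In the transitions below, a character matching the reference reach extends a tail (FIRST_RUN becomes FIRST_TAIL and opens the second run; SECOND_RUN becomes SECOND_TAIL), while a non-matching character lengthens the current shift (len1/len2), promotes FIRST_TAIL to SECOND_RUN, or stops the scheme from SECOND_TAIL.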
- */ - if (ref_cr == cur_cr) { - // decide if we are activating second shift or matching tails - switch (d.state) { - case STATE_FIRST_RUN: - d.state = STATE_FIRST_TAIL; - d.len2 = 1; // we're now ready for our second run - break; - case STATE_FIRST_TAIL: - d.tlen1++; - break; - case STATE_SECOND_RUN: - d.state = STATE_SECOND_TAIL; - break; - case STATE_SECOND_TAIL: - d.tlen2++; - break; - default: - // shouldn't happen - assert(0); - } - } else { - switch (d.state) { - case STATE_FIRST_RUN: - d.len1++; - break; - case STATE_FIRST_TAIL: - // start second run - d.state = STATE_SECOND_RUN; - d.len2++; - break; - case STATE_SECOND_RUN: - d.len2++; - break; - case STATE_SECOND_TAIL: - // stop - d.state = STATE_STOPPED; - break; - default: - // shouldn't happen - assert(0); - } - } - } - break; - - default: - // shouldn't happen - assert(0); - break; - } -} - -MultiaccelCompileHelper::MultiaccelCompileHelper(const CharReach &ref_cr, - u32 off, unsigned max_length) - : cr(ref_cr), offset(off), max_len(max_length) { - int accel_num = (int) MultibyteAccelInfo::MAT_MAX; - accels.resize(accel_num); - - // mark everything as valid - for (int i = 0; i < accel_num; i++) { - accel_data &ad = accels[i]; - ad.len1 = 1; - ad.type = (MultibyteAccelInfo::multiaccel_type) i; - - /* for shift-grab matchers, we are waiting for the grab right at the start */ - if (ad.type == MultibyteAccelInfo::MAT_SHIFTGRAB - || ad.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) { - ad.state = STATE_WAITING_FOR_GRAB; - } else { - ad.state = STATE_FIRST_RUN; - } - } -} - -bool MultiaccelCompileHelper::canAdvance() { - for (const accel_data &ad : accels) { - if (ad.state != STATE_STOPPED && ad.state != STATE_INVALID) { - return true; - } - } - return false; -} - -void MultiaccelCompileHelper::advance(const CharReach &cur_cr) { - for (accel_data &ad : accels) { - if (ad.state == STATE_STOPPED || ad.state == STATE_INVALID) { - continue; - } - match(ad, cr, cur_cr); -#ifdef DEBUG - dumpMultiaccelState(ad); -#endif - } -} - -MultibyteAccelInfo MultiaccelCompileHelper::getBestScheme() { - int best_len = 0; - accel_data best; - - DEBUG_PRINTF("Stopping multiaccel compile\n"); - - for (accel_data &ad : accels) { - // stop our matching - stop(ad); - validate(ad, max_len); - -#ifdef DEBUG - dumpMultiaccelState(ad); -#endif - - // skip invalid schemes - if (ad.state == STATE_INVALID) { - continue; - } - DEBUG_PRINTF("Marking as viable\n"); - - // TODO: relative strengths of accel schemes? maybe e.g. a shorter - // long match would in some cases be preferable to a longer - // double shift match (for example, depending on length)? 
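 // Note: as written, the selection below is purely greedy: the scheme with the largest combined run length (len1 + len2) wins, and schemes later in the accels array win ties via the >= comparison.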
- int as_len = ad.len1 + ad.len2; - if (as_len >= best_len) { - DEBUG_PRINTF("Marking as best\n"); - best_len = as_len; - best = ad; - } - } - // if we found at least one accel scheme, return it - if (best.state != STATE_INVALID) { -#ifdef DEBUG - DEBUG_PRINTF("Picked best multiaccel state:\n"); - dumpMultiaccelState(best); -#endif - MultibyteAccelInfo info; - info.cr = cr; - info.offset = offset; - info.len1 = best.len1; - info.len2 = best.len2; - info.type = best.type; - return info; - } - return MultibyteAccelInfo(); -} diff --git a/src/nfa/multiaccel_doubleshift.h b/src/nfa/multiaccel_doubleshift.h deleted file mode 100644 index 7ed7534cf..000000000 --- a/src/nfa/multiaccel_doubleshift.h +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef MULTIACCEL_DOUBLESHIFT_H_ -#define MULTIACCEL_DOUBLESHIFT_H_ - -#include "multiaccel_common.h" - -#define DOUBLESHIFT_MATCH(len, match_t, match_sz) \ - static really_inline \ - const u8 * JOIN4(doubleshiftMatch_, match_sz, _, len)(const u8 *buf, match_t z, u32 len2) {\ - if (unlikely(z)) { \ - match_t tmp = z; \ - z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \ - tmp |= ((match_t) (1 << (len + len2)) - 1) << (match_sz / 2); \ - VARISHIFT(z, z, len); \ - VARISHIFT(tmp, tmp, len2); \ - VARISHIFT(tmp, z, len); \ - return JOIN(match, match_sz)(buf, z); \ - } \ - return NULL; \ - } - -#define DOUBLESHIFT_MATCH_32_DEF(n) \ - DOUBLESHIFT_MATCH(n, u32, 32) -#define DOUBLESHIFT_MATCH_64_DEF(n) \ - DOUBLESHIFT_MATCH(n, u64a, 64) -#define DOUBLESHIFT_MATCH_DEF(n) \ - DOUBLESHIFT_MATCH_32_DEF(n) \ - DOUBLESHIFT_MATCH_64_DEF(n) - -DOUBLESHIFT_MATCH_DEF(1) -DOUBLESHIFT_MATCH_DEF(2) -DOUBLESHIFT_MATCH_DEF(3) -DOUBLESHIFT_MATCH_DEF(4) -DOUBLESHIFT_MATCH_DEF(5) -DOUBLESHIFT_MATCH_DEF(6) -DOUBLESHIFT_MATCH_DEF(7) -DOUBLESHIFT_MATCH_DEF(8) -DOUBLESHIFT_MATCH_DEF(9) -DOUBLESHIFT_MATCH_DEF(10) -DOUBLESHIFT_MATCH_DEF(11) -DOUBLESHIFT_MATCH_DEF(12) -DOUBLESHIFT_MATCH_DEF(13) -DOUBLESHIFT_MATCH_DEF(14) -DOUBLESHIFT_MATCH_DEF(15) -DOUBLESHIFT_MATCH_64_DEF(16) -DOUBLESHIFT_MATCH_64_DEF(17) -DOUBLESHIFT_MATCH_64_DEF(18) -DOUBLESHIFT_MATCH_64_DEF(19) -DOUBLESHIFT_MATCH_64_DEF(20) -DOUBLESHIFT_MATCH_64_DEF(21) -DOUBLESHIFT_MATCH_64_DEF(22) -DOUBLESHIFT_MATCH_64_DEF(23) -DOUBLESHIFT_MATCH_64_DEF(24) -DOUBLESHIFT_MATCH_64_DEF(25) -DOUBLESHIFT_MATCH_64_DEF(26) -DOUBLESHIFT_MATCH_64_DEF(27) -DOUBLESHIFT_MATCH_64_DEF(28) -DOUBLESHIFT_MATCH_64_DEF(29) -DOUBLESHIFT_MATCH_64_DEF(30) -DOUBLESHIFT_MATCH_64_DEF(31) - -static -const UNUSED u8 * (*doubleshift_match_funcs_32[])(const u8 *buf, u32 z, u32 len2) = -{ -// skip the first - 0, - &doubleshiftMatch_32_1, - &doubleshiftMatch_32_2, - &doubleshiftMatch_32_3, - &doubleshiftMatch_32_4, - &doubleshiftMatch_32_5, - &doubleshiftMatch_32_6, - &doubleshiftMatch_32_7, - &doubleshiftMatch_32_8, - &doubleshiftMatch_32_9, - &doubleshiftMatch_32_10, - &doubleshiftMatch_32_11, - &doubleshiftMatch_32_12, - &doubleshiftMatch_32_13, - &doubleshiftMatch_32_14, - &doubleshiftMatch_32_15, -}; - -static -const UNUSED u8 * (*doubleshift_match_funcs_64[])(const u8 *buf, u64a z, u32 len2) = -{ -// skip the first - 0, - &doubleshiftMatch_64_1, - &doubleshiftMatch_64_2, - &doubleshiftMatch_64_3, - &doubleshiftMatch_64_4, - &doubleshiftMatch_64_5, - &doubleshiftMatch_64_6, - &doubleshiftMatch_64_7, - &doubleshiftMatch_64_8, - &doubleshiftMatch_64_9, - &doubleshiftMatch_64_10, - &doubleshiftMatch_64_11, - &doubleshiftMatch_64_12, - &doubleshiftMatch_64_13, - &doubleshiftMatch_64_14, - &doubleshiftMatch_64_15, - &doubleshiftMatch_64_16, - &doubleshiftMatch_64_17, - &doubleshiftMatch_64_18, - &doubleshiftMatch_64_19, - &doubleshiftMatch_64_20, - &doubleshiftMatch_64_21, - &doubleshiftMatch_64_22, - &doubleshiftMatch_64_23, - &doubleshiftMatch_64_24, - &doubleshiftMatch_64_25, - &doubleshiftMatch_64_26, - &doubleshiftMatch_64_27, - &doubleshiftMatch_64_28, - &doubleshiftMatch_64_29, - &doubleshiftMatch_64_30, - &doubleshiftMatch_64_31, -}; - -#endif /* MULTIACCEL_DOUBLESHIFT_H_ */ diff --git a/src/nfa/multiaccel_doubleshiftgrab.h b/src/nfa/multiaccel_doubleshiftgrab.h deleted file mode 100644 index 51955b4a6..000000000 --- a/src/nfa/multiaccel_doubleshiftgrab.h +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and 
binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MULTIACCEL_DOUBLESHIFTGRAB_H_ -#define MULTIACCEL_DOUBLESHIFTGRAB_H_ - -#include "multiaccel_common.h" - -#define DOUBLESHIFTGRAB_MATCH(len, match_t, match_sz) \ - static really_inline \ - const u8 * JOIN4(doubleshiftgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z, u32 len2) {\ - if (unlikely(z)) { \ - match_t neg = ~z; \ - match_t tmp = z; \ - z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \ - tmp |= ((match_t) (1 << (len + len2)) - 1) << (match_sz / 2); \ - neg |= ((match_t) (1 << len) - 1) << (match_sz / 2); \ - VARISHIFT(z, z, len); \ - VARISHIFT(tmp, tmp, len2); \ - VARISHIFT(neg, z, 1); \ - VARISHIFT(tmp, z, len); \ - return JOIN(match, match_sz)(buf, z); \ - } \ - return NULL; \ - } - -#define DOUBLESHIFTGRAB_MATCH_32_DEF(n) \ - DOUBLESHIFTGRAB_MATCH(n, u32, 32) -#define DOUBLESHIFTGRAB_MATCH_64_DEF(n) \ - DOUBLESHIFTGRAB_MATCH(n, u64a, 64) -#define DOUBLESHIFTGRAB_MATCH_DEF(n) \ - DOUBLESHIFTGRAB_MATCH_32_DEF(n) \ - DOUBLESHIFTGRAB_MATCH_64_DEF(n) - -DOUBLESHIFTGRAB_MATCH_DEF(1) -DOUBLESHIFTGRAB_MATCH_DEF(2) -DOUBLESHIFTGRAB_MATCH_DEF(3) -DOUBLESHIFTGRAB_MATCH_DEF(4) -DOUBLESHIFTGRAB_MATCH_DEF(5) -DOUBLESHIFTGRAB_MATCH_DEF(6) -DOUBLESHIFTGRAB_MATCH_DEF(7) -DOUBLESHIFTGRAB_MATCH_DEF(8) -DOUBLESHIFTGRAB_MATCH_DEF(9) -DOUBLESHIFTGRAB_MATCH_DEF(10) -DOUBLESHIFTGRAB_MATCH_DEF(11) -DOUBLESHIFTGRAB_MATCH_DEF(12) -DOUBLESHIFTGRAB_MATCH_DEF(13) -DOUBLESHIFTGRAB_MATCH_DEF(14) -DOUBLESHIFTGRAB_MATCH_DEF(15) -DOUBLESHIFTGRAB_MATCH_64_DEF(16) -DOUBLESHIFTGRAB_MATCH_64_DEF(17) -DOUBLESHIFTGRAB_MATCH_64_DEF(18) -DOUBLESHIFTGRAB_MATCH_64_DEF(19) -DOUBLESHIFTGRAB_MATCH_64_DEF(20) -DOUBLESHIFTGRAB_MATCH_64_DEF(21) -DOUBLESHIFTGRAB_MATCH_64_DEF(22) -DOUBLESHIFTGRAB_MATCH_64_DEF(23) -DOUBLESHIFTGRAB_MATCH_64_DEF(24) -DOUBLESHIFTGRAB_MATCH_64_DEF(25) -DOUBLESHIFTGRAB_MATCH_64_DEF(26) -DOUBLESHIFTGRAB_MATCH_64_DEF(27) -DOUBLESHIFTGRAB_MATCH_64_DEF(28) -DOUBLESHIFTGRAB_MATCH_64_DEF(29) -DOUBLESHIFTGRAB_MATCH_64_DEF(30) -DOUBLESHIFTGRAB_MATCH_64_DEF(31) - -static -const UNUSED u8 * (*doubleshiftgrab_match_funcs_32[])(const u8 *buf, u32 
z, u32 len2) = -{ -// skip the first - 0, - &doubleshiftgrabMatch_32_1, - &doubleshiftgrabMatch_32_2, - &doubleshiftgrabMatch_32_3, - &doubleshiftgrabMatch_32_4, - &doubleshiftgrabMatch_32_5, - &doubleshiftgrabMatch_32_6, - &doubleshiftgrabMatch_32_7, - &doubleshiftgrabMatch_32_8, - &doubleshiftgrabMatch_32_9, - &doubleshiftgrabMatch_32_10, - &doubleshiftgrabMatch_32_11, - &doubleshiftgrabMatch_32_12, - &doubleshiftgrabMatch_32_13, - &doubleshiftgrabMatch_32_14, - &doubleshiftgrabMatch_32_15, -}; - -static -const UNUSED u8 * (*doubleshiftgrab_match_funcs_64[])(const u8 *buf, u64a z, u32 len2) = -{ -// skip the first - 0, - &doubleshiftgrabMatch_64_1, - &doubleshiftgrabMatch_64_2, - &doubleshiftgrabMatch_64_3, - &doubleshiftgrabMatch_64_4, - &doubleshiftgrabMatch_64_5, - &doubleshiftgrabMatch_64_6, - &doubleshiftgrabMatch_64_7, - &doubleshiftgrabMatch_64_8, - &doubleshiftgrabMatch_64_9, - &doubleshiftgrabMatch_64_10, - &doubleshiftgrabMatch_64_11, - &doubleshiftgrabMatch_64_12, - &doubleshiftgrabMatch_64_13, - &doubleshiftgrabMatch_64_14, - &doubleshiftgrabMatch_64_15, - &doubleshiftgrabMatch_64_16, - &doubleshiftgrabMatch_64_17, - &doubleshiftgrabMatch_64_18, - &doubleshiftgrabMatch_64_19, - &doubleshiftgrabMatch_64_20, - &doubleshiftgrabMatch_64_21, - &doubleshiftgrabMatch_64_22, - &doubleshiftgrabMatch_64_23, - &doubleshiftgrabMatch_64_24, - &doubleshiftgrabMatch_64_25, - &doubleshiftgrabMatch_64_26, - &doubleshiftgrabMatch_64_27, - &doubleshiftgrabMatch_64_28, - &doubleshiftgrabMatch_64_29, - &doubleshiftgrabMatch_64_30, - &doubleshiftgrabMatch_64_31, -}; - -#endif /* MULTIACCEL_DOUBLESHIFTGRAB_H_ */ diff --git a/src/nfa/multiaccel_long.h b/src/nfa/multiaccel_long.h deleted file mode 100644 index 515f0bc22..000000000 --- a/src/nfa/multiaccel_long.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef MULTIACCEL_LONG_H_ -#define MULTIACCEL_LONG_H_ - -#include "multiaccel_common.h" - -#define LONG_MATCH(len, match_t, match_sz) \ - static really_inline \ - const u8 * JOIN4(longMatch_, match_sz, _, len)(const u8 *buf, match_t z) { \ - if (unlikely(z)) { \ - z |= ((match_t) (1 << (len - 1)) - 1) << (match_sz / 2); \ - JOIN(SHIFT, len)(z); \ - return JOIN(match, match_sz)(buf, z); \ - } \ - return NULL; \ - } - -#define LONG_MATCH_32_DEF(n) \ - LONG_MATCH(n, u32, 32) -#define LONG_MATCH_64_DEF(n) \ - LONG_MATCH(n, u64a, 64) -#define LONG_MATCH_DEF(n) \ - LONG_MATCH_32_DEF(n) \ - LONG_MATCH_64_DEF(n) - -LONG_MATCH_DEF(1) -LONG_MATCH_DEF(2) -LONG_MATCH_DEF(3) -LONG_MATCH_DEF(4) -LONG_MATCH_DEF(5) -LONG_MATCH_DEF(6) -LONG_MATCH_DEF(7) -LONG_MATCH_DEF(8) -LONG_MATCH_DEF(9) -LONG_MATCH_DEF(10) -LONG_MATCH_DEF(11) -LONG_MATCH_DEF(12) -LONG_MATCH_DEF(13) -LONG_MATCH_DEF(14) -LONG_MATCH_DEF(15) -LONG_MATCH_64_DEF(16) -LONG_MATCH_64_DEF(17) -LONG_MATCH_64_DEF(18) -LONG_MATCH_64_DEF(19) -LONG_MATCH_64_DEF(20) -LONG_MATCH_64_DEF(21) -LONG_MATCH_64_DEF(22) -LONG_MATCH_64_DEF(23) -LONG_MATCH_64_DEF(24) -LONG_MATCH_64_DEF(25) -LONG_MATCH_64_DEF(26) -LONG_MATCH_64_DEF(27) -LONG_MATCH_64_DEF(28) -LONG_MATCH_64_DEF(29) -LONG_MATCH_64_DEF(30) -LONG_MATCH_64_DEF(31) - -static -const UNUSED u8 *(*long_match_funcs_32[])(const u8 *buf, u32 z) = -{ - // skip the first three - 0, - &longMatch_32_1, - &longMatch_32_2, - &longMatch_32_3, - &longMatch_32_4, - &longMatch_32_5, - &longMatch_32_6, - &longMatch_32_7, - &longMatch_32_8, - &longMatch_32_9, - &longMatch_32_10, - &longMatch_32_11, - &longMatch_32_12, - &longMatch_32_13, - &longMatch_32_14, - &longMatch_32_15, - }; - -static -const UNUSED u8 *(*long_match_funcs_64[])(const u8 *buf, u64a z) = -{ -// skip the first three - 0, - &longMatch_64_1, - &longMatch_64_2, - &longMatch_64_3, - &longMatch_64_4, - &longMatch_64_5, - &longMatch_64_6, - &longMatch_64_7, - &longMatch_64_8, - &longMatch_64_9, - &longMatch_64_10, - &longMatch_64_11, - &longMatch_64_12, - &longMatch_64_13, - &longMatch_64_14, - &longMatch_64_15, - &longMatch_64_16, - &longMatch_64_17, - &longMatch_64_18, - &longMatch_64_19, - &longMatch_64_20, - &longMatch_64_21, - &longMatch_64_22, - &longMatch_64_23, - &longMatch_64_24, - &longMatch_64_25, - &longMatch_64_26, - &longMatch_64_27, - &longMatch_64_28, - &longMatch_64_29, - &longMatch_64_30, - &longMatch_64_31, -}; - -#endif /* MULTIACCEL_LONG_H_ */ diff --git a/src/nfa/multiaccel_longgrab.h b/src/nfa/multiaccel_longgrab.h deleted file mode 100644 index 09daaf82a..000000000 --- a/src/nfa/multiaccel_longgrab.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MULTIACCEL_LONGGRAB_H_ -#define MULTIACCEL_LONGGRAB_H_ - -#include "multiaccel_common.h" - -#define LONGGRAB_MATCH(len, match_t, match_sz) \ - static really_inline \ - const u8 * JOIN4(longgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z) { \ - if (unlikely(z)) { \ - match_t tmp = ~z; \ - tmp |= ((match_t) (1 << len) - 1) << (match_sz / 2); \ - z |= ((match_t) (1 << (len - 1)) - 1) << (match_sz / 2); \ - JOIN(SHIFT, len)(z); \ - VARISHIFT(tmp, z, len); \ - return JOIN(match, match_sz)(buf, z); \ - } \ - return NULL; \ - } - -#define LONGGRAB_MATCH_32_DEF(n) \ - LONGGRAB_MATCH(n, u32, 32) -#define LONGGRAB_MATCH_64_DEF(n) \ - LONGGRAB_MATCH(n, u64a, 64) -#define LONGGRAB_MATCH_DEF(n) \ - LONGGRAB_MATCH_32_DEF(n) \ - LONGGRAB_MATCH_64_DEF(n) - -LONGGRAB_MATCH_DEF(1) -LONGGRAB_MATCH_DEF(2) -LONGGRAB_MATCH_DEF(3) -LONGGRAB_MATCH_DEF(4) -LONGGRAB_MATCH_DEF(5) -LONGGRAB_MATCH_DEF(6) -LONGGRAB_MATCH_DEF(7) -LONGGRAB_MATCH_DEF(8) -LONGGRAB_MATCH_DEF(9) -LONGGRAB_MATCH_DEF(10) -LONGGRAB_MATCH_DEF(11) -LONGGRAB_MATCH_DEF(12) -LONGGRAB_MATCH_DEF(13) -LONGGRAB_MATCH_DEF(14) -LONGGRAB_MATCH_DEF(15) -LONGGRAB_MATCH_64_DEF(16) -LONGGRAB_MATCH_64_DEF(17) -LONGGRAB_MATCH_64_DEF(18) -LONGGRAB_MATCH_64_DEF(19) -LONGGRAB_MATCH_64_DEF(20) -LONGGRAB_MATCH_64_DEF(21) -LONGGRAB_MATCH_64_DEF(22) -LONGGRAB_MATCH_64_DEF(23) -LONGGRAB_MATCH_64_DEF(24) -LONGGRAB_MATCH_64_DEF(25) -LONGGRAB_MATCH_64_DEF(26) -LONGGRAB_MATCH_64_DEF(27) -LONGGRAB_MATCH_64_DEF(28) -LONGGRAB_MATCH_64_DEF(29) -LONGGRAB_MATCH_64_DEF(30) -LONGGRAB_MATCH_64_DEF(31) - -static -const UNUSED u8 *(*longgrab_match_funcs_32[])(const u8 *buf, u32 z) = -{ -// skip the first three - 0, - &longgrabMatch_32_1, - &longgrabMatch_32_2, - &longgrabMatch_32_3, - &longgrabMatch_32_4, - &longgrabMatch_32_5, - &longgrabMatch_32_6, - &longgrabMatch_32_7, - &longgrabMatch_32_8, - &longgrabMatch_32_9, - &longgrabMatch_32_10, - &longgrabMatch_32_11, - &longgrabMatch_32_12, - &longgrabMatch_32_13, - &longgrabMatch_32_14, - &longgrabMatch_32_15, - }; - -static -const UNUSED u8 *(*longgrab_match_funcs_64[])(const u8 *buf, u64a z) = -{ -// skip the first three - 0, - &longgrabMatch_64_1, - &longgrabMatch_64_2, - &longgrabMatch_64_3, - &longgrabMatch_64_4, - &longgrabMatch_64_5, - &longgrabMatch_64_6, - &longgrabMatch_64_7, - &longgrabMatch_64_8, - &longgrabMatch_64_9, - &longgrabMatch_64_10, - &longgrabMatch_64_11, - &longgrabMatch_64_12, - &longgrabMatch_64_13, - &longgrabMatch_64_14, - &longgrabMatch_64_15, - &longgrabMatch_64_16, - &longgrabMatch_64_17, - &longgrabMatch_64_18, - &longgrabMatch_64_19, - &longgrabMatch_64_20, - &longgrabMatch_64_21, - &longgrabMatch_64_22, - &longgrabMatch_64_23, - &longgrabMatch_64_24, - &longgrabMatch_64_25, - 
&longgrabMatch_64_26, - &longgrabMatch_64_27, - &longgrabMatch_64_28, - &longgrabMatch_64_29, - &longgrabMatch_64_30, - &longgrabMatch_64_31, -}; - -#endif /* MULTIACCEL_LONGGRAB_H_ */ diff --git a/src/nfa/multiaccel_shift.h b/src/nfa/multiaccel_shift.h deleted file mode 100644 index fd362a8b6..000000000 --- a/src/nfa/multiaccel_shift.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef MULTIACCEL_SHIFT_H_ -#define MULTIACCEL_SHIFT_H_ - -#include "multiaccel_common.h" - -#define SHIFT_MATCH(len, match_t, match_sz) \ - static really_inline \ - const u8 * JOIN4(shiftMatch_, match_sz, _, len)(const u8 *buf, match_t z) {\ - if (unlikely(z)) { \ - z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \ - VARISHIFT(z, z, len); \ - return JOIN(match, match_sz)(buf, z); \ - } \ - return NULL; \ - } - -#define SHIFT_MATCH_32_DEF(n) \ - SHIFT_MATCH(n, u32, 32) -#define SHIFT_MATCH_64_DEF(n) \ - SHIFT_MATCH(n, u64a, 64) -#define SHIFT_MATCH_DEF(n) \ - SHIFT_MATCH_32_DEF(n) \ - SHIFT_MATCH_64_DEF(n) - -SHIFT_MATCH_DEF(1) -SHIFT_MATCH_DEF(2) -SHIFT_MATCH_DEF(3) -SHIFT_MATCH_DEF(4) -SHIFT_MATCH_DEF(5) -SHIFT_MATCH_DEF(6) -SHIFT_MATCH_DEF(7) -SHIFT_MATCH_DEF(8) -SHIFT_MATCH_DEF(9) -SHIFT_MATCH_DEF(10) -SHIFT_MATCH_DEF(11) -SHIFT_MATCH_DEF(12) -SHIFT_MATCH_DEF(13) -SHIFT_MATCH_DEF(14) -SHIFT_MATCH_DEF(15) -SHIFT_MATCH_64_DEF(16) -SHIFT_MATCH_64_DEF(17) -SHIFT_MATCH_64_DEF(18) -SHIFT_MATCH_64_DEF(19) -SHIFT_MATCH_64_DEF(20) -SHIFT_MATCH_64_DEF(21) -SHIFT_MATCH_64_DEF(22) -SHIFT_MATCH_64_DEF(23) -SHIFT_MATCH_64_DEF(24) -SHIFT_MATCH_64_DEF(25) -SHIFT_MATCH_64_DEF(26) -SHIFT_MATCH_64_DEF(27) -SHIFT_MATCH_64_DEF(28) -SHIFT_MATCH_64_DEF(29) -SHIFT_MATCH_64_DEF(30) -SHIFT_MATCH_64_DEF(31) - -static -const UNUSED u8 * (*shift_match_funcs_32[])(const u8 *buf, u32 z) = -{ -// skip the first - 0, - &shiftMatch_32_1, - &shiftMatch_32_2, - &shiftMatch_32_3, - &shiftMatch_32_4, - &shiftMatch_32_5, - &shiftMatch_32_6, - &shiftMatch_32_7, - &shiftMatch_32_8, - &shiftMatch_32_9, - &shiftMatch_32_10, - &shiftMatch_32_11, - &shiftMatch_32_12, - &shiftMatch_32_13, - &shiftMatch_32_14, - &shiftMatch_32_15, -}; - -static -const UNUSED u8 * (*shift_match_funcs_64[])(const u8 *buf, u64a z) = -{ -// skip the first - 0, - &shiftMatch_64_1, - &shiftMatch_64_2, - &shiftMatch_64_3, - &shiftMatch_64_4, - &shiftMatch_64_5, - &shiftMatch_64_6, - &shiftMatch_64_7, - &shiftMatch_64_8, - &shiftMatch_64_9, - &shiftMatch_64_10, - &shiftMatch_64_11, - &shiftMatch_64_12, - &shiftMatch_64_13, - &shiftMatch_64_14, - &shiftMatch_64_15, - &shiftMatch_64_16, - &shiftMatch_64_17, - &shiftMatch_64_18, - &shiftMatch_64_19, - &shiftMatch_64_20, - &shiftMatch_64_21, - &shiftMatch_64_22, - &shiftMatch_64_23, - &shiftMatch_64_24, - &shiftMatch_64_25, - &shiftMatch_64_26, - &shiftMatch_64_27, - &shiftMatch_64_28, - &shiftMatch_64_29, - &shiftMatch_64_30, - &shiftMatch_64_31, -}; - -#endif /* MULTIACCEL_SHIFT_H_ */ diff --git a/src/nfa/multiaccel_shiftgrab.h b/src/nfa/multiaccel_shiftgrab.h deleted file mode 100644 index 032ed0865..000000000 --- a/src/nfa/multiaccel_shiftgrab.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MULTIACCEL_SHIFTGRAB_H_ -#define MULTIACCEL_SHIFTGRAB_H_ - -#include "multiaccel_common.h" - -#define SHIFTGRAB_MATCH(len, match_t, match_sz) \ - static really_inline \ - const u8 * JOIN4(shiftgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z) {\ - if (unlikely(z)) { \ - match_t tmp = ~z; \ - z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \ - tmp |= ((match_t) (1 << len) - 1) << (match_sz / 2); \ - VARISHIFT(z, z, len); \ - VARISHIFT(tmp, z, 1); \ - return JOIN(match, match_sz)(buf, z); \ - } \ - return NULL; \ - } - -#define SHIFTGRAB_MATCH_32_DEF(n) \ - SHIFTGRAB_MATCH(n, u32, 32) -#define SHIFTGRAB_MATCH_64_DEF(n) \ - SHIFTGRAB_MATCH(n, u64a, 64) -#define SHIFTGRAB_MATCH_DEF(n) \ - SHIFTGRAB_MATCH_32_DEF(n) \ - SHIFTGRAB_MATCH_64_DEF(n) - -SHIFTGRAB_MATCH_DEF(1) -SHIFTGRAB_MATCH_DEF(2) -SHIFTGRAB_MATCH_DEF(3) -SHIFTGRAB_MATCH_DEF(4) -SHIFTGRAB_MATCH_DEF(5) -SHIFTGRAB_MATCH_DEF(6) -SHIFTGRAB_MATCH_DEF(7) -SHIFTGRAB_MATCH_DEF(8) -SHIFTGRAB_MATCH_DEF(9) -SHIFTGRAB_MATCH_DEF(10) -SHIFTGRAB_MATCH_DEF(11) -SHIFTGRAB_MATCH_DEF(12) -SHIFTGRAB_MATCH_DEF(13) -SHIFTGRAB_MATCH_DEF(14) -SHIFTGRAB_MATCH_DEF(15) -SHIFTGRAB_MATCH_64_DEF(16) -SHIFTGRAB_MATCH_64_DEF(17) -SHIFTGRAB_MATCH_64_DEF(18) -SHIFTGRAB_MATCH_64_DEF(19) -SHIFTGRAB_MATCH_64_DEF(20) -SHIFTGRAB_MATCH_64_DEF(21) -SHIFTGRAB_MATCH_64_DEF(22) -SHIFTGRAB_MATCH_64_DEF(23) -SHIFTGRAB_MATCH_64_DEF(24) -SHIFTGRAB_MATCH_64_DEF(25) -SHIFTGRAB_MATCH_64_DEF(26) -SHIFTGRAB_MATCH_64_DEF(27) -SHIFTGRAB_MATCH_64_DEF(28) -SHIFTGRAB_MATCH_64_DEF(29) -SHIFTGRAB_MATCH_64_DEF(30) -SHIFTGRAB_MATCH_64_DEF(31) - -static -const UNUSED u8 * (*shiftgrab_match_funcs_32[])(const u8 *buf, u32 z) = -{ -// skip the first - 0, - &shiftgrabMatch_32_1, - &shiftgrabMatch_32_2, - &shiftgrabMatch_32_3, - &shiftgrabMatch_32_4, - &shiftgrabMatch_32_5, - &shiftgrabMatch_32_6, - &shiftgrabMatch_32_7, - &shiftgrabMatch_32_8, - &shiftgrabMatch_32_9, - &shiftgrabMatch_32_10, - &shiftgrabMatch_32_11, - &shiftgrabMatch_32_12, - &shiftgrabMatch_32_13, - &shiftgrabMatch_32_14, - &shiftgrabMatch_32_15, -}; - -static -const UNUSED u8 * (*shiftgrab_match_funcs_64[])(const u8 *buf, u64a z) = - { -// skip the first - 0, - &shiftgrabMatch_64_1, - &shiftgrabMatch_64_2, - &shiftgrabMatch_64_3, - &shiftgrabMatch_64_4, - &shiftgrabMatch_64_5, - &shiftgrabMatch_64_6, - &shiftgrabMatch_64_7, - &shiftgrabMatch_64_8, - &shiftgrabMatch_64_9, - &shiftgrabMatch_64_10, - &shiftgrabMatch_64_11, - &shiftgrabMatch_64_12, - &shiftgrabMatch_64_13, - &shiftgrabMatch_64_14, - &shiftgrabMatch_64_15, - &shiftgrabMatch_64_16, - &shiftgrabMatch_64_17, - &shiftgrabMatch_64_18, - &shiftgrabMatch_64_19, - &shiftgrabMatch_64_20, - &shiftgrabMatch_64_21, - &shiftgrabMatch_64_22, - &shiftgrabMatch_64_23, 
- &shiftgrabMatch_64_24, - &shiftgrabMatch_64_25, - &shiftgrabMatch_64_26, - &shiftgrabMatch_64_27, - &shiftgrabMatch_64_28, - &shiftgrabMatch_64_29, - &shiftgrabMatch_64_30, - &shiftgrabMatch_64_31, -}; - -#endif /* MULTIACCEL_SHIFTGRAB_H_ */ diff --git a/src/nfa/multishufti.c b/src/nfa/multishufti.c deleted file mode 100644 index cb85b7186..000000000 --- a/src/nfa/multishufti.c +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Shufti: character class acceleration. 
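 * Two 16-byte shuffle masks are indexed by the low and high nibbles of each input byte; a byte belongs to the class when the AND of the two table lookups is non-zero, so one pair of pshufb lookups classifies 16 bytes at once.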
- * - * Utilises the SSSE3 pshufb shuffle instruction - */ - -#include "config.h" -#include "ue2common.h" - -#include "multishufti.h" - -#include "multiaccel_common.h" - -#if !defined(__AVX2__) - -#define MATCH_ALGO long_ -#include "multiaccel_long.h" -#include "multishufti_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO longgrab_ -#include "multiaccel_longgrab.h" -#include "multishufti_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shift_ -#include "multiaccel_shift.h" -#include "multishufti_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shiftgrab_ -#include "multiaccel_shiftgrab.h" -#include "multishufti_sse.h" -#undef MATCH_ALGO - -#define MULTIACCEL_DOUBLE - -#define MATCH_ALGO doubleshift_ -#include "multiaccel_doubleshift.h" -#include "multishufti_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO doubleshiftgrab_ -#include "multiaccel_doubleshiftgrab.h" -#include "multishufti_sse.h" -#undef MATCH_ALGO - -#undef MULTIACCEL_DOUBLE - -#else - -#define MATCH_ALGO long_ -#include "multiaccel_long.h" -#include "multishufti_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO longgrab_ -#include "multiaccel_longgrab.h" -#include "multishufti_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shift_ -#include "multiaccel_shift.h" -#include "multishufti_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shiftgrab_ -#include "multiaccel_shiftgrab.h" -#include "multishufti_avx2.h" -#undef MATCH_ALGO - -#define MULTIACCEL_DOUBLE - -#define MATCH_ALGO doubleshift_ -#include "multiaccel_doubleshift.h" -#include "multishufti_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO doubleshiftgrab_ -#include "multiaccel_doubleshiftgrab.h" -#include "multishufti_avx2.h" -#undef MATCH_ALGO - -#undef MULTIACCEL_DOUBLE - -#endif diff --git a/src/nfa/multishufti_avx2.h b/src/nfa/multishufti_avx2.h deleted file mode 100644 index 042f55707..000000000 --- a/src/nfa/multishufti_avx2.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "shufti_common.h" - -#include "ue2common.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" - -static really_inline -const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 mask_lo, m256 mask_hi, m256 chars, - const u8 *buf, const m256 low4bits, - const m256 zeroes, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes); - return (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])(buf, ~z -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); -} - -const u8 *JOIN(MATCH_ALGO, shuftiExec)(m128 mask_lo, m128 mask_hi, - const u8 *buf, - const u8 *buf_end, u8 run_len -#ifdef MULTIACCEL_DOUBLE - , u8 run_len2 -#endif - ) { - assert(buf && buf_end); - assert(buf < buf_end); - - // Slow path for small cases. - if (buf_end - buf < 32) { - return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, - buf, buf_end); - } - - const m256 zeroes = zeroes256(); - const m256 low4bits = set32x8(0xf); - const m256 wide_mask_lo = set2x128(mask_lo); - const m256 wide_mask_hi = set2x128(mask_hi); - const u8 *rv; - - size_t min = (size_t)buf % 32; - assert(buf_end - buf >= 32); - - // Preconditioning: most of the time our buffer won't be aligned. - m256 chars = loadu256(buf); - rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, chars, buf, - low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - buf += (32 - min); - - // Unrolling was here, but it wasn't doing anything but taking up space. - // Reroll FTW. - const u8 *last_block = buf_end - 32; - while (buf < last_block) { - m256 lchars = load256(buf); - rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, lchars, buf, - low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - buf += 32; - } - - // Use an unaligned load to mop up the last 32 bytes and get an accurate - // picture to buf_end. - assert(buf <= buf_end && buf >= buf_end - 32); - chars = loadu256(buf_end - 32); - rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, chars, buf_end - 32, - low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - - return buf_end; -} diff --git a/src/nfa/multishufti_sse.h b/src/nfa/multishufti_sse.h deleted file mode 100644 index 0a9b543ee..000000000 --- a/src/nfa/multishufti_sse.h +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "shufti_common.h" - -#include "ue2common.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" - -/* Normal SSSE3 shufti */ - -static really_inline -const u8 *JOIN(MATCH_ALGO, fwdBlock)(m128 mask_lo, m128 mask_hi, m128 chars, - const u8 *buf, const m128 low4bits, - const m128 zeroes, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - // negate first 16 bits - u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes) ^ 0xFFFF; - return (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])(buf, z -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); -} - -/* - * 16-byte pipeline, for smaller scans - */ -static -const u8 *JOIN(MATCH_ALGO, shuftiPipeline16)(m128 mask_lo, m128 mask_hi, - const u8 *buf, const u8 *buf_end, - const m128 low4bits, - const m128 zeroes, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8* ptr, *last_buf; - u32 last_res; - - // pipeline prologue: scan first 16 bytes - m128 data = load128(buf); - u32 z = block(mask_lo, mask_hi, data, low4bits, zeroes) ^ 0xFFFF; - last_buf = buf; - last_res = z; - buf += 16; - - // now, start the pipeline! - assert((size_t)buf % 16 == 0); - for (; buf + 15 < buf_end; buf += 16) { - // scan more data - data = load128(buf); - z = block(mask_lo, mask_hi, data, low4bits, zeroes) ^ 0xFFFF; - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_buf = buf; - last_res = z; - } - assert(buf <= buf_end && buf >= buf_end - 16); - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - return NULL; -} - -/* - * 32-byte pipeline, for bigger scans - */ -static -const u8 *JOIN(MATCH_ALGO, shuftiPipeline32)(m128 mask_lo, m128 mask_hi, - const u8 *buf, const u8 *buf_end, - const m128 low4bits, - const m128 zeroes, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8* ptr, *last_buf; - u32 res; - - // pipeline prologue: scan first 32 bytes - m128 data1 = load128(buf); - u32 z1 = block(mask_lo, mask_hi, data1, low4bits, zeroes) ^ 0xFFFF; - m128 data2 = load128(buf + 16); - u32 z2 = block(mask_lo, mask_hi, data2, low4bits, zeroes) ^ 0xFFFF; - - // store the results - u32 last_res = z1 | (z2 << 16); - last_buf = buf; - buf += 32; - - - // now, start the pipeline! 
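 // Software pipelining: each iteration computes the shufti block result for the current 32 bytes while running the branchy match confirm on the previous iteration's result, hiding the confirm latency behind the next loads.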
- assert((size_t)buf % 16 == 0); - for (; buf + 31 < buf_end; buf += 32) { - // scan more data - data1 = load128(buf); - z1 = block(mask_lo, mask_hi, data1, low4bits, zeroes) ^ 0xFFFF; - data2 = load128(buf + 16); - z2 = block(mask_lo, mask_hi, data2, low4bits, zeroes) ^ 0xFFFF; - res = z1 | (z2 << 16); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_res = res; - last_buf = buf; - } - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - // if we still have some data left, scan it too - for (; buf + 15 < buf_end; buf += 16) { - m128 chars = load128(buf); - ptr = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars, buf, - low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - } - assert(buf <= buf_end && buf >= buf_end - 16); - - return NULL; -} - -const u8 *JOIN(MATCH_ALGO, shuftiExec)(m128 mask_lo, m128 mask_hi, - const u8 *buf, - const u8 *buf_end, u8 run_len -#ifdef MULTIACCEL_DOUBLE - , u8 run_len2 -#endif - ) { - assert(buf && buf_end); - assert(buf < buf_end); - - // Slow path for small cases. - if (buf_end - buf < 16) { - return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, - buf, buf_end); - } - - const m128 zeroes = zeroes128(); - const m128 low4bits = _mm_set1_epi8(0xf); - const u8 *rv; - - size_t min = (size_t)buf % 16; - assert(buf_end - buf >= 16); - - // Preconditioning: most of the time our buffer won't be aligned. - m128 chars = loadu128(buf); - rv = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars, buf, - low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - buf += (16 - min); - - // if we have enough data, run bigger pipeline; otherwise run smaller one - if (buf_end - buf >= 128) { - rv = JOIN(MATCH_ALGO, shuftiPipeline32)(mask_lo, mask_hi, - buf, buf_end, low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(rv)) { - return rv; - } - } else if (buf_end - buf >= 16){ - rv = JOIN(MATCH_ALGO, shuftiPipeline16)(mask_lo, mask_hi, - buf, buf_end, low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(rv)) { - return rv; - } - } - - // Use an unaligned load to mop up the last 16 bytes and get an accurate - // picture to buf_end. - chars = loadu128(buf_end - 16); - rv = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars, - buf_end - 16, low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - - return buf_end; -} diff --git a/src/nfa/multitruffle.c b/src/nfa/multitruffle.c deleted file mode 100644 index 381bda936..000000000 --- a/src/nfa/multitruffle.c +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
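The Pipeline16/Pipeline32 helpers in this family are software pipelines: the SIMD mask for block N is computed before the branchy match confirmation of block N-1 runs, so the two overlap and the confirm latency is hidden. A minimal sketch of that control structure; scan_block() and confirm() are hypothetical stand-ins for the real block()/match_funcs calls:

#include <stdint.h>

uint32_t scan_block(const uint8_t *buf);                /* hypothetical */
const uint8_t *confirm(const uint8_t *buf, uint32_t z); /* hypothetical */

/* assumes buf_end - buf >= 16 */
static const uint8_t *pipeline_sketch(const uint8_t *buf,
                                      const uint8_t *buf_end) {
    /* prologue: scan the first block, but defer its confirmation */
    uint32_t last_z = scan_block(buf);
    const uint8_t *last_buf = buf;

    for (buf += 16; buf + 15 < buf_end; buf += 16) {
        uint32_t z = scan_block(buf);                 /* issue block N   */
        const uint8_t *p = confirm(last_buf, last_z); /* confirm N-1     */
        if (p) {
            return p;
        }
        last_buf = buf;
        last_z = z;
    }

    /* epilogue: confirm the final in-flight block */
    return confirm(last_buf, last_z);
}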
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "ue2common.h" - -#include "multitruffle.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" - -#include "multiaccel_common.h" - -#if !defined(__AVX2__) - -#define MATCH_ALGO long_ -#include "multiaccel_long.h" -#include "multitruffle_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO longgrab_ -#include "multiaccel_longgrab.h" -#include "multitruffle_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shift_ -#include "multiaccel_shift.h" -#include "multitruffle_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shiftgrab_ -#include "multiaccel_shiftgrab.h" -#include "multitruffle_sse.h" -#undef MATCH_ALGO - -#define MULTIACCEL_DOUBLE - -#define MATCH_ALGO doubleshift_ -#include "multiaccel_doubleshift.h" -#include "multitruffle_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO doubleshiftgrab_ -#include "multiaccel_doubleshiftgrab.h" -#include "multitruffle_sse.h" -#undef MATCH_ALGO - -#undef MULTIACCEL_DOUBLE - -#else - -#define MATCH_ALGO long_ -#include "multiaccel_long.h" -#include "multitruffle_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO longgrab_ -#include "multiaccel_longgrab.h" -#include "multitruffle_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shift_ -#include "multiaccel_shift.h" -#include "multitruffle_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shiftgrab_ -#include "multiaccel_shiftgrab.h" -#include "multitruffle_avx2.h" -#undef MATCH_ALGO - -#define MULTIACCEL_DOUBLE - -#define MATCH_ALGO doubleshift_ -#include "multiaccel_doubleshift.h" -#include "multitruffle_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO doubleshiftgrab_ -#include "multiaccel_doubleshiftgrab.h" -#include "multitruffle_avx2.h" -#undef MATCH_ALGO - -#undef MULTIACCEL_DOUBLE - -#endif diff --git a/src/nfa/multitruffle.h b/src/nfa/multitruffle.h deleted file mode 100644 index 8703b5ca3..000000000 --- a/src/nfa/multitruffle.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
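multitruffle.c above (like multishufti.c and multivermicelli.c below) stamps out one specialised matcher per acceleration scheme by re-including a template header with a different MATCH_ALGO prefix each time; JOIN() in those headers token-pastes the prefix onto every function name. A compilable miniature of the same pattern with illustrative names; the real code re-includes a header between #define/#undef rather than using a function-defining macro:

#include <stdio.h>

#define JOIN_(a, b) a##b
#define JOIN(a, b) JOIN_(a, b)

/* one "template" body, stamped out once per prefix */
#define DEFINE_EXEC(prefix)                       \
    static void JOIN(prefix, exec)(void) {        \
        printf("%s\n", #prefix "exec");           \
    }

DEFINE_EXEC(long_)  /* defines long_exec()  */
DEFINE_EXEC(shift_) /* defines shift_exec() */

int main(void) {
    long_exec();
    shift_exec();
    return 0;
}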
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MULTITRUFFLE_H -#define MULTITRUFFLE_H - -/** \file - * \brief Multitruffle: multibyte version of Truffle. - * - * Utilises the SSSE3 pshufb shuffle instruction - */ - -#include "util/simd_types.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -const u8 *long_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end, const u8 run_len); - -const u8 *longgrab_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end, const u8 run_len); - -const u8 *shift_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end, const u8 run_len); - -const u8 *shiftgrab_truffleExec(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, const u8 *buf, - const u8 *buf_end, const u8 run_len); - -const u8 *doubleshift_truffleExec(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, const u8 *buf, - const u8 *buf_end, const u8 run_len, - const u8 run2_len); - -const u8 *doubleshiftgrab_truffleExec(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, const u8 *buf, - const u8 *buf_end, const u8 run_len, - const u8 run2_len); - -#ifdef __cplusplus -} -#endif - - -#endif /* MULTITRUFFLE_H */ diff --git a/src/nfa/multitruffle_avx2.h b/src/nfa/multitruffle_avx2.h deleted file mode 100644 index e52db5fc9..000000000 --- a/src/nfa/multitruffle_avx2.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * Matches a byte in a charclass using three shuffles - */ - -#include "config.h" -#include "ue2common.h" -#include "multiaccel_common.h" - -/* - * include "block" function - */ -#include "truffle_common.h" - -/* - * single-byte truffle fwd match function, should only be defined when not - * compiling multiaccel - */ -static really_inline -const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, - m256 v, const u8 *buf, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - u64a z = (u64a) block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); - return (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])(buf, z ^ 0xFFFFFFFF -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); -} - -const u8 *JOIN(MATCH_ALGO, truffleExec)(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - DEBUG_PRINTF("run_len %zu\n", buf_end - buf); - const m256 wide_clear = set2x128(shuf_mask_lo_highclear); - const m256 wide_set = set2x128(shuf_mask_lo_highset); - - assert(buf && buf_end); - assert(buf < buf_end); - const u8 *rv; - - if (buf_end - buf < 32) { - return truffleMini(wide_clear, wide_set, buf, buf_end); - } - - size_t min = (size_t)buf % 32; - assert(buf_end - buf >= 32); - - // Preconditioning: most of the time our buffer won't be aligned. - m256 chars = loadu256(buf); - rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, chars, buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - buf += (32 - min); - - const u8 *last_block = buf_end - 32; - while (buf < last_block) { - m256 lchars = load256(buf); - rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, lchars, - buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - buf += 32; - } - - // Use an unaligned load to mop up the last 32 bytes and get an accurate - // picture to buf_end. 
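Truffle, being deleted here in its multibyte form, covers an arbitrary set of 256 byte values with just two 16-byte tables. Going by the block() helper in truffle_common.h, the per-byte predicate can be modelled in scalar code as below; this is a sketch for orientation, not the library's implementation:

#include <stdint.h>

/* Scalar model of the truffle test: the byte's low nibble indexes one of
 * two bucket tables (bit 7 selects highclear vs highset), and bits 4..6
 * pick the bucket bit that must be set for a match. */
static inline int truffle_byte_matches(const uint8_t highclear[16],
                                       const uint8_t highset[16], uint8_t c) {
    const uint8_t *table = (c & 0x80) ? highset : highclear;
    return (table[c & 0xf] >> ((c >> 4) & 0x7)) & 1;
}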
- assert(buf <= buf_end && buf >= buf_end - 32); - chars = loadu256(buf_end - 32); - rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, chars, - buf_end - 32, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - - return buf_end; -} diff --git a/src/nfa/multitruffle_sse.h b/src/nfa/multitruffle_sse.h deleted file mode 100644 index b287e4fc4..000000000 --- a/src/nfa/multitruffle_sse.h +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "ue2common.h" -#include "multiaccel_common.h" - -/* - * include "block" function - */ -#include "truffle_common.h" - -/* - * single-byte truffle fwd match function, should only be defined when not - * compiling multiaccel - */ - -static really_inline -const u8 *JOIN(MATCH_ALGO, fwdBlock)(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - m128 v, const u8 *buf, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v) ^ 0xFFFF; - return (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])(buf, z -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); -} - -/* - * 16-byte pipeline, for smaller scans - */ -static -const u8 *JOIN(MATCH_ALGO, trufflePipeline16)(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8* ptr, *last_buf; - u32 last_res; - - // pipeline prologue: scan first 16 bytes - m128 data = load128(buf); - u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data) ^ 0xFFFF; - last_buf = buf; - last_res = z; - buf += 16; - - // now, start the pipeline! 
- assert((size_t)buf % 16 == 0); - for (; buf + 15 < buf_end; buf += 16) { - // scan more data - data = load128(buf); - z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data) ^ 0xFFFF; - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_buf = buf; - last_res = z; - } - assert(buf <= buf_end && buf >= buf_end - 16); - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - return NULL; -} - -/* - * 32-byte pipeline, for bigger scans - */ -static -const u8 *JOIN(MATCH_ALGO, trufflePipeline32)(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8* ptr, *last_buf; - u32 res; - - // pipeline prologue: scan first 32 bytes - m128 data1 = load128(buf); - u32 z1 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data1) ^ 0xFFFF; - m128 data2 = load128(buf + 16); - u32 z2 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data2) ^ 0xFFFF; - - // store the results - u32 last_res = z1 | (z2 << 16); - last_buf = buf; - buf += 32; - - - // now, start the pipeline! - assert((size_t)buf % 16 == 0); - for (; buf + 31 < buf_end; buf += 32) { - // scan more data - data1 = load128(buf); - z1 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data1) ^ 0xFFFF; - data2 = load128(buf + 16); - z2 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data2) ^ 0xFFFF; - res = z1 | (z2 << 16); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_res = res; - last_buf = buf; - } - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - // if we still have some data left, scan it too - for (; buf + 15 < buf_end; buf += 16) { - m128 chars = load128(buf); - ptr = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset, - chars, buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - } - assert(buf <= buf_end && buf >= buf_end - 16); - - return NULL; -} - -const u8 *JOIN(MATCH_ALGO, truffleExec)(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - DEBUG_PRINTF("run_len %zu\n", buf_end - buf); - - assert(buf && buf_end); - assert(buf < buf_end); - const u8 *rv; - - if (buf_end - buf < 16) { - return truffleMini(shuf_mask_lo_highclear, shuf_mask_lo_highset, buf, buf_end); - } - - size_t min = (size_t)buf % 16; - assert(buf_end - buf >= 16); - - // Preconditioning: most of the time our buffer won't be aligned. 
- m128 chars = loadu128(buf); - rv = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - buf += (16 - min); - - // if we have enough data, run bigger pipeline; otherwise run smaller one - if (buf_end - buf >= 128) { - rv = JOIN(MATCH_ALGO, trufflePipeline32)(shuf_mask_lo_highclear, shuf_mask_lo_highset, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(rv)) { - return rv; - } - } else if (buf_end - buf >= 16){ - rv = JOIN(MATCH_ALGO, trufflePipeline16)(shuf_mask_lo_highclear, shuf_mask_lo_highset, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(rv)) { - return rv; - } - } - - // Use an unaligned load to mop up the last 16 bytes and get an accurate - // picture to buf_end. - chars = loadu128(buf_end - 16); - rv = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, - buf_end - 16, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - - return buf_end; -} diff --git a/src/nfa/multivermicelli.c b/src/nfa/multivermicelli.c deleted file mode 100644 index ab6d2cf21..000000000 --- a/src/nfa/multivermicelli.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
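The *Exec wrappers in these files all walk the buffer the same way: one unaligned load covers the ragged head, aligned loads (or the pipelines) do the bulk, and a final unaligned load ending exactly at buf_end re-covers the tail, overlapping already-scanned bytes instead of dropping to a byte loop. A skeleton of that scheme, with hypothetical block_unaligned()/block_aligned() helpers returning a match pointer or NULL:

#include <stdint.h>

const uint8_t *block_unaligned(const uint8_t *buf); /* hypothetical */
const uint8_t *block_aligned(const uint8_t *buf);   /* hypothetical */

/* assumes buf_end - buf >= 16 */
static const uint8_t *scan_skeleton(const uint8_t *buf,
                                    const uint8_t *buf_end) {
    const uint8_t *rv;

    /* head: one unaligned load, then round buf up to alignment */
    if ((rv = block_unaligned(buf)) != NULL) {
        return rv;
    }
    buf += 16 - ((uintptr_t)buf % 16);

    /* bulk: aligned loads only */
    for (; buf + 16 <= buf_end; buf += 16) {
        if ((rv = block_aligned(buf)) != NULL) {
            return rv;
        }
    }

    /* tail: overlapping unaligned load ending exactly at buf_end */
    if ((rv = block_unaligned(buf_end - 16)) != NULL) {
        return rv;
    }
    return buf_end; /* no match */
}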
- */ - -#include "config.h" -#include "ue2common.h" - -#include "multivermicelli.h" - -#include "multiaccel_common.h" - -#if !defined(__AVX2__) - -#define MATCH_ALGO long_ -#include "multiaccel_long.h" -#include "multivermicelli_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO longgrab_ -#include "multiaccel_longgrab.h" -#include "multivermicelli_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shift_ -#include "multiaccel_shift.h" -#include "multivermicelli_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shiftgrab_ -#include "multiaccel_shiftgrab.h" -#include "multivermicelli_sse.h" -#undef MATCH_ALGO - -#define MULTIACCEL_DOUBLE - -#define MATCH_ALGO doubleshift_ -#include "multiaccel_doubleshift.h" -#include "multivermicelli_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO doubleshiftgrab_ -#include "multiaccel_doubleshiftgrab.h" -#include "multivermicelli_sse.h" -#undef MATCH_ALGO - -#undef MULTIACCEL_DOUBLE - -#else - -#define MATCH_ALGO long_ -#include "multiaccel_long.h" -#include "multivermicelli_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO longgrab_ -#include "multiaccel_longgrab.h" -#include "multivermicelli_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shift_ -#include "multiaccel_shift.h" -#include "multivermicelli_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shiftgrab_ -#include "multiaccel_shiftgrab.h" -#include "multivermicelli_avx2.h" -#undef MATCH_ALGO - -#define MULTIACCEL_DOUBLE - -#define MATCH_ALGO doubleshift_ -#include "multiaccel_doubleshift.h" -#include "multivermicelli_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO doubleshiftgrab_ -#include "multiaccel_doubleshiftgrab.h" -#include "multivermicelli_avx2.h" -#undef MATCH_ALGO - -#undef MULTIACCEL_DOUBLE - -#endif diff --git a/src/nfa/multivermicelli_avx2.h b/src/nfa/multivermicelli_avx2.h deleted file mode 100644 index 9081aa3fc..000000000 --- a/src/nfa/multivermicelli_avx2.h +++ /dev/null @@ -1,283 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "util/bitutils.h" -#include "util/simd_utils.h" -#include "util/unaligned.h" - -#include "multiaccel_common.h" - -static really_inline -const u8 *JOIN(MATCH_ALGO, vermUnalignNocase)(m256 chars, - const u8 *buf, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - m256 casemask = set32x8(CASE_CLEAR); - const u8 *ptr; - m256 data = loadu256(buf); - u32 z = movemask256(eq256(chars, and256(casemask, data))); - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (buf, z -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - return NULL; -} - -static really_inline -const u8 *JOIN(MATCH_ALGO, vermUnalign)(m256 chars, - const u8 *buf, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8 *ptr; - - m256 data = loadu256(buf); - u32 z = movemask256(eq256(chars, data)); - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (buf, z -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - return NULL; -} - -/* - * 32-byte pipeline - */ -static really_inline -const u8 *JOIN(MATCH_ALGO, vermPipeline)(m256 chars, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8* ptr, *last_buf; - u32 last_res; - - // pipeline prologue: scan first 32 bytes - m256 data = load256(buf); - u32 z = movemask256(eq256(chars, data)); - last_res = z; - last_buf = buf; - buf += 32; - - // now, start the pipeline! - assert((size_t)buf % 32 == 0); - for (; buf + 31 < buf_end; buf += 32) { - // scan more data - data = load256(buf); - z = movemask256(eq256(chars, data)); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_buf = buf; - last_res = z; - } - assert(buf <= buf_end && buf >= buf_end - 32); - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - return NULL; -} - -/* - * 32-byte caseless pipeline - */ -static really_inline -const u8 *JOIN(MATCH_ALGO, vermPipelineNocase)(m256 chars, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - m256 casemask = set32x8(CASE_CLEAR); - const u8* ptr, *last_buf; - u32 last_res; - - // pipeline prologue: scan first 32 bytes - m256 data = load256(buf); - u32 z = movemask256(eq256(chars, and256(casemask, data))); - last_res = z; - last_buf = buf; - buf += 32; - - - // now, start the pipeline! 
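Vermicelli is the simplest of these accelerators: splat the wanted character across a vector, compare for equality and take a movemask. A self-contained SSE2 sketch of a single 16-byte block (the real code hands the mask to run-length confirm functions rather than taking ctz directly); builds with GCC or Clang:

#include <emmintrin.h> /* SSE2 */
#include <stdint.h>

/* returns a pointer to the first occurrence of c in buf[0..15], or NULL */
static const uint8_t *verm_block16(uint8_t c, const uint8_t *buf) {
    __m128i chars = _mm_set1_epi8((char)c);
    __m128i data  = _mm_loadu_si128((const __m128i *)buf);
    int z = _mm_movemask_epi8(_mm_cmpeq_epi8(chars, data));
    return z ? buf + __builtin_ctz((unsigned)z) : NULL;
}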
- assert((size_t)buf % 32 == 0); - for (; buf + 31 < buf_end; buf += 32) { - // scan more data - data = load256(buf); - z = movemask256(eq256(chars, and256(casemask, data))); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_buf = buf; - last_res = z; - } - assert(buf <= buf_end && buf >= buf_end - 32); - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - return NULL; -} - -const u8 *JOIN(MATCH_ALGO, vermicelliExec)(char c, char nocase, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n", - nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); - assert(buf < buf_end); - - const u8 *ptr; - - // Handle small scans. - if (buf_end - buf < 32) { - for (; buf < buf_end; buf++) { - char cur = (char)*buf; - if (nocase) { - cur &= CASE_CLEAR; - } - if (cur == c) { - break; - } - } - return buf; - } - - m256 chars = set32x8(c); /* nocase already uppercase */ - - uintptr_t min = (uintptr_t)buf % 32; - - if (min) { - ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars, - buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ) : JOIN(MATCH_ALGO, vermUnalign)(chars, - buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - buf += 32 - min; - } - - if (buf_end - buf >= 32){ - ptr = nocase ? JOIN(MATCH_ALGO, vermPipelineNocase)(chars, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ) : JOIN(MATCH_ALGO, vermPipeline)(chars, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - } - - // final unaligned scan - ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars, - buf_end - 32, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ) : JOIN(MATCH_ALGO, vermUnalign)(chars, - buf_end - 32, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - - // run our pipeline - return ptr ? ptr : buf_end; -} diff --git a/src/nfa/multivermicelli_sse.h b/src/nfa/multivermicelli_sse.h deleted file mode 100644 index cdacd2c43..000000000 --- a/src/nfa/multivermicelli_sse.h +++ /dev/null @@ -1,452 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
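The nocase paths above lean on ASCII layout: the pattern byte is stored pre-uppercased and each data byte is ANDed with CASE_CLEAR, which folds 'a'..'z' onto 'A'..'Z' in a single operation. A scalar model, assuming the conventional value of the constant:

#include <stdint.h>

#define CASE_CLEAR_MODEL 0xdf /* assumed: clears the ASCII case bit 0x20 */

/* caseless compare as in the small-scan loops above; upper_c must already
 * be uppercase, and c should be a letter for the fold to be meaningful */
static inline int eq_nocase_model(uint8_t data, uint8_t upper_c) {
    return (uint8_t)(data & CASE_CLEAR_MODEL) == upper_c;
}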
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "util/bitutils.h" -#include "util/simd_utils.h" -#include "util/unaligned.h" - -#define VERM_BOUNDARY 16 -#define VERM_TYPE m128 -#define VERM_SET_FN set16x8 - -#include "multiaccel_common.h" - -static really_inline -const u8 *JOIN(MATCH_ALGO, vermUnalignNocase)(m128 chars, - const u8 *buf, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - m128 casemask = set16x8(CASE_CLEAR); - const u8 *ptr; - m128 data = loadu128(buf); - u32 z = movemask128(eq128(chars, and128(casemask, data))); - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (buf, z -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - return NULL; -} - -static really_inline -const u8 *JOIN(MATCH_ALGO, vermUnalign)(m128 chars, - const u8 *buf, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8 *ptr; - - m128 data = loadu128(buf); - u32 z = movemask128(eq128(chars, data)); - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (buf, z -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - return NULL; -} - -/* - * 16-byte pipeline, for smaller scans - */ -static -const u8 *JOIN(MATCH_ALGO, vermPipeline16)(m128 chars, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8* ptr, *last_buf; - u32 last_res; - - // pipeline prologue: scan first 16 bytes - m128 data = load128(buf); - u32 z = movemask128(eq128(chars, data)); - last_buf = buf; - last_res = z; - buf += 16; - - // now, start the pipeline! - assert((size_t)buf % 16 == 0); - for (; buf + 15 < buf_end; buf += 16) { - // scan more data - data = load128(buf); - z = movemask128(eq128(chars, data)); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_buf = buf; - last_res = z; - } - assert(buf <= buf_end && buf >= buf_end - 16); - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - return NULL; -} - -/* - * 16-byte pipeline, for smaller scans - */ -static -const u8 *JOIN(MATCH_ALGO, vermPipeline16Nocase)(m128 chars, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - m128 casemask = set16x8(CASE_CLEAR); - const u8* ptr, *last_buf; - u32 last_res; - - // pipeline prologue: scan first 16 bytes - m128 data = load128(buf); - u32 z = movemask128(eq128(chars, and128(casemask, data))); - last_buf = buf; - last_res = z; - buf += 16; - - // now, start the pipeline! 
- assert((size_t)buf % 16 == 0); - for (; buf + 15 < buf_end; buf += 16) { - // scan more data - data = load128(buf); - z = movemask128(eq128(chars, and128(casemask, data))); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_buf = buf; - last_res = z; - } - assert(buf <= buf_end && buf >= buf_end - 16); - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - return NULL; -} - -/* - * 32-byte pipeline, for bigger scans - */ -static -const u8 *JOIN(MATCH_ALGO, vermPipeline32)(m128 chars, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8* ptr, *last_buf; - u32 res; - - // pipeline prologue: scan first 32 bytes - m128 data1 = load128(buf); - u32 z1 = movemask128(eq128(chars, data1)); - m128 data2 = load128(buf + 16); - u32 z2 = movemask128(eq128(chars, data2)); - - // store the results - u32 last_res = z1 | (z2 << VERM_BOUNDARY); - last_buf = buf; - buf += 32; - - - // now, start the pipeline! - assert((size_t)buf % 16 == 0); - for (; buf + 31 < buf_end; buf += 32) { - // scan more data - data1 = load128(buf); - z1 = movemask128(eq128(chars, data1)); - data2 = load128(buf + 16); - z2 = movemask128(eq128(chars, data2)); - res = z1 | (z2 << 16); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_res = res; - last_buf = buf; - } - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - // if we still have some data left, scan it too - if (buf + 15 < buf_end) { - return JOIN(MATCH_ALGO, vermPipeline16)(chars, buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - } - assert(buf <= buf_end && buf >= buf_end - 16); - - return NULL; -} - -/* - * 32-byte caseless pipeline, for bigger scans - */ -static -const u8 *JOIN(MATCH_ALGO, vermPipeline32Nocase)(m128 chars, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - m128 casemask = set16x8(CASE_CLEAR); - const u8* ptr, *last_buf; - u32 last_res; - - // pipeline prologue: scan first 32 bytes - m128 data1 = load128(buf); - u32 z1 = movemask128(eq128(chars, and128(casemask, data1))); - m128 data2 = load128(buf + 16); - u32 z2 = movemask128(eq128(chars, and128(casemask, data2))); - u32 z = z1 | (z2 << VERM_BOUNDARY); - - last_res = z; - last_buf = buf; - buf += 32; - - // now, start the pipeline! 
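The 32-byte pipelines halve the number of confirm calls by gluing two 16-byte movemasks into one 32-bit value, as in z1 | (z2 << 16) above; the confirm functions selected from the ..._64 tables then treat the pair as a single block. In isolation:

#include <stdint.h>

/* bit i of the result corresponds to buf[i] for i in 0..31 */
static inline uint32_t combine_masks16(uint16_t z1, uint16_t z2) {
    return (uint32_t)z1 | ((uint32_t)z2 << 16);
}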
- assert((size_t)buf % 16 == 0); - for (; buf + 31 < buf_end; buf += 32) { - // scan more data - data1 = load128(buf); - z1 = movemask128(eq128(chars, and128(casemask, data1))); - data2 = load128(buf + 16); - z2 = movemask128(eq128(chars, and128(casemask, data2))); - z = z1 | (z2 << 16); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_res = z; - last_buf = buf; - } - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - // if we still have some data left, scan it too - if (buf + 15 < buf_end) { - return JOIN(MATCH_ALGO, vermPipeline16Nocase)(chars, buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - } - assert(buf <= buf_end && buf >= buf_end - 16); - - return NULL; -} - -const u8 *JOIN(MATCH_ALGO, vermicelliExec)(char c, char nocase, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n", - nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); - assert(buf < buf_end); - - const u8 *ptr; - - // Handle small scans. - if (buf_end - buf < VERM_BOUNDARY) { - for (; buf < buf_end; buf++) { - char cur = (char)*buf; - if (nocase) { - cur &= CASE_CLEAR; - } - if (cur == c) { - break; - } - } - return buf; - } - - VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */ - - uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY; - - if (min) { - ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars, - buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ) : JOIN(MATCH_ALGO, vermUnalign)(chars, - buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - buf += VERM_BOUNDARY - min; - } - - // if we have enough data, run bigger pipeline; otherwise run smaller one - if (buf_end - buf >= 128) { - ptr = nocase ? JOIN(MATCH_ALGO, vermPipeline32Nocase)(chars, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ) : JOIN(MATCH_ALGO, vermPipeline32)(chars, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - } else if (buf_end - buf >= 16){ - ptr = nocase ? JOIN(MATCH_ALGO, vermPipeline16Nocase)(chars, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ) : JOIN(MATCH_ALGO, vermPipeline16)(chars, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - } - - // final unaligned scan - ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars, - buf_end - VERM_BOUNDARY, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ) : JOIN(MATCH_ALGO, vermUnalign)(chars, - buf_end - VERM_BOUNDARY, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - - // run our pipeline - return ptr ? 
ptr : buf_end; -} diff --git a/src/nfa/nfa_build_util.cpp b/src/nfa/nfa_build_util.cpp index 3103cd297..9185ccdd7 100644 --- a/src/nfa/nfa_build_util.cpp +++ b/src/nfa/nfa_build_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -401,7 +401,7 @@ const char *NFATraits<SHENG_NFA>::name = "Sheng"; template<> struct NFATraits { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; - static const u32 stateAlign = 32; + static const u32 stateAlign = 64; static const bool fast = true; static const nfa_dispatch_fn has_accel; static const nfa_dispatch_fn has_repeats; diff --git a/src/nfa/rdfa_merge.cpp b/src/nfa/rdfa_merge.cpp index 45457555c..50e9b62a0 100644 --- a/src/nfa/rdfa_merge.cpp +++ b/src/nfa/rdfa_merge.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,6 +40,7 @@ #include "util/report_manager.h" #include "util/ue2_containers.h" +#include <algorithm> #include <queue> using namespace std; @@ -135,6 +136,10 @@ class Automaton_Merge { } } + // Sort so that our alphabet mapping isn't dependent on the order of + // rdfas passed in. + sort(esets.begin(), esets.end()); + alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha); } diff --git a/src/nfa/sheng_impl.h b/src/nfa/sheng_impl.h index fc3e54aa8..9552fe15d 100644 --- a/src/nfa/sheng_impl.h +++ b/src/nfa/sheng_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -58,7 +58,7 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s, while (likely(cur_buf != end)) { const u8 c = *cur_buf; const m128 shuffle_mask = masks[c]; - cur_state = pshufb(shuffle_mask, cur_state); + cur_state = pshufb_m128(shuffle_mask, cur_state); const u8 tmp = movd(cur_state); DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ?
c : '?'); diff --git a/src/nfa/sheng_impl4.h b/src/nfa/sheng_impl4.h index 2561e52d3..740322010 100644 --- a/src/nfa/sheng_impl4.h +++ b/src/nfa/sheng_impl4.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -100,19 +100,19 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s, const u8 c4 = *b4; const m128 shuffle_mask1 = masks[c1]; - cur_state = pshufb(shuffle_mask1, cur_state); + cur_state = pshufb_m128(shuffle_mask1, cur_state); const u8 a1 = movd(cur_state); const m128 shuffle_mask2 = masks[c2]; - cur_state = pshufb(shuffle_mask2, cur_state); + cur_state = pshufb_m128(shuffle_mask2, cur_state); const u8 a2 = movd(cur_state); const m128 shuffle_mask3 = masks[c3]; - cur_state = pshufb(shuffle_mask3, cur_state); + cur_state = pshufb_m128(shuffle_mask3, cur_state); const u8 a3 = movd(cur_state); const m128 shuffle_mask4 = masks[c4]; - cur_state = pshufb(shuffle_mask4, cur_state); + cur_state = pshufb_m128(shuffle_mask4, cur_state); const u8 a4 = movd(cur_state); DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?'); diff --git a/src/nfa/shengcompile.cpp b/src/nfa/shengcompile.cpp index 53f2c1318..c4094cedc 100644 --- a/src/nfa/shengcompile.cpp +++ b/src/nfa/shengcompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -450,16 +450,15 @@ bool has_accel_sheng(const NFA *) { return true; /* consider the sheng region as accelerated */ } -aligned_unique_ptr<NFA> shengCompile(raw_dfa &raw, - const CompileContext &cc, - const ReportManager &rm, - set<dstate_id_t> *accel_states) { +bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm, bool only_accel_init, + set<dstate_id_t> *accel_states) { if (!cc.grey.allowSheng) { DEBUG_PRINTF("Sheng is not allowed!\n"); return nullptr; } - sheng_build_strat strat(raw, rm); + sheng_build_strat strat(raw, rm, only_accel_init); dfa_info info(strat); DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size()); @@ -508,7 +507,7 @@ aligned_unique_ptr<NFA> shengCompile(raw_dfa &raw, DEBUG_PRINTF("NFA: %u, aux: %u, reports: %u, accel: %u, total: %u\n", nfa_size, total_aux, total_reports, total_accel, total_size); - aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size); + auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); populateBasicInfo(nfa.get(), info, accelInfo, nfa_size, reports_offset, accel_offset, total_size, total_size - sizeof(NFA)); diff --git a/src/nfa/shengcompile.h b/src/nfa/shengcompile.h index 873b7c758..9885cd16f 100644 --- a/src/nfa/shengcompile.h +++ b/src/nfa/shengcompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,12 +26,12 @@ * POSSIBILITY OF SUCH DAMAGE.
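The sheng hunks above are part of this release's renaming of SIMD helpers (pshufb to pshufb_m128 and friends). The underlying trick is unchanged: with at most 16 DFA states, the transition-table row for an input byte fits in one 128-bit register, so PSHUFB performs next = table[byte][state] in a single instruction. A scalar model of one step and of the 4-byte unroll in sheng_impl4.h:

#include <stdint.h>

/* masks[c] models the 16-entry shuffle table for input byte c; like
 * pshufb, only the low 4 bits of the state select an entry. */
static inline uint8_t sheng_step(const uint8_t masks[256][16],
                                 uint8_t state, uint8_t c) {
    return masks[c][state & 0xf];
}

/* 4-way unroll: still one dependent chain of table lookups, but the byte
 * loads and the per-step accept checks can overlap it. */
static inline uint8_t sheng_step4(const uint8_t masks[256][16],
                                  uint8_t state, const uint8_t *p) {
    state = sheng_step(masks, state, p[0]);
    state = sheng_step(masks, state, p[1]);
    state = sheng_step(masks, state, p[2]);
    state = sheng_step(masks, state, p[3]);
    return state;
}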
*/ -#ifndef SHENGCOMPILE_H_ -#define SHENGCOMPILE_H_ +#ifndef SHENGCOMPILE_H +#define SHENGCOMPILE_H #include "accel_dfa_build_strat.h" #include "rdfa.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/charreach.h" #include "util/ue2_containers.h" @@ -45,8 +45,9 @@ struct raw_dfa; class sheng_build_strat : public accel_dfa_build_strat { public: - sheng_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in) - : accel_dfa_build_strat(rm_in), rdfa(rdfa_in) {} + sheng_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in, + bool only_accel_init_in) + : accel_dfa_build_strat(rm_in, only_accel_init_in), rdfa(rdfa_in) {} raw_dfa &get_raw() const override { return rdfa; } std::unique_ptr<raw_report_info> gatherReports( std::vector<u32> &reports /* out */, @@ -62,9 +63,9 @@ class sheng_build_strat : public accel_dfa_build_strat { raw_dfa &rdfa; }; -aligned_unique_ptr<NFA> -shengCompile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm, - std::set<dstate_id_t> *accel_states = nullptr); +bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm, bool only_accel_init, + std::set<dstate_id_t> *accel_states = nullptr); struct sheng_escape_info { CharReach outs; @@ -77,4 +78,4 @@ bool has_accel_sheng(const NFA *nfa); } // namespace ue2 -#endif /* SHENGCOMPILE_H_ */ +#endif /* SHENGCOMPILE_H */ diff --git a/src/nfa/shufti.c b/src/nfa/shufti.c index d68b1b047..09ffc0cf9 100644 --- a/src/nfa/shufti.c +++ b/src/nfa/shufti.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,11 +34,57 @@ #include "shufti.h" #include "ue2common.h" +#include "util/arch.h" #include "util/bitutils.h" #include "util/simd_utils.h" #include "util/unaligned.h" -#include "shufti_common.h" +#ifdef DEBUG +#include <ctype.h> + +#define DUMP_MSK(_t) \ +static UNUSED \ +void dumpMsk##_t(m##_t msk) { \ + u8 * mskAsU8 = (u8 *)&msk; \ + for (unsigned i = 0; i < sizeof(msk); i++) { \ + u8 c = mskAsU8[i]; \ + for (int j = 0; j < 8; j++) { \ + if ((c >> (7-j)) & 0x1) \ + printf("1"); \ + else \ + printf("0"); \ + } \ + printf(" "); \ + } \ +} \ +static UNUSED \ +void dumpMsk##_t##AsChars(m##_t msk) { \ + u8 * mskAsU8 = (u8 *)&msk; \ + for (unsigned i = 0; i < sizeof(msk); i++) { \ + u8 c = mskAsU8[i]; \ + if (isprint(c)) \ + printf("%c",c); \ + else \ + printf("."); \ + } \ +} + +#endif + +/** \brief Naive byte-by-byte implementation. */ +static really_inline +const u8 *shuftiFwdSlow(const u8 *lo, const u8 *hi, const u8 *buf, + const u8 *buf_end) { + assert(buf < buf_end); + + for (; buf < buf_end; ++buf) { + u8 c = *buf; + if (lo[c & 0xf] & hi[c >> 4]) { + break; + } + } + return buf; +} /** \brief Naive byte-by-byte implementation.
*/ static really_inline @@ -55,9 +101,33 @@ const u8 *shuftiRevSlow(const u8 *lo, const u8 *hi, const u8 *buf, return buf_end; } -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) /* Normal SSSE3 shufti */ +#ifdef DEBUG +DUMP_MSK(128) +#endif + +#define GET_LO_4(chars) and128(chars, low4bits) +#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4) + +static really_inline +u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits, + const m128 compare) { + m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(chars)); + m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(chars)); + m128 t = and128(c_lo, c_hi); + +#ifdef DEBUG + DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n"); +#endif + return movemask128(eq128(t, compare)); +} + static really_inline const u8 *firstMatch(const u8 *buf, u32 z) { if (unlikely(z != 0xffff)) { @@ -149,8 +219,8 @@ const u8 *lastMatch(const u8 *buf, m128 t, m128 compare) { static really_inline const u8 *revBlock(m128 mask_lo, m128 mask_hi, m128 chars, const u8 *buf, const m128 low4bits, const m128 zeroes) { - m128 c_lo = pshufb(mask_lo, GET_LO_4(chars)); - m128 c_hi = pshufb(mask_hi, GET_HI_4(chars)); + m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(chars)); + m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(chars)); m128 t = and128(c_lo, c_hi); #ifdef DEBUG @@ -219,8 +289,8 @@ const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi, const m128 ones) { m128 chars_lo = GET_LO_4(chars); m128 chars_hi = GET_HI_4(chars); - m128 c_lo = pshufb(mask1_lo, chars_lo); - m128 c_hi = pshufb(mask1_hi, chars_hi); + m128 c_lo = pshufb_m128(mask1_lo, chars_lo); + m128 c_hi = pshufb_m128(mask1_hi, chars_hi); m128 t = or128(c_lo, c_hi); #ifdef DEBUG @@ -231,8 +301,8 @@ const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi, DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n"); #endif - m128 c2_lo = pshufb(mask2_lo, chars_lo); - m128 c2_hi = pshufb(mask2_hi, chars_hi); + m128 c2_lo = pshufb_m128(mask2_lo, chars_lo); + m128 c2_hi = pshufb_m128(mask2_hi, chars_hi); m128 t2 = or128(t, rshiftbyte_m128(or128(c2_lo, c2_hi), 1)); #ifdef DEBUG @@ -290,13 +360,41 @@ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, return buf_end; } -#else // AVX2 - 256 wide shuftis +#elif !defined(HAVE_AVX512) +// AVX2 - 256 wide shuftis + +#ifdef DEBUG +DUMP_MSK(256) +#endif + +#define GET_LO_4(chars) and256(chars, low4bits) +#define GET_HI_4(chars) rshift64_m256(andnot256(low4bits, chars), 4) + +static really_inline +u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits, + const m256 compare) { + m256 c_lo = pshufb_m256(mask_lo, GET_LO_4(chars)); + m256 c_hi = pshufb_m256(mask_hi, GET_HI_4(chars)); + m256 t = and256(c_lo, c_hi); + +#ifdef DEBUG + DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n"); +#endif + + return movemask256(eq256(t, compare)); +} static really_inline const u8 *firstMatch(const u8 *buf, u32 z) { + DEBUG_PRINTF("z 0x%08x\n", z); if (unlikely(z != 0xffffffff)) { u32 pos = ctz32(~z); assert(pos < 32); + DEBUG_PRINTF("match @ pos %u\n", pos); return 
buf + pos; } else { return NULL; // no match @@ -309,7 +407,7 @@ const u8 *fwdBlockShort(m256 mask, m128 chars, const u8 *buf, // do the hi and lo shuffles in the one avx register m256 c = combine2x128(rshift64_m128(chars, 4), chars); c = and256(c, low4bits); - m256 c_shuf = vpshufb(mask, c); + m256 c_shuf = pshufb_m256(mask, c); m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf)); // the upper 32-bits can't match u32 z = 0xffff0000U | movemask128(eq128(t, zeroes128())); @@ -418,8 +516,8 @@ const u8 *lastMatch(const u8 *buf, u32 z) { static really_inline const u8 *revBlock(m256 mask_lo, m256 mask_hi, m256 chars, const u8 *buf, const m256 low4bits, const m256 zeroes) { - m256 c_lo = vpshufb(mask_lo, GET_LO_4(chars)); - m256 c_hi = vpshufb(mask_hi, GET_HI_4(chars)); + m256 c_lo = pshufb_m256(mask_lo, GET_LO_4(chars)); + m256 c_hi = pshufb_m256(mask_hi, GET_HI_4(chars)); m256 t = and256(c_lo, c_hi); #ifdef DEBUG @@ -440,7 +538,7 @@ const u8 *revBlockShort(m256 mask, m128 chars, const u8 *buf, // do the hi and lo shuffles in the one avx register m256 c = combine2x128(rshift64_m128(chars, 4), chars); c = and256(c, low4bits); - m256 c_shuf = vpshufb(mask, c); + m256 c_shuf = pshufb_m256(mask, c); m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf)); // the upper 32-bits can't match u32 z = 0xffff0000U | movemask128(eq128(t, zeroes128())); @@ -532,8 +630,8 @@ const u8 *fwdBlock2(m256 mask1_lo, m256 mask1_hi, m256 mask2_lo, m256 mask2_hi, DEBUG_PRINTF("buf %p\n", buf); m256 chars_lo = GET_LO_4(chars); m256 chars_hi = GET_HI_4(chars); - m256 c_lo = vpshufb(mask1_lo, chars_lo); - m256 c_hi = vpshufb(mask1_hi, chars_hi); + m256 c_lo = pshufb_m256(mask1_lo, chars_lo); + m256 c_hi = pshufb_m256(mask1_hi, chars_hi); m256 t = or256(c_lo, c_hi); #ifdef DEBUG @@ -544,8 +642,8 @@ const u8 *fwdBlock2(m256 mask1_lo, m256 mask1_hi, m256 mask2_lo, m256 mask2_hi, DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n"); #endif - m256 c2_lo = vpshufb(mask2_lo, chars_lo); - m256 c2_hi = vpshufb(mask2_hi, chars_hi); + m256 c2_lo = pshufb_m256(mask2_lo, chars_lo); + m256 c2_hi = pshufb_m256(mask2_hi, chars_hi); m256 t2 = or256(t, rshift128_m256(or256(c2_lo, c2_hi), 1)); #ifdef DEBUG @@ -564,8 +662,8 @@ const u8 *fwdBlockShort2(m256 mask1, m256 mask2, m128 chars, const u8 *buf, // do the hi and lo shuffles in the one avx register m256 c = combine2x128(rshift64_m128(chars, 4), chars); c = and256(c, low4bits); - m256 c_shuf1 = vpshufb(mask1, c); - m256 c_shuf2 = rshift128_m256(vpshufb(mask2, c), 1); + m256 c_shuf1 = pshufb_m256(mask1, c); + m256 c_shuf2 = rshift128_m256(pshufb_m256(mask2, c), 1); m256 t0 = or256(c_shuf1, c_shuf2); m128 t = or128(movdq_hi(t0), cast256to128(t0)); // the upper 32-bits can't match @@ -602,6 +700,7 @@ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, const u8 *buf, const u8 *buf_end) { /* we should always have at least 16 bytes */ assert(buf_end - buf >= 16); + DEBUG_PRINTF("buf %p len %zu\n", buf, buf_end - buf); if (buf_end - buf < 32) { return shuftiDoubleShort(mask1_lo, mask1_hi, mask2_lo, mask2_hi, buf, @@ -652,4 +751,347 @@ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, return buf_end; } -#endif //AVX2 +#else // defined(HAVE_AVX512) + +#ifdef DEBUG +DUMP_MSK(512) +#endif + +static really_inline +u64a block(m512 mask_lo, m512 mask_hi, m512 chars, const m512 low4bits, + const m512 compare) { + m512 c_lo = pshufb_m512(mask_lo, and512(chars, low4bits)); + m512 c_hi = pshufb_m512(mask_hi, + rshift64_m512(andnot512(low4bits, chars), 4)); + m512 t = and512(c_lo, c_hi); + 
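A convention worth noting in the rewritten shufti.c: block() compares the shuffled result against zero, so the returned mask has a bit set for every byte that does not match; all-ones means no match in the block, and the firstMatch helpers take a trailing-zero count of the inverted mask. In scalar form:

#include <stdint.h>

/* z uses the inverted convention (bit set == no match at that offset);
 * all_ones is 0xffff, 0xffffffff or ~0ULL depending on vector width */
static inline int first_match_pos(uint64_t z, uint64_t all_ones) {
    return (z != all_ones) ? (int)__builtin_ctzll(~z) : -1; /* -1: none */
}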
+#ifdef DEBUG + DEBUG_PRINTF(" chars: "); dumpMsk512AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk512(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk512(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk512(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk512(t); printf("\n"); +#endif + + return eq512mask(t, compare); +} +static really_inline +const u8 *firstMatch64(const u8 *buf, u64a z) { + DEBUG_PRINTF("z 0x%016llx\n", z); + if (unlikely(z != ~0ULL)) { + u32 pos = ctz64(~z); + DEBUG_PRINTF("match @ pos %u\n", pos); + assert(pos < 64); + return buf + pos; + } else { + return NULL; // no match + } +} + +static really_inline +const u8 *fwdBlock512(m512 mask_lo, m512 mask_hi, m512 chars, const u8 *buf, + const m512 low4bits, const m512 zeroes) { + u64a z = block(mask_lo, mask_hi, chars, low4bits, zeroes); + + return firstMatch64(buf, z); +} + +static really_inline +const u8 *shortShufti512(m512 mask_lo, m512 mask_hi, const u8 *buf, + const u8 *buf_end, const m512 low4bits, + const m512 zeroes) { + DEBUG_PRINTF("short shufti %p len %zu\n", buf, buf_end - buf); + uintptr_t len = buf_end - buf; + assert(len <= 64); + + // load mask + u64a k = (~0ULL) >> (64 - len); + DEBUG_PRINTF("load mask 0x%016llx\n", k); + + m512 chars = loadu_maskz_m512(k, buf); + + u64a z = block(mask_lo, mask_hi, chars, low4bits, zeroes); + + // reuse the load mask to indicate valid bytes + return firstMatch64(buf, z | ~k); +} + +/* takes 128 bit masks, but operates on 512 bits of data */ +const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end) { + assert(buf && buf_end); + assert(buf < buf_end); + DEBUG_PRINTF("shufti %p len %zu\n", buf, buf_end - buf); + DEBUG_PRINTF("b %s\n", buf); + + const m512 low4bits = set64x8(0xf); + const m512 zeroes = zeroes512(); + const m512 wide_mask_lo = set4x128(mask_lo); + const m512 wide_mask_hi = set4x128(mask_hi); + const u8 *rv; + + // small cases. + if (buf_end - buf <= 64) { + rv = shortShufti512(wide_mask_lo, wide_mask_hi, buf, buf_end, low4bits, + zeroes); + return rv ? rv : buf_end; + } + + assert(buf_end - buf >= 64); + + // Preconditioning: most of the time our buffer won't be aligned. + if ((uintptr_t)buf % 64) { + rv = shortShufti512(wide_mask_lo, wide_mask_hi, buf, + ROUNDUP_PTR(buf, 64), low4bits, zeroes); + if (rv) { + return rv; + } + buf = ROUNDUP_PTR(buf, 64); + } + + const u8 *last_block = ROUNDDOWN_PTR(buf_end, 64); + while (buf < last_block) { + m512 lchars = load512(buf); + rv = fwdBlock512(wide_mask_lo, wide_mask_hi, lchars, buf, low4bits, + zeroes); + if (rv) { + return rv; + } + buf += 64; + } + + if (buf == buf_end) { + goto done; + } + + // Use an unaligned load to mop up the last 64 bytes and get an accurate + // picture to buf_end. 
+ assert(buf <= buf_end && buf >= buf_end - 64); + m512 chars = loadu512(buf_end - 64); + rv = fwdBlock512(wide_mask_lo, wide_mask_hi, chars, buf_end - 64, low4bits, + zeroes); + if (rv) { + return rv; + } +done: + return buf_end; +} + +static really_inline +const u8 *lastMatch64(const u8 *buf, u64a z) { + DEBUG_PRINTF("z 0x%016llx\n", z); + if (unlikely(z != ~0ULL)) { + u32 pos = clz64(~z); + DEBUG_PRINTF("buf=%p, pos=%u\n", buf, pos); + return buf + (63 - pos); + } else { + return NULL; // no match + } +} + +static really_inline +const u8 *rshortShufti512(m512 mask_lo, m512 mask_hi, const u8 *buf, + const u8 *buf_end, const m512 low4bits, + const m512 zeroes) { + DEBUG_PRINTF("short %p len %zu\n", buf, buf_end - buf); + uintptr_t len = buf_end - buf; + assert(len <= 64); + + // load mask + u64a k = (~0ULL) >> (64 - len); + DEBUG_PRINTF("load mask 0x%016llx\n", k); + + m512 chars = loadu_maskz_m512(k, buf); + + u64a z = block(mask_lo, mask_hi, chars, low4bits, zeroes); + + // reuse the load mask to indicate valid bytes + return lastMatch64(buf, z | ~k); +} + +static really_inline +const u8 *revBlock512(m512 mask_lo, m512 mask_hi, m512 chars, const u8 *buf, + const m512 low4bits, const m512 zeroes) { + m512 c_lo = pshufb_m512(mask_lo, and512(chars, low4bits)); + m512 c_hi = pshufb_m512(mask_hi, + rshift64_m512(andnot512(low4bits, chars), 4)); + m512 t = and512(c_lo, c_hi); + +#ifdef DEBUG + DEBUG_PRINTF(" chars: "); dumpMsk512AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk512(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk512(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk512(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk512(t); printf("\n"); +#endif + + u64a z = eq512mask(t, zeroes); + return lastMatch64(buf, z); +} + +/* takes 128 bit masks, but operates on 512 bits of data */ +const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end) { + DEBUG_PRINTF("buf %p buf_end %p\n", buf, buf_end); + assert(buf && buf_end); + assert(buf < buf_end); + + const m512 low4bits = set64x8(0xf); + const m512 zeroes = zeroes512(); + const m512 wide_mask_lo = set4x128(mask_lo); + const m512 wide_mask_hi = set4x128(mask_hi); + const u8 *rv; + + if (buf_end - buf < 64) { + rv = rshortShufti512(wide_mask_lo, wide_mask_hi, buf, buf_end, low4bits, + zeroes); + return rv ? rv : buf - 1; + } + + if (ROUNDDOWN_PTR(buf_end, 64) != buf_end) { + // peel off unaligned portion + assert(buf_end - buf >= 64); + DEBUG_PRINTF("start\n"); + rv = rshortShufti512(wide_mask_lo, wide_mask_hi, + ROUNDDOWN_PTR(buf_end, 64), buf_end, low4bits, + zeroes); + if (rv) { + return rv; + } + buf_end = ROUNDDOWN_PTR(buf_end, 64); + } + + const u8 *last_block = ROUNDUP_PTR(buf, 64); + while (buf_end > last_block) { + buf_end -= 64; + m512 lchars = load512(buf_end); + rv = revBlock512(wide_mask_lo, wide_mask_hi, lchars, buf_end, low4bits, + zeroes); + if (rv) { + return rv; + } + } + if (buf_end == buf) { + goto done; + } + // Use an unaligned load to mop up the last 64 bytes and get an accurate + // picture to buf. 
+ m512 chars = loadu512(buf); + rv = revBlock512(wide_mask_lo, wide_mask_hi, chars, buf, low4bits, zeroes); + if (rv) { + return rv; + } +done: + return buf - 1; +} + +static really_inline +const u8 *fwdBlock2(m512 mask1_lo, m512 mask1_hi, m512 mask2_lo, m512 mask2_hi, + m512 chars, const u8 *buf, const m512 low4bits, + const m512 ones, __mmask64 k) { + DEBUG_PRINTF("buf %p %.64s\n", buf, buf); + m512 chars_lo = and512(chars, low4bits); + m512 chars_hi = rshift64_m512(andnot512(low4bits, chars), 4); + m512 c_lo = maskz_pshufb_m512(k, mask1_lo, chars_lo); + m512 c_hi = maskz_pshufb_m512(k, mask1_hi, chars_hi); + m512 t = or512(c_lo, c_hi); + +#ifdef DEBUG + DEBUG_PRINTF(" chars: "); dumpMsk512AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk512(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk512(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk512(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk512(t); printf("\n"); +#endif + + m512 c2_lo = maskz_pshufb_m512(k, mask2_lo, chars_lo); + m512 c2_hi = maskz_pshufb_m512(k, mask2_hi, chars_hi); + m512 t2 = or512(t, rshift128_m512(or512(c2_lo, c2_hi), 1)); + +#ifdef DEBUG + DEBUG_PRINTF(" c2_lo: "); dumpMsk512(c2_lo); printf("\n"); + DEBUG_PRINTF(" c2_hi: "); dumpMsk512(c2_hi); printf("\n"); + DEBUG_PRINTF(" t2: "); dumpMsk512(t2); printf("\n"); +#endif + u64a z = eq512mask(t2, ones); + + return firstMatch64(buf, z | ~k); +} + +static really_inline +const u8 *shortDoubleShufti512(m512 mask1_lo, m512 mask1_hi, m512 mask2_lo, + m512 mask2_hi, const u8 *buf, const u8 *buf_end, + const m512 low4bits, const m512 ones) { + DEBUG_PRINTF("short %p len %zu\n", buf, buf_end - buf); + uintptr_t len = buf_end - buf; + assert(len <= 64); + + u64a k = (~0ULL) >> (64 - len); + DEBUG_PRINTF("load mask 0x%016llx\n", k); + + m512 chars = loadu_mask_m512(ones, k, buf); + + const u8 *rv = fwdBlock2(mask1_lo, mask1_hi, mask2_lo, mask2_hi, chars, buf, + low4bits, ones, k); + + return rv; +} + +/* takes 128 bit masks, but operates on 512 bits of data */ +const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, + m128 mask2_lo, m128 mask2_hi, + const u8 *buf, const u8 *buf_end) { + /* we should always have at least 16 bytes */ + assert(buf_end - buf >= 16); + DEBUG_PRINTF("buf %p len %zu\n", buf, buf_end - buf); + + const m512 ones = ones512(); + const m512 low4bits = set64x8(0xf); + const m512 wide_mask1_lo = set4x128(mask1_lo); + const m512 wide_mask1_hi = set4x128(mask1_hi); + const m512 wide_mask2_lo = set4x128(mask2_lo); + const m512 wide_mask2_hi = set4x128(mask2_hi); + const u8 *rv; + + if (buf_end - buf <= 64) { + rv = shortDoubleShufti512(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, + wide_mask2_hi, buf, buf_end, low4bits, ones); + DEBUG_PRINTF("rv %p\n", rv); + return rv ? rv : buf_end; + } + + // Preconditioning: most of the time our buffer won't be aligned. + if ((uintptr_t)buf % 64) { + rv = shortDoubleShufti512(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, + wide_mask2_hi, buf, ROUNDUP_PTR(buf, 64), + low4bits, ones); + if (rv) { + return rv; + } + + buf = ROUNDUP_PTR(buf, 64); + } + + const u8 *last_block = buf_end - 64; + while (buf < last_block) { + m512 lchars = load512(buf); + rv = fwdBlock2(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, + wide_mask2_hi, lchars, buf, low4bits, ones, ~0); + if (rv) { + return rv; + } + buf += 64; + } + + // Use an unaligned load to mop up the last 64 bytes and get an accurate + // picture to buf_end. 
+ m512 chars = loadu512(buf_end - 64); + rv = fwdBlock2(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi, + chars, buf_end - 64, low4bits, ones, ~0); + if (rv) { + return rv; + } + + return buf_end; +} +#endif diff --git a/src/nfa/shufti_common.h b/src/nfa/shufti_common.h deleted file mode 100644 index e63ad27af..000000000 --- a/src/nfa/shufti_common.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef SHUFTI_COMMON_H_ -#define SHUFTI_COMMON_H_ - -#include "ue2common.h" - -#include "util/bitutils.h" -#include "util/simd_utils.h" -#include "util/unaligned.h" - -/* - * Common stuff for all versions of shufti (single, multi and multidouble) - */ - -/** \brief Naive byte-by-byte implementation. 
*/ -static really_inline -const u8 *shuftiFwdSlow(const u8 *lo, const u8 *hi, const u8 *buf, - const u8 *buf_end) { - assert(buf < buf_end); - - for (; buf < buf_end; ++buf) { - u8 c = *buf; - if (lo[c & 0xf] & hi[c >> 4]) { - break; - } - } - return buf; -} - -#ifdef DEBUG -#include - -#define DUMP_MSK(_t) \ -static UNUSED \ -void dumpMsk##_t(m##_t msk) { \ - u8 * mskAsU8 = (u8 *)&msk; \ - for (unsigned i = 0; i < sizeof(msk); i++) { \ - u8 c = mskAsU8[i]; \ - for (int j = 0; j < 8; j++) { \ - if ((c >> (7-j)) & 0x1) \ - printf("1"); \ - else \ - printf("0"); \ - } \ - printf(" "); \ - } \ -} \ -static UNUSED \ -void dumpMsk##_t##AsChars(m##_t msk) { \ - u8 * mskAsU8 = (u8 *)&msk; \ - for (unsigned i = 0; i < sizeof(msk); i++) { \ - u8 c = mskAsU8[i]; \ - if (isprint(c)) \ - printf("%c",c); \ - else \ - printf("."); \ - } \ -} - -#endif - -#if !defined(__AVX2__) - -#ifdef DEBUG -DUMP_MSK(128) -#endif - -#define GET_LO_4(chars) and128(chars, low4bits) -#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4) - -static really_inline -u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits, - const m128 compare) { - m128 c_lo = pshufb(mask_lo, GET_LO_4(chars)); - m128 c_hi = pshufb(mask_hi, GET_HI_4(chars)); - m128 t = and128(c_lo, c_hi); - -#ifdef DEBUG - DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n"); - DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n"); - DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n"); - DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n"); - DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n"); -#endif - return movemask128(eq128(t, compare)); -} - -#else - -#ifdef DEBUG -DUMP_MSK(256) -#endif - -#define GET_LO_4(chars) and256(chars, low4bits) -#define GET_HI_4(chars) rshift64_m256(andnot256(low4bits, chars), 4) - -static really_inline -u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits, - const m256 compare) { - m256 c_lo = vpshufb(mask_lo, GET_LO_4(chars)); - m256 c_hi = vpshufb(mask_hi, GET_HI_4(chars)); - m256 t = and256(c_lo, c_hi); - -#ifdef DEBUG - DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n"); - DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n"); - DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n"); - DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n"); - DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n"); -#endif - - return movemask256(eq256(t, compare)); -} - -#endif - - -#endif /* SHUFTI_COMMON_H_ */ diff --git a/src/nfa/tamaramacompile.cpp b/src/nfa/tamaramacompile.cpp index c28caacbe..1a6e8beff 100644 --- a/src/nfa/tamaramacompile.cpp +++ b/src/nfa/tamaramacompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,9 +26,9 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file - * \brief Tamarama: container engine for exclusive engines, - * compiler code. +/** + * \file + * \brief Tamarama: container engine for exclusive engines, compiler code. */ #include "config.h" @@ -111,8 +111,9 @@ void copyInSubnfas(const char *base_offset, NFA &nfa, * returns via out_top_remap, a mapping indicating how tops in the subengines in * relate to the tamarama's tops. 
 */
-aligned_unique_ptr<NFA> buildTamarama(const TamaInfo &tamaInfo, const u32 queue,
-                                      map<pair<const NFA *, u32>, u32> &out_top_remap) {
+bytecode_ptr<NFA>
+buildTamarama(const TamaInfo &tamaInfo, const u32 queue,
+              map<pair<const NFA *, u32>, u32> &out_top_remap) {
     vector<u32> top_base;
     remapTops(tamaInfo, top_base, out_top_remap);
@@ -133,7 +134,7 @@ aligned_unique_ptr<NFA> buildTamarama(const TamaInfo &tamaInfo, const u32 queue,
     // use subSize as a sentinel value for no active subengines,
     // so add one to subSize here
     u32 activeIdxSize = calcPackedBytes(subSize + 1);
-    aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);
+    auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
     nfa->type = verify_u8(TAMARAMA_NFA);
     nfa->length = verify_u32(total_size);
     nfa->queueIndex = queue;
@@ -148,7 +149,7 @@ aligned_unique_ptr<NFA> buildTamarama(const TamaInfo &tamaInfo, const u32 queue,
     copy_bytes(ptr, top_base);
     ptr += byte_length(top_base);
-    u32 *offsets = (u32*)ptr;
+    u32 *offsets = (u32 *)ptr;
     char *sub_nfa_offset = ptr + sizeof(u32) * subSize;
     copyInSubnfas(base_offset, *nfa, tamaInfo, offsets, sub_nfa_offset,
                   activeIdxSize);
diff --git a/src/nfa/tamaramacompile.h b/src/nfa/tamaramacompile.h
index 048b966b8..7fcea3ec8 100644
--- a/src/nfa/tamaramacompile.h
+++ b/src/nfa/tamaramacompile.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Intel Corporation
+ * Copyright (c) 2016-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -26,15 +26,16 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
-/** \file
- * \brief Tamarama: container engine for exclusive engines, compiler code.
+/**
+ * \file
+ * \brief Tamarama: container engine for exclusive engines, compiler code.
  */
 #ifndef NFA_TAMARAMACOMPILE_H
 #define NFA_TAMARAMACOMPILE_H
 #include "ue2common.h"
-#include "util/alloc.h"
+#include "util/bytecode_ptr.h"
 #include <map>
 #include <set>
@@ -45,7 +46,7 @@
 struct NFA;
 namespace ue2 {
 /**
- * \brief A TamaProto that contains top remapping and reports info
+ * \brief A TamaProto that contains top remapping and reports info.
  */
 struct TamaProto {
     void add(const NFA *n, const u32 id, const u32 top,
@@ -59,7 +60,7 @@ struct TamaProto {
 };
 /**
- * \brief Contruction info for a Tamarama engine:
+ * \brief Construction info for a Tamarama engine:
  *  contains at least two subengines.
  *
  * A TamaInfo is converted into a single NFA, with each top triggering a
@@ -70,7 +71,7 @@ struct TamaInfo {
     static constexpr size_t max_occupancy = 65536; // arbitrary limit
     /** \brief Add a new subengine. */
-    void add(NFA* sub, const std::set<u32> &top);
+    void add(NFA *sub, const std::set<u32> &top);
     /** \brief All the subengines */
     std::vector<NFA *> subengines;
@@ -86,9 +87,10 @@ std::set<ReportID> all_reports(const TamaProto &proto);
  * returns via out_top_remap, a mapping indicating how tops in the subengines in
  * relate to the tamarama's tops.
  */
-ue2::aligned_unique_ptr<NFA> buildTamarama(const TamaInfo &tamaInfo,
-                                           const u32 queue,
-                                           std::map<std::pair<const NFA *, u32>, u32> &out_top_remap);
+bytecode_ptr<NFA>
+buildTamarama(const TamaInfo &tamaInfo, const u32 queue,
+              std::map<std::pair<const NFA *, u32>, u32> &out_top_remap);
+
 } // namespace ue2
 #endif // NFA_TAMARAMACOMPILE_H
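The Tamarama bytecode stores the index of the currently active subengine in a packed field; the `calcPackedBytes(subSize + 1)` call above sizes that field so that `subSize` itself can act as the "no active subengine" sentinel. A minimal re-derivation of that sizing computation (the helper below is a sketch, not the real `calcPackedBytes`):

```cpp
#include <cassert>
#include <cstdint>

// Values in [0, max_value] need ceil(bits(max_value) / 8) bytes; the builder
// passes subSize + 1 so the sentinel value fits too.
static uint32_t packedBytes(uint32_t max_value) {
    uint32_t bits = 0;
    while ((1ULL << bits) <= max_value) {
        bits++; // count bits needed to represent max_value
    }
    return (bits + 7) / 8; // round up to whole bytes
}

int main() {
    assert(packedBytes(2) == 1);   // two subengines + sentinel: one byte
    assert(packedBytes(255) == 1); // still fits in a byte
    assert(packedBytes(256) == 2); // 257 distinct values need two bytes
    return 0;
}
```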
diff --git a/src/nfa/truffle.c b/src/nfa/truffle.c
index 1eff269ab..be6b312cf 100644
--- a/src/nfa/truffle.c
+++ b/src/nfa/truffle.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -33,12 +33,11 @@
 #include "ue2common.h"
 #include "truffle.h"
+#include "util/arch.h"
 #include "util/bitutils.h"
 #include "util/simd_utils.h"
 
-#include "truffle_common.h"
-
-#if !defined(__AVX2__)
+#if !defined(HAVE_AVX2)
 
 static really_inline
 const u8 *lastMatch(const u8 *buf, u32 z) {
@@ -51,6 +50,57 @@ const u8 *lastMatch(const u8 *buf, u32 z) {
     return NULL; // no match
 }
 
+static really_inline
+const u8 *firstMatch(const u8 *buf, u32 z) {
+    if (unlikely(z != 0xffff)) {
+        u32 pos = ctz32(~z & 0xffff);
+        assert(pos < 16);
+        return buf + pos;
+    }
+
+    return NULL; // no match
+}
+
+static really_inline
+u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) {
+
+    m128 highconst = _mm_set1_epi8(0x80);
+    m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201);
+
+    // and now do the real work
+    m128 shuf1 = pshufb_m128(shuf_mask_lo_highclear, v);
+    m128 t1 = xor128(v, highconst);
+    m128 shuf2 = pshufb_m128(shuf_mask_lo_highset, t1);
+    m128 t2 = andnot128(highconst, rshift64_m128(v, 4));
+    m128 shuf3 = pshufb_m128(shuf_mask_hi, t2);
+    m128 tmp = and128(or128(shuf1, shuf2), shuf3);
+    m128 tmp2 = eq128(tmp, zeroes128());
+    u32 z = movemask128(tmp2);
+
+    return z;
+}
+
+static
+const u8 *truffleMini(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
+                      const u8 *buf, const u8 *buf_end) {
+    uintptr_t len = buf_end - buf;
+    assert(len < 16);
+
+    m128 chars = zeroes128();
+    memcpy(&chars, buf, len);
+
+    u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
+    // can't be these bytes in z
+    u32 mask = (0xffff >> (16 - len)) ^ 0xffff;
+    const u8 *rv = firstMatch(buf, z | mask);
+
+    if (rv) {
+        return rv;
+    } else {
+        return buf_end;
+    }
+}
+
 static really_inline
 const u8 *fwdBlock(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
                    m128 v, const u8 *buf) {
@@ -124,7 +174,7 @@ const u8 *truffleRevMini(m128 shuf_mask_lo_highclear,
     m128 chars = zeroes128();
     memcpy(&chars, buf, len);
 
-    u32 mask = (0xFFFF >> (16 - len)) ^ 0xFFFF;
+    u32 mask = (0xffff >> (16 - len)) ^ 0xffff;
     u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
     const u8 *rv = lastMatch(buf, z | mask);
 
@@ -181,7 +231,9 @@ const u8 *rtruffleExec(m128 shuf_mask_lo_highclear,
     return buf - 1;
 }
 
-#else
+#elif !defined(HAVE_AVX512)
+
+// AVX2
 
 static really_inline
 const u8 *lastMatch(const u8 *buf, u32 z) {
@@ -194,6 +246,57 @@ const u8 *lastMatch(const u8 *buf, u32 z) {
     return NULL; // no match
 }
 
+static really_inline
+const u8 *firstMatch(const u8 *buf, u32 z) {
+    if (unlikely(z != 0xffffffff)) {
+        u32 pos = ctz32(~z);
+        assert(pos < 32);
+        return buf + pos;
+    }
+
+    return NULL; // no match
+}
+
+static really_inline
+u32 block(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v) {
+
+    m256 highconst = _mm256_set1_epi8(0x80);
+    m256 shuf_mask_hi = _mm256_set1_epi64x(0x8040201008040201);
+
+    // and now do the real work
+
m256 shuf1 = pshufb_m256(shuf_mask_lo_highclear, v); + m256 t1 = xor256(v, highconst); + m256 shuf2 = pshufb_m256(shuf_mask_lo_highset, t1); + m256 t2 = andnot256(highconst, rshift64_m256(v, 4)); + m256 shuf3 = pshufb_m256(shuf_mask_hi, t2); + m256 tmp = and256(or256(shuf1, shuf2), shuf3); + m256 tmp2 = eq256(tmp, zeroes256()); + u32 z = movemask256(tmp2); + + return z; +} + +static +const u8 *truffleMini(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + uintptr_t len = buf_end - buf; + assert(len < 32); + + m256 chars = zeroes256(); + memcpy(&chars, buf, len); + + u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); + // can't be these bytes in z + u32 mask = (0xffffffff >> (32 - len)) ^ 0xffffffff; + const u8 *rv = firstMatch(buf, z | mask); + + if (rv) { + return rv; + } else { + return buf_end; + } +} + static really_inline const u8 *fwdBlock(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v, const u8 *buf) { @@ -265,7 +368,7 @@ const u8 *truffleRevMini(m256 shuf_mask_lo_highclear, m256 chars = zeroes256(); memcpy(&chars, buf, len); - u32 mask = (0xFFFFFFFF >> (32 - len)) ^ 0xFFFFFFFF; + u32 mask = (0xffffffff >> (32 - len)) ^ 0xffffffff; u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); const u8 *rv = lastMatch(buf, z | mask); @@ -322,4 +425,184 @@ const u8 *rtruffleExec(m128 shuf_mask_lo_highclear, return buf - 1; } +#else // AVX512 + +static really_inline +const u8 *lastMatch(const u8 *buf, u64a z) { + if (unlikely(z != ~0ULL)) { + u64a pos = clz64(~z); + assert(pos < 64); + return buf + (63 - pos); + } + + return NULL; // no match +} + +static really_inline +const u8 *firstMatch(const u8 *buf, u64a z) { + if (unlikely(z != ~0ULL)) { + u64a pos = ctz64(~z); + assert(pos < 64); + DEBUG_PRINTF("pos %llu\n", pos); + return buf + pos; + } + + return NULL; // no match +} + +static really_inline +u64a block(m512 shuf_mask_lo_highclear, m512 shuf_mask_lo_highset, m512 v) { + m512 highconst = set64x8(0x80); + m512 shuf_mask_hi = set8x64(0x8040201008040201); + + // and now do the real work + m512 shuf1 = pshufb_m512(shuf_mask_lo_highclear, v); + m512 t1 = xor512(v, highconst); + m512 shuf2 = pshufb_m512(shuf_mask_lo_highset, t1); + m512 t2 = andnot512(highconst, rshift64_m512(v, 4)); + m512 shuf3 = pshufb_m512(shuf_mask_hi, t2); + m512 tmp = and512(or512(shuf1, shuf2), shuf3); + u64a z = eq512mask(tmp, zeroes512()); + + return z; +} + +static really_inline +const u8 *truffleMini(m512 shuf_mask_lo_highclear, m512 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + uintptr_t len = buf_end - buf; + assert(len <= 64); + + __mmask64 mask = (~0ULL) >> (64 - len); + + m512 chars = loadu_maskz_m512(mask, buf); + + u64a z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); + + const u8 *rv = firstMatch(buf, z | ~mask); + + return rv; +} + +static really_inline +const u8 *fwdBlock(m512 shuf_mask_lo_highclear, m512 shuf_mask_lo_highset, + m512 v, const u8 *buf) { + u64a z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); + return firstMatch(buf, z); +} + +static really_inline +const u8 *revBlock(m512 shuf_mask_lo_highclear, m512 shuf_mask_lo_highset, + m512 v, const u8 *buf) { + u64a z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); + return lastMatch(buf, z); +} + +const u8 *truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + DEBUG_PRINTF("len %zu\n", buf_end - buf); + const m512 wide_clear = 
set4x128(shuf_mask_lo_highclear); + const m512 wide_set = set4x128(shuf_mask_lo_highset); + + assert(buf && buf_end); + assert(buf < buf_end); + const u8 *rv; + + if (buf_end - buf <= 64) { + rv = truffleMini(wide_clear, wide_set, buf, buf_end); + return rv ? rv : buf_end; + } + + assert(buf_end - buf >= 64); + if ((uintptr_t)buf % 64) { + // Preconditioning: most of the time our buffer won't be aligned. + rv = truffleMini(wide_clear, wide_set, buf, ROUNDUP_PTR(buf, 64)); + if (rv) { + return rv; + } + buf = ROUNDUP_PTR(buf, 64); + } + const u8 *last_block = buf_end - 64; + while (buf < last_block) { + m512 lchars = load512(buf); + rv = fwdBlock(wide_clear, wide_set, lchars, buf); + if (rv) { + return rv; + } + buf += 64; + } + + // Use an unaligned load to mop up the last 64 bytes and get an accurate + // picture to buf_end. + assert(buf <= buf_end && buf >= buf_end - 64); + m512 chars = loadu512(buf_end - 64); + rv = fwdBlock(wide_clear, wide_set, chars, buf_end - 64); + if (rv) { + return rv; + } + return buf_end; +} + +static really_inline +const u8 *truffleRevMini(m512 shuf_mask_lo_highclear, m512 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + uintptr_t len = buf_end - buf; + assert(len < 64); + + __mmask64 mask = (~0ULL) >> (64 - len); + m512 chars = loadu_maskz_m512(mask, buf); + u64a z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); + DEBUG_PRINTF("mask 0x%016llx z 0x%016llx\n", mask, z); + const u8 *rv = lastMatch(buf, z | ~mask); + + if (rv) { + return rv; + } + return buf - 1; +} + +const u8 *rtruffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + const m512 wide_clear = set4x128(shuf_mask_lo_highclear); + const m512 wide_set = set4x128(shuf_mask_lo_highset); + assert(buf && buf_end); + assert(buf < buf_end); + const u8 *rv; + + DEBUG_PRINTF("len %zu\n", buf_end - buf); + + if (buf_end - buf < 64) { + return truffleRevMini(wide_clear, wide_set, buf, buf_end); + } + + assert(buf_end - buf >= 64); + + // Preconditioning: most of the time our buffer won't be aligned. + m512 chars = loadu512(buf_end - 64); + rv = revBlock(wide_clear, wide_set, chars, buf_end - 64); + if (rv) { + return rv; + } + buf_end = (const u8 *)ROUNDDOWN_N((uintptr_t)buf_end, 64); + + const u8 *last_block = buf + 64; + while (buf_end > last_block) { + buf_end -= 64; + m512 lchars = load512(buf_end); + rv = revBlock(wide_clear, wide_set, lchars, buf_end); + if (rv) { + return rv; + } + } + + // Use an unaligned load to mop up the last 64 bytes and get an accurate + // picture to buf_end. + chars = loadu512(buf); + rv = revBlock(wide_clear, wide_set, chars, buf); + if (rv) { + return rv; + } + return buf - 1; +} + #endif diff --git a/src/nfa/truffle_common.h b/src/nfa/truffle_common.h deleted file mode 100644 index 7368e550d..000000000 --- a/src/nfa/truffle_common.h +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef TRUFFLE_COMMON_H_ -#define TRUFFLE_COMMON_H_ - -#include "util/bitutils.h" -#include "util/simd_utils.h" - -/* - * Common stuff for all versions of truffle (single, multi and multidouble) - */ -#if !defined(__AVX2__) - -static really_inline -const u8 *firstMatch(const u8 *buf, u32 z) { - if (unlikely(z != 0xffff)) { - u32 pos = ctz32(~z & 0xffff); - assert(pos < 16); - return buf + pos; - } - - return NULL; // no match -} - -static really_inline -u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) { - - m128 highconst = _mm_set1_epi8(0x80); - m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201); - - // and now do the real work - m128 shuf1 = pshufb(shuf_mask_lo_highclear, v); - m128 t1 = xor128(v, highconst); - m128 shuf2 = pshufb(shuf_mask_lo_highset, t1); - m128 t2 = andnot128(highconst, rshift64_m128(v, 4)); - m128 shuf3 = pshufb(shuf_mask_hi, t2); - m128 tmp = and128(or128(shuf1, shuf2), shuf3); - m128 tmp2 = eq128(tmp, zeroes128()); - u32 z = movemask128(tmp2); - - return z; -} - -static -const u8 *truffleMini(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end) { - uintptr_t len = buf_end - buf; - assert(len < 16); - - m128 chars = zeroes128(); - memcpy(&chars, buf, len); - - u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); - // can't be these bytes in z - u32 mask = (0xFFFF >> (16 - len)) ^ 0xFFFF; - const u8 *rv = firstMatch(buf, z| mask); - - if (rv) { - return rv; - } else { - return buf_end; - } -} - -#else - -static really_inline -const u8 *firstMatch(const u8 *buf, u32 z) { - if (unlikely(z != 0xffffffff)) { - u32 pos = ctz32(~z); - assert(pos < 32); - return buf + pos; - } - - return NULL; // no match -} - -static really_inline -u32 block(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v) { - - m256 highconst = _mm256_set1_epi8(0x80); - m256 shuf_mask_hi = _mm256_set1_epi64x(0x8040201008040201); - - // and now do the real work - m256 shuf1 = vpshufb(shuf_mask_lo_highclear, v); - m256 t1 = xor256(v, highconst); - m256 shuf2 = vpshufb(shuf_mask_lo_highset, t1); - m256 t2 = andnot256(highconst, rshift64_m256(v, 4)); - m256 shuf3 = vpshufb(shuf_mask_hi, t2); - m256 tmp = and256(or256(shuf1, shuf2), shuf3); - m256 tmp2 = eq256(tmp, zeroes256()); - u32 z = movemask256(tmp2); - - return z; -} - -static -const u8 *truffleMini(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end) { - uintptr_t len = buf_end - buf; - assert(len < 32); - - m256 chars = 
zeroes256();
-    memcpy(&chars, buf, len);
-
-    u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
-    // can't be these bytes in z
-    u32 mask = (0xFFFFFFFF >> (32 - len)) ^ 0xFFFFFFFF;
-    const u8 *rv = firstMatch(buf, z | mask);
-
-    if (rv) {
-        return rv;
-    } else {
-        return buf_end;
-    }
-}
-
-#endif
-
-#endif /* TRUFFLE_COMMON_H_ */
diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp
index dff9c7e87..8b247c74b 100644
--- a/src/nfagraph/ng.cpp
+++ b/src/nfagraph/ng.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -27,10 +27,11 @@
 */
 
 /** \file
- * \brief NG, NGHolder, NGWrapper and graph handling.
+ * \brief NG and graph handling.
 */
 
-#include "grey.h"
 #include "ng.h"
+
+#include "grey.h"
 #include "ng_anchored_acyclic.h"
 #include "ng_anchored_dots.h"
 #include "ng_asserts.h"
@@ -41,6 +42,7 @@
 #include "ng_equivalence.h"
 #include "ng_extparam.h"
 #include "ng_fixed_width.h"
+#include "ng_fuzzy.h"
 #include "ng_haig.h"
 #include "ng_literal_component.h"
 #include "ng_literal_decorated.h"
@@ -52,7 +54,6 @@
 #include "ng_region.h"
 #include "ng_region_redundancy.h"
 #include "ng_reports.h"
-#include "ng_rose.h"
 #include "ng_sep.h"
 #include "ng_small_literal_set.h"
 #include "ng_som.h"
@@ -62,6 +63,7 @@
 #include "ng_util.h"
 #include "ng_width.h"
 #include "ue2common.h"
+#include "compiler/compiler.h"
 #include "nfa/goughcompile.h"
 #include "rose/rose_build.h"
 #include "smallwrite/smallwrite_build.h"
@@ -100,16 +102,16 @@ NG::~NG() {
 * \throw CompileError if SOM cannot be supported for the component.
 */
 static
-bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w,
+bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr,
                      const som_type som, const u32 comp_id) {
     DEBUG_PRINTF("doing som\n");
-    dumpComponent(g, "03_presom", w.expressionIndex, comp_id, ng.cc.grey);
+    dumpComponent(g, "03_presom", expr.index, comp_id, ng.cc.grey);
     assert(hasCorrectlyNumberedVertices(g));
-    assert(allMatchStatesHaveReports(w));
+    assert(allMatchStatesHaveReports(g));
 
     // First, we try the "SOM chain" support in ng_som.cpp.
-    sombe_rv rv = doSom(ng, g, w, comp_id, som);
+    sombe_rv rv = doSom(ng, g, expr, comp_id, som);
     if (rv == SOMBE_HANDLED_INTERNAL) {
         return false;
     } else if (rv == SOMBE_HANDLED_ALL) {
@@ -118,7 +120,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w,
     assert(rv == SOMBE_FAIL);
 
     /* Next, Sombe style approaches */
-    rv = doSomWithHaig(ng, g, w, comp_id, som);
+    rv = doSomWithHaig(ng, g, expr, comp_id, som);
     if (rv == SOMBE_HANDLED_INTERNAL) {
         return false;
     } else if (rv == SOMBE_HANDLED_ALL) {
@@ -132,7 +134,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w,
     vector<vector<CharReach>> triggers; /* empty for outfix */
 
     assert(g.kind == NFA_OUTFIX);
-    dumpComponent(g, "haig", w.expressionIndex, comp_id, ng.cc.grey);
+    dumpComponent(g, "haig", expr.index, comp_id, ng.cc.grey);
     makeReportsSomPass(ng.rm, g);
     auto haig = attemptToBuildHaig(g, som, ng.ssm.somPrecision(), triggers,
                                    ng.cc.grey);
@@ -145,7 +147,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w,
 
     /* Our various strategies for supporting SOM for this pattern have failed.
* Provide a generic pattern not supported/too large return value as it is * unclear what the meaning of a specific SOM error would be */ - throw CompileError(w.expressionIndex, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); assert(0); // unreachable return false; @@ -200,27 +202,35 @@ void reduceGraph(NGHolder &g, som_type som, bool utf8, } static -bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, - const u32 comp_id) { +bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr, + const som_type som, const u32 comp_id) { const CompileContext &cc = ng.cc; assert(hasCorrectlyNumberedVertices(g)); DEBUG_PRINTF("expr=%u, comp=%u: %zu vertices, %zu edges\n", - w.expressionIndex, comp_id, num_vertices(g), num_edges(g)); + expr.index, comp_id, num_vertices(g), num_edges(g)); - dumpComponent(g, "01_begin", w.expressionIndex, comp_id, ng.cc.grey); + dumpComponent(g, "01_begin", expr.index, comp_id, ng.cc.grey); - assert(allMatchStatesHaveReports(w)); + assert(allMatchStatesHaveReports(g)); - reduceGraph(g, som, w.utf8, cc); + reduceExtendedParams(g, ng.rm, som); + reduceGraph(g, som, expr.utf8, cc); - dumpComponent(g, "02_reduced", w.expressionIndex, comp_id, ng.cc.grey); + dumpComponent(g, "02_reduced", expr.index, comp_id, ng.cc.grey); // There may be redundant regions that we can remove if (cc.grey.performGraphSimplification) { removeRegionRedundancy(g, som); } + // We might be done at this point: if we've run out of vertices, we can + // stop processing. + if (num_vertices(g) == N_SPECIALS) { + DEBUG_PRINTF("all vertices claimed\n"); + return true; + } + // "Short Exhaustible Passthrough" patterns always become outfixes. if (!som && isSEP(g, ng.rm, cc.grey)) { DEBUG_PRINTF("graph is SEP\n"); @@ -231,12 +241,12 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, // Start Of Match handling. if (som) { - if (addComponentSom(ng, g, w, som, comp_id)) { + if (addComponentSom(ng, g, expr, som, comp_id)) { return true; } } - assert(allMatchStatesHaveReports(w)); + assert(allMatchStatesHaveReports(g)); if (splitOffAnchoredAcyclic(*ng.rose, g, cc)) { return true; @@ -251,15 +261,11 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, return true; } - if (doViolet(*ng.rose, g, w.prefilter, cc)) { + if (doViolet(*ng.rose, g, expr.prefilter, false, ng.rm, cc)) { return true; } - if (splitOffRose(*ng.rose, g, w.prefilter, cc)) { - return true; - } - - if (splitOffPuffs(*ng.rose, ng.rm, g, w.prefilter, cc)) { + if (splitOffPuffs(*ng.rose, ng.rm, g, expr.prefilter, cc)) { return true; } @@ -272,26 +278,7 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, return true; } - if (doViolet(*ng.rose, g, w.prefilter, cc)) { - return true; - } - - if (splitOffRose(*ng.rose, g, w.prefilter, cc)) { - return true; - } - - // A final pass at cyclic redundancy and Rose - // TODO: investigate - coverage results suggest that this never succeeds? - if (cc.grey.performGraphSimplification) { - if (removeCyclicPathRedundancy(g) || - removeCyclicDominated(g, som)) { - if (handleFixedWidth(*ng.rose, g, cc.grey)) { - return true; - } - } - } - - if (finalChanceRose(*ng.rose, g, w.prefilter, cc)) { + if (doViolet(*ng.rose, g, expr.prefilter, true, ng.rm, cc)) { return true; } @@ -306,7 +293,7 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, // Returns true if all components have been added. 
static
-bool processComponents(NG &ng, NGWrapper &w,
+bool processComponents(NG &ng, ExpressionInfo &expr,
                        deque<unique_ptr<NGHolder>> &g_comp,
                        const som_type som) {
     const u32 num_components = g_comp.size();
@@ -316,7 +303,7 @@ bool processComponents(NG &ng, NGWrapper &w,
         if (!g_comp[i]) {
             continue;
         }
-        if (addComponent(ng, *g_comp[i], w, som, i)) {
+        if (addComponent(ng, *g_comp[i], expr, som, i)) {
             g_comp[i].reset();
             continue;
         }
@@ -336,40 +323,65 @@ bool processComponents(NG &ng, NGWrapper &w,
     return false;
 }
 
-bool NG::addGraph(NGWrapper &w) {
+bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) {
+    assert(g_ptr);
+    NGHolder &g = *g_ptr;
+
     // remove reports that aren't on vertices connected to accept.
-    clearReports(w);
+    clearReports(g);
 
-    som_type som = w.som;
-    if (som && isVacuous(w)) {
-        throw CompileError(w.expressionIndex, "Start of match is not "
+    som_type som = expr.som;
+    if (som && isVacuous(g)) {
+        throw CompileError(expr.index, "Start of match is not "
                            "currently supported for patterns which match an "
                            "empty buffer.");
     }
 
-    dumpDotWrapper(w, "01_initial", cc.grey);
-    assert(allMatchStatesHaveReports(w));
+    dumpDotWrapper(g, expr, "01_initial", cc.grey);
+    assert(allMatchStatesHaveReports(g));
 
     /* ensure utf8 starts at cp boundary */
-    ensureCodePointStart(rm, w);
-    resolveAsserts(rm, w);
+    ensureCodePointStart(rm, g, expr);
 
-    dumpDotWrapper(w, "02_post_assert_resolve", cc.grey);
-    assert(allMatchStatesHaveReports(w));
+    if (can_never_match(g)) {
+        throw CompileError(expr.index, "Pattern can never match.");
+    }
 
-    pruneUseless(w);
-    pruneEmptyVertices(w);
+    // validate graph's suitability for fuzzing before resolving asserts
+    validate_fuzzy_compile(g, expr.edit_distance, expr.utf8, cc.grey);
 
-    if (can_never_match(w)) {
-        throw CompileError(w.expressionIndex, "Pattern can never match.");
+    resolveAsserts(rm, g, expr);
+    dumpDotWrapper(g, expr, "02_post_assert_resolve", cc.grey);
+    assert(allMatchStatesHaveReports(g));
+
+    make_fuzzy(g, expr.edit_distance, cc.grey);
+    dumpDotWrapper(g, expr, "02a_post_fuzz", cc.grey);
+
+    pruneUseless(g);
+    pruneEmptyVertices(g);
+
+    if (can_never_match(g)) {
+        throw CompileError(expr.index, "Pattern can never match.");
     }
 
-    optimiseVirtualStarts(w); /* good for som */
+    optimiseVirtualStarts(g); /* good for som */
+
+    propagateExtendedParams(g, expr, rm);
+    reduceExtendedParams(g, rm, som);
+
+    // We may have removed all the edges to accept, in which case this
+    // expression cannot match.
+    if (can_never_match(g)) {
+        throw CompileError(expr.index, "Extended parameter constraints can not "
+                                       "be satisfied for any match from this "
+                                       "expression.");
+    }
 
-    handleExtendedParams(rm, w, cc);
-    if (w.min_length) {
-        // We have a minimum length constraint, which we currently use SOM to
-        // satisfy.
+    if (any_of_in(all_reports(g), [&](ReportID id) {
+            return rm.getReport(id).minLength;
+        })) {
+        // We have at least one report with a minimum length constraint, which
+        // we currently use SOM to satisfy.
         som = SOM_LEFT;
         ssm.somPrecision(8);
     }
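The hunk above replaces the old `w.min_length` flag check with a per-report query: a minimum-length constraint can only be enforced if the start of each match is known, so the presence of any such report forces SOM tracking. A reduced sketch of the invariant being enforced (plain C++, not Hyperscan internals):

```cpp
#include <cstdint>

// A match can only be tested against min_length if both of its endpoints
// are known; the "from" field is what SOM (start of match) tracking supplies.
struct MatchSketch {
    uint64_t from; // start of match, requires SOM
    uint64_t to;   // end of match, always available
};

static bool satisfiesMinLength(const MatchSketch &m, uint64_t min_length) {
    return m.to - m.from >= min_length;
}
```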
@@ -381,98 +393,104 @@ bool NG::addGraph(NGWrapper &w) {
 
     // first, we can perform graph work that can be done on an individual
     // expression basis.
-    if (w.utf8) {
-        relaxForbiddenUtf8(w);
+    if (expr.utf8) {
+        relaxForbiddenUtf8(g, expr);
     }
 
-    if (w.highlander && !w.min_length && !w.min_offset) {
+    if (all_of_in(all_reports(g), [&](ReportID id) {
+            const auto &report = rm.getReport(id);
+            return report.ekey != INVALID_EKEY && !report.minLength &&
+                   !report.minOffset;
+        })) {
         // In highlander mode: if we don't have constraints on our reports that
         // may prevent us accepting our first match (i.e. extended params) we
         // can prune the other out-edges of all vertices connected to accept.
-        pruneHighlanderAccepts(w, rm);
+        // TODO: shift the report checking down into pruneHighlanderAccepts()
+        // to allow us to handle the parts we can in mixed cases.
+        pruneHighlanderAccepts(g, rm);
     }
 
-    dumpDotWrapper(w, "02b_fairly_early", cc.grey);
+    dumpDotWrapper(g, expr, "02b_fairly_early", cc.grey);
 
     // If we're a vacuous pattern, we can handle this early.
-    if (splitOffVacuous(boundary, rm, w)) {
+    if (splitOffVacuous(boundary, rm, g, expr)) {
         DEBUG_PRINTF("split off vacuous\n");
     }
 
     // We might be done at this point: if we've run out of vertices, we can
     // stop processing.
-    if (num_vertices(w) == N_SPECIALS) {
+    if (num_vertices(g) == N_SPECIALS) {
         DEBUG_PRINTF("all vertices claimed by vacuous handling\n");
         return true;
     }
 
     // Now that vacuous edges have been removed, update the min width exclusive
     // of boundary reports.
-    minWidth = min(minWidth, findMinWidth(w));
+    minWidth = min(minWidth, findMinWidth(g));
 
     // Add the pattern to the small write builder.
-    smwr->add(w);
+    smwr->add(g, expr);
 
     if (!som) {
-        removeSiblingsOfStartDotStar(w);
+        removeSiblingsOfStartDotStar(g);
     }
 
-    dumpDotWrapper(w, "03_early", cc.grey);
+    dumpDotWrapper(g, expr, "03_early", cc.grey);
 
     // Perform a reduction pass to merge sibling character classes together.
     if (cc.grey.performGraphSimplification) {
-        removeRedundancy(w, som);
-        prunePathsRedundantWithSuccessorOfCyclics(w, som);
+        removeRedundancy(g, som);
+        prunePathsRedundantWithSuccessorOfCyclics(g, som);
     }
 
-    dumpDotWrapper(w, "04_reduced", cc.grey);
+    dumpDotWrapper(g, expr, "04_reduced", cc.grey);
 
     // If we've got some literals that span the graph from start to accept, we
     // can split them off into Rose from here.
     if (!som) {
-        if (splitOffLiterals(*this, w)) {
+        if (splitOffLiterals(*this, g)) {
             DEBUG_PRINTF("some vertices claimed by literals\n");
         }
     }
 
     // We might be done at this point: if we've run out of vertices, we can
     // stop processing.
-    if (num_vertices(w) == N_SPECIALS) {
+    if (num_vertices(g) == N_SPECIALS) {
         DEBUG_PRINTF("all vertices claimed before calc components\n");
         return true;
     }
 
-    // Split the graph into a set of connected components.
+    // Split the graph into a set of connected components and process those.
+    // Note: this invalidates g_ptr.
 
-    deque<unique_ptr<NGHolder>> g_comp = calcComponents(w);
+    auto g_comp = calcComponents(std::move(g_ptr), cc.grey);
     assert(!g_comp.empty());
 
     if (!som) {
-        for (u32 i = 0; i < g_comp.size(); i++) {
-            assert(g_comp[i]);
-            reformLeadingDots(*g_comp[i]);
+        for (auto &gc : g_comp) {
+            assert(gc);
+            reformLeadingDots(*gc);
         }
 
-        recalcComponents(g_comp);
+        recalcComponents(g_comp, cc.grey);
     }
 
-    if (processComponents(*this, w, g_comp, som)) {
+    if (processComponents(*this, expr, g_comp, som)) {
         return true;
     }
 
     // If we're in prefiltering mode, we can run the prefilter reductions and
     // have another shot at accepting the graph.
- if (cc.grey.prefilterReductions && w.prefilter) { - for (u32 i = 0; i < g_comp.size(); i++) { - if (!g_comp[i]) { + if (cc.grey.prefilterReductions && expr.prefilter) { + for (auto &gc : g_comp) { + if (!gc) { continue; } - - prefilterReductions(*g_comp[i], cc); + prefilterReductions(*gc, cc); } - if (processComponents(*this, w, g_comp, som)) { + if (processComponents(*this, expr, g_comp, som)) { return true; } } @@ -482,7 +500,7 @@ bool NG::addGraph(NGWrapper &w) { if (g_comp[i]) { DEBUG_PRINTF("could not compile component %u with %zu vertices\n", i, num_vertices(*g_comp[i])); - throw CompileError(w.expressionIndex, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } } @@ -491,63 +509,60 @@ bool NG::addGraph(NGWrapper &w) { } /** \brief Used from SOM mode to add an arbitrary NGHolder as an engine. */ -bool NG::addHolder(NGHolder &w) { - DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(w)); - assert(allMatchStatesHaveReports(w)); - assert(hasCorrectlyNumberedVertices(w)); +bool NG::addHolder(NGHolder &g) { + DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(g)); + assert(allMatchStatesHaveReports(g)); + assert(hasCorrectlyNumberedVertices(g)); /* We don't update the global minWidth here as we care about the min width * of the whole pattern - not a just a prefix of it. */ bool prefilter = false; - //dumpDotComp(comp, w, *this, 20, "prefix_init"); + //dumpDotComp(comp, g, *this, 20, "prefix_init"); som_type som = SOM_NONE; /* the prefixes created by the SOM code do not themselves track som */ bool utf8 = false; // handling done earlier - reduceGraph(w, som, utf8, cc); + reduceGraph(g, som, utf8, cc); // There may be redundant regions that we can remove if (cc.grey.performGraphSimplification) { - removeRegionRedundancy(w, som); + removeRegionRedundancy(g, som); } // "Short Exhaustible Passthrough" patterns always become outfixes. 
- if (isSEP(w, rm, cc.grey)) { + if (isSEP(g, rm, cc.grey)) { DEBUG_PRINTF("graph is SEP\n"); - if (rose->addOutfix(w)) { + if (rose->addOutfix(g)) { return true; } } - if (splitOffAnchoredAcyclic(*rose, w, cc)) { + if (splitOffAnchoredAcyclic(*rose, g, cc)) { return true; } - if (handleSmallLiteralSets(*rose, w, cc) - || handleFixedWidth(*rose, w, cc.grey)) { + if (handleSmallLiteralSets(*rose, g, cc) + || handleFixedWidth(*rose, g, cc.grey)) { return true; } - if (handleDecoratedLiterals(*rose, w, cc)) { + if (handleDecoratedLiterals(*rose, g, cc)) { return true; } - if (splitOffRose(*rose, w, prefilter, cc)) { + if (doViolet(*rose, g, prefilter, false, rm, cc)) { return true; } - if (splitOffPuffs(*rose, rm, w, prefilter, cc)) { + if (splitOffPuffs(*rose, rm, g, prefilter, cc)) { return true; } - if (splitOffRose(*rose, w, prefilter, cc)) { - return true; - } - if (finalChanceRose(*rose, w, prefilter, cc)) { + if (doViolet(*rose, g, prefilter, true, rm, cc)) { return true; } DEBUG_PRINTF("trying for outfix\n"); - if (rose->addOutfix(w)) { + if (rose->addOutfix(g)) { DEBUG_PRINTF("ok\n"); return true; } @@ -602,24 +617,4 @@ bool NG::addLiteral(const ue2_literal &literal, u32 expr_index, return true; } -NGWrapper::NGWrapper(unsigned int ei, bool highlander_in, bool utf8_in, - bool prefilter_in, som_type som_in, ReportID r, - u64a min_offset_in, u64a max_offset_in, u64a min_length_in) - : expressionIndex(ei), reportId(r), highlander(highlander_in), - utf8(utf8_in), prefilter(prefilter_in), som(som_in), - min_offset(min_offset_in), max_offset(max_offset_in), - min_length(min_length_in) { - // All special nodes/edges are added in NGHolder's constructor. - DEBUG_PRINTF("built %p: expr=%u report=%u%s%s%s%s " - "min_offset=%llu max_offset=%llu min_length=%llu\n", - this, expressionIndex, reportId, - highlander ? " highlander" : "", - utf8 ? " utf8" : "", - prefilter ? " prefilter" : "", - (som != SOM_NONE) ? " som" : "", - min_offset, max_offset, min_length); -} - -NGWrapper::~NGWrapper() {} - } // namespace ue2 diff --git a/src/nfagraph/ng.h b/src/nfagraph/ng.h index 4aa6a7dc7..a5a5c235a 100644 --- a/src/nfagraph/ng.h +++ b/src/nfagraph/ng.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -27,7 +27,7 @@ */ /** \file - * \brief NG, NGHolder, NGWrapper declarations. + * \brief NG declaration. 
 */
 
 #ifndef NG_H
@@ -42,6 +42,7 @@
 #include "util/compile_context.h"
 #include "util/depth.h"
 #include "util/graph.h"
+#include "util/noncopyable.h"
 #include "util/report_manager.h"
 #include "util/ue2_containers.h"
 
@@ -51,41 +52,16 @@
 #include <utility>
 #include <vector>
 
-#include <boost/core/noncopyable.hpp>
-
 namespace ue2 {
 
 struct CompileContext;
 struct ue2_literal;
 
-class NGWrapper : public NGHolder {
-public:
-    NGWrapper(unsigned int expressionIndex, bool highlander, bool utf8,
-              bool prefilter, const som_type som, ReportID rid, u64a min_offset,
-              u64a max_offset, u64a min_length);
-
-    ~NGWrapper() override;
-
-    /** index of the expression represented by this graph, used
-     *  - down the track in error handling
-     *  - identifying parts of an expression in highlander mode
-     */
-    const unsigned int expressionIndex;
-
-    const ReportID reportId; /**< user-visible report id */
-    const bool highlander; /**< user-specified single match only */
-    const bool utf8; /**< UTF-8 mode */
-    const bool prefilter; /**< prefiltering mode */
-    const som_type som; /**< SOM type requested */
-    u64a min_offset; /**< extparam min_offset value */
-    u64a max_offset; /**< extparam max_offset value */
-    u64a min_length; /**< extparam min_length value */
-};
-
+class ExpressionInfo;
 class RoseBuild;
 class SmallWriteBuild;
 
-class NG : boost::noncopyable {
+class NG : noncopyable {
 public:
     NG(const CompileContext &in_cc, size_t num_patterns,
        unsigned in_somPrecision);
@@ -93,14 +69,14 @@ class NG : boost::noncopyable {
 
     /** \brief Consumes a pattern, returns false or throws a CompileError
      * exception if the graph cannot be consumed. */
-    bool addGraph(NGWrapper &w);
+    bool addGraph(ExpressionInfo &expr, std::unique_ptr<NGHolder> g_ptr);
 
     /** \brief Consumes a graph, cut-down version of addGraph for use by SOM
      * processing. */
     bool addHolder(NGHolder &h);
 
-    /** \brief Adds a literal to Rose, used by literal shortcut passes (instead of
-     * using \ref addGraph) */
+    /** \brief Adds a literal to Rose, used by literal shortcut passes (instead
+     * of using \ref addGraph) */
     bool addLiteral(const ue2_literal &lit, u32 expr_index, u32 external_report,
                     bool highlander, som_type som);
 
@@ -127,7 +103,8 @@ class NG : boost::noncopyable {
 *
 * Shared with the small write compiler.
 */
-void reduceGraph(NGHolder &g, som_type som, bool utf8, const CompileContext &cc);
+void reduceGraph(NGHolder &g, som_type som, bool utf8,
+                 const CompileContext &cc);
 
 } // namespace ue2
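`NG` now derives from a project-local `noncopyable` rather than `boost::noncopyable`, which is what lets the Boost include be dropped from this header. A minimal sketch of what such a mixin looks like (an assumed shape; the real `util/noncopyable.h` may differ in detail):

```cpp
#include <type_traits>

namespace ue2_sketch {

// Deleting the copy operations in a base class disables them for every
// class that inherits from it, with no runtime cost.
class noncopyable {
protected:
    noncopyable() = default;
    ~noncopyable() = default;
    noncopyable(const noncopyable &) = delete;
    noncopyable &operator=(const noncopyable &) = delete;
};

} // namespace ue2_sketch

class NGLike : ue2_sketch::noncopyable {}; // hypothetical user, as NG above

static_assert(!std::is_copy_constructible<NGLike>::value,
              "copies are disabled via the mixin");
```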
diff --git a/src/nfagraph/ng_anchored_dots.cpp b/src/nfagraph/ng_anchored_dots.cpp
index ed9c7f486..9a13376d1 100644
--- a/src/nfagraph/ng_anchored_dots.cpp
+++ b/src/nfagraph/ng_anchored_dots.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -208,7 +208,7 @@ void reformAnchoredRepeatsComponent(NGHolder &g,
 
     /* get bounds */
     depth min;
-    depth max = 1;
+    depth max(1);
 
     if (selfLoop) {
         // A self-loop indicates that this is a '.+' or '.*'
@@ -229,9 +229,9 @@ void reformAnchoredRepeatsComponent(NGHolder &g,
             }
         }
 
-        min = 0;
+        min = depth(0);
     } else {
-        min = 1;
+        min = depth(1);
     }
 
     *startBegin = min;
@@ -326,8 +326,8 @@ void reformUnanchoredRepeatsComponent(NGHolder &g,
     }
 
     /* get bounds */
-    depth min = 1;
-    depth max = 1;
+    depth min(1);
+    depth max(1);
 
     if (selfLoop) {
         // A self-loop indicates that this is a '.+' or '.*'
@@ -349,7 +349,7 @@ void reformUnanchoredRepeatsComponent(NGHolder &g,
             DEBUG_PRINTF("min greater than one, skipping\n");
             return;
         }
-        min = 0;
+        min = depth(0);
     }
 
     *startBegin += min;
@@ -502,7 +502,7 @@ void collapseVariableDotRepeat(NGHolder &g, NFAVertex start,
               startEnd->str().c_str());
 
     if (start == g.start && startEnd->is_infinite()) {
-        *startEnd = dots.size();
+        *startEnd = depth(dots.size());
     } else if (startEnd->is_finite()) {
         *startEnd += dots.size();
     }
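The `depth max = 1;` to `depth max(1);` changes above read as a consequence of `depth`'s integer constructor being (or becoming) `explicit`: copy-initialisation from a plain integer stops compiling, while direct-initialisation still works. A reduced illustration of the distinction:

```cpp
#include <cstdint>

// Stand-in for ue2::depth, assuming only that its converting constructor
// is marked explicit.
class DepthSketch {
public:
    explicit DepthSketch(uint32_t v) : val(v) {}
private:
    uint32_t val;
};

void demo() {
    DepthSketch a(1);       // OK: direct-initialisation
    // DepthSketch b = 1;   // ill-formed: explicit constructor
    a = DepthSketch(0);     // assignment must also name the type
}
```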
diff --git a/src/nfagraph/ng_asserts.cpp b/src/nfagraph/ng_asserts.cpp
index c2f0d68f1..8812afadb 100644
--- a/src/nfagraph/ng_asserts.cpp
+++ b/src/nfagraph/ng_asserts.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -47,6 +47,7 @@
 #include "ng_prune.h"
 #include "ng_redundancy.h"
 #include "ng_util.h"
+#include "compiler/compiler.h"
 #include "parser/position.h" // for POS flags
 #include "util/bitutils.h" // for findAndClearLSB_32
 #include "util/boundary_reports.h"
@@ -184,43 +185,45 @@ void findSplitters(const NGHolder &g, const vector<NFAEdge> &asserts,
 }
 
 static
-void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) {
+void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
+                 NFAVertex v, s32 adj) {
     // Don't try and set the report ID of a special vertex.
     assert(!is_special(v, g));
 
     // If there's a report set already, we're replacing it.
     g[v].reports.clear();
 
-    Report ir = rm.getBasicInternalReport(g, adj);
+    Report ir = rm.getBasicInternalReport(expr, adj);
 
     g[v].reports.insert(rm.getInternalId(ir));
     DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj);
 }
 
 static
-NFAVertex makeClone(ReportManager &rm, NGWrapper &g, NFAVertex v,
-                    const CharReach &cr_mask) {
+NFAVertex makeClone(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
+                    NFAVertex v, const CharReach &cr_mask) {
     NFAVertex clone = clone_vertex(g, v);
     g[clone].char_reach &= cr_mask;
     clone_out_edges(g, v, clone);
     clone_in_edges(g, v, clone);
 
     if (v == g.startDs) {
-        if (g.utf8) {
+        if (expr.utf8) {
             g[clone].char_reach &= ~UTF_START_CR;
         }
 
         DEBUG_PRINTF("marked as virt\n");
         g[clone].assert_flags = POS_FLAG_VIRTUAL_START;
 
-        setReportId(rm, g, clone, 0);
+        setReportId(rm, g, expr, clone, 0);
     }
 
     return clone;
 }
 
 static
-void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) {
+void splitVertex(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
+                 NFAVertex v, bool ucp) {
     assert(v != g.start);
     assert(v != g.accept);
     assert(v != g.acceptEod);
@@ -232,14 +235,14 @@ void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) {
     auto has_no_assert = [&g](const NFAEdge &e) { return !g[e].assert_flags; };
 
     // Split v into word/nonword vertices with only asserting out-edges.
-    NFAVertex w_out = makeClone(rm, g, v, cr_word);
-    NFAVertex nw_out = makeClone(rm, g, v, cr_nonword);
+    NFAVertex w_out = makeClone(rm, g, expr, v, cr_word);
+    NFAVertex nw_out = makeClone(rm, g, expr, v, cr_nonword);
     remove_out_edge_if(w_out, has_no_assert, g);
     remove_out_edge_if(nw_out, has_no_assert, g);
 
     // Split v into word/nonword vertices with only asserting in-edges.
-    NFAVertex w_in = makeClone(rm, g, v, cr_word);
-    NFAVertex nw_in = makeClone(rm, g, v, cr_nonword);
+    NFAVertex w_in = makeClone(rm, g, expr, v, cr_word);
+    NFAVertex nw_in = makeClone(rm, g, expr, v, cr_nonword);
     remove_in_edge_if(w_in, has_no_assert, g);
     remove_in_edge_if(nw_in, has_no_assert, g);
@@ -250,7 +253,8 @@ void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) {
 }
 
 static
-void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
+void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
+                  set<NFAEdge> *dead) {
     for (const auto &e : edges_range(g)) {
         u32 flags = g[e].assert_flags;
         if (!flags) {
@@ -363,7 +367,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
         } else if (v_w) {
             /* need to add a word byte */
             NFAVertex vv = add_vertex(g);
-            setReportId(rm, g, vv, -1);
+            setReportId(rm, g, expr, vv, -1);
             g[vv].char_reach = CHARREACH_WORD;
             add_edge(vv, g.accept, g);
             g[e].assert_flags = 0;
@@ -372,7 +376,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
         } else {
             /* need to add a non word byte or see eod */
             NFAVertex vv = add_vertex(g);
-            setReportId(rm, g, vv, -1);
+            setReportId(rm, g, expr, vv, -1);
             g[vv].char_reach = CHARREACH_NONWORD;
             add_edge(vv, g.accept, g);
             g[e].assert_flags = 0;
@@ -416,7 +420,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
         } else if (v_w) {
             /* need to add a word byte */
             NFAVertex vv = add_vertex(g);
-            setReportId(rm, g, vv, -1);
+            setReportId(rm, g, expr, vv, -1);
             g[vv].char_reach = CHARREACH_WORD_UCP_PRE;
             add_edge(vv, g.accept, g);
             g[e].assert_flags = 0;
@@ -425,7 +429,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
         } else {
             /* need to add a non word byte or see eod */
             NFAVertex vv = add_vertex(g);
-            setReportId(rm, g, vv, -1);
+            setReportId(rm, g, expr, vv, -1);
             g[vv].char_reach = CHARREACH_NONWORD_UCP_PRE;
             add_edge(vv, g.accept, g);
             g[e].assert_flags = 0;
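The `resolveEdges()` hunks above materialise end-of-pattern word-boundary asserts as a real one-byte vertex (`CHARREACH_WORD` or `CHARREACH_NONWORD`) in front of accept. For reference, the word-character test those classes encode is the usual `\w` = `[0-9A-Za-z_]` (ASCII sketch; the real `CharReach` is a full 256-bit class):

```cpp
#include <cctype>

// A byte counts as a "word" character for \b purposes if it is
// alphanumeric or an underscore.
static bool isWordByte(unsigned char c) {
    return c == '_' || std::isalnum(c) != 0;
}
```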
setReportId(rm, g, expr, vv, -1); g[vv].char_reach = CHARREACH_NONWORD_UCP_PRE; add_edge(vv, g.accept, g); g[e].assert_flags = 0; @@ -450,7 +454,8 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set *dead) { } } -void resolveAsserts(ReportManager &rm, NGWrapper &g) { +void resolveAsserts(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr) { vector asserts = getAsserts(g); if (asserts.empty()) { return; @@ -460,20 +465,20 @@ void resolveAsserts(ReportManager &rm, NGWrapper &g) { map to_split_ucp; /* by index, for determinism */ findSplitters(g, asserts, &to_split, &to_split_ucp); if (to_split.size() + to_split_ucp.size() > MAX_CLONED_VERTICES) { - throw CompileError(g.expressionIndex, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } for (const auto &m : to_split) { assert(!contains(to_split_ucp, m.first)); - splitVertex(rm, g, m.second, false); + splitVertex(rm, g, expr, m.second, false); } for (const auto &m : to_split_ucp) { - splitVertex(rm, g, m.second, true); + splitVertex(rm, g, expr, m.second, true); } set dead; - resolveEdges(rm, g, &dead); + resolveEdges(rm, g, expr, &dead); remove_edges(dead, g); renumber_vertices(g); @@ -485,15 +490,16 @@ void resolveAsserts(ReportManager &rm, NGWrapper &g) { clearReports(g); } -void ensureCodePointStart(ReportManager &rm, NGWrapper &g) { +void ensureCodePointStart(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr) { /* In utf8 mode there is an implicit assertion that we start at codepoint * boundaries. Assert resolution handles the badness coming from asserts. * The only other source of trouble is startDs->accept connections. */ NFAEdge orig = edge(g.startDs, g.accept, g); - if (g.utf8 && orig) { - DEBUG_PRINTF("rectifying %u\n", g.reportId); - Report ir = rm.getBasicInternalReport(g); + if (expr.utf8 && orig) { + DEBUG_PRINTF("rectifying %u\n", expr.report); + Report ir = rm.getBasicInternalReport(expr); ReportID rep = rm.getInternalId(ir); NFAVertex v_a = add_vertex(g); diff --git a/src/nfagraph/ng_asserts.h b/src/nfagraph/ng_asserts.h index 8183490ac..2534f5714 100644 --- a/src/nfagraph/ng_asserts.h +++ b/src/nfagraph/ng_asserts.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,12 +36,14 @@ namespace ue2 { struct BoundaryReports; -class NGWrapper; +class ExpressionInfo; +class NGHolder; class ReportManager; -void resolveAsserts(ReportManager &rm, NGWrapper &g); +void resolveAsserts(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr); -void ensureCodePointStart(ReportManager &rm, NGWrapper &g); +void ensureCodePointStart(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr); } // namespace ue2 diff --git a/src/nfagraph/ng_builder.cpp b/src/nfagraph/ng_builder.cpp index 4ca0b37e4..60f667f49 100644 --- a/src/nfagraph/ng_builder.cpp +++ b/src/nfagraph/ng_builder.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -28,11 +28,13 @@ /** \file * \brief: NFA Graph Builder: used by Glushkov construction to construct an - * NGWrapper from a parsed expression. + * NGHolder from a parsed expression. 
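The `ensureCodePointStart()` rework above preserves the invariant that UTF-8 matching begins on code point boundaries. As a byte-level reminder of the underlying rule (an illustrative helper, not a Hyperscan API): a byte starts a code point unless it is a `0b10xxxxxx` continuation byte.

```cpp
#include <cstdio>

// Illustrative helper (not a Hyperscan API): a byte begins a UTF-8 code
// point unless it is a continuation byte of the form 0b10xxxxxx.
static bool isCodePointStart(unsigned char b) {
    return (b & 0xC0) != 0x80;
}

int main() {
    const unsigned char snowman[] = {0xE2, 0x98, 0x83}; // U+2603, 3 bytes
    for (unsigned char b : snowman) {
        std::printf("0x%02X starts a code point: %s\n", b,
                    isCodePointStart(b) ? "yes" : "no");
    }
    return 0;
}
```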
*/ + +#include "ng_builder.h" + #include "grey.h" #include "ng.h" -#include "ng_builder.h" #include "ng_util.h" #include "ue2common.h" #include "compiler/compiler.h" // for ParsedExpression @@ -79,7 +81,7 @@ class NFABuilderImpl : public NFABuilder { void cloneRegion(Position first, Position last, unsigned posOffset) override; - unique_ptr getGraph() override; + BuiltExpression getGraph() override; private: /** fetch a vertex given its Position ID. */ @@ -94,8 +96,11 @@ class NFABuilderImpl : public NFABuilder { /** \brief Greybox: used for resource limits. */ const Grey &grey; - /** \brief Underlying NGWrapper graph. */ - unique_ptr graph; + /** \brief Underlying graph. */ + unique_ptr graph; + + /** \brief Underlying expression info. */ + ExpressionInfo expr; /** \brief mapping from position to vertex. Use \ref getVertex for access. * */ @@ -108,12 +113,9 @@ class NFABuilderImpl : public NFABuilder { } // namespace NFABuilderImpl::NFABuilderImpl(ReportManager &rm_in, const Grey &grey_in, - const ParsedExpression &expr) - : rm(rm_in), grey(grey_in), - graph(ue2::make_unique( - expr.index, expr.highlander, expr.utf8, expr.prefilter, expr.som, - expr.id, expr.min_offset, expr.max_offset, expr.min_length)), - vertIdx(N_SPECIALS) { + const ParsedExpression &parsed) + : rm(rm_in), grey(grey_in), graph(ue2::make_unique()), + expr(parsed.expr), vertIdx(N_SPECIALS) { // Reserve space for a reasonably-sized NFA id2vertex.reserve(64); @@ -150,7 +152,7 @@ void NFABuilderImpl::addVertex(Position pos) { (*graph)[v].index = pos; } -unique_ptr NFABuilderImpl::getGraph() { +BuiltExpression NFABuilderImpl::getGraph() { DEBUG_PRINTF("built graph has %zu vertices and %zu edges\n", num_vertices(*graph), num_edges(*graph)); @@ -161,13 +163,13 @@ unique_ptr NFABuilderImpl::getGraph() { throw CompileError("Pattern too large."); } - return move(graph); + return { expr, move(graph) }; } void NFABuilderImpl::setNodeReportID(Position pos, int offsetAdjust) { - Report ir = rm.getBasicInternalReport(*graph, offsetAdjust); + Report ir = rm.getBasicInternalReport(expr, offsetAdjust); DEBUG_PRINTF("setting report id on %u = (%u, %d, %u)\n", - pos, graph->reportId, offsetAdjust, ir.ekey); + pos, expr.report, offsetAdjust, ir.ekey); NFAVertex v = getVertex(pos); auto &reports = (*graph)[v].reports; diff --git a/src/nfagraph/ng_builder.h b/src/nfagraph/ng_builder.h index 5bd95ba9d..9f71b6223 100644 --- a/src/nfagraph/ng_builder.h +++ b/src/nfagraph/ng_builder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -28,7 +28,7 @@ /** \file * \brief: NFA Graph Builder: used by Glushkov construction to construct an - * NGWrapper from a parsed expression. + * NGHolder from a parsed expression. */ #ifndef NG_BUILDER_H @@ -37,22 +37,22 @@ #include "ue2common.h" #include "parser/position.h" +#include "util/noncopyable.h" #include -#include namespace ue2 { class CharReach; -class NGWrapper; class ReportManager; +struct BuiltExpression; struct CompileContext; class ParsedExpression; /** \brief Abstract builder interface. Use \ref makeNFABuilder to construct * one. Used by GlushkovBuildState. 
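The new `getGraph()` returning a `BuiltExpression` pairs the `ExpressionInfo` with the built holder in a single value, moving the graph out of the builder. A loose sketch of that hand-off pattern, with hypothetical stand-in types:

```cpp
#include <memory>
#include <utility>

// Hypothetical stand-ins for ExpressionInfo and NGHolder; the point is the
// hand-off of metadata plus a moved unique_ptr in one aggregate return.
struct Info { unsigned index; };
struct Holder {};

struct Built {
    Info info;
    std::unique_ptr<Holder> graph;
};

class Builder {
public:
    Built getGraph() {
        // After this, the builder's graph member is null: the builder is
        // spent, as the NFABuilder documentation above warns.
        return {info, std::move(graph)};
    }

private:
    Info info{7};
    std::unique_ptr<Holder> graph{new Holder()};
};

int main() {
    Builder b;
    Built built = b.getGraph();
    return built.graph ? 0 : 1; // graph travelled with the metadata
}
```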
*/ -class NFABuilder : boost::noncopyable { +class NFABuilder : noncopyable { public: virtual ~NFABuilder(); @@ -83,10 +83,10 @@ class NFABuilder : boost::noncopyable { unsigned posOffset) = 0; /** - * \brief Returns the built NGWrapper graph. + * \brief Returns the built NGHolder graph and ExpressionInfo. * Note that this builder cannot be used after this call. */ - virtual std::unique_ptr getGraph() = 0; + virtual BuiltExpression getGraph() = 0; }; /** Construct a usable NFABuilder. */ diff --git a/src/nfagraph/ng_calc_components.cpp b/src/nfagraph/ng_calc_components.cpp index da6775e44..bfe73eb27 100644 --- a/src/nfagraph/ng_calc_components.cpp +++ b/src/nfagraph/ng_calc_components.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -55,6 +55,7 @@ #include "ng_prune.h" #include "ng_undirected.h" #include "ng_util.h" +#include "grey.h" #include "ue2common.h" #include "util/graph_range.h" #include "util/make_unique.h" @@ -63,6 +64,7 @@ #include #include +#include using namespace std; @@ -218,66 +220,43 @@ vector findShellEdges(const NGHolder &g, return shell_edges; } -static -void removeVertices(const flat_set &verts, NFAUndirectedGraph &ug, - ue2::unordered_map &old2new, - ue2::unordered_map &new2old) { - for (auto v : verts) { - assert(contains(old2new, v)); - auto uv = old2new.at(v); - clear_vertex(uv, ug); - remove_vertex(uv, ug); - old2new.erase(v); - new2old.erase(uv); - } -} - -static -void renumberVertices(NFAUndirectedGraph &ug) { - u32 vertexIndex = 0; - for (auto uv : vertices_range(ug)) { - put(boost::vertex_index, ug, uv, vertexIndex++); - } -} - /** * Common code called by calc- and recalc- below. Splits the given holder into * one or more connected components, adding them to the comps deque. */ static -void splitIntoComponents(const NGHolder &g, deque> &comps, +void splitIntoComponents(unique_ptr g, + deque> &comps, const depth &max_head_depth, const depth &max_tail_depth, bool *shell_comp) { - DEBUG_PRINTF("graph has %zu vertices\n", num_vertices(g)); + DEBUG_PRINTF("graph has %zu vertices\n", num_vertices(*g)); assert(shell_comp); *shell_comp = false; // Compute "shell" head and tail subgraphs. - vector depths; - calcDepths(g, depths); - auto head_shell = findHeadShell(g, depths, max_head_depth); - auto tail_shell = findTailShell(g, depths, max_tail_depth); + auto depths = calcBidiDepths(*g); + auto head_shell = findHeadShell(*g, depths, max_head_depth); + auto tail_shell = findTailShell(*g, depths, max_tail_depth); for (auto v : head_shell) { tail_shell.erase(v); } - if (head_shell.size() + tail_shell.size() + N_SPECIALS >= num_vertices(g)) { + if (head_shell.size() + tail_shell.size() + N_SPECIALS >= + num_vertices(*g)) { DEBUG_PRINTF("all in shell component\n"); - comps.push_back(cloneHolder(g)); + comps.push_back(std::move(g)); *shell_comp = true; return; } - vector shell_edges = findShellEdges(g, head_shell, tail_shell); + vector shell_edges = findShellEdges(*g, head_shell, tail_shell); DEBUG_PRINTF("%zu vertices in head, %zu in tail, %zu shell edges\n", head_shell.size(), tail_shell.size(), shell_edges.size()); - NFAUndirectedGraph ug; ue2::unordered_map old2new; - - createUnGraph(g, true, true, ug, old2new); + auto ug = createUnGraph(*g, true, true, old2new); // Construct reverse mapping. 
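The switch above from `boost::noncopyable` to a `util/noncopyable.h` base presumably drops a Boost dependency in favour of C++11 deleted members. Such a base is typically shaped like this sketch (the real header may differ):

```cpp
// A minimal sketch of what a util/noncopyable.h replacement for
// boost::noncopyable usually looks like in C++11.
namespace ue2 {

class noncopyable {
protected:
    noncopyable() = default;
    ~noncopyable() = default;
    noncopyable(const noncopyable &) = delete;
    noncopyable &operator=(const noncopyable &) = delete;
};

} // namespace ue2

// Deriving disables copying for the whole class:
class BuilderLike : ue2::noncopyable {};

int main() {
    BuilderLike a;
    // BuilderLike b = a; // error: copy constructor is deleted
    (void)a;
    return 0;
}
```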
ue2::unordered_map new2old; @@ -285,20 +264,26 @@ void splitIntoComponents(const NGHolder &g, deque> &comps, new2old.emplace(m.second, m.first); } - // Remove shells from undirected graph and renumber so we have dense - // vertex indices. - removeVertices(head_shell, ug, old2new, new2old); - removeVertices(tail_shell, ug, old2new, new2old); - renumberVertices(ug); + // Filter shell vertices from undirected graph. + unordered_set shell_undir_vertices; + for (auto v : head_shell) { + shell_undir_vertices.insert(old2new.at(v)); + } + for (auto v : tail_shell) { + shell_undir_vertices.insert(old2new.at(v)); + } + auto filtered_ug = boost::make_filtered_graph( + ug, boost::keep_all(), make_bad_vertex_filter(&shell_undir_vertices)); + // Actually run the connected components algorithm. map split_components; const u32 num = connected_components( - ug, boost::make_assoc_property_map(split_components)); + filtered_ug, boost::make_assoc_property_map(split_components)); assert(num > 0); if (num == 1 && shell_edges.empty()) { DEBUG_PRINTF("single component\n"); - comps.push_back(cloneHolder(g)); + comps.push_back(std::move(g)); return; } @@ -313,7 +298,7 @@ void splitIntoComponents(const NGHolder &g, deque> &comps, assert(contains(new2old, uv)); NFAVertex v = new2old.at(uv); verts[c].push_back(v); - DEBUG_PRINTF("vertex %zu is in comp %u\n", g[v].index, c); + DEBUG_PRINTF("vertex %zu is in comp %u\n", (*g)[v].index, c); } ue2::unordered_map v_map; // temp map for fillHolder @@ -328,12 +313,12 @@ void splitIntoComponents(const NGHolder &g, deque> &comps, auto gc = ue2::make_unique(); v_map.clear(); - fillHolder(gc.get(), g, vv, &v_map); + fillHolder(gc.get(), *g, vv, &v_map); // Remove shell edges, which will get their own component. for (const auto &e : shell_edges) { - auto cu = v_map.at(source(e, g)); - auto cv = v_map.at(target(e, g)); + auto cu = v_map.at(source(e, *g)); + auto cv = v_map.at(target(e, *g)); assert(edge(cu, cv, *gc).second); remove_edge(cu, cv, *gc); } @@ -352,7 +337,7 @@ void splitIntoComponents(const NGHolder &g, deque> &comps, auto gc = ue2::make_unique(); v_map.clear(); - fillHolder(gc.get(), g, vv, &v_map); + fillHolder(gc.get(), *g, vv, &v_map); pruneUseless(*gc); DEBUG_PRINTF("shell edge component %zu has %zu vertices\n", @@ -374,33 +359,39 @@ void splitIntoComponents(const NGHolder &g, deque> &comps, })); } -deque> calcComponents(const NGHolder &g) { +deque> calcComponents(unique_ptr g, + const Grey &grey) { deque> comps; // For trivial cases, we needn't bother running the full // connected_components algorithm. 
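Rather than physically deleting the shell vertices and renumbering, the new code runs `connected_components` over a `boost::filtered_graph` view that simply hides them. A self-contained example of that approach (the `NotInSet` predicate is illustrative, standing in for `make_bad_vertex_filter()`):

```cpp
#include <iostream>
#include <map>
#include <unordered_set>

#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/connected_components.hpp>
#include <boost/graph/filtered_graph.hpp>
#include <boost/property_map/property_map.hpp>

using UGraph = boost::adjacency_list<boost::vecS, boost::vecS,
                                     boost::undirectedS>;
using UVertex = boost::graph_traits<UGraph>::vertex_descriptor;

// Vertex predicate that hides a set of "shell" vertices from the view.
struct NotInSet {
    const std::unordered_set<UVertex> *bad;
    bool operator()(UVertex v) const { return bad->find(v) == bad->end(); }
};

int main() {
    UGraph g(5);
    boost::add_edge(0, 1, g);
    boost::add_edge(2, 3, g);
    boost::add_edge(3, 4, g);

    std::unordered_set<UVertex> shell{2}; // pretend vertex 2 is "shell"
    auto fg = boost::make_filtered_graph(g, boost::keep_all(),
                                         NotInSet{&shell});

    std::map<UVertex, int> comp;
    int num = boost::connected_components(
        fg, boost::make_assoc_property_map(comp));
    std::cout << num << " components\n"; // 2: {0,1} and {3,4}; 2 is hidden
    return 0;
}
```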
- if (isAlternationOfClasses(g)) { - comps.push_back(cloneHolder(g)); + if (!grey.calcComponents || isAlternationOfClasses(*g)) { + comps.push_back(std::move(g)); return comps; } bool shell_comp = false; - splitIntoComponents(g, comps, MAX_HEAD_SHELL_DEPTH, MAX_TAIL_SHELL_DEPTH, - &shell_comp); + splitIntoComponents(std::move(g), comps, depth(MAX_HEAD_SHELL_DEPTH), + depth(MAX_TAIL_SHELL_DEPTH), &shell_comp); if (shell_comp) { DEBUG_PRINTF("re-running on shell comp\n"); assert(!comps.empty()); - auto sc = move(comps.back()); + auto sc = std::move(comps.back()); comps.pop_back(); - splitIntoComponents(*sc, comps, 0, 0, &shell_comp); + splitIntoComponents(std::move(sc), comps, depth(0), depth(0), + &shell_comp); } DEBUG_PRINTF("finished; split into %zu components\n", comps.size()); return comps; } -void recalcComponents(deque> &comps) { +void recalcComponents(deque> &comps, const Grey &grey) { + if (!grey.calcComponents) { + return; + } + deque> out; for (auto &gc : comps) { @@ -409,14 +400,13 @@ void recalcComponents(deque> &comps) { } if (isAlternationOfClasses(*gc)) { - out.push_back(move(gc)); + out.push_back(std::move(gc)); continue; } - auto gc_comps = calcComponents(*gc); - for (auto &elem : gc_comps) { - out.push_back(move(elem)); - } + auto gc_comps = calcComponents(std::move(gc), grey); + out.insert(end(out), std::make_move_iterator(begin(gc_comps)), + std::make_move_iterator(end(gc_comps))); } // Replace comps with our recalculated list. diff --git a/src/nfagraph/ng_calc_components.h b/src/nfagraph/ng_calc_components.h index e68c81fcc..1bcdc5f81 100644 --- a/src/nfagraph/ng_calc_components.h +++ b/src/nfagraph/ng_calc_components.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,12 +39,15 @@ namespace ue2 { class NGHolder; +struct Grey; bool isAlternationOfClasses(const NGHolder &g); -std::deque> calcComponents(const NGHolder &g); +std::deque> +calcComponents(std::unique_ptr g, const Grey &grey); -void recalcComponents(std::deque> &comps); +void recalcComponents(std::deque> &comps, + const Grey &grey); } // namespace ue2 diff --git a/src/nfagraph/ng_depth.cpp b/src/nfagraph/ng_depth.cpp index 63e0e46b7..67a6b27b4 100644 --- a/src/nfagraph/ng_depth.cpp +++ b/src/nfagraph/ng_depth.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief NFA graph vertex depth calculations. 
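`recalcComponents()` now splices the recalculated components into the output with move iterators instead of a hand-written loop. The idiom in isolation:

```cpp
#include <deque>
#include <iostream>
#include <iterator>
#include <memory>
#include <string>

int main() {
    std::deque<std::unique_ptr<std::string>> src, out;
    src.push_back(std::make_unique<std::string>("a"));
    src.push_back(std::make_unique<std::string>("b"));

    // Move-append in one call, as recalcComponents() now does with gc_comps.
    out.insert(end(out), std::make_move_iterator(begin(src)),
               std::make_move_iterator(end(src)));

    std::cout << out.size() << " elements moved\n"; // 2
    return 0;
}
```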
*/ #include "ng_depth.h" @@ -123,34 +124,35 @@ struct StartFilter { } // namespace -template +template static -void findLoopReachable(const GraphT &g, - const typename GraphT::vertex_descriptor srcVertex, - vector &deadNodes) { - typedef typename GraphT::edge_descriptor EdgeT; - typedef typename GraphT::vertex_descriptor VertexT; - typedef set EdgeSet; +vector findLoopReachable(const Graph &g, + const typename Graph::vertex_descriptor src) { + vector deadNodes(num_vertices(g)); + + using Edge = typename Graph::edge_descriptor; + using Vertex = typename Graph::vertex_descriptor; + using EdgeSet = set; EdgeSet deadEdges; BackEdges be(deadEdges); - depth_first_search(g, visitor(be).root_vertex(srcVertex)); + depth_first_search(g, visitor(be).root_vertex(src)); auto af = make_bad_edge_filter(&deadEdges); auto acyclic_g = make_filtered_graph(g, af); - vector topoOrder; /* actually reverse topological order */ + vector topoOrder; /* actually reverse topological order */ topoOrder.reserve(deadNodes.size()); topological_sort(acyclic_g, back_inserter(topoOrder)); for (const auto &e : deadEdges) { - u32 srcIdx = g[source(e, g)].index; + size_t srcIdx = g[source(e, g)].index; if (srcIdx != NODE_START_DOTSTAR) { deadNodes[srcIdx] = true; } } - for (VertexT v : reverse(topoOrder)) { + for (auto v : reverse(topoOrder)) { for (const auto &e : in_edges_range(v, g)) { if (deadNodes[g[source(e, g)].index]) { deadNodes[g[v].index] = true; @@ -158,6 +160,8 @@ void findLoopReachable(const GraphT &g, } } } + + return deadNodes; } template @@ -269,12 +273,11 @@ void calcAndStoreDepth(const Graph &g, } } -void calcDepths(const NGHolder &g, std::vector &depths) { +vector calcDepths(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); const size_t numVertices = num_vertices(g); - depths.clear(); - depths.resize(numVertices); + vector depths(numVertices); vector dMin; vector dMax; @@ -282,8 +285,7 @@ void calcDepths(const NGHolder &g, std::vector &depths) { * create a filtered graph for max depth calculations: all nodes/edges * reachable from a loop need to be removed */ - vector deadNodes(numVertices); - findLoopReachable(g, g.start, deadNodes); + auto deadNodes = findLoopReachable(g, g.start); DEBUG_PRINTF("doing start\n"); calcAndStoreDepth(g, g.start, deadNodes, dMin, dMax, depths, @@ -291,14 +293,15 @@ void calcDepths(const NGHolder &g, std::vector &depths) { DEBUG_PRINTF("doing startds\n"); calcAndStoreDepth(g, g.startDs, deadNodes, dMin, dMax, depths, &NFAVertexDepth::fromStartDotStar); + + return depths; } -void calcDepths(const NGHolder &g, std::vector &depths) { +vector calcRevDepths(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); const size_t numVertices = num_vertices(g); - depths.clear(); - depths.resize(numVertices); + vector depths(numVertices); vector dMin; vector dMax; @@ -312,8 +315,7 @@ void calcDepths(const NGHolder &g, std::vector &depths) { * create a filtered graph for max depth calculations: all nodes/edges * reachable from a loop need to be removed */ - vector deadNodes(numVertices); - findLoopReachable(rg, g.acceptEod, deadNodes); + auto deadNodes = findLoopReachable(rg, g.acceptEod); DEBUG_PRINTF("doing accept\n"); calcAndStoreDepth( @@ -324,14 +326,15 @@ void calcDepths(const NGHolder &g, std::vector &depths) { calcAndStoreDepth( rg, g.acceptEod, deadNodes, dMin, dMax, depths, &NFAVertexRevDepth::toAcceptEod); + + return depths; } -void calcDepths(const NGHolder &g, vector &depths) { +vector calcBidiDepths(const NGHolder &g) { 
assert(hasCorrectlyNumberedVertices(g)); const size_t numVertices = num_vertices(g); - depths.clear(); - depths.resize(numVertices); + vector depths(numVertices); vector dMin; vector dMax; @@ -339,8 +342,7 @@ void calcDepths(const NGHolder &g, vector &depths) { * create a filtered graph for max depth calculations: all nodes/edges * reachable from a loop need to be removed */ - vector deadNodes(numVertices); - findLoopReachable(g, g.start, deadNodes); + auto deadNodes = findLoopReachable(g, g.start); DEBUG_PRINTF("doing start\n"); calcAndStoreDepth( @@ -354,8 +356,7 @@ void calcDepths(const NGHolder &g, vector &depths) { /* Now go backwards */ typedef reverse_graph RevNFAGraph; const RevNFAGraph rg(g); - deadNodes.assign(numVertices, false); - findLoopReachable(rg, g.acceptEod, deadNodes); + deadNodes = findLoopReachable(rg, g.acceptEod); DEBUG_PRINTF("doing accept\n"); calcAndStoreDepth( @@ -366,26 +367,27 @@ void calcDepths(const NGHolder &g, vector &depths) { calcAndStoreDepth( rg, g.acceptEod, deadNodes, dMin, dMax, depths, &NFAVertexBidiDepth::toAcceptEod); + + return depths; } -void calcDepthsFrom(const NGHolder &g, const NFAVertex src, - vector &depths) { +vector calcDepthsFrom(const NGHolder &g, const NFAVertex src) { assert(hasCorrectlyNumberedVertices(g)); const size_t numVertices = num_vertices(g); - vector deadNodes(numVertices); - findLoopReachable(g, g.start, deadNodes); + auto deadNodes = findLoopReachable(g, g.start); vector dMin, dMax; calcDepthFromSource(g, src, deadNodes, dMin, dMax); - depths.clear(); - depths.resize(numVertices); + vector depths(numVertices); for (auto v : vertices_range(g)) { - u32 idx = g[v].index; + auto idx = g[v].index; depths.at(idx) = getDepths(idx, dMin, dMax); } + + return depths; } } // namespace ue2 diff --git a/src/nfagraph/ng_depth.h b/src/nfagraph/ng_depth.h index 16231ea1e..36cca87e8 100644 --- a/src/nfagraph/ng_depth.h +++ b/src/nfagraph/ng_depth.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,23 +26,22 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief NFA graph vertex depth calculations. */ -#ifndef STRUCTURAL_ANALYSIS_H -#define STRUCTURAL_ANALYSIS_H +#ifndef NG_DEPTH_H +#define NG_DEPTH_H -#include "nfagraph/ng_holder.h" #include "ue2common.h" +#include "nfagraph/ng_holder.h" #include "util/depth.h" #include namespace ue2 { -class NGHolder; - /** * \brief Encapsulates min/max depths relative to the start and startDs * vertices. @@ -72,28 +71,29 @@ struct NFAVertexBidiDepth { }; /** - * \brief Calculate depths from start and startDs. - * Fills the vector \p depths (indexed by \p vertex_index). + * \brief Calculate depths from start and startDs. Returns them in a vector, + * indexed by vertex index. */ -void calcDepths(const NGHolder &g, std::vector &depths); +std::vector calcDepths(const NGHolder &g); /** - * \brief Calculate depths to accept and acceptEod. - * Fills the vector \p depths (indexed by \p vertex_index). + * \brief Calculate depths to accept and acceptEod. Returns them in a vector, + * indexed by vertex index. */ -void calcDepths(const NGHolder &g, std::vector &depths); +std::vector calcRevDepths(const NGHolder &g); /** - * \brief Calculate depths to/from all special vertices. - * Fills the vector \p depths (indexed by \p vertex_index). 
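`calcRevDepths()` and the reverse half of `calcBidiDepths()` run forward algorithms over a `boost::reverse_graph` adaptor rather than materialising a reversed copy. In isolation (illustrative graph type; the adaptor needs a bidirectional graph):

```cpp
#include <iostream>

#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/reverse_graph.hpp>

using Graph = boost::adjacency_list<boost::vecS, boost::vecS,
                                    boost::bidirectionalS>;

int main() {
    Graph g(3);
    boost::add_edge(0, 1, g);
    boost::add_edge(1, 2, g);

    // Zero-copy view with all edges flipped.
    boost::reverse_graph<Graph> rg(g);

    std::cout << "out_degree(2, g)  = " << boost::out_degree(2, g) << '\n';  // 0
    std::cout << "out_degree(2, rg) = " << boost::out_degree(2, rg) << '\n'; // 1
    return 0;
}
```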
+ * \brief Calculate depths to/from all special vertices. Returns them in a + * vector, indexed by vertex index. */ -void calcDepths(const NGHolder &g, std::vector &depths); +std::vector calcBidiDepths(const NGHolder &g); -/** Calculate the (min, max) depths from the given \p src to every vertex in - * the graph and return them in a vector, indexed by \p vertex_index. */ -void calcDepthsFrom(const NGHolder &g, const NFAVertex src, - std::vector &depths); +/** + * \brief Calculate the (min, max) depths from the given \p src to every vertex + * in the graph and return them in a vector, indexed by \p vertex_index. + */ +std::vector calcDepthsFrom(const NGHolder &g, const NFAVertex src); } // namespace ue2 -#endif +#endif // NG_DEPTH_H diff --git a/src/nfagraph/ng_dominators.cpp b/src/nfagraph/ng_dominators.cpp index d01af9947..50536b760 100644 --- a/src/nfagraph/ng_dominators.cpp +++ b/src/nfagraph/ng_dominators.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -62,8 +62,8 @@ unordered_map calcDominators(const Graph &g, vector vertices_by_dfnum(num_verts, Graph::null_vertex()); // Output map. - unordered_map doms; - auto dom_map = make_assoc_property_map(doms); + vector doms(num_verts, Graph::null_vertex()); + auto dom_map = make_iterator_property_map(doms.begin(), index_map); boost_ue2::lengauer_tarjan_dominator_tree(g, source, index_map, dfnum_map, parent_map, vertices_by_dfnum, @@ -71,10 +71,12 @@ unordered_map calcDominators(const Graph &g, /* Translate back to an NFAVertex map */ unordered_map doms2; - for (const auto &e : doms) { - NFAVertex f(e.first); - NFAVertex s(e.second); - doms2[f] = s; + doms2.reserve(num_verts); + for (auto v : vertices_range(g)) { + auto dom_of_v = doms[g[v].index]; + if (dom_of_v) { + doms2.emplace(v, dom_of_v); + } } return doms2; } diff --git a/src/nfagraph/ng_dump.cpp b/src/nfagraph/ng_dump.cpp index fc840f251..094d24015 100644 --- a/src/nfagraph/ng_dump.cpp +++ b/src/nfagraph/ng_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,24 +35,25 @@ #include "config.h" -#include "ng_dump.h" +#include "nfagraph/ng_dump.h" -#include "hwlm/hwlm_build.h" -#include "ng.h" -#include "ng_util.h" -#include "parser/position.h" +#include "hs_compile.h" /* for HS_MODE_* flags */ #include "ue2common.h" +#include "compiler/compiler.h" +#include "hwlm/hwlm_build.h" #include "nfa/accel.h" #include "nfa/nfa_internal.h" // for MO_INVALID_IDX -#include "smallwrite/smallwrite_dump.h" +#include "nfagraph/ng.h" +#include "nfagraph/ng_util.h" +#include "parser/position.h" #include "rose/rose_build.h" #include "rose/rose_internal.h" +#include "smallwrite/smallwrite_dump.h" #include "util/bitutils.h" #include "util/dump_charclass.h" #include "util/report.h" #include "util/report_manager.h" #include "util/ue2string.h" -#include "hs_compile.h" /* for HS_MODE_* flags */ #include #include @@ -287,13 +288,13 @@ void dumpGraphImpl(const char *name, const GraphT &g, // manual instantiation of templated dumpGraph above. 
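The `ng_dominators.cpp` hunk above swaps an `unordered_map`-backed property map for a vector indexed by `vertex_index`, a common BGL optimisation when vertex indices are dense. The idiom, shown here with `connected_components` for brevity rather than the dominator tree:

```cpp
#include <iostream>
#include <vector>

#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/connected_components.hpp>
#include <boost/property_map/property_map.hpp>

using Graph = boost::adjacency_list<boost::vecS, boost::vecS,
                                    boost::undirectedS>;

int main() {
    Graph g(4);
    boost::add_edge(0, 1, g);
    boost::add_edge(2, 3, g);

    // Vector-backed property map keyed by vertex_index: O(1) access with
    // no hashing, which is the point of the ng_dominators.cpp change.
    std::vector<int> comp(boost::num_vertices(g));
    auto pmap = boost::make_iterator_property_map(
        comp.begin(), boost::get(boost::vertex_index, g));

    int num = boost::connected_components(g, pmap);
    std::cout << num << " components\n"; // 2
    return 0;
}
```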
template void dumpGraphImpl(const char *, const NGHolder &); -void dumpDotWrapperImpl(const NGWrapper &nw, const char *name, - const Grey &grey) { +void dumpDotWrapperImpl(const NGHolder &g, const ExpressionInfo &expr, + const char *name, const Grey &grey) { if (grey.dumpFlags & Grey::DUMP_INT_GRAPH) { stringstream ss; - ss << grey.dumpPath << "Expr_" << nw.expressionIndex << "_" << name << ".dot"; + ss << grey.dumpPath << "Expr_" << expr.index << "_" << name << ".dot"; DEBUG_PRINTF("dumping dot graph to '%s'\n", ss.str().c_str()); - dumpGraphImpl(ss.str().c_str(), nw); + dumpGraphImpl(ss.str().c_str(), g); } } diff --git a/src/nfagraph/ng_dump.h b/src/nfagraph/ng_dump.h index b20d9f1be..077f07cef 100644 --- a/src/nfagraph/ng_dump.h +++ b/src/nfagraph/ng_dump.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,7 +48,7 @@ namespace ue2 { class NGHolder; class NG; -class NGWrapper; +class ExpressionInfo; class ReportManager; // Implementations for stubs below -- all have the suffix "Impl". @@ -61,7 +61,8 @@ void dumpGraphImpl(const char *name, const GraphT &g); template void dumpGraphImpl(const char *name, const GraphT &g, const ReportManager &rm); -void dumpDotWrapperImpl(const NGWrapper &w, const char *name, const Grey &grey); +void dumpDotWrapperImpl(const NGHolder &g, const ExpressionInfo &expr, + const char *name, const Grey &grey); void dumpComponentImpl(const NGHolder &g, const char *name, u32 expr, u32 comp, const Grey &grey); @@ -88,10 +89,10 @@ static inline void dumpGraph(UNUSED const char *name, UNUSED const GraphT &g) { // Stubs which call through to dump code if compiled in. UNUSED static inline -void dumpDotWrapper(UNUSED const NGWrapper &w, UNUSED const char *name, - UNUSED const Grey &grey) { +void dumpDotWrapper(UNUSED const NGHolder &g, UNUSED const ExpressionInfo &expr, + UNUSED const char *name, UNUSED const Grey &grey) { #ifdef DUMP_SUPPORT - dumpDotWrapperImpl(w, name, grey); + dumpDotWrapperImpl(g, expr, name, grey); #endif } diff --git a/src/nfagraph/ng_equivalence.cpp b/src/nfagraph/ng_equivalence.cpp index 32a392a6d..438e5ea8a 100644 --- a/src/nfagraph/ng_equivalence.cpp +++ b/src/nfagraph/ng_equivalence.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -63,10 +63,10 @@ class VertexInfo; struct VertexInfoPtrCmp { // for flat_set bool operator()(const VertexInfo *a, const VertexInfo *b) const; - // for unordered_set - size_t operator()(const VertexInfo *a) const; }; +using VertexInfoSet = flat_set; + /** Precalculated (and maintained) information about a vertex. 
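The dump path built above is of the form `<dumpPath>Expr_<index>_<name>.dot`. A minimal version of that flow with plain BGL (illustrative path and graph, not Hyperscan's dump machinery):

```cpp
#include <fstream>
#include <sstream>

#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/graphviz.hpp>

// Minimal sketch of the dump flow: build a "Expr_<index>_<name>.dot" path
// and write the graph in Graphviz dot format.
int main() {
    boost::adjacency_list<> g(3);
    boost::add_edge(0, 1, g);
    boost::add_edge(1, 2, g);

    std::ostringstream ss;
    ss << "/tmp/" << "Expr_" << 0 << "_" << "final" << ".dot";

    std::ofstream os(ss.str());
    boost::write_graphviz(os, g); // digraph with default numeric labels
    return 0;
}
```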
*/ class VertexInfo { public: @@ -74,8 +74,8 @@ class VertexInfo { : v(v_in), vert_index(g[v].index), cr(g[v].char_reach), equivalence_class(~0), vertex_flags(g[v].assert_flags) {} - flat_set pred; //!< predecessors of this vertex - flat_set succ; //!< successors of this vertex + VertexInfoSet pred; //!< predecessors of this vertex + VertexInfoSet succ; //!< successors of this vertex NFAVertex v; size_t vert_index; CharReach cr; @@ -86,21 +86,11 @@ class VertexInfo { unsigned vertex_flags; }; -} - -typedef ue2::unordered_set VertexInfoSet; - // compare two vertex info pointers on their vertex index bool VertexInfoPtrCmp::operator()(const VertexInfo *a, const VertexInfo *b) const { return a->vert_index < b->vert_index; } -// provide a "hash" for vertex info pointer by returning its vertex index -size_t VertexInfoPtrCmp::operator()(const VertexInfo *a) const { - return a->vert_index; -} - -namespace { // to avoid traversing infomap each time we need to check the class during // partitioning, we will cache the information pertaining to a particular class @@ -133,7 +123,7 @@ class ClassInfo { friend size_t hash_value(const ClassInfo &c) { size_t val = 0; - boost::hash_combine(val, boost::hash_range(begin(c.rs), end(c.rs))); + boost::hash_combine(val, c.rs); boost::hash_combine(val, c.vertex_flags); boost::hash_combine(val, c.cr); boost::hash_combine(val, c.adjacent_cr); @@ -342,9 +332,9 @@ vector partitionGraph(vector> &infos, vector rdepths; if (eq == LEFT_EQUIVALENCE) { - calcDepths(g, depths); + depths = calcDepths(g); } else { - calcDepths(g, rdepths); + rdepths = calcRevDepths(g); } // partition the graph based on CharReach diff --git a/src/nfagraph/ng_expr_info.cpp b/src/nfagraph/ng_expr_info.cpp index b43c7fd1f..5f5bbea74 100644 --- a/src/nfagraph/ng_expr_info.cpp +++ b/src/nfagraph/ng_expr_info.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -27,8 +27,8 @@ */ /** \file - * \brief Code for discovering properties of an NGWrapper used by - * hs_expression_info. + * \brief Code for discovering properties of an NFA graph used by + * hs_expression_info(). 
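`VertexInfoPtrCmp` orders `VertexInfo` pointers by their vertex index rather than by address, which keeps `flat_set` iteration deterministic between runs (addresses vary; indices do not). The pattern in miniature:

```cpp
#include <iostream>
#include <set>
#include <vector>

// Illustrative mirror of VertexInfoPtrCmp: ordering pointers by a stable
// payload index (not by address) keeps iteration order deterministic,
// which matters for reproducible compiles.
struct Node {
    size_t index;
};

struct NodePtrCmp {
    bool operator()(const Node *a, const Node *b) const {
        return a->index < b->index;
    }
};

int main() {
    std::vector<Node> storage{{2}, {0}, {1}};
    std::set<const Node *, NodePtrCmp> s;
    for (const auto &n : storage) {
        s.insert(&n);
    }
    for (const Node *n : s) {
        std::cout << n->index << ' '; // always: 0 1 2
    }
    std::cout << '\n';
    return 0;
}
```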
*/ #include "ng_expr_info.h" @@ -37,10 +37,14 @@ #include "ng_asserts.h" #include "ng_depth.h" #include "ng_edge_redundancy.h" +#include "ng_extparam.h" +#include "ng_fuzzy.h" #include "ng_holder.h" +#include "ng_prune.h" #include "ng_reports.h" #include "ng_util.h" #include "ue2common.h" +#include "compiler/expression_info.h" #include "parser/position.h" // for POS flags #include "util/boundary_reports.h" #include "util/compile_context.h" @@ -58,42 +62,42 @@ namespace ue2 { /* get rid of leading \b and multiline ^ vertices */ static -void removeLeadingVirtualVerticesFromRoot(NGWrapper &w, NFAVertex root) { +void removeLeadingVirtualVerticesFromRoot(NGHolder &g, NFAVertex root) { vector victims; - for (auto v : adjacent_vertices_range(root, w)) { - if (w[v].assert_flags & POS_FLAG_VIRTUAL_START) { + for (auto v : adjacent_vertices_range(root, g)) { + if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) { DEBUG_PRINTF("(?m)^ vertex or leading \\[bB] vertex\n"); victims.push_back(v); } } for (auto u : victims) { - for (auto v : adjacent_vertices_range(u, w)) { - add_edge_if_not_present(root, v, w); + for (auto v : adjacent_vertices_range(u, g)) { + add_edge_if_not_present(root, v, g); } } - remove_vertices(victims, w); + remove_vertices(victims, g); } static -void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v, +void checkVertex(const ReportManager &rm, const NGHolder &g, NFAVertex v, const vector &depths, DepthMinMax &info) { - if (is_any_accept(v, w)) { + if (is_any_accept(v, g)) { return; } - if (is_any_start(v, w)) { - info.min = 0; + if (is_any_start(v, g)) { + info.min = depth(0); info.max = max(info.max, depth(0)); return; } - u32 idx = w[v].index; + u32 idx = g[v].index; assert(idx < depths.size()); const DepthMinMax &d = depths.at(idx); - for (ReportID report_id : w[v].reports) { + for (ReportID report_id : g[v].reports) { const Report &report = rm.getReport(report_id); assert(report.type == EXTERNAL_CALLBACK); @@ -118,7 +122,7 @@ void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v, rd.max = min(rd.max, max_offset); } - DEBUG_PRINTF("vertex %zu report %u: %s\n", w[v].index, report_id, + DEBUG_PRINTF("vertex %zu report %u: %s\n", g[v].index, report_id, rd.str().c_str()); info = unionDepthMinMax(info, rd); @@ -126,8 +130,8 @@ void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v, } static -bool hasOffsetAdjust(const ReportManager &rm, const NGWrapper &w) { - for (const auto &report_id : all_reports(w)) { +bool hasOffsetAdjust(const ReportManager &rm, const NGHolder &g) { + for (const auto &report_id : all_reports(g)) { if (rm.getReport(report_id).offsetAdjust) { return true; } @@ -135,28 +139,61 @@ bool hasOffsetAdjust(const ReportManager &rm, const NGWrapper &w) { return false; } -void fillExpressionInfo(ReportManager &rm, NGWrapper &w, hs_expr_info *info) { +void fillExpressionInfo(ReportManager &rm, const CompileContext &cc, + NGHolder &g, ExpressionInfo &expr, + hs_expr_info *info) { assert(info); + // remove reports that aren't on vertices connected to accept. + clearReports(g); + + assert(allMatchStatesHaveReports(g)); + + /* + * Note: the following set of analysis passes / transformations should + * match those in NG::addGraph(). 
+ */ + /* ensure utf8 starts at cp boundary */ - ensureCodePointStart(rm, w); - resolveAsserts(rm, w); - optimiseVirtualStarts(w); + ensureCodePointStart(rm, g, expr); + + if (can_never_match(g)) { + throw CompileError(expr.index, "Pattern can never match."); + } + + // validate graph's suitability for fuzzing + validate_fuzzy_compile(g, expr.edit_distance, expr.utf8, cc.grey); + + resolveAsserts(rm, g, expr); + assert(allMatchStatesHaveReports(g)); + + // fuzz graph - this must happen before any transformations are made + make_fuzzy(g, expr.edit_distance, cc.grey); + + pruneUseless(g); + pruneEmptyVertices(g); + + if (can_never_match(g)) { + throw CompileError(expr.index, "Pattern can never match."); + } + + optimiseVirtualStarts(g); + + propagateExtendedParams(g, expr, rm); - removeLeadingVirtualVerticesFromRoot(w, w.start); - removeLeadingVirtualVerticesFromRoot(w, w.startDs); + removeLeadingVirtualVerticesFromRoot(g, g.start); + removeLeadingVirtualVerticesFromRoot(g, g.startDs); - vector depths; - calcDepthsFrom(w, w.start, depths); + auto depths = calcDepthsFrom(g, g.start); DepthMinMax d; - for (auto u : inv_adjacent_vertices_range(w.accept, w)) { - checkVertex(rm, w, u, depths, d); + for (auto u : inv_adjacent_vertices_range(g.accept, g)) { + checkVertex(rm, g, u, depths, d); } - for (auto u : inv_adjacent_vertices_range(w.acceptEod, w)) { - checkVertex(rm, w, u, depths, d); + for (auto u : inv_adjacent_vertices_range(g.acceptEod, g)) { + checkVertex(rm, g, u, depths, d); } if (d.max.is_finite()) { @@ -170,9 +207,9 @@ void fillExpressionInfo(ReportManager &rm, NGWrapper &w, hs_expr_info *info) { info->min_width = UINT_MAX; } - info->unordered_matches = hasOffsetAdjust(rm, w); - info->matches_at_eod = can_match_at_eod(w); - info->matches_only_at_eod = can_only_match_at_eod(w); + info->unordered_matches = hasOffsetAdjust(rm, g); + info->matches_at_eod = can_match_at_eod(g); + info->matches_only_at_eod = can_only_match_at_eod(g); } } // namespace ue2 diff --git a/src/nfagraph/ng_expr_info.h b/src/nfagraph/ng_expr_info.h index dcc5a419f..f9bd68093 100644 --- a/src/nfagraph/ng_expr_info.h +++ b/src/nfagraph/ng_expr_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -27,7 +27,7 @@ */ /** \file - * \brief Code for discovering properties of an NGWrapper used by + * \brief Code for discovering properties of an expression used by * hs_expression_info. */ @@ -36,14 +36,15 @@ struct hs_expr_info; -#include "ue2common.h" - namespace ue2 { -class NGWrapper; +class ExpressionInfo; +class NGHolder; class ReportManager; +struct CompileContext; -void fillExpressionInfo(ReportManager &rm, NGWrapper &w, hs_expr_info *info); +void fillExpressionInfo(ReportManager &rm, const CompileContext &cc, + NGHolder &g, ExpressionInfo &expr, hs_expr_info *info); } // namespace ue2 diff --git a/src/nfagraph/ng_extparam.cpp b/src/nfagraph/ng_extparam.cpp index a504ac50a..bc7f81efd 100644 --- a/src/nfagraph/ng_extparam.cpp +++ b/src/nfagraph/ng_extparam.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,28 +26,32 @@ * POSSIBILITY OF SUCH DAMAGE. 
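For context, the results computed by `fillExpressionInfo()` surface through the public `hs_expression_info()` API. A usage sketch, assuming the default allocator is in use (in which case the returned structure is released with `free()`):

```cpp
#include <cstdio>
#include <cstdlib>

#include <hs/hs.h>

int main() {
    hs_expr_info_t *info = nullptr;
    hs_compile_error_t *err = nullptr;

    if (hs_expression_info("foo.{10,20}bar", 0, &info, &err) != HS_SUCCESS) {
        std::fprintf(stderr, "error: %s\n", err->message);
        hs_free_compile_error(err);
        return 1;
    }

    std::printf("min_width=%u max_width=%u matches_only_at_eod=%d\n",
                info->min_width, info->max_width,
                (int)info->matches_only_at_eod);
    std::free(info); // default allocator assumed
    return 0;
}
```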
*/ -/** \file +/** + * \file * \brief Propagate extended parameters to vertex reports and reduce graph if * possible. * * This code handles the propagation of the extension parameters specified by - * the user with the hs_expr_ext structure into the reports on the graph's + * the user with the \ref hs_expr_ext structure into the reports on the graph's * vertices. * * There are also some analyses that prune edges that cannot contribute to a * match given these constraints, or transform the graph in order to make a * constraint implicit. */ + +#include "ng_extparam.h" + #include "ng.h" #include "ng_depth.h" #include "ng_dump.h" -#include "ng_extparam.h" #include "ng_prune.h" #include "ng_reports.h" #include "ng_som_util.h" #include "ng_width.h" #include "ng_util.h" #include "ue2common.h" +#include "compiler/compiler.h" #include "parser/position.h" #include "util/compile_context.h" #include "util/compile_error.h" @@ -65,8 +69,28 @@ namespace ue2 { static const u32 MAX_MAXOFFSET_TO_ANCHOR = 2000; static const u32 MAX_MINLENGTH_TO_CONVERT = 2000; -/** \brief Find the (min, max) offset adjustment for the reports on a given - * vertex. */ +/** True if all the given reports have the same extparam bounds. */ +template +bool hasSameBounds(const Container &reports, const ReportManager &rm) { + assert(!reports.empty()); + + const auto &first = rm.getReport(*reports.begin()); + for (auto id : reports) { + const auto &report = rm.getReport(id); + if (report.minOffset != first.minOffset || + report.maxOffset != first.maxOffset || + report.minLength != first.minLength) { + return false; + } + } + + return true; +} + +/** + * \brief Find the (min, max) offset adjustment for the reports on a given + * vertex. + */ static pair getMinMaxOffsetAdjust(const ReportManager &rm, const NGHolder &g, NFAVertex v) { @@ -127,54 +151,76 @@ DepthMinMax findMatchLengths(const ReportManager &rm, const NGHolder &g) { return match_depths; } -/** \brief Replace the graph's reports with new reports that specify bounds. */ -static -void updateReportBounds(ReportManager &rm, NGWrapper &g, NFAVertex accept, - set &done) { +template +void replaceReports(NGHolder &g, NFAVertex accept, flat_set &seen, + Function func) { for (auto v : inv_adjacent_vertices_range(accept, g)) { - // Don't operate on g.accept itself. if (v == g.accept) { + // Don't operate on accept: the accept->acceptEod edge is stylised. assert(accept == g.acceptEod); + assert(g[v].reports.empty()); continue; } - // Don't operate on a vertex we've already done. - if (contains(done, v)) { - continue; + if (!seen.insert(v).second) { + continue; // We have already processed v. } - done.insert(v); - flat_set new_reports; auto &reports = g[v].reports; + if (reports.empty()) { + continue; + } + decltype(g[v].reports) new_reports; + for (auto id : g[v].reports) { + new_reports.insert(func(v, id)); + } + reports = std::move(new_reports); + } +} - for (auto id : reports) { - Report ir = rm.getReport(id); // make a copy - assert(!ir.hasBounds()); - - // Note that we need to cope with offset adjustment here. - - ir.minOffset = g.min_offset - ir.offsetAdjust; - if (g.max_offset == MAX_OFFSET) { - ir.maxOffset = MAX_OFFSET; - } else { - ir.maxOffset = g.max_offset - ir.offsetAdjust; - } - assert(ir.maxOffset >= ir.minOffset); +/** + * Generic function for replacing all the reports in the graph. + * + * Pass this a function that takes a vertex and a ReportID returns another + * ReportID (or the same one) to replace it with. 
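The new `hasSameBounds()` guard requires every report to agree on its extended-parameter bounds before any graph-wide transform based on those bounds is applied. Its shape, sketched with plain structs and `std::all_of`:

```cpp
#include <algorithm>
#include <cassert>
#include <vector>

// Sketch of the hasSameBounds() idea: all reports must agree on their
// bounds before a bounds-driven transformation of the whole graph is safe.
struct Bounds {
    unsigned long long minOffset, maxOffset, minLength;
};

static bool hasSameBounds(const std::vector<Bounds> &reports) {
    assert(!reports.empty());
    const Bounds &first = reports.front();
    return std::all_of(reports.begin(), reports.end(),
                       [&](const Bounds &r) {
                           return r.minOffset == first.minOffset &&
                                  r.maxOffset == first.maxOffset &&
                                  r.minLength == first.minLength;
                       });
}

int main() {
    std::vector<Bounds> same{{0, 100, 10}, {0, 100, 10}};
    std::vector<Bounds> mixed{{0, 100, 10}, {5, 100, 10}};
    assert(hasSameBounds(same));
    assert(!hasSameBounds(mixed));
    return 0;
}
```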
+ */ +template +void replaceReports(NGHolder &g, Function func) { + flat_set seen; + replaceReports(g, g.accept, seen, func); + replaceReports(g, g.acceptEod, seen, func); +} - ir.minLength = g.min_length; - if (g.min_length && !g.som) { - ir.quashSom = true; - } +/** \brief Replace the graph's reports with new reports that specify bounds. */ +static +void updateReportBounds(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr) { + DEBUG_PRINTF("updating report bounds\n"); + replaceReports(g, [&](NFAVertex, ReportID id) { + Report report = rm.getReport(id); // make a copy + assert(!report.hasBounds()); + + // Note that we need to cope with offset adjustment here. + + report.minOffset = expr.min_offset - report.offsetAdjust; + if (expr.max_offset == MAX_OFFSET) { + report.maxOffset = MAX_OFFSET; + } else { + report.maxOffset = expr.max_offset - report.offsetAdjust; + } + assert(report.maxOffset >= report.minOffset); - DEBUG_PRINTF("id %u -> min_offset=%llu, max_offset=%llu, " - "min_length=%llu\n", - id, ir.minOffset, ir.maxOffset, ir.minLength); - new_reports.insert(rm.getInternalId(ir)); + report.minLength = expr.min_length; + if (expr.min_length && !expr.som) { + report.quashSom = true; } - DEBUG_PRINTF("swapping reports on vertex %zu\n", g[v].index); - reports.swap(new_reports); - } + DEBUG_PRINTF("id %u -> min_offset=%llu, max_offset=%llu, " + "min_length=%llu\n", id, report.minOffset, + report.maxOffset, report.minLength); + + return rm.getInternalId(report); + }); } static @@ -187,31 +233,93 @@ bool hasVirtualStarts(const NGHolder &g) { return false; } -/** If the pattern is unanchored, has a max_offset and has not asked for SOM, - * we can use that knowledge to anchor it which will limit its lifespan. Note - * that we can't use this transformation if there's a min_length, as it's - * currently handled using "sly SOM". +/** Set the min_length param for all reports to zero. */ +static +void clearMinLengthParam(NGHolder &g, ReportManager &rm) { + DEBUG_PRINTF("clearing min length\n"); + replaceReports(g, [&rm](NFAVertex, ReportID id) { + const auto &report = rm.getReport(id); + if (report.minLength) { + Report new_report = report; + new_report.minLength = 0; + return rm.getInternalId(new_report); + } + return id; + }); +} + +/** + * Set the min_offset param to zero and the max_offset param to MAX_OFFSET for + * all reports. + */ +static +void clearOffsetParams(NGHolder &g, ReportManager &rm) { + DEBUG_PRINTF("clearing min and max offset\n"); + replaceReports(g, [&rm](NFAVertex, ReportID id) { + const auto &report = rm.getReport(id); + if (report.minLength) { + Report new_report = report; + new_report.minOffset = 0; + new_report.maxOffset = MAX_OFFSET; + return rm.getInternalId(new_report); + } + return id; + }); +} + +/** + * If the pattern is unanchored, has a max_offset and has not asked for SOM, we + * can use that knowledge to anchor it which will limit its lifespan. Note that + * we can't use this transformation if there's a min_length, as it's currently + * handled using "sly SOM". * * Note that it is possible to handle graphs that have a combination of * anchored and unanchored paths, but it's too tricky for the moment. 
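`replaceReports()` above rebuilds each vertex's report set by mapping every ID through a callback and inserting the (possibly unchanged) result. Stripped of the graph machinery, the pattern is:

```cpp
#include <cassert>
#include <functional>
#include <set>
#include <utility>

// Shape of the replaceReports() helper: rebuild a report set by mapping
// each ID through a callback (illustrative types, not Hyperscan's).
using ReportID = unsigned;

static void replaceAll(std::set<ReportID> &reports,
                       const std::function<ReportID(ReportID)> &func) {
    std::set<ReportID> out;
    for (ReportID id : reports) {
        out.insert(func(id));
    }
    reports = std::move(out);
}

int main() {
    std::set<ReportID> reports{1, 2, 3};
    replaceAll(reports, [](ReportID id) { return id * 10; });
    assert(reports == (std::set<ReportID>{10, 20, 30}));
    return 0;
}
```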
*/ static -bool anchorPatternWithBoundedRepeat(NGWrapper &g, const depth &minWidth, - const depth &maxWidth) { - assert(!g.som); - assert(g.max_offset != MAX_OFFSET); +bool anchorPatternWithBoundedRepeat(NGHolder &g, ReportManager &rm) { + if (!isFloating(g)) { + return false; + } + + const auto &reports = all_reports(g); + if (reports.empty()) { + return false; + } + + if (any_of_in(reports, [&](ReportID id) { + const auto &report = rm.getReport(id); + return report.maxOffset == MAX_OFFSET || report.minLength || + report.offsetAdjust; + })) { + return false; + } + + if (!hasSameBounds(reports, rm)) { + DEBUG_PRINTF("mixed report bounds\n"); + return false; + } + + const depth minWidth = findMinWidth(g); + const depth maxWidth = findMaxWidth(g); + assert(minWidth <= maxWidth); assert(maxWidth.is_reachable()); + const auto &first_report = rm.getReport(*reports.begin()); + const auto min_offset = first_report.minOffset; + const auto max_offset = first_report.maxOffset; + assert(max_offset < MAX_OFFSET); + DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n", - minWidth.str().c_str(), maxWidth.str().c_str(), g.min_offset, - g.max_offset); + minWidth.str().c_str(), maxWidth.str().c_str(), + min_offset, max_offset); - if (g.max_offset > MAX_MAXOFFSET_TO_ANCHOR) { + if (max_offset > MAX_MAXOFFSET_TO_ANCHOR) { return false; } - if (g.max_offset < minWidth) { + if (max_offset < minWidth) { assert(0); return false; } @@ -232,10 +340,10 @@ bool anchorPatternWithBoundedRepeat(NGWrapper &g, const depth &minWidth, u32 min_bound, max_bound; if (maxWidth.is_infinite()) { min_bound = 0; - max_bound = g.max_offset - minWidth; + max_bound = max_offset - minWidth; } else { - min_bound = g.min_offset > maxWidth ? g.min_offset - maxWidth : 0; - max_bound = g.max_offset - minWidth; + min_bound = min_offset > maxWidth ? min_offset - maxWidth : 0; + max_bound = max_offset - minWidth; } DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound); @@ -288,6 +396,13 @@ bool anchorPatternWithBoundedRepeat(NGWrapper &g, const depth &minWidth, renumber_vertices(g); renumber_edges(g); + if (minWidth == maxWidth) { + // For a fixed width pattern, we can retire the offsets as + // they are implicit in the graph now. + clearOffsetParams(g, rm); + } + + clearReports(g); return true; } @@ -315,7 +430,7 @@ NFAVertex findSingleCyclic(const NGHolder &g) { } static -bool hasOffsetAdjust(const ReportManager &rm, NGWrapper &g, +bool hasOffsetAdjust(const ReportManager &rm, NGHolder &g, int *adjust) { const auto &reports = all_reports(g); if (reports.empty()) { @@ -336,16 +451,27 @@ bool hasOffsetAdjust(const ReportManager &rm, NGWrapper &g, return true; } -/** If the pattern has a min_length and is of "ratchet" form with one unbounded +/** + * If the pattern has a min_length and is of "ratchet" form with one unbounded * repeat, that repeat can become a bounded repeat. 
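The bound computation in `anchorPatternWithBoundedRepeat()` works out as follows for a concrete finite-width case, with plain integers standing in for `depth` values:

```cpp
#include <cassert>

// Worked example of the anchoring arithmetic: /foo/ (width 3) with
// min_offset=5 and max_offset=10 becomes /^.{2,7}foo/, since any match
// must end in the offset window [5,10].
int main() {
    const unsigned minWidth = 3, maxWidth = 3;      // width of /foo/
    const unsigned min_offset = 5, max_offset = 10; // extparam bounds

    unsigned min_bound = min_offset > maxWidth ? min_offset - maxWidth : 0;
    unsigned max_bound = max_offset - minWidth;

    assert(min_bound == 2);
    assert(max_bound == 7);
    // i.e. the pattern behaves like /^.{2,7}foo/ with the offsets retired.
    return 0;
}
```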
* * /foo.*bar/{min_length=100} --> /foo.{94,}bar/ */ static -bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { - assert(g.min_length); +bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { + const auto &reports = all_reports(g); + + if (reports.empty()) { + return false; + } - if (g.min_length > MAX_MINLENGTH_TO_CONVERT) { + if (!hasSameBounds(reports, rm)) { + DEBUG_PRINTF("mixed report bounds\n"); + return false; + } + + const auto &min_length = rm.getReport(*reports.begin()).minLength; + if (!min_length || min_length > MAX_MINLENGTH_TO_CONVERT) { return false; } @@ -375,7 +501,6 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { u32 width = 0; - // Walk from the start vertex to the cyclic state and ensure we have a // chain of vertices. while (v != cyclic) { @@ -437,10 +562,10 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { DEBUG_PRINTF("width=%u, vertex %zu is cyclic\n", width, g[cyclic].index); - if (width >= g.min_length) { + if (width >= min_length) { DEBUG_PRINTF("min_length=%llu is guaranteed, as width=%u\n", - g.min_length, width); - g.min_length = 0; + min_length, width); + clearMinLengthParam(g, rm); return true; } @@ -468,7 +593,7 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { const CharReach &cr = g[cyclic].char_reach; - for (u32 i = 0; i < g.min_length - width - 1; ++i) { + for (u32 i = 0; i < min_length - width - 1; ++i) { v = add_vertex(g); g[v].char_reach = cr; @@ -485,28 +610,27 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { renumber_vertices(g); renumber_edges(g); + clearMinLengthParam(g, rm); clearReports(g); - - g.min_length = 0; return true; } static -bool hasExtParams(const NGWrapper &g) { - if (g.min_length != 0) { +bool hasExtParams(const ExpressionInfo &expr) { + if (expr.min_length != 0) { return true; } - if (g.min_offset != 0) { + if (expr.min_offset != 0) { return true; } - if (g.max_offset != MAX_OFFSET) { + if (expr.max_offset != MAX_OFFSET) { return true; } return false; } -static -depth maxDistFromStart(const NFAVertexBidiDepth &d) { +template +depth maxDistFromStart(const VertexDepth &d) { if (!d.fromStartDotStar.max.is_unreachable()) { // A path from startDs, any path, implies we can match at any offset. 
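At the regex level, the transformation's arithmetic for the documented example is straightforward: the unbounded repeat must contribute whatever `min_length` bytes the fixed parts of the pattern do not.

```cpp
#include <cassert>
#include <string>

// Regex-level view of transformMinLengthToRepeat: for the "ratchet"
// pattern /foo.*bar/ with min_length=100, the repeat must supply at least
// min_length minus the fixed bytes, giving /foo.{94,}bar/.
int main() {
    const std::string prefix = "foo", suffix = "bar";
    const unsigned long long min_length = 100;

    unsigned long long fixed = prefix.size() + suffix.size(); // 6
    unsigned long long repeat_lo = min_length - fixed;        // 94

    assert(repeat_lo == 94);
    // Resulting pattern: /foo.{94,}bar/, and min_length can be cleared.
    return 0;
}
```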
return depth::infinity(); @@ -535,7 +659,7 @@ const depth& minDistToAccept(const NFAVertexBidiDepth &d) { } static -bool isEdgePrunable(const NGWrapper &g, +bool isEdgePrunable(const NGHolder &g, const Report &report, const vector &depths, const NFAEdge &e) { const NFAVertex u = source(e, g); @@ -564,29 +688,29 @@ bool isEdgePrunable(const NGWrapper &g, const NFAVertexBidiDepth &du = depths.at(u_idx); const NFAVertexBidiDepth &dv = depths.at(v_idx); - if (g.min_offset) { + if (report.minOffset) { depth max_offset = maxDistFromStart(du) + maxDistToAccept(dv); - if (max_offset.is_finite() && max_offset < g.min_offset) { + if (max_offset.is_finite() && max_offset < report.minOffset) { DEBUG_PRINTF("max_offset=%s too small\n", max_offset.str().c_str()); return true; } } - if (g.max_offset != MAX_OFFSET) { + if (report.maxOffset != MAX_OFFSET) { depth min_offset = minDistFromStart(du) + minDistToAccept(dv); assert(min_offset.is_finite()); - if (min_offset > g.max_offset) { + if (min_offset > report.maxOffset) { DEBUG_PRINTF("min_offset=%s too large\n", min_offset.str().c_str()); return true; } } - if (g.min_length && is_any_accept(v, g)) { + if (report.minLength && is_any_accept(v, g)) { // Simple take on min_length. If we're an edge to accept and our max // dist from start is too small, we can be pruned. const depth &width = du.fromStart.max; - if (width.is_finite() && width < g.min_length) { + if (width.is_finite() && width < report.minLength) { DEBUG_PRINTF("max width %s from start too small for min_length\n", width.str().c_str()); return true; @@ -597,14 +721,25 @@ bool isEdgePrunable(const NGWrapper &g, } static -void pruneExtUnreachable(NGWrapper &g) { - vector depths; - calcDepths(g, depths); +void pruneExtUnreachable(NGHolder &g, const ReportManager &rm) { + const auto &reports = all_reports(g); + if (reports.empty()) { + return; + } + + if (!hasSameBounds(reports, rm)) { + DEBUG_PRINTF("report bounds vary\n"); + return; + } + + const auto &report = rm.getReport(*reports.begin()); + + auto depths = calcBidiDepths(g); vector dead; for (const auto &e : edges_range(g)) { - if (isEdgePrunable(g, depths, e)) { + if (isEdgePrunable(g, report, depths, e)) { DEBUG_PRINTF("pruning\n"); dead.push_back(e); } @@ -616,32 +751,45 @@ void pruneExtUnreachable(NGWrapper &g) { remove_edges(dead, g); pruneUseless(g); + clearReports(g); } -/** Remove vacuous edges in graphs where the min_offset or min_length - * constraints dictate that they can never produce a match. */ +/** + * Remove vacuous edges in graphs where the min_offset or min_length + * constraints dictate that they can never produce a match. + */ static -void pruneVacuousEdges(NGWrapper &g) { - if (!g.min_length && !g.min_offset) { - return; - } - +void pruneVacuousEdges(NGHolder &g, const ReportManager &rm) { vector dead; + auto has_min_offset = [&](NFAVertex v) { + assert(!g[v].reports.empty()); // must be reporter + return all_of_in(g[v].reports, [&](ReportID id) { + return rm.getReport(id).minOffset > 0; + }); + }; + + auto has_min_length = [&](NFAVertex v) { + assert(!g[v].reports.empty()); // must be reporter + return all_of_in(g[v].reports, [&](ReportID id) { + return rm.getReport(id).minLength > 0; + }); + }; + for (const auto &e : edges_range(g)) { const NFAVertex u = source(e, g); const NFAVertex v = target(e, g); - // Special case: Crudely remove vacuous edges from start in graphs with a - // min_offset. 
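The offset tests in `isEdgePrunable()` reduce to simple interval arithmetic on an edge u -> v: if even the longest path through the edge cannot reach `min_offset`, or the shortest path already exceeds `max_offset`, the edge can never contribute to a match. With hypothetical finite depths:

```cpp
#include <cassert>

// Plain-integer rendering of isEdgePrunable()'s offset tests (all values
// hypothetical and finite for illustration).
int main() {
    const unsigned maxDistFromStart_u = 4, maxDistToAccept_v = 3;
    const unsigned minDistFromStart_u = 2, minDistToAccept_v = 1;

    const unsigned min_offset = 10, max_offset = 20;

    bool too_short = maxDistFromStart_u + maxDistToAccept_v < min_offset;
    bool too_long = minDistFromStart_u + minDistToAccept_v > max_offset;

    assert(too_short);  // 4 + 3 = 7 < 10: the edge is prunable
    assert(!too_long);  // 2 + 1 = 3 <= 20: this test alone would keep it
    return 0;
}
```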
- if (g.min_offset && u == g.start && is_any_accept(v, g)) { + // Special case: Crudely remove vacuous edges from start in graphs with + // a min_offset. + if (u == g.start && is_any_accept(v, g) && has_min_offset(u)) { DEBUG_PRINTF("vacuous edge in graph with min_offset!\n"); dead.push_back(e); continue; } // If a min_length is set, vacuous edges can be removed. - if (g.min_length && is_any_start(u, g) && is_any_accept(v, g)) { + if (is_any_start(u, g) && is_any_accept(v, g) && has_min_length(u)) { DEBUG_PRINTF("vacuous edge in graph with min_length!\n"); dead.push_back(e); continue; @@ -652,12 +800,14 @@ void pruneVacuousEdges(NGWrapper &g) { return; } + DEBUG_PRINTF("removing %zu vacuous edges\n", dead.size()); remove_edges(dead, g); pruneUseless(g); + clearReports(g); } static -void pruneUnmatchable(NGWrapper &g, const vector &depths, +void pruneUnmatchable(NGHolder &g, const vector &depths, const ReportManager &rm, NFAVertex accept) { vector dead; @@ -668,6 +818,11 @@ void pruneUnmatchable(NGWrapper &g, const vector &depths, continue; } + if (!hasSameBounds(g[v].reports, rm)) { + continue; + } + const auto &report = rm.getReport(*g[v].reports.begin()); + u32 idx = g[v].index; DepthMinMax d = depths[idx]; // copy pair adj = getMinMaxOffsetAdjust(rm, g, v); @@ -676,16 +831,16 @@ void pruneUnmatchable(NGWrapper &g, const vector &depths, d.min += adj.first; d.max += adj.second; - if (d.max.is_finite() && d.max < g.min_length) { + if (d.max.is_finite() && d.max < report.minLength) { DEBUG_PRINTF("prune, max match length %s < min_length=%llu\n", - d.max.str().c_str(), g.min_length); + d.max.str().c_str(), report.minLength); dead.push_back(e); continue; } - if (g.max_offset != MAX_OFFSET && d.min > g.max_offset) { + if (report.maxOffset != MAX_OFFSET && d.min > report.maxOffset) { DEBUG_PRINTF("prune, min match length %s > max_offset=%llu\n", - d.min.str().c_str(), g.max_offset); + d.min.str().c_str(), report.maxOffset); dead.push_back(e); continue; } @@ -694,11 +849,15 @@ void pruneUnmatchable(NGWrapper &g, const vector &depths, remove_edges(dead, g); } -/** Remove edges to accepts that can never produce a match long enough to - * satisfy our min_length and max_offset constraints. */ +/** + * Remove edges to accepts that can never produce a match long enough to + * satisfy our min_length and max_offset constraints. 
+ */ static -void pruneUnmatchable(NGWrapper &g, const ReportManager &rm) { - if (!g.min_length) { +void pruneUnmatchable(NGHolder &g, const ReportManager &rm) { + if (!any_of_in(all_reports(g), [&](ReportID id) { + return rm.getReport(id).minLength > 0; + })) { return; } @@ -708,33 +867,19 @@ void pruneUnmatchable(NGWrapper &g, const ReportManager &rm) { pruneUnmatchable(g, depths, rm, g.acceptEod); pruneUseless(g); -} - -static -bool isUnanchored(const NGHolder &g) { - for (auto v : adjacent_vertices_range(g.start, g)) { - if (!edge(g.startDs, v, g).second) { - DEBUG_PRINTF("fail, %zu is anchored vertex\n", g[v].index); - return false; - } - } - return true; + clearReports(g); } static bool hasOffsetAdjustments(const ReportManager &rm, const NGHolder &g) { - for (auto report : all_reports(g)) { - const Report &ir = rm.getReport(report); - if (ir.offsetAdjust) { - return true; - } - } - return false; + return any_of_in(all_reports(g), [&rm](ReportID id) { + return rm.getReport(id).offsetAdjust != 0; + }); } -void handleExtendedParams(ReportManager &rm, NGWrapper &g, - UNUSED const CompileContext &cc) { - if (!hasExtParams(g)) { +void propagateExtendedParams(NGHolder &g, ExpressionInfo &expr, + ReportManager &rm) { + if (!hasExtParams(expr)) { return; } @@ -742,132 +887,158 @@ void handleExtendedParams(ReportManager &rm, NGWrapper &g, depth maxWidth = findMaxWidth(g); bool is_anchored = !has_proper_successor(g.startDs, g) && out_degree(g.start, g); - bool has_offset_adj = hasOffsetAdjustments(rm, g); - - DEBUG_PRINTF("minWidth=%s, maxWidth=%s, anchored=%d, offset_adj=%d\n", - minWidth.str().c_str(), maxWidth.str().c_str(), is_anchored, - has_offset_adj); DepthMinMax match_depths = findMatchLengths(rm, g); DEBUG_PRINTF("match depths %s\n", match_depths.str().c_str()); - if (is_anchored && maxWidth.is_finite() && g.min_offset > maxWidth) { + if (is_anchored && maxWidth.is_finite() && expr.min_offset > maxWidth) { ostringstream oss; oss << "Expression is anchored and cannot satisfy min_offset=" - << g.min_offset << " as it can only produce matches of length " + << expr.min_offset << " as it can only produce matches of length " << maxWidth << " bytes at most."; - throw CompileError(g.expressionIndex, oss.str()); + throw CompileError(expr.index, oss.str()); } - if (minWidth > g.max_offset) { + if (minWidth > expr.max_offset) { ostringstream oss; - oss << "Expression has max_offset=" << g.max_offset << " but requires " - << minWidth << " bytes to match."; - throw CompileError(g.expressionIndex, oss.str()); + oss << "Expression has max_offset=" << expr.max_offset + << " but requires " << minWidth << " bytes to match."; + throw CompileError(expr.index, oss.str()); } - if (maxWidth.is_finite() && match_depths.max < g.min_length) { + if (maxWidth.is_finite() && match_depths.max < expr.min_length) { ostringstream oss; - oss << "Expression has min_length=" << g.min_length << " but can " + oss << "Expression has min_length=" << expr.min_length << " but can " "only produce matches of length " << match_depths.max << " bytes at most."; - throw CompileError(g.expressionIndex, oss.str()); + throw CompileError(expr.index, oss.str()); } - if (g.min_length && g.min_length <= match_depths.min) { + if (expr.min_length && expr.min_length <= match_depths.min) { DEBUG_PRINTF("min_length=%llu constraint is unnecessary\n", - g.min_length); - g.min_length = 0; + expr.min_length); + expr.min_length = 0; } - if (!hasExtParams(g)) { + if (!hasExtParams(expr)) { return; } - pruneVacuousEdges(g); - pruneUnmatchable(g, rm); 
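The satisfiability checks in `propagateExtendedParams()` are also simple interval tests, shown here with hypothetical finite widths:

```cpp
#include <cassert>

// The two CompileError conditions above, as plain comparisons.
int main() {
    // Case 1: anchored pattern whose matches are at most 12 bytes long can
    // never end at or beyond min_offset=20: CompileError.
    const unsigned maxWidth = 12, min_offset = 20;
    assert(min_offset > maxWidth);

    // Case 2: pattern needs at least 8 bytes to match, but max_offset=6:
    // CompileError as well.
    const unsigned minWidth = 8, max_offset = 6;
    assert(minWidth > max_offset);
    return 0;
}
```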
+ updateReportBounds(rm, g, expr); +} - if (!has_offset_adj) { - pruneExtUnreachable(g); - } +/** + * If the pattern is completely anchored and has a min_length set, this can + * be converted to a min_offset. + */ +static +void replaceMinLengthWithOffset(NGHolder &g, ReportManager &rm) { + if (has_proper_successor(g.startDs, g)) { + return; // not wholly anchored + } + + replaceReports(g, [&rm](NFAVertex, ReportID id) { + const auto &report = rm.getReport(id); + if (report.minLength) { + Report new_report = report; + u64a min_len_offset = report.minLength - report.offsetAdjust; + new_report.minOffset = max(report.minOffset, min_len_offset); + new_report.minLength = 0; + return rm.getInternalId(new_report); + } + return id; + }); +} + +/** + * Clear offset bounds on reports that are not needed because they're satisfied + * by vertex depth. + */ +static +void removeUnneededOffsetBounds(NGHolder &g, ReportManager &rm) { + auto depths = calcDepths(g); + + replaceReports(g, [&](NFAVertex v, ReportID id) { + const auto &d = depths.at(g[v].index); + const depth &min_depth = min(d.fromStartDotStar.min, d.fromStart.min); + const depth &max_depth = maxDistFromStart(d); + + DEBUG_PRINTF("vertex %zu has min_depth=%s, max_depth=%s\n", g[v].index, + min_depth.str().c_str(), max_depth.str().c_str()); + + Report report = rm.getReport(id); // copy + bool modified = false; + if (report.minOffset && !report.offsetAdjust && + report.minOffset <= min_depth) { + report.minOffset = 0; + modified = true; + } + if (report.maxOffset != MAX_OFFSET && max_depth.is_finite() && + report.maxOffset >= max_depth) { + report.maxOffset = MAX_OFFSET; + modified = true; + } + if (modified) { + DEBUG_PRINTF("vertex %zu, changed bounds to [%llu,%llu]\n", + g[v].index, report.minOffset, report.maxOffset); + return rm.getInternalId(report); + } - // We may have removed all the edges to accept, in which case this - // expression cannot match. - if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) { - throw CompileError(g.expressionIndex, "Extended parameter " - "constraints can not be satisfied for any match from " - "this expression."); + return id; + }); +} + +void reduceExtendedParams(NGHolder &g, ReportManager &rm, som_type som) { + if (!any_of_in(all_reports(g), + [&](ReportID id) { return rm.getReport(id).hasBounds(); })) { + DEBUG_PRINTF("no extparam bounds\n"); + return; } - // Remove reports on vertices without an edge to accept (which have been - // pruned above). - clearReports(g); + DEBUG_PRINTF("graph has extparam bounds\n"); - // Recalc. - minWidth = findMinWidth(g); - maxWidth = findMaxWidth(g); - is_anchored = proper_out_degree(g.startDs, g) == 0 && - out_degree(g.start, g); - has_offset_adj = hasOffsetAdjustments(rm, g); + pruneVacuousEdges(g, rm); + if (can_never_match(g)) { + return; + } - // If the pattern is completely anchored and has a min_length set, this can - // be converted to a min_offset. 
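As an aside, a minimal sketch of how these satisfiability checks and the new `edit_distance` extended parameter surface through the public API. The pattern, offsets and scan data here are hypothetical, the header path assumes a typical installed layout, and the approximate-matching case assumes a build with that feature enabled; `/^foo/` can produce matches of at most 3 bytes, so `min_offset=10` trips the anchored-width check above.

    #include <stdio.h>
    #include <string.h>

    #include <hs/hs.h>

    static int on_match(unsigned id, unsigned long long from,
                        unsigned long long to, unsigned flags, void *ctx) {
        (void)from; (void)flags; (void)ctx;
        printf("id %u matched, ending at offset %llu\n", id, to);
        return 0; /* continue scanning */
    }

    int main(void) {
        hs_database_t *db = NULL;
        hs_compile_error_t *err = NULL;

        /* Unsatisfiable: /^foo/ is anchored and at most 3 bytes wide, so
         * min_offset=10 makes compilation fail with a descriptive error. */
        hs_expr_ext_t ext;
        memset(&ext, 0, sizeof(ext));
        ext.flags = HS_EXT_FLAG_MIN_OFFSET;
        ext.min_offset = 10;

        const char *expr = "^foo";
        unsigned flags = 0;
        unsigned id = 1;
        const hs_expr_ext_t *extp = &ext;
        if (hs_compile_ext_multi(&expr, &flags, &id, &extp, 1, HS_MODE_BLOCK,
                                 NULL, &db, &err) != HS_SUCCESS) {
            printf("expected compile failure: %s\n", err->message);
            hs_free_compile_error(err);
        }

        /* Satisfiable: approximate matching via the edit_distance extended
         * parameter; "foobar" at distance 1 also matches "fooXar". */
        memset(&ext, 0, sizeof(ext));
        ext.flags = HS_EXT_FLAG_EDIT_DISTANCE;
        ext.edit_distance = 1;
        expr = "foobar";
        if (hs_compile_ext_multi(&expr, &flags, &id, &extp, 1, HS_MODE_BLOCK,
                                 NULL, &db, &err) != HS_SUCCESS) {
            printf("compile failed: %s\n", err->message);
            hs_free_compile_error(err);
            return 1;
        }

        hs_scratch_t *scratch = NULL;
        if (hs_alloc_scratch(db, &scratch) == HS_SUCCESS) {
            const char data[] = "fooXar";
            hs_scan(db, data, (unsigned)strlen(data), 0, scratch, on_match,
                    NULL);
            hs_free_scratch(scratch);
        }
        hs_free_database(db);
        return 0;
    }
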
- if (g.min_length && (g.min_offset <= g.min_length) && is_anchored) { - DEBUG_PRINTF("converting min_length to min_offset=%llu for " - "anchored case\n", g.min_length); - g.min_offset = g.min_length; - g.min_length = 0; + pruneUnmatchable(g, rm); + if (can_never_match(g)) { + return; } - if (g.min_offset && g.min_offset <= minWidth && !has_offset_adj) { - DEBUG_PRINTF("min_offset=%llu constraint is unnecessary\n", - g.min_offset); - g.min_offset = 0; + if (!hasOffsetAdjustments(rm, g)) { + pruneExtUnreachable(g, rm); + if (can_never_match(g)) { + return; + } } - if (!hasExtParams(g)) { + replaceMinLengthWithOffset(g, rm); + if (can_never_match(g)) { return; } // If the pattern has a min_length and is of "ratchet" form with one // unbounded repeat, that repeat can become a bounded repeat. // e.g. /foo.*bar/{min_length=100} --> /foo.{94,}bar/ - if (g.min_length && transformMinLengthToRepeat(rm, g)) { - DEBUG_PRINTF("converted min_length to bounded repeat\n"); - // recalc - minWidth = findMinWidth(g); + transformMinLengthToRepeat(g, rm); + if (can_never_match(g)) { + return; } // If the pattern is unanchored, has a max_offset and has not asked for // SOM, we can use that knowledge to anchor it which will limit its // lifespan. Note that we can't use this transformation if there's a // min_length, as it's currently handled using "sly SOM". - - // Note that it is possible to handle graphs that have a combination of - // anchored and unanchored paths, but it's too tricky for the moment. - - if (g.max_offset != MAX_OFFSET && !g.som && !g.min_length && - !has_offset_adj && isUnanchored(g)) { - if (anchorPatternWithBoundedRepeat(g, minWidth, maxWidth)) { - DEBUG_PRINTF("minWidth=%s, maxWidth=%s\n", minWidth.str().c_str(), - maxWidth.str().c_str()); - if (minWidth == maxWidth) { - // For a fixed width pattern, we can retire the offsets as they - // are implicit in the graph now. - g.min_offset = 0; - g.max_offset = MAX_OFFSET; - } + if (som == SOM_NONE) { + anchorPatternWithBoundedRepeat(g, rm); + if (can_never_match(g)) { + return; } } - //dumpGraph("final.dot", g); - - if (!hasExtParams(g)) { - return; - } - set done; - updateReportBounds(rm, g, g.accept, done); - updateReportBounds(rm, g, g.acceptEod, done); + removeUnneededOffsetBounds(g, rm); } } // namespace ue2 diff --git a/src/nfagraph/ng_extparam.h b/src/nfagraph/ng_extparam.h index d5df1cf6d..ae818075c 100644 --- a/src/nfagraph/ng_extparam.h +++ b/src/nfagraph/ng_extparam.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,14 +34,30 @@ #ifndef NG_EXTPARAM_H #define NG_EXTPARAM_H +#include "som/som.h" + namespace ue2 { -struct CompileContext; -class NGWrapper; +class ExpressionInfo; +class NGHolder; class ReportManager; -void handleExtendedParams(ReportManager &rm, NGWrapper &g, - const CompileContext &cc); +/** + * \brief Propagate extended parameter information to vertex reports. Will + * throw CompileError if this expression's extended parameters are not + * satisfiable. + * + * This will also remove extended parameter constraints that are guaranteed to + * be satisfied from ExpressionInfo. + */ +void propagateExtendedParams(NGHolder &g, ExpressionInfo &expr, + ReportManager &rm); + +/** + * \brief Perform graph reductions (if possible) to do with extended parameter + * constraints on reports. 
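+ *
+ * For example, the min_length rewrite performed here turns
+ * /foo.*bar/{min_length=100} into /foo.{94,}bar/: the literals "foo" and
+ * "bar" already contribute 6 bytes of any match, so the repeat must supply
+ * the remaining 94 bytes (100 - 3 - 3 = 94).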
+ */ +void reduceExtendedParams(NGHolder &g, ReportManager &rm, som_type som); } // namespace ue2 diff --git a/src/nfagraph/ng_fuzzy.cpp b/src/nfagraph/ng_fuzzy.cpp new file mode 100644 index 000000000..2c3d85bd5 --- /dev/null +++ b/src/nfagraph/ng_fuzzy.cpp @@ -0,0 +1,685 @@ +/* + * Copyright (c) 2015-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+/** \file
+ * \brief Graph fuzzer for approximate matching
+ */
+
+#include "ng_fuzzy.h"
+
+#include "ng.h"
+#include "ng_depth.h"
+#include "ng_util.h"
+
+#include <map>
+#include <vector>
+using namespace std;
+
+namespace ue2 {
+
+// returns all successors up to a given depth in a vector of sets, indexed by
+// zero-based depth from source vertex
+static
+vector<flat_set<NFAVertex>> gatherSuccessorsByDepth(const NGHolder &g,
+                                                    NFAVertex src, u32 depth) {
+    vector<flat_set<NFAVertex>> result(depth);
+    flat_set<NFAVertex> cur, next;
+
+    assert(depth > 0);
+
+    // populate current set of successors
+    for (auto v : adjacent_vertices_range(src, g)) {
+        // ignore self-loops
+        if (src == v) {
+            continue;
+        }
+        DEBUG_PRINTF("Node %zu depth 1\n", g[v].index);
+        cur.insert(v);
+    }
+    result[0] = cur;
+
+    for (unsigned d = 1; d < depth; d++) {
+        // collect all successors for all current level vertices
+        for (auto v : cur) {
+            // don't go past special nodes
+            if (is_special(v, g)) {
+                continue;
+            }
+
+            for (auto succ : adjacent_vertices_range(v, g)) {
+                // ignore self-loops
+                if (v == succ) {
+                    continue;
+                }
+                DEBUG_PRINTF("Node %zu depth %u\n", g[succ].index, d + 1);
+                next.insert(succ);
+            }
+        }
+        result[d] = next;
+        next.swap(cur);
+        next.clear();
+    }
+
+    return result;
+}
+
+// returns all predecessors up to a given depth in a vector of sets, indexed by
+// zero-based depth from source vertex
+static
+vector<flat_set<NFAVertex>> gatherPredecessorsByDepth(const NGHolder &g,
+                                                      NFAVertex src,
+                                                      u32 depth) {
+    vector<flat_set<NFAVertex>> result(depth);
+    flat_set<NFAVertex> cur, next;
+
+    assert(depth > 0);
+
+    // populate current set of predecessors
+    for (auto v : inv_adjacent_vertices_range(src, g)) {
+        // ignore self-loops
+        if (src == v) {
+            continue;
+        }
+        DEBUG_PRINTF("Node %zu depth 1\n", g[v].index);
+        cur.insert(v);
+    }
+    result[0] = cur;
+
+    for (unsigned d = 1; d < depth; d++) {
+        // collect all predecessors of all current level vertices
+        for (auto v : cur) {
+            for (auto pred : inv_adjacent_vertices_range(v, g)) {
+                // ignore self-loops
+                if (v == pred) {
+                    continue;
+                }
+                DEBUG_PRINTF("Node %zu depth %u\n", g[pred].index, d + 1);
+                next.insert(pred);
+            }
+        }
+        result[d] = next;
+        next.swap(cur);
+        next.clear();
+    }
+
+    return result;
+}
+
+/*
+ * This struct produces a fuzzed graph; that is, a graph that is able to match
+ * the original pattern, as well as input data within a certain edit distance.
+ * Construct the struct, then call fuzz_graph() to transform the graph.
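+ *
+ * A minimal usage sketch (mirroring the call made in make_fuzzy() at the
+ * bottom of this file):
+ *
+ *     ShadowGraph sg(g, 1);  // g is the pattern's NGHolder; edit distance 1
+ *     sg.fuzz_graph();       // g now also matches inputs at distance <= 1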
+ *
+ * Terminology used:
+ * - Shadow vertices: vertices mirroring the original graph at various edit
+ *   distances
+ * - Shadow graph level: edit distance of a particular shadow graph
+ * - Helpers: dot vertices assigned to shadow vertices, used for insert/replace
+ */
+struct ShadowGraph {
+    NGHolder &g;
+    u32 edit_distance;
+    map<pair<NFAVertex, u32>, NFAVertex> shadow_map;
+    map<pair<NFAVertex, u32>, NFAVertex> helper_map;
+    map<NFAVertex, NFAVertex> clones;
+    // edge creation is deferred
+    vector<pair<NFAVertex, NFAVertex>> edges_to_be_added;
+    flat_set<NFAVertex> orig;
+
+    ShadowGraph(NGHolder &g_in, u32 ed_in) : g(g_in), edit_distance(ed_in) {}
+
+    void fuzz_graph() {
+        if (edit_distance == 0) {
+            return;
+        }
+
+        // step 1: prepare the vertices, helpers and shadows according to
+        // the original graph
+        prepare_graph();
+
+        // step 2: add shadow and helper nodes
+        build_shadow_graph();
+
+        // step 3: set up reports for newly created vertices (and make clones
+        // if necessary)
+        create_reports();
+
+        // step 4: wire up shadow graph and helpers for insert/replace/remove
+        connect_shadow_graph();
+
+        // step 5: commit all the edge wirings
+        DEBUG_PRINTF("Committing edge wirings\n");
+        for (const auto &p : edges_to_be_added) {
+            add_edge_if_not_present(p.first, p.second, g);
+        }
+
+        DEBUG_PRINTF("Done!\n");
+    }
+
+private:
+    const NFAVertex& get_clone(const NFAVertex &v) {
+        return contains(clones, v) ?
+               clones[v] : v;
+    }
+
+    void connect_to_clones(const NFAVertex &u, const NFAVertex &v) {
+        const NFAVertex &clone_u = get_clone(u);
+        const NFAVertex &clone_v = get_clone(v);
+
+        edges_to_be_added.emplace_back(u, v);
+        DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[u].index, g[v].index);
+
+        // do not connect clones to accepts, we do it during cloning
+        if (is_any_accept(clone_v, g)) {
+            return;
+        }
+        edges_to_be_added.emplace_back(clone_u, clone_v);
+        DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[clone_u].index,
+                     g[clone_v].index);
+    }
+
+    void prepare_graph() {
+        DEBUG_PRINTF("Building shadow graphs\n");
+
+        for (auto v : vertices_range(g)) {
+            // all level 0 vertices are their own helpers and their own shadows
+            helper_map[make_pair(v, 0)] = v;
+            shadow_map[make_pair(v, 0)] = v;
+
+            // find special nodes
+            if (is_any_accept(v, g)) {
+                DEBUG_PRINTF("Node %zu is a special node\n", g[v].index);
+                for (unsigned edit = 1; edit <= edit_distance; edit++) {
+                    // all accepts are their own shadows and helpers at all
+                    // levels
+                    shadow_map[make_pair(v, edit)] = v;
+                    helper_map[make_pair(v, edit)] = v;
+                }
+                continue;
+            }
+            DEBUG_PRINTF("Node %zu is to be shadowed\n", g[v].index);
+            orig.insert(v);
+        }
+    }
+
+    void build_shadow_graph() {
+        for (auto v : orig) {
+            DEBUG_PRINTF("Adding shadow/helper nodes for node %zu\n",
+                         g[v].index);
+            for (unsigned dist = 1; dist <= edit_distance; dist++) {
+                auto shadow_v = v;
+
+                // start and startDs cannot have shadows but do have helpers
+                if (!is_any_start(v, g)) {
+                    shadow_v = clone_vertex(g, v);
+                    DEBUG_PRINTF("New shadow node ID: %zu (level %u)\n",
+                                 g[shadow_v].index, dist);
+                }
+                shadow_map[make_pair(v, dist)] = shadow_v;
+
+                // if there's nowhere to go from this vertex, no helper needed
+                if (proper_out_degree(v, g) < 1) {
+                    helper_map[make_pair(v, dist)] = shadow_v;
+                    continue;
+                }
+
+                auto helper_v = clone_vertex(g, v);
+                DEBUG_PRINTF("New helper node ID: %zu (level %u)\n",
+                             g[helper_v].index, dist);
+
+                // this is a helper, so make it a dot
+                g[helper_v].char_reach = CharReach::dot();
+                // do not copy virtual start's assert flags
+                if (is_virtual_start(v, g)) {
+                    g[helper_v].assert_flags = 0;
+                }
+                helper_map[make_pair(v, dist)] = helper_v;
+            }
+ } + } + + // wire up successors according to the original graph, wire helpers + // to shadow successors (insert/replace) + void connect_succs(NFAVertex v, u32 dist) { + DEBUG_PRINTF("Wiring up successors for node %zu shadow level %u\n", + g[v].index, dist); + const auto &cur_shadow_v = shadow_map[make_pair(v, dist)]; + const auto &cur_shadow_helper = helper_map[make_pair(v, dist)]; + + // multiple insert + if (dist > 1) { + const auto &prev_level_helper = helper_map[make_pair(v, dist - 1)]; + connect_to_clones(prev_level_helper, cur_shadow_helper); + } + + for (auto orig_dst : adjacent_vertices_range(v, g)) { + const auto &shadow_dst = shadow_map[make_pair(orig_dst, dist)]; + + connect_to_clones(cur_shadow_v, shadow_dst); + + // ignore startDs for insert/replace + if (orig_dst == g.startDs) { + continue; + } + + connect_to_clones(cur_shadow_helper, shadow_dst); + } + } + + // wire up predecessors according to the original graph, wire + // predecessors to helpers (replace), wire predecessor helpers to + // helpers (multiple replace) + void connect_preds(NFAVertex v, u32 dist) { + DEBUG_PRINTF("Wiring up predecessors for node %zu shadow level %u\n", + g[v].index, dist); + const auto &cur_shadow_v = shadow_map[make_pair(v, dist)]; + const auto &cur_shadow_helper = helper_map[make_pair(v, dist)]; + + auto orig_src_vertices = inv_adjacent_vertices_range(v, g); + for (auto orig_src : orig_src_vertices) { + // ignore edges from start to startDs + if (v == g.startDs && orig_src == g.start) { + continue; + } + // ignore self-loops for replace + if (orig_src != v) { + // do not wire a replace node for start vertices if we + // have a virtual start + if (is_virtual_start(v, g) && is_any_start(orig_src, g)) { + continue; + } + + if (dist) { + const auto &prev_level_src = + shadow_map[make_pair(orig_src, dist - 1)]; + const auto &prev_level_helper = + helper_map[make_pair(orig_src, dist - 1)]; + + connect_to_clones(prev_level_src, cur_shadow_helper); + connect_to_clones(prev_level_helper, cur_shadow_helper); + } + } + // wire predecessor according to original graph + const auto &shadow_src = shadow_map[make_pair(orig_src, dist)]; + + connect_to_clones(shadow_src, cur_shadow_v); + } + } + + // wire up previous level helper to current shadow (insert) + void connect_helpers(NFAVertex v, u32 dist) { + DEBUG_PRINTF("Wiring up helpers for node %zu shadow level %u\n", + g[v].index, dist); + const auto &cur_shadow_helper = helper_map[make_pair(v, dist)]; + auto prev_level_v = shadow_map[make_pair(v, dist - 1)]; + + connect_to_clones(prev_level_v, cur_shadow_helper); + } + + /* + * wiring edges for removal is a special case. + * + * when wiring edges for removal, as well as wiring up immediate + * predecessors to immediate successors, we also need to wire up more + * distant successors to their respective shadow graph levels. + * + * for example, consider graph start->a->b->c->d->accept. + * + * at edit distance 1, we need remove edges start->b, a->c, b->d, and + * c->accept, all going from original graph (level 0) to shadow graph + * level 1. + * + * at edit distance 2, we also need edges start->c, a->d and b->accept, + * all going from level 0 to shadow graph level 2. + * + * this is propagated to all shadow levels; that is, given edit + * distance 3, we will have edges from shadow levels 0->1, 0->2, + * 0->3, 1->2, 1->3, and 2->3. + * + * therefore, we wire them in steps: first wire with step 1 (0->1, 1->2, + * 2->3) at depth 1, then wire with step 2 (0->2, 1->3) at depth 2, etc. 
+     *
+     * we also have to wire helpers to their removal successors, to
+     * accommodate a replace followed by a remove, on all shadow levels.
+     *
+     * and finally, we also have to wire source shadows into removal
+     * successor helpers on a level above, to accommodate a remove
+     * followed by a replace.
+     */
+    void connect_removals(NFAVertex v) {
+        DEBUG_PRINTF("Wiring up remove edges for node %zu\n", g[v].index);
+
+        // vertices returned by this function don't include self-loops
+        auto dst_vertices_by_depth =
+            gatherSuccessorsByDepth(g, v, edit_distance);
+        auto orig_src_vertices = inv_adjacent_vertices_range(v, g);
+        for (auto orig_src : orig_src_vertices) {
+            // ignore self-loops
+            if (orig_src == v) {
+                continue;
+            }
+            for (unsigned step = 1; step <= edit_distance; step++) {
+                for (unsigned dist = step; dist <= edit_distance; dist++) {
+                    auto &dst_vertices = dst_vertices_by_depth[step - 1];
+                    for (auto &orig_dst : dst_vertices) {
+                        const auto &shadow_src =
+                            shadow_map[make_pair(orig_src, dist - step)];
+                        const auto &shadow_helper =
+                            helper_map[make_pair(orig_src, dist - step)];
+                        const auto &shadow_dst =
+                            shadow_map[make_pair(orig_dst, dist)];
+
+                        // removal
+                        connect_to_clones(shadow_src, shadow_dst);
+
+                        // removal from helper vertex
+                        connect_to_clones(shadow_helper, shadow_dst);
+
+                        // removal into helper, requires additional edit
+                        if ((dist + 1) <= edit_distance) {
+                            const auto &next_level_helper =
+                                helper_map[make_pair(orig_dst, dist + 1)];
+
+                            connect_to_clones(shadow_src, next_level_helper);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    void connect_shadow_graph() {
+        DEBUG_PRINTF("Wiring up the graph\n");
+
+        for (auto v : orig) {
+
+            DEBUG_PRINTF("Wiring up edges for node %zu\n", g[v].index);
+
+            for (unsigned dist = 0; dist <= edit_distance; dist++) {
+
+                // handle insert/replace
+                connect_succs(v, dist);
+
+                // handle replace/multiple insert
+                connect_preds(v, dist);
+
+                // handle helpers
+                if (dist > 0) {
+                    connect_helpers(v, dist);
+                }
+            }
+
+            // handle removals
+            connect_removals(v);
+        }
+    }
+
+    void connect_to_targets(NFAVertex src, const flat_set<NFAVertex> &targets) {
+        for (auto dst : targets) {
+            DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[src].index,
+                         g[dst].index);
+            edges_to_be_added.emplace_back(src, dst);
+        }
+    }
+
+    // create a clone of the vertex, but overwrite its report set
+    void create_clone(NFAVertex v, const flat_set<ReportID> &reports,
+                      unsigned max_edit_distance,
+                      const flat_set<NFAVertex> &targets) {
+        // some vertices may have the same reports, but different successors;
+        // therefore, we may need to connect them multiple times, but still only
+        // clone once
+        bool needs_cloning = !contains(clones, v);
+
+        DEBUG_PRINTF("Cloning node %zu\n", g[v].index);
+        // go through all shadows and helpers, including
+        // original vertex
+        for (unsigned d = 0; d < max_edit_distance; d++) {
+            auto shadow_v = shadow_map[make_pair(v, d)];
+            auto helper_v = helper_map[make_pair(v, d)];
+
+            NFAVertex new_shadow_v, new_helper_v;
+
+            // make sure we don't clone the same vertex twice
+            if (needs_cloning) {
+                new_shadow_v = clone_vertex(g, shadow_v);
+                DEBUG_PRINTF("New shadow node ID: %zu (level %u)\n",
+                             g[new_shadow_v].index, d);
+                clones[shadow_v] = new_shadow_v;
+            } else {
+                new_shadow_v = clones[shadow_v];
+            }
+            g[new_shadow_v].reports = reports;
+
+            connect_to_targets(new_shadow_v, targets);
+
+            if (shadow_v == helper_v) {
+                continue;
+            }
+            if (needs_cloning) {
+                new_helper_v = clone_vertex(g, helper_v);
+                DEBUG_PRINTF("New helper node ID: %zu (level %u)\n",
+                             g[new_helper_v].index, d);
+                clones[helper_v] = new_helper_v;
+            } else {
+                new_helper_v = clones[helper_v];
+            }
+            g[new_helper_v].reports = reports;
+
+            connect_to_targets(new_helper_v, targets);
+        }
+    }
+
+    void write_reports(NFAVertex v, const flat_set<ReportID> &reports,
+                       unsigned max_edit_distance,
+                       const flat_set<NFAVertex> &targets) {
+        // we're overwriting reports, but we're not losing any
+        // information as we already cached all the different report
+        // sets, so vertices having different reports will be cloned and set up
+        // with the correct report set
+
+        // go through all shadows and helpers, including original
+        // vertex
+        for (unsigned d = 0; d < max_edit_distance; d++) {
+            auto shadow_v = shadow_map[make_pair(v, d)];
+            auto helper_v = helper_map[make_pair(v, d)];
+            DEBUG_PRINTF("Setting up reports for shadow node: %zu "
+                         "(level %u)\n",
+                         g[shadow_v].index, d);
+            DEBUG_PRINTF("Setting up reports for helper node: %zu "
+                         "(level %u)\n",
+                         g[helper_v].index, d);
+            g[shadow_v].reports = reports;
+            g[helper_v].reports = reports;
+
+            connect_to_targets(shadow_v, targets);
+            connect_to_targets(helper_v, targets);
+        }
+    }
+
+    /*
+     * we may have multiple report sets per graph. that means, whenever we
+     * construct additional paths through the graph (alternations, removals), we
+     * have to account for the fact that some vertices are predecessors to
+     * vertices with different report sets.
+     *
+     * whenever that happens, we have to clone the paths for both report sets,
+     * and set up these new vertices with their respective report sets as well.
+     *
+     * in order to do that, we first have to get all the predecessors for accept
+     * and acceptEod vertices. then, go through them one by one, and take note
+     * of the report lists. the first report set we find wins; the rest we
+     * clone.
+     *
+     * we also have to do this in two passes, because there may be vertices that
+     * are predecessors to vertices with different report sets, so to avoid
+     * overwriting reports we will be caching reports info instead.
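+     *
+     * (at most two distinct report sets are expected here: one on the
+     * predecessors of accept and one on the predecessors of acceptEod,
+     * which is what the assertion in create_reports() checks.)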
+     */
+    void create_reports() {
+        map<flat_set<ReportID>, flat_set<NFAVertex>> reports_to_vertices;
+        flat_set<NFAVertex> accepts{g.accept, g.acceptEod};
+
+        // gather reports info from all vertices connected to accept
+        for (auto accept : accepts) {
+            for (auto src : inv_adjacent_vertices_range(accept, g)) {
+                // skip special vertices
+                if (is_special(src, g)) {
+                    continue;
+                }
+                reports_to_vertices[g[src].reports].insert(src);
+            }
+        }
+
+        // we expect to see at most two report sets
+        assert(reports_to_vertices.size() > 0 &&
+               reports_to_vertices.size() <= 2);
+
+        // set up all reports
+        bool clone = false;
+        for (auto &pair : reports_to_vertices) {
+            const auto &reports = pair.first;
+            const auto &vertices = pair.second;
+
+            for (auto src : vertices) {
+                // get all predecessors up to edit distance
+                auto src_vertices_by_depth =
+                    gatherPredecessorsByDepth(g, src, edit_distance);
+
+                // find which accepts source vertex connects to
+                flat_set<NFAVertex> targets;
+                for (const auto &accept : accepts) {
+                    NFAEdge e = edge(src, accept, g);
+                    if (e) {
+                        targets.insert(accept);
+                    }
+                }
+                assert(targets.size());
+
+                for (unsigned d = 0; d < src_vertices_by_depth.size(); d++) {
+                    const auto &preds = src_vertices_by_depth[d];
+                    for (auto v : preds) {
+                        // only clone a node if it already contains reports
+                        if (clone && !g[v].reports.empty()) {
+                            create_clone(v, reports, edit_distance - d,
+                                         targets);
+                        } else {
+                            write_reports(v, reports, edit_distance - d,
+                                          targets);
+                        }
+                    }
+                }
+            }
+            // clone vertices only if it's not our first report set
+            clone = true;
+        }
+    }
+};
+
+// check if we will edit our way into a vacuous pattern
+static
+bool will_turn_vacuous(const NGHolder &g, u32 edit_distance) {
+    auto depths = calcRevDepths(g);
+
+    depth min_depth = depth::infinity();
+    auto idx = g[g.start].index;
+
+    // check distance from start to accept/acceptEod
+    if (depths[idx].toAccept.min.is_finite()) {
+        min_depth = min(depths[idx].toAccept.min, min_depth);
+    }
+    if (depths[idx].toAcceptEod.min.is_finite()) {
+        min_depth = min(depths[idx].toAcceptEod.min, min_depth);
+    }
+
+    idx = g[g.startDs].index;
+
+    // check distance from startDs to accept/acceptEod
+    if (depths[idx].toAccept.min.is_finite()) {
+        min_depth = min(depths[idx].toAccept.min, min_depth);
+    }
+    if (depths[idx].toAcceptEod.min.is_finite()) {
+        min_depth = min(depths[idx].toAcceptEod.min, min_depth);
+    }
+
+    assert(min_depth.is_finite());
+
+    // now, check if we can edit our way into a vacuous pattern
+    if (min_depth <= (u64a) edit_distance + 1) {
+        DEBUG_PRINTF("Pattern will turn vacuous if approximately matched\n");
+        return true;
+    }
+    return false;
+}
+
+void validate_fuzzy_compile(const NGHolder &g, u32 edit_distance, bool utf8,
+                            const Grey &grey) {
+    if (edit_distance == 0) {
+        return;
+    }
+    if (!grey.allowApproximateMatching) {
+        throw CompileError("Approximate matching is disabled.");
+    }
+    if (edit_distance > grey.maxEditDistance) {
+        throw CompileError("Edit distance is too big.");
+    }
+    if (utf8) {
+        throw CompileError("UTF-8 is disallowed for approximate matching.");
+    }
+    // graph isn't fuzzable if there are edge assertions anywhere in the graph
+    for (auto e : edges_range(g)) {
+        if (g[e].assert_flags) {
+            throw CompileError("Zero-width assertions are disallowed for "
+                               "approximate matching.");
+        }
+    }
+    if (will_turn_vacuous(g, edit_distance)) {
+        throw CompileError("Approximate matching patterns that reduce to "
+                           "vacuous patterns are disallowed.");
+    }
+}
+
+void make_fuzzy(NGHolder &g, u32 edit_distance, const Grey &grey) {
+    if (edit_distance == 0) {
+        return;
+    }
+
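+    // Callers are expected to have rejected unsafe inputs via
+    // validate_fuzzy_compile() above; e.g. /ab/ with edit_distance=2 is
+    // refused there, as two deletions would let it match the empty string.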
assert(grey.allowApproximateMatching); + assert(grey.maxEditDistance >= edit_distance); + + ShadowGraph sg(g, edit_distance); + sg.fuzz_graph(); + + // For safety, enforce limit on actual vertex count. + if (num_vertices(g) > grey.limitApproxMatchingVertices) { + DEBUG_PRINTF("built %zu vertices > limit of %u\n", num_vertices(g), + grey.limitApproxMatchingVertices); + throw ResourceLimitError(); + } +} + +} // namespace ue2 diff --git a/src/nfa/multiaccel_compilehelper.h b/src/nfagraph/ng_fuzzy.h similarity index 58% rename from src/nfa/multiaccel_compilehelper.h rename to src/nfagraph/ng_fuzzy.h index 27dbe634a..a2c821273 100644 --- a/src/nfa/multiaccel_compilehelper.h +++ b/src/nfagraph/ng_fuzzy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,50 +26,24 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef MULTIACCELCOMPILE_H_ -#define MULTIACCELCOMPILE_H_ - -#include "ue2common.h" +/** \file + * \brief Graph fuzzer for approximate matching + */ -#include "nfagraph/ng_limex_accel.h" +#ifndef NG_FUZZY_H +#define NG_FUZZY_H -#include +#include "ue2common.h" namespace ue2 { +struct Grey; +class NGHolder; +class ReportManager; -/* accel scheme state machine */ -enum accel_scheme_state { - STATE_FIRST_RUN, - STATE_SECOND_RUN, - STATE_WAITING_FOR_GRAB, - STATE_FIRST_TAIL, - STATE_SECOND_TAIL, - STATE_STOPPED, - STATE_INVALID -}; - -struct accel_data { - MultibyteAccelInfo::multiaccel_type type = MultibyteAccelInfo::MAT_NONE; - accel_scheme_state state = STATE_INVALID; - unsigned len1 = 0; /* length of first run */ - unsigned len2 = 0; /* length of second run, if present */ - unsigned tlen1 = 0; /* first tail length */ - unsigned tlen2 = 0; /* second tail length */ -}; - -class MultiaccelCompileHelper { -private: - const CharReach &cr; - u32 offset; - std::vector accels; - unsigned max_len; -public: - MultiaccelCompileHelper(const CharReach &cr, u32 off, unsigned max_len); - bool canAdvance(); - MultibyteAccelInfo getBestScheme(); - void advance(const ue2::CharReach &cr); -}; +void validate_fuzzy_compile(const NGHolder &g, u32 edit_distance, bool utf8, + const Grey &grey); -}; // namespace +void make_fuzzy(NGHolder &g, u32 edit_distance, const Grey &grey); +} -#endif /* MULTIACCELCOMPILE_H_ */ +#endif // NG_FUZZY_H diff --git a/src/nfagraph/ng_is_equal.h b/src/nfagraph/ng_is_equal.h index 8eba2af59..52b29882f 100644 --- a/src/nfagraph/ng_is_equal.h +++ b/src/nfagraph/ng_is_equal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,7 +39,6 @@ #include "ue2common.h" #include -#include namespace ue2 { diff --git a/src/nfagraph/ng_lbr.cpp b/src/nfagraph/ng_lbr.cpp index d832bdaac..9bf16efea 100644 --- a/src/nfagraph/ng_lbr.cpp +++ b/src/nfagraph/ng_lbr.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Large Bounded Repeat (LBR) engine build code. 
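+ *
+ * (An LBR engine implements a pattern that reduces to a single bounded
+ * repeat of a character class, e.g. /x{100,200}/.)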
*/ @@ -128,25 +129,24 @@ void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin, } template static -aligned_unique_ptr makeLbrNfa(NFAEngineType nfa_type, - enum RepeatType rtype, - const depth &repeatMax) { +bytecode_ptr makeLbrNfa(NFAEngineType nfa_type, enum RepeatType rtype, + const depth &repeatMax) { size_t tableLen = 0; if (rtype == REPEAT_SPARSE_OPTIMAL_P) { tableLen = sizeof(u64a) * (repeatMax + 1); } size_t len = sizeof(NFA) + sizeof(LbrStruct) + sizeof(RepeatInfo) + tableLen + sizeof(u64a); - aligned_unique_ptr nfa = aligned_zmalloc_unique(len); + auto nfa = make_zeroed_bytecode_ptr(len); nfa->type = verify_u8(nfa_type); nfa->length = verify_u32(len); return nfa; } static -aligned_unique_ptr buildLbrDot(const CharReach &cr, const depth &repeatMin, - const depth &repeatMax, u32 minPeriod, - bool is_reset, ReportID report) { +bytecode_ptr buildLbrDot(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { if (!cr.all()) { return nullptr; } @@ -164,10 +164,9 @@ aligned_unique_ptr buildLbrDot(const CharReach &cr, const depth &repeatMin, } static -aligned_unique_ptr buildLbrVerm(const CharReach &cr, - const depth &repeatMin, - const depth &repeatMax, u32 minPeriod, - bool is_reset, ReportID report) { +bytecode_ptr buildLbrVerm(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { const CharReach escapes(~cr); if (escapes.count() != 1) { @@ -188,10 +187,9 @@ aligned_unique_ptr buildLbrVerm(const CharReach &cr, } static -aligned_unique_ptr buildLbrNVerm(const CharReach &cr, - const depth &repeatMin, - const depth &repeatMax, u32 minPeriod, - bool is_reset, ReportID report) { +bytecode_ptr buildLbrNVerm(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { const CharReach escapes(cr); if (escapes.count() != 1) { @@ -212,10 +210,9 @@ aligned_unique_ptr buildLbrNVerm(const CharReach &cr, } static -aligned_unique_ptr buildLbrShuf(const CharReach &cr, - const depth &repeatMin, - const depth &repeatMax, u32 minPeriod, - bool is_reset, ReportID report) { +bytecode_ptr buildLbrShuf(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, is_reset); auto nfa = makeLbrNfa(LBR_NFA_SHUF, rtype, repeatMax); @@ -233,10 +230,9 @@ aligned_unique_ptr buildLbrShuf(const CharReach &cr, } static -aligned_unique_ptr buildLbrTruf(const CharReach &cr, - const depth &repeatMin, - const depth &repeatMax, u32 minPeriod, - bool is_reset, ReportID report) { +bytecode_ptr buildLbrTruf(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, is_reset); auto nfa = makeLbrNfa(LBR_NFA_TRUF, rtype, repeatMax); @@ -252,10 +248,9 @@ aligned_unique_ptr buildLbrTruf(const CharReach &cr, } static -aligned_unique_ptr constructLBR(const CharReach &cr, - const depth &repeatMin, - const depth &repeatMax, u32 minPeriod, - bool is_reset, ReportID report) { +bytecode_ptr constructLBR(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { DEBUG_PRINTF("bounds={%s,%s}, cr=%s (count %zu), report=%u\n", repeatMin.str().c_str(), repeatMax.str().c_str(), describeClass(cr, 20, 
CC_OUT_TEXT).c_str(), cr.count(), @@ -263,8 +258,8 @@ aligned_unique_ptr constructLBR(const CharReach &cr, assert(repeatMin <= repeatMax); assert(repeatMax.is_reachable()); - aligned_unique_ptr nfa - = buildLbrDot(cr, repeatMin, repeatMax, minPeriod, is_reset, report); + auto nfa = + buildLbrDot(cr, repeatMin, repeatMax, minPeriod, is_reset, report); if (!nfa) { nfa = buildLbrVerm(cr, repeatMin, repeatMax, minPeriod, is_reset, @@ -291,10 +286,10 @@ aligned_unique_ptr constructLBR(const CharReach &cr, return nfa; } -aligned_unique_ptr constructLBR(const CastleProto &proto, - const vector> &triggers, - const CompileContext &cc, - const ReportManager &rm) { +bytecode_ptr constructLBR(const CastleProto &proto, + const vector> &triggers, + const CompileContext &cc, + const ReportManager &rm) { if (!cc.grey.allowLbr) { return nullptr; } @@ -330,10 +325,10 @@ aligned_unique_ptr constructLBR(const CastleProto &proto, } /** \brief Construct an LBR engine from the given graph \p g. */ -aligned_unique_ptr constructLBR(const NGHolder &g, - const vector> &triggers, - const CompileContext &cc, - const ReportManager &rm) { +bytecode_ptr constructLBR(const NGHolder &g, + const vector> &triggers, + const CompileContext &cc, + const ReportManager &rm) { if (!cc.grey.allowLbr) { return nullptr; } diff --git a/src/nfagraph/ng_lbr.h b/src/nfagraph/ng_lbr.h index 99cb0fcb0..1eec96535 100644 --- a/src/nfagraph/ng_lbr.h +++ b/src/nfagraph/ng_lbr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Large Bounded Repeat (LBR) engine build code. */ @@ -34,7 +35,7 @@ #define NG_LBR_H #include "ue2common.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include #include @@ -51,14 +52,16 @@ struct CompileContext; struct Grey; /** \brief Construct an LBR engine from the given graph \p g. */ -aligned_unique_ptr +bytecode_ptr constructLBR(const NGHolder &g, const std::vector> &triggers, const CompileContext &cc, const ReportManager &rm); -/** \brief Construct an LBR engine from the given CastleProto, which should - * contain only one repeat. */ -aligned_unique_ptr +/** + * \brief Construct an LBR engine from the given CastleProto, which should + * contain only one repeat. + */ +bytecode_ptr constructLBR(const CastleProto &proto, const std::vector> &triggers, const CompileContext &cc, const ReportManager &rm); diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp index e92790b98..283bba22c 100644 --- a/src/nfagraph/ng_limex.cpp +++ b/src/nfagraph/ng_limex.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,9 +26,11 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Limex NFA construction code. 
*/ + #include "ng_limex.h" #include "grey.h" @@ -354,7 +356,7 @@ void attemptToUseAsStart(const NGHolder &g, NFAVertex u, auto ni_inserter = inserter(new_inter, new_inter.end()); set_intersection(top_inter.begin(), top_inter.end(), v_tops.begin(), v_tops.end(), ni_inserter); - top_inter = move(new_inter); + top_inter = std::move(new_inter); succs.insert(v); } @@ -623,7 +625,7 @@ void remapReportsToPrograms(NGHolder &h, const ReportManager &rm) { } static -aligned_unique_ptr +bytecode_ptr constructNFA(const NGHolder &h_in, const ReportManager *rm, const map &fixed_depth_tops, const map>> &triggers, @@ -682,7 +684,7 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm, zombies, do_accel, compress_state, hint, cc); } -aligned_unique_ptr +bytecode_ptr constructNFA(const NGHolder &h_in, const ReportManager *rm, const map &fixed_depth_tops, const map>> &triggers, @@ -696,7 +698,7 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm, #ifndef RELEASE_BUILD // Variant that allows a hint to be specified. -aligned_unique_ptr +bytecode_ptr constructNFA(const NGHolder &h_in, const ReportManager *rm, const map &fixed_depth_tops, const map>> &triggers, @@ -709,8 +711,8 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm, #endif // RELEASE_BUILD static -aligned_unique_ptr constructReversedNFA_i(const NGHolder &h_in, u32 hint, - const CompileContext &cc) { +bytecode_ptr constructReversedNFA_i(const NGHolder &h_in, u32 hint, + const CompileContext &cc) { // Make a mutable copy of the graph that we can renumber etc. NGHolder h; cloneHolder(h, h_in); @@ -739,16 +741,16 @@ aligned_unique_ptr constructReversedNFA_i(const NGHolder &h_in, u32 hint, zombies, false, false, hint, cc); } -aligned_unique_ptr constructReversedNFA(const NGHolder &h_in, - const CompileContext &cc) { +bytecode_ptr constructReversedNFA(const NGHolder &h_in, + const CompileContext &cc) { u32 hint = INVALID_NFA; // no hint return constructReversedNFA_i(h_in, hint, cc); } #ifndef RELEASE_BUILD // Variant that allows a hint to be specified. -aligned_unique_ptr constructReversedNFA(const NGHolder &h_in, u32 hint, - const CompileContext &cc) { +bytecode_ptr constructReversedNFA(const NGHolder &h_in, u32 hint, + const CompileContext &cc) { return constructReversedNFA_i(h_in, hint, cc); } #endif // RELEASE_BUILD diff --git a/src/nfagraph/ng_limex.h b/src/nfagraph/ng_limex.h index 1e36e03dc..9bf46d693 100644 --- a/src/nfagraph/ng_limex.h +++ b/src/nfagraph/ng_limex.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Limex NFA construction code. */ @@ -35,7 +36,7 @@ #include "ue2common.h" #include "som/som.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include #include @@ -51,7 +52,8 @@ class NGHolder; class ReportManager; struct CompileContext; -/** \brief Determine if the given graph is implementable as an NFA. +/** + * \brief Determine if the given graph is implementable as an NFA. * * Returns zero if the NFA is not implementable (usually because it has too * many states for any of our models). Otherwise returns the number of states. @@ -62,11 +64,14 @@ struct CompileContext; u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm, const CompileContext &cc); -/** \brief Late-stage graph reductions. 
+/** + * \brief Late-stage graph reductions. * * This will call \ref removeRedundancy and apply its changes to the given - * holder only if it is implementable afterwards. */ -void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm, + * holder only if it is implementable afterwards. + */ +void reduceImplementableGraph(NGHolder &g, som_type som, + const ReportManager *rm, const CompileContext &cc); /** @@ -79,7 +84,8 @@ void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm u32 countAccelStates(const NGHolder &g, const ReportManager *rm, const CompileContext &cc); -/** \brief Construct an NFA from the given NFAGraph. +/** + * \brief Construct an NFA from the given graph. * * Returns zero if the NFA is not implementable (usually because it has too * many states for any of our models). Otherwise returns the number of states. @@ -90,23 +96,25 @@ u32 countAccelStates(const NGHolder &g, const ReportManager *rm, * Note: this variant of the function allows a model to be specified with the * \a hint parameter. */ -aligned_unique_ptr +bytecode_ptr constructNFA(const NGHolder &g, const ReportManager *rm, const std::map &fixed_depth_tops, const std::map>> &triggers, bool compress_state, const CompileContext &cc); -/** \brief Build a reverse NFA from the graph given, which should have already +/** + * \brief Build a reverse NFA from the graph given, which should have already * been reversed. * * Used for reverse NFAs used in SOM mode. */ -aligned_unique_ptr constructReversedNFA(const NGHolder &h, - const CompileContext &cc); +bytecode_ptr constructReversedNFA(const NGHolder &h, + const CompileContext &cc); #ifndef RELEASE_BUILD -/** \brief Construct an NFA (with model type hint) from the given NFAGraph. +/** + * \brief Construct an NFA (with model type hint) from the given graph. * * Returns zero if the NFA is not implementable (usually because it has too * many states for any of our models). Otherwise returns the number of states. @@ -117,19 +125,20 @@ aligned_unique_ptr constructReversedNFA(const NGHolder &h, * Note: this variant of the function allows a model to be specified with the * \a hint parameter. */ -aligned_unique_ptr +bytecode_ptr constructNFA(const NGHolder &g, const ReportManager *rm, const std::map &fixed_depth_tops, const std::map>> &triggers, bool compress_state, u32 hint, const CompileContext &cc); -/** \brief Build a reverse NFA (with model type hint) from the graph given, +/** + * \brief Build a reverse NFA (with model type hint) from the graph given, * which should have already been reversed. * * Used for reverse NFAs used in SOM mode. 
*/ -aligned_unique_ptr constructReversedNFA(const NGHolder &h, u32 hint, - const CompileContext &cc); +bytecode_ptr constructReversedNFA(const NGHolder &h, u32 hint, + const CompileContext &cc); #endif // RELEASE_BUILD diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp index bfba7c71b..80e08a7f9 100644 --- a/src/nfagraph/ng_limex_accel.cpp +++ b/src/nfagraph/ng_limex_accel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,7 +37,6 @@ #include "ue2common.h" #include "nfa/accel.h" -#include "nfa/multiaccel_compilehelper.h" #include "util/bitutils.h" // for CASE_CLEAR #include "util/charreach.h" @@ -45,12 +44,16 @@ #include "util/container.h" #include "util/dump_charclass.h" #include "util/graph_range.h" +#include "util/small_vector.h" #include "util/target_info.h" #include #include +#include + using namespace std; +using boost::adaptors::map_keys; namespace ue2 { @@ -135,15 +138,15 @@ void findAccelFriends(const NGHolder &g, NFAVertex v, static void findPaths(const NGHolder &g, NFAVertex v, const vector &refined_cr, - vector > *paths, + vector> *paths, const flat_set &forbidden, u32 depth) { static const u32 MAGIC_TOO_WIDE_NUMBER = 16; if (!depth) { - paths->push_back(vector()); + paths->push_back({}); return; } if (v == g.accept || v == g.acceptEod) { - paths->push_back(vector()); + paths->push_back({}); if (!generates_callbacks(g) || v == g.acceptEod) { paths->back().push_back(CharReach()); /* red tape options */ } @@ -157,42 +160,37 @@ void findPaths(const NGHolder &g, NFAVertex v, if (out_degree(v, g) >= MAGIC_TOO_WIDE_NUMBER || hasSelfLoop(v, g)) { /* give up on pushing past this point */ - paths->push_back(vector()); - vector &p = paths->back(); - p.push_back(cr); + paths->push_back({cr}); return; } + vector> curr; for (auto w : adjacent_vertices_range(v, g)) { if (contains(forbidden, w)) { /* path has looped back to one of the active+boring acceleration * states. We can ignore this path if we have sufficient back- * off. */ - paths->push_back(vector()); - paths->back().push_back(CharReach()); + paths->push_back({CharReach()}); continue; } u32 new_depth = depth - 1; - vector > curr; do { curr.clear(); findPaths(g, w, refined_cr, &curr, forbidden, new_depth); } while (new_depth-- && curr.size() >= MAGIC_TOO_WIDE_NUMBER); - for (vector >::iterator it = curr.begin(); - it != curr.end(); ++it) { - paths->push_back(vector()); - vector &p = paths->back(); - p.swap(*it); - p.push_back(cr); + for (auto &c : curr) { + c.push_back(cr); + paths->push_back(std::move(c)); } } } +namespace { struct SAccelScheme { - SAccelScheme(const CharReach &cr_in, u32 offset_in) - : cr(cr_in), offset(offset_in) { + SAccelScheme(CharReach cr_in, u32 offset_in) + : cr(std::move(cr_in)), offset(offset_in) { assert(offset <= MAX_ACCEL_DEPTH); } @@ -215,30 +213,43 @@ struct SAccelScheme { CharReach cr = CharReach::dot(); u32 offset = MAX_ACCEL_DEPTH + 1; }; +} + +/** + * \brief Limit on the number of (recursive) calls to findBestInternal(). 
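+ *
+ * The scheme search explores combinations of character classes along each
+ * path and is combinatorial in the worst case, so this cap bounds compile
+ * time on pathological path sets; hitting it only means a possibly less
+ * optimal acceleration scheme is kept, not a correctness problem.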
+ */ +static constexpr size_t MAX_FINDBEST_CALLS = 1000000; static -void findBest(vector >::const_iterator pb, - vector >::const_iterator pe, - const SAccelScheme &curr, SAccelScheme *best) { +void findBestInternal(vector>::const_iterator pb, + vector>::const_iterator pe, + size_t *num_calls, const SAccelScheme &curr, + SAccelScheme *best) { assert(curr.offset <= MAX_ACCEL_DEPTH); + + if (++(*num_calls) > MAX_FINDBEST_CALLS) { + DEBUG_PRINTF("hit num_calls limit %zu\n", *num_calls); + return; + } + DEBUG_PRINTF("paths left %zu\n", pe - pb); if (pb == pe) { if (curr < *best) { - DEBUG_PRINTF("new best\n"); *best = curr; + DEBUG_PRINTF("new best: count=%zu, class=%s, offset=%u\n", + best->cr.count(), describeClass(best->cr).c_str(), + best->offset); } - *best = curr; return; } DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin()); - vector priority_path; + small_vector priority_path; priority_path.reserve(pb->size()); u32 i = 0; - for (vector::const_iterator p = pb->begin(); p != pb->end(); - ++p, i++) { - SAccelScheme as(*p | curr.cr, MAX(i, curr.offset)); + for (auto p = pb->begin(); p != pb->end(); ++p, i++) { + SAccelScheme as(*p | curr.cr, max(i, curr.offset)); if (*best < as) { DEBUG_PRINTF("worse\n"); continue; @@ -259,18 +270,13 @@ void findBest(vector >::const_iterator pb, } DEBUG_PRINTF("---\n"); - for (vector::const_iterator it = priority_path.begin(); - it != priority_path.end(); ++it) { - DEBUG_PRINTF("%u:|| = %zu; p remaining len %zu\n", i, it->cr.count(), - priority_path.end() - it); - - SAccelScheme in = move(*it); - + for (const SAccelScheme &in : priority_path) { + DEBUG_PRINTF("in: count %zu\n", in.cr.count()); if (*best < in) { DEBUG_PRINTF("worse\n"); continue; } - findBest(pb + 1, pe, in, best); + findBestInternal(pb + 1, pe, num_calls, in, best); if (curr.cr == best->cr) { return; /* could only get better by offset */ @@ -278,9 +284,23 @@ void findBest(vector >::const_iterator pb, } } +static +SAccelScheme findBest(const vector> &paths, + const CharReach &terminating) { + SAccelScheme curr(terminating, 0U); + SAccelScheme best; + size_t num_calls = 0; + findBestInternal(paths.begin(), paths.end(), &num_calls, curr, &best); + DEBUG_PRINTF("findBest completed, num_calls=%zu\n", num_calls); + DEBUG_PRINTF("selected scheme: count=%zu, class=%s, offset=%u\n", + best.cr.count(), describeClass(best.cr).c_str(), best.offset); + return best; +} + +namespace { struct DAccelScheme { - DAccelScheme(const CharReach &cr_in, u32 offset_in) - : double_cr(cr_in), double_offset(offset_in) { + DAccelScheme(CharReach cr_in, u32 offset_in) + : double_cr(std::move(cr_in)), double_offset(offset_in) { assert(double_offset <= MAX_ACCEL_DEPTH); } @@ -319,6 +339,7 @@ struct DAccelScheme { CharReach double_cr; u32 double_offset = 0; }; +} static DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1, @@ -391,11 +412,10 @@ void findDoubleBest(vector >::const_iterator pb, DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin()); - vector priority_path; + small_vector priority_path; priority_path.reserve(pb->size()); u32 i = 0; - for (vector::const_iterator p = pb->begin(); - p != pb->end() && next(p) != pb->end(); + for (auto p = pb->begin(); p != pb->end() && next(p) != pb->end(); ++p, i++) { DAccelScheme as = make_double_accel(curr, *p, *next(p), i); if (*best < as) { @@ -411,9 +431,7 @@ void findDoubleBest(vector >::const_iterator pb, best->double_byte.size(), best->double_cr.count(), best->double_offset); - for (vector::const_iterator it = priority_path.begin(); - it != priority_path.end(); 
++it) { - DAccelScheme in = move(*it); + for (const DAccelScheme &in : priority_path) { DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n", in.double_byte.size(), in.double_cr.count(), in.double_offset); @@ -427,14 +445,12 @@ void findDoubleBest(vector >::const_iterator pb, #ifdef DEBUG static -void dumpPaths(const vector > &paths) { - for (vector >::const_iterator p = paths.begin(); - p != paths.end(); ++p) { +void dumpPaths(const vector> &paths) { + for (const auto &path : paths) { DEBUG_PRINTF("path: ["); - for (vector::const_iterator it = p->begin(); it != p->end(); - ++it) { + for (const auto &cr : path) { printf(" ["); - describeClass(stdout, *it, 20, CC_OUT_TEXT); + describeClass(stdout, cr, 20, CC_OUT_TEXT); printf("]"); } printf(" ]\n"); @@ -545,14 +561,14 @@ DAccelScheme findBestDoubleAccelScheme(vector > paths, #define MAX_EXPLORE_PATHS 40 -AccelScheme findBestAccelScheme(vector > paths, +AccelScheme findBestAccelScheme(vector> paths, const CharReach &terminating, bool look_for_double_byte) { AccelScheme rv; if (look_for_double_byte) { DAccelScheme da = findBestDoubleAccelScheme(paths, terminating); if (da.double_byte.size() <= DOUBLE_SHUFTI_LIMIT) { - rv.double_byte = move(da.double_byte); + rv.double_byte = std::move(da.double_byte); rv.double_cr = move(da.double_cr); rv.double_offset = da.double_offset; } @@ -568,21 +584,18 @@ AccelScheme findBestAccelScheme(vector > paths, /* if we were smart we would do something netflowy on the paths to find the * best cut. But we aren't, so we will just brute force it. */ - SAccelScheme curr(terminating, 0U); - SAccelScheme best; - findBest(paths.begin(), paths.end(), curr, &best); + SAccelScheme best = findBest(paths, terminating); /* find best is a bit lazy in terms of minimising the offset, see if we can * make it better. need to find the min max offset that we need.*/ u32 offset = 0; - for (vector >::iterator p = paths.begin(); - p != paths.end(); ++p) { + for (const auto &path : paths) { u32 i = 0; - for (vector::iterator it = p->begin(); it != p->end(); - ++it, i++) { - if (it->isSubsetOf(best.cr)) { + for (const auto &cr : path) { + if (cr.isSubsetOf(best.cr)) { break; } + i++; } offset = MAX(offset, i); } @@ -620,17 +633,15 @@ AccelScheme nfaFindAccel(const NGHolder &g, const vector &verts, return AccelScheme(); /* invalid scheme */ } - vector > paths; + vector> paths; flat_set ignore_vert_set(verts.begin(), verts.end()); /* Note: we can not in general (TODO: ignore when possible) ignore entries * into the bounded repeat cyclic states as that is when the magic happens */ - for (map::const_iterator it - = br_cyclic.begin(); - it != br_cyclic.end(); ++it) { + for (auto v : br_cyclic | map_keys) { /* TODO: can allow if repeatMin <= 1 ? 
*/ - ignore_vert_set.erase(it->first); + ignore_vert_set.erase(v); } for (auto v : verts) { @@ -643,9 +654,8 @@ AccelScheme nfaFindAccel(const NGHolder &g, const vector &verts, } /* paths built wrong: reverse them */ - for (vector >::iterator it = paths.begin(); - it != paths.end(); ++it) { - reverse(it->begin(), it->end()); + for (auto &path : paths) { + reverse(path.begin(), path.end()); } return findBestAccelScheme(std::move(paths), terminating, @@ -691,134 +701,6 @@ NFAVertex get_sds_or_proxy(const NGHolder &g) { return g.startDs; } -static -NFAVertex find_next(const NFAVertex v, const NGHolder &g) { - NFAVertex res = NGHolder::null_vertex(); - for (NFAVertex u : adjacent_vertices_range(v, g)) { - if (u != v) { - res = u; - break; - } - } - return res; -} - -/** \brief Check if vertex \a v is a multi accelerable state (for a limex NFA). */ -MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g, - const vector &states, - const CompileContext &cc) { - // For a set of states to be accelerable, we basically have to have only - // one state to accelerate. - if (states.size() != 1) { - DEBUG_PRINTF("can't accelerate multiple states\n"); - return MultibyteAccelInfo(); - } - - // Get our base vertex - NFAVertex v = states[0]; - - // We need the base vertex to be a self-looping dotall leading to exactly - // one vertex. - if (!hasSelfLoop(v, g)) { - DEBUG_PRINTF("base vertex has self-loop\n"); - return MultibyteAccelInfo(); - } - - if (!g[v].char_reach.all()) { - DEBUG_PRINTF("can't accelerate anything but dot\n"); - return MultibyteAccelInfo(); - } - - if (proper_out_degree(v, g) != 1) { - DEBUG_PRINTF("can't accelerate states with multiple successors\n"); - return MultibyteAccelInfo(); - } - - // find our start vertex - NFAVertex cur = find_next(v, g); - if (cur == NGHolder::null_vertex()) { - DEBUG_PRINTF("invalid start vertex\n"); - return MultibyteAccelInfo(); - } - - bool has_offset = false; - u32 offset = 0; - CharReach cr = g[cur].char_reach; - - // if we start with a dot, we have an offset, so defer figuring out the - // real CharReach for this accel scheme - if (cr == CharReach::dot()) { - has_offset = true; - offset = 1; - } - - // figure out our offset - while (has_offset) { - // vertices have to have no self loops - if (hasSelfLoop(cur, g)) { - DEBUG_PRINTF("can't have self-loops\n"); - return MultibyteAccelInfo(); - } - - // we have to have exactly 1 successor to have this acceleration scheme - if (out_degree(cur, g) != 1) { - DEBUG_PRINTF("can't have multiple successors\n"); - return MultibyteAccelInfo(); - } - - cur = *adjacent_vertices(cur, g).first; - - // if we met a special vertex, bail out - if (is_special(cur, g)) { - DEBUG_PRINTF("can't have special vertices\n"); - return MultibyteAccelInfo(); - } - - // now, get the real char reach - if (g[cur].char_reach != CharReach::dot()) { - cr = g[cur].char_reach; - has_offset = false; - } else { - offset++; - } - } - - // now, fire up the compilation machinery - target_t ti = cc.target_info; - unsigned max_len = ti.has_avx2() ? 
MULTIACCEL_MAX_LEN_AVX2 : MULTIACCEL_MAX_LEN_SSE; - MultiaccelCompileHelper mac(cr, offset, max_len); - - while (mac.canAdvance()) { - // vertices have to have no self loops - if (hasSelfLoop(cur, g)) { - break; - } - - // we have to have exactly 1 successor to have this acceleration scheme - if (out_degree(cur, g) != 1) { - break; - } - - cur = *adjacent_vertices(cur, g).first; - - // if we met a special vertex, bail out - if (is_special(cur, g)) { - break; - } - - mac.advance(g[cur].char_reach); - } - MultibyteAccelInfo mai = mac.getBestScheme(); -#ifdef DEBUG - DEBUG_PRINTF("Multibyte acceleration scheme: type: %u offset: %u lengths: %u,%u\n", - mai.type, mai.offset, mai.len1, mai.len2); - for (size_t c = mai.cr.find_first(); c != CharReach::npos; c = mai.cr.find_next(c)) { - DEBUG_PRINTF("multibyte accel char: %zu\n", c); - } -#endif - return mai; -} - /** \brief Check if vertex \a v is an accelerable state (for a limex NFA). */ bool nfaCheckAccel(const NGHolder &g, NFAVertex v, const vector &refined_cr, diff --git a/src/nfagraph/ng_limex_accel.h b/src/nfagraph/ng_limex_accel.h index cb3d12104..f0c98db2c 100644 --- a/src/nfagraph/ng_limex_accel.h +++ b/src/nfagraph/ng_limex_accel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -51,9 +51,6 @@ namespace ue2 { #define MAX_MERGED_ACCEL_STOPS 200 #define ACCEL_MAX_STOP_CHAR 24 #define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */ -#define MULTIACCEL_MIN_LEN 3 -#define MULTIACCEL_MAX_LEN_SSE 15 -#define MULTIACCEL_MAX_LEN_AVX2 31 // forward-declaration of CompileContext struct CompileContext; @@ -84,11 +81,6 @@ bool nfaCheckAccel(const NGHolder &g, NFAVertex v, const std::map &br_cyclic, AccelScheme *as, bool allow_wide); -/** \brief Check if vertex \a v is a multi accelerable state (for a limex NFA). 
- */ -MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g, - const std::vector &verts, - const CompileContext &cc); } // namespace ue2 diff --git a/src/nfagraph/ng_literal_analysis.cpp b/src/nfagraph/ng_literal_analysis.cpp index a5f3468b8..a6664b07e 100644 --- a/src/nfagraph/ng_literal_analysis.cpp +++ b/src/nfagraph/ng_literal_analysis.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -734,55 +734,30 @@ vector scoreEdges(const NGHolder &g, const flat_set &known_bad) { return scores; } -static -bool splitOffLeadingLiteral_i(const NGHolder &g, bool anch, - ue2_literal *lit_out, - NGHolder *rhs) { - NFAVertex u; - NFAVertex v; - - if (!anch) { - DEBUG_PRINTF("looking for leading floating literal\n"); - set s_succ; - insert(&s_succ, adjacent_vertices(g.start, g)); - - set sds_succ; - insert(&sds_succ, adjacent_vertices(g.startDs, g)); - - bool floating = is_subset_of(s_succ, sds_succ); - if (!floating) { - DEBUG_PRINTF("not floating\n"); - return false; - } - - sds_succ.erase(g.startDs); - if (sds_succ.size() != 1) { - DEBUG_PRINTF("branchy root\n"); - return false; - } - - u = g.startDs; - v = *sds_succ.begin(); - } else { - DEBUG_PRINTF("looking for leading anchored literal\n"); +bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, + NGHolder *rhs) { + DEBUG_PRINTF("looking for leading floating literal\n"); + set s_succ; + insert(&s_succ, adjacent_vertices(g.start, g)); - if (proper_out_degree(g.startDs, g)) { - DEBUG_PRINTF("not anchored\n"); - return false; - } + set sds_succ; + insert(&sds_succ, adjacent_vertices(g.startDs, g)); - set s_succ; - insert(&s_succ, adjacent_vertices(g.start, g)); - s_succ.erase(g.startDs); - if (s_succ.size() != 1) { - DEBUG_PRINTF("branchy root\n"); - return false; - } + bool floating = is_subset_of(s_succ, sds_succ); + if (!floating) { + DEBUG_PRINTF("not floating\n"); + return false; + } - u = g.start; - v = *s_succ.begin(); + sds_succ.erase(g.startDs); + if (sds_succ.size() != 1) { + DEBUG_PRINTF("branchy root\n"); + return false; } + NFAVertex u = g.startDs; + NFAVertex v = *sds_succ.begin(); + while (true) { DEBUG_PRINTF("validating vertex %zu\n", g[v].index); @@ -838,8 +813,7 @@ bool splitOffLeadingLiteral_i(const NGHolder &g, bool anch, assert(u != g.startDs); ue2::unordered_map rhs_map; - vector pivots; - insert(&pivots, pivots.end(), adjacent_vertices(u, g)); + vector pivots = make_vector_from(adjacent_vertices(u, g)); splitRHS(g, pivots, rhs, &rhs_map); DEBUG_PRINTF("literal is '%s' (len %zu)\n", dumpString(*lit_out).c_str(), @@ -848,17 +822,6 @@ bool splitOffLeadingLiteral_i(const NGHolder &g, bool anch, return true; } -bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, - NGHolder *rhs) { - return splitOffLeadingLiteral_i(g, false, lit_out, rhs); -} - -bool splitOffAnchoredLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, - NGHolder *rhs) { - return splitOffLeadingLiteral_i(g, true, lit_out, rhs); -} - - bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out) { if (in_degree(g.acceptEod, g) != 1) { return false; diff --git a/src/nfagraph/ng_literal_analysis.h b/src/nfagraph/ng_literal_analysis.h index 6fd9c5251..6bb875561 100644 --- a/src/nfagraph/ng_literal_analysis.h +++ b/src/nfagraph/ng_literal_analysis.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * 
Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -87,9 +87,6 @@ u64a sanitizeAndCompressAndScore(std::set &s); bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, NGHolder *rhs); -bool splitOffAnchoredLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, - NGHolder *rhs); - bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out); /** \brief Returns true if the given literal is the only thing in the graph, diff --git a/src/nfagraph/ng_literal_component.cpp b/src/nfagraph/ng_literal_component.cpp index e3cfe8678..de05e4909 100644 --- a/src/nfagraph/ng_literal_component.cpp +++ b/src/nfagraph/ng_literal_component.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,12 +30,15 @@ * \brief Literal Component Splitting. Identifies literals that span the * graph and moves them into Rose. */ + +#include "ng_literal_component.h" + #include "grey.h" #include "ng.h" -#include "ng_literal_component.h" #include "ng_prune.h" #include "ng_util.h" #include "ue2common.h" +#include "compiler/compiler.h" #include "rose/rose_build.h" #include "util/container.h" #include "util/graph.h" @@ -47,8 +50,8 @@ using namespace std; namespace ue2 { static -bool isLiteralChar(const NGWrapper &g, NFAVertex v, - bool &nocase, bool &casefixed) { +bool isLiteralChar(const NGHolder &g, NFAVertex v, bool &nocase, + bool &casefixed) { const CharReach &cr = g[v].char_reach; const size_t num = cr.count(); if (num > 2) { @@ -93,7 +96,7 @@ void addToString(string &s, const NGHolder &g, NFAVertex v) { } static -bool splitOffLiteral(NG &ng, NGWrapper &g, NFAVertex v, const bool anchored, +bool splitOffLiteral(NG &ng, NGHolder &g, NFAVertex v, const bool anchored, set &dead) { DEBUG_PRINTF("examine vertex %zu\n", g[v].index); bool nocase = false, casefixed = false; @@ -185,7 +188,7 @@ bool splitOffLiteral(NG &ng, NGWrapper &g, NFAVertex v, const bool anchored, } /** \brief Split off literals. True if any changes were made to the graph. */ -bool splitOffLiterals(NG &ng, NGWrapper &g) { +bool splitOffLiterals(NG &ng, NGHolder &g) { if (!ng.cc.grey.allowLiteral) { return false; } diff --git a/src/nfagraph/ng_literal_component.h b/src/nfagraph/ng_literal_component.h index dc177c404..1f284ce36 100644 --- a/src/nfagraph/ng_literal_component.h +++ b/src/nfagraph/ng_literal_component.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,10 +37,10 @@ namespace ue2 { class NG; -class NGWrapper; +class NGHolder; /** \brief Split off literals. True if any changes were made to the graph. 
*/ -bool splitOffLiterals(NG &ng, NGWrapper &graph); +bool splitOffLiterals(NG &ng, NGHolder &g); } // namespace ue2 diff --git a/src/nfagraph/ng_mcclellan.cpp b/src/nfagraph/ng_mcclellan.cpp index 375086a46..9448a0bf3 100644 --- a/src/nfagraph/ng_mcclellan.cpp +++ b/src/nfagraph/ng_mcclellan.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,6 +42,8 @@ #include "util/bitfield.h" #include "util/determinise.h" #include "util/graph_range.h" +#include "util/hash.h" +#include "util/hash_dynamic_bitset.h" #include "util/make_unique.h" #include "util/report_manager.h" #include "util/ue2_containers.h" @@ -377,7 +379,9 @@ class Automaton_Base { NFAVertex v = sq.first; u32 vert_id = graph[v].index; squash.set(vert_id); - squash_mask[vert_id] = shrinkStateSet(sq.second); + squash_mask[vert_id] + = Automaton_Traits::copy_states(std::move(sq.second), + numStates); } cr_by_index = populateCR(graph, v_by_index, alpha); @@ -385,21 +389,11 @@ class Automaton_Base { dynamic_bitset<> temp(numStates); markToppableStarts(graph, unused, single_trigger, triggers, &temp); - toppable = Automaton_Traits::copy_states(temp, numStates); + toppable = Automaton_Traits::copy_states(std::move(temp), + numStates); } } -private: - // Convert an NFAStateSet (as used by the squash code) into a StateSet - StateSet shrinkStateSet(const NFAStateSet &in) const { - StateSet out = Automaton_Traits::init_states(numStates); - for (size_t i = in.find_first(); i != in.npos && i < out.size(); - i = in.find_next(i)) { - out.set(i); - } - return out; - } - public: void transition(const StateSet &in, StateSet *next) { transition_graph(*this, v_by_index, in, next); @@ -467,13 +461,13 @@ class Automaton_Base { struct Big_Traits { using StateSet = dynamic_bitset<>; - using StateMap = map; + using StateMap = unordered_map; static StateSet init_states(u32 num) { return StateSet(num); } - static StateSet copy_states(const dynamic_bitset<> &in, UNUSED u32 num) { + static StateSet copy_states(dynamic_bitset<> in, UNUSED u32 num) { assert(in.size() == num); return in; } diff --git a/src/nfagraph/ng_prefilter.cpp b/src/nfagraph/ng_prefilter.cpp index 012b4e8d8..3cd9d06d8 100644 --- a/src/nfagraph/ng_prefilter.cpp +++ b/src/nfagraph/ng_prefilter.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Prefilter Reductions. * * This file contains routines for reducing the size of an NFA graph that we @@ -92,13 +93,13 @@ struct RegionInfo { u32 id; //!< region id deque vertices; //!< vertices in the region CharReach reach; //!< union of region reach - depth minWidth = 0; //!< min width of region subgraph - depth maxWidth = depth::infinity(); //!< max width of region subgraph + depth minWidth{0}; //!< min width of region subgraph + depth maxWidth{depth::infinity()}; //!< max width of region subgraph bool atBoundary = false; //!< region is next to an accept // Bigger score is better. size_t score() const { - // FIXME: charreach should be a signal? + // TODO: charreach should be a signal? 
size_t numVertices = vertices.size(); if (atBoundary) { return numVertices - min(PENALTY_BOUNDARY, numVertices); diff --git a/src/nfagraph/ng_puff.cpp b/src/nfagraph/ng_puff.cpp index 7281471fc..984518b0f 100644 --- a/src/nfagraph/ng_puff.cpp +++ b/src/nfagraph/ng_puff.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -94,8 +94,7 @@ void wireNewAccepts(NGHolder &g, NFAVertex head, static bool isFixedDepth(const NGHolder &g, NFAVertex v) { // If the vertex is reachable from startDs, it can't be fixed depth. - vector depthFromStartDs; - calcDepthsFrom(g, g.startDs, depthFromStartDs); + auto depthFromStartDs = calcDepthsFrom(g, g.startDs); u32 idx = g[v].index; const DepthMinMax &ds = depthFromStartDs.at(idx); @@ -104,8 +103,7 @@ bool isFixedDepth(const NGHolder &g, NFAVertex v) { return false; } - vector depthFromStart; - calcDepthsFrom(g, g.start, depthFromStart); + auto depthFromStart = calcDepthsFrom(g, g.start); /* we can still consider the head of a puff chain as at fixed depth if * it has a self-loop: so we look at all the preds of v (other than v diff --git a/src/nfagraph/ng_region.cpp b/src/nfagraph/ng_region.cpp index 0ecd7bd63..91904b466 100644 --- a/src/nfagraph/ng_region.cpp +++ b/src/nfagraph/ng_region.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -70,65 +70,61 @@ using namespace std; namespace ue2 { -typedef ue2::unordered_set BackEdgeSet; -typedef boost::filtered_graph> - AcyclicGraph; +using BackEdgeSet = unordered_set; +using AcyclicGraph = + boost::filtered_graph>; namespace { struct exit_info { explicit exit_info(NFAVertex v) : exit(v) {} NFAVertex exit; - ue2::unordered_set open; + flat_set open; }; } static void checkAndAddExitCandidate(const AcyclicGraph &g, - const ue2::unordered_set &r, - NFAVertex v, vector *exits) { - // set when we find our first candidate. 
-    ue2::unordered_set<NFAVertex> *open = nullptr;
+                              const unordered_set<NFAVertex> &r, NFAVertex v,
+                              vector<exit_info> &exits) {
+    exit_info v_exit(v);
+    auto &open = v_exit.open;
     /* find the set of vertices reachable from v which are not in r */
     for (auto w : adjacent_vertices_range(v, g)) {
-        if (!contains(r, NFAVertex(w))) {
-            if (!open) {
-                exits->push_back(exit_info(NFAVertex(v)));
-                open = &exits->back().open;
-            }
-            open->insert(NFAVertex(w));
+        if (!contains(r, w)) {
+            open.insert(w);
         }
     }

-    if (open) {
+    if (!open.empty()) {
         DEBUG_PRINTF("exit %zu\n", g[v].index);
+        exits.push_back(move(v_exit));
     }
 }

 static
-void findExits(const AcyclicGraph &g, const ue2::unordered_set<NFAVertex> &r,
-               vector<exit_info> *exits) {
-    exits->clear();
-
+void findExits(const AcyclicGraph &g, const unordered_set<NFAVertex> &r,
+               vector<exit_info> &exits) {
+    exits.clear();
     for (auto v : r) {
         checkAndAddExitCandidate(g, r, v, exits);
     }
 }

 static
-void refineExits(const AcyclicGraph &g, const ue2::unordered_set<NFAVertex> &r,
-                 NFAVertex new_v, vector<exit_info> *exits) {
-    for (u32 i = 0; i < exits->size(); i++) {
-        (*exits)[i].open.erase(new_v); /* new_v is no long an open edge */
-        if ((*exits)[i].open.empty()) { /* no open edges: no longer an exit */
-            /* shuffle to back and kill */
-            (*exits)[i] = exits->back();
-            exits->pop_back();
-            i--;
-        }
+void refineExits(const AcyclicGraph &g, const unordered_set<NFAVertex> &r,
+                 NFAVertex new_v, vector<exit_info> &exits) {
+    /* new_v is no longer an open edge */
+    for (auto &exit : exits) {
+        exit.open.erase(new_v);
     }
+    /* no open edges: no longer an exit */
+    exits.erase(remove_if(exits.begin(), exits.end(),
+                          [&](const exit_info &exit) { return exit.open.empty(); }),
+                exits.end());
+
     checkAndAddExitCandidate(g, r, new_v, exits);
 }

@@ -136,7 +132,7 @@ void refineExits(const AcyclicGraph &g, const ue2::unordered_set<NFAVertex> &r,
  */
 static
 bool exitValid(UNUSED const AcyclicGraph &g, const vector<exit_info> &exits,
-               const ue2::unordered_set<NFAVertex> &open_jumps) {
+               const flat_set<NFAVertex> &open_jumps) {
     if (exits.empty() || (exits.size() < 2 && open_jumps.empty())) {
         return true;
     }
@@ -165,8 +161,8 @@ bool exitValid(UNUSED const AcyclicGraph &g, const vector<exit_info> &exits,
 }

 static
-void setRegion(const ue2::unordered_set<NFAVertex> &r, u32 rid,
-               ue2::unordered_map<NFAVertex, u32> &regions) {
+void setRegion(const unordered_set<NFAVertex> &r, u32 rid,
+               unordered_map<NFAVertex, u32> &regions) {
     for (auto v : r) {
         regions[v] = rid;
     }
@@ -176,34 +172,36 @@ static
 void buildInitialCandidate(const AcyclicGraph &g,
                            vector<NFAVertex>::const_reverse_iterator &it,
                            const vector<NFAVertex>::const_reverse_iterator &ite,
-                           ue2::unordered_set<NFAVertex> *candidate,
+                           unordered_set<NFAVertex> &candidate,
                            /* in exits of prev region;
                             * out exits from candidate */
-                           vector<exit_info> *exits,
-                           ue2::unordered_set<NFAVertex> *open_jumps) {
+                           vector<exit_info> &exits,
+                           flat_set<NFAVertex> &open_jumps) {
     if (it == ite) {
-        candidate->clear();
-        exits->clear();
+        candidate.clear();
+        exits.clear();
         return;
     }

-    if (exits->empty()) {
+    if (exits.empty()) {
         DEBUG_PRINTF("odd\n");
-        candidate->clear();
+        candidate.clear();
         DEBUG_PRINTF("adding %zu to initial\n", g[*it].index);
-        candidate->insert(*it);
-        open_jumps->erase(*it);
-        checkAndAddExitCandidate(g, *candidate, *it, exits);
+        candidate.insert(*it);
+        open_jumps.erase(*it);
+        checkAndAddExitCandidate(g, candidate, *it, exits);
         ++it;
         return;
     }

-    ue2::unordered_set<NFAVertex> enters = (*exits)[0].open;
-    candidate->clear();
+    // Note: findExits() will clear exits, so it's safe to mutate/move its
+    // elements here.
+    auto &enters = exits.front().open;
+    candidate.clear();

     for (; it != ite; ++it) {
         DEBUG_PRINTF("adding %zu to initial\n", g[*it].index);
-        candidate->insert(*it);
+        candidate.insert(*it);
         if (contains(enters, *it)) {
             break;
         }
@@ -211,33 +209,34 @@ void buildInitialCandidate(const AcyclicGraph &g,

     if (it != ite) {
         enters.erase(*it);
-        open_jumps->swap(enters);
-        DEBUG_PRINTF("oj size = %zu\n", open_jumps->size());
+        open_jumps = move(enters);
+        DEBUG_PRINTF("oj size = %zu\n", open_jumps.size());
         ++it;
     } else {
-        open_jumps->clear();
+        open_jumps.clear();
     }

-    findExits(g, *candidate, exits);
+    findExits(g, candidate, exits);
 }

 static
 void findDagLeaders(const NGHolder &h, const AcyclicGraph &g,
                     const vector<NFAVertex> &topo,
-                    ue2::unordered_map<NFAVertex, u32> &regions) {
+                    unordered_map<NFAVertex, u32> &regions) {
     assert(!topo.empty());
     u32 curr_id = 0;
-    vector<NFAVertex>::const_reverse_iterator t_it = topo.rbegin();
-    vector<exit_info> exits;
-    ue2::unordered_set<NFAVertex> candidate;
-    ue2::unordered_set<NFAVertex> open_jumps;
+    auto t_it = topo.rbegin();
+    unordered_set<NFAVertex> candidate;
+    flat_set<NFAVertex> open_jumps;
     DEBUG_PRINTF("adding %zu to current\n", g[*t_it].index);
     assert(t_it != topo.rend());
     candidate.insert(*t_it++);
     DEBUG_PRINTF("adding %zu to current\n", g[*t_it].index);
     assert(t_it != topo.rend());
     candidate.insert(*t_it++);
-    findExits(g, candidate, &exits);
+
+    vector<exit_info> exits;
+    findExits(g, candidate, exits);

     while (t_it != topo.rend()) {
         assert(!candidate.empty());
@@ -253,14 +252,14 @@ void findDagLeaders(const NGHolder &h, const AcyclicGraph &g,
                 DEBUG_PRINTF("setting region %u\n", curr_id);
             }
             setRegion(candidate, curr_id++, regions);
-            buildInitialCandidate(g, t_it, topo.rend(), &candidate, &exits,
-                                  &open_jumps);
+            buildInitialCandidate(g, t_it, topo.rend(), candidate, exits,
+                                  open_jumps);
         } else {
             NFAVertex curr = *t_it;
             DEBUG_PRINTF("adding %zu to current\n", g[curr].index);
             candidate.insert(curr);
             open_jumps.erase(curr);
-            refineExits(g, candidate, *t_it, &exits);
+            refineExits(g, candidate, *t_it, exits);
             DEBUG_PRINTF("  open jumps %zu exits %zu\n", open_jumps.size(),
                          exits.size());
             ++t_it;
@@ -273,7 +272,7 @@ static
 void mergeUnderBackEdges(const NGHolder &g, const vector<NFAVertex> &topo,
                          const BackEdgeSet &backEdges,
-                         ue2::unordered_map<NFAVertex, u32> &regions) {
+                         unordered_map<NFAVertex, u32> &regions) {
     for (const auto &e : backEdges) {
         NFAVertex u = source(e, g);
         NFAVertex v = target(e, g);
@@ -343,7 +342,7 @@ void reorderSpecials(const NGHolder &w, const AcyclicGraph &acyclic_g,
 static
 void liftSinks(const AcyclicGraph &acyclic_g, vector<NFAVertex> &topoOrder) {
-    ue2::unordered_set<NFAVertex> sinks;
+    unordered_set<NFAVertex> sinks;
     for (auto v : vertices_range(acyclic_g)) {
         if (is_special(v, acyclic_g)) {
             continue;
         }
@@ -388,7 +387,7 @@ void liftSinks(const AcyclicGraph &acyclic_g, vector<NFAVertex> &topoOrder) {
         }
         NFAVertex s = *ri;
         DEBUG_PRINTF("handling sink %zu\n", acyclic_g[s].index);
-        ue2::unordered_set<NFAVertex> parents;
+        unordered_set<NFAVertex> parents;
         for (const auto &e : in_edges_range(s, acyclic_g)) {
             parents.insert(NFAVertex(source(e, acyclic_g)));
         }
@@ -416,6 +415,7 @@ vector<NFAVertex> buildTopoOrder(const NGHolder &w,
                                  const AcyclicGraph &acyclic_g,
                                  vector<boost::default_color_type> &colours) {
     vector<NFAVertex> topoOrder;
+    topoOrder.reserve(num_vertices(w));

     topological_sort(acyclic_g, back_inserter(topoOrder),
                      color_map(make_iterator_property_map(colours.begin(),
@@ -438,7 +438,7 @@ vector<NFAVertex> buildTopoOrder(const NGHolder &w,
     return topoOrder;
 }

-ue2::unordered_map<NFAVertex, u32> assignRegions(const NGHolder &g) {
+unordered_map<NFAVertex, u32> assignRegions(const NGHolder &g) {
     assert(hasCorrectlyNumberedVertices(g));
     const u32 numVertices = num_vertices(g);
DEBUG_PRINTF("assigning regions for %u vertices in holder\n", numVertices); @@ -460,7 +460,7 @@ ue2::unordered_map assignRegions(const NGHolder &g) { vector topoOrder = buildTopoOrder(g, acyclic_g, colours); // Everybody starts in region 0. - ue2::unordered_map regions; + unordered_map regions; regions.reserve(numVertices); for (auto v : vertices_range(g)) { regions.emplace(v, 0); diff --git a/src/nfagraph/ng_repeat.cpp b/src/nfagraph/ng_repeat.cpp index a16e2715b..60ad22009 100644 --- a/src/nfagraph/ng_repeat.cpp +++ b/src/nfagraph/ng_repeat.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -105,8 +105,8 @@ typedef boost::filtered_graph> RepeatGraph; struct ReachSubgraph { vector vertices; - depth repeatMin = 0; - depth repeatMax = 0; + depth repeatMin{0}; + depth repeatMax{0}; u32 minPeriod = 1; bool is_reset = false; enum RepeatType historyType = REPEAT_RING; @@ -118,13 +118,12 @@ struct ReachSubgraph { static void findInitDepths(const NGHolder &g, ue2::unordered_map &depths) { - vector d; - calcDepths(g, d); + auto d = calcDepths(g); for (auto v : vertices_range(g)) { - u32 idx = g[v].index; + size_t idx = g[v].index; assert(idx < d.size()); - depths.insert(make_pair(v, d[idx])); + depths.emplace(v, d[idx]); } } @@ -296,9 +295,8 @@ void splitSubgraph(const NGHolder &g, const deque &verts, ue2::unordered_map verts_map; // in g -> in verts_g fillHolder(&verts_g, g, verts, &verts_map); - NFAUndirectedGraph ug; ue2::unordered_map old2new; - createUnGraph(verts_g, true, true, ug, old2new); + auto ug = createUnGraph(verts_g, true, true, old2new); ue2::unordered_map repeatMap; @@ -587,8 +585,8 @@ bool processSubgraph(const NGHolder &g, ReachSubgraph &rsi, range.first, range.second); return false; } - rsi.repeatMin = range.first; - rsi.repeatMax = range.second; + rsi.repeatMin = depth(range.first); + rsi.repeatMax = depth(range.second); // If we've got a self-loop anywhere, we've got inf max. if (anySelfLoop(g, rsi.vertices.begin(), rsi.vertices.end())) { @@ -1020,9 +1018,8 @@ void buildReachSubgraphs(const NGHolder &g, vector &rs, return; } - NFAUndirectedGraph ug; unordered_map old2new; - createUnGraph(rg, true, true, ug, old2new); + auto ug = createUnGraph(rg, true, true, old2new); unordered_map repeatMap; @@ -2108,7 +2105,7 @@ void populateFixedTopInfo(const map &fixed_depth_tops, td = depth::infinity(); break; } - depth td_t = fixed_depth_tops.at(top); + depth td_t(fixed_depth_tops.at(top)); if (td == td_t) { continue; } else if (td == depth::infinity()) { @@ -2481,7 +2478,7 @@ bool isPureRepeat(const NGHolder &g, PureRepeat &repeat) { // have the same report set as the vertices in the repeat. 
if (repeat.bounds.min == depth(1) && g[g.start].reports == g[v].reports) { - repeat.bounds.min = 0; + repeat.bounds.min = depth(0); DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str()); } else { DEBUG_PRINTF("not a supported repeat\n"); diff --git a/src/nfagraph/ng_reports.cpp b/src/nfagraph/ng_reports.cpp index 3d18a6209..4e9b498df 100644 --- a/src/nfagraph/ng_reports.cpp +++ b/src/nfagraph/ng_reports.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -65,6 +65,26 @@ bool can_exhaust(const NGHolder &g, const ReportManager &rm) { return true; } +void set_report(NGHolder &g, ReportID internal_report) { + // First, wipe the report IDs on all vertices. + for (auto v : vertices_range(g)) { + g[v].reports.clear(); + } + + // Any predecessors of accept get our id. + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + g[v].reports.insert(internal_report); + } + + // Same for preds of acceptEod, except accept itself. + for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { + if (v == g.accept) { + continue; + } + g[v].reports.insert(internal_report); + } +} + /** Derive a maximum offset for the graph from the max_offset values of its * reports. Returns MAX_OFFSET for inf. */ u64a findMaxOffset(const NGHolder &g, const ReportManager &rm) { diff --git a/src/nfagraph/ng_reports.h b/src/nfagraph/ng_reports.h index 3047ff0b4..31c953088 100644 --- a/src/nfagraph/ng_reports.h +++ b/src/nfagraph/ng_reports.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,6 +48,10 @@ std::set all_reports(const NGHolder &g); /** True if *all* reports in the graph are exhaustible. */ bool can_exhaust(const NGHolder &g, const ReportManager &rm); +/** Replaces all existing reports on the holder with the provided internal + * report id. */ +void set_report(NGHolder &g, ReportID internal_report); + /** Derive a maximum offset for the graph from the max_offset values of its * reports. Returns MAX_OFFSET for inf. */ u64a findMaxOffset(const NGHolder &g, const ReportManager &rm); diff --git a/src/nfagraph/ng_rose.cpp b/src/nfagraph/ng_rose.cpp deleted file mode 100644 index 7066ab27d..000000000 --- a/src/nfagraph/ng_rose.cpp +++ /dev/null @@ -1,2977 +0,0 @@ -/* - * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Rose construction from NGHolder. - */ - -// #define DEBUG -// #define DEBUG_ROSE -#include "ng_rose.h" - -#include "grey.h" -#include "ng_depth.h" -#include "ng_dominators.h" -#include "ng_equivalence.h" -#include "ng_holder.h" -#include "ng_is_equal.h" -#include "ng_literal_analysis.h" -#include "ng_netflow.h" -#include "ng_prune.h" -#include "ng_redundancy.h" -#include "ng_region.h" -#include "ng_reports.h" -#include "ng_split.h" -#include "ng_util.h" -#include "ng_width.h" -#include "rose/rose_build.h" -#include "rose/rose_build_util.h" -#include "rose/rose_in_dump.h" -#include "rose/rose_in_graph.h" -#include "rose/rose_in_util.h" -#include "util/compare.h" -#include "util/compile_context.h" -#include "util/container.h" -#include "util/graph.h" -#include "util/graph_range.h" -#include "util/make_unique.h" -#include "util/order_check.h" -#include "util/ue2string.h" -#include "util/ue2_containers.h" - -#include -#include -#include -#include - -#define NDEBUG_PRINTF(x, ...) \ - do { if (0) { DEBUG_PRINTF(x, ## __VA_ARGS__); } } while (0) - -using namespace std; - -namespace ue2 { - -/** - * Maps vertices in the original graph to vertices on edge graphs. Each edge - * graph should contain at most one copy of the vertex. Multiple images for a - * vertex arise after we split on multiple literals - in this cases all edges - * should share a common graph. - * - * If, when an edge is split, a vertex ends up in both the LHS and RHS then only - * the LHS is tracked. This is because in general we want to simplify the LHS - * and allow complexity to be pushed further back. - */ -typedef ue2::unordered_map > > - vdest_map_t; - -typedef ue2::unordered_map > vsrc_map_t; - -/** - * \brief Maximum width of the character class usable as an escape class. 
- */ -static const u32 MAX_ESCAPE_CHARS = 20; - -static -u32 maxDelay(const CompileContext &cc) { - if (!cc.streaming) { - return MO_INVALID_IDX; - } - return cc.grey.maxHistoryAvailable; -} - -static -bool createsAnchoredLHS(const NGHolder &g, const vector &vv, - const vector &depths, - const Grey &grey, depth max_depth = depth::infinity()) { - max_depth = min(max_depth, depth(grey.maxAnchoredRegion)); - - for (auto v : vv) { - /* avoid issues of self loops blowing out depths: - * look at preds, add 1 */ - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == v) { - continue; - } - - u32 idx = g[u].index; - assert(idx < depths.size()); - if (maxDistFromStartOfData(depths.at(idx)) >= max_depth) { - return false; - } - } - } - return true; -} - -static -bool createsTransientLHS(const NGHolder &g, const vector &vv, - const vector &depths, - const Grey &grey) { - const depth max_depth(grey.maxHistoryAvailable); - - for (auto v : vv) { - /* avoid issues of self loops blowing out depths: - * look at preds, add 1 */ - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == v) { - continue; - } - - u32 idx = g[u].index; - assert(idx < depths.size()); - if (maxDistFromInit(depths.at(idx)) >= max_depth) { - return false; - } - } - } - return true; -} - -static -bool isLHSUsablyAnchored(const NGHolder &g, - const vector &depths, - const Grey &grey) { - assert(in_degree(g.acceptEod, g) == 1); - - vector accepts; - insert(&accepts, accepts.end(), inv_adjacent_vertices(g.accept, g)); - - bool rv = createsAnchoredLHS(g, accepts, depths, grey); - DEBUG_PRINTF("lhs is %susably anchored\n", rv ? "" : "not "); - return rv; -} - -static -bool isLHSTransient(const NGHolder &g, - const vector &depths, - const Grey &grey) { - assert(in_degree(g.acceptEod, g) == 1); - - vector accepts; - insert(&accepts, accepts.end(), inv_adjacent_vertices(g.accept, g)); - - bool rv = createsTransientLHS(g, accepts, depths, grey); - DEBUG_PRINTF("lhs is %stransient\n", rv ? "" : "not "); - return rv; -} - -namespace { - -/** - * Information on a cut: vertices and literals. - */ -struct VertLitInfo { - VertLitInfo(NFAVertex v, const set &litlit) - : vv(vector(1, v)), lit(litlit) {} - VertLitInfo(const vector &vvvv, const set &litlit) - : vv(vvvv), lit(litlit) {} - vector vv; - set lit; -}; - -/** - * A factory for candidate simple cuts (literals/vertices). - */ -class LitCollection : boost::noncopyable { - vector> lits; /**< sorted list of potential cuts */ - const NGHolder &g; /**< graph on which cuts are found */ - const vector &depths; /**< depth information for g */ - const ue2::unordered_map ®ion_map; /**< region map for g */ - - /** Set of vertices to avoid selecting as end vertices for cuts as previous - * cuts overlap them. This is solely to prevent us picking literal sets - * which do not add significant value. */ - ue2::unordered_set poisoned; - - /** Back-edges in g. */ - ue2::unordered_map > back_edges; - - const Grey &grey; - bool seeking_transient; - bool seeking_anchored; - - void poisonLHS(const VertLitInfo &picked); - void poisonLitVerts(const VertLitInfo &picked); - void poisonCandidates(const VertLitInfo &picked); - - friend class LitComparator; - -public: - LitCollection(const NGHolder &g_in, const vector &depths_in, - const ue2::unordered_map ®ion_map_in, - const set &ap, const set &ap_raw, - u32 min_len, bool desperation, const CompileContext &cc, - bool override_literal_quality_check = false); - - /**< Returns the next candidate cut. 
Cut still needs to be inspected for - * complete envelopment. */ - unique_ptr pickNext(void); -}; - -/** - * \brief Comparator class for sorting LitCollection::lits. - * - * This is separated out from LitCollection itself as passing LitCollection to - * std::sort() would incur a (potentially expensive) copy. - */ -class LitComparator { -public: - explicit LitComparator(const LitCollection &lc_in) : lc(lc_in) {} - bool operator()(const unique_ptr &a, - const unique_ptr &b) const { - assert(a && b); - - if (lc.seeking_anchored) { - bool a_anchored = - createsAnchoredLHS(lc.g, a->vv, lc.depths, lc.grey); - bool b_anchored = - createsAnchoredLHS(lc.g, b->vv, lc.depths, lc.grey); - - if (a_anchored != b_anchored) { - return a_anchored < b_anchored; - } - } - - if (lc.seeking_transient) { - bool a_transient = - createsTransientLHS(lc.g, a->vv, lc.depths, lc.grey); - bool b_transient = - createsTransientLHS(lc.g, b->vv, lc.depths, lc.grey); - - if (a_transient != b_transient) { - return a_transient < b_transient; - } - } - - u64a score_a = scoreSet(a->lit); - u64a score_b = scoreSet(b->lit); - - if (score_a != score_b) { - return score_a > score_b; - } - - /* vertices should only be in one candidate cut */ - assert(a->vv == b->vv || a->vv.front() != b->vv.front()); - return lc.g[a->vv.front()].index > - lc.g[b->vv.front()].index; - } - -private: - const LitCollection &lc; -}; - -static -size_t shorter_than(const set &s, size_t limit) { - size_t count = 0; - - for (const auto &lit : s) { - if (lit.length() < limit) { - count++; - } - } - - return count; -} - -static -u32 min_len(const set &s) { - u32 rv = ~0U; - - for (const auto &lit : s) { - rv = min(rv, (u32)lit.length()); - } - - return rv; -} - -static -u32 max_len(const set &s) { - u32 rv = 0; - - for (const auto &lit : s) { - rv = max(rv, (u32)lit.length()); - } - - return rv; -} - -static -u32 min_period(const set &s) { - u32 rv = ~0U; - - for (const auto &lit : s) { - rv = min(rv, (u32)minStringPeriod(lit)); - } - DEBUG_PRINTF("min period %u\n", rv); - return rv; -} - -static -bool validateRoseLiteralSetQuality(const set &s, u64a score, - u32 min_allowed_len, bool desperation, - bool override_literal_quality_check) { - if (!override_literal_quality_check && score >= NO_LITERAL_AT_EDGE_SCORE) { - DEBUG_PRINTF("candidate is too bad %llu/%zu\n", score, s.size()); - return false; - } - - assert(!s.empty()); - if (s.empty()) { - DEBUG_PRINTF("candidate is too bad/something went wrong\n"); - return false; - } - - u32 s_min_len = min_len(s); - u32 s_min_period = min_period(s); - size_t short_count = shorter_than(s, 5); - - DEBUG_PRINTF("cand '%s': score %llu count=%zu min_len=%u min_period=%u" - " short_count=%zu desp=%d\n", - dumpString(*s.begin()).c_str(), score, s.size(), s_min_len, - s_min_period, short_count, (int)desperation); - - bool ok = true; - - if (s.size() > 10 /* magic number is magic */ - || s_min_len < min_allowed_len - || (s_min_period <= 1 && !override_literal_quality_check - && min_allowed_len != 1)) { - ok = false; - } - - if (!ok && desperation - && s.size() <= 20 /* more magic numbers are magical */ - && (s_min_len > 5 || (s_min_len > 2 && short_count <= 10)) - && s_min_period > 1) { - DEBUG_PRINTF("candidate is ok\n"); - ok = true; - } - - if (!ok && desperation - && s.size() <= 50 /* more magic numbers are magical */ - && s_min_len > 10 - && s_min_period > 1) { - DEBUG_PRINTF("candidate is ok\n"); - ok = true; - } - - if (!ok) { - DEBUG_PRINTF("candidate is too bad\n"); - return false; - } - - return true; -} - -static 
UNUSED -void dumpRoseLiteralSet(const set &s) { - for (UNUSED const auto &lit : s) { - DEBUG_PRINTF(" lit: %s\n", dumpString(lit).c_str()); - } -} - -static -void getSimpleRoseLiterals(const NGHolder &g, const set &a_dom, - vector> *lits, - u32 min_allowed_len, bool desperation, - bool override_literal_quality_check) { - map scores; - map> lit_info; - set s; - - for (auto v : a_dom) { - s = getLiteralSet(g, v, true); /* RHS will take responsibility for any - revisits to the target vertex */ - - if (s.empty()) { - DEBUG_PRINTF("candidate is too bad\n"); - continue; - } - - DEBUG_PRINTF("|candidate raw literal set| = %zu\n", s.size()); - dumpRoseLiteralSet(s); - u64a score = compressAndScore(s); - - if (!validateRoseLiteralSetQuality(s, score, min_allowed_len, - desperation, - override_literal_quality_check)) { - continue; - } - - DEBUG_PRINTF("candidate is a candidate\n"); - scores[v] = score; - lit_info.insert(make_pair(v, ue2::make_unique(v, s))); - } - - /* try to filter out cases where appending some characters produces worse - * literals. Only bother to look back one byte, TODO make better */ - for (auto u : a_dom) { - if (out_degree(u, g) != 1 || !scores[u]) { - continue; - } - NFAVertex v = *adjacent_vertices(u, g).first; - if (contains(scores, v) && scores[v] >= scores[u]) { - DEBUG_PRINTF("killing off v as score %llu >= %llu\n", - scores[v], scores[u]); - lit_info.erase(v); - } - } - - lits->reserve(lit_info.size()); - for (auto &m : lit_info) { - lits->push_back(move(m.second)); - } - DEBUG_PRINTF("%zu candidate literal sets\n", lits->size()); -} - -static -void getRegionRoseLiterals(const NGHolder &g, - const ue2::unordered_map ®ion_map, - const set &a_dom_raw, - vector> *lits, - u32 min_allowed_len, bool desperation, - bool override_literal_quality_check) { - /* This allows us to get more places to chop the graph as we are not limited - to points where there is a single vertex to split. */ - - /* TODO: operate over 'proto-regions' which ignore back edges */ - - set mand, optional; - map > exits; - - for (auto v : vertices_range(g)) { - assert(contains(region_map, v)); - const u32 region = region_map.at(v); - - if (is_any_start(v, g) || region == 0) { - continue; - } - - if (is_any_accept(v, g)) { - continue; - } - - if (isRegionExit(g, v, region_map)) { - exits[region].push_back(v); - } - - if (isRegionEntry(g, v, region_map)) { - // Determine whether this region is mandatory or optional. We only - // need to do this check for the first entry vertex we encounter - // for this region. - if (!contains(mand, region) && !contains(optional, region)) { - if (isOptionalRegion(g, v, region_map)) { - optional.insert(region); - } else { - mand.insert(region); - } - } - } - } - - for (const auto &m : exits) { - if (0) { - next_cand: - continue; - } - - const u32 region = m.first; - const vector &vv = m.second; - assert(!vv.empty()); - - if (!contains(mand, region)) { - continue; - } - - for (auto v : vv) { - /* if an exit is in a_dom_raw, the region is already handled well - * by getSimpleRoseLiterals */ - if (contains(a_dom_raw, v)) { - goto next_cand; - } - } - - /* the final region may not have a neat exit. 
validate that all exits - * have an edge to each accept or none do */ - bool edge_to_a = edge(vv[0], g.accept, g).second; - bool edge_to_aeod = edge(vv[0], g.acceptEod, g).second; - const auto &reports = g[vv[0]].reports; - for (auto v : vv) { - if (edge_to_a != edge(v, g.accept, g).second) { - goto next_cand; - } - - if (edge_to_aeod != edge(v, g.acceptEod, g).second) { - goto next_cand; - } - - if (g[v].reports != reports) { - goto next_cand; - } - } - - DEBUG_PRINTF("inspecting region %u\n", region); - set s; - for (auto v : vv) { - DEBUG_PRINTF(" exit vertex: %zu\n", g[v].index); - /* Note: RHS can not be depended on to take all subsequent revisits - * to this vertex */ - set ss = getLiteralSet(g, v, false); - if (ss.empty()) { - DEBUG_PRINTF("candidate is too bad\n"); - goto next_cand; - } - insert(&s, ss); - } - - assert(!s.empty()); - - DEBUG_PRINTF("|candidate raw literal set| = %zu\n", s.size()); - dumpRoseLiteralSet(s); - u64a score = compressAndScore(s); - DEBUG_PRINTF("|candidate literal set| = %zu\n", s.size()); - dumpRoseLiteralSet(s); - - if (!validateRoseLiteralSetQuality(s, score, min_allowed_len, - desperation, - override_literal_quality_check)) { - continue; - } - - DEBUG_PRINTF("candidate is a candidate\n"); - lits->push_back(ue2::make_unique(vv, s)); - } -} - -static -void gatherBackEdges(const NGHolder &g, - ue2::unordered_map> *out) { - set backEdges; - BackEdges> be(backEdges); - depth_first_search(g, visitor(be).root_vertex(g.start)); - - for (const auto &e : backEdges) { - (*out)[source(e, g)].push_back(target(e, g)); - } -} - -LitCollection::LitCollection(const NGHolder &g_in, - const vector &depths_in, - const ue2::unordered_map ®ion_map_in, - const set &a_dom, - const set &a_dom_raw, u32 min_len, - bool desperation, const CompileContext &cc, - bool override_literal_quality_check) - : g(g_in), depths(depths_in), region_map(region_map_in), grey(cc.grey), - seeking_transient(cc.streaming), seeking_anchored(true) { - getSimpleRoseLiterals(g, a_dom, &lits, min_len, desperation, - override_literal_quality_check); - getRegionRoseLiterals(g, region_map, a_dom_raw, &lits, min_len, desperation, - override_literal_quality_check); - DEBUG_PRINTF("lit coll is looking for a%d t%d\n", (int)seeking_anchored, - (int)seeking_transient); - DEBUG_PRINTF("we have %zu candidate literal splits\n", lits.size()); - sort(lits.begin(), lits.end(), LitComparator(*this)); - gatherBackEdges(g, &back_edges); -} - -void LitCollection::poisonLHS(const VertLitInfo &picked) { - DEBUG_PRINTF("found anchored %d transient %d\n", - (int)createsAnchoredLHS(g, picked.vv, depths, grey), - (int)createsTransientLHS(g, picked.vv, depths, grey)); - set curr; - set next; - - insert(&curr, picked.vv); - - while (!curr.empty()) { - insert(&poisoned, curr); - next.clear(); - for (auto v : curr) { - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (!is_special(u, g) && !contains(poisoned, u)) { - next.insert(u); - } - } - } - - curr.swap(next); - } - - seeking_transient = false; - seeking_anchored = false; - - /* reprioritise cuts now that the LHS is taken care off */ - sort(lits.begin(), lits.end(), LitComparator(*this)); -} - -static -void flood_back(const NGHolder &g, u32 len, const set &initial, - set *visited) { - vector curr; - vector next; - - insert(&curr, curr.end(), initial); - - insert(visited, initial); - - /* bfs: flood back len vertices */ - for (u32 i = 1; i < len; i++) { - next.clear(); - DEBUG_PRINTF("poison %u/%u: curr %zu\n", i, len, curr.size()); - - for (auto v : curr) { - for (auto 
u : inv_adjacent_vertices_range(v, g)) { - if (!contains(*visited, u)) { - next.push_back(u); - visited->insert(u); - } - } - } - - next.swap(curr); - } -} - -/** - * Add vertices near a picked literal to the poison set unless it looks - * like they may still add value (ie they are on they other side of cycle). - */ -void LitCollection::poisonLitVerts(const VertLitInfo &picked) { - DEBUG_PRINTF("poisoning vertices associated with picked literals\n"); - - u32 len = max_len(picked.lit); - - /* poison vertices behind */ - - set starters; - insert(&starters, picked.vv); - - set visited; - - flood_back(g, len, starters, &visited); - - DEBUG_PRINTF("flood %zu vertices\n", visited.size()); - - /* inspect any back edges which are in the flooded subgraph; look for any - * destination vertices which are not starters */ - set anti; - for (auto u : visited) { - if (!contains(back_edges, u) || contains(starters, u)) { - continue; - } - - for (auto v : back_edges[u]) { - if (contains(visited, v) && !contains(starters, v)) { - anti.insert(v); - } - } - } - DEBUG_PRINTF("%zu cycle ends\n", visited.size()); - - /* remove any vertices which lie on the other side of a cycle from the - * visited set */ - set anti_pred; - flood_back(g, len - 1, anti, &anti_pred); - - DEBUG_PRINTF("flood visited %zu vertices; anti %zu\n", visited.size(), - anti_pred.size()); - - erase_all(&visited, anti_pred); - - DEBUG_PRINTF("filtered flood visited %zu vertices\n", visited.size()); - - insert(&poisoned, visited); - - insert(&poisoned, starters); /* complicated back loops can result in start - vertices being removed from the visited - set */ - - for (UNUSED auto v : picked.vv) { - assert(contains(poisoned, v)); - } - - /* TODO: poison vertices in front of us? */ -} - -void LitCollection::poisonCandidates(const VertLitInfo &picked) { - assert(!picked.lit.empty()); - if (picked.lit.empty()) { - return; - } - - if ((seeking_anchored && createsAnchoredLHS(g, picked.vv, depths, grey)) - || (seeking_transient && createsTransientLHS(g, picked.vv, depths, grey))) { - /* We don't want to pick anything to the LHS of picked.v any more as we - * have something good. We also don't want to provide any bonus for - * remaining literals based on anchoredness/transientness of the lhs. 
- */ - poisonLHS(picked); - } else { - poisonLitVerts(picked); - } -} - -unique_ptr LitCollection::pickNext() { - while (!lits.empty()) { - if (0) { - next_lit: - continue; - } - - for (auto v : lits.back()->vv) { - if (contains(poisoned, v)) { - DEBUG_PRINTF("skipping '%s' as overlapped\n", - dumpString(*(lits.back()->lit.begin())).c_str()); - lits.pop_back(); - goto next_lit; - } - } - - unique_ptr rv = move(lits.back()); - lits.pop_back(); - poisonCandidates(*rv); - DEBUG_PRINTF("best is '%s' %zu a%d t%d\n", - dumpString(*(rv->lit.begin())).c_str(), - g[rv->vv.front()].index, - (int)createsAnchoredLHS(g, rv->vv, depths, grey), - (int)createsTransientLHS(g, rv->vv, depths, grey)); - - return rv; - } - - return nullptr; -} - -} - -static -bool can_match(const NGHolder &g, const ue2_literal &lit, bool overhang_ok) { - set curr, next; - curr.insert(g.accept); - - for (auto it = lit.rbegin(); it != lit.rend(); ++it) { - next.clear(); - - for (auto v : curr) { - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == g.start) { - if (overhang_ok) { - DEBUG_PRINTF("bail\n"); - return true; - } else { - continue; /* it is not possible for a lhs literal to - * overhang the start */ - } - } - - const CharReach &cr = g[u].char_reach; - if (!overlaps(*it, cr)) { - DEBUG_PRINTF("skip\n"); - continue; - } - - next.insert(u); - } - } - - curr.swap(next); - } - - return !curr.empty(); -} - -u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, - u32 max_delay, bool overhang_ok) { - assert(isCorrectlyTopped(g)); - if (max_delay == MO_INVALID_IDX) { - max_delay--; - } - - DEBUG_PRINTF("killing off '%s'\n", dumpString(lit).c_str()); - set curr, next; - curr.insert(g.accept); - - auto it = lit.rbegin(); - for (u32 delay = max_delay; delay > 0 && it != lit.rend(); delay--, ++it) { - next.clear(); - for (auto v : curr) { - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == g.start) { - if (overhang_ok) { - DEBUG_PRINTF("bail\n"); - goto bail; /* things got complicated */ - } else { - continue; /* it is not possible for a lhs literal to - * overhang the start */ - } - } - - const CharReach &cr = g[u].char_reach; - if (!overlaps(*it, cr)) { - DEBUG_PRINTF("skip\n"); - continue; - } - if (isSubsetOf(*it, cr)) { - next.insert(u); - } else { - DEBUG_PRINTF("bail\n"); - goto bail; /* things got complicated */ - } - } - } - - curr.swap(next); - } - bail: - if (curr.empty()) { - /* This can happen when we have an edge representing a cross from two - * sides of an alternation. 
This whole edge needs to be marked as - * dead */ - assert(0); /* should have been picked up by can match */ - return MO_INVALID_IDX; - } - - u32 delay = distance(lit.rbegin(), it); - assert(delay <= max_delay); - assert(delay <= lit.length()); - DEBUG_PRINTF("managed delay %u (of max %u)\n", delay, max_delay); - - set pred; - for (auto v : curr) { - insert(&pred, inv_adjacent_vertices_range(v, g)); - } - - clear_in_edges(g.accept, g); - clearReports(g); - - for (auto v : pred) { - NFAEdge e = add_edge(v, g.accept, g); - g[v].reports.insert(0); - if (is_triggered(g) && v == g.start) { - g[e].tops.insert(DEFAULT_TOP); - } - } - - pruneUseless(g); - assert(allMatchStatesHaveReports(g)); - assert(isCorrectlyTopped(g)); - - DEBUG_PRINTF("graph has %zu vertices left\n", num_vertices(g)); - return delay; -} - -void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, - u32 delay, const vector &preds) { - assert(delay <= lit.length()); - assert(isCorrectlyTopped(g)); - DEBUG_PRINTF("adding on '%s' %u\n", dumpString(lit).c_str(), delay); - - NFAVertex prev = g.accept; - auto it = lit.rbegin(); - while (delay--) { - NFAVertex curr = add_vertex(g); - assert(it != lit.rend()); - g[curr].char_reach = *it; - add_edge(curr, prev, g); - ++it; - prev = curr; - } - - for (auto v : preds) { - NFAEdge e = add_edge(v, prev, g); - if (v == g.start && is_triggered(g)) { - g[e].tops.insert(DEFAULT_TOP); - } - } - - // Every predecessor of accept must have a report. - for (auto u : inv_adjacent_vertices_range(g.accept, g)) { - g[u].reports.insert(0); - } - - renumber_vertices(g); - renumber_edges(g); - assert(allMatchStatesHaveReports(g)); - assert(isCorrectlyTopped(g)); -} - -void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, - u32 delay) { - vector preds; - insert(&preds, preds.end(), inv_adjacent_vertices(g.accept, g)); - clear_in_edges(g.accept, g); - - for (auto v : preds) { - g[v].reports.clear(); /* clear report from old accepts */ - } - - restoreTrailingLiteralStates(g, lit, delay, preds); -} - -/* return false if we should get rid of the edge altogether */ -static -bool removeLiteralFromLHS(RoseInGraph &ig, const RoseInEdge &lhs, - const CompileContext &cc) { - unique_ptr h = cloneHolder(*ig[lhs].graph); - NGHolder &g = *h; - assert(ig[target(lhs, ig)].type == RIV_LITERAL); - const ue2_literal &lit = ig[target(lhs, ig)].s; - - /* lhs should be connected to a start */ - assert(ig[source(lhs, ig)].type == RIV_START - || ig[source(lhs, ig)].type == RIV_ANCHORED_START); - - if (in_degree(g.acceptEod, g) != 1 /* edge from accept */) { - assert(0); - return true; - } - if (lit.empty()) { - assert(0); - return true; - } - - const u32 max_delay = maxDelay(cc); - - // In streaming mode, we must limit the depth to the available history - // UNLESS the given literal follows start or startDs and has nothing - // before it that we will need to account for. In that case, we can - // lean on FDR's support for long literals. - if (literalIsWholeGraph(g, lit)) { - assert(!ig[lhs].haig); - assert(ig[lhs].minBound == 0); - assert(ig[lhs].maxBound == ROSE_BOUND_INF); - DEBUG_PRINTF("literal is the whole graph\n"); - - u32 delay = removeTrailingLiteralStates(g, lit, MO_INVALID_IDX, false); - assert(delay == lit.length()); - ig[lhs].graph = move(h); - ig[lhs].graph_lag = delay; - return true; - } - - if (!can_match(g, lit, false)) { - /* This is can happen if the literal arises from a large cyclic - to/beyond the pivot. 
As the LHS graph only cares about the first - reach of the pivot, this literal is junk */ - DEBUG_PRINTF("bogus edge\n"); - return false; - } - - u32 delay = removeTrailingLiteralStates(g, lit, max_delay, - false /* can't overhang start */); - - if (delay == MO_INVALID_IDX) { - /* This is can happen if the literal arises from a large cyclic - to/beyond the pivot. As the LHS graph only cares about the first - reach of the pivot, this literal is junk */ - DEBUG_PRINTF("bogus edge\n"); - return false; - } - - if (!delay) { - return true; - } - - DEBUG_PRINTF("setting delay %u on lhs %p\n", delay, h.get()); - - ig[lhs].graph = move(h); - ig[lhs].graph_lag = delay; - return true; -} - -static -void handleLhsCliche(RoseInGraph &ig, const RoseInEdge &lhs) { - const NGHolder &h = *ig[lhs].graph; - - size_t s_od = out_degree(h.start, h); - size_t sds_od = out_degree(h.startDs, h); - - assert(in_degree(h.acceptEod, h) == 1 /* edge from accept */); - /* need to check if simple floating start */ - if (edge(h.startDs, h.accept, h).second && sds_od == 2 - && ((s_od == 2 && edge(h.start, h.accept, h).second) || s_od == 1)) { - /* no need for graph */ - ig[lhs].graph.reset(); - ig[lhs].graph_lag = 0; - DEBUG_PRINTF("lhs is floating start\n"); - return; - } - - /* need to check if a simple anchor */ - /* start would have edges to sds and accept in this case */ - if (edge(h.start, h.accept, h).second && s_od == 2 && sds_od == 1) { - if (ig[source(lhs, ig)].type == RIV_ANCHORED_START) { - // assert(ig[lhs].graph_lag == ig[target(lhs, ig)].s.length()); - if (ig[lhs].graph_lag != ig[target(lhs, ig)].s.length()) { - DEBUG_PRINTF("oddness\n"); - return; - } - ig[lhs].graph.reset(); - ig[lhs].graph_lag = 0; - ig[lhs].maxBound = 0; - DEBUG_PRINTF("lhs is anchored start\n"); - } else { - DEBUG_PRINTF("lhs rewiring start\n"); - assert(ig[source(lhs, ig)].type == RIV_START); - RoseInVertex t = target(lhs, ig); - remove_edge(lhs, ig); - RoseInVertex s2 - = add_vertex(RoseInVertexProps::makeStart(true), ig); - add_edge(s2, t, RoseInEdgeProps(0U, 0U), ig); - } - return; - } -} - -static -void filterCandPivots(const NGHolder &g, const set &cand_raw, - set *out) { - for (auto u : cand_raw) { - const CharReach &u_cr = g[u].char_reach; - if (u_cr.count() > 40) { - continue; /* too wide to be plausible */ - } - - if (u_cr.count() > 2) { - /* include u as a candidate as successor may have backed away from - * expanding through it */ - out->insert(u); - continue; - } - - NFAVertex v = getSoleDestVertex(g, u); - if (v && in_degree(v, g) == 1 && out_degree(u, g) == 1) { - const CharReach &v_cr = g[v].char_reach; - if (v_cr.count() == 1 || v_cr.isCaselessChar()) { - continue; /* v will always generate better literals */ - } - } - - out->insert(u); - } -} - -/* cand_raw is the candidate set before filtering points which are clearly - * a bad idea. 
*/ -static -void getCandidatePivots(const NGHolder &g, set *cand, - set *cand_raw) { - ue2::unordered_map dominators = - findDominators(g); - - set accepts; - - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - if (is_special(v, g)) { - continue; - } - accepts.insert(v); - } - for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { - if (is_special(v, g)) { - continue; - } - accepts.insert(v); - } - - assert(!accepts.empty()); - - vector dom_trace; - auto ait = accepts.begin(); - assert(ait != accepts.end()); - NFAVertex curr = *ait; - while (curr && !is_special(curr, g)) { - dom_trace.push_back(curr); - curr = dominators[curr]; - } - reverse(dom_trace.begin(), dom_trace.end()); - for (++ait; ait != accepts.end(); ++ait) { - curr = *ait; - vector dom_trace2; - while (curr && !is_special(curr, g)) { - dom_trace2.push_back(curr); - curr = dominators[curr]; - } - reverse(dom_trace2.begin(), dom_trace2.end()); - auto dti = dom_trace.begin(), dtie = dom_trace.end(); - auto dtj = dom_trace2.begin(), dtje = dom_trace2.end(); - while (dti != dtie && dtj != dtje && *dti == *dtj) { - ++dti; - ++dtj; - } - dom_trace.erase(dti, dtie); - } - - cand_raw->insert(dom_trace.begin(), dom_trace.end()); - - filterCandPivots(g, *cand_raw, cand); -} - -static -void deanchorIfNeeded(NGHolder &g, bool *orig_anch) { - DEBUG_PRINTF("hi\n"); - if (proper_out_degree(g.startDs, g)) { - return; - } - - /* look for a non-special dot with a loop following start */ - set succ_g; - insert(&succ_g, adjacent_vertices(g.start, g)); - succ_g.erase(g.startDs); - - for (auto v : adjacent_vertices_range(g.start, g)) { - DEBUG_PRINTF("inspecting cand %zu || =%zu\n", g[v].index, - g[v].char_reach.size()); - - if (v == g.startDs || !g[v].char_reach.all()) { - continue; - } - - set succ_v; - insert(&succ_v, adjacent_vertices(v, g)); - - if (succ_v == succ_g) { - DEBUG_PRINTF("found ^.*\n"); - *orig_anch = true; - for (auto succ : succ_g) { - add_edge(g.startDs, succ, g); - } - clear_vertex(v, g); - remove_vertex(v, g); - renumber_vertices(g); - return; - } - - if (succ_g.size() == 1 && hasSelfLoop(v, g)) { - DEBUG_PRINTF("found ^.+\n"); - *orig_anch = true; - add_edge(g.startDs, v, g); - remove_edge(v, v, g); - return; - } - } -} - -static -unique_ptr makeTrivialGraph(const NGHolder &h, - vdest_map_t &v_dest_map, - vsrc_map_t &v_src_map) { - shared_ptr root_g = cloneHolder(h); - bool orig_anch = isAnchored(*root_g); - deanchorIfNeeded(*root_g, &orig_anch); - - DEBUG_PRINTF("orig_anch %d\n", (int)orig_anch); - - unique_ptr igp = ue2::make_unique(); - RoseInVertex start = - add_vertex(RoseInVertexProps::makeStart(orig_anch), *igp); - RoseInVertex accept = - add_vertex(RoseInVertexProps::makeAccept(set()), *igp); - - RoseInEdge e = - add_edge(start, accept, RoseInEdgeProps(root_g, 0), *igp).first; - - for (auto v : vertices_range(*root_g)) { - v_dest_map[v].emplace_back(e, v); - v_src_map[e].push_back(v); - } - - return igp; -} - -static never_inline -void updateVDestMap(const vector > &images, - const ue2::unordered_map &lhs_map, - const vector &l_e, - const ue2::unordered_map &rhs_map, - const vector &r_e, - vdest_map_t &v_dest_map, vsrc_map_t &v_src_map) { - RoseInEdge e = images.front().first; - set edge_set; - for (const auto &image : images) { - edge_set.insert(image.first); - } - const vector &domain = v_src_map[e]; - vector > temp; - - for (auto v : domain) { - vdest_map_t::iterator it = v_dest_map.find(v); - assert(it != v_dest_map.end()); - - temp.clear(); - - for (const auto &dest : it->second) { - const 
RoseInEdge &old_e = dest.first; - const NFAVertex old_dest = dest.second; - if (old_e != e) { - if (!contains(edge_set, old_e)) { - temp.emplace_back(old_e, old_dest); - } - } else if (contains(lhs_map, old_dest)) { - for (const auto &e2 : l_e) { - temp.emplace_back(e2, lhs_map.at(old_dest)); - } - /* only allow v to be tracked on one side of the split */ - } else if (contains(rhs_map, old_dest)) { - for (const auto &e2 : r_e) { - temp.emplace_back(e2, rhs_map.at(old_dest)); - } - } - } - NDEBUG_PRINTF("%zu images for vertex; prev %zu\n", temp.size(), - it->second.size()); - it->second.swap(temp); - } -} - -/** Returns the collection of vertices from the original graph which end up - * having an image in the [lr]hs side of the graph split. */ -static never_inline -void fillDomain(const vdest_map_t &v_dest_map, const vsrc_map_t &v_src_map, - RoseInEdge e, - const ue2::unordered_map &split_map, - vector *out) { - const vector &presplit_domain = v_src_map.at(e); - for (auto v : presplit_domain) { - /* v is in the original graph, need to find its image on e's graph */ - typedef vector > dests_t; - const dests_t &dests = v_dest_map.at(v); - for (const auto &dest : dests) { - if (dest.first == e) { - NFAVertex vv = dest.second; - /* vv is v image on e's graph */ - if (contains(split_map, vv)) { - out->push_back(v); - } - } - } - } -} - -static -void getSourceVerts(RoseInGraph &ig, - const vector > &images, - vector *out) { - set seen; - for (const auto &image : images) { - RoseInVertex s = source(image.first, ig); - if (contains(seen, s)) { - continue; - } - seen.insert(s); - out->push_back(s); - } -} - -static -void getDestVerts(RoseInGraph &ig, - const vector > &images, - vector *out) { - set seen; - for (const auto &image : images) { - RoseInVertex t = target(image.first, ig); - if (contains(seen, t)) { - continue; - } - seen.insert(t); - out->push_back(t); - } -} - -static -void getSourceVerts(RoseInGraph &ig, const vector &edges, - vector *out) { - set seen; - for (const auto &e : edges) { - RoseInVertex s = source(e, ig); - if (contains(seen, s)) { - continue; - } - seen.insert(s); - out->push_back(s); - } -} - -static -void getDestVerts(RoseInGraph &ig, const vector &edges, - vector *out) { - set seen; - for (const auto &e : edges) { - RoseInVertex t = target(e, ig); - if (contains(seen, t)) { - continue; - } - seen.insert(t); - out->push_back(t); - } -} - -static -bool splitRoseEdge(RoseInGraph &ig, const VertLitInfo &split, - vdest_map_t &v_dest_map, vsrc_map_t &v_src_map) { - const vector &root_splitters = split.vv; /* vertices in the - 'root' graph */ - assert(!root_splitters.empty()); - - /* need copy as split rose edge will update orig map */ - vector > images - = v_dest_map[root_splitters[0]]; - DEBUG_PRINTF("splitting %zu rose edge with %zu literals\n", - images.size(), split.lit.size()); - - /* note: as we haven't removed literals yet the graphs on all edges that we - * are going to split should be identical */ - const auto &base_graph = ig[images.front().first].graph; - - vector splitters; /* vertices in the graph being split */ - for (auto v : root_splitters) { - if (!contains(v_dest_map, v)) { - DEBUG_PRINTF("vertex to split on is no longer in the graph\n"); - return false; - } - - /* sanity check: verify all edges have the same underlying graph */ - for (UNUSED const auto &m : v_dest_map[v]) { - assert(base_graph == ig[m.first].graph); - } - assert(v_dest_map[v].size() == images.size()); - - splitters.push_back(v_dest_map[v].front().second); - } - - /* note: the set of split 
edges should form a complete bipartite graph */ - vector src_verts; - vector dest_verts; - getSourceVerts(ig, images, &src_verts); - getDestVerts(ig, images, &dest_verts); - assert(images.size() == src_verts.size() * dest_verts.size()); - - shared_ptr lhs = make_shared(); - shared_ptr rhs = make_shared(); - - ue2::unordered_map lhs_map; - ue2::unordered_map rhs_map; - - assert(base_graph); - splitGraph(*base_graph, splitters, lhs.get(), &lhs_map, - rhs.get(), &rhs_map); - - RoseInEdge first_e = images.front().first; - - /* all will be suffix or none */ - bool suffix = ig[target(first_e, ig)].type == RIV_ACCEPT; - - set splitter_reports; - for (auto v : splitters) { - insert(&splitter_reports, (*base_graph)[v].reports); - } - - bool do_accept = false; - bool do_accept_eod = false; - assert(rhs); - if (isVacuous(*rhs) && suffix) { - if (edge(rhs->start, rhs->accept, *rhs).second) { - DEBUG_PRINTF("rhs has a cliche\n"); - do_accept = true; - remove_edge(rhs->start, rhs->accept, *rhs); - } - - if (edge(rhs->start, rhs->acceptEod, *rhs).second) { - DEBUG_PRINTF("rhs has an eod cliche\n"); - do_accept_eod = true; - remove_edge(rhs->start, rhs->acceptEod, *rhs); - } - } - - bool do_norm = out_degree(rhs->start, *rhs) != 1; /* check if we still have - a graph left over */ - vector lhs_domain; - vector rhs_domain; - fillDomain(v_dest_map, v_src_map, first_e, lhs_map, &lhs_domain); - fillDomain(v_dest_map, v_src_map, first_e, rhs_map, &rhs_domain); - - vector l_e; - vector r_e; - for (const auto &lit : split.lit) { - DEBUG_PRINTF("best is '%s'\n", escapeString(lit).c_str()); - RoseInVertex v - = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); - - /* work out delay later */ - if (do_accept) { - DEBUG_PRINTF("rhs has a cliche\n"); - RoseInVertex tt = add_vertex(RoseInVertexProps::makeAccept( - splitter_reports), ig); - add_edge(v, tt, RoseInEdgeProps(0U, 0U), ig); - } - - if (do_accept_eod) { - DEBUG_PRINTF("rhs has an eod cliche\n"); - RoseInVertex tt = add_vertex(RoseInVertexProps::makeAcceptEod( - splitter_reports), ig); - add_edge(v, tt, RoseInEdgeProps(0U, 0U), ig); - } - - for (auto src_v : src_verts) { - l_e.push_back(add_edge(src_v, v, - RoseInEdgeProps(lhs, 0U), ig).first); - v_src_map[l_e.back()] = lhs_domain; - } - - if (do_norm) { - for (auto dst_v : dest_verts) { - /* work out delay later */ - assert(out_degree(rhs->start, *rhs) > 1); - r_e.push_back( - add_edge(v, dst_v, RoseInEdgeProps(rhs, 0U), ig).first); - v_src_map[r_e.back()] = rhs_domain; - } - } - } - - updateVDestMap(images, lhs_map, l_e, rhs_map, r_e, v_dest_map, v_src_map); - - for (const auto &image : images) { - /* remove old edge */ - remove_edge(image.first, ig); - v_src_map.erase(image.first); - } - - return true; -} - -static -bool isStarCliche(const NGHolder &g) { - DEBUG_PRINTF("checking graph with %zu vertices\n", num_vertices(g)); - - bool nonspecials_seen = false; - - for (auto v : vertices_range(g)) { - if (is_special(v, g)) { - continue; - } - - if (nonspecials_seen) { - return false; - } - nonspecials_seen = true; - - if (!g[v].char_reach.all()) { - return false; - } - - if (!hasSelfLoop(v, g)) { - return false; - } - if (!edge(v, g.accept, g).second) { - return false; - } - } - - if (!nonspecials_seen) { - return false; - } - - if (!edge(g.start, g.accept, g).second) { - return false; - } - - return true; -} - -static -void processInfixes(RoseInGraph &ig, const CompileContext &cc) { - /* we want to ensure that every prefix/infix graph is unique at this stage - * as we have not done any analysis to check if 
they are safe to share */ - - vector dead; - - for (const auto &e : edges_range(ig)) { - if (!ig[e].graph) { - continue; - } - - RoseInVertex u = source(e, ig), v = target(e, ig); - - // Infixes are edges between two literals. - if (ig[u].type != RIV_LITERAL || ig[v].type != RIV_LITERAL) { - continue; - } - - if (ig[e].graph_lag) { - continue; /* already looked at */ - } - - DEBUG_PRINTF("looking at infix %p\n", ig[e].graph.get()); - - const ue2_literal &lit1 = ig[u].s; - const ue2_literal &lit2 = ig[v].s; - size_t overlap = maxOverlap(lit1, lit2, 0); - - const NGHolder &h = *ig[e].graph; - - DEBUG_PRINTF("infix rose between literals '%s' and '%s', overlap %zu," - "size %zu\n", - dumpString(lit1).c_str(), dumpString(lit2).c_str(), - overlap, num_vertices(h)); - - if (!can_match(h, lit2, true)) { - DEBUG_PRINTF("found bogus edge\n"); - dead.push_back(e); - continue; - } - - unique_ptr h_new = cloneHolder(h); - - u32 delay = removeTrailingLiteralStates(*h_new, lit2, MO_INVALID_IDX); - if (delay == MO_INVALID_IDX) { - DEBUG_PRINTF("found bogus edge\n"); - dead.push_back(e); - continue; - } - - // Delay can be set to at most lit2.length() - overlap, but we must - // truncate to history available in streaming mode. - u32 max_allowed_delay = lit2.length() - overlap; - LIMIT_TO_AT_MOST(&max_allowed_delay, delay); - - if (cc.streaming) { - LIMIT_TO_AT_MOST(&max_allowed_delay, cc.grey.maxHistoryAvailable); - } - - if (delay != max_allowed_delay) { - restoreTrailingLiteralStates(*h_new, lit2, delay); - delay = removeTrailingLiteralStates(*h_new, lit2, max_allowed_delay); - } - - if (isStarCliche(*h_new)) { - DEBUG_PRINTF("is a X star!\n"); - ig[e].graph.reset(); - ig[e].graph_lag = 0; - } else { - ig[e].graph = move(h_new); - ig[e].graph_lag = delay; - DEBUG_PRINTF("delay increased to %u\n", delay); - } - } - - for (const auto &e : dead) { - remove_edge(e, ig); - } -} - -static -void poisonNetflowScores(RoseInGraph &ig, RoseInEdge lhs, - vector *scores) { - assert(ig[lhs].graph); - NGHolder &h = *ig[lhs].graph; - - if (ig[target(lhs, ig)].type != RIV_LITERAL) { - /* nothing to poison in outfixes */ - assert(ig[target(lhs, ig)].type == RIV_ACCEPT); - return; - } - - set curr, next; - insert(&curr, inv_adjacent_vertices(h.accept, h)); - set poisoned; - u32 len = ig[target(lhs, ig)].s.length(); - assert(len); - while (len) { - next.clear(); - for (auto v : curr) { - insert(&poisoned, in_edges(v, h)); - insert(&next, inv_adjacent_vertices(v, h)); - } - - curr.swap(next); - len--; - } - - for (const auto &e : poisoned) { - (*scores)[h[e].index] = NO_LITERAL_AT_EDGE_SCORE; - } -} - -#define MAX_NETFLOW_CUT_WIDTH 40 /* magic number is magic */ -#define MAX_LEN_2_LITERALS_PER_CUT 3 - -static -bool checkValidNetflowLits(NGHolder &h, const vector &scores, - const map> &cut_lits, - const Grey &grey) { - DEBUG_PRINTF("cut width %zu\n", cut_lits.size()); - if (cut_lits.size() > MAX_NETFLOW_CUT_WIDTH) { - return false; - } - - u32 len_2_count = 0; - - for (const auto &cut : cut_lits) { - if (scores[h[cut.first].index] >= NO_LITERAL_AT_EDGE_SCORE) { - DEBUG_PRINTF("cut uses a forbidden edge\n"); - return false; - } - - if (min_len(cut.second) < grey.minRoseNetflowLiteralLength) { - DEBUG_PRINTF("cut uses a bad literal\n"); - return false; - } - - for (const auto &lit : cut.second) { - if (lit.length() == 2) { - len_2_count++; - } - } - } - - if (len_2_count > MAX_LEN_2_LITERALS_PER_CUT) { - return false; - } - - return true; -} - -static -void splitEdgesByCut(RoseInGraph &ig, const vector &to_cut, - const vector 
&cut, - const map > &cut_lits) { - assert(!to_cut.empty()); - assert(ig[to_cut.front()].graph); - NGHolder &h = *ig[to_cut.front()].graph; - - /* note: the set of split edges should form a complete bipartite graph */ - vector src_verts; - vector dest_verts; - getSourceVerts(ig, to_cut, &src_verts); - getDestVerts(ig, to_cut, &dest_verts); - assert(to_cut.size() == src_verts.size() * dest_verts.size()); - - map, shared_ptr > done_rhs; - - /* iterate over cut for determinism */ - for (const auto &e : cut) { - NFAVertex prev_v = source(e, h); - NFAVertex pivot = target(e, h); - - vector adj; - insert(&adj, adj.end(), adjacent_vertices(pivot, h)); - /* we can ignore presence of accept, accepteod in adj as it is best - effort */ - - if (!contains(done_rhs, adj)) { - ue2::unordered_map temp_map; - shared_ptr new_rhs = make_shared(); - splitRHS(h, adj, new_rhs.get(), &temp_map); - remove_edge(new_rhs->start, new_rhs->accept, *new_rhs); - remove_edge(new_rhs->start, new_rhs->acceptEod, *new_rhs); - done_rhs.insert(make_pair(adj, new_rhs)); - /* TODO need to update v_mapping (if we were doing more cuts) */ - } - - DEBUG_PRINTF("splitting on pivot %zu\n", h[pivot].index); - ue2::unordered_map temp_map; - shared_ptr new_lhs = make_shared(); - splitLHS(h, pivot, new_lhs.get(), &temp_map); - - /* want to cut of paths to pivot from things other than the pivot - - * makes a more svelte graphy */ - clear_in_edges(temp_map[pivot], *new_lhs); - add_edge(temp_map[prev_v], temp_map[pivot], *new_lhs); - - pruneUseless(*new_lhs); - - const set &lits = cut_lits.at(e); - for (const auto &lit : lits) { - RoseInVertex v - = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); - - if (edge(pivot, h.accept, h).second) { - /* literal has a direct connection to accept */ - assert(ig[dest_verts.front()].type == RIV_ACCEPT); - const auto &reports = h[pivot].reports; - RoseInVertex tt = - add_vertex(RoseInVertexProps::makeAccept(reports), ig); - add_edge(v, tt, RoseInEdgeProps(0U, 0U), ig); - } - - if (edge(pivot, h.acceptEod, h).second) { - /* literal has a direct connection to accept */ - assert(ig[dest_verts.front()].type == RIV_ACCEPT); - const auto &reports = h[pivot].reports; - RoseInVertex tt = add_vertex( - RoseInVertexProps::makeAcceptEod(reports), ig); - add_edge(v, tt, RoseInEdgeProps(0U, 0U), ig); - } - - assert(done_rhs[adj].get()); - shared_ptr new_rhs = done_rhs[adj]; - if (out_degree(new_rhs->start, *new_rhs) != 1) { - for (auto dst_v : dest_verts) { - add_edge(v, dst_v, RoseInEdgeProps(done_rhs[adj], 0), ig); - } - } - - for (auto src_v : src_verts) { - add_edge(src_v, v, RoseInEdgeProps(new_lhs, 0), ig); - } - } - } - - /* TODO need to update v_mapping (if we were doing more cuts) */ - - for (const auto &e : to_cut) { - assert(ig[e].graph.get() == &h); - remove_edge(e, ig); - } -} - -static -bool doNetflowCut(RoseInGraph &ig, const vector &to_cut, - const Grey &grey) { - DEBUG_PRINTF("doing netflow cut\n"); - /* TODO: we should really get literals/scores from the full graph as this - * allows us to overlap the graph. Doesn't matter at the moment as we - * are working on the LHS. */ - - NGHolder &h = *ig[to_cut.front()].graph; - if (num_edges(h) > grey.maxRoseNetflowEdges) { - /* We have a limit on this because scoring edges and running netflow - * gets very slow for big graphs. 
*/ - DEBUG_PRINTF("too many edges, skipping netflow cut\n"); - return false; - } - - renumber_vertices(h); - renumber_edges(h); - /* Step 1: Get scores for all edges */ - vector scores = scoreEdges(h); /* scores by edge_index */ - /* Step 2: poison scores for edges covered by successor literal */ - for (const auto &e : to_cut) { - assert(&h == ig[e].graph.get()); - poisonNetflowScores(ig, e, &scores); - } - /* Step 3: Find cutset based on scores */ - vector cut = findMinCut(h, scores); - - /* Step 4: Get literals corresponding to cut edges */ - map> cut_lits; - for (const auto &e : cut) { - set lits = getLiteralSet(h, e); - compressAndScore(lits); - cut_lits[e] = lits; - } - - /* if literals are underlength bail or if it involves a forbidden edge*/ - if (!checkValidNetflowLits(h, scores, cut_lits, grey)) { - return false; - } - DEBUG_PRINTF("splitting\n"); - - /* Step 5: Split graph based on cuts */ - splitEdgesByCut(ig, to_cut, cut, cut_lits); - return true; -} - -/** \brief Returns the number of intermediate vertices in the shortest path - * between (from, to). */ -static -u32 min_dist_between(NFAVertex from, NFAVertex to, const NGHolder &g) { - // Check for the trivial case: that way we don't have to set up the - // containers below. - if (edge(from, to, g).second) { - return 0; - } - - ue2::unordered_set visited; - visited.insert(from); - - flat_set curr, next; - curr.insert(from); - - assert(from != to); - - u32 d = 0; - - while (!curr.empty()) { - next.clear(); - for (auto v : curr) { - for (auto w : adjacent_vertices_range(v, g)) { - if (w == to) { - return d; - } - if (visited.insert(w).second) { // first visit to *ai - next.insert(w); - } - } - } - - d++; - curr.swap(next); - } - assert(0); - return ROSE_BOUND_INF; -} - -/** Literals which are completely enveloped by a successor are trouble because - * hamsterwheel acceleration can skip past the start of the literal. 
*/ -static -bool enveloped(const vector &cand_split_v, - const set &cand_lit, const NGHolder &g, - const RoseInVertexProps &succ) { - if (succ.type != RIV_LITERAL) { - return false; - } - - /* TODO: handle multiple v more precisely: not all candidate v can start all - * candidate literals */ - - for (auto v : cand_split_v) { - u32 rhs_min_len = min_dist_between(v, g.accept, g); - if (rhs_min_len + min_len(cand_lit) >= succ.s.length()) { - return false; - } - } - - return true; /* we are in trouble */ -} - -static -bool enveloped(const VertLitInfo &cand_split, const RoseInGraph &ig, - const vdest_map_t &v_dest_map) { - for (auto v : cand_split.vv) { - const auto &images = v_dest_map.at(v); - for (const auto &image : images) { - /* check that we aren't enveloped by the successor */ - if (enveloped(vector(1, image.second), cand_split.lit, - *ig[image.first].graph, - ig[target(image.first, ig)])) { - return true; - } - - const RoseInVertexProps &pred = ig[source(image.first, ig)]; - if (pred.type != RIV_LITERAL) { - continue; - } - - /* check we don't envelop the pred */ - const NGHolder &g = *ig[image.first].graph; - u32 lhs_min_len = min_dist_between(g.start, image.second, g); - if (lhs_min_len + pred.s.length() < max_len(cand_split.lit)) { - return true; - } - } - } - - return false; -} - -static -bool attemptSplit(RoseInGraph &ig, vdest_map_t &v_dest_map, - vsrc_map_t &v_src_map, const vector &v_e, - LitCollection &lits) { - NGHolder &h = *ig[v_e.front()].graph; - unique_ptr split = lits.pickNext(); - - while (split) { - for (const auto &e : v_e) { - RoseInVertex t = target(e, ig); - if (enveloped(split->vv, split->lit, h, ig[t])) { - DEBUG_PRINTF("enveloped\n"); - split = lits.pickNext(); - goto next_split; - } - } - break; - next_split:; - } - - if (!split) { - return false; - } - - for (auto v : split->vv) { - if (edge(v, h.accept, h).second) { - return false; - } - } - - DEBUG_PRINTF("saved by a bad literal\n"); - splitRoseEdge(ig, *split, v_dest_map, v_src_map); - return true; -} - -static -void appendLiteral(const ue2_literal &s, const CharReach &cr, - vector *out) { - for (size_t c = cr.find_first(); c != CharReach::npos; - c = cr.find_next(c)) { - bool nocase = ourisalpha(c) && cr.test(mytoupper(c)) - && cr.test(mytolower(c)); - - if (nocase && (char)c == mytolower(c)) { - continue; /* uppercase already handled us */ - } - - out->push_back(s); - out->back().push_back(c, nocase); - } -} - -static -bool findAnchoredLiterals(const NGHolder &g, vector *out, - vector *pivots_out) { - - DEBUG_PRINTF("trying for anchored\n"); -#define MAX_ANCHORED_LITERALS 30 -#define MAX_ANCHORED_LITERAL_LEN 30 - - /* TODO: this could be beefed up by going region-by-region but currently - * that brings back bad memories of ng_rose. 
OR any AA region we can build - * a dfa out of */ - assert(!proper_out_degree(g.startDs, g)); - - vector lits; - lits.push_back(ue2_literal()); - - set curr; - insert(&curr, adjacent_vertices(g.start, g)); - curr.erase(g.startDs); - - set old; - - if (contains(curr, g.accept) || curr.empty()) { - DEBUG_PRINTF("surprise accept/voidness\n"); - return false; - } - - while (!curr.empty()) { - set next_verts; - insert(&next_verts, adjacent_vertices(*curr.begin(), g)); - bool can_extend - = !next_verts.empty() && !contains(next_verts, g.accept); - CharReach cr; - - for (auto v : curr) { - assert(!is_special(v, g)); - - if (can_extend) { - /* next verts must agree */ - set next_verts_local; - insert(&next_verts_local, adjacent_vertices(v, g)); - can_extend = next_verts_local == next_verts; - } - - cr |= g[v].char_reach; - } - - if (!can_extend) { - goto bail; - } - - /* extend literals */ - assert(cr.any()); - vector next_lits; - for (const auto &lit : lits) { - appendLiteral(lit, cr, &next_lits); - if (next_lits.size() > MAX_ANCHORED_LITERALS) { - goto bail; - } - } - - assert(!next_lits.empty()); - old.swap(curr); - - if (next_lits[0].length() <= MAX_ANCHORED_LITERAL_LEN) { - curr.swap(next_verts); - } else { - curr.clear(); - } - - lits.swap(next_lits); - } - bail: - assert(!lits.empty()); - for (UNUSED const auto &lit : lits) { - DEBUG_PRINTF("found anchored string: %s\n", dumpString(lit).c_str()); - } - - insert(pivots_out, pivots_out->end(), old); - out->swap(lits); - return !out->empty() && !out->begin()->empty(); -} - -static -bool tryForAnchoredImprovement(RoseInGraph &ig, RoseInEdge e) { - vector lits; - vector pivots; - - if (!findAnchoredLiterals(*ig[e].graph, &lits, &pivots)) { - DEBUG_PRINTF("unable to find literals\n"); - return false; - } - DEBUG_PRINTF("found %zu literals to act as anchors\n", lits.size()); - - RoseInVertex s = source(e, ig); - RoseInVertex t = target(e, ig); - - assert(!ig[e].graph_lag); - - shared_ptr lhs = make_shared(); - shared_ptr rhs = make_shared(); - ue2::unordered_map temp1; - ue2::unordered_map temp2; - - splitGraph(*ig[e].graph, pivots, lhs.get(), &temp1, rhs.get(), &temp2); - - for (const auto &lit : lits) { - RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), - ig); - add_edge(s, v, RoseInEdgeProps(lhs, 0U), ig); - add_edge(v, t, RoseInEdgeProps(rhs, 0U), ig); - } - remove_edge(e, ig); - - return true; -} - -#define MAX_SINGLE_BYTE_ANCHORED_DIST 30 - -/* returns true if we should make another pass */ -static -bool lastChanceImproveLHS(RoseInGraph &ig, RoseInEdge lhs, - const CompileContext &cc) { - DEBUG_PRINTF("argh lhs is nasty\n"); - assert(ig[lhs].graph); - - /* customise the lhs for this literal */ - /* TODO better, don't recalc */ - if (ig[target(lhs, ig)].type == RIV_LITERAL) { - const NGHolder &h = *ig[lhs].graph; - - /* sanitise literal on lhs */ - const ue2_literal &s = ig[target(lhs, ig)].s; - - if (!can_match(h, s, false)) { - DEBUG_PRINTF("found bogus edge\n"); - return false; - } - - /* see if we can build some anchored literals out of this */ - if (isAnchored(h) && tryForAnchoredImprovement(ig, lhs)) { - return true; - } - - unique_ptr cust = cloneHolder(h); - u32 d = removeTrailingLiteralStates(*cust, s, MO_INVALID_IDX); - if (d == MO_INVALID_IDX) { - DEBUG_PRINTF("found bogus edge\n"); - return false; - } - restoreTrailingLiteralStates(*cust, s, d); - ig[lhs].graph = move(cust); - } - - NGHolder &lhs_graph = *ig[lhs].graph; - set cand; - set cand_raw; - getCandidatePivots(lhs_graph, &cand, &cand_raw); - vdest_map_t 
v_dest_map; - vsrc_map_t v_src_map; - for (auto v : vertices_range(lhs_graph)) { - v_dest_map[v].emplace_back(lhs, v); - v_src_map[lhs].push_back(v); - } - - vector depths; - calcDepths(lhs_graph, depths); - - /* need to ensure regions are valid before we do lit discovery */ - auto region_map = assignRegions(lhs_graph); - - vector to_cut(1, lhs); - DEBUG_PRINTF("see if we can get a better lhs by another cut\n"); - LitCollection lit1(lhs_graph, depths, region_map, cand, cand_raw, - cc.grey.minRoseLiteralLength, true, cc); - if (attemptSplit(ig, v_dest_map, v_src_map, to_cut, lit1)) { - return true; - } - - if (doNetflowCut(ig, to_cut, cc.grey)) { - return true; - } - - DEBUG_PRINTF("eek last chance try len 1 if it creates an anchored lhs\n"); - { - LitCollection lits(lhs_graph, depths, region_map, cand, cand_raw, 1, - true, cc, true); - unique_ptr split = lits.pickNext(); - - /* TODO fix edge to accept check */ - while (split - && (enveloped(split->vv, split->lit, lhs_graph, - ig[target(lhs, ig)]) - || edge(split->vv.front(), lhs_graph.accept, lhs_graph).second - || !createsAnchoredLHS(lhs_graph, split->vv, depths, cc.grey, - MAX_SINGLE_BYTE_ANCHORED_DIST))) { - split = lits.pickNext(); - } - - if (split) { - DEBUG_PRINTF("saved by a really bad literal\n"); - splitRoseEdge(ig, *split, v_dest_map, v_src_map); - return true; - } - } - - return false; -} - -/* returns false if nothing happened */ -static -bool lastChanceImproveLHS(RoseInGraph &ig, const vector &to_cut, - const CompileContext &cc) { - DEBUG_PRINTF("argh lhses are nasty\n"); - - NGHolder &lhs_graph = *ig[to_cut.front()].graph; - set cand; - set cand_raw; - getCandidatePivots(lhs_graph, &cand, &cand_raw); - vdest_map_t v_dest_map; - vsrc_map_t v_src_map; - for (auto v : vertices_range(lhs_graph)) { - for (const auto &e : to_cut) { - v_dest_map[v].emplace_back(e, v); - v_src_map[e].push_back(v); - } - } - - vector depths; - calcDepths(lhs_graph, depths); - - auto region_map = assignRegions(lhs_graph); - - DEBUG_PRINTF("see if we can get a better lhs by allowing another cut\n"); - LitCollection lit1(lhs_graph, depths, region_map, cand, cand_raw, - cc.grey.minRoseLiteralLength, true, cc); - if (attemptSplit(ig, v_dest_map, v_src_map, to_cut, lit1)) { - return true; - } - - return doNetflowCut(ig, to_cut, cc.grey); -} - -static -bool improveLHS(RoseInGraph &ig, const vector &edges, - const CompileContext &cc) { - bool rv = false; - - vector src_verts; - getSourceVerts(ig, edges, &src_verts); - - map> by_src; - for (const auto &e : edges) { - by_src[source(e, ig)].push_back(e); - } - - for (auto v : src_verts) { - const vector &local = by_src[v]; - - vector graphs; - map > by_graph; - for (const auto &e : local) { - NGHolder *gp = ig[e].graph.get(); - if (!contains(by_graph, gp)) { - graphs.push_back(gp); - } - by_graph[gp].push_back(e); - } - - for (auto h : graphs) { - const vector &local2 = by_graph[h]; - if (local2.size() == 1) { - rv |= lastChanceImproveLHS(ig, local2.front(), cc); - continue; - } - - bool lrv = lastChanceImproveLHS(ig, local2, cc); - if (lrv) { - rv = true; - } else { - for (const auto &e2 : local2) { - rv |= lastChanceImproveLHS(ig, e2, cc); - } - } - } - } - - return rv; -} - -static -void processLHS(RoseInGraph &ig, const CompileContext &cc) { - bool redo; - do { - redo = false; - vector to_improve; - for (const auto &lhs : edges_range(ig)) { - if (ig[source(lhs, ig)].type != RIV_START - && ig[source(lhs, ig)].type != RIV_ANCHORED_START) { - continue; - } - - if (ig[target(lhs, ig)].type == RIV_LITERAL) { - 
DEBUG_PRINTF("checking lhs->'%s'\n", - ig[target(lhs, ig)].s.c_str()); - } else { - DEBUG_PRINTF("checking lhs->?\n"); - } - - - /* if check if lhs is nasty */ - if (ig[target(lhs, ig)].type == RIV_ACCEPT) { - to_improve.push_back(lhs); - continue; - } - - assert(ig[lhs].graph); - const NGHolder *h = ig[lhs].graph.get(); - - vector depths; - calcDepths(*h, depths); - - if (!isLHSTransient(*h, depths, cc.grey) - && !literalIsWholeGraph(*h, ig[target(lhs, ig)].s) - && !isLHSUsablyAnchored(*h, depths, cc.grey)) { - to_improve.push_back(lhs); - } - } - - DEBUG_PRINTF("inspecting %zu lhs\n", to_improve.size()); - if (to_improve.size() > 50) { - DEBUG_PRINTF("too big\n"); - break; - } - - redo = improveLHS(ig, to_improve, cc); - DEBUG_PRINTF("redo = %d\n", (int)redo); - } while (redo); - - vector to_inspect; /* to prevent surprises caused by us - * altering the graph while iterating */ - for (const auto &e : edges_range(ig)) { - if (ig[source(e, ig)].type == RIV_START - || ig[source(e, ig)].type == RIV_ANCHORED_START) { - to_inspect.push_back(e); - } - } - - for (const auto &lhs : to_inspect) { - if (ig[target(lhs, ig)].type == RIV_LITERAL) { - if (removeLiteralFromLHS(ig, lhs, cc)) { - handleLhsCliche(ig, lhs); - } else { - /* telling us to delete the edge */ - remove_edge(lhs, ig); - } - } - } -} - -static -void tryNetflowCutForRHS(RoseInGraph &ig, const Grey &grey) { - vector to_improve; - for (const auto &rhs : edges_range(ig)) { - if (ig[target(rhs, ig)].type != RIV_ACCEPT) { - continue; - } - - if (ig[source(rhs, ig)].type == RIV_LITERAL) { - DEBUG_PRINTF("checking '%s'->rhs\n", ig[source(rhs, ig)].s.c_str()); - } else { - DEBUG_PRINTF("checking ?->rhs\n"); - } - - if (!ig[rhs].graph) { - continue; - } - - DEBUG_PRINTF("%zu vertices\n", num_vertices(*ig[rhs].graph)); - if (num_vertices(*ig[rhs].graph) < 512) { - DEBUG_PRINTF("small\n"); - continue; - } - - /* if check if rhs is nasty */ - to_improve.push_back(rhs); - } - - DEBUG_PRINTF("inspecting %zu lhs\n", to_improve.size()); - if (to_improve.size() > 50) { - DEBUG_PRINTF("too big\n"); - return; - } - - for (const auto &e : to_improve) { - vector to_cut(1, e); - doNetflowCut(ig, to_cut, grey); - } -} - -/* just make the string nocase and get the graph to handle case mask, TODO. - * This could be more nuanced but the effort would probably be better spent - * just making rose less bad. 
*/ -static -void makeNocaseWithPrefixMask(RoseInGraph &g, RoseInVertex v) { - for (const auto &e : in_edges_range(v, g)) { - const RoseInVertex u = source(e, g); - - if (!g[e].graph) { - g[e].graph = make_shared(whatRoseIsThis(g, e)); - g[e].graph_lag = g[v].s.length(); - NGHolder &h = *g[e].graph; - - assert(!g[e].maxBound || g[e].maxBound == ROSE_BOUND_INF); - - if (g[u].type == RIV_START) { - add_edge(h.startDs, h.accept, h); - h[h.startDs].reports.insert(0); - } else if (g[e].maxBound == ROSE_BOUND_INF) { - add_edge(h.start, h.accept, h); - NFAVertex ds = add_vertex(h); - - h[ds].char_reach = CharReach::dot(); - - NFAEdge e_start_to_ds = add_edge(h.start, ds, h); - add_edge(ds, ds, h); - add_edge(ds, h.accept, h); - h[h.start].reports.insert(0); - h[ds].reports.insert(0); - - if (g[u].type == RIV_LITERAL) { - h[e_start_to_ds].tops.insert(DEFAULT_TOP); - } - } else { - assert(g[u].type == RIV_ANCHORED_START); - add_edge(h.start, h.accept, h); - h[h.start].reports.insert(0); - } - } - - if (!g[e].graph_lag) { - continue; - } - unique_ptr newg = cloneHolder(*g[e].graph); - restoreTrailingLiteralStates(*newg, g[v].s, g[e].graph_lag); - g[e].graph_lag = 0; - g[e].graph = move(newg); - } - - make_nocase(&g[v].s); -} - -static -unique_ptr makeGraphCopy(const NGHolder *g) { - if (g) { - return cloneHolder(*g); - } else { - return nullptr; - } -} - -static -void explodeLiteral(RoseInGraph &g, RoseInVertex v, - vector &exploded) { - for (const auto &lit : exploded) { - RoseInVertex v_new = add_vertex(g[v], g); - g[v_new].s = lit; - - for (const auto &e : in_edges_range(v, g)) { - RoseInEdge e2 = add_edge(source(e, g), v_new, g[e], g); - // FIXME: are we safe to share graphs here? For now, make our very - // own copy. - g[e2].graph = makeGraphCopy(g[e].graph.get()); - } - - for (const auto &e : out_edges_range(v, g)) { - RoseInEdge e2 = add_edge(v_new, target(e, g), g[e], g); - // FIXME: are we safe to share graphs here? For now, make our very - // own copy. - g[e2].graph = makeGraphCopy(g[e].graph.get()); - } - } - - clear_vertex(v, g); - remove_vertex(v, g); -} - -/* Sadly rose is hacky in terms of mixed case literals. TODO: remove when rose - * becomes less bad */ -static -void handleLongMixedSensitivityLiterals(RoseInGraph &g) { - const size_t maxExploded = 8; // only case-explode this far - - vector verts; - - for (auto v : vertices_range(g)) { - if (g[v].type != RIV_LITERAL) { - continue; - } - - ue2_literal &s = g[v].s; - - if (!mixed_sensitivity(s)) { - continue; - } - - if (s.length() < MAX_MASK2_WIDTH) { - DEBUG_PRINTF("mixed lit will be handled by benefits mask\n"); - continue; - } - - DEBUG_PRINTF("found mixed lit of len %zu\n", s.length()); - verts.push_back(v); - } - - for (auto v : verts) { - vector exploded; - case_iter cit = caseIterateBegin(g[v].s), cite = caseIterateEnd(); - for (; cit != cite; ++cit) { - exploded.emplace_back(*cit, false); - if (exploded.size() > maxExploded) { - goto dont_explode; - } - } - - DEBUG_PRINTF("exploding literal into %zu pieces\n", exploded.size()); - explodeLiteral(g, v, exploded); - continue; - - dont_explode: - DEBUG_PRINTF("converting to nocase with prefix mask\n"); - makeNocaseWithPrefixMask(g, v); - } - - DEBUG_PRINTF("done!\n"); -} - -static -void dedupe(RoseInGraph &g) { - /* We know that every prefix/infix is unique after the rose construction. - * - * If a vertex has out-going graphs with the same rewind and they are equal - * we can dedupe the graph. - * - * After this, we may share graphs on out-edges of a vertex. 
*/ - map, vector>> buckets; - - for (auto v : vertices_range(g)) { - buckets.clear(); - - for (const auto &e : out_edges_range(v, g)) { - if (!g[e].graph || g[target(e, g)].type != RIV_LITERAL) { - continue; - } - auto k = make_pair(g[e].graph_lag, hash_holder(*g[e].graph)); - auto &bucket = buckets[k]; - for (const auto &h : bucket) { - if (is_equal(*g[e].graph, 0U, *h, 0U)) { - g[e].graph = h; - goto next_edge; - } - } - - bucket.push_back(g[e].graph); - next_edge:; - } - } -} - -static -bool pureReport(NFAVertex v, const NGHolder &g) { - for (auto w : adjacent_vertices_range(v, g)) { - if (w != g.accept && w != g.acceptEod) { - return false; - } - } - return true; -} - -static -bool pureReport(const vector &vv, const NGHolder &g) { - for (auto v : vv) { - if (!pureReport(v, g)) { - return false; - } - } - - return true; -} - -/* ensures that a vertex is followed by a start construct AND the cyclic states - * has a reasonably wide reach */ -static -bool followedByStar(NFAVertex v, const NGHolder &g) { - set succ; - insert(&succ, adjacent_vertices(v, g)); - - set asucc; - - for (auto w : adjacent_vertices_range(v, g)) { - if (g[w].char_reach.count() < N_CHARS - MAX_ESCAPE_CHARS) { - continue; /* state is too narrow to be considered as a sane star - cyclic */ - } - - asucc.clear(); - insert(&asucc, adjacent_vertices(w, g)); - - if (asucc == succ) { - return true; - } - } - return false; -} - -static -bool followedByStar(const vector &vv, const NGHolder &g) { - for (auto v : vv) { - if (!followedByStar(v, g)) { - return false; - } - } - - return true; -} - -static -bool isEodPrefixCandidate(const NGHolder &g) { - if (in_degree(g.accept, g)) { - DEBUG_PRINTF("graph isn't eod anchored\n"); - return false; - } - - // TODO: handle more than one report. - if (all_reports(g).size() != 1) { - return false; - } - - return true; -} - - -static -bool isEodWithPrefix(const RoseInGraph &g) { - if (num_vertices(g) != 2) { - return false; - } - - for (const auto &e : edges_range(g)) { - RoseInVertex u = source(e, g), v = target(e, g); - DEBUG_PRINTF("edge from %d -> %d\n", g[u].type, g[v].type); - - if (g[u].type != RIV_START && g[u].type != RIV_ANCHORED_START) { - DEBUG_PRINTF("source not start, type=%d\n", g[u].type); - return false; - } - - if (g[v].type != RIV_ACCEPT && g[v].type != RIV_ACCEPT_EOD) { - DEBUG_PRINTF("target not accept, type=%d\n", g[v].type); - return false; - } - - // Haigs not handled. - if (g[e].haig) { - DEBUG_PRINTF("edge has haig\n"); - return false; - } - - if (!g[e].graph) { - DEBUG_PRINTF("no graph on edge\n"); - return false; - } - - if (!isEodPrefixCandidate(*g[e].graph)) { - DEBUG_PRINTF("graph is not eod prefix candidate\n"); - return false; - } - } - - return true; -} - -static -void processEodPrefixes(RoseInGraph &g) { - // Find edges to accept with EOD-anchored graphs that we can move over to - // acceptEod. - vector acc_edges; - for (const auto &e : edges_range(g)) { - if (g[target(e, g)].type != RIV_ACCEPT) { - continue; - } - if (g[e].haig || !g[e].graph) { - continue; - } - if (!isEodPrefixCandidate(*g[e].graph)) { - continue; - } - - // TODO: handle cases with multiple out-edges. 
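/* A minimal standalone sketch of the bucket-then-verify pattern used by
 * dedupe() above: bucket candidates by a cheap key (here the rewind distance
 * plus a structural hash), and only run the expensive deep-equality check
 * within a bucket. Plain std::string payloads stand in for NGHolder graphs;
 * the names below are illustrative, not Hyperscan API. */
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

using Payload = std::string; // stands in for a shared NGHolder

static void dedupeByHash(std::vector<std::shared_ptr<Payload>> &items) {
    // Key: (rewind, hash). A hash collision only costs an extra deep compare.
    std::map<std::pair<size_t, size_t>,
             std::vector<std::shared_ptr<Payload>>> buckets;
    for (auto &p : items) {
        auto key = std::make_pair(size_t{0}, std::hash<Payload>{}(*p));
        auto &bucket = buckets[key];
        bool shared = false;
        for (const auto &q : bucket) {
            if (*q == *p) { // deep equality confirms the cheap key
                p = q;      // share the previously-seen object
                shared = true;
                break;
            }
        }
        if (!shared) {
            bucket.push_back(p);
        }
    }
}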
- if (out_degree(source(e, g), g) > 1) { - continue; - } - - acc_edges.push_back(e); - } - - set accepts; - - for (const RoseInEdge &e : acc_edges) { - RoseInVertex u = source(e, g), v = target(e, g); - assert(g[e].graph); - assert(g[v].type == RIV_ACCEPT); - assert(all_reports(*g[e].graph).size() == 1); - - // Move this edge from accept to acceptEod and give it the right reports - // from the graph on the edge. - const set reports = all_reports(*g[e].graph); - RoseInVertex w = add_vertex( - RoseInVertexProps::makeAcceptEod(reports), g); - add_edge(u, w, g[e], g); - - remove_edge(e, g); - accepts.insert(v); - } - - for (auto v : accepts) { - if (!in_degree(v, g)) { - remove_vertex(v, g); - } - } -} - -/** Run some reduction passes on the graphs on our edges. */ -static -void reduceGraphs(RoseInGraph &g, const CompileContext &cc) { - for (const auto &e : edges_range(g)) { - if (!g[e].graph) { - continue; - } - NGHolder &h = *g[e].graph; - assert(h.kind == whatRoseIsThis(g, e)); - DEBUG_PRINTF("before, graph %p has %zu vertices, %zu edges\n", &h, - num_vertices(h), num_edges(h)); - - pruneUseless(h); - - reduceGraphEquivalences(h, cc); - - removeRedundancy(h, SOM_NONE); /* rose doesn't track som */ - - DEBUG_PRINTF("after, graph %p has %zu vertices, %zu edges\n", &h, - num_vertices(h), num_edges(h)); - - // It's possible that one of our graphs may have reduced to a dot-star - // cliche, i.e. it contains a startDs->accept edge. If so, we can - // remove it from the edge and just use edge bounds to represent it. - if (edge(h.startDs, h.accept, h).second) { - DEBUG_PRINTF("graph reduces to dot-star, deleting\n"); - g[e].graph.reset(); - g[e].graph_lag = 0; - g[e].minBound = 0; - g[e].maxBound = ROSE_BOUND_INF; - } - } -} - -static -unique_ptr buildRose(const NGHolder &h, bool desperation, - const CompileContext &cc) { - /* Need to pick a pivot point which splits the graph in two with starts on - * one side and accepts on the other. 
Thus the pivot needs to dominate all - * the accept vertices */ - - /* maps a vertex in h to one of its images in the rose graph */ - vdest_map_t v_dest_map; - vsrc_map_t v_src_map; - - /* create trivial rose graph */ - unique_ptr igp = makeTrivialGraph(h, v_dest_map, v_src_map); - RoseInGraph &ig = *igp; - - /* root graph is the graph on the only edge in our new RoseInGraph */ - assert(num_edges(ig) == 1); - shared_ptr root_g = ig[*edges(ig).first].graph; - assert(root_g); - - /* find the literals */ - set cand; - set cand_raw; - getCandidatePivots(*root_g, &cand, &cand_raw); - - DEBUG_PRINTF("|cand| = %zu\n", cand.size()); - - vector depths; - calcDepths(*root_g, depths); - - auto region_map = assignRegions(*root_g); - - LitCollection lits(*root_g, depths, region_map, cand, cand_raw, - cc.grey.minRoseLiteralLength, desperation, cc); - - for (u32 i = 0; i < cc.grey.roseDesiredSplit; ++i) { - DEBUG_PRINTF("attempting split %u (desired %u)\n", i, - cc.grey.roseDesiredSplit); - unique_ptr split = lits.pickNext(); - - /* need to check we aren't creating any enveloping literals */ - while (split && enveloped(*split, ig, v_dest_map)) { - DEBUG_PRINTF("bad cand; getting next split\n"); - split = lits.pickNext(); - } - - if (!split) { - DEBUG_PRINTF("no more lits :(\n"); - break; - } - splitRoseEdge(ig, *split, v_dest_map, v_src_map); - } - - /* try for more split literals if they are followed by .* or accept */ - for (;;) { - DEBUG_PRINTF("attempting bonus split\n"); - unique_ptr split = lits.pickNext(); - - /* need to check we aren't creating any enveloping literals */ - while (split - && (enveloped(*split, ig, v_dest_map) - || (!pureReport(split->vv, *root_g) - && !followedByStar(split->vv, *root_g)))) { - DEBUG_PRINTF("bad cand; getting next split\n"); - split = lits.pickNext(); - } - - if (!split) { - DEBUG_PRINTF("no more lits :(\n"); - break; - } - DEBUG_PRINTF("got bonus split\n"); - splitRoseEdge(ig, *split, v_dest_map, v_src_map); - } - - processLHS(ig, cc); - - if (num_vertices(ig) <= 2) { - // At present, we don't accept all outfixes. - // However, we do handle the specific case of a rose that precedes an - // acceptEod, which we will support as a prefix to a special EOD event - // "literal". - if (!isEodWithPrefix(ig)) { - igp.reset(); - return igp; - } - } - - processEodPrefixes(ig); - - processInfixes(ig, cc); - - handleLongMixedSensitivityLiterals(ig); - - dedupe(ig); - - pruneUseless(ig); - - reduceGraphs(ig, cc); - - dumpPreRoseGraph(ig, cc.grey); - - renumber_vertices(ig); - calcVertexOffsets(ig); - return igp; -} - -static -void desperationImprove(RoseInGraph &ig, const CompileContext &cc) { - DEBUG_PRINTF("rose said no; can we do better?\n"); - - /* infixes are tricky as we have to worry about delays, enveloping - * literals, etc */ - tryNetflowCutForRHS(ig, cc.grey); - processInfixes(ig, cc); - - handleLongMixedSensitivityLiterals(ig); - dedupe(ig); - pruneUseless(ig); - renumber_vertices(ig); - calcVertexOffsets(ig); -} - -bool splitOffRose(RoseBuild &rose, const NGHolder &h, bool prefilter, - const CompileContext &cc) { - if (!cc.grey.allowRose) { - return false; - } - - // We should have at least one edge into accept or acceptEod! 
- assert(in_degree(h.accept, h) || in_degree(h.acceptEod, h) > 1); - - unique_ptr igp = buildRose(h, false, cc); - if (igp && rose.addRose(*igp, prefilter)) { - goto ok; - } - - igp = buildRose(h, true, cc); - - if (igp) { - if (rose.addRose(*igp, prefilter)) { - goto ok; - } - - desperationImprove(*igp, cc); - - if (rose.addRose(*igp, prefilter)) { - goto ok; - } - } - - DEBUG_PRINTF("rose build failed\n"); - return false; - -ok: - DEBUG_PRINTF("rose build ok\n"); - return true; -} - -bool finalChanceRose(RoseBuild &rose, const NGHolder &h, bool prefilter, - const CompileContext &cc) { - DEBUG_PRINTF("final chance rose\n"); - if (!cc.grey.allowRose) { - return false; - } - assert(h.kind == NFA_OUTFIX); - - ue2_literal lit; - bool anch = false; - shared_ptr rhs = make_shared(); - if (!splitOffLeadingLiteral(h, &lit, &*rhs)) { - DEBUG_PRINTF("no floating literal\n"); - anch = true; - if (!splitOffAnchoredLeadingLiteral(h, &lit, &*rhs)) { - DEBUG_PRINTF("no anchored literal\n"); - return false; - } - } - - if (lit.length() < cc.grey.minRoseLiteralLength - || minStringPeriod(lit) < 2 ) { - DEBUG_PRINTF("lit too weak\n"); - return false; - } - - assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit)); - - RoseInGraph ig; - RoseInVertex s - = add_vertex(RoseInVertexProps::makeStart(anch), ig); - RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); - add_edge(s, v, RoseInEdgeProps(0, anch ? 0 : ROSE_BOUND_INF), ig); - - ue2_literal lit2; - if (getTrailingLiteral(h, &lit2) - && lit2.length() >= cc.grey.minRoseLiteralLength - && minStringPeriod(lit2) >= 2) { - - /* TODO: handle delay */ - size_t overlap = maxOverlap(lit, lit2, 0); - u32 delay2 = lit2.length() - overlap; - delay2 = min(delay2, maxDelay(cc)); - delay2 = removeTrailingLiteralStates(*rhs, lit2, delay2); - rhs->kind = NFA_INFIX; - assert(delay2 <= lit2.length()); - - RoseInVertex w - = add_vertex(RoseInVertexProps::makeLiteral(lit2), ig); - add_edge(v, w, RoseInEdgeProps(rhs, delay2), ig); - - NFAVertex reporter = getSoleSourceVertex(h, h.accept); - assert(reporter); - const auto &reports = h[reporter].reports; - RoseInVertex a = - add_vertex(RoseInVertexProps::makeAccept(reports), ig); - add_edge(w, a, RoseInEdgeProps(0U, 0U), ig); - } else { - RoseInVertex a = - add_vertex(RoseInVertexProps::makeAccept(set()), ig); - add_edge(v, a, RoseInEdgeProps(rhs, 0U), ig); - } - - renumber_vertices(ig); - calcVertexOffsets(ig); - - return rose.addRose(ig, prefilter, true /* final chance */); -} - -bool checkRose(const ReportManager &rm, const NGHolder &h, bool prefilter, - const CompileContext &cc) { - if (!cc.grey.allowRose) { - return false; - } - - // We should have at least one edge into accept or acceptEod! - assert(in_degree(h.accept, h) || in_degree(h.acceptEod, h) > 1); - - unique_ptr igp; - - // First pass. - - igp = buildRose(h, false, cc); - if (igp && roseCheckRose(*igp, prefilter, rm, cc)) { - return true; - } - - // Second ("desperation") pass. 
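/* finalChanceRose() above derives the delay of a trailing literal as
 * lit2.length() - maxOverlap(lit, lit2): only the bytes of lit2 that extend
 * past lit need to be rewound. A plain std::string sketch of that
 * arithmetic, assuming maxOverlap means the longest suffix of the first
 * literal that is also a prefix of the second. */
#include <algorithm>
#include <string>

static size_t maxOverlapLen(const std::string &a, const std::string &b) {
    size_t limit = std::min(a.size(), b.size());
    for (size_t len = limit; len > 0; len--) {
        if (a.compare(a.size() - len, len, b, 0, len) == 0) {
            return len; // longest suffix of a matching a prefix of b
        }
    }
    return 0;
}

// e.g. lit = "abcdef", lit2 = "defgh": overlap is 3, so delay = 5 - 3 = 2.
static size_t trailingDelay(const std::string &lit, const std::string &lit2) {
    return lit2.size() - maxOverlapLen(lit, lit2);
}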
- - igp = buildRose(h, true, cc); - if (igp) { - if (roseCheckRose(*igp, prefilter, rm, cc)) { - return true; - } - - desperationImprove(*igp, cc); - - if (roseCheckRose(*igp, prefilter, rm, cc)) { - return true; - } - } - - return false; -} - -} // namespace ue2 diff --git a/src/nfagraph/ng_rose.h b/src/nfagraph/ng_rose.h deleted file mode 100644 index d180e8a5f..000000000 --- a/src/nfagraph/ng_rose.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Rose construction from NGHolder. - */ - -#ifndef NG_ROSE_H -#define NG_ROSE_H - -#include "ng_holder.h" -#include "ue2common.h" - -#include - -namespace ue2 { - -class NGHolder; -class ReportManager; -class RoseBuild; - -struct CompileContext; -struct ue2_literal; - -/** \brief Attempt to consume the entire pattern in graph \a h with Rose. - * Returns true if successful. */ -bool splitOffRose(RoseBuild &rose, const NGHolder &h, bool prefilter, - const CompileContext &cc); - -/** \brief Attempt to consume the entire pattern in graph \a h with Rose. - * This is the last attempt to handle a pattern before we resort to an outfix. - * Returns true if successful. */ -bool finalChanceRose(RoseBuild &rose, const NGHolder &h, bool prefilter, - const CompileContext &cc); - -/** \brief True if the pattern in \a h is consumable by Rose. This function - * may be conservative (return false even if supported) for efficiency. */ -bool checkRose(const ReportManager &rm, const NGHolder &h, bool prefilter, - const CompileContext &cc); - -/** \brief Returns the delay or MO_INVALID_IDX if the graph cannot match with - * the trailing literal. 
*/ -u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, - u32 max_delay, bool overhang_ok = true); - -void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, - u32 delay); - -void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, - u32 delay, - const std::vector &preds); - -} // namespace ue2 - -#endif // NG_ROSE_H diff --git a/src/nfagraph/ng_som.cpp b/src/nfagraph/ng_som.cpp index f6ba0fa7d..674381031 100644 --- a/src/nfagraph/ng_som.cpp +++ b/src/nfagraph/ng_som.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,9 +26,13 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief SOM ("Start of Match") analysis. */ + +#include "ng_som.h" + #include "ng.h" #include "ng_dump.h" #include "ng_equivalence.h" @@ -40,15 +44,15 @@ #include "ng_redundancy.h" #include "ng_region.h" #include "ng_reports.h" -#include "ng_rose.h" -#include "ng_som.h" #include "ng_som_add_redundancy.h" #include "ng_som_util.h" #include "ng_split.h" #include "ng_util.h" +#include "ng_violet.h" #include "ng_width.h" #include "grey.h" #include "ue2common.h" +#include "compiler/compiler.h" #include "nfa/goughcompile.h" #include "nfa/nfa_internal.h" // for MO_INVALID_IDX #include "parser/position.h" @@ -1584,8 +1588,9 @@ void dumpSomPlan(UNUSED const NGHolder &g, UNUSED const som_plan &p, * implement the full pattern. */ static -void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g, - vector &plan, const u32 first_som_slot) { +void implementSomPlan(NG &ng, const ExpressionInfo &expr, u32 comp_id, + NGHolder &g, vector &plan, + const u32 first_som_slot) { ReportManager &rm = ng.rm; SomSlotManager &ssm = ng.ssm; @@ -1598,14 +1603,14 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g, // Root plan, which already has a SOM slot assigned (first_som_slot). 
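/* A simplified model of the plan chaining that implementSomPlan() walks
 * below: every non-root entry reads the SOM slot of its parent (which must
 * appear earlier in the vector, as the assert on it->parent enforces) and
 * records its own matches into a fresh slot. PlanEntry is a stand-in, not
 * the real som_plan struct. */
#include <cassert>
#include <cstdint>
#include <vector>

struct PlanEntry {
    uint32_t parent;   // index of parent entry; entry 0 is the root
    uint32_t som_slot; // slot this entry writes; filled in during the walk
};

static void assignSomSlots(std::vector<PlanEntry> &plan,
                           uint32_t first_som_slot,
                           uint32_t &next_free_slot) {
    assert(!plan.empty());
    plan[0].som_slot = first_som_slot; // root already has its slot
    for (uint32_t i = 1; i < plan.size(); i++) {
        assert(plan[i].parent < i);    // parents precede children
        uint32_t slot_in = plan[plan[i].parent].som_slot;
        (void)slot_in; // a midfix would be built from slot_in to the new slot
        plan[i].som_slot = next_free_slot++;
    }
}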
dumpSomPlan(g, plan.front(), 0); - dumpSomSubComponent(*plan.front().prefix, "04_som", w.expressionIndex, - comp_id, 0, ng.cc.grey); + dumpSomSubComponent(*plan.front().prefix, "04_som", expr.index, comp_id, 0, + ng.cc.grey); assert(plan.front().prefix); if (plan.front().escapes.any() && !plan.front().is_reset) { /* setup escaper for first som location */ if (!createEscaper(ng, *plan.front().prefix, plan.front().escapes, first_som_slot)) { - throw CompileError(w.expressionIndex, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } } @@ -1617,7 +1622,7 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g, for (++it; it != plan.end(); ++it) { const u32 plan_num = it - plan.begin(); dumpSomPlan(g, *it, plan_num); - dumpSomSubComponent(*it->prefix, "04_som", w.expressionIndex, comp_id, + dumpSomSubComponent(*it->prefix, "04_som", expr.index, comp_id, plan_num, ng.cc.grey); assert(it->parent < plan_num); @@ -1628,7 +1633,7 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g, assert(!it->no_implement); if (!buildMidfix(ng, *it, som_slot_in, som_slot_out)) { - throw CompileError(w.expressionIndex, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } updateReportToUseRecordedSom(rm, g, it->reporters_in, som_slot_in); updateReportToUseRecordedSom(rm, g, it->reporters, som_slot_out); @@ -1639,7 +1644,7 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g, renumber_vertices(*plan.front().prefix); assert(plan.front().prefix->kind == NFA_OUTFIX); if (!ng.addHolder(*plan.front().prefix)) { - throw CompileError(w.expressionIndex, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } } } @@ -1727,19 +1732,19 @@ void clearProperInEdges(NGHolder &g, const NFAVertex sink) { namespace { struct SomRevNfa { - SomRevNfa(NFAVertex s, ReportID r, aligned_unique_ptr n) + SomRevNfa(NFAVertex s, ReportID r, bytecode_ptr n) : sink(s), report(r), nfa(move(n)) {} - SomRevNfa(SomRevNfa&& s) // MSVC2013 needs this for emplace - : sink(s.sink), report(s.report), nfa(move(s.nfa)) {} + SomRevNfa(SomRevNfa &&s) // MSVC2013 needs this for emplace + : sink(s.sink), report(s.report), nfa(move(s.nfa)) {} NFAVertex sink; ReportID report; - aligned_unique_ptr nfa; + bytecode_ptr nfa; }; } static -aligned_unique_ptr makeBareSomRevNfa(const NGHolder &g, - const CompileContext &cc) { +bytecode_ptr makeBareSomRevNfa(const NGHolder &g, + const CompileContext &cc) { // Create a reversed anchored version of this NFA which fires a zero report // ID on accept. NGHolder g_rev; @@ -1755,7 +1760,7 @@ aligned_unique_ptr makeBareSomRevNfa(const NGHolder &g, DEBUG_PRINTF("building a rev NFA with %zu vertices\n", num_vertices(g_rev)); - aligned_unique_ptr nfa = constructReversedNFA(g_rev, cc); + auto nfa = constructReversedNFA(g_rev, cc); if (!nfa) { return nfa; } @@ -1790,7 +1795,7 @@ bool makeSomRevNfa(vector &som_nfas, const NGHolder &g, renumber_vertices(g2); // for findMinWidth, findMaxWidth. 
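/* makeBareSomRevNfa() above compiles a reversed copy of the prefix graph so
 * the runtime can scan backwards from the match end to recover the start of
 * match. The core graph operation is a transpose: every edge (u, v) becomes
 * (v, u). A minimal adjacency-list sketch of that step, not the NGHolder
 * machinery itself. */
#include <cstddef>
#include <vector>

using AdjList = std::vector<std::vector<size_t>>; // vertex -> successors

static AdjList reverseGraph(const AdjList &g) {
    AdjList rev(g.size());
    for (size_t u = 0; u < g.size(); u++) {
        for (size_t v : g[u]) {
            rev[v].push_back(u); // flip the direction of each edge
        }
    }
    return rev;
}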
- aligned_unique_ptr nfa = makeBareSomRevNfa(g2, cc); + auto nfa = makeBareSomRevNfa(g2, cc); if (!nfa) { DEBUG_PRINTF("couldn't build rev nfa\n"); return false; @@ -1852,7 +1857,7 @@ bool doSomRevNfa(NG &ng, NGHolder &g, const CompileContext &cc) { } static -u32 doSomRevNfaPrefix(NG &ng, const NGWrapper &w, NGHolder &g, +u32 doSomRevNfaPrefix(NG &ng, const ExpressionInfo &expr, NGHolder &g, const CompileContext &cc) { depth maxWidth = findMaxWidth(g); @@ -1861,7 +1866,7 @@ u32 doSomRevNfaPrefix(NG &ng, const NGWrapper &w, NGHolder &g, auto nfa = makeBareSomRevNfa(g, cc); if (!nfa) { - throw CompileError(w.expressionIndex, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } if (ng.cc.streaming) { @@ -2055,8 +2060,8 @@ void roseAddHaigLiteral(RoseBuild &tb, const shared_ptr &prefix, } static -sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, - som_type som, +sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, + u32 comp_id, som_type som, const ue2::unordered_map ®ions, const map &info, map::const_iterator lower_bound) { @@ -2073,11 +2078,11 @@ sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, const u32 numSomLocsBefore = ssm.numSomSlots(); /* for rollback */ u32 som_loc = ssm.getPrivateSomSlot(); - if (!checkRose(rm, g, false, cc) && !isImplementableNFA(g, &rm, cc)) { + if (!checkViolet(rm, g, false, cc) && !isImplementableNFA(g, &rm, cc)) { // This is an optimisation: if we can't build a Haig from a portion of // the graph, then we won't be able to manage it as an outfix either // when we fall back. - throw CompileError(w.expressionIndex, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } while (1) { @@ -2152,7 +2157,7 @@ sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, goto next_try; } - implementSomPlan(ng, w, comp_id, g, plan, som_loc); + implementSomPlan(ng, expr, comp_id, g, plan, som_loc); Report ir = makeCallback(0U, 0); assert(!plan.empty()); @@ -2877,7 +2882,7 @@ unique_ptr makePrefixForChain(NGHolder &g, return prefix; } -sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, +sombe_rv doSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, u32 comp_id, som_type som) { assert(som); DEBUG_PRINTF("som hello\n"); @@ -3001,7 +3006,7 @@ sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, /* create prefix to set the som_loc */ updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_UNSET); if (prefix_by_rev) { - u32 rev_comp_id = doSomRevNfaPrefix(ng, w, *prefix, cc); + u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc); updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id); } renumber_vertices(*prefix); @@ -3084,18 +3089,18 @@ sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET); } if (prefix_by_rev && !plan.front().no_implement) { - u32 rev_comp_id = doSomRevNfaPrefix(ng, w, *prefix, cc); + u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc); updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id); } - implementSomPlan(ng, w, comp_id, g, plan, som_loc); + implementSomPlan(ng, expr, comp_id, g, plan, som_loc); DEBUG_PRINTF("success\n"); return SOMBE_HANDLED_INTERNAL; } -sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, - som_type som) { +sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const ExpressionInfo &expr, + u32 comp_id, som_type som) { assert(som); DEBUG_PRINTF("som+haig 
hello\n"); @@ -3132,7 +3137,7 @@ sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, buildRegionMapping(g, regions, info, true); sombe_rv rv = - doHaigLitSom(ng, g, w, comp_id, som, regions, info, info.begin()); + doHaigLitSom(ng, g, expr, comp_id, som, regions, info, info.begin()); if (rv == SOMBE_FAIL) { clear_graph(g); cloneHolder(g, g_pristine); diff --git a/src/nfagraph/ng_som.h b/src/nfagraph/ng_som.h index 707109454..ecae4c67f 100644 --- a/src/nfagraph/ng_som.h +++ b/src/nfagraph/ng_som.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,12 +34,14 @@ #define NG_SOM_H #include "som/som.h" +#include "ue2common.h" namespace ue2 { +class ExpressionInfo; class NG; class NGHolder; -class NGWrapper; +class ReportManager; struct Grey; enum sombe_rv { @@ -63,14 +65,14 @@ enum sombe_rv { * May throw a "Pattern too large" exception if prefixes of the * pattern are too large to compile. */ -sombe_rv doSom(NG &ng, NGHolder &h, const NGWrapper &w, u32 comp_id, +sombe_rv doSom(NG &ng, NGHolder &h, const ExpressionInfo &expr, u32 comp_id, som_type som); /** Returns SOMBE_FAIL (and the original graph) if SOM cannot be established. * May also throw pattern too large if prefixes of the pattern are too large to * compile. */ -sombe_rv doSomWithHaig(NG &ng, NGHolder &h, const NGWrapper &w, u32 comp_id, - som_type som); +sombe_rv doSomWithHaig(NG &ng, NGHolder &h, const ExpressionInfo &expr, + u32 comp_id, som_type som); void makeReportsSomPass(ReportManager &rm, NGHolder &g); diff --git a/src/nfagraph/ng_som_util.cpp b/src/nfagraph/ng_som_util.cpp index c43373415..a3b6ee5fd 100644 --- a/src/nfagraph/ng_som_util.cpp +++ b/src/nfagraph/ng_som_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -78,8 +78,8 @@ vector getDistancesFromSOM(const NGHolder &g_orig) { //dumpGraph("som_depth.dot", g); - vector temp_depths; // numbered by vertex index in g - calcDepthsFrom(g, g.start, temp_depths); + // Find depths, indexed by vertex index in g + auto temp_depths = calcDepthsFrom(g, g.start); // Transfer depths, indexed by vertex index in g_orig. vector depths(num_vertices(g_orig)); @@ -94,7 +94,7 @@ vector getDistancesFromSOM(const NGHolder &g_orig) { if (v_orig == g_orig.startDs || is_virtual_start(v_orig, g_orig)) { // StartDs and virtual starts always have zero depth. - d = DepthMinMax(0, 0); + d = DepthMinMax(depth(0), depth(0)); } else { u32 new_idx = g[v_new].index; d = temp_depths.at(new_idx); diff --git a/src/nfagraph/ng_stop.cpp b/src/nfagraph/ng_stop.cpp index e601f5411..c335540ac 100644 --- a/src/nfagraph/ng_stop.cpp +++ b/src/nfagraph/ng_stop.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -60,10 +60,9 @@ namespace { /** Depths from start, startDs for this graph. 
*/ struct InitDepths { - explicit InitDepths(const NGHolder &g) { - calcDepthsFrom(g, g.start, start); - calcDepthsFrom(g, g.startDs, startDs); - } + explicit InitDepths(const NGHolder &g) + : start(calcDepthsFrom(g, g.start)), + startDs(calcDepthsFrom(g, g.startDs)) {} depth maxDist(const NGHolder &g, NFAVertex v) const { u32 idx = g[v].index; diff --git a/src/nfagraph/ng_undirected.h b/src/nfagraph/ng_undirected.h index 7df6c7dc4..1e27ad791 100644 --- a/src/nfagraph/ng_undirected.h +++ b/src/nfagraph/ng_undirected.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,8 +30,8 @@ * \brief Create an undirected graph from an NFAGraph. */ -#ifndef NG_UNDIRECTED_H_CB42C71CF38E3D -#define NG_UNDIRECTED_H_CB42C71CF38E3D +#ifndef NG_UNDIRECTED_H +#define NG_UNDIRECTED_H #include "ng_holder.h" #include "ng_util.h" @@ -52,13 +52,13 @@ namespace ue2 { * of parallel edges. The only vertex property constructed is \a * vertex_index_t. */ -typedef boost::adjacency_list > -NFAUndirectedGraph; +using NFAUndirectedGraph = boost::adjacency_list< + boost::listS, // out edges + boost::listS, // vertices + boost::undirectedS, // graph is undirected + boost::property>; // vertex properties -typedef NFAUndirectedGraph::vertex_descriptor NFAUndirectedVertex; +using NFAUndirectedVertex = NFAUndirectedGraph::vertex_descriptor; /** * Make a copy of an NFAGraph with undirected edges, optionally without start @@ -67,15 +67,17 @@ typedef NFAUndirectedGraph::vertex_descriptor NFAUndirectedVertex; * Note that new vertex indices are assigned contiguously in \a vertices(g) * order. */ -template -void createUnGraph(const GraphT &g, +template +NFAUndirectedGraph createUnGraph(const Graph &g, bool excludeStarts, bool excludeAccepts, - NFAUndirectedGraph &ug, - ue2::unordered_map &old2new) { + unordered_map &old2new) { + NFAUndirectedGraph ug; size_t idx = 0; - typedef typename GraphT::vertex_descriptor VertexT; + + assert(old2new.empty()); + old2new.reserve(num_vertices(g)); for (auto v : ue2::vertices_range(g)) { // skip all accept nodes @@ -88,32 +90,47 @@ void createUnGraph(const GraphT &g, continue; } - NFAUndirectedVertex nuv = boost::add_vertex(ug); - old2new[v] = nuv; + auto nuv = boost::add_vertex(ug); + old2new.emplace(v, nuv); boost::put(boost::vertex_index, ug, nuv, idx++); } + // Track seen edges so that we don't insert parallel edges. + using Vertex = typename Graph::vertex_descriptor; + unordered_set> seen; + seen.reserve(num_edges(g)); + auto make_ordered_edge = [](Vertex a, Vertex b) { + return std::make_pair(std::min(a, b), std::max(a, b)); + }; + for (const auto &e : ue2::edges_range(g)) { - VertexT src = source(e, g); - VertexT targ = target(e, g); + auto u = source(e, g); + auto v = target(e, g); - if ((excludeAccepts && is_any_accept(src, g)) - || (excludeStarts && is_any_start(src, g))) { + if ((excludeAccepts && is_any_accept(u, g)) + || (excludeStarts && is_any_start(u, g))) { continue; } - if ((excludeAccepts && is_any_accept(targ, g)) - || (excludeStarts && is_any_start(targ, g))) { + if ((excludeAccepts && is_any_accept(v, g)) + || (excludeStarts && is_any_start(v, g))) { continue; } - NFAUndirectedVertex new_src = old2new[src]; - NFAUndirectedVertex new_targ = old2new[targ]; + if (!seen.emplace(make_ordered_edge(u, v)).second) { + continue; // skip parallel edge. 
+ } - boost::add_edge(new_src, new_targ, ug); + NFAUndirectedVertex new_u = old2new.at(u); + NFAUndirectedVertex new_v = old2new.at(v); + + boost::add_edge(new_u, new_v, ug); } + + assert(!has_parallel_edge(ug)); + return ug; } } // namespace ue2 -#endif /* NG_UNDIRECTED_H_CB42C71CF38E3D */ +#endif /* NG_UNDIRECTED_H */ diff --git a/src/nfagraph/ng_utf8.cpp b/src/nfagraph/ng_utf8.cpp index 383aa142d..89500fe39 100644 --- a/src/nfagraph/ng_utf8.cpp +++ b/src/nfagraph/ng_utf8.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,6 +34,7 @@ #include "ng.h" #include "ng_prune.h" #include "ng_util.h" +#include "compiler/compiler.h" #include "util/graph_range.h" #include "util/unicode_def.h" @@ -45,14 +46,14 @@ using namespace std; namespace ue2 { static -void allowIllegal(NGWrapper &w, NFAVertex v, u8 pred_char) { - if (in_degree(v, w) != 1) { +void allowIllegal(NGHolder &g, NFAVertex v, u8 pred_char) { + if (in_degree(v, g) != 1) { DEBUG_PRINTF("unexpected pred\n"); assert(0); /* should be true due to the early stage of this analysis */ return; } - CharReach &cr = w[v].char_reach; + CharReach &cr = g[v].char_reach; if (pred_char == 0xe0) { assert(cr.isSubsetOf(CharReach(0xa0, 0xbf))); if (cr == CharReach(0xa0, 0xbf)) { @@ -79,8 +80,8 @@ void allowIllegal(NGWrapper &w, NFAVertex v, u8 pred_char) { * above \\x{10ffff} or they represent overlong encodings. As we require valid * UTF-8 input, we have no defined behaviour in these cases, as a result we can * accept them if it simplifies the graph. */ -void relaxForbiddenUtf8(NGWrapper &w) { - if (!w.utf8) { +void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr) { + if (!expr.utf8) { return; } @@ -88,12 +89,12 @@ void relaxForbiddenUtf8(NGWrapper &w) { const CharReach f0(0xf0); const CharReach f4(0xf4); - for (auto v : vertices_range(w)) { - const CharReach &cr = w[v].char_reach; + for (auto v : vertices_range(g)) { + const CharReach &cr = g[v].char_reach; if (cr == e0 || cr == f0 || cr == f4) { u8 pred_char = cr.find_first(); - for (auto t : adjacent_vertices_range(v, w)) { - allowIllegal(w, t, pred_char); + for (auto t : adjacent_vertices_range(v, g)) { + allowIllegal(g, t, pred_char); } } } diff --git a/src/nfagraph/ng_utf8.h b/src/nfagraph/ng_utf8.h index e1b08e405..7c4288336 100644 --- a/src/nfagraph/ng_utf8.h +++ b/src/nfagraph/ng_utf8.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,7 +35,7 @@ namespace ue2 { -class NGWrapper; +class ExpressionInfo; class NGHolder; /** \brief Relax forbidden UTF-8 sequences. @@ -44,7 +44,7 @@ class NGHolder; * above \\x{10ffff} or they represent overlong encodings. As we require valid * UTF-8 input, we have no defined behaviour in these cases, as a result we can * accept them if it simplifies the graph. 
*/ -void relaxForbiddenUtf8(NGWrapper &w); +void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr); /** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex * where possible, based on the assumption that we will always be matching diff --git a/src/nfagraph/ng_util.cpp b/src/nfagraph/ng_util.cpp index 5252eb18d..0776fa044 100644 --- a/src/nfagraph/ng_util.cpp +++ b/src/nfagraph/ng_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,6 +34,7 @@ #include "grey.h" #include "ng_depth.h" // for NFAVertexDepth #include "ng_dump.h" +#include "ng_prune.h" #include "ue2common.h" #include "nfa/limex_limits.h" // for NFA_MAX_TOP_MASKS. #include "parser/position.h" @@ -43,6 +44,7 @@ #include "util/ue2string.h" #include "util/report_manager.h" +#include #include #include #include @@ -672,6 +674,86 @@ void reverseHolder(const NGHolder &g_in, NGHolder &g) { assert(num_edges(g) == num_edges(g_in)); } +u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, + u32 max_delay, bool overhang_ok) { + assert(isCorrectlyTopped(g)); + if (max_delay == numeric_limits::max()) { + max_delay--; + } + + DEBUG_PRINTF("killing off '%s'\n", dumpString(lit).c_str()); + set curr, next; + curr.insert(g.accept); + + auto it = lit.rbegin(); + for (u32 delay = max_delay; delay > 0 && it != lit.rend(); delay--, ++it) { + next.clear(); + for (auto v : curr) { + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (u == g.start) { + if (overhang_ok) { + DEBUG_PRINTF("bail\n"); + goto bail; /* things got complicated */ + } else { + continue; /* it is not possible for a lhs literal to + * overhang the start */ + } + } + + const CharReach &cr = g[u].char_reach; + if (!overlaps(*it, cr)) { + DEBUG_PRINTF("skip\n"); + continue; + } + if (isSubsetOf(*it, cr)) { + next.insert(u); + } else { + DEBUG_PRINTF("bail\n"); + goto bail; /* things got complicated */ + } + } + } + + curr.swap(next); + } + bail: + if (curr.empty()) { + /* This can happen when we have an edge representing a cross from two + * sides of an alternation. 
This whole edge needs to be marked as + * dead */ + assert(0); /* should have been picked up by can match */ + return numeric_limits::max(); + } + + u32 delay = distance(lit.rbegin(), it); + assert(delay <= max_delay); + assert(delay <= lit.length()); + DEBUG_PRINTF("managed delay %u (of max %u)\n", delay, max_delay); + + set pred; + for (auto v : curr) { + insert(&pred, inv_adjacent_vertices_range(v, g)); + } + + clear_in_edges(g.accept, g); + clearReports(g); + + for (auto v : pred) { + NFAEdge e = add_edge(v, g.accept, g); + g[v].reports.insert(0); + if (is_triggered(g) && v == g.start) { + g[e].tops.insert(DEFAULT_TOP); + } + } + + pruneUseless(g); + assert(allMatchStatesHaveReports(g)); + assert(isCorrectlyTopped(g)); + + DEBUG_PRINTF("graph has %zu vertices left\n", num_vertices(g)); + return delay; +} + #ifndef NDEBUG bool allMatchStatesHaveReports(const NGHolder &g) { diff --git a/src/nfagraph/ng_util.h b/src/nfagraph/ng_util.h index a07525339..1d3a6f325 100644 --- a/src/nfagraph/ng_util.h +++ b/src/nfagraph/ng_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -124,6 +124,22 @@ bad_edge_filter make_bad_edge_filter(const EdgeSet *e) { return bad_edge_filter(e); } +/** \brief vertex graph filter. */ +template +struct bad_vertex_filter { + bad_vertex_filter() = default; + explicit bad_vertex_filter(const VertexSet *bad_v) : bad_vertices(bad_v) {} + bool operator()(const typename VertexSet::value_type &v) const { + return !contains(*bad_vertices, v); /* keep vertices not in bad set */ + } + const VertexSet *bad_vertices = nullptr; +}; + +template +bad_vertex_filter make_bad_vertex_filter(const VertexSet *v) { + return bad_vertex_filter(v); +} + /** Visitor that records back edges */ template class BackEdges : public boost::default_dfs_visitor { @@ -275,6 +291,11 @@ void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new); * accepts. */ void reverseHolder(const NGHolder &g, NGHolder &out); +/** \brief Returns the delay or ~0U if the graph cannot match with + * the trailing literal. */ +u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, + u32 max_delay, bool overhang_ok = true); + #ifndef NDEBUG // Assertions: only available in internal builds. diff --git a/src/nfagraph/ng_vacuous.cpp b/src/nfagraph/ng_vacuous.cpp index 53672a1bd..d1123dff4 100644 --- a/src/nfagraph/ng_vacuous.cpp +++ b/src/nfagraph/ng_vacuous.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,29 +34,31 @@ #include "grey.h" #include "ng.h" #include "ng_util.h" +#include "compiler/compiler.h" using namespace std; namespace ue2 { static -ReportID getInternalId(ReportManager &rm, const NGWrapper &graph) { - Report ir = rm.getBasicInternalReport(graph); +ReportID getInternalId(ReportManager &rm, const ExpressionInfo &expr) { + Report ir = rm.getBasicInternalReport(expr); // Apply any extended params. 
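
Stepping back to `removeTrailingLiteralStates` in the `ng_util.cpp` hunk above: it walks backwards from `accept`, peeling off up to `max_delay` trailing characters of the literal and returning how many were removed (the "delay"); the runtime later replays those characters as a delayed literal. A toy model of the delay contract, with the graph tail reduced to a plain string (a deliberate simplification — the real code handles character classes, overhang, and the bail-out cases):

```cpp
#include <cassert>
#include <string>

unsigned trailingDelay(const std::string &tail, const std::string &lit,
                       unsigned max_delay) {
    unsigned delay = 0;
    auto t = tail.rbegin();
    auto l = lit.rbegin();
    while (delay < max_delay && t != tail.rend() && l != lit.rend() &&
           *t == *l) {
        ++delay; ++t; ++l;
    }
    return delay; // characters safely moved out of the graph
}

int main() {
    assert(trailingDelay("foobar", "bar", 8) == 3);
    assert(trailingDelay("foobar", "bar", 2) == 2); // capped by max_delay
}
```
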
- if (graph.min_offset || graph.max_offset != MAX_OFFSET) { - ir.minOffset = graph.min_offset; - ir.maxOffset = graph.max_offset; + if (expr.min_offset || expr.max_offset != MAX_OFFSET) { + ir.minOffset = expr.min_offset; + ir.maxOffset = expr.max_offset; } - assert(!graph.min_length); // should be handled elsewhere. + assert(!expr.min_length); // should be handled elsewhere. return rm.getInternalId(ir); } static -void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGWrapper &g) { - const ReportID r = getInternalId(rm, g); +void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr) { + const ReportID r = getInternalId(rm, expr); boundary.report_at_0_eod.insert(r); boundary.report_at_0.insert(r); @@ -81,8 +83,8 @@ void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGWrapper &g) { static void makeAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm, - NGWrapper &g) { - boundary.report_at_0.insert(getInternalId(rm, g)); + NGHolder &g, const ExpressionInfo &expr) { + boundary.report_at_0.insert(getInternalId(rm, expr)); remove_edge(g.start, g.accept, g); remove_edge(g.start, g.acceptEod, g); g[g.start].reports.clear(); @@ -90,8 +92,8 @@ void makeAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm, static void makeEndAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm, - NGWrapper &g) { - boundary.report_at_eod.insert(getInternalId(rm, g)); + NGHolder &g, const ExpressionInfo &expr) { + boundary.report_at_eod.insert(getInternalId(rm, expr)); remove_edge(g.startDs, g.acceptEod, g); remove_edge(g.start, g.acceptEod, g); g[g.start].reports.clear(); @@ -100,18 +102,18 @@ void makeEndAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm, static void makeNothingAcceptor(BoundaryReports &boundary, ReportManager &rm, - NGWrapper &g) { - boundary.report_at_0_eod.insert(getInternalId(rm, g)); + NGHolder &g, const ExpressionInfo &expr) { + boundary.report_at_0_eod.insert(getInternalId(rm, expr)); remove_edge(g.start, g.acceptEod, g); g[g.start].reports.clear(); } bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm, - NGWrapper &g) { + NGHolder &g, const ExpressionInfo &expr) { if (edge(g.startDs, g.accept, g).second) { // e.g. 
'.*'; match "between" every byte DEBUG_PRINTF("graph is firehose\n"); - makeFirehose(boundary, rm, g); + makeFirehose(boundary, rm, g, expr); return true; } @@ -119,19 +121,19 @@ bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm, if (edge(g.start, g.accept, g).second) { DEBUG_PRINTF("creating anchored acceptor\n"); - makeAnchoredAcceptor(boundary, rm, g); + makeAnchoredAcceptor(boundary, rm, g, expr); work_done = true; } if (edge(g.startDs, g.acceptEod, g).second) { DEBUG_PRINTF("creating end-anchored acceptor\n"); - makeEndAnchoredAcceptor(boundary, rm, g); + makeEndAnchoredAcceptor(boundary, rm, g, expr); work_done = true; } if (edge(g.start, g.acceptEod, g).second) { DEBUG_PRINTF("creating nothing acceptor\n"); - makeNothingAcceptor(boundary, rm, g); + makeNothingAcceptor(boundary, rm, g, expr); work_done = true; } diff --git a/src/nfagraph/ng_vacuous.h b/src/nfagraph/ng_vacuous.h index ebbc9d17b..c33cb312d 100644 --- a/src/nfagraph/ng_vacuous.h +++ b/src/nfagraph/ng_vacuous.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,12 +36,13 @@ namespace ue2 { struct BoundaryReports; -class NGWrapper; +class ExpressionInfo; +class NGHolder; class ReportManager; // Returns true if a "vacuous" reporter was created. bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm, - NGWrapper &graph); + NGHolder &g, const ExpressionInfo &expr); } // namespace ue2 diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 985246f03..4195045c4 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,15 +38,17 @@ #include "ng_holder.h" #include "ng_is_equal.h" #include "ng_literal_analysis.h" +#include "ng_limex.h" +#include "ng_mcclellan.h" #include "ng_netflow.h" #include "ng_prune.h" #include "ng_redundancy.h" #include "ng_region.h" #include "ng_reports.h" -#include "ng_rose.h" #include "ng_split.h" #include "ng_util.h" #include "ng_width.h" +#include "nfa/rdfa.h" #include "rose/rose_build.h" #include "rose/rose_build_util.h" #include "rose/rose_in_dump.h" @@ -66,7 +68,7 @@ #include #include #include -#include +#include #include #define STAGE_DEBUG_PRINTF DEBUG_PRINTF @@ -130,6 +132,44 @@ bool createsTransientLHS(const NGHolder &g, const vector &vv, return true; } +static +double calcSplitRatio(const NGHolder &g, const vector &vv) { + flat_set not_reachable; + find_unreachable(g, vv, ¬_reachable); + double rv = (double)not_reachable.size() / num_vertices(g); + rv = rv > 0.5 ? 1 - rv : rv; + + return rv; +} + +static +size_t shorter_than(const set &s, size_t limit) { + return count_if(s.begin(), s.end(), + [&](const ue2_literal &a) { return a.length() < limit; }); +} + +static +u32 min_len(const set &s) { + u32 rv = ~0U; + + for (const auto &lit : s) { + rv = min(rv, (u32)lit.length()); + } + + return rv; +} + +static +u32 min_period(const set &s) { + u32 rv = ~0U; + + for (const auto &lit : s) { + rv = min(rv, (u32)minStringPeriod(lit)); + } + DEBUG_PRINTF("min period %u\n", rv); + return rv; +} + namespace { /** * Information on a cut: vertices and literals. 
@@ -148,18 +188,19 @@ struct VertLitInfo { bool creates_anchored = false; bool creates_transient = false; + double split_ratio = 0; }; +#define LAST_CHANCE_STRONG_LEN 1 + /** - * \brief Comparator class for sorting LitCollection::lits. - * - * This is separated out from LitCollection itself as passing LitCollection to - * std::sort() would incur a (potentially expensive) copy. + * \brief Comparator class for comparing different literal cuts. */ class LitComparator { public: - LitComparator(const NGHolder &g_in, bool sa, bool st) - : g(g_in), seeking_anchored(sa), seeking_transient(st) {} + LitComparator(const NGHolder &g_in, bool sa, bool st, bool lc) + : g(g_in), seeking_anchored(sa), seeking_transient(st), + last_chance(lc) {} bool operator()(const unique_ptr &a, const unique_ptr &b) const { assert(a && b); @@ -176,6 +217,14 @@ class LitComparator { } } + if (last_chance + && min_len(a->lit) > LAST_CHANCE_STRONG_LEN + && min_len(b->lit) > LAST_CHANCE_STRONG_LEN) { + DEBUG_PRINTF("using split ratio %g , %g\n", a->split_ratio, + b->split_ratio); + return a->split_ratio < b->split_ratio; + } + u64a score_a = scoreSet(a->lit); u64a score_b = scoreSet(b->lit); @@ -193,52 +242,29 @@ class LitComparator { bool seeking_anchored; bool seeking_transient; + bool last_chance; }; } -static -size_t shorter_than(const set &s, size_t limit) { - size_t count = 0; - - for (const auto &lit : s) { - if (lit.length() < limit) { - count++; - } - } - - return count; -} - -static -u32 min_len(const set &s) { - u32 rv = ~0U; - - for (const auto &lit : s) { - rv = min(rv, (u32)lit.length()); - } - - return rv; -} - -static -u32 min_period(const set &s) { - u32 rv = ~0U; - - for (const auto &lit : s) { - rv = min(rv, (u32)minStringPeriod(lit)); - } - DEBUG_PRINTF("min period %u\n", rv); - return rv; -} - #define MIN_ANCHORED_LEN 2 +#define MIN_ANCHORED_DESPERATE_LEN 1 +/* anchored here means that the cut creates a 'usefully' anchored LHS */ static bool validateRoseLiteralSetQuality(const set &s, u64a score, bool anchored, u32 min_allowed_floating_len, - bool desperation) { + bool desperation, bool last_chance) { u32 min_allowed_len = anchored ? MIN_ANCHORED_LEN : min_allowed_floating_len; + if (anchored && last_chance) { + min_allowed_len = MIN_ANCHORED_DESPERATE_LEN; + } + if (last_chance) { + desperation = true; + } + + DEBUG_PRINTF("validating%s set, min allowed len %u\n", + anchored ? 
" anchored" : "", min_allowed_len); assert(none_of(begin(s), end(s), bad_mixed_sensitivity)); @@ -267,6 +293,7 @@ bool validateRoseLiteralSetQuality(const set &s, u64a score, if (s.size() > 10 /* magic number is magic */ || s_min_len < min_allowed_len || (s_min_period <= 1 && min_allowed_len != 1)) { + DEBUG_PRINTF("candidate may be bad\n"); ok = false; } @@ -307,7 +334,7 @@ void getSimpleRoseLiterals(const NGHolder &g, bool seeking_anchored, const set &a_dom, vector> *lits, u32 min_allowed_len, bool desperation, - const CompileContext &cc) { + bool last_chance, const CompileContext &cc) { assert(depths || !seeking_anchored); map scores; @@ -333,7 +360,7 @@ void getSimpleRoseLiterals(const NGHolder &g, bool seeking_anchored, } if (!validateRoseLiteralSetQuality(s, score, anchored, min_allowed_len, - desperation)) { + desperation, last_chance)) { continue; } @@ -370,7 +397,7 @@ void getRegionRoseLiterals(const NGHolder &g, bool seeking_anchored, const set *allowed, vector> *lits, u32 min_allowed_len, bool desperation, - const CompileContext &cc) { + bool last_chance, const CompileContext &cc) { /* This allows us to get more places to split the graph as we are not limited to points where there is a single vertex to split at. */ @@ -490,7 +517,7 @@ void getRegionRoseLiterals(const NGHolder &g, bool seeking_anchored, } if (!validateRoseLiteralSetQuality(s, score, anchored, min_allowed_len, - desperation)) { + desperation, last_chance)) { goto next_cand; } @@ -588,6 +615,7 @@ unique_ptr findBestSplit(const NGHolder &g, bool for_prefix, u32 min_len, const set *allowed_cand, const set *disallowed_cand, + bool last_chance, const CompileContext &cc) { assert(!for_prefix || depths); @@ -634,17 +662,16 @@ unique_ptr findBestSplit(const NGHolder &g, DEBUG_PRINTF("|cand| = %zu\n", cand.size()); bool seeking_anchored = for_prefix; - bool seeking_transient = for_prefix; //cc.streaming; + bool seeking_transient = for_prefix; - /* TODO: revisit when backstop goes away */ bool desperation = for_prefix && cc.streaming; vector> lits; /**< sorted list of potential cuts */ getSimpleRoseLiterals(g, seeking_anchored, depths, cand, &lits, min_len, - desperation, cc); + desperation, last_chance, cc); getRegionRoseLiterals(g, seeking_anchored, depths, cand_raw, allowed_cand, - &lits, min_len, desperation, cc); + &lits, min_len, desperation, last_chance, cc); if (lits.empty()) { DEBUG_PRINTF("no literals found\n"); @@ -658,7 +685,14 @@ unique_ptr findBestSplit(const NGHolder &g, } } - auto cmp = LitComparator(g, seeking_anchored, seeking_transient); + if (last_chance) { + for (auto &a : lits) { + a->split_ratio = calcSplitRatio(g, a->vv); + } + } + + auto cmp = LitComparator(g, seeking_anchored, seeking_transient, + last_chance); unique_ptr best = move(lits.back()); lits.pop_back(); @@ -684,27 +718,39 @@ void poisonFromSuccessor(const NGHolder &h, const ue2_literal &succ, DEBUG_PRINTF("poisoning holder of size %zu, succ len %zu\n", num_vertices(h), succ.length()); - map > curr; + using EdgeSet = boost::dynamic_bitset<>; + + const size_t edge_count = num_edges(h); + EdgeSet bad_edges(edge_count); + + unordered_map curr; for (const auto &e : in_edges_range(h.accept, h)) { - curr[source(e, h)].insert(e); + auto &path_set = curr[source(e, h)]; + if (path_set.empty()) { + path_set.resize(edge_count); + } + path_set.set(h[e].index); } - map > next; + unordered_map next; for (auto it = succ.rbegin(); it != succ.rend(); ++it) { for (const auto &path : curr) { NFAVertex u = path.first; const auto &path_set = path.second; if 
(u == h.start && overhang_ok) { DEBUG_PRINTF("poisoning early %zu [overhang]\n", - path_set.size()); - insert(&bad, path_set); + path_set.count()); + bad_edges |= path_set; continue; } if (overlaps(h[u].char_reach, *it)) { for (const auto &e : in_edges_range(u, h)) { auto &new_path_set = next[source(e, h)]; - insert(&new_path_set, path_set); - new_path_set.insert(e); + if (new_path_set.empty()) { + new_path_set.resize(edge_count); + } + new_path_set |= path_set; + new_path_set.set(h[e].index); } } } @@ -716,8 +762,14 @@ void poisonFromSuccessor(const NGHolder &h, const ue2_literal &succ, assert(overhang_ok || !curr.empty()); for (const auto &path : curr) { - insert(&bad, path.second); - DEBUG_PRINTF("poisoning %zu vertices\n", path.second.size()); + bad_edges |= path.second; + DEBUG_PRINTF("poisoning %zu vertices\n", path.second.count()); + } + + for (const auto &e : edges_range(h)) { + if (bad_edges.test(h[e].index)) { + bad.insert(e); + } } } @@ -733,6 +785,11 @@ void poisonForGoodPrefix(const NGHolder &h, } } +static UNUSED +bool is_any_accept_type(RoseInVertexType t) { + return t == RIV_ACCEPT || t == RIV_ACCEPT_EOD; +} + static flat_set poisonEdges(const NGHolder &h, const vector *depths, @@ -746,7 +803,8 @@ flat_set poisonEdges(const NGHolder &h, for (const RoseInEdge &ve : ee) { if (vg[target(ve, vg)].type != RIV_LITERAL) { /* nothing to poison in suffixes/outfixes */ - assert(vg[target(ve, vg)].type == RIV_ACCEPT); + assert(generates_callbacks(h)); + assert(is_any_accept_type(vg[target(ve, vg)].type)); continue; } succs.insert({vg[target(ve, vg)].s, @@ -793,7 +851,19 @@ unique_ptr findBestNormalSplit(const NGHolder &g, set bad_vertices = poisonVertices(g, vg, ee, cc.grey); return findBestSplit(g, nullptr, false, cc.grey.minRoseLiteralLength, - nullptr, &bad_vertices, cc); + nullptr, &bad_vertices, false, cc); +} + +static +unique_ptr findBestLastChanceSplit(const NGHolder &g, + const RoseInGraph &vg, + const vector &ee, + const CompileContext &cc) { + assert(g.kind == NFA_OUTFIX || g.kind == NFA_INFIX || g.kind == NFA_SUFFIX); + set bad_vertices = poisonVertices(g, vg, ee, cc.grey); + + return findBestSplit(g, nullptr, false, cc.grey.minRoseLiteralLength, + nullptr, &bad_vertices, true, cc); } static @@ -870,11 +940,12 @@ unique_ptr findBestPrefixSplit(const NGHolder &g, const vector &depths, const RoseInGraph &vg, const vector &ee, + bool last_chance, const CompileContext &cc) { - assert(g.kind == NFA_PREFIX); + assert(g.kind == NFA_PREFIX || g.kind == NFA_OUTFIX); set bad_vertices = poisonVertices(g, vg, ee, cc.grey); auto rv = findBestSplit(g, &depths, true, cc.grey.minRoseLiteralLength, - nullptr, &bad_vertices, cc); + nullptr, &bad_vertices, last_chance, cc); /* large back edges may prevent us identifying anchored or transient cases * properly - use a simple walk instead */ @@ -905,7 +976,7 @@ unique_ptr findBestCleanSplit(const NGHolder &g, return nullptr; } return findBestSplit(g, nullptr, false, cc.grey.violetEarlyCleanLiteralLen, - &cleanSplits, nullptr, cc); + &cleanSplits, nullptr, false, cc); } static @@ -961,7 +1032,7 @@ bool splitRoseEdge(const NGHolder &base_graph, RoseInGraph &vg, to_string(lhs->kind).c_str(), num_vertices(*lhs), to_string(rhs->kind).c_str(), num_vertices(*rhs)); - bool suffix = vg[target(ee.front(), vg)].type == RIV_ACCEPT; + bool suffix = generates_callbacks(base_graph); if (is_triggered(base_graph)) { /* if we are already guarded, check if the split reduces the size of @@ -1377,12 +1448,11 @@ RoseInGraph populateTrivialGraph(const NGHolder &h) { 
} static -void avoidOutfixes(RoseInGraph &vg, const CompileContext &cc) { +void avoidOutfixes(RoseInGraph &vg, bool last_chance, + const CompileContext &cc) { STAGE_DEBUG_PRINTF("AVOIDING OUTFIX\n"); - if (num_vertices(vg) > 2) { - /* must be at least one literal aside from start and accept */ - return; - } + assert(num_vertices(vg) == 2); + assert(num_edges(vg) == 1); RoseInEdge e = *edges(vg).first; @@ -1392,13 +1462,27 @@ void avoidOutfixes(RoseInGraph &vg, const CompileContext &cc) { renumber_vertices(h); renumber_edges(h); - unique_ptr split = findBestNormalSplit(h, vg, {e}, cc); + unique_ptr split = findBestNormalSplit(h, vg, {e}, cc); if (split && splitRoseEdge(h, vg, {e}, *split)) { DEBUG_PRINTF("split on simple literal\n"); - } else { - doNetflowCut(h, nullptr, vg, {e}, false, cc.grey); + return; } + + if (last_chance) { + /* look for a prefix split as it allows us to accept very weak anchored + * literals. */ + auto depths = calcDepths(h); + + split = findBestPrefixSplit(h, depths, vg, {e}, last_chance, cc); + + if (split && splitRoseEdge(h, vg, {e}, *split)) { + DEBUG_PRINTF("split on simple literal\n"); + return; + } + } + + doNetflowCut(h, nullptr, vg, {e}, false, cc.grey); } static @@ -1463,6 +1547,11 @@ void removeRedundantLiteralsFromPrefixes(RoseInGraph &g, continue; } + if (g[e].graph_lag) { + /* already removed redundant parts of literals */ + continue; + } + assert(!g[t].delay); const ue2_literal &lit = g[t].s; @@ -1564,20 +1653,22 @@ void removeRedundantLiteralsFromInfix(const NGHolder &h, RoseInGraph &ig, * taking into account overlap of successor literals. */ set preds; + set succs; for (const RoseInEdge &e : ee) { RoseInVertex u = source(e, ig); assert(ig[u].type == RIV_LITERAL); - assert(!ig[e].graph_lag); assert(!ig[u].delay); preds.insert(ig[u].s); - } - set succs; - for (const RoseInEdge &e : ee) { RoseInVertex v = target(e, ig); assert(ig[v].type == RIV_LITERAL); assert(!ig[v].delay); succs.insert(ig[v].s); + + if (ig[e].graph_lag) { + /* already removed redundant parts of literals */ + return; + } } map, u32> > graphs; /* + delay */ @@ -1818,6 +1909,59 @@ bool makeTransientFromLongLiteral(NGHolder &h, RoseInGraph &vg, return true; } +static +void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, + u32 delay, const vector &preds) { + assert(delay <= lit.length()); + assert(isCorrectlyTopped(g)); + DEBUG_PRINTF("adding on '%s' %u\n", dumpString(lit).c_str(), delay); + + NFAVertex prev = g.accept; + auto it = lit.rbegin(); + while (delay--) { + NFAVertex curr = add_vertex(g); + assert(it != lit.rend()); + g[curr].char_reach = *it; + add_edge(curr, prev, g); + ++it; + prev = curr; + } + + for (auto v : preds) { + NFAEdge e = add_edge_if_not_present(v, prev, g); + if (v == g.start && is_triggered(g)) { + g[e].tops.insert(DEFAULT_TOP); + } + } + + // Every predecessor of accept must have a report. 
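
The `restoreTrailingLiteralStates` helpers being added here are the inverse of `removeTrailingLiteralStates` from `ng_util.cpp`: one strips `delay` trailing characters of a literal from the graph, the other re-appends exactly that many in front of `accept`. The round-trip invariant, modelled on strings (a sketch, not the graph code):

```cpp
#include <cassert>
#include <string>

std::string removeTrailing(const std::string &g, unsigned delay) {
    return g.substr(0, g.size() - delay);
}

std::string restoreTrailing(const std::string &g, const std::string &lit,
                            unsigned delay) {
    return g + lit.substr(lit.size() - delay); // last `delay` chars of lit
}

int main() {
    const std::string g = "xyzbar", lit = "bar";
    const unsigned delay = 3;
    assert(restoreTrailing(removeTrailing(g, delay), lit, delay) == g);
}
```
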
+ set_report(g, 0); + + renumber_vertices(g); + renumber_edges(g); + assert(allMatchStatesHaveReports(g)); + assert(isCorrectlyTopped(g)); +} + +static +void restoreTrailingLiteralStates(NGHolder &g, + const vector> &lits) { + vector preds; + insert(&preds, preds.end(), inv_adjacent_vertices(g.accept, g)); + clear_in_edges(g.accept, g); + + for (auto v : preds) { + g[v].reports.clear(); /* clear report from old accepts */ + } + + for (const auto &p : lits) { + const ue2_literal &lit = p.first; + u32 delay = p.second; + + restoreTrailingLiteralStates(g, lit, delay, preds); + } +} + static bool improvePrefix(NGHolder &h, RoseInGraph &vg, const vector &ee, const CompileContext &cc) { @@ -1828,8 +1972,7 @@ bool improvePrefix(NGHolder &h, RoseInGraph &vg, const vector &ee, renumber_vertices(h); renumber_edges(h); - vector depths; - calcDepths(h, depths); + auto depths = calcDepths(h); /* If the reason the prefix is not transient is due to a very long literal * following, we can make it transient by restricting ourselves to using @@ -1838,7 +1981,7 @@ bool improvePrefix(NGHolder &h, RoseInGraph &vg, const vector &ee, return true; } - unique_ptr split = findBestPrefixSplit(h, depths, vg, ee, cc); + auto split = findBestPrefixSplit(h, depths, vg, ee, false, cc); if (split && (split->creates_transient || split->creates_anchored) && splitRoseEdge(h, vg, ee, *split)) { @@ -1897,27 +2040,18 @@ bool improvePrefix(NGHolder &h, RoseInGraph &vg, const vector &ee, trimmed.clear(); for (auto &elem : trimmed_vec) { shared_ptr &hp = elem.first; - NGHolder &eh = *hp; - - vector base_states; - insert(&base_states, base_states.end(), - inv_adjacent_vertices(eh.accept, eh)); - clear_in_edges(eh.accept, eh); - - for (auto v : base_states) { - eh[v].reports.clear(); /* clear report from old accepts */ - } + vector> succ_lits; for (const auto &edge_delay : elem.second) { const RoseInEdge &e = edge_delay.first; u32 delay = edge_delay.second; - auto succ_lit = vg[target(e, vg)].s; + auto lit = vg[target(e, vg)].s; vg[e].graph = hp; - assert(delay <= succ_lit.length()); - restoreTrailingLiteralStates(*vg[e].graph, succ_lit, delay, - base_states); + assert(delay <= lit.length()); + succ_lits.emplace_back(lit, delay); } + restoreTrailingLiteralStates(*hp, succ_lits); } return true; } @@ -2234,7 +2368,7 @@ bool replaceSuffixWithInfix(const NGHolder &h, RoseInGraph &vg, if (vli.lit.empty() || !validateRoseLiteralSetQuality(vli.lit, score, false, min_len, - false)) { + false, false)) { return false; } } @@ -2616,24 +2750,215 @@ void rehomeEodSuffixes(RoseInGraph &vg) { /* old accept vertices will be tidied up by final pruneUseless() call */ } -bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, - const CompileContext &cc) { - assert(!can_never_match(h)); +static +bool tryForEarlyDfa(const NGHolder &h, const CompileContext &cc) { + switch (h.kind) { + case NFA_OUTFIX: /* 'prefix' of eod */ + case NFA_PREFIX: + return cc.grey.earlyMcClellanPrefix; + case NFA_INFIX: + return cc.grey.earlyMcClellanInfix; + case NFA_SUFFIX: + return cc.grey.earlyMcClellanSuffix; + default: + DEBUG_PRINTF("kind %u\n", (u32)h.kind); + assert(0); + return false; + } +} - if (!cc.grey.allowViolet) { +static +vector> getDfaTriggers(RoseInGraph &vg, + const vector &edges, + bool *single_trigger) { + vector> triggers; + u32 min_offset = ~0U; + u32 max_offset = 0; + for (const auto &e : edges) { + RoseInVertex s = source(e, vg); + if (vg[s].type == RIV_LITERAL) { + triggers.push_back(as_cr_seq(vg[s].s)); + } + ENSURE_AT_LEAST(&max_offset, 
vg[s].max_offset); + LIMIT_TO_AT_MOST(&min_offset, vg[s].min_offset); + } + + *single_trigger = min_offset == max_offset; + DEBUG_PRINTF("trigger offset (%u, %u)\n", min_offset, max_offset); + + return triggers; +} + +static +bool doEarlyDfa(RoseBuild &rose, RoseInGraph &vg, NGHolder &h, + const vector &edges, bool final_chance, + const ReportManager &rm, const CompileContext &cc) { + DEBUG_PRINTF("trying for dfa\n"); + + bool single_trigger; + for (const auto &e : edges) { + if (vg[target(e, vg)].type == RIV_ACCEPT_EOD) { + /* TODO: support eod prefixes */ + return false; + } + } + + auto triggers = getDfaTriggers(vg, edges, &single_trigger); + + /* TODO: literal delay things */ + if (!generates_callbacks(h)) { + set_report(h, rose.getNewNfaReport()); + } + + shared_ptr dfa = buildMcClellan(h, &rm, single_trigger, triggers, + cc.grey, final_chance); + + if (!dfa) { return false; } - DEBUG_PRINTF("hello world\n"); + DEBUG_PRINTF("dfa ok\n"); + for (const auto &e : edges) { + vg[e].dfa = dfa; + } + + return true; +} + +#define MAX_EDGES_FOR_IMPLEMENTABILITY 50 + +static +bool splitForImplementabilty(RoseInGraph &vg, NGHolder &h, + const vector &edges, + const CompileContext &cc) { + vector> succ_lits; + DEBUG_PRINTF("trying to split %s with %zu vertices on %zu edges\n", + to_string(h.kind).c_str(), num_vertices(h), edges.size()); + + if (edges.size() > MAX_EDGES_FOR_IMPLEMENTABILITY) { + return false; + } + + if (!generates_callbacks(h)) { + for (const auto &e : edges) { + const auto &lit = vg[target(e, vg)].s; + u32 delay = vg[e].graph_lag; + vg[e].graph_lag = 0; + + assert(delay <= lit.length()); + succ_lits.emplace_back(lit, delay); + } + restoreTrailingLiteralStates(h, succ_lits); + } + + unique_ptr split; + bool last_chance = true; + if (h.kind == NFA_PREFIX) { + auto depths = calcDepths(h); + + split = findBestPrefixSplit(h, depths, vg, edges, last_chance, cc); + } else { + split = findBestLastChanceSplit(h, vg, edges, cc); + } + + if (split && splitRoseEdge(h, vg, edges, *split)) { + DEBUG_PRINTF("split on simple literal\n"); + return true; + } + + DEBUG_PRINTF("trying to netflow\n"); + bool rv = doNetflowCut(h, nullptr, vg, edges, false, cc.grey); + DEBUG_PRINTF("done\n"); + + return rv; +} + +#define MAX_IMPLEMENTABLE_SPLITS 50 + +bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, + bool final_chance, const ReportManager &rm, + const CompileContext &cc) { + DEBUG_PRINTF("checking for impl %d\n", final_chance); + bool changed = false; + bool need_to_recalc = false; + u32 added_count = 0; + do { + changed = false; + DEBUG_PRINTF("added %u\n", added_count); + map > edges_by_graph; + vector graphs; + for (const RoseInEdge &ve : edges_range(vg)) { + if (vg[ve].graph) { + NGHolder *h = vg[ve].graph.get(); + if (!contains(edges_by_graph, h)) { + graphs.push_back(h); + } + edges_by_graph[h].push_back(ve); + } + } + for (NGHolder *h : graphs) { + if (isImplementableNFA(*h, &rm, cc)) { + continue; + } + + if (tryForEarlyDfa(*h, cc) + && doEarlyDfa(rose, vg, *h, edges_by_graph[h], final_chance, rm, + cc)) { + continue; + } + + DEBUG_PRINTF("eek\n"); + if (!allow_changes) { + return false; + } + + if (splitForImplementabilty(vg, *h, edges_by_graph[h], cc)) { + added_count++; + changed = true; + continue; + } + + return false; + } + + if (added_count > MAX_IMPLEMENTABLE_SPLITS) { + return false; + } + + if (changed) { + removeRedundantLiterals(vg, cc); + pruneUseless(vg); + need_to_recalc = true; + } + } while (changed); + + if (need_to_recalc) { + 
renumber_vertices(vg); + calcVertexOffsets(vg); + } + + DEBUG_PRINTF("ok!\n"); + return true; +} + +static +RoseInGraph doInitialVioletTransform(const NGHolder &h, bool last_chance, + const CompileContext &cc) { + assert(!can_never_match(h)); RoseInGraph vg = populateTrivialGraph(h); + if (!cc.grey.allowViolet) { + return vg; + } + + DEBUG_PRINTF("hello world\n"); + /* Step 1: avoid outfixes as we always have to run them. */ - avoidOutfixes(vg, cc); + avoidOutfixes(vg, last_chance, cc); if (num_vertices(vg) <= 2) { - /* only have an outfix; leave for ng_rose for now */ - return false; + return vg; /* unable to transform pattern */ } removeRedundantPrefixes(vg); @@ -2663,10 +2988,6 @@ bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, decomposeLiteralChains(vg, cc); } - /* Step 5: avoid unimplementable, or overly large engines if possible */ - /* TODO: later - ng_rose is currently acting as a backstop */ - - /* Step 6: send to rose */ rehomeEodSuffixes(vg); removeRedundantLiterals(vg, cc); @@ -2674,9 +2995,40 @@ bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, dumpPreRoseGraph(vg, cc.grey); renumber_vertices(vg); calcVertexOffsets(vg); + + return vg; +} + +bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, + bool last_chance, const ReportManager &rm, + const CompileContext &cc) { + auto vg = doInitialVioletTransform(h, last_chance, cc); + if (num_vertices(vg) <= 2) { + return false; + } + + /* Step 5: avoid unimplementable, or overly large engines if possible */ + if (!ensureImplementable(rose, vg, last_chance, last_chance, rm, cc)) { + return false; + } + dumpPreRoseGraph(vg, cc.grey, "post_ensure_rose.dot"); + + /* Step 6: send to rose */ bool rv = rose.addRose(vg, prefilter); DEBUG_PRINTF("violet: %s\n", rv ? "success" : "fail"); return rv; } +bool checkViolet(const ReportManager &rm, const NGHolder &h, bool prefilter, + const CompileContext &cc) { + auto vg = doInitialVioletTransform(h, true, cc); + if (num_vertices(vg) <= 2) { + return false; + } + + bool rv = roseCheckRose(vg, prefilter, rm, cc); + DEBUG_PRINTF("violet: %s\n", rv ? "success" : "fail"); + return rv; +} + } diff --git a/src/nfagraph/ng_violet.h b/src/nfagraph/ng_violet.h index fb62bfc0c..3fe57dbfa 100644 --- a/src/nfagraph/ng_violet.h +++ b/src/nfagraph/ng_violet.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -41,12 +41,25 @@ class NGHolder; class RoseBuild; struct CompileContext; +class ReportManager; +struct RoseInGraph; /** \brief Attempt to consume the entire pattern in graph \a h with Rose. * Returns true if successful. */ bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, + bool last_chance, const ReportManager &rm, const CompileContext &cc); +bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, + bool final_chance, const ReportManager &rm, + const CompileContext &cc); + +/** \brief True if the pattern in \a h is consumable by Rose/Violet. This + * function may be conservative (return false even if supported) for + * efficiency. 
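
The `ensureImplementable` loop above amounts to a per-graph fallback chain: keep the graph if it is already implementable as an NFA, else try an early McClellan DFA where the grey-box options allow it, else split the graph again ("last chance"), giving up once the split budget is exhausted. In miniature (the predicates and the budget handling are simplified stand-ins for the real checks):

```cpp
#include <cassert>

enum class Fate { Keep, EarlyDfa, Split, Fail };

Fate resolve(bool implementable, bool dfa_ok, bool split_ok,
             unsigned &added, unsigned max_splits) {
    if (implementable) return Fate::Keep;
    if (dfa_ok)        return Fate::EarlyDfa;
    if (split_ok && added < max_splits) { ++added; return Fate::Split; }
    return Fate::Fail; // caller gives up on this pattern
}

int main() {
    unsigned added = 0;
    assert(resolve(true,  false, false, added, 50) == Fate::Keep);
    assert(resolve(false, true,  false, added, 50) == Fate::EarlyDfa);
    assert(resolve(false, false, true,  added, 50) == Fate::Split);
    assert(added == 1);
}
```
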
*/ +bool checkViolet(const ReportManager &rm, const NGHolder &h, bool prefilter, + const CompileContext &cc); + } // namespace ue2 #endif diff --git a/src/nfagraph/ng_width.cpp b/src/nfagraph/ng_width.cpp index d596b7b5d..c2e9eb1a6 100644 --- a/src/nfagraph/ng_width.cpp +++ b/src/nfagraph/ng_width.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -157,12 +157,12 @@ depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter, if (colors.at(NODE_ACCEPT) == boost::white_color) { acceptDepth = depth::unreachable(); } else { - acceptDepth = -1 * distance.at(NODE_ACCEPT); + acceptDepth = depth(-1 * distance.at(NODE_ACCEPT)); } if (colors.at(NODE_ACCEPT_EOD) == boost::white_color) { acceptEodDepth = depth::unreachable(); } else { - acceptEodDepth = -1 * distance.at(NODE_ACCEPT_EOD); + acceptEodDepth = depth(-1 * distance.at(NODE_ACCEPT_EOD)); } depth d; diff --git a/src/parser/Parser.h b/src/parser/Parser.h index 45c3ac7af..a034a18fc 100644 --- a/src/parser/Parser.h +++ b/src/parser/Parser.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -69,7 +69,7 @@ struct ParseMode { * * This call will throw a ParseError on failure. */ -std::unique_ptr parse(const char *const ptr, ParseMode &mode); +std::unique_ptr parse(const char *ptr, ParseMode &mode); } // namespace ue2 diff --git a/src/parser/Parser.rl b/src/parser/Parser.rl index 53130ddf3..52b3340c6 100644 --- a/src/parser/Parser.rl +++ b/src/parser/Parser.rl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,6 +34,7 @@ /* Parser.cpp is a built source, may not be in same dir as parser files */ #include "parser/check_refs.h" +#include "parser/control_verbs.h" #include "parser/ComponentAlternation.h" #include "parser/ComponentAssertion.h" #include "parser/ComponentAtomicGroup.h" @@ -115,7 +116,7 @@ unsigned parseAsDecimal(unsigned oct) { static constexpr u32 MAX_NUMBER = INT_MAX; static -void pushDec(u32 *acc, u8 raw_digit) { +void pushDec(u32 *acc, char raw_digit) { assert(raw_digit >= '0' && raw_digit <= '9'); u32 digit_val = raw_digit - '0'; @@ -129,7 +130,7 @@ void pushDec(u32 *acc, u8 raw_digit) { } static -void pushOct(u32 *acc, u8 raw_digit) { +void pushOct(u32 *acc, char raw_digit) { assert(raw_digit >= '0' && raw_digit <= '7'); u32 digit_val = raw_digit - '0'; @@ -168,8 +169,7 @@ ComponentSequence *enterSequence(ComponentSequence *parent, } static -void addLiteral(ComponentSequence *currentSeq, unsigned char c, - const ParseMode &mode) { +void addLiteral(ComponentSequence *currentSeq, char c, const ParseMode &mode) { if (mode.utf8 && mode.caseless) { /* leverage ComponentClass to generate the vertices */ auto cc = getComponentClass(mode); @@ -196,7 +196,7 @@ void addEscaped(ComponentSequence *currentSeq, unichar accum, if (accum > 255) { throw LocatedParseError(err_msg); } - addLiteral(currentSeq, (unsigned char)accum, mode); + addLiteral(currentSeq, (char)accum, mode); } } @@ -216,7 +216,7 @@ void 
addEscapedHex(ComponentSequence *currentSeq, unichar accum, #define SLASH_C_ERROR "\\c must be followed by an ASCII character" static -u8 decodeCtrl(u8 raw) { +u8 decodeCtrl(char raw) { if (raw & 0x80) { throw LocatedParseError(SLASH_C_ERROR); } @@ -224,10 +224,10 @@ u8 decodeCtrl(u8 raw) { } static -unichar readUtf8CodePoint2c(const u8 *ts) { +unichar readUtf8CodePoint2c(const char *s) { + auto *ts = (const u8 *)s; assert(ts[0] >= 0xc0 && ts[0] < 0xe0); assert(ts[1] >= 0x80 && ts[1] < 0xc0); - unichar val = ts[0] & 0x1f; val <<= 6; val |= ts[1] & 0x3f; @@ -237,7 +237,8 @@ unichar readUtf8CodePoint2c(const u8 *ts) { } static -unichar readUtf8CodePoint3c(const u8 *ts) { +unichar readUtf8CodePoint3c(const char *s) { + auto *ts = (const u8 *)s; assert(ts[0] >= 0xe0 && ts[0] < 0xf0); assert(ts[1] >= 0x80 && ts[1] < 0xc0); assert(ts[2] >= 0x80 && ts[2] < 0xc0); @@ -252,7 +253,8 @@ unichar readUtf8CodePoint3c(const u8 *ts) { } static -unichar readUtf8CodePoint4c(const u8 *ts) { +unichar readUtf8CodePoint4c(const char *s) { + auto *ts = (const u8 *)s; assert(ts[0] >= 0xf0 && ts[0] < 0xf8); assert(ts[1] >= 0x80 && ts[1] < 0xc0); assert(ts[2] >= 0x80 && ts[2] < 0xc0); @@ -272,12 +274,10 @@ unichar readUtf8CodePoint4c(const u8 *ts) { %%{ machine regex; - alphtype unsigned char; - action throwUnsupportedEscape { ostringstream str; - str << "'\\" << (char)*(ts + 1) << "' at index " - << ts - ptr << " not supported in a character class."; + str << "'\\" << *(ts + 1) << "' at index " << ts - ptr + << " not supported in a character class."; throw ParseError(str.str()); } action unsupportedProperty { @@ -549,26 +549,25 @@ unichar readUtf8CodePoint4c(const u8 *ts) { ############################################################# readVerb := |* 'UTF8)' => { - if (ts != ptr + 2) { - throw LocatedParseError("(*UTF8) must be at start of " - "expression, encountered"); - } - mode.utf8 = true; - globalMode.utf8 = true; /* once you unicode, you can't stop */ - ucp_start_p = te; /* (*UCP) can appear after us */ - fret; + throw LocatedParseError("(*UTF8) must be at start of " + "expression, encountered"); + }; + 'UTF)' => { + throw LocatedParseError("(*UTF) must be at start of " + "expression, encountered"); }; 'UCP)' => { - if (ts != ucp_start_p + 2) { - throw LocatedParseError("(*UCP) must be at start of " - "expression, encountered"); - } - mode.ucp = true; - globalMode.ucp = true; /* once you unicode, you can't stop */ - fret; + throw LocatedParseError("(*UCP) must be at start of " + "expression, encountered"); }; - 'UTF16)' => { - throw LocatedParseError("(*UTF16) not supported"); + # Use the control verb mini-parser to report an error for this + # unsupported/unknown verb. + [^)]+ ')' => { + ParseMode temp_mode; + assert(ts - 2 >= ptr); // parser needs the '(*' at the start too. + read_control_verbs(ts - 2, te, (ts - 2 - ptr), temp_mode); + assert(0); // Should have thrown a parse error. 
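
On the `readUtf8CodePoint2c/3c/4c` signature changes above: the helpers mask off the UTF-8 tag bits of the lead byte, then accumulate six payload bits per continuation byte. The two-byte decoder in full (the 3- and 4-byte forms extend the same shift-and-or pattern with lead masks `0x0f` and `0x07`):

```cpp
#include <cassert>
#include <cstdint>

std::uint32_t readUtf8CodePoint2c(const char *s) {
    auto *ts = (const std::uint8_t *)s;
    assert(ts[0] >= 0xc0 && ts[0] < 0xe0); // 110xxxxx lead byte
    assert(ts[1] >= 0x80 && ts[1] < 0xc0); // 10xxxxxx continuation byte
    std::uint32_t val = ts[0] & 0x1f;
    val <<= 6;
    val |= ts[1] & 0x3f;
    return val;
}

int main() {
    assert(readUtf8CodePoint2c("\xc3\xa9") == 0xe9); // U+00E9, 'é'
}
```
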
+ throw LocatedParseError("Unknown control verb"); }; any => { throw LocatedParseError("Unknown control verb"); @@ -977,8 +976,13 @@ unichar readUtf8CodePoint4c(const u8 *ts) { }; '\\o{' [0-7]+ '}' => { - string oct((const char *)ts + 3, te - ts - 4); - long int val = strtol(oct.c_str(), nullptr, 8); + string oct(ts + 3, te - ts - 4); + unsigned long val; + try { + val = stoul(oct, nullptr, 8); + } catch (const std::out_of_range &) { + val = MAX_UNICODE + 1; + } if ((!mode.utf8 && val > 255) || val > MAX_UNICODE) { throw LocatedParseError("Value in \\o{...} sequence is too large"); } @@ -1002,8 +1006,13 @@ unichar readUtf8CodePoint4c(const u8 *ts) { }; # Unicode Hex '\\x{' xdigit+ '}' => { - string hex((const char *)ts + 3, te - ts - 4); - long int val = strtol(hex.c_str(), nullptr, 16); + string hex(ts + 3, te - ts - 4); + unsigned long val; + try { + val = stoul(hex, nullptr, 16); + } catch (const std::out_of_range &) { + val = MAX_UNICODE + 1; + } if (val > MAX_UNICODE) { throw LocatedParseError("Value in \\x{...} sequence is too large"); } @@ -1092,7 +1101,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) { # Literal character (any - ']') => { - currentCls->add(*ts); + currentCls->add((u8)*ts); }; ']' => { @@ -1446,7 +1455,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) { // Otherwise, we interpret the first three digits as an // octal escape, and the remaining characters stand for // themselves as literals. - const u8 *s = ts; + const char *s = ts; unsigned int accum = 0; unsigned int oct_digits = 0; assert(*s == '\\'); // token starts at backslash @@ -1491,8 +1500,13 @@ unichar readUtf8CodePoint4c(const u8 *ts) { throw LocatedParseError("Invalid reference after \\g"); }; '\\o{' [0-7]+ '}' => { - string oct((const char *)ts + 3, te - ts - 4); - long int val = strtol(oct.c_str(), nullptr, 8); + string oct(ts + 3, te - ts - 4); + unsigned long val; + try { + val = stoul(oct, nullptr, 8); + } catch (const std::out_of_range &) { + val = MAX_UNICODE + 1; + } if ((!mode.utf8 && val > 255) || val > MAX_UNICODE) { throw LocatedParseError("Value in \\o{...} sequence is too large"); } @@ -1508,8 +1522,13 @@ unichar readUtf8CodePoint4c(const u8 *ts) { }; # Unicode Hex '\\x{' xdigit+ '}' => { - string hex((const char *)ts + 3, te - ts - 4); - long int val = strtol(hex.c_str(), nullptr, 16); + string hex(ts + 3, te - ts - 4); + unsigned long val; + try { + val = stoul(hex, nullptr, 16); + } catch (const std::out_of_range &) { + val = MAX_UNICODE + 1; + } if (val > MAX_UNICODE) { throw LocatedParseError("Value in \\x{...} sequence is too large"); } @@ -1532,8 +1551,8 @@ unichar readUtf8CodePoint4c(const u8 *ts) { # A bunch of unsupported (for now) escapes escapedUnsupported => { ostringstream str; - str << "'\\" << (char)*(ts + 1) << "' at index " - << ts - ptr << " not supported."; + str << "'\\" << *(ts + 1) << "' at index " << ts - ptr + << " not supported."; throw ParseError(str.str()); }; @@ -1834,16 +1853,22 @@ unichar readUtf8CodePoint4c(const u8 *ts) { %% write data nofinal; /** \brief Main parser call, returns root Component or nullptr. */ -unique_ptr parse(const char *const c_ptr, ParseMode &globalMode) { - const u8 * const ptr = (const u8 * const)c_ptr; - const u8 *p = ptr; - const u8 *pe = ptr + strlen(c_ptr); - const u8 *eof = pe; +unique_ptr parse(const char *ptr, ParseMode &globalMode) { + assert(ptr); + + const char *p = ptr; + const char *pe = ptr + strlen(ptr); + + // First, read the control verbs, set any global mode flags and move the + // ptr forward. 
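
The `strtol` → `stoul` changes above close an overflow hole: `strtol` reports out-of-range values via `errno`/`LONG_MAX`, which the old code never checked, while `std::stoul` throws `std::out_of_range`, which is mapped to a sentinel just past the valid range so the existing "too large" error fires. The shape of the fix (`MAX_UNICODE` is assumed to be `0x10FFFF`, matching its usual definition):

```cpp
#include <cassert>
#include <stdexcept>
#include <string>

constexpr unsigned long MAX_UNICODE = 0x10FFFF;

unsigned long parseHexCapped(const std::string &hex) {
    try {
        return std::stoul(hex, nullptr, 16);
    } catch (const std::out_of_range &) {
        return MAX_UNICODE + 1; // guarantees the range check rejects it
    }
}

int main() {
    assert(parseHexCapped("10ffff") == MAX_UNICODE);
    assert(parseHexCapped("ffffffffffffffffffff") > MAX_UNICODE); // overflow
}
```
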
+ p = read_control_verbs(p, pe, 0, globalMode); + + const char *eof = pe; int cs; UNUSED int act; int top; vector stack; - const u8 *ts, *te; + const char *ts, *te; unichar accumulator = 0; unichar octAccumulator = 0; /* required as we are also accumulating for * back ref when looking for octals */ @@ -1889,9 +1914,7 @@ unique_ptr parse(const char *const c_ptr, ParseMode &globalMode) { bool inCharClassEarly = false; // Location at which the current character class began. - const u8 *currentClsBegin = p; - - const u8 *ucp_start_p = p; /* for (*UCP) verb */ + const char *currentClsBegin = p; // We throw exceptions on various parsing failures beyond this point: we // use a try/catch block here to clean up our allocated memory before we diff --git a/src/parser/buildstate.h b/src/parser/buildstate.h index 8a69f44fa..5ddaf9b23 100644 --- a/src/parser/buildstate.h +++ b/src/parser/buildstate.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,10 +35,10 @@ #include "ue2common.h" #include "position.h" +#include "util/noncopyable.h" #include #include -#include namespace ue2 { @@ -49,7 +49,7 @@ class PositionInfo; * * Abstract base class; use \ref makeGlushkovBuildState to get one of these you * can use. */ -class GlushkovBuildState : boost::noncopyable { +class GlushkovBuildState : noncopyable { public: /** \brief Represents an uninitialized state. */ static const Position POS_UNINITIALIZED; diff --git a/src/rose/rose_dump.h b/src/parser/control_verbs.h similarity index 77% rename from src/rose/rose_dump.h rename to src/parser/control_verbs.h index fe66302d2..58934ec2c 100644 --- a/src/rose/rose_dump.h +++ b/src/parser/control_verbs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,25 +26,23 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef ROSE_DUMP_H -#define ROSE_DUMP_H - -#ifdef DUMP_SUPPORT +/** + * \file + * \brief Parser for control verbs that can occur at the beginning of a pattern. + */ -#include -#include +#ifndef CONTROL_VERBS_H +#define CONTROL_VERBS_H -struct RoseEngine; +#include "ue2common.h" namespace ue2 { -void roseDumpText(const RoseEngine *t, FILE *f); -void roseDumpInternals(const RoseEngine *t, const std::string &base); -void roseDumpComponents(const RoseEngine *t, bool dump_raw, - const std::string &base); -void roseDumpStructRaw(const RoseEngine *t, FILE *f); +struct ParseMode; + +const char *read_control_verbs(const char *ptr, const char *end, size_t start, + ParseMode &mode); } // namespace ue2 -#endif -#endif +#endif // CONTROL_VERBS_H diff --git a/src/parser/control_verbs.rl b/src/parser/control_verbs.rl new file mode 100644 index 000000000..1d3e33a9a --- /dev/null +++ b/src/parser/control_verbs.rl @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Parser for control verbs that can occur at the beginning of a pattern. + */ + +#include "parser/control_verbs.h" + +#include "parser/Parser.h" +#include "parser/parse_error.h" + +#include +#include + +using namespace std; + +namespace ue2 { + +const char *read_control_verbs(const char *ptr, const char *end, size_t start, + ParseMode &mode) { + const char *p = ptr; + const char *pe = end; + const char *eof = pe; + const char *ts, *te; + int cs; + UNUSED int act; + + %%{ + machine ControlVerbs; + + # Verbs that we recognise but do not support. + unhandledVerbs = '(*' ( + 'LIMIT_MATCH=' [0-9]+ | + 'LIMIT_RECURSION=' [0-9]+ | + 'NO_AUTO_POSSESS' | + 'NO_START_OPT' | + 'UTF16' | + 'UTF32' | + 'CR' | + 'LF' | + 'CRLF' | + 'ANYCRLF' | + 'ANY' | + 'BSR_ANYCRLF' | + 'BSR_UNICODE' + ) . ')'; + + main := |* + '(*UTF8)' | '(*UTF)' => { + mode.utf8 = true; + }; + + '(*UCP)' => { + mode.ucp = true; + }; + + unhandledVerbs => { + ostringstream str; + str << "Unsupported control verb " << string(ts, te - ts); + throw LocatedParseError(str.str()); + }; + + '(*' [^)]+ ')' => { + ostringstream str; + str << "Unknown control verb " << string(ts, te - ts); + throw LocatedParseError(str.str()); + }; + + # Anything else means we're done. 
+ any => { + fhold; + fbreak; + }; + *|; + + write data; + write init; + }%% + + try { + %% write exec; + } catch (LocatedParseError &error) { + if (ts >= ptr && ts <= pe) { + error.locate(ts - ptr + start); + } else { + error.locate(0); + } + throw; + } + + return p; +} + +} // namespace ue2 diff --git a/src/parser/parse_error.cpp b/src/parser/parse_error.cpp index 6245adb9f..e7f60b264 100644 --- a/src/parser/parse_error.cpp +++ b/src/parser/parse_error.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -44,9 +44,13 @@ ParseError::~ParseError() {} LocatedParseError::~LocatedParseError() {} void LocatedParseError::locate(size_t offset) { + if (finalized) { + return; + } std::ostringstream str; str << reason << " at index " << offset << "."; reason = str.str(); + finalized = true; } } diff --git a/src/parser/parse_error.h b/src/parser/parse_error.h index e727991db..4556ed5e0 100644 --- a/src/parser/parse_error.h +++ b/src/parser/parse_error.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,8 +30,8 @@ * \brief Parse/Compile exceptions. */ -#ifndef PARSE_ERROR_H_A02047D1AA16C9 -#define PARSE_ERROR_H_A02047D1AA16C9 +#ifndef PARSE_ERROR_H +#define PARSE_ERROR_H #include "util/compile_error.h" @@ -44,22 +44,24 @@ class ParseError : public CompileError { public: // Note: 'why' should describe why the error occurred and end with a // full stop, but no line break. - explicit ParseError(const std::string &why) : CompileError(why) {} + explicit ParseError(std::string why) : CompileError(std::move(why)) {} ~ParseError() override; }; class LocatedParseError : public ParseError { public: - explicit LocatedParseError(const std::string &why) : ParseError(".") { - reason = why; // don't use ParseError ctor + explicit LocatedParseError(std::string why) : ParseError(".") { + reason = std::move(why); // don't use ParseError ctor } ~LocatedParseError() override; void locate(size_t offset); +private: + bool finalized = false; //!< true when locate() has been called. }; } // namespace ue2 -#endif /* PARSE_ERROR_H_A02047D1AA16C9 */ +#endif /* PARSE_ERROR_H */ diff --git a/src/parser/shortcut_literal.cpp b/src/parser/shortcut_literal.cpp index 3f58d7526..4539836ab 100644 --- a/src/parser/shortcut_literal.cpp +++ b/src/parser/shortcut_literal.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -159,23 +159,26 @@ class ConstructLiteralVisitor : public ConstComponentVisitor { ConstructLiteralVisitor::~ConstructLiteralVisitor() {} /** \brief True if the literal expression \a expr could be added to Rose. 
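
On the `parse_error.cpp` hunk above: the new `finalized` flag makes `LocatedParseError::locate()` idempotent, so the first (innermost, most precise) location wins, and outer handlers that re-locate the exception while unwinding no longer append a second " at index N." suffix. A minimal stand-in:

```cpp
#include <cassert>
#include <cstddef>
#include <sstream>
#include <string>

struct LocatedParseError {
    std::string reason;
    bool finalized = false;
    void locate(std::size_t offset) {
        if (finalized) return;
        std::ostringstream str;
        str << reason << " at index " << offset << ".";
        reason = str.str();
        finalized = true;
    }
};

int main() {
    LocatedParseError e{"Unknown control verb"};
    e.locate(3);  // innermost handler locates the error
    e.locate(42); // outer handler: now a no-op
    assert(e.reason == "Unknown control verb at index 3.");
}
```
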
*/ -bool shortcutLiteral(NG &ng, const ParsedExpression &expr) { - assert(expr.component); +bool shortcutLiteral(NG &ng, const ParsedExpression &pe) { + assert(pe.component); if (!ng.cc.grey.allowLiteral) { return false; } + const auto &expr = pe.expr; + // XXX: don't shortcut literals with extended params (yet) - if (expr.min_offset || expr.max_offset != MAX_OFFSET || expr.min_length) { + if (expr.min_offset || expr.max_offset != MAX_OFFSET || expr.min_length || + expr.edit_distance) { DEBUG_PRINTF("extended params not allowed\n"); return false; } ConstructLiteralVisitor vis; try { - assert(expr.component); - expr.component->accept(vis); + assert(pe.component); + pe.component->accept(vis); assert(vis.repeat_stack.empty()); } catch (const ConstructLiteralVisitor::NotLiteral&) { DEBUG_PRINTF("not a literal\n"); @@ -195,7 +198,8 @@ bool shortcutLiteral(NG &ng, const ParsedExpression &expr) { } DEBUG_PRINTF("constructed literal %s\n", dumpString(lit).c_str()); - return ng.addLiteral(lit, expr.index, expr.id, expr.highlander, expr.som); + return ng.addLiteral(lit, expr.index, expr.report, expr.highlander, + expr.som); } } // namespace ue2 diff --git a/src/rose/counting_miracle.h b/src/rose/counting_miracle.h index 76db5a77c..976208b73 100644 --- a/src/rose/counting_miracle.h +++ b/src/rose/counting_miracle.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -98,8 +98,8 @@ u32 roseCountingMiracleScanShufti(m128 mask_lo, m128 mask_hi, u8 poison, for (; d + 16 <= d_end; d_end -= 16) { m128 data = loadu128(d_end - 16); - m128 c_lo = pshufb(mask_lo, GET_LO_4(data)); - m128 c_hi = pshufb(mask_hi, GET_HI_4(data)); + m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(data)); + m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(data)); m128 t = and128(c_lo, c_hi); u32 z1 = movemask128(eq128(t, zeroes)); count += popcount32(z1 ^ 0xffff); @@ -117,8 +117,8 @@ u32 roseCountingMiracleScanShufti(m128 mask_lo, m128 mask_hi, u8 poison, memset(temp, poison, sizeof(temp)); memcpy(temp, d, d_end - d); m128 data = loadu128(temp); - m128 c_lo = pshufb(mask_lo, GET_LO_4(data)); - m128 c_hi = pshufb(mask_hi, GET_HI_4(data)); + m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(data)); + m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(data)); m128 t = and128(c_lo, c_hi); u32 z1 = movemask128(eq128(t, zeroes)); count += popcount32(z1 ^ 0xffff); diff --git a/src/rose/match.c b/src/rose/match.c index b641e39d8..daf81eac0 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -85,19 +85,13 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, DEBUG_PRINTF("STATE groups=0x%016llx\n", tctx->groups); - const u32 *delayRebuildPrograms = - getByOffset(t, t->litDelayRebuildProgramOffset); - assert(id < t->literalCount); - const u32 program = delayRebuildPrograms[id]; - - if (program) { - const u64a som = 0; - const size_t match_len = end - start + 1; - const u8 flags = 0; - UNUSED hwlmcb_rv_t rv = roseRunProgram(t, scratch, program, som, - real_end, match_len, flags); - assert(rv != HWLM_TERMINATE_MATCHING); - } + assert(id && id < t->size); // id is a program offset + 
const u64a som = 0; + const size_t match_len = end - start + 1; + const u8 flags = 0; + UNUSED hwlmcb_rv_t rv = + roseRunProgram(t, scratch, id, som, real_end, match_len, flags); + assert(rv != HWLM_TERMINATE_MATCHING); /* we are just repopulating the delay queue, groups should be * already set from the original scan. */ @@ -156,7 +150,7 @@ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, } if (top_squash_distance) { - assert(q->cur != q->end); + assert(q->cur < q->end); struct mq_item *last = &q->items[q->end - 1]; if (last->type == event && last->location >= loc - (s64a)top_squash_distance) { @@ -242,33 +236,13 @@ int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) { */ static really_inline hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t, - struct hs_scratch *scratch, u64a end, - size_t match_len, u32 id) { - DEBUG_PRINTF("id=%u\n", id); - const u32 *programs = getByOffset(t, t->litProgramOffset); - assert(id < t->literalCount); - const u64a som = 0; - const u8 flags = 0; - return roseRunProgram_i(t, scratch, programs[id], som, end, match_len, - flags); -} - -/** - * \brief Run the program for the given literal ID, with the interpreter - * out of line. - * - * Assumes not in_anchored. - */ -static really_inline -hwlmcb_rv_t roseProcessMatch(const struct RoseEngine *t, - struct hs_scratch *scratch, u64a end, - size_t match_len, u32 id) { + struct hs_scratch *scratch, u64a end, + size_t match_len, u32 id) { DEBUG_PRINTF("id=%u\n", id); - const u32 *programs = getByOffset(t, t->litProgramOffset); - assert(id < t->literalCount); + assert(id && id < t->size); // id is an offset into bytecode const u64a som = 0; const u8 flags = 0; - return roseRunProgram(t, scratch, programs[id], som, end, match_len, flags); + return roseRunProgram_i(t, scratch, id, som, end, match_len, flags); } static rose_inline @@ -290,14 +264,17 @@ hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, roseFlushLastByteHistory(t, scratch, offset); tctxt->lastEndOffset = offset; + const u32 *programs = getByOffset(t, t->delayProgramOffset); + for (u32 it = fatbit_iterate(vicSlot, delay_count, MMB_INVALID); it != MMB_INVALID; it = fatbit_iterate(vicSlot, delay_count, it)) { - u32 literal_id = t->delay_base_id + it; - UNUSED rose_group old_groups = tctxt->groups; - DEBUG_PRINTF("DELAYED MATCH id=%u offset=%llu\n", literal_id, offset); - hwlmcb_rv_t rv = roseProcessMatch(t, scratch, offset, 0, literal_id); + DEBUG_PRINTF("DELAYED MATCH id=%u offset=%llu\n", it, offset); + const u64a som = 0; + const u8 flags = 0; + hwlmcb_rv_t rv = roseRunProgram(t, scratch, programs[it], som, offset, + 0, flags); DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); /* delayed literals can't safely set groups. 
@@ -322,16 +299,19 @@ hwlmcb_rv_t flushAnchoredLiteralAtLoc(const struct RoseEngine *t, struct fatbit *curr_row = getAnchoredLiteralLog(scratch)[curr_loc - 1]; u32 region_width = t->anchored_count; + const u32 *programs = getByOffset(t, t->anchoredProgramOffset); + DEBUG_PRINTF("report matches at curr loc\n"); for (u32 it = fatbit_iterate(curr_row, region_width, MMB_INVALID); it != MMB_INVALID; it = fatbit_iterate(curr_row, region_width, it)) { DEBUG_PRINTF("it = %u/%u\n", it, region_width); - u32 literal_id = t->anchored_base_id + it; rose_group old_groups = tctxt->groups; - DEBUG_PRINTF("ANCH REPLAY MATCH id=%u offset=%u\n", literal_id, - curr_loc); - hwlmcb_rv_t rv = roseProcessMatch(t, scratch, curr_loc, 0, literal_id); + DEBUG_PRINTF("ANCH REPLAY MATCH id=%u offset=%u\n", it, curr_loc); + const u64a som = 0; + const u8 flags = 0; + hwlmcb_rv_t rv = roseRunProgram(t, scratch, programs[it], som, curr_loc, + 0, flags); DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); /* anchored literals can't safely set groups. diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index e883c239e..b140a2bcd 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -41,6 +41,7 @@ #include "miracle.h" #include "report.h" #include "rose.h" +#include "rose_common.h" #include "rose_internal.h" #include "rose_program.h" #include "rose_types.h" @@ -102,7 +103,7 @@ void rosePushDelayedMatch(const struct RoseEngine *t, static rose_inline void recordAnchoredLiteralMatch(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 literal_id, + struct hs_scratch *scratch, u32 anch_id, u64a end) { assert(end); @@ -112,7 +113,7 @@ void recordAnchoredLiteralMatch(const struct RoseEngine *t, struct fatbit **anchoredLiteralRows = getAnchoredLiteralLog(scratch); - DEBUG_PRINTF("record %u @ %llu\n", literal_id, end); + DEBUG_PRINTF("record %u (of %u) @ %llu\n", anch_id, t->anchored_count, end); if (!bf64_set(&scratch->al_log_sum, end - 1)) { // first time, clear row @@ -120,11 +121,8 @@ void recordAnchoredLiteralMatch(const struct RoseEngine *t, fatbit_clear(anchoredLiteralRows[end - 1]); } - u32 rel_idx = literal_id - t->anchored_base_id; - DEBUG_PRINTF("record %u @ %llu index %u/%u\n", literal_id, end, rel_idx, - t->anchored_count); - assert(rel_idx < t->anchored_count); - fatbit_set(anchoredLiteralRows[end - 1], t->anchored_count, rel_idx); + assert(anch_id < t->anchored_count); + fatbit_set(anchoredLiteralRows[end - 1], t->anchored_count, anch_id); } static rose_inline @@ -486,7 +484,6 @@ static rose_inline hwlmcb_rv_t roseReport(const struct RoseEngine *t, struct hs_scratch *scratch, u64a end, ReportID onmatch, s32 offset_adjust, u32 ekey) { - assert(!t->needsCatchup || end == scratch->tctxt.minMatchOffset); DEBUG_PRINTF("firing callback onmatch=%u, end=%llu\n", onmatch, end); updateLastMatchOffset(&scratch->tctxt, end); @@ -520,13 +517,11 @@ hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t, } static rose_inline -void roseHandleSom(UNUSED const struct RoseEngine *t, - struct hs_scratch *scratch, const struct som_operation *sr, +void roseHandleSom(struct hs_scratch *scratch, const struct som_operation *sr, u64a end) { DEBUG_PRINTF("end=%llu, minMatchOffset=%llu\n", end, 
scratch->tctxt.minMatchOffset); - assert(!t->needsCatchup || end == scratch->tctxt.minMatchOffset); updateLastMatchOffset(&scratch->tctxt, end); handleSomInternal(scratch, sr, end); } @@ -535,7 +530,6 @@ static rose_inline hwlmcb_rv_t roseReportSom(const struct RoseEngine *t, struct hs_scratch *scratch, u64a start, u64a end, ReportID onmatch, s32 offset_adjust, u32 ekey) { - assert(!t->needsCatchup || end == scratch->tctxt.minMatchOffset); DEBUG_PRINTF("firing som callback onmatch=%u, start=%llu, end=%llu\n", onmatch, start, end); updateLastMatchOffset(&scratch->tctxt, end); @@ -555,13 +549,11 @@ hwlmcb_rv_t roseReportSom(const struct RoseEngine *t, } static rose_inline -void roseHandleSomSom(UNUSED const struct RoseEngine *t, - struct hs_scratch *scratch, +void roseHandleSomSom(struct hs_scratch *scratch, const struct som_operation *sr, u64a start, u64a end) { DEBUG_PRINTF("start=%llu, end=%llu, minMatchOffset=%llu\n", start, end, scratch->tctxt.minMatchOffset); - assert(!t->needsCatchup || end == scratch->tctxt.minMatchOffset); updateLastMatchOffset(&scratch->tctxt, end); setSomFromSomAware(scratch, sr, start, end); } @@ -859,13 +851,13 @@ u32 getBufferDataComplex(const struct core_info *ci, const s64a loc, } static rose_inline -m128 getData128(const struct core_info *ci, s64a offset, u16 *valid_data_mask) { +m128 getData128(const struct core_info *ci, s64a offset, u32 *valid_data_mask) { if (offset > 0 && offset + sizeof(m128) <= ci->len) { *valid_data_mask = 0xffff; return loadu128(ci->buf + offset); } ALIGN_DIRECTIVE u8 data[sizeof(m128)]; - *valid_data_mask = (u16)getBufferDataComplex(ci, offset, data, 16); + *valid_data_mask = getBufferDataComplex(ci, offset, data, 16); return *(m128 *)data; } @@ -894,7 +886,7 @@ int roseCheckShufti16x8(const struct core_info *ci, const u8 *nib_mask, return 0; } - u16 valid_data_mask = 0; + u32 valid_data_mask = 0; m128 data = getData128(ci, offset, &valid_data_mask); if (unlikely(!valid_data_mask)) { return 1; @@ -926,7 +918,7 @@ int roseCheckShufti16x16(const struct core_info *ci, const u8 *hi_mask, return 0; } - u16 valid_data_mask = 0; + u32 valid_data_mask = 0; m128 data = getData128(ci, offset, &valid_data_mask); if (unlikely(!valid_data_mask)) { return 1; @@ -1022,8 +1014,9 @@ int roseCheckShufti32x16(const struct core_info *ci, const u8 *hi_mask, static rose_inline int roseCheckSingleLookaround(const struct RoseEngine *t, const struct hs_scratch *scratch, - s8 checkOffset, u32 lookaroundIndex, u64a end) { - assert(lookaroundIndex != MO_INVALID_IDX); + s8 checkOffset, u32 lookaroundReachIndex, + u64a end) { + assert(lookaroundReachIndex != MO_INVALID_IDX); const struct core_info *ci = &scratch->core_info; DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, ci->buf_offset, ci->buf_offset + ci->len); @@ -1038,8 +1031,7 @@ int roseCheckSingleLookaround(const struct RoseEngine *t, return 0; } - const u8 *reach_base = (const u8 *)t + t->lookaroundReachOffset; - const u8 *reach = reach_base + lookaroundIndex * REACH_BITVECTOR_LEN; + const u8 *reach = getByOffset(t, lookaroundReachIndex); u8 c; if (offset >= 0 && offset < (s64a)ci->len) { @@ -1065,23 +1057,22 @@ int roseCheckSingleLookaround(const struct RoseEngine *t, */ static rose_inline int roseCheckLookaround(const struct RoseEngine *t, - const struct hs_scratch *scratch, u32 lookaroundIndex, + const struct hs_scratch *scratch, + u32 lookaroundLookIndex, u32 lookaroundReachIndex, u32 lookaroundCount, u64a end) { - assert(lookaroundIndex != MO_INVALID_IDX); + 
assert(lookaroundLookIndex != MO_INVALID_IDX);
+    assert(lookaroundReachIndex != MO_INVALID_IDX);
     assert(lookaroundCount > 0);
 
     const struct core_info *ci = &scratch->core_info;
     DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
                  ci->buf_offset, ci->buf_offset + ci->len);
 
-    const u8 *base = (const u8 *)t;
-    const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset);
-    const s8 *look = look_base + lookaroundIndex;
+    const s8 *look = getByOffset(t, lookaroundLookIndex);
     const s8 *look_end = look + lookaroundCount;
     assert(look < look_end);
 
-    const u8 *reach_base = base + t->lookaroundReachOffset;
-    const u8 *reach = reach_base + lookaroundIndex * REACH_BITVECTOR_LEN;
+    const u8 *reach = getByOffset(t, lookaroundReachIndex);
 
     // The following code assumes that the lookaround structures are ordered by
     // increasing offset.
@@ -1153,6 +1144,357 @@ int roseCheckLookaround(const struct RoseEngine *t,
     return 1;
 }
 
+/**
+ * \brief Try to find a matching path using the corresponding path mask for
+ * each lookaround location.
+ */
+static rose_inline
+int roseMultipathLookaround(const struct RoseEngine *t,
+                            const struct hs_scratch *scratch,
+                            u32 multipathLookaroundLookIndex,
+                            u32 multipathLookaroundReachIndex,
+                            u32 multipathLookaroundCount,
+                            s32 last_start, const u8 *start_mask,
+                            u64a end) {
+    assert(multipathLookaroundCount > 0);
+
+    const struct core_info *ci = &scratch->core_info;
+    DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
+                 ci->buf_offset, ci->buf_offset + ci->len);
+
+    const s8 *look = getByOffset(t, multipathLookaroundLookIndex);
+    const s8 *look_end = look + multipathLookaroundCount;
+    assert(look < look_end);
+
+    const u8 *reach = getByOffset(t, multipathLookaroundReachIndex);
+
+    const s64a base_offset = (s64a)end - ci->buf_offset;
+    DEBUG_PRINTF("base_offset=%lld\n", base_offset);
+
+    u8 path = 0xff;
+
+    assert(last_start < 0);
+
+    if (unlikely((u64a)(0 - last_start) > end)) {
+        DEBUG_PRINTF("too early, fail\n");
+        return 0;
+    }
+
+    s8 base_look_offset = *look;
+    do {
+        s64a offset = base_offset + *look;
+        u32 start_offset = (u32)(*look - base_look_offset);
+        DEBUG_PRINTF("start_mask[%u] = %x\n", start_offset,
+                     start_mask[start_offset]);
+        path = start_mask[start_offset];
+        if (offset >= -(s64a)ci->hlen) {
+            break;
+        }
+        DEBUG_PRINTF("look=%d before history\n", *look);
+        look++;
+        reach += MULTI_REACH_BITVECTOR_LEN;
+    } while (look < look_end);
+
+    DEBUG_PRINTF("scan history (%zu looks left)\n", look_end - look);
+    for (; look < look_end; ++look, reach += MULTI_REACH_BITVECTOR_LEN) {
+        s64a offset = base_offset + *look;
+        DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset);
+
+        if (offset >= 0) {
+            DEBUG_PRINTF("in buffer\n");
+            break;
+        }
+
+        assert(offset >= -(s64a)ci->hlen && offset < 0);
+        u8 c = ci->hbuf[ci->hlen + offset];
+        path &= reach[c];
+        DEBUG_PRINTF("reach[%x] = %02x path = %02x\n", c, reach[c], path);
+        if (!path) {
+            DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
+            return 0;
+        }
+    }
+
+    DEBUG_PRINTF("scan buffer (%zu looks left)\n", look_end - look);
+    for (; look < look_end; ++look, reach += MULTI_REACH_BITVECTOR_LEN) {
+        s64a offset = base_offset + *look;
+        DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset);
+
+        if (offset >= (s64a)ci->len) {
+            DEBUG_PRINTF("in the future\n");
+            break;
+        }
+
+        assert(offset >= 0 && offset < (s64a)ci->len);
+        u8 c = ci->buf[offset];
+        path &= reach[c];
+        DEBUG_PRINTF("reach[%x] = %02x path = %02x\n", c, reach[c], path);
+        if (!path) {
+            DEBUG_PRINTF("char 0x%02x failed reach 
check\n", c); + return 0; + } + } + + DEBUG_PRINTF("OK :)\n"); + return 1; +} + +static never_inline +int roseCheckMultipathShufti16x8(const struct hs_scratch *scratch, + const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_16x8 *ri, + u64a end) { + const struct core_info *ci = &scratch->core_info; + s32 checkOffset = ri->base_offset; + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + assert(ri->last_start <= 0); + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + if ((u64a)(0 - ri->last_start) > end) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + } + + u32 valid_data_mask; + m128 data_init = getData128(ci, offset, &valid_data_mask); + m128 data_select_mask = loadu128(ri->data_select_mask); + + u32 valid_path_mask = 0; + if (unlikely(!(valid_data_mask & 1))) { + DEBUG_PRINTF("lose part of backward data\n"); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); + + m128 expand_valid; + u64a expand_mask = 0x8080808080808080ULL; + u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); + u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); + DEBUG_PRINTF("expand_hi %llx\n", valid_hi); + DEBUG_PRINTF("expand_lo %llx\n", valid_lo); + expand_valid = set64x2(valid_hi, valid_lo); + valid_path_mask = ~movemask128(pshufb_m128(expand_valid, + data_select_mask)); + } + + m128 data = pshufb_m128(data_init, data_select_mask); + m256 nib_mask = loadu256(ri->nib_mask); + m128 bucket_select_mask = loadu128(ri->bucket_select_mask); + + u32 hi_bits_mask = ri->hi_bits_mask; + u32 lo_bits_mask = ri->lo_bits_mask; + u32 neg_mask = ri->neg_mask; + + if (validateMultipathShuftiMask16x8(data, nib_mask, + bucket_select_mask, + hi_bits_mask, lo_bits_mask, + neg_mask, valid_path_mask)) { + DEBUG_PRINTF("check multi-path shufti-16x8 successfully\n"); + return 1; + } else { + return 0; + } +} + +static never_inline +int roseCheckMultipathShufti32x8(const struct hs_scratch *scratch, + const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x8 *ri, + u64a end) { + const struct core_info *ci = &scratch->core_info; + s32 checkOffset = ri->base_offset; + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + assert(ri->last_start <= 0); + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + if ((u64a)(0 - ri->last_start) > end) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + } + + u32 valid_data_mask; + m128 data_m128 = getData128(ci, offset, &valid_data_mask); + m256 data_double = set2x128(data_m128); + m256 data_select_mask = loadu256(ri->data_select_mask); + + u32 valid_path_mask = 0; + m256 expand_valid; + if (unlikely(!(valid_data_mask & 1))) { + DEBUG_PRINTF("lose part of backward data\n"); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); + + u64a expand_mask = 0x8080808080808080ULL; + u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); + u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); + DEBUG_PRINTF("expand_hi %llx\n", valid_hi); + DEBUG_PRINTF("expand_lo %llx\n", valid_lo); + expand_valid = set64x4(valid_hi, valid_lo, valid_hi, + valid_lo); + valid_path_mask = ~movemask256(pshufb_m256(expand_valid, + data_select_mask)); + } + + m256 data = pshufb_m256(data_double, 
data_select_mask); + m256 hi_mask = loadu2x128(ri->hi_mask); + m256 lo_mask = loadu2x128(ri->lo_mask); + m256 bucket_select_mask = loadu256(ri->bucket_select_mask); + + u32 hi_bits_mask = ri->hi_bits_mask; + u32 lo_bits_mask = ri->lo_bits_mask; + u32 neg_mask = ri->neg_mask; + + if (validateMultipathShuftiMask32x8(data, hi_mask, lo_mask, + bucket_select_mask, + hi_bits_mask, lo_bits_mask, + neg_mask, valid_path_mask)) { + DEBUG_PRINTF("check multi-path shufti-32x8 successfully\n"); + return 1; + } else { + return 0; + } +} + +static never_inline +int roseCheckMultipathShufti32x16(const struct hs_scratch *scratch, + const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x16 *ri, + u64a end) { + const struct core_info *ci = &scratch->core_info; + const s64a base_offset = (s64a)end - ci->buf_offset; + s32 checkOffset = ri->base_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + assert(ri->last_start <= 0); + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + if ((u64a)(0 - ri->last_start) > end) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + } + + u32 valid_data_mask; + m128 data_m128 = getData128(ci, offset, &valid_data_mask); + m256 data_double = set2x128(data_m128); + m256 data_select_mask = loadu256(ri->data_select_mask); + + u32 valid_path_mask = 0; + m256 expand_valid; + if (unlikely(!(valid_data_mask & 1))) { + DEBUG_PRINTF("lose part of backward data\n"); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); + + u64a expand_mask = 0x8080808080808080ULL; + u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); + u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); + DEBUG_PRINTF("expand_hi %llx\n", valid_hi); + DEBUG_PRINTF("expand_lo %llx\n", valid_lo); + expand_valid = set64x4(valid_hi, valid_lo, valid_hi, + valid_lo); + valid_path_mask = ~movemask256(pshufb_m256(expand_valid, + data_select_mask)); + } + + m256 data = pshufb_m256(data_double, data_select_mask); + + m256 hi_mask_1 = loadu2x128(ri->hi_mask); + m256 hi_mask_2 = loadu2x128(ri->hi_mask + 16); + m256 lo_mask_1 = loadu2x128(ri->lo_mask); + m256 lo_mask_2 = loadu2x128(ri->lo_mask + 16); + + m256 bucket_select_mask_hi = loadu256(ri->bucket_select_mask_hi); + m256 bucket_select_mask_lo = loadu256(ri->bucket_select_mask_lo); + + u32 hi_bits_mask = ri->hi_bits_mask; + u32 lo_bits_mask = ri->lo_bits_mask; + u32 neg_mask = ri->neg_mask; + + if (validateMultipathShuftiMask32x16(data, hi_mask_1, hi_mask_2, + lo_mask_1, lo_mask_2, + bucket_select_mask_hi, + bucket_select_mask_lo, + hi_bits_mask, lo_bits_mask, + neg_mask, valid_path_mask)) { + DEBUG_PRINTF("check multi-path shufti-32x16 successfully\n"); + return 1; + } else { + return 0; + } +} + +static never_inline +int roseCheckMultipathShufti64(const struct hs_scratch *scratch, + const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64 *ri, + u64a end) { + const struct core_info *ci = &scratch->core_info; + const s64a base_offset = (s64a)end - ci->buf_offset; + s32 checkOffset = ri->base_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + if ((u64a)(0 - ri->last_start) > end) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + } + + u32 valid_data_mask; + m128 data_m128 = getData128(ci, offset, 
&valid_data_mask); + m256 data_m256 = set2x128(data_m128); + m256 data_select_mask_1 = loadu256(ri->data_select_mask); + m256 data_select_mask_2 = loadu256(ri->data_select_mask + 32); + + u64a valid_path_mask = 0; + m256 expand_valid; + if (unlikely(!(valid_data_mask & 1))) { + DEBUG_PRINTF("lose part of backward data\n"); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); + + u64a expand_mask = 0x8080808080808080ULL; + u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); + u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); + DEBUG_PRINTF("expand_hi %llx\n", valid_hi); + DEBUG_PRINTF("expand_lo %llx\n", valid_lo); + expand_valid = set64x4(valid_hi, valid_lo, valid_hi, + valid_lo); + u32 valid_path_1 = movemask256(pshufb_m256(expand_valid, + data_select_mask_1)); + u32 valid_path_2 = movemask256(pshufb_m256(expand_valid, + data_select_mask_2)); + valid_path_mask = ~((u64a)valid_path_1 | (u64a)valid_path_2 << 32); + } + + m256 data_1 = pshufb_m256(data_m256, data_select_mask_1); + m256 data_2 = pshufb_m256(data_m256, data_select_mask_2); + + m256 hi_mask = loadu2x128(ri->hi_mask); + m256 lo_mask = loadu2x128(ri->lo_mask); + + m256 bucket_select_mask_1 = loadu256(ri->bucket_select_mask); + m256 bucket_select_mask_2 = loadu256(ri->bucket_select_mask + 32); + + u64a hi_bits_mask = ri->hi_bits_mask; + u64a lo_bits_mask = ri->lo_bits_mask; + u64a neg_mask = ri->neg_mask; + + if (validateMultipathShuftiMask64(data_1, data_2, hi_mask, lo_mask, + bucket_select_mask_1, + bucket_select_mask_2, hi_bits_mask, + lo_bits_mask, neg_mask, + valid_path_mask)) { + DEBUG_PRINTF("check multi-path shufti-64 successfully\n"); + return 1; + } else { + return 0; + } +} + int roseNfaEarliestSom(u64a start, u64a end, ReportID id, void *context); static rose_inline @@ -1409,6 +1751,68 @@ int roseCheckLongLiteral(const struct RoseEngine *t, return 1; } +static rose_inline +int roseCheckMediumLiteral(const struct RoseEngine *t, + const struct hs_scratch *scratch, u64a end, + u32 lit_offset, u32 lit_length, char nocase) { + const struct core_info *ci = &scratch->core_info; + const u8 *lit = getByOffset(t, lit_offset); + + DEBUG_PRINTF("check lit at %llu, length %u\n", end, lit_length); + DEBUG_PRINTF("base buf_offset=%llu\n", ci->buf_offset); + + if (end < lit_length) { + DEBUG_PRINTF("too short!\n"); + return 0; + } + + // If any portion of the literal matched in the current buffer, check it. + if (end > ci->buf_offset) { + u32 scan_len = MIN(end - ci->buf_offset, lit_length); + u64a scan_start = end - ci->buf_offset - scan_len; + DEBUG_PRINTF("checking suffix (%u bytes) in buf[%llu:%llu]\n", scan_len, + scan_start, end); + if (cmpForward(ci->buf + scan_start, lit + lit_length - scan_len, + scan_len, nocase)) { + DEBUG_PRINTF("cmp of suffix failed\n"); + return 0; + } + } + + // If the entirety of the literal was in the current block, we are done. + if (end - lit_length >= ci->buf_offset) { + DEBUG_PRINTF("literal confirmed in current block\n"); + return 1; + } + + // We still have a prefix which we must test against the history buffer. + assert(t->mode != HS_MODE_BLOCK); + + u64a lit_start_offset = end - lit_length; + u32 prefix_len = MIN(lit_length, ci->buf_offset - lit_start_offset); + u32 hist_rewind = ci->buf_offset - lit_start_offset; + DEBUG_PRINTF("hlen=%zu, hist_rewind=%u\n", ci->hlen, hist_rewind); + + // History length check required for confirm in the EOD and delayed + // rebuild paths. 
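+    // For example (illustrative values): with buf_offset=100, end=103 and an
+    // 8-byte literal, the 3-byte suffix has already been checked against the
+    // current buffer above; lit_start_offset=95, so prefix_len=5 and
+    // hist_rewind=5, meaning at least 5 bytes of history must be available
+    // for the prefix check below to proceed.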
+ if (hist_rewind > ci->hlen) { + DEBUG_PRINTF("not enough history\n"); + return 0; + } + + DEBUG_PRINTF("check prefix len=%u from hist (len %zu, rewind %u)\n", + prefix_len, ci->hlen, hist_rewind); + assert(hist_rewind <= ci->hlen); + if (cmpForward(ci->hbuf + ci->hlen - hist_rewind, lit, prefix_len, + nocase)) { + DEBUG_PRINTF("cmp of prefix failed\n"); + return 0; + } + + DEBUG_PRINTF("cmp succeeded\n"); + return 1; +} + static void updateSeqPoint(struct RoseContext *tctxt, u64a offset, const char from_mpv) { @@ -1439,6 +1843,7 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, DEBUG_PRINTF("program=%u, offsets [%llu,%llu], flags=%u\n", programOffset, som, end, prog_flags); + assert(programOffset != ROSE_INVALID_PROG_OFFSET); assert(programOffset >= sizeof(struct RoseEngine)); assert(programOffset < t->size); @@ -1481,6 +1886,8 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, DEBUG_PRINTF("delay until playback\n"); tctxt->groups |= ri->groups; work_done = 1; + recordAnchoredLiteralMatch(t, scratch, ri->anch_id, end); + assert(ri->done_jump); // must progress pc += ri->done_jump; continue; @@ -1492,7 +1899,9 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, if (end < ri->min_offset) { DEBUG_PRINTF("halt: before min_offset=%u\n", ri->min_offset); - return HWLM_CONTINUE_MATCHING; + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; } } PROGRAM_NEXT_INSTRUCTION @@ -1551,8 +1960,8 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_LOOKAROUND) { - if (!roseCheckLookaround(t, scratch, ri->index, ri->count, - end)) { + if (!roseCheckLookaround(t, scratch, ri->look_index, + ri->reach_index, ri->count, end)) { DEBUG_PRINTF("failed lookaround check\n"); assert(ri->fail_jump); // must progress pc += ri->fail_jump; @@ -1672,8 +2081,8 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(RECORD_ANCHORED) { - recordAnchoredLiteralMatch(t, scratch, ri->id, end); + PROGRAM_CASE(DUMMY_NOP) { + assert(0); } PROGRAM_NEXT_INSTRUCTION @@ -1792,14 +2201,14 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, PROGRAM_CASE(REPORT_SOM_INT) { updateSeqPoint(tctxt, end, from_mpv); - roseHandleSom(t, scratch, &ri->som, end); + roseHandleSom(scratch, &ri->som, end); work_done = 1; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM_AWARE) { updateSeqPoint(tctxt, end, from_mpv); - roseHandleSomSom(t, scratch, &ri->som, som, end); + roseHandleSomSom(scratch, &ri->som, som, end); work_done = 1; } PROGRAM_NEXT_INSTRUCTION @@ -2060,8 +2469,10 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, const char nocase = 0; if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, ri->lit_length, nocase)) { - DEBUG_PRINTF("halt: failed long lit check\n"); - return HWLM_CONTINUE_MATCHING; + DEBUG_PRINTF("failed long lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; } } PROGRAM_NEXT_INSTRUCTION @@ -2070,8 +2481,93 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, const char nocase = 1; if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, ri->lit_length, nocase)) { - DEBUG_PRINTF("halt: failed nocase long lit check\n"); - return HWLM_CONTINUE_MATCHING; + DEBUG_PRINTF("failed nocase long lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MED_LIT) { + const char nocase = 0; + if (!roseCheckMediumLiteral(t, 
scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("failed lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MED_LIT_NOCASE) { + const char nocase = 1; + if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("failed long lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CLEAR_WORK_DONE) { + DEBUG_PRINTF("clear work_done flag\n"); + work_done = 0; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(MULTIPATH_LOOKAROUND) { + if (!roseMultipathLookaround(t, scratch, ri->look_index, + ri->reach_index, ri->count, + ri->last_start, ri->start_mask, + end)) { + DEBUG_PRINTF("failed multi-path lookaround check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_16x8) { + if (!roseCheckMultipathShufti16x8(scratch, ri, end)) { + DEBUG_PRINTF("failed multi-path shufti 16x8 check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x8) { + if (!roseCheckMultipathShufti32x8(scratch, ri, end)) { + DEBUG_PRINTF("failed multi-path shufti 32x8 check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x16) { + if (!roseCheckMultipathShufti32x16(scratch, ri, end)) { + DEBUG_PRINTF("failed multi-path shufti 32x16 check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_64) { + if (!roseCheckMultipathShufti64(scratch, ri, end)) { + DEBUG_PRINTF("failed multi-path shufti 64 check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; } } PROGRAM_NEXT_INSTRUCTION diff --git a/src/rose/rose_build.h b/src/rose/rose_build.h index c71671fa0..cbb925f79 100644 --- a/src/rose/rose_build.h +++ b/src/rose/rose_build.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,8 +40,9 @@ #include "ue2common.h" #include "rose_common.h" #include "rose_in_graph.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/charreach.h" +#include "util/noncopyable.h" #include "util/ue2_containers.h" #include "util/ue2string.h" @@ -50,8 +51,6 @@ #include #include -#include - struct NFA; struct SmallWriteEngine; struct RoseEngine; @@ -80,7 +79,7 @@ class RoseDedupeAux { /** \brief Abstract interface intended for callers from elsewhere in the tree, * real underlying implementation is RoseBuildImpl in rose_build_impl.h. 
*/ -class RoseBuild : boost::noncopyable { +class RoseBuild : noncopyable { public: virtual ~RoseBuild(); @@ -88,8 +87,7 @@ class RoseBuild : boost::noncopyable { virtual void add(bool anchored, bool eod, const ue2_literal &lit, const ue2::flat_set &ids) = 0; - virtual bool addRose(const RoseInGraph &ig, bool prefilter, - bool finalChance = false) = 0; + virtual bool addRose(const RoseInGraph &ig, bool prefilter) = 0; virtual bool addSombeRose(const RoseInGraph &ig) = 0; virtual bool addOutfix(const NGHolder &h) = 0; @@ -115,7 +113,7 @@ class RoseBuild : boost::noncopyable { bool eod) = 0; /** \brief Construct a runtime implementation. */ - virtual ue2::aligned_unique_ptr buildRose(u32 minWidth) = 0; + virtual bytecode_ptr buildRose(u32 minWidth) = 0; virtual std::unique_ptr generateDedupeAux() const = 0; @@ -136,8 +134,6 @@ std::unique_ptr makeRoseBuilder(ReportManager &rm, bool roseCheckRose(const RoseInGraph &ig, bool prefilter, const ReportManager &rm, const CompileContext &cc); -size_t roseSize(const RoseEngine *t); - /* used by heuristics to determine the small write engine. High numbers are * intended to indicate a lightweight rose. */ u32 roseQuality(const RoseEngine *t); diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index 8b10bc7dc..4c895cafc 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -46,7 +46,6 @@ #include "nfagraph/ng_region.h" #include "nfagraph/ng_repeat.h" #include "nfagraph/ng_reports.h" -#include "nfagraph/ng_rose.h" #include "nfagraph/ng_util.h" #include "nfagraph/ng_width.h" #include "util/charreach.h" @@ -57,6 +56,7 @@ #include "util/dump_charclass.h" #include "util/graph_range.h" #include "util/make_unique.h" +#include "util/noncopyable.h" #include "util/order_check.h" #include "util/report_manager.h" #include "util/ue2string.h" @@ -69,8 +69,6 @@ #include #include -#include - using namespace std; namespace ue2 { @@ -78,16 +76,13 @@ namespace ue2 { /** * \brief Data used by most of the construction code in this file. */ -struct RoseBuildData : boost::noncopyable { +struct RoseBuildData : noncopyable { RoseBuildData(const RoseInGraph &ig_in, bool som_in) : ig(ig_in), som(som_in) {} /** Input rose graph. */ const RoseInGraph &ig; - /** Mapping from engine graph to constructed DFA for pre-build DFAs. */ - ue2::unordered_map > early_dfas; - /** Edges we've transformed (in \ref transformAnchoredLiteralOverlap) which * require ANCH history to prevent overlap. 
*/ ue2::unordered_set anch_history_edges; @@ -281,8 +276,8 @@ void createVertices(RoseBuildImpl *tbi, if (prefix_graph) { g[w].left.graph = prefix_graph; - if (contains(bd.early_dfas, prefix_graph.get())) { - g[w].left.dfa = bd.early_dfas.at(prefix_graph.get()); + if (edge_props.dfa) { + g[w].left.dfa = edge_props.dfa; } g[w].left.haig = edge_props.haig; g[w].left.lag = prefix_lag; @@ -300,7 +295,7 @@ void createVertices(RoseBuildImpl *tbi, if (bd.som && !g[w].left.haig) { /* no prefix - som based on literal start */ assert(!prefix_graph); - g[w].som_adjust = tbi->literals.right.at(literalId).elength(); + g[w].som_adjust = tbi->literals.at(literalId).elength(); DEBUG_PRINTF("set som_adjust to %u\n", g[w].som_adjust); } @@ -338,7 +333,7 @@ void createVertices(RoseBuildImpl *tbi, u32 ghostId = tbi->literal_info[literalId].undelayed_id; DEBUG_PRINTF("creating delay ghost vertex, id=%u\n", ghostId); assert(ghostId != literalId); - assert(tbi->literals.right.at(ghostId).delay == 0); + assert(tbi->literals.at(ghostId).delay == 0); // Adjust offsets, removing delay. u32 ghost_min = min_offset, ghost_max = max_offset; @@ -726,7 +721,9 @@ void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u, RoseEdge e = add_edge(v, w, g); g[e].minBound = 0; g[e].maxBound = 0; - g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE; + /* No need to set history as the event is only delivered at the last + * byte anyway - no need to invalidate stale entries. */ + g[e].history = ROSE_ROLE_HISTORY_NONE; DEBUG_PRINTF("accept eod vertex (index=%zu)\n", g[w].index); } } @@ -769,9 +766,9 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi, assert(!g[u].suffix); if (ig[iv].type == RIV_ACCEPT) { assert(!tbi->isAnyStart(u)); - if (contains(bd.early_dfas, edge_props.graph.get())) { + if (edge_props.dfa) { DEBUG_PRINTF("adding early dfa suffix to i%zu\n", g[u].index); - g[u].suffix.rdfa = bd.early_dfas.at(edge_props.graph.get()); + g[u].suffix.rdfa = edge_props.dfa; g[u].suffix.dfa_min_width = findMinWidth(*edge_props.graph); g[u].suffix.dfa_max_width = findMaxWidth(*edge_props.graph); } else if (edge_props.graph) { @@ -1033,18 +1030,9 @@ bool empty(const GraphT &g) { return vi == ve; } -/* We only try to implement as a dfa if a non-nullptr as_dfa is provided to return - * the raw dfa to. 
*/ static -bool canImplementGraph(RoseBuildImpl *tbi, const RoseInGraph &in, NGHolder &h, - const vector &edges, bool prefilter, - const ReportManager &rm, const CompileContext &cc, - bool finalChance, unique_ptr *as_dfa) { - assert(!edges.empty()); - assert(&*in[edges[0]].graph == &h); - - assert(h.kind == whatRoseIsThis(in, edges[0])); - +bool canImplementGraph(NGHolder &h, bool prefilter, const ReportManager &rm, + const CompileContext &cc) { if (isImplementableNFA(h, &rm, cc)) { return true; } @@ -1061,64 +1049,6 @@ bool canImplementGraph(RoseBuildImpl *tbi, const RoseInGraph &in, NGHolder &h, } } - if (as_dfa) { - switch (h.kind) { - case NFA_OUTFIX: /* 'prefix' of eod */ - case NFA_PREFIX: - if (!cc.grey.earlyMcClellanPrefix) { - return false; - } - break; - case NFA_INFIX: - if (!cc.grey.earlyMcClellanInfix) { - return false; - } - break; - case NFA_SUFFIX: - if (!cc.grey.earlyMcClellanSuffix) { - return false; - } - break; - case NFA_EAGER_PREFIX: - case NFA_REV_PREFIX: - case NFA_OUTFIX_RAW: - DEBUG_PRINTF("kind %u\n", (u32)h.kind); - assert(0); - } - assert(!*as_dfa); - assert(tbi); - vector > triggers; - u32 min_offset = ~0U; - u32 max_offset = 0; - for (const auto &e : edges) { - RoseInVertex s = source(e, in); - RoseInVertex t = target(e, in); - if (in[s].type == RIV_LITERAL) { - triggers.push_back(as_cr_seq(in[s].s)); - } - if (in[t].type == RIV_ACCEPT_EOD) { - /* TODO: support eod prefixes */ - return false; - } - ENSURE_AT_LEAST(&max_offset, in[s].max_offset); - LIMIT_TO_AT_MOST(&min_offset, in[s].min_offset); - } - - if (!generates_callbacks(h)) { - setReportId(h, tbi->getNewNfaReport()); - } - - bool single_trigger = min_offset == max_offset; - - DEBUG_PRINTF("trying for mcclellan (%u, %u)\n", min_offset, max_offset); - *as_dfa = buildMcClellan(h, &rm, single_trigger, triggers, cc.grey, - finalChance); - - if (*as_dfa) { - return true; - } - } - DEBUG_PRINTF("unable to build engine\n"); return false; } @@ -1573,8 +1503,7 @@ bool validateKinds(const RoseInGraph &g) { } #endif -bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter, - bool finalChance) { +bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter) { DEBUG_PRINTF("trying to rose\n"); assert(validateKinds(ig)); assert(hasCorrectlyNumberedVertices(ig)); @@ -1601,11 +1530,14 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter, for (const auto &e : edges_range(in)) { if (!in[e].graph) { + assert(!in[e].dfa); + assert(!in[e].haig); continue; // no graph } - if (in[e].haig) { - // Haigs are always implementable (we've already built the raw DFA). + if (in[e].haig || in[e].dfa) { + /* Early DFAs/Haigs are always implementable (we've already built + * the raw DFA). */ continue; } @@ -1623,17 +1555,10 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter, vector graph_edges; for (auto h : ordered_graphs) { - const vector &h_edges = graphs.at(h); - unique_ptr as_dfa; - /* allow finalChance as fallback is basically an outfix at this point */ - if (!canImplementGraph(this, in, *h, h_edges, prefilter, rm, cc, - finalChance, &as_dfa)) { + if (!canImplementGraph(*h, prefilter, rm, cc)) { return false; } - if (as_dfa) { - bd.early_dfas[h] = move(as_dfa); - } - insert(&graph_edges, graph_edges.end(), h_edges); + insert(&graph_edges, graph_edges.end(), graphs[h]); } /* we are now past the point of no return. 
We can start making irreversible @@ -1647,9 +1572,8 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter, assert(allMatchStatesHaveReports(h)); if (!generates_callbacks(whatRoseIsThis(in, e)) - && !contains(bd.early_dfas, &h) && in[target(e, in)].type != RIV_ACCEPT_EOD) { - setReportId(h, getNewNfaReport()); + set_report(h, getNewNfaReport()); } } @@ -1692,7 +1616,7 @@ bool roseCheckRose(const RoseInGraph &ig, bool prefilter, return false; } - map> graphs; + vector graphs; for (const auto &e : edges_range(ig)) { if (!ig[e].graph) { @@ -1704,12 +1628,11 @@ bool roseCheckRose(const RoseInGraph &ig, bool prefilter, continue; } - graphs[ig[e].graph.get()].push_back(e); + graphs.push_back(ig[e].graph.get()); } - for (const auto &m : graphs) { - if (!canImplementGraph(nullptr, ig, *m.first, m.second, prefilter, rm, - cc, false, nullptr)) { + for (const auto &g : graphs) { + if (!canImplementGraph(*g, prefilter, rm, cc)) { return false; } } @@ -1775,9 +1698,70 @@ void populateOutfixInfo(OutfixInfo &outfix, const NGHolder &h, populateReverseAccelerationInfo(outfix.rev_info, h); } +static +bool addEodOutfix(RoseBuildImpl &build, const NGHolder &h) { + map, ReportID> report_remap; + shared_ptr eod_leftfix + = makeRoseEodPrefix(h, build, report_remap); + + bool nfa_ok = isImplementableNFA(h, &build.rm, build.cc); + + /* TODO: check if early dfa is possible */ + + if (!nfa_ok) { + DEBUG_PRINTF("could not build as NFA\n"); + return false; + } + + u32 eod_event = getEodEventID(build); + + auto &g = build.g; + for (const auto &report_mapping : report_remap) { + RoseVertex v = add_vertex(g); + g[v].literals.insert(eod_event); + build.literal_info[eod_event].vertices.insert(v); + + g[v].left.graph = eod_leftfix; + g[v].left.leftfix_report = report_mapping.second; + g[v].left.lag = 0; + RoseEdge e1 = add_edge(build.anchored_root, v, g); + g[e1].minBound = 0; + g[e1].maxBound = ROSE_BOUND_INF; + g[v].min_offset = findMinWidth(*eod_leftfix); + g[v].max_offset = ROSE_BOUND_INF; + + depth max_width = findMaxWidth(*g[v].left.graph); + if (max_width.is_finite() && isPureAnchored(*eod_leftfix)) { + g[e1].maxBound = max_width; + g[v].max_offset = max_width; + } + + g[e1].history = ROSE_ROLE_HISTORY_NONE; // handled by prefix + RoseVertex w = add_vertex(g); + g[w].eod_accept = true; + g[w].reports = report_mapping.first; + g[w].min_offset = g[v].min_offset; + g[w].max_offset = g[v].max_offset; + RoseEdge e = add_edge(v, w, g); + g[e].minBound = 0; + g[e].maxBound = 0; + g[e].history = ROSE_ROLE_HISTORY_NONE; + DEBUG_PRINTF("accept eod vertex (index=%zu)\n", g[w].index); + } + + return true; +} + bool RoseBuildImpl::addOutfix(const NGHolder &h) { DEBUG_PRINTF("%zu vertices, %zu edges\n", num_vertices(h), num_edges(h)); + /* TODO: handle more than one report */ + if (!in_degree(h.accept, h) + && all_reports(h).size() == 1 + && addEodOutfix(*this, h)) { + return true; + } + const u32 nfa_states = isImplementableNFA(h, &rm, cc); if (nfa_states) { DEBUG_PRINTF("implementable as an NFA in %u states\n", nfa_states); @@ -1923,16 +1907,20 @@ void removeAddedLiterals(RoseBuildImpl &tbi, const flat_set &lit_ids) { return; } + DEBUG_PRINTF("remove last %zu literals\n", lit_ids.size()); + // lit_ids should be a contiguous range. 
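+    // (For example, with a 10-entry literal table a valid lit_ids set is
+    // {7, 8, 9}: the asserts below check that the ids are contiguous and sit
+    // at the very end of the table before erase_back() trims them off.)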
assert(lit_ids.size() == *lit_ids.rbegin() - *lit_ids.begin() + 1); + assert(*lit_ids.rbegin() == tbi.literals.size() - 1); - for (const u32 &lit_id : lit_ids) { - assert(lit_id < tbi.literal_info.size()); - assert(tbi.literals.right.at(lit_id).table == ROSE_ANCHORED); - assert(tbi.literal_info[lit_id].vertices.empty()); + assert(all_of_in(lit_ids, [&](u32 lit_id) { + return lit_id < tbi.literal_info.size() && + tbi.literals.at(lit_id).table == ROSE_ANCHORED && + tbi.literal_info[lit_id].vertices.empty(); + })); - tbi.literals.right.erase(lit_id); - } + tbi.literals.erase_back(lit_ids.size()); + assert(tbi.literals.size() == *lit_ids.begin()); // lit_ids should be at the end of tbi.literal_info. assert(tbi.literal_info.size() == *lit_ids.rbegin() + 1); @@ -1940,8 +1928,7 @@ void removeAddedLiterals(RoseBuildImpl &tbi, const flat_set &lit_ids) { } bool RoseBuildImpl::addAnchoredAcyclic(const NGHolder &h) { - vector vertexDepths; - calcDepthsFrom(h, h.start, vertexDepths); + auto vertexDepths = calcDepthsFrom(h, h.start); map > reportMap; /* NFAVertex -> literal ids */ map depthMap; /* literal id -> min/max depth */ diff --git a/src/rose/rose_build_add_mask.cpp b/src/rose/rose_build_add_mask.cpp index de3bdf0a3..bd8eed0c0 100644 --- a/src/rose/rose_build_add_mask.cpp +++ b/src/rose/rose_build_add_mask.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -480,7 +480,7 @@ void addTransientMask(RoseBuildImpl &build, const vector &mask, // Everyone gets the same report ID. ReportID mask_report = build.getNewNfaReport(); - setReportId(*mask_graph, mask_report); + set_report(*mask_graph, mask_report); // Build the HWLM literal mask. vector msk, cmp; diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index 3d0affc6b..a2af160e4 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,6 +30,7 @@ #include "grey.h" #include "rose_build_impl.h" +#include "rose_build_matchers.h" #include "rose_internal.h" #include "ue2common.h" #include "nfa/dfa_min.h" @@ -71,6 +72,8 @@ namespace ue2 { #define INIT_STATE (DEAD_STATE + 1) +#define NO_FRAG_ID (~0U) + // Adds a vertex with the given reach. static NFAVertex add_vertex(NGHolder &h, const CharReach &cr) { @@ -173,7 +176,7 @@ void mergeAnchoredDfas(vector> &dfas, } static -void remapAnchoredReports(raw_dfa &rdfa, const RoseBuildImpl &build) { +void remapAnchoredReports(raw_dfa &rdfa, const vector &frag_map) { for (dstate &ds : rdfa.states) { assert(ds.reports_eod.empty()); // Not used in anchored matcher. 
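+        // ds.reports still holds rose graph literal ids at this point; each
+        // id is rewritten below to the id of the fragment owning the literal.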
if (ds.reports.empty()) { @@ -182,33 +185,49 @@ void remapAnchoredReports(raw_dfa &rdfa, const RoseBuildImpl &build) { flat_set new_reports; for (auto id : ds.reports) { - assert(id < build.literal_info.size()); - new_reports.insert(build.literal_info.at(id).final_id); + assert(id < frag_map.size()); + new_reports.insert(frag_map[id]); } - ds.reports = move(new_reports); + ds.reports = std::move(new_reports); } } /** * \brief Replaces the report ids currently in the dfas (rose graph literal - * ids) with the final id for each literal. + * ids) with the fragment id for each literal. */ static -void remapAnchoredReports(RoseBuildImpl &build) { +void remapAnchoredReports(RoseBuildImpl &build, const vector &frag_map) { for (auto &m : build.anchored_nfas) { for (auto &rdfa : m.second) { assert(rdfa); - remapAnchoredReports(*rdfa, build); + remapAnchoredReports(*rdfa, frag_map); } } } +/** + * Returns mapping from literal ids to fragment ids. + */ +static +vector reverseFragMap(const RoseBuildImpl &build, + const vector &fragments) { + vector rev(build.literal_info.size(), NO_FRAG_ID); + for (const auto &f : fragments) { + for (u32 lit_id : f.lit_ids) { + assert(lit_id < rev.size()); + rev[lit_id] = f.fragment_id; + } + } + return rev; +} + /** * \brief Replace the reports (which are literal final_ids) in the given * raw_dfa with program offsets. */ static -void remapIdsToPrograms(raw_dfa &rdfa, const vector &litPrograms) { +void remapIdsToPrograms(const vector &fragments, raw_dfa &rdfa) { for (dstate &ds : rdfa.states) { assert(ds.reports_eod.empty()); // Not used in anchored matcher. if (ds.reports.empty()) { @@ -216,25 +235,27 @@ void remapIdsToPrograms(raw_dfa &rdfa, const vector &litPrograms) { } flat_set new_reports; - for (auto id : ds.reports) { - assert(id < litPrograms.size()); - new_reports.insert(litPrograms.at(id)); + for (auto fragment_id : ds.reports) { + const auto &frag = fragments.at(fragment_id); + new_reports.insert(frag.lit_program_offset); } - ds.reports = move(new_reports); + ds.reports = std::move(new_reports); } } static -void populate_holder(const simple_anchored_info &sai, const set &exit_ids, - NGHolder *h_in) { +unique_ptr populate_holder(const simple_anchored_info &sai, + const flat_set &exit_ids) { DEBUG_PRINTF("populating holder for ^.{%u,%u}%s\n", sai.min_bound, sai.max_bound, dumpString(sai.literal).c_str()); - NGHolder &h = *h_in; - set ends = addDotsToGraph(h, h.start, sai.min_bound, - sai.max_bound, CharReach::dot()); + auto h_ptr = make_unique(); + NGHolder &h = *h_ptr; + auto ends = addDotsToGraph(h, h.start, sai.min_bound, sai.max_bound, + CharReach::dot()); NFAVertex v = addToGraph(h, ends, sai.literal); add_edge(v, h.accept, h); h[v].reports.insert(exit_ids.begin(), exit_ids.end()); + return h_ptr; } u32 anchoredStateSize(const anchored_matcher_info &atable) { @@ -467,7 +488,7 @@ bool check_dupe(const raw_dfa &rdfa, } static -bool check_dupe_simple(const RoseBuildImpl &tbi, u32 min_bound, u32 max_bound, +bool check_dupe_simple(const RoseBuildImpl &build, u32 min_bound, u32 max_bound, const ue2_literal &lit, ReportID *remap) { if (!remap) { DEBUG_PRINTF("no remap\n"); @@ -475,8 +496,8 @@ bool check_dupe_simple(const RoseBuildImpl &tbi, u32 min_bound, u32 max_bound, } simple_anchored_info sai(min_bound, max_bound, lit); - if (contains(tbi.anchored_simple, sai)) { - *remap = *tbi.anchored_simple.at(sai).begin(); + if (contains(build.anchored_simple, sai)) { + *remap = *build.anchored_simple.at(sai).begin(); return true; } @@ -640,7 +661,7 @@ bool 
isSimple(const NGHolder &h, u32 *min_bound, u32 *max_bound, } static -int finalise_out(RoseBuildImpl &tbi, const NGHolder &h, +int finalise_out(RoseBuildImpl &build, const NGHolder &h, const Automaton_Holder &autom, unique_ptr out_dfa, ReportID *remap) { u32 min_bound = ~0U; @@ -649,12 +670,12 @@ int finalise_out(RoseBuildImpl &tbi, const NGHolder &h, u32 simple_report = MO_INVALID_IDX; if (isSimple(h, &min_bound, &max_bound, &lit, &simple_report)) { assert(simple_report != MO_INVALID_IDX); - if (check_dupe_simple(tbi, min_bound, max_bound, lit, remap)) { + if (check_dupe_simple(build, min_bound, max_bound, lit, remap)) { DEBUG_PRINTF("found duplicate remapping to %u\n", *remap); return ANCHORED_REMAP; } DEBUG_PRINTF("add with report %u\n", simple_report); - tbi.anchored_simple[simple_anchored_info(min_bound, max_bound, lit)] + build.anchored_simple[simple_anchored_info(min_bound, max_bound, lit)] .insert(simple_report); return ANCHORED_SUCCESS; } @@ -664,15 +685,15 @@ int finalise_out(RoseBuildImpl &tbi, const NGHolder &h, out_dfa->alpha_size = autom.alphasize; out_dfa->alpha_remap = autom.alpha; auto hash = hash_dfa_no_reports(*out_dfa); - if (check_dupe(*out_dfa, tbi.anchored_nfas[hash], remap)) { + if (check_dupe(*out_dfa, build.anchored_nfas[hash], remap)) { return ANCHORED_REMAP; } - tbi.anchored_nfas[hash].push_back(move(out_dfa)); + build.anchored_nfas[hash].push_back(move(out_dfa)); return ANCHORED_SUCCESS; } static -int addAutomaton(RoseBuildImpl &tbi, const NGHolder &h, ReportID *remap) { +int addAutomaton(RoseBuildImpl &build, const NGHolder &h, ReportID *remap) { if (num_vertices(h) > ANCHORED_NFA_STATE_LIMIT) { DEBUG_PRINTF("autom bad!\n"); return ANCHORED_FAIL; @@ -682,7 +703,7 @@ int addAutomaton(RoseBuildImpl &tbi, const NGHolder &h, ReportID *remap) { unique_ptr out_dfa = ue2::make_unique(NFA_OUTFIX_RAW); if (!determinise(autom, out_dfa->states, MAX_DFA_STATES)) { - return finalise_out(tbi, h, autom, move(out_dfa), remap); + return finalise_out(build, h, autom, move(out_dfa), remap); } DEBUG_PRINTF("determinise failed\n"); @@ -700,7 +721,7 @@ void setReports(NGHolder &h, const map> &reportMap, } } -int addAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &wrapper, +int addAnchoredNFA(RoseBuildImpl &build, const NGHolder &wrapper, const map> &reportMap) { NGHolder h; ue2::unordered_map orig_to_copy; @@ -711,10 +732,10 @@ int addAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &wrapper, clearReports(h); setReports(h, reportMap, orig_to_copy); - return addAutomaton(tbi, h, nullptr); + return addAutomaton(build, h, nullptr); } -int addToAnchoredMatcher(RoseBuildImpl &tbi, const NGHolder &anchored, +int addToAnchoredMatcher(RoseBuildImpl &build, const NGHolder &anchored, u32 exit_id, ReportID *remap) { NGHolder h; cloneHolder(h, anchored); @@ -725,23 +746,24 @@ int addToAnchoredMatcher(RoseBuildImpl &tbi, const NGHolder &anchored, h[v].reports.insert(exit_id); } - return addAutomaton(tbi, h, remap); + return addAutomaton(build, h, remap); } static -void buildSimpleDfas(const RoseBuildImpl &tbi, +void buildSimpleDfas(const RoseBuildImpl &build, const vector &frag_map, vector> *anchored_dfas) { /* we should have determinised all of these before so there should be no * chance of failure. 
*/ - for (const auto &simple : tbi.anchored_simple) { - set exit_ids; + flat_set exit_ids; + for (const auto &simple : build.anchored_simple) { + exit_ids.clear(); for (auto lit_id : simple.second) { - exit_ids.insert(tbi.literal_info[lit_id].final_id); + assert(lit_id < frag_map.size()); + exit_ids.insert(frag_map[lit_id]); } - NGHolder h; - populate_holder(simple.first, exit_ids, &h); - Automaton_Holder autom(h); - unique_ptr rdfa = ue2::make_unique(NFA_OUTFIX_RAW); + auto h = populate_holder(simple.first, exit_ids); + Automaton_Holder autom(*h); + auto rdfa = ue2::make_unique(NFA_OUTFIX_RAW); UNUSED int rv = determinise(autom, rdfa->states, MAX_DFA_STATES); assert(!rv); rdfa->start_anchored = INIT_STATE; @@ -758,7 +780,8 @@ void buildSimpleDfas(const RoseBuildImpl &tbi, * from RoseBuildImpl. */ static -vector> getAnchoredDfas(RoseBuildImpl &build) { +vector> getAnchoredDfas(RoseBuildImpl &build, + const vector &frag_map) { vector> dfas; // DFAs that already exist as raw_dfas. @@ -771,7 +794,7 @@ vector> getAnchoredDfas(RoseBuildImpl &build) { // DFAs we currently have as simple literals. if (!build.anchored_simple.empty()) { - buildSimpleDfas(build, &dfas); + buildSimpleDfas(build, frag_map, &dfas); build.anchored_simple.clear(); } @@ -790,7 +813,7 @@ vector> getAnchoredDfas(RoseBuildImpl &build) { */ static size_t buildNfas(vector &anchored_dfas, - vector> *nfas, + vector> *nfas, vector *start_offset, const CompileContext &cc, const ReportManager &rm) { const size_t num_dfas = anchored_dfas.size(); @@ -806,7 +829,7 @@ size_t buildNfas(vector &anchored_dfas, minimize_hopcroft(rdfa, cc.grey); - auto nfa = mcclellanCompile(rdfa, cc, rm); + auto nfa = mcclellanCompile(rdfa, cc, rm, false); if (!nfa) { assert(0); throw std::bad_alloc(); @@ -823,7 +846,8 @@ size_t buildNfas(vector &anchored_dfas, return total_size; } -vector buildAnchoredDfas(RoseBuildImpl &build) { +vector buildAnchoredDfas(RoseBuildImpl &build, + const vector &fragments) { vector dfas; if (build.anchored_nfas.empty() && build.anchored_simple.empty()) { @@ -831,9 +855,10 @@ vector buildAnchoredDfas(RoseBuildImpl &build) { return dfas; } - remapAnchoredReports(build); + const auto frag_map = reverseFragMap(build, fragments); + remapAnchoredReports(build, frag_map); - auto anch_dfas = getAnchoredDfas(build); + auto anch_dfas = getAnchoredDfas(build, frag_map); mergeAnchoredDfas(anch_dfas, build); dfas.reserve(anch_dfas.size()); @@ -844,22 +869,21 @@ vector buildAnchoredDfas(RoseBuildImpl &build) { return dfas; } -aligned_unique_ptr -buildAnchoredMatcher(RoseBuildImpl &build, vector &dfas, - const vector &litPrograms, size_t *asize) { +bytecode_ptr +buildAnchoredMatcher(RoseBuildImpl &build, const vector &fragments, + vector &dfas) { const CompileContext &cc = build.cc; if (dfas.empty()) { DEBUG_PRINTF("empty\n"); - *asize = 0; return nullptr; } for (auto &rdfa : dfas) { - remapIdsToPrograms(rdfa, litPrograms); + remapIdsToPrograms(fragments, rdfa); } - vector> nfas; + vector> nfas; vector start_offset; // start offset for each dfa (dots removed) size_t total_size = buildNfas(dfas, &nfas, &start_offset, cc, build.rm); @@ -867,8 +891,8 @@ buildAnchoredMatcher(RoseBuildImpl &build, vector &dfas, throw ResourceLimitError(); } - *asize = total_size; - auto atable = aligned_zmalloc_unique(total_size); + auto atable = + make_zeroed_bytecode_ptr(total_size, 64); char *curr = (char *)atable.get(); u32 state_offset = 0; @@ -894,7 +918,7 @@ buildAnchoredMatcher(RoseBuildImpl &build, vector &dfas, ami->anchoredMinDistance = 
start_offset[i]; } - DEBUG_PRINTF("success %zu\n", *asize); + DEBUG_PRINTF("success %zu\n", atable.size()); return atable; } diff --git a/src/rose/rose_build_anchored.h b/src/rose/rose_build_anchored.h index ef06fcbbe..37d268ac5 100644 --- a/src/rose/rose_build_anchored.h +++ b/src/rose/rose_build_anchored.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,9 +30,9 @@ #define ROSE_BUILD_ANCHORED #include "ue2common.h" -#include "rose_build.h" +#include "rose_build_impl.h" #include "nfagraph/ng_holder.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include #include @@ -44,11 +44,13 @@ namespace ue2 { class RoseBuildImpl; struct raw_dfa; +struct LitFragment; /** * \brief Construct a set of anchored DFAs from our anchored literals/engines. */ -std::vector buildAnchoredDfas(RoseBuildImpl &build); +std::vector buildAnchoredDfas(RoseBuildImpl &build, + const std::vector &fragments); /** * \brief Construct an anchored_matcher_info runtime structure from the given @@ -57,9 +59,10 @@ std::vector buildAnchoredDfas(RoseBuildImpl &build); * Remap the literal final_ids used for raw_dfa reports to the program offsets * given in litPrograms. */ -aligned_unique_ptr -buildAnchoredMatcher(RoseBuildImpl &build, std::vector &dfas, - const std::vector &litPrograms, size_t *asize); +bytecode_ptr +buildAnchoredMatcher(RoseBuildImpl &build, + const std::vector &fragments, + std::vector &dfas); u32 anchoredStateSize(const anchored_matcher_info &atable); @@ -67,10 +70,10 @@ u32 anchoredStateSize(const anchored_matcher_info &atable); #define ANCHORED_SUCCESS 1 #define ANCHORED_REMAP 2 -int addAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &wrapper, +int addAnchoredNFA(RoseBuildImpl &build, const NGHolder &wrapper, const std::map> &reportMap); -int addToAnchoredMatcher(RoseBuildImpl &tbi, const NGHolder &anchored, +int addToAnchoredMatcher(RoseBuildImpl &build, const NGHolder &anchored, u32 exit_id, ReportID *remap); } // namespace ue2 diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 9f4abcadf..4d0793bfe 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +33,7 @@ #include "hs_compile.h" // for HS_MODE_* #include "rose_build_add_internal.h" #include "rose_build_anchored.h" +#include "rose_build_dump.h" #include "rose_build_engine_blob.h" #include "rose_build_exclusive.h" #include "rose_build_groups.h" @@ -41,6 +42,7 @@ #include "rose_build_lookaround.h" #include "rose_build_matchers.h" #include "rose_build_program.h" +#include "rose_build_resources.h" #include "rose_build_scatter.h" #include "rose_build_util.h" #include "rose_build_width.h" @@ -73,7 +75,6 @@ #include "nfagraph/ng_width.h" #include "smallwrite/smallwrite_build.h" #include "som/slot_manager.h" -#include "util/alloc.h" #include "util/bitutils.h" #include "util/boundary_reports.h" #include "util/charreach.h" @@ -85,6 +86,7 @@ #include "util/graph_range.h" #include "util/make_unique.h" #include "util/multibit_build.h" +#include "util/noncopyable.h" #include "util/order_check.h" #include 
"util/popcount.h" #include "util/queue_index_factory.h" @@ -97,6 +99,7 @@ #include #include #include +#include #include #include #include @@ -129,140 +132,66 @@ namespace ue2 { namespace /* anon */ { -struct left_build_info { - // Constructor for an engine implementation. - left_build_info(u32 q, u32 l, u32 t, rose_group sm, - const std::vector &stops, u32 max_ql, u8 cm_count, - const CharReach &cm_cr) - : queue(q), lag(l), transient(t), squash_mask(sm), stopAlphabet(stops), - max_queuelen(max_ql), countingMiracleCount(cm_count), - countingMiracleReach(cm_cr) {} - - // Constructor for a lookaround implementation. - explicit left_build_info(const vector &look) - : has_lookaround(true), lookaround(look) {} - - u32 queue = 0; /* uniquely idents the left_build_info */ - u32 lag = 0; - u32 transient = 0; - rose_group squash_mask = ~rose_group{0}; - vector stopAlphabet; - u32 max_queuelen = 0; - u8 countingMiracleCount = 0; - CharReach countingMiracleReach; - u32 countingMiracleOffset = 0; /* populated later when laying out bytecode */ - bool has_lookaround = false; - vector lookaround; // alternative implementation to the NFA -}; - -/** - * \brief Structure tracking which resources are used by this Rose instance at - * runtime. - * - * We use this to control how much initialisation we need to do at the - * beginning of a stream/block at runtime. - */ -struct RoseResources { - bool has_outfixes = false; - bool has_suffixes = false; - bool has_leftfixes = false; - bool has_literals = false; - bool has_states = false; - bool checks_groups = false; - bool has_lit_delay = false; - bool has_lit_check = false; // long literal support - bool has_anchored = false; - bool has_eod = false; -}; - -struct build_context : boost::noncopyable { +struct build_context : noncopyable { /** \brief information about engines to the left of a vertex */ map leftfix_info; /** \brief mapping from suffix to queue index. */ map suffixes; - /** \brief Mapping from vertex to key, for vertices with a - * CHECK_NOT_HANDLED instruction. */ - ue2::unordered_map handledKeys; - - /** \brief Number of roles with a state bit. - * - * This is set by assignStateIndices() and should be constant throughout - * the rest of the compile. - */ - size_t numStates = 0; + /** \brief engine info by queue. */ + map engine_info_by_queue; /** \brief Simple cache of programs written to engine blob, used for * deduplication. */ ue2::unordered_map program_cache; - /** \brief LookEntry list cache, so that we don't have to go scanning - * through the full list to find cases we've used already. */ - ue2::unordered_map, size_t> lookaround_cache; - - /** \brief Lookaround table for Rose roles. */ - vector lookaround; - - /** \brief State indices, for those roles that have them. */ - ue2::unordered_map roleStateIndices; + /** \brief State indices, for those roles that have them. + * Each vertex present has a unique state index in the range + * [0, roleStateIndices.size()). */ + unordered_map roleStateIndices; /** \brief Mapping from queue index to bytecode offset for built engines * that have already been pushed into the engine_blob. */ ue2::unordered_map engineOffsets; - /** \brief Literal programs, indexed by final_id, after they have been - * written to the engine_blob. */ - vector litPrograms; - - /** \brief List of long literals (ones with CHECK_LITERAL instructions) + /** \brief List of long literals (ones with CHECK_LONG_LIT instructions) * that need hash table support. 
*/ vector longLiterals; - /** \brief Minimum offset of a match from the floating table. */ - u32 floatingMinLiteralMatchOffset = 0; - - /** \brief Long literal length threshold, used in streaming mode. */ - size_t longLitLengthThreshold = 0; - /** \brief Contents of the Rose bytecode immediately following the * RoseEngine. */ RoseEngineBlob engine_blob; - /** \brief True if reports need CATCH_UP instructions, to catch up anchored - * matches, suffixes, outfixes etc. */ - bool needs_catchup = false; - /** \brief True if this Rose engine has an MPV engine. */ bool needs_mpv_catchup = false; /** \brief Resources in use (tracked as programs are added). */ RoseResources resources; +}; - /** \brief Mapping from every vertex to the groups that must be on for that - * vertex to be reached. */ - ue2::unordered_map vertex_group_map; - - /** \brief Global bitmap of groups that can be squashed. */ - rose_group squashable_groups = 0; +/** \brief subengine info including built engine and +* corresponding triggering rose vertices */ +struct ExclusiveSubengine { + bytecode_ptr nfa; + vector vertices; }; -} +/** \brief exclusive info to build tamarama */ +struct ExclusiveInfo : noncopyable { + // subengine info + vector subengines; + // all the report in tamarama + set reports; + // assigned queue id + u32 queue; +}; -static -const NFA *get_nfa_from_blob(const build_context &bc, u32 qi) { - assert(contains(bc.engineOffsets, qi)); - u32 nfa_offset = bc.engineOffsets.at(qi); - assert(nfa_offset >= bc.engine_blob.base_offset); - const NFA *n = (const NFA *)(bc.engine_blob.data() + nfa_offset - - bc.engine_blob.base_offset); - assert(n->queueIndex == qi); - return n; } static -const NFA *add_nfa_to_blob(build_context &bc, NFA &nfa) { +void add_nfa_to_blob(build_context &bc, NFA &nfa) { u32 qi = nfa.queueIndex; u32 nfa_offset = bc.engine_blob.add(nfa, nfa.length); DEBUG_PRINTF("added nfa qi=%u, type=%u, length=%u at offset=%u\n", qi, @@ -270,10 +199,6 @@ const NFA *add_nfa_to_blob(build_context &bc, NFA &nfa) { assert(!contains(bc.engineOffsets, qi)); bc.engineOffsets.emplace(qi, nfa_offset); - - const NFA *n = get_nfa_from_blob(bc, qi); - assert(memcmp(&nfa, n, nfa.length) == 0); - return n; } static @@ -288,38 +213,39 @@ u32 countRosePrefixes(const vector &roses) { } /** - * \brief True if this Rose engine needs to run a catch up whenever a report is - * generated. + * \brief True if this Rose engine needs to run a catch up whenever a literal + * report is generated. * * Catch up is necessary if there are output-exposed engines (suffixes, - * outfixes) or an anchored table (anchored literals, acyclic DFAs). + * outfixes). */ static -bool needsCatchup(const RoseBuildImpl &build, - const vector &anchored_dfas) { +bool needsCatchup(const RoseBuildImpl &build) { + /* Note: we could be more selective about when we need to generate catch up + * instructions rather than just a boolean yes/no - for instance, if we know + * that a role can only match before the point that an outfix/suffix could + * match, we do not strictly need a catchup instruction. + * + * However, this would add a certain amount of complexity to the + * catchup logic and would likely have limited applicability - how many + * reporting roles have a fixed max offset and how much time is spent on + * catchup for these cases? 
+ */ + if (!build.outfixes.empty()) { + /* TODO: check that they have non-eod reports */ DEBUG_PRINTF("has outfixes\n"); return true; } - if (!anchored_dfas.empty()) { - DEBUG_PRINTF("has anchored dfas\n"); - return true; - } const RoseGraph &g = build.g; for (auto v : vertices_range(g)) { - if (build.root == v) { - continue; - } - if (build.anchored_root == v) { - continue; - } if (g[v].suffix) { + /* TODO: check that they have non-eod reports */ DEBUG_PRINTF("vertex %zu has suffix\n", g[v].index); return true; } - } DEBUG_PRINTF("no need for catch-up on report\n"); @@ -328,6 +254,11 @@ bool needsCatchup(const RoseBuildImpl &build, static bool isPureFloating(const RoseResources &resources, const CompileContext &cc) { + if (!resources.has_floating) { + DEBUG_PRINTF("no floating table\n"); + return false; + } + if (resources.has_outfixes || resources.has_suffixes || resources.has_leftfixes) { DEBUG_PRINTF("has engines\n"); @@ -355,8 +286,8 @@ bool isPureFloating(const RoseResources &resources, const CompileContext &cc) { } if (cc.streaming && resources.has_lit_check) { - DEBUG_PRINTF("has long literals in streaming mode, which needs " - "long literal table support\n"); + DEBUG_PRINTF("has long literals in streaming mode, which needs long " + "literal table support\n"); return false; } @@ -394,20 +325,21 @@ bool isSingleOutfix(const RoseBuildImpl &tbi) { } static -u8 pickRuntimeImpl(const RoseBuildImpl &build, const build_context &bc, +u8 pickRuntimeImpl(const RoseBuildImpl &build, const RoseResources &resources, UNUSED u32 outfixEndQueue) { - DEBUG_PRINTF("has_outfixes=%d\n", bc.resources.has_outfixes); - DEBUG_PRINTF("has_suffixes=%d\n", bc.resources.has_suffixes); - DEBUG_PRINTF("has_leftfixes=%d\n", bc.resources.has_leftfixes); - DEBUG_PRINTF("has_literals=%d\n", bc.resources.has_literals); - DEBUG_PRINTF("has_states=%d\n", bc.resources.has_states); - DEBUG_PRINTF("checks_groups=%d\n", bc.resources.checks_groups); - DEBUG_PRINTF("has_lit_delay=%d\n", bc.resources.has_lit_delay); - DEBUG_PRINTF("has_lit_check=%d\n", bc.resources.has_lit_check); - DEBUG_PRINTF("has_anchored=%d\n", bc.resources.has_anchored); - DEBUG_PRINTF("has_eod=%d\n", bc.resources.has_eod); - - if (isPureFloating(bc.resources, build.cc)) { + DEBUG_PRINTF("has_outfixes=%d\n", resources.has_outfixes); + DEBUG_PRINTF("has_suffixes=%d\n", resources.has_suffixes); + DEBUG_PRINTF("has_leftfixes=%d\n", resources.has_leftfixes); + DEBUG_PRINTF("has_literals=%d\n", resources.has_literals); + DEBUG_PRINTF("has_states=%d\n", resources.has_states); + DEBUG_PRINTF("checks_groups=%d\n", resources.checks_groups); + DEBUG_PRINTF("has_lit_delay=%d\n", resources.has_lit_delay); + DEBUG_PRINTF("has_lit_check=%d\n", resources.has_lit_check); + DEBUG_PRINTF("has_anchored=%d\n", resources.has_anchored); + DEBUG_PRINTF("has_floating=%d\n", resources.has_floating); + DEBUG_PRINTF("has_eod=%d\n", resources.has_eod); + + if (isPureFloating(resources, build.cc)) { return ROSE_RUNTIME_PURE_LITERAL; } @@ -444,7 +376,7 @@ bool needsMpvCatchup(const RoseBuildImpl &build) { } static -void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount, +void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount, u32 anchorStateSize, u32 activeArrayCount, u32 activeLeftCount, u32 laggedRoseCount, u32 longLitStreamStateRequired, u32 historyRequired, @@ -476,7 +408,7 @@ void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount, curr_offset += anchorStateSize; so->groups = curr_offset; - so->groups_size = (tbi.group_end + 
7) / 8; + so->groups_size = (build.group_end + 7) / 8; assert(so->groups_size <= sizeof(u64a)); curr_offset += so->groups_size; @@ -486,22 +418,22 @@ void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount, // Exhaustion multibit. so->exhausted = curr_offset; - curr_offset += mmbit_size(tbi.rm.numEkeys()); + curr_offset += mmbit_size(build.rm.numEkeys()); // SOM locations and valid/writeable multibit structures. - if (tbi.ssm.numSomSlots()) { - const u32 somWidth = tbi.ssm.somPrecision(); + if (build.ssm.numSomSlots()) { + const u32 somWidth = build.ssm.somPrecision(); if (somWidth) { // somWidth is zero in block mode. curr_offset = ROUNDUP_N(curr_offset, somWidth); so->somLocation = curr_offset; - curr_offset += tbi.ssm.numSomSlots() * somWidth; + curr_offset += build.ssm.numSomSlots() * somWidth; } else { so->somLocation = 0; } so->somValid = curr_offset; - curr_offset += mmbit_size(tbi.ssm.numSomSlots()); + curr_offset += mmbit_size(build.ssm.numSomSlots()); so->somWritable = curr_offset; - curr_offset += mmbit_size(tbi.ssm.numSomSlots()); + curr_offset += mmbit_size(build.ssm.numSomSlots()); } else { // No SOM handling, avoid growing the stream state any further. so->somLocation = 0; @@ -515,7 +447,10 @@ void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount, // Get the mask of initial vertices due to root and anchored_root. rose_group RoseBuildImpl::getInitialGroups() const { - rose_group groups = getSuccGroups(root) | getSuccGroups(anchored_root); + rose_group groups = getSuccGroups(root) + | getSuccGroups(anchored_root) + | boundary_group_mask; + DEBUG_PRINTF("initial groups = %016llx\n", groups); return groups; } @@ -599,8 +534,8 @@ void findFixedDepthTops(const RoseGraph &g, const set &triggers, * engine. */ static -aligned_unique_ptr pickImpl(aligned_unique_ptr dfa_impl, - aligned_unique_ptr nfa_impl) { +bytecode_ptr pickImpl(bytecode_ptr dfa_impl, + bytecode_ptr nfa_impl) { assert(nfa_impl); assert(dfa_impl); assert(isDfaType(dfa_impl->type)); @@ -652,7 +587,7 @@ aligned_unique_ptr pickImpl(aligned_unique_ptr dfa_impl, * otherwise a Castle. */ static -aligned_unique_ptr +bytecode_ptr buildRepeatEngine(const CastleProto &proto, const map>> &triggers, const CompileContext &cc, const ReportManager &rm) { @@ -668,11 +603,10 @@ buildRepeatEngine(const CastleProto &proto, } static -aligned_unique_ptr getDfa(raw_dfa &rdfa, bool is_transient, - const CompileContext &cc, - const ReportManager &rm) { +bytecode_ptr getDfa(raw_dfa &rdfa, bool is_transient, + const CompileContext &cc, const ReportManager &rm) { // Unleash the Sheng!! - auto dfa = shengCompile(rdfa, cc, rm); + auto dfa = shengCompile(rdfa, cc, rm, false); if (!dfa && !is_transient) { // Sheng wasn't successful, so unleash McClellan! /* We don't try the hybrid for transient prefixes due to the extra @@ -681,14 +615,14 @@ aligned_unique_ptr getDfa(raw_dfa &rdfa, bool is_transient, } if (!dfa) { // Sheng wasn't successful, so unleash McClellan! 
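The trailing comment above introduces the final fallback in `getDfa()`. A minimal, self-contained sketch of the three-stage fallback chain, with stub functions standing in for `shengCompile`, the hybrid compiler and `mcclellanCompile` (the middle hybrid step is an assumption based on the surrounding comment about transient prefixes):

```cpp
#include <memory>

struct Engine {}; // stand-in for NFA
using EnginePtr = std::unique_ptr<Engine>;

EnginePtr tryShengSketch() { return nullptr; }    // may fail
EnginePtr tryHybridSketch() { return nullptr; }   // may fail
EnginePtr tryMcClellanSketch() { return EnginePtr(new Engine); }

// Sheng first; the hybrid is skipped for transient prefixes (extra
// bytecode, usually run on small blocks); McClellan is the last resort.
EnginePtr getDfaSketch(bool is_transient) {
    auto dfa = tryShengSketch();
    if (!dfa && !is_transient) {
        dfa = tryHybridSketch();
    }
    if (!dfa) {
        dfa = tryMcClellanSketch();
    }
    return dfa;
}
```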
- dfa = mcclellanCompile(rdfa, cc, rm); + dfa = mcclellanCompile(rdfa, cc, rm, false); } return dfa; } /* builds suffix nfas */ static -aligned_unique_ptr +bytecode_ptr buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, const map &fixed_depth_tops, const map>> &triggers, @@ -810,21 +744,21 @@ void findTriggerSequences(const RoseBuildImpl &tbi, const u32 top = e.first; const set &lit_ids = e.second; - for (u32 id : lit_ids) { - const rose_literal_id &lit = tbi.literals.right.at(id); + for (u32 id : lit_ids) { + const rose_literal_id &lit = tbi.literals.at(id); (*trigger_lits)[top].push_back(as_cr_seq(lit)); } } } -static aligned_unique_ptr -makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, - const bool is_prefix, const bool is_transient, - const map > &infixTriggers, - const CompileContext &cc) { +static +bytecode_ptr makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, + const bool is_prefix, const bool is_transient, + const map> &infixTriggers, + const CompileContext &cc) { const ReportManager &rm = tbi.rm; - aligned_unique_ptr n; + bytecode_ptr n; // Should compress state if this rose is non-transient and we're in // streaming mode. @@ -969,8 +903,8 @@ u32 decreaseLag(const RoseBuildImpl &build, NGHolder &h, for (RoseVertex v : succs) { u32 lag = rg[v].left.lag; for (u32 lit_id : rg[v].literals) { - u32 delay = build.literals.right.at(lit_id).delay; - const ue2_literal &literal = build.literals.right.at(lit_id).s; + u32 delay = build.literals.at(lit_id).delay; + const ue2_literal &literal = build.literals.at(lit_id).s; assert(lag <= literal.length() + delay); size_t base = literal.length() + delay - lag; if (base >= literal.length()) { @@ -1105,6 +1039,31 @@ left_id updateLeftfixWithEager(RoseGraph &g, const eager_info &ei, return leftfix; } +static +void enforceEngineSizeLimit(const NFA *n, const Grey &grey) { + const size_t nfa_size = n->length; + // Global limit. + if (nfa_size > grey.limitEngineSize) { + throw ResourceLimitError(); + } + + // Type-specific limit checks follow. + + if (isDfaType(n->type)) { + if (nfa_size > grey.limitDFASize) { + throw ResourceLimitError(); + } + } else if (isNfaType(n->type)) { + if (nfa_size > grey.limitNFASize) { + throw ResourceLimitError(); + } + } else if (isLbrType(n->type)) { + if (nfa_size > grey.limitLBRSize) { + throw ResourceLimitError(); + } + } +} + static bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi, const map > &infixTriggers, @@ -1125,7 +1084,7 @@ bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi, leftfix = updateLeftfixWithEager(g, eager.at(leftfix), succs); } - aligned_unique_ptr nfa; + bytecode_ptr nfa; // Need to build NFA, which is either predestined to be a Haig (in SOM mode) // or could be all manner of things. 
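`bytecode_ptr<NFA>`, which replaces `aligned_unique_ptr<NFA>` throughout these hunks, carries the allocation's size and alignment along with the pointer, which is why out-parameters such as `*asize` disappear. A minimal sketch of the idea (not Hyperscan's real implementation; assumes `bytes` is a multiple of `align`, as `std::aligned_alloc` requires):

```cpp
#include <cstddef>
#include <cstdlib>

template <typename T>
class sized_ptr {
    T *ptr_ = nullptr;
    std::size_t bytes_ = 0;
public:
    sized_ptr() = default;
    sized_ptr(std::size_t bytes, std::size_t align)
        : ptr_(static_cast<T *>(std::aligned_alloc(align, bytes))),
          bytes_(bytes) {}
    ~sized_ptr() { std::free(ptr_); }
    sized_ptr(const sized_ptr &) = delete;
    sized_ptr &operator=(const sized_ptr &) = delete;
    T *get() const { return ptr_; }
    std::size_t size() const { return bytes_; } // replaces out-params
    explicit operator bool() const { return ptr_ != nullptr; }
};
```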
if (leftfix.haig()) { @@ -1142,8 +1101,10 @@ bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi, setLeftNfaProperties(*nfa, leftfix); - build.leftfix_queue_map.emplace(leftfix, qi); nfa->queueIndex = qi; + enforceEngineSizeLimit(nfa.get(), cc.grey); + bc.engine_info_by_queue.emplace(nfa->queueIndex, + engine_info(nfa.get(), is_transient)); if (!prefix && !leftfix.haig() && leftfix.graph() && nfaStuckOn(*leftfix.graph())) { @@ -1171,7 +1132,7 @@ bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi, for (RoseVertex v : succs) { for (auto u : inv_adjacent_vertices_range(v, g)) { for (u32 lit_id : g[u].literals) { - lits.insert(build.literals.right.at(lit_id).s); + lits.insert(build.literals.at(lit_id).s); } } } @@ -1241,12 +1202,10 @@ void updateTops(const RoseGraph &g, const TamaInfo &tamaInfo, for (const auto &n : tamaInfo.subengines) { for (const auto &v : subengines[i].vertices) { if (is_suffix) { - tamaProto.add(n, g[v].index, g[v].suffix.top, - out_top_remap); + tamaProto.add(n, g[v].index, g[v].suffix.top, out_top_remap); } else { for (const auto &e : in_edges_range(v, g)) { - tamaProto.add(n, g[v].index, g[e].rose_top, - out_top_remap); + tamaProto.add(n, g[v].index, g[e].rose_top, out_top_remap); } } } @@ -1259,32 +1218,34 @@ shared_ptr constructContainerEngine(const RoseGraph &g, build_context &bc, const ExclusiveInfo &info, const u32 queue, - const bool is_suffix) { + const bool is_suffix, + const Grey &grey) { const auto &subengines = info.subengines; - auto tamaInfo = - constructTamaInfo(g, subengines, is_suffix); + auto tamaInfo = constructTamaInfo(g, subengines, is_suffix); map, u32> out_top_remap; auto n = buildTamarama(*tamaInfo, queue, out_top_remap); + enforceEngineSizeLimit(n.get(), grey); + bc.engine_info_by_queue.emplace(n->queueIndex, engine_info(n.get(), false)); add_nfa_to_blob(bc, *n); DEBUG_PRINTF("queue id:%u\n", queue); shared_ptr tamaProto = make_shared(); tamaProto->reports = info.reports; - updateTops(g, *tamaInfo, *tamaProto, subengines, - out_top_remap, is_suffix); + updateTops(g, *tamaInfo, *tamaProto, subengines, out_top_remap, is_suffix); return tamaProto; } static void buildInfixContainer(RoseGraph &g, build_context &bc, - const vector &exclusive_info) { + const vector &exclusive_info, + const Grey &grey) { // Build tamarama engine for (const auto &info : exclusive_info) { const u32 queue = info.queue; const auto &subengines = info.subengines; auto tamaProto = - constructContainerEngine(g, bc, info, queue, false); + constructContainerEngine(g, bc, info, queue, false, grey); for (const auto &sub : subengines) { const auto &verts = sub.vertices; @@ -1298,13 +1259,14 @@ void buildInfixContainer(RoseGraph &g, build_context &bc, static void buildSuffixContainer(RoseGraph &g, build_context &bc, - const vector &exclusive_info) { + const vector &exclusive_info, + const Grey &grey) { // Build tamarama engine for (const auto &info : exclusive_info) { const u32 queue = info.queue; const auto &subengines = info.subengines; - auto tamaProto = - constructContainerEngine(g, bc, info, queue, true); + auto tamaProto = constructContainerEngine(g, bc, info, queue, true, + grey); for (const auto &sub : subengines) { const auto &verts = sub.vertices; for (const auto &v : verts) { @@ -1320,9 +1282,9 @@ void buildSuffixContainer(RoseGraph &g, build_context &bc, static void updateExclusiveInfixProperties(const RoseBuildImpl &build, - build_context &bc, - const vector &exclusive_info, - set *no_retrigger_queues) { + const 
vector &exclusive_info, + map &leftfix_info, + set *no_retrigger_queues) { const RoseGraph &g = build.g; for (const auto &info : exclusive_info) { // Set leftfix optimisations, disabled for tamarama subengines @@ -1351,7 +1313,7 @@ void updateExclusiveInfixProperties(const RoseBuildImpl &build, set lits; for (auto u : inv_adjacent_vertices_range(v, build.g)) { for (u32 lit_id : build.g[u].literals) { - lits.insert(build.literals.right.at(lit_id).s); + lits.insert(build.literals.at(lit_id).s); } } DEBUG_PRINTF("%zu literals\n", lits.size()); @@ -1372,9 +1334,10 @@ void updateExclusiveInfixProperties(const RoseBuildImpl &build, const auto &verts = sub.vertices; for (const auto &v : verts) { u32 lag = g[v].left.lag; - bc.leftfix_info.emplace( - v, left_build_info(qi, lag, max_width, squash_mask, stop, - max_queuelen, cm_count, cm_cr)); + leftfix_info.emplace(v, left_build_info(qi, lag, max_width, + squash_mask, stop, + max_queuelen, cm_count, + cm_cr)); } } } @@ -1436,9 +1399,9 @@ void buildExclusiveInfixes(RoseBuildImpl &build, build_context &bc, info.queue = qif.get_queue(); exclusive_info.push_back(move(info)); } - updateExclusiveInfixProperties(build, bc, exclusive_info, + updateExclusiveInfixProperties(build, exclusive_info, bc.leftfix_info, no_retrigger_queues); - buildInfixContainer(g, bc, exclusive_info); + buildInfixContainer(g, bc, exclusive_info, build.cc.grey); } static @@ -1510,8 +1473,7 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc, findInfixTriggers(tbi, &infixTriggers); if (cc.grey.allowTamarama && cc.streaming && !do_prefix) { - findExclusiveInfixes(tbi, bc, qif, infixTriggers, - no_retrigger_queues); + findExclusiveInfixes(tbi, bc, qif, infixTriggers, no_retrigger_queues); } for (auto v : vertices_range(g)) { @@ -1540,7 +1502,7 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc, // TODO: Handle SOM-tracking cases as well. if (cc.grey.roseLookaroundMasks && is_transient && !g[v].left.tracksSom()) { - vector lookaround; + vector> lookaround; if (makeLeftfixLookaround(tbi, v, lookaround)) { DEBUG_PRINTF("implementing as lookaround!\n"); bc.leftfix_info.emplace(v, left_build_info(lookaround)); @@ -1613,26 +1575,26 @@ bool hasNonSmallBlockOutfix(const vector &outfixes) { } namespace { -class OutfixBuilder : public boost::static_visitor> { +class OutfixBuilder : public boost::static_visitor> { public: explicit OutfixBuilder(const RoseBuildImpl &build_in) : build(build_in) {} - aligned_unique_ptr operator()(boost::blank&) const { + bytecode_ptr operator()(boost::blank&) const { return nullptr; }; - aligned_unique_ptr operator()(unique_ptr &rdfa) const { + bytecode_ptr operator()(unique_ptr &rdfa) const { // Unleash the mighty DFA! return getDfa(*rdfa, false, build.cc, build.rm); } - aligned_unique_ptr operator()(unique_ptr &haig) const { + bytecode_ptr operator()(unique_ptr &haig) const { // Unleash the Goughfish! return goughCompile(*haig, build.ssm.somPrecision(), build.cc, build.rm); } - aligned_unique_ptr operator()(unique_ptr &holder) const { + bytecode_ptr operator()(unique_ptr &holder) const { const CompileContext &cc = build.cc; const ReportManager &rm = build.rm; @@ -1661,7 +1623,7 @@ class OutfixBuilder : public boost::static_visitor> { return n; } - aligned_unique_ptr operator()(UNUSED MpvProto &mpv) const { + bytecode_ptr operator()(UNUSED MpvProto &mpv) const { // MPV construction handled separately. 
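`OutfixBuilder` above follows the `boost::static_visitor` pattern: one overload per alternative stored in the outfix's variant `proto`, dispatched by `boost::apply_visitor`. A toy, self-contained illustration of the pattern (stand-in types, not Hyperscan's):

```cpp
#include <boost/variant.hpp>

// Stand-ins: the real variant holds raw_dfa, raw_som_dfa, NGHolder and
// MpvProto alternatives.
struct DfaProto {};
struct GraphProto {};
using Proto = boost::variant<DfaProto, GraphProto>;

struct Builder : boost::static_visitor<int> {
    int operator()(const DfaProto &) const { return 1; }   // "build a DFA"
    int operator()(const GraphProto &) const { return 2; } // "build an NFA"
};

int buildKind(const Proto &p) {
    return boost::apply_visitor(Builder(), p);
}
```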
assert(mpv.puffettes.empty()); return nullptr; @@ -1673,7 +1635,7 @@ class OutfixBuilder : public boost::static_visitor> { } static -aligned_unique_ptr buildOutfix(RoseBuildImpl &build, OutfixInfo &outfix) { +bytecode_ptr buildOutfix(const RoseBuildImpl &build, OutfixInfo &outfix) { assert(!outfix.is_dead()); // should not be marked dead. auto n = boost::apply_visitor(OutfixBuilder(build), outfix.proto); @@ -1719,6 +1681,9 @@ void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired, u32 qi = mpv_outfix->get_queue(tbi.qif); nfa->queueIndex = qi; + enforceEngineSizeLimit(nfa.get(), tbi.cc.grey); + bc.engine_info_by_queue.emplace(nfa->queueIndex, + engine_info(nfa.get(), false)); DEBUG_PRINTF("built mpv\n"); @@ -1777,6 +1742,9 @@ bool prepOutfixes(RoseBuildImpl &tbi, build_context &bc, setOutfixProperties(*n, out); n->queueIndex = out.get_queue(tbi.qif); + enforceEngineSizeLimit(n.get(), tbi.cc.grey); + bc.engine_info_by_queue.emplace(n->queueIndex, + engine_info(n.get(), false)); if (!*historyRequired && requires_decompress_key(*n)) { *historyRequired = 1; @@ -1789,7 +1757,7 @@ bool prepOutfixes(RoseBuildImpl &tbi, build_context &bc, } static -void assignSuffixQueues(RoseBuildImpl &build, build_context &bc) { +void assignSuffixQueues(RoseBuildImpl &build, map &suffixes) { const RoseGraph &g = build.g; for (auto v : vertices_range(g)) { @@ -1802,14 +1770,13 @@ void assignSuffixQueues(RoseBuildImpl &build, build_context &bc) { DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].index, s.graph()); // We may have already built this NFA. - if (contains(bc.suffixes, s)) { + if (contains(suffixes, s)) { continue; } u32 queue = build.qif.get_queue(); DEBUG_PRINTF("assigning %p to queue %u\n", s.graph(), queue); - bc.suffixes.emplace(s, queue); - build.suffix_queue_map.emplace(s, queue); + suffixes.emplace(s, queue); } } @@ -1875,14 +1842,14 @@ void buildExclusiveSuffixes(RoseBuildImpl &build, build_context &bc, } updateExclusiveSuffixProperties(build, exclusive_info, no_retrigger_queues); - buildSuffixContainer(g, bc, exclusive_info); + buildSuffixContainer(g, bc, exclusive_info, build.cc.grey); } static void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc, - QueueIndexFactory &qif, - map> &suffixTriggers, - set *no_retrigger_queues) { + QueueIndexFactory &qif, + map> &suffixTriggers, + set *no_retrigger_queues) { const RoseGraph &g = tbi.g; map suffixes; @@ -1972,6 +1939,10 @@ bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc, setSuffixProperties(*n, s, tbi.rm); n->queueIndex = queue; + enforceEngineSizeLimit(n.get(), tbi.cc.grey); + bc.engine_info_by_queue.emplace(n->queueIndex, + engine_info(n.get(), false)); + if (s.graph() && nfaStuckOn(*s.graph())) { /* todo: have corresponding * haig analysis */ assert(!s.haig()); @@ -2042,7 +2013,7 @@ bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif, no_retrigger_queues); } - assignSuffixQueues(tbi, bc); + assignSuffixQueues(tbi, bc.suffixes); if (!buildSuffixes(tbi, bc, no_retrigger_queues, suffixTriggers)) { return false; @@ -2065,65 +2036,47 @@ bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif, } static -void allocateStateSpace(const NFA *nfa, const set &transient_queues, - RoseStateOffsets *so, NfaInfo *nfa_infos, - u32 *currFullStateSize, u32 *maskStateSize, - u32 *tStateSize) { - u32 qi = nfa->queueIndex; - bool transient = transient_queues.find(qi) != transient_queues.end(); - u32 stateSize = verify_u32(nfa->streamStateSize); - +void 
allocateStateSpace(const engine_info &eng_info, NfaInfo &nfa_info, + RoseStateOffsets *so, u32 *scratchStateSize, + u32 *streamStateSize, u32 *transientStateSize) { u32 state_offset; - if (transient) { - state_offset = *tStateSize; - *tStateSize += stateSize; + if (eng_info.transient) { + // Transient engines do not use stream state, but must have room in + // transient state (stored in scratch). + state_offset = *transientStateSize; + *transientStateSize += eng_info.stream_size; } else { - // Pack NFA state on to the end of the Rose state. + // Pack NFA stream state on to the end of the Rose stream state. state_offset = so->end; - so->end += stateSize; - *maskStateSize += stateSize; + so->end += eng_info.stream_size; + *streamStateSize += eng_info.stream_size; } - nfa_infos[qi].stateOffset = state_offset; + nfa_info.stateOffset = state_offset; - // Uncompressed state must be aligned. - u32 scratchStateSize = verify_u32(nfa->scratchStateSize); - u32 alignReq = state_alignment(*nfa); - assert(alignReq); - while (*currFullStateSize % alignReq) { - (*currFullStateSize)++; - } - nfa_infos[qi].fullStateOffset = *currFullStateSize; - *currFullStateSize += scratchStateSize; + // Uncompressed state in scratch must be aligned. + *scratchStateSize = ROUNDUP_N(*scratchStateSize, eng_info.scratch_align); + nfa_info.fullStateOffset = *scratchStateSize; + *scratchStateSize += eng_info.scratch_size; } static -void findTransientQueues(const map &leftfix_info, - set *out) { - DEBUG_PRINTF("curating transient queues\n"); - for (const auto &build : leftfix_info | map_values) { - if (build.transient) { - DEBUG_PRINTF("q %u is transient\n", build.queue); - out->insert(build.queue); - } +void updateNfaState(const build_context &bc, vector &nfa_infos, + RoseStateOffsets *so, u32 *scratchStateSize, + u32 *streamStateSize, u32 *transientStateSize) { + if (nfa_infos.empty()) { + return; } -} - -static -void updateNfaState(const build_context &bc, RoseStateOffsets *so, - NfaInfo *nfa_infos, u32 *fullStateSize, u32 *nfaStateSize, - u32 *tStateSize) { - *nfaStateSize = 0; - *tStateSize = 0; - *fullStateSize = 0; - set transient_queues; - findTransientQueues(bc.leftfix_info, &transient_queues); + *streamStateSize = 0; + *transientStateSize = 0; + *scratchStateSize = 0; - for (const auto &m : bc.engineOffsets) { - const NFA *n = get_nfa_from_blob(bc, m.first); - allocateStateSpace(n, transient_queues, so, nfa_infos, fullStateSize, - nfaStateSize, tStateSize); + for (u32 qi = 0; qi < nfa_infos.size(); qi++) { + NfaInfo &nfa_info = nfa_infos[qi]; + const auto &eng_info = bc.engine_info_by_queue.at(qi); + allocateStateSpace(eng_info, nfa_info, so, scratchStateSize, + streamStateSize, transientStateSize); } } @@ -2162,9 +2115,8 @@ u32 RoseBuildImpl::calcHistoryRequired() const { } // Delayed literals contribute to history requirement as well. 
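The reworked `allocateStateSpace()` above splits each engine's state between the Rose stream state (non-transient engines) and aligned scratch space (uncompressed state). A toy worked example of the packing arithmetic, with invented sizes:

```cpp
#include <cassert>

// ROUNDUP_N-style helper: round v up to a multiple of n.
static unsigned roundup_n(unsigned v, unsigned n) {
    return (v + n - 1) / n * n;
}

int main() {
    unsigned stream_end = 100; // so->end: current end of stream state
    unsigned scratch = 13;     // current scratch ("full") state size

    // One non-transient engine: stream_size 8, scratch_size 32, align 16.
    unsigned stateOffset = stream_end; // packed at end of stream state
    stream_end += 8;

    scratch = roundup_n(scratch, 16);  // uncompressed state is aligned
    unsigned fullStateOffset = scratch;
    scratch += 32;

    assert(stateOffset == 100 && stream_end == 108);
    assert(fullStateOffset == 16 && scratch == 48);
}
```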
- for (const auto &e : literals.right) { - const u32 id = e.first; - const auto &lit = e.second; + for (u32 id = 0; id < literals.size(); id++) { + const auto &lit = literals.at(id); if (lit.delay) { // If the literal is delayed _and_ has a mask that is longer than // the literal, we need enough history to match the whole mask as @@ -2203,6 +2155,7 @@ u32 buildLastByteIter(const RoseGraph &g, build_context &bc) { auto it = bc.roleStateIndices.find(v); if (it != end(bc.roleStateIndices)) { lb_roles.push_back(it->second); + DEBUG_PRINTF("last byte %u\n", it->second); } } @@ -2210,35 +2163,10 @@ u32 buildLastByteIter(const RoseGraph &g, build_context &bc) { return 0; /* invalid offset */ } - vector iter; - mmbBuildSparseIterator(iter, lb_roles, bc.numStates); + auto iter = mmbBuildSparseIterator(lb_roles, bc.roleStateIndices.size()); return bc.engine_blob.add_iterator(iter); } -static -void enforceEngineSizeLimit(const NFA *n, const size_t nfa_size, const Grey &grey) { - // Global limit. - if (nfa_size > grey.limitEngineSize) { - throw ResourceLimitError(); - } - - // Type-specific limit checks follow. - - if (isDfaType(n->type)) { - if (nfa_size > grey.limitDFASize) { - throw ResourceLimitError(); - } - } else if (isNfaType(n->type)) { - if (nfa_size > grey.limitNFASize) { - throw ResourceLimitError(); - } - } else if (isLbrType(n->type)) { - if (nfa_size > grey.limitLBRSize) { - throw ResourceLimitError(); - } - } -} - static u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build, const vector &anchored_dfas) { @@ -2269,17 +2197,16 @@ u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build, } static -void buildSuffixEkeyLists(const RoseBuildImpl &tbi, build_context &bc, - const QueueIndexFactory &qif, - vector *out) { - out->resize(qif.allocated_count()); +vector buildSuffixEkeyLists(const RoseBuildImpl &build, build_context &bc, + const QueueIndexFactory &qif) { + vector out(qif.allocated_count()); - map > qi_to_ekeys; /* for determinism */ + map> qi_to_ekeys; /* for determinism */ for (const auto &e : bc.suffixes) { const suffix_id &s = e.first; u32 qi = e.second; - set ekeys = reportsToEkeys(all_reports(s), tbi.rm); + set ekeys = reportsToEkeys(all_reports(s), build.rm); if (!ekeys.empty()) { qi_to_ekeys[qi] = {ekeys.begin(), ekeys.end()}; @@ -2287,9 +2214,9 @@ void buildSuffixEkeyLists(const RoseBuildImpl &tbi, build_context &bc, } /* for each outfix also build elists */ - for (const auto &outfix : tbi.outfixes) { + for (const auto &outfix : build.outfixes) { u32 qi = outfix.get_queue(); - set ekeys = reportsToEkeys(all_reports(outfix), tbi.rm); + set ekeys = reportsToEkeys(all_reports(outfix), build.rm); if (!ekeys.empty()) { qi_to_ekeys[qi] = {ekeys.begin(), ekeys.end()}; @@ -2297,11 +2224,14 @@ void buildSuffixEkeyLists(const RoseBuildImpl &tbi, build_context &bc, } for (auto &e : qi_to_ekeys) { - assert(!e.second.empty()); - e.second.push_back(INVALID_EKEY); /* terminator */ - (*out)[e.first] = bc.engine_blob.add(e.second.begin(), - e.second.end()); + u32 qi = e.first; + auto &ekeys = e.second; + assert(!ekeys.empty()); + ekeys.push_back(INVALID_EKEY); /* terminator */ + out[qi] = bc.engine_blob.add_range(ekeys); } + + return out; } /** Returns sparse iter offset in engine blob. 
*/ @@ -2309,8 +2239,8 @@ static u32 buildEodNfaIterator(build_context &bc, const u32 activeQueueCount) { vector keys; for (u32 qi = 0; qi < activeQueueCount; ++qi) { - const NFA *n = get_nfa_from_blob(bc, qi); - if (nfaAcceptsEod(n)) { + const auto &eng_info = bc.engine_info_by_queue.at(qi); + if (eng_info.accepts_eod) { DEBUG_PRINTF("nfa qi=%u accepts eod\n", qi); keys.push_back(qi); } @@ -2322,8 +2252,7 @@ u32 buildEodNfaIterator(build_context &bc, const u32 activeQueueCount) { DEBUG_PRINTF("building iter for %zu nfas\n", keys.size()); - vector iter; - mmbBuildSparseIterator(iter, keys, activeQueueCount); + auto iter = mmbBuildSparseIterator(keys, activeQueueCount); return bc.engine_blob.add_iterator(iter); } @@ -2368,42 +2297,6 @@ bool anyEndfixMpvTriggers(const RoseBuildImpl &tbi) { return false; } -static -void populateNfaInfoBasics(const RoseBuildImpl &build, const build_context &bc, - const vector &outfixes, - const vector &ekeyListOffsets, - const set &no_retrigger_queues, - NfaInfo *infos) { - const u32 num_queues = build.qif.allocated_count(); - for (u32 qi = 0; qi < num_queues; qi++) { - const NFA *n = get_nfa_from_blob(bc, qi); - enforceEngineSizeLimit(n, n->length, build.cc.grey); - - NfaInfo &info = infos[qi]; - info.nfaOffset = bc.engineOffsets.at(qi); - info.ekeyListOffset = ekeyListOffsets[qi]; - info.no_retrigger = contains(no_retrigger_queues, qi) ? 1 : 0; - } - - // Mark outfixes that are in the small block matcher. - for (const auto &out : outfixes) { - const u32 qi = out.get_queue(); - infos[qi].in_sbmatcher = out.in_sbmatcher; - } - - // Mark suffixes triggered by EOD table literals. - const RoseGraph &g = build.g; - for (auto v : vertices_range(g)) { - if (!g[v].suffix) { - continue; - } - u32 qi = bc.suffixes.at(g[v].suffix); - if (build.isInETable(v)) { - infos[qi].eod = 1; - } - } -} - struct DerivedBoundaryReports { explicit DerivedBoundaryReports(const BoundaryReports &boundary) { insert(&report_at_0_eod_full, boundary.report_at_0_eod); @@ -2414,144 +2307,33 @@ struct DerivedBoundaryReports { }; static -void prepSomRevNfas(const SomSlotManager &ssm, u32 *rev_nfa_table_offset, - vector *nfa_offsets, u32 *currOffset) { - const deque> &nfas = ssm.getRevNfas(); - - *currOffset = ROUNDUP_N(*currOffset, alignof(u32)); - *rev_nfa_table_offset = *currOffset; - *currOffset += sizeof(u32) * nfas.size(); - - *currOffset = ROUNDUP_CL(*currOffset); - for (const auto &n : nfas) { - u32 bs_offset; - bs_offset = *currOffset; - nfa_offsets->push_back(bs_offset); - *currOffset += ROUNDUP_CL(n->length); +void addSomRevNfas(build_context &bc, RoseEngine &proto, + const SomSlotManager &ssm) { + const auto &nfas = ssm.getRevNfas(); + vector nfa_offsets; + nfa_offsets.reserve(nfas.size()); + for (const auto &nfa : nfas) { + assert(nfa); + u32 offset = bc.engine_blob.add(*nfa, nfa->length); + DEBUG_PRINTF("wrote SOM rev NFA %zu (len %u) to offset %u\n", + nfa_offsets.size(), nfa->length, offset); + nfa_offsets.push_back(offset); /* note: som rev nfas don't need a queue assigned as only run in block * mode reverse */ } - assert(nfa_offsets->size() == nfas.size()); -} - -static -void fillInSomRevNfas(RoseEngine *engine, const SomSlotManager &ssm, - u32 rev_nfa_table_offset, - const vector &nfa_offsets) { - const deque> &nfas = ssm.getRevNfas(); - assert(nfa_offsets.size() == nfas.size()); - - engine->somRevCount = (u32)nfas.size(); - engine->somRevOffsetOffset = rev_nfa_table_offset; - - if (nfas.empty()) { - return; - } - - char *out = (char *)engine + rev_nfa_table_offset; - size_t 
table_size = sizeof(u32) * nfa_offsets.size(); - memcpy(out, nfa_offsets.data(), table_size); - out = (char *)engine + ROUNDUP_CL(rev_nfa_table_offset + table_size); - - // Write the SOM reverse NFAs into place. - UNUSED size_t i = 0; - for (const auto &n : nfas) { - assert(n != nullptr); - assert(out == (char *)engine + nfa_offsets[i]); - - memcpy(out, n.get(), n->length); - out += ROUNDUP_CL(n->length); - DEBUG_PRINTF("wrote som rev nfa with len %u\n", n->length); - ++i; - } -} - -static -vector -getLiteralInfoByFinalId(const RoseBuildImpl &build, u32 final_id) { - vector out; - - const auto &final_id_to_literal = build.final_id_to_literal; - assert(contains(final_id_to_literal, final_id)); - - const auto &lits = final_id_to_literal.find(final_id)->second; - assert(!lits.empty()); - - for (const auto &lit_id : lits) { - const rose_literal_info &li = build.literal_info[lit_id]; - assert(li.final_id == final_id); - out.push_back(&li); - } - - return out; -} - -static -void applyFinalSpecialisation(RoseProgram &program) { - assert(!program.empty()); - assert(program.back().code() == ROSE_INSTR_END); - if (program.size() < 2) { - return; - } - - /* Replace the second-to-last instruction (before END) with a one-shot - * specialisation if available. */ - auto it = next(program.rbegin()); - if (auto *ri = dynamic_cast(it->get())) { - DEBUG_PRINTF("replacing REPORT with FINAL_REPORT\n"); - program.replace(it, make_unique( - ri->onmatch, ri->offset_adjust)); - } -} - -static -void recordResources(RoseResources &resources, const RoseProgram &program) { - for (const auto &ri : program) { - switch (ri->code()) { - case ROSE_INSTR_TRIGGER_SUFFIX: - resources.has_suffixes = true; - break; - case ROSE_INSTR_TRIGGER_INFIX: - case ROSE_INSTR_CHECK_INFIX: - case ROSE_INSTR_CHECK_PREFIX: - case ROSE_INSTR_SOM_LEFTFIX: - resources.has_leftfixes = true; - break; - case ROSE_INSTR_SET_STATE: - case ROSE_INSTR_CHECK_STATE: - case ROSE_INSTR_SPARSE_ITER_BEGIN: - case ROSE_INSTR_SPARSE_ITER_NEXT: - resources.has_states = true; - break; - case ROSE_INSTR_CHECK_GROUPS: - resources.checks_groups = true; - break; - case ROSE_INSTR_PUSH_DELAYED: - resources.has_lit_delay = true; - break; - case ROSE_INSTR_CHECK_LONG_LIT: - case ROSE_INSTR_CHECK_LONG_LIT_NOCASE: - resources.has_lit_check = true; - break; - default: - break; - } - } + proto.somRevCount = verify_u32(nfas.size()); + proto.somRevOffsetOffset = bc.engine_blob.add_range(nfa_offsets); } static -void recordResources(RoseResources &resources, - const RoseBuildImpl &build) { +void recordResources(RoseResources &resources, const RoseBuildImpl &build, + const vector &fragments) { if (!build.outfixes.empty()) { resources.has_outfixes = true; } - for (u32 i = 0; i < build.literal_info.size(); i++) { - if (build.hasFinalId(i)) { - resources.has_literals = true; - break; - } - } + + resources.has_literals = !fragments.empty(); const auto &g = build.g; for (const auto &v : vertices_range(g)) { @@ -2566,25 +2348,6 @@ void recordResources(RoseResources &resources, } } -static -void recordLongLiterals(build_context &bc, const RoseProgram &program) { - for (const auto &ri : program) { - if (const auto *ri_check = - dynamic_cast(ri.get())) { - DEBUG_PRINTF("found CHECK_LITERAL for string '%s'\n", - escapeString(ri_check->literal).c_str()); - bc.longLiterals.emplace_back(ri_check->literal, false); - continue; - } - if (const auto *ri_check = - dynamic_cast(ri.get())) { - DEBUG_PRINTF("found CHECK_LITERAL_NOCASE for string '%s'\n", - escapeString(ri_check->literal).c_str()); 
- bc.longLiterals.emplace_back(ri_check->literal, true); - } - } -} - static u32 writeProgram(build_context &bc, RoseProgram &&program) { if (program.empty()) { @@ -2592,6 +2355,8 @@ u32 writeProgram(build_context &bc, RoseProgram &&program) { return 0; } + applyFinalSpecialisation(program); + auto it = bc.program_cache.find(program); if (it != end(bc.program_cache)) { DEBUG_PRINTF("reusing cached program at %u\n", it->second); @@ -2599,73 +2364,43 @@ u32 writeProgram(build_context &bc, RoseProgram &&program) { } recordResources(bc.resources, program); - recordLongLiterals(bc, program); + recordLongLiterals(bc.longLiterals, program); - u32 len = 0; - auto prog_bytecode = writeProgram(bc.engine_blob, program, &len); - u32 offset = bc.engine_blob.add(prog_bytecode.get(), len, - ROSE_INSTR_MIN_ALIGN); - DEBUG_PRINTF("prog len %u written at offset %u\n", len, offset); + auto prog_bytecode = writeProgram(bc.engine_blob, program); + u32 offset = bc.engine_blob.add(prog_bytecode); + DEBUG_PRINTF("prog len %zu written at offset %u\n", prog_bytecode.size(), + offset); bc.program_cache.emplace(move(program), offset); return offset; } static -void buildActiveLeftIter(const vector &leftTable, - vector &out) { +u32 writeActiveLeftIter(RoseEngineBlob &engine_blob, + const vector &leftInfoTable) { vector keys; - for (size_t i = 0; i < leftTable.size(); i++) { - if (!leftTable[i].transient) { - DEBUG_PRINTF("rose %zu is active\n", i); + for (size_t i = 0; i < leftInfoTable.size(); i++) { + if (!leftInfoTable[i].transient) { + DEBUG_PRINTF("leftfix %zu is active\n", i); keys.push_back(verify_u32(i)); } } - DEBUG_PRINTF("%zu active roses\n", keys.size()); + DEBUG_PRINTF("%zu active leftfixes\n", keys.size()); if (keys.empty()) { - out.clear(); - return; - } - - mmbBuildSparseIterator(out, keys, leftTable.size()); -} - -static -bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e) { - const auto &g = build.g; - const auto v = target(e, g); - - if (!build.g[v].eod_accept) { - return false; - } - - // If there's a graph between us and EOD, we shouldn't be eager. - if (build.g[v].left) { - return false; - } - - // Must be exactly at EOD. - if (g[e].minBound != 0 || g[e].maxBound != 0) { - return false; - } - - // In streaming mode, we can only eagerly report EOD for literals in the - // EOD-anchored table, as that's the only time we actually know where EOD - // is. In block mode, we always have this information. 
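`writeProgram()` earlier in this hunk deduplicates identical Rose programs through `bc.program_cache`, so equivalent roles share one copy of bytecode in the engine blob. A toy model of that caching (the real cache is keyed on the hashed `RoseProgram` itself, not on a string):

```cpp
#include <cassert>
#include <string>
#include <unordered_map>

int main() {
    std::unordered_map<std::string, unsigned> cache;
    unsigned next_offset = 64;
    auto write = [&](const std::string &prog) -> unsigned {
        auto it = cache.find(prog);
        if (it != cache.end()) {
            return it->second; // reuse previously written program
        }
        unsigned offset = next_offset;
        next_offset += (unsigned)prog.size();
        cache.emplace(prog, offset);
        return offset;
    };
    assert(write("CHECK_GROUPS;REPORT;END") ==
           write("CHECK_GROUPS;REPORT;END")); // deduplicated
}
```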
- const auto u = source(e, g); - if (build.cc.streaming && !build.isInETable(u)) { - return false; + return 0; } - return true; + auto iter = mmbBuildSparseIterator(keys, verify_u32(leftInfoTable.size())); + return engine_blob.add_iterator(iter); } static bool hasEodAnchors(const RoseBuildImpl &build, const build_context &bc, u32 outfixEndQueue) { for (u32 i = 0; i < outfixEndQueue; i++) { - if (nfaAcceptsEod(get_nfa_from_blob(bc, i))) { + const auto &eng_info = bc.engine_info_by_queue.at(i); + if (eng_info.accepts_eod) { DEBUG_PRINTF("outfix has eod\n"); return true; } @@ -2692,23 +2427,72 @@ bool hasEodAnchors(const RoseBuildImpl &build, const build_context &bc, } static -void fillLookaroundTables(char *look_base, char *reach_base, - const vector &look_vec) { - DEBUG_PRINTF("%zu lookaround table entries\n", look_vec.size()); +void writeDkeyInfo(const ReportManager &rm, RoseEngineBlob &engine_blob, + RoseEngine &proto) { + const auto inv_dkeys = rm.getDkeyToReportTable(); + proto.invDkeyOffset = engine_blob.add_range(inv_dkeys); + proto.dkeyCount = rm.numDkeys(); + proto.dkeyLogSize = fatbit_size(proto.dkeyCount); +} + +static +void writeLeftInfo(RoseEngineBlob &engine_blob, RoseEngine &proto, + const vector &leftInfoTable) { + proto.leftOffset = engine_blob.add_range(leftInfoTable); + proto.activeLeftIterOffset + = writeActiveLeftIter(engine_blob, leftInfoTable); + proto.roseCount = verify_u32(leftInfoTable.size()); + proto.activeLeftCount = verify_u32(leftInfoTable.size()); + proto.rosePrefixCount = countRosePrefixes(leftInfoTable); +} + +static +void writeNfaInfo(const RoseBuildImpl &build, build_context &bc, + RoseEngine &proto, const set &no_retrigger_queues) { + const u32 queue_count = build.qif.allocated_count(); + if (!queue_count) { + return; + } + + auto ekey_lists = buildSuffixEkeyLists(build, bc, build.qif); - s8 *look = (s8 *)look_base; - u8 *reach = (u8 *)reach_base; // base for 256-bit bitvectors + vector infos(queue_count); + memset(infos.data(), 0, sizeof(NfaInfo) * queue_count); - for (const auto &le : look_vec) { - *look = verify_s8(le.offset); - const CharReach &cr = le.reach; + for (u32 qi = 0; qi < queue_count; qi++) { + NfaInfo &info = infos[qi]; + info.nfaOffset = bc.engineOffsets.at(qi); + assert(qi < ekey_lists.size()); + info.ekeyListOffset = ekey_lists.at(qi); + info.no_retrigger = contains(no_retrigger_queues, qi) ? 1 : 0; + } - assert(cr.any()); // Should be at least one character! - fill_bitvector(cr, reach); + // Mark outfixes that are in the small block matcher. + for (const auto &out : build.outfixes) { + const u32 qi = out.get_queue(); + assert(qi < infos.size()); + infos.at(qi).in_sbmatcher = out.in_sbmatcher; + } - ++look; - reach += REACH_BITVECTOR_LEN; + // Mark suffixes triggered by EOD table literals. + const RoseGraph &g = build.g; + for (auto v : vertices_range(g)) { + if (!g[v].suffix) { + continue; + } + u32 qi = bc.suffixes.at(g[v].suffix); + assert(qi < infos.size()); + if (build.isInETable(v)) { + infos.at(qi).eod = 1; + } } + + // Update state offsets to do with NFAs in proto and in the NfaInfo + // structures. + updateNfaState(bc, infos, &proto.stateOffsets, &proto.scratchStateSize, + &proto.nfaStateSize, &proto.tStateSize); + + proto.nfaInfoOffset = bc.engine_blob.add_range(infos); } static @@ -2729,1150 +2513,31 @@ bool hasBoundaryReports(const BoundaryReports &boundary) { return false; } -/** - * \brief True if the given vertex is a role that can only be switched on at - * EOD. 
- */ static -bool onlyAtEod(const RoseBuildImpl &tbi, RoseVertex v) { - const RoseGraph &g = tbi.g; - - // All such roles have only (0,0) edges to vertices with the eod_accept - // property, and no other effects (suffixes, ordinary reports, etc, etc). - - if (isLeafNode(v, g) || !g[v].reports.empty() || g[v].suffix) { - return false; - } - - for (const auto &e : out_edges_range(v, g)) { - RoseVertex w = target(e, g); - if (!g[w].eod_accept) { - return false; - } - assert(!g[w].reports.empty()); - assert(g[w].literals.empty()); +void makeBoundaryPrograms(const RoseBuildImpl &build, build_context &bc, + const BoundaryReports &boundary, + const DerivedBoundaryReports &dboundary, + RoseBoundaryReports &out) { + DEBUG_PRINTF("report ^: %zu\n", boundary.report_at_0.size()); + DEBUG_PRINTF("report $: %zu\n", boundary.report_at_eod.size()); + DEBUG_PRINTF("report ^$: %zu\n", dboundary.report_at_0_eod_full.size()); - if (g[e].minBound || g[e].maxBound) { - return false; - } - } + auto eod_prog = makeBoundaryProgram(build, boundary.report_at_eod); + out.reportEodOffset = writeProgram(bc, move(eod_prog)); - /* There is no pointing enforcing this check at runtime if - * this role is only fired by the eod event literal */ - if (tbi.eod_event_literal_id != MO_INVALID_IDX && - g[v].literals.size() == 1 && - *g[v].literals.begin() == tbi.eod_event_literal_id) { - return false; - } + auto zero_prog = makeBoundaryProgram(build, boundary.report_at_0); + out.reportZeroOffset = writeProgram(bc, move(zero_prog)); - return true; + auto zeod_prog = makeBoundaryProgram(build, dboundary.report_at_0_eod_full); + out.reportZeroEodOffset = writeProgram(bc, move(zeod_prog)); } static -u32 addLookaround(build_context &bc, const vector &look) { - // Check the cache. - auto it = bc.lookaround_cache.find(look); - if (it != bc.lookaround_cache.end()) { - DEBUG_PRINTF("reusing look at idx %zu\n", it->second); - return verify_u32(it->second); - } - - // Linear scan for sequence. - auto seq_it = search(begin(bc.lookaround), end(bc.lookaround), begin(look), - end(look)); - if (seq_it != end(bc.lookaround)) { - size_t idx = distance(begin(bc.lookaround), seq_it); - DEBUG_PRINTF("linear scan found look at idx %zu\n", idx); - bc.lookaround_cache.emplace(look, idx); - return verify_u32(idx); - } - - // New sequence. - size_t idx = bc.lookaround.size(); - bc.lookaround_cache.emplace(look, idx); - insert(&bc.lookaround, bc.lookaround.end(), look); - DEBUG_PRINTF("adding look at idx %zu\n", idx); - return verify_u32(idx); -} - -static -bool checkReachMask(const CharReach &cr, u8 &andmask, u8 &cmpmask) { - size_t reach_size = cr.count(); - assert(reach_size > 0); - // check whether entry_size is some power of 2. 
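The reach-as-mask logic in `checkReachMask()` (continued below) rests on two bit tricks: `(n - 1) & n == 0` tests that the reach size is a power of two, and a class of that shape can be matched with a single AND-and-compare. A worked, runnable example for the class {'A', 'a'}:

```cpp
#include <cassert>

int main() {
    // {'A' (0x41), 'a' (0x61)} differ only in bit 0x20, so:
    unsigned andmask = 0xdf; // ~0x20 & 0xff: ignore the case bit
    unsigned cmpmask = 0x41;
    int matches = 0;
    for (unsigned c = 0; c < 256; c++) {
        if ((c & andmask) == cmpmask) {
            matches++;
            assert(c == 0x41 || c == 0x61);
        }
    }
    assert(matches == 2);       // reach size 2: a power of two
    assert(((2 - 1) & 2) == 0); // the power-of-two test from the code
}
```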
- if ((reach_size - 1) & reach_size) { - return false; - } - make_and_cmp_mask(cr, &andmask, &cmpmask); - if ((1 << popcount32((u8)(~andmask))) ^ reach_size) { - return false; - } - return true; -} - -static -bool checkReachWithFlip(const CharReach &cr, u8 &andmask, - u8 &cmpmask, u8 &flip) { - if (checkReachMask(cr, andmask, cmpmask)) { - flip = 0; - return true; - } - if (checkReachMask(~cr, andmask, cmpmask)) { - flip = 1; - return true; - } - return false; -} - -static -bool makeRoleByte(const vector &look, RoseProgram &program) { - if (look.size() == 1) { - const auto &entry = look[0]; - u8 andmask_u8, cmpmask_u8; - u8 flip; - if (!checkReachWithFlip(entry.reach, andmask_u8, cmpmask_u8, flip)) { - return false; - } - s32 checkbyte_offset = verify_s32(entry.offset); - DEBUG_PRINTF("CHECK BYTE offset=%d\n", checkbyte_offset); - const auto *end_inst = program.end_instruction(); - auto ri = make_unique(andmask_u8, cmpmask_u8, flip, - checkbyte_offset, end_inst); - program.add_before_end(move(ri)); - return true; - } - return false; -} - -static -bool makeRoleMask(const vector &look, RoseProgram &program) { - if (look.back().offset < look.front().offset + 8) { - s32 base_offset = verify_s32(look.front().offset); - u64a and_mask = 0; - u64a cmp_mask = 0; - u64a neg_mask = 0; - for (const auto &entry : look) { - u8 andmask_u8, cmpmask_u8, flip; - if (!checkReachWithFlip(entry.reach, andmask_u8, - cmpmask_u8, flip)) { - return false; - } - DEBUG_PRINTF("entry offset %d\n", entry.offset); - u32 shift = (entry.offset - base_offset) << 3; - and_mask |= (u64a)andmask_u8 << shift; - cmp_mask |= (u64a)cmpmask_u8 << shift; - if (flip) { - neg_mask |= 0xffLLU << shift; - } - } - DEBUG_PRINTF("CHECK MASK and_mask=%llx cmp_mask=%llx\n", - and_mask, cmp_mask); - const auto *end_inst = program.end_instruction(); - auto ri = make_unique(and_mask, cmp_mask, neg_mask, - base_offset, end_inst); - program.add_before_end(move(ri)); - return true; - } - return false; -} - -static UNUSED -string convertMaskstoString(u8 *p, int byte_len) { - string s; - for (int i = 0; i < byte_len; i++) { - u8 hi = *p >> 4; - u8 lo = *p & 0xf; - s += (char)(hi + (hi < 10 ? 48 : 87)); - s += (char)(lo + (lo < 10 ? 48 : 87)); - p++; - } - return s; -} - -static -bool makeRoleMask32(const vector &look, - RoseProgram &program) { - if (look.back().offset >= look.front().offset + 32) { - return false; - } - s32 base_offset = verify_s32(look.front().offset); - array and_mask, cmp_mask; - and_mask.fill(0); - cmp_mask.fill(0); - u32 neg_mask = 0; - for (const auto &entry : look) { - u8 andmask_u8, cmpmask_u8, flip; - if (!checkReachWithFlip(entry.reach, andmask_u8, - cmpmask_u8, flip)) { - return false; - } - u32 shift = entry.offset - base_offset; - assert(shift < 32); - and_mask[shift] = andmask_u8; - cmp_mask[shift] = cmpmask_u8; - if (flip) { - neg_mask |= 1 << shift; - } - } - - DEBUG_PRINTF("and_mask %s\n", - convertMaskstoString(and_mask.data(), 32).c_str()); - DEBUG_PRINTF("cmp_mask %s\n", - convertMaskstoString(cmp_mask.data(), 32).c_str()); - DEBUG_PRINTF("neg_mask %08x\n", neg_mask); - DEBUG_PRINTF("base_offset %d\n", base_offset); - - const auto *end_inst = program.end_instruction(); - auto ri = make_unique(and_mask, cmp_mask, neg_mask, - base_offset, end_inst); - program.add_before_end(move(ri)); - return true; -} - -// Sorting by the size of every bucket. -// Used in map, cmpNibble>. 
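Before turning to the nibble buckets, here is a runnable miniature of the 8-byte packing done by `makeRoleMask()` above: per-offset AND/compare byte masks are shifted into one `u64` pair, so up to eight lookaround checks collapse into a single load, AND and compare (the `neg_mask` flip handling is omitted for brevity):

```cpp
#include <cassert>
typedef unsigned long long u64;

int main() {
    u64 and_mask = 0, cmp_mask = 0;
    // Offset 0 must be 'a'/'A' (mask 0xdf/0x41); offset 3 must be '0'.
    and_mask |= (u64)0xdf << (0 * 8);
    cmp_mask |= (u64)0x41 << (0 * 8);
    and_mask |= (u64)0xff << (3 * 8);
    cmp_mask |= (u64)'0' << (3 * 8);

    const char data[8] = {'A', 'x', 'y', '0', 0, 0, 0, 0};
    u64 v = 0;
    for (int i = 0; i < 8; i++) {
        v |= (u64)(unsigned char)data[i] << (i * 8);
    }
    assert((v & and_mask) == cmp_mask); // the role's lookaround passes
}
```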
-struct cmpNibble { - bool operator()(const u32 data1, const u32 data2) const{ - u32 size1 = popcount32(data1 >> 16) * popcount32(data1 << 16); - u32 size2 = popcount32(data2 >> 16) * popcount32(data2 << 16); - return std::tie(size1, data1) < std::tie(size2, data2); - } -}; - -// Insert all pairs of bucket and offset into buckets. -static really_inline -void getAllBuckets(const vector &look, - map, cmpNibble> &buckets, u32 &neg_mask) { - s32 base_offset = verify_s32(look.front().offset); - for (const auto &entry : look) { - CharReach cr = entry.reach; - // Flip heavy character classes to save buckets. - if (cr.count() > 128 ) { - cr.flip(); - } else { - neg_mask ^= 1 << (entry.offset - base_offset); - } - map lo2hi; - // We treat Ascii Table as a 16x16 grid. - // Push every row in cr into lo2hi and mark the row number. - for (size_t i = cr.find_first(); i != CharReach::npos;) { - u8 it_hi = i >> 4; - u16 low_encode = 0; - while (i != CharReach::npos && (i >> 4) == it_hi) { - low_encode |= 1 << (i & 0xf); - i = cr.find_next(i); - } - lo2hi[low_encode] |= 1 << it_hi; - } - for (const auto &it : lo2hi) { - u32 hi_lo = (it.second << 16) | it.first; - buckets[hi_lo].push_back(entry.offset); - } - } -} - -// Once we have a new bucket, we'll try to combine it with all old buckets. -static really_inline -void nibUpdate(map &nib, u32 hi_lo) { - u16 hi = hi_lo >> 16; - u16 lo = hi_lo & 0xffff; - for (const auto pairs : nib) { - u32 old = pairs.first; - if ((old >> 16) == hi || (old & 0xffff) == lo) { - if (!nib[old | hi_lo]) { - nib[old | hi_lo] = nib[old] | nib[hi_lo]; - } - } - } -} - -static really_inline -void nibMaskUpdate(array &mask, u32 data, u8 bit_index) { - for (u8 index = 0; data > 0; data >>= 1, index++) { - if (data & 1) { - // 0 ~ 7 bucket in first 16 bytes, - // 8 ~ 15 bucket in second 16 bytes. - if (bit_index >= 8) { - mask[index + 16] |= 1 << (bit_index - 8); - } else { - mask[index] |= 1 << bit_index; - } - } - } -} - -static -bool makeRoleShufti(const vector &look, - RoseProgram &program) { - - s32 base_offset = verify_s32(look.front().offset); - if (look.back().offset >= base_offset + 32) { - return false; - } - array hi_mask, lo_mask; - hi_mask.fill(0); - lo_mask.fill(0); - array bucket_select_hi, bucket_select_lo; - bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8. - bucket_select_lo.fill(0); - u8 bit_index = 0; // number of buckets - map nib; // map every bucket to its bucket number. - map, cmpNibble> bucket2offsets; - u32 neg_mask = ~0u; - - getAllBuckets(look, bucket2offsets, neg_mask); - - for (const auto &it : bucket2offsets) { - u32 hi_lo = it.first; - // New bucket. - if (!nib[hi_lo]) { - if (bit_index >= 16) { - return false; - } - nib[hi_lo] = 1 << bit_index; - - nibUpdate(nib, hi_lo); - nibMaskUpdate(hi_mask, hi_lo >> 16, bit_index); - nibMaskUpdate(lo_mask, hi_lo & 0xffff, bit_index); - bit_index++; - } - - DEBUG_PRINTF("hi_lo %x bucket %x\n", hi_lo, nib[hi_lo]); - - // Update bucket_select_mask. 
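`getAllBuckets()` and `makeRoleShufti()` above treat the byte as a 16x16 grid indexed by high and low nibble; a character belongs to a bucket iff the bucket's bit survives the AND of both nibble-table entries. This is the PSHUFB-style shufti test, modelled here in scalar code. ('a' 0x61 and 'c' 0x63 share a row, so this example has no cross-product false positives; in general extra candidates can pass and need confirmation.)

```cpp
#include <cassert>

static unsigned char hi_tbl[16], lo_tbl[16];

// Bucket bit 0: set iff the bit appears in both row and column entries.
static bool in_bucket(unsigned char c) {
    return (hi_tbl[c >> 4] & lo_tbl[c & 0xf] & 1) != 0;
}

int main() {
    const unsigned char chars[] = {'a', 'c'};
    for (unsigned char c : chars) {
        hi_tbl[c >> 4] |= 1;  // row: high nibble
        lo_tbl[c & 0xf] |= 1; // column: low nibble
    }
    assert(in_bucket('a') && in_bucket('c'));
    assert(!in_bucket('b') && !in_bucket('q'));
}
```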
- u8 nib_hi = nib[hi_lo] >> 8; - u8 nib_lo = nib[hi_lo] & 0xff; - for (const auto offset : it.second) { - bucket_select_hi[offset - base_offset] |= nib_hi; - bucket_select_lo[offset - base_offset] |= nib_lo; - } - } - - DEBUG_PRINTF("hi_mask %s\n", - convertMaskstoString(hi_mask.data(), 32).c_str()); - DEBUG_PRINTF("lo_mask %s\n", - convertMaskstoString(lo_mask.data(), 32).c_str()); - DEBUG_PRINTF("bucket_select_hi %s\n", - convertMaskstoString(bucket_select_hi.data(), 32).c_str()); - DEBUG_PRINTF("bucket_select_lo %s\n", - convertMaskstoString(bucket_select_lo.data(), 32).c_str()); - - const auto *end_inst = program.end_instruction(); - if (bit_index < 8) { - if (look.back().offset < base_offset + 16) { - neg_mask &= 0xffff; - array nib_mask; - array bucket_select_mask_16; - copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin()); - copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16); - copy(bucket_select_lo.begin(), bucket_select_lo.begin() + 16, - bucket_select_mask_16.begin()); - auto ri = make_unique - (nib_mask, bucket_select_mask_16, - neg_mask, base_offset, end_inst); - program.add_before_end(move(ri)); - } else { - array hi_mask_16; - array lo_mask_16; - copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_16.begin()); - copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_16.begin()); - auto ri = make_unique - (hi_mask_16, lo_mask_16, bucket_select_lo, - neg_mask, base_offset, end_inst); - program.add_before_end(move(ri)); - } - } else { - if (look.back().offset < base_offset + 16) { - neg_mask &= 0xffff; - array bucket_select_mask_32; - copy(bucket_select_lo.begin(), bucket_select_lo.begin() + 16, - bucket_select_mask_32.begin()); - copy(bucket_select_hi.begin(), bucket_select_hi.begin() + 16, - bucket_select_mask_32.begin() + 16); - auto ri = make_unique - (hi_mask, lo_mask, bucket_select_mask_32, - neg_mask, base_offset, end_inst); - program.add_before_end(move(ri)); - } else { - auto ri = make_unique - (hi_mask, lo_mask, bucket_select_hi, bucket_select_lo, - neg_mask, base_offset, end_inst); - program.add_before_end(move(ri)); - } - } - return true; -} - -/** - * Builds a lookaround instruction, or an appropriate specialization if one is - * available. - */ -static -void makeLookaroundInstruction(build_context &bc, const vector &look, - RoseProgram &program) { - assert(!look.empty()); - - if (makeRoleByte(look, program)) { - return; - } - - if (look.size() == 1) { - s8 offset = look.begin()->offset; - u32 look_idx = addLookaround(bc, look); - auto ri = make_unique(offset, look_idx, - program.end_instruction()); - program.add_before_end(move(ri)); - return; - } - - if (makeRoleMask(look, program)) { - return; - } - - if (makeRoleMask32(look, program)) { - return; - } - - if (makeRoleShufti(look, program)) { - return; - } - - u32 look_idx = addLookaround(bc, look); - u32 look_count = verify_u32(look.size()); - - auto ri = make_unique(look_idx, look_count, - program.end_instruction()); - program.add_before_end(move(ri)); -} - -static -void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, - RoseProgram &program) { - if (!build.cc.grey.roseLookaroundMasks) { - return; - } - - vector look; - - // Lookaround from leftfix (mandatory). - if (contains(bc.leftfix_info, v) && bc.leftfix_info.at(v).has_lookaround) { - DEBUG_PRINTF("using leftfix lookaround\n"); - look = bc.leftfix_info.at(v).lookaround; - } - - // We may be able to find more lookaround info (advisory) and merge it - // in. 
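`makeRoleLookaround()` above assembles a sequence of (relative offset, character class) checks around a literal match. A toy model of evaluating such a sequence, with function pointers standing in for `CharReach`:

```cpp
#include <cassert>
#include <cctype>
#include <cstddef>

struct Look {
    int offset;
    bool (*reach)(unsigned char);
};

static bool is_digit(unsigned char c) { return std::isdigit(c) != 0; }
static bool is_space(unsigned char c) { return std::isspace(c) != 0; }

int main() {
    const char *buf = "id 42;";
    const std::size_t match_end = 5; // literal "42" matched over [3, 5)
    const Look look[] = {{-3, is_space}, {-2, is_digit}, {-1, is_digit}};
    for (const Look &l : look) {
        assert(l.reach((unsigned char)buf[match_end + l.offset]));
    }
}
```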
- vector look_more; - findLookaroundMasks(build, v, look_more); - mergeLookaround(look, look_more); - - if (look.empty()) { - return; - } - - makeLookaroundInstruction(bc, look, program); -} - -static -void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v, - RoseProgram &program) { - auto it = bc.leftfix_info.find(v); - if (it == end(bc.leftfix_info)) { - return; - } - const left_build_info &lni = it->second; - if (lni.has_lookaround) { - return; // Leftfix completely implemented by lookaround. - } - - assert(!build.cc.streaming || - build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG); - - bool is_prefix = build.isRootSuccessor(v); - const auto *end_inst = program.end_instruction(); - - unique_ptr ri; - if (is_prefix) { - ri = make_unique(lni.queue, build.g[v].left.lag, - build.g[v].left.leftfix_report, - end_inst); - } else { - ri = make_unique(lni.queue, build.g[v].left.lag, - build.g[v].left.leftfix_report, - end_inst); - } - program.add_before_end(move(ri)); -} - -static -void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc, - RoseVertex v, RoseProgram &program) { - // Only relevant for roles that can be triggered by the anchored table. - if (!build.isAnchored(v)) { - return; - } - - // If this match cannot occur after floatingMinLiteralMatchOffset, we do - // not need this check. - if (build.g[v].max_offset <= bc.floatingMinLiteralMatchOffset) { - return; - } - - const auto *end_inst = program.end_instruction(); - auto ri = make_unique(build.g[v].groups, end_inst); - program.add_before_end(move(ri)); -} - -static -void makeDedupe(const RoseBuildImpl &build, const Report &report, - RoseProgram &program) { - const auto *end_inst = program.end_instruction(); - auto ri = - make_unique(report.quashSom, build.rm.getDkey(report), - report.offsetAdjust, end_inst); - program.add_before_end(move(ri)); -} - -static -void makeDedupeSom(const RoseBuildImpl &build, const Report &report, - RoseProgram &program) { - const auto *end_inst = program.end_instruction(); - auto ri = make_unique(report.quashSom, - build.rm.getDkey(report), - report.offsetAdjust, end_inst); - program.add_before_end(move(ri)); -} - -static -void makeCatchup(RoseBuildImpl &build, build_context &bc, - const flat_set &reports, RoseProgram &program) { - if (!bc.needs_catchup) { - return; - } - - // Everything except the INTERNAL_ROSE_CHAIN report needs catchup to run - // before reports are triggered. 
- - auto report_needs_catchup = [&](const ReportID &id) { - const Report &report = build.rm.getReport(id); - return report.type != INTERNAL_ROSE_CHAIN; - }; - - if (!any_of(begin(reports), end(reports), report_needs_catchup)) { - DEBUG_PRINTF("none of the given reports needs catchup\n"); - return; - } - - program.add_before_end(make_unique()); -} - -static -void makeCatchupMpv(RoseBuildImpl &build, build_context &bc, ReportID id, - RoseProgram &program) { - if (!bc.needs_mpv_catchup) { - return; - } - - const Report &report = build.rm.getReport(id); - if (report.type == INTERNAL_ROSE_CHAIN) { - return; - } - - program.add_before_end(make_unique()); -} - -static -void writeSomOperation(const Report &report, som_operation *op) { - assert(op); - - memset(op, 0, sizeof(*op)); - - switch (report.type) { - case EXTERNAL_CALLBACK_SOM_REL: - op->type = SOM_EXTERNAL_CALLBACK_REL; - break; - case INTERNAL_SOM_LOC_SET: - op->type = SOM_INTERNAL_LOC_SET; - break; - case INTERNAL_SOM_LOC_SET_IF_UNSET: - op->type = SOM_INTERNAL_LOC_SET_IF_UNSET; - break; - case INTERNAL_SOM_LOC_SET_IF_WRITABLE: - op->type = SOM_INTERNAL_LOC_SET_IF_WRITABLE; - break; - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: - op->type = SOM_INTERNAL_LOC_SET_REV_NFA; - break; - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: - op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET; - break; - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: - op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE; - break; - case INTERNAL_SOM_LOC_COPY: - op->type = SOM_INTERNAL_LOC_COPY; - break; - case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: - op->type = SOM_INTERNAL_LOC_COPY_IF_WRITABLE; - break; - case INTERNAL_SOM_LOC_MAKE_WRITABLE: - op->type = SOM_INTERNAL_LOC_MAKE_WRITABLE; - break; - case EXTERNAL_CALLBACK_SOM_STORED: - op->type = SOM_EXTERNAL_CALLBACK_STORED; - break; - case EXTERNAL_CALLBACK_SOM_ABS: - op->type = SOM_EXTERNAL_CALLBACK_ABS; - break; - case EXTERNAL_CALLBACK_SOM_REV_NFA: - op->type = SOM_EXTERNAL_CALLBACK_REV_NFA; - break; - case INTERNAL_SOM_LOC_SET_FROM: - op->type = SOM_INTERNAL_LOC_SET_FROM; - break; - case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: - op->type = SOM_INTERNAL_LOC_SET_FROM_IF_WRITABLE; - break; - default: - // This report doesn't correspond to a SOM operation. - assert(0); - throw CompileError("Unable to generate bytecode."); - } - - op->onmatch = report.onmatch; - - switch (report.type) { - case EXTERNAL_CALLBACK_SOM_REV_NFA: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: - op->aux.revNfaIndex = report.revNfaIndex; - break; - default: - op->aux.somDistance = report.somDistance; - break; - } -} - -static -void makeReport(RoseBuildImpl &build, const ReportID id, - const bool has_som, RoseProgram &program) { - assert(id < build.rm.numReports()); - const Report &report = build.rm.getReport(id); - - RoseProgram report_block; - const RoseInstruction *end_inst = report_block.end_instruction(); - - // Handle min/max offset checks. - if (report.minOffset > 0 || report.maxOffset < MAX_OFFSET) { - auto ri = make_unique(report.minOffset, - report.maxOffset, end_inst); - report_block.add_before_end(move(ri)); - } - - // If this report has an exhaustion key, we can check it in the program - // rather than waiting until we're in the callback adaptor. 
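// [Editor's aside: illustrative sketch, not part of the original patch.]
// Hoisting the exhaustion-key test into the report program means an
// exhausted report's whole block is skipped inside the interpreter rather
// than being filtered later in the callback adaptor. A sketch of the
// check, with illustrative stand-ins for the scratch state and
// INVALID_EKEY:
#include <cstdint>
#include <vector>

bool reportStillLive(const std::vector<bool> &exhausted, uint32_t ekey) {
    const uint32_t kInvalidEkey = ~0U; // stand-in for INVALID_EKEY
    return ekey == kInvalidEkey || !exhausted[ekey];
}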
- if (report.ekey != INVALID_EKEY) { - auto ri = make_unique(report.ekey, end_inst); - report_block.add_before_end(move(ri)); - } - - // External SOM reports that aren't passthrough need their SOM value - // calculated. - if (isExternalSomReport(report) && - report.type != EXTERNAL_CALLBACK_SOM_PASS) { - auto ri = make_unique(); - writeSomOperation(report, &ri->som); - report_block.add_before_end(move(ri)); - } - - // Min length constraint. - if (report.minLength > 0) { - assert(build.hasSom); - auto ri = make_unique( - report.offsetAdjust, report.minLength, end_inst); - report_block.add_before_end(move(ri)); - } - - if (report.quashSom) { - report_block.add_before_end(make_unique()); - } - - switch (report.type) { - case EXTERNAL_CALLBACK: - if (!has_som) { - // Dedupe is only necessary if this report has a dkey, or if there - // are SOM reports to catch up. - bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom; - if (report.ekey == INVALID_EKEY) { - if (needs_dedupe) { - report_block.add_before_end( - make_unique( - report.quashSom, build.rm.getDkey(report), - report.onmatch, report.offsetAdjust, end_inst)); - } else { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust)); - } - } else { - if (needs_dedupe) { - makeDedupe(build, report, report_block); - } - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust, report.ekey)); - } - } else { // has_som - makeDedupeSom(build, report, report_block); - if (report.ekey == INVALID_EKEY) { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust)); - } else { - report_block.add_before_end( - make_unique( - report.onmatch, report.offsetAdjust, report.ekey)); - } - } - break; - case INTERNAL_SOM_LOC_SET: - case INTERNAL_SOM_LOC_SET_IF_UNSET: - case INTERNAL_SOM_LOC_SET_IF_WRITABLE: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: - case INTERNAL_SOM_LOC_COPY: - case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: - case INTERNAL_SOM_LOC_MAKE_WRITABLE: - case INTERNAL_SOM_LOC_SET_FROM: - case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: - if (has_som) { - auto ri = make_unique(); - writeSomOperation(report, &ri->som); - report_block.add_before_end(move(ri)); - } else { - auto ri = make_unique(); - writeSomOperation(report, &ri->som); - report_block.add_before_end(move(ri)); - } - break; - case INTERNAL_ROSE_CHAIN: { - report_block.add_before_end(make_unique( - report.onmatch, report.topSquashDistance)); - break; - } - case EXTERNAL_CALLBACK_SOM_REL: - case EXTERNAL_CALLBACK_SOM_STORED: - case EXTERNAL_CALLBACK_SOM_ABS: - case EXTERNAL_CALLBACK_SOM_REV_NFA: - makeDedupeSom(build, report, report_block); - if (report.ekey == INVALID_EKEY) { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust)); - } else { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust, report.ekey)); - } - break; - case EXTERNAL_CALLBACK_SOM_PASS: - makeDedupeSom(build, report, report_block); - if (report.ekey == INVALID_EKEY) { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust)); - } else { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust, report.ekey)); - } - break; - - default: - assert(0); - throw CompileError("Unable to generate bytecode."); - } - - assert(!report_block.empty()); - program.add_block(move(report_block)); -} - -static -void makeRoleReports(RoseBuildImpl &build, 
build_context &bc, RoseVertex v, - RoseProgram &program) { - const auto &g = build.g; - - /* we are a suffaig - need to update role to provide som to the - * suffix. */ - bool has_som = false; - if (g[v].left.tracksSom()) { - assert(contains(bc.leftfix_info, v)); - const left_build_info &lni = bc.leftfix_info.at(v); - program.add_before_end( - make_unique(lni.queue, g[v].left.lag)); - has_som = true; - } else if (g[v].som_adjust) { - program.add_before_end( - make_unique(g[v].som_adjust)); - has_som = true; - } - - const auto &reports = g[v].reports; - makeCatchup(build, bc, reports, program); - - RoseProgram report_block; - for (ReportID id : reports) { - makeReport(build, id, has_som, report_block); - } - program.add_before_end(move(report_block)); -} - -static -void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v, - RoseProgram &program) { - const auto &g = build.g; - if (!g[v].suffix) { - return; - } - assert(contains(bc.suffixes, g[v].suffix)); - u32 qi = bc.suffixes.at(g[v].suffix); - assert(contains(bc.engineOffsets, qi)); - const NFA *nfa = get_nfa_from_blob(bc, qi); - u32 suffixEvent; - if (isContainerType(nfa->type)) { - auto tamaProto = g[v].suffix.tamarama.get(); - assert(tamaProto); - u32 top = (u32)MQE_TOP_FIRST + - tamaProto->top_remap.at(make_pair(g[v].index, - g[v].suffix.top)); - assert(top < MQE_INVALID); - suffixEvent = top; - } else if (isMultiTopType(nfa->type)) { - assert(!g[v].suffix.haig); - u32 top = (u32)MQE_TOP_FIRST + g[v].suffix.top; - assert(top < MQE_INVALID); - suffixEvent = top; - } else { - // DFAs/Puffs have no MQE_TOP_N support, so they get a classic TOP - // event. - assert(!g[v].suffix.graph || onlyOneTop(*g[v].suffix.graph)); - suffixEvent = MQE_TOP; - } - program.add_before_end( - make_unique(qi, suffixEvent)); -} - -static -void makeRoleGroups(RoseBuildImpl &build, build_context &bc, RoseVertex v, - RoseProgram &program) { - const auto &g = build.g; - rose_group groups = g[v].groups; - if (!groups) { - return; - } - - // The set of "already on" groups as we process this vertex is the - // intersection of the groups set by our predecessors. - assert(in_degree(v, g) > 0); - rose_group already_on = ~rose_group{0}; - for (const auto &u : inv_adjacent_vertices_range(v, g)) { - already_on &= bc.vertex_group_map.at(u); - } - - DEBUG_PRINTF("already_on=0x%llx\n", already_on); - DEBUG_PRINTF("squashable=0x%llx\n", bc.squashable_groups); - DEBUG_PRINTF("groups=0x%llx\n", groups); - - already_on &= ~bc.squashable_groups; - DEBUG_PRINTF("squashed already_on=0x%llx\n", already_on); - - // We don't *have* to mask off the groups that we know are already on, but - // this will make bugs more apparent. - groups &= ~already_on; - - if (!groups) { - DEBUG_PRINTF("no new groups to set, skipping\n"); - return; - } - - program.add_before_end(make_unique(groups)); -} - -static -void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, - RoseVertex u, RoseProgram &program) { - const auto &g = build.g; - - vector infix_program; - - for (const auto &e : out_edges_range(u, g)) { - RoseVertex v = target(e, g); - if (!g[v].left) { - continue; - } - - assert(contains(bc.leftfix_info, v)); - const left_build_info &lbi = bc.leftfix_info.at(v); - if (lbi.has_lookaround) { - continue; - } - - const NFA *nfa = get_nfa_from_blob(bc, lbi.queue); - - // DFAs have no TOP_N support, so they get a classic MQE_TOP event. 
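// [Editor's aside: illustrative sketch, not part of the original patch.]
// makeRoleGroups() above only emits SET_GROUPS for groups that are not
// provably on already: a group is provably on iff every predecessor sets
// it and it cannot be squashed in between. Stand-alone model with u64
// bitmasks, as used by rose_group:
#include <cstdint>
#include <vector>

uint64_t groupsToSet(uint64_t groups,
                     const std::vector<uint64_t> &pred_groups,
                     uint64_t squashable) {
    uint64_t already_on = ~uint64_t{0};
    for (uint64_t pg : pred_groups) {
        already_on &= pg; // intersection over all predecessors
    }
    already_on &= ~squashable; // squashable groups may have gone off again
    return groups & ~already_on;
}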
- u32 top; - if (isContainerType(nfa->type)) { - auto tamaProto = g[v].left.tamarama.get(); - assert(tamaProto); - top = MQE_TOP_FIRST + tamaProto->top_remap.at( - make_pair(g[v].index, g[e].rose_top)); - assert(top < MQE_INVALID); - } else if (!isMultiTopType(nfa->type)) { - assert(num_tops(g[v].left) == 1); - top = MQE_TOP; - } else { - top = MQE_TOP_FIRST + g[e].rose_top; - assert(top < MQE_INVALID); - } - - infix_program.emplace_back(g[e].rose_cancel_prev_top, lbi.queue, top); - } - - if (infix_program.empty()) { - return; - } - - // Order, de-dupe and add instructions to the end of program. - sort(begin(infix_program), end(infix_program), - [](const RoseInstrTriggerInfix &a, const RoseInstrTriggerInfix &b) { - return tie(a.cancel, a.queue, a.event) < - tie(b.cancel, b.queue, b.event); - }); - infix_program.erase(unique(begin(infix_program), end(infix_program)), - end(infix_program)); - for (const auto &ri : infix_program) { - program.add_before_end(make_unique(ri)); - } -} - -static -void makeRoleSetState(const build_context &bc, RoseVertex v, - RoseProgram &program) { - // We only need this instruction if a state index has been assigned to this - // vertex. - auto it = bc.roleStateIndices.find(v); - if (it == end(bc.roleStateIndices)) { - return; - } - program.add_before_end(make_unique(it->second)); -} - -static -void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, - const RoseEdge &e, RoseProgram &program) { - const RoseGraph &g = build.g; - const RoseVertex u = source(e, g); - - // We know that we can trust the anchored table (DFA) to always deliver us - // literals at the correct offset. - if (build.isAnchored(v)) { - DEBUG_PRINTF("literal in anchored table, skipping bounds check\n"); - return; - } - - // Use the minimum literal length. - u32 lit_length = g[v].eod_accept ? 0 : verify_u32(build.minLiteralLen(v)); - - u64a min_bound = g[e].minBound + lit_length; - u64a max_bound = g[e].maxBound == ROSE_BOUND_INF - ? ROSE_BOUND_INF - : g[e].maxBound + lit_length; - - if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { - assert(g[u].fixedOffset()); - // Make offsets absolute. - min_bound += g[u].max_offset; - if (max_bound != ROSE_BOUND_INF) { - max_bound += g[u].max_offset; - } - } - - assert(max_bound <= ROSE_BOUND_INF); - assert(min_bound <= max_bound); - - // CHECK_BOUNDS instruction uses 64-bit bounds, so we can use MAX_OFFSET - // (max value of a u64a) to represent ROSE_BOUND_INF. - if (max_bound == ROSE_BOUND_INF) { - max_bound = MAX_OFFSET; - } - - // This instruction should be doing _something_ -- bounds should be tighter - // than just {length, inf}. 
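// [Editor's aside: illustrative sketch, not part of the original patch.]
// The bounds arithmetic above in one place: edge bounds are relative to
// the end of the literal, so the minimum literal length is added, and for
// anchored history the fixed predecessor offset makes them absolute.
// ROSE_BOUND_INF becomes the maximum u64 offset, since CHECK_BOUNDS has
// no separate "unbounded" encoding. Constants are stand-ins:
#include <cstdint>
#include <utility>

std::pair<uint64_t, uint64_t>
absoluteBounds(uint32_t min_bound, uint32_t max_bound, uint32_t lit_len,
               uint64_t pred_offset, bool anchored_history) {
    const uint32_t kBoundInf = ~0U;    // stand-in for ROSE_BOUND_INF
    const uint64_t kMaxOffset = ~0ULL; // stand-in for MAX_OFFSET
    uint64_t lo = uint64_t{min_bound} + lit_len;
    uint64_t hi = max_bound == kBoundInf ? kMaxOffset
                                         : uint64_t{max_bound} + lit_len;
    if (anchored_history) {
        lo += pred_offset;
        if (hi != kMaxOffset) {
            hi += pred_offset;
        }
    }
    return {lo, hi};
}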
- assert(min_bound > lit_length || max_bound < MAX_OFFSET); - - const auto *end_inst = program.end_instruction(); - program.add_before_end( - make_unique(min_bound, max_bound, end_inst)); -} - -static -void makeRoleCheckNotHandled(build_context &bc, RoseVertex v, - RoseProgram &program) { - u32 handled_key; - if (contains(bc.handledKeys, v)) { - handled_key = bc.handledKeys.at(v); - } else { - handled_key = verify_u32(bc.handledKeys.size()); - bc.handledKeys.emplace(v, handled_key); - } - - const auto *end_inst = program.end_instruction(); - auto ri = make_unique(handled_key, end_inst); - program.add_before_end(move(ri)); -} - -static -void makeRoleEagerEodReports(RoseBuildImpl &build, build_context &bc, - RoseVertex v, RoseProgram &program) { - RoseProgram eod_program; - - for (const auto &e : out_edges_range(v, build.g)) { - if (canEagerlyReportAtEod(build, e)) { - RoseProgram block; - makeRoleReports(build, bc, target(e, build.g), block); - eod_program.add_block(move(block)); - } - } - - if (eod_program.empty()) { - return; - } - - if (!onlyAtEod(build, v)) { - // The rest of our program wasn't EOD anchored, so we need to guard - // these reports with a check. - const auto *end_inst = eod_program.end_instruction(); - eod_program.insert(begin(eod_program), - make_unique(end_inst)); - } - - program.add_before_end(move(eod_program)); -} - -static -RoseProgram makeProgram(RoseBuildImpl &build, build_context &bc, - const RoseEdge &e) { - const RoseGraph &g = build.g; - auto v = target(e, g); - - RoseProgram program; - - // First, add program instructions that enforce preconditions without - // effects. - - makeRoleAnchoredDelay(build, bc, v, program); - - if (onlyAtEod(build, v)) { - DEBUG_PRINTF("only at eod\n"); - const auto *end_inst = program.end_instruction(); - program.add_before_end(make_unique(end_inst)); - } - - if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { - makeRoleCheckBounds(build, v, e, program); - } - - // This program may be triggered by different predecessors, with different - // offset bounds. We must ensure we put this check/set operation after the - // bounds check to deal with this case. - if (in_degree(v, g) > 1) { - makeRoleCheckNotHandled(bc, v, program); - } - - makeRoleLookaround(build, bc, v, program); - makeRoleCheckLeftfix(build, bc, v, program); - - // Next, we can add program instructions that have effects. This must be - // done as a series of blocks, as some of them (like reports) are - // escapable. - - RoseProgram effects_block; - - RoseProgram reports_block; - makeRoleReports(build, bc, v, reports_block); - effects_block.add_block(move(reports_block)); - - RoseProgram infix_block; - makeRoleInfixTriggers(build, bc, v, infix_block); - effects_block.add_block(move(infix_block)); - - // Note: SET_GROUPS instruction must be after infix triggers, as an infix - // going dead may switch off groups. - RoseProgram groups_block; - makeRoleGroups(build, bc, v, groups_block); - effects_block.add_block(move(groups_block)); - - RoseProgram suffix_block; - makeRoleSuffix(build, bc, v, suffix_block); - effects_block.add_block(move(suffix_block)); - - RoseProgram state_block; - makeRoleSetState(bc, v, state_block); - effects_block.add_block(move(state_block)); - - // Note: EOD eager reports may generate a CHECK_ONLY_EOD instruction (if - // the program doesn't have one already). 
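// [Editor's aside: illustrative sketch, not part of the original patch.]
// The overall shape makeProgram() gives a role program: a chain of
// precondition checks that branch to END on failure, followed by effect
// blocks (reports, infix triggers, groups, suffix top, state). Simplified
// model that ignores the per-block escapes of the real interpreter:
#include <functional>
#include <vector>

void runRoleProgram(const std::vector<std::function<bool()>> &checks,
                    const std::vector<std::function<void()>> &effects) {
    for (const auto &check : checks) {
        if (!check()) {
            return; // CHECK_* failed: jump to END, role does not fire
        }
    }
    for (const auto &effect : effects) {
        effect();
    }
}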
- RoseProgram eod_block; - makeRoleEagerEodReports(build, bc, v, eod_block); - effects_block.add_block(move(eod_block)); - - program.add_before_end(move(effects_block)); - return program; -} - -static -u32 writeBoundaryProgram(RoseBuildImpl &build, build_context &bc, - const set &reports) { - if (reports.empty()) { - return 0; - } - - // Note: no CATCHUP instruction is necessary in the boundary case, as we - // should always be caught up (and may not even have the resources in - // scratch to support it). - - const bool has_som = false; - RoseProgram program; - for (const auto &id : reports) { - makeReport(build, id, has_som, program); - } - applyFinalSpecialisation(program); - return writeProgram(bc, move(program)); -} - -static -RoseBoundaryReports -makeBoundaryPrograms(RoseBuildImpl &build, build_context &bc, - const BoundaryReports &boundary, - const DerivedBoundaryReports &dboundary) { - RoseBoundaryReports out; - memset(&out, 0, sizeof(out)); - - DEBUG_PRINTF("report ^: %zu\n", boundary.report_at_0.size()); - DEBUG_PRINTF("report $: %zu\n", boundary.report_at_eod.size()); - DEBUG_PRINTF("report ^$: %zu\n", dboundary.report_at_0_eod_full.size()); - - out.reportEodOffset = - writeBoundaryProgram(build, bc, boundary.report_at_eod); - out.reportZeroOffset = - writeBoundaryProgram(build, bc, boundary.report_at_0); - out.reportZeroEodOffset = - writeBoundaryProgram(build, bc, dboundary.report_at_0_eod_full); - - return out; -} - -static -void assignStateIndices(const RoseBuildImpl &build, build_context &bc) { - const auto &g = build.g; +unordered_map assignStateIndices(const RoseBuildImpl &build) { + const auto &g = build.g; u32 state = 0; - + unordered_map roleStateIndices; for (auto v : vertices_range(g)) { // Virtual vertices (starts, EOD accept vertices) never need state // indices. @@ -3895,12 +2560,13 @@ void assignStateIndices(const RoseBuildImpl &build, build_context &bc) { } /* TODO: also don't need a state index if all edges are nfa based */ - bc.roleStateIndices.emplace(v, state++); + roleStateIndices.emplace(v, state++); } DEBUG_PRINTF("assigned %u states (from %zu vertices)\n", state, num_vertices(g)); - bc.numStates = state; + + return roleStateIndices; } static @@ -3915,10 +2581,9 @@ bool hasUsefulStops(const left_build_info &build) { static void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, - const set &eager_queues, - u32 leftfixBeginQueue, u32 leftfixCount, - vector &leftTable, u32 *laggedRoseCount, - size_t *history) { + const set &eager_queues, u32 leftfixBeginQueue, + u32 leftfixCount, vector &leftTable, + u32 *laggedRoseCount, size_t *history) { const RoseGraph &g = tbi.g; const CompileContext &cc = tbi.cc; @@ -3965,8 +2630,7 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, if (hasUsefulStops(lbi)) { assert(lbi.stopAlphabet.size() == N_CHARS); - left.stopTable = bc.engine_blob.add(lbi.stopAlphabet.begin(), - lbi.stopAlphabet.end()); + left.stopTable = bc.engine_blob.add_range(lbi.stopAlphabet); } assert(lbi.countingMiracleOffset || !lbi.countingMiracleCount); @@ -3985,11 +2649,11 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, } else { left.lagIndex = ROSE_OFFSET_INVALID; } - - DEBUG_PRINTF("rose %u is %s\n", left_index, - left.infix ? "infix" : "prefix"); } + DEBUG_PRINTF("rose %u is %s\n", left_index, + left.infix ? "infix" : "prefix"); + // Update squash mask. 
left.squash_mask &= lbi.squash_mask; @@ -4006,568 +2670,299 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, } static -void addPredBlockSingle(u32 pred_state, RoseProgram &pred_block, - RoseProgram &program) { - // Prepend an instruction to check the pred state is on. - const auto *end_inst = pred_block.end_instruction(); - pred_block.insert(begin(pred_block), - make_unique(pred_state, end_inst)); - program.add_block(move(pred_block)); -} - -static -void addPredBlocksAny(build_context &bc, map &pred_blocks, - RoseProgram &program) { - RoseProgram sparse_program; - - vector keys; - for (const u32 &key : pred_blocks | map_keys) { - keys.push_back(key); - } - - const RoseInstruction *end_inst = sparse_program.end_instruction(); - auto ri = make_unique(bc.numStates, keys, end_inst); - sparse_program.add_before_end(move(ri)); - - RoseProgram &block = pred_blocks.begin()->second; - sparse_program.add_before_end(move(block)); - program.add_block(move(sparse_program)); -} - -static -void addPredBlocksMulti(build_context &bc, map &pred_blocks, - RoseProgram &program) { - assert(!pred_blocks.empty()); - - RoseProgram sparse_program; - const RoseInstruction *end_inst = sparse_program.end_instruction(); - vector> jump_table; - - // BEGIN instruction. - auto ri_begin = - make_unique(bc.numStates, end_inst); - RoseInstrSparseIterBegin *begin_inst = ri_begin.get(); - sparse_program.add_before_end(move(ri_begin)); - - // NEXT instructions, one per pred program. - u32 prev_key = pred_blocks.begin()->first; - for (auto it = next(begin(pred_blocks)); it != end(pred_blocks); ++it) { - auto ri = make_unique(prev_key, begin_inst, - end_inst); - sparse_program.add_before_end(move(ri)); - prev_key = it->first; - } - - // Splice in each pred program after its BEGIN/NEXT. - auto out_it = begin(sparse_program); - for (auto &m : pred_blocks) { - u32 key = m.first; - RoseProgram &flat_prog = m.second; - assert(!flat_prog.empty()); - const size_t block_len = flat_prog.size() - 1; // without INSTR_END. - - assert(dynamic_cast(out_it->get()) || - dynamic_cast(out_it->get())); - out_it = sparse_program.insert(++out_it, move(flat_prog)); - - // Jump table target for this key is the beginning of the block we just - // spliced in. - jump_table.emplace_back(key, out_it->get()); - - assert(distance(begin(sparse_program), out_it) + block_len <= - sparse_program.size()); - advance(out_it, block_len); - } - - // Write the jump table back into the SPARSE_ITER_BEGIN instruction. - begin_inst->jump_table = move(jump_table); - - program.add_block(move(sparse_program)); -} - -static -void addPredBlocks(build_context &bc, map &pred_blocks, - RoseProgram &program) { - // Trim empty blocks, if any exist. - for (auto it = pred_blocks.begin(); it != pred_blocks.end();) { - if (it->second.empty()) { - it = pred_blocks.erase(it); - } else { - ++it; - } - } - - const size_t num_preds = pred_blocks.size(); - if (num_preds == 0) { - return; - } - - if (num_preds == 1) { - const auto head = pred_blocks.begin(); - addPredBlockSingle(head->first, head->second, program); - return; - } - - // First, see if all our blocks are equivalent, in which case we can - // collapse them down into one. 
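// [Editor's aside: illustrative sketch, not part of the original patch.]
// The runtime effect of the SPARSE_ITER_BEGIN/NEXT pair built by
// addPredBlocksMulti() above: walk the "on" role states in key order and
// run the block spliced in for each matching predecessor. The container
// types stand in for the multibit iterator machinery:
#include <cstdint>
#include <functional>
#include <map>
#include <set>

void dispatchPredBlocks(
        const std::set<uint32_t> &on_states,
        const std::map<uint32_t, std::function<void()>> &blocks) {
    for (const auto &kv : blocks) { // keyed by predecessor state index
        if (on_states.count(kv.first)) {
            kv.second();
        }
    }
}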
- const auto &blocks = pred_blocks | map_values; - if (all_of(begin(blocks), end(blocks), [&](const RoseProgram &block) { - return RoseProgramEquivalence()(*begin(blocks), block); - })) { - DEBUG_PRINTF("all blocks equiv\n"); - addPredBlocksAny(bc, pred_blocks, program); - return; - } - - addPredBlocksMulti(bc, pred_blocks, program); -} - -static -void makePushDelayedInstructions(const RoseBuildImpl &build, u32 final_id, - RoseProgram &program) { - const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); - const auto &arb_lit_info = **lit_infos.begin(); - if (arb_lit_info.delayed_ids.empty()) { - return; - } - - for (const auto &int_id : arb_lit_info.delayed_ids) { - const auto &child_literal = build.literals.right.at(int_id); - u32 child_id = build.literal_info[int_id].final_id; - u32 delay_index = child_id - build.delay_base_id; - - DEBUG_PRINTF("final_id=%u delay=%u child_id=%u\n", final_id, - child_literal.delay, child_id); - - auto ri = make_unique( - verify_u8(child_literal.delay), delay_index); - program.add_before_end(move(ri)); - } -} - -static -rose_group getFinalIdGroupsUnion(const RoseBuildImpl &build, u32 final_id) { - assert(contains(build.final_id_to_literal, final_id)); - const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); - - rose_group groups = 0; - for (const auto &li : lit_infos) { - groups |= li->group_mask; - } - return groups; -} - -static -void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 final_id, - RoseProgram &program) { - rose_group groups = getFinalIdGroupsUnion(build, final_id); - if (!groups) { - return; - } - program.add_before_end(make_unique(groups)); -} - -static -void makeCheckLitMaskInstruction(const RoseBuildImpl &build, build_context &bc, - u32 final_id, RoseProgram &program) { - assert(contains(build.final_id_to_literal, final_id)); - const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); - assert(!lit_infos.empty()); - - if (!lit_infos.front()->requires_benefits) { - return; - } - - vector look; - - assert(build.final_id_to_literal.at(final_id).size() == 1); - u32 lit_id = *build.final_id_to_literal.at(final_id).begin(); - const ue2_literal &s = build.literals.right.at(lit_id).s; - DEBUG_PRINTF("building mask for lit %u (final id %u) %s\n", lit_id, - final_id, dumpString(s).c_str()); - assert(s.length() <= MAX_MASK2_WIDTH); - s32 i = 0 - s.length(); - for (const auto &e : s) { - if (!e.nocase) { - look.emplace_back(verify_s8(i), e); - } - i++; - } - - assert(!look.empty()); - makeLookaroundInstruction(bc, look, program); -} - -static -void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 final_id, - RoseProgram &program) { - assert(contains(build.final_id_to_literal, final_id)); - const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); - - if (!lit_infos.front()->squash_group) { - return; - } - - rose_group groups = getFinalIdGroupsUnion(build, final_id); - if (!groups) { - return; - } - - DEBUG_PRINTF("final_id %u squashes 0x%llx\n", final_id, groups); - program.add_before_end( - make_unique(~groups)); // Note negated. 
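// [Editor's aside: illustrative sketch, not part of the original patch.]
// Because makeGroupSquashInstruction() stores ~groups ("Note negated."
// above), the runtime can squash with a single AND:
#include <cstdint>

uint64_t squashGroups(uint64_t active_groups, uint64_t stored_mask) {
    return active_groups & stored_mask; // clears exactly the squashed groups
}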
-} - -static -u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) { - const auto &lit_vertices = build.literal_info.at(lit_id).vertices; - assert(!lit_vertices.empty()); +RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc, + ProgramBuild &prog_build, u32 lit_id, + const map> &lit_edge_map, + bool is_anchored_replay_program) { + const vector no_edges; - u32 max_offset = 0; - for (const auto &v : lit_vertices) { - max_offset = max(max_offset, build.g[v].max_offset); + DEBUG_PRINTF("lit_id=%u\n", lit_id); + const vector *edges_ptr; + if (contains(lit_edge_map, lit_id)) { + edges_ptr = &lit_edge_map.at(lit_id); + } else { + /* literal may happen only in a delay context */ + edges_ptr = &no_edges; } - return max_offset; + return makeLiteralProgram(build, bc.leftfix_info, bc.suffixes, + bc.engine_info_by_queue, + bc.roleStateIndices, prog_build, lit_id, + *edges_ptr, is_anchored_replay_program); } static -void makeRecordAnchoredInstruction(const RoseBuildImpl &build, - build_context &bc, u32 final_id, - RoseProgram &program) { - assert(contains(build.final_id_to_literal, final_id)); - const auto &lit_ids = build.final_id_to_literal.at(final_id); - - // Must be anchored. +RoseProgram makeFragmentProgram(const RoseBuildImpl &build, build_context &bc, + ProgramBuild &prog_build, + const vector &lit_ids, + const map> &lit_edge_map) { assert(!lit_ids.empty()); - if (build.literals.right.at(*begin(lit_ids)).table != ROSE_ANCHORED) { - return; - } - - // If this anchored literal can never match past - // floatingMinLiteralMatchOffset, we will never have to record it. - u32 max_offset = 0; - for (u32 lit_id : lit_ids) { - assert(build.literals.right.at(lit_id).table == ROSE_ANCHORED); - max_offset = max(max_offset, findMaxOffset(build, lit_id)); - } - - if (max_offset <= bc.floatingMinLiteralMatchOffset) { - return; - } - - program.add_before_end(make_unique(final_id)); -} - -static -u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) { - const auto &lit_vertices = build.literal_info.at(lit_id).vertices; - assert(!lit_vertices.empty()); - u32 min_offset = UINT32_MAX; - for (const auto &v : lit_vertices) { - min_offset = min(min_offset, build.g[v].min_offset); + vector blocks; + for (const auto &lit_id : lit_ids) { + auto prog = makeLiteralProgram(build, bc, prog_build, lit_id, + lit_edge_map, false); + blocks.push_back(move(prog)); } - return min_offset; + return assembleProgramBlocks(move(blocks)); } +/** + * \brief Returns a map from literal ID to a list of edges leading into + * vertices with that literal ID. + */ static -void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc, - u32 final_id, - const vector &lit_edges, - RoseProgram &program) { - if (lit_edges.empty()) { - return; - } - - if (bc.floatingMinLiteralMatchOffset == 0) { - return; - } - - RoseVertex v = target(lit_edges.front(), build.g); - if (!build.isFloating(v)) { - return; - } - - const auto &lit_ids = build.final_id_to_literal.at(final_id); - if (lit_ids.empty()) { - return; - } +map> findEdgesByLiteral(const RoseBuildImpl &build) { + // Use a set of edges while building the map to cull duplicates. 
+ map> unique_lit_edge_map; - size_t min_len = SIZE_MAX; - u32 min_offset = UINT32_MAX; - for (u32 lit_id : lit_ids) { - const auto &lit = build.literals.right.at(lit_id); - size_t lit_min_len = lit.elength(); - u32 lit_min_offset = findMinOffset(build, lit_id); - DEBUG_PRINTF("lit_id=%u has min_len=%zu, min_offset=%u\n", lit_id, - lit_min_len, lit_min_offset); - min_len = min(min_len, lit_min_len); - min_offset = min(min_offset, lit_min_offset); + const auto &g = build.g; + for (const auto &e : edges_range(g)) { + const auto &v = target(e, g); + for (const auto &lit_id : g[v].literals) { + unique_lit_edge_map[lit_id].insert(e); + } } - DEBUG_PRINTF("final_id=%u has min_len=%zu, min_offset=%u, " - "global min is %u\n", final_id, min_len, min_offset, - bc.floatingMinLiteralMatchOffset); - - // If we can't match before the min offset, we don't need the check. - if (min_len >= bc.floatingMinLiteralMatchOffset) { - DEBUG_PRINTF("no need for check, min is %u\n", - bc.floatingMinLiteralMatchOffset); - return; + // Build output map, sorting edges by (source, target) vertex index. + map> lit_edge_map; + for (const auto &m : unique_lit_edge_map) { + auto edge_list = vector(begin(m.second), end(m.second)); + sort(begin(edge_list), end(edge_list), + [&g](const RoseEdge &a, const RoseEdge &b) { + return tie(g[source(a, g)].index, g[target(a, g)].index) < + tie(g[source(b, g)].index, g[target(b, g)].index); + }); + lit_edge_map.emplace(m.first, std::move(edge_list)); } - assert(min_offset >= bc.floatingMinLiteralMatchOffset); - assert(min_offset < UINT32_MAX); - - DEBUG_PRINTF("adding lit early check, min_offset=%u\n", min_offset); - program.add_before_end(make_unique(min_offset)); + return lit_edge_map; } static -void makeCheckLiteralInstruction(const RoseBuildImpl &build, - const build_context &bc, u32 final_id, - RoseProgram &program) { - const auto &lits = build.final_id_to_literal.at(final_id); - if (lits.size() != 1) { - // Long literals should not share a final_id. - assert(all_of(begin(lits), end(lits), [&](u32 lit_id) { - const rose_literal_id &lit = build.literals.right.at(lit_id); - return lit.table != ROSE_FLOATING || - lit.s.length() <= bc.longLitLengthThreshold; - })); - return; - } - - u32 lit_id = *lits.begin(); - if (build.isDelayed(lit_id)) { - return; - } - - const rose_literal_id &lit = build.literals.right.at(lit_id); - if (lit.table != ROSE_FLOATING) { - return; - } - assert(bc.longLitLengthThreshold > 0); - if (lit.s.length() <= bc.longLitLengthThreshold) { - return; - } - - // Check resource limits as well. 
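// [Editor's aside: illustrative sketch, not part of the original patch.]
// The fragment rule used by getFragment() above: a literal longer than
// the short-literal limit contributes only its trailing bytes to the
// matcher, and the full literal is confirmed afterwards by the Rose
// program. std::string stands in for ue2_literal; the default limit of 8
// matches the changelog's "literals of at most eight characters":
#include <cstddef>
#include <string>

std::string fragmentOf(const std::string &lit, size_t len_max = 8) {
    if (lit.size() <= len_max) {
        return lit; // whole literal fits in the matcher
    }
    return lit.substr(lit.size() - len_max); // trailing bytes only
}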
- if (lit.s.length() > build.cc.grey.limitLiteralLength) { - throw ResourceLimitError(); - } - - unique_ptr ri; - if (lit.s.any_nocase()) { - ri = make_unique(lit.s.get_string()); - } else { - ri = make_unique(lit.s.get_string()); +bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) { + assert(lit_id < build.literal_info.size()); + const auto &info = build.literal_info[lit_id]; + if (!info.vertices.empty()) { + return true; } - program.add_before_end(move(ri)); -} -static -bool hasDelayedLiteral(RoseBuildImpl &build, - const vector &lit_edges) { - auto is_delayed = bind(&RoseBuildImpl::isDelayed, &build, _1); - for (const auto &e : lit_edges) { - auto v = target(e, build.g); - const auto &lits = build.g[v].literals; - if (any_of(begin(lits), end(lits), is_delayed)) { + for (const u32 &delayed_id : info.delayed_ids) { + assert(delayed_id < build.literal_info.size()); + const rose_literal_info &delayed_info = build.literal_info[delayed_id]; + if (!delayed_info.vertices.empty()) { return true; } } + + DEBUG_PRINTF("literal %u has no refs\n", lit_id); return false; } static -RoseProgram buildLitInitialProgram(RoseBuildImpl &build, build_context &bc, - u32 final_id, - const vector &lit_edges) { - RoseProgram program; - - // No initial program for EOD. - if (final_id == MO_INVALID_IDX) { - return program; +rose_literal_id getFragment(const rose_literal_id &lit) { + if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX) { + DEBUG_PRINTF("whole lit is frag\n"); + return lit; } - DEBUG_PRINTF("final_id %u\n", final_id); + rose_literal_id frag = lit; + frag.s = frag.s.substr(frag.s.length() - ROSE_SHORT_LITERAL_LEN_MAX); - // Check long literal info. - makeCheckLiteralInstruction(build, bc, final_id, program); - - // Check lit mask. - makeCheckLitMaskInstruction(build, bc, final_id, program); - - // Check literal groups. This is an optimisation that we only perform for - // delayed literals, as their groups may be switched off; ordinarily, we - // can trust the HWLM matcher. - if (hasDelayedLiteral(build, lit_edges)) { - makeGroupCheckInstruction(build, final_id, program); - } - - // Add instructions for pushing delayed matches, if there are any. - makePushDelayedInstructions(build, final_id, program); - - // Add pre-check for early literals in the floating table. - makeCheckLitEarlyInstruction(build, bc, final_id, lit_edges, program); - - return program; + DEBUG_PRINTF("fragment: %s\n", dumpString(frag.s).c_str()); + return frag; } static -RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, - u32 final_id, - const vector &lit_edges) { - const auto &g = build.g; +vector groupByFragment(const RoseBuildImpl &build) { + vector fragments; + u32 frag_id = 0; - DEBUG_PRINTF("final id %u, %zu lit edges\n", final_id, lit_edges.size()); + struct FragmentInfo { + vector lit_ids; + rose_group groups = 0; + }; - RoseProgram program; + map frag_info; - // Predecessor state id -> program block. - map pred_blocks; - - // Construct sparse iter sub-programs. - for (const auto &e : lit_edges) { - const auto &u = source(e, g); - if (build.isAnyStart(u)) { - continue; // Root roles are not handled with sparse iterator. 
- } - DEBUG_PRINTF("sparse iter edge (%zu,%zu)\n", g[u].index, - g[target(e, g)].index); - assert(contains(bc.roleStateIndices, u)); - u32 pred_state = bc.roleStateIndices.at(u); - pred_blocks[pred_state].add_block(makeProgram(build, bc, e)); - } + for (u32 lit_id = 0; lit_id < build.literals.size(); lit_id++) { + const auto &lit = build.literals.at(lit_id); + const auto &info = build.literal_info.at(lit_id); - // Add blocks to deal with non-root edges (triggered by sparse iterator or - // mmbit_isset checks). - addPredBlocks(bc, pred_blocks, program); + if (!isUsedLiteral(build, lit_id)) { + DEBUG_PRINTF("lit %u is unused\n", lit_id); + continue; + } - // Add blocks to handle root roles. - for (const auto &e : lit_edges) { - const auto &u = source(e, g); - if (!build.isAnyStart(u)) { + if (lit.table == ROSE_EVENT) { + DEBUG_PRINTF("lit %u is an event\n", lit_id); continue; } - DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].index, - g[target(e, g)].index); - program.add_block(makeProgram(build, bc, e)); - } - if (final_id != MO_INVALID_IDX) { - RoseProgram root_block; + auto groups = info.group_mask; - // Literal may squash groups. - makeGroupSquashInstruction(build, final_id, root_block); + if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) { + fragments.emplace_back(frag_id, groups, lit_id); + frag_id++; + continue; + } - // Literal may be anchored and need to be recorded. - makeRecordAnchoredInstruction(build, bc, final_id, root_block); + DEBUG_PRINTF("fragment candidate: lit_id=%u %s\n", lit_id, + dumpString(lit.s).c_str()); + auto &fi = frag_info[getFragment(lit)]; + fi.lit_ids.push_back(lit_id); + fi.groups |= groups; + } - program.add_block(move(root_block)); + for (auto &m : frag_info) { + auto &fi = m.second; + DEBUG_PRINTF("frag %s -> ids: %s\n", dumpString(m.first.s).c_str(), + as_string_list(fi.lit_ids).c_str()); + fragments.emplace_back(frag_id, fi.groups, move(fi.lit_ids)); + frag_id++; + assert(frag_id == fragments.size()); } - // Construct initial program up front, as its early checks must be able to - // jump to end and terminate processing for this literal. - auto lit_program = buildLitInitialProgram(build, bc, final_id, lit_edges); - lit_program.add_before_end(move(program)); - return lit_program; + return fragments; } +/** + * \brief Build the interpreter programs for each literal. + */ static -u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id, - const vector &lit_edges) { - RoseProgram program = buildLiteralProgram(build, bc, final_id, lit_edges); - if (program.empty()) { - return 0; - } - applyFinalSpecialisation(program); - return writeProgram(bc, move(program)); -} +void buildLiteralPrograms(const RoseBuildImpl &build, + vector &fragments, build_context &bc, + ProgramBuild &prog_build) { + DEBUG_PRINTF("%zu fragments\n", fragments.size()); + auto lit_edge_map = findEdgesByLiteral(build); -static -u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc, - u32 final_id) { - const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); - const auto &arb_lit_info = **lit_infos.begin(); - if (arb_lit_info.delayed_ids.empty()) { - return 0; // No delayed IDs, no work to do. 
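// [Editor's aside: illustrative sketch, not part of the original patch.]
// writeDelayPrograms() and writeAnchoredPrograms() later in this change
// both use the same interning idiom: equal programs are written to equal
// bytecode offsets, so a map from offset to id collapses duplicates to a
// single delay/anchored id. Generic form:
#include <cstdint>
#include <unordered_map>
#include <vector>

uint32_t internProgram(uint32_t offset, std::vector<uint32_t> &programs,
                       std::unordered_map<uint32_t, uint32_t> &cache) {
    auto it = cache.find(offset);
    if (it != cache.end()) {
        return it->second; // reuse the id assigned to this offset
    }
    uint32_t id = static_cast<uint32_t>(programs.size());
    programs.push_back(offset);
    cache.emplace(offset, id);
    return id;
}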
- } + for (auto &frag : fragments) { + DEBUG_PRINTF("frag_id=%u, lit_ids=[%s]\n", frag.fragment_id, + as_string_list(frag.lit_ids).c_str()); - RoseProgram program; - makeCheckLitMaskInstruction(build, bc, final_id, program); - makePushDelayedInstructions(build, final_id, program); - assert(!program.empty()); - applyFinalSpecialisation(program); - return writeProgram(bc, move(program)); + auto lit_prog = makeFragmentProgram(build, bc, prog_build, frag.lit_ids, + lit_edge_map); + frag.lit_program_offset = writeProgram(bc, move(lit_prog)); + + // We only do delayed rebuild in streaming mode. + if (!build.cc.streaming) { + continue; + } + + auto rebuild_prog = makeDelayRebuildProgram(build, prog_build, + frag.lit_ids); + frag.delay_program_offset = writeProgram(bc, move(rebuild_prog)); + } } +/** + * \brief Write delay replay programs to the bytecode. + * + * Returns the offset of the beginning of the program array, and the number of + * programs. + */ static -map> findEdgesByLiteral(const RoseBuildImpl &build) { - // Use a set of edges while building the map to cull duplicates. - map> unique_lit_edge_map; +pair writeDelayPrograms(const RoseBuildImpl &build, + const vector &fragments, + build_context &bc, + ProgramBuild &prog_build) { + auto lit_edge_map = findEdgesByLiteral(build); - const auto &g = build.g; - for (const auto &e : edges_range(g)) { - const auto &v = target(e, g); - for (const auto &lit_id : g[v].literals) { - assert(lit_id < build.literal_info.size()); - u32 final_id = build.literal_info.at(lit_id).final_id; - if (final_id == MO_INVALID_IDX) { - // Unused, special report IDs are handled elsewhere. - continue; + vector programs; // program offsets indexed by (delayed) lit id + unordered_map cache; // program offsets we have already seen + + for (const auto &frag : fragments) { + for (const u32 lit_id : frag.lit_ids) { + const auto &info = build.literal_info.at(lit_id); + + for (const auto &delayed_lit_id : info.delayed_ids) { + DEBUG_PRINTF("lit id %u delay id %u\n", lit_id, delayed_lit_id); + auto prog = makeLiteralProgram(build, bc, prog_build, + delayed_lit_id, lit_edge_map, + false); + u32 offset = writeProgram(bc, move(prog)); + + u32 delay_id; + auto it = cache.find(offset); + if (it != end(cache)) { + delay_id = it->second; + DEBUG_PRINTF("reusing delay_id %u for offset %u\n", + delay_id, offset); + } else { + delay_id = verify_u32(programs.size()); + programs.push_back(offset); + cache.emplace(offset, delay_id); + DEBUG_PRINTF("assigned new delay_id %u for offset %u\n", + delay_id, offset); + } + prog_build.delay_programs.emplace(delayed_lit_id, delay_id); } - unique_lit_edge_map[final_id].insert(e); } } - // Build output map, sorting edges by (source, target) vertex index. - map> lit_edge_map; - for (const auto &m : unique_lit_edge_map) { - auto edge_list = vector(begin(m.second), end(m.second)); - sort(begin(edge_list), end(edge_list), - [&g](const RoseEdge &a, const RoseEdge &b) { - return tie(g[source(a, g)].index, g[target(a, g)].index) < - tie(g[source(b, g)].index, g[target(b, g)].index); - }); - lit_edge_map.emplace(m.first, edge_list); - } - - return lit_edge_map; + DEBUG_PRINTF("%zu delay programs\n", programs.size()); + return {bc.engine_blob.add_range(programs), verify_u32(programs.size())}; } /** - * \brief Build the interpreter programs for each literal. + * \brief Write anchored replay programs to the bytecode. * - * Returns the base of the literal program list and the base of the delay - * rebuild program list. 
+ * Returns the offset of the beginning of the program array, and the number of + * programs. */ static -pair buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { - const u32 num_literals = build.final_id_to_literal.size(); +pair writeAnchoredPrograms(const RoseBuildImpl &build, + const vector &fragments, + build_context &bc, + ProgramBuild &prog_build) { auto lit_edge_map = findEdgesByLiteral(build); - bc.litPrograms.resize(num_literals); - vector delayRebuildPrograms(num_literals); + vector programs; // program offsets indexed by anchored id + unordered_map cache; // program offsets we have already seen - for (u32 finalId = 0; finalId != num_literals; ++finalId) { - const auto &lit_edges = lit_edge_map[finalId]; + for (const auto &frag : fragments) { + for (const u32 lit_id : frag.lit_ids) { + const auto &lit = build.literals.at(lit_id); - bc.litPrograms[finalId] = - writeLiteralProgram(build, bc, finalId, lit_edges); - delayRebuildPrograms[finalId] = - buildDelayRebuildProgram(build, bc, finalId); - } + if (lit.table != ROSE_ANCHORED) { + continue; + } + + // If this anchored literal can never match past + // floatingMinLiteralMatchOffset, we will never have to record it. + if (findMaxOffset(build, lit_id) + <= prog_build.floatingMinLiteralMatchOffset) { + DEBUG_PRINTF("can never match after " + "floatingMinLiteralMatchOffset=%u\n", + prog_build.floatingMinLiteralMatchOffset); + continue; + } - u32 litProgramsOffset = - bc.engine_blob.add(begin(bc.litPrograms), end(bc.litPrograms)); - u32 delayRebuildProgramsOffset = bc.engine_blob.add( - begin(delayRebuildPrograms), end(delayRebuildPrograms)); + auto prog = makeLiteralProgram(build, bc, prog_build, lit_id, + lit_edge_map, true); + u32 offset = writeProgram(bc, move(prog)); + DEBUG_PRINTF("lit_id=%u -> anch prog at %u\n", lit_id, offset); + + u32 anch_id; + auto it = cache.find(offset); + if (it != end(cache)) { + anch_id = it->second; + DEBUG_PRINTF("reusing anch_id %u for offset %u\n", anch_id, + offset); + } else { + anch_id = verify_u32(programs.size()); + programs.push_back(offset); + cache.emplace(offset, anch_id); + DEBUG_PRINTF("assigned new anch_id %u for offset %u\n", anch_id, + offset); + } + prog_build.anchored_programs.emplace(lit_id, anch_id); + } + } - return {litProgramsOffset, delayRebuildProgramsOffset}; + DEBUG_PRINTF("%zu anchored programs\n", programs.size()); + return {bc.engine_blob.add_range(programs), verify_u32(programs.size())}; } /** @@ -4598,17 +2993,14 @@ set findEngineReports(const RoseBuildImpl &build) { } static -pair buildReportPrograms(RoseBuildImpl &build, build_context &bc) { +pair buildReportPrograms(const RoseBuildImpl &build, + build_context &bc) { const auto reports = findEngineReports(build); vector programs; programs.reserve(reports.size()); for (ReportID id : reports) { - RoseProgram program; - const bool has_som = false; - makeCatchupMpv(build, bc, id, program); - makeReport(build, id, has_som, program); - applyFinalSpecialisation(program); + auto program = makeReportProgram(build, bc.needs_mpv_catchup, id); u32 offset = writeProgram(bc, move(program)); programs.push_back(offset); build.rm.setProgramOffset(id, offset); @@ -4616,41 +3008,11 @@ pair buildReportPrograms(RoseBuildImpl &build, build_context &bc) { programs.back(), program.size()); } - u32 offset = bc.engine_blob.add(begin(programs), end(programs)); + u32 offset = bc.engine_blob.add_range(programs); u32 count = verify_u32(programs.size()); return {offset, count}; } -static -RoseProgram makeEodAnchorProgram(RoseBuildImpl 
&build, build_context &bc, - const RoseEdge &e, const bool multiple_preds) { - const RoseGraph &g = build.g; - const RoseVertex v = target(e, g); - - RoseProgram program; - - if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { - makeRoleCheckBounds(build, v, e, program); - } - - if (multiple_preds) { - // Only necessary when there is more than one pred. - makeRoleCheckNotHandled(bc, v, program); - } - - const auto &reports = g[v].reports; - makeCatchup(build, bc, reports, program); - - const bool has_som = false; - RoseProgram report_block; - for (const auto &id : reports) { - makeReport(build, id, has_som, report_block); - } - program.add_before_end(move(report_block)); - - return program; -} - static bool hasEodAnchoredSuffix(const RoseBuildImpl &build) { const RoseGraph &g = build.g; @@ -4677,8 +3039,9 @@ bool hasEodMatcher(const RoseBuildImpl &build) { } static -void addEodAnchorProgram(RoseBuildImpl &build, build_context &bc, - bool in_etable, RoseProgram &program) { +void addEodAnchorProgram(const RoseBuildImpl &build, const build_context &bc, + ProgramBuild &prog_build, bool in_etable, + RoseProgram &program) { const RoseGraph &g = build.g; // Predecessor state id -> program block. @@ -4701,7 +3064,8 @@ void addEodAnchorProgram(RoseBuildImpl &build, build_context &bc, continue; } if (canEagerlyReportAtEod(build, e)) { - DEBUG_PRINTF("already done report for vertex %zu\n", g[u].index); + DEBUG_PRINTF("already done report for vertex %zu\n", + g[u].index); continue; } edge_list.push_back(e); @@ -4713,16 +3077,16 @@ void addEodAnchorProgram(RoseBuildImpl &build, build_context &bc, assert(contains(bc.roleStateIndices, u)); u32 pred_state = bc.roleStateIndices.at(u); pred_blocks[pred_state].add_block( - makeEodAnchorProgram(build, bc, e, multiple_preds)); + makeEodAnchorProgram(build, prog_build, e, multiple_preds)); } } - addPredBlocks(bc, pred_blocks, program); + addPredBlocks(pred_blocks, bc.roleStateIndices.size(), program); } static -void addEodEventProgram(RoseBuildImpl &build, build_context &bc, - RoseProgram &program) { +void addEodEventProgram(const RoseBuildImpl &build, build_context &bc, + ProgramBuild &prog_build, RoseProgram &program) { if (build.eod_event_literal_id == MO_INVALID_IDX) { return; } @@ -4748,61 +3112,31 @@ void addEodEventProgram(RoseBuildImpl &build, build_context &bc, tie(g[source(b, g)].index, g[target(b, g)].index); }); - program.add_block( - buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list)); -} - -static -void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program) { - if (!eodNfaIterOffset) { - return; - } - - RoseProgram block; - block.add_before_end(make_unique(eodNfaIterOffset)); - program.add_block(move(block)); -} - -static -void addSuffixesEodProgram(const RoseBuildImpl &build, RoseProgram &program) { - if (!hasEodAnchoredSuffix(build)) { - return; - } - - RoseProgram block; - block.add_before_end(make_unique()); - program.add_block(move(block)); -} - -static -void addMatcherEodProgram(const RoseBuildImpl &build, RoseProgram &program) { - if (!hasEodMatcher(build)) { - return; - } - - RoseProgram block; - block.add_before_end(make_unique()); + auto block = makeLiteralProgram(build, bc.leftfix_info, bc.suffixes, + bc.engine_info_by_queue, + bc.roleStateIndices, prog_build, + build.eod_event_literal_id, edge_list, + false); program.add_block(move(block)); } static -u32 writeEodProgram(RoseBuildImpl &build, build_context &bc, - u32 eodNfaIterOffset) { +RoseProgram makeEodProgram(const RoseBuildImpl &build, build_context &bc, + ProgramBuild 
&prog_build, u32 eodNfaIterOffset) { RoseProgram program; - addEodEventProgram(build, bc, program); + addEodEventProgram(build, bc, prog_build, program); addEnginesEodProgram(eodNfaIterOffset, program); - addEodAnchorProgram(build, bc, false, program); - addMatcherEodProgram(build, program); - addEodAnchorProgram(build, bc, true, program); - addSuffixesEodProgram(build, program); - - if (program.empty()) { - return 0; + addEodAnchorProgram(build, bc, prog_build, false, program); + if (hasEodMatcher(build)) { + addMatcherEodProgram(program); + } + addEodAnchorProgram(build, bc, prog_build, true, program); + if (hasEodAnchoredSuffix(build)) { + addSuffixesEodProgram(program); } - applyFinalSpecialisation(program); - return writeProgram(bc, move(program)); + return program; } static @@ -4834,7 +3168,7 @@ void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) { assert(g[v].min_offset <= g[v].max_offset); for (u32 lit_id : g[v].literals) { - const rose_literal_id &key = build.literals.right.at(lit_id); + const rose_literal_id &key = build.literals.at(lit_id); u32 max_d = g[v].max_offset; u32 min_d = g[v].min_offset; @@ -4907,9 +3241,8 @@ void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) { } static -u32 buildEagerQueueIter(const set &eager, u32 leftfixBeginQueue, - u32 queue_count, - build_context &bc) { +u32 writeEagerQueueIter(const set &eager, u32 leftfixBeginQueue, + u32 queue_count, RoseEngineBlob &engine_blob) { if (eager.empty()) { return 0; } @@ -4920,182 +3253,13 @@ u32 buildEagerQueueIter(const set &eager, u32 leftfixBeginQueue, vec.push_back(q - leftfixBeginQueue); } - vector iter; - mmbBuildSparseIterator(iter, vec, queue_count - leftfixBeginQueue); - return bc.engine_blob.add_iterator(iter); -} - -static -void allocateFinalIdToSet(RoseBuildImpl &build, const set &lits, - size_t longLitLengthThreshold, u32 *next_final_id) { - const auto &g = build.g; - auto &literal_info = build.literal_info; - auto &final_id_to_literal = build.final_id_to_literal; - - /* We can allocate the same final id to multiple literals of the same type - * if they share the same vertex set and trigger the same delayed literal - * ids and squash the same roles and have the same group squashing - * behaviour. Benefits literals cannot be merged. */ - - assert(longLitLengthThreshold > 0); - - for (u32 int_id : lits) { - rose_literal_info &curr_info = literal_info[int_id]; - const rose_literal_id &lit = build.literals.right.at(int_id); - const auto &verts = curr_info.vertices; - - // Literals with benefits cannot be merged. - if (curr_info.requires_benefits) { - DEBUG_PRINTF("id %u has benefits\n", int_id); - goto assign_new_id; - } - - // Long literals (that require CHECK_LITERAL instructions) cannot be - // merged. 
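// [Editor's aside: illustrative sketch, not part of the original patch.]
// writeEagerQueueIter() above rebases queue indices against
// leftfixBeginQueue so the sparse iterator only has to span the leftfix
// queue range rather than every queue in the engine:
#include <cstdint>
#include <set>
#include <vector>

std::vector<uint32_t> relativeQueues(const std::set<uint32_t> &eager,
                                     uint32_t leftfix_begin_queue) {
    std::vector<uint32_t> vec;
    for (uint32_t q : eager) {
        vec.push_back(q - leftfix_begin_queue); // rebased for the iterator
    }
    return vec;
}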
- if (lit.s.length() > longLitLengthThreshold) { - DEBUG_PRINTF("id %u is a long literal\n", int_id); - goto assign_new_id; - } - - if (!verts.empty() && curr_info.delayed_ids.empty()) { - vector cand; - insert(&cand, cand.end(), g[*verts.begin()].literals); - for (auto v : verts) { - vector temp; - set_intersection(cand.begin(), cand.end(), - g[v].literals.begin(), - g[v].literals.end(), - inserter(temp, temp.end())); - cand.swap(temp); - } - - for (u32 cand_id : cand) { - if (cand_id >= int_id) { - break; - } - - const auto &cand_info = literal_info[cand_id]; - const auto &cand_lit = build.literals.right.at(cand_id); - - if (cand_lit.s.length() > longLitLengthThreshold) { - continue; - } - - if (cand_info.requires_benefits) { - continue; - } - - if (!cand_info.delayed_ids.empty()) { - /* TODO: allow cases where delayed ids are equivalent. - * This is awkward currently as the have not had their - * final ids allocated yet */ - continue; - } - - if (lits.find(cand_id) == lits.end() - || cand_info.vertices.size() != verts.size() - || cand_info.squash_group != curr_info.squash_group) { - continue; - } - - /* if we are squashing groups we need to check if they are the - * same group */ - if (cand_info.squash_group - && cand_info.group_mask != curr_info.group_mask) { - continue; - } - - u32 final_id = cand_info.final_id; - assert(final_id != MO_INVALID_IDX); - assert(curr_info.final_id == MO_INVALID_IDX); - curr_info.final_id = final_id; - final_id_to_literal[final_id].insert(int_id); - goto next_lit; - } - } - - assign_new_id: - /* oh well, have to give it a fresh one, hang the expense */ - DEBUG_PRINTF("allocating final id %u to %u\n", *next_final_id, int_id); - assert(curr_info.final_id == MO_INVALID_IDX); - curr_info.final_id = *next_final_id; - final_id_to_literal[*next_final_id].insert(int_id); - (*next_final_id)++; - next_lit:; - } -} - -static -bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) { - assert(lit_id < build.literal_info.size()); - const auto &info = build.literal_info[lit_id]; - if (!info.vertices.empty()) { - return true; - } - - for (const u32 &delayed_id : info.delayed_ids) { - assert(delayed_id < build.literal_info.size()); - const rose_literal_info &delayed_info = build.literal_info[delayed_id]; - if (!delayed_info.vertices.empty()) { - return true; - } - } - - DEBUG_PRINTF("literal %u has no refs\n", lit_id); - return false; -} - -/** \brief Allocate final literal IDs for all literals. */ -static -void allocateFinalLiteralId(RoseBuildImpl &build, - size_t longLitLengthThreshold) { - set anch; - set norm; - set delay; - - /* undelayed ids come first */ - assert(build.final_id_to_literal.empty()); - u32 next_final_id = 0; - for (u32 i = 0; i < build.literal_info.size(); i++) { - assert(!build.hasFinalId(i)); - - if (!isUsedLiteral(build, i)) { - /* what is this literal good for? absolutely nothing */ - continue; - } - - // The special EOD event literal has its own program and does not need - // a real literal ID. 
- if (i == build.eod_event_literal_id) { - assert(build.eod_event_literal_id != MO_INVALID_IDX); - continue; - } - - if (build.isDelayed(i)) { - assert(!build.literal_info[i].requires_benefits); - delay.insert(i); - } else if (build.literals.right.at(i).table == ROSE_ANCHORED) { - anch.insert(i); - } else { - norm.insert(i); - } - } - - /* normal lits */ - allocateFinalIdToSet(build, norm, longLitLengthThreshold, &next_final_id); - - /* next anchored stuff */ - build.anchored_base_id = next_final_id; - allocateFinalIdToSet(build, anch, longLitLengthThreshold, &next_final_id); - - /* delayed ids come last */ - build.delay_base_id = next_final_id; - allocateFinalIdToSet(build, delay, longLitLengthThreshold, &next_final_id); + auto iter = mmbBuildSparseIterator(vec, queue_count - leftfixBeginQueue); + return engine_blob.add_iterator(iter); } static -aligned_unique_ptr addSmallWriteEngine(RoseBuildImpl &build, - aligned_unique_ptr rose) { +bytecode_ptr addSmallWriteEngine(const RoseBuildImpl &build, + bytecode_ptr rose) { assert(rose); if (roseIsPureLiteral(rose.get())) { @@ -5110,14 +3274,14 @@ aligned_unique_ptr addSmallWriteEngine(RoseBuildImpl &build, return rose; } - const size_t mainSize = roseSize(rose.get()); - const size_t smallWriteSize = smwrSize(smwr_engine.get()); + const size_t mainSize = rose.size(); + const size_t smallWriteSize = smwr_engine.size(); DEBUG_PRINTF("adding smwr engine, size=%zu\n", smallWriteSize); const size_t smwrOffset = ROUNDUP_CL(mainSize); const size_t newSize = smwrOffset + smallWriteSize; - auto rose2 = aligned_zmalloc_unique(newSize); + auto rose2 = make_zeroed_bytecode_ptr(newSize, 64); char *ptr = (char *)rose2.get(); memcpy(ptr, rose.get(), mainSize); memcpy(ptr + smwrOffset, smwr_engine.get(), smallWriteSize); @@ -5137,9 +3301,8 @@ pair floatingCountAndMaxLen(const RoseBuildImpl &build) { size_t num = 0; size_t max_len = 0; - for (const auto &e : build.literals.right) { - const u32 id = e.first; - const rose_literal_id &lit = e.second; + for (u32 id = 0; id < build.literals.size(); id++) { + const rose_literal_id &lit = build.literals.at(id); if (lit.table != ROSE_FLOATING) { continue; @@ -5164,10 +3327,11 @@ size_t calcLongLitThreshold(const RoseBuildImpl &build, const size_t historyRequired) { const auto &cc = build.cc; - // In block mode, we should only use the long literal support for literals - // that cannot be handled by HWLM. + // In block mode, we don't have history, so we don't need long literal + // support and can just use "medium-length" literal confirm. TODO: we could + // specialize further and have a block mode literal confirm instruction. if (!cc.streaming) { - return HWLM_LITERAL_MAX_LEN; + return SIZE_MAX; } size_t longLitLengthThreshold = ROSE_LONG_LITERAL_THRESHOLD_MIN; @@ -5195,7 +3359,40 @@ size_t calcLongLitThreshold(const RoseBuildImpl &build, return longLitLengthThreshold; } -aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { +static +map makeLeftQueueMap(const RoseGraph &g, + const map &leftfix_info) { + map lqm; + for (const auto &e : leftfix_info) { + if (e.second.has_lookaround) { + continue; + } + DEBUG_PRINTF("%zu: using queue %u\n", g[e.first].index, e.second.queue); + assert(e.second.queue != INVALID_QUEUE); + left_id left(g[e.first].left); + assert(!contains(lqm, left) || lqm[left] == e.second.queue); + lqm[left] = e.second.queue; + } + + return lqm; +} + +bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { + // We keep all our offsets, counts etc. 
in a prototype RoseEngine which we + // will copy into the real one once it is allocated: we can't do this + // until we know how big it will be. + RoseEngine proto; + memset(&proto, 0, sizeof(proto)); + + // Set scanning mode. + if (!cc.streaming) { + proto.mode = HS_MODE_BLOCK; + } else if (cc.vectored) { + proto.mode = HS_MODE_VECTORED; + } else { + proto.mode = HS_MODE_STREAM; + } + DerivedBoundaryReports dboundary(boundary); size_t historyRequired = calcHistoryRequired(); // Updated by HWLM. @@ -5203,49 +3400,43 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { historyRequired); DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold); - allocateFinalLiteralId(*this, longLitLengthThreshold); + vector fragments = groupByFragment(*this); - auto anchored_dfas = buildAnchoredDfas(*this); + auto anchored_dfas = buildAnchoredDfas(*this, fragments); build_context bc; - bc.floatingMinLiteralMatchOffset = - findMinFloatingLiteralMatch(*this, anchored_dfas); - bc.longLitLengthThreshold = longLitLengthThreshold; - bc.needs_catchup = needsCatchup(*this, anchored_dfas); - recordResources(bc.resources, *this); + u32 floatingMinLiteralMatchOffset + = findMinFloatingLiteralMatch(*this, anchored_dfas); + recordResources(bc.resources, *this, fragments); if (!anchored_dfas.empty()) { bc.resources.has_anchored = true; } bc.needs_mpv_catchup = needsMpvCatchup(*this); - bc.vertex_group_map = getVertexGroupMap(*this); - bc.squashable_groups = getSquashableGroups(*this); - auto boundary_out = makeBoundaryPrograms(*this, bc, boundary, dboundary); + makeBoundaryPrograms(*this, bc, boundary, dboundary, proto.boundary); - u32 reportProgramOffset; - u32 reportProgramCount; - tie(reportProgramOffset, reportProgramCount) = + tie(proto.reportProgramOffset, proto.reportProgramCount) = buildReportPrograms(*this, bc); // Build NFAs - set no_retrigger_queues; bool mpv_as_outfix; prepMpv(*this, bc, &historyRequired, &mpv_as_outfix); - u32 outfixBeginQueue = qif.allocated_count(); + proto.outfixBeginQueue = qif.allocated_count(); if (!prepOutfixes(*this, bc, &historyRequired)) { return nullptr; } - u32 outfixEndQueue = qif.allocated_count(); - u32 leftfixBeginQueue = outfixEndQueue; + proto.outfixEndQueue = qif.allocated_count(); + proto.leftfixBeginQueue = proto.outfixEndQueue; + set no_retrigger_queues; set eager_queues; /* Note: buildNfas may reduce the lag for vertices that have prefixes */ if (!buildNfas(*this, bc, qif, &no_retrigger_queues, &eager_queues, - &leftfixBeginQueue)) { + &proto.leftfixBeginQueue)) { return nullptr; } - u32 eodNfaIterOffset = buildEodNfaIterator(bc, leftfixBeginQueue); + u32 eodNfaIterOffset = buildEodNfaIterator(bc, proto.leftfixBeginQueue); buildCountingMiracles(bc); u32 queue_count = qif.allocated_count(); /* excludes anchored matcher q; @@ -5254,127 +3445,88 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { throw ResourceLimitError(); } - vector suffixEkeyLists; - buildSuffixEkeyLists(*this, bc, qif, &suffixEkeyLists); + // Enforce role table resource limit. 
+ if (num_vertices(g) > cc.grey.limitRoseRoleCount) { + throw ResourceLimitError(); + } - assignStateIndices(*this, bc); + bc.roleStateIndices = assignStateIndices(*this); u32 laggedRoseCount = 0; vector leftInfoTable; - buildLeftInfoTable(*this, bc, eager_queues, leftfixBeginQueue, - queue_count - leftfixBeginQueue, leftInfoTable, + buildLeftInfoTable(*this, bc, eager_queues, proto.leftfixBeginQueue, + queue_count - proto.leftfixBeginQueue, leftInfoTable, &laggedRoseCount, &historyRequired); - u32 litProgramOffset; - u32 litDelayRebuildProgramOffset; - tie(litProgramOffset, litDelayRebuildProgramOffset) = - buildLiteralPrograms(*this, bc); + // Information only needed for program construction. + ProgramBuild prog_build(floatingMinLiteralMatchOffset, + longLitLengthThreshold, needsCatchup(*this)); + prog_build.vertex_group_map = getVertexGroupMap(*this); + prog_build.squashable_groups = getSquashableGroups(*this); - u32 eodProgramOffset = writeEodProgram(*this, bc, eodNfaIterOffset); + tie(proto.anchoredProgramOffset, proto.anchored_count) = + writeAnchoredPrograms(*this, fragments, bc, prog_build); - size_t longLitStreamStateRequired = 0; - u32 longLitTableOffset = buildLongLiteralTable(*this, bc.engine_blob, - bc.longLiterals, longLitLengthThreshold, &historyRequired, - &longLitStreamStateRequired); + tie(proto.delayProgramOffset, proto.delay_count) = + writeDelayPrograms(*this, fragments, bc, prog_build); - vector activeLeftIter; - buildActiveLeftIter(leftInfoTable, activeLeftIter); + buildLiteralPrograms(*this, fragments, bc, prog_build); - u32 lastByteOffset = buildLastByteIter(g, bc); - u32 eagerIterOffset = buildEagerQueueIter(eager_queues, leftfixBeginQueue, - queue_count, bc); + auto eod_prog = makeEodProgram(*this, bc, prog_build, eodNfaIterOffset); + proto.eodProgramOffset = writeProgram(bc, move(eod_prog)); - // Enforce role table resource limit. - if (num_vertices(g) > cc.grey.limitRoseRoleCount) { - throw ResourceLimitError(); - } + size_t longLitStreamStateRequired = 0; + proto.longLitTableOffset + = buildLongLiteralTable(*this, bc.engine_blob, bc.longLiterals, + longLitLengthThreshold, &historyRequired, + &longLitStreamStateRequired); - u32 currOffset; /* relative to base of RoseEngine */ - if (!bc.engine_blob.empty()) { - currOffset = bc.engine_blob.base_offset + bc.engine_blob.size(); - } else { - currOffset = sizeof(RoseEngine); - } + proto.lastByteHistoryIterOffset = buildLastByteIter(g, bc); + proto.eagerIterOffset = writeEagerQueueIter( + eager_queues, proto.leftfixBeginQueue, queue_count, bc.engine_blob); - UNUSED const size_t engineBlobSize = bc.engine_blob.size(); // test later + addSomRevNfas(bc, proto, ssm); - currOffset = ROUNDUP_CL(currOffset); - DEBUG_PRINTF("currOffset %u\n", currOffset); + writeDkeyInfo(rm, bc.engine_blob, proto); + writeLeftInfo(bc.engine_blob, proto, leftInfoTable); // Build anchored matcher. - size_t asize = 0; - u32 amatcherOffset = 0; - auto atable = buildAnchoredMatcher(*this, anchored_dfas, bc.litPrograms, - &asize); + auto atable = buildAnchoredMatcher(*this, fragments, anchored_dfas); if (atable) { - currOffset = ROUNDUP_CL(currOffset); - amatcherOffset = currOffset; - currOffset += verify_u32(asize); + proto.amatcherOffset = bc.engine_blob.add(atable); } // Build floating HWLM matcher. 
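A pattern worth noting in this hunk: rather than tracking currOffset by hand, each matcher table is handed to bc.engine_blob.add(), which returns the offset that is then recorded in the proto header (amatcherOffset and friends; the floating matcher built next is treated the same way). A minimal sketch of such an offset-returning blob, assuming power-of-two alignment (EngineBlob is illustrative, not the real RoseEngineBlob API):

    #include <cassert>
    #include <cstddef>
    #include <cstring>
    #include <vector>

    // add() appends aligned bytes and returns the offset they will occupy in
    // the final engine image; write_bytes() copies them into place later.
    class EngineBlob {
    public:
        explicit EngineBlob(size_t base) : base_offset(base) {}

        size_t add(const void *data, size_t len, size_t align) {
            assert(align && (align & (align - 1)) == 0); // power of two
            size_t pos = base_offset + bytes.size();
            size_t padded = (pos + align - 1) & ~(align - 1);
            bytes.resize(bytes.size() + (padded - pos), 0); // zero padding
            bytes.insert(bytes.end(), (const char *)data,
                         (const char *)data + len);
            return padded; // offset relative to the engine base
        }

        // Copy the accumulated bytes into the allocated engine image.
        void write_bytes(char *engine_base) const {
            std::memcpy(engine_base + base_offset, bytes.data(), bytes.size());
        }

        size_t size() const { return bytes.size(); }
        const size_t base_offset;

    private:
        std::vector<char> bytes;
    };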
rose_group fgroups = 0; - size_t fsize = 0; - auto ftable = buildFloatingMatcher(*this, bc.longLitLengthThreshold, - &fgroups, &fsize, &historyRequired); - u32 fmatcherOffset = 0; + auto ftable = buildFloatingMatcher(*this, fragments, longLitLengthThreshold, + &fgroups, &historyRequired); if (ftable) { - currOffset = ROUNDUP_CL(currOffset); - fmatcherOffset = currOffset; - currOffset += verify_u32(fsize); + proto.fmatcherOffset = bc.engine_blob.add(ftable); + bc.resources.has_floating = true; + } + + // Build delay rebuild HWLM matcher. + auto drtable = buildDelayRebuildMatcher(*this, fragments, + longLitLengthThreshold); + if (drtable) { + proto.drmatcherOffset = bc.engine_blob.add(drtable); } // Build EOD-anchored HWLM matcher. - size_t esize = 0; - auto etable = buildEodAnchoredMatcher(*this, &esize); - u32 ematcherOffset = 0; + auto etable = buildEodAnchoredMatcher(*this, fragments); if (etable) { - currOffset = ROUNDUP_CL(currOffset); - ematcherOffset = currOffset; - currOffset += verify_u32(esize); + proto.ematcherOffset = bc.engine_blob.add(etable); } // Build small-block HWLM matcher. - size_t sbsize = 0; - auto sbtable = buildSmallBlockMatcher(*this, &sbsize); - u32 sbmatcherOffset = 0; + auto sbtable = buildSmallBlockMatcher(*this, fragments); if (sbtable) { - currOffset = ROUNDUP_CL(currOffset); - sbmatcherOffset = currOffset; - currOffset += verify_u32(sbsize); + proto.sbmatcherOffset = bc.engine_blob.add(sbtable); } - u32 leftOffset = ROUNDUP_N(currOffset, alignof(LeftNfaInfo)); - u32 roseLen = sizeof(LeftNfaInfo) * leftInfoTable.size(); - currOffset = leftOffset + roseLen; - - u32 lookaroundReachOffset = currOffset; - u32 lookaroundReachLen = REACH_BITVECTOR_LEN * bc.lookaround.size(); - currOffset = lookaroundReachOffset + lookaroundReachLen; - - u32 lookaroundTableOffset = currOffset; - u32 lookaroundTableLen = sizeof(s8) * bc.lookaround.size(); - currOffset = lookaroundTableOffset + lookaroundTableLen; - - u32 nfaInfoOffset = ROUNDUP_N(currOffset, sizeof(u32)); - u32 nfaInfoLen = sizeof(NfaInfo) * queue_count; - currOffset = nfaInfoOffset + nfaInfoLen; + proto.activeArrayCount = proto.leftfixBeginQueue; - currOffset = ROUNDUP_N(currOffset, alignof(mmbit_sparse_iter)); - u32 activeLeftIterOffset = currOffset; - currOffset += activeLeftIter.size() * sizeof(mmbit_sparse_iter); - - u32 activeArrayCount = leftfixBeginQueue; - u32 activeLeftCount = leftInfoTable.size(); - u32 rosePrefixCount = countRosePrefixes(leftInfoTable); - - u32 rev_nfa_table_offset; - vector rev_nfa_offsets; - prepSomRevNfas(ssm, &rev_nfa_table_offset, &rev_nfa_offsets, &currOffset); - - // Build engine header and copy tables into place. - - u32 anchorStateSize = atable ? anchoredStateSize(*atable) : 0; + proto.anchorStateSize = atable ? 
anchoredStateSize(*atable) : 0; DEBUG_PRINTF("rose history required %zu\n", historyRequired); assert(!cc.streaming || historyRequired <= cc.grey.maxHistoryAvailable); @@ -5385,192 +3537,112 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { assert(!cc.streaming || historyRequired <= max(cc.grey.maxHistoryAvailable, cc.grey.somMaxRevNfaLength)); - RoseStateOffsets stateOffsets; - memset(&stateOffsets, 0, sizeof(stateOffsets)); - fillStateOffsets(*this, bc.numStates, anchorStateSize, - activeArrayCount, activeLeftCount, laggedRoseCount, - longLitStreamStateRequired, historyRequired, - &stateOffsets); - - scatter_plan_raw state_scatter; - buildStateScatterPlan(sizeof(u8), bc.numStates, - activeLeftCount, rosePrefixCount, stateOffsets, - cc.streaming, activeArrayCount, outfixBeginQueue, - outfixEndQueue, &state_scatter); - - currOffset = ROUNDUP_N(currOffset, alignof(scatter_unit_u64a)); - - u32 state_scatter_aux_offset = currOffset; - currOffset += aux_size(state_scatter); + fillStateOffsets(*this, bc.roleStateIndices.size(), proto.anchorStateSize, + proto.activeArrayCount, proto.activeLeftCount, + laggedRoseCount, longLitStreamStateRequired, + historyRequired, &proto.stateOffsets); - currOffset = ROUNDUP_N(currOffset, alignof(ReportID)); - u32 dkeyOffset = currOffset; - currOffset += rm.numDkeys() * sizeof(ReportID); + // Write in NfaInfo structures. This will also update state size + // information in proto. + writeNfaInfo(*this, bc, proto, no_retrigger_queues); - aligned_unique_ptr engine - = aligned_zmalloc_unique(currOffset); - assert(engine); // will have thrown bad_alloc otherwise. - char *ptr = (char *)engine.get(); - assert(ISALIGNED_CL(ptr)); + scatter_plan_raw state_scatter = buildStateScatterPlan( + sizeof(u8), bc.roleStateIndices.size(), proto.activeLeftCount, + proto.rosePrefixCount, proto.stateOffsets, cc.streaming, + proto.activeArrayCount, proto.outfixBeginQueue, proto.outfixEndQueue); - if (atable) { - assert(amatcherOffset); - memcpy(ptr + amatcherOffset, atable.get(), asize); - } - if (ftable) { - assert(fmatcherOffset); - memcpy(ptr + fmatcherOffset, ftable.get(), fsize); - } - if (etable) { - assert(ematcherOffset); - memcpy(ptr + ematcherOffset, etable.get(), esize); - } - if (sbtable) { - assert(sbmatcherOffset); - memcpy(ptr + sbmatcherOffset, sbtable.get(), sbsize); + u32 currOffset; /* relative to base of RoseEngine */ + if (!bc.engine_blob.empty()) { + currOffset = bc.engine_blob.base_offset + bc.engine_blob.size(); + } else { + currOffset = sizeof(RoseEngine); } - memcpy(&engine->stateOffsets, &stateOffsets, sizeof(stateOffsets)); - - engine->historyRequired = verify_u32(historyRequired); - - engine->ekeyCount = rm.numEkeys(); - engine->dkeyCount = rm.numDkeys(); - engine->dkeyLogSize = fatbit_size(engine->dkeyCount); - engine->invDkeyOffset = dkeyOffset; - copy_bytes(ptr + dkeyOffset, rm.getDkeyToReportTable()); - - engine->somHorizon = ssm.somPrecision(); - engine->somLocationCount = ssm.numSomSlots(); - engine->somLocationFatbitSize = fatbit_size(engine->somLocationCount); - - engine->needsCatchup = bc.needs_catchup ? 
1 : 0; - - engine->literalCount = verify_u32(final_id_to_literal.size()); - engine->litProgramOffset = litProgramOffset; - engine->litDelayRebuildProgramOffset = litDelayRebuildProgramOffset; - engine->reportProgramOffset = reportProgramOffset; - engine->reportProgramCount = reportProgramCount; - engine->runtimeImpl = pickRuntimeImpl(*this, bc, outfixEndQueue); - engine->mpvTriggeredByLeaf = anyEndfixMpvTriggers(*this); - - engine->activeArrayCount = activeArrayCount; - engine->activeLeftCount = activeLeftCount; - engine->queueCount = queue_count; - engine->activeQueueArraySize = fatbit_size(queue_count); - engine->eagerIterOffset = eagerIterOffset; - engine->handledKeyCount = bc.handledKeys.size(); - engine->handledKeyFatbitSize = fatbit_size(engine->handledKeyCount); + currOffset = ROUNDUP_CL(currOffset); + DEBUG_PRINTF("currOffset %u\n", currOffset); - engine->rolesWithStateCount = bc.numStates; + currOffset = ROUNDUP_N(currOffset, alignof(scatter_unit_u64a)); + u32 state_scatter_aux_offset = currOffset; + currOffset += aux_size(state_scatter); - engine->leftOffset = leftOffset; - engine->roseCount = verify_u32(leftInfoTable.size()); - engine->lookaroundTableOffset = lookaroundTableOffset; - engine->lookaroundReachOffset = lookaroundReachOffset; - engine->outfixBeginQueue = outfixBeginQueue; - engine->outfixEndQueue = outfixEndQueue; - engine->leftfixBeginQueue = leftfixBeginQueue; - engine->initMpvNfa = mpv_as_outfix ? 0 : MO_INVALID_IDX; - engine->stateSize = mmbit_size(bc.numStates); - engine->anchorStateSize = anchorStateSize; - engine->nfaInfoOffset = nfaInfoOffset; + proto.historyRequired = verify_u32(historyRequired); + proto.ekeyCount = rm.numEkeys(); - engine->eodProgramOffset = eodProgramOffset; + proto.somHorizon = ssm.somPrecision(); + proto.somLocationCount = ssm.numSomSlots(); + proto.somLocationFatbitSize = fatbit_size(proto.somLocationCount); - engine->lastByteHistoryIterOffset = lastByteOffset; + proto.runtimeImpl = pickRuntimeImpl(*this, bc.resources, + proto.outfixEndQueue); + proto.mpvTriggeredByLeaf = anyEndfixMpvTriggers(*this); - engine->delay_count = - verify_u32(final_id_to_literal.size() - delay_base_id); - engine->delay_fatbit_size = fatbit_size(engine->delay_count); - engine->delay_base_id = delay_base_id; - engine->anchored_base_id = anchored_base_id; - engine->anchored_count = delay_base_id - anchored_base_id; - engine->anchored_fatbit_size = fatbit_size(engine->anchored_count); + proto.queueCount = queue_count; + proto.activeQueueArraySize = fatbit_size(queue_count); + proto.handledKeyCount = prog_build.handledKeys.size(); + proto.handledKeyFatbitSize = fatbit_size(proto.handledKeyCount); - engine->rosePrefixCount = rosePrefixCount; + proto.rolesWithStateCount = bc.roleStateIndices.size(); - engine->activeLeftIterOffset - = activeLeftIter.empty() ? 0 : activeLeftIterOffset; + proto.initMpvNfa = mpv_as_outfix ? 0 : MO_INVALID_IDX; + proto.stateSize = mmbit_size(bc.roleStateIndices.size()); - // Set scanning mode. - if (!cc.streaming) { - engine->mode = HS_MODE_BLOCK; - } else if (cc.vectored) { - engine->mode = HS_MODE_VECTORED; - } else { - engine->mode = HS_MODE_STREAM; - } + proto.delay_fatbit_size = fatbit_size(proto.delay_count); + proto.anchored_fatbit_size = fatbit_size(proto.anchored_count); // The Small Write matcher is (conditionally) added to the RoseEngine in // another pass by the caller. Set to zero (meaning no SMWR engine) for // now. 
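As the comment above says, the small-write engine is glued on by the caller after the main bytecode is complete; addSmallWriteEngine (earlier in this diff) does it by reallocating at a cacheline-rounded offset. A hedged sketch of that append step, with a plain zeroed char buffer standing in for make_zeroed_bytecode_ptr (which additionally guarantees 64-byte alignment):

    #include <cstddef>
    #include <cstring>
    #include <memory>

    // Round up to a cacheline (64-byte) boundary, as ROUNDUP_CL does.
    static size_t roundUpCL(size_t x) { return (x + 63) & ~size_t{63}; }

    // Append a sub-engine after the main bytecode at a cacheline-aligned
    // offset, returning the combined image; the caller records subOffset in
    // the engine header (e.g. smallWriteOffset).
    std::unique_ptr<char[]> appendSubEngine(const char *main, size_t mainSize,
                                            const char *sub, size_t subSize,
                                            size_t *subOffsetOut) {
        size_t subOffset = roundUpCL(mainSize);
        size_t newSize = subOffset + subSize;

        std::unique_ptr<char[]> out(new char[newSize]()); // zero-initialized
        std::memcpy(out.get(), main, mainSize);
        std::memcpy(out.get() + subOffset, sub, subSize);

        *subOffsetOut = subOffset;
        return out;
    }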
- engine->smallWriteOffset = 0; - - engine->amatcherOffset = amatcherOffset; - engine->ematcherOffset = ematcherOffset; - engine->sbmatcherOffset = sbmatcherOffset; - engine->fmatcherOffset = fmatcherOffset; - engine->longLitTableOffset = longLitTableOffset; - engine->amatcherMinWidth = findMinWidth(*this, ROSE_ANCHORED); - engine->fmatcherMinWidth = findMinWidth(*this, ROSE_FLOATING); - engine->eodmatcherMinWidth = findMinWidth(*this, ROSE_EOD_ANCHORED); - engine->amatcherMaxBiAnchoredWidth = findMaxBAWidth(*this, ROSE_ANCHORED); - engine->fmatcherMaxBiAnchoredWidth = findMaxBAWidth(*this, ROSE_FLOATING); - engine->size = currOffset; - engine->minWidth = hasBoundaryReports(boundary) ? 0 : minWidth; - engine->minWidthExcludingBoundaries = minWidth; - engine->floatingMinLiteralMatchOffset = bc.floatingMinLiteralMatchOffset; - - engine->maxBiAnchoredWidth = findMaxBAWidth(*this); - engine->noFloatingRoots = hasNoFloatingRoots(); - engine->requiresEodCheck = hasEodAnchors(*this, bc, outfixEndQueue); - engine->hasOutfixesInSmallBlock = hasNonSmallBlockOutfix(outfixes); - engine->canExhaust = rm.patternSetCanExhaust(); - engine->hasSom = hasSom; + proto.smallWriteOffset = 0; + + proto.amatcherMinWidth = findMinWidth(*this, ROSE_ANCHORED); + proto.fmatcherMinWidth = findMinWidth(*this, ROSE_FLOATING); + proto.eodmatcherMinWidth = findMinWidth(*this, ROSE_EOD_ANCHORED); + proto.amatcherMaxBiAnchoredWidth = findMaxBAWidth(*this, ROSE_ANCHORED); + proto.fmatcherMaxBiAnchoredWidth = findMaxBAWidth(*this, ROSE_FLOATING); + proto.minWidth = hasBoundaryReports(boundary) ? 0 : minWidth; + proto.minWidthExcludingBoundaries = minWidth; + proto.floatingMinLiteralMatchOffset = floatingMinLiteralMatchOffset; + + proto.maxBiAnchoredWidth = findMaxBAWidth(*this); + proto.noFloatingRoots = hasNoFloatingRoots(); + proto.requiresEodCheck = hasEodAnchors(*this, bc, proto.outfixEndQueue); + proto.hasOutfixesInSmallBlock = hasNonSmallBlockOutfix(outfixes); + proto.canExhaust = rm.patternSetCanExhaust(); + proto.hasSom = hasSom; /* populate anchoredDistance, floatingDistance, floatingMinDistance, etc */ - fillMatcherDistances(*this, engine.get()); + fillMatcherDistances(*this, &proto); + + proto.initialGroups = getInitialGroups(); + proto.floating_group_mask = fgroups; + proto.totalNumLiterals = verify_u32(literal_info.size()); + proto.asize = verify_u32(atable.size()); + proto.ematcherRegionSize = ematcher_region_size; + proto.longLitStreamState = verify_u32(longLitStreamStateRequired); - engine->initialGroups = getInitialGroups(); - engine->floating_group_mask = fgroups; - engine->totalNumLiterals = verify_u32(literal_info.size()); - engine->asize = verify_u32(asize); - engine->ematcherRegionSize = ematcher_region_size; - engine->longLitStreamState = verify_u32(longLitStreamStateRequired); + proto.size = currOffset; - engine->boundary.reportEodOffset = boundary_out.reportEodOffset; - engine->boundary.reportZeroOffset = boundary_out.reportZeroOffset; - engine->boundary.reportZeroEodOffset = boundary_out.reportZeroEodOffset; + // Time to allocate the real RoseEngine structure, at cacheline alignment. + auto engine = make_zeroed_bytecode_ptr(currOffset, 64); + assert(engine); // will have thrown bad_alloc otherwise. + + // Copy in our prototype engine data. 
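The prototype trick underpinning this rewrite: every header field is staged in a local RoseEngine proto, the total size is computed once the blob is final, and the header is installed with the single memcpy below. That is safe only because the header is trivially copyable. A compressed sketch under that assumption (EngineHeader is a hypothetical stand-in for RoseEngine):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <type_traits>
    #include <vector>

    // Hypothetical stand-in: a trivially-copyable header whose fields are
    // offsets and counts describing the bytes that follow it.
    struct EngineHeader {
        uint32_t size;       // total image size in bytes
        uint32_t blobOffset; // where the variable-length tables begin
        // ... many more offsets and counts ...
    };
    static_assert(std::is_trivially_copyable<EngineHeader>::value,
                  "header must be memcpy-safe");

    std::vector<char> finalizeEngine(EngineHeader proto,
                                     const std::vector<char> &blob) {
        size_t blobOffset = (sizeof(EngineHeader) + 63) & ~size_t{63};
        proto.blobOffset = (uint32_t)blobOffset;
        proto.size = (uint32_t)(blobOffset + blob.size());

        std::vector<char> image(proto.size, 0); // zeroed, like the real code
        std::memcpy(image.data(), &proto, sizeof(proto));  // header first
        std::memcpy(image.data() + blobOffset, blob.data(), blob.size());
        return image;
    }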
+ memcpy(engine.get(), &proto, sizeof(proto)); write_out(&engine->state_init, (char *)engine.get(), state_scatter, state_scatter_aux_offset); - NfaInfo *nfa_infos = (NfaInfo *)(ptr + nfaInfoOffset); - populateNfaInfoBasics(*this, bc, outfixes, suffixEkeyLists, - no_retrigger_queues, nfa_infos); - updateNfaState(bc, &engine->stateOffsets, nfa_infos, - &engine->scratchStateSize, &engine->nfaStateSize, - &engine->tStateSize); - - // Copy in other tables + // Copy in the engine blob. bc.engine_blob.write_bytes(engine.get()); - copy_bytes(ptr + engine->leftOffset, leftInfoTable); - - fillLookaroundTables(ptr + lookaroundTableOffset, - ptr + lookaroundReachOffset, bc.lookaround); - - fillInSomRevNfas(engine.get(), ssm, rev_nfa_table_offset, rev_nfa_offsets); - copy_bytes(ptr + engine->activeLeftIterOffset, activeLeftIter); - - // Safety check: we shouldn't have written anything to the engine blob - // after we copied it into the engine bytecode. - assert(bc.engine_blob.size() == engineBlobSize); // Add a small write engine if appropriate. engine = addSmallWriteEngine(*this, move(engine)); DEBUG_PRINTF("rose done %p\n", engine.get()); + + dumpRose(*this, fragments, makeLeftQueueMap(g, bc.leftfix_info), + bc.suffixes, engine.get()); + return engine; } diff --git a/src/rose/rose_build_castle.cpp b/src/rose/rose_build_castle.cpp index 7987b0f61..a85a784fc 100644 --- a/src/rose/rose_build_castle.cpp +++ b/src/rose/rose_build_castle.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -131,7 +131,7 @@ vector literals_for_vertex(const RoseBuildImpl &tbi, vector rv; for (const u32 id : tbi.g[v].literals) { - rv.push_back(tbi.literals.right.at(id)); + rv.push_back(tbi.literals.at(id)); } return rv; @@ -366,7 +366,7 @@ bool triggerKillsRoseCastle(const RoseBuildImpl &tbi, const left_id &left, /* check each pred literal to see if they all kill previous castle * state */ for (u32 lit_id : tbi.g[source(e, tbi.g)].literals) { - const rose_literal_id &pred_lit = tbi.literals.right.at(lit_id); + const rose_literal_id &pred_lit = tbi.literals.at(lit_id); const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s); const CharReach &cr = c.reach(); diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index e13d7c5c7..96241e39d 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,6 +40,7 @@ #include "rose_build_role_aliasing.h" #include "rose_build_util.h" #include "ue2common.h" +#include "hwlm/hwlm_literal.h" #include "nfa/nfa_internal.h" #include "nfa/rdfa.h" #include "nfagraph/ng_holder.h" @@ -102,10 +103,75 @@ bool limited_explosion(const ue2_literal &s) { return nc_count <= MAX_EXPLOSION_NC; } +static +void removeLiteralFromGraph(RoseBuildImpl &build, u32 id) { + assert(id < build.literal_info.size()); + auto &info = build.literal_info.at(id); + for (const auto &v : info.vertices) { + build.g[v].literals.erase(id); + } + info.vertices.clear(); +} + +/** + * \brief Replace the given mixed-case literal with the set of its caseless + * variants. 
+ */ +static +void explodeLiteral(RoseBuildImpl &build, u32 id) { + const auto &lit = build.literals.at(id); + auto &info = build.literal_info[id]; + + assert(!info.group_mask); // not set yet + assert(info.undelayed_id == id); // we do not explode delayed literals + + for (auto it = caseIterateBegin(lit.s); it != caseIterateEnd(); ++it) { + ue2_literal new_str(*it, false); + + if (!maskIsConsistent(new_str.get_string(), false, lit.msk, lit.cmp)) { + DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); + continue; + } + + u32 new_id = + build.getLiteralId(new_str, lit.msk, lit.cmp, lit.delay, lit.table); + + DEBUG_PRINTF("adding exploded lit %u: '%s'\n", new_id, + dumpString(new_str).c_str()); + + const auto &new_lit = build.literals.at(new_id); + auto &new_info = build.literal_info.at(new_id); + insert(&new_info.vertices, info.vertices); + for (const auto &v : info.vertices) { + build.g[v].literals.insert(new_id); + } + + build.literal_info[new_id].undelayed_id = new_id; + if (!info.delayed_ids.empty()) { + flat_set &del_ids = new_info.delayed_ids; + for (u32 delay_id : info.delayed_ids) { + const auto &dlit = build.literals.at(delay_id); + u32 new_delay_id = + build.getLiteralId(new_lit.s, new_lit.msk, new_lit.cmp, + dlit.delay, dlit.table); + del_ids.insert(new_delay_id); + build.literal_info[new_delay_id].undelayed_id = new_id; + } + } + } + + // Remove the old literal and any old delay variants. + removeLiteralFromGraph(build, id); + for (u32 delay_id : info.delayed_ids) { + removeLiteralFromGraph(build, delay_id); + } + info.delayed_ids.clear(); +} + void RoseBuildImpl::handleMixedSensitivity(void) { - for (const auto &e : literals.right) { - u32 id = e.first; - const rose_literal_id &lit = e.second; + vector explode; + for (u32 id = 0; id < literals.size(); id++) { + const rose_literal_id &lit = literals.at(id); if (lit.delay) { continue; /* delay id's are virtual-ish */ @@ -120,18 +186,23 @@ void RoseBuildImpl::handleMixedSensitivity(void) { } // We don't want to explode long literals, as they require confirmation - // with a CHECK_LITERAL instruction and need unique final_ids. + // with a CHECK_LONG_LIT instruction and need unique final_ids. // TODO: we could allow explosion for literals where the prefixes - // covered by CHECK_LITERAL are identical. + // covered by CHECK_LONG_LIT are identical. + if (lit.s.length() <= ROSE_LONG_LITERAL_THRESHOLD_MIN && - limited_explosion(lit.s)) { + limited_explosion(lit.s) && literal_info[id].delayed_ids.empty()) { DEBUG_PRINTF("need to explode existing string '%s'\n", dumpString(lit.s).c_str()); - literal_info[id].requires_explode = true; + explode.push_back(id); } else { literal_info[id].requires_benefits = true; } } + + for (u32 id : explode) { + explodeLiteral(*this, id); + } } // Returns the length of the longest prefix of s that is (a) also a suffix of s @@ -348,7 +419,7 @@ bool RoseBuildImpl::isDirectReport(u32 id) const { } } - if (literals.right.at(id).table == ROSE_ANCHORED) { + if (literals.at(id).table == ROSE_ANCHORED) { /* in-edges are irrelevant for anchored region. */ continue; } @@ -367,7 +438,7 @@ bool RoseBuildImpl::isDirectReport(u32 id) const { } DEBUG_PRINTF("literal %u ('%s') is a %s report\n", id, - dumpString(literals.right.at(id).s).c_str(), + dumpString(literals.at(id).s).c_str(), info.vertices.size() > 1 ? 
"multi-direct" : "direct"); return true; } @@ -412,7 +483,7 @@ bool checkFloatingKillableByPrefixes(const RoseBuildImpl &tbi) { } static -bool checkEodStealFloating(const RoseBuildImpl &tbi, +bool checkEodStealFloating(const RoseBuildImpl &build, const vector &eodLiteralsForFloating, u32 numFloatingLiterals, size_t shortestFloatingLen) { @@ -426,27 +497,35 @@ bool checkEodStealFloating(const RoseBuildImpl &tbi, return false; } - if (tbi.hasNoFloatingRoots()) { + if (build.hasNoFloatingRoots()) { DEBUG_PRINTF("skipping as floating table is conditional\n"); /* TODO: investigate putting stuff in atable */ return false; } - if (checkFloatingKillableByPrefixes(tbi)) { + if (checkFloatingKillableByPrefixes(build)) { DEBUG_PRINTF("skipping as prefixes may make ftable conditional\n"); return false; } + // Collect a set of all floating literals. + unordered_set floating_lits; + for (auto &lit : build.literals) { + if (lit.table == ROSE_FLOATING) { + floating_lits.insert(lit.s); + } + } + DEBUG_PRINTF("%zu are eod literals, %u floating; floating len=%zu\n", eodLiteralsForFloating.size(), numFloatingLiterals, shortestFloatingLen); u32 new_floating_lits = 0; for (u32 eod_id : eodLiteralsForFloating) { - const rose_literal_id &lit = tbi.literals.right.at(eod_id); + const rose_literal_id &lit = build.literals.at(eod_id); DEBUG_PRINTF("checking '%s'\n", dumpString(lit.s).c_str()); - if (tbi.hasLiteral(lit.s, ROSE_FLOATING)) { + if (contains(floating_lits, lit.s)) { DEBUG_PRINTF("skip; there is already a floating version\n"); continue; } @@ -477,12 +556,16 @@ bool checkEodStealFloating(const RoseBuildImpl &tbi, static void promoteEodToFloating(RoseBuildImpl &tbi, const vector &eodLiterals) { - DEBUG_PRINTF("promoting eod literals to floating table\n"); + DEBUG_PRINTF("promoting %zu eod literals to floating table\n", + eodLiterals.size()); for (u32 eod_id : eodLiterals) { - const rose_literal_id &lit = tbi.literals.right.at(eod_id); + const rose_literal_id &lit = tbi.literals.at(eod_id); + DEBUG_PRINTF("eod_id=%u, lit=%s\n", eod_id, dumpString(lit.s).c_str()); u32 floating_id = tbi.getLiteralId(lit.s, lit.msk, lit.cmp, lit.delay, ROSE_FLOATING); + DEBUG_PRINTF("floating_id=%u, lit=%s\n", floating_id, + dumpString(tbi.literals.at(floating_id).s).c_str()); auto &float_verts = tbi.literal_info[floating_id].vertices; auto &eod_verts = tbi.literal_info[eod_id].vertices; @@ -496,8 +579,6 @@ void promoteEodToFloating(RoseBuildImpl &tbi, const vector &eodLiterals) { tbi.g[v].literals.insert(floating_id); } - tbi.literal_info[floating_id].requires_explode - = tbi.literal_info[eod_id].requires_explode; tbi.literal_info[floating_id].requires_benefits = tbi.literal_info[eod_id].requires_benefits; } @@ -509,7 +590,7 @@ bool promoteEodToAnchored(RoseBuildImpl &tbi, const vector &eodLiterals) { bool rv = true; for (u32 eod_id : eodLiterals) { - const rose_literal_id &lit = tbi.literals.right.at(eod_id); + const rose_literal_id &lit = tbi.literals.at(eod_id); NGHolder h; add_edge(h.start, h.accept, h); @@ -649,7 +730,7 @@ void stealEodVertices(RoseBuildImpl &tbi) { continue; // skip unused literals } - const rose_literal_id &lit = tbi.literals.right.at(i); + const rose_literal_id &lit = tbi.literals.at(i); if (lit.table == ROSE_EOD_ANCHORED) { if (suitableForAnchored(tbi, lit, info)) { @@ -689,13 +770,9 @@ bool RoseBuildImpl::isDelayed(u32 id) const { return literal_info.at(id).undelayed_id != id; } -bool RoseBuildImpl::hasFinalId(u32 id) const { - return literal_info.at(id).final_id != MO_INVALID_IDX; -} - bool 
RoseBuildImpl::hasDelayedLiteral(RoseVertex v) const { for (u32 lit_id : g[v].literals) { - if (literals.right.at(lit_id).delay) { + if (literals.at(lit_id).delay) { return true; } } @@ -966,7 +1043,7 @@ void packInfixTops(NGHolder &h, RoseGraph &g, updated_tops.insert(top_mapping.at(t)); } } - h[e].tops = move(updated_tops); + h[e].tops = std::move(updated_tops); if (h[e].tops.empty()) { DEBUG_PRINTF("edge (start,%zu) has only unused tops\n", h[v].index); dead.push_back(e); @@ -1021,7 +1098,7 @@ bool triggerKillsRoseGraph(const RoseBuildImpl &build, const left_id &left, /* check each pred literal to see if they all kill previous graph * state */ for (u32 lit_id : build.g[source(e, build.g)].literals) { - const rose_literal_id &pred_lit = build.literals.right.at(lit_id); + const rose_literal_id &pred_lit = build.literals.at(lit_id); const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s); DEBUG_PRINTF("running graph %zu\n", states.size()); @@ -1095,7 +1172,7 @@ void findTopTriggerCancels(RoseBuildImpl &build) { } for (u32 lit_id : pred_lit_ids) { - const rose_literal_id &p_lit = build.literals.right.at(lit_id); + const rose_literal_id &p_lit = build.literals.at(lit_id); if (p_lit.delay || p_lit.table == ROSE_ANCHORED) { goto next_rose; } @@ -1166,11 +1243,15 @@ void buildRoseSquashMasks(RoseBuildImpl &tbi) { } } - rose_group unsquashable = 0; + rose_group unsquashable = tbi.boundary_group_mask; for (u32 lit_id : lit_ids) { const rose_literal_info &info = tbi.literal_info[lit_id]; - if (info.vertices.size() > 1 || !info.delayed_ids.empty()) { + if (!info.delayed_ids.empty() + || !all_of_in(info.vertices, + [&](RoseVertex v) { + return left == tbi.g[v].left; })) { + DEBUG_PRINTF("group %llu is unsquashable\n", info.group_mask); unsquashable |= info.group_mask; } } @@ -1192,7 +1273,7 @@ void countFloatingLiterals(const RoseBuildImpl &tbi, u32 *total_count, u32 *short_count) { *total_count = 0; *short_count = 0; - for (const rose_literal_id &lit : tbi.literals.right | map_values) { + for (const rose_literal_id &lit : tbi.literals) { if (lit.delay) { continue; /* delay id's are virtual-ish */ } @@ -1598,8 +1679,8 @@ bool roleOffsetsAreValid(const RoseGraph &g) { } #endif // NDEBUG -aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { - dumpRoseGraph(*this, nullptr, "rose_early.dot"); +bytecode_ptr RoseBuildImpl::buildRose(u32 minWidth) { + dumpRoseGraph(*this, "rose_early.dot"); // Early check for Rose implementability. assert(canImplementGraphs(*this)); @@ -1644,8 +1725,6 @@ aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { dedupeLeftfixes(*this); aliasRoles(*this, false); // Don't merge leftfixes. 
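Stepping back to the buildRoseSquashMasks hunk above: the unsquashable set now starts from the boundary group mask, and a literal's groups also become unsquashable if it has delayed variants or if any of its vertices hangs off a different leftfix. A simplified sketch of that rule (SquashLitInfo and the per-vertex leftfix array are hypothetical stand-ins):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    using rose_group = uint64_t;

    // Hypothetical, simplified stand-in for rose_literal_info.
    struct SquashLitInfo {
        rose_group group_mask = 0;
        std::vector<uint32_t> delayed_ids; // delayed variants of this literal
        std::vector<uint32_t> vertices;    // vertices that use this literal
    };

    // Groups that may never be squashed when leftfix `left` dies: boundary
    // report groups, plus the groups of any literal with delayed variants or
    // with vertices not all tied to this leftfix.
    rose_group unsquashableGroups(const std::vector<SquashLitInfo> &lits,
                                  const std::vector<uint32_t> &leftfix_of,
                                  uint32_t left, rose_group boundary_groups) {
        rose_group unsquashable = boundary_groups;
        for (const auto &info : lits) {
            bool same_left = std::all_of(
                info.vertices.begin(), info.vertices.end(),
                [&](uint32_t v) { return leftfix_of[v] == left; });
            if (!info.delayed_ids.empty() || !same_left) {
                unsquashable |= info.group_mask;
            }
        }
        return unsquashable;
    }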
dedupeLeftfixes(*this); - - convertBadLeaves(*this); uncalcLeaves(*this); /* note the leftfixes which do not need to keep state across stream @@ -1712,7 +1791,7 @@ aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { assert(roleOffsetsAreValid(g)); assert(historiesAreValid(g)); - dumpRoseGraph(*this, nullptr, "rose_pre_norm.dot"); + dumpRoseGraph(*this, "rose_pre_norm.dot"); return buildFinalEngine(minWidth); } diff --git a/src/rose/rose_build_convert.cpp b/src/rose/rose_build_convert.cpp index b151c0c91..0c1f43386 100644 --- a/src/rose/rose_build_convert.cpp +++ b/src/rose/rose_build_convert.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -76,301 +76,6 @@ NFAVertex addHolderVertex(const CharReach &cr, NGHolder &out) { return v; } -// Returns the first and last vertices. -static -pair addLiteralVertices(const RoseGraph &g, - const RoseLiteralMap &literals, - const RoseVertex &t_v, - NGHolder &out) { - // We have limited cases that we support: one literal of arbitrary length, - // or a bunch of literals of length one that just become a vertex with - // their reach unioned together. - - // TODO: generalise this and handle more cases. - - const auto &litids = g[t_v].literals; - if (litids.size() > 1) { - // Multiple literals of len 1. - CharReach v_cr; - for (const auto &lit_id : litids) { - const rose_literal_id &litv = literals.right.at(lit_id); - assert(litv.s.length() == 1); - v_cr |= *litv.s.begin(); - } - - NFAVertex v = addHolderVertex(v_cr, out); - return make_pair(v, v); - } - - // Otherwise, we have a single literal, could be of arbitrary length. - assert(litids.size() == 1); - u32 lit_id = *(litids.begin()); - const rose_literal_id &litv = literals.right.at(lit_id); - assert(!litv.s.empty()); - - ue2_literal::const_iterator it = litv.s.begin(), ite = litv.s.end(); - NFAVertex first = addHolderVertex(*it, out), last = first; - for (++it; it != ite; ++it) { - NFAVertex v = addHolderVertex(*it, out); - add_edge(last, v, out); - last = v; - } - - return make_pair(first, last); -} - -static -unique_ptr convertLeafToHolder(const RoseGraph &g, - const RoseEdge &t_e, - const RoseLiteralMap &literals) { - RoseVertex t_v = target(t_e, g); // leaf vertex for demolition. - u32 minBound = g[t_e].minBound; - u32 maxBound = g[t_e].maxBound; - - const CharReach dot = CharReach::dot(); - - assert(!g[t_v].left); - - auto out = ue2::make_unique(NFA_SUFFIX); - - // Repeats wired to the start of the graph. - DEBUG_PRINTF("bounds [%u, %u]\n", minBound, maxBound); - u32 i = 1; - NFAVertex last = out->start; - for (; i <= minBound; i++) { - NFAVertex v = addHolderVertex(dot, *out); - add_edge(last, v, *out); - last = v; - } - NFAVertex last_mand = last; - if (maxBound != ROSE_BOUND_INF) { - for (; i <= maxBound; i++) { - NFAVertex v = addHolderVertex(dot, *out); - add_edge(last_mand, v, *out); - if (last != last_mand) { - add_edge(last, v, *out); - } - last = v; - } - } else { - if (minBound) { - add_edge(last_mand, last_mand, *out); - } else { - NFAVertex v = addHolderVertex(dot, *out); - add_edge(last_mand, v, *out); - add_edge(v, v, *out); - last = v; - } - } - - setTops(*out); - - // Literal vertices wired to accept. 
- NFAVertex litfirst, litlast; - tie(litfirst, litlast) = addLiteralVertices(g, literals, t_v, *out); - add_edge(last, litfirst, *out); - if (last != last_mand) { - add_edge(last_mand, litfirst, *out); - } - add_edge(litlast, out->accept, *out); - insert(&(*out)[litlast].reports, g[t_v].reports); - return out; -} - -static -bool areLiteralsConvertible(const RoseLiteralMap &literals, - const flat_set &ids) { - // Every literal in v must have the same length. - - // TODO: at the moment, we only handle two cases in construction: (a) one - // literal of arbitrary length, and (b) many literals, but all with length - // 1. - - if (ids.empty()) { - return false; - } - - auto it = ids.begin(), ite = ids.end(); - const size_t len = literals.right.at(*it).elength(); - - // Note: len may be 0 for cases with special literals, like EOD prefixes. - - if (len != 1 && ids.size() != 1) { - DEBUG_PRINTF("more than one literal of len > 1\n"); - return false; - } - - // Check the others all have the same length. - while (++it != ite) { - if (literals.right.at(*it).elength() != len) { - DEBUG_PRINTF("literals have different lengths\n"); - return false; - } - } - - return true; -} - -// Returns true if the given vertex doesn't qualify as a bad leaf to be eaten -// by an NFA. -static -bool isUnconvertibleLeaf(const RoseBuildImpl &tbi, const RoseVertex v) { - const RoseGraph &g = tbi.g; - - if (in_degree(v, g) != 1) { - DEBUG_PRINTF("more than one in-edge\n"); - return true; - } - - const RoseEdge &e = *(in_edges(v, g).first); - RoseVertex u = source(e, g); - - if (!g[u].reports.empty()) { - DEBUG_PRINTF("pred has accept\n"); - return true; - } - - if (g[u].suffix) { - // TODO: this could be handled by adding new vertices to the existing - // suffix. - DEBUG_PRINTF("pred already has suffix\n"); - return true; - } - - if (tbi.isAnyStart(u)) { - DEBUG_PRINTF("fail start\n"); - return true; - } - - if (tbi.isAnchored(u)) { - /* TODO need to check for possible anchored queue overflow? maybe? */ - DEBUG_PRINTF("fail anchored\n"); - return true; - } - - if (g[v].reports.empty() || g[v].eod_accept) { - DEBUG_PRINTF("bad accept\n"); - return true; - } - - if (g[v].suffix) { - DEBUG_PRINTF("suffix\n"); - return true; - } - - if (g[v].left) { - /* TODO: we really should handle this case as we would be checking - * an nfa each time. However it requires completely different graph - * fiddling logic */ - DEBUG_PRINTF("rose prefix action\n"); - return true; - } - - if (!areLiteralsConvertible(tbi.literals, g[v].literals)) { - DEBUG_PRINTF("fail length\n"); - return true; - } - - u32 max_lit_len = tbi.maxLiteralLen(v); - - u32 maxbound = max_lit_len == 1 ? 124 : 32; // arbitrary magic numbers - if (g[e].maxBound > maxbound && g[e].maxBound != ROSE_BOUND_INF) { - DEBUG_PRINTF("fail maxbound (%u)\n", maxbound); - return true; - } - - if (g[e].maxBound == ROSE_BOUND_INF) { - /* slightly risky as nfa won't die */ - DEBUG_PRINTF("fail: .*\n"); - return true; - } - - return false; -} - -// Find all of the leaves with literals whose length is <= len. 
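For readers tracking what the deleted convertBadLeaves pass did: convertLeafToHolder, removed above, expanded an edge's {minBound, maxBound} dot repeat into an NFA vertex chain, with skip edges for the optional tail and a self-loop for an unbounded maximum. A toy re-creation over a bare adjacency list (Graph is illustrative, not NGHolder):

    #include <cstdint>
    #include <utility>
    #include <vector>

    constexpr uint32_t BOUND_INF = ~0u; // stand-in for ROSE_BOUND_INF

    struct Graph {
        uint32_t start = 0;
        uint32_t next_vertex = 1;
        std::vector<std::pair<uint32_t, uint32_t>> edges;
        uint32_t addVertex() { return next_vertex++; }
        void addEdge(uint32_t u, uint32_t v) { edges.emplace_back(u, v); }
    };

    // Expand a {minBound, maxBound} dot repeat, following the shape of the
    // removed code: a mandatory chain, then optional vertices reachable by
    // skipping ahead from the mandatory part, or a self-loop when unbounded.
    // Returns the vertex the literal chain hangs off (the original also
    // wired the literal from last_mand to permit the zero-extra-dots path).
    uint32_t buildRepeat(Graph &g, uint32_t minBound, uint32_t maxBound) {
        uint32_t last = g.start;
        for (uint32_t i = 1; i <= minBound; i++) {
            uint32_t v = g.addVertex();
            g.addEdge(last, v);
            last = v;
        }
        uint32_t last_mand = last;
        if (maxBound != BOUND_INF) {
            for (uint32_t i = minBound + 1; i <= maxBound; i++) {
                uint32_t v = g.addVertex();
                g.addEdge(last_mand, v); // skip the earlier optional dots
                if (last != last_mand) {
                    g.addEdge(last, v);
                }
                last = v;
            }
        } else if (minBound) {
            g.addEdge(last_mand, last_mand); // unbounded tail: self-loop
        } else {
            uint32_t v = g.addVertex(); // pure .*
            g.addEdge(last_mand, v);
            g.addEdge(v, v);
            last = v;
        }
        return last;
    }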
-static -void findBadLeaves(RoseBuildImpl &tbi, set &bad) { - RoseGraph &g = tbi.g; - u32 len = tbi.cc.grey.roseMaxBadLeafLength; - - for (const auto &m : tbi.literals.right) { - if (m.second.s.length() > len) { - continue; - } - u32 lid = m.first; - DEBUG_PRINTF("%u is a short lit (length %zu)\n", lid, - m.second.s.length()); - - if (tbi.isDelayed(lid)) { - DEBUG_PRINTF("delayed, skipping!\n"); - continue; - } - - const rose_literal_info &info = tbi.literal_info[lid]; - - for (auto v : info.vertices) { - if (!isLeafNode(v, g)) { - continue; - } - if (isUnconvertibleLeaf(tbi, v)) { - continue; // we don't want to touch it - } - - // This leaf may have a predecessor with more than one successor, - // in which case we want to clone the pred just to support this - // leaf. - const RoseEdge &e = *in_edges(v, g).first; - RoseVertex u = source(e, g); - if (out_degree(u, g) != 1) { - DEBUG_PRINTF("re-homing %zu to cloned pred\n", g[v].index); - RoseVertex u2 = tbi.cloneVertex(u); - for (const auto &e_in : in_edges_range(u, g)) { - add_edge(source(e_in, g), u2, g[e_in], g); - } - add_edge(u2, v, g[e], g); - remove_edge(e, g); - } - - DEBUG_PRINTF("%zu is a bad leaf vertex\n", g[v].index); - bad.insert(v); - } - } -} - -void convertBadLeaves(RoseBuildImpl &tbi) { - RoseGraph &g = tbi.g; - set bad; - findBadLeaves(tbi, bad); - DEBUG_PRINTF("found %zu bad leaves\n", bad.size()); - - if (bad.empty()) { - return; - } - - vector dead; - for (auto v : bad) { - assert(in_degree(v, g)); - - const RoseEdge &e = *(in_edges(v, g).first); - - shared_ptr h = convertLeafToHolder(g, e, tbi.literals); - if (num_vertices(*h) >= NFA_MAX_STATES) { - assert(0); // too big! - continue; - } - - RoseVertex u = source(e, g); - assert(!g[u].suffix); - g[u].suffix.graph = h; - DEBUG_PRINTF("%zu's nfa holder %p\n", g[u].index, h.get()); - - dead.push_back(v); - } - - tbi.removeVertices(dead); -} - static size_t suffixFloodLen(const ue2_literal &s) { if (s.empty()) { @@ -461,7 +166,7 @@ bool delayLiteralWithPrefix(RoseBuildImpl &tbi, RoseVertex v, u32 lit_id, shared_ptr h = makeRosePrefix(lit.s); ReportID prefix_report = 0; - setReportId(*h, prefix_report); + set_report(*h, prefix_report); if (!isImplementableNFA(*h, &tbi.rm, tbi.cc)) { DEBUG_PRINTF("prefix not implementable\n"); @@ -530,7 +235,7 @@ void convertFloodProneSuffix(RoseBuildImpl &tbi, RoseVertex v, u32 lit_id, static size_t findFloodProneSuffixLen(const RoseBuildImpl &tbi) { size_t numLiterals = 0; - for (const rose_literal_id &lit : tbi.literals.right | map_values) { + for (const rose_literal_id &lit : tbi.literals) { if (lit.delay) { continue; // delay ids are virtual-ish } @@ -588,7 +293,7 @@ void convertFloodProneSuffixes(RoseBuildImpl &tbi) { } u32 lit_id = *g[v].literals.begin(); - const rose_literal_id &lit = tbi.literals.right.at(lit_id); + const rose_literal_id &lit = tbi.literals.at(lit_id); // anchored or delayed literals need thought. 
if (lit.table != ROSE_FLOATING || lit.delay) { @@ -846,7 +551,7 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, && is_subset_of(exits, base_succ) && is_subset_of(base_succ, exits_and_repeat_verts)) { /* we have a jump edge */ - ri.repeatMin = 0; + ri.repeatMin = depth(0); } else { return false; } @@ -1097,7 +802,7 @@ void convertAnchPrefixToBounds(RoseBuildImpl &tbi) { DepthMinMax bounds(pr.bounds); // copy if (delay_adj > bounds.min) { - bounds.min = 0; + bounds.min = depth(0); } else { bounds.min -= delay_adj; } diff --git a/src/rose/rose_build_convert.h b/src/rose/rose_build_convert.h index fd7c6d3ea..7307c213c 100644 --- a/src/rose/rose_build_convert.h +++ b/src/rose/rose_build_convert.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,7 +34,6 @@ namespace ue2 { class RoseBuildImpl; void convertFloodProneSuffixes(RoseBuildImpl &tbi); -void convertBadLeaves(RoseBuildImpl &tbi); void convertPrefixToBounds(RoseBuildImpl &tbi); void convertAnchPrefixToBounds(RoseBuildImpl &tbi); diff --git a/src/rose/rose_build_dedupe.cpp b/src/rose/rose_build_dedupe.cpp new file mode 100644 index 000000000..d3e723133 --- /dev/null +++ b/src/rose/rose_build_dedupe.cpp @@ -0,0 +1,388 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "rose_build_impl.h" +#include "nfa/castlecompile.h" +#include "nfagraph/ng_repeat.h" +#include "util/compile_context.h" +#include "util/boundary_reports.h" +#include "util/make_unique.h" +#include "util/report_manager.h" + +using namespace std; + +namespace ue2 { + +static +bool requiresDedupe(const NGHolder &h, const ue2::flat_set &reports, + const Grey &grey) { + /* TODO: tighten */ + NFAVertex seen_vert = NGHolder::null_vertex(); + + for (auto v : inv_adjacent_vertices_range(h.accept, h)) { + if (has_intersection(h[v].reports, reports)) { + if (seen_vert != NGHolder::null_vertex()) { + return true; + } + seen_vert = v; + } + } + + for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) { + if (has_intersection(h[v].reports, reports)) { + if (seen_vert != NGHolder::null_vertex()) { + return true; + } + seen_vert = v; + } + } + + if (seen_vert) { + /* if the reporting vertex is part of of a terminal repeat, the + * construction process may reform the graph splitting it into two + * vertices (pos, cyclic) and hence require dedupe */ + vector repeats; + findRepeats(h, grey.minExtBoundedRepeatSize, &repeats); + for (const auto &repeat : repeats) { + if (find(repeat.vertices.begin(), repeat.vertices.end(), + seen_vert) != repeat.vertices.end()) { + return true; + } + } + } + + return false; +} + +class RoseDedupeAuxImpl : public RoseDedupeAux { +public: + explicit RoseDedupeAuxImpl(const RoseBuildImpl &build_in); + bool requiresDedupeSupport( + const ue2::flat_set &reports) const override; + +private: + bool hasSafeMultiReports(const ue2::flat_set &reports) const; + + const RoseBuildImpl &build; + map> vert_map; //!< ordinary literals + map> sb_vert_map; //!< small block literals + map> suffix_map; + map> outfix_map; + map> puff_map; + + unordered_set live_reports; //!< all live internal reports. +}; + +unique_ptr RoseBuildImpl::generateDedupeAux() const { + return ue2::make_unique(*this); +} + +RoseDedupeAux::~RoseDedupeAux() = default; + +RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &build_in) + : build(build_in) { + const RoseGraph &g = build.g; + + set suffixes; + + for (auto v : vertices_range(g)) { + insert(&live_reports, g[v].reports); + + // Literals in the small block table are "shadow" copies of literals in + // the other tables that do not run in the same runtime invocation. + // Dedupe key assignment will be taken care of by the real literals. + if (build.hasLiteralInTable(v, ROSE_ANCHORED_SMALL_BLOCK)) { + for (const auto &report_id : g[v].reports) { + sb_vert_map[report_id].insert(v); + } + } else { + for (const auto &report_id : g[v].reports) { + vert_map[report_id].insert(v); + } + } + + // Several vertices may share a suffix, so we collect the set of + // suffixes first to avoid repeating work. 
+ if (g[v].suffix) { + suffixes.insert(g[v].suffix); + } + } + + for (const auto &suffix : suffixes) { + for (const auto &report_id : all_reports(suffix)) { + suffix_map[report_id].insert(suffix); + live_reports.insert(report_id); + } + } + + for (const auto &outfix : build.outfixes) { + for (const auto &report_id : all_reports(outfix)) { + outfix_map[report_id].insert(&outfix); + live_reports.insert(report_id); + } + } + + if (build.mpv_outfix) { + auto *mpv = build.mpv_outfix->mpv(); + for (const auto &puff : mpv->puffettes) { + puff_map[puff.report].insert(&puff); + live_reports.insert(puff.report); + } + for (const auto &puff : mpv->triggered_puffettes) { + puff_map[puff.report].insert(&puff); + live_reports.insert(puff.report); + } + } + + // Collect live reports from boundary reports. + insert(&live_reports, build.boundary.report_at_0); + insert(&live_reports, build.boundary.report_at_0_eod); + insert(&live_reports, build.boundary.report_at_eod); + + DEBUG_PRINTF("%zu of %zu reports are live\n", live_reports.size(), + build.rm.numReports()); +} + +static +vector makePath(const rose_literal_id &lit) { + vector path(begin(lit.s), end(lit.s)); + for (u32 i = 0; i < lit.delay; i++) { + path.push_back(CharReach::dot()); + } + return path; +} + +/** + * \brief True if one of the given literals overlaps with the suffix of + * another, meaning that they could arrive at the same offset. + */ +static +bool literalsCouldRace(const rose_literal_id &lit1, + const rose_literal_id &lit2) { + DEBUG_PRINTF("compare %s (delay %u) and %s (delay %u)\n", + dumpString(lit1.s).c_str(), lit1.delay, + dumpString(lit2.s).c_str(), lit2.delay); + + // Add dots on the end of each literal for delay. + const auto v1 = makePath(lit1); + const auto v2 = makePath(lit2); + + // See if the smaller path is a suffix of the larger path. + const auto *smaller = v1.size() < v2.size() ? &v1 : &v2; + const auto *bigger = v1.size() < v2.size() ? &v2 : &v1; + auto r = mismatch(smaller->rbegin(), smaller->rend(), bigger->rbegin(), + overlaps); + return r.first == smaller->rend(); +} + +bool RoseDedupeAuxImpl::hasSafeMultiReports( + const flat_set &reports) const { + if (reports.size() <= 1) { + return true; + } + + /* We have more than one ReportID corresponding to the external ID that is + * presented to the user. These may differ in offset adjustment, bounds + * checks, etc. */ + + /* TODO: work out if these differences will actually cause problems */ + + /* One common case where we know we don't have a problem is if there are + * precisely two reports, one for the main Rose path and one for the + * "small block matcher" path. */ + if (reports.size() == 2) { + ReportID id1 = *reports.begin(); + ReportID id2 = *reports.rbegin(); + + bool has_verts_1 = contains(vert_map, id1); + bool has_verts_2 = contains(vert_map, id2); + bool has_sb_verts_1 = contains(sb_vert_map, id1); + bool has_sb_verts_2 = contains(sb_vert_map, id2); + + if (has_verts_1 != has_verts_2 && has_sb_verts_1 != has_sb_verts_2) { + DEBUG_PRINTF("two reports, one full and one small block: ok\n"); + return true; + } + } + + DEBUG_PRINTF("more than one report\n"); + return false; +} + +bool RoseDedupeAuxImpl::requiresDedupeSupport( + const flat_set &reports_in) const { + /* TODO: this could be expanded to check for offset or character + constraints */ + + // We don't want to consider dead reports (tracked by ReportManager but no + // longer used) for the purposes of assigning dupe keys. 
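makePath and literalsCouldRace above model a delayed literal as its string followed by delay wildcard positions, and declare a race when the shorter path is a suffix of the longer under per-position overlap. A self-contained sketch with plain strings, using '.' as a stand-in for CharReach::dot():

    #include <algorithm>
    #include <cassert>
    #include <string>

    // Two positions overlap if either is a wildcard or the chars match.
    static bool overlaps(char a, char b) {
        return a == '.' || b == '.' || a == b;
    }

    // Append `delay` wildcard positions, as makePath does with dots.
    static std::string makePath(const std::string &lit, unsigned delay) {
        return lit + std::string(delay, '.');
    }

    // True if the two literals (with their delays) could report at the same
    // offset: the shorter path is a suffix of the longer under overlaps.
    static bool literalsCouldRace(const std::string &lit1, unsigned d1,
                                  const std::string &lit2, unsigned d2) {
        std::string v1 = makePath(lit1, d1), v2 = makePath(lit2, d2);
        const std::string &smaller = v1.size() < v2.size() ? v1 : v2;
        const std::string &bigger = v1.size() < v2.size() ? v2 : v1;
        auto r = std::mismatch(smaller.rbegin(), smaller.rend(),
                               bigger.rbegin(), overlaps);
        return r.first == smaller.rend();
    }

    int main() {
        // "ab" delayed by 2 ("ab..") can end where "abcd" ends: race.
        assert(literalsCouldRace("abcd", 0, "ab", 2));
        // "xy" with no delay can never end where "abcd" ends.
        assert(!literalsCouldRace("abcd", 0, "xy", 0));
        return 0;
    }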
+ flat_set reports; + for (auto id : reports_in) { + if (contains(live_reports, id)) { + reports.insert(id); + } + } + + DEBUG_PRINTF("live reports: %s\n", as_string_list(reports).c_str()); + + const RoseGraph &g = build.g; + + bool has_suffix = false; + bool has_outfix = false; + + if (!hasSafeMultiReports(reports)) { + DEBUG_PRINTF("multiple reports not safe\n"); + return true; + } + + set roles; + set suffixes; + set outfixes; + set puffettes; + for (ReportID r : reports) { + if (contains(vert_map, r)) { + insert(&roles, vert_map.at(r)); + } + if (contains(suffix_map, r)) { + insert(&suffixes, suffix_map.at(r)); + } + + if (contains(outfix_map, r)) { + insert(&outfixes, outfix_map.at(r)); + } + + if (contains(puff_map, r)) { + insert(&puffettes, puff_map.at(r)); + } + } + + /* roles */ + + map lits; // Literal ID -> count of occurrences. + + const bool has_role = !roles.empty(); + for (auto v : roles) { + for (const auto &lit : g[v].literals) { + lits[lit]++; + } + if (g[v].eod_accept) { + // Literals plugged into this EOD accept must be taken into account + // as well. + for (auto u : inv_adjacent_vertices_range(v, g)) { + for (const auto &lit : g[u].literals) { + lits[lit]++; + } + } + } + } + + /* literals */ + + for (const auto &m : lits) { + if (m.second > 1) { + DEBUG_PRINTF("lit %u used by >1 reporting roles\n", m.first); + return true; + } + } + + for (auto it = begin(lits); it != end(lits); ++it) { + const auto &lit1 = build.literals.at(it->first); + for (auto jt = next(it); jt != end(lits); ++jt) { + const auto &lit2 = build.literals.at(jt->first); + if (literalsCouldRace(lit1, lit2)) { + DEBUG_PRINTF("literals could race\n"); + return true; + } + } + } + + /* suffixes */ + + for (const auto &suffix : suffixes) { + if (has_suffix || has_role) { + return true; /* scope for badness */ + } + + has_suffix = true; + + /* some lesser suffix engines (nfas, haig, castle) can raise multiple + * matches for a report id at the same offset if there are multiple + * report states live. 
+         */
+        if (suffix.haig()) {
+            return true;
+        }
+        if (suffix.graph() &&
+            requiresDedupe(*suffix.graph(), reports, build.cc.grey)) {
+            return true;
+        }
+        if (suffix.castle() && requiresDedupe(*suffix.castle(), reports)) {
+            return true;
+        }
+    }
+
+    /* outfixes */
+
+    for (const auto &outfix_ptr : outfixes) {
+        assert(outfix_ptr);
+        const OutfixInfo &out = *outfix_ptr;
+
+        if (has_outfix || has_role || has_suffix) {
+            return true;
+        }
+        has_outfix = true;
+
+        if (out.haig()) {
+            return true; /* haig may report matches with different SOM at the
+                            same offset */
+        }
+
+        if (out.holder() &&
+            requiresDedupe(*out.holder(), reports, build.cc.grey)) {
+            return true;
+        }
+    }
+
+    /* mpv */
+    for (UNUSED const auto &puff : puffettes) {
+        if (has_outfix || has_role || has_suffix) {
+            return true;
+        }
+        has_outfix = true;
+    }
+
+    /* boundary */
+    if (has_intersection(build.boundary.report_at_eod, reports)) {
+        if (has_outfix || has_role || has_suffix) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+} // namespace ue2
diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp
index 105ee338d..b527db6c8 100644
--- a/src/rose/rose_build_dump.cpp
+++ b/src/rose/rose_build_dump.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -32,11 +32,16 @@
 #include "rose_build_impl.h"
 #include "rose_build_matchers.h"
-#include "rose/rose_dump.h"
 #include "rose_internal.h"
+#include "rose_program.h"
 #include "ue2common.h"
+#include "hs_compile.h"
 #include "hwlm/hwlm_build.h"
+#include "hwlm/hwlm_dump.h"
+#include "hwlm/hwlm_literal.h"
 #include "nfa/castlecompile.h"
+#include "nfa/nfa_build_util.h"
+#include "nfa/nfa_dump_api.h"
 #include "nfa/nfa_internal.h"
 #include "nfagraph/ng_dump.h"
 #include "som/slot_manager_dump.h"
@@ -44,9 +49,12 @@
 #include "util/container.h"
 #include "util/dump_charclass.h"
 #include "util/graph_range.h"
+#include "util/multibit.h"
+#include "util/multibit_build.h"
 #include "util/ue2string.h"
 
 #include
+#include
 #include
 #include
 #include
@@ -81,10 +89,34 @@ string render_kind(const Graph &g) {
 
 namespace {
 
+struct rose_off {
+    explicit rose_off(u32 j) : i(j) {}
+    string str(void) const;
+    u32 i;
+};
+
+ostream &operator<<(ostream &o, const rose_off &to) {
+    if (to.i == ROSE_BOUND_INF) {
+        o << "inf";
+    } else {
+        o << to.i;
+    }
+    return o;
+}
+
+string rose_off::str(void) const {
+    ostringstream out;
+    out << *this;
+    return out.str();
+}
+
 class RoseGraphWriter {
 public:
-    RoseGraphWriter(const RoseBuildImpl &b_in, const RoseEngine *t_in) :
-        build(b_in), t(t_in) {
+    RoseGraphWriter(const RoseBuildImpl &b_in, const map<u32, u32> &frag_map_in,
+                    const map<left_id, u32> &lqm_in,
+                    const map<suffix_id, u32> &sqm_in, const RoseEngine *t_in)
+        : frag_map(frag_map_in), leftfix_queue_map(lqm_in),
+          suffix_queue_map(sqm_in), build(b_in), t(t_in) {
         for (const auto &m : build.ghost) {
             ghost.insert(m.second);
         }
@@ -131,8 +163,8 @@ class RoseGraphWriter {
         if (g[v].suffix) {
             suffix_id suff(g[v].suffix);
             os << "\\n" << render_kind(suff) << " (top " << g[v].suffix.top;
-            auto it = build.suffix_queue_map.find(suff);
-            if (it != end(build.suffix_queue_map)) {
+            auto it = suffix_queue_map.find(suff);
+            if (it != end(suffix_queue_map)) {
                 os << ", queue " << it->second;
             }
             os << ")";
@@ -145,8 +177,8 @@ class RoseGraphWriter {
         if (g[v].left) {
             left_id left(g[v].left);
             os << "\\n" << render_kind(left) << " (queue ";
-            auto it = build.leftfix_queue_map.find(left);
-            if (it != end(build.leftfix_queue_map)) {
+            auto it = leftfix_queue_map.find(left);
+            if (it != end(leftfix_queue_map)) {
                 os << it->second;
             } else {
                 os << "??";
             }
@@ -219,37 +251,50 @@ class RoseGraphWriter {
     // Render the literal associated with a vertex.
     void writeLiteral(ostream &os, u32 id) const {
         os << "lit=" << id;
-        if (id < build.literal_info.size()) {
-            os << "/" << build.literal_info[id].final_id << " ";
+        if (contains(frag_map, id)) {
+            os << "/" << frag_map.at(id) << " ";
         } else {
-            os << "/nofinal ";
+            os << "/nofrag ";
        }
 
-        if (contains(build.literals.right, id)) {
-            const auto &lit = build.literals.right.at(id);
-            os << '\'' << dotEscapeString(lit.s.get_string()) << '\'';
-            if (lit.s.any_nocase()) {
-                os << " (nocase)";
-            }
-            if (lit.delay) {
-                os << " +" << lit.delay;
-            }
-        } else {
-            os << "";
+        const auto &lit = build.literals.at(id);
+        os << '\'' << dotEscapeString(lit.s.get_string()) << '\'';
+        if (lit.s.any_nocase()) {
+            os << " (nocase)";
+        }
+        if (lit.delay) {
+            os << " +" << lit.delay;
         }
     }
 
     set<RoseVertex> ghost;
+    const map<u32, u32> &frag_map;
+    const map<left_id, u32> &leftfix_queue_map;
+    const map<suffix_id, u32> &suffix_queue_map;
     const RoseBuildImpl &build;
     const RoseEngine *t;
 };
 
 } // namespace
 
-void dumpRoseGraph(const RoseBuild &build_base, const RoseEngine *t,
-                   const char *filename) {
-    const RoseBuildImpl &build = dynamic_cast<const RoseBuildImpl &>(build_base);
+static
+map<u32, u32> makeFragMap(const vector<LitFragment> &fragments) {
+    map<u32, u32> fm;
+    for (const auto &f : fragments) {
+        for (u32 id : f.lit_ids) {
+            fm[id] = f.fragment_id;
+        }
+    }
+    return fm;
+}
+
+static
+void dumpRoseGraph(const RoseBuildImpl &build, const RoseEngine *t,
+                   const vector<LitFragment> &fragments,
+                   const map<left_id, u32> &leftfix_queue_map,
+                   const map<suffix_id, u32> &suffix_queue_map,
+                   const char *filename) {
     const Grey &grey = build.cc.grey;
 
     /* "early" rose graphs should only be dumped if we are dumping intermediate
@@ -266,10 +311,16 @@ void dumpRoseGraph(const RoseBuild &build_base, const RoseEngine *t,
     DEBUG_PRINTF("dumping graph to %s\n", ss.str().c_str());
     ofstream os(ss.str());
 
-    RoseGraphWriter writer(build, t);
+    auto frag_map = makeFragMap(fragments);
+    RoseGraphWriter writer(build, frag_map, leftfix_queue_map, suffix_queue_map,
+                           t);
     writeGraphviz(os, build.g, writer, get(boost::vertex_index, build.g));
 }
 
+void dumpRoseGraph(const RoseBuildImpl &build, const char *filename) {
+    dumpRoseGraph(build, nullptr, {}, {}, {}, filename);
+}
+
 namespace {
 struct CompareVertexRole {
     explicit CompareVertexRole(const RoseGraph &g_in) : g(g_in) {}
@@ -294,21 +345,25 @@ void lit_graph_info(const RoseBuildImpl &build, const rose_literal_info &li,
 }
 
 static
-void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) {
+void dumpRoseLiterals(const RoseBuildImpl &build,
+                      const vector<LitFragment> &fragments,
+                      const Grey &grey) {
     const RoseGraph &g = build.g;
+    map<u32, u32> frag_map = makeFragMap(fragments);
 
     DEBUG_PRINTF("dumping literals\n");
-    ofstream os(filename);
+    ofstream os(grey.dumpPath + "rose_literals.txt");
 
-    os << "ROSE LITERALS: a total of " << build.literals.right.size()
-       << " literals and " << num_vertices(g) << " roles." << endl << endl;
+    os << "ROSE LITERALS: a total of " << build.literals.size()
+       << " literals and " << num_vertices(g) << " roles."
<< endl + << endl; - for (const auto &e : build.literals.right) { - u32 id = e.first; - const ue2_literal &s = e.second.s; + for (u32 id = 0; id < build.literals.size(); id++) { + const auto &lit = build.literals.at(id); + const ue2_literal &s = lit.s; const rose_literal_info &lit_info = build.literal_info[id]; - switch (e.second.table) { + switch (lit.table) { case ROSE_ANCHORED: os << "ANCHORED"; break; @@ -326,8 +381,11 @@ void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) { break; } - os << " ID " << id << "/" << lit_info.final_id << ": \"" - << escapeString(s.get_string()) << "\"" + os << " ID " << id; + if (contains(frag_map, id)) { + os << "/" << frag_map.at(id); + } + os << ": \"" << escapeString(s.get_string()) << "\"" << " (len " << s.length() << ","; if (s.any_nocase()) { os << " nocase,"; @@ -336,8 +394,8 @@ void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) { os << " benefits,"; } - if (e.second.delay) { - os << " delayed "<< e.second.delay << ","; + if (lit.delay) { + os << " delayed "<< lit.delay << ","; } os << " groups 0x" << hex << setw(16) << setfill('0') @@ -420,60 +478,1751 @@ string toHex(Iter i, const Iter &end) { } static -void dumpTestLiterals(const string &filename, const vector &lits) { - ofstream of(filename.c_str()); +bool isMetaChar(char c) { + switch (c) { + case '#': + case '$': + case '(': + case ')': + case '*': + case '+': + case '.': + case '/': + case '?': + case '[': + case '\\': + case ']': + case '^': + case '{': + case '|': + case '}': + return true; + default: + return false; + } +} - for (const hwlmLiteral &lit : lits) { - of << lit.id << "="; - if (lit.nocase) { - of << "!"; +static +string toRegex(const string &lit) { + ostringstream os; + for (char c : lit) { + if (0x20 <= c && c <= 0x7e) { + if (isMetaChar(c)) { + os << "\\" << c; + } else { + os << c; + } + } else if (c == '\n') { + os << "\\n"; + } else if (c == '\r') { + os << "\\r"; + } else if (c == '\t') { + os << "\\t"; + } else { + os << "\\x" << hex << setw(2) << setfill('0') + << (unsigned)(c & 0xff) << dec; } - of << toHex(lit.s.begin(), lit.s.end()); + } + return os.str(); +} + +void dumpMatcherLiterals(const vector &lits, const string &name, + const Grey &grey) { + if (!grey.dumpFlags) { + return; + } + + ofstream of(grey.dumpPath + "rose_" + name + "_test_literals.txt"); + + // Unique regex index, as literals may share an ID. + u32 i = 0; + + for (const hwlmLiteral &lit : lits) { + // First, detail in a comment. + of << "# id=" << lit.id; if (!lit.msk.empty()) { - of << " " << toHex(lit.msk.begin(), lit.msk.end()); - of << " " << toHex(lit.cmp.begin(), lit.cmp.end()); + of << " msk=0x" << toHex(lit.msk.begin(), lit.msk.end()); + of << " cmp=0x" << toHex(lit.cmp.begin(), lit.cmp.end()); + } + of << " groups=0x" << hex << setfill('0') << lit.groups << dec; + if (lit.noruns) { + of << " noruns"; } + of << endl; + + // Second, literal rendered as a regex. + of << i << ":/" << toRegex(lit.s) << (lit.nocase ? 
"/i" : "/"); of << endl; + + i++; } of.close(); } static -void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) { - size_t historyRequired = build.calcHistoryRequired(); - size_t longLitLengthThreshold = - calcLongLitThreshold(build, historyRequired); +const void *loadFromByteCodeOffset(const RoseEngine *t, u32 offset) { + if (!offset) { + return nullptr; + } + + const char *lt = (const char *)t + offset; + return lt; +} + +static +const void *getAnchoredMatcher(const RoseEngine *t) { + return loadFromByteCodeOffset(t, t->amatcherOffset); +} + +static +const HWLM *getFloatingMatcher(const RoseEngine *t) { + return (const HWLM *)loadFromByteCodeOffset(t, t->fmatcherOffset); +} + +static +const HWLM *getDelayRebuildMatcher(const RoseEngine *t) { + return (const HWLM *)loadFromByteCodeOffset(t, t->drmatcherOffset); +} + +static +const HWLM *getEodMatcher(const RoseEngine *t) { + return (const HWLM *)loadFromByteCodeOffset(t, t->ematcherOffset); +} - auto lits = fillHamsterLiteralList(build, ROSE_ANCHORED, - longLitLengthThreshold); - dumpTestLiterals(base + "rose_anchored_test_literals.txt", lits); +static +const HWLM *getSmallBlockMatcher(const RoseEngine *t) { + return (const HWLM *)loadFromByteCodeOffset(t, t->sbmatcherOffset); +} - lits = fillHamsterLiteralList(build, ROSE_FLOATING, longLitLengthThreshold); - dumpTestLiterals(base + "rose_float_test_literals.txt", lits); +static +CharReach bitvectorToReach(const u8 *reach) { + CharReach cr; - lits = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED, - build.ematcher_region_size); - dumpTestLiterals(base + "rose_eod_test_literals.txt", lits); + for (size_t i = 0; i < N_CHARS; i++) { + if (reach[i / 8] & (1U << (i % 8))) { + cr.set(i); + } + } + return cr; +} - if (!build.cc.streaming) { - lits = fillHamsterLiteralList(build, ROSE_FLOATING, - ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); - auto lits2 = fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK, - ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); - lits.insert(end(lits), begin(lits2), end(lits2)); - dumpTestLiterals(base + "rose_smallblock_test_literals.txt", lits); +static +CharReach multiBitvectorToReach(const u8 *reach, u8 path_mask) { + CharReach cr; + for (size_t i = 0; i < N_CHARS; i++) { + if (reach[i] & path_mask) { + cr.set(i); + } } + return cr; } -void dumpRose(const RoseBuild &build_base, const RoseEngine *t, - const Grey &grey) { - if (!grey.dumpFlags) { +static +void dumpLookaround(ofstream &os, const RoseEngine *t, + const ROSE_STRUCT_CHECK_LOOKAROUND *ri) { + assert(ri); + + const u8 *base = (const u8 *)t; + + const s8 *look = (const s8 *)base + ri->look_index; + const s8 *look_end = look + ri->count; + const u8 *reach = base + ri->reach_index; + + os << " contents:" << endl; + + for (; look < look_end; look++, reach += REACH_BITVECTOR_LEN) { + os << " " << std::setw(4) << std::setfill(' ') << int{*look} + << ": "; + describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT); + os << endl; + } +} + +static +void dumpMultipathLookaround(ofstream &os, const RoseEngine *t, + const ROSE_STRUCT_MULTIPATH_LOOKAROUND *ri) { + assert(ri); + + const u8 *base = (const u8 *)t; + + const s8 *look_begin = (const s8 *)base + ri->look_index; + const s8 *look_end = look_begin + ri->count; + const u8 *reach_begin = base + ri->reach_index; + + os << " contents:" << endl; + + u32 path_mask = ri->start_mask[0]; + while (path_mask) { + u32 path = findAndClearLSB_32(&path_mask); + os << " Path #" << path << ":" << endl; + os << " "; + + const s8 *look = look_begin; + 
const u8 *reach = reach_begin; + for (; look < look_end; look++, reach += MULTI_REACH_BITVECTOR_LEN) { + CharReach cr = multiBitvectorToReach(reach, 1U << path); + if (cr.any() && !cr.all()) { + os << "<" << int(*look) << ": "; + describeClass(os, cr, 1000, CC_OUT_TEXT); + os << "> "; + } + } + os << endl; + } +} + +static +vector sparseIterValues(const mmbit_sparse_iter *it, u32 num_bits) { + vector keys; + + if (num_bits == 0) { + return keys; + } + + vector bits(mmbit_size(num_bits), u8{0xff}); // All bits on. + vector state(MAX_SPARSE_ITER_STATES); + + const u8 *b = bits.data(); + mmbit_sparse_state *s = state.data(); + + u32 idx = 0; + u32 i = mmbit_sparse_iter_begin(b, num_bits, &idx, it, s); + while (i != MMB_INVALID) { + keys.push_back(i); + i = mmbit_sparse_iter_next(b, num_bits, i, &idx, it, s); + } + + return keys; +} + +static +void dumpJumpTable(ofstream &os, const RoseEngine *t, + const ROSE_STRUCT_SPARSE_ITER_BEGIN *ri) { + auto *it = + (const mmbit_sparse_iter *)loadFromByteCodeOffset(t, ri->iter_offset); + auto *jumps = (const u32 *)loadFromByteCodeOffset(t, ri->jump_table); + + for (const auto &key : sparseIterValues(it, t->rolesWithStateCount)) { + os << " " << std::setw(4) << std::setfill(' ') << key << " : +" + << *jumps << endl; + ++jumps; + } +} + +static +void dumpSomOperation(ofstream &os, const som_operation &op) { + os << " som (type=" << u32{op.type} << ", onmatch=" << op.onmatch; + switch (op.type) { + case SOM_EXTERNAL_CALLBACK_REV_NFA: + case SOM_INTERNAL_LOC_SET_REV_NFA: + case SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET: + case SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE: + os << ", revNfaIndex=" << op.aux.revNfaIndex; + break; + default: + os << ", somDistance=" << op.aux.somDistance; + break; + } + os << ")" << endl; +} + +static +string dumpStrMask(const u8 *mask, size_t len) { + ostringstream oss; + for (size_t i = 0; i < len; i++) { + oss << std::hex << std::setw(2) << std::setfill('0') << u32{mask[i]} + << " "; + } + return oss.str(); +} + +static +CharReach shufti2cr(const u8 *lo, const u8 *hi, u8 bucket_mask) { + CharReach cr; + for (u32 i = 0; i < N_CHARS; i++) { + if(lo[i & 0xf] & hi[i >> 4] & bucket_mask) { + cr.set(i); + } + } + return cr; +} + +static +void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi, + const u8 *bucket_mask, u32 neg_mask, s32 offset) { + assert(len == 16 || len == 32); + os << " contents:" << endl; + for (u32 idx = 0; idx < len; idx++) { + CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]); + + if (neg_mask & (1U << idx)) { + cr.flip(); + } + + if (cr.any() && !cr.all()) { + os << " " << std::setw(4) << std::setfill(' ') + << int(offset + idx) << ": "; + describeClass(os, cr, 1000, CC_OUT_TEXT); + os << endl; + } + } +} + +static +void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi, + const u8 *lo_2, const u8 *hi_2, const u8 *bucket_mask, + const u8 *bucket_mask_2, u32 neg_mask, s32 offset) { + assert(len == 16 || len == 32); + os << " contents:" << endl; + for (u32 idx = 0; idx < len; idx++) { + CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]); + cr |= shufti2cr(lo_2, hi_2, bucket_mask_2[idx]); + + if (neg_mask & (1U << idx)) { + cr.flip(); + } + + if (cr.any() && !cr.all()) { + os << " " << std::setw(4) << std::setfill(' ') + << int(offset + idx) << ": "; + describeClass(os, cr, 1000, CC_OUT_TEXT); + os << endl; + } + } +} + +static +void dumpMultipathShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi, + const u8 *bucket_mask, const u8 *data_offset, + u64a neg_mask, s32 
base_offset) { + assert(len == 16 || len == 32 || len == 64); + os << " contents:" << endl; + u32 path = 0; + for (u32 idx = 0; idx < len; idx++) { + CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]); + + if (neg_mask & (1ULL << idx)) { + cr.flip(); + } + + if (cr.any() && !cr.all()) { + if (idx == 0 || data_offset[idx - 1] > data_offset[idx]) { + path++; + if (idx) { + os << endl; + } + os << " Path #" << path << ":" << endl; + os << " "; + } + + os << "<" << int(base_offset + data_offset[idx]) << ": "; + describeClass(os, cr, 1000, CC_OUT_TEXT); + os << "> "; + } + } + os << endl; +} + +static +void dumpMultipathShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi, + const u8 *lo_2, const u8 *hi_2, const u8 *bucket_mask, + const u8 *bucket_mask_2, const u8 *data_offset, + u32 neg_mask, s32 base_offset) { + assert(len == 16 || len == 32 || len == 64); + os << " contents:"; + u32 path = 0; + for (u32 idx = 0; idx < len; idx++) { + CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]); + cr |= shufti2cr(lo_2, hi_2, bucket_mask_2[idx]); + + if (neg_mask & (1ULL << idx)) { + cr.flip(); + } + + if (cr.any() && !cr.all()) { + if (idx == 0 || data_offset[idx - 1] > data_offset[idx]) { + path++; + os << endl; + os << " Path #" << path << ":" << endl; + os << " "; + } + + os << "<" << int(base_offset + data_offset[idx]) << ": "; + describeClass(os, cr, 1000, CC_OUT_TEXT); + os << "> "; + } + } + os << endl; +} + + #define PROGRAM_CASE(name) \ + case ROSE_INSTR_##name: { \ + os << " " << std::setw(4) << std::setfill('0') << (pc - pc_base) \ + << ": " #name "\n"; \ + const auto *ri = (const struct ROSE_STRUCT_##name *)pc; + +#define PROGRAM_NEXT_INSTRUCTION \ + pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ + break; \ + } + + +static +void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { + const char *pc_base = pc; + for (;;) { + u8 code = *(const u8 *)pc; + assert(code <= LAST_ROSE_INSTRUCTION); + const size_t offset = pc - pc_base; + switch (code) { + PROGRAM_CASE(END) { return; } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(ANCHORED_DELAY) { + os << " groups 0x" << std::hex << ri->groups << std::dec + << endl; + os << " anch_id " << ri->anch_id << "\n"; + os << " done_jump " << offset + ri->done_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LIT_EARLY) { + os << " min_offset " << ri->min_offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_GROUPS) { + os << " groups 0x" << std::hex << ri->groups << std::dec + << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_ONLY_EOD) { + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_BOUNDS) { + os << " min_bound " << ri->min_bound << endl; + os << " max_bound " << ri->max_bound << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_NOT_HANDLED) { + os << " key " << ri->key << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_SINGLE_LOOKAROUND) { + os << " offset " << int{ri->offset} << endl; + os << " reach_index " << ri->reach_index << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + const u8 *reach = (const u8 *)t + ri->reach_index; + os << " contents "; + describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT); + os << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LOOKAROUND) { + os << " look_index " << 
ri->look_index << endl; + os << " reach_index " << ri->reach_index << endl; + os << " count " << ri->count << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + dumpLookaround(os, t, ri); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MASK) { + os << " and_mask 0x" << std::hex << std::setw(16) + << std::setfill('0') << ri->and_mask << std::dec << endl; + os << " cmp_mask 0x" << std::hex << std::setw(16) + << std::setfill('0') << ri->cmp_mask << std::dec << endl; + os << " neg_mask 0x" << std::hex << std::setw(16) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " offset " << ri->offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MASK_32) { + os << " and_mask " + << dumpStrMask(ri->and_mask, sizeof(ri->and_mask)) + << endl; + os << " cmp_mask " + << dumpStrMask(ri->cmp_mask, sizeof(ri->cmp_mask)) + << endl; + os << " neg_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " offset " << ri->offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_BYTE) { + os << " and_mask 0x" << std::hex << std::setw(2) + << std::setfill('0') << u32{ri->and_mask} << std::dec + << endl; + os << " cmp_mask 0x" << std::hex << std::setw(2) + << std::setfill('0') << u32{ri->cmp_mask} << std::dec + << endl; + os << " negation " << u32{ri->negation} << endl; + os << " offset " << ri->offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_SHUFTI_16x8) { + os << " nib_mask " + << dumpStrMask(ri->nib_mask, sizeof(ri->nib_mask)) + << endl; + os << " bucket_select_mask " + << dumpStrMask(ri->bucket_select_mask, + sizeof(ri->bucket_select_mask)) + << endl; + os << " neg_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " offset " << ri->offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + dumpLookaroundShufti(os, 16, ri->nib_mask, ri->nib_mask + 16, + ri->bucket_select_mask, ri->neg_mask, + ri->offset); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_SHUFTI_32x8) { + os << " hi_mask " + << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) + << endl; + os << " lo_mask " + << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) + << endl; + os << " bucket_select_mask " + << dumpStrMask(ri->bucket_select_mask, + sizeof(ri->bucket_select_mask)) + << endl; + os << " neg_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " offset " << ri->offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + dumpLookaroundShufti(os, 32, ri->lo_mask, ri->hi_mask, + ri->bucket_select_mask, ri->neg_mask, + ri->offset); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_SHUFTI_16x16) { + os << " hi_mask " + << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) + << endl; + os << " lo_mask " + << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) + << endl; + os << " bucket_select_mask " + << dumpStrMask(ri->bucket_select_mask, + sizeof(ri->bucket_select_mask)) + << endl; + os << " neg_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " offset " << ri->offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + dumpLookaroundShufti(os, 16, ri->lo_mask, ri->hi_mask, + ri->lo_mask + 16, ri->hi_mask + 16, + ri->bucket_select_mask, + 
ri->bucket_select_mask + 16, + ri->neg_mask, ri->offset); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_SHUFTI_32x16) { + os << " hi_mask " + << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) + << endl; + os << " lo_mask " + << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) + << endl; + os << " bucket_select_mask_hi " + << dumpStrMask(ri->bucket_select_mask_hi, + sizeof(ri->bucket_select_mask_hi)) + << endl; + os << " bucket_select_mask_lo " + << dumpStrMask(ri->bucket_select_mask_lo, + sizeof(ri->bucket_select_mask_lo)) + << endl; + os << " neg_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " offset " << ri->offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + dumpLookaroundShufti(os, 32, ri->lo_mask, ri->hi_mask, + ri->lo_mask + 16, ri->hi_mask + 16, + ri->bucket_select_mask_lo, + ri->bucket_select_mask_hi, + ri->neg_mask, ri->offset); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_INFIX) { + os << " queue " << ri->queue << endl; + os << " lag " << ri->lag << endl; + os << " report " << ri->report << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_PREFIX) { + os << " queue " << ri->queue << endl; + os << " lag " << ri->lag << endl; + os << " report " << ri->report << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(PUSH_DELAYED) { + os << " delay " << u32{ri->delay} << endl; + os << " index " << ri->index << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(DUMMY_NOP) {} + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CATCH_UP) {} + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CATCH_UP_MPV) {} + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_ADJUST) { + os << " distance " << ri->distance << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_LEFTFIX) { + os << " queue " << ri->queue << endl; + os << " lag " << ri->lag << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_FROM_REPORT) { + dumpSomOperation(os, ri->som); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_ZERO) {} + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(TRIGGER_INFIX) { + os << " queue " << ri->queue << endl; + os << " event " << ri->event << endl; + os << " cancel " << u32{ri->cancel} << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(TRIGGER_SUFFIX) { + os << " queue " << ri->queue << endl; + os << " event " << ri->event << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(DEDUPE) { + os << " quash_som " << u32{ri->quash_som} << endl; + os << " dkey " << ri->dkey << endl; + os << " offset_adjust " << ri->offset_adjust << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(DEDUPE_SOM) { + os << " quash_som " << u32{ri->quash_som} << endl; + os << " dkey " << ri->dkey << endl; + os << " offset_adjust " << ri->offset_adjust << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_CHAIN) { + os << " event " << ri->event << endl; + os << " top_squash_distance " << ri->top_squash_distance + << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_INT) { + dumpSomOperation(os, ri->som); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_AWARE) { + dumpSomOperation(os, ri->som); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT) { + os << " onmatch " << ri->onmatch << endl; + os << " offset_adjust " << ri->offset_adjust << endl; + } + 
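+            // Every case ends in PROGRAM_NEXT_INSTRUCTION, which advances pc
+            // by ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); the jump
+            // targets printed above are byte offsets from the program start
+            // (pc - pc_base), matching the "NNNN:" label on each opcode.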
PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_EXHAUST) { + os << " onmatch " << ri->onmatch << endl; + os << " offset_adjust " << ri->offset_adjust << endl; + os << " ekey " << ri->ekey << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM) { + os << " onmatch " << ri->onmatch << endl; + os << " offset_adjust " << ri->offset_adjust << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_EXHAUST) { + os << " onmatch " << ri->onmatch << endl; + os << " offset_adjust " << ri->offset_adjust << endl; + os << " ekey " << ri->ekey << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(DEDUPE_AND_REPORT) { + os << " quash_som " << u32{ri->quash_som} << endl; + os << " dkey " << ri->dkey << endl; + os << " onmatch " << ri->onmatch << endl; + os << " offset_adjust " << ri->offset_adjust << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(FINAL_REPORT) { + os << " onmatch " << ri->onmatch << endl; + os << " offset_adjust " << ri->offset_adjust << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_EXHAUSTED) { + os << " ekey " << ri->ekey << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MIN_LENGTH) { + os << " end_adj " << ri->end_adj << endl; + os << " min_length " << ri->min_length << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_STATE) { + os << " index " << ri->index << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_GROUPS) { + os << " groups 0x" << std::hex << ri->groups << std::dec + << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SQUASH_GROUPS) { + os << " groups 0x" << std::hex << ri->groups << std::dec + << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_STATE) { + os << " index " << ri->index << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SPARSE_ITER_BEGIN) { + os << " iter_offset " << ri->iter_offset << endl; + os << " jump_table " << ri->jump_table << endl; + dumpJumpTable(os, t, ri); + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SPARSE_ITER_NEXT) { + os << " iter_offset " << ri->iter_offset << endl; + os << " jump_table " << ri->jump_table << endl; + os << " state " << ri->state << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SPARSE_ITER_ANY) { + os << " iter_offset " << ri->iter_offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(ENGINES_EOD) { + os << " iter_offset " << ri->iter_offset << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SUFFIXES_EOD) {} + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(MATCHER_EOD) {} + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LONG_LIT) { + os << " lit_offset " << ri->lit_offset << endl; + os << " lit_length " << ri->lit_length << endl; + const char *lit = (const char *)t + ri->lit_offset; + os << " literal: \"" + << escapeString(string(lit, ri->lit_length)) << "\"" << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) { + os << " lit_offset " << ri->lit_offset << endl; + os << " lit_length " << ri->lit_length << endl; + const char *lit = (const char *)t + ri->lit_offset; + os << " literal: \"" + << escapeString(string(lit, ri->lit_length)) 
<< "\"" << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MED_LIT) { + os << " lit_offset " << ri->lit_offset << endl; + os << " lit_length " << ri->lit_length << endl; + const char *lit = (const char *)t + ri->lit_offset; + os << " literal: \"" + << escapeString(string(lit, ri->lit_length)) << "\"" << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MED_LIT_NOCASE) { + os << " lit_offset " << ri->lit_offset << endl; + os << " lit_length " << ri->lit_length << endl; + const char *lit = (const char *)t + ri->lit_offset; + os << " literal: \"" + << escapeString(string(lit, ri->lit_length)) << "\"" << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CLEAR_WORK_DONE) {} + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(MULTIPATH_LOOKAROUND) { + os << " look_index " << ri->look_index << endl; + os << " reach_index " << ri->reach_index << endl; + os << " count " << ri->count << endl; + os << " last_start " << ri->last_start << endl; + os << " start_mask " + << dumpStrMask(ri->start_mask, sizeof(ri->start_mask)) + << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + dumpMultipathLookaround(os, t, ri); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_16x8) { + os << " nib_mask " + << dumpStrMask(ri->nib_mask, sizeof(ri->nib_mask)) + << endl; + os << " bucket_select_mask " + << dumpStrMask(ri->bucket_select_mask, + sizeof(ri->bucket_select_mask)) + << endl; + os << " data_select_mask " + << dumpStrMask(ri->data_select_mask, + sizeof(ri->data_select_mask)) + << endl; + os << " hi_bits_mask 0x" << std::hex << std::setw(4) + << std::setfill('0') << ri->hi_bits_mask << std::dec << endl; + os << " lo_bits_mask 0x" << std::hex << std::setw(4) + << std::setfill('0') << ri->lo_bits_mask << std::dec << endl; + os << " neg_mask 0x" << std::hex << std::setw(4) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " base_offset " << ri->base_offset << endl; + os << " last_start " << ri->last_start << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + dumpMultipathShufti(os, 16, ri->nib_mask, ri->nib_mask + 16, + ri->bucket_select_mask, + ri->data_select_mask, + ri->neg_mask, ri->base_offset); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x8) { + os << " hi_mask " + << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) + << endl; + os << " lo_mask " + << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) + << endl; + os << " bucket_select_mask " + << dumpStrMask(ri->bucket_select_mask, + sizeof(ri->bucket_select_mask)) + << endl; + os << " data_select_mask " + << dumpStrMask(ri->data_select_mask, + sizeof(ri->data_select_mask)) + << endl; + os << " hi_bits_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->hi_bits_mask << std::dec << endl; + os << " lo_bits_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->lo_bits_mask << std::dec << endl; + os << " neg_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " base_offset " << ri->base_offset << endl; + os << " last_start " << ri->last_start << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + dumpMultipathShufti(os, 32, ri->lo_mask, ri->hi_mask, + ri->bucket_select_mask, + ri->data_select_mask, + ri->neg_mask, ri->base_offset); + } + PROGRAM_NEXT_INSTRUCTION + + 
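+            // The x16 shufti checks below carry two tables (the two halves
+            // of hi_mask/lo_mask plus a second bucket select mask), doubling
+            // the number of representable buckets; the dump helpers OR the
+            // two per-bucket reaches together when reconstructing each
+            // position's CharReach.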
PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x16) { + os << " hi_mask " + << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) + << endl; + os << " lo_mask " + << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) + << endl; + os << " bucket_select_mask_hi " + << dumpStrMask(ri->bucket_select_mask_hi, + sizeof(ri->bucket_select_mask_hi)) + << endl; + os << " bucket_select_mask_lo " + << dumpStrMask(ri->bucket_select_mask_lo, + sizeof(ri->bucket_select_mask_lo)) + << endl; + os << " data_select_mask " + << dumpStrMask(ri->data_select_mask, + sizeof(ri->data_select_mask)) + << endl; + os << " hi_bits_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->hi_bits_mask << std::dec << endl; + os << " lo_bits_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->lo_bits_mask << std::dec << endl; + os << " neg_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " base_offset " << ri->base_offset << endl; + os << " last_start " << ri->last_start << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + dumpMultipathShufti(os, 32, ri->lo_mask, ri->hi_mask, + ri->lo_mask + 16, ri->hi_mask + 16, + ri->bucket_select_mask_lo, + ri->bucket_select_mask_hi, + ri->data_select_mask, + ri->neg_mask, ri->base_offset); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_64) { + os << " hi_mask " + << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) + << endl; + os << " lo_mask " + << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) + << endl; + os << " bucket_select_mask " + << dumpStrMask(ri->bucket_select_mask, + sizeof(ri->bucket_select_mask)) + << endl; + os << " data_select_mask " + << dumpStrMask(ri->data_select_mask, + sizeof(ri->data_select_mask)) + << endl; + os << " hi_bits_mask 0x" << std::hex << std::setw(16) + << std::setfill('0') << ri->hi_bits_mask << std::dec << endl; + os << " lo_bits_mask 0x" << std::hex << std::setw(16) + << std::setfill('0') << ri->lo_bits_mask << std::dec << endl; + os << " neg_mask 0x" << std::hex << std::setw(16) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " base_offset " << ri->base_offset << endl; + os << " last_start " << ri->last_start << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + dumpMultipathShufti(os, 64, ri->lo_mask, ri->hi_mask, + ri->bucket_select_mask, + ri->data_select_mask, + ri->neg_mask, ri->base_offset); + } + PROGRAM_NEXT_INSTRUCTION + + default: + os << " UNKNOWN (code " << int{code} << ")" << endl; + os << " " << endl; + return; + } + } +} + +#undef PROGRAM_CASE +#undef PROGRAM_NEXT_INSTRUCTION + +static +void dumpRoseLitPrograms(const vector &fragments, + const RoseEngine *t, const string &filename) { + ofstream os(filename); + + // Collect all programs referenced by a literal fragment. 
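+    // Both the literal match program and the delay rebuild program are
+    // gathered; fragments may share a program, so the offsets are sorted
+    // and deduplicated before each program is dumped exactly once.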
+ vector programs; + for (const auto &frag : fragments) { + if (frag.lit_program_offset) { + programs.push_back(frag.lit_program_offset); + } + if (frag.delay_program_offset) { + programs.push_back(frag.delay_program_offset); + } + } + sort_and_unique(programs); + + for (u32 prog_offset : programs) { + os << "Program @ " << prog_offset << ":" << endl; + const char *prog = (const char *)loadFromByteCodeOffset(t, prog_offset); + dumpProgram(os, t, prog); + os << endl; + } + + os.close(); +} + +static +void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) { + ofstream os(filename); + const char *base = (const char *)t; + + if (t->eodProgramOffset) { + os << "EOD Program @ " << t->eodProgramOffset << ":" << endl; + dumpProgram(os, t, base + t->eodProgramOffset); + os << endl; + } else { + os << "" << endl; + } + + os.close(); +} + +static +void dumpRoseReportPrograms(const RoseEngine *t, const string &filename) { + ofstream os(filename); + + const u32 *programs = + (const u32 *)loadFromByteCodeOffset(t, t->reportProgramOffset); + + for (u32 i = 0; i < t->reportProgramCount; i++) { + os << "Report " << i << endl; + os << "---------------" << endl; + + if (programs[i]) { + os << "Program @ " << programs[i] << ":" << endl; + const char *prog = + (const char *)loadFromByteCodeOffset(t, programs[i]); + dumpProgram(os, t, prog); + } else { + os << "" << endl; + } + } + + os.close(); +} + +static +void dumpRoseAnchoredPrograms(const RoseEngine *t, const string &filename) { + ofstream os(filename); + + const u32 *programs = + (const u32 *)loadFromByteCodeOffset(t, t->anchoredProgramOffset); + + for (u32 i = 0; i < t->anchored_count; i++) { + os << "Anchored entry " << i << endl; + os << "---------------" << endl; + + if (programs[i]) { + os << "Program @ " << programs[i] << ":" << endl; + const char *prog = + (const char *)loadFromByteCodeOffset(t, programs[i]); + dumpProgram(os, t, prog); + } else { + os << "" << endl; + } + os << endl; + } + + os.close(); +} + +static +void dumpRoseDelayPrograms(const RoseEngine *t, const string &filename) { + ofstream os(filename); + + const u32 *programs = + (const u32 *)loadFromByteCodeOffset(t, t->delayProgramOffset); + + for (u32 i = 0; i < t->delay_count; i++) { + os << "Delay entry " << i << endl; + os << "---------------" << endl; + + if (programs[i]) { + os << "Program @ " << programs[i] << ":" << endl; + const char *prog = + (const char *)loadFromByteCodeOffset(t, programs[i]); + dumpProgram(os, t, prog); + } else { + os << "" << endl; + } + os << endl; + } + + os.close(); +} + +static +void dumpNfaNotes(ofstream &fout, const RoseEngine *t, const NFA *n) { + const u32 qindex = n->queueIndex; + + if (qindex < t->outfixBeginQueue) { + fout << "chained"; + return; + } + + if (qindex < t->outfixEndQueue) { + fout << "outfix"; + return; + } + + const NfaInfo *nfa_info = getNfaInfoByQueue(t, qindex); + const NFA *nfa = getNfaByInfo(t, nfa_info); + + if (nfa_info->eod) { + fout << "eod "; + } + + if (qindex < t->leftfixBeginQueue) { + fout << "suffix"; + return; + } + + const LeftNfaInfo *left = getLeftInfoByQueue(t, qindex); + if (left->eager) { + fout << "eager "; + } + if (left->transient) { + fout << "transient " << (u32)left->transient << " "; + } + if (left->infix) { + fout << "infix"; + u32 maxQueueLen = left->maxQueueLen; + if (maxQueueLen != (u32)(-1)) { + fout << " maxqlen=" << maxQueueLen; + } + } else { + fout << "prefix"; + } + fout << " maxlag=" << left->maxLag; + if (left->stopTable) { + fout << " miracles"; + } + if 
(left->countingMiracleOffset) { + const RoseCountingMiracle *cm + = (const RoseCountingMiracle *)((const char *)t + + left->countingMiracleOffset); + fout << " counting_miracle:" << (int)cm->count + << (cm->shufti ? "s" : "v"); + } + if (nfaSupportsZombie(nfa)) { + fout << " zombie"; + } + if (left->eod_check) { + fout << " eod"; + } +} + +static +void dumpComponentInfo(const RoseEngine *t, const string &base) { + stringstream ss; + ss << base << "rose_components.txt"; + ofstream fout(ss.str().c_str()); + + fout << "Index Offset\tEngine \tStates S.State Bytes Notes\n"; + + for (u32 i = 0; i < t->queueCount; i++) { + const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); + const NFA *n = getNfaByInfo(t, nfa_info); + + fout << left << setw(6) << i << " "; + + fout << left << ((const char *)n - (const char *)t) << "\t"; /* offset */ + + fout << left << setw(16) << describe(*n) << "\t"; + + fout << left << setw(6) << n->nPositions << " "; + fout << left << setw(7) << n->streamStateSize << " "; + fout << left << setw(7) << n->length << " "; + + dumpNfaNotes(fout, t, n); + + fout << endl; + } +} + + +static +void dumpComponentInfoCsv(const RoseEngine *t, const string &base) { + FILE *f = fopen((base +"rose_components.csv").c_str(), "w"); + + fprintf(f, "Index, Offset,Engine Type,States,Stream State,Bytecode Size," + "Kind,Notes\n"); + + for (u32 i = 0; i < t->queueCount; i++) { + const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); + const NFA *n = getNfaByInfo(t, nfa_info); + nfa_kind kind; + stringstream notes; + + if (i < t->outfixBeginQueue) { + notes << "chained;"; + } + + if (nfa_info->eod) { + notes << "eod;"; + } + + if (i < t->outfixEndQueue) { + kind = NFA_OUTFIX; + } else if (i < t->leftfixBeginQueue) { + kind = NFA_SUFFIX; + } else { + const LeftNfaInfo *left = getLeftInfoByQueue(t, i); + if (left->eager) { + notes << "eager;"; + } + if (left->transient) { + notes << "transient " << (u32)left->transient << ";"; + } + if (left->infix) { + kind = NFA_INFIX; + u32 maxQueueLen = left->maxQueueLen; + if (maxQueueLen != (u32)(-1)) { + notes << "maxqlen=" << maxQueueLen << ";"; + } + } else { + kind = NFA_PREFIX; + } + notes << "maxlag=" << left->maxLag << ";"; + if (left->stopTable) { + notes << "miracles;"; + } + if (left->countingMiracleOffset) { + auto cm = (const RoseCountingMiracle *) + ((const char *)t + left->countingMiracleOffset); + notes << "counting_miracle:" << (int)cm->count + << (cm->shufti ? 
"s" : "v") << ";"; + } + if (nfaSupportsZombie(n)) { + notes << " zombie;"; + } + if (left->eod_check) { + notes << "left_eod;"; + } + } + + fprintf(f, "%u,%zd,\"%s\",%u,%u,%u,%s,%s\n", i, + (const char *)n - (const char *)t, describe(*n).c_str(), + n->nPositions, n->streamStateSize, n->length, + to_string(kind).c_str(), notes.str().c_str()); + } + fclose(f); +} + +static +void dumpExhaust(const RoseEngine *t, const string &base) { + stringstream sstxt; + sstxt << base << "rose_exhaust.txt"; + FILE *f = fopen(sstxt.str().c_str(), "w"); + + const NfaInfo *infos + = (const NfaInfo *)((const char *)t + t->nfaInfoOffset); + + u32 queue_count = t->activeArrayCount; + + for (u32 i = 0; i < queue_count; ++i) { + u32 ekey_offset = infos[i].ekeyListOffset; + + fprintf(f, "%u (%u):", i, ekey_offset); + + if (ekey_offset) { + const u32 *ekeys = (const u32 *)((const char *)t + ekey_offset); + while (1) { + u32 e = *ekeys; + ++ekeys; + if (e == ~0U) { + break; + } + fprintf(f, " %u", e); + } + } + + fprintf(f, "\n"); + } + + fclose(f); +} + +static +void dumpNfas(const RoseEngine *t, bool dump_raw, const string &base) { + dumpExhaust(t, base); + + for (u32 i = 0; i < t->queueCount; i++) { + const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); + const NFA *n = getNfaByInfo(t, nfa_info); + + stringstream ssbase; + ssbase << base << "rose_nfa_" << i; + nfaGenerateDumpFiles(n, ssbase.str()); + + if (dump_raw) { + stringstream ssraw; + ssraw << base << "rose_nfa_" << i << ".raw"; + FILE *f = fopen(ssraw.str().c_str(), "w"); + fwrite(n, 1, n->length, f); + fclose(f); + } + } +} + +static +void dumpRevComponentInfo(const RoseEngine *t, const string &base) { + stringstream ss; + ss << base << "som_rev_components.txt"; + ofstream fout(ss.str().c_str()); + + fout << "Index Offset\tEngine \tStates S.State Bytes\n"; + + const char *tp = (const char *)t; + const u32 *rev_offsets = (const u32 *)(tp + t->somRevOffsetOffset); + + for (u32 i = 0; i < t->somRevCount; i++) { + u32 offset = rev_offsets[i]; + const NFA *n = (const NFA *)(tp + offset); + + fout << left << setw(6) << i << " "; + + fout << left << offset << "\t"; /* offset */ + + fout << left << setw(16) << describe(*n) << "\t"; + + fout << left << setw(6) << n->nPositions << " "; + fout << left << setw(7) << n->streamStateSize << " "; + fout << left << setw(7) << n->length; + fout << endl; + } +} + +static +void dumpRevNfas(const RoseEngine *t, bool dump_raw, const string &base) { + const char *tp = (const char *)t; + const u32 *rev_offsets = (const u32 *)(tp + t->somRevOffsetOffset); + + for (u32 i = 0; i < t->somRevCount; i++) { + const NFA *n = (const NFA *)(tp + rev_offsets[i]); + + stringstream ssbase; + ssbase << base << "som_rev_nfa_" << i; + nfaGenerateDumpFiles(n, ssbase.str()); + + if (dump_raw) { + stringstream ssraw; + ssraw << base << "som_rev_nfa_" << i << ".raw"; + FILE *f = fopen(ssraw.str().c_str(), "w"); + fwrite(n, 1, n->length, f); + fclose(f); + } + } +} + +static +void dumpAnchored(const RoseEngine *t, const string &base) { + u32 i = 0; + const anchored_matcher_info *curr + = (const anchored_matcher_info *)getALiteralMatcher(t); + + while (curr) { + const NFA *n = (const NFA *)((const char *)curr + sizeof(*curr)); + + stringstream ssbase; + ssbase << base << "anchored_" << i; + nfaGenerateDumpFiles(n, ssbase.str()); + + curr = curr->next_offset ? 
(const anchored_matcher_info *) + ((const char *)curr + curr->next_offset) : nullptr; + i++; + }; +} + +static +void dumpAnchoredStats(const void *atable, FILE *f) { + assert(atable); + + u32 i = 0; + const anchored_matcher_info *curr = (const anchored_matcher_info *)atable; + + while (curr) { + const NFA *n = (const NFA *)((const char *)curr + sizeof(*curr)); + + fprintf(f, " NFA %u: %s, %u states (%u bytes)\n", i, + describe(*n).c_str(), n->nPositions, n->length); + + curr = curr->next_offset ? (const anchored_matcher_info *) + ((const char *)curr + curr->next_offset) : nullptr; + i++; + }; + +} + +static +void dumpLongLiteralSubtable(const RoseLongLitTable *ll_table, + const RoseLongLitSubtable *ll_sub, FILE *f) { + if (!ll_sub->hashBits) { + fprintf(f, " \n"); + return; + } + + const char *base = (const char *)ll_table; + + u32 nbits = ll_sub->hashBits; + u32 num_entries = 1U << nbits; + const auto *tab = (const RoseLongLitHashEntry *)(base + ll_sub->hashOffset); + u32 hash_occ = + count_if(tab, tab + num_entries, [](const RoseLongLitHashEntry &ent) { + return ent.str_offset != 0; + }); + float hash_occ_percent = ((float)hash_occ / (float)num_entries) * 100; + + fprintf(f, " hash table : %u bits, occupancy %u/%u (%0.1f%%)\n", + nbits, hash_occ, num_entries, hash_occ_percent); + + u32 bloom_bits = ll_sub->bloomBits; + u32 bloom_size = 1U << bloom_bits; + const u8 *bloom = (const u8 *)base + ll_sub->bloomOffset; + u32 bloom_occ = accumulate(bloom, bloom + bloom_size / 8, 0, + [](const u32 &sum, const u8 &elem) { return sum + popcount32(elem); }); + float bloom_occ_percent = ((float)bloom_occ / (float)(bloom_size)) * 100; + + fprintf(f, " bloom filter : %u bits, occupancy %u/%u (%0.1f%%)\n", + bloom_bits, bloom_occ, bloom_size, bloom_occ_percent); +} + +static +void dumpLongLiteralTable(const RoseEngine *t, FILE *f) { + if (!t->longLitTableOffset) { + return; + } + + fprintf(f, "\n"); + fprintf(f, "Long literal table (streaming):\n"); + + const auto *ll_table = + (const struct RoseLongLitTable *)loadFromByteCodeOffset( + t, t->longLitTableOffset); + + fprintf(f, " total size : %u bytes\n", ll_table->size); + fprintf(f, " longest len : %u\n", ll_table->maxLen); + fprintf(f, " stream state : %u bytes\n", ll_table->streamStateBytes); + + fprintf(f, " caseful:\n"); + dumpLongLiteralSubtable(ll_table, &ll_table->caseful, f); + + fprintf(f, " nocase:\n"); + dumpLongLiteralSubtable(ll_table, &ll_table->nocase, f); +} + +static +void roseDumpText(const RoseEngine *t, FILE *f) { + if (!t) { + fprintf(f, "<< no rose >>\n"); return; } - const RoseBuildImpl &build = dynamic_cast(build_base); + const void *atable = getAnchoredMatcher(t); + const HWLM *ftable = getFloatingMatcher(t); + const HWLM *drtable = getDelayRebuildMatcher(t); + const HWLM *etable = getEodMatcher(t); + const HWLM *sbtable = getSmallBlockMatcher(t); + + fprintf(f, "Rose:\n\n"); + + fprintf(f, "mode: : "); + switch(t->mode) { + case HS_MODE_BLOCK: + fprintf(f, "block"); + break; + case HS_MODE_STREAM: + fprintf(f, "streaming"); + break; + case HS_MODE_VECTORED: + fprintf(f, "vectored"); + break; + } + fprintf(f, "\n"); + + fprintf(f, "properties :"); + if (t->canExhaust) { + fprintf(f, " canExhaust"); + } + if (t->hasSom) { + fprintf(f, " hasSom"); + } + if (t->runtimeImpl == ROSE_RUNTIME_PURE_LITERAL) { + fprintf(f, " pureLiteral"); + } + if (t->runtimeImpl == ROSE_RUNTIME_SINGLE_OUTFIX) { + fprintf(f, " soleOutfix"); + } + fprintf(f, "\n"); + + fprintf(f, "dkey count : %u\n", t->dkeyCount); + fprintf(f, "som slot count : %u\n", 
t->somLocationCount); + fprintf(f, "som width : %u bytes\n", t->somHorizon); + fprintf(f, "rose count : %u\n", t->roseCount); + fprintf(f, "\n"); + + fprintf(f, "total engine size : %u bytes\n", t->size); + fprintf(f, " - anchored matcher : %u bytes over %u bytes\n", t->asize, + t->anchoredDistance); + fprintf(f, " - floating matcher : %zu bytes%s", + ftable ? hwlmSize(ftable) : 0, t->noFloatingRoots ? " (cond)":""); + if (t->floatingMinDistance) { + fprintf(f, " from %s bytes\n", + rose_off(t->floatingMinDistance).str().c_str()); + } + if (t->floatingDistance != ROSE_BOUND_INF && ftable) { + fprintf(f, " over %u bytes\n", t->floatingDistance); + } else { + fprintf(f, "\n"); + } + fprintf(f, " - delay-rb matcher : %zu bytes\n", + drtable ? hwlmSize(drtable) : 0); + fprintf(f, " - eod-anch matcher : %zu bytes over last %u bytes\n", + etable ? hwlmSize(etable) : 0, t->ematcherRegionSize); + fprintf(f, " - small-blk matcher : %zu bytes over %u bytes\n", + sbtable ? hwlmSize(sbtable) : 0, t->smallBlockDistance); + fprintf(f, " - role state table : %zu bytes\n", + t->rolesWithStateCount * sizeof(u32)); + fprintf(f, " - nfa info table : %zu bytes\n", + t->queueCount * sizeof(NfaInfo)); + + fprintf(f, "state space required : %u bytes\n", t->stateOffsets.end); + fprintf(f, " - history buffer : %u bytes\n", t->historyRequired); + fprintf(f, " - exhaustion vector : %u bytes\n", (t->ekeyCount + 7) / 8); + fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize); + fprintf(f, " - long lit matcher : %u bytes\n", t->longLitStreamState); + fprintf(f, " - active array : %u bytes\n", + mmbit_size(t->activeArrayCount)); + fprintf(f, " - active rose : %u bytes\n", + mmbit_size(t->activeLeftCount)); + fprintf(f, " - anchored state : %u bytes\n", t->anchorStateSize); + fprintf(f, " - nfa state : %u bytes\n", t->nfaStateSize); + fprintf(f, " - (trans. 
nfa state): %u bytes\n", t->tStateSize); + fprintf(f, " - one whole bytes : %u bytes\n", + t->stateOffsets.anchorState - t->stateOffsets.leftfixLagTable); + fprintf(f, " - groups : %u bytes\n", + t->stateOffsets.groups_size); + fprintf(f, "\n"); + + fprintf(f, "initial groups : 0x%016llx\n", t->initialGroups); + fprintf(f, "floating groups : 0x%016llx\n", t->floating_group_mask); + fprintf(f, "handled key count : %u\n", t->handledKeyCount); + fprintf(f, "\n"); + + fprintf(f, "total literal count : %u\n", t->totalNumLiterals); + fprintf(f, " delayed literals : %u\n", t->delay_count); + + fprintf(f, "\n"); + fprintf(f, " minWidth : %u\n", t->minWidth); + fprintf(f, " minWidthExcludingBoundaries : %u\n", + t->minWidthExcludingBoundaries); + fprintf(f, " maxBiAnchoredWidth : %s\n", + rose_off(t->maxBiAnchoredWidth).str().c_str()); + fprintf(f, " minFloatLitMatchOffset : %s\n", + rose_off(t->floatingMinLiteralMatchOffset).str().c_str()); + fprintf(f, " maxFloatingDelayedMatch : %s\n", + rose_off(t->maxFloatingDelayedMatch).str().c_str()); + + if (atable) { + fprintf(f, "\nAnchored literal matcher stats:\n\n"); + dumpAnchoredStats(atable, f); + } + + if (ftable) { + fprintf(f, "\nFloating literal matcher stats:\n\n"); + hwlmPrintStats(ftable, f); + } + + if (drtable) { + fprintf(f, "\nDelay Rebuild literal matcher stats:\n\n"); + hwlmPrintStats(drtable, f); + } + + if (etable) { + fprintf(f, "\nEOD-anchored literal matcher stats:\n\n"); + hwlmPrintStats(etable, f); + } + + if (sbtable) { + fprintf(f, "\nSmall-block literal matcher stats:\n\n"); + hwlmPrintStats(sbtable, f); + } + + dumpLongLiteralTable(t, f); +} + +#define DUMP_U8(o, member) \ + fprintf(f, " %-32s: %hhu/%hhx\n", #member, o->member, o->member) +#define DUMP_U32(o, member) \ + fprintf(f, " %-32s: %u/%08x\n", #member, o->member, o->member) +#define DUMP_U64(o, member) \ + fprintf(f, " %-32s: %llu/%016llx\n", #member, o->member, o->member) + +static +void roseDumpStructRaw(const RoseEngine *t, FILE *f) { + fprintf(f, "struct RoseEngine {\n"); + DUMP_U8(t, noFloatingRoots); + DUMP_U8(t, requiresEodCheck); + DUMP_U8(t, hasOutfixesInSmallBlock); + DUMP_U8(t, runtimeImpl); + DUMP_U8(t, mpvTriggeredByLeaf); + DUMP_U8(t, canExhaust); + DUMP_U8(t, hasSom); + DUMP_U8(t, somHorizon); + DUMP_U32(t, mode); + DUMP_U32(t, historyRequired); + DUMP_U32(t, ekeyCount); + DUMP_U32(t, dkeyCount); + DUMP_U32(t, dkeyLogSize); + DUMP_U32(t, invDkeyOffset); + DUMP_U32(t, somLocationCount); + DUMP_U32(t, somLocationFatbitSize); + DUMP_U32(t, rolesWithStateCount); + DUMP_U32(t, stateSize); + DUMP_U32(t, anchorStateSize); + DUMP_U32(t, nfaStateSize); + DUMP_U32(t, tStateSize); + DUMP_U32(t, smallWriteOffset); + DUMP_U32(t, amatcherOffset); + DUMP_U32(t, ematcherOffset); + DUMP_U32(t, fmatcherOffset); + DUMP_U32(t, drmatcherOffset); + DUMP_U32(t, sbmatcherOffset); + DUMP_U32(t, longLitTableOffset); + DUMP_U32(t, amatcherMinWidth); + DUMP_U32(t, fmatcherMinWidth); + DUMP_U32(t, eodmatcherMinWidth); + DUMP_U32(t, amatcherMaxBiAnchoredWidth); + DUMP_U32(t, fmatcherMaxBiAnchoredWidth); + DUMP_U32(t, reportProgramOffset); + DUMP_U32(t, reportProgramCount); + DUMP_U32(t, delayProgramOffset); + DUMP_U32(t, anchoredProgramOffset); + DUMP_U32(t, activeArrayCount); + DUMP_U32(t, activeLeftCount); + DUMP_U32(t, queueCount); + DUMP_U32(t, activeQueueArraySize); + DUMP_U32(t, eagerIterOffset); + DUMP_U32(t, handledKeyCount); + DUMP_U32(t, handledKeyFatbitSize); + DUMP_U32(t, leftOffset); + DUMP_U32(t, roseCount); + DUMP_U32(t, eodProgramOffset); + DUMP_U32(t, 
lastByteHistoryIterOffset); + DUMP_U32(t, minWidth); + DUMP_U32(t, minWidthExcludingBoundaries); + DUMP_U32(t, maxBiAnchoredWidth); + DUMP_U32(t, anchoredDistance); + DUMP_U32(t, anchoredMinDistance); + DUMP_U32(t, floatingDistance); + DUMP_U32(t, floatingMinDistance); + DUMP_U32(t, smallBlockDistance); + DUMP_U32(t, floatingMinLiteralMatchOffset); + DUMP_U32(t, nfaInfoOffset); + DUMP_U64(t, initialGroups); + DUMP_U64(t, floating_group_mask); + DUMP_U32(t, size); + DUMP_U32(t, delay_count); + DUMP_U32(t, delay_fatbit_size); + DUMP_U32(t, anchored_count); + DUMP_U32(t, anchored_fatbit_size); + DUMP_U32(t, maxFloatingDelayedMatch); + DUMP_U32(t, delayRebuildLength); + DUMP_U32(t, stateOffsets.history); + DUMP_U32(t, stateOffsets.exhausted); + DUMP_U32(t, stateOffsets.activeLeafArray); + DUMP_U32(t, stateOffsets.activeLeftArray); + DUMP_U32(t, stateOffsets.activeLeftArray_size); + DUMP_U32(t, stateOffsets.leftfixLagTable); + DUMP_U32(t, stateOffsets.anchorState); + DUMP_U32(t, stateOffsets.groups); + DUMP_U32(t, stateOffsets.groups_size); + DUMP_U32(t, stateOffsets.longLitState); + DUMP_U32(t, stateOffsets.somLocation); + DUMP_U32(t, stateOffsets.somValid); + DUMP_U32(t, stateOffsets.somWritable); + DUMP_U32(t, stateOffsets.end); + DUMP_U32(t, boundary.reportEodOffset); + DUMP_U32(t, boundary.reportZeroOffset); + DUMP_U32(t, boundary.reportZeroEodOffset); + DUMP_U32(t, totalNumLiterals); + DUMP_U32(t, asize); + DUMP_U32(t, outfixBeginQueue); + DUMP_U32(t, outfixEndQueue); + DUMP_U32(t, leftfixBeginQueue); + DUMP_U32(t, initMpvNfa); + DUMP_U32(t, rosePrefixCount); + DUMP_U32(t, activeLeftIterOffset); + DUMP_U32(t, ematcherRegionSize); + DUMP_U32(t, somRevCount); + DUMP_U32(t, somRevOffsetOffset); + DUMP_U32(t, longLitStreamState); + fprintf(f, "}\n"); + fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine)); +} + +static +void roseDumpComponents(const RoseEngine *t, bool dump_raw, + const string &base) { + dumpComponentInfo(t, base); + dumpComponentInfoCsv(t, base); + dumpNfas(t, dump_raw, base); + dumpAnchored(t, base); + dumpRevComponentInfo(t, base); + dumpRevNfas(t, dump_raw, base); +} + +static +void roseDumpPrograms(const vector &fragments, const RoseEngine *t, + const string &base) { + dumpRoseLitPrograms(fragments, t, base + "/rose_lit_programs.txt"); + dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt"); + dumpRoseReportPrograms(t, base + "/rose_report_programs.txt"); + dumpRoseAnchoredPrograms(t, base + "/rose_anchored_programs.txt"); + dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt"); +} + +void dumpRose(const RoseBuildImpl &build, const vector &fragments, + const map &leftfix_queue_map, + const map &suffix_queue_map, + const RoseEngine *t) { + const Grey &grey = build.cc.grey; + + if (!grey.dumpFlags) { + return; + } stringstream ss; ss << grey.dumpPath << "rose.txt"; @@ -492,16 +2241,14 @@ void dumpRose(const RoseBuild &build_base, const RoseEngine *t, fclose(f); roseDumpComponents(t, false, grey.dumpPath); + roseDumpPrograms(fragments, t, grey.dumpPath); // Graph. - dumpRoseGraph(build, t, "rose.dot"); - - // Literals. 
- ss.str(""); - ss.clear(); - ss << grey.dumpPath << "rose_literals.txt"; - dumpRoseLiterals(build, ss.str().c_str()); - dumpRoseTestLiterals(build, grey.dumpPath); + dumpRoseGraph(build, t, fragments, leftfix_queue_map, suffix_queue_map, + "rose.dot"); + + // Literals + dumpRoseLiterals(build, fragments, grey); f = fopen((grey.dumpPath + "/rose_struct.txt").c_str(), "w"); roseDumpStructRaw(t, f); diff --git a/src/rose/rose_build_dump.h b/src/rose/rose_build_dump.h index 28e9f53ab..d4c620a3e 100644 --- a/src/rose/rose_build_dump.h +++ b/src/rose/rose_build_dump.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,28 +29,51 @@ #ifndef ROSE_BUILD_DUMP_H #define ROSE_BUILD_DUMP_H +#include "ue2common.h" + +#include +#include +#include + struct RoseEngine; namespace ue2 { -class RoseBuild; +class RoseBuildImpl; struct Grey; +struct hwlmLiteral; +struct LitFragment; +struct left_id; +struct suffix_id; #ifdef DUMP_SUPPORT // Dump the Rose graph in graphviz representation. -void dumpRoseGraph(const RoseBuild &build, const RoseEngine *t, - const char *filename); +void dumpRoseGraph(const RoseBuildImpl &build, const char *filename); + +void dumpRose(const RoseBuildImpl &build, + const std::vector &fragments, + const std::map &leftfix_queue_map, + const std::map &suffix_queue_map, + const RoseEngine *t); + +void dumpMatcherLiterals(const std::vector &lits, + const std::string &name, const Grey &grey); -void dumpRose(const RoseBuild &build_base, const RoseEngine *t, - const Grey &grey); #else static UNUSED -void dumpRoseGraph(const RoseBuild &, const RoseEngine *, const char *) { +void dumpRoseGraph(const RoseBuildImpl &, const char *) { +} + +static UNUSED +void dumpRose(const RoseBuildImpl &, const std::vector &, + const std::map &, const std::map &, + const RoseEngine *) { } static UNUSED -void dumpRose(const RoseBuild &, const RoseEngine *, const Grey &) { +void dumpMatcherLiterals(const std::vector &, const std::string &, + const Grey &) { } #endif diff --git a/src/rose/rose_build_engine_blob.cpp b/src/rose/rose_build_engine_blob.cpp new file mode 100644 index 000000000..d39572070 --- /dev/null +++ b/src/rose/rose_build_engine_blob.cpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_engine_blob.h"
+
+#include "rose_build_lookaround.h"
+#include "util/charreach_util.h"
+
+using namespace std;
+
+namespace ue2 {
+
+u32 lookaround_info::get_offset_of(const vector<vector<CharReach>> &reaches,
+                                   RoseEngineBlob &blob) {
+    assert(reaches.size() != 1);
+
+    // Check the cache.
+    auto it = multi_cache.find(reaches);
+    if (it != multi_cache.end()) {
+        DEBUG_PRINTF("reusing reach at idx %u\n", it->second);
+        return it->second;
+    }
+
+    vector<u8> raw_reach(reaches.size() * MULTI_REACH_BITVECTOR_LEN);
+    size_t off = 0;
+    for (const auto &m : reaches) {
+        u8 u = 0;
+        assert(m.size() == MAX_LOOKAROUND_PATHS);
+        for (size_t i = 0; i < m.size(); i++) {
+            if (m[i].none()) {
+                u |= (u8)1U << i;
+            }
+        }
+        fill_n(raw_reach.data() + off, MULTI_REACH_BITVECTOR_LEN, u);
+
+        for (size_t i = 0; i < m.size(); i++) {
+            const CharReach &cr = m[i];
+            if (cr.none()) {
+                continue;
+            }
+
+            for (size_t c = cr.find_first(); c != cr.npos;
+                 c = cr.find_next(c)) {
+                raw_reach[c + off] |= (u8)1U << i;
+            }
+        }
+
+        off += MULTI_REACH_BITVECTOR_LEN;
+    }
+
+    u32 reach_idx = blob.add_range(raw_reach);
+    DEBUG_PRINTF("adding reach at idx %u\n", reach_idx);
+    multi_cache.emplace(reaches, reach_idx);
+
+    return reach_idx;
+}
+
+u32 lookaround_info::get_offset_of(const vector<CharReach> &reach,
+                                   RoseEngineBlob &blob) {
+    if (contains(rcache, reach)) {
+        u32 offset = rcache[reach];
+        DEBUG_PRINTF("reusing reach at idx %u\n", offset);
+        return offset;
+    }
+
+    vector<u8> raw_reach(reach.size() * REACH_BITVECTOR_LEN);
+    size_t off = 0;
+    for (const auto &cr : reach) {
+        assert(cr.any()); // Should be at least one character!
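/*
 * A sketch (an assumption, not part of the patch) of how the multipath
 * reach bitvector built above would be consulted at scan time: every
 * lookaround position owns a MULTI_REACH_BITVECTOR_LEN-byte block, and
 * byte c of that block carries one bit per lookaround path, set when path
 * i either accepts character c at this position or checks nothing there:
 */
static bool anyPathAlive(const u8 *reach, size_t pos, u8 c, u8 live_paths) {
    const u8 path_bits = reach[pos * MULTI_REACH_BITVECTOR_LEN + c];
    return (path_bits & live_paths) != 0; // at least one path survives
}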
+        fill_bitvector(cr, raw_reach.data() + off);
+        off += REACH_BITVECTOR_LEN;
+    }
+
+    u32 offset = blob.add_range(raw_reach);
+    rcache.emplace(reach, offset);
+    return offset;
+}
+
+u32 lookaround_info::get_offset_of(const vector<s8> &look,
+                                   RoseEngineBlob &blob) {
+    if (contains(lcache, look)) {
+        u32 offset = lcache[look];
+        DEBUG_PRINTF("reusing look at idx %u\n", offset);
+        return offset;
+    }
+
+    u32 offset = blob.add_range(look);
+    lcache.emplace(look, offset);
+    return offset;
+}
+
+} // namespace ue2
diff --git a/src/rose/rose_build_engine_blob.h b/src/rose/rose_build_engine_blob.h
index 8542b87bc..3aa501b47 100644
--- a/src/rose/rose_build_engine_blob.h
+++ b/src/rose/rose_build_engine_blob.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Intel Corporation
+ * Copyright (c) 2016-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -33,19 +33,35 @@
 #include "ue2common.h"
 
 #include "util/alloc.h"
+#include "util/bytecode_ptr.h"
+#include "util/charreach.h"
 #include "util/container.h"
 #include "util/multibit_build.h"
+#include "util/noncopyable.h"
 #include "util/ue2_containers.h"
 #include "util/verify_types.h"
 
 #include <type_traits>
 #include <vector>
 
-#include <boost/core/noncopyable.hpp>
-
 namespace ue2 {
 
-class RoseEngineBlob : boost::noncopyable {
+class RoseEngineBlob;
+
+struct lookaround_info : noncopyable {
+    u32 get_offset_of(const std::vector<std::vector<CharReach>> &look,
+                      RoseEngineBlob &blob);
+    u32 get_offset_of(const std::vector<CharReach> &reach,
+                      RoseEngineBlob &blob);
+    u32 get_offset_of(const std::vector<s8> &look, RoseEngineBlob &blob);
+
+private:
+    unordered_map<std::vector<std::vector<CharReach>>, u32> multi_cache;
+    unordered_map<std::vector<s8>, u32> lcache;
+    unordered_map<std::vector<CharReach>, u32> rcache;
+};
+
+class RoseEngineBlob : noncopyable {
 public:
     /** \brief Base offset of engine_blob in the Rose engine bytecode.
     */
    static constexpr u32 base_offset = ROUNDUP_CL(sizeof(RoseEngine));
@@ -58,10 +74,6 @@ class RoseEngineBlob : boost::noncopyable {
         return blob.size();
     }
 
-    const char *data() const {
-        return blob.data();
-    }
-
     u32 add(const void *a, const size_t len, const size_t align) {
         pad(align);
 
@@ -77,6 +89,11 @@
         return verify_u32(rv);
     }
 
+    template<typename T>
+    u32 add(const bytecode_ptr<T> &a) {
+        return add(a.get(), a.size(), a.align());
+    }
+
     template<typename T>
     u32 add(const T &a) {
         static_assert(std::is_pod<T>::value, "should be pod");
@@ -106,6 +123,11 @@
         return offset;
     }
 
+    template<typename Range>
+    u32 add_range(const Range &range) {
+        return add(begin(range), end(range));
+    }
+
     u32 add_iterator(const std::vector<mmbit_sparse_iter> &iter) {
         auto cache_it = cached_iters.find(iter);
         if (cache_it != cached_iters.end()) {
@@ -123,6 +145,8 @@
         copy_bytes((char *)engine + base_offset, blob);
     }
 
+    lookaround_info lookaround_cache;
+
 private:
     void pad(size_t align) {
         assert(ISALIGNED_N(base_offset, align));
diff --git a/src/rose/rose_build_exclusive.h b/src/rose/rose_build_exclusive.h
index 9cabb1d28..3269dce61 100644
--- a/src/rose/rose_build_exclusive.h
+++ b/src/rose/rose_build_exclusive.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Intel Corporation
+ * Copyright (c) 2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -49,23 +49,6 @@
 
 namespace ue2 {
 
-/** brief subengine info including built engine and
- * corresponding triggering rose vertices */
-struct ExclusiveSubengine {
-    aligned_unique_ptr<NFA> nfa;
-    std::vector<RoseVertex> vertices;
-};
-
-/** \brief exclusive info to build tamarama */
-struct ExclusiveInfo {
-    // subengine info
-    std::vector<ExclusiveSubengine> subengines;
-    // all the report in tamarama
-    std::set<ReportID> reports;
-    // assigned queue id
-    u32 queue;
-};
-
 /** \brief role info structure for exclusive analysis */
 template<typename role_id>
 struct RoleInfo {
diff --git a/src/rose/rose_build_groups.cpp b/src/rose/rose_build_groups.cpp
index 0a1c501f2..c670e6033 100644
--- a/src/rose/rose_build_groups.cpp
+++ b/src/rose/rose_build_groups.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Intel Corporation
+ * Copyright (c) 2016-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -33,6 +33,10 @@
 
 #include "rose_build_groups.h"
 
+#include "util/boundary_reports.h"
+#include "util/compile_context.h"
+#include "util/report_manager.h"
+
 #include <queue>
 #include <vector>
 
@@ -71,24 +75,18 @@
 static
 bool eligibleForAlwaysOnGroup(const RoseBuildImpl &build, u32 id) {
-    /* returns true if it or any of its delay versions have root role */
-    for (auto v : build.literal_info[id].vertices) {
-        if (build.isRootSuccessor(v)) {
-            NGHolder *h = build.g[v].left.graph.get();
-            if (!h || proper_out_degree(h->startDs, *h)) {
-                return true;
-            }
-        }
-    }
+    auto eligble = [&](RoseVertex v) {
+        return build.isRootSuccessor(v)
+               && (!build.g[v].left || !isAnchored(build.g[v].left));
+    };
+
+    if (any_of_in(build.literal_info[id].vertices, eligble)) {
+        return true;
+    }
 
     for (u32 delayed_id : build.literal_info[id].delayed_ids) {
-        for (auto v : build.literal_info[delayed_id].vertices) {
-            if (build.isRootSuccessor(v)) {
-                NGHolder *h = build.g[v].left.graph.get();
-                if (!h || proper_out_degree(h->startDs, *h)) {
-                    return true;
-                }
-            }
-        }
+        if (any_of_in(build.literal_info[delayed_id].vertices, eligble)) {
+            return true;
+        }
     }
 
@@ -170,6 +168,64 @@ u32 next_available_group(u32 counter, u32 min_start_group) {
     return counter;
 }
 
+static
+void allocateGroupForBoundary(RoseBuildImpl &build, u32 group_always_on,
+                              map<u8, u32> &groupCount) {
+    /* Boundary reports at zero will always fire and be forgotten; no need
+     * to worry about preventing the stream being marked as exhausted */
+    if (build.boundary.report_at_eod.empty()) {
+        return;
+    }
+
+    /* Group based stream exhaustion is only done at stream boundaries */
+    if (!build.cc.streaming) {
+        return;
+    }
+
+    DEBUG_PRINTF("allocating %u as boundary group id\n", group_always_on);
+
+    build.boundary_group_mask = 1ULL << group_always_on;
+    groupCount[group_always_on]++;
+}
+
+static
+void allocateGroupForEvent(RoseBuildImpl &build, u32 group_always_on,
+                           map<u8, u32> &groupCount, u32 *counter) {
+    if (build.eod_event_literal_id == MO_INVALID_IDX) {
+        return;
+    }
+
+    /* Group based stream exhaustion is only done at stream boundaries */
+    if (!build.cc.streaming) {
+        return;
+    }
+
+    rose_literal_info &info = build.literal_info[build.eod_event_literal_id];
+
+    if (info.vertices.empty()) {
+        return;
+    }
+
+    bool new_group = !groupCount[group_always_on];
+    for (RoseVertex v : info.vertices) {
+        if (build.g[v].left && !isAnchored(build.g[v].left)) {
+            new_group = false;
+        }
+    }
+
+    u32 group;
+    if (!new_group) {
+        group = group_always_on;
+    } else {
+        group = *counter;
+        *counter += 1;
+    }
+
+    DEBUG_PRINTF("allocating %u as eod event group id\n", *counter);
+    info.group_mask = 1ULL << group;
+    groupCount[group]++;
+}
+
 void assignGroupsToLiterals(RoseBuildImpl &build) {
     auto &literals = build.literals;
     auto &literal_info = build.literal_info;
@@ -182,9 +238,8 @@
     u32 group_always_on = 0;
 
     // First pass: handle always on literals.
-    for (const auto &e : literals.right) {
-        u32 id = e.first;
-        const rose_literal_id &lit = e.second;
+    for (u32 id = 0; id < literals.size(); id++) {
+        const rose_literal_id &lit = literals.at(id);
         rose_literal_info &info = literal_info[id];
 
         if (!requires_group_assignment(lit, info)) {
@@ -211,13 +266,15 @@
         counter++;
     }
 
+    allocateGroupForBoundary(build, group_always_on, groupCount);
+    allocateGroupForEvent(build, group_always_on, groupCount, &counter);
+
     u32 min_start_group = counter;
     priority_queue<tuple<s32, s32, u32>> pq;
 
     // Second pass: the other literals.
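/*
 * Context sketch (an assumption, not part of the patch): rose_group is a
 * 64-bit bitmask with one bit per literal group, and a literal stays live
 * only while some group in its group_mask is switched on. The boundary and
 * EOD-event helpers above reserve bits in that same mask space so that
 * group-based stream exhaustion cannot retire them prematurely. The
 * membership test this implies reduces to:
 */
static inline bool literalMayMatch(rose_group current_groups,
                                   rose_group lit_group_mask) {
    return (current_groups & lit_group_mask) != 0;
}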
- for (const auto &e : literals.right) { - u32 id = e.first; - const rose_literal_id &lit = e.second; + for (u32 id = 0; id < literals.size(); id++) { + const rose_literal_id &lit = literals.at(id); rose_literal_info &info = literal_info[id]; if (!requires_group_assignment(lit, info)) { @@ -231,7 +288,7 @@ void assignGroupsToLiterals(RoseBuildImpl &build) { while (!pq.empty()) { u32 id = get<2>(pq.top()); pq.pop(); - UNUSED const rose_literal_id &lit = literals.right.at(id); + UNUSED const rose_literal_id &lit = literals.at(id); DEBUG_PRINTF("assigning groups to lit %u (v %zu l %zu)\n", id, literal_info[id].vertices.size(), lit.s.length()); @@ -302,9 +359,8 @@ void assignGroupsToLiterals(RoseBuildImpl &build) { } } /* assign delayed literals to the same group as their parent */ - for (const auto &e : literals.right) { - u32 id = e.first; - const rose_literal_id &lit = e.second; + for (u32 id = 0; id < literals.size(); id++) { + const rose_literal_id &lit = literals.at(id); if (!lit.delay) { continue; @@ -319,7 +375,7 @@ void assignGroupsToLiterals(RoseBuildImpl &build) { } DEBUG_PRINTF("populate group to literal mapping\n"); - for (const u32 id : literals.right | map_keys) { + for (u32 id = 0; id < literals.size(); id++) { rose_group groups = literal_info[id].group_mask; while (groups) { u32 group_id = findAndClearLSB_64(&groups); @@ -453,6 +509,7 @@ rose_group getSquashableGroups(const RoseBuildImpl &build) { } DEBUG_PRINTF("squashable groups=0x%llx\n", squashable_groups); + assert(!(squashable_groups & build.boundary_group_mask)); return squashable_groups; } @@ -501,11 +558,11 @@ bool isGroupSquasher(const RoseBuildImpl &build, const u32 id /* literal id */, const rose_literal_info &lit_info = build.literal_info.at(id); DEBUG_PRINTF("checking if %u '%s' is a group squasher %016llx\n", id, - dumpString(build.literals.right.at(id).s).c_str(), - lit_info.group_mask); + dumpString(build.literals.at(id).s).c_str(), + lit_info.group_mask); - if (build.literals.right.at(id).table == ROSE_EVENT) { - DEBUG_PRINTF("event literal, has no groups to squash\n"); + if (build.literals.at(id).table == ROSE_EVENT) { + DEBUG_PRINTF("event literal\n"); return false; } @@ -538,8 +595,12 @@ bool isGroupSquasher(const RoseBuildImpl &build, const u32 id /* literal id */, /* Case 1 */ - // Can't squash cases with accepts - if (!g[v].reports.empty()) { + // Can't squash cases with accepts unless they are all + // simple-exhaustible. 
+ if (any_of_in(g[v].reports, [&](ReportID report) { + return !isSimpleExhaustible(build.rm.getReport(report)); + })) { + DEBUG_PRINTF("can't squash reporter\n"); return false; } @@ -628,10 +689,11 @@ bool isGroupSquasher(const RoseBuildImpl &build, const u32 id /* literal id */, } void findGroupSquashers(RoseBuildImpl &build) { - rose_group forbidden_squash_group = 0; - for (const auto &e : build.literals.right) { - if (e.second.delay) { - forbidden_squash_group |= build.literal_info[e.first].group_mask; + rose_group forbidden_squash_group = build.boundary_group_mask; + for (u32 id = 0; id < build.literals.size(); id++) { + const auto &lit = build.literals.at(id); + if (lit.delay) { + forbidden_squash_group |= build.literal_info[id].group_mask; } } diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 6b326d34b..13f1cfc9c 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,28 +26,30 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef ROSE_BUILD_IMPL_H_17E20A3C6935D6 -#define ROSE_BUILD_IMPL_H_17E20A3C6935D6 +#ifndef ROSE_BUILD_IMPL_H +#define ROSE_BUILD_IMPL_H #include "rose_build.h" #include "rose_build_util.h" +#include "rose_common.h" #include "rose_graph.h" #include "nfa/mpvcompile.h" #include "nfa/goughcompile.h" #include "nfa/nfa_internal.h" #include "nfagraph/ng_holder.h" #include "nfagraph/ng_revacc.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" +#include "util/hash.h" #include "util/order_check.h" #include "util/queue_index_factory.h" #include "util/ue2_containers.h" +#include "util/ue2string.h" +#include "util/verify_types.h" #include #include #include #include -#include -#include #include struct RoseEngine; @@ -58,6 +60,17 @@ namespace ue2 { #define ROSE_LONG_LITERAL_THRESHOLD_MIN 33 +/** + * \brief The largest allowable "short" literal fragment which can be given to + * a literal matcher directly. + * + * Literals longer than this will be truncated to their suffix and confirmed in + * the Rose interpreter, either as "medium length" literals which can be + * confirmed from history, or "long literals" which make use of the streaming + * table support. + */ +#define ROSE_SHORT_LITERAL_LEN_MAX 8 + struct BoundaryReports; struct CastleProto; struct CompileContext; @@ -252,9 +265,7 @@ struct rose_literal_info { ue2::flat_set vertices; rose_group group_mask = 0; u32 undelayed_id = MO_INVALID_IDX; - u32 final_id = MO_INVALID_IDX; /* id reported by fdr */ bool squash_group = false; - bool requires_explode = false; bool requires_benefits = false; }; @@ -290,6 +301,11 @@ struct rose_literal_id { } return MAX(mask_len, s.length()) + delay; } + + bool operator==(const rose_literal_id &b) const { + return s == b.s && msk == b.msk && cmp == b.cmp && table == b.table && + delay == b.delay && distinctiveness == b.distinctiveness; + } }; static inline @@ -303,8 +319,60 @@ bool operator<(const rose_literal_id &a, const rose_literal_id &b) { return 0; } -// Literals are stored in a map from (string, nocase) -> ID -typedef boost::bimap RoseLiteralMap; +inline +size_t hash_value(const rose_literal_id &lit) { + return hash_all(lit.s, lit.msk, lit.cmp, lit.table, lit.delay, + lit.distinctiveness); +} + +class RoseLiteralMap { + /** + * \brief Main storage for literals. 
+     *
+     * Note that this cannot be a vector, as the present code relies on
+     * iterator stability when iterating over this list and adding to it
+     * inside the loop.
+     */
+    std::deque<rose_literal_id> lits;
+
+    /** \brief Quick-lookup index from literal -> index in lits. */
+    unordered_map<rose_literal_id, u32> lits_index;
+
+public:
+    std::pair<u32, bool> insert(const rose_literal_id &lit) {
+        auto it = lits_index.find(lit);
+        if (it != lits_index.end()) {
+            return {it->second, false};
+        }
+        u32 id = verify_u32(lits.size());
+        lits.push_back(lit);
+        lits_index.emplace(lit, id);
+        return {id, true};
+    }
+
+    // Erase the last num elements.
+    void erase_back(size_t num) {
+        assert(num <= lits.size());
+        for (size_t i = 0; i < num; i++) {
+            lits_index.erase(lits.back());
+            lits.pop_back();
+        }
+        assert(lits.size() == lits_index.size());
+    }
+
+    const rose_literal_id &at(u32 id) const {
+        assert(id < lits.size());
+        return lits.at(id);
+    }
+
+    using const_iterator = decltype(lits)::const_iterator;
+    const_iterator begin() const { return lits.begin(); }
+    const_iterator end() const { return lits.end(); }
+
+    size_t size() const {
+        return lits.size();
+    }
+};
 
 struct simple_anchored_info {
     simple_anchored_info(u32 min_b, u32 max_b, const ue2_literal &lit)
@@ -415,8 +483,8 @@ struct OutfixInfo {
     RevAccInfo rev_info;
     u32 maxBAWidth = 0; //!< max bi-anchored width
 
-    depth minWidth = depth::infinity();
-    depth maxWidth = 0;
+    depth minWidth{depth::infinity()};
+    depth maxWidth{0};
 
     u64a maxOffset = 0;
     bool in_sbmatcher = false; //!< handled by small-block matcher.
@@ -438,8 +506,7 @@ class RoseBuildImpl : public RoseBuild {
     void add(bool anchored, bool eod, const ue2_literal &lit,
              const ue2::flat_set<ReportID> &ids) override;
 
-    bool addRose(const RoseInGraph &ig, bool prefilter,
-                 bool finalChance = false) override;
+    bool addRose(const RoseInGraph &ig, bool prefilter) override;
     bool addSombeRose(const RoseInGraph &ig) override;
 
     bool addOutfix(const NGHolder &h) override;
@@ -462,8 +529,8 @@
                  bool eod) override;
 
     // Construct a runtime implementation.
-    aligned_unique_ptr<RoseEngine> buildRose(u32 minWidth) override;
-    aligned_unique_ptr<RoseEngine> buildFinalEngine(u32 minWidth);
+    bytecode_ptr<RoseEngine> buildRose(u32 minWidth) override;
+    bytecode_ptr<RoseEngine> buildFinalEngine(u32 minWidth);
 
     void setSom() override { hasSom = true; }
 
@@ -481,8 +548,6 @@
                       const std::vector<u8> &cmp, u32 delay,
                       rose_literal_table table);
 
-    bool hasLiteral(const ue2_literal &s, rose_literal_table table) const;
-
     u32 getNewLiteralId(void);
 
     void removeVertices(const std::vector<RoseVertex> &dead);
@@ -490,8 +555,6 @@
     // Is the Rose anchored?
     bool hasNoFloatingRoots() const;
 
-    RoseVertex cloneVertex(RoseVertex v);
-
     u32 calcHistoryRequired() const;
 
     rose_group getInitialGroups() const;
@@ -512,8 +575,6 @@
     bool isDirectReport(u32 id) const;
     bool isDelayed(u32 id) const;
 
-    bool hasFinalId(u32 id) const;
-
     bool isAnchored(RoseVertex v) const; /* true iff has literal in anchored
                                           * table */
     bool isFloating(RoseVertex v) const; /* true iff has literal in floating
@@ -553,31 +614,19 @@
         return next_nfa_report++;
     }
 
     std::deque<rose_literal_info> literal_info;
 
-    u32 delay_base_id;
-
     bool hasSom; //!< at least one pattern requires SOM.
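/*
 * Usage sketch for the RoseLiteralMap above (hypothetical helper, not part
 * of the patch): insert() deduplicates, handing back the existing id and
 * false when the same rose_literal_id is added twice, so literal ids stay
 * dense and stable:
 */
static void roseLiteralMapExample(RoseLiteralMap &literals,
                                  const rose_literal_id &lit) {
    auto rv1 = literals.insert(lit); // {new id, true} on first insert
    auto rv2 = literals.insert(lit); // {same id, false} on re-insert
    assert(rv1.first == rv2.first && rv1.second && !rv2.second);
}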
std::map>> anchored_nfas; std::map> anchored_simple; std::map > group_to_literal; u32 group_end; - u32 anchored_base_id; - u32 ematcher_region_size; /**< number of bytes the eod table runs over */ - /** \brief Mapping from leftfix to queue ID (used in dump code). */ - unordered_map leftfix_queue_map; - - /** \brief Mapping from suffix to queue ID (used in dump code). */ - unordered_map suffix_queue_map; - /** \brief Mapping from anchored literal ID to the original literal suffix * present when the literal was added to the literal matcher. Used for * overlap calculation in history assignment. */ std::map anchoredLitSuffix; - std::map > final_id_to_literal; /* final literal id to - * literal id */ - unordered_set transient; unordered_map rose_squash_masks; @@ -592,6 +641,8 @@ class RoseBuildImpl : public RoseBuild { u32 max_rose_anchored_floating_overlap; + rose_group boundary_group_mask = 0; + QueueIndexFactory qif; ReportManager &rm; SomSlotManager &ssm; @@ -614,8 +665,6 @@ size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b); ue2_literal findNonOverlappingTail(const std::set &lits, const ue2_literal &s); -void setReportId(NGHolder &g, ReportID id); - #ifndef NDEBUG bool roseHasTops(const RoseBuildImpl &build, RoseVertex v); bool hasOrphanedTops(const RoseBuildImpl &build); @@ -629,10 +678,15 @@ u64a findMaxOffset(const std::set &reports, const ReportManager &rm); void normaliseLiteralMask(const ue2_literal &s, std::vector &msk, std::vector &cmp); +u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id); +u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id); + +bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e); + #ifndef NDEBUG bool canImplementGraphs(const RoseBuildImpl &tbi); #endif } // namespace ue2 -#endif /* ROSE_BUILD_IMPL_H_17E20A3C6935D6 */ +#endif /* ROSE_BUILD_IMPL_H */ diff --git a/src/rose/rose_build_instructions.cpp b/src/rose/rose_build_instructions.cpp new file mode 100644 index 000000000..b00c36be6 --- /dev/null +++ b/src/rose/rose_build_instructions.cpp @@ -0,0 +1,639 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "rose_build_instructions.h" + +#include "rose_build_engine_blob.h" +#include "util/multibit_build.h" +#include "util/verify_types.h" + +#include + +using namespace std; + +namespace ue2 { +/* Destructors to avoid weak vtables. */ + +RoseInstruction::~RoseInstruction() = default; +RoseInstrCatchUp::~RoseInstrCatchUp() = default; +RoseInstrCatchUpMpv::~RoseInstrCatchUpMpv() = default; +RoseInstrSomZero::~RoseInstrSomZero() = default; +RoseInstrSuffixesEod::~RoseInstrSuffixesEod() = default; +RoseInstrMatcherEod::~RoseInstrMatcherEod() = default; +RoseInstrEnd::~RoseInstrEnd() = default; +RoseInstrClearWorkDone::~RoseInstrClearWorkDone() = default; + +using OffsetMap = RoseInstruction::OffsetMap; + +static +u32 calc_jump(const OffsetMap &offset_map, const RoseInstruction *from, + const RoseInstruction *to) { + DEBUG_PRINTF("computing relative jump from %p to %p\n", from, to); + assert(from && contains(offset_map, from)); + assert(to && contains(offset_map, to)); + + u32 from_offset = offset_map.at(from); + u32 to_offset = offset_map.at(to); + DEBUG_PRINTF("offsets: %u -> %u\n", from_offset, to_offset); + assert(from_offset <= to_offset); + + return to_offset - from_offset; +} + +void RoseInstrAnchoredDelay::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->groups = groups; + inst->anch_id = anch_id; + inst->done_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckLitEarly::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->min_offset = min_offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckGroups::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->groups = groups; +} + +void RoseInstrCheckOnlyEod::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckBounds::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->min_bound = min_bound; + inst->max_bound = max_bound; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckNotHandled::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->key = key; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckSingleLookaround::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, 
offset_map); + auto *inst = static_cast(dest); + inst->offset = offset; + inst->reach_index = blob.lookaround_cache.get_offset_of({reach}, blob); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckLookaround::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + vector look_offsets; + vector reaches; + for (const auto &le : look) { + look_offsets.push_back(le.offset); + reaches.push_back(le.reach); + } + inst->look_index = blob.lookaround_cache.get_offset_of(look_offsets, blob); + inst->reach_index = blob.lookaround_cache.get_offset_of(reaches, blob); + inst->count = verify_u32(look.size()); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMask::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->and_mask = and_mask; + inst->cmp_mask = cmp_mask; + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMask32::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(and_mask), end(and_mask), inst->and_mask); + copy(begin(cmp_mask), end(cmp_mask), inst->cmp_mask); + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckByte::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->and_mask = and_mask; + inst->cmp_mask = cmp_mask; + inst->negation = negation; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckShufti16x8::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(nib_mask), end(nib_mask), inst->nib_mask); + copy(begin(bucket_select_mask), end(bucket_select_mask), + inst->bucket_select_mask); + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckShufti32x8::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(hi_mask), end(hi_mask), inst->hi_mask); + copy(begin(lo_mask), end(lo_mask), inst->lo_mask); + copy(begin(bucket_select_mask), end(bucket_select_mask), + inst->bucket_select_mask); + + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckShufti16x16::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(hi_mask), end(hi_mask), inst->hi_mask); + copy(begin(lo_mask), end(lo_mask), inst->lo_mask); + copy(begin(bucket_select_mask), end(bucket_select_mask), + inst->bucket_select_mask); + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckShufti32x16::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, 
offset_map); + auto *inst = static_cast(dest); + copy(begin(hi_mask), end(hi_mask), inst->hi_mask); + copy(begin(lo_mask), end(lo_mask), inst->lo_mask); + copy(begin(bucket_select_mask_hi), end(bucket_select_mask_hi), + inst->bucket_select_mask_hi); + copy(begin(bucket_select_mask_lo), end(bucket_select_mask_lo), + inst->bucket_select_mask_lo); + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckInfix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->queue = queue; + inst->lag = lag; + inst->report = report; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckPrefix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->queue = queue; + inst->lag = lag; + inst->report = report; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrPushDelayed::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->delay = delay; + inst->index = index; +} + +void RoseInstrSomAdjust::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->distance = distance; +} + +void RoseInstrSomLeftfix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->queue = queue; + inst->lag = lag; +} + +void RoseInstrSomFromReport::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->som = som; +} + +void RoseInstrTriggerInfix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->cancel = cancel; + inst->queue = queue; + inst->event = event; +} + +void RoseInstrTriggerSuffix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->queue = queue; + inst->event = event; +} + +void RoseInstrDedupe::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->quash_som = quash_som; + inst->dkey = dkey; + inst->offset_adjust = offset_adjust; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrDedupeSom::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->quash_som = quash_som; + inst->dkey = dkey; + inst->offset_adjust = offset_adjust; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrReportChain::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->event = event; + inst->top_squash_distance = top_squash_distance; +} + +void RoseInstrReportSomInt::write(void *dest, RoseEngineBlob &blob, + const OffsetMap 
&offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->som = som; +} + +void RoseInstrReportSomAware::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->som = som; +} + +void RoseInstrReport::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; +} + +void RoseInstrReportExhaust::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; + inst->ekey = ekey; +} + +void RoseInstrReportSom::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; +} + +void RoseInstrReportSomExhaust::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; + inst->ekey = ekey; +} + +void RoseInstrDedupeAndReport::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->quash_som = quash_som; + inst->dkey = dkey; + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrFinalReport::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; +} + +void RoseInstrCheckExhausted::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->ekey = ekey; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMinLength::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->end_adj = end_adj; + inst->min_length = min_length; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrSetState::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->index = index; +} + +void RoseInstrSetGroups::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->groups = groups; +} + +void RoseInstrSquashGroups::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->groups = groups; +} + +void RoseInstrCheckState::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->index = index; + inst->fail_jump = calc_jump(offset_map, 
this, target); +} + +void RoseInstrSparseIterBegin::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->fail_jump = calc_jump(offset_map, this, target); + + // Resolve and write the multibit sparse iterator and the jump table. + vector keys; + vector jump_offsets; + for (const auto &jump : jump_table) { + keys.push_back(jump.first); + assert(contains(offset_map, jump.second)); + jump_offsets.push_back(offset_map.at(jump.second)); + } + + auto iter = mmbBuildSparseIterator(keys, num_keys); + assert(!iter.empty()); + inst->iter_offset = blob.add_iterator(iter); + inst->jump_table = blob.add(jump_offsets.begin(), jump_offsets.end()); + + // Store offsets for corresponding SPARSE_ITER_NEXT operations. + is_written = true; + iter_offset = inst->iter_offset; + jump_table_offset = inst->jump_table; +} + +void RoseInstrSparseIterNext::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->state = state; + inst->fail_jump = calc_jump(offset_map, this, target); + + // Use the same sparse iterator and jump table as the SPARSE_ITER_BEGIN + // instruction. + assert(begin); + assert(contains(offset_map, begin)); + assert(begin->is_written); + inst->iter_offset = begin->iter_offset; + inst->jump_table = begin->jump_table_offset; +} + +void RoseInstrSparseIterAny::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->fail_jump = calc_jump(offset_map, this, target); + + // Write the multibit sparse iterator. + auto iter = mmbBuildSparseIterator(keys, num_keys); + assert(!iter.empty()); + inst->iter_offset = blob.add_iterator(iter); +} + +void RoseInstrEnginesEod::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->iter_offset = iter_offset; +} + +void RoseInstrCheckLongLit::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + assert(!literal.empty()); + inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); + inst->lit_length = verify_u32(literal.size()); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckLongLitNocase::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + assert(!literal.empty()); + inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); + inst->lit_length = verify_u32(literal.size()); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMedLit::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + assert(!literal.empty()); + inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); + inst->lit_length = verify_u32(literal.size()); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMedLitNocase::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + assert(!literal.empty()); + inst->lit_offset 
= blob.add(literal.c_str(), literal.size(), 1); + inst->lit_length = verify_u32(literal.size()); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrMultipathLookaround::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + auto &cache = blob.lookaround_cache; + vector look_offsets; + vector> reaches; + for (const auto &vle : multi_look) { + reaches.push_back({}); + bool done_offset = false; + + for (const auto &le : vle) { + reaches.back().push_back(le.reach); + + /* empty reaches don't have valid offsets */ + if (!done_offset && le.reach.any()) { + look_offsets.push_back(le.offset); + done_offset = true; + } + } + } + inst->look_index = cache.get_offset_of(look_offsets, blob); + inst->reach_index = cache.get_offset_of(reaches, blob); + inst->count = verify_u32(multi_look.size()); + inst->last_start = last_start; + copy(begin(start_mask), end(start_mask), inst->start_mask); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMultipathShufti16x8::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(nib_mask), end(nib_mask), inst->nib_mask); + copy(begin(bucket_select_mask), begin(bucket_select_mask) + 16, + inst->bucket_select_mask); + copy(begin(data_select_mask), begin(data_select_mask) + 16, + inst->data_select_mask); + inst->hi_bits_mask = hi_bits_mask; + inst->lo_bits_mask = lo_bits_mask; + inst->neg_mask = neg_mask; + inst->base_offset = base_offset; + inst->last_start = last_start; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMultipathShufti32x8::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(hi_mask), begin(hi_mask) + 16, inst->hi_mask); + copy(begin(lo_mask), begin(lo_mask) + 16, inst->lo_mask); + copy(begin(bucket_select_mask), begin(bucket_select_mask) + 32, + inst->bucket_select_mask); + copy(begin(data_select_mask), begin(data_select_mask) + 32, + inst->data_select_mask); + inst->hi_bits_mask = hi_bits_mask; + inst->lo_bits_mask = lo_bits_mask; + inst->neg_mask = neg_mask; + inst->base_offset = base_offset; + inst->last_start = last_start; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMultipathShufti32x16::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(hi_mask), end(hi_mask), inst->hi_mask); + copy(begin(lo_mask), end(lo_mask), inst->lo_mask); + copy(begin(bucket_select_mask_hi), begin(bucket_select_mask_hi) + 32, + inst->bucket_select_mask_hi); + copy(begin(bucket_select_mask_lo), begin(bucket_select_mask_lo) + 32, + inst->bucket_select_mask_lo); + copy(begin(data_select_mask), begin(data_select_mask) + 32, + inst->data_select_mask); + inst->hi_bits_mask = hi_bits_mask; + inst->lo_bits_mask = lo_bits_mask; + inst->neg_mask = neg_mask; + inst->base_offset = base_offset; + inst->last_start = last_start; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMultipathShufti64::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + 
copy(begin(hi_mask), begin(hi_mask) + 16, inst->hi_mask); + copy(begin(lo_mask), begin(lo_mask) + 16, inst->lo_mask); + copy(begin(bucket_select_mask), end(bucket_select_mask), + inst->bucket_select_mask); + copy(begin(data_select_mask), end(data_select_mask), + inst->data_select_mask); + inst->hi_bits_mask = hi_bits_mask; + inst->lo_bits_mask = lo_bits_mask; + inst->neg_mask = neg_mask; + inst->base_offset = base_offset; + inst->last_start = last_start; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +} diff --git a/src/rose/rose_build_instructions.h b/src/rose/rose_build_instructions.h new file mode 100644 index 000000000..025f6a671 --- /dev/null +++ b/src/rose/rose_build_instructions.h @@ -0,0 +1,2132 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Concrete classes for interpreter instructions. + * + * Note: this header should only be included in files which need to deal with + * the details of actual instructions. It is expected that most will only + * require access to the RoseInstruction API exposed in rose_build_program.h + */ + +#ifndef ROSE_BUILD_INSTRUCTIONS_H +#define ROSE_BUILD_INSTRUCTIONS_H + +#include "rose_build_lookaround.h" +#include "rose_build_program.h" +#include "util/verify_types.h" + +namespace ue2 { + +/** + * \brief Abstract base class representing a single Rose instruction. + */ +class RoseInstruction { +public: + virtual ~RoseInstruction(); + + /** \brief Opcode used for the instruction in the bytecode. */ + virtual RoseInstructionCode code() const = 0; + + /** + * \brief Simple hash used for program equivalence. + * + * Note that pointers (jumps, for example) should not be used when + * calculating the hash: they will be converted to instruction offsets when + * compared later. + */ + virtual size_t hash() const = 0; + + /** \brief Length of the bytecode instruction in bytes. */ + virtual size_t byte_length() const = 0; + + using OffsetMap = unordered_map; + + /** + * \brief Writes a concrete implementation of this instruction. 
+ * + * Other data that this instruction depends on is written directly into the + * blob, while the instruction structure itself (of size given by + * the byte_length() function) is written to dest. + */ + virtual void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const = 0; + + /** + * \brief Update a target pointer. + * + * If this instruction contains any reference to the old target, replace it + * with the new one. + */ + virtual void update_target(const RoseInstruction *old_target, + const RoseInstruction *new_target) = 0; + + /** + * \brief True if these instructions are equivalent within their own + * programs. + * + * Checks that any pointers to other instructions point to the same + * offsets. + */ + bool equiv(const RoseInstruction &other, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return equiv_impl(other, offsets, other_offsets); + } + +private: + virtual bool equiv_impl(const RoseInstruction &other, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const = 0; +}; + +/** + * \brief Templated implementation class to handle boring boilerplate code. + */ +template +class RoseInstrBase : public RoseInstruction { +protected: + static constexpr RoseInstructionCode opcode = Opcode; + using impl_type = ImplType; + +public: + RoseInstructionCode code() const override { return opcode; } + + size_t byte_length() const override { + return sizeof(impl_type); + } + + /** + * Note: this implementation simply zeroes the destination region and + * writes in the correct opcode. This is sufficient for trivial + * instructions, but instructions with data members will want to override + * it. + */ + void write(void *dest, RoseEngineBlob &, + const RoseInstruction::OffsetMap &) const override { + assert(dest != nullptr); + assert(ISALIGNED_N(dest, ROSE_INSTR_MIN_ALIGN)); + + impl_type *inst = static_cast(dest); + memset(inst, 0, sizeof(impl_type)); + inst->code = verify_u8(opcode); + } + +private: + bool equiv_impl(const RoseInstruction &other, const OffsetMap &offsets, + const OffsetMap &other_offsets) const override { + const auto *ri_that = dynamic_cast(&other); + if (!ri_that) { + return false; + } + const auto *ri_this = dynamic_cast(this); + assert(ri_this); + return ri_this->equiv_to(*ri_that, offsets, other_offsets); + } +}; + +/** + * \brief Refinement of RoseInstrBase to use for instructions that have + * just a single target member, called "target". + */ +template +class RoseInstrBaseOneTarget + : public RoseInstrBase { +public: + void update_target(const RoseInstruction *old_target, + const RoseInstruction *new_target) override { + RoseInstrType *ri = dynamic_cast(this); + assert(ri); + if (ri->target == old_target) { + ri->target = new_target; + } + } +}; + +/** + * \brief Refinement of RoseInstrBase to use for instructions that have no + * targets. + */ +template +class RoseInstrBaseNoTargets + : public RoseInstrBase { +public: + void update_target(const RoseInstruction *, + const RoseInstruction *) override {} +}; + +/** + * \brief Refinement of RoseInstrBaseNoTargets to use for instructions that + * have no members at all, just an opcode. 
+ */ +template +class RoseInstrBaseTrivial + : public RoseInstrBaseNoTargets { +public: + virtual bool operator==(const RoseInstrType &) const { return true; } + + size_t hash() const override { + return boost::hash_value(static_cast(Opcode)); + } + + bool equiv_to(const RoseInstrType &, const RoseInstruction::OffsetMap &, + const RoseInstruction::OffsetMap &) const { + return true; + } +}; + +//// +//// Concrete implementation classes start here. +//// + +class RoseInstrAnchoredDelay + : public RoseInstrBaseOneTarget { +public: + rose_group groups; + u32 anch_id; + const RoseInstruction *target; + + RoseInstrAnchoredDelay(rose_group groups_in, u32 anch_id_in, + const RoseInstruction *target_in) + : groups(groups_in), anch_id(anch_id_in), target(target_in) {} + + bool operator==(const RoseInstrAnchoredDelay &ri) const { + return groups == ri.groups && anch_id == ri.anch_id + && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), groups, anch_id); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrAnchoredDelay &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return groups == ri.groups && anch_id == ri.anch_id + && offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckLitEarly + : public RoseInstrBaseOneTarget { +public: + u32 min_offset; + const RoseInstruction *target; + + RoseInstrCheckLitEarly(u32 min_offset_in, const RoseInstruction *target_in) + : min_offset(min_offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckLitEarly &ri) const { + return min_offset == ri.min_offset && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), min_offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckLitEarly &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return min_offset == ri.min_offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckGroups + : public RoseInstrBaseNoTargets { +public: + rose_group groups; + + explicit RoseInstrCheckGroups(rose_group groups_in) : groups(groups_in) {} + + bool operator==(const RoseInstrCheckGroups &ri) const { + return groups == ri.groups; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), groups); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckGroups &ri, const OffsetMap &, + const OffsetMap &) const { + return groups == ri.groups; + } +}; + +class RoseInstrCheckOnlyEod + : public RoseInstrBaseOneTarget { +public: + const RoseInstruction *target; + + explicit RoseInstrCheckOnlyEod(const RoseInstruction *target_in) + : target(target_in) {} + + bool operator==(const RoseInstrCheckOnlyEod &ri) const { + return target == ri.target; + } + + size_t hash() const override { + return boost::hash_value(static_cast(opcode)); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckOnlyEod &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckBounds + : public RoseInstrBaseOneTarget { +public: + u64a min_bound; + u64a max_bound; + const RoseInstruction *target; + + 
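/*
 * A sketch (an assumption, not part of the patch) of how the hash()/
 * equiv_to() machinery above supports program deduplication: two
 * instruction sequences are interchangeable when every instruction matches
 * field-for-field and all jump targets resolve to the same relative
 * offsets, even though the target pointers themselves differ:
 */
static bool programsEquiv(const std::vector<const RoseInstruction *> &a,
                          const std::vector<const RoseInstruction *> &b,
                          const RoseInstruction::OffsetMap &a_offsets,
                          const RoseInstruction::OffsetMap &b_offsets) {
    if (a.size() != b.size()) {
        return false;
    }
    for (size_t i = 0; i < a.size(); i++) {
        if (!a[i]->equiv(*b[i], a_offsets, b_offsets)) {
            return false;
        }
    }
    return true;
}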
RoseInstrCheckBounds(u64a min, u64a max, const RoseInstruction *target_in) + : min_bound(min), max_bound(max), target(target_in) {} + + bool operator==(const RoseInstrCheckBounds &ri) const { + return min_bound == ri.min_bound && max_bound == ri.max_bound && + target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), min_bound, max_bound); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckBounds &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return min_bound == ri.min_bound && max_bound == ri.max_bound && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckNotHandled + : public RoseInstrBaseOneTarget { +public: + u32 key; + const RoseInstruction *target; + + RoseInstrCheckNotHandled(u32 key_in, const RoseInstruction *target_in) + : key(key_in), target(target_in) {} + + bool operator==(const RoseInstrCheckNotHandled &ri) const { + return key == ri.key && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), key); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckNotHandled &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return key == ri.key && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckSingleLookaround + : public RoseInstrBaseOneTarget { +public: + s8 offset; + CharReach reach; + const RoseInstruction *target; + + RoseInstrCheckSingleLookaround(s8 offset_in, CharReach reach_in, + const RoseInstruction *target_in) + : offset(offset_in), reach(std::move(reach_in)), target(target_in) {} + + bool operator==(const RoseInstrCheckSingleLookaround &ri) const { + return offset == ri.offset && reach == ri.reach && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), offset, reach); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckSingleLookaround &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return offset == ri.offset && reach == ri.reach && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckLookaround + : public RoseInstrBaseOneTarget { +public: + std::vector look; + const RoseInstruction *target; + + RoseInstrCheckLookaround(std::vector look_in, + const RoseInstruction *target_in) + : look(std::move(look_in)), target(target_in) {} + + bool operator==(const RoseInstrCheckLookaround &ri) const { + return look == ri.look && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), look); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckLookaround &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return look == ri.look + && offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMask + : public RoseInstrBaseOneTarget { +public: + u64a and_mask; + u64a cmp_mask; + u64a neg_mask; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckMask(u64a and_mask_in, u64a cmp_mask_in, u64a neg_mask_in, + s32 offset_in, const RoseInstruction *target_in) + : and_mask(and_mask_in), cmp_mask(cmp_mask_in), neg_mask(neg_mask_in), + offset(offset_in), 
+          target(target_in) {}
+
+    bool operator==(const RoseInstrCheckMask &ri) const {
+        return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), and_mask, cmp_mask, neg_mask,
+                        offset);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckMask &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckMask32
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MASK_32,
+                                    ROSE_STRUCT_CHECK_MASK_32,
+                                    RoseInstrCheckMask32> {
+public:
+    std::array<u8, 32> and_mask;
+    std::array<u8, 32> cmp_mask;
+    u32 neg_mask;
+    s32 offset;
+    const RoseInstruction *target;
+
+    RoseInstrCheckMask32(std::array<u8, 32> and_mask_in,
+                         std::array<u8, 32> cmp_mask_in, u32 neg_mask_in,
+                         s32 offset_in, const RoseInstruction *target_in)
+        : and_mask(std::move(and_mask_in)), cmp_mask(std::move(cmp_mask_in)),
+          neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckMask32 &ri) const {
+        return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), and_mask, cmp_mask, neg_mask,
+                        offset);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckMask32 &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckByte
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_BYTE,
+                                    ROSE_STRUCT_CHECK_BYTE,
+                                    RoseInstrCheckByte> {
+public:
+    u8 and_mask;
+    u8 cmp_mask;
+    u8 negation;
+    s32 offset;
+    const RoseInstruction *target;
+
+    RoseInstrCheckByte(u8 and_mask_in, u8 cmp_mask_in, u8 negation_in,
+                       s32 offset_in, const RoseInstruction *target_in)
+        : and_mask(and_mask_in), cmp_mask(cmp_mask_in), negation(negation_in),
+          offset(offset_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckByte &ri) const {
+        return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask &&
+               negation == ri.negation && offset == ri.offset &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), and_mask, cmp_mask, negation,
+                        offset);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckByte &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask &&
+               negation == ri.negation && offset == ri.offset &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckShufti16x8
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SHUFTI_16x8,
+                                    ROSE_STRUCT_CHECK_SHUFTI_16x8,
+                                    RoseInstrCheckShufti16x8> {
+public:
+    std::array<u8, 32> nib_mask;
+    std::array<u8, 16> bucket_select_mask;
+    u32 neg_mask;
+    s32 offset;
+    const RoseInstruction *target;
+
+    RoseInstrCheckShufti16x8(std::array<u8, 32> nib_mask_in,
+                             std::array<u8, 16> bucket_select_mask_in,
+                             u32 neg_mask_in, s32 offset_in,
+                             const RoseInstruction *target_in)
+        : nib_mask(std::move(nib_mask_in)),
+          bucket_select_mask(std::move(bucket_select_mask_in)),
+          neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckShufti16x8 &ri) const {
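+        // Structural equality compares the raw target pointer; equiv_to()
+        // compares jump targets via their encoded offsets instead.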
+        return nib_mask == ri.nib_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), nib_mask,
+                        bucket_select_mask, neg_mask, offset);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckShufti16x8 &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return nib_mask == ri.nib_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckShufti32x8
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SHUFTI_32x8,
+                                    ROSE_STRUCT_CHECK_SHUFTI_32x8,
+                                    RoseInstrCheckShufti32x8> {
+public:
+    std::array<u8, 16> hi_mask;
+    std::array<u8, 16> lo_mask;
+    std::array<u8, 32> bucket_select_mask;
+    u32 neg_mask;
+    s32 offset;
+    const RoseInstruction *target;
+
+    RoseInstrCheckShufti32x8(std::array<u8, 16> hi_mask_in,
+                             std::array<u8, 16> lo_mask_in,
+                             std::array<u8, 32> bucket_select_mask_in,
+                             u32 neg_mask_in, s32 offset_in,
+                             const RoseInstruction *target_in)
+        : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)),
+          bucket_select_mask(std::move(bucket_select_mask_in)),
+          neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckShufti32x8 &ri) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), hi_mask, lo_mask,
+                        bucket_select_mask, neg_mask, offset);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckShufti32x8 &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckShufti16x16
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SHUFTI_16x16,
+                                    ROSE_STRUCT_CHECK_SHUFTI_16x16,
+                                    RoseInstrCheckShufti16x16> {
+public:
+    std::array<u8, 32> hi_mask;
+    std::array<u8, 32> lo_mask;
+    std::array<u8, 32> bucket_select_mask;
+    u32 neg_mask;
+    s32 offset;
+    const RoseInstruction *target;
+
+    RoseInstrCheckShufti16x16(std::array<u8, 32> hi_mask_in,
+                              std::array<u8, 32> lo_mask_in,
+                              std::array<u8, 32> bucket_select_mask_in,
+                              u32 neg_mask_in, s32 offset_in,
+                              const RoseInstruction *target_in)
+        : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)),
+          bucket_select_mask(std::move(bucket_select_mask_in)),
+          neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckShufti16x16 &ri) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), hi_mask, lo_mask,
+                        bucket_select_mask, neg_mask, offset);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckShufti16x16 &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckShufti32x16
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SHUFTI_32x16,
+                                    ROSE_STRUCT_CHECK_SHUFTI_32x16,
+                                    RoseInstrCheckShufti32x16> {
+public:
+    std::array<u8, 32> hi_mask;
+    std::array<u8, 32> lo_mask;
+    std::array<u8, 32> bucket_select_mask_hi;
+    std::array<u8, 32> bucket_select_mask_lo;
+    u32 neg_mask;
+    s32 offset;
+    const RoseInstruction *target;
+
+    RoseInstrCheckShufti32x16(std::array<u8, 32> hi_mask_in,
+                              std::array<u8, 32> lo_mask_in,
+                              std::array<u8, 32> bucket_select_mask_hi_in,
+                              std::array<u8, 32> bucket_select_mask_lo_in,
+                              u32 neg_mask_in, s32 offset_in,
+                              const RoseInstruction *target_in)
+        : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)),
+          bucket_select_mask_hi(std::move(bucket_select_mask_hi_in)),
+          bucket_select_mask_lo(std::move(bucket_select_mask_lo_in)),
+          neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckShufti32x16 &ri) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask_hi == ri.bucket_select_mask_hi &&
+               bucket_select_mask_lo == ri.bucket_select_mask_lo &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), hi_mask, lo_mask,
+                        bucket_select_mask_hi, bucket_select_mask_lo,
+                        neg_mask, offset);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckShufti32x16 &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask_hi == ri.bucket_select_mask_hi &&
+               bucket_select_mask_lo == ri.bucket_select_mask_lo &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckInfix
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_INFIX,
+                                    ROSE_STRUCT_CHECK_INFIX,
+                                    RoseInstrCheckInfix> {
+public:
+    u32 queue;
+    u32 lag;
+    ReportID report;
+    const RoseInstruction *target;
+
+    RoseInstrCheckInfix(u32 queue_in, u32 lag_in, ReportID report_in,
+                        const RoseInstruction *target_in)
+        : queue(queue_in), lag(lag_in), report(report_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckInfix &ri) const {
+        return queue == ri.queue && lag == ri.lag && report == ri.report &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), queue, lag, report);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckInfix &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return queue == ri.queue && lag == ri.lag && report == ri.report &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckPrefix
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_PREFIX,
+                                    ROSE_STRUCT_CHECK_PREFIX,
+                                    RoseInstrCheckPrefix> {
+public:
+    u32 queue;
+    u32 lag;
+    ReportID report;
+    const RoseInstruction *target;
+
+    RoseInstrCheckPrefix(u32 queue_in, u32 lag_in, ReportID report_in,
+                         const RoseInstruction *target_in)
+        : queue(queue_in), lag(lag_in), report(report_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckPrefix &ri) const {
+        return queue == ri.queue && lag == ri.lag && report == ri.report &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), queue, lag, report);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckPrefix &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return queue == ri.queue && lag == ri.lag && report == ri.report &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrPushDelayed
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_PUSH_DELAYED,
+                                    ROSE_STRUCT_PUSH_DELAYED,
+                                    RoseInstrPushDelayed> {
+public:
+    u8 delay;
+    u32 index;
+
+    RoseInstrPushDelayed(u8 delay_in, u32 index_in)
+        : delay(delay_in), index(index_in) {}
+
+    bool operator==(const RoseInstrPushDelayed &ri) const {
+        return delay == ri.delay && index == ri.index;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), delay, index);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrPushDelayed &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return delay == ri.delay && index == ri.index;
+    }
+};
+
+class RoseInstrCatchUp
+    : public RoseInstrBaseTrivial<ROSE_INSTR_CATCH_UP, ROSE_STRUCT_CATCH_UP,
+                                  RoseInstrCatchUp> {
+public:
+    ~RoseInstrCatchUp() override;
+};
+
+class RoseInstrCatchUpMpv
+    : public RoseInstrBaseTrivial<ROSE_INSTR_CATCH_UP_MPV,
+                                  ROSE_STRUCT_CATCH_UP_MPV,
+                                  RoseInstrCatchUpMpv> {
+public:
+    ~RoseInstrCatchUpMpv() override;
+};
+
+class RoseInstrSomAdjust
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_SOM_ADJUST,
+                                    ROSE_STRUCT_SOM_ADJUST,
+                                    RoseInstrSomAdjust> {
+public:
+    u32 distance;
+
+    explicit RoseInstrSomAdjust(u32 distance_in) : distance(distance_in) {}
+
+    bool operator==(const RoseInstrSomAdjust &ri) const {
+        return distance == ri.distance;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), distance);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrSomAdjust &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return distance == ri.distance;
+    }
+};
+
+class RoseInstrSomLeftfix
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_SOM_LEFTFIX,
+                                    ROSE_STRUCT_SOM_LEFTFIX,
+                                    RoseInstrSomLeftfix> {
+public:
+    u32 queue;
+    u32 lag;
+
+    RoseInstrSomLeftfix(u32 queue_in, u32 lag_in)
+        : queue(queue_in), lag(lag_in) {}
+
+    bool operator==(const RoseInstrSomLeftfix &ri) const {
+        return queue == ri.queue && lag == ri.lag;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), queue, lag);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrSomLeftfix &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return queue == ri.queue && lag == ri.lag;
+    }
+};
+
+class RoseInstrSomFromReport
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_SOM_FROM_REPORT,
+                                    ROSE_STRUCT_SOM_FROM_REPORT,
+                                    RoseInstrSomFromReport> {
+public:
+    som_operation som;
+
+    RoseInstrSomFromReport() {
+        std::memset(&som, 0, sizeof(som));
+    }
+
+    bool operator==(const RoseInstrSomFromReport &ri) const {
+        return std::memcmp(&som, &ri.som, sizeof(som)) == 0;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), som.type, som.onmatch);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrSomFromReport &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return std::memcmp(&som, &ri.som, sizeof(som)) == 0;
+    }
+};
+
+class RoseInstrSomZero
+    : public RoseInstrBaseTrivial<ROSE_INSTR_SOM_ZERO, ROSE_STRUCT_SOM_ZERO,
+                                  RoseInstrSomZero> {
+public:
+    ~RoseInstrSomZero() override;
+};
+
+class RoseInstrTriggerInfix
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_TRIGGER_INFIX,
+                                    ROSE_STRUCT_TRIGGER_INFIX,
+                                    RoseInstrTriggerInfix> {
+public:
+    u8 cancel;
+    u32 queue;
+    u32 event;
+
+    RoseInstrTriggerInfix(u8 cancel_in, u32 queue_in, u32 event_in)
+        : cancel(cancel_in), queue(queue_in), event(event_in) {}
+
+    bool operator==(const RoseInstrTriggerInfix &ri) const {
+        return cancel == ri.cancel && queue == ri.queue && event == ri.event;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), cancel, queue, event);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrTriggerInfix &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return cancel == ri.cancel && queue == ri.queue && event == ri.event;
+    }
+};
+
+class RoseInstrTriggerSuffix
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_TRIGGER_SUFFIX,
+                                    ROSE_STRUCT_TRIGGER_SUFFIX,
+                                    RoseInstrTriggerSuffix> {
+public:
+    u32 queue;
+    u32 event;
+
+    RoseInstrTriggerSuffix(u32 queue_in, u32 event_in)
+        : queue(queue_in), event(event_in) {}
+
+    bool operator==(const RoseInstrTriggerSuffix &ri) const {
+        return queue == ri.queue && event == ri.event;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), queue, event);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrTriggerSuffix &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return queue == ri.queue && event == ri.event;
+    }
+};
+
+class RoseInstrDedupe
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_DEDUPE, ROSE_STRUCT_DEDUPE,
+                                    RoseInstrDedupe> {
+public:
+    u8 quash_som;
+    u32 dkey;
+    s32 offset_adjust;
+    const RoseInstruction *target;
+
+    RoseInstrDedupe(u8 quash_som_in, u32 dkey_in, s32 offset_adjust_in,
+                    const RoseInstruction *target_in)
+        : quash_som(quash_som_in), dkey(dkey_in),
+          offset_adjust(offset_adjust_in), target(target_in) {}
+
+    bool operator==(const RoseInstrDedupe &ri) const {
+        return quash_som == ri.quash_som && dkey == ri.dkey &&
+               offset_adjust == ri.offset_adjust && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), quash_som, dkey,
+                        offset_adjust);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrDedupe &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return quash_som == ri.quash_som && dkey == ri.dkey &&
+               offset_adjust == ri.offset_adjust &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrDedupeSom
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_DEDUPE_SOM,
+                                    ROSE_STRUCT_DEDUPE_SOM,
+                                    RoseInstrDedupeSom> {
+public:
+    u8 quash_som;
+    u32 dkey;
+    s32 offset_adjust;
+    const RoseInstruction *target;
+
+    RoseInstrDedupeSom(u8 quash_som_in, u32 dkey_in, s32 offset_adjust_in,
+                       const RoseInstruction *target_in)
+        : quash_som(quash_som_in), dkey(dkey_in),
+          offset_adjust(offset_adjust_in), target(target_in) {}
+
+    bool operator==(const RoseInstrDedupeSom &ri) const {
+        return quash_som == ri.quash_som && dkey == ri.dkey &&
+               offset_adjust == ri.offset_adjust && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), quash_som, dkey,
+                        offset_adjust);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrDedupeSom &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return quash_som == ri.quash_som && dkey == ri.dkey &&
+               offset_adjust == ri.offset_adjust &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrReportChain
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_CHAIN,
+                                    ROSE_STRUCT_REPORT_CHAIN,
+                                    RoseInstrReportChain> {
+public:
+    u32 event;
+    u64a top_squash_distance;
+
+    RoseInstrReportChain(u32 event_in, u64a top_squash_distance_in)
+        : event(event_in), top_squash_distance(top_squash_distance_in) {}
+
+    bool operator==(const RoseInstrReportChain &ri) const {
+        return event == ri.event &&
+               top_squash_distance == ri.top_squash_distance;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), event, top_squash_distance);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrReportChain &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return event == ri.event &&
+               top_squash_distance == ri.top_squash_distance;
+    }
+};
+
+class RoseInstrReportSomInt
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_SOM_INT,
+                                    ROSE_STRUCT_REPORT_SOM_INT,
+                                    RoseInstrReportSomInt> {
+public:
+    som_operation som;
+
+    RoseInstrReportSomInt() {
+        std::memset(&som, 0, sizeof(som));
+    }
+
+    bool operator==(const RoseInstrReportSomInt &ri) const {
+        return std::memcmp(&som, &ri.som, sizeof(som)) == 0;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), som.type, som.onmatch);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrReportSomInt &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return std::memcmp(&som, &ri.som, sizeof(som)) == 0;
+    }
+};
+
+class RoseInstrReportSomAware
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_SOM_AWARE,
+                                    ROSE_STRUCT_REPORT_SOM_AWARE,
+                                    RoseInstrReportSomAware> {
+public:
+    som_operation som;
+
+    RoseInstrReportSomAware() {
+        std::memset(&som, 0, sizeof(som));
+    }
+
+    bool operator==(const RoseInstrReportSomAware &ri) const {
+        return std::memcmp(&som, &ri.som, sizeof(som)) == 0;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), som.type, som.onmatch);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrReportSomAware &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return std::memcmp(&som, &ri.som, sizeof(som)) == 0;
+    }
+};
+
+class RoseInstrReport
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT, ROSE_STRUCT_REPORT,
+                                    RoseInstrReport> {
+public:
+    ReportID onmatch;
+    s32 offset_adjust;
+
+    RoseInstrReport(ReportID onmatch_in, s32 offset_adjust_in)
+        : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {}
+
+    bool operator==(const RoseInstrReport &ri) const {
+        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), onmatch, offset_adjust);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrReport &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust;
+    }
+};
+
+class RoseInstrReportExhaust
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_EXHAUST,
+                                    ROSE_STRUCT_REPORT_EXHAUST,
+                                    RoseInstrReportExhaust> {
+public:
+    ReportID onmatch;
+    s32 offset_adjust;
+    u32 ekey;
+
+    RoseInstrReportExhaust(ReportID onmatch_in, s32 offset_adjust_in,
+                           u32 ekey_in)
+        : onmatch(onmatch_in), offset_adjust(offset_adjust_in), ekey(ekey_in) {}
+
+    bool operator==(const RoseInstrReportExhaust &ri) const {
+        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust &&
+               ekey == ri.ekey;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), onmatch, offset_adjust, ekey);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrReportExhaust &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust &&
+               ekey == ri.ekey;
+    }
+};
+
+class RoseInstrReportSom
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_SOM,
+                                    ROSE_STRUCT_REPORT_SOM,
+                                    RoseInstrReportSom> {
+public:
+    ReportID onmatch;
+    s32 offset_adjust;
+
+    RoseInstrReportSom(ReportID onmatch_in, s32 offset_adjust_in)
+        : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {}
+
+    bool operator==(const RoseInstrReportSom &ri) const {
+        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), onmatch, offset_adjust);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrReportSom &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust;
+    }
+};
+
+class RoseInstrReportSomExhaust
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_SOM_EXHAUST,
+                                    ROSE_STRUCT_REPORT_SOM_EXHAUST,
+                                    RoseInstrReportSomExhaust> {
+public:
+    ReportID onmatch;
+    s32 offset_adjust;
+    u32 ekey;
+
+    RoseInstrReportSomExhaust(ReportID onmatch_in, s32 offset_adjust_in,
+                              u32 ekey_in)
+        : onmatch(onmatch_in), offset_adjust(offset_adjust_in), ekey(ekey_in) {}
+
+    bool operator==(const RoseInstrReportSomExhaust &ri) const {
+        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust &&
+               ekey == ri.ekey;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), onmatch, offset_adjust, ekey);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrReportSomExhaust &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust &&
+               ekey == ri.ekey;
+    }
+};
+
+class RoseInstrDedupeAndReport
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_DEDUPE_AND_REPORT,
+                                    ROSE_STRUCT_DEDUPE_AND_REPORT,
+                                    RoseInstrDedupeAndReport> {
+public:
+    u8 quash_som;
+    u32 dkey;
+    ReportID onmatch;
+    s32 offset_adjust;
+    const RoseInstruction *target;
+
+    RoseInstrDedupeAndReport(u8 quash_som_in, u32 dkey_in, ReportID onmatch_in,
+                             s32 offset_adjust_in,
+                             const RoseInstruction *target_in)
+        : quash_som(quash_som_in), dkey(dkey_in), onmatch(onmatch_in),
+          offset_adjust(offset_adjust_in), target(target_in) {}
+
+    bool operator==(const RoseInstrDedupeAndReport &ri) const {
+        return quash_som == ri.quash_som && dkey == ri.dkey &&
+               onmatch == ri.onmatch && offset_adjust == ri.offset_adjust &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), quash_som, dkey, onmatch,
+                        offset_adjust);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrDedupeAndReport &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return quash_som == ri.quash_som && dkey == ri.dkey &&
+               onmatch == ri.onmatch && offset_adjust == ri.offset_adjust &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrFinalReport
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_FINAL_REPORT,
+                                    ROSE_STRUCT_FINAL_REPORT,
+                                    RoseInstrFinalReport> {
+public:
+    ReportID onmatch;
+    s32 offset_adjust;
+
+    RoseInstrFinalReport(ReportID onmatch_in, s32 offset_adjust_in)
+        : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {}
+
+    bool operator==(const RoseInstrFinalReport &ri) const {
+        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), onmatch, offset_adjust);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrFinalReport &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust;
+    }
+};
+
+class RoseInstrCheckExhausted
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_EXHAUSTED,
+                                    ROSE_STRUCT_CHECK_EXHAUSTED,
+                                    RoseInstrCheckExhausted> {
+public:
+    u32 ekey;
+    const RoseInstruction *target;
+
+    RoseInstrCheckExhausted(u32 ekey_in, const RoseInstruction *target_in)
+        : ekey(ekey_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckExhausted &ri) const {
+        return ekey == ri.ekey && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), ekey);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckExhausted &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return ekey == ri.ekey &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckMinLength
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MIN_LENGTH,
+                                    ROSE_STRUCT_CHECK_MIN_LENGTH,
+                                    RoseInstrCheckMinLength> {
+public:
+    s32 end_adj;
+    u64a min_length;
+    const RoseInstruction *target;
+
+    RoseInstrCheckMinLength(s32 end_adj_in, u64a min_length_in,
+                            const RoseInstruction *target_in)
+        : end_adj(end_adj_in), min_length(min_length_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckMinLength &ri) const {
+        return end_adj == ri.end_adj && min_length == ri.min_length &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), end_adj, min_length);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckMinLength &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return end_adj == ri.end_adj && min_length == ri.min_length &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrSetState
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_SET_STATE,
+                                    ROSE_STRUCT_SET_STATE,
+                                    RoseInstrSetState> {
+public:
+    u32 index;
+
+    explicit RoseInstrSetState(u32 index_in) : index(index_in) {}
+
+    bool operator==(const RoseInstrSetState &ri) const {
+        return index == ri.index;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), index);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrSetState &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return index == ri.index;
+    }
+};
+
+class RoseInstrSetGroups
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_SET_GROUPS,
+                                    ROSE_STRUCT_SET_GROUPS,
+                                    RoseInstrSetGroups> {
+public:
+    rose_group groups;
+
+    explicit RoseInstrSetGroups(rose_group groups_in) : groups(groups_in) {}
+
+    bool operator==(const RoseInstrSetGroups &ri) const {
+        return groups == ri.groups;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), groups);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrSetGroups &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return groups == ri.groups;
+    }
+};
+
+class RoseInstrSquashGroups
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_SQUASH_GROUPS,
+                                    ROSE_STRUCT_SQUASH_GROUPS,
+                                    RoseInstrSquashGroups> {
+public:
+    rose_group groups;
+
+    explicit RoseInstrSquashGroups(rose_group groups_in) : groups(groups_in) {}
+
+    bool operator==(const RoseInstrSquashGroups &ri) const {
+        return groups == ri.groups;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), groups);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrSquashGroups &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return groups == ri.groups;
+    }
+};
+
+class RoseInstrCheckState
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_STATE,
+                                    ROSE_STRUCT_CHECK_STATE,
+                                    RoseInstrCheckState> {
+public:
+    u32 index;
+    const RoseInstruction *target;
+
+    RoseInstrCheckState(u32 index_in, const RoseInstruction *target_in)
+        : index(index_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckState &ri) const {
+        return index == ri.index && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), index);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckState &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return index == ri.index &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrSparseIterBegin
+    : public RoseInstrBase<ROSE_INSTR_SPARSE_ITER_BEGIN,
+                           ROSE_STRUCT_SPARSE_ITER_BEGIN,
+                           RoseInstrSparseIterBegin> {
+public:
+    u32 num_keys; // total number of multibit keys
+    std::vector<std::pair<u32, const RoseInstruction *>> jump_table;
+    const RoseInstruction *target;
+
+    RoseInstrSparseIterBegin(u32 num_keys_in,
+                             const RoseInstruction *target_in)
+        : num_keys(num_keys_in), target(target_in) {}
+
+    bool operator==(const RoseInstrSparseIterBegin &ri) const {
+        return num_keys == ri.num_keys && jump_table == ri.jump_table &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        size_t v = hash_all(static_cast<u32>(opcode), num_keys);
+        for (const u32 &key : jump_table | boost::adaptors::map_keys) {
+            boost::hash_combine(v, key);
+        }
+        return v;
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    void update_target(const RoseInstruction *old_target,
+                       const RoseInstruction *new_target) override {
+        if (target == old_target) {
+            target = new_target;
+        }
+        for (auto &jump : jump_table) {
+            if (jump.second == old_target) {
+                jump.second = new_target;
+            }
+        }
+    }
+
+    bool equiv_to(const RoseInstrSparseIterBegin &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        if (iter_offset != ri.iter_offset ||
+            offsets.at(target) != other_offsets.at(ri.target)) {
+            return false;
+        }
+        if (jump_table.size() != ri.jump_table.size()) {
+            return false;
+        }
+        auto it1 = jump_table.begin(), it2 = ri.jump_table.begin();
+        for (; it1 != jump_table.end(); ++it1, ++it2) {
+            if (it1->first != it2->first) {
+                return false;
+            }
+            if (offsets.at(it1->second) != other_offsets.at(it2->second)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+private:
+    friend class RoseInstrSparseIterNext;
+
+    // These variables allow us to use the same multibit iterator and jump
+    // table in subsequent SPARSE_ITER_NEXT write() operations.
+    mutable bool is_written = false;
+    mutable u32 iter_offset = 0;
+    mutable u32 jump_table_offset = 0;
+};
+
+class RoseInstrSparseIterNext
+    : public RoseInstrBase<ROSE_INSTR_SPARSE_ITER_NEXT,
+                           ROSE_STRUCT_SPARSE_ITER_NEXT,
+                           RoseInstrSparseIterNext> {
+public:
+    u32 state;
+    const RoseInstrSparseIterBegin *begin;
+    const RoseInstruction *target;
+
+    RoseInstrSparseIterNext(u32 state_in,
+                            const RoseInstrSparseIterBegin *begin_in,
+                            const RoseInstruction *target_in)
+        : state(state_in), begin(begin_in), target(target_in) {}
+
+    bool operator==(const RoseInstrSparseIterNext &ri) const {
+        return state == ri.state && begin == ri.begin && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), state);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    void update_target(const RoseInstruction *old_target,
+                       const RoseInstruction *new_target) override {
+        if (target == old_target) {
+            target = new_target;
+        }
+        if (begin == old_target) {
+            assert(new_target->code() == ROSE_INSTR_SPARSE_ITER_BEGIN);
+            begin = static_cast<const RoseInstrSparseIterBegin *>(new_target);
+        }
+    }
+
+    bool equiv_to(const RoseInstrSparseIterNext &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return state == ri.state &&
+               offsets.at(begin) == other_offsets.at(ri.begin) &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrSparseIterAny
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_SPARSE_ITER_ANY,
+                                    ROSE_STRUCT_SPARSE_ITER_ANY,
+                                    RoseInstrSparseIterAny> {
+public:
+    u32 num_keys; // total number of multibit keys
+    std::vector<u32> keys;
+    const RoseInstruction *target;
+
+    RoseInstrSparseIterAny(u32 num_keys_in, std::vector<u32> keys_in,
+                           const RoseInstruction *target_in)
+        : num_keys(num_keys_in), keys(std::move(keys_in)), target(target_in) {}
+
+    bool operator==(const RoseInstrSparseIterAny &ri) const {
+        return num_keys == ri.num_keys && keys == ri.keys &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), num_keys, keys);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrSparseIterAny &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return num_keys == ri.num_keys && keys == ri.keys &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrEnginesEod
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_ENGINES_EOD,
+                                    ROSE_STRUCT_ENGINES_EOD,
+                                    RoseInstrEnginesEod> {
+public:
+    u32 iter_offset;
+
+    explicit RoseInstrEnginesEod(u32 iter_in) : iter_offset(iter_in) {}
+
+    bool operator==(const RoseInstrEnginesEod &ri) const {
+        return iter_offset == ri.iter_offset;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), iter_offset);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrEnginesEod &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return iter_offset == ri.iter_offset;
+    }
+};
+
+class RoseInstrSuffixesEod
+    : public RoseInstrBaseTrivial<ROSE_INSTR_SUFFIXES_EOD,
+                                  ROSE_STRUCT_SUFFIXES_EOD,
+                                  RoseInstrSuffixesEod> {
+public:
+    ~RoseInstrSuffixesEod() override;
+};
+
+class RoseInstrMatcherEod : public RoseInstrBaseTrivial<ROSE_INSTR_MATCHER_EOD,
+                                                        ROSE_STRUCT_MATCHER_EOD,
+                                                        RoseInstrMatcherEod> {
+public:
+    ~RoseInstrMatcherEod() override;
+};
+
+class RoseInstrCheckLongLit
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_LONG_LIT,
+                                    ROSE_STRUCT_CHECK_LONG_LIT,
+                                    RoseInstrCheckLongLit> {
+public:
+    std::string literal;
+    const RoseInstruction *target;
+
+    RoseInstrCheckLongLit(std::string literal_in,
+                          const RoseInstruction *target_in)
+        : literal(std::move(literal_in)), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckLongLit &ri) const {
+        return literal == ri.literal && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), literal);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckLongLit &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return literal == ri.literal &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckLongLitNocase
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_LONG_LIT_NOCASE,
+                                    ROSE_STRUCT_CHECK_LONG_LIT_NOCASE,
+                                    RoseInstrCheckLongLitNocase> {
+public:
+    std::string literal;
+    const RoseInstruction *target;
+
+    RoseInstrCheckLongLitNocase(std::string literal_in,
+                                const RoseInstruction *target_in)
+        : literal(std::move(literal_in)), target(target_in) {
+        upperString(literal);
+    }
+
+    bool operator==(const RoseInstrCheckLongLitNocase &ri) const {
+        return literal == ri.literal && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), literal);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckLongLitNocase &ri,
+                  const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return literal == ri.literal &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckMedLit
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MED_LIT,
+                                    ROSE_STRUCT_CHECK_MED_LIT,
+                                    RoseInstrCheckMedLit> {
+public:
+    std::string literal;
+    const RoseInstruction *target;
+
+    explicit RoseInstrCheckMedLit(std::string literal_in,
+                                  const RoseInstruction *target_in)
+        : literal(std::move(literal_in)), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckMedLit &ri) const {
+        return literal == ri.literal && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), literal);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckMedLit &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return literal == ri.literal &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckMedLitNocase
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MED_LIT_NOCASE,
+                                    ROSE_STRUCT_CHECK_MED_LIT_NOCASE,
+                                    RoseInstrCheckMedLitNocase> {
+public:
+    std::string literal;
+    const RoseInstruction *target;
+
+    explicit RoseInstrCheckMedLitNocase(std::string literal_in,
+                                        const RoseInstruction *target_in)
+        : literal(std::move(literal_in)), target(target_in) {
+        upperString(literal);
+    }
+
+    bool operator==(const RoseInstrCheckMedLitNocase &ri) const {
+        return literal == ri.literal && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), literal);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckMedLitNocase &ri,
+                  const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return literal == ri.literal &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrClearWorkDone
+    : public RoseInstrBaseTrivial<ROSE_INSTR_CLEAR_WORK_DONE,
+                                  ROSE_STRUCT_CLEAR_WORK_DONE,
+                                  RoseInstrClearWorkDone> {
+public:
+    ~RoseInstrClearWorkDone() override;
+};
+
+class RoseInstrMultipathLookaround
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_MULTIPATH_LOOKAROUND,
+                                    ROSE_STRUCT_MULTIPATH_LOOKAROUND,
+                                    RoseInstrMultipathLookaround> {
+public:
+    std::vector<std::vector<LookEntry>> multi_look;
+    s32 last_start;
+    std::array<u8, MULTIPATH_MAX_LEN> start_mask;
+    const RoseInstruction *target;
+
+    RoseInstrMultipathLookaround(std::vector<std::vector<LookEntry>> ml,
+                                 s32 last_start_in,
+                                 std::array<u8, MULTIPATH_MAX_LEN> start_mask_in,
+                                 const RoseInstruction *target_in)
+        : multi_look(std::move(ml)), last_start(last_start_in),
+          start_mask(std::move(start_mask_in)), target(target_in) {}
+
+    bool operator==(const RoseInstrMultipathLookaround &ri) const {
+        return multi_look == ri.multi_look && last_start == ri.last_start
+            && start_mask == ri.start_mask && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), multi_look, last_start,
+                        start_mask);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrMultipathLookaround &ri,
+                  const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return multi_look == ri.multi_look && last_start == ri.last_start
+            && start_mask == ri.start_mask
+            && offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckMultipathShufti16x8
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_16x8,
+                                    ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_16x8,
+                                    RoseInstrCheckMultipathShufti16x8> {
+public:
+    std::array<u8, 32> nib_mask;
+    std::array<u8, 16> bucket_select_mask;
+    std::array<u8, 16> data_select_mask;
+    u16 hi_bits_mask;
+    u16 lo_bits_mask;
+    u16 neg_mask;
+    s32 base_offset;
+    s32 last_start;
+    const RoseInstruction *target;
+
+    RoseInstrCheckMultipathShufti16x8(std::array<u8, 32> nib_mask_in,
+                                      std::array<u8, 16> bucket_select_mask_in,
+                                      std::array<u8, 16> data_select_mask_in,
+                                      u16 hi_bits_mask_in, u16 lo_bits_mask_in,
+                                      u16 neg_mask_in, s32 base_offset_in,
+                                      s32 last_start_in,
+                                      const RoseInstruction *target_in)
+        : nib_mask(std::move(nib_mask_in)),
+          bucket_select_mask(std::move(bucket_select_mask_in)),
+          data_select_mask(std::move(data_select_mask_in)),
+          hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in),
+          neg_mask(neg_mask_in), base_offset(base_offset_in),
+          last_start(last_start_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckMultipathShufti16x8 &ri) const {
+        return nib_mask == ri.nib_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               data_select_mask == ri.data_select_mask &&
+               hi_bits_mask == ri.hi_bits_mask &&
+               lo_bits_mask == ri.lo_bits_mask &&
+               neg_mask == ri.neg_mask && base_offset == ri.base_offset &&
+               last_start == ri.last_start && target == ri.target;
+    }
+
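+    // As with the other check instructions, hash() omits the jump target;
+    // equiv_to() accounts for it by comparing encoded offsets.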
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), nib_mask,
+                        bucket_select_mask, data_select_mask, hi_bits_mask,
+                        lo_bits_mask, neg_mask, base_offset, last_start);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckMultipathShufti16x8 &ri,
+                  const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return nib_mask == ri.nib_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               data_select_mask == ri.data_select_mask &&
+               hi_bits_mask == ri.hi_bits_mask &&
+               lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask &&
+               base_offset == ri.base_offset && last_start == ri.last_start &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckMultipathShufti32x8
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x8,
+                                    ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x8,
+                                    RoseInstrCheckMultipathShufti32x8> {
+public:
+    std::array<u8, 16> hi_mask;
+    std::array<u8, 16> lo_mask;
+    std::array<u8, 32> bucket_select_mask;
+    std::array<u8, 32> data_select_mask;
+    u32 hi_bits_mask;
+    u32 lo_bits_mask;
+    u32 neg_mask;
+    s32 base_offset;
+    s32 last_start;
+    const RoseInstruction *target;
+
+    RoseInstrCheckMultipathShufti32x8(std::array<u8, 16> hi_mask_in,
+                                      std::array<u8, 16> lo_mask_in,
+                                      std::array<u8, 32> bucket_select_mask_in,
+                                      std::array<u8, 32> data_select_mask_in,
+                                      u32 hi_bits_mask_in, u32 lo_bits_mask_in,
+                                      u32 neg_mask_in, s32 base_offset_in,
+                                      s32 last_start_in,
+                                      const RoseInstruction *target_in)
+        : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)),
+          bucket_select_mask(std::move(bucket_select_mask_in)),
+          data_select_mask(std::move(data_select_mask_in)),
+          hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in),
+          neg_mask(neg_mask_in), base_offset(base_offset_in),
+          last_start(last_start_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckMultipathShufti32x8 &ri) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               data_select_mask == ri.data_select_mask &&
+               hi_bits_mask == ri.hi_bits_mask &&
+               lo_bits_mask == ri.lo_bits_mask &&
+               neg_mask == ri.neg_mask && base_offset == ri.base_offset &&
+               last_start == ri.last_start && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), hi_mask, lo_mask,
+                        bucket_select_mask, data_select_mask, hi_bits_mask,
+                        lo_bits_mask, neg_mask, base_offset, last_start);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckMultipathShufti32x8 &ri,
+                  const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               data_select_mask == ri.data_select_mask &&
+               hi_bits_mask == ri.hi_bits_mask &&
+               lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask &&
+               base_offset == ri.base_offset && last_start == ri.last_start &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckMultipathShufti32x16
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x16,
+                                    ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x16,
+                                    RoseInstrCheckMultipathShufti32x16> {
+public:
+    std::array<u8, 32> hi_mask;
+    std::array<u8, 32> lo_mask;
+    std::array<u8, 32> bucket_select_mask_hi;
+    std::array<u8, 32> bucket_select_mask_lo;
+    std::array<u8, 32> data_select_mask;
+    u32 hi_bits_mask;
+    u32 lo_bits_mask;
+    u32 neg_mask;
+    s32 base_offset;
+    s32 last_start;
+    const RoseInstruction *target;
+
+    RoseInstrCheckMultipathShufti32x16(std::array<u8, 32> hi_mask_in,
+                                       std::array<u8, 32> lo_mask_in,
+                                       std::array<u8, 32> bucket_select_mask_hi_in,
+                                       std::array<u8, 32> bucket_select_mask_lo_in,
+                                       std::array<u8, 32> data_select_mask_in,
+                                       u32 hi_bits_mask_in, u32 lo_bits_mask_in,
+                                       u32 neg_mask_in, s32 base_offset_in,
+                                       s32 last_start_in,
+                                       const RoseInstruction *target_in)
+        : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)),
+          bucket_select_mask_hi(std::move(bucket_select_mask_hi_in)),
+          bucket_select_mask_lo(std::move(bucket_select_mask_lo_in)),
+          data_select_mask(std::move(data_select_mask_in)),
+          hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in),
+          neg_mask(neg_mask_in), base_offset(base_offset_in),
+          last_start(last_start_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckMultipathShufti32x16 &ri) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask_hi == ri.bucket_select_mask_hi &&
+               bucket_select_mask_lo == ri.bucket_select_mask_lo &&
+               data_select_mask == ri.data_select_mask &&
+               hi_bits_mask == ri.hi_bits_mask &&
+               lo_bits_mask == ri.lo_bits_mask &&
+               neg_mask == ri.neg_mask && base_offset == ri.base_offset &&
+               last_start == ri.last_start && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), hi_mask, lo_mask,
+                        bucket_select_mask_hi, bucket_select_mask_lo,
+                        data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask,
+                        base_offset, last_start);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckMultipathShufti32x16 &ri,
+                  const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask_hi == ri.bucket_select_mask_hi &&
+               bucket_select_mask_lo == ri.bucket_select_mask_lo &&
+               data_select_mask == ri.data_select_mask &&
+               hi_bits_mask == ri.hi_bits_mask &&
+               lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask &&
+               base_offset == ri.base_offset && last_start == ri.last_start &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckMultipathShufti64
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_64,
+                                    ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64,
+                                    RoseInstrCheckMultipathShufti64> {
+public:
+    std::array<u8, 16> hi_mask;
+    std::array<u8, 16> lo_mask;
+    std::array<u8, 64> bucket_select_mask;
+    std::array<u8, 64> data_select_mask;
+    u64a hi_bits_mask;
+    u64a lo_bits_mask;
+    u64a neg_mask;
+    s32 base_offset;
+    s32 last_start;
+    const RoseInstruction *target;
+
+    RoseInstrCheckMultipathShufti64(std::array<u8, 16> hi_mask_in,
+                                    std::array<u8, 16> lo_mask_in,
+                                    std::array<u8, 64> bucket_select_mask_in,
+                                    std::array<u8, 64> data_select_mask_in,
+                                    u64a hi_bits_mask_in, u64a lo_bits_mask_in,
+                                    u64a neg_mask_in, s32 base_offset_in,
+                                    s32 last_start_in,
+                                    const RoseInstruction *target_in)
+        : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)),
+          bucket_select_mask(std::move(bucket_select_mask_in)),
+          data_select_mask(std::move(data_select_mask_in)),
+          hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in),
+          neg_mask(neg_mask_in), base_offset(base_offset_in),
+          last_start(last_start_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckMultipathShufti64 &ri) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               data_select_mask == ri.data_select_mask &&
+               hi_bits_mask == ri.hi_bits_mask &&
+               lo_bits_mask == ri.lo_bits_mask &&
+               neg_mask == ri.neg_mask && base_offset == ri.base_offset &&
+               last_start == ri.last_start && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), hi_mask, lo_mask,
+                        bucket_select_mask, data_select_mask, hi_bits_mask,
+                        lo_bits_mask, neg_mask, base_offset, last_start);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckMultipathShufti64 &ri,
+                  const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               data_select_mask == ri.data_select_mask &&
+               hi_bits_mask == ri.hi_bits_mask &&
+               lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask &&
+               base_offset == ri.base_offset && last_start == ri.last_start &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrEnd
+    : public RoseInstrBaseTrivial<ROSE_INSTR_END, ROSE_STRUCT_END,
+                                  RoseInstrEnd> {
+public:
+    ~RoseInstrEnd() override;
+};
+
+}
+#endif
diff --git a/src/rose/rose_build_lit_accel.cpp b/src/rose/rose_build_lit_accel.cpp
new file mode 100644
index 000000000..b389f493d
--- /dev/null
+++ b/src/rose/rose_build_lit_accel.cpp
@@ -0,0 +1,467 @@
+/*
+ * Copyright (c) 2017, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_lit_accel.h"
+
+#include "grey.h"
+#include "ue2common.h"
+#include "hwlm/hwlm_build.h"
+#include "hwlm/hwlm_internal.h"
+#include "hwlm/hwlm_literal.h"
+#include "nfa/accel.h"
+#include "nfa/shufticompile.h"
+#include "nfa/trufflecompile.h"
+#include "util/compare.h"
+#include "util/dump_charclass.h"
+#include "util/ue2string.h"
+#include "util/verify_types.h"
+
+using namespace std;
+
+namespace ue2 {
+
+static const unsigned int MAX_ACCEL_OFFSET = 16;
+static const unsigned int MAX_SHUFTI_WIDTH = 240;
+
+static
+size_t mask_overhang(const AccelString &lit) {
+    size_t msk_true_size = lit.msk.size();
+    assert(msk_true_size <= HWLM_MASKLEN);
+    assert(HWLM_MASKLEN <= MAX_ACCEL_OFFSET);
+    for (u8 c : lit.msk) {
+        if (!c) {
+            msk_true_size--;
+        } else {
+            break;
+        }
+    }
+
+    if (lit.s.length() >= msk_true_size) {
+        return 0;
+    }
+
+    /* only short literals should be able to have a mask which overhangs */
+    assert(lit.s.length() < MAX_ACCEL_OFFSET);
+    return msk_true_size - lit.s.length();
+}
+
+static
+bool findDVerm(const vector<const AccelString *> &lits, AccelAux *aux) {
+    const AccelString &first = *lits.front();
+
+    struct candidate {
+        candidate(void)
+            : c1(0), c2(0), max_offset(0), b5insens(false), valid(false) {}
+        candidate(const AccelString &base, u32 offset)
+            : c1(base.s[offset]), c2(base.s[offset + 1]), max_offset(0),
+              b5insens(false), valid(true) {}
+        char c1;
+        char c2;
+        u32 max_offset;
+        bool b5insens;
+        bool valid;
+
+        bool operator>(const candidate &other) const {
+            if (!valid) {
+                return false;
+            }
+
+            if (!other.valid) {
+                return true;
+            }
+
+            if (other.cdiffers() && !cdiffers()) {
+                return false;
+            }
+
+            if (!other.cdiffers() && cdiffers()) {
+                return true;
+            }
+
+            if (!other.b5insens && b5insens) {
+                return false;
+            }
+
+            if (other.b5insens && !b5insens) {
+                return true;
+            }
+
+            if (max_offset > other.max_offset) {
+                return false;
+            }
+
+            return true;
+        }
+
+        bool cdiffers(void) const {
+            if (!b5insens) {
+                return c1 != c2;
+            }
+            return (c1 & CASE_CLEAR) != (c2 & CASE_CLEAR);
+        }
+    };
+
+    candidate best;
+
+    for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()) - 1; i++) {
+        candidate curr(first, i);
+
+        /* check to see if this pair appears in each string */
+        for (const auto &lit_ptr : lits) {
+            const AccelString &lit = *lit_ptr;
+            if (lit.nocase && (ourisalpha(curr.c1) || ourisalpha(curr.c2))) {
+                curr.b5insens = true; /* no choice but to be case insensitive */
+            }
+
+            bool found = false;
+            bool found_nc = false;
+            for (u32 j = 0;
+                 !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; j++) {
+                found |= curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1];
+                found_nc |= (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR)
+                    && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR);
+
+                if (curr.b5insens) {
+                    found = found_nc;
+                }
+            }
+
+            if (!curr.b5insens && !found && found_nc) {
+                curr.b5insens = true;
+                found = true;
+            }
+
+            if (!found) {
+                goto next_candidate;
+            }
+        }
+
+        /* check to find the max offset where this appears */
+        for (const auto &lit_ptr : lits) {
+            const AccelString &lit = *lit_ptr;
+            for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1;
+                 j++) {
+                bool found = false;
+                if (curr.b5insens) {
+                    found = (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR)
+                        && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR);
+                } else {
+                    found = curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1];
+                }
+
+                if (found) {
+                    assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET);
+                    ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit));
+                    break;
+                }
+            }
+        }
+
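+        /* keep the candidate that ranks best under operator> above */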
+        if (curr > best) {
+            best = curr;
+        }
+
+    next_candidate:;
+    }
+
+    if (!best.valid) {
+        return false;
+    }
+
+    aux->dverm.offset = verify_u8(best.max_offset);
+
+    if (!best.b5insens) {
+        aux->dverm.accel_type = ACCEL_DVERM;
+        aux->dverm.c1 = best.c1;
+        aux->dverm.c2 = best.c2;
+        DEBUG_PRINTF("built dverm for %02hhx%02hhx\n",
+                     aux->dverm.c1, aux->dverm.c2);
+    } else {
+        aux->dverm.accel_type = ACCEL_DVERM_NOCASE;
+        aux->dverm.c1 = best.c1 & CASE_CLEAR;
+        aux->dverm.c2 = best.c2 & CASE_CLEAR;
+        DEBUG_PRINTF("built dverm nc for %02hhx%02hhx\n",
+                     aux->dverm.c1, aux->dverm.c2);
+    }
+    return true;
+}
+
+static
+bool findSVerm(const vector<const AccelString *> &lits, AccelAux *aux) {
+    const AccelString &first = *lits.front();
+
+    struct candidate {
+        candidate(void)
+            : c(0), max_offset(0), b5insens(false), valid(false) {}
+        candidate(const AccelString &base, u32 offset)
+            : c(base.s[offset]), max_offset(0),
+              b5insens(false), valid(true) {}
+        char c;
+        u32 max_offset;
+        bool b5insens;
+        bool valid;
+
+        bool operator>(const candidate &other) const {
+            if (!valid) {
+                return false;
+            }
+
+            if (!other.valid) {
+                return true;
+            }
+
+            if (!other.b5insens && b5insens) {
+                return false;
+            }
+
+            if (other.b5insens && !b5insens) {
+                return true;
+            }
+
+            if (max_offset > other.max_offset) {
+                return false;
+            }
+
+            return true;
+        }
+    };
+
+    candidate best;
+
+    for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()); i++) {
+        candidate curr(first, i);
+
+        /* check to see if this character appears in each string */
+        for (const auto &lit_ptr : lits) {
+            const AccelString &lit = *lit_ptr;
+            if (lit.nocase && ourisalpha(curr.c)) {
+                curr.b5insens = true; /* no choice but to be case insensitive */
+            }
+
+            bool found = false;
+            bool found_nc = false;
+            for (u32 j = 0;
+                 !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) {
+                found |= curr.c == lit.s[j];
+                found_nc |= (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR);
+
+                if (curr.b5insens) {
+                    found = found_nc;
+                }
+            }
+
+            if (!curr.b5insens && !found && found_nc) {
+                curr.b5insens = true;
+                found = true;
+            }
+
+            if (!found) {
+                goto next_candidate;
+            }
+        }
+
+        /* check to find the max offset where this appears */
+        for (const auto &lit_ptr : lits) {
+            const AccelString &lit = *lit_ptr;
+            for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) {
+                bool found = false;
+                if (curr.b5insens) {
+                    found = (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR);
+                } else {
+                    found = curr.c == lit.s[j];
+                }
+
+                if (found) {
+                    assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET);
+                    ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit));
+                }
+            }
+        }
+
+        if (curr > best) {
+            best = curr;
+        }
+
+    next_candidate:;
+    }
+
+    if (!best.valid) {
+        return false;
+    }
+
+    if (!best.b5insens) {
+        aux->verm.accel_type = ACCEL_VERM;
+        aux->verm.c = best.c;
+        DEBUG_PRINTF("built verm for %02hhx\n", aux->verm.c);
+    } else {
+        aux->verm.accel_type = ACCEL_VERM_NOCASE;
+        aux->verm.c = best.c & CASE_CLEAR;
+        DEBUG_PRINTF("built verm nc for %02hhx\n", aux->verm.c);
+    }
+    aux->verm.offset = verify_u8(best.max_offset);
+
+    return true;
+}
+
+static
+void filterLits(const vector<AccelString> &lits, hwlm_group_t expected_groups,
+                vector<const AccelString *> *filtered_lits, u32 *min_len) {
+    *min_len = MAX_ACCEL_OFFSET;
+
+    for (const auto &lit : lits) {
+        if (!(lit.groups & expected_groups)) {
+            continue;
+        }
+
+        const size_t lit_len = lit.s.length();
+        if (lit_len < *min_len) {
+            *min_len = verify_u32(lit_len);
+        }
+
+        DEBUG_PRINTF("lit: '%s', nocase=%d, groups=0x%llx\n",
+                     escapeString(lit.s).c_str(), lit.nocase ? 1 : 0,
+                     lit.groups);
+        filtered_lits->push_back(&lit);
+    }
+}
+
+static
+bool litGuardedByCharReach(const CharReach &cr, const AccelString &lit,
+                           u32 max_offset) {
+    for (u32 i = 0; i <= max_offset && i < lit.s.length(); i++) {
+        unsigned char c = lit.s[i];
+        if (lit.nocase) {
+            if (cr.test(mytoupper(c)) && cr.test(mytolower(c))) {
+                return true;
+            }
+        } else {
+            if (cr.test(c)) {
+                return true;
+            }
+        }
+    }
+
+    return false;
+}
+
+static
+void findForwardAccelScheme(const vector<AccelString> &lits,
+                            hwlm_group_t expected_groups, AccelAux *aux) {
+    DEBUG_PRINTF("building accel expected=%016llx\n", expected_groups);
+    u32 min_len = MAX_ACCEL_OFFSET;
+    vector<const AccelString *> filtered_lits;
+
+    filterLits(lits, expected_groups, &filtered_lits, &min_len);
+    if (filtered_lits.empty()) {
+        return;
+    }
+
+    if (findDVerm(filtered_lits, aux)
+        || findSVerm(filtered_lits, aux)) {
+        return;
+    }
+
+    /* look for shufti/truffle */
+
+    vector<CharReach> reach(MAX_ACCEL_OFFSET, CharReach());
+    for (const auto &lit : lits) {
+        if (!(lit.groups & expected_groups)) {
+            continue;
+        }
+
+        u32 overhang = mask_overhang(lit);
+        for (u32 i = 0; i < overhang; i++) {
+            /* this offset overhangs the start of the real literal; look at the
+             * msk/cmp */
+            for (u32 j = 0; j < N_CHARS; j++) {
+                if ((j & lit.msk[i]) == lit.cmp[i]) {
+                    reach[i].set(j);
+                }
+            }
+        }
+        for (u32 i = overhang; i < MAX_ACCEL_OFFSET; i++) {
+            CharReach &reach_i = reach[i];
+            u32 i_effective = i - overhang;
+
+            if (litGuardedByCharReach(reach_i, lit, i_effective)) {
+                continue;
+            }
+            unsigned char c = i_effective < lit.s.length() ? lit.s[i_effective]
+                                                           : lit.s.back();
+            if (lit.nocase) {
+                reach_i.set(mytoupper(c));
+                reach_i.set(mytolower(c));
+            } else {
+                reach_i.set(c);
+            }
+        }
+    }
+
+    u32 min_count = ~0U;
+    u32 min_offset = ~0U;
+    for (u32 i = 0; i < MAX_ACCEL_OFFSET; i++) {
+        size_t count = reach[i].count();
+        DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i,
+                     describeClass(reach[i]).c_str(), count);
+        if (count < min_count) {
+            min_count = (u32)count;
+            min_offset = i;
+        }
+    }
+
+    if (min_count > MAX_SHUFTI_WIDTH) {
+        DEBUG_PRINTF("FAIL: min shufti with %u chars is too wide\n", min_count);
+        return;
+    }
+
+    const CharReach &cr = reach[min_offset];
+    if (-1 !=
+        shuftiBuildMasks(cr, (u8 *)&aux->shufti.lo, (u8 *)&aux->shufti.hi)) {
+        DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n",
+                     describeClass(cr).c_str(), cr.count(), min_offset);
+        aux->shufti.accel_type = ACCEL_SHUFTI;
+        aux->shufti.offset = verify_u8(min_offset);
+        return;
+    }
+
+    truffleBuildMasks(cr, (u8 *)&aux->truffle.mask1, (u8 *)&aux->truffle.mask2);
+    DEBUG_PRINTF("built truffle for %s (%zu chars, offset %u)\n",
+                 describeClass(cr).c_str(), cr.count(), min_offset);
+    aux->truffle.accel_type = ACCEL_TRUFFLE;
+    aux->truffle.offset = verify_u8(min_offset);
+}
+
+void buildForwardAccel(HWLM *h, const vector<AccelString> &lits,
+                       hwlm_group_t expected_groups) {
+    findForwardAccelScheme(lits, expected_groups, &h->accel1);
+    findForwardAccelScheme(lits, HWLM_ALL_GROUPS, &h->accel0);
+
+    h->accel1_groups = expected_groups;
+}
+
+} // namespace ue2
diff --git a/src/rose/rose_build_lit_accel.h b/src/rose/rose_build_lit_accel.h
new file mode 100644
index 000000000..f0c014348
--- /dev/null
+++ b/src/rose/rose_build_lit_accel.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2017, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
diff --git a/src/rose/rose_build_lit_accel.h b/src/rose/rose_build_lit_accel.h
new file mode 100644
index 000000000..f0c014348
--- /dev/null
+++ b/src/rose/rose_build_lit_accel.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2017, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_LIT_ACCEL_H
+#define ROSE_BUILD_LIT_ACCEL_H
+
+#include "hwlm/hwlm.h"
+
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+struct HWLM;
+
+namespace ue2 {
+
+struct AccelString {
+    AccelString(std::string s_in, bool nocase_in, std::vector<u8> msk_in,
+                std::vector<u8> cmp_in, hwlm_group_t groups_in)
+        : s(std::move(s_in)), nocase(nocase_in), msk(std::move(msk_in)),
+          cmp(std::move(cmp_in)), groups(groups_in) {}
+
+    std::string s;
+    bool nocase;
+    std::vector<u8> msk;
+    std::vector<u8> cmp;
+    hwlm_group_t groups;
+
+    bool operator==(const AccelString &a) const {
+        return s == a.s && nocase == a.nocase && msk == a.msk && cmp == a.cmp &&
+               groups == a.groups;
+    }
+
+    bool operator<(const AccelString &a) const {
+        return std::tie(s, nocase, msk, cmp, groups) <
+               std::tie(a.s, a.nocase, a.msk, a.cmp, a.groups);
+    }
+};
+
+void buildForwardAccel(HWLM *h, const std::vector<AccelString> &lits,
+                       hwlm_group_t expected_groups);
+
+} // namespace ue2
+
+#endif // ROSE_BUILD_LIT_ACCEL_H
diff --git a/src/rose/rose_build_long_lit.cpp b/src/rose/rose_build_long_lit.cpp
index c32f49d00..7ebf73ecb 100644
--- a/src/rose/rose_build_long_lit.cpp
+++ b/src/rose/rose_build_long_lit.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Intel Corporation
+ * Copyright (c) 2016-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -31,7 +31,7 @@
 #include "rose_build_engine_blob.h"
 #include "rose_build_impl.h"
 #include "stream_long_lit_hash.h"
-#include "util/alloc.h"
+#include "util/bytecode_ptr.h"
 #include "util/bitutils.h"
 #include "util/verify_types.h"
 #include "util/compile_context.h"
@@ -401,7 +401,7 @@ u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob,
     u8 streamBitsNocase = lg2(roundUpToPowerOfTwo(tab_nocase.size() + 2));
     u32 tot_state_bytes = ROUNDUP_N(streamBitsCase + streamBitsNocase, 8) / 8;
 
-    auto table = aligned_zmalloc_unique<char>(tabSize);
+    auto table = make_zeroed_bytecode_ptr<char>(tabSize, 16);
     assert(table); // otherwise would have thrown std::bad_alloc
 
     // Fill in the RoseLongLitTable header structure.
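The AccelString comparators in the new header above use the std::tie idiom: tying the members into tuples of references gives memberwise lexicographic ordering with no hand-written comparison chain. A self-contained sketch of the idiom (the struct here is illustrative, not from the patch):

#include <string>
#include <tuple>

struct Key {
    std::string s;
    bool nocase;
    int delay;

    bool operator<(const Key &o) const {
        // tuple's operator< compares field by field, consulting later
        // fields only when the earlier ones compare equal.
        return std::tie(s, nocase, delay) < std::tie(o.s, o.nocase, o.delay);
    }
};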
@@ -435,7 +435,7 @@ u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob,
     *historyRequired = max(*historyRequired, max_len);
     *longLitStreamStateRequired = tot_state_bytes;
 
-    return blob.add(table.get(), tabSize, 16);
+    return blob.add(table);
 }
 
 } // namespace ue2
diff --git a/src/rose/rose_build_lookaround.cpp b/src/rose/rose_build_lookaround.cpp
index 10bd59dea..a46a1aeb6 100644
--- a/src/rose/rose_build_lookaround.cpp
+++ b/src/rose/rose_build_lookaround.cpp
@@ -45,6 +45,7 @@
 #include
 #include
+#include <sstream>
 
 using namespace std;
 
@@ -62,6 +63,20 @@ static const u32 MAX_LOOKAROUND_ENTRIES = 16;
 /** \brief We would rather have lookarounds with smaller reach than this. */
 static const u32 LOOKAROUND_WIDE_REACH = 200;
 
+#if defined(DEBUG) || defined(DUMP_SUPPORT)
+static UNUSED
+string dump(const map<s32, CharReach> &look) {
+    ostringstream oss;
+    for (auto it = look.begin(), ite = look.end(); it != ite; ++it) {
+        if (it != look.begin()) {
+            oss << ", ";
+        }
+        oss << "{" << it->first << ": " << describeClass(it->second) << "}";
+    }
+    return oss.str();
+}
+#endif
+
 static
 void getForwardReach(const NGHolder &g, u32 top, map<s32, CharReach> &look) {
     ue2::flat_set<NFAVertex> curr, next;
@@ -298,21 +313,6 @@ void findBackwardReach(const RoseGraph &g, const RoseVertex v,
     // TODO: implement DFA variants if necessary.
 }
 
-#if defined(DEBUG) || defined(DUMP_SUPPORT)
-#include <sstream>
-static UNUSED
-string dump(const map<s32, CharReach> &look) {
-    ostringstream oss;
-    for (auto it = look.begin(), ite = look.end(); it != ite; ++it) {
-        if (it != look.begin()) {
-            oss << ", ";
-        }
-        oss << "{" << it->first << ": " << describeClass(it->second) << "}";
-    }
-    return oss.str();
-}
-#endif
-
 static
 void normalise(map<s32, CharReach> &look) {
     // We can erase entries where the reach is "all characters".
@@ -447,7 +447,7 @@ static
 void findFloodReach(const RoseBuildImpl &tbi, const RoseVertex v,
                     set<CharReach> &flood_reach) {
     for (u32 lit_id : tbi.g[v].literals) {
-        const ue2_literal &s = tbi.literals.right.at(lit_id).s;
+        const ue2_literal &s = tbi.literals.at(lit_id).s;
         if (s.empty()) {
             continue;
         }
@@ -460,13 +460,24 @@ void findFloodReach(const RoseBuildImpl &tbi, const RoseVertex v,
     }
 }
 
+
+namespace {
+struct LookProto {
+    LookProto(s32 offset_in, CharReach reach_in)
+        : offset(offset_in), reach(move(reach_in)) {}
+    s32 offset;
+    CharReach reach;
+};
+}
+
 static
-map<s32, CharReach> findLiteralReach(const rose_literal_id &lit) {
-    map<s32, CharReach> look;
+vector<LookProto> findLiteralReach(const rose_literal_id &lit) {
+    vector<LookProto> look;
+    look.reserve(lit.s.length());
 
-    u32 i = lit.delay + 1;
-    for (auto it = lit.s.rbegin(), ite = lit.s.rend(); it != ite; ++it) {
-        look[0 - i] |= *it;
+    s32 i = 0 - lit.s.length() - lit.delay;
+    for (const auto &c : lit.s) {
+        look.emplace_back(i, c);
         i++;
     }
 
@@ -478,22 +489,40 @@ map<s32, CharReach> findLiteralReach(const RoseBuildImpl &build,
                                      const RoseVertex v) {
     bool first = true;
     map<s32, CharReach> look;
+
     for (u32 lit_id : build.g[v].literals) {
-        const rose_literal_id &lit = build.literals.right.at(lit_id);
+        const rose_literal_id &lit = build.literals.at(lit_id);
         auto lit_look = findLiteralReach(lit);
 
         if (first) {
-            look = move(lit_look);
+            for (auto &p : lit_look) {
+                look.emplace(p.offset, p.reach);
+            }
             first = false;
-        } else {
-            for (auto it = look.begin(); it != look.end();) {
-                auto l_it = lit_look.find(it->first);
-                if (l_it == lit_look.end()) {
-                    it = look.erase(it);
-                } else {
-                    it->second |= l_it->second;
-                    ++it;
-                }
+            continue;
+        }
+
+        // Erase elements from look with keys not in lit_look. Where a key is
+        // in both maps, union its reach with the lookaround.
+        auto jt = begin(lit_look);
+        for (auto it = begin(look); it != end(look);) {
+            if (jt == end(lit_look)) {
+                // No further lit_look entries, erase remaining elements from
+                // look.
+                look.erase(it, end(look));
+                break;
+            }
+            if (it->first < jt->offset) {
+                // Offset is present in look but not in lit_look, erase.
+                it = look.erase(it);
+            } else if (it->first > jt->offset) {
+                // Offset is present in lit_look but not in look, ignore.
+                ++jt;
+            } else {
+                // Offset is present in both, union its reach with look.
+                it->second |= jt->reach;
+                ++it;
+                ++jt;
             }
         }
     }
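The loop that now ends the hunk above is a linear merge-intersection over two sequences sorted by key: entries missing from either side are dropped, and shared keys have their reaches unioned. The same walk in a self-contained form, with std::map standing in for the lookaround map and a sorted std::vector for the literal reach (hypothetical names; assumes keys is sorted ascending):

#include <map>
#include <vector>

void intersectKeys(std::map<int, int> &m, const std::vector<int> &keys) {
    auto jt = keys.begin();
    for (auto it = m.begin(); it != m.end();) {
        if (jt == keys.end()) {
            m.erase(it, m.end()); // nothing left on the other side
            break;
        }
        if (it->first < *jt) {
            it = m.erase(it);     // key only in the map: drop it
        } else if (it->first > *jt) {
            ++jt;                 // key only in the vector: skip it
        } else {
            ++it;                 // key in both: keep (union reach here)
            ++jt;
        }
    }
}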
@@ -525,6 +554,76 @@ void trimLiterals(const RoseBuildImpl &build, const RoseVertex v,
     DEBUG_PRINTF("post-trim lookaround: %s\n", dump(look).c_str());
 }
 
+static
+void normaliseLeftfix(map<s32, CharReach> &look) {
+    // We can erase entries where the reach is "all characters", except for the
+    // very first one -- this might be required to establish a minimum bound on
+    // the literal's match offset.
+
+    // TODO: It would be cleaner to use a literal program instruction to check
+    // the minimum bound explicitly.
+
+    if (look.empty()) {
+        return;
+    }
+
+    const auto earliest = begin(look)->first;
+
+    vector<s32> dead;
+    for (const auto &m : look) {
+        if (m.second.all() && m.first != earliest) {
+            dead.push_back(m.first);
+        }
+    }
+    erase_all(&look, dead);
+}
+
+static
+bool trimMultipathLeftfix(const RoseBuildImpl &build, const RoseVertex v,
+                          vector<map<s32, CharReach>> &looks) {
+    size_t path_count = 0;
+    for (auto &look : looks) {
+        ++path_count;
+        DEBUG_PRINTF("Path #%ld\n", path_count);
+
+        assert(!look.empty());
+        trimLiterals(build, v, look);
+
+        if (look.empty()) {
+            return false;
+        }
+
+        // Could be optimized here, just keep the empty byte of the longest path
+        normaliseLeftfix(look);
+
+        if (look.size() > MAX_LOOKAROUND_ENTRIES) {
+            DEBUG_PRINTF("lookaround too big (%zu entries)\n", look.size());
+            return false;
+        }
+    }
+    return true;
+}
+
+static
+void transToLookaround(const vector<map<s32, CharReach>> &looks,
+                       vector<vector<LookEntry>> &lookarounds) {
+    for (const auto &look : looks) {
+        vector<LookEntry> lookaround;
+        DEBUG_PRINTF("lookaround: %s\n", dump(look).c_str());
+        lookaround.reserve(look.size());
+        for (const auto &m : look) {
+            if (m.first < -128 || m.first > 127) {
+                DEBUG_PRINTF("range too big\n");
+                lookarounds.clear();
+                return;
+            }
+            s8 offset = verify_s8(m.first);
+            lookaround.emplace_back(offset, m.second);
+        }
+        lookarounds.push_back(lookaround);
+    }
+}
+
 void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
                          vector<LookEntry> &lookaround) {
     lookaround.clear();
@@ -563,115 +662,155 @@ void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
 }
 
 static
-bool hasSingleFloatingStart(const NGHolder &g) {
-    NFAVertex initial = NGHolder::null_vertex();
-    for (auto v : adjacent_vertices_range(g.startDs, g)) {
-        if (v == g.startDs) {
-            continue;
-        }
-        if (initial != NGHolder::null_vertex()) {
-            DEBUG_PRINTF("more than one start\n");
-            return false;
-        }
-        initial = v;
-    }
-
-    if (initial == NGHolder::null_vertex()) {
-        DEBUG_PRINTF("no floating starts\n");
-        return false;
-    }
+bool checkShuftiBuckets(const vector<map<s32, CharReach>> &looks,
+                        u32 bucket_size) {
+    set<u32> bucket;
+    for (const auto &look : looks) {
+        for (const auto &l : look) {
+            CharReach cr = l.second;
+            if (cr.count() > 128) {
+                cr.flip();
+            }
+            map<u16, u16> lo2hi;
+
+            for (size_t i = cr.find_first(); i != CharReach::npos;) {
+                u8 it_hi = i >> 4;
+                u16 low_encode = 0;
+                while (i != CharReach::npos && (i >> 4) == it_hi) {
+                    low_encode |= 1 << (i & 0xf);
+                    i = cr.find_next(i);
+                }
+                lo2hi[low_encode] |= 1 << it_hi;
+            }
 
-    // Anchored start must have no successors other than startDs and initial.
-    for (auto v : adjacent_vertices_range(g.start, g)) {
-        if (v != initial && v != g.startDs) {
-            DEBUG_PRINTF("anchored start\n");
-            return false;
+            for (const auto &it : lo2hi) {
+                u32 hi_lo = (it.second << 16) | it.first;
+                bucket.insert(hi_lo);
+            }
         }
     }
-
-    return true;
+    DEBUG_PRINTF("shufti has %lu bucket(s)\n", bucket.size());
+    return bucket.size() <= bucket_size;
 }
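checkShuftiBuckets above estimates how many shufti buckets a multi-path lookaround would need: shufti classifies a byte by its two 4-bit nibbles, so each distinct (low-nibble set, high-nibble mask) pair costs a bucket. A per-class sketch of the same decomposition, with std::bitset standing in for CharReach and the complement trick (flipping classes wider than 128 bits) omitted; names are illustrative, not from the patch:

#include <bitset>
#include <cstddef>
#include <cstdint>
#include <map>

std::size_t countShuftiBuckets(const std::bitset<256> &cr) {
    // low-nibble set -> mask of high nibbles that admit exactly that set
    std::map<uint16_t, uint16_t> lo2hi;
    for (unsigned hi = 0; hi < 16; hi++) {
        uint16_t low_encode = 0;
        for (unsigned lo = 0; lo < 16; lo++) {
            if (cr.test((hi << 4) | lo)) {
                low_encode |= uint16_t(1u << lo);
            }
        }
        if (low_encode) {
            lo2hi[low_encode] |= uint16_t(1u << hi);
        }
    }
    // High nibbles sharing an identical low-nibble set can share a bucket,
    // so the number of distinct map entries is this class's bucket count.
    return lo2hi.size();
}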
 
 static
-bool getTransientPrefixReach(const NGHolder &g, u32 lag,
-                             map<s32, CharReach> &look) {
-    if (in_degree(g.accept, g) != 1) {
-        DEBUG_PRINTF("more than one accept\n");
+bool getTransientPrefixReach(const NGHolder &g, ReportID report, u32 lag,
+                             vector<map<s32, CharReach>> &looks) {
+    if (!isAcyclic(g)) {
+        DEBUG_PRINTF("contains back-edge\n");
         return false;
     }
 
-    // Must be a floating chain wired to startDs.
-    if (!hasSingleFloatingStart(g)) {
-        DEBUG_PRINTF("not a single floating start\n");
+    // Must be floating chains wired to startDs.
+    if (!isFloating(g)) {
+        DEBUG_PRINTF("not a floating start\n");
         return false;
     }
 
-    NFAVertex v = *(inv_adjacent_vertices(g.accept, g).first);
-    u32 i = lag + 1;
-    while (v != g.startDs) {
-        DEBUG_PRINTF("i=%u, v=%zu\n", i, g[v].index);
-        if (is_special(v, g)) {
-            DEBUG_PRINTF("special\n");
-            return false;
+    vector<NFAVertex> curr;
+    for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+        if (v == g.start || v == g.startDs) {
+            DEBUG_PRINTF("empty graph\n");
+            return true;
+        }
+        if (contains(g[v].reports, report)) {
+            curr.push_back(v);
         }
+    }
 
-        look[0 - i] = g[v].char_reach;
+    assert(!curr.empty());
 
-        NFAVertex next = NGHolder::null_vertex();
-        for (auto u : inv_adjacent_vertices_range(v, g)) {
-            if (u == g.start) {
-                continue; // Benign, checked by hasSingleFloatingStart
-            }
-            if (next == NGHolder::null_vertex()) {
-                next = u;
-                continue;
-            }
-            DEBUG_PRINTF("branch\n");
-            return false;
-        }
+    u32 total_len = curr.size();
+
+    for (const auto &v : curr) {
+        looks.emplace_back(map<s32, CharReach>());
+        looks.back()[0 - (lag + 1)] = g[v].char_reach;
+    }
 
-        if (next == NGHolder::null_vertex() || next == v) {
-            DEBUG_PRINTF("no predecessor or only self-loop\n");
-            // This graph is malformed -- all vertices in a graph that makes it
-            // to this analysis should have predecessors.
-            assert(0);
+    bool curr_active = false;
+
+    /* For each offset -i, we backwardly trace the path by vertices in curr.
+     * Once there are more than 8 paths and more than 64 bits total_len,
+     * which means that neither MULTIPATH_LOOKAROUND nor MULTIPATH_SHUFTI
+     * could be successfully built, we will give up the path finding.
+     * Otherwise, the loop will halt when all vertices in curr are startDs.
+     */
+    for (u32 i = lag + 2; i < (lag + 2) + MAX_BACK_LEN; i++) {
+        curr_active = false;
+        size_t curr_size = curr.size();
+        if (curr.size() > 1 && i > lag + MULTIPATH_MAX_LEN) {
+            DEBUG_PRINTF("range is larger than 16 in multi-path\n");
             return false;
         }
-        v = next;
-        i++;
-    }
+        for (size_t idx = 0; idx < curr_size; idx++) {
+            NFAVertex v = curr[idx];
+            if (v == g.startDs) {
+                continue;
+            }
+            assert(!is_special(v, g));
 
-    DEBUG_PRINTF("done\n");
-    return true;
-}
+            for (auto u : inv_adjacent_vertices_range(v, g)) {
+                if (u == g.start || u == g.startDs) {
+                    curr[idx] = g.startDs;
+                    break;
+                }
+            }
 
-static
-void normaliseLeftfix(map<s32, CharReach> &look) {
-    // We can erase entries where the reach is "all characters", except for the
-    // very first one -- this might be required to establish a minimum bound on
-    // the literal's match offset.
+ if (is_special(curr[idx], g)) { + continue; + } - // TODO: It would be cleaner to use a literal program instruction to check - // the minimum bound explicitly. + for (auto u : inv_adjacent_vertices_range(v, g)) { + curr_active = true; + if (curr[idx] == v) { + curr[idx] = u; + looks[idx][0 - i] = g[u].char_reach; + total_len++; + } else { + curr.push_back(u); + looks.push_back(looks[idx]); + (looks.back())[0 - i] = g[u].char_reach; + total_len += looks.back().size(); + } - if (look.empty()) { - return; + if (curr.size() > MAX_LOOKAROUND_PATHS && total_len > 64) { + DEBUG_PRINTF("too many branches\n"); + return false; + } + } + } + if (!curr_active) { + break; + } } - const auto earliest = begin(look)->first; + if (curr_active) { + DEBUG_PRINTF("single path too long\n"); + return false; + } - vector dead; - for (const auto &m : look) { - if (m.second.all() && m.first != earliest) { - dead.push_back(m.first); + // More than 8 paths, check multi-path shufti. + if (curr.size() > MAX_LOOKAROUND_PATHS) { + u32 bucket_size = total_len > 32 ? 8 : 16; + if (!checkShuftiBuckets(looks, bucket_size)) { + DEBUG_PRINTF("shufti has too many buckets\n"); + return false; } } - erase_all(&look, dead); + + assert(!looks.empty()); + if (looks.size() == 1) { + DEBUG_PRINTF("single lookaround\n"); + } else { + DEBUG_PRINTF("multi-path lookaround\n"); + } + DEBUG_PRINTF("done\n"); + return true; } bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v, - vector &lookaround) { + vector> &lookaround) { lookaround.clear(); const RoseGraph &g = build.g; @@ -687,36 +826,19 @@ bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v, return false; } - map look; - if (!getTransientPrefixReach(*leftfix.graph(), g[v].left.lag, look)) { - DEBUG_PRINTF("not a chain\n"); - return false; - } - - trimLiterals(build, v, look); - normaliseLeftfix(look); - - if (look.size() > MAX_LOOKAROUND_ENTRIES) { - DEBUG_PRINTF("lookaround too big (%zu entries)\n", look.size()); + vector> looks; + if (!getTransientPrefixReach(*leftfix.graph(), g[v].left.leftfix_report, + g[v].left.lag, looks)) { + DEBUG_PRINTF("graph has loop or too large\n"); return false; } - if (look.empty()) { - DEBUG_PRINTF("lookaround empty; this is weird\n"); + if (!trimMultipathLeftfix(build, v, looks)) { return false; } + transToLookaround(looks, lookaround); - lookaround.reserve(look.size()); - for (const auto &m : look) { - if (m.first < -128 || m.first > 127) { - DEBUG_PRINTF("range too big\n"); - return false; - } - s8 offset = verify_s8(m.first); - lookaround.emplace_back(offset, m.second); - } - - return true; + return !lookaround.empty(); } void mergeLookaround(vector &lookaround, diff --git a/src/rose/rose_build_lookaround.h b/src/rose/rose_build_lookaround.h index 993bd2291..aea87ccf8 100644 --- a/src/rose/rose_build_lookaround.h +++ b/src/rose/rose_build_lookaround.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,6 +36,9 @@ #include +/** \brief Max path number for multi-path lookaround. */ +#define MAX_LOOKAROUND_PATHS 8 + namespace ue2 { class CharReach; @@ -44,6 +47,7 @@ class RoseBuildImpl; /** \brief Lookaround entry prototype, describing the reachability at a given * distance from the end of a role match. 
*/ struct LookEntry { + LookEntry() : offset(0) {} LookEntry(s8 offset_in, const CharReach &reach_in) : offset(offset_in), reach(reach_in) {} s8 offset; //!< offset from role match location. @@ -63,7 +67,7 @@ size_t hash_value(const LookEntry &l) { } void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v, - std::vector &lookaround); + std::vector &look_more); /** * \brief If possible, render the prefix of the given vertex as a lookaround. @@ -72,7 +76,7 @@ void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v, * it can be satisfied with a lookaround alone. */ bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v, - std::vector &lookaround); + std::vector> &lookaround); void mergeLookaround(std::vector &lookaround, const std::vector &more_lookaround); diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 01633c06c..682a87c38 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,9 +33,12 @@ #include "rose_build_matchers.h" +#include "rose_build_dump.h" #include "rose_build_impl.h" +#include "rose_build_lit_accel.h" #include "rose_build_width.h" #include "hwlm/hwlm_build.h" +#include "hwlm/hwlm_internal.h" #include "hwlm/hwlm_literal.h" #include "nfa/castlecompile.h" #include "nfa/nfa_api_queue.h" @@ -58,6 +61,8 @@ using boost::adaptors::map_values; namespace ue2 { +static const size_t MAX_ACCEL_STRING_LEN = 16; + #ifdef DEBUG static UNUSED string dumpMask(const vector &v) { @@ -206,7 +211,7 @@ bool maskFromPreds(const RoseBuildImpl &build, const rose_literal_id &id, } u32 u_lit_id = *(g[u].literals.begin()); - const rose_literal_id &u_id = build.literals.right.at(u_lit_id); + const rose_literal_id &u_id = build.literals.at(u_lit_id); DEBUG_PRINTF("u has lit: %s\n", escapeString(u_id.s).c_str()); // Number of characters to take from the back of u's literal. @@ -341,12 +346,8 @@ void findMoreLiteralMasks(RoseBuildImpl &build) { } vector candidates; - for (const auto &e : build.literals.right) { - const u32 id = e.first; - const auto &lit = e.second; - - // This pass takes place before final IDs are assigned to literals. - assert(!build.hasFinalId(id)); + for (u32 id = 0; id < build.literals.size(); id++) { + const auto &lit = build.literals.at(id); if (lit.delay || build.isDelayed(id)) { continue; @@ -375,7 +376,7 @@ void findMoreLiteralMasks(RoseBuildImpl &build) { } for (const u32 &id : candidates) { - const auto &lit = build.literals.right.at(id); + const auto &lit = build.literals.at(id); auto &lit_info = build.literal_info.at(id); vector msk, cmp; @@ -404,7 +405,6 @@ void findMoreLiteralMasks(RoseBuildImpl &build) { lit_info.vertices.clear(); // Preserve other properties. 
- new_info.requires_explode = lit_info.requires_explode; new_info.requires_benefits = lit_info.requires_benefits; } } @@ -491,8 +491,14 @@ bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id, return false; } - if (build.literals.right.at(id).s.length() > max_len) { - DEBUG_PRINTF("requires literal check\n"); + size_t len = build.literals.at(id).s.length(); + if (len > max_len) { + DEBUG_PRINTF("long literal, requires confirm\n"); + return false; + } + + if (len > ROSE_SHORT_LITERAL_LEN_MAX) { + DEBUG_PRINTF("medium-length literal, requires confirm\n"); return false; } @@ -610,7 +616,7 @@ u64a literalMinReportOffset(const RoseBuildImpl &build, // If this literal in the undelayed literal corresponding to some delayed // literals, we must take their minimum offsets into account. for (const u32 &delayed_id : info.delayed_ids) { - const auto &delayed_lit = build.literals.right.at(delayed_id); + const auto &delayed_lit = build.literals.at(delayed_id); const auto &delayed_info = build.literal_info.at(delayed_id); u64a delayed_min_offset = literalMinReportOffset(build, delayed_lit, delayed_info); @@ -626,159 +632,245 @@ u64a literalMinReportOffset(const RoseBuildImpl &build, return lit_min_offset; } -vector fillHamsterLiteralList(const RoseBuildImpl &build, - rose_literal_table table, - size_t max_len, u32 max_offset) { +template +void trim_to_suffix(Container &c, size_t len) { + if (c.size() <= len) { + return; + } + + size_t suffix_len = c.size() - len; + c.erase(c.begin(), c.begin() + suffix_len); +} + +namespace { + +/** \brief Prototype for literal matcher construction. */ +struct MatcherProto { + /** \brief Literal fragments used to construct the literal matcher. */ vector lits; - for (const auto &e : build.literals.right) { - const u32 id = e.first; - if (!build.hasFinalId(id)) { - continue; - } + /** \brief Longer literals used for acceleration analysis. */ + vector accel_lits; - if (e.second.delay) { - continue; /* delay id's are virtual-ish */ - } + /** \brief The history required by the literal matcher. */ + size_t history_required = 0; - if (e.second.table != table) { - continue; /* wrong table */ - } + /** \brief Insert the contents of another MatcherProto. */ + void insert(const MatcherProto &a); +}; +} + +/** + * \brief Build up a vector of literals (and associated other data) for the + * given table. + * + * If max_offset is specified (and not ROSE_BOUND_INF), then literals that can + * only lead to a pattern match after max_offset may be excluded. 
+ */ +static +MatcherProto makeMatcherProto(const RoseBuildImpl &build, + const vector &fragments, + rose_literal_table table, bool delay_rebuild, + size_t max_len, u32 max_offset = ROSE_BOUND_INF) { + MatcherProto mp; - assert(id < build.literal_info.size()); - const rose_literal_info &info = build.literal_info[id]; - u32 final_id = info.final_id; - rose_group groups = info.group_mask; - /* Note: requires_benefits are handled in the literal entries */ - const ue2_literal &lit = e.second.s; + if (delay_rebuild) { + assert(table == ROSE_FLOATING); + assert(build.cc.streaming); + } - DEBUG_PRINTF("lit='%s'\n", escapeString(lit).c_str()); + for (const auto &f : fragments) { + for (u32 id : f.lit_ids) { + const rose_literal_id &lit = build.literals.at(id); - if (max_offset != ROSE_BOUND_INF) { - u64a min_report = literalMinReportOffset(build, e.second, info); - if (min_report > max_offset) { - DEBUG_PRINTF("min report offset=%llu exceeds max_offset=%u\n", - min_report, max_offset); - continue; + if (lit.table != table) { + continue; /* wrong table */ } - } - - const vector &msk = e.second.msk; - const vector &cmp = e.second.cmp; - bool noruns = isNoRunsLiteral(build, id, info, max_len); + if (lit.delay) { + continue; /* delay id's are virtual-ish */ + } - if (info.requires_explode) { - DEBUG_PRINTF("exploding lit\n"); + assert(id < build.literal_info.size()); + const auto &info = build.literal_info.at(id); - // We do not require_explode for long literals. - assert(lit.length() <= max_len); + /* Note: requires_benefits are handled in the literal entries */ + const ue2_literal &s = lit.s; - case_iter cit = caseIterateBegin(lit); - case_iter cite = caseIterateEnd(); - for (; cit != cite; ++cit) { - string s = *cit; - bool nocase = false; + DEBUG_PRINTF("lit='%s' (len %zu)\n", escapeString(s).c_str(), + s.length()); - DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d msk=%s, " - "cmp=%s (exploded)\n", - final_id, escapeString(s).c_str(), nocase, noruns, - dumpMask(msk).c_str(), dumpMask(cmp).c_str()); + // When building the delay rebuild table, we only want to include + // literals that have delayed variants. + if (delay_rebuild && info.delayed_ids.empty()) { + DEBUG_PRINTF("not needed for delay rebuild\n"); + continue; + } - if (!maskIsConsistent(s, nocase, msk, cmp)) { - DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); + if (max_offset != ROSE_BOUND_INF) { + u64a min_report = literalMinReportOffset(build, lit, info); + if (min_report > max_offset) { + DEBUG_PRINTF("min report offset=%llu exceeds " + "max_offset=%u\n", min_report, max_offset); continue; } + } + + const vector &msk = lit.msk; + const vector &cmp = lit.cmp; + bool noruns = isNoRunsLiteral(build, id, info, max_len); - lits.emplace_back(move(s), nocase, noruns, final_id, groups, - msk, cmp); + size_t lit_hist_len = 0; + if (build.cc.streaming) { + lit_hist_len = max(msk.size(), min(s.length(), max_len)); + lit_hist_len = lit_hist_len ? 
lit_hist_len - 1 : 0; } - } else { - string s = lit.get_string(); - bool nocase = lit.any_nocase(); + DEBUG_PRINTF("lit requires %zu bytes of history\n", lit_hist_len); + assert(lit_hist_len <= build.cc.grey.maxHistoryAvailable); - DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, " - "cmp=%s\n", - final_id, escapeString(s).c_str(), (int)nocase, noruns, - dumpMask(msk).c_str(), dumpMask(cmp).c_str()); + auto lit_final = s; // copy - if (s.length() > max_len) { - DEBUG_PRINTF("truncating to tail of length %zu\n", max_len); - s.erase(0, s.length() - max_len); + if (lit_final.length() > ROSE_SHORT_LITERAL_LEN_MAX) { + DEBUG_PRINTF("truncating to tail of length %zu\n", + size_t{ROSE_SHORT_LITERAL_LEN_MAX}); + lit_final.erase(0, lit_final.length() + - ROSE_SHORT_LITERAL_LEN_MAX); // We shouldn't have set a threshold below 8 chars. - assert(msk.size() <= max_len); + assert(msk.size() <= ROSE_SHORT_LITERAL_LEN_MAX); + assert(!noruns); } - if (!maskIsConsistent(s, nocase, msk, cmp)) { + const auto &s_final = lit_final.get_string(); + bool nocase = lit_final.any_nocase(); + + DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, " + "cmp=%s\n", f.fragment_id, + escapeString(s_final).c_str(), (int)nocase, noruns, + dumpMask(msk).c_str(), dumpMask(cmp).c_str()); + + if (!maskIsConsistent(s_final, nocase, msk, cmp)) { DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); continue; } - lits.emplace_back(move(s), nocase, noruns, final_id, groups, msk, - cmp); + mp.accel_lits.emplace_back(s.get_string(), s.any_nocase(), msk, cmp, + info.group_mask); + mp.history_required = max(mp.history_required, lit_hist_len); + + u32 prog_offset = delay_rebuild ? f.delay_program_offset + : f.lit_program_offset; + const auto &groups = f.groups; + + mp.lits.emplace_back(move(s_final), nocase, noruns, prog_offset, + groups, msk, cmp); } } - return lits; + sort_and_unique(mp.lits); + + // Literals used for acceleration must be limited to max_len, as that's all + // we can see in history. 
+ for_each(begin(mp.accel_lits), end(mp.accel_lits), + [&max_len](AccelString &a) { + trim_to_suffix(a.s, max_len); + trim_to_suffix(a.msk, max_len); + trim_to_suffix(a.cmp, max_len); + }); + + sort_and_unique(mp.accel_lits); + + return mp; +} + +void MatcherProto::insert(const MatcherProto &a) { + ::ue2::insert(&lits, lits.end(), a.lits); + ::ue2::insert(&accel_lits, accel_lits.end(), a.accel_lits); + sort_and_unique(lits); + sort_and_unique(accel_lits); + history_required = max(history_required, a.history_required); +} + +static +void buildAccel(const RoseBuildImpl &build, const MatcherProto &mp, + HWLM &hwlm) { + if (!build.cc.grey.hamsterAccelForward) { + return; + } + + if (hwlm.type == HWLM_ENGINE_NOOD) { + return; + } + + buildForwardAccel(&hwlm, mp.accel_lits, build.getInitialGroups()); } -aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, - size_t longLitLengthThreshold, - rose_group *fgroups, - size_t *fsize, - size_t *historyRequired) { - *fsize = 0; +bytecode_ptr buildFloatingMatcher(const RoseBuildImpl &build, + const vector &fragments, + size_t longLitLengthThreshold, + rose_group *fgroups, + size_t *historyRequired) { *fgroups = 0; - auto fl = fillHamsterLiteralList(build, ROSE_FLOATING, - longLitLengthThreshold); - if (fl.empty()) { + auto mp = makeMatcherProto(build, fragments, ROSE_FLOATING, false, + longLitLengthThreshold); + if (mp.lits.empty()) { DEBUG_PRINTF("empty floating matcher\n"); return nullptr; } + dumpMatcherLiterals(mp.lits, "floating", build.cc.grey); - for (const hwlmLiteral &hlit : fl) { - *fgroups |= hlit.groups; + for (const hwlmLiteral &lit : mp.lits) { + *fgroups |= lit.groups; } - hwlmStreamingControl ctl; - hwlmStreamingControl *ctlp; - if (build.cc.streaming) { - ctl.history_max = build.cc.grey.maxHistoryAvailable; - ctl.history_min = MAX(*historyRequired, - build.cc.grey.minHistoryAvailable); - DEBUG_PRINTF("streaming control, history max=%zu, min=%zu\n", - ctl.history_max, ctl.history_min); - ctlp = &ctl; - } else { - ctlp = nullptr; // Null for non-streaming. 
- } - - aligned_unique_ptr ftable = - hwlmBuild(fl, ctlp, false, build.cc, build.getInitialGroups()); - if (!ftable) { + auto hwlm = hwlmBuild(mp.lits, false, build.cc, build.getInitialGroups()); + if (!hwlm) { throw CompileError("Unable to generate bytecode."); } + buildAccel(build, mp, *hwlm); + if (build.cc.streaming) { - DEBUG_PRINTF("literal_history_required=%zu\n", - ctl.literal_history_required); - assert(ctl.literal_history_required <= - build.cc.grey.maxHistoryAvailable); - *historyRequired = max(*historyRequired, - ctl.literal_history_required); - } - - *fsize = hwlmSize(ftable.get()); - assert(*fsize); - DEBUG_PRINTF("built floating literal table size %zu bytes\n", *fsize); - return ftable; + DEBUG_PRINTF("history_required=%zu\n", mp.history_required); + assert(mp.history_required <= build.cc.grey.maxHistoryAvailable); + *historyRequired = max(*historyRequired, mp.history_required); + } + + DEBUG_PRINTF("built floating literal table size %zu bytes\n", hwlm.size()); + return hwlm; } -aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, - size_t *sbsize) { - *sbsize = 0; +bytecode_ptr +buildDelayRebuildMatcher(const RoseBuildImpl &build, + const vector &fragments, + size_t longLitLengthThreshold) { + if (!build.cc.streaming) { + DEBUG_PRINTF("not streaming\n"); + return nullptr; + } + + auto mp = makeMatcherProto(build, fragments, ROSE_FLOATING, true, + longLitLengthThreshold); + if (mp.lits.empty()) { + DEBUG_PRINTF("empty delay rebuild matcher\n"); + return nullptr; + } + dumpMatcherLiterals(mp.lits, "delay_rebuild", build.cc.grey); + + auto hwlm = hwlmBuild(mp.lits, false, build.cc, build.getInitialGroups()); + if (!hwlm) { + throw CompileError("Unable to generate bytecode."); + } + buildAccel(build, mp, *hwlm); + + DEBUG_PRINTF("built delay rebuild table size %zu bytes\n", hwlm.size()); + return hwlm; +} + +bytecode_ptr +buildSmallBlockMatcher(const RoseBuildImpl &build, + const vector &fragments) { if (build.cc.streaming) { DEBUG_PRINTF("streaming mode\n"); return nullptr; @@ -791,74 +883,75 @@ aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, return nullptr; } - auto lits = fillHamsterLiteralList( - build, ROSE_FLOATING, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); - if (lits.empty()) { + auto mp = makeMatcherProto(build, fragments, ROSE_FLOATING, false, + ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); + if (mp.lits.empty()) { DEBUG_PRINTF("no floating table\n"); return nullptr; - } else if (lits.size() == 1) { + } else if (mp.lits.size() == 1) { DEBUG_PRINTF("single floating literal, noodle will be fast enough\n"); return nullptr; } - auto anchored_lits = - fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK, - ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); - if (anchored_lits.empty()) { + auto mp_anchored = makeMatcherProto(build, fragments, + ROSE_ANCHORED_SMALL_BLOCK, false, + ROSE_SMALL_BLOCK_LEN, + ROSE_SMALL_BLOCK_LEN); + if (mp_anchored.lits.empty()) { DEBUG_PRINTF("no small-block anchored literals\n"); return nullptr; } - lits.insert(lits.end(), anchored_lits.begin(), anchored_lits.end()); + mp.insert(mp_anchored); + dumpMatcherLiterals(mp.lits, "smallblock", build.cc.grey); // None of our literals should be longer than the small block limit. 
- assert(all_of(begin(lits), end(lits), [](const hwlmLiteral &lit) { + assert(all_of(begin(mp.lits), end(mp.lits), [](const hwlmLiteral &lit) { return lit.s.length() <= ROSE_SMALL_BLOCK_LEN; })); - if (lits.empty()) { + if (mp.lits.empty()) { DEBUG_PRINTF("no literals shorter than small block len\n"); return nullptr; } - aligned_unique_ptr hwlm = - hwlmBuild(lits, nullptr, true, build.cc, build.getInitialGroups()); + auto hwlm = hwlmBuild(mp.lits, true, build.cc, build.getInitialGroups()); if (!hwlm) { throw CompileError("Unable to generate bytecode."); } - *sbsize = hwlmSize(hwlm.get()); - assert(*sbsize); - DEBUG_PRINTF("built small block literal table size %zu bytes\n", *sbsize); + buildAccel(build, mp, *hwlm); + + DEBUG_PRINTF("built small block literal table size %zu bytes\n", + hwlm.size()); return hwlm; } -aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, - size_t *esize) { - *esize = 0; +bytecode_ptr +buildEodAnchoredMatcher(const RoseBuildImpl &build, + const vector &fragments) { + auto mp = makeMatcherProto(build, fragments, ROSE_EOD_ANCHORED, false, + build.ematcher_region_size); - auto el = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED, - build.ematcher_region_size); - - if (el.empty()) { + if (mp.lits.empty()) { DEBUG_PRINTF("no eod anchored literals\n"); assert(!build.ematcher_region_size); return nullptr; } + dumpMatcherLiterals(mp.lits, "eod", build.cc.grey); assert(build.ematcher_region_size); - hwlmStreamingControl *ctlp = nullptr; // not a streaming case - aligned_unique_ptr etable = - hwlmBuild(el, ctlp, true, build.cc, build.getInitialGroups()); - if (!etable) { + auto hwlm = hwlmBuild(mp.lits, true, build.cc, build.getInitialGroups()); + if (!hwlm) { throw CompileError("Unable to generate bytecode."); } - *esize = hwlmSize(etable.get()); - assert(*esize); - DEBUG_PRINTF("built eod-anchored literal table size %zu bytes\n", *esize); - return etable; + buildAccel(build, mp, *hwlm); + + DEBUG_PRINTF("built eod-anchored literal table size %zu bytes\n", + hwlm.size()); + return hwlm; } } // namespace ue2 diff --git a/src/rose/rose_build_matchers.h b/src/rose/rose_build_matchers.h index a25dbca39..2b1afc8c6 100644 --- a/src/rose/rose_build_matchers.h +++ b/src/rose/rose_build_matchers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,36 +35,47 @@ #define ROSE_BUILD_MATCHERS_H #include "rose_build_impl.h" +#include "util/bytecode_ptr.h" #include +struct Grey; struct HWLM; namespace ue2 { -struct hwlmLiteral; +struct LitFragment { + LitFragment(u32 fragment_id_in, rose_group groups_in, u32 lit_id) + : fragment_id(fragment_id_in), groups(groups_in), lit_ids({lit_id}) {} + LitFragment(u32 fragment_id_in, rose_group groups_in, + std::vector lit_ids_in) + : fragment_id(fragment_id_in), groups(groups_in), + lit_ids(std::move(lit_ids_in)) {} + u32 fragment_id; + rose_group groups; + std::vector lit_ids; + u32 lit_program_offset = ROSE_INVALID_PROG_OFFSET; + u32 delay_program_offset = ROSE_INVALID_PROG_OFFSET; +}; -/** - * \brief Build up a vector of literals for the given table. - * - * If max_offset is specified (and not ROSE_BOUND_INF), then literals that can - * only lead to a pattern match after max_offset may be excluded. 
- */ -std::vector fillHamsterLiteralList(const RoseBuildImpl &build, - rose_literal_table table, size_t max_len, - u32 max_offset = ROSE_BOUND_INF); +bytecode_ptr +buildFloatingMatcher(const RoseBuildImpl &build, + const std::vector &fragments, + size_t longLitLengthThreshold, rose_group *fgroups, + size_t *historyRequired); -aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, - size_t longLitLengthThreshold, - rose_group *fgroups, - size_t *fsize, - size_t *historyRequired); +bytecode_ptr +buildDelayRebuildMatcher(const RoseBuildImpl &build, + const std::vector &fragments, + size_t longLitLengthThreshold); -aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, - size_t *sbsize); +bytecode_ptr +buildSmallBlockMatcher(const RoseBuildImpl &build, + const std::vector &fragments); -aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, - size_t *esize); +bytecode_ptr +buildEodAnchoredMatcher(const RoseBuildImpl &build, + const std::vector &fragments); void findMoreLiteralMasks(RoseBuildImpl &build); diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 54a7390ea..d638e589e 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -1054,14 +1054,14 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, vector> ulits; ulits.reserve(tbi.g[u].literals.size()); for (u32 id : tbi.g[u].literals) { - ulits.push_back(make_pair(&tbi.literals.right.at(id), ulag)); + ulits.emplace_back(&tbi.literals.at(id), ulag); } u32 vlag = tbi.g[v].left.lag; vector> vlits; vlits.reserve(tbi.g[v].literals.size()); for (u32 id : tbi.g[v].literals) { - vlits.push_back(make_pair(&tbi.literals.right.at(id), vlag)); + vlits.emplace_back(&tbi.literals.at(id), vlag); } if (!compatibleLiteralsForMerge(ulits, vlits)) { @@ -1130,7 +1130,7 @@ bool checkPredDelays(const RoseBuildImpl &tbi, const deque &v1, vector pred_rose_lits; pred_rose_lits.reserve(pred_lits.size()); for (const auto &p : pred_lits) { - pred_rose_lits.push_back(&tbi.literals.right.at(p)); + pred_rose_lits.push_back(&tbi.literals.at(p)); } for (auto v : v2) { @@ -1140,7 +1140,7 @@ bool checkPredDelays(const RoseBuildImpl &tbi, const deque &v1, } for (const u32 vlit : tbi.g[v].literals) { - const rose_literal_id &vl = tbi.literals.right.at(vlit); + const rose_literal_id &vl = tbi.literals.at(vlit); assert(!vl.delay); // this should never have got this far? for (const auto &ul : pred_rose_lits) { assert(!ul->delay); // this should never have got this far? 
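Several hunks above (and more below) replace push_back(make_pair(...)) with emplace_back(...). The behavioural difference is small but worth seeing once: emplace_back forwards its arguments and constructs the element directly in the vector's storage, skipping the temporary pair (illustrative element type, not from the patch):

#include <utility>
#include <vector>

void demo(std::vector<std::pair<const char *, unsigned>> &v) {
    // Builds a temporary pair, then move-constructs it into the vector:
    v.push_back(std::make_pair("lit", 1u));
    // Constructs the pair in place from the arguments, no temporary:
    v.emplace_back("lit", 1u);
}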
@@ -1195,7 +1195,7 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, u32 ulag = tbi.g[a].left.lag; for (u32 id : tbi.g[a].literals) { - ulits.push_back(make_pair(&tbi.literals.right.at(id), ulag)); + ulits.emplace_back(&tbi.literals.at(id), ulag); } } @@ -1207,7 +1207,7 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, u32 vlag = tbi.g[a].left.lag; for (u32 id : tbi.g[a].literals) { - vlits.push_back(make_pair(&tbi.literals.right.at(id), vlag)); + vlits.emplace_back(&tbi.literals.at(id), vlag); } } @@ -1759,7 +1759,7 @@ void replaceTops(NGHolder &h, const map &top_mapping) { DEBUG_PRINTF("vertex %zu has top %u\n", h[v].index, t); new_tops.insert(top_mapping.at(t)); } - h[e].tops = move(new_tops); + h[e].tops = std::move(new_tops); } } @@ -2730,7 +2730,7 @@ u32 allowedSquashDistance(const CharReach &cr, u32 min_width, /* TODO: inspect further back in the pattern */ for (u32 lit_id : g[tv].literals) { - const rose_literal_id &lit = tbi.literals.right.at(lit_id); + const rose_literal_id &lit = tbi.literals.at(lit_id); if (lit.delay) { return 0; /* TODO: better */ } diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 28b885bd5..01be11ef8 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -28,7 +28,7 @@ #include "rose_build_impl.h" -#include "hwlm/hwlm_build.h" +#include "hwlm/hwlm_literal.h" #include "nfa/castlecompile.h" #include "nfa/goughcompile.h" #include "nfa/mcclellancompile_util.h" @@ -75,10 +75,8 @@ RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, : cc(cc_in), root(add_vertex(g)), anchored_root(add_vertex(g)), - delay_base_id(MO_INVALID_IDX), hasSom(false), group_end(0), - anchored_base_id(MO_INVALID_IDX), ematcher_region_size(0), eod_event_literal_id(MO_INVALID_IDX), max_rose_anchored_floating_overlap(0), @@ -156,14 +154,12 @@ bool isInTable(const RoseBuildImpl &tbi, RoseVertex v, // All literals for a given vertex will be in the same table, so we need // only inspect the first one. - const auto lit_table = tbi.literals.right.at(*lit_ids.begin()).table; + const auto lit_table = tbi.literals.at(*lit_ids.begin()).table; -#ifndef NDEBUG // Verify that all literals for this vertex are in the same table. 
- for (auto lit_id : lit_ids) { - assert(tbi.literals.right.at(lit_id).table == lit_table); - } -#endif + assert(all_of_in(lit_ids, [&](u32 lit_id) { + return tbi.literals.at(lit_id).table == lit_table; + })); return lit_table == table; } @@ -213,7 +209,7 @@ size_t RoseBuildImpl::maxLiteralLen(RoseVertex v) const { size_t maxlen = 0; for (const auto &lit_id : lit_ids) { - maxlen = max(maxlen, literals.right.at(lit_id).elength()); + maxlen = max(maxlen, literals.at(lit_id).elength()); } return maxlen; @@ -226,7 +222,7 @@ size_t RoseBuildImpl::minLiteralLen(RoseVertex v) const { size_t minlen = ROSE_BOUND_INF; for (const auto &lit_id : lit_ids) { - minlen = min(minlen, literals.right.at(lit_id).elength()); + minlen = min(minlen, literals.at(lit_id).elength()); } return minlen; @@ -241,11 +237,6 @@ unique_ptr makeRoseBuilder(ReportManager &rm, return ue2::make_unique(rm, ssm, smwr, cc, boundary); } -size_t roseSize(const RoseEngine *t) { - assert(t); - return t->size; -} - bool roseIsPureLiteral(const RoseEngine *t) { return t->runtimeImpl == ROSE_RUNTIME_PURE_LITERAL; } @@ -294,12 +285,11 @@ size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b) { static const rose_literal_id &getOverlapLiteral(const RoseBuildImpl &tbi, u32 literal_id) { - map::const_iterator it = - tbi.anchoredLitSuffix.find(literal_id); + auto it = tbi.anchoredLitSuffix.find(literal_id); if (it != tbi.anchoredLitSuffix.end()) { return it->second; } - return tbi.literals.right.at(literal_id); + return tbi.literals.at(literal_id); } ue2_literal findNonOverlappingTail(const set &lits, @@ -375,16 +365,14 @@ u32 RoseBuildImpl::calcSuccMaxBound(RoseVertex u) const { u32 RoseBuildImpl::getLiteralId(const ue2_literal &s, u32 delay, rose_literal_table table) { - DEBUG_PRINTF("getting id for %s\n", dumpString(s).c_str()); + DEBUG_PRINTF("getting id for %s in table %d\n", dumpString(s).c_str(), + table); assert(table != ROSE_ANCHORED); rose_literal_id key(s, table, delay); - u32 numLiterals = verify_u32(literals.left.size()); - RoseLiteralMap::iterator it; - bool inserted; - tie(it, inserted) - = literals.insert(RoseLiteralMap::value_type(key, numLiterals)); - u32 id = it->right; + auto m = literals.insert(key); + u32 id = m.first; + bool inserted = m.second; if (inserted) { literal_info.push_back(rose_literal_info()); @@ -464,19 +452,17 @@ rose_literal_id::rose_literal_id(const ue2_literal &s_in, u32 RoseBuildImpl::getLiteralId(const ue2_literal &s, const vector &msk, const vector &cmp, u32 delay, rose_literal_table table) { - DEBUG_PRINTF("getting id for %s\n", dumpString(s).c_str()); + DEBUG_PRINTF("getting id for %s in table %d\n", dumpString(s).c_str(), + table); assert(table != ROSE_ANCHORED); rose_literal_id key(s, msk, cmp, table, delay); - u32 numLiterals = verify_u32(literals.left.size()); /* ue2_literals are always uppercased if nocase and must have an * alpha char */ - RoseLiteralMap::iterator it; - bool inserted; - tie(it, inserted) = literals.insert( - RoseLiteralMap::value_type(key, numLiterals)); - u32 id = it->right; + auto m = literals.insert(key); + u32 id = m.first; + bool inserted = m.second; if (inserted) { literal_info.push_back(rose_literal_info()); @@ -493,40 +479,14 @@ u32 RoseBuildImpl::getLiteralId(const ue2_literal &s, const vector &msk, return id; } -bool RoseBuildImpl::hasLiteral(const ue2_literal &s, - rose_literal_table table) const { - DEBUG_PRINTF("looking if %s exists\n", dumpString(s).c_str()); - assert(table != ROSE_ANCHORED); - - for (RoseLiteralMap::left_map::const_iterator it - 
= literals.left.lower_bound(rose_literal_id(s, table, 0)); - it != literals.left.end(); ++it) { - if (it->first.table != table || it->first.s != s) { - break; - } - const rose_literal_info &info = literal_info[it->second]; - if (!info.vertices.empty()) { - return true; - } - } - - DEBUG_PRINTF("(used) literal not found\n"); - - return false; -} - u32 RoseBuildImpl::getNewLiteralId() { rose_literal_id key(ue2_literal(), ROSE_ANCHORED, 0); - u32 numLiterals = verify_u32(literals.left.size()); + u32 numLiterals = verify_u32(literals.size()); key.distinctiveness = numLiterals; - RoseLiteralMap::iterator it; - bool inserted; - tie(it, inserted) - = literals.insert(RoseLiteralMap::value_type(key, numLiterals)); - u32 id = it->right; - - assert(inserted); + auto m = literals.insert(key); + assert(m.second); + u32 id = m.first; literal_info.push_back(rose_literal_info()); assert(literal_info.size() == id + 1); @@ -536,350 +496,6 @@ u32 RoseBuildImpl::getNewLiteralId() { return id; } -static -bool requiresDedupe(const NGHolder &h, const ue2::flat_set &reports, - const Grey &grey) { - /* TODO: tighten */ - NFAVertex seen_vert = NGHolder::null_vertex(); - - for (auto v : inv_adjacent_vertices_range(h.accept, h)) { - if (has_intersection(h[v].reports, reports)) { - if (seen_vert != NGHolder::null_vertex()) { - return true; - } - seen_vert = v; - } - } - - for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) { - if (has_intersection(h[v].reports, reports)) { - if (seen_vert != NGHolder::null_vertex()) { - return true; - } - seen_vert = v; - } - } - - if (seen_vert) { - /* if the reporting vertex is part of of a terminal repeat, the - * construction process may reform the graph splitting it into two - * vertices (pos, cyclic) and hence require dedupe */ - vector repeats; - findRepeats(h, grey.minExtBoundedRepeatSize, &repeats); - for (const auto &repeat : repeats) { - if (find(repeat.vertices.begin(), repeat.vertices.end(), - seen_vert) != repeat.vertices.end()) { - return true; - } - } - } - - return false; -} - -class RoseDedupeAuxImpl : public RoseDedupeAux { -public: - explicit RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in); - bool requiresDedupeSupport( - const ue2::flat_set &reports) const override; - -private: - bool hasSafeMultiReports(const ue2::flat_set &reports) const; - - const RoseBuildImpl &tbi; - map> vert_map; //!< ordinary literals - map> sb_vert_map; //!< small block literals - map> suffix_map; - map> outfix_map; - map> puff_map; -}; - -unique_ptr RoseBuildImpl::generateDedupeAux() const { - return ue2::make_unique(*this); -} - -RoseDedupeAux::~RoseDedupeAux() { -} - -RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in) - : tbi(tbi_in) { - const RoseGraph &g = tbi.g; - - set suffixes; - - for (auto v : vertices_range(g)) { - // Literals in the small block table are "shadow" copies of literals in - // the other tables that do not run in the same runtime invocation. - // Dedupe key assignment will be taken care of by the real literals. - if (tbi.hasLiteralInTable(v, ROSE_ANCHORED_SMALL_BLOCK)) { - for (const auto &report_id : g[v].reports) { - sb_vert_map[report_id].insert(v); - } - } else { - for (const auto &report_id : g[v].reports) { - vert_map[report_id].insert(v); - } - } - - // Several vertices may share a suffix, so we collect the set of - // suffixes first to avoid repeating work. 
- if (g[v].suffix) { - suffixes.insert(g[v].suffix); - } - } - - for (const auto &suffix : suffixes) { - for (const auto &report_id : all_reports(suffix)) { - suffix_map[report_id].insert(suffix); - } - } - - for (const auto &outfix : tbi.outfixes) { - for (const auto &report_id : all_reports(outfix)) { - outfix_map[report_id].insert(&outfix); - } - } - - if (tbi.mpv_outfix) { - auto *mpv = tbi.mpv_outfix->mpv(); - for (const auto &puff : mpv->puffettes) { - puff_map[puff.report].insert(&puff); - } - for (const auto &puff : mpv->triggered_puffettes) { - puff_map[puff.report].insert(&puff); - } - } -} - -static -vector makePath(const rose_literal_id &lit) { - vector path(begin(lit.s), end(lit.s)); - for (u32 i = 0; i < lit.delay; i++) { - path.push_back(CharReach::dot()); - } - return path; -} - -/** - * \brief True if one of the given literals overlaps with the suffix of - * another, meaning that they could arrive at the same offset. - */ -static -bool literalsCouldRace(const rose_literal_id &lit1, - const rose_literal_id &lit2) { - DEBUG_PRINTF("compare %s (delay %u) and %s (delay %u)\n", - dumpString(lit1.s).c_str(), lit1.delay, - dumpString(lit2.s).c_str(), lit2.delay); - - // Add dots on the end of each literal for delay. - const auto v1 = makePath(lit1); - const auto v2 = makePath(lit2); - - // See if the smaller path is a suffix of the larger path. - const auto *smaller = v1.size() < v2.size() ? &v1 : &v2; - const auto *bigger = v1.size() < v2.size() ? &v2 : &v1; - auto r = mismatch(smaller->rbegin(), smaller->rend(), bigger->rbegin(), - overlaps); - return r.first == smaller->rend(); -} - -bool RoseDedupeAuxImpl::hasSafeMultiReports( - const flat_set &reports) const { - if (reports.size() <= 1) { - return true; - } - - /* We have more than one ReportID corresponding to the external ID that is - * presented to the user. These may differ in offset adjustment, bounds - * checks, etc. */ - - /* TODO: work out if these differences will actually cause problems */ - - /* One common case where we know we don't have a problem is if there are - * precisely two reports, one for the main Rose path and one for the - * "small block matcher" path. 
*/ - if (reports.size() == 2) { - ReportID id1 = *reports.begin(); - ReportID id2 = *reports.rbegin(); - - bool has_verts_1 = contains(vert_map, id1); - bool has_verts_2 = contains(vert_map, id2); - bool has_sb_verts_1 = contains(sb_vert_map, id1); - bool has_sb_verts_2 = contains(sb_vert_map, id2); - - if (has_verts_1 != has_verts_2 && has_sb_verts_1 != has_sb_verts_2) { - DEBUG_PRINTF("two reports, one full and one small block: ok\n"); - return true; - } - } - - DEBUG_PRINTF("more than one report\n"); - return false; -} - -bool RoseDedupeAuxImpl::requiresDedupeSupport( - const ue2::flat_set &reports) const { - /* TODO: this could be expanded to check for offset or character - constraints */ - - DEBUG_PRINTF("reports: %s\n", as_string_list(reports).c_str()); - - const RoseGraph &g = tbi.g; - - bool has_suffix = false; - bool has_outfix = false; - - if (!hasSafeMultiReports(reports)) { - DEBUG_PRINTF("multiple reports not safe\n"); - return true; - } - - set roles; - set suffixes; - set outfixes; - set puffettes; - for (ReportID r : reports) { - if (contains(vert_map, r)) { - insert(&roles, vert_map.at(r)); - } - if (contains(suffix_map, r)) { - insert(&suffixes, suffix_map.at(r)); - } - - if (contains(outfix_map, r)) { - insert(&outfixes, outfix_map.at(r)); - } - - if (contains(puff_map, r)) { - insert(&puffettes, puff_map.at(r)); - } - } - - /* roles */ - - map lits; // Literal ID -> count of occurrences. - - const bool has_role = !roles.empty(); - for (auto v : roles) { - for (const auto &lit : g[v].literals) { - lits[lit]++; - } - if (g[v].eod_accept) { - // Literals plugged into this EOD accept must be taken into account - // as well. - for (auto u : inv_adjacent_vertices_range(v, g)) { - for (const auto &lit : g[u].literals) { - lits[lit]++; - } - } - } - } - - /* literals */ - - for (const auto &m : lits) { - if (m.second > 1) { - DEBUG_PRINTF("lit %u used by >1 reporting roles\n", m.first); - return true; - } - } - - for (auto it = begin(lits); it != end(lits); ++it) { - const auto &lit1 = tbi.literals.right.at(it->first); - for (auto jt = next(it); jt != end(lits); ++jt) { - const auto &lit2 = tbi.literals.right.at(jt->first); - if (literalsCouldRace(lit1, lit2)) { - DEBUG_PRINTF("literals could race\n"); - return true; - } - } - } - - /* suffixes */ - - for (const auto &suffix : suffixes) { - if (has_suffix || has_role) { - return true; /* scope for badness */ - } - - has_suffix = true; - - /* some lesser suffix engines (nfas, haig, castle) can raise multiple - * matches for a report id at the same offset if there are multiple - * report states live. 
*/ - if (suffix.haig()) { - return true; - } - if (suffix.graph() && - requiresDedupe(*suffix.graph(), reports, tbi.cc.grey)) { - return true; - } - if (suffix.castle() && requiresDedupe(*suffix.castle(), reports)) { - return true; - } - } - - /* outfixes */ - - for (const auto &outfix_ptr : outfixes) { - assert(outfix_ptr); - const OutfixInfo &out = *outfix_ptr; - - if (has_outfix || has_role || has_suffix) { - return true; - } - has_outfix = true; - - if (out.haig()) { - return true; /* haig may report matches with different SOM at the - same offset */ - } - - if (out.holder() && - requiresDedupe(*out.holder(), reports, tbi.cc.grey)) { - return true; - } - } - - /* mpv */ - for (UNUSED const auto &puff : puffettes) { - if (has_outfix || has_role || has_suffix) { - return true; - } - has_outfix = true; - } - - /* boundary */ - if (has_intersection(tbi.boundary.report_at_eod, reports)) { - if (has_outfix || has_role || has_suffix) { - return true; - } - } - - return false; -} - -// Sets the report ID for all vertices connected to an accept to `id`. -void setReportId(NGHolder &g, ReportID id) { - // First, wipe the report IDs on all vertices. - for (auto v : vertices_range(g)) { - g[v].reports.clear(); - } - - // Any predecessors of accept get our id. - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - g[v].reports.insert(id); - } - - // Same for preds of acceptEod, except accept itself. - for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { - if (v == g.accept) { - continue; - } - g[v].reports.insert(id); - } -} - bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b) { ORDER_CHECK(minBound); ORDER_CHECK(maxBound); @@ -887,17 +503,6 @@ bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b) { return false; } -// Note: only clones the vertex, you'll have to wire up your own edges. -RoseVertex RoseBuildImpl::cloneVertex(RoseVertex v) { - RoseVertex v2 = add_vertex(g[v], g); - - for (const auto &lit_id : g[v2].literals) { - literal_info[lit_id].vertices.insert(v2); - } - - return v2; -} - #ifndef NDEBUG bool roseHasTops(const RoseBuildImpl &build, RoseVertex v) { const RoseGraph &g = build.g; @@ -979,7 +584,7 @@ void RoseSuffixInfo::reset(void) { rdfa.reset(); haig.reset(); tamarama.reset(); - dfa_min_width = 0; + dfa_min_width = depth(0); dfa_max_width = depth::infinity(); } @@ -1103,6 +708,13 @@ bool isAnchored(const left_id &r) { if (r.graph()) { return isAnchored(*r.graph()); } + if (r.dfa()) { + return r.dfa()->start_anchored == DEAD_STATE; + } + if (r.haig()) { + return r.haig()->start_anchored == DEAD_STATE; + } + // All other types are explicitly anchored. 
return true; } @@ -1183,7 +795,7 @@ void LeftEngInfo::reset(void) { tamarama.reset(); lag = 0; leftfix_report = MO_INVALID_IDX; - dfa_min_width = 0; + dfa_min_width = depth(0); dfa_max_width = depth::infinity(); } @@ -1264,6 +876,59 @@ u32 roseQuality(const RoseEngine *t) { return 1; } +u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) { + const auto &lit_vertices = build.literal_info.at(lit_id).vertices; + assert(!lit_vertices.empty()); + + u32 min_offset = UINT32_MAX; + for (const auto &v : lit_vertices) { + min_offset = min(min_offset, build.g[v].min_offset); + } + + return min_offset; +} + +u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) { + const auto &lit_vertices = build.literal_info.at(lit_id).vertices; + assert(!lit_vertices.empty()); + + u32 max_offset = 0; + for (const auto &v : lit_vertices) { + max_offset = max(max_offset, build.g[v].max_offset); + } + + return max_offset; +} + +bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e) { + const auto &g = build.g; + const auto v = target(e, g); + + if (!build.g[v].eod_accept) { + return false; + } + + // If there's a graph between us and EOD, we shouldn't be eager. + if (build.g[v].left) { + return false; + } + + // Must be exactly at EOD. + if (g[e].minBound != 0 || g[e].maxBound != 0) { + return false; + } + + // In streaming mode, we can only eagerly report EOD for literals in the + // EOD-anchored table, as that's the only time we actually know where EOD + // is. In block mode, we always have this information. + const auto u = source(e, g); + if (build.cc.streaming && !build.isInETable(u)) { + return false; + } + + return true; +} + #ifndef NDEBUG /** \brief Returns true if all the graphs (NFA, DFA, Haig, etc) in this Rose * graph are implementable. */ diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index ee237639b..23a8b959b 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,515 +26,191 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "rose_build_engine_blob.h" #include "rose_build_program.h" + +#include "rose_build_engine_blob.h" +#include "rose_build_instructions.h" +#include "rose_build_lookaround.h" +#include "rose_build_resources.h" +#include "nfa/nfa_api_queue.h" +#include "nfa/nfa_build_util.h" +#include "nfa/tamaramacompile.h" +#include "nfagraph/ng_util.h" +#include "util/charreach_util.h" #include "util/container.h" -#include "util/multibit_build.h" +#include "util/compile_context.h" +#include "util/compile_error.h" +#include "util/report_manager.h" #include "util/verify_types.h" +#include + #include #include using namespace std; +using boost::adaptors::map_values; +using boost::adaptors::map_keys; namespace ue2 { -/* Destructors to avoid weak vtables. 
diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp
index ee237639b..23a8b959b 100644
--- a/src/rose/rose_build_program.cpp
+++ b/src/rose/rose_build_program.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Intel Corporation
+ * Copyright (c) 2016-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -26,515 +26,191 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include "rose_build_engine_blob.h"
 #include "rose_build_program.h"
+
+#include "rose_build_engine_blob.h"
+#include "rose_build_instructions.h"
+#include "rose_build_lookaround.h"
+#include "rose_build_resources.h"
+#include "nfa/nfa_api_queue.h"
+#include "nfa/nfa_build_util.h"
+#include "nfa/tamaramacompile.h"
+#include "nfagraph/ng_util.h"
+#include "util/charreach_util.h"
 #include "util/container.h"
-#include "util/multibit_build.h"
+#include "util/compile_context.h"
+#include "util/compile_error.h"
+#include "util/report_manager.h"
 #include "util/verify_types.h"
 
+#include <boost/range/adaptor/map.hpp>
+
 #include <algorithm>
 #include <cstring>
 
 using namespace std;
+using boost::adaptors::map_values;
+using boost::adaptors::map_keys;
 
 namespace ue2 {
 
-/* Destructors to avoid weak vtables.
- */
+engine_info::engine_info(const NFA *nfa, bool trans)
+    : type((NFAEngineType)nfa->type), accepts_eod(nfaAcceptsEod(nfa)),
+      stream_size(nfa->streamStateSize),
+      scratch_size(nfa->scratchStateSize),
+      scratch_align(state_alignment(*nfa)),
+      transient(trans) {
+    assert(scratch_align);
+}
+
+left_build_info::left_build_info(u32 q, u32 l, u32 t, rose_group sm,
+                                 const std::vector<u8> &stops, u32 max_ql,
+                                 u8 cm_count, const CharReach &cm_cr)
+    : queue(q), lag(l), transient(t), squash_mask(sm), stopAlphabet(stops),
+      max_queuelen(max_ql), countingMiracleCount(cm_count),
+      countingMiracleReach(cm_cr) {
+}
 
-RoseInstruction::~RoseInstruction() = default;
-RoseInstrCatchUp::~RoseInstrCatchUp() = default;
-RoseInstrCatchUpMpv::~RoseInstrCatchUpMpv() = default;
-RoseInstrSomZero::~RoseInstrSomZero() = default;
-RoseInstrSuffixesEod::~RoseInstrSuffixesEod() = default;
-RoseInstrMatcherEod::~RoseInstrMatcherEod() = default;
-RoseInstrEnd::~RoseInstrEnd() = default;
+left_build_info::left_build_info(const vector<vector<LookEntry>> &looks)
+    : has_lookaround(true), lookaround(looks) {
+}
 
 using OffsetMap = RoseInstruction::OffsetMap;
 
 static
-u32 calc_jump(const OffsetMap &offset_map, const RoseInstruction *from,
-              const RoseInstruction *to) {
-    DEBUG_PRINTF("computing relative jump from %p to %p\n", from, to);
-    assert(from && contains(offset_map, from));
-    assert(to && contains(offset_map, to));
-
-    u32 from_offset = offset_map.at(from);
-    u32 to_offset = offset_map.at(to);
-    DEBUG_PRINTF("offsets: %u -> %u\n", from_offset, to_offset);
-    assert(from_offset <= to_offset);
-
-    return to_offset - from_offset;
-}
-
-void RoseInstrAnchoredDelay::write(void *dest, RoseEngineBlob &blob,
-                                   const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_ANCHORED_DELAY *>(dest);
-    inst->groups = groups;
-    inst->done_jump = calc_jump(offset_map, this, target);
-}
-
-void RoseInstrCheckLitEarly::write(void *dest, RoseEngineBlob &blob,
-                                   const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_CHECK_LIT_EARLY *>(dest);
-    inst->min_offset = min_offset;
-}
-
-void RoseInstrCheckGroups::write(void *dest, RoseEngineBlob &blob,
-                                 const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_CHECK_GROUPS *>(dest);
-    inst->groups = groups;
-}
-
-void RoseInstrCheckOnlyEod::write(void *dest, RoseEngineBlob &blob,
-                                  const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_CHECK_ONLY_EOD *>(dest);
-    inst->fail_jump = calc_jump(offset_map, this, target);
-}
-
-void RoseInstrCheckBounds::write(void *dest, RoseEngineBlob &blob,
-                                 const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_CHECK_BOUNDS *>(dest);
-    inst->min_bound = min_bound;
-    inst->max_bound = max_bound;
-    inst->fail_jump = calc_jump(offset_map, this, target);
-}
-
-void RoseInstrCheckNotHandled::write(void *dest, RoseEngineBlob &blob,
-                                     const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_CHECK_NOT_HANDLED *>(dest);
-    inst->key = key;
-    inst->fail_jump = calc_jump(offset_map, this, target);
-}
-
-void RoseInstrCheckSingleLookaround::write(void *dest, RoseEngineBlob &blob,
-                                           const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_CHECK_SINGLE_LOOKAROUND *>(dest);
-    inst->offset = offset;
-    inst->reach_index = reach_index;
-    inst->fail_jump = calc_jump(offset_map, this, target);
-}
-
-void RoseInstrCheckLookaround::write(void *dest, RoseEngineBlob &blob,
-                                     const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_CHECK_LOOKAROUND *>(dest);
-    inst->index = index;
-    inst->count = count;
-    inst->fail_jump = calc_jump(offset_map, this, target);
-}
-
-void RoseInstrCheckMask::write(void *dest, RoseEngineBlob &blob,
-                               const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_CHECK_MASK *>(dest);
-    inst->and_mask = and_mask;
-    inst->cmp_mask = cmp_mask;
-    inst->neg_mask = neg_mask;
-    inst->offset = offset;
-    inst->fail_jump = calc_jump(offset_map, this, target);
-}
-
-void RoseInstrCheckMask32::write(void *dest, RoseEngineBlob &blob,
-                                 const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_CHECK_MASK_32 *>(dest);
-    copy(begin(and_mask), end(and_mask), inst->and_mask);
-    copy(begin(cmp_mask), end(cmp_mask), inst->cmp_mask);
-    inst->neg_mask = neg_mask;
-    inst->offset = offset;
-    inst->fail_jump = calc_jump(offset_map, this, target);
-}
-
-void RoseInstrCheckByte::write(void *dest, RoseEngineBlob &blob,
-                               const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_CHECK_BYTE *>(dest);
-    inst->and_mask = and_mask;
-    inst->cmp_mask = cmp_mask;
-    inst->negation = negation;
-    inst->offset = offset;
-    inst->fail_jump = calc_jump(offset_map, this, target);
-}
-
-void RoseInstrCheckShufti16x8::write(void *dest, RoseEngineBlob &blob,
-                                     const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_CHECK_SHUFTI_16x8 *>(dest);
-    copy(begin(nib_mask), end(nib_mask), inst->nib_mask);
-    copy(begin(bucket_select_mask), end(bucket_select_mask),
-         inst->bucket_select_mask);
-    inst->neg_mask = neg_mask;
-    inst->offset = offset;
-    inst->fail_jump = calc_jump(offset_map, this, target);
-}
-
-void RoseInstrCheckShufti32x8::write(void *dest, RoseEngineBlob &blob,
-                                     const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_CHECK_SHUFTI_32x8 *>(dest);
-    copy(begin(hi_mask), end(hi_mask), inst->hi_mask);
-    copy(begin(lo_mask), end(lo_mask), inst->lo_mask);
-    copy(begin(bucket_select_mask), end(bucket_select_mask),
-         inst->bucket_select_mask);
-
-    inst->neg_mask = neg_mask;
-    inst->offset = offset;
-    inst->fail_jump = calc_jump(offset_map, this, target);
-}
-
-void RoseInstrCheckShufti16x16::write(void *dest, RoseEngineBlob &blob,
-                                      const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_CHECK_SHUFTI_16x16 *>(dest);
-    copy(begin(hi_mask), end(hi_mask), inst->hi_mask);
-    copy(begin(lo_mask), end(lo_mask), inst->lo_mask);
-    copy(begin(bucket_select_mask), end(bucket_select_mask),
-         inst->bucket_select_mask);
-    inst->neg_mask = neg_mask;
-    inst->offset = offset;
-    inst->fail_jump = calc_jump(offset_map, this, target);
-}
-
-void RoseInstrCheckShufti32x16::write(void *dest, RoseEngineBlob &blob,
-                                      const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_CHECK_SHUFTI_32x16 *>(dest);
-    copy(begin(hi_mask), end(hi_mask), inst->hi_mask);
-    copy(begin(lo_mask), end(lo_mask), inst->lo_mask);
-    copy(begin(bucket_select_mask_hi), end(bucket_select_mask_hi),
-         inst->bucket_select_mask_hi);
-    copy(begin(bucket_select_mask_lo), end(bucket_select_mask_lo),
-         inst->bucket_select_mask_lo);
-    inst->neg_mask = neg_mask;
-    inst->offset = offset;
-    inst->fail_jump = calc_jump(offset_map, this, target);
-}
-
-void RoseInstrCheckInfix::write(void *dest, RoseEngineBlob &blob,
-                                const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_CHECK_INFIX *>(dest);
-    inst->queue = queue;
-    inst->lag = lag;
-    inst->report = report;
-    inst->fail_jump = calc_jump(offset_map, this, target);
-}
-
-void RoseInstrCheckPrefix::write(void *dest, RoseEngineBlob &blob,
-                                 const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_CHECK_PREFIX *>(dest);
-    inst->queue = queue;
-    inst->lag = lag;
-    inst->report = report;
-    inst->fail_jump = calc_jump(offset_map, this, target);
-}
-
-void RoseInstrPushDelayed::write(void *dest, RoseEngineBlob &blob,
-                                 const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_PUSH_DELAYED *>(dest);
-    inst->delay = delay;
-    inst->index = index;
-}
-
-void RoseInstrRecordAnchored::write(void *dest, RoseEngineBlob &blob,
-                                    const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_RECORD_ANCHORED *>(dest);
-    inst->id = id;
-}
-
-void RoseInstrSomAdjust::write(void *dest, RoseEngineBlob &blob,
-                               const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_SOM_ADJUST *>(dest);
-    inst->distance = distance;
-}
-
-void RoseInstrSomLeftfix::write(void *dest, RoseEngineBlob &blob,
-                                const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_SOM_LEFTFIX *>(dest);
-    inst->queue = queue;
-    inst->lag = lag;
-}
-
-void RoseInstrSomFromReport::write(void *dest, RoseEngineBlob &blob,
-                                   const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_SOM_FROM_REPORT *>(dest);
-    inst->som = som;
-}
-
-void RoseInstrTriggerInfix::write(void *dest, RoseEngineBlob &blob,
-                                  const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_TRIGGER_INFIX *>(dest);
-    inst->cancel = cancel;
-    inst->queue = queue;
-    inst->event = event;
-}
-
-void RoseInstrTriggerSuffix::write(void *dest, RoseEngineBlob &blob,
-                                   const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_TRIGGER_SUFFIX *>(dest);
-    inst->queue = queue;
-    inst->event = event;
-}
-
-void RoseInstrDedupe::write(void *dest, RoseEngineBlob &blob,
-                            const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_DEDUPE *>(dest);
-    inst->quash_som = quash_som;
-    inst->dkey = dkey;
-    inst->offset_adjust = offset_adjust;
-    inst->fail_jump = calc_jump(offset_map, this, target);
-}
-
-void RoseInstrDedupeSom::write(void *dest, RoseEngineBlob &blob,
-                               const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_DEDUPE_SOM *>(dest);
-    inst->quash_som = quash_som;
-    inst->dkey = dkey;
-    inst->offset_adjust = offset_adjust;
-    inst->fail_jump = calc_jump(offset_map, this, target);
-}
-
-void RoseInstrReportChain::write(void *dest, RoseEngineBlob &blob,
-                                 const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_REPORT_CHAIN *>(dest);
-    inst->event = event;
-    inst->top_squash_distance = top_squash_distance;
-}
-
-void RoseInstrReportSomInt::write(void *dest, RoseEngineBlob &blob,
-                                  const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_REPORT_SOM_INT *>(dest);
-    inst->som = som;
+OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) {
+    OffsetMap offset_map;
+    u32 offset = 0;
+    for (const auto &ri : program) {
+        offset = ROUNDUP_N(offset, ROSE_INSTR_MIN_ALIGN);
+        DEBUG_PRINTF("instr %p (opcode %d) -> offset %u\n", ri.get(),
+                     ri->code(), offset);
+        assert(!contains(offset_map, ri.get()));
+        offset_map.emplace(ri.get(), offset);
+        offset += ri->byte_length();
+    }
+    *total_len = offset;
+    return offset_map;
 }
 
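The relocated makeOffsetMap() above lays instructions out at
ROSE_INSTR_MIN_ALIGN-aligned offsets before anything is serialised, so that
relative jumps can be resolved. A self-contained sketch of the same layout
arithmetic (the instruction lengths and the alignment value of 8 are made up
for illustration):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Round x up to the next multiple of align (a power of two), mirroring
    // what ROUNDUP_N does in the real code.
    static uint32_t roundup(uint32_t x, uint32_t align) {
        return (x + align - 1) & ~(align - 1);
    }

    int main() {
        // Hypothetical instruction byte lengths.
        std::vector<uint32_t> lengths = {12, 7, 24};
        std::vector<uint32_t> offsets;
        uint32_t offset = 0;
        for (uint32_t len : lengths) {
            offset = roundup(offset, 8); // align each instruction
            offsets.push_back(offset);
            offset += len;
        }
        // 0, then 12 rounds up to 16, then 23 rounds up to 24.
        assert(offsets[0] == 0 && offsets[1] == 16 && offsets[2] == 24);
        return 0;
    }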
-void RoseInstrReportSomAware::write(void *dest, RoseEngineBlob &blob,
-                                    const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_REPORT_SOM_AWARE *>(dest);
-    inst->som = som;
+RoseProgram::RoseProgram() {
+    prog.push_back(make_unique<RoseInstrEnd>());
 }
 
-void RoseInstrReport::write(void *dest, RoseEngineBlob &blob,
-                            const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_REPORT *>(dest);
-    inst->onmatch = onmatch;
-    inst->offset_adjust = offset_adjust;
-}
+RoseProgram::~RoseProgram() = default;
 
-void RoseInstrReportExhaust::write(void *dest, RoseEngineBlob &blob,
-                                   const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_REPORT_EXHAUST *>(dest);
-    inst->onmatch = onmatch;
-    inst->offset_adjust = offset_adjust;
-    inst->ekey = ekey;
-}
+RoseProgram::RoseProgram(RoseProgram &&) = default;
+RoseProgram &RoseProgram::operator=(RoseProgram &&) = default;
 
-void RoseInstrReportSom::write(void *dest, RoseEngineBlob &blob,
-                               const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_REPORT_SOM *>(dest);
-    inst->onmatch = onmatch;
-    inst->offset_adjust = offset_adjust;
+bool RoseProgram::empty() const {
+    assert(!prog.empty());
+    assert(prog.back()->code() == ROSE_INSTR_END);
+    // Empty if we only have one element, the END instruction.
+    return next(prog.begin()) == prog.end();
 }
 
-void RoseInstrReportSomExhaust::write(void *dest, RoseEngineBlob &blob,
-                                      const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_REPORT_SOM_EXHAUST *>(dest);
-    inst->onmatch = onmatch;
-    inst->offset_adjust = offset_adjust;
-    inst->ekey = ekey;
-}
-
-void RoseInstrDedupeAndReport::write(void *dest, RoseEngineBlob &blob,
-                                     const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_DEDUPE_AND_REPORT *>(dest);
-    inst->quash_som = quash_som;
-    inst->dkey = dkey;
-    inst->onmatch = onmatch;
-    inst->offset_adjust = offset_adjust;
-    inst->fail_jump = calc_jump(offset_map, this, target);
-}
-
-void RoseInstrFinalReport::write(void *dest, RoseEngineBlob &blob,
-                                 const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_FINAL_REPORT *>(dest);
-    inst->onmatch = onmatch;
-    inst->offset_adjust = offset_adjust;
-}
-
-void RoseInstrCheckExhausted::write(void *dest, RoseEngineBlob &blob,
-                                    const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_CHECK_EXHAUSTED *>(dest);
-    inst->ekey = ekey;
-    inst->fail_jump = calc_jump(offset_map, this, target);
-}
-
-void RoseInstrCheckMinLength::write(void *dest, RoseEngineBlob &blob,
-                                    const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_CHECK_MIN_LENGTH *>(dest);
-    inst->end_adj = end_adj;
-    inst->min_length = min_length;
-    inst->fail_jump = calc_jump(offset_map, this, target);
-}
-
-void RoseInstrSetState::write(void *dest, RoseEngineBlob &blob,
-                              const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_SET_STATE *>(dest);
-    inst->index = index;
+const RoseInstruction *RoseProgram::end_instruction() const {
+    assert(!prog.empty());
+    assert(prog.back()->code() == ROSE_INSTR_END);
+
-void RoseInstrSetGroups::write(void *dest, RoseEngineBlob &blob,
-                               const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_SET_GROUPS *>(dest);
-    inst->groups = groups;
+    return prog.back().get();
 }
 
-void RoseInstrSquashGroups::write(void *dest, RoseEngineBlob &blob,
-                                  const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_SQUASH_GROUPS *>(dest);
-    inst->groups = groups;
+void RoseProgram::update_targets(RoseProgram::iterator it,
+                                 RoseProgram::iterator it_end,
+                                 const RoseInstruction *old_target,
+                                 const RoseInstruction *new_target) {
+    assert(old_target && new_target && old_target != new_target);
+    for (; it != it_end; ++it) {
+        unique_ptr<RoseInstruction> &ri = *it;
+        assert(ri);
+        ri->update_target(old_target, new_target);
+    }
 }
 
-void RoseInstrCheckState::write(void *dest, RoseEngineBlob &blob,
-                                const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_CHECK_STATE *>(dest);
-    inst->index = index;
-    inst->fail_jump = calc_jump(offset_map, this, target);
+RoseProgram::iterator RoseProgram::insert(RoseProgram::iterator it,
+                                          unique_ptr<RoseInstruction> ri) {
+    assert(!prog.empty());
+    assert(it != end());
+    assert(prog.back()->code() == ROSE_INSTR_END);
+
+    return prog.insert(it, move(ri));
 }
 
-void RoseInstrSparseIterBegin::write(void *dest, RoseEngineBlob &blob,
-                                     const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_SPARSE_ITER_BEGIN *>(dest);
-    inst->fail_jump = calc_jump(offset_map, this, target);
+RoseProgram::iterator RoseProgram::insert(RoseProgram::iterator it,
+                                          RoseProgram &&block) {
+    assert(!prog.empty());
+    assert(it != end());
+    assert(prog.back()->code() == ROSE_INSTR_END);
 
-    // Resolve and write the multibit sparse iterator and the jump table.
-    vector<u32> keys;
-    vector<u32> jump_offsets;
-    for (const auto &jump : jump_table) {
-        keys.push_back(jump.first);
-        assert(contains(offset_map, jump.second));
-        jump_offsets.push_back(offset_map.at(jump.second));
+    if (block.empty()) {
+        return it;
     }
 
-    vector<mmbit_sparse_iter> iter;
-    mmbBuildSparseIterator(iter, keys, num_keys);
-    assert(!iter.empty());
-    inst->iter_offset = blob.add_iterator(iter);
-    inst->jump_table = blob.add(jump_offsets.begin(), jump_offsets.end());
-
-    // Store offsets for corresponding SPARSE_ITER_NEXT operations.
-    is_written = true;
-    iter_offset = inst->iter_offset;
-    jump_table_offset = inst->jump_table;
+    const RoseInstruction *end_ptr = block.end_instruction();
+    assert(end_ptr->code() == ROSE_INSTR_END);
+    block.prog.pop_back();
+
+    const RoseInstruction *new_target = it->get();
+    update_targets(block.prog.begin(), block.prog.end(), end_ptr, new_target);
+
+    // Workaround: container insert() for ranges doesn't return an iterator
+    // in the version of the STL distributed with gcc 4.8.
+    auto dist = distance(prog.begin(), it);
+    prog.insert(it, make_move_iterator(block.prog.begin()),
+                make_move_iterator(block.prog.end()));
+    it = prog.begin();
+    advance(it, dist);
+    return it;
 }
 
-void RoseInstrSparseIterNext::write(void *dest, RoseEngineBlob &blob,
-                                    const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_SPARSE_ITER_NEXT *>(dest);
-    inst->state = state;
-    inst->fail_jump = calc_jump(offset_map, this, target);
-
-    // Use the same sparse iterator and jump table as the SPARSE_ITER_BEGIN
-    // instruction.
-    assert(begin);
-    assert(contains(offset_map, begin));
-    assert(begin->is_written);
-    inst->iter_offset = begin->iter_offset;
-    inst->jump_table = begin->jump_table_offset;
+RoseProgram::iterator RoseProgram::erase(RoseProgram::iterator first,
+                                         RoseProgram::iterator last) {
+    return prog.erase(first, last);
 }
 
-void RoseInstrSparseIterAny::write(void *dest, RoseEngineBlob &blob,
-                                   const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_SPARSE_ITER_ANY *>(dest);
-    inst->fail_jump = calc_jump(offset_map, this, target);
-
-    // Write the multibit sparse iterator.
-    vector<mmbit_sparse_iter> iter;
-    mmbBuildSparseIterator(iter, keys, num_keys);
-    assert(!iter.empty());
-    inst->iter_offset = blob.add_iterator(iter);
+void RoseProgram::add_before_end(std::unique_ptr<RoseInstruction> ri) {
+    assert(!prog.empty());
+    insert(std::prev(prog.end()), std::move(ri));
 }
 
-void RoseInstrEnginesEod::write(void *dest, RoseEngineBlob &blob,
-                                const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_ENGINES_EOD *>(dest);
-    inst->iter_offset = iter_offset;
-}
+void RoseProgram::add_before_end(RoseProgram &&block) {
+    assert(!prog.empty());
+    assert(prog.back()->code() == ROSE_INSTR_END);
 
-void RoseInstrCheckLongLit::write(void *dest, RoseEngineBlob &blob,
-                                  const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_CHECK_LONG_LIT *>(dest);
-    assert(!literal.empty());
-    inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1);
-    inst->lit_length = verify_u32(literal.size());
-}
+    if (block.empty()) {
+        return;
+    }
 
-void RoseInstrCheckLongLitNocase::write(void *dest, RoseEngineBlob &blob,
-                                        const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_CHECK_LONG_LIT_NOCASE *>(dest);
-    assert(!literal.empty());
-    inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1);
-    inst->lit_length = verify_u32(literal.size());
+    insert(prev(prog.end()), move(block));
 }
 
-static
-OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) {
-    OffsetMap offset_map;
-    u32 offset = 0;
-    for (const auto &ri : program) {
-        offset = ROUNDUP_N(offset, ROSE_INSTR_MIN_ALIGN);
-        DEBUG_PRINTF("instr %p (opcode %d) -> offset %u\n", ri.get(),
-                     ri->code(), offset);
-        assert(!contains(offset_map, ri.get()));
-        offset_map.emplace(ri.get(), offset);
-        offset += ri->byte_length();
+void RoseProgram::add_block(RoseProgram &&block) {
+    assert(!prog.empty());
+    assert(prog.back()->code() == ROSE_INSTR_END);
+
+    if (block.empty()) {
+        return;
     }
-    *total_len = offset;
-    return offset_map;
+
+    // Replace pointers to the current END with pointers to the first
+    // instruction in the new sequence.
+    const RoseInstruction *end_ptr = end_instruction();
+    prog.pop_back();
+    update_targets(prog.begin(), prog.end(), end_ptr,
+                   block.prog.front().get());
+    prog.insert(prog.end(), make_move_iterator(block.prog.begin()),
+                make_move_iterator(block.prog.end()));
 }
 
-aligned_unique_ptr<char>
-writeProgram(RoseEngineBlob &blob, const RoseProgram &program, u32 *total_len) {
-    const auto offset_map = makeOffsetMap(program, total_len);
-    DEBUG_PRINTF("%zu instructions, len %u\n", program.size(), *total_len);
+bytecode_ptr<char> writeProgram(RoseEngineBlob &blob,
+                                const RoseProgram &program) {
+    u32 total_len = 0;
+    const auto offset_map = makeOffsetMap(program, &total_len);
+    DEBUG_PRINTF("%zu instructions, len %u\n", program.size(), total_len);
 
-    auto bytecode = aligned_zmalloc_unique<char>(*total_len);
+    auto bytecode = make_zeroed_bytecode_ptr<char>(total_len,
+                                                   ROSE_INSTR_MIN_ALIGN);
     char *ptr = bytecode.get();
 
     for (const auto &ri : program) {
@@ -546,6 +222,15 @@ writeProgram(RoseEngineBlob &blob, const RoseProgram &program, u32 *total_len) {
     return bytecode;
 }
 
+size_t RoseProgramHash::operator()(const RoseProgram &program) const {
+    size_t v = 0;
+    for (const auto &ri : program) {
+        assert(ri);
+        boost::hash_combine(v, ri->hash());
+    }
+    return v;
+}
+
 bool RoseProgramEquivalence::operator()(const RoseProgram &prog1,
                                         const RoseProgram &prog2) const {
     if (prog1.size() != prog2.size()) {
@@ -569,4 +254,2095 @@ bool RoseProgramEquivalence::operator()(const RoseProgram &prog1,
     return std::equal(prog1.begin(), prog1.end(), prog2.begin(), is_equiv);
 }
 
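RoseProgramHash and RoseProgramEquivalence exist so that structurally
identical programs can be deduplicated and share a single copy of bytecode.
A sketch of how a caller might combine them with writeProgram() for that
purpose (the cache shape, the blob.add() arguments and bytecode.size() call
are assumptions for illustration, not the actual call sites in the build):

    #include <unordered_map>
    #include <utility>

    // Hypothetical helper: write a program once, reuse its offset thereafter.
    u32 addProgram(std::unordered_map<RoseProgram, u32, RoseProgramHash,
                                      RoseProgramEquivalence> &cache,
                   RoseEngineBlob &blob, RoseProgram &&program) {
        auto it = cache.find(program);
        if (it != cache.end()) {
            return it->second; // identical program already written
        }
        auto bytecode = writeProgram(blob, program);
        u32 offset = blob.add(bytecode.get(), bytecode.size(),
                              ROSE_INSTR_MIN_ALIGN);
        cache.emplace(std::move(program), offset);
        return offset;
    }

Hashing is cheap relative to full structural comparison, so the equivalence
functor only runs on hash collisions, which is the usual unordered_map
trade-off.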
+/* Removes any CHECK_HANDLED instructions from the given program */
+static
+void stripCheckHandledInstruction(RoseProgram &prog) {
+    for (auto it = prog.begin(); it != prog.end();) {
+        auto ins = dynamic_cast<const RoseInstrCheckNotHandled *>(it->get());
+        if (!ins) {
+            ++it;
+            continue;
+        }
+
+        auto next_it = next(it);
+        assert(next_it != prog.end()); /* there should always be an end ins */
+        auto next_ins = next_it->get();
+
+        /* update all earlier instructions which point to ins to instead point
+         * to the next instruction. Only need to look at earlier as we only
+         * ever jump forward. */
+        RoseProgram::update_targets(prog.begin(), it, ins, next_ins);
+
+        /* remove check handled instruction */
+        it = prog.erase(it, next_it);
+    }
+}
+
+
+/** Returns true if the program may read the interpreter's work_done flag */
+static
+bool reads_work_done_flag(const RoseProgram &prog) {
+    for (const auto &ri : prog) {
+        if (dynamic_cast<const RoseInstrSquashGroups *>(ri.get())) {
+            return true;
+        }
+    }
+    return false;
+}
+
+void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program) {
+    if (!eodNfaIterOffset) {
+        return;
+    }
+
+    RoseProgram block;
+    block.add_before_end(make_unique<RoseInstrEnginesEod>(eodNfaIterOffset));
+    program.add_block(move(block));
+}
+
+void addSuffixesEodProgram(RoseProgram &program) {
+    RoseProgram block;
+    block.add_before_end(make_unique<RoseInstrSuffixesEod>());
+    program.add_block(move(block));
+}
+
+void addMatcherEodProgram(RoseProgram &program) {
+    RoseProgram block;
+    block.add_before_end(make_unique<RoseInstrMatcherEod>());
+    program.add_block(move(block));
+}
+
+static
+void makeRoleCheckLeftfix(const RoseBuildImpl &build,
+                          const map<RoseVertex, left_build_info> &leftfix_info,
+                          RoseVertex v, RoseProgram &program) {
+    auto it = leftfix_info.find(v);
+    if (it == end(leftfix_info)) {
+        return;
+    }
+    const left_build_info &lni = it->second;
+    if (lni.has_lookaround) {
+        return; // Leftfix completely implemented by lookaround.
+    }
+
+    assert(!build.cc.streaming ||
+           build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG);
+
+    bool is_prefix = build.isRootSuccessor(v);
+    const auto *end_inst = program.end_instruction();
+
+    unique_ptr<RoseInstruction> ri;
+    if (is_prefix) {
+        ri = make_unique<RoseInstrCheckPrefix>(lni.queue, build.g[v].left.lag,
+                                               build.g[v].left.leftfix_report,
+                                               end_inst);
+    } else {
+        ri = make_unique<RoseInstrCheckInfix>(lni.queue, build.g[v].left.lag,
+                                              build.g[v].left.leftfix_report,
+                                              end_inst);
+    }
+    program.add_before_end(move(ri));
+}
+
+static
+void makeAnchoredLiteralDelay(const RoseBuildImpl &build,
+                              const ProgramBuild &prog_build, u32 lit_id,
+                              RoseProgram &program) {
+    // Only relevant for literals in the anchored table.
+    const rose_literal_id &lit = build.literals.at(lit_id);
+    if (lit.table != ROSE_ANCHORED) {
+        return;
+    }
+
+    // If this literal match cannot occur after floatingMinLiteralMatchOffset,
+    // we do not need this check.
+    bool all_too_early = true;
+    rose_group groups = 0;
+
+    const auto &lit_vertices = build.literal_info.at(lit_id).vertices;
+    for (RoseVertex v : lit_vertices) {
+        if (build.g[v].max_offset > prog_build.floatingMinLiteralMatchOffset) {
+            all_too_early = false;
+        }
+        groups |= build.g[v].groups;
+    }
+
+    if (all_too_early) {
+        return;
+    }
+
+    assert(contains(prog_build.anchored_programs, lit_id));
+    u32 anch_id = prog_build.anchored_programs.at(lit_id);
+
+    const auto *end_inst = program.end_instruction();
+    auto ri = make_unique<RoseInstrAnchoredDelay>(groups, anch_id, end_inst);
+    program.add_before_end(move(ri));
+}
+
+static
+void makeDedupe(const ReportManager &rm, const Report &report,
+                RoseProgram &program) {
+    const auto *end_inst = program.end_instruction();
+    auto ri =
+        make_unique<RoseInstrDedupe>(report.quashSom, rm.getDkey(report),
+                                     report.offsetAdjust, end_inst);
+    program.add_before_end(move(ri));
+}
+
+static
+void makeDedupeSom(const ReportManager &rm, const Report &report,
+                   RoseProgram &program) {
+    const auto *end_inst = program.end_instruction();
+    auto ri = make_unique<RoseInstrDedupeSom>(report.quashSom,
+                                              rm.getDkey(report),
+                                              report.offsetAdjust, end_inst);
+    program.add_before_end(move(ri));
+}
+
+static
+void makeCatchup(const ReportManager &rm, bool needs_catchup,
+                 const flat_set<ReportID> &reports, RoseProgram &program) {
+    if (!needs_catchup) {
+        return;
+    }
+
+    // Everything except the INTERNAL_ROSE_CHAIN report needs catchup to run
+    // before reports are triggered.
+
+    auto report_needs_catchup = [&](const ReportID &id) {
+        const Report &report = rm.getReport(id);
+        return report.type != INTERNAL_ROSE_CHAIN;
+    };
+
+    if (!any_of(begin(reports), end(reports), report_needs_catchup)) {
+        DEBUG_PRINTF("none of the given reports needs catchup\n");
+        return;
+    }
+
+    program.add_before_end(make_unique<RoseInstrCatchUp>());
+}
+
+static
+void writeSomOperation(const Report &report, som_operation *op) {
+    assert(op);
+
+    memset(op, 0, sizeof(*op));
+
+    switch (report.type) {
+    case EXTERNAL_CALLBACK_SOM_REL:
+        op->type = SOM_EXTERNAL_CALLBACK_REL;
+        break;
+    case INTERNAL_SOM_LOC_SET:
+        op->type = SOM_INTERNAL_LOC_SET;
+        break;
+    case INTERNAL_SOM_LOC_SET_IF_UNSET:
+        op->type = SOM_INTERNAL_LOC_SET_IF_UNSET;
+        break;
+    case INTERNAL_SOM_LOC_SET_IF_WRITABLE:
+        op->type = SOM_INTERNAL_LOC_SET_IF_WRITABLE;
+        break;
+    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA:
+        op->type = SOM_INTERNAL_LOC_SET_REV_NFA;
+        break;
+    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET:
+        op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET;
+        break;
+    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE:
+        op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE;
+        break;
+    case INTERNAL_SOM_LOC_COPY:
+        op->type = SOM_INTERNAL_LOC_COPY;
+        break;
+    case INTERNAL_SOM_LOC_COPY_IF_WRITABLE:
+        op->type = SOM_INTERNAL_LOC_COPY_IF_WRITABLE;
+        break;
+    case INTERNAL_SOM_LOC_MAKE_WRITABLE:
+        op->type = SOM_INTERNAL_LOC_MAKE_WRITABLE;
+        break;
+    case EXTERNAL_CALLBACK_SOM_STORED:
+        op->type = SOM_EXTERNAL_CALLBACK_STORED;
+        break;
+    case EXTERNAL_CALLBACK_SOM_ABS:
+        op->type = SOM_EXTERNAL_CALLBACK_ABS;
+        break;
+    case EXTERNAL_CALLBACK_SOM_REV_NFA:
+        op->type = SOM_EXTERNAL_CALLBACK_REV_NFA;
+        break;
+    case INTERNAL_SOM_LOC_SET_FROM:
+        op->type = SOM_INTERNAL_LOC_SET_FROM;
+        break;
+    case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE:
+        op->type = SOM_INTERNAL_LOC_SET_FROM_IF_WRITABLE;
+        break;
+    default:
+        // This report doesn't correspond to a SOM operation.
+        assert(0);
+        throw CompileError("Unable to generate bytecode.");
+    }
+
+    op->onmatch = report.onmatch;
+
+    switch (report.type) {
+    case EXTERNAL_CALLBACK_SOM_REV_NFA:
+    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA:
+    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET:
+    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE:
+        op->aux.revNfaIndex = report.revNfaIndex;
+        break;
+    default:
+        op->aux.somDistance = report.somDistance;
+        break;
+    }
+}
+
+static
+void makeReport(const RoseBuildImpl &build, const ReportID id,
+                const bool has_som, RoseProgram &program) {
+    assert(id < build.rm.numReports());
+    const Report &report = build.rm.getReport(id);
+
+    RoseProgram report_block;
+    const RoseInstruction *end_inst = report_block.end_instruction();
+
+    // Handle min/max offset checks.
+    if (report.minOffset > 0 || report.maxOffset < MAX_OFFSET) {
+        auto ri = make_unique<RoseInstrCheckBounds>(report.minOffset,
+                                                    report.maxOffset, end_inst);
+        report_block.add_before_end(move(ri));
+    }
+
+    // If this report has an exhaustion key, we can check it in the program
+    // rather than waiting until we're in the callback adaptor.
+    if (report.ekey != INVALID_EKEY) {
+        auto ri = make_unique<RoseInstrCheckExhausted>(report.ekey, end_inst);
+        report_block.add_before_end(move(ri));
+    }
+
+    // External SOM reports that aren't passthrough need their SOM value
+    // calculated.
+    if (isExternalSomReport(report) &&
+        report.type != EXTERNAL_CALLBACK_SOM_PASS) {
+        auto ri = make_unique<RoseInstrSomFromReport>();
+        writeSomOperation(report, &ri->som);
+        report_block.add_before_end(move(ri));
+    }
+
+    // Min length constraint.
+    if (report.minLength > 0) {
+        assert(build.hasSom);
+        auto ri = make_unique<RoseInstrCheckMinLength>(
+            report.offsetAdjust, report.minLength, end_inst);
+        report_block.add_before_end(move(ri));
+    }
+
+    if (report.quashSom) {
+        report_block.add_before_end(make_unique<RoseInstrSomZero>());
+    }
+
+    switch (report.type) {
+    case EXTERNAL_CALLBACK:
+        if (!has_som) {
+            // Dedupe is only necessary if this report has a dkey, or if there
+            // are SOM reports to catch up.
+            bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom;
+            if (report.ekey == INVALID_EKEY) {
+                if (needs_dedupe) {
+                    report_block.add_before_end(
+                        make_unique<RoseInstrDedupeAndReport>(
+                            report.quashSom, build.rm.getDkey(report),
+                            report.onmatch, report.offsetAdjust, end_inst));
+                } else {
+                    report_block.add_before_end(make_unique<RoseInstrReport>(
+                        report.onmatch, report.offsetAdjust));
+                }
+            } else {
+                if (needs_dedupe) {
+                    makeDedupe(build.rm, report, report_block);
+                }
+                report_block.add_before_end(make_unique<RoseInstrReportExhaust>(
+                    report.onmatch, report.offsetAdjust, report.ekey));
+            }
+        } else { // has_som
+            makeDedupeSom(build.rm, report, report_block);
+            if (report.ekey == INVALID_EKEY) {
+                report_block.add_before_end(make_unique<RoseInstrReportSom>(
+                    report.onmatch, report.offsetAdjust));
+            } else {
+                report_block.add_before_end(
+                    make_unique<RoseInstrReportSomExhaust>(
+                        report.onmatch, report.offsetAdjust, report.ekey));
+            }
+        }
+        break;
+    case INTERNAL_SOM_LOC_SET:
+    case INTERNAL_SOM_LOC_SET_IF_UNSET:
+    case INTERNAL_SOM_LOC_SET_IF_WRITABLE:
+    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA:
+    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET:
+    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE:
+    case INTERNAL_SOM_LOC_COPY:
+    case INTERNAL_SOM_LOC_COPY_IF_WRITABLE:
+    case INTERNAL_SOM_LOC_MAKE_WRITABLE:
+    case INTERNAL_SOM_LOC_SET_FROM:
+    case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE:
+        if (has_som) {
+            auto ri = make_unique<RoseInstrReportSomAware>();
+            writeSomOperation(report, &ri->som);
+            report_block.add_before_end(move(ri));
+        } else {
+            auto ri = make_unique<RoseInstrReportSomInt>();
+            writeSomOperation(report, &ri->som);
+            report_block.add_before_end(move(ri));
+        }
+        break;
+    case INTERNAL_ROSE_CHAIN: {
+        report_block.add_before_end(make_unique<RoseInstrReportChain>(
+            report.onmatch, report.topSquashDistance));
+        break;
+    }
+    case EXTERNAL_CALLBACK_SOM_REL:
+    case EXTERNAL_CALLBACK_SOM_STORED:
+    case EXTERNAL_CALLBACK_SOM_ABS:
+    case EXTERNAL_CALLBACK_SOM_REV_NFA:
+        makeDedupeSom(build.rm, report, report_block);
+        if (report.ekey == INVALID_EKEY) {
+            report_block.add_before_end(make_unique<RoseInstrReportSom>(
+                report.onmatch, report.offsetAdjust));
+        } else {
+            report_block.add_before_end(make_unique<RoseInstrReportSomExhaust>(
+                report.onmatch, report.offsetAdjust, report.ekey));
+        }
+        break;
+    case EXTERNAL_CALLBACK_SOM_PASS:
+        makeDedupeSom(build.rm, report, report_block);
+        if (report.ekey == INVALID_EKEY) {
+            report_block.add_before_end(make_unique<RoseInstrReportSom>(
+                report.onmatch, report.offsetAdjust));
+        } else {
+            report_block.add_before_end(make_unique<RoseInstrReportSomExhaust>(
+                report.onmatch, report.offsetAdjust, report.ekey));
+        }
+        break;
+
+    default:
+        assert(0);
+        throw CompileError("Unable to generate bytecode.");
+    }
+
+    assert(!report_block.empty());
+    program.add_block(move(report_block));
+}
+
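For plain EXTERNAL_CALLBACK reports without SOM, makeReport() above chooses
between a bare REPORT and a fused DEDUPE_AND_REPORT using one predicate. A
toy restatement of just that rule (the NO_DKEY sentinel here stands in for
the real invalid-dkey value of ~0U used by the ReportManager):

    #include <cstdint>

    static const uint32_t NO_DKEY = ~0U; // hypothetical sentinel

    // Dedupe is required when the report has a dedupe key, or when SOM is
    // in use anywhere in the database (SOM matches at the same offset may
    // need reconciling).
    static bool needsDedupe(uint32_t dkey, bool db_has_som) {
        return dkey != NO_DKEY || db_has_som;
    }

    int main() {
        return (needsDedupe(5, false) && needsDedupe(NO_DKEY, true) &&
                !needsDedupe(NO_DKEY, false)) ? 0 : 1;
    }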
+static
+void makeRoleReports(const RoseBuildImpl &build,
+                     const std::map<RoseVertex, left_build_info> &leftfix_info,
+                     bool needs_catchup, RoseVertex v, RoseProgram &program) {
+    const auto &g = build.g;
+
+    bool report_som = false;
+    if (g[v].left.tracksSom()) {
+        /* we are a suffaig - need to update role to provide som to the
+         * suffix. */
+        assert(contains(leftfix_info, v));
+        const left_build_info &lni = leftfix_info.at(v);
+        program.add_before_end(
+            make_unique<RoseInstrSomLeftfix>(lni.queue, g[v].left.lag));
+        report_som = true;
+    } else if (g[v].som_adjust) {
+        program.add_before_end(
+            make_unique<RoseInstrSomAdjust>(g[v].som_adjust));
+        report_som = true;
+    }
+
+    makeCatchup(build.rm, needs_catchup, g[v].reports, program);
+
+    RoseProgram report_block;
+    for (ReportID id : g[v].reports) {
+        makeReport(build, id, report_som, report_block);
+    }
+    program.add_before_end(move(report_block));
+}
+
+static
+void makeRoleSetState(const unordered_map<RoseVertex, u32> &roleStateIndices,
+                      RoseVertex v, RoseProgram &program) {
+    // We only need this instruction if a state index has been assigned to this
+    // vertex.
+    auto it = roleStateIndices.find(v);
+    if (it == end(roleStateIndices)) {
+        return;
+    }
+    program.add_before_end(make_unique<RoseInstrSetState>(it->second));
+}
+
+static
+void makePushDelayedInstructions(const RoseLiteralMap &literals,
+                                 ProgramBuild &prog_build,
+                                 const flat_set<u32> &delayed_ids,
+                                 RoseProgram &program) {
+    vector<RoseInstrPushDelayed> delay_instructions;
+
+    for (const auto &delayed_lit_id : delayed_ids) {
+        DEBUG_PRINTF("delayed lit id %u\n", delayed_lit_id);
+        assert(contains(prog_build.delay_programs, delayed_lit_id));
+        u32 delay_id = prog_build.delay_programs.at(delayed_lit_id);
+        const auto &delay_lit = literals.at(delayed_lit_id);
+        delay_instructions.emplace_back(verify_u8(delay_lit.delay), delay_id);
+    }
+
+    sort_and_unique(delay_instructions, [](const RoseInstrPushDelayed &a,
+                                           const RoseInstrPushDelayed &b) {
+        return tie(a.delay, a.index) < tie(b.delay, b.index);
+    });
+
+    for (const auto &ri : delay_instructions) {
+        program.add_before_end(make_unique<RoseInstrPushDelayed>(ri));
+    }
+}
+
+static
+void makeCheckLiteralInstruction(const rose_literal_id &lit,
+                                 size_t longLitLengthThreshold,
+                                 RoseProgram &program,
+                                 const CompileContext &cc) {
+    assert(longLitLengthThreshold > 0);
+
+    DEBUG_PRINTF("lit=%s, long lit threshold %zu\n", dumpString(lit.s).c_str(),
+                 longLitLengthThreshold);
+
+    if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX) {
+        DEBUG_PRINTF("lit short enough to not need confirm\n");
+        return;
+    }
+
+    // Check resource limits as well.
+    if (lit.s.length() > cc.grey.limitLiteralLength) {
+        throw ResourceLimitError();
+    }
+
+    if (lit.s.length() <= longLitLengthThreshold) {
+        DEBUG_PRINTF("is a medium-length literal\n");
+        const auto *end_inst = program.end_instruction();
+        unique_ptr<RoseInstruction> ri;
+        if (lit.s.any_nocase()) {
+            ri = make_unique<RoseInstrCheckMedLitNocase>(lit.s.get_string(),
+                                                         end_inst);
+        } else {
+            ri = make_unique<RoseInstrCheckMedLit>(lit.s.get_string(),
+                                                   end_inst);
+        }
+        program.add_before_end(move(ri));
+        return;
+    }
+
+    // Long literal support should only really be used for the floating table
+    // in streaming mode.
+    assert(lit.table == ROSE_FLOATING && cc.streaming);
+
+    DEBUG_PRINTF("is a long literal\n");
+
+    const auto *end_inst = program.end_instruction();
+    unique_ptr<RoseInstruction> ri;
+    if (lit.s.any_nocase()) {
+        ri = make_unique<RoseInstrCheckLongLitNocase>(lit.s.get_string(),
+                                                      end_inst);
+    } else {
+        ri = make_unique<RoseInstrCheckLongLit>(lit.s.get_string(), end_inst);
+    }
+    program.add_before_end(move(ri));
+}
+
+static
+void makeRoleCheckNotHandled(ProgramBuild &prog_build, RoseVertex v,
+                             RoseProgram &program) {
+    u32 handled_key;
+    if (contains(prog_build.handledKeys, v)) {
+        handled_key = prog_build.handledKeys.at(v);
+    } else {
+        handled_key = verify_u32(prog_build.handledKeys.size());
+        prog_build.handledKeys.emplace(v, handled_key);
+    }
+
+    const auto *end_inst = program.end_instruction();
+    auto ri = make_unique<RoseInstrCheckNotHandled>(handled_key, end_inst);
+    program.add_before_end(move(ri));
+}
+
+static
+void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v,
+                         const RoseEdge &e, RoseProgram &program) {
+    const RoseGraph &g = build.g;
+    const RoseVertex u = source(e, g);
+
+    // We know that we can trust the anchored table (DFA) to always deliver us
+    // literals at the correct offset.
+    if (build.isAnchored(v)) {
+        DEBUG_PRINTF("literal in anchored table, skipping bounds check\n");
+        return;
+    }
+
+    // Use the minimum literal length.
+    u32 lit_length = g[v].eod_accept ? 0 : verify_u32(build.minLiteralLen(v));
+
+    u64a min_bound = g[e].minBound + lit_length;
+    u64a max_bound = g[e].maxBound == ROSE_BOUND_INF
+                         ? ROSE_BOUND_INF
+                         : g[e].maxBound + lit_length;
+
+    if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
+        assert(g[u].fixedOffset());
+        // Make offsets absolute.
+        min_bound += g[u].max_offset;
+        if (max_bound != ROSE_BOUND_INF) {
+            max_bound += g[u].max_offset;
+        }
+    }
+
+    assert(max_bound <= ROSE_BOUND_INF);
+    assert(min_bound <= max_bound);
+
+    // CHECK_BOUNDS instruction uses 64-bit bounds, so we can use MAX_OFFSET
+    // (max value of a u64a) to represent ROSE_BOUND_INF.
+    if (max_bound == ROSE_BOUND_INF) {
+        max_bound = MAX_OFFSET;
+    }
+
+    // This instruction should be doing _something_ -- bounds should be tighter
+    // than just {length, inf}.
+    assert(min_bound > lit_length || max_bound < MAX_OFFSET);
+
+    const auto *end_inst = program.end_instruction();
+    program.add_before_end(
+        make_unique<RoseInstrCheckBounds>(min_bound, max_bound, end_inst));
+}
+
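The bounds arithmetic in makeRoleCheckBounds() is easy to check by hand:
edge bounds are measured from the end of the predecessor role, so the
literal's length is added, and anchored-history edges are rebased to
absolute offsets using the predecessor's fixed offset. A worked example
with made-up numbers:

    #include <cassert>
    #include <cstdint>

    static const uint64_t BOUND_INF = ~0ULL; // stands in for ROSE_BOUND_INF

    int main() {
        uint64_t minBound = 2, maxBound = 5; // edge bounds
        uint32_t lit_length = 3;             // this role's literal length
        uint64_t pred_offset = 10;           // fixed offset of anchored pred

        uint64_t min_bound = minBound + lit_length + pred_offset;
        uint64_t max_bound = maxBound == BOUND_INF
                                 ? BOUND_INF
                                 : maxBound + lit_length + pred_offset;

        // The CHECK_BOUNDS instruction would then verify the match end
        // offset lies in [15, 18].
        assert(min_bound == 15 && max_bound == 18);
        return 0;
    }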
+static
+void makeRoleGroups(const RoseGraph &g, ProgramBuild &prog_build,
+                    RoseVertex v, RoseProgram &program) {
+    rose_group groups = g[v].groups;
+    if (!groups) {
+        return;
+    }
+
+    // The set of "already on" groups as we process this vertex is the
+    // intersection of the groups set by our predecessors.
+    assert(in_degree(v, g) > 0);
+    rose_group already_on = ~rose_group{0};
+    for (const auto &u : inv_adjacent_vertices_range(v, g)) {
+        already_on &= prog_build.vertex_group_map.at(u);
+    }
+
+    DEBUG_PRINTF("already_on=0x%llx\n", already_on);
+    DEBUG_PRINTF("squashable=0x%llx\n", prog_build.squashable_groups);
+    DEBUG_PRINTF("groups=0x%llx\n", groups);
+
+    already_on &= ~prog_build.squashable_groups;
+    DEBUG_PRINTF("squashed already_on=0x%llx\n", already_on);
+
+    // We don't *have* to mask off the groups that we know are already on, but
+    // this will make bugs more apparent.
+    groups &= ~already_on;
+
+    if (!groups) {
+        DEBUG_PRINTF("no new groups to set, skipping\n");
+        return;
+    }
+
+    program.add_before_end(make_unique<RoseInstrSetGroups>(groups));
+}
+
+static
+bool checkReachMask(const CharReach &cr, u8 &andmask, u8 &cmpmask) {
+    size_t reach_size = cr.count();
+    assert(reach_size > 0);
+    // check whether entry_size is some power of 2.
+    if ((reach_size - 1) & reach_size) {
+        return false;
+    }
+    make_and_cmp_mask(cr, &andmask, &cmpmask);
+    if ((1 << popcount32((u8)(~andmask))) ^ reach_size) {
+        return false;
+    }
+    return true;
+}
+
+static
+bool checkReachWithFlip(const CharReach &cr, u8 &andmask,
+                        u8 &cmpmask, u8 &flip) {
+    if (checkReachMask(cr, andmask, cmpmask)) {
+        flip = 0;
+        return true;
+    }
+    if (checkReachMask(~cr, andmask, cmpmask)) {
+        flip = 1;
+        return true;
+    }
+    return false;
+}
+
+static
+bool makeRoleByte(const vector<LookEntry> &look, RoseProgram &program) {
+    if (look.size() == 1) {
+        const auto &entry = look[0];
+        u8 andmask_u8, cmpmask_u8;
+        u8 flip;
+        if (!checkReachWithFlip(entry.reach, andmask_u8, cmpmask_u8, flip)) {
+            return false;
+        }
+        s32 checkbyte_offset = verify_s32(entry.offset);
+        DEBUG_PRINTF("CHECK BYTE offset=%d\n", checkbyte_offset);
+        const auto *end_inst = program.end_instruction();
+        auto ri = make_unique<RoseInstrCheckByte>(andmask_u8, cmpmask_u8, flip,
+                                                  checkbyte_offset, end_inst);
+        program.add_before_end(move(ri));
+        return true;
+    }
+    return false;
+}
+
+static
+bool makeRoleMask(const vector<LookEntry> &look, RoseProgram &program) {
+    if (look.back().offset < look.front().offset + 8) {
+        s32 base_offset = verify_s32(look.front().offset);
+        u64a and_mask = 0;
+        u64a cmp_mask = 0;
+        u64a neg_mask = 0;
+        for (const auto &entry : look) {
+            u8 andmask_u8, cmpmask_u8, flip;
+            if (!checkReachWithFlip(entry.reach, andmask_u8,
+                                    cmpmask_u8, flip)) {
+                return false;
+            }
+            DEBUG_PRINTF("entry offset %d\n", entry.offset);
+            u32 shift = (entry.offset - base_offset) << 3;
+            and_mask |= (u64a)andmask_u8 << shift;
+            cmp_mask |= (u64a)cmpmask_u8 << shift;
+            if (flip) {
+                neg_mask |= 0xffLLU << shift;
+            }
+        }
+        DEBUG_PRINTF("CHECK MASK and_mask=%llx cmp_mask=%llx\n",
+                     and_mask, cmp_mask);
+        const auto *end_inst = program.end_instruction();
+        auto ri = make_unique<RoseInstrCheckMask>(and_mask, cmp_mask, neg_mask,
+                                                  base_offset, end_inst);
+        program.add_before_end(move(ri));
+        return true;
+    }
+    return false;
+}
+
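checkReachMask() accepts exactly those reaches expressible as
(c & and_mask) == cmp_mask: power-of-two-sized sets whose members differ
only in "don't care" bits. A worked example for the classic case,
case-insensitive 'a' (0x41 and 0x61 differ only in bit 5, the ASCII case
bit):

    #include <cassert>

    int main() {
        const unsigned char and_mask = 0xdf; // clear bit 5 (the case bit)
        const unsigned char cmp_mask = 0x41; // 'A'

        for (int c = 0; c < 256; c++) {
            bool in_reach = (c == 'A' || c == 'a');
            // The masked compare matches exactly the two-element reach.
            assert(((c & and_mask) == cmp_mask) == in_reach);
        }
        return 0;
    }

makeRoleMask() then packs up to eight such byte checks into one 64-bit
AND/CMP pair, with neg_mask flagging the bytes whose sense was flipped.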
+static UNUSED
+string convertMaskstoString(u8 *p, int byte_len) {
+    string s;
+    for (int i = 0; i < byte_len; i++) {
+        u8 hi = *p >> 4;
+        u8 lo = *p & 0xf;
+        s += (char)(hi + (hi < 10 ? 48 : 87));
+        s += (char)(lo + (lo < 10 ? 48 : 87));
+        p++;
+    }
+    return s;
+}
+
+static
+bool makeRoleMask32(const vector<LookEntry> &look,
+                    RoseProgram &program) {
+    if (look.back().offset >= look.front().offset + 32) {
+        return false;
+    }
+    s32 base_offset = verify_s32(look.front().offset);
+    array<u8, 32> and_mask, cmp_mask;
+    and_mask.fill(0);
+    cmp_mask.fill(0);
+    u32 neg_mask = 0;
+    for (const auto &entry : look) {
+        u8 andmask_u8, cmpmask_u8, flip;
+        if (!checkReachWithFlip(entry.reach, andmask_u8,
+                                cmpmask_u8, flip)) {
+            return false;
+        }
+        u32 shift = entry.offset - base_offset;
+        assert(shift < 32);
+        and_mask[shift] = andmask_u8;
+        cmp_mask[shift] = cmpmask_u8;
+        if (flip) {
+            neg_mask |= 1 << shift;
+        }
+    }
+
+    DEBUG_PRINTF("and_mask %s\n",
+                 convertMaskstoString(and_mask.data(), 32).c_str());
+    DEBUG_PRINTF("cmp_mask %s\n",
+                 convertMaskstoString(cmp_mask.data(), 32).c_str());
+    DEBUG_PRINTF("neg_mask %08x\n", neg_mask);
+    DEBUG_PRINTF("base_offset %d\n", base_offset);
+
+    const auto *end_inst = program.end_instruction();
+    auto ri = make_unique<RoseInstrCheckMask32>(and_mask, cmp_mask, neg_mask,
+                                                base_offset, end_inst);
+    program.add_before_end(move(ri));
+    return true;
+}
+
+// Sorting by the size of every bucket.
+// Used in map<u32, vector<s8>, cmpNibble>.
+struct cmpNibble {
+    bool operator()(const u32 data1, const u32 data2) const {
+        u32 size1 = popcount32(data1 >> 16) * popcount32(data1 << 16);
+        u32 size2 = popcount32(data2 >> 16) * popcount32(data2 << 16);
+        return std::tie(size1, data1) < std::tie(size2, data2);
+    }
+};
+
+// Insert all pairs of bucket and offset into buckets.
+static really_inline
+void getAllBuckets(const vector<LookEntry> &look,
+                   map<u32, vector<s8>, cmpNibble> &buckets, u64a &neg_mask) {
+    s32 base_offset = verify_s32(look.front().offset);
+    for (const auto &entry : look) {
+        CharReach cr = entry.reach;
+        // Flip heavy character classes to save buckets.
+        if (cr.count() > 128) {
+            cr.flip();
+        } else {
+            neg_mask ^= 1ULL << (entry.offset - base_offset);
+        }
+        map<u16, u16> lo2hi;
+        // We treat the ASCII table as a 16x16 grid.
+        // Push every row in cr into lo2hi and mark the row number.
+        for (size_t i = cr.find_first(); i != CharReach::npos;) {
+            u8 it_hi = i >> 4;
+            u16 low_encode = 0;
+            while (i != CharReach::npos && (i >> 4) == it_hi) {
+                low_encode |= 1 << (i & 0xf);
+                i = cr.find_next(i);
+            }
+            lo2hi[low_encode] |= 1 << it_hi;
+        }
+        for (const auto &it : lo2hi) {
+            u32 hi_lo = (it.second << 16) | it.first;
+            buckets[hi_lo].push_back(entry.offset);
+        }
+    }
+}
+
+// Once we have a new bucket, we'll try to combine it with all old buckets.
+static really_inline
+void nibUpdate(map<u32, u16> &nib, u32 hi_lo) {
+    u16 hi = hi_lo >> 16;
+    u16 lo = hi_lo & 0xffff;
+    for (const auto pairs : nib) {
+        u32 old = pairs.first;
+        if ((old >> 16) == hi || (old & 0xffff) == lo) {
+            if (!nib[old | hi_lo]) {
+                nib[old | hi_lo] = nib[old] | nib[hi_lo];
+            }
+        }
+    }
+}
+
+static really_inline
+void nibMaskUpdate(array<u8, 32> &mask, u32 data, u8 bit_index) {
+    for (u8 index = 0; data > 0; data >>= 1, index++) {
+        if (data & 1) {
+            // 0 ~ 7 bucket in first 16 bytes,
+            // 8 ~ 15 bucket in second 16 bytes.
+            if (bit_index >= 8) {
+                mask[index + 16] |= 1 << (bit_index - 8);
+            } else {
+                mask[index] |= 1 << bit_index;
+            }
+        }
+    }
+}
+
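getAllBuckets() views the byte space as a 16x16 grid: each high nibble
selects a row, each row's occupied low nibbles form a 16-bit column
pattern, and rows sharing a pattern can share a shufti bucket. A
self-contained sketch of that decomposition (reach set chosen purely for
illustration):

    #include <cassert>
    #include <cstdint>
    #include <map>
    #include <set>

    int main() {
        std::set<uint8_t> reach = {0x61, 0x62, 0x71, 0x72}; // a, b, q, r
        std::map<uint16_t, uint16_t> lo2hi; // column pattern -> row bits

        for (int hi = 0; hi < 16; hi++) {
            uint16_t low_encode = 0;
            for (int lo = 0; lo < 16; lo++) {
                if (reach.count((uint8_t)((hi << 4) | lo))) {
                    low_encode |= 1u << lo;
                }
            }
            if (low_encode) {
                lo2hi[low_encode] |= 1u << hi;
            }
        }

        // Rows 0x6 and 0x7 use identical column patterns {1, 2}, so they
        // collapse to a single bucket candidate.
        assert(lo2hi.size() == 1);
        assert(lo2hi[(1u << 1) | (1u << 2)] == ((1u << 6) | (1u << 7)));
        return 0;
    }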
+static
+bool getShuftiMasks(const vector<LookEntry> &look, array<u8, 32> &hi_mask,
+                    array<u8, 32> &lo_mask, u8 *bucket_select_hi,
+                    u8 *bucket_select_lo, u64a &neg_mask,
+                    u8 &bit_idx, size_t len) {
+    map<u32, u16> nib; // map every bucket to its bucket number.
+    map<u32, vector<s8>, cmpNibble> bucket2offsets;
+    s32 base_offset = look.front().offset;
+
+    bit_idx = 0;
+    neg_mask = ~0ULL;
+
+    getAllBuckets(look, bucket2offsets, neg_mask);
+
+    for (const auto &it : bucket2offsets) {
+        u32 hi_lo = it.first;
+        // New bucket.
+        if (!nib[hi_lo]) {
+            if ((bit_idx >= 8 && len == 64) || bit_idx >= 16) {
+                return false;
+            }
+            nib[hi_lo] = 1 << bit_idx;
+
+            nibUpdate(nib, hi_lo);
+            nibMaskUpdate(hi_mask, hi_lo >> 16, bit_idx);
+            nibMaskUpdate(lo_mask, hi_lo & 0xffff, bit_idx);
+            bit_idx++;
+        }
+
+        DEBUG_PRINTF("hi_lo %x bucket %x\n", hi_lo, nib[hi_lo]);
+
+        // Update bucket_select_mask.
+        u8 nib_hi = nib[hi_lo] >> 8;
+        u8 nib_lo = nib[hi_lo] & 0xff;
+        for (const auto offset : it.second) {
+            bucket_select_hi[offset - base_offset] |= nib_hi;
+            bucket_select_lo[offset - base_offset] |= nib_lo;
+        }
+    }
+    return true;
+}
+
+static
+unique_ptr<RoseInstruction>
+makeCheckShufti16x8(u32 offset_range, u8 bucket_idx,
+                    const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
+                    const array<u8, 32> &bucket_select_mask,
+                    u32 neg_mask, s32 base_offset,
+                    const RoseInstruction *end_inst) {
+    if (offset_range > 16 || bucket_idx > 8) {
+        return nullptr;
+    }
+    array<u8, 32> nib_mask;
+    array<u8, 16> bucket_select_mask_16;
+    copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin());
+    copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16);
+    copy(bucket_select_mask.begin(), bucket_select_mask.begin() + 16,
+         bucket_select_mask_16.begin());
+    return make_unique<RoseInstrCheckShufti16x8>
+           (nib_mask, bucket_select_mask_16,
+            neg_mask & 0xffff, base_offset, end_inst);
+}
+
+static
+unique_ptr<RoseInstruction>
+makeCheckShufti32x8(u32 offset_range, u8 bucket_idx,
+                    const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
+                    const array<u8, 32> &bucket_select_mask,
+                    u32 neg_mask, s32 base_offset,
+                    const RoseInstruction *end_inst) {
+    if (offset_range > 32 || bucket_idx > 8) {
+        return nullptr;
+    }
+
+    array<u8, 16> hi_mask_16;
+    array<u8, 16> lo_mask_16;
+    copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_16.begin());
+    copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_16.begin());
+    return make_unique<RoseInstrCheckShufti32x8>
+           (hi_mask_16, lo_mask_16, bucket_select_mask,
+            neg_mask, base_offset, end_inst);
+}
+
+static
+unique_ptr<RoseInstruction>
+makeCheckShufti16x16(u32 offset_range, u8 bucket_idx,
+                     const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
+                     const array<u8, 32> &bucket_select_mask_lo,
+                     const array<u8, 32> &bucket_select_mask_hi,
+                     u32 neg_mask, s32 base_offset,
+                     const RoseInstruction *end_inst) {
+    if (offset_range > 16 || bucket_idx > 16) {
+        return nullptr;
+    }
+
+    array<u8, 32> bucket_select_mask_32;
+    copy(bucket_select_mask_lo.begin(), bucket_select_mask_lo.begin() + 16,
+         bucket_select_mask_32.begin());
+    copy(bucket_select_mask_hi.begin(), bucket_select_mask_hi.begin() + 16,
+         bucket_select_mask_32.begin() + 16);
+    return make_unique<RoseInstrCheckShufti16x16>
+           (hi_mask, lo_mask, bucket_select_mask_32,
+            neg_mask & 0xffff, base_offset, end_inst);
+}
+static
+unique_ptr<RoseInstruction>
+makeCheckShufti32x16(u32 offset_range, u8 bucket_idx,
+                     const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
+                     const array<u8, 32> &bucket_select_mask_lo,
+                     const array<u8, 32> &bucket_select_mask_hi,
+                     u32 neg_mask, s32 base_offset,
+                     const RoseInstruction *end_inst) {
+    if (offset_range > 32 || bucket_idx > 16) {
+        return nullptr;
+    }
+
+    return make_unique<RoseInstrCheckShufti32x16>
+           (hi_mask, lo_mask, bucket_select_mask_hi,
+            bucket_select_mask_lo, neg_mask, base_offset, end_inst);
+}
+
+static
+bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program) {
+
+    s32 base_offset = verify_s32(look.front().offset);
+    if (look.back().offset >= base_offset + 32) {
+        return false;
+    }
+
+    u8 bucket_idx = 0; // number of buckets
+    u64a neg_mask_64;
+    array<u8, 32> hi_mask;
+    array<u8, 32> lo_mask;
+    array<u8, 32> bucket_select_hi;
+    array<u8, 32> bucket_select_lo;
+    hi_mask.fill(0);
+    lo_mask.fill(0);
+    bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8.
+    bucket_select_lo.fill(0);
+
+    if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi.data(),
+                        bucket_select_lo.data(), neg_mask_64, bucket_idx, 32)) {
+        return false;
+    }
+    u32 neg_mask = (u32)neg_mask_64;
+
+    DEBUG_PRINTF("hi_mask %s\n",
+                 convertMaskstoString(hi_mask.data(), 32).c_str());
+    DEBUG_PRINTF("lo_mask %s\n",
+                 convertMaskstoString(lo_mask.data(), 32).c_str());
+    DEBUG_PRINTF("bucket_select_hi %s\n",
+                 convertMaskstoString(bucket_select_hi.data(), 32).c_str());
+    DEBUG_PRINTF("bucket_select_lo %s\n",
+                 convertMaskstoString(bucket_select_lo.data(), 32).c_str());
+
+    const auto *end_inst = program.end_instruction();
+    s32 offset_range = look.back().offset - base_offset + 1;
+
+    auto ri = makeCheckShufti16x8(offset_range, bucket_idx, hi_mask, lo_mask,
+                                  bucket_select_lo, neg_mask, base_offset,
+                                  end_inst);
+    if (!ri) {
+        ri = makeCheckShufti32x8(offset_range, bucket_idx, hi_mask, lo_mask,
+                                 bucket_select_lo, neg_mask, base_offset,
+                                 end_inst);
+    }
+    if (!ri) {
+        ri = makeCheckShufti16x16(offset_range, bucket_idx, hi_mask, lo_mask,
+                                  bucket_select_lo, bucket_select_hi,
+                                  neg_mask, base_offset, end_inst);
+    }
+    if (!ri) {
+        ri = makeCheckShufti32x16(offset_range, bucket_idx, hi_mask, lo_mask,
+                                  bucket_select_lo, bucket_select_hi,
+                                  neg_mask, base_offset, end_inst);
+    }
+    assert(ri);
+    program.add_before_end(move(ri));
+
+    return true;
+}
+
+/**
+ * Builds a lookaround instruction, or an appropriate specialization if one is
+ * available.
+ */
+static
+void makeLookaroundInstruction(const vector<LookEntry> &look,
+                               RoseProgram &program) {
+    assert(!look.empty());
+
+    if (makeRoleByte(look, program)) {
+        return;
+    }
+
+    if (look.size() == 1) {
+        s8 offset = look.begin()->offset;
+        const CharReach &reach = look.begin()->reach;
+        auto ri = make_unique<RoseInstrCheckSingleLookaround>(
+            offset, reach, program.end_instruction());
+        program.add_before_end(move(ri));
+        return;
+    }
+
+    if (makeRoleMask(look, program)) {
+        return;
+    }
+
+    if (makeRoleMask32(look, program)) {
+        return;
+    }
+
+    if (makeRoleShufti(look, program)) {
+        return;
+    }
+
+    auto ri = make_unique<RoseInstrCheckLookaround>(look,
+                                                    program.end_instruction());
+    program.add_before_end(move(ri));
+}
+
+static
+void makeCheckLitMaskInstruction(const RoseBuildImpl &build, u32 lit_id,
+                                 RoseProgram &program) {
+    const auto &info = build.literal_info.at(lit_id);
+    if (!info.requires_benefits) {
+        return;
+    }
+
+    vector<LookEntry> look;
+
+    const ue2_literal &s = build.literals.at(lit_id).s;
+    DEBUG_PRINTF("building mask for lit %u: %s\n", lit_id,
+                 dumpString(s).c_str());
+    assert(s.length() <= MAX_MASK2_WIDTH);
+    s32 i = 0 - s.length();
+    for (const auto &e : s) {
+        if (!e.nocase) {
+            look.emplace_back(verify_s8(i), e);
+        }
+        i++;
+    }
+
+    assert(!look.empty());
+    makeLookaroundInstruction(look, program);
+}
+
+static
+void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, u32 lit_id,
+                                  const vector<RoseEdge> &lit_edges,
+                                  u32 floatingMinLiteralMatchOffset,
+                                  RoseProgram &prog) {
+    if (lit_edges.empty()) {
+        return;
+    }
+
+    if (floatingMinLiteralMatchOffset == 0) {
+        return;
+    }
+
+    RoseVertex v = target(lit_edges.front(), build.g);
+    if (!build.isFloating(v)) {
+        return;
+    }
+
+    const auto &lit = build.literals.at(lit_id);
+    size_t min_len = lit.elength();
+    u32 min_offset = findMinOffset(build, lit_id);
+    DEBUG_PRINTF("has min_len=%zu, min_offset=%u, global min is %u\n", min_len,
+                 min_offset, floatingMinLiteralMatchOffset);
+
+    // If we can't match before the min offset, we don't need the check.
+    if (min_len >= floatingMinLiteralMatchOffset) {
+        DEBUG_PRINTF("no need for check, min is %u\n",
+                     floatingMinLiteralMatchOffset);
+        return;
+    }
+
+    assert(min_offset >= floatingMinLiteralMatchOffset);
+    assert(min_offset < UINT32_MAX);
+
+    DEBUG_PRINTF("adding lit early check, min_offset=%u\n", min_offset);
+    const auto *end = prog.end_instruction();
+    prog.add_before_end(make_unique<RoseInstrCheckLitEarly>(min_offset, end));
+}
+
+static
+void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 lit_id,
+                               RoseProgram &prog) {
+    const auto &info = build.literal_info.at(lit_id);
+
+    if (!info.group_mask) {
+        return;
+    }
+    prog.add_before_end(make_unique<RoseInstrCheckGroups>(info.group_mask));
+}
+
+static
+bool hasDelayedLiteral(const RoseBuildImpl &build,
+                       const vector<RoseEdge> &lit_edges) {
+    auto is_delayed = [&build](u32 lit_id) { return build.isDelayed(lit_id); };
+    for (const auto &e : lit_edges) {
+        auto v = target(e, build.g);
+        const auto &lits = build.g[v].literals;
+        if (any_of(begin(lits), end(lits), is_delayed)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static
+RoseProgram makeLitInitialProgram(const RoseBuildImpl &build,
+                                  ProgramBuild &prog_build, u32 lit_id,
+                                  const vector<RoseEdge> &lit_edges,
+                                  bool is_anchored_replay_program) {
+    RoseProgram program;
+
+    // Check long literal info.
+    if (!build.isDelayed(lit_id)) {
+        makeCheckLiteralInstruction(build.literals.at(lit_id),
+                                    prog_build.longLitLengthThreshold,
+                                    program, build.cc);
+    }
+
+    // Check lit mask.
+    makeCheckLitMaskInstruction(build, lit_id, program);
+
+    // Check literal groups. This is an optimisation that we only perform for
+    // delayed literals, as their groups may be switched off; ordinarily, we
+    // can trust the HWLM matcher.
+    if (hasDelayedLiteral(build, lit_edges)) {
+        makeGroupCheckInstruction(build, lit_id, program);
+    }
+
+    // Add instructions for pushing delayed matches, if there are any.
+    makePushDelayedInstructions(build.literals, prog_build,
+                                build.literal_info.at(lit_id).delayed_ids,
+                                program);
+
+    // Add pre-check for early literals in the floating table.
+    makeCheckLitEarlyInstruction(build, lit_id, lit_edges,
+                                 prog_build.floatingMinLiteralMatchOffset,
+                                 program);
+
+    /* Check if we are able to deliver matches from the anchored table now */
+    if (!is_anchored_replay_program) {
+        makeAnchoredLiteralDelay(build, prog_build, lit_id, program);
+    }
+
+    return program;
+}
+
+static
+bool makeRoleMultipathShufti(const vector<vector<LookEntry>> &multi_look,
+                             RoseProgram &program) {
+    if (multi_look.empty()) {
+        return false;
+    }
+
+    // find the base offset
+    assert(!multi_look[0].empty());
+    s32 base_offset = multi_look[0].front().offset;
+    s32 last_start = base_offset;
+    s32 end_offset = multi_look[0].back().offset;
+    size_t multi_len = 0;
+
+    for (const auto &look : multi_look) {
+        assert(look.size() > 0);
+        multi_len += look.size();
+
+        LIMIT_TO_AT_MOST(&base_offset, look.front().offset);
+        ENSURE_AT_LEAST(&last_start, look.front().offset);
+        ENSURE_AT_LEAST(&end_offset, look.back().offset);
+    }
+
+    assert(last_start < 0);
+
+    if (end_offset - base_offset >= MULTIPATH_MAX_LEN) {
+        return false;
+    }
+
+    if (multi_len <= 16) {
+        multi_len = 16;
+    } else if (multi_len <= 32) {
+        multi_len = 32;
+    } else if (multi_len <= 64) {
+        multi_len = 64;
+    } else {
+        DEBUG_PRINTF("too long for multi-path\n");
+        return false;
+    }
+
+    vector<LookEntry> linear_look;
+    array<u8, MULTIPATH_MAX_LEN> data_select_mask;
+    data_select_mask.fill(0);
+    u64a hi_bits_mask = 0;
+    u64a lo_bits_mask = 0;
+
+    for (const auto &look : multi_look) {
+        assert(linear_look.size() < 64);
+        lo_bits_mask |= 1LLU << linear_look.size();
+        for (const auto &entry : look) {
+            assert(entry.offset - base_offset < MULTIPATH_MAX_LEN);
+            data_select_mask[linear_look.size()] =
+                verify_u8(entry.offset - base_offset);
+            linear_look.emplace_back(verify_s8(linear_look.size()),
+                                     entry.reach);
+        }
+        hi_bits_mask |= 1LLU << (linear_look.size() - 1);
+    }
+
+    u8 bit_index = 0; // number of buckets
+    u64a neg_mask;
+    array<u8, 32> hi_mask;
+    array<u8, 32> lo_mask;
+    array<u8, 64> bucket_select_hi;
+    array<u8, 64> bucket_select_lo;
+    hi_mask.fill(0);
+    lo_mask.fill(0);
+    bucket_select_hi.fill(0);
+    bucket_select_lo.fill(0);
+
+    if (!getShuftiMasks(linear_look, hi_mask, lo_mask, bucket_select_hi.data(),
+                        bucket_select_lo.data(), neg_mask, bit_index,
+                        multi_len)) {
+        return false;
+    }
+
+    DEBUG_PRINTF("hi_mask %s\n",
+                 convertMaskstoString(hi_mask.data(), 16).c_str());
+    DEBUG_PRINTF("lo_mask %s\n",
+                 convertMaskstoString(lo_mask.data(), 16).c_str());
+    DEBUG_PRINTF("bucket_select_hi %s\n",
+                 convertMaskstoString(bucket_select_hi.data(), 64).c_str());
+    DEBUG_PRINTF("bucket_select_lo %s\n",
+                 convertMaskstoString(bucket_select_lo.data(), 64).c_str());
+    DEBUG_PRINTF("data_select_mask %s\n",
+                 convertMaskstoString(data_select_mask.data(), 64).c_str());
+    DEBUG_PRINTF("hi_bits_mask %llx\n", hi_bits_mask);
+    DEBUG_PRINTF("lo_bits_mask %llx\n", lo_bits_mask);
+    DEBUG_PRINTF("neg_mask %llx\n", neg_mask);
+    DEBUG_PRINTF("base_offset %d\n", base_offset);
+    DEBUG_PRINTF("last_start %d\n", last_start);
+
+    // Since we don't have 16x16 now, just call 32x16 instead.
+
+static
+void makeRoleMultipathLookaround(const vector<vector<LookEntry>> &multi_look,
+                                 RoseProgram &program) {
+    assert(!multi_look.empty());
+    assert(multi_look.size() <= MAX_LOOKAROUND_PATHS);
+    vector<vector<LookEntry>> ordered_look;
+    set<s32> look_offset;
+
+    assert(!multi_look[0].empty());
+    s32 last_start = multi_look[0][0].offset;
+
+    // Build the offset table.
+    for (const auto &look : multi_look) {
+        assert(look.size() > 0);
+        last_start = max(last_start, (s32)look.begin()->offset);
+
+        for (const auto &t : look) {
+            look_offset.insert(t.offset);
+        }
+    }
+
+    array<u8, MULTIPATH_MAX_LEN> start_mask;
+    if (multi_look.size() < MAX_LOOKAROUND_PATHS) {
+        start_mask.fill((1 << multi_look.size()) - 1);
+    } else {
+        start_mask.fill(0xff);
+    }
+
+    u32 path_idx = 0;
+    for (const auto &look : multi_look) {
+        for (const auto &t : look) {
+            assert(t.offset >= (int)*look_offset.begin());
+            size_t update_offset = t.offset - *look_offset.begin() + 1;
+            if (update_offset < start_mask.size()) {
+                start_mask[update_offset] &= ~(1 << path_idx);
+            }
+        }
+        path_idx++;
+    }
+
+    for (u32 i = 1; i < MULTIPATH_MAX_LEN; i++) {
+        start_mask[i] &= start_mask[i - 1];
+        DEBUG_PRINTF("start_mask[%u] = %x\n", i, start_mask[i]);
+    }
+
+    assert(look_offset.size() <= MULTIPATH_MAX_LEN);
+
+    assert(last_start < 0);
+
+    for (const auto &offset : look_offset) {
+        vector<LookEntry> multi_entry;
+        multi_entry.resize(MAX_LOOKAROUND_PATHS);
+
+        for (size_t i = 0; i < multi_look.size(); i++) {
+            for (const auto &t : multi_look[i]) {
+                if (t.offset == offset) {
+                    multi_entry[i] = t;
+                }
+            }
+        }
+        ordered_look.emplace_back(multi_entry);
+    }
+
+    auto ri = make_unique<RoseInstrMultipathLookaround>(move(ordered_look),
+                                                        last_start, start_mask,
+                                                        program.end_instruction());
+    program.add_before_end(move(ri));
+}
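/* Illustrative sketch (not from this patch): what a multipath lookaround
 * check computes. Several alternative (offset, character-class) sequences are
 * examined around the match point, and the check passes if any one path
 * matches in full. Types are simplified stand-ins; bounds checks omitted. */
#include <bitset>
#include <cstddef>
#include <vector>

struct Look {
    int offset;             // position relative to the current match offset
    std::bitset<256> reach; // bytes accepted at that position
};

static bool anyPathMatches(const std::vector<std::vector<Look>> &paths,
                           const unsigned char *buf, size_t cur) {
    for (const auto &path : paths) { // try each alternative path
        bool ok = true;
        for (const auto &e : path) { // every entry on the path must accept
            if (!e.reach.test(buf[cur + e.offset])) {
                ok = false;
                break;
            }
        }
        if (ok) {
            return true; // one fully-matching path is enough
        }
    }
    return false;
}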
+
+static
+void makeRoleLookaround(const RoseBuildImpl &build,
+                        const map<RoseVertex, left_build_info> &leftfix_info,
+                        RoseVertex v, RoseProgram &program) {
+    if (!build.cc.grey.roseLookaroundMasks) {
+        return;
+    }
+
+    vector<vector<LookEntry>> looks;
+
+    // Lookaround from leftfix (mandatory).
+    if (contains(leftfix_info, v) && leftfix_info.at(v).has_lookaround) {
+        DEBUG_PRINTF("using leftfix lookaround\n");
+        looks = leftfix_info.at(v).lookaround;
+    }
+
+    // We may be able to find more lookaround info (advisory) and merge it
+    // in.
+    if (looks.size() <= 1) {
+        vector<LookEntry> look;
+        vector<LookEntry> look_more;
+        if (!looks.empty()) {
+            look = move(looks.front());
+        }
+        findLookaroundMasks(build, v, look_more);
+        mergeLookaround(look, look_more);
+        if (!look.empty()) {
+            makeLookaroundInstruction(look, program);
+        }
+        return;
+    }
+
+    if (!makeRoleMultipathShufti(looks, program)) {
+        assert(looks.size() <= 8);
+        makeRoleMultipathLookaround(looks, program);
+    }
+}
+
+static
+void makeRoleSuffix(const RoseBuildImpl &build,
+                    const map<suffix_id, u32> &suffixes,
+                    const map<u32, engine_info> &engine_info_by_queue,
+                    RoseVertex v, RoseProgram &prog) {
+    const auto &g = build.g;
+    if (!g[v].suffix) {
+        return;
+    }
+    assert(contains(suffixes, g[v].suffix));
+    u32 queue = suffixes.at(g[v].suffix);
+    u32 event;
+    assert(contains(engine_info_by_queue, queue));
+    const auto eng_info = engine_info_by_queue.at(queue);
+    if (isContainerType(eng_info.type)) {
+        auto tamaProto = g[v].suffix.tamarama.get();
+        assert(tamaProto);
+        event = (u32)MQE_TOP_FIRST +
+                tamaProto->top_remap.at(make_pair(g[v].index,
+                                                  g[v].suffix.top));
+        assert(event < MQE_INVALID);
+    } else if (isMultiTopType(eng_info.type)) {
+        assert(!g[v].suffix.haig);
+        event = (u32)MQE_TOP_FIRST + g[v].suffix.top;
+        assert(event < MQE_INVALID);
+    } else {
+        // DFAs/Puffs have no MQE_TOP_N support, so they get a classic TOP
+        // event.
+        assert(!g[v].suffix.graph || onlyOneTop(*g[v].suffix.graph));
+        event = MQE_TOP;
+    }
+
+    prog.add_before_end(make_unique<RoseInstrTriggerSuffix>(queue, event));
+}
+
+static
+void addInfixTriggerInstructions(vector<TriggerInfo> triggers,
+                                 RoseProgram &prog) {
+    // Order, de-dupe and add instructions to the end of the program.
+    sort_and_unique(triggers, [](const TriggerInfo &a, const TriggerInfo &b) {
+        return tie(a.cancel, a.queue, a.event) <
+               tie(b.cancel, b.queue, b.event);
+    });
+    for (const auto &ti : triggers) {
+        prog.add_before_end(
+            make_unique<RoseInstrTriggerInfix>(ti.cancel, ti.queue, ti.event));
+    }
+}
+
+static
+void makeRoleInfixTriggers(const RoseBuildImpl &build,
+                           const map<RoseVertex, left_build_info> &leftfix_info,
+                           const map<u32, engine_info> &engine_info_by_queue,
+                           RoseVertex u, RoseProgram &program) {
+    const auto &g = build.g;
+
+    vector<TriggerInfo> triggers;
+
+    for (const auto &e : out_edges_range(u, g)) {
+        RoseVertex v = target(e, g);
+        if (!g[v].left) {
+            continue;
+        }
+
+        assert(contains(leftfix_info, v));
+        const left_build_info &lbi = leftfix_info.at(v);
+        if (lbi.has_lookaround) {
+            continue;
+        }
+
+        assert(contains(engine_info_by_queue, lbi.queue));
+        const auto &eng_info = engine_info_by_queue.at(lbi.queue);
+
+        // DFAs have no TOP_N support, so they get a classic MQE_TOP event.
+        u32 top;
+        if (isContainerType(eng_info.type)) {
+            auto tamaProto = g[v].left.tamarama.get();
+            assert(tamaProto);
+            top = MQE_TOP_FIRST + tamaProto->top_remap.at(
+                      make_pair(g[v].index, g[e].rose_top));
+            assert(top < MQE_INVALID);
+        } else if (!isMultiTopType(eng_info.type)) {
+            assert(num_tops(g[v].left) == 1);
+            top = MQE_TOP;
+        } else {
+            top = MQE_TOP_FIRST + g[e].rose_top;
+            assert(top < MQE_INVALID);
+        }
+
+        triggers.emplace_back(g[e].rose_cancel_prev_top, lbi.queue, top);
+    }
+
+    addInfixTriggerInstructions(move(triggers), program);
+}
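/* Illustrative sketch (not from this patch): the sort-then-unique pattern
 * used by addInfixTriggerInstructions, with std::tie providing the ordering.
 * TriggerRec stands in for TriggerInfo. */
#include <algorithm>
#include <cstdint>
#include <tuple>
#include <vector>

struct TriggerRec {
    uint8_t cancel;
    uint32_t queue;
    uint32_t event;
};

static void sortAndUnique(std::vector<TriggerRec> &v) {
    std::sort(v.begin(), v.end(), [](const TriggerRec &a, const TriggerRec &b) {
        return std::tie(a.cancel, a.queue, a.event) <
               std::tie(b.cancel, b.queue, b.event);
    });
    v.erase(std::unique(v.begin(), v.end(),
                        [](const TriggerRec &a, const TriggerRec &b) {
                            return std::tie(a.cancel, a.queue, a.event) ==
                                   std::tie(b.cancel, b.queue, b.event);
                        }),
            v.end());
}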
+
+/**
+ * \brief True if the given vertex is a role that can only be switched on at
+ * EOD.
+ */
+static
+bool onlyAtEod(const RoseBuildImpl &tbi, RoseVertex v) {
+    const RoseGraph &g = tbi.g;
+
+    // All such roles have only (0,0) edges to vertices with the eod_accept
+    // property, and no other effects (suffixes, ordinary reports, etc).
+
+    if (isLeafNode(v, g) || !g[v].reports.empty() || g[v].suffix) {
+        return false;
+    }
+
+    for (const auto &e : out_edges_range(v, g)) {
+        RoseVertex w = target(e, g);
+        if (!g[w].eod_accept) {
+            return false;
+        }
+        assert(!g[w].reports.empty());
+        assert(g[w].literals.empty());
+
+        if (g[e].minBound || g[e].maxBound) {
+            return false;
+        }
+    }
+
+    /* There is no point in enforcing this check at runtime if this role is
+     * only fired by the EOD event literal. */
+    if (tbi.eod_event_literal_id != MO_INVALID_IDX &&
+        g[v].literals.size() == 1 &&
+        *g[v].literals.begin() == tbi.eod_event_literal_id) {
+        return false;
+    }
+
+    return true;
+}
+
+static
+void addCheckOnlyEodInstruction(RoseProgram &prog) {
+    DEBUG_PRINTF("only at eod\n");
+    const auto *end_inst = prog.end_instruction();
+    prog.add_before_end(make_unique<RoseInstrCheckOnlyEod>(end_inst));
+}
+
+static
+void makeRoleEagerEodReports(const RoseBuildImpl &build,
+                             const map<RoseVertex, left_build_info> &leftfix_info,
+                             bool needs_catchup, RoseVertex v,
+                             RoseProgram &program) {
+    RoseProgram eod_program;
+
+    for (const auto &e : out_edges_range(v, build.g)) {
+        if (canEagerlyReportAtEod(build, e)) {
+            RoseProgram block;
+            makeRoleReports(build, leftfix_info, needs_catchup,
+                            target(e, build.g), block);
+            eod_program.add_block(move(block));
+        }
+    }
+
+    if (eod_program.empty()) {
+        return;
+    }
+
+    if (!onlyAtEod(build, v)) {
+        // The rest of our program wasn't EOD anchored, so we need to guard
+        // these reports with a check.
+        addCheckOnlyEodInstruction(program);
+    }
+
+    program.add_before_end(move(eod_program));
+}
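/* Illustrative sketch (not from this patch): the effect of a CHECK_ONLY_EOD
 * guard is to skip a block of report work unless the scan has reached end of
 * data. EodContext and the callback type are hypothetical stand-ins. */
#include <cstddef>
#include <functional>
#include <vector>

struct EodContext {
    size_t offset; // offset currently being processed
    size_t eod;    // total stream length, known once we are at EOD
};

static void runEodBlock(const EodContext &ctx,
                        const std::vector<std::function<void()>> &reports) {
    if (ctx.offset != ctx.eod) {
        return; // guard failed: jump past the eager EOD report block
    }
    for (const auto &r : reports) {
        r(); // deliver the eagerly-compiled EOD reports
    }
}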
+
+/* Makes a program for a role/vertex given a specific pred/in_edge. */
+static
+RoseProgram makeRoleProgram(const RoseBuildImpl &build,
+                            const map<RoseVertex, left_build_info> &leftfix_info,
+                            const map<suffix_id, u32> &suffixes,
+                            const map<u32, engine_info> &engine_info_by_queue,
+                            const unordered_map<RoseVertex, u32> &roleStateIndices,
+                            ProgramBuild &prog_build, const RoseEdge &e) {
+    const RoseGraph &g = build.g;
+    auto v = target(e, g);
+
+    RoseProgram program;
+
+    // First, add program instructions that enforce preconditions without
+    // effects.
+
+    if (onlyAtEod(build, v)) {
+        addCheckOnlyEodInstruction(program);
+    }
+
+    if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
+        makeRoleCheckBounds(build, v, e, program);
+    }
+
+    // This role program may be triggered by different predecessors, with
+    // different offset bounds. We must ensure we put this check/set operation
+    // after the bounds check to deal with this case.
+    if (in_degree(v, g) > 1) {
+        assert(!build.isRootSuccessor(v));
+        makeRoleCheckNotHandled(prog_build, v, program);
+    }
+
+    makeRoleLookaround(build, leftfix_info, v, program);
+    makeRoleCheckLeftfix(build, leftfix_info, v, program);
+
+    // Next, we can add program instructions that have effects. This must be
+    // done as a series of blocks, as some of them (like reports) are
+    // escapable.
+
+    RoseProgram effects_block;
+
+    RoseProgram reports_block;
+    makeRoleReports(build, leftfix_info, prog_build.needs_catchup, v,
+                    reports_block);
+    effects_block.add_block(move(reports_block));
+
+    RoseProgram infix_block;
+    makeRoleInfixTriggers(build, leftfix_info, engine_info_by_queue, v,
+                          infix_block);
+    effects_block.add_block(move(infix_block));
+
+    // Note: the SET_GROUPS instruction must come after the infix triggers,
+    // as an infix going dead may switch off groups.
+    RoseProgram groups_block;
+    makeRoleGroups(build.g, prog_build, v, groups_block);
+    effects_block.add_block(move(groups_block));
+
+    RoseProgram suffix_block;
+    makeRoleSuffix(build, suffixes, engine_info_by_queue, v, suffix_block);
+    effects_block.add_block(move(suffix_block));
+
+    RoseProgram state_block;
+    makeRoleSetState(roleStateIndices, v, state_block);
+    effects_block.add_block(move(state_block));
+
+    // Note: EOD eager reports may generate a CHECK_ONLY_EOD instruction (if
+    // the program doesn't have one already).
+    RoseProgram eod_block;
+    makeRoleEagerEodReports(build, leftfix_info, prog_build.needs_catchup, v,
+                            eod_block);
+    effects_block.add_block(move(eod_block));
+
+    /* A 'ghost role' may do nothing if we know that its groups are already
+     * set - in this case we can avoid producing a program at all. */
+    if (effects_block.empty()) {
+        return {};
+    }
+
+    program.add_before_end(move(effects_block));
+    return program;
+}
+
+static
+void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 lit_id,
+                                RoseProgram &prog) {
+    const auto &info = build.literal_info.at(lit_id);
+    if (!info.squash_group) {
+        return;
+    }
+
+    DEBUG_PRINTF("squashes 0x%llx\n", info.group_mask);
+    assert(info.group_mask);
+    /* Note: group_mask is negated. */
+    prog.add_before_end(make_unique<RoseInstrSquashGroups>(~info.group_mask));
+}
+
+namespace {
+struct ProgKey {
+    ProgKey(const RoseProgram &p) : prog(&p) {}
+
+    bool operator==(const ProgKey &b) const {
+        return RoseProgramEquivalence()(*prog, *b.prog);
+    }
+
+    friend size_t hash_value(const ProgKey &a) {
+        return RoseProgramHash()(*a.prog);
+    }
+private:
+    const RoseProgram *prog;
+};
+}
+
+RoseProgram assembleProgramBlocks(vector<RoseProgram> &&blocks_in) {
+    DEBUG_PRINTF("%zu blocks before dedupe\n", blocks_in.size());
+
+    vector<RoseProgram> blocks;
+    blocks.reserve(blocks_in.size()); /* to ensure stable reference for seen */
+
+    unordered_set<ProgKey> seen;
+    for (auto &block : blocks_in) {
+        if (contains(seen, block)) {
+            continue;
+        }
+
+        blocks.push_back(move(block));
+        seen.emplace(blocks.back());
+    }
+
+    DEBUG_PRINTF("%zu blocks after dedupe\n", blocks.size());
+
+    RoseProgram prog;
+    for (auto &block : blocks) {
+        /* If we have multiple blocks from different literals and any of them
+         * squash groups, we will have to add a CLEAR_WORK_DONE instruction to
+         * each literal program block to clear the work_done flag, so that it
+         * is only set if work has actually been done by this block. */
+        if (!prog.empty() && reads_work_done_flag(block)) {
+            RoseProgram clear_block;
+            clear_block.add_before_end(make_unique<RoseInstrClearWorkDone>());
+            prog.add_block(move(clear_block));
+        }
+
+        prog.add_block(move(block));
+    }
+
+    return prog;
+}
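/* Illustrative sketch (not from this patch): de-duplicating heavyweight
 * blocks by storing lightweight pointer keys in a hash set, the same pattern
 * ProgKey enables for RoseProgram. Block and BlockKey are hypothetical. */
#include <cstddef>
#include <functional>
#include <string>
#include <unordered_set>
#include <vector>

struct Block {
    std::string body; // stands in for a sequence of instructions
};

struct BlockKey {
    const Block *b;
    bool operator==(const BlockKey &o) const { return b->body == o.b->body; }
};

struct BlockKeyHash {
    size_t operator()(const BlockKey &k) const {
        return std::hash<std::string>()(k.b->body);
    }
};

// Keep the first copy of each distinct block. References into `kept` stay
// stable because it is reserved up front, mirroring the reserve() above.
static std::vector<Block> dedupe(std::vector<Block> &&in) {
    std::vector<Block> kept;
    kept.reserve(in.size());
    std::unordered_set<BlockKey, BlockKeyHash> seen;
    for (auto &blk : in) {
        if (seen.count(BlockKey{&blk})) {
            continue;
        }
        kept.push_back(std::move(blk));
        seen.insert(BlockKey{&kept.back()});
    }
    return kept;
}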
+
+RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
+                               const map<RoseVertex, left_build_info> &leftfix_info,
+                               const map<suffix_id, u32> &suffixes,
+                               const map<u32, engine_info> &engine_info_by_queue,
+                               const unordered_map<RoseVertex, u32> &roleStateIndices,
+                               ProgramBuild &prog_build, u32 lit_id,
+                               const vector<RoseEdge> &lit_edges,
+                               bool is_anchored_replay_program) {
+    const auto &g = build.g;
+
+    DEBUG_PRINTF("lit id=%u, %zu lit edges\n", lit_id, lit_edges.size());
+
+    // Construct the initial program up front, as its early checks must be
+    // able to jump to end and terminate processing for this literal.
+    auto lit_program = makeLitInitialProgram(build, prog_build, lit_id,
+                                             lit_edges,
+                                             is_anchored_replay_program);
+
+    RoseProgram role_programs;
+
+    // Predecessor state id -> program block.
+    map<u32, RoseProgram> pred_blocks;
+
+    // Construct sparse iter sub-programs.
+    for (const auto &e : lit_edges) {
+        const auto &u = source(e, g);
+        if (build.isAnyStart(u)) {
+            continue; // Root roles are not handled with the sparse iterator.
+        }
+        DEBUG_PRINTF("sparse iter edge (%zu,%zu)\n", g[u].index,
+                     g[target(e, g)].index);
+        assert(contains(roleStateIndices, u));
+        u32 pred_state = roleStateIndices.at(u);
+        auto role_prog = makeRoleProgram(build, leftfix_info, suffixes,
+                                         engine_info_by_queue, roleStateIndices,
+                                         prog_build, e);
+        if (!role_prog.empty()) {
+            pred_blocks[pred_state].add_block(move(role_prog));
+        }
+    }
+
+    // Add blocks to deal with non-root edges (triggered by sparse iterator or
+    // mmbit_isset checks).
+    addPredBlocks(pred_blocks, roleStateIndices.size(), role_programs);
+
+    // Add blocks to handle root roles.
+    for (const auto &e : lit_edges) {
+        const auto &u = source(e, g);
+        if (!build.isAnyStart(u)) {
+            continue;
+        }
+        DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].index,
+                     g[target(e, g)].index);
+        auto role_prog = makeRoleProgram(build, leftfix_info, suffixes,
+                                         engine_info_by_queue, roleStateIndices,
+                                         prog_build, e);
+        role_programs.add_block(move(role_prog));
+    }
+
+    if (lit_id == build.eod_event_literal_id) {
+        /* Note: does not require the lit initial program. */
+        assert(build.eod_event_literal_id != MO_INVALID_IDX);
+        return role_programs;
+    }
+
+    /* Instructions to run even if a role program bails out. */
+    RoseProgram unconditional_block;
+
+    // The literal may squash groups.
+    makeGroupSquashInstruction(build, lit_id, unconditional_block);
+
+    role_programs.add_block(move(unconditional_block));
+    lit_program.add_before_end(move(role_programs));
+
+    return lit_program;
+}
+
+RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build,
+                                    ProgramBuild &prog_build,
+                                    const vector<u32> &lit_ids) {
+    assert(!lit_ids.empty());
+    assert(build.cc.streaming);
+
+    vector<RoseProgram> blocks;
+
+    for (const auto &lit_id : lit_ids) {
+        DEBUG_PRINTF("lit_id=%u\n", lit_id);
+        const auto &info = build.literal_info.at(lit_id);
+        if (info.delayed_ids.empty()) {
+            continue; // No delayed IDs, no work to do.
+        }
+
+        RoseProgram prog;
+        if (!build.isDelayed(lit_id)) {
+            makeCheckLiteralInstruction(build.literals.at(lit_id),
+                                        prog_build.longLitLengthThreshold, prog,
+                                        build.cc);
+        }
+
+        makeCheckLitMaskInstruction(build, lit_id, prog);
+        makePushDelayedInstructions(build.literals, prog_build,
+                                    build.literal_info.at(lit_id).delayed_ids,
+                                    prog);
+        blocks.push_back(move(prog));
+    }
+
+    return assembleProgramBlocks(move(blocks));
+}
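/* Illustrative sketch (not from this patch): a simplified model of what
 * PUSH_DELAYED arranges; the real engine manages delayed matches with
 * per-delay slots, but the replay idea is the same. Names are hypothetical. */
#include <cstdint>
#include <functional>
#include <queue>
#include <utility>
#include <vector>

using Delayed = std::pair<uint64_t, uint32_t>; // (due offset, literal id)

struct DelayQueue {
    // Min-heap ordered by due offset, so matches replay in offset order.
    std::priority_queue<Delayed, std::vector<Delayed>,
                        std::greater<Delayed>> q;

    void push(uint64_t match_offset, uint32_t delay, uint32_t id) {
        q.emplace(match_offset + delay, id);
    }

    // Replay all delayed matches that have come due at or before `offset`.
    template <class Fn>
    void drain(uint64_t offset, Fn deliver) {
        while (!q.empty() && q.top().first <= offset) {
            deliver(q.top().first, q.top().second);
            q.pop();
        }
    }
};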
+
+RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build,
+                                 ProgramBuild &prog_build, const RoseEdge &e,
+                                 const bool multiple_preds) {
+    const RoseGraph &g = build.g;
+    const RoseVertex v = target(e, g);
+
+    RoseProgram program;
+
+    if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
+        makeRoleCheckBounds(build, v, e, program);
+    }
+
+    if (multiple_preds) {
+        // Only necessary when there is more than one pred.
+        makeRoleCheckNotHandled(prog_build, v, program);
+    }
+
+    makeCatchup(build.rm, prog_build.needs_catchup, g[v].reports, program);
+
+    const bool has_som = false;
+    RoseProgram report_block;
+    for (const auto &id : g[v].reports) {
+        makeReport(build, id, has_som, report_block);
+    }
+    program.add_before_end(move(report_block));
+
+    return program;
+}
+
+static
+void makeCatchupMpv(const ReportManager &rm, bool needs_mpv_catchup,
+                    ReportID id, RoseProgram &program) {
+    if (!needs_mpv_catchup) {
+        return;
+    }
+
+    const Report &report = rm.getReport(id);
+    if (report.type == INTERNAL_ROSE_CHAIN) {
+        return;
+    }
+
+    program.add_before_end(make_unique<RoseInstrCatchUpMpv>());
+}
+
+RoseProgram makeReportProgram(const RoseBuildImpl &build,
+                              bool needs_mpv_catchup, ReportID id) {
+    RoseProgram prog;
+
+    makeCatchupMpv(build.rm, needs_mpv_catchup, id, prog);
+
+    const bool has_som = false;
+    makeReport(build, id, has_som, prog);
+
+    return prog;
+}
+
+RoseProgram makeBoundaryProgram(const RoseBuildImpl &build,
+                                const set<ReportID> &reports) {
+    // Note: no CATCHUP instruction is necessary in the boundary case, as we
+    // should always be caught up (and may not even have the resources in
+    // scratch to support it).
+
+    const bool has_som = false;
+    RoseProgram prog;
+    for (const auto &id : reports) {
+        makeReport(build, id, has_som, prog);
+    }
+
+    return prog;
+}
+
+static
+void addPredBlockSingle(u32 pred_state, RoseProgram &pred_block,
+                        RoseProgram &program) {
+    // Prepend an instruction to check that the pred state is on.
+    const auto *end_inst = pred_block.end_instruction();
+    pred_block.insert(begin(pred_block),
+                      make_unique<RoseInstrCheckState>(pred_state, end_inst));
+    program.add_block(move(pred_block));
+}
+
+static
+void addPredBlocksAny(map<u32, RoseProgram> &pred_blocks, u32 num_states,
+                      RoseProgram &program) {
+    RoseProgram sparse_program;
+
+    vector<u32> keys;
+    for (const u32 &key : pred_blocks | map_keys) {
+        keys.push_back(key);
+    }
+
+    const RoseInstruction *end_inst = sparse_program.end_instruction();
+    auto ri = make_unique<RoseInstrSparseIterAny>(num_states, keys, end_inst);
+    sparse_program.add_before_end(move(ri));
+
+    RoseProgram &block = pred_blocks.begin()->second;
+
+    /* We no longer need the check-handled instruction, as all the pred-role
+     * blocks are being collapsed together. */
+    stripCheckHandledInstruction(block);
+
+    sparse_program.add_before_end(move(block));
+    program.add_block(move(sparse_program));
+}
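/* Illustrative sketch (not from this patch): when all predecessor blocks are
 * equivalent, SPARSE_ITER_ANY reduces to "is any of these state bits on?".
 * A simplified model over a plain bitset; names are hypothetical. */
#include <bitset>
#include <cstddef>
#include <cstdint>
#include <vector>

static const size_t kNumStates = 256; // assumed size of the state universe

static bool anyPredOn(const std::bitset<kNumStates> &states,
                      const std::vector<uint32_t> &keys) {
    for (uint32_t k : keys) { // keys: the predecessor state indices
        if (states.test(k)) {
            return true; // any live predecessor lets the collapsed block run
        }
    }
    return false;
}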
+
+static
+void addPredBlocksMulti(map<u32, RoseProgram> &pred_blocks,
+                        u32 num_states, RoseProgram &program) {
+    assert(!pred_blocks.empty());
+
+    RoseProgram sparse_program;
+    const RoseInstruction *end_inst = sparse_program.end_instruction();
+    vector<pair<u32, const RoseInstruction *>> jump_table;
+
+    // BEGIN instruction.
+    auto ri_begin = make_unique<RoseInstrSparseIterBegin>(num_states, end_inst);
+    RoseInstrSparseIterBegin *begin_inst = ri_begin.get();
+    sparse_program.add_before_end(move(ri_begin));
+
+    // NEXT instructions, one per pred program.
+    u32 prev_key = pred_blocks.begin()->first;
+    for (auto it = next(begin(pred_blocks)); it != end(pred_blocks); ++it) {
+        auto ri = make_unique<RoseInstrSparseIterNext>(prev_key, begin_inst,
+                                                       end_inst);
+        sparse_program.add_before_end(move(ri));
+        prev_key = it->first;
+    }
+
+    // Splice in each pred program after its BEGIN/NEXT.
+    auto out_it = begin(sparse_program);
+    for (auto &m : pred_blocks) {
+        u32 key = m.first;
+        RoseProgram &flat_prog = m.second;
+        assert(!flat_prog.empty());
+        const size_t block_len = flat_prog.size() - 1; // without INSTR_END.
+
+        assert(dynamic_cast<const RoseInstrSparseIterBegin *>(out_it->get()) ||
+               dynamic_cast<const RoseInstrSparseIterNext *>(out_it->get()));
+        out_it = sparse_program.insert(++out_it, move(flat_prog));
+
+        // The jump table target for this key is the beginning of the block
+        // we just spliced in.
+        jump_table.emplace_back(key, out_it->get());
+
+        assert(distance(begin(sparse_program), out_it) + block_len <=
+               sparse_program.size());
+        advance(out_it, block_len);
+    }
+
+    // Write the jump table back into the SPARSE_ITER_BEGIN instruction.
+    begin_inst->jump_table = move(jump_table);
+
+    program.add_block(move(sparse_program));
+}
+
+void addPredBlocks(map<u32, RoseProgram> &pred_blocks, u32 num_states,
+                   RoseProgram &program) {
+    // Trim empty blocks, if any exist.
+    for (auto it = pred_blocks.begin(); it != pred_blocks.end();) {
+        if (it->second.empty()) {
+            it = pred_blocks.erase(it);
+        } else {
+            ++it;
+        }
+    }
+
+    const size_t num_preds = pred_blocks.size();
+    if (num_preds == 0) {
+        return;
+    }
+
+    if (num_preds == 1) {
+        const auto head = pred_blocks.begin();
+        addPredBlockSingle(head->first, head->second, program);
+        return;
+    }
+
+    // First, see if all our blocks are equivalent, in which case we can
+    // collapse them down into one.
+    const auto &blocks = pred_blocks | map_values;
+    if (all_of(begin(blocks), end(blocks), [&](const RoseProgram &block) {
+            return RoseProgramEquivalence()(*begin(blocks), block);
+        })) {
+        DEBUG_PRINTF("all blocks equiv\n");
+        addPredBlocksAny(pred_blocks, num_states, program);
+        return;
+    }
+
+    addPredBlocksMulti(pred_blocks, num_states, program);
+}
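/* Illustrative sketch (not from this patch): the multi-pred case behaves like
 * a jump table from live predecessor state to its spliced-in handler block.
 * Handler and the map layout are hypothetical stand-ins. */
#include <cstdint>
#include <functional>
#include <map>
#include <vector>

using Handler = std::function<void()>;

// Run the handler of every live key in ascending order; the sparse iterator
// walks the live states and jumps through the table in the same way.
static void dispatchLive(const std::map<uint32_t, Handler> &jump_table,
                         const std::vector<uint32_t> &live_keys) {
    for (uint32_t k : live_keys) {
        auto it = jump_table.find(k);
        if (it != jump_table.end()) {
            it->second();
        }
    }
}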
+
+void applyFinalSpecialisation(RoseProgram &program) {
+    assert(!program.empty());
+    assert(program.back().code() == ROSE_INSTR_END);
+    if (program.size() < 2) {
+        return;
+    }
+
+    /* Replace the second-to-last instruction (before END) with a one-shot
+     * specialisation if available. */
+    auto it = next(program.rbegin());
+    if (auto *ri = dynamic_cast<const RoseInstrReport *>(it->get())) {
+        DEBUG_PRINTF("replacing REPORT with FINAL_REPORT\n");
+        program.replace(it, make_unique<RoseInstrFinalReport>(
+                                ri->onmatch, ri->offset_adjust));
+    }
+}
+
+void recordLongLiterals(vector<ue2_case_string> &longLiterals,
+                        const RoseProgram &program) {
+    for (const auto &ri : program) {
+        if (const auto *ri_check =
+                dynamic_cast<const RoseInstrCheckLongLit *>(ri.get())) {
+            DEBUG_PRINTF("found CHECK_LONG_LIT for string '%s'\n",
+                         escapeString(ri_check->literal).c_str());
+            longLiterals.emplace_back(ri_check->literal, false);
+            continue;
+        }
+        if (const auto *ri_check =
+                dynamic_cast<const RoseInstrCheckLongLitNocase *>(ri.get())) {
+            DEBUG_PRINTF("found CHECK_LONG_LIT_NOCASE for string '%s'\n",
+                         escapeString(ri_check->literal).c_str());
+            longLiterals.emplace_back(ri_check->literal, true);
+        }
+    }
+}
+
+void recordResources(RoseResources &resources, const RoseProgram &program) {
+    for (const auto &ri : program) {
+        switch (ri->code()) {
+        case ROSE_INSTR_TRIGGER_SUFFIX:
+            resources.has_suffixes = true;
+            break;
+        case ROSE_INSTR_TRIGGER_INFIX:
+        case ROSE_INSTR_CHECK_INFIX:
+        case ROSE_INSTR_CHECK_PREFIX:
+        case ROSE_INSTR_SOM_LEFTFIX:
+            resources.has_leftfixes = true;
+            break;
+        case ROSE_INSTR_SET_STATE:
+        case ROSE_INSTR_CHECK_STATE:
+        case ROSE_INSTR_SPARSE_ITER_BEGIN:
+        case ROSE_INSTR_SPARSE_ITER_NEXT:
+            resources.has_states = true;
+            break;
+        case ROSE_INSTR_CHECK_GROUPS:
+            resources.checks_groups = true;
+            break;
+        case ROSE_INSTR_PUSH_DELAYED:
+            resources.has_lit_delay = true;
+            break;
+        case ROSE_INSTR_CHECK_LONG_LIT:
+        case ROSE_INSTR_CHECK_LONG_LIT_NOCASE:
+            resources.has_lit_check = true;
+            break;
+        default:
+            break;
+        }
+    }
+}
+
 } // namespace ue2
diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h
index 0c725b46d..8758ef64a 100644
--- a/src/rose/rose_build_program.h
+++ b/src/rose/rose_build_program.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Intel Corporation
+ * Copyright (c) 2016-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -31,1759 +31,21 @@
 #include "rose_build_impl.h"
 #include "rose_program.h"
-#include "som/som_operation.h"
-#include "util/alloc.h"
-#include "util/container.h"
+#include "util/bytecode_ptr.h"
 #include "util/hash.h"
 #include "util/make_unique.h"
 #include "util/ue2_containers.h"
-#include "util/ue2string.h"
-#include <array>
-#include <cstring>
 #include <vector>
-#include <boost/functional/hash/hash_fwd.hpp>
 #include <boost/range/adaptor/map.hpp>
 
 namespace ue2 {
 
+struct LookEntry;
 class RoseEngineBlob;
-
-/**
- * \brief Abstract base class representing a single Rose instruction.
- */
-class RoseInstruction {
-public:
-    virtual ~RoseInstruction();
-
-    /** \brief Opcode used for the instruction in the bytecode. */
-    virtual RoseInstructionCode code() const = 0;
-
-    /**
-     * \brief Simple hash used for program equivalence.
-     *
-     * Note that pointers (jumps, for example) should not be used when
-     * calculating the hash: they will be converted to instruction offsets when
-     * compared later.
-     */
-    virtual size_t hash() const = 0;
-
-    /** \brief Length of the bytecode instruction in bytes. */
-    virtual size_t byte_length() const = 0;
-
-    using OffsetMap = unordered_map<const RoseInstruction *, u32>;
-
-    /**
-     * \brief Writes a concrete implementation of this instruction.
-     *
-     * Other data that this instruction depends on is written directly into the
-     * blob, while the instruction structure itself (of size given by
-     * the byte_length() function) is written to dest.
-     */
-    virtual void write(void *dest, RoseEngineBlob &blob,
-                       const OffsetMap &offset_map) const = 0;
-
-    /**
-     * \brief Update a target pointer.
-     *
-     * If this instruction contains any reference to the old target, replace it
-     * with the new one.
-     */
-    virtual void update_target(const RoseInstruction *old_target,
-                               const RoseInstruction *new_target) = 0;
-
-    /**
-     * \brief True if these instructions are equivalent within their own
-     * programs.
-     *
-     * Checks that any pointers to other instructions point to the same
-     * offsets.
-     */
-    bool equiv(const RoseInstruction &other, const OffsetMap &offsets,
-               const OffsetMap &other_offsets) const {
-        return equiv_impl(other, offsets, other_offsets);
-    }
-
-private:
-    virtual bool equiv_impl(const RoseInstruction &other,
-                            const OffsetMap &offsets,
-                            const OffsetMap &other_offsets) const = 0;
-};
-
-/**
- * \brief Templated implementation class to handle boring boilerplate code.
- */
-template<RoseInstructionCode Opcode, class ImplType, class RoseInstrType>
-class RoseInstrBase : public RoseInstruction {
-protected:
-    static constexpr RoseInstructionCode opcode = Opcode;
-    using impl_type = ImplType;
-
-public:
-    RoseInstructionCode code() const override { return opcode; }
-
-    size_t byte_length() const override {
-        return sizeof(impl_type);
-    }
-
-    /**
-     * Note: this implementation simply zeroes the destination region and
-     * writes in the correct opcode. This is sufficient for trivial
-     * instructions, but instructions with data members will want to override
-     * it.
-     */
-    void write(void *dest, RoseEngineBlob &,
-               const RoseInstruction::OffsetMap &) const override {
-        assert(dest != nullptr);
-        assert(ISALIGNED_N(dest, ROSE_INSTR_MIN_ALIGN));
-
-        impl_type *inst = static_cast<impl_type *>(dest);
-        memset(inst, 0, sizeof(impl_type));
-        inst->code = verify_u8(opcode);
-    }
-
-private:
-    bool equiv_impl(const RoseInstruction &other, const OffsetMap &offsets,
-                    const OffsetMap &other_offsets) const override {
-        const auto *ri_that = dynamic_cast<const RoseInstrType *>(&other);
-        if (!ri_that) {
-            return false;
-        }
-        const auto *ri_this = dynamic_cast<const RoseInstrType *>(this);
-        assert(ri_this);
-        return ri_this->equiv_to(*ri_that, offsets, other_offsets);
-    }
-};
-
-/**
- * \brief Refinement of RoseInstrBase to use for instructions that have
- * just a single target member, called "target".
- */
-template<RoseInstructionCode Opcode, class ImplType, class RoseInstrType>
-class RoseInstrBaseOneTarget
-    : public RoseInstrBase<Opcode, ImplType, RoseInstrType> {
-public:
-    void update_target(const RoseInstruction *old_target,
-                       const RoseInstruction *new_target) override {
-        RoseInstrType *ri = dynamic_cast<RoseInstrType *>(this);
-        assert(ri);
-        if (ri->target == old_target) {
-            ri->target = new_target;
-        }
-    }
-};
-
-/**
- * \brief Refinement of RoseInstrBase to use for instructions that have no
- * targets.
- */
-template<RoseInstructionCode Opcode, class ImplType, class RoseInstrType>
-class RoseInstrBaseNoTargets
-    : public RoseInstrBase<Opcode, ImplType, RoseInstrType> {
-public:
-    void update_target(const RoseInstruction *,
-                       const RoseInstruction *) override {}
-};
-
-/**
- * \brief Refinement of RoseInstrBaseNoTargets to use for instructions that
- * have no members at all, just an opcode.
- */
-template<RoseInstructionCode Opcode, class ImplType, class RoseInstrType>
-class RoseInstrBaseTrivial
-    : public RoseInstrBaseNoTargets<Opcode, ImplType, RoseInstrType> {
-public:
-    virtual bool operator==(const RoseInstrType &) const { return true; }
-
-    size_t hash() const override {
-        return boost::hash_value(static_cast<int>(Opcode));
-    }
-
-    bool equiv_to(const RoseInstrType &, const RoseInstruction::OffsetMap &,
-                  const RoseInstruction::OffsetMap &) const {
-        return true;
-    }
-};
-
-////
-//// Concrete implementation classes start here.
-//// - -class RoseInstrAnchoredDelay - : public RoseInstrBaseOneTarget { -public: - rose_group groups; - const RoseInstruction *target; - - RoseInstrAnchoredDelay(rose_group groups_in, - const RoseInstruction *target_in) - : groups(groups_in), target(target_in) {} - - bool operator==(const RoseInstrAnchoredDelay &ri) const { - return groups == ri.groups && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), groups); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrAnchoredDelay &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return groups == ri.groups && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckLitEarly - : public RoseInstrBaseNoTargets { -public: - u32 min_offset; - - explicit RoseInstrCheckLitEarly(u32 min) : min_offset(min) {} - - bool operator==(const RoseInstrCheckLitEarly &ri) const { - return min_offset == ri.min_offset; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), min_offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckLitEarly &ri, const OffsetMap &, - const OffsetMap &) const { - return min_offset == ri.min_offset; - } -}; - -class RoseInstrCheckGroups - : public RoseInstrBaseNoTargets { -public: - rose_group groups; - - explicit RoseInstrCheckGroups(rose_group groups_in) : groups(groups_in) {} - - bool operator==(const RoseInstrCheckGroups &ri) const { - return groups == ri.groups; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), groups); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckGroups &ri, const OffsetMap &, - const OffsetMap &) const { - return groups == ri.groups; - } -}; - -class RoseInstrCheckOnlyEod - : public RoseInstrBaseOneTarget { -public: - const RoseInstruction *target; - - explicit RoseInstrCheckOnlyEod(const RoseInstruction *target_in) - : target(target_in) {} - - bool operator==(const RoseInstrCheckOnlyEod &ri) const { - return target == ri.target; - } - - size_t hash() const override { - return boost::hash_value(static_cast(opcode)); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckOnlyEod &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckBounds - : public RoseInstrBaseOneTarget { -public: - u64a min_bound; - u64a max_bound; - const RoseInstruction *target; - - RoseInstrCheckBounds(u64a min, u64a max, const RoseInstruction *target_in) - : min_bound(min), max_bound(max), target(target_in) {} - - bool operator==(const RoseInstrCheckBounds &ri) const { - return min_bound == ri.min_bound && max_bound == ri.max_bound && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), min_bound, max_bound); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckBounds &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return min_bound == ri.min_bound && max_bound == ri.max_bound && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckNotHandled - : 
public RoseInstrBaseOneTarget { -public: - u32 key; - const RoseInstruction *target; - - RoseInstrCheckNotHandled(u32 key_in, const RoseInstruction *target_in) - : key(key_in), target(target_in) {} - - bool operator==(const RoseInstrCheckNotHandled &ri) const { - return key == ri.key && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), key); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckNotHandled &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return key == ri.key && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckSingleLookaround - : public RoseInstrBaseOneTarget { -public: - s8 offset; - u32 reach_index; - const RoseInstruction *target; - - RoseInstrCheckSingleLookaround(s8 offset_in, u32 reach_index_in, - const RoseInstruction *target_in) - : offset(offset_in), reach_index(reach_index_in), target(target_in) {} - - bool operator==(const RoseInstrCheckSingleLookaround &ri) const { - return offset == ri.offset && reach_index == ri.reach_index && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), offset, reach_index); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckSingleLookaround &ri, - const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return offset == ri.offset && reach_index == ri.reach_index && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckLookaround - : public RoseInstrBaseOneTarget { -public: - u32 index; - u32 count; - const RoseInstruction *target; - - RoseInstrCheckLookaround(u32 index_in, u32 count_in, - const RoseInstruction *target_in) - : index(index_in), count(count_in), target(target_in) {} - - bool operator==(const RoseInstrCheckLookaround &ri) const { - return index == ri.index && count == ri.count && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), index, count); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckLookaround &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return index == ri.index && count == ri.count && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckMask - : public RoseInstrBaseOneTarget { -public: - u64a and_mask; - u64a cmp_mask; - u64a neg_mask; - s32 offset; - const RoseInstruction *target; - - RoseInstrCheckMask(u64a and_mask_in, u64a cmp_mask_in, u64a neg_mask_in, - s32 offset_in, const RoseInstruction *target_in) - : and_mask(and_mask_in), cmp_mask(cmp_mask_in), neg_mask(neg_mask_in), - offset(offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckMask &ri) const { - return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), and_mask, cmp_mask, neg_mask, - offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckMask &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && - neg_mask == ri.neg_mask && offset == ri.offset 
&& - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckMask32 - : public RoseInstrBaseOneTarget { -public: - std::array and_mask; - std::array cmp_mask; - u32 neg_mask; - s32 offset; - const RoseInstruction *target; - - RoseInstrCheckMask32(std::array and_mask_in, - std::array cmp_mask_in, u32 neg_mask_in, - s32 offset_in, const RoseInstruction *target_in) - : and_mask(move(and_mask_in)), cmp_mask(move(cmp_mask_in)), - neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckMask32 &ri) const { - return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), and_mask, cmp_mask, neg_mask, - offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckMask32 &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckByte - : public RoseInstrBaseOneTarget { -public: - u8 and_mask; - u8 cmp_mask; - u8 negation; - s32 offset; - const RoseInstruction *target; - - RoseInstrCheckByte(u8 and_mask_in, u8 cmp_mask_in, u8 negation_in, - s32 offset_in, const RoseInstruction *target_in) - : and_mask(and_mask_in), cmp_mask(cmp_mask_in), negation(negation_in), - offset(offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckByte &ri) const { - return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && - negation == ri.negation && offset == ri.offset && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), and_mask, cmp_mask, negation, - offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckByte &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && - negation == ri.negation && offset == ri.offset && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckShufti16x8 - : public RoseInstrBaseOneTarget { -public: - std::array nib_mask; - std::array bucket_select_mask; - u32 neg_mask; - s32 offset; - const RoseInstruction *target; - - RoseInstrCheckShufti16x8(std::array nib_mask_in, - std::array bucket_select_mask_in, - u32 neg_mask_in, s32 offset_in, - const RoseInstruction *target_in) - : nib_mask(move(nib_mask_in)), - bucket_select_mask(move(bucket_select_mask_in)), - neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckShufti16x8 &ri) const { - return nib_mask == ri.nib_mask && - bucket_select_mask == ri.bucket_select_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), nib_mask, - bucket_select_mask, neg_mask, offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckShufti16x8 &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return nib_mask == ri.nib_mask && - bucket_select_mask == ri.bucket_select_mask && - neg_mask == ri.neg_mask && offset == ri.offset 
&& - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckShufti32x8 - : public RoseInstrBaseOneTarget { -public: - std::array hi_mask; - std::array lo_mask; - std::array bucket_select_mask; - u32 neg_mask; - s32 offset; - const RoseInstruction *target; - - RoseInstrCheckShufti32x8(std::array hi_mask_in, - std::array lo_mask_in, - std::array bucket_select_mask_in, - u32 neg_mask_in, s32 offset_in, - const RoseInstruction *target_in) - : hi_mask(move(hi_mask_in)), lo_mask(move(lo_mask_in)), - bucket_select_mask(move(bucket_select_mask_in)), - neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckShufti32x8 &ri) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask == ri.bucket_select_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), hi_mask, lo_mask, - bucket_select_mask, neg_mask, offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckShufti32x8 &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask == ri.bucket_select_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckShufti16x16 - : public RoseInstrBaseOneTarget { -public: - std::array hi_mask; - std::array lo_mask; - std::array bucket_select_mask; - u32 neg_mask; - s32 offset; - const RoseInstruction *target; - - RoseInstrCheckShufti16x16(std::array hi_mask_in, - std::array lo_mask_in, - std::array bucket_select_mask_in, - u32 neg_mask_in, s32 offset_in, - const RoseInstruction *target_in) - : hi_mask(move(hi_mask_in)), lo_mask(move(lo_mask_in)), - bucket_select_mask(move(bucket_select_mask_in)), - neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckShufti16x16 &ri) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask == ri.bucket_select_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), hi_mask, lo_mask, - bucket_select_mask, neg_mask, offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckShufti16x16 &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask == ri.bucket_select_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckShufti32x16 - : public RoseInstrBaseOneTarget { -public: - std::array hi_mask; - std::array lo_mask; - std::array bucket_select_mask_hi; - std::array bucket_select_mask_lo; - u32 neg_mask; - s32 offset; - const RoseInstruction *target; - - RoseInstrCheckShufti32x16(std::array hi_mask_in, - std::array lo_mask_in, - std::array bucket_select_mask_hi_in, - std::array bucket_select_mask_lo_in, - u32 neg_mask_in, s32 offset_in, - const RoseInstruction *target_in) - : hi_mask(move(hi_mask_in)), lo_mask(move(lo_mask_in)), - bucket_select_mask_hi(move(bucket_select_mask_hi_in)), - bucket_select_mask_lo(move(bucket_select_mask_lo_in)), - 
neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckShufti32x16 &ri) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask_hi == ri.bucket_select_mask_hi && - bucket_select_mask_lo == ri.bucket_select_mask_lo && - neg_mask == ri.neg_mask && offset == ri.offset && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), hi_mask, lo_mask, - bucket_select_mask_hi, bucket_select_mask_lo, - neg_mask, offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckShufti32x16 &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask_hi == ri.bucket_select_mask_hi && - bucket_select_mask_lo == ri.bucket_select_mask_lo && - neg_mask == ri.neg_mask && offset == ri.offset && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckInfix - : public RoseInstrBaseOneTarget { -public: - u32 queue; - u32 lag; - ReportID report; - const RoseInstruction *target; - - RoseInstrCheckInfix(u32 queue_in, u32 lag_in, ReportID report_in, - const RoseInstruction *target_in) - : queue(queue_in), lag(lag_in), report(report_in), target(target_in) {} - - bool operator==(const RoseInstrCheckInfix &ri) const { - return queue == ri.queue && lag == ri.lag && report == ri.report && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), queue, lag, report); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckInfix &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return queue == ri.queue && lag == ri.lag && report == ri.report && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckPrefix - : public RoseInstrBaseOneTarget { -public: - u32 queue; - u32 lag; - ReportID report; - const RoseInstruction *target; - - RoseInstrCheckPrefix(u32 queue_in, u32 lag_in, ReportID report_in, - const RoseInstruction *target_in) - : queue(queue_in), lag(lag_in), report(report_in), target(target_in) {} - - bool operator==(const RoseInstrCheckPrefix &ri) const { - return queue == ri.queue && lag == ri.lag && report == ri.report && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), queue, lag, report); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckPrefix &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return queue == ri.queue && lag == ri.lag && report == ri.report && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrPushDelayed - : public RoseInstrBaseNoTargets { -public: - u8 delay; - u32 index; - - RoseInstrPushDelayed(u8 delay_in, u32 index_in) - : delay(delay_in), index(index_in) {} - - bool operator==(const RoseInstrPushDelayed &ri) const { - return delay == ri.delay && index == ri.index; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), delay, index); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrPushDelayed &ri, const OffsetMap &, - const OffsetMap &) const { - return delay == ri.delay && index == ri.index; - } -}; - 
-class RoseInstrRecordAnchored - : public RoseInstrBaseNoTargets { -public: - u32 id; - - explicit RoseInstrRecordAnchored(u32 id_in) : id(id_in) {} - - bool operator==(const RoseInstrRecordAnchored &ri) const { - return id == ri.id; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), id); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrRecordAnchored &ri, const OffsetMap &, - const OffsetMap &) const { - return id == ri.id; - } -}; - -class RoseInstrCatchUp - : public RoseInstrBaseTrivial { -public: - ~RoseInstrCatchUp() override; -}; - -class RoseInstrCatchUpMpv - : public RoseInstrBaseTrivial { -public: - ~RoseInstrCatchUpMpv() override; -}; - -class RoseInstrSomAdjust - : public RoseInstrBaseNoTargets { -public: - u32 distance; - - explicit RoseInstrSomAdjust(u32 distance_in) : distance(distance_in) {} - - bool operator==(const RoseInstrSomAdjust &ri) const { - return distance == ri.distance; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), distance); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrSomAdjust &ri, const OffsetMap &, - const OffsetMap &) const { - return distance == ri.distance; - } -}; - -class RoseInstrSomLeftfix - : public RoseInstrBaseNoTargets { -public: - u32 queue; - u32 lag; - - RoseInstrSomLeftfix(u32 queue_in, u32 lag_in) - : queue(queue_in), lag(lag_in) {} - - bool operator==(const RoseInstrSomLeftfix &ri) const { - return queue == ri.queue && lag == ri.lag; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), queue, lag); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrSomLeftfix &ri, const OffsetMap &, - const OffsetMap &) const { - return queue == ri.queue && lag == ri.lag; - } -}; - -class RoseInstrSomFromReport - : public RoseInstrBaseNoTargets { -public: - som_operation som; - - RoseInstrSomFromReport() { - std::memset(&som, 0, sizeof(som)); - } - - bool operator==(const RoseInstrSomFromReport &ri) const { - return std::memcmp(&som, &ri.som, sizeof(som)) == 0; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), som.type, som.onmatch); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrSomFromReport &ri, const OffsetMap &, - const OffsetMap &) const { - return std::memcmp(&som, &ri.som, sizeof(som)) == 0; - } -}; - -class RoseInstrSomZero - : public RoseInstrBaseTrivial { -public: - ~RoseInstrSomZero() override; -}; - -class RoseInstrTriggerInfix - : public RoseInstrBaseNoTargets { -public: - u8 cancel; - u32 queue; - u32 event; - - RoseInstrTriggerInfix(u8 cancel_in, u32 queue_in, u32 event_in) - : cancel(cancel_in), queue(queue_in), event(event_in) {} - - bool operator==(const RoseInstrTriggerInfix &ri) const { - return cancel == ri.cancel && queue == ri.queue && event == ri.event; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), cancel, queue, event); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrTriggerInfix &ri, const OffsetMap &, - const OffsetMap &) const { - return cancel == ri.cancel && queue == ri.queue && event == ri.event; - } -}; - -class RoseInstrTriggerSuffix - : public RoseInstrBaseNoTargets 
{ -public: - u32 queue; - u32 event; - - RoseInstrTriggerSuffix(u32 queue_in, u32 event_in) - : queue(queue_in), event(event_in) {} - - bool operator==(const RoseInstrTriggerSuffix &ri) const { - return queue == ri.queue && event == ri.event; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), queue, event); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrTriggerSuffix &ri, const OffsetMap &, - const OffsetMap &) const { - return queue == ri.queue && event == ri.event; - } -}; - -class RoseInstrDedupe - : public RoseInstrBaseOneTarget { -public: - u8 quash_som; - u32 dkey; - s32 offset_adjust; - const RoseInstruction *target; - - RoseInstrDedupe(u8 quash_som_in, u32 dkey_in, s32 offset_adjust_in, - const RoseInstruction *target_in) - : quash_som(quash_som_in), dkey(dkey_in), - offset_adjust(offset_adjust_in), target(target_in) {} - - bool operator==(const RoseInstrDedupe &ri) const { - return quash_som == ri.quash_som && dkey == ri.dkey && - offset_adjust == ri.offset_adjust && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), quash_som, dkey, - offset_adjust); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrDedupe &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return quash_som == ri.quash_som && dkey == ri.dkey && - offset_adjust == ri.offset_adjust && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrDedupeSom - : public RoseInstrBaseOneTarget { -public: - u8 quash_som; - u32 dkey; - s32 offset_adjust; - const RoseInstruction *target; - - RoseInstrDedupeSom(u8 quash_som_in, u32 dkey_in, s32 offset_adjust_in, - const RoseInstruction *target_in) - : quash_som(quash_som_in), dkey(dkey_in), - offset_adjust(offset_adjust_in), target(target_in) {} - - bool operator==(const RoseInstrDedupeSom &ri) const { - return quash_som == ri.quash_som && dkey == ri.dkey && - offset_adjust == ri.offset_adjust && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), quash_som, dkey, - offset_adjust); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrDedupeSom &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return quash_som == ri.quash_som && dkey == ri.dkey && - offset_adjust == ri.offset_adjust && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrReportChain - : public RoseInstrBaseNoTargets { -public: - u32 event; - u64a top_squash_distance; - - RoseInstrReportChain(u32 event_in, u32 top_squash_distance_in) - : event(event_in), top_squash_distance(top_squash_distance_in) {} - - bool operator==(const RoseInstrReportChain &ri) const { - return event == ri.event && - top_squash_distance == ri.top_squash_distance; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), event, top_squash_distance); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrReportChain &ri, const OffsetMap &, - const OffsetMap &) const { - return event == ri.event && - top_squash_distance == ri.top_squash_distance; - } -}; - -class RoseInstrReportSomInt - : public RoseInstrBaseNoTargets { -public: - som_operation som; - - RoseInstrReportSomInt() { - 
std::memset(&som, 0, sizeof(som)); - } - - bool operator==(const RoseInstrReportSomInt &ri) const { - return std::memcmp(&som, &ri.som, sizeof(som)) == 0; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), som.type, som.onmatch); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrReportSomInt &ri, const OffsetMap &, - const OffsetMap &) const { - return std::memcmp(&som, &ri.som, sizeof(som)) == 0; - } -}; - -class RoseInstrReportSomAware - : public RoseInstrBaseNoTargets { -public: - som_operation som; - - RoseInstrReportSomAware() { - std::memset(&som, 0, sizeof(som)); - } - - bool operator==(const RoseInstrReportSomAware &ri) const { - return std::memcmp(&som, &ri.som, sizeof(som)) == 0; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), som.type, som.onmatch); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrReportSomAware &ri, const OffsetMap &, - const OffsetMap &) const { - return std::memcmp(&som, &ri.som, sizeof(som)) == 0; - } -}; - -class RoseInstrReport - : public RoseInstrBaseNoTargets { -public: - ReportID onmatch; - s32 offset_adjust; - - RoseInstrReport(ReportID onmatch_in, s32 offset_adjust_in) - : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {} - - bool operator==(const RoseInstrReport &ri) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), onmatch, offset_adjust); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrReport &ri, const OffsetMap &, - const OffsetMap &) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; - } -}; - -class RoseInstrReportExhaust - : public RoseInstrBaseNoTargets { -public: - ReportID onmatch; - s32 offset_adjust; - u32 ekey; - - RoseInstrReportExhaust(ReportID onmatch_in, s32 offset_adjust_in, - u32 ekey_in) - : onmatch(onmatch_in), offset_adjust(offset_adjust_in), ekey(ekey_in) {} - - bool operator==(const RoseInstrReportExhaust &ri) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && - ekey == ri.ekey; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), onmatch, offset_adjust, ekey); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrReportExhaust &ri, const OffsetMap &, - const OffsetMap &) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && - ekey == ri.ekey; - } -}; - -class RoseInstrReportSom - : public RoseInstrBaseNoTargets { -public: - ReportID onmatch; - s32 offset_adjust; - - RoseInstrReportSom(ReportID onmatch_in, s32 offset_adjust_in) - : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {} - - bool operator==(const RoseInstrReportSom &ri) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), onmatch, offset_adjust); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrReportSom &ri, const OffsetMap &, - const OffsetMap &) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; - } -}; - -class RoseInstrReportSomExhaust - : public 
-    : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_SOM_EXHAUST,
-                                    ROSE_STRUCT_REPORT_SOM_EXHAUST,
-                                    RoseInstrReportSomExhaust> {
-public:
-    ReportID onmatch;
-    s32 offset_adjust;
-    u32 ekey;
-
-    RoseInstrReportSomExhaust(ReportID onmatch_in, s32 offset_adjust_in,
-                              u32 ekey_in)
-        : onmatch(onmatch_in), offset_adjust(offset_adjust_in), ekey(ekey_in) {}
-
-    bool operator==(const RoseInstrReportSomExhaust &ri) const {
-        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust &&
-               ekey == ri.ekey;
-    }
-
-    size_t hash() const override {
-        return hash_all(static_cast<int>(opcode), onmatch, offset_adjust, ekey);
-    }
-
-    void write(void *dest, RoseEngineBlob &blob,
-               const OffsetMap &offset_map) const override;
-
-    bool equiv_to(const RoseInstrReportSomExhaust &ri, const OffsetMap &,
-                  const OffsetMap &) const {
-        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust &&
-               ekey == ri.ekey;
-    }
-};
-
-class RoseInstrDedupeAndReport
-    : public RoseInstrBaseOneTarget<ROSE_INSTR_DEDUPE_AND_REPORT,
-                                    ROSE_STRUCT_DEDUPE_AND_REPORT,
-                                    RoseInstrDedupeAndReport> {
-public:
-    u8 quash_som;
-    u32 dkey;
-    ReportID onmatch;
-    s32 offset_adjust;
-    const RoseInstruction *target;
-
-    RoseInstrDedupeAndReport(u8 quash_som_in, u32 dkey_in, ReportID onmatch_in,
-                             s32 offset_adjust_in,
-                             const RoseInstruction *target_in)
-        : quash_som(quash_som_in), dkey(dkey_in), onmatch(onmatch_in),
-          offset_adjust(offset_adjust_in), target(target_in) {}
-
-    bool operator==(const RoseInstrDedupeAndReport &ri) const {
-        return quash_som == ri.quash_som && dkey == ri.dkey &&
-               onmatch == ri.onmatch && offset_adjust == ri.offset_adjust &&
-               target == ri.target;
-    }
-
-    size_t hash() const override {
-        return hash_all(static_cast<int>(opcode), quash_som, dkey, onmatch,
-                        offset_adjust);
-    }
-
-    void write(void *dest, RoseEngineBlob &blob,
-               const OffsetMap &offset_map) const override;
-
-    bool equiv_to(const RoseInstrDedupeAndReport &ri, const OffsetMap &offsets,
-                  const OffsetMap &other_offsets) const {
-        return quash_som == ri.quash_som && dkey == ri.dkey &&
-               onmatch == ri.onmatch && offset_adjust == ri.offset_adjust &&
-               offsets.at(target) == other_offsets.at(ri.target);
-    }
-};
-
-class RoseInstrFinalReport
-    : public RoseInstrBaseNoTargets<ROSE_INSTR_FINAL_REPORT,
-                                    ROSE_STRUCT_FINAL_REPORT,
-                                    RoseInstrFinalReport> {
-public:
-    ReportID onmatch;
-    s32 offset_adjust;
-
-    RoseInstrFinalReport(ReportID onmatch_in, s32 offset_adjust_in)
-        : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {}
-
-    bool operator==(const RoseInstrFinalReport &ri) const {
-        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust;
-    }
-
-    size_t hash() const override {
-        return hash_all(static_cast<int>(opcode), onmatch, offset_adjust);
-    }
-
-    void write(void *dest, RoseEngineBlob &blob,
-               const OffsetMap &offset_map) const override;
-
-    bool equiv_to(const RoseInstrFinalReport &ri, const OffsetMap &,
-                  const OffsetMap &) const {
-        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust;
-    }
-};
-
-class RoseInstrCheckExhausted
-    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_EXHAUSTED,
-                                    ROSE_STRUCT_CHECK_EXHAUSTED,
-                                    RoseInstrCheckExhausted> {
-public:
-    u32 ekey;
-    const RoseInstruction *target;
-
-    RoseInstrCheckExhausted(u32 ekey_in, const RoseInstruction *target_in)
-        : ekey(ekey_in), target(target_in) {}
-
-    bool operator==(const RoseInstrCheckExhausted &ri) const {
-        return ekey == ri.ekey && target == ri.target;
-    }
-
-    size_t hash() const override {
-        return hash_all(static_cast<int>(opcode), ekey);
-    }
-
-    void write(void *dest, RoseEngineBlob &blob,
-               const OffsetMap &offset_map) const override;
-
-    bool equiv_to(const RoseInstrCheckExhausted &ri, const OffsetMap &offsets,
-                  const OffsetMap &other_offsets) const {
-        return ekey == ri.ekey &&
-               offsets.at(target) == other_offsets.at(ri.target);
-    }
-};
-
-class RoseInstrCheckMinLength
-    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MIN_LENGTH,
-                                    ROSE_STRUCT_CHECK_MIN_LENGTH,
-                                    RoseInstrCheckMinLength> {
-public:
-    s32 end_adj;
-    u64a min_length;
-    const RoseInstruction *target;
-
-    RoseInstrCheckMinLength(s32 end_adj_in, u64a min_length_in,
-                            const RoseInstruction *target_in)
-        : end_adj(end_adj_in), min_length(min_length_in), target(target_in) {}
-
-    bool operator==(const RoseInstrCheckMinLength &ri) const {
-        return end_adj == ri.end_adj && min_length == ri.min_length &&
-               target == ri.target;
-    }
-
-    size_t hash() const override {
-        return hash_all(static_cast<int>(opcode), end_adj, min_length);
-    }
-
-    void write(void *dest, RoseEngineBlob &blob,
-               const OffsetMap &offset_map) const override;
-
-    bool equiv_to(const RoseInstrCheckMinLength &ri, const OffsetMap &offsets,
-                  const OffsetMap &other_offsets) const {
-        return end_adj == ri.end_adj && min_length == ri.min_length &&
-               offsets.at(target) == other_offsets.at(ri.target);
-    }
-};
-
-class RoseInstrSetState
-    : public RoseInstrBaseNoTargets<ROSE_INSTR_SET_STATE,
-                                    ROSE_STRUCT_SET_STATE,
-                                    RoseInstrSetState> {
-public:
-    u32 index;
-
-    explicit RoseInstrSetState(u32 index_in) : index(index_in) {}
-
-    bool operator==(const RoseInstrSetState &ri) const {
-        return index == ri.index;
-    }
-
-    size_t hash() const override {
-        return hash_all(static_cast<int>(opcode), index);
-    }
-
-    void write(void *dest, RoseEngineBlob &blob,
-               const OffsetMap &offset_map) const override;
-
-    bool equiv_to(const RoseInstrSetState &ri, const OffsetMap &,
-                  const OffsetMap &) const {
-        return index == ri.index;
-    }
-};
-
-class RoseInstrSetGroups
-    : public RoseInstrBaseNoTargets<ROSE_INSTR_SET_GROUPS,
-                                    ROSE_STRUCT_SET_GROUPS,
-                                    RoseInstrSetGroups> {
-public:
-    rose_group groups;
-
-    explicit RoseInstrSetGroups(rose_group groups_in) : groups(groups_in) {}
-
-    bool operator==(const RoseInstrSetGroups &ri) const {
-        return groups == ri.groups;
-    }
-
-    size_t hash() const override {
-        return hash_all(static_cast<int>(opcode), groups);
-    }
-
-    void write(void *dest, RoseEngineBlob &blob,
-               const OffsetMap &offset_map) const override;
-
-    bool equiv_to(const RoseInstrSetGroups &ri, const OffsetMap &,
-                  const OffsetMap &) const {
-        return groups == ri.groups;
-    }
-};
-
-class RoseInstrSquashGroups
-    : public RoseInstrBaseNoTargets<ROSE_INSTR_SQUASH_GROUPS,
-                                    ROSE_STRUCT_SQUASH_GROUPS,
-                                    RoseInstrSquashGroups> {
-public:
-    rose_group groups;
-
-    explicit RoseInstrSquashGroups(rose_group groups_in) : groups(groups_in) {}
-
-    bool operator==(const RoseInstrSquashGroups &ri) const {
-        return groups == ri.groups;
-    }
-
-    size_t hash() const override {
-        return hash_all(static_cast<int>(opcode), groups);
-    }
-
-    void write(void *dest, RoseEngineBlob &blob,
-               const OffsetMap &offset_map) const override;
-
-    bool equiv_to(const RoseInstrSquashGroups &ri, const OffsetMap &,
-                  const OffsetMap &) const {
-        return groups == ri.groups;
-    }
-};
-
-class RoseInstrCheckState
-    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_STATE,
-                                    ROSE_STRUCT_CHECK_STATE,
-                                    RoseInstrCheckState> {
-public:
-    u32 index;
-    const RoseInstruction *target;
-
-    RoseInstrCheckState(u32 index_in, const RoseInstruction *target_in)
-        : index(index_in), target(target_in) {}
-
-    bool operator==(const RoseInstrCheckState &ri) const {
-        return index == ri.index && target == ri.target;
-    }
-
-    size_t hash() const override {
-        return hash_all(static_cast<int>(opcode), index);
-    }
-
-    void write(void *dest, RoseEngineBlob &blob,
-               const OffsetMap &offset_map) const override;
-
-    bool equiv_to(const RoseInstrCheckState &ri, const OffsetMap &offsets,
-                  const OffsetMap &other_offsets) const {
-        return index == ri.index &&
-               offsets.at(target) == other_offsets.at(ri.target);
-    }
-};
-
-class RoseInstrSparseIterBegin
-    : public RoseInstrBase<ROSE_INSTR_SPARSE_ITER_BEGIN,
-                           ROSE_STRUCT_SPARSE_ITER_BEGIN,
-                           RoseInstrSparseIterBegin> {
-public:
-    u32 num_keys; // total number of multibit keys
-    std::vector<std::pair<u32, const RoseInstruction *>> jump_table;
-    const RoseInstruction *target;
-
-    RoseInstrSparseIterBegin(u32 num_keys_in,
-                             const RoseInstruction *target_in)
-        : num_keys(num_keys_in), target(target_in) {}
-
-    bool operator==(const RoseInstrSparseIterBegin &ri) const {
-        return num_keys == ri.num_keys && jump_table == ri.jump_table &&
-               target == ri.target;
-    }
-
-    size_t hash() const override {
-        size_t v = hash_all(static_cast<int>(opcode), num_keys);
-        for (const u32 &key : jump_table | boost::adaptors::map_keys) {
-            boost::hash_combine(v, key);
-        }
-        return v;
-    }
-
-    void write(void *dest, RoseEngineBlob &blob,
-               const OffsetMap &offset_map) const override;
-
-    void update_target(const RoseInstruction *old_target,
-                       const RoseInstruction *new_target) override {
-        if (target == old_target) {
-            target = new_target;
-        }
-        for (auto &jump : jump_table) {
-            if (jump.second == old_target) {
-                jump.second = new_target;
-            }
-        }
-    }
-
-    bool equiv_to(const RoseInstrSparseIterBegin &ri, const OffsetMap &offsets,
-                  const OffsetMap &other_offsets) const {
-        if (iter_offset != ri.iter_offset ||
-            offsets.at(target) != other_offsets.at(ri.target)) {
-            return false;
-        }
-        if (jump_table.size() != ri.jump_table.size()) {
-            return false;
-        }
-        auto it1 = jump_table.begin(), it2 = ri.jump_table.begin();
-        for (; it1 != jump_table.end(); ++it1, ++it2) {
-            if (it1->first != it2->first) {
-                return false;
-            }
-            if (offsets.at(it1->second) != other_offsets.at(it2->second)) {
-                return false;
-            }
-        }
-        return true;
-    }
-
-private:
-    friend class RoseInstrSparseIterNext;
-
-    // These variables allow us to use the same multibit iterator and jump
-    // table in subsequent SPARSE_ITER_NEXT write() operations.
-    mutable bool is_written = false;
-    mutable u32 iter_offset = 0;
-    mutable u32 jump_table_offset = 0;
-};
-
-class RoseInstrSparseIterNext
-    : public RoseInstrBase<ROSE_INSTR_SPARSE_ITER_NEXT,
-                           ROSE_STRUCT_SPARSE_ITER_NEXT,
-                           RoseInstrSparseIterNext> {
-public:
-    u32 state;
-    const RoseInstrSparseIterBegin *begin;
-    const RoseInstruction *target;
-
-    RoseInstrSparseIterNext(u32 state_in,
-                            const RoseInstrSparseIterBegin *begin_in,
-                            const RoseInstruction *target_in)
-        : state(state_in), begin(begin_in), target(target_in) {}
-
-    bool operator==(const RoseInstrSparseIterNext &ri) const {
-        return state == ri.state && begin == ri.begin && target == ri.target;
-    }
-
-    size_t hash() const override {
-        return hash_all(static_cast<int>(opcode), state);
-    }
-
-    void write(void *dest, RoseEngineBlob &blob,
-               const OffsetMap &offset_map) const override;
-
-    void update_target(const RoseInstruction *old_target,
-                       const RoseInstruction *new_target) override {
-        if (target == old_target) {
-            target = new_target;
-        }
-        if (begin == old_target) {
-            assert(new_target->code() == ROSE_INSTR_SPARSE_ITER_BEGIN);
-            begin = static_cast<const RoseInstrSparseIterBegin *>(new_target);
-        }
-    }
-
-    bool equiv_to(const RoseInstrSparseIterNext &ri, const OffsetMap &offsets,
-                  const OffsetMap &other_offsets) const {
-        return state == ri.state &&
-               offsets.at(begin) == other_offsets.at(ri.begin) &&
-               offsets.at(target) == other_offsets.at(ri.target);
-    }
-};
-
-class RoseInstrSparseIterAny
-    : public RoseInstrBaseOneTarget<ROSE_INSTR_SPARSE_ITER_ANY,
-                                    ROSE_STRUCT_SPARSE_ITER_ANY,
-                                    RoseInstrSparseIterAny> {
-public:
-    u32 num_keys; // total number of multibit keys
-    std::vector<u32> keys;
-    const RoseInstruction *target;
-
-    RoseInstrSparseIterAny(u32 num_keys_in, std::vector<u32> keys_in,
-                           const RoseInstruction *target_in)
-        : num_keys(num_keys_in), keys(std::move(keys_in)), target(target_in) {}
-
-    bool operator==(const RoseInstrSparseIterAny &ri) const {
-        return num_keys == ri.num_keys && keys == ri.keys &&
-               target == ri.target;
-    }
-
-    size_t hash() const override {
-        return hash_all(static_cast<int>(opcode), num_keys, keys);
-    }
-
-    void write(void *dest, RoseEngineBlob &blob,
-               const OffsetMap &offset_map) const override;
-
-    bool equiv_to(const RoseInstrSparseIterAny &ri, const OffsetMap &offsets,
-                  const OffsetMap &other_offsets) const {
-        return num_keys == ri.num_keys && keys == ri.keys &&
-               offsets.at(target) == other_offsets.at(ri.target);
-    }
-};
-
-class RoseInstrEnginesEod
-    : public RoseInstrBaseNoTargets<ROSE_INSTR_ENGINES_EOD,
-                                    ROSE_STRUCT_ENGINES_EOD,
-                                    RoseInstrEnginesEod> {
-public:
-    u32 iter_offset;
-
-    explicit RoseInstrEnginesEod(u32 iter_in) : iter_offset(iter_in) {}
-
-    bool operator==(const RoseInstrEnginesEod &ri) const {
-        return iter_offset == ri.iter_offset;
-    }
-
-    size_t hash() const override {
-        return hash_all(static_cast<int>(opcode), iter_offset);
-    }
-
-    void write(void *dest, RoseEngineBlob &blob,
-               const OffsetMap &offset_map) const override;
-
-    bool equiv_to(const RoseInstrEnginesEod &ri, const OffsetMap &,
-                  const OffsetMap &) const {
-        return iter_offset == ri.iter_offset;
-    }
-};
-
-class RoseInstrSuffixesEod
-    : public RoseInstrBaseTrivial<ROSE_INSTR_SUFFIXES_EOD,
-                                  ROSE_STRUCT_SUFFIXES_EOD,
-                                  RoseInstrSuffixesEod> {
-public:
-    ~RoseInstrSuffixesEod() override;
-};
-
-class RoseInstrMatcherEod : public RoseInstrBaseTrivial<ROSE_INSTR_MATCHER_EOD,
-                                                        ROSE_STRUCT_MATCHER_EOD,
-                                                        RoseInstrMatcherEod> {
-public:
-    ~RoseInstrMatcherEod() override;
-};
-
-class RoseInstrCheckLongLit
-    : public RoseInstrBaseNoTargets<ROSE_INSTR_CHECK_LONG_LIT,
-                                    ROSE_STRUCT_CHECK_LONG_LIT,
-                                    RoseInstrCheckLongLit> {
-public:
-    std::string literal;
-
-    RoseInstrCheckLongLit(std::string literal_in)
-        : literal(std::move(literal_in)) {}
-
-    bool operator==(const RoseInstrCheckLongLit &ri) const {
-        return literal == ri.literal;
-    }
-
-    size_t hash() const override {
-        return hash_all(static_cast<int>(opcode), literal);
-    }
-
-    void write(void *dest, RoseEngineBlob &blob,
-               const OffsetMap &offset_map) const override;
-
-    bool equiv_to(const RoseInstrCheckLongLit &ri, const OffsetMap &,
-                  const OffsetMap &) const {
-        return literal == ri.literal;
-    }
-};
-
-class RoseInstrCheckLongLitNocase
-    : public RoseInstrBaseNoTargets<ROSE_INSTR_CHECK_LONG_LIT_NOCASE,
-                                    ROSE_STRUCT_CHECK_LONG_LIT_NOCASE,
-                                    RoseInstrCheckLongLitNocase> {
-public:
-    std::string literal;
-
-    RoseInstrCheckLongLitNocase(std::string literal_in)
-        : literal(std::move(literal_in)) {
-        upperString(literal);
-    }
-
-    bool operator==(const RoseInstrCheckLongLitNocase &ri) const {
-        return literal == ri.literal;
-    }
-
-    size_t hash() const override {
-        return hash_all(static_cast<int>(opcode), literal);
-    }
-
-    void write(void *dest, RoseEngineBlob &blob,
-               const OffsetMap &offset_map) const override;
-
-    bool equiv_to(const RoseInstrCheckLongLitNocase &ri, const OffsetMap &,
-                  const OffsetMap &) const {
-        return literal == ri.literal;
-    }
-};
-
-class RoseInstrEnd
-    : public RoseInstrBaseTrivial<ROSE_INSTR_END,
-                                  ROSE_STRUCT_END,
-                                  RoseInstrEnd> {
-public:
-    ~RoseInstrEnd() override;
-};
+class RoseInstruction;
+struct RoseResources;
 
 /**
  * \brief Container for a list of program instructions.
@@ -1793,16 +55,14 @@ class RoseProgram {
     std::vector<std::unique_ptr<RoseInstruction>> prog;
 
 public:
-    RoseProgram() {
-        prog.push_back(make_unique<RoseInstrEnd>());
-    }
+    RoseProgram();
+    ~RoseProgram();
+    RoseProgram(const RoseProgram &) = delete;
+    RoseProgram(RoseProgram &&);
+    RoseProgram &operator=(const RoseProgram &) = delete;
+    RoseProgram &operator=(RoseProgram &&);
 
-    bool empty() const {
-        assert(!prog.empty());
-        assert(prog.back()->code() == ROSE_INSTR_END);
-        // Empty if we only have one element, the END instruction.
-        return std::next(prog.begin()) == prog.end();
-    }
+    bool empty() const;
 
     size_t size() const { return prog.size(); }
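// All of the instruction classes removed above follow one CRTP pattern: each
// passes its opcode, its bytecode layout struct and itself as template
// arguments to a RoseInstrBase* helper, which supplies the shared plumbing.
// A rough sketch of the shape (simplified and illustrative only; the member
// names below are not the real base-class API):

template <RoseInstructionCode Opcode, class ImplStruct, class Derived>
class RoseInstrBaseSketch : public RoseInstruction {
public:
    // Each concrete instruction reports its opcode and the size of the
    // bytecode struct it serialises into.
    RoseInstructionCode code() const override { return Opcode; }
    size_t byte_length() const override { return sizeof(ImplStruct); }
};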
@@ -1826,105 +86,40 @@ class RoseProgram {
     const_reverse_iterator rend() const { return prog.rend(); }
 
     /** \brief Retrieve a pointer to the terminating ROSE_INSTR_END. */
-    const RoseInstruction *end_instruction() const {
-        assert(!prog.empty());
-        assert(prog.back()->code() == ROSE_INSTR_END);
+    const RoseInstruction *end_instruction() const;
 
-        return prog.back().get();
-    }
-
-private:
     static void update_targets(iterator it, iterator it_end,
                                const RoseInstruction *old_target,
-                               const RoseInstruction *new_target) {
-        assert(old_target && new_target && old_target != new_target);
-        for (; it != it_end; ++it) {
-            std::unique_ptr<RoseInstruction> &ri = *it;
-            assert(ri);
-            ri->update_target(old_target, new_target);
-        }
-    }
-
-public:
-    iterator insert(iterator it, std::unique_ptr<RoseInstruction> ri) {
-        assert(!prog.empty());
-        assert(it != end());
-        assert(prog.back()->code() == ROSE_INSTR_END);
-
-        return prog.insert(it, std::move(ri));
-    }
-
-    iterator insert(iterator it, RoseProgram &&block) {
-        assert(!prog.empty());
-        assert(it != end());
-        assert(prog.back()->code() == ROSE_INSTR_END);
-
-        if (block.empty()) {
-            return it;
-        }
+                               const RoseInstruction *new_target);
 
-        const RoseInstruction *end_ptr = block.end_instruction();
-        assert(end_ptr->code() == ROSE_INSTR_END);
-        block.prog.pop_back();
+    iterator insert(iterator it, std::unique_ptr<RoseInstruction> ri);
 
-        const RoseInstruction *new_target = it->get();
-        update_targets(block.prog.begin(), block.prog.end(), end_ptr,
-                       new_target);
+    iterator insert(iterator it, RoseProgram &&block);
 
-        // Workaround: container insert() for ranges doesn't return an iterator
-        // in the version of the STL distributed with gcc 4.8.
-        auto dist = distance(prog.begin(), it);
-        prog.insert(it, std::make_move_iterator(block.prog.begin()),
-                    std::make_move_iterator(block.prog.end()));
-        it = prog.begin();
-        std::advance(it, dist);
-        return it;
-    }
+    /* Note: takes iterator rather than const_iterator to support toolchains
+     * with pre-C++11 standard libraries (i.e., gcc-4.8). */
+    iterator erase(iterator first, iterator last);
 
     /**
     * \brief Adds this instruction to the program just before the terminating
     * ROSE_INSTR_END.
     */
-    void add_before_end(std::unique_ptr<RoseInstruction> ri) {
-        assert(!prog.empty());
-        insert(std::prev(prog.end()), std::move(ri));
-    }
+    void add_before_end(std::unique_ptr<RoseInstruction> ri);
 
     /**
     * \brief Adds this block to the program just before the terminating
    * ROSE_INSTR_END.
+     *
+     * Any existing instruction that was jumping to end continues to do so.
     */
-    void add_before_end(RoseProgram &&block) {
-        assert(!prog.empty());
-        assert(prog.back()->code() == ROSE_INSTR_END);
-
-        if (block.empty()) {
-            return;
-        }
-
-        insert(std::prev(prog.end()), std::move(block));
-    }
-
+    void add_before_end(RoseProgram &&block);
     /**
     * \brief Append this program block, replacing our current ROSE_INSTR_END.
+     *
+     * Any existing instruction that was jumping to end, now leads to the newly
+     * added block.
     */
-    void add_block(RoseProgram &&block) {
-        assert(!prog.empty());
-        assert(prog.back()->code() == ROSE_INSTR_END);
-
-        if (block.empty()) {
-            return;
-        }
-
-        // Replace pointers to the current END with pointers to the first
-        // instruction in the new sequence.
-        const RoseInstruction *end_ptr = end_instruction();
-        prog.pop_back();
-        update_targets(prog.begin(), prog.end(), end_ptr,
-                       block.prog.front().get());
-        prog.insert(prog.end(), std::make_move_iterator(block.prog.begin()),
-                    std::make_move_iterator(block.prog.end()));
-    }
+    void add_block(RoseProgram &&block);
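// The doc comments above pin down the linking semantics: add_before_end()
// keeps existing jumps to END pointing at END, while add_block() redirects
// them into the appended block. A usage sketch under those assumptions
// (hypothetical instruction values; std::make_unique used for brevity):

RoseProgram prog;                  // starts as a lone ROSE_INSTR_END
prog.add_before_end(std::make_unique<RoseInstrCatchUp>());
RoseProgram tail;                  // hypothetical follow-on block
tail.add_before_end(std::make_unique<RoseInstrCatchUp>());
prog.add_block(std::move(tail));   // jumps to END now land in the tail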
 
     /**
     * \brief Replace the instruction pointed to by the given iterator.
@@ -1932,29 +127,19 @@ class RoseProgram {
     template<class Iter>
     void replace(Iter it, std::unique_ptr<RoseInstruction> ri) {
         assert(!prog.empty());
-        assert(prog.back()->code() == ROSE_INSTR_END);
 
         const RoseInstruction *old_ptr = it->get();
         *it = move(ri);
         update_targets(prog.begin(), prog.end(), old_ptr, it->get());
-
-        assert(prog.back()->code() == ROSE_INSTR_END);
     }
 };
 
-aligned_unique_ptr<char>
-writeProgram(RoseEngineBlob &blob, const RoseProgram &program, u32 *total_len);
+bytecode_ptr<char> writeProgram(RoseEngineBlob &blob,
+                                const RoseProgram &program);
 
 class RoseProgramHash {
 public:
-    size_t operator()(const RoseProgram &program) const {
-        size_t v = 0;
-        for (const auto &ri : program) {
-            assert(ri);
-            boost::hash_combine(v, ri->hash());
-        }
-        return v;
-    }
+    size_t operator()(const RoseProgram &program) const;
 };
 
 class RoseProgramEquivalence {
@@ -1962,6 +147,141 @@ class RoseProgramEquivalence {
     bool operator()(const RoseProgram &prog1, const RoseProgram &prog2) const;
 };
 
+/** \brief Data only used during construction of various programs (literal,
+ * anchored, delay, etc). */
+struct ProgramBuild : noncopyable {
+    explicit ProgramBuild(u32 fMinLitOffset, size_t longLitThresh,
+                          bool catchup)
+        : floatingMinLiteralMatchOffset(fMinLitOffset),
+          longLitLengthThreshold(longLitThresh), needs_catchup(catchup) {
+    }
+
+    /** \brief Minimum offset of a match from the floating table. */
+    const u32 floatingMinLiteralMatchOffset;
+
+    /** \brief Long literal length threshold, used in streaming mode. */
+    const size_t longLitLengthThreshold;
+
+    /** \brief True if reports need CATCH_UP instructions to catch up suffixes,
+     * outfixes etc. */
+    const bool needs_catchup;
+
+    /** \brief Mapping from vertex to key, for vertices with a
+     * CHECK_NOT_HANDLED instruction. */
+    ue2::unordered_map<RoseVertex, u32> handledKeys;
+
+    /** \brief Mapping from Rose literal ID to anchored program index. */
+    std::map<u32, u32> anchored_programs;
+
+    /** \brief Mapping from Rose literal ID to delayed program index. */
+    std::map<u32, u32> delay_programs;
+
+    /** \brief Mapping from every vertex to the groups that must be on for that
+     * vertex to be reached. */
+    ue2::unordered_map<RoseVertex, rose_group> vertex_group_map;
+
+    /** \brief Global bitmap of groups that can be squashed. */
+    rose_group squashable_groups = 0;
+};
+
+void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program);
+void addSuffixesEodProgram(RoseProgram &program);
+void addMatcherEodProgram(RoseProgram &program);
+
+static constexpr u32 INVALID_QUEUE = ~0U;
+
+struct left_build_info {
+    // Constructor for an engine implementation.
+    left_build_info(u32 q, u32 l, u32 t, rose_group sm,
+                    const std::vector<u8> &stops, u32 max_ql, u8 cm_count,
+                    const CharReach &cm_cr);
+
+    // Constructor for a lookaround implementation.
+    explicit left_build_info(const std::vector<std::vector<LookEntry>> &looks);
+
+    u32 queue = INVALID_QUEUE; /* uniquely idents the left_build_info */
+    u32 lag = 0;
+    u32 transient = 0;
+    rose_group squash_mask = ~rose_group{0};
+    std::vector<u8> stopAlphabet;
+    u32 max_queuelen = 0;
+    u8 countingMiracleCount = 0;
+    CharReach countingMiracleReach;
+    u32 countingMiracleOffset = 0; /* populated later when laying out bytecode */
+    bool has_lookaround = false;
+
+    // alternative implementation to the NFA
+    std::vector<std::vector<LookEntry>> lookaround;
+};
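// RoseProgramHash and RoseProgramEquivalence, declared above, exist so that
// RoseProgram can key a hash container. A minimal deduplication sketch under
// that assumption (the cache type and helper below are hypothetical, for
// illustration only; the real dedupe lives in the Rose build code):

using ProgCache = std::unordered_map<RoseProgram, u32, RoseProgramHash,
                                     RoseProgramEquivalence>;

u32 dedupe_offset(ProgCache &cache, RoseProgram &&prog, u32 offset) {
    auto it = cache.find(prog);
    if (it != cache.end()) {
        return it->second; // identical program already written
    }
    cache.emplace(std::move(prog), offset);
    return offset;
}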
+
+/**
+ * \brief Provides a brief summary of properties of an NFA that has already
+ * been finalised and stored in the blob.
+ */
+struct engine_info {
+    engine_info(const NFA *nfa, bool trans);
+
+    enum NFAEngineType type;
+    bool accepts_eod;
+    u32 stream_size;
+    u32 scratch_size;
+    u32 scratch_align;
+    bool transient;
+};
+
+/**
+ * \brief Consumes list of program blocks corresponding to different literals,
+ * checks them for duplicates and then concatenates them into one program.
+ *
+ * Note: if a block will squash groups, a CLEAR_WORK_DONE instruction is
+ * inserted to prevent the work_done flag being contaminated by early blocks.
+ */
+RoseProgram assembleProgramBlocks(std::vector<RoseProgram> &&blocks);
+
+RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
+                        const std::map<RoseVertex, left_build_info> &leftfix_info,
+                        const std::map<suffix_id, u32> &suffixes,
+                        const std::map<u32, engine_info> &engine_info_by_queue,
+                        const unordered_map<RoseVertex, u32> &roleStateIndices,
+                        ProgramBuild &prog_build, u32 lit_id,
+                        const std::vector<RoseEdge> &lit_edges,
+                        bool is_anchored_replay_program);
+
+RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build,
+                                    ProgramBuild &prog_build,
+                                    const std::vector<u32> &lit_ids);
+
+RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build,
+                                 ProgramBuild &prog_build, const RoseEdge &e,
+                                 const bool multiple_preds);
+
+RoseProgram makeReportProgram(const RoseBuildImpl &build,
+                              bool needs_mpv_catchup, ReportID id);
+
+RoseProgram makeBoundaryProgram(const RoseBuildImpl &build,
+                                const std::set<ReportID> &reports);
+
+struct TriggerInfo {
+    TriggerInfo(bool c, u32 q, u32 e) : cancel(c), queue(q), event(e) {}
+    bool cancel;
+    u32 queue;
+    u32 event;
+
+    bool operator==(const TriggerInfo &b) const {
+        return cancel == b.cancel && queue == b.queue && event == b.event;
+    }
+};
+
+void addPredBlocks(std::map<u32, RoseProgram> &pred_blocks, u32 num_states,
+                   RoseProgram &program);
+
+void applyFinalSpecialisation(RoseProgram &program);
+
+void recordLongLiterals(std::vector<ue2_case_string> &longLiterals,
+                        const RoseProgram &program);
+
+void recordResources(RoseResources &resources, const RoseProgram &program);
+
 } // namespace ue2
 
 #endif // ROSE_BUILD_PROGRAM_H
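// A rough sketch of how the builders declared in this header compose
// (hypothetical driver code; `fragments` and makeFragmentBlock() are
// illustrative names only, not part of the real API):

std::vector<RoseProgram> blocks;
for (const auto &frag : fragments) {            // hypothetical inputs
    blocks.push_back(makeFragmentBlock(frag));  // hypothetical builder
}
// assembleProgramBlocks() drops duplicate blocks and inserts
// CLEAR_WORK_DONE between group-squashing blocks, as documented above.
RoseProgram lit_prog = assembleProgramBlocks(std::move(blocks));
applyFinalSpecialisation(lit_prog);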
diff --git a/src/rose/rose_build_resources.h b/src/rose/rose_build_resources.h
new file mode 100644
index 000000000..3edb81b96
--- /dev/null
+++ b/src/rose/rose_build_resources.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2017, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_RESOURCES_H
+#define ROSE_BUILD_RESOURCES_H
+
+namespace ue2 {
+
+/**
+ * \brief Structure tracking which resources are used by this Rose instance at
+ * runtime.
+ *
+ * We use this to control how much initialisation we need to do at the
+ * beginning of a stream/block at runtime.
+ */
+struct RoseResources {
+    bool has_outfixes = false;
+    bool has_suffixes = false;
+    bool has_leftfixes = false;
+    bool has_literals = false;
+    bool has_states = false;
+    bool checks_groups = false;
+    bool has_lit_delay = false;
+    bool has_lit_check = false; // long literal support
+    bool has_anchored = false;
+    bool has_floating = false;
+    bool has_eod = false;
+};
+
+}
+
+#endif
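// A sketch of how such flags can be consumed (hypothetical helper; the real
// gating logic lives in the Rose runtime): databases with no stateful
// components can skip per-stream initialisation entirely.

static bool needsStreamInit(const RoseResources &res) {
    // Only stateful components require work at stream start.
    return res.has_states || res.has_leftfixes || res.has_suffixes ||
           res.has_outfixes || res.has_lit_delay || res.has_lit_check;
}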
diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp
index c6139097e..0e78ec7db 100644
--- a/src/rose/rose_build_role_aliasing.cpp
+++ b/src/rose/rose_build_role_aliasing.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -47,6 +47,7 @@
 #include "util/container.h"
 #include "util/graph.h"
 #include "util/graph_range.h"
+#include "util/hash.h"
 #include "util/order_check.h"
 #include "util/ue2_containers.h"
 
@@ -111,11 +112,14 @@ struct AliasInEdge : EdgeAndVertex {
 
 class CandidateSet {
 public:
-    typedef set<RoseVertex>::iterator iterator;
-    typedef RoseVertex key_type;
+    using key_type = RoseVertex;
+    using iterator = set<RoseVertex>::iterator;
+    using const_iterator = set<RoseVertex>::const_iterator;
 
     iterator begin() { return main_cont.begin(); }
     iterator end() { return main_cont.end(); }
+    const_iterator begin() const { return main_cont.begin(); }
+    const_iterator end() const { return main_cont.end(); }
 
     bool contains(RoseVertex a) const {
         return hash_cont.find(a) != hash_cont.end();
@@ -324,9 +328,9 @@ bool canMergeLiterals(RoseVertex a, RoseVertex b, const RoseBuildImpl &build) {
 
     // Otherwise, all the literals involved must have the same length.
     for (u32 a_id : lits_a) {
-        const rose_literal_id &la = build.literals.right.at(a_id);
+        const rose_literal_id &la = build.literals.at(a_id);
        for (u32 b_id : lits_b) {
-            const rose_literal_id &lb = build.literals.right.at(b_id);
+            const rose_literal_id &lb = build.literals.at(b_id);
 
             if (la.elength() != lb.elength()) {
                 DEBUG_PRINTF("bad merge %zu!=%zu '%s', '%s'\n", la.elength(),
@@ -451,37 +455,6 @@ bool sameRightRoleProperties(const RoseBuildImpl &build, RoseVertex a,
     return true;
 }
 
-/**
- * Hash on some deterministic props checked in sameRoleProperties + properties
- * required for right equivalence.
- */
-static
-size_t hashRightRoleProperties(RoseVertex v, const RoseGraph &g) {
-    using boost::hash_combine;
-    using boost::hash_range;
-
-    const RoseVertexProps &props = g[v];
-
-    size_t val = 0;
-    hash_combine(val, hash_range(begin(props.reports), end(props.reports)));
-
-    if (props.suffix) {
-        const auto &suffix = props.suffix;
-        if (suffix.castle) {
-            hash_combine(val, suffix.castle->reach());
-            hash_combine(val, suffix.castle->repeats.size());
-        }
-        if (suffix.graph) {
-            hash_combine(val, num_vertices(*suffix.graph));
-        }
-        if (suffix.haig) {
-            hash_combine(val, hash_dfa(*suffix.haig));
-        }
-    }
-
-    return val;
-}
-
 static
 void mergeEdgeAdd(RoseVertex u, RoseVertex v, const RoseEdge &from_edge,
                   const RoseEdge *to_edge, RoseGraph &g) {
@@ -684,16 +657,6 @@ void findCandidates(const RoseBuildImpl &build, CandidateSet *candidates) {
                 num_vertices(build.g));
 }
 
-static
-RoseVertex pickSucc(const RoseVertex v, const RoseGraph &g) {
-    RoseGraph::adjacency_iterator ai, ae;
-    tie(ai, ae) = adjacent_vertices(v, g);
-    if (ai == ae) {
-        return RoseGraph::null_vertex();
-    }
-    return *ai;
-}
-
 static
 RoseVertex pickPred(const RoseVertex v, const RoseGraph &g,
                     const RoseBuildImpl &build) {
@@ -854,7 +817,7 @@ void pruneUnusedTops(NGHolder &h, const RoseGraph &g,
         return;
     }
     assert(isCorrectlyTopped(h));
-    DEBUG_PRINTF("prunning unused tops\n");
+    DEBUG_PRINTF("pruning unused tops\n");
     ue2::flat_set<u32> used_tops;
     for (auto v : verts) {
         assert(g[v].left.graph.get() == &h);
@@ -875,7 +838,7 @@ void pruneUnusedTops(NGHolder &h, const RoseGraph &g,
        auto pt_inserter = inserter(pruned_tops, pruned_tops.end());
         set_intersection(h[e].tops.begin(), h[e].tops.end(),
                          used_tops.begin(), used_tops.end(), pt_inserter);
-        h[e].tops = move(pruned_tops);
+        h[e].tops = std::move(pruned_tops);
         if (h[e].tops.empty()) {
             DEBUG_PRINTF("edge (start,%zu) has only unused tops\n", h[v].index);
             dead.push_back(e);
@@ -1162,6 +1125,8 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a,
     shared_ptr<NGHolder> a_h = a_left.graph;
     shared_ptr<NGHolder> b_h = b_left.graph;
     assert(a_h && b_h);
+    assert(isImplementableNFA(*a_h, nullptr, build.cc));
+    assert(isImplementableNFA(*b_h, nullptr, build.cc));
 
     // If we only differ in reports, this is a very easy merge. Just use b's
     // report for both.
@@ -1252,6 +1217,11 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a,
     duplicateReport(*new_graph, b_left.leftfix_report, new_report);
     pruneAllOtherReports(*new_graph, new_report);
 
+    if (!isImplementableNFA(*new_graph, nullptr, build.cc)) {
+        DEBUG_PRINTF("new graph not implementable\n");
+        return false;
+    }
+
     rai.rev_leftfix[a_left_id].erase(a);
     rai.rev_leftfix[b_left_id].erase(b);
     pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]);
@@ -1427,62 +1397,95 @@ bool attemptRoseMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a,
     return false;
 }
 
+/**
+ * \brief Buckets that only contain one vertex are never going to lead to a
+ * merge.
+ */
 static
-void splitByReportSuffixBehaviour(const RoseGraph &g,
-                                  vector<vector<RoseVertex>> &buckets,
-                                  ue2::unordered_map<RoseVertex, size_t> &inv) {
-    /* vertices with different report/suffixes can never be considered for right
-     * merge. */
-    vector<vector<RoseVertex>> out;
-    for (const vector<RoseVertex> &b : buckets) {
-        assert(!b.empty());
-        map<pair<flat_set<ReportID>, RoseSuffixInfo>, size_t> dest_map;
-        for (RoseVertex v : b) {
-            auto key = decltype(dest_map)::key_type(g[v].reports, g[v].suffix);
-            size_t out_bucket;
-            if (contains(dest_map, key)) {
-                out_bucket = dest_map[key];
-            } else {
-                out_bucket = out.size();
-                out.push_back(vector<RoseVertex>());
-                dest_map[key] = out_bucket;
-            }
-            out[out_bucket].push_back(v);
-            inv[v] = out_bucket;
-        }
-
+void removeSingletonBuckets(vector<vector<RoseVertex>> &buckets) {
+    auto it = remove_if(
+        begin(buckets), end(buckets),
+        [](const vector<RoseVertex> &bucket) { return bucket.size() < 2; });
+    if (it != end(buckets)) {
+        DEBUG_PRINTF("deleting %zu singleton buckets\n",
+                     distance(it, end(buckets)));
+        buckets.erase(it, end(buckets));
     }
-
-    buckets.swap(out);
 }
 
 static
-void splitByLiteralTable(const RoseBuildImpl &build,
-                         vector<vector<RoseVertex>> &buckets,
-                         ue2::unordered_map<RoseVertex, size_t> &inv) {
-    const RoseGraph &g = build.g;
+void buildInvBucketMap(const vector<vector<RoseVertex>> &buckets,
+                       ue2::unordered_map<RoseVertex, size_t> &inv) {
+    inv.clear();
+    for (size_t i = 0; i < buckets.size(); i++) {
+        for (auto v : buckets[i]) {
+            assert(!contains(inv, v));
+            inv.emplace(v, i);
+        }
+    }
+}
+
+/**
+ * \brief Generic splitter that will use the given split function to partition
+ * the vector of buckets, then remove buckets with <= 1 entry.
+ */
+template <class SplitFunction>
+void splitAndFilterBuckets(vector<vector<RoseVertex>> &buckets,
+                           const SplitFunction &make_split_key) {
+    if (buckets.empty()) {
+        return;
+    }
 
     vector<vector<RoseVertex>> out;
 
+    // Mapping from split key value to new bucket index.
+    using key_type = decltype(make_split_key(RoseGraph::null_vertex()));
+    unordered_map<key_type, size_t> dest_map;
+    dest_map.reserve(buckets.front().size());
+
     for (const auto &bucket : buckets) {
         assert(!bucket.empty());
-        map<rose_literal_table, size_t> dest_map;
+        dest_map.clear();
         for (RoseVertex v : bucket) {
-            auto table = build.literals.right.at(*g[v].literals.begin()).table;
-            size_t out_bucket;
-            if (contains(dest_map, table)) {
-                out_bucket = dest_map[table];
-            } else {
-                out_bucket = out.size();
-                out.push_back(vector<RoseVertex>());
-                dest_map[table] = out_bucket;
+            auto p = dest_map.emplace(make_split_key(v), out.size());
+            if (p.second) { // New key, add a bucket.
+                out.emplace_back();
             }
+            auto out_bucket = p.first->second;
             out[out_bucket].push_back(v);
-            inv[v] = out_bucket;
         }
     }
 
-    buckets.swap(out);
+    if (out.size() == buckets.size()) {
+        return; // No new buckets created.
+    }
+
+    buckets = std::move(out);
+    removeSingletonBuckets(buckets);
+}
+
+static
+void splitByReportSuffixBehaviour(const RoseGraph &g,
+                                  vector<vector<RoseVertex>> &buckets) {
+    // Split by report set and suffix info.
+    auto make_split_key = [&g](RoseVertex v) {
+        return hash_all(g[v].reports, g[v].suffix);
+    };
+    splitAndFilterBuckets(buckets, make_split_key);
+}
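// The splitter above accepts any callable that maps a vertex to a hashable
// key. For illustration only, the same idiom with a made-up criterion
// (out-degree is not something the real passes split on):

static
void splitByOutDegreeSketch(const RoseGraph &g,
                            vector<vector<RoseVertex>> &buckets) {
    auto make_split_key = [&g](RoseVertex v) {
        return out_degree(v, g);
    };
    splitAndFilterBuckets(buckets, make_split_key);
}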
+static
+void splitByLiteralTable(const RoseBuildImpl &build,
+                         vector<vector<RoseVertex>> &buckets) {
+    const RoseGraph &g = build.g;
+
+    // Split by literal table.
+    auto make_split_key = [&](RoseVertex v) {
+        const auto &lits = g[v].literals;
+        assert(!lits.empty());
+        return build.literals.at(*lits.begin()).table;
+    };
+    splitAndFilterBuckets(buckets, make_split_key);
 }
 
 static
@@ -1543,6 +1546,9 @@ void splitByNeighbour(const RoseGraph &g, vector<vector<RoseVertex>> &buckets,
         }
         insert(&buckets, buckets.end(), extras);
     }
+
+    removeSingletonBuckets(buckets);
+    buildInvBucketMap(buckets, inv);
 }
 
 static
@@ -1551,16 +1557,35 @@ splitDiamondMergeBuckets(CandidateSet &candidates, const RoseBuildImpl &build)
     const RoseGraph &g = build.g;
 
     vector<vector<RoseVertex>> buckets(1);
-    ue2::unordered_map<RoseVertex, size_t> inv;
-    for (RoseVertex v : candidates) {
-        buckets[0].push_back(v);
-        inv[v] = 0;
+    buckets[0].reserve(candidates.size());
+    insert(&buckets[0], buckets[0].end(), candidates);
+
+    DEBUG_PRINTF("at start, %zu candidates in 1 bucket\n", candidates.size());
+
+    splitByReportSuffixBehaviour(g, buckets);
+    DEBUG_PRINTF("split by report/suffix, %zu buckets\n", buckets.size());
+    if (buckets.empty()) {
+        return buckets;
     }
-    splitByReportSuffixBehaviour(g, buckets, inv);
-    splitByLiteralTable(build, buckets, inv);
+    splitByLiteralTable(build, buckets);
+    DEBUG_PRINTF("split by lit table, %zu buckets\n", buckets.size());
+    if (buckets.empty()) {
+        return buckets;
+    }
+
+    // Neighbour splits require inverse map.
+    ue2::unordered_map<RoseVertex, size_t> inv;
+    buildInvBucketMap(buckets, inv);
+    splitByNeighbour(g, buckets, inv, true);
+    DEBUG_PRINTF("split by successor, %zu buckets\n", buckets.size());
+    if (buckets.empty()) {
+        return buckets;
+    }
+    splitByNeighbour(g, buckets, inv, false);
+    DEBUG_PRINTF("split by predecessor, %zu buckets\n", buckets.size());
 
     return buckets;
 }
 
@@ -1677,55 +1702,62 @@ vector<RoseVertex>::iterator findLeftMergeSibling(
     return end;
 }
 
+static
+void getLeftMergeSiblings(const RoseBuildImpl &build, RoseVertex a,
+                          vector<RoseVertex> &siblings) {
+    // We have to find a sibling to merge `a' with, and we select between
+    // two approaches to minimize the number of vertices we have to
+    // examine; which we use depends on the shape of the graph.
+
+    const RoseGraph &g = build.g;
+    assert(!g[a].literals.empty());
+    u32 lit_id = *g[a].literals.begin();
+    const auto &verts = build.literal_info.at(lit_id).vertices;
+    RoseVertex pred = pickPred(a, g, build);
+
+    siblings.clear();
+
+    if (pred == RoseGraph::null_vertex() || build.isAnyStart(pred) ||
+        out_degree(pred, g) > verts.size()) {
+        // Select sibling from amongst the vertices that share a literal.
+        insert(&siblings, siblings.end(), verts);
+    } else {
+        // Select sibling from amongst the vertices that share a
+        // predecessor.
+        insert(&siblings, siblings.end(), adjacent_vertices(pred, g));
+    }
+}
+
 static never_inline
 void leftMergePass(CandidateSet &candidates, RoseBuildImpl &build,
                    vector<RoseVertex> *dead, RoseAliasingInfo &rai) {
     DEBUG_PRINTF("begin (%zu)\n", candidates.size());
-    RoseGraph &g = build.g;
     vector<RoseVertex> siblings;
 
-    CandidateSet::iterator it = candidates.begin();
+    auto it = candidates.begin();
     while (it != candidates.end()) {
         RoseVertex a = *it;
         CandidateSet::iterator ait = it;
         ++it;
 
-        // We have to find a sibling to merge `a' with, and we select between
-        // two approaches to minimize the number of vertices we have to
-        // examine; which we use depends on the shape of the graph.
-
-        assert(!g[a].literals.empty());
-        u32 lit_id = *g[a].literals.begin();
-        const auto &verts = build.literal_info.at(lit_id).vertices;
-        RoseVertex pred = pickPred(a, g, build);
-
-        siblings.clear();
-        if (pred == RoseGraph::null_vertex() || build.isAnyStart(pred)
-            || out_degree(pred, g) > verts.size()) {
-            // Select sibling from amongst the vertices that share a literal.
-            siblings.insert(siblings.end(), verts.begin(), verts.end());
-        } else {
-            // Select sibling from amongst the vertices that share a
-            // predecessor.
-            insert(&siblings, siblings.end(), adjacent_vertices(pred, g));
-        }
-
-        auto jt = findLeftMergeSibling(siblings.begin(), siblings.end(), a,
-                                       build, rai, candidates);
-        if (jt == siblings.end()) {
-            continue;
-        }
-
-        RoseVertex b = *jt;
+        getLeftMergeSiblings(build, a, siblings);
 
-        if (!attemptRoseMerge(build, true, a, b, 0, rai)) {
-            DEBUG_PRINTF("rose fail\n");
-            continue;
+        auto jt = siblings.begin();
+        while (jt != siblings.end()) {
+            jt = findLeftMergeSibling(jt, siblings.end(), a, build, rai,
+                                      candidates);
+            if (jt == siblings.end()) {
+                break;
+            }
+            RoseVertex b = *jt;
+            if (attemptRoseMerge(build, true, a, b, false, rai)) {
+                mergeVerticesLeft(a, b, build, rai);
+                dead->push_back(a);
+                candidates.erase(ait);
+                break; // consider next a
+            }
+            ++jt;
        }
-
-        mergeVerticesLeft(a, b, build, rai);
-        dead->push_back(a);
-        candidates.erase(ait);
     }
 
     DEBUG_PRINTF("%zu candidates remaining\n", candidates.size());
@@ -1810,91 +1842,49 @@ vector<RoseVertex>::const_iterator findRightMergeSibling(
     return end;
 }
 
-template<typename Iter>
 static
-void split(map<RoseVertex, size_t> &keys, size_t *next_key, Iter it,
-           const Iter end) {
-    map<RoseVertex, size_t> new_keys;
-
-    for (; it != end; ++it) {
-        RoseVertex v = *it;
-        size_t ok = keys[v];
-        size_t nk;
-        if (contains(new_keys, ok)) {
-            nk = new_keys[ok];
-        } else {
-            nk = (*next_key)++;
-            new_keys[ok] = nk;
-        }
-        keys[v] = nk;
-    }
+void splitByRightProps(const RoseGraph &g,
+                       vector<vector<RoseVertex>> &buckets) {
+    // Successor vector used in make_split_key. We declare it here so we can
+    // reuse storage.
+    vector<RoseVertex> succ;
+
+    // Split by {successors, literals, reports}.
+    auto make_split_key = [&](RoseVertex v) {
+        succ.clear();
+        insert(&succ, succ.end(), adjacent_vertices(v, g));
+        sort(succ.begin(), succ.end());
+        return hash_all(g[v].literals, g[v].reports, succ);
+    };
+    splitAndFilterBuckets(buckets, make_split_key);
 }
 
 static never_inline
-void buildCandidateRightSiblings(CandidateSet &candidates, RoseBuildImpl &build,
-                                 map<size_t, vector<RoseVertex>> &sibling_cache,
-                                 map<RoseVertex, size_t> &keys_ext) {
-    RoseGraph &g = build.g;
-
-    size_t next_key = 1;
-    map<RoseVertex, size_t> keys;
-
-    for (const auto &c : candidates) {
-        keys[c] = 0;
-    }
-
-    set<RoseVertex> done_succ;
-    set<u32> done_lit;
-
-    for (auto a : candidates) {
-        assert(!g[a].literals.empty());
-        u32 lit_id = *g[a].literals.begin();
-        RoseVertex succ = pickSucc(a, g);
-        const auto &verts = build.literal_info.at(lit_id).vertices;
-        if (succ != RoseGraph::null_vertex()
-            && in_degree(succ, g) < verts.size()) {
-            if (!done_succ.insert(succ).second) {
-                continue; // succ already in done_succ.
-            }
-            RoseGraph::inv_adjacency_iterator ai, ae;
-            tie (ai, ae) = inv_adjacent_vertices(succ, g);
-            split(keys, &next_key, ai, ae);
-        } else {
-            if (!done_lit.insert(lit_id).second) {
-                continue; // lit_id already in done_lit.
-            }
-            split(keys, &next_key, verts.begin(), verts.end());
-        }
-    }
+vector<vector<RoseVertex>>
+splitRightMergeBuckets(const CandidateSet &candidates,
+                       const RoseBuildImpl &build) {
+    const RoseGraph &g = build.g;
 
-    map<size_t, map<size_t, u32>> int_to_ext;
+    vector<vector<RoseVertex>> buckets(1);
+    buckets[0].reserve(candidates.size());
+    insert(&buckets[0], buckets[0].end(), candidates);
 
-    for (const auto &key : keys) {
-        RoseVertex v = key.first;
-        u32 ext;
-        size_t rph = hashRightRoleProperties(v, g);
-        if (contains(int_to_ext[key.second], rph)) {
-            ext = int_to_ext[key.second][rph];
-        } else {
-            ext = keys_ext.size();
-            int_to_ext[key.second][rph] = ext;
-        }
+    DEBUG_PRINTF("at start, %zu candidates in 1 bucket\n", candidates.size());
 
-        keys_ext[v] = ext;
-        sibling_cache[ext].push_back(v);
+    splitByReportSuffixBehaviour(g, buckets);
+    DEBUG_PRINTF("split by report/suffix, %zu buckets\n", buckets.size());
+    if (buckets.empty()) {
+        return buckets;
     }
 
-    for (auto &siblings : sibling_cache | map_values) {
-        sort(siblings.begin(), siblings.end());
+    splitByRightProps(g, buckets);
+    DEBUG_PRINTF("split by right-merge properties, %zu buckets\n",
+                 buckets.size());
+    if (buckets.empty()) {
+        return buckets;
     }
-}
 
-static
-const vector<RoseVertex> &getCandidateRightSiblings(
-    const map<size_t, vector<RoseVertex>> &sibling_cache,
-    map<RoseVertex, size_t> &keys, RoseVertex a) {
-    size_t key = keys.at(a);
-    return sibling_cache.at(key);
+    return buckets;
 }
 
 static never_inline
@@ -1903,45 +1893,31 @@ void rightMergePass(CandidateSet &candidates, RoseBuildImpl &build,
                     RoseAliasingInfo &rai) {
     DEBUG_PRINTF("begin\n");
 
-    map<size_t, vector<RoseVertex>> sibling_cache;
-    map<RoseVertex, size_t> keys;
-
-    buildCandidateRightSiblings(candidates, build, sibling_cache, keys);
-
-    CandidateSet::iterator it = candidates.begin();
-    while (it != candidates.end()) {
-        RoseVertex a = *it;
-        CandidateSet::iterator ait = it;
-        ++it;
-
-        // We have to find a sibling to merge `a' with, and we select between
-        // two approaches to minimize the number of vertices we have to
-        // examine; which we use depends on the shape of the graph.
+    if (candidates.empty()) {
+        return;
+    }
 
-        const vector<RoseVertex> &siblings
-            = getCandidateRightSiblings(sibling_cache, keys, a);
+    auto buckets = splitRightMergeBuckets(candidates, build);
 
-        auto jt = siblings.begin();
-        while (jt != siblings.end()) {
-            jt = findRightMergeSibling(jt, siblings.end(), a, build, rai,
-                                       candidates);
-            if (jt == siblings.end()) {
-                break;
-            }
-            if (attemptRoseMerge(build, false, a, *jt, !mergeRoses, rai)) {
-                break;
+    for (const auto &bucket : buckets) {
+        assert(!bucket.empty());
+        for (auto it = bucket.begin(); it != bucket.end(); it++) {
+            RoseVertex a = *it;
+            for (auto jt = bucket.begin(); jt != bucket.end(); jt++) {
+                jt = findRightMergeSibling(jt, bucket.end(), a, build, rai,
+                                           candidates);
+                if (jt == bucket.end()) {
+                    break;
+                }
+                RoseVertex b = *jt;
+                if (attemptRoseMerge(build, false, a, b, !mergeRoses, rai)) {
+                    mergeVerticesRight(a, b, build, rai);
+                    dead->push_back(a);
+                    candidates.erase(a);
+                    break; // consider next a
+                }
             }
-            ++jt;
         }
-
-        if (jt == siblings.end()) {
-            continue;
-        }
-
-        RoseVertex b = *jt;
-        mergeVerticesRight(a, b, build, rai);
-        dead->push_back(a);
-        candidates.erase(ait);
     }
 
     DEBUG_PRINTF("%zu candidates remaining\n", candidates.size());
diff --git a/src/rose/rose_build_scatter.cpp b/src/rose/rose_build_scatter.cpp
index 8d30dd23c..87085ae9a 100644
--- a/src/rose/rose_build_scatter.cpp
+++ b/src/rose/rose_build_scatter.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -63,24 +63,24 @@ void merge_in(scatter_plan_raw *out, const scatter_plan_raw &in) {
     insert(&out->p_u8, out->p_u8.end(), in.p_u8);
 }
 
-void buildStateScatterPlan(u32 role_state_offset, u32 role_state_count,
-                           u32 left_array_count, u32 left_prefix_count,
-                           const RoseStateOffsets &stateOffsets,
-                           bool streaming, u32 leaf_array_count,
-                           u32 outfix_begin, u32 outfix_end,
-                           scatter_plan_raw *out) {
+scatter_plan_raw buildStateScatterPlan(u32 role_state_offset,
+        u32 role_state_count, u32 left_array_count, u32 left_prefix_count,
+        const RoseStateOffsets &stateOffsets, bool streaming,
+        u32 leaf_array_count, u32 outfix_begin, u32 outfix_end) {
+    scatter_plan_raw out;
+
     /* init role array */
     scatter_plan_raw spr_role;
     mmbBuildClearPlan(role_state_count, &spr_role);
     rebase(&spr_role, role_state_offset);
-    merge_in(out, spr_role);
+    merge_in(&out, spr_role);
 
     /* init rose array: turn on prefixes */
     u32 rose_array_offset = stateOffsets.activeLeftArray;
     scatter_plan_raw spr_rose;
     mmbBuildInitRangePlan(left_array_count, 0, left_prefix_count, &spr_rose);
     rebase(&spr_rose, rose_array_offset);
-    merge_in(out, spr_rose);
+    merge_in(&out, spr_rose);
 
     /* suffix/outfix array */
     scatter_plan_raw spr_leaf;
@@ -91,7 +91,9 @@ void buildStateScatterPlan(u32 role_state_offset, u32 role_state_count,
         mmbBuildClearPlan(leaf_array_count, &spr_leaf);
     }
     rebase(&spr_leaf, stateOffsets.activeLeafArray);
-    merge_in(out, spr_leaf);
+    merge_in(&out, spr_leaf);
+
+    return out;
 }
 
 u32 aux_size(const scatter_plan_raw &raw) {
diff --git a/src/rose/rose_build_scatter.h b/src/rose/rose_build_scatter.h
index a159fe4e2..67a82b993 100644
--- a/src/rose/rose_build_scatter.h
+++ b/src/rose/rose_build_scatter.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -45,12 +45,10 @@ struct scatter_plan_raw {
     std::vector<scatter_unit_u8> p_u8;
 };
 
-void buildStateScatterPlan(u32 role_state_offset, u32 role_state_count,
-                           u32 left_array_count, u32 left_prefix_count,
-                           const RoseStateOffsets &stateOffsets,
-                           bool streaming, u32 leaf_array_count,
-                           u32 outfix_begin, u32 outfix_end,
-                           scatter_plan_raw *out);
+scatter_plan_raw buildStateScatterPlan(u32 role_state_offset,
+        u32 role_state_count, u32 left_array_count, u32 left_prefix_count,
+        const RoseStateOffsets &stateOffsets, bool streaming,
+        u32 leaf_array_count, u32 outfix_begin, u32 outfix_end);
 
 u32 aux_size(const scatter_plan_raw &raw);
 
diff --git a/src/rose/rose_common.h b/src/rose/rose_common.h
index 3249f0b8b..34678b8fc 100644
--- a/src/rose/rose_common.h
+++ b/src/rose/rose_common.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -41,4 +41,16 @@
 /** \brief Length in bytes of a reach bitvector, used by the lookaround code. */
 #define REACH_BITVECTOR_LEN 32
 
+/** \brief Length in bytes of a reach bitvector for multi-path lookaround. */
+#define MULTI_REACH_BITVECTOR_LEN 256
+
+/**
+ * \brief The max offset from the leftmost byte to the rightmost byte in
+ * multi-path lookaround.
+ */
+#define MULTIPATH_MAX_LEN 16
+
+/** \brief Value used to represent an invalid Rose program offset. */
+#define ROSE_INVALID_PROG_OFFSET 0
+
 #endif // ROSE_COMMON_H
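// For reference, a reach bitvector packs one bit per possible byte value, so
// the 32-byte REACH_BITVECTOR_LEN covers all 256 values. Membership is a
// shift-and-mask test, mirroring the indexing used by the dump code removed
// below (helper name here is illustrative):

static bool reachHas(const u8 *reach, u8 c) {
    // Bit i of the 256-bit vector is set iff byte value i is in the class.
    return (reach[c / 8] & (1U << (c % 8))) != 0;
}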
diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp
deleted file mode 100644
index 1867be507..000000000
--- a/src/rose/rose_dump.cpp
+++ /dev/null
@@ -1,1386 +0,0 @@
-/*
- * Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- *  * Redistributions of source code must retain the above copyright notice,
- *    this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *  * Neither the name of Intel Corporation nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-
-#include "hwlm/hwlm_build.h"
-#include "hwlm/hwlm_dump.h"
-#include "rose_build.h"
-#include "rose_dump.h"
-#include "rose_common.h"
-#include "rose_internal.h"
-#include "rose_program.h"
-#include "hs_compile.h"
-#include "ue2common.h"
-#include "nfa/nfa_build_util.h"
-#include "nfa/nfa_dump_api.h"
-#include "nfa/nfa_internal.h"
-#include "nfa/nfa_kind.h"
-#include "util/dump_charclass.h"
-#include "util/multibit_build.h"
-#include "util/multibit.h"
-
-#include <algorithm>
-#include <fstream>
-#include <iomanip>
-#include <map>
-#include <ostream>
-#include <sstream>
-#include <string>
-#include <utility>
-#include <vector>
-
-#ifndef DUMP_SUPPORT
-#error No dump support!
-#endif
-
-using namespace std;
-
-namespace ue2 {
-
-namespace /* anonymous */ {
-
-struct rose_off {
-    explicit rose_off(u32 j) : i(j) {}
-    string str(void) const;
-    u32 i;
-};
-
-ostream &operator<< (ostream &o, const rose_off &to) {
-    if (to.i == ROSE_BOUND_INF) {
-        o << "inf";
-    } else {
-        o << to.i;
-    }
-    return o;
-}
-
-string rose_off::str(void) const {
-    ostringstream out;
-    out << *this;
-    return out.str();
-}
-
-}
-
-static
-const void *loadFromByteCodeOffset(const RoseEngine *t, u32 offset) {
-    if (!offset) {
-        return nullptr;
-    }
-
-    const char *lt = (const char *)t + offset;
-    return lt;
-}
-
-static
-const void *getAnchoredMatcher(const RoseEngine *t) {
-    return loadFromByteCodeOffset(t, t->amatcherOffset);
-}
-
-static
-const HWLM *getFloatingMatcher(const RoseEngine *t) {
-    return (const HWLM *)loadFromByteCodeOffset(t, t->fmatcherOffset);
-}
-
-static
-const HWLM *getEodMatcher(const RoseEngine *t) {
-    return (const HWLM *)loadFromByteCodeOffset(t, t->ematcherOffset);
-}
-
-static
-const HWLM *getSmallBlockMatcher(const RoseEngine *t) {
-    return (const HWLM *)loadFromByteCodeOffset(t, t->sbmatcherOffset);
-}
-
-static
-CharReach bitvectorToReach(const u8 *reach) {
-    CharReach cr;
-
-    for (size_t i = 0; i < 256; i++) {
-        if (reach[i / 8] & (1U << (i % 8))) {
-            cr.set(i);
-
-        }
-    }
-    return cr;
-}
-
-static
-void dumpLookaround(ofstream &os, const RoseEngine *t,
-                    const ROSE_STRUCT_CHECK_LOOKAROUND *ri) {
-    assert(ri);
-
-    const u8 *base = (const u8 *)t;
-    const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset);
-    const u8 *reach_base = base + t->lookaroundReachOffset;
-
-    const s8 *look = look_base + ri->index;
-    const s8 *look_end = look + ri->count;
-    const u8 *reach = reach_base + ri->index * REACH_BITVECTOR_LEN;
-
-    os << "    contents:" << endl;
-
-    for (; look < look_end; look++, reach += REACH_BITVECTOR_LEN) {
-        os << "      " << std::setw(4) << std::setfill(' ') << int{*look}
-           << ": ";
-        describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT);
-        os << endl;
-    }
-}
-
-static
-vector<u32> sparseIterValues(const mmbit_sparse_iter *it, u32 num_bits) {
-    vector<u32> keys;
-
-    if (num_bits == 0) {
-        return keys;
-    }
-
-    vector<u8> bits(mmbit_size(num_bits), u8{0xff}); // All bits on.
-    vector<mmbit_sparse_state> state(MAX_SPARSE_ITER_STATES);
-
-    const u8 *b = bits.data();
-    mmbit_sparse_state *s = state.data();
-
-    u32 idx = 0;
-    u32 i = mmbit_sparse_iter_begin(b, num_bits, &idx, it, s);
-    while (i != MMB_INVALID) {
-        keys.push_back(i);
-        i = mmbit_sparse_iter_next(b, num_bits, i, &idx, it, s);
-    }
-
-    return keys;
-}
-
-static
-void dumpJumpTable(ofstream &os, const RoseEngine *t,
-                   const ROSE_STRUCT_SPARSE_ITER_BEGIN *ri) {
-    auto *it =
-        (const mmbit_sparse_iter *)loadFromByteCodeOffset(t, ri->iter_offset);
-    auto *jumps = (const u32 *)loadFromByteCodeOffset(t, ri->jump_table);
-
-    for (const auto &key : sparseIterValues(it, t->rolesWithStateCount)) {
-        os << "      " << std::setw(4) << std::setfill(' ') << key << " : +"
-           << *jumps << endl;
-        ++jumps;
-    }
-}
-
-static
-void dumpSomOperation(ofstream &os, const som_operation &op) {
-    os << "    som (type=" << u32{op.type} << ", onmatch=" << op.onmatch;
-    switch (op.type) {
-    case SOM_EXTERNAL_CALLBACK_REV_NFA:
-    case SOM_INTERNAL_LOC_SET_REV_NFA:
-    case SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET:
-    case SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE:
-        os << ", revNfaIndex=" << op.aux.revNfaIndex;
-        break;
-    default:
-        os << ", somDistance=" << op.aux.somDistance;
-        break;
-    }
-    os << ")" << endl;
-}
-
-static
-string dumpStrMask(const u8 *mask, size_t len) {
-    ostringstream oss;
-    for (size_t i = 0; i < len; i++) {
-        oss << std::hex << std::setw(2) << std::setfill('0') << u32{mask[i]}
-            << " ";
-    }
-    return oss.str();
-}
-
-#define PROGRAM_CASE(name)                                                     \
-    case ROSE_INSTR_##name: {                                                  \
-        os << "  " << std::setw(4) << std::setfill('0') << (pc - pc_base)      \
-           << ": " #name " (" << (int)ROSE_INSTR_##name << ")" << endl;        \
-        const auto *ri = (const struct ROSE_STRUCT_##name *)pc;
-
-#define PROGRAM_NEXT_INSTRUCTION                                               \
-    pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN);                        \
-    break;                                                                     \
-    }
-
-static
-void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
-    const char *pc_base = pc;
-    for (;;) {
-        u8 code = *(const u8 *)pc;
-        assert(code <= LAST_ROSE_INSTRUCTION);
-        const size_t offset = pc - pc_base;
-        switch (code) {
-            PROGRAM_CASE(END) { return; }
-            PROGRAM_NEXT_INSTRUCTION
-
-            PROGRAM_CASE(ANCHORED_DELAY) {
-                os << "    groups 0x" << std::hex << ri->groups << std::dec
-                   << endl;
-                os << "    done_jump " << offset + ri->done_jump << endl;
-            }
-            PROGRAM_NEXT_INSTRUCTION
-
-            PROGRAM_CASE(CHECK_LIT_EARLY) {
-                os << "    min_offset " << ri->min_offset << endl;
-            }
-            PROGRAM_NEXT_INSTRUCTION
-
-            PROGRAM_CASE(CHECK_GROUPS) {
-                os << "    groups 0x" << std::hex << ri->groups << std::dec
-                   << endl;
-            }
-            PROGRAM_NEXT_INSTRUCTION
-
-            PROGRAM_CASE(CHECK_ONLY_EOD) {
-                os << "    fail_jump " << offset + ri->fail_jump << endl;
-            }
-            PROGRAM_NEXT_INSTRUCTION
-
-            PROGRAM_CASE(CHECK_BOUNDS) {
-                os << "    min_bound " << ri->min_bound << endl;
-                os << "    max_bound " << ri->max_bound << endl;
-                os << "    fail_jump " << offset + ri->fail_jump << endl;
-            }
-            PROGRAM_NEXT_INSTRUCTION
-
-            PROGRAM_CASE(CHECK_NOT_HANDLED) {
-                os << "    key " << ri->key << endl;
-                os << "    fail_jump " << offset + ri->fail_jump << endl;
-            }
-            PROGRAM_NEXT_INSTRUCTION
-
-            PROGRAM_CASE(CHECK_SINGLE_LOOKAROUND) {
-                os << "    offset " << int{ri->offset} << endl;
-                os << "    reach_index " << ri->reach_index << endl;
-                os << "    fail_jump " << offset + ri->fail_jump << endl;
-                const u8 *base = (const u8 *)t;
-                const u8 *reach_base = base + t->lookaroundReachOffset;
-                const u8 *reach = reach_base +
-                                  ri->reach_index * REACH_BITVECTOR_LEN;
-                os << "    contents ";
-                describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT);
-                os << endl;
-            }
-            PROGRAM_NEXT_INSTRUCTION
-
-            PROGRAM_CASE(CHECK_LOOKAROUND) {
-                os << "    index " << ri->index << endl;
-                os << "    count " << ri->count << endl;
-                os << "    fail_jump " << offset + ri->fail_jump << endl;
-                dumpLookaround(os, t, ri);
-            }
-            PROGRAM_NEXT_INSTRUCTION
-
-            PROGRAM_CASE(CHECK_MASK) {
-                os << "    and_mask 0x" << std::hex << std::setw(16)
-                   << std::setfill('0') << ri->and_mask << std::dec << endl;
-                os << "    cmp_mask 0x" << std::hex << std::setw(16)
-                   << std::setfill('0') << ri->cmp_mask << std::dec << endl;
-                os << "    neg_mask 0x" << std::hex << std::setw(16)
-                   << std::setfill('0') << ri->neg_mask << std::dec << endl;
-                os << "    offset " << ri->offset << endl;
-                os << "    fail_jump " << offset + ri->fail_jump << endl;
-            }
-            PROGRAM_NEXT_INSTRUCTION
-
-            PROGRAM_CASE(CHECK_MASK_32) {
-                os << "    and_mask "
-                   << dumpStrMask(ri->and_mask, sizeof(ri->and_mask))
-                   << endl;
-                os << "    cmp_mask "
-                   << dumpStrMask(ri->cmp_mask, sizeof(ri->cmp_mask))
-                   << endl;
-                os << "    neg_mask 0x" << std::hex << std::setw(8)
-                   << std::setfill('0') << ri->neg_mask << std::dec << endl;
-                os << "    offset " << ri->offset << endl;
-                os << "    fail_jump " << offset + ri->fail_jump << endl;
-            }
-            PROGRAM_NEXT_INSTRUCTION
-
-            PROGRAM_CASE(CHECK_BYTE) {
-                os << "    and_mask 0x" << std::hex << std::setw(2)
-                   << std::setfill('0') << u32{ri->and_mask} << std::dec
-                   << endl;
-                os << "    cmp_mask 0x" << std::hex << std::setw(2)
-                   << std::setfill('0') << u32{ri->cmp_mask} << std::dec
-                   << endl;
-                os << "    negation " << u32{ri->negation} << endl;
-                os << "    offset " << ri->offset << endl;
-                os << "    fail_jump " << offset + ri->fail_jump << endl;
-            }
-            PROGRAM_NEXT_INSTRUCTION
-
-            PROGRAM_CASE(CHECK_SHUFTI_16x8) {
-                os << "    nib_mask "
-                   << dumpStrMask(ri->nib_mask, sizeof(ri->nib_mask))
-                   << endl;
-                os << "    bucket_select_mask "
-                   << dumpStrMask(ri->bucket_select_mask,
-                                  sizeof(ri->bucket_select_mask))
-                   << endl;
-                os << "    offset " << ri->offset << endl;
-                os << "    fail_jump " << offset + ri->fail_jump << endl;
-            }
-            PROGRAM_NEXT_INSTRUCTION
-
-            PROGRAM_CASE(CHECK_SHUFTI_32x8) {
-                os << "    hi_mask "
-                   << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
-                   << endl;
-                os << "    lo_mask "
-                   << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask))
-                   << endl;
-                os << "    bucket_select_mask "
-                   << dumpStrMask(ri->bucket_select_mask,
-                                  sizeof(ri->bucket_select_mask))
-                   << endl;
-                os << "    offset " << ri->offset << endl;
-                os << "    fail_jump " << offset + ri->fail_jump << endl;
-            }
-            PROGRAM_NEXT_INSTRUCTION
-
-            PROGRAM_CASE(CHECK_SHUFTI_16x16) {
-                os << "    hi_mask "
-                   << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
-                   << endl;
-                os << "    lo_mask "
-                   << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask))
-                   << endl;
-                os << "    bucket_select_mask "
-                   << dumpStrMask(ri->bucket_select_mask,
-                                  sizeof(ri->bucket_select_mask))
-                   << endl;
-                os << "    offset " << ri->offset << endl;
-                os << "    fail_jump " << offset + ri->fail_jump << endl;
-            }
-            PROGRAM_NEXT_INSTRUCTION
-
-            PROGRAM_CASE(CHECK_SHUFTI_32x16) {
-                os << "    hi_mask "
-                   << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
-                   << endl;
-                os << "    lo_mask "
-                   << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask))
-                   << endl;
-                os << "    bucket_select_mask_hi "
-                   << dumpStrMask(ri->bucket_select_mask_hi,
-                                  sizeof(ri->bucket_select_mask_hi))
-                   << endl;
-                os << "    bucket_select_mask_lo "
-                   << dumpStrMask(ri->bucket_select_mask_lo,
-                                  sizeof(ri->bucket_select_mask_lo))
-                   << endl;
-                os << "    offset " << ri->offset << endl;
-                os << "    fail_jump " << offset + ri->fail_jump << endl;
-            }
-            PROGRAM_NEXT_INSTRUCTION
-
-            PROGRAM_CASE(CHECK_INFIX) {
os << " queue " << ri->queue << endl; - os << " lag " << ri->lag << endl; - os << " report " << ri->report << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_PREFIX) { - os << " queue " << ri->queue << endl; - os << " lag " << ri->lag << endl; - os << " report " << ri->report << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(PUSH_DELAYED) { - os << " delay " << u32{ri->delay} << endl; - os << " index " << ri->index << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(RECORD_ANCHORED) { - os << " id " << ri->id << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CATCH_UP) {} - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CATCH_UP_MPV) {} - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SOM_ADJUST) { - os << " distance " << ri->distance << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SOM_LEFTFIX) { - os << " queue " << ri->queue << endl; - os << " lag " << ri->lag << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SOM_FROM_REPORT) { - dumpSomOperation(os, ri->som); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SOM_ZERO) {} - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(TRIGGER_INFIX) { - os << " queue " << ri->queue << endl; - os << " event " << ri->event << endl; - os << " cancel " << u32{ri->cancel} << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(TRIGGER_SUFFIX) { - os << " queue " << ri->queue << endl; - os << " event " << ri->event << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(DEDUPE) { - os << " quash_som " << u32{ri->quash_som} << endl; - os << " dkey " << ri->dkey << endl; - os << " offset_adjust " << ri->offset_adjust << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(DEDUPE_SOM) { - os << " quash_som " << u32{ri->quash_som} << endl; - os << " dkey " << ri->dkey << endl; - os << " offset_adjust " << ri->offset_adjust << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_CHAIN) { - os << " event " << ri->event << endl; - os << " top_squash_distance " << ri->top_squash_distance - << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_SOM_INT) { - dumpSomOperation(os, ri->som); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_SOM_AWARE) { - dumpSomOperation(os, ri->som); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT) { - os << " onmatch " << ri->onmatch << endl; - os << " offset_adjust " << ri->offset_adjust << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_EXHAUST) { - os << " onmatch " << ri->onmatch << endl; - os << " offset_adjust " << ri->offset_adjust << endl; - os << " ekey " << ri->ekey << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_SOM) { - os << " onmatch " << ri->onmatch << endl; - os << " offset_adjust " << ri->offset_adjust << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_SOM_EXHAUST) { - os << " onmatch " << ri->onmatch << endl; - os << " offset_adjust " << ri->offset_adjust << endl; - os << " ekey " << ri->ekey << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(DEDUPE_AND_REPORT) { - os << " quash_som " << u32{ri->quash_som} << endl; - os << " dkey " << ri->dkey << endl; - os << " onmatch " << ri->onmatch << endl; - os << " offset_adjust " << ri->offset_adjust << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(FINAL_REPORT) { - os << " onmatch 
" << ri->onmatch << endl; - os << " offset_adjust " << ri->offset_adjust << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_EXHAUSTED) { - os << " ekey " << ri->ekey << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MIN_LENGTH) { - os << " end_adj " << ri->end_adj << endl; - os << " min_length " << ri->min_length << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SET_STATE) { - os << " index " << ri->index << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SET_GROUPS) { - os << " groups 0x" << std::hex << ri->groups << std::dec - << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SQUASH_GROUPS) { - os << " groups 0x" << std::hex << ri->groups << std::dec - << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_STATE) { - os << " index " << ri->index << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SPARSE_ITER_BEGIN) { - os << " iter_offset " << ri->iter_offset << endl; - os << " jump_table " << ri->jump_table << endl; - dumpJumpTable(os, t, ri); - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SPARSE_ITER_NEXT) { - os << " iter_offset " << ri->iter_offset << endl; - os << " jump_table " << ri->jump_table << endl; - os << " state " << ri->state << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SPARSE_ITER_ANY) { - os << " iter_offset " << ri->iter_offset << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(ENGINES_EOD) { - os << " iter_offset " << ri->iter_offset << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SUFFIXES_EOD) {} - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(MATCHER_EOD) {} - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_LONG_LIT) { - os << " lit_offset " << ri->lit_offset << endl; - os << " lit_length " << ri->lit_length << endl; - const char *lit = (const char *)t + ri->lit_offset; - os << " literal: \"" - << escapeString(string(lit, ri->lit_length)) << "\"" << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) { - os << " lit_offset " << ri->lit_offset << endl; - os << " lit_length " << ri->lit_length << endl; - const char *lit = (const char *)t + ri->lit_offset; - os << " literal: \"" - << escapeString(string(lit, ri->lit_length)) << "\"" << endl; - } - PROGRAM_NEXT_INSTRUCTION - - default: - os << " UNKNOWN (code " << int{code} << ")" << endl; - os << " " << endl; - return; - } - } -} - -#undef PROGRAM_CASE -#undef PROGRAM_NEXT_INSTRUCTION - -static -void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) { - ofstream os(filename); - - const u32 *litPrograms = - (const u32 *)loadFromByteCodeOffset(t, t->litProgramOffset); - const u32 *delayRebuildPrograms = - (const u32 *)loadFromByteCodeOffset(t, t->litDelayRebuildProgramOffset); - - for (u32 i = 0; i < t->literalCount; i++) { - os << "Literal " << i << endl; - os << "---------------" << endl; - - if (litPrograms[i]) { - os << "Program @ " << litPrograms[i] << ":" << endl; - const char *prog = - (const char *)loadFromByteCodeOffset(t, litPrograms[i]); - dumpProgram(os, t, prog); - } else { - os << "" << endl; - } - - if (delayRebuildPrograms[i]) { - os << "Delay Rebuild Program @ " << delayRebuildPrograms[i] << ":" - << endl; - const char *prog = (const char 
-            const char *prog = (const char *)loadFromByteCodeOffset(
-                t, delayRebuildPrograms[i]);
-            dumpProgram(os, t, prog);
-        }
-
-        os << endl;
-    }
-
-    os.close();
-}
-
-static
-void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) {
-    ofstream os(filename);
-    const char *base = (const char *)t;
-
-    if (t->eodProgramOffset) {
-        os << "EOD Program @ " << t->eodProgramOffset << ":" << endl;
-        dumpProgram(os, t, base + t->eodProgramOffset);
-        os << endl;
-    } else {
-        os << "<none>" << endl;
-    }
-
-    os.close();
-}
-
-static
-void dumpRoseReportPrograms(const RoseEngine *t, const string &filename) {
-    ofstream os(filename);
-
-    const u32 *programs =
-        (const u32 *)loadFromByteCodeOffset(t, t->reportProgramOffset);
-
-    for (u32 i = 0; i < t->reportProgramCount; i++) {
-        os << "Report " << i << endl;
-        os << "---------------" << endl;
-
-        if (programs[i]) {
-            os << "Program @ " << programs[i] << ":" << endl;
-            const char *prog =
-                (const char *)loadFromByteCodeOffset(t, programs[i]);
-            dumpProgram(os, t, prog);
-        } else {
-            os << "<none>" << endl;
-        }
-    }
-
-    os.close();
-}
-
-static
-void dumpNfaNotes(ofstream &fout, const RoseEngine *t, const NFA *n) {
-    const u32 qindex = n->queueIndex;
-
-    if (qindex < t->outfixBeginQueue) {
-        fout << "chained";
-        return;
-    }
-
-    if (qindex < t->outfixEndQueue) {
-        fout << "outfix";
-        return;
-    }
-
-    const NfaInfo *nfa_info = getNfaInfoByQueue(t, qindex);
-    const NFA *nfa = getNfaByInfo(t, nfa_info);
-
-    if (nfa_info->eod) {
-        fout << "eod ";
-    }
-
-    if (qindex < t->leftfixBeginQueue) {
-        fout << "suffix";
-        return;
-    }
-
-    const LeftNfaInfo *left = getLeftInfoByQueue(t, qindex);
-    if (left->eager) {
-        fout << "eager ";
-    }
-    if (left->transient) {
-        fout << "transient " << (u32)left->transient << " ";
-    }
-    if (left->infix) {
-        fout << "infix";
-        u32 maxQueueLen = left->maxQueueLen;
-        if (maxQueueLen != (u32)(-1)) {
-            fout << " maxqlen=" << maxQueueLen;
-        }
-    } else {
-        fout << "prefix";
-    }
-    fout << " maxlag=" << left->maxLag;
-    if (left->stopTable) {
-        fout << " miracles";
-    }
-    if (left->countingMiracleOffset) {
-        const RoseCountingMiracle *cm
-            = (const RoseCountingMiracle *)((const char *)t
-                                            + left->countingMiracleOffset);
-        fout << " counting_miracle:" << (int)cm->count
-             << (cm->shufti ? "s" : "v");
"s" : "v"); - } - if (nfaSupportsZombie(nfa)) { - fout << " zombie"; - } - if (left->eod_check) { - fout << " eod"; - } -} - -static -void dumpComponentInfo(const RoseEngine *t, const string &base) { - stringstream ss; - ss << base << "rose_components.txt"; - ofstream fout(ss.str().c_str()); - - fout << "Index Offset\tEngine \tStates S.State Bytes Notes\n"; - - for (u32 i = 0; i < t->queueCount; i++) { - const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); - const NFA *n = getNfaByInfo(t, nfa_info); - - fout << left << setw(6) << i << " "; - - fout << left << ((const char *)n - (const char *)t) << "\t"; /* offset */ - - fout << left << setw(16) << describe(*n) << "\t"; - - fout << left << setw(6) << n->nPositions << " "; - fout << left << setw(7) << n->streamStateSize << " "; - fout << left << setw(7) << n->length << " "; - - dumpNfaNotes(fout, t, n); - - fout << endl; - } -} - - -static -void dumpComponentInfoCsv(const RoseEngine *t, const string &base) { - FILE *f = fopen((base +"rose_components.csv").c_str(), "w"); - - fprintf(f, "Index, Offset,Engine Type,States,Stream State,Bytecode Size," - "Kind,Notes\n"); - - for (u32 i = 0; i < t->queueCount; i++) { - const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); - const NFA *n = getNfaByInfo(t, nfa_info); - nfa_kind kind; - stringstream notes; - - if (i < t->outfixBeginQueue) { - notes << "chained;"; - } - - if (nfa_info->eod) { - notes << "eod;"; - } - - if (i < t->outfixEndQueue) { - kind = NFA_OUTFIX; - } else if (i < t->leftfixBeginQueue) { - kind = NFA_SUFFIX; - } else { - const LeftNfaInfo *left = getLeftInfoByQueue(t, i); - if (left->eager) { - notes << "eager;"; - } - if (left->transient) { - notes << "transient " << (u32)left->transient << ";"; - } - if (left->infix) { - kind = NFA_INFIX; - u32 maxQueueLen = left->maxQueueLen; - if (maxQueueLen != (u32)(-1)) { - notes << "maxqlen=" << maxQueueLen << ";"; - } - } else { - kind = NFA_PREFIX; - } - notes << "maxlag=" << left->maxLag << ";"; - if (left->stopTable) { - notes << "miracles;"; - } - if (left->countingMiracleOffset) { - auto cm = (const RoseCountingMiracle *) - ((const char *)t + left->countingMiracleOffset); - notes << "counting_miracle:" << (int)cm->count - << (cm->shufti ? 
"s" : "v") << ";"; - } - if (nfaSupportsZombie(n)) { - notes << " zombie;"; - } - if (left->eod_check) { - notes << "left_eod;"; - } - } - - fprintf(f, "%u,%zd,\"%s\",%u,%u,%u,%s,%s\n", i, - (const char *)n - (const char *)t, describe(*n).c_str(), - n->nPositions, n->streamStateSize, n->length, - to_string(kind).c_str(), notes.str().c_str()); - } - fclose(f); -} - - -static -void dumpExhaust(const RoseEngine *t, const string &base) { - stringstream sstxt; - sstxt << base << "rose_exhaust.txt"; - FILE *f = fopen(sstxt.str().c_str(), "w"); - - const NfaInfo *infos - = (const NfaInfo *)((const char *)t + t->nfaInfoOffset); - - u32 queue_count = t->activeArrayCount; - - for (u32 i = 0; i < queue_count; ++i) { - u32 ekey_offset = infos[i].ekeyListOffset; - - fprintf(f, "%u (%u):", i, ekey_offset); - - if (ekey_offset) { - const u32 *ekeys = (const u32 *)((const char *)t + ekey_offset); - while (1) { - u32 e = *ekeys; - ++ekeys; - if (e == ~0U) { - break; - } - fprintf(f, " %u", e); - } - } - - fprintf(f, "\n"); - } - - fclose(f); -} - -static -void dumpNfas(const RoseEngine *t, bool dump_raw, const string &base) { - dumpExhaust(t, base); - - for (u32 i = 0; i < t->queueCount; i++) { - const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); - const NFA *n = getNfaByInfo(t, nfa_info); - - stringstream ssbase; - ssbase << base << "rose_nfa_" << i; - nfaGenerateDumpFiles(n, ssbase.str()); - - if (dump_raw) { - stringstream ssraw; - ssraw << base << "rose_nfa_" << i << ".raw"; - FILE *f = fopen(ssraw.str().c_str(), "w"); - fwrite(n, 1, n->length, f); - fclose(f); - } - } -} - -static -void dumpRevComponentInfo(const RoseEngine *t, const string &base) { - stringstream ss; - ss << base << "som_rev_components.txt"; - ofstream fout(ss.str().c_str()); - - fout << "Index Offset\tEngine \tStates S.State Bytes\n"; - - const char *tp = (const char *)t; - const u32 *rev_offsets = (const u32 *)(tp + t->somRevOffsetOffset); - - for (u32 i = 0; i < t->somRevCount; i++) { - u32 offset = rev_offsets[i]; - const NFA *n = (const NFA *)(tp + offset); - - fout << left << setw(6) << i << " "; - - fout << left << offset << "\t"; /* offset */ - - fout << left << setw(16) << describe(*n) << "\t"; - - fout << left << setw(6) << n->nPositions << " "; - fout << left << setw(7) << n->streamStateSize << " "; - fout << left << setw(7) << n->length; - fout << endl; - } -} - -static -void dumpRevNfas(const RoseEngine *t, bool dump_raw, const string &base) { - const char *tp = (const char *)t; - const u32 *rev_offsets = (const u32 *)(tp + t->somRevOffsetOffset); - - for (u32 i = 0; i < t->somRevCount; i++) { - const NFA *n = (const NFA *)(tp + rev_offsets[i]); - - stringstream ssbase; - ssbase << base << "som_rev_nfa_" << i; - nfaGenerateDumpFiles(n, ssbase.str()); - - if (dump_raw) { - stringstream ssraw; - ssraw << base << "som_rev_nfa_" << i << ".raw"; - FILE *f = fopen(ssraw.str().c_str(), "w"); - fwrite(n, 1, n->length, f); - fclose(f); - } - } -} - -static -void dumpAnchored(const RoseEngine *t, const string &base) { - u32 i = 0; - const anchored_matcher_info *curr - = (const anchored_matcher_info *)getALiteralMatcher(t); - - while (curr) { - const NFA *n = (const NFA *)((const char *)curr + sizeof(*curr)); - - stringstream ssbase; - ssbase << base << "anchored_" << i; - nfaGenerateDumpFiles(n, ssbase.str()); - - curr = curr->next_offset ? 
-        curr = curr->next_offset ? (const anchored_matcher_info *)
-            ((const char *)curr + curr->next_offset) : nullptr;
-        i++;
-    };
-}
-
-static
-void dumpAnchoredStats(const void *atable, FILE *f) {
-    assert(atable);
-
-    u32 i = 0;
-    const anchored_matcher_info *curr = (const anchored_matcher_info *)atable;
-
-    while (curr) {
-        const NFA *n = (const NFA *)((const char *)curr + sizeof(*curr));
-
-        fprintf(f, " NFA %u: %s, %u states (%u bytes)\n", i,
-                describe(*n).c_str(), n->nPositions, n->length);
-
-        curr = curr->next_offset ? (const anchored_matcher_info *)
-            ((const char *)curr + curr->next_offset) : nullptr;
-        i++;
-    };
-
-}
-
-static
-void dumpLongLiteralSubtable(const RoseLongLitTable *ll_table,
-                             const RoseLongLitSubtable *ll_sub, FILE *f) {
-    if (!ll_sub->hashBits) {
-        fprintf(f, " <none>\n");
-        return;
-    }
-
-    const char *base = (const char *)ll_table;
-
-    u32 nbits = ll_sub->hashBits;
-    u32 num_entries = 1U << nbits;
-    const auto *tab = (const RoseLongLitHashEntry *)(base + ll_sub->hashOffset);
-    u32 hash_occ =
-        count_if(tab, tab + num_entries, [](const RoseLongLitHashEntry &ent) {
-            return ent.str_offset != 0;
-        });
-    float hash_occ_percent = ((float)hash_occ / (float)num_entries) * 100;
-
-    fprintf(f, " hash table : %u bits, occupancy %u/%u (%0.1f%%)\n",
-            nbits, hash_occ, num_entries, hash_occ_percent);
-
-    u32 bloom_bits = ll_sub->bloomBits;
-    u32 bloom_size = 1U << bloom_bits;
-    const u8 *bloom = (const u8 *)base + ll_sub->bloomOffset;
-    u32 bloom_occ = accumulate(bloom, bloom + bloom_size / 8, 0,
-        [](const u32 &sum, const u8 &elem) { return sum + popcount32(elem); });
-    float bloom_occ_percent = ((float)bloom_occ / (float)(bloom_size)) * 100;
-
-    fprintf(f, " bloom filter : %u bits, occupancy %u/%u (%0.1f%%)\n",
-            bloom_bits, bloom_occ, bloom_size, bloom_occ_percent);
-}
-
-static
-void dumpLongLiteralTable(const RoseEngine *t, FILE *f) {
-    if (!t->longLitTableOffset) {
-        return;
-    }
-
-    fprintf(f, "\n");
-    fprintf(f, "Long literal table (streaming):\n");
-
-    const auto *ll_table =
-        (const struct RoseLongLitTable *)loadFromByteCodeOffset(
-            t, t->longLitTableOffset);
-
-    fprintf(f, " total size : %u bytes\n", ll_table->size);
-    fprintf(f, " longest len : %u\n", ll_table->maxLen);
-    fprintf(f, " stream state : %u bytes\n", ll_table->streamStateBytes);
-
-    fprintf(f, " caseful:\n");
-    dumpLongLiteralSubtable(ll_table, &ll_table->caseful, f);
-
-    fprintf(f, " nocase:\n");
-    dumpLongLiteralSubtable(ll_table, &ll_table->nocase, f);
-}
-
-// Externally accessible functions
-
-void roseDumpText(const RoseEngine *t, FILE *f) {
-    if (!t) {
-        fprintf(f, "<< no rose >>\n");
-        return;
-    }
-
-    const void *atable = getAnchoredMatcher(t);
-    const HWLM *ftable = getFloatingMatcher(t);
-    const HWLM *etable = getEodMatcher(t);
-    const HWLM *sbtable = getSmallBlockMatcher(t);
-
-    fprintf(f, "Rose:\n\n");
-
-    fprintf(f, "mode: : ");
-    switch(t->mode) {
-    case HS_MODE_BLOCK:
-        fprintf(f, "block");
-        break;
-    case HS_MODE_STREAM:
-        fprintf(f, "streaming");
-        break;
-    case HS_MODE_VECTORED:
-        fprintf(f, "vectored");
-        break;
-    }
-    fprintf(f, "\n");
-
-    fprintf(f, "properties :");
-    if (t->canExhaust) {
-        fprintf(f, " canExhaust");
-    }
-    if (t->hasSom) {
-        fprintf(f, " hasSom");
-    }
-    fprintf(f, "\n");
-
-    fprintf(f, "dkey count : %u\n", t->dkeyCount);
-    fprintf(f, "som slot count : %u\n", t->somLocationCount);
-    fprintf(f, "som width : %u bytes\n", t->somHorizon);
-    fprintf(f, "rose count : %u\n", t->roseCount);
-    fprintf(f, "\n");
-
-    fprintf(f, "total engine size : %u bytes\n", t->size);
bytes over %u bytes\n", t->asize, - t->anchoredDistance); - fprintf(f, " - floating matcher : %zu bytes%s", - ftable ? hwlmSize(ftable) : 0, t->noFloatingRoots ? " (cond)":""); - if (t->floatingMinDistance) { - fprintf(f, " from %s bytes\n", - rose_off(t->floatingMinDistance).str().c_str()); - } - if (t->floatingDistance != ROSE_BOUND_INF && ftable) { - fprintf(f, " over %u bytes\n", t->floatingDistance); - } else { - fprintf(f, "\n"); - } - fprintf(f, " - eod-anch matcher : %zu bytes over last %u bytes\n", - etable ? hwlmSize(etable) : 0, t->ematcherRegionSize); - fprintf(f, " - small-blk matcher : %zu bytes over %u bytes\n", - sbtable ? hwlmSize(sbtable) : 0, t->smallBlockDistance); - fprintf(f, " - role state table : %zu bytes\n", - t->rolesWithStateCount * sizeof(u32)); - fprintf(f, " - nfa info table : %zu bytes\n", - t->queueCount * sizeof(NfaInfo)); - fprintf(f, " - lookaround table : %u bytes\n", - t->nfaInfoOffset - t->lookaroundTableOffset); - fprintf(f, " - lookaround reach : %u bytes\n", - t->lookaroundTableOffset - t->lookaroundReachOffset); - - fprintf(f, "state space required : %u bytes\n", t->stateOffsets.end); - fprintf(f, " - history buffer : %u bytes\n", t->historyRequired); - fprintf(f, " - exhaustion vector : %u bytes\n", (t->ekeyCount + 7) / 8); - fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize); - fprintf(f, " - long lit matcher : %u bytes\n", t->longLitStreamState); - fprintf(f, " - active array : %u bytes\n", - mmbit_size(t->activeArrayCount)); - fprintf(f, " - active rose : %u bytes\n", - mmbit_size(t->activeLeftCount)); - fprintf(f, " - anchored state : %u bytes\n", t->anchorStateSize); - fprintf(f, " - nfa state : %u bytes\n", t->nfaStateSize); - fprintf(f, " - (trans. nfa state): %u bytes\n", t->tStateSize); - fprintf(f, " - one whole bytes : %u bytes\n", - t->stateOffsets.anchorState - t->stateOffsets.leftfixLagTable); - fprintf(f, " - groups : %u bytes\n", - t->stateOffsets.groups_size); - fprintf(f, "\n"); - - fprintf(f, "initial groups : 0x%016llx\n", t->initialGroups); - fprintf(f, "floating groups : 0x%016llx\n", t->floating_group_mask); - fprintf(f, "handled key count : %u\n", t->handledKeyCount); - fprintf(f, "\n"); - - fprintf(f, "total literal count : %u\n", t->totalNumLiterals); - fprintf(f, " prog table size : %u\n", t->literalCount); - fprintf(f, " delayed literals : %u\n", t->delay_count); - - fprintf(f, "\n"); - fprintf(f, " minWidth : %u\n", t->minWidth); - fprintf(f, " minWidthExcludingBoundaries : %u\n", - t->minWidthExcludingBoundaries); - fprintf(f, " maxBiAnchoredWidth : %s\n", - rose_off(t->maxBiAnchoredWidth).str().c_str()); - fprintf(f, " minFloatLitMatchOffset : %s\n", - rose_off(t->floatingMinLiteralMatchOffset).str().c_str()); - fprintf(f, " delay_base_id : %u\n", t->delay_base_id); - fprintf(f, " maxFloatingDelayedMatch : %s\n", - rose_off(t->maxFloatingDelayedMatch).str().c_str()); - - if (atable) { - fprintf(f, "\nAnchored literal matcher stats:\n\n"); - dumpAnchoredStats(atable, f); - } - - if (ftable) { - fprintf(f, "\nFloating literal matcher stats:\n\n"); - hwlmPrintStats(ftable, f); - } - - if (etable) { - fprintf(f, "\nEOD-anchored literal matcher stats:\n\n"); - hwlmPrintStats(etable, f); - } - - if (sbtable) { - fprintf(f, "\nSmall-block literal matcher stats:\n\n"); - hwlmPrintStats(sbtable, f); - } - - dumpLongLiteralTable(t, f); -} - -#define DUMP_U8(o, member) \ - fprintf(f, " %-32s: %hhu/%hhx\n", #member, o->member, o->member) -#define DUMP_U32(o, member) \ - fprintf(f, " %-32s: %u/%08x\n", #member, 
-#define DUMP_U32(o, member) \
-    fprintf(f, " %-32s: %u/%08x\n", #member, o->member, o->member)
-#define DUMP_U64(o, member) \
-    fprintf(f, " %-32s: %llu/%016llx\n", #member, o->member, o->member)
-
-void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
-    fprintf(f, "struct RoseEngine {\n");
-    DUMP_U8(t, noFloatingRoots);
-    DUMP_U8(t, requiresEodCheck);
-    DUMP_U8(t, hasOutfixesInSmallBlock);
-    DUMP_U8(t, runtimeImpl);
-    DUMP_U8(t, mpvTriggeredByLeaf);
-    DUMP_U8(t, canExhaust);
-    DUMP_U8(t, hasSom);
-    DUMP_U8(t, somHorizon);
-    DUMP_U8(t, needsCatchup);
-    DUMP_U32(t, mode);
-    DUMP_U32(t, historyRequired);
-    DUMP_U32(t, ekeyCount);
-    DUMP_U32(t, dkeyCount);
-    DUMP_U32(t, dkeyLogSize);
-    DUMP_U32(t, invDkeyOffset);
-    DUMP_U32(t, somLocationCount);
-    DUMP_U32(t, somLocationFatbitSize);
-    DUMP_U32(t, rolesWithStateCount);
-    DUMP_U32(t, stateSize);
-    DUMP_U32(t, anchorStateSize);
-    DUMP_U32(t, nfaStateSize);
-    DUMP_U32(t, tStateSize);
-    DUMP_U32(t, smallWriteOffset);
-    DUMP_U32(t, amatcherOffset);
-    DUMP_U32(t, ematcherOffset);
-    DUMP_U32(t, fmatcherOffset);
-    DUMP_U32(t, sbmatcherOffset);
-    DUMP_U32(t, longLitTableOffset);
-    DUMP_U32(t, amatcherMinWidth);
-    DUMP_U32(t, fmatcherMinWidth);
-    DUMP_U32(t, eodmatcherMinWidth);
-    DUMP_U32(t, amatcherMaxBiAnchoredWidth);
-    DUMP_U32(t, fmatcherMaxBiAnchoredWidth);
-    DUMP_U32(t, litProgramOffset);
-    DUMP_U32(t, litDelayRebuildProgramOffset);
-    DUMP_U32(t, reportProgramOffset);
-    DUMP_U32(t, reportProgramCount);
-    DUMP_U32(t, literalCount);
-    DUMP_U32(t, activeArrayCount);
-    DUMP_U32(t, activeLeftCount);
-    DUMP_U32(t, queueCount);
-    DUMP_U32(t, activeQueueArraySize);
-    DUMP_U32(t, eagerIterOffset);
-    DUMP_U32(t, handledKeyCount);
-    DUMP_U32(t, handledKeyFatbitSize);
-    DUMP_U32(t, leftOffset);
-    DUMP_U32(t, roseCount);
-    DUMP_U32(t, lookaroundTableOffset);
-    DUMP_U32(t, lookaroundReachOffset);
-    DUMP_U32(t, eodProgramOffset);
-    DUMP_U32(t, lastByteHistoryIterOffset);
-    DUMP_U32(t, minWidth);
-    DUMP_U32(t, minWidthExcludingBoundaries);
-    DUMP_U32(t, maxBiAnchoredWidth);
-    DUMP_U32(t, anchoredDistance);
-    DUMP_U32(t, anchoredMinDistance);
-    DUMP_U32(t, floatingDistance);
-    DUMP_U32(t, floatingMinDistance);
-    DUMP_U32(t, smallBlockDistance);
-    DUMP_U32(t, floatingMinLiteralMatchOffset);
-    DUMP_U32(t, nfaInfoOffset);
-    DUMP_U64(t, initialGroups);
-    DUMP_U64(t, floating_group_mask);
-    DUMP_U32(t, size);
-    DUMP_U32(t, delay_count);
-    DUMP_U32(t, delay_fatbit_size);
-    DUMP_U32(t, delay_base_id);
-    DUMP_U32(t, anchored_count);
-    DUMP_U32(t, anchored_fatbit_size);
-    DUMP_U32(t, anchored_base_id);
-    DUMP_U32(t, maxFloatingDelayedMatch);
-    DUMP_U32(t, delayRebuildLength);
-    DUMP_U32(t, stateOffsets.history);
-    DUMP_U32(t, stateOffsets.exhausted);
-    DUMP_U32(t, stateOffsets.activeLeafArray);
-    DUMP_U32(t, stateOffsets.activeLeftArray);
-    DUMP_U32(t, stateOffsets.activeLeftArray_size);
-    DUMP_U32(t, stateOffsets.leftfixLagTable);
-    DUMP_U32(t, stateOffsets.anchorState);
-    DUMP_U32(t, stateOffsets.groups);
-    DUMP_U32(t, stateOffsets.groups_size);
-    DUMP_U32(t, stateOffsets.longLitState);
-    DUMP_U32(t, stateOffsets.somLocation);
-    DUMP_U32(t, stateOffsets.somValid);
-    DUMP_U32(t, stateOffsets.somWritable);
-    DUMP_U32(t, stateOffsets.end);
-    DUMP_U32(t, boundary.reportEodOffset);
-    DUMP_U32(t, boundary.reportZeroOffset);
-    DUMP_U32(t, boundary.reportZeroEodOffset);
-    DUMP_U32(t, totalNumLiterals);
-    DUMP_U32(t, asize);
-    DUMP_U32(t, outfixBeginQueue);
-    DUMP_U32(t, outfixEndQueue);
-    DUMP_U32(t, leftfixBeginQueue);
-    DUMP_U32(t, initMpvNfa);
-    DUMP_U32(t, rosePrefixCount);
-    DUMP_U32(t, activeLeftIterOffset);
-    DUMP_U32(t, ematcherRegionSize);
-    DUMP_U32(t, somRevCount);
-    DUMP_U32(t, somRevOffsetOffset);
-    DUMP_U32(t, longLitStreamState);
-    fprintf(f, "}\n");
-    fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine));
-}
-
-void roseDumpComponents(const RoseEngine *t, bool dump_raw,
-                        const string &base) {
-    dumpComponentInfo(t, base);
-    dumpComponentInfoCsv(t, base);
-    dumpNfas(t, dump_raw, base);
-    dumpAnchored(t, base);
-    dumpRevComponentInfo(t, base);
-    dumpRevNfas(t, dump_raw, base);
-    dumpRoseLitPrograms(t, base + "/rose_lit_programs.txt");
-    dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt");
-    dumpRoseReportPrograms(t, base + "/rose_report_programs.txt");
-}
-
-void roseDumpInternals(const RoseEngine *t, const string &base) {
-    if (!t) {
-        DEBUG_PRINTF("no rose\n");
-        return;
-    }
-
-    const void *atable = getAnchoredMatcher(t);
-    const HWLM *ftable = getFloatingMatcher(t);
-    const HWLM *etable = getEodMatcher(t);
-
-    if (atable) {
-        FILE *f = fopen((base + "/anchored.raw").c_str(), "w");
-        if (f) {
-            fwrite(atable, 1, t->asize, f);
-            fclose(f);
-        }
-    }
-
-    if (ftable) {
-        FILE *f = fopen((base + "/floating.raw").c_str(), "w");
-        if (f) {
-            fwrite(ftable, 1, hwlmSize(ftable), f);
-            fclose(f);
-        }
-    }
-
-    if (etable) {
-        FILE *f = fopen((base + "/eod.raw").c_str(), "w");
-        if (f) {
-            fwrite(etable, 1, hwlmSize(etable), f);
-            fclose(f);
-        }
-    }
-
-    FILE *f = fopen((base + "/rose.raw").c_str(), "w");
-    assert(f);
-    fwrite(t, 1, roseSize(t), f);
-    fclose(f);
-
-    f = fopen((base + "/rose_struct.txt").c_str(), "w");
-    roseDumpStructRaw(t, f);
-    fclose(f);
-
-    roseDumpComponents(t, true, base);
-}
-
-} // namespace ue2
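
Aside (illustration only, not part of the patch): the dump code removed above relies on Hyperscan's serialised-bytecode convention, in which each table inside the engine blob is located by adding a byte offset to the engine's base pointer, with offset 0 meaning "absent"; that is all loadFromByteCodeOffset() does. A minimal sketch of the idiom, with hypothetical names:

    #include <stddef.h>
    #include <stdint.h>

    struct engine_header {
        uint32_t size;         /* total size of the blob in bytes */
        uint32_t table_offset; /* offset from blob start, 0 if absent */
    };

    /* Resolve an offset stored in the bytecode to a real pointer. */
    static const void *load_from_offset(const struct engine_header *base,
                                        uint32_t offset) {
        return offset ? (const void *)((const char *)base + offset) : NULL;
    }
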
diff --git a/src/rose/rose_graph.h b/src/rose/rose_graph.h
index c3af749fb..b7e092bbd 100644
--- a/src/rose/rose_graph.h
+++ b/src/rose/rose_graph.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -85,7 +85,7 @@ struct LeftEngInfo {
     std::shared_ptr<TamaProto> tamarama;
     u32 lag = 0U;
     ReportID leftfix_report = MO_INVALID_IDX;
-    depth dfa_min_width = 0;
+    depth dfa_min_width{0};
     depth dfa_max_width = depth::infinity();
 
     bool operator==(const LeftEngInfo &other) const {
@@ -125,7 +125,7 @@ struct RoseSuffixInfo {
     std::shared_ptr<raw_som_dfa> haig;
     std::shared_ptr<raw_dfa> rdfa;
     std::shared_ptr<TamaProto> tamarama;
-    depth dfa_min_width = 0;
+    depth dfa_min_width{0};
     depth dfa_max_width = depth::infinity();
 
     bool operator==(const RoseSuffixInfo &b) const;
diff --git a/src/rose/rose_in_graph.h b/src/rose/rose_in_graph.h
index 0e2185768..42c59932d 100644
--- a/src/rose/rose_in_graph.h
+++ b/src/rose/rose_in_graph.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -55,6 +55,7 @@ namespace ue2 {
 
 class NGHolder;
 struct raw_som_dfa;
+struct raw_dfa;
 
 enum RoseInVertexType {
     RIV_LITERAL,
@@ -166,9 +167,12 @@ struct RoseInEdgeProps {
     /** \brief Maximum bound on 'dot' repeat between literals. */
     u32 maxBound;
 
-    /** \brief Prefix graph. Graph is end to (end - lag). */
+    /** \brief Graph on edge. Graph is end to (end - lag). */
     std::shared_ptr<NGHolder> graph;
 
+    /** \brief DFA version of graph, if we have already determinised. */
+    std::shared_ptr<raw_dfa> dfa;
+
     /** \brief Haig version of graph, if required. */
     std::shared_ptr<raw_som_dfa> haig;
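
Aside (illustration only, not part of the patch): the change from `depth dfa_min_width = 0;` to `depth dfa_min_width{0};` above is the usual fix when a converting constructor is explicit: copy-initialisation with `=` needs an implicit conversion, while direct-list-initialisation may call an explicit constructor. A standalone sketch with a hypothetical Depth class (not Hyperscan's own):

    #include <cstdint>

    class Depth {
    public:
        explicit Depth(uint32_t v) : val(v) {}
    private:
        uint32_t val;
    };

    struct Info {
        // Depth d = 0;  // ill-formed: copy-init cannot use an explicit ctor
        Depth d{0};      // OK: direct-list-initialisation
    };
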
diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h
index 411ce03f6..57395c9dc 100644
--- a/src/rose/rose_internal.h
+++ b/src/rose/rose_internal.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -304,7 +304,6 @@ struct RoseEngine {
     u8 hasSom; /**< has at least one pattern which tracks SOM. */
     u8 somHorizon; /**< width in bytes of SOM offset storage (governed by SOM
                     * precision) */
-    u8 needsCatchup; /** catch up needs to be run on every report. */
     u32 mode; /**< scanning mode, one of HS_MODE_{BLOCK,STREAM,VECTORED} */
     u32 historyRequired; /**< max amount of history required for streaming */
     u32 ekeyCount; /**< number of exhaustion keys */
@@ -326,6 +325,7 @@ struct RoseEngine {
     u32 amatcherOffset; // offset of the anchored literal matcher (bytes)
     u32 ematcherOffset; // offset of the eod-anchored literal matcher (bytes)
     u32 fmatcherOffset; // offset of the floating literal matcher (bytes)
+    u32 drmatcherOffset; // offset of the delayed rebuild table (bytes)
     u32 sbmatcherOffset; // offset of the small-block literal matcher (bytes)
     u32 longLitTableOffset; // offset of the long literal table
     u32 amatcherMinWidth; /**< minimum number of bytes required for a pattern
@@ -343,12 +343,6 @@ struct RoseEngine {
     u32 fmatcherMaxBiAnchoredWidth; /**< maximum number of bytes that can still
                                      * produce a match for a pattern involved
                                      * with the anchored table. */
-    /** \brief Offset of u32 array of program offsets for literals. */
-    u32 litProgramOffset;
-
-    /** \brief Offset of u32 array of delay rebuild program offsets for
-     * literals. */
-    u32 litDelayRebuildProgramOffset;
 
     /**
      * \brief Offset of u32 array of program offsets for reports used by
@@ -362,12 +356,15 @@ struct RoseEngine {
     u32 reportProgramCount;
 
     /**
-     * \brief Number of entries in the arrays pointed to by litProgramOffset,
-     * litDelayRebuildProgramOffset.
-     *
-     * Note: NOT the total number of literals.
+     * \brief Offset of u32 array of program offsets for delayed replay of
+     * literals.
+     */
+    u32 delayProgramOffset;
+
+    /**
+     * \brief Offset of u32 array of program offsets for anchored literals.
      */
-    u32 literalCount;
+    u32 anchoredProgramOffset;
 
     u32 activeArrayCount; //number of nfas tracked in the active array
     u32 activeLeftCount; //number of nfas tracked in the active rose array
@@ -386,9 +383,6 @@ struct RoseEngine {
     u32 leftOffset;
     u32 roseCount;
 
-    u32 lookaroundTableOffset; //!< base of lookaround offset list (of s8 values)
-    u32 lookaroundReachOffset; /**< base of lookaround reach bitvectors (32
-                                * bytes each) */
 
     u32 eodProgramOffset; //!< EOD program, otherwise 0.
 
@@ -419,12 +413,8 @@ struct RoseEngine {
     u32 size; // (bytes)
     u32 delay_count; /* number of delayed literal ids. */
     u32 delay_fatbit_size; //!< size of each delay fatbit in scratch (bytes)
-    u32 delay_base_id; /* literal id of the first delayed literal.
-                        * delayed literal ids are contiguous */
     u32 anchored_count; /* number of anchored literal ids */
     u32 anchored_fatbit_size; //!< size of each anch fatbit in scratch (bytes)
-    u32 anchored_base_id; /* literal id of the first literal in the A table.
-                           * anchored literal ids are contiguous */
     u32 maxFloatingDelayedMatch; /* max offset that a delayed literal can
                                   * usefully be reported */
     u32 delayRebuildLength; /* length of the history region which needs to be
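
Aside (illustration only, not part of the patch): delayProgramOffset and anchoredProgramOffset follow the same two-hop layout that litProgramOffset used: the engine stores the offset of a u32 array, and each entry of that array is itself the byte offset of a program. A simplified sketch of the lookup (field names as above, error handling elided):

    /* Fetch the program for delayed-literal id `id`: engine base ->
     * u32 offset array -> program bytes. */
    static const char *get_delay_program(const struct RoseEngine *t, u32 id) {
        const u32 *programs = (const u32 *)((const char *)t
                                            + t->delayProgramOffset);
        return (const char *)t + programs[id];
    }
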
diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h
index ed9133162..78b123d5c 100644
--- a/src/rose/rose_program.h
+++ b/src/rose/rose_program.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -36,6 +36,7 @@
 #include "som/som_operation.h"
 #include "rose_internal.h"
 #include "ue2common.h"
+#include "util/simd_types.h"
 
 /** \brief Minimum alignment for each instruction in memory. */
 #define ROSE_INSTR_MIN_ALIGN 8U
@@ -61,7 +62,7 @@ enum RoseInstructionCode {
     ROSE_INSTR_CHECK_INFIX, //!< Infix engine must be in accept state.
     ROSE_INSTR_CHECK_PREFIX, //!< Prefix engine must be in accept state.
     ROSE_INSTR_PUSH_DELAYED, //!< Push delayed literal matches.
-    ROSE_INSTR_RECORD_ANCHORED, //!< Record an anchored literal match.
+    ROSE_INSTR_DUMMY_NOP, //!< NOP. Should not exist in build programs.
     ROSE_INSTR_CATCH_UP, //!< Catch up engines, anchored matches.
     ROSE_INSTR_CATCH_UP_MPV, //!< Catch up the MPV.
     ROSE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM.
@@ -129,7 +130,55 @@ enum RoseInstructionCode {
      */
     ROSE_INSTR_CHECK_LONG_LIT_NOCASE,
 
-    LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_LONG_LIT_NOCASE //!< Sentinel.
+    /**
+     * \brief Confirm a case-sensitive "medium length" literal at the current
+     * offset. In streaming mode, this will check history if needed.
+     */
+    ROSE_INSTR_CHECK_MED_LIT,
+
+    /**
+     * \brief Confirm a case-insensitive "medium length" literal at the current
+     * offset. In streaming mode, this will check history if needed.
+     */
+    ROSE_INSTR_CHECK_MED_LIT_NOCASE,
+
+    /**
+     * \brief Clear the "work done" flag used by the SQUASH_GROUPS instruction.
+     */
+    ROSE_INSTR_CLEAR_WORK_DONE,
+
+    /** \brief Check lookaround if it has multiple paths. */
+    ROSE_INSTR_MULTIPATH_LOOKAROUND,
+
+    /**
+     * \brief Use shufti to check lookaround with multiple paths. The total
+     * length of the paths is 16 bytes at most and shufti has 8 buckets.
+     * All paths can be at most 16 bytes long.
+     */
+    ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_16x8,
+
+    /**
+     * \brief Use shufti to check lookaround with multiple paths. The total
+     * length of the paths is 32 bytes at most and shufti has 8 buckets.
+     * All paths can be at most 16 bytes long.
+     */
+    ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x8,
+
+    /**
+     * \brief Use shufti to check lookaround with multiple paths. The total
+     * length of the paths is 32 bytes at most and shufti has 16 buckets.
+     * All paths can be at most 16 bytes long.
+     */
+    ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x16,
+
+    /**
+     * \brief Use shufti to check multiple paths lookaround. The total
+     * length of the paths is 64 bytes at most and shufti has 8 buckets.
+     * All paths can be at most 16 bytes long.
+     */
+    ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_64,
+
+    LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_64 //!< Sentinel.
 };
 
 struct ROSE_STRUCT_END {
@@ -139,13 +188,14 @@ struct ROSE_STRUCT_END {
 struct ROSE_STRUCT_ANCHORED_DELAY {
     u8 code; //!< From enum RoseInstructionCode.
     rose_group groups; //!< Bitmask.
-    u32 done_jump; //!< Jump forward this many bytes if successful.
+    u32 anch_id; //!< Program to restart after the delay.
+    u32 done_jump; //!< Jump forward this many bytes if we have to delay.
 };
 
-/** Note: check failure will halt program. */
 struct ROSE_STRUCT_CHECK_LIT_EARLY {
     u8 code; //!< From enum RoseInstructionCode.
     u32 min_offset; //!< Minimum offset for this literal.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
 
 /** Note: check failure will halt program. */
@@ -175,14 +225,15 @@ struct ROSE_STRUCT_CHECK_NOT_HANDLED {
 struct ROSE_STRUCT_CHECK_SINGLE_LOOKAROUND {
     u8 code; //!< From enum RoseInstructionCode.
     s8 offset; //!< The offset of the byte to examine.
-    u32 reach_index; //!< The index of the reach table entry to use.
+    u32 reach_index; //!< Index for lookaround reach bitvectors.
     u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
 
 struct ROSE_STRUCT_CHECK_LOOKAROUND {
     u8 code; //!< From enum RoseInstructionCode.
-    u32 index;
-    u32 count;
+    u32 look_index; //!< Offset in bytecode of lookaround offset list.
+    u32 reach_index; //!< Offset in bytecode of lookaround reach bitvectors.
+    u32 count; //!< The count of lookaround entries in one instruction.
     u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
 
@@ -277,9 +328,8 @@ struct ROSE_STRUCT_PUSH_DELAYED {
     u32 index; // Delay literal index (relative to first delay lit).
 };
 
-struct ROSE_STRUCT_RECORD_ANCHORED {
+struct ROSE_STRUCT_DUMMY_NOP {
     u8 code; //!< From enum RoseInstructionCode.
-    u32 id; //!< Literal ID.
 };
 
 struct ROSE_STRUCT_CATCH_UP {
@@ -477,18 +527,102 @@ struct ROSE_STRUCT_MATCHER_EOD {
     u8 code; //!< From enum RoseInstructionCode.
 };
 
-/** Note: check failure will halt program. */
 struct ROSE_STRUCT_CHECK_LONG_LIT {
     u8 code; //!< From enum RoseInstructionCode.
     u32 lit_offset; //!< Offset of literal string.
     u32 lit_length; //!< Length of literal string.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
 
-/** Note: check failure will halt program. */
 struct ROSE_STRUCT_CHECK_LONG_LIT_NOCASE {
     u8 code; //!< From enum RoseInstructionCode.
     u32 lit_offset; //!< Offset of literal string.
     u32 lit_length; //!< Length of literal string.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+struct ROSE_STRUCT_CHECK_MED_LIT {
+    u8 code; //!< From enum RoseInstructionCode.
+    u32 lit_offset; //!< Offset of literal string.
+    u32 lit_length; //!< Length of literal string.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+struct ROSE_STRUCT_CHECK_MED_LIT_NOCASE {
+    u8 code; //!< From enum RoseInstructionCode.
+    u32 lit_offset; //!< Offset of literal string.
+    u32 lit_length; //!< Length of literal string.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+struct ROSE_STRUCT_CLEAR_WORK_DONE {
+    u8 code; //!< From enum RoseInstructionCode.
+};
+
+struct ROSE_STRUCT_MULTIPATH_LOOKAROUND {
+    u8 code; //!< From enum RoseInstructionCode.
+    u32 look_index; //!< Offset in bytecode of lookaround offset list.
+    u32 reach_index; //!< Offset in bytecode of lookaround reach bitvectors.
+    u32 count; //!< The lookaround byte numbers for each path.
+    s32 last_start; //!< The latest start offset among 8 paths.
+    u8 start_mask[MULTIPATH_MAX_LEN]; /*!< Used to initialize path if left-most
+                                       * data is missed. */
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
 
+struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_16x8 {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 nib_mask[2 * sizeof(m128)]; //!< High and low nibble mask in shufti.
+    u8 bucket_select_mask[sizeof(m128)]; //!< Mask for bucket assigning.
+    u8 data_select_mask[sizeof(m128)]; //!< Shuffle mask for data ordering.
+    u32 hi_bits_mask; //!< High-bits used in multi-path validation.
+    u32 lo_bits_mask; //!< Low-bits used in multi-path validation.
+    u32 neg_mask; //!< 64 bits negation mask.
+    s32 base_offset; //!< Relative offset of the first byte.
+    s32 last_start; //!< The latest start offset among 8 paths.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x8 {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 hi_mask[sizeof(m128)]; //!< High nibble mask in shufti.
+    u8 lo_mask[sizeof(m128)]; //!< Low nibble mask in shufti.
+    u8 bucket_select_mask[sizeof(m256)]; //!< Mask for bucket assigning.
+    u8 data_select_mask[sizeof(m256)]; //!< Shuffle mask for data ordering.
+    u32 hi_bits_mask; //!< High-bits used in multi-path validation.
+    u32 lo_bits_mask; //!< Low-bits used in multi-path validation.
+    u32 neg_mask; //!< 64 bits negation mask.
+    s32 base_offset; //!< Relative offset of the first byte.
+    s32 last_start; //!< The latest start offset among 8 paths.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x16 {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 hi_mask[sizeof(m256)]; //!< High nibble mask in shufti.
+    u8 lo_mask[sizeof(m256)]; //!< Low nibble mask in shufti.
+    u8 bucket_select_mask_hi[sizeof(m256)]; //!< Mask for bucket assigning.
+    u8 bucket_select_mask_lo[sizeof(m256)]; //!< Mask for bucket assigning.
+    u8 data_select_mask[sizeof(m256)]; //!< Shuffle mask for data ordering.
+    u32 hi_bits_mask; //!< High-bits used in multi-path validation.
+    u32 lo_bits_mask; //!< Low-bits used in multi-path validation.
+    u32 neg_mask; //!< 64 bits negation mask.
+    s32 base_offset; //!< Relative offset of the first byte.
+    s32 last_start; //!< The latest start offset among 8 paths.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64 {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 hi_mask[sizeof(m128)]; //!< High nibble mask in shufti.
+    u8 lo_mask[sizeof(m128)]; //!< Low nibble mask in shufti.
+    u8 bucket_select_mask[2 * sizeof(m256)]; //!< Mask for bucket assigning.
+    u8 data_select_mask[2 * sizeof(m256)]; //!< Shuffle mask for data ordering.
+    u64a hi_bits_mask; //!< High-bits used in multi-path validation.
+    u64a lo_bits_mask; //!< Low-bits used in multi-path validation.
+    u64a neg_mask; //!< 64 bits negation mask.
+    s32 base_offset; //!< Relative offset of the first byte.
+    s32 last_start; //!< The latest start offset among 8 paths.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
 
 #endif // ROSE_ROSE_PROGRAM_H
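
Aside (illustration only, not part of the patch): the hi_mask/lo_mask pairs in the structures above drive a PSHUFB-based "shufti" character-class test. Each byte's low and high nibbles index a 16-entry table, and the two lookups are ANDed together, so bit b of the result survives only if the byte can belong to bucket b's character class. A scalar model of a single byte (the real code processes 16, 32 or 64 bytes at a time in SIMD):

    /* hi_table/lo_table are precomputed so that bucket bit b is set in
     * both lookups exactly when byte c is in bucket b's class. */
    static unsigned char shufti_buckets(unsigned char c,
                                        const unsigned char hi_table[16],
                                        const unsigned char lo_table[16]) {
        return hi_table[c >> 4] & lo_table[c & 0xf];
    }
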
diff --git a/src/rose/stream.c b/src/rose/stream.c
index 9599612f0..c68cd8ab9 100644
--- a/src/rose/stream.c
+++ b/src/rose/stream.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -412,16 +412,18 @@ void ensureStreamNeatAndTidy(const struct RoseEngine *t, char *state,
 }
 
 static really_inline
-void do_rebuild(const struct RoseEngine *t, const struct HWLM *ftable,
-                struct hs_scratch *scratch) {
+void do_rebuild(const struct RoseEngine *t, struct hs_scratch *scratch) {
+    assert(t->drmatcherOffset);
     assert(!can_stop_matching(scratch));
+
+    const struct HWLM *hwlm = getByOffset(t, t->drmatcherOffset);
     size_t len = MIN(scratch->core_info.hlen, t->delayRebuildLength);
     const u8 *buf = scratch->core_info.hbuf + scratch->core_info.hlen - len;
 
     DEBUG_PRINTF("BEGIN FLOATING REBUILD over %zu bytes\n", len);
 
     scratch->core_info.status &= ~STATUS_DELAY_DIRTY;
 
-    hwlmExec(ftable, buf, len, 0, roseDelayRebuildCallback, scratch,
+    hwlmExec(hwlm, buf, len, 0, roseDelayRebuildCallback, scratch,
              scratch->tctxt.groups);
     assert(!can_stop_matching(scratch));
 }
@@ -512,6 +514,34 @@ void runEagerPrefixesStream(const struct RoseEngine *t,
     }
 }
 
+static really_inline
+int can_never_match(const struct RoseEngine *t, char *state,
+                    struct hs_scratch *scratch, size_t length, u64a offset) {
+    struct RoseContext *tctxt = &scratch->tctxt;
+
+    if (tctxt->groups) {
+        DEBUG_PRINTF("still has active groups\n");
+        return 0;
+    }
+
+    if (offset + length <= t->anchoredDistance) { /* not < as may have eod */
+        DEBUG_PRINTF("still in anchored region\n");
+        return 0;
+    }
+
+    if (t->lastByteHistoryIterOffset) { /* last byte history is hard */
+        DEBUG_PRINTF("last byte history\n");
+        return 0;
+    }
+
+    if (mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) {
+        DEBUG_PRINTF("active leaf\n");
+        return 0;
+    }
+
+    return 1;
+}
+
 void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
     DEBUG_PRINTF("OH HAI [%llu, %llu)\n", scratch->core_info.buf_offset,
                  scratch->core_info.buf_offset + (u64a)scratch->core_info.len);
@@ -607,15 +637,12 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
                  rebuild, scratch->core_info.status,
                  t->maxFloatingDelayedMatch, offset);
 
-    if (!flen) {
-        if (rebuild) { /* rebuild floating delayed match stuff */
-            do_rebuild(t, ftable, scratch);
-        }
-        goto flush_delay_and_exit;
+    if (rebuild) { /* rebuild floating delayed match stuff */
+        do_rebuild(t, scratch);
     }
 
-    if (rebuild) { /* rebuild floating delayed match stuff */
-        do_rebuild(t, ftable, scratch);
+    if (!flen) {
+        goto flush_delay_and_exit;
     }
 
     if (flen + offset <= t->floatingMinDistance) {
@@ -647,6 +674,14 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
     if (!can_stop_matching(scratch)) {
         ensureStreamNeatAndTidy(t, state, scratch, length, offset);
    }
+
+    if (!told_to_stop_matching(scratch)
+        && can_never_match(t, state, scratch, length, offset)) {
+        DEBUG_PRINTF("PATTERN SET IS EXHAUSTED\n");
+        scratch->core_info.status = STATUS_EXHAUSTED;
+        return;
+    }
+
     DEBUG_PRINTF("DONE STREAMING SCAN, status = %u\n",
                  scratch->core_info.status);
     return;
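
Aside (illustration only, not part of the patch): the new can_never_match() path above implements the changelog item about detecting exhaustion in more situations; when it fires, the stream's status is set to STATUS_EXHAUSTED and later writes to the stream can return without scanning. Nothing changes at the API level, as this sketch shows (error handling elided; on_match and scan_chunks are hypothetical names):

    #include <hs/hs.h>

    static int on_match(unsigned id, unsigned long long from,
                        unsigned long long to, unsigned flags, void *ctx) {
        (void)id; (void)from; (void)to; (void)flags; (void)ctx;
        return 0; /* continue matching */
    }

    static void scan_chunks(const hs_database_t *db, hs_scratch_t *scratch,
                            const char **chunks, const unsigned *lens,
                            unsigned n) {
        hs_stream_t *stream = NULL;
        hs_open_stream(db, 0, &stream);
        for (unsigned i = 0; i < n; i++) {
            /* Once the runtime decides this stream can never match again,
             * these calls return quickly and deliver no callbacks. */
            hs_scan_stream(stream, chunks[i], lens[i], 0, scratch, on_match,
                           NULL);
        }
        hs_close_stream(stream, scratch, on_match, NULL);
    }
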
diff --git a/src/rose/stream_long_lit.h b/src/rose/stream_long_lit.h
index d78e28635..0736ec88e 100644
--- a/src/rose/stream_long_lit.h
+++ b/src/rose/stream_long_lit.h
@@ -111,7 +111,7 @@ void loadLongLiteralState(const struct RoseEngine *t, char *state,
     }
 
     // If we don't have any long literals in play, these values must point to
-    // the real history buffer so that CHECK_LITERAL instructions examine the
+    // the real history buffer so that CHECK_LONG_LIT instructions examine the
     // history buffer.
     scratch->tctxt.ll_buf = scratch->core_info.hbuf;
     scratch->tctxt.ll_len = scratch->core_info.hlen;
diff --git a/src/rose/validate_shufti.h b/src/rose/validate_shufti.h
index 49d2c2fe6..1dc855d99 100644
--- a/src/rose/validate_shufti.h
+++ b/src/rose/validate_shufti.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Intel Corporation
+ * Copyright (c) 2016-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -46,10 +46,11 @@ void dumpMask(const void *mask, int len) {
 static really_inline
 int validateShuftiMask16x16(const m256 data, const m256 hi_mask,
                             const m256 lo_mask, const m256 and_mask,
-                            const u32 neg_mask, const u16 valid_data_mask) {
+                            const u32 neg_mask, const u32 valid_data_mask) {
     m256 low4bits = set32x8(0xf);
-    m256 c_lo = vpshufb(lo_mask, and256(data, low4bits));
-    m256 c_hi = vpshufb(hi_mask, rshift64_m256(andnot256(low4bits, data), 4));
+    m256 c_lo = pshufb_m256(lo_mask, and256(data, low4bits));
+    m256 c_hi = pshufb_m256(hi_mask,
+                            rshift64_m256(andnot256(low4bits, data), 4));
     m256 t = and256(c_lo, c_hi);
     u32 nresult = movemask256(eq256(and256(t, and_mask), zeroes256()));
 #ifdef DEBUG
@@ -75,10 +76,10 @@ int validateShuftiMask16x16(const m256 data, const m256 hi_mask,
 static really_inline
 int validateShuftiMask16x8(const m128 data, const m256 nib_mask,
                            const m128 and_mask, const u32 neg_mask,
-                           const u16 valid_data_mask) {
+                           const u32 valid_data_mask) {
     m256 data_m256 = combine2x128(rshift64_m128(data, 4), data);
     m256 low4bits = set32x8(0xf);
-    m256 c_nib = vpshufb(nib_mask, and256(data_m256, low4bits));
+    m256 c_nib = pshufb_m256(nib_mask, and256(data_m256, low4bits));
     m128 t = and128(movdq_hi(c_nib), movdq_lo(c_nib));
     m128 nresult = eq128(and128(t, and_mask), zeroes128());
 #ifdef DEBUG
@@ -101,8 +102,9 @@ int validateShuftiMask32x8(const m256 data, const m256 hi_mask,
                            const m256 lo_mask, const m256 and_mask,
                            const u32 neg_mask, const u32 valid_data_mask) {
     m256 low4bits = set32x8(0xf);
-    m256 c_lo = vpshufb(lo_mask, and256(data, low4bits));
-    m256 c_hi = vpshufb(hi_mask, rshift64_m256(andnot256(low4bits, data), 4));
+    m256 c_lo = pshufb_m256(lo_mask, and256(data, low4bits));
+    m256 c_hi = pshufb_m256(hi_mask,
+                            rshift64_m256(andnot256(low4bits, data), 4));
     m256 t = and256(c_lo, c_hi);
     m256 nresult = eq256(and256(t, and_mask), zeroes256());
 #ifdef DEBUG
@@ -134,10 +136,10 @@ int validateShuftiMask32x16(const m256 data,
     m256 low4bits = set32x8(0xf);
     m256 data_lo = and256(data, low4bits);
     m256 data_hi = and256(rshift64_m256(data, 4), low4bits);
-    m256 c_lo_1 = vpshufb(lo_mask_1, data_lo);
-    m256 c_lo_2 = vpshufb(lo_mask_2, data_lo);
-    m256 c_hi_1 = vpshufb(hi_mask_1, data_hi);
-    m256 c_hi_2 = vpshufb(hi_mask_2, data_hi);
+    m256 c_lo_1 = pshufb_m256(lo_mask_1, data_lo);
+    m256 c_lo_2 = pshufb_m256(lo_mask_2, data_lo);
+    m256 c_hi_1 = pshufb_m256(hi_mask_1, data_hi);
+    m256 c_hi_2 = pshufb_m256(hi_mask_2, data_hi);
     m256 t1 = and256(c_lo_1, c_hi_1);
     m256 t2 = and256(c_lo_2, c_hi_2);
     m256 result = or256(and256(t1, bucket_mask_lo), and256(t2, bucket_mask_hi));
@@ -172,4 +174,121 @@ int validateShuftiMask32x16(const m256 data,
     u32 cmp_result = (nresult ^ neg_mask) & valid_data_mask;
     return !cmp_result;
 }
+
+static really_inline
+int checkMultipath32(u32 data, u32 hi_bits, u32 lo_bits) {
+    u32 t = ~(data | hi_bits);
+    t += lo_bits;
+    t &= (~data) & hi_bits;
+    DEBUG_PRINTF("t %x\n", t);
+    return !!t;
+}
+
+static really_inline
+int checkMultipath64(u64a data, u64a hi_bits, u64a lo_bits) {
+    u64a t = ~(data | hi_bits);
+    t += lo_bits;
+    t &= (~data) & hi_bits;
+    DEBUG_PRINTF("t %llx\n", t);
+    return !!t;
+}
+
+static really_inline
+int validateMultipathShuftiMask16x8(const m128 data,
+                                    const m256 nib_mask,
+                                    const m128 bucket_select_mask,
+                                    const u32 hi_bits, const u32 lo_bits,
+                                    const u32 neg_mask,
+                                    const u32 valid_path_mask) {
+    m256 data_256 = combine2x128(rshift64_m128(data, 4), data);
+    m256 low4bits = set32x8(0xf);
+    m256 c_nib = pshufb_m256(nib_mask, and256(data_256, low4bits));
+    m128 t = and128(movdq_hi(c_nib), movdq_lo(c_nib));
+    m128 result = and128(t, bucket_select_mask);
+    u32 nresult = movemask128(eq128(result, zeroes128()));
+    u32 cmp_result = (nresult ^ neg_mask) | valid_path_mask;
+
+    DEBUG_PRINTF("cmp_result %x\n", cmp_result);
+
+    return checkMultipath32(cmp_result, hi_bits, lo_bits);
+}
+
+static really_inline
+int validateMultipathShuftiMask32x8(const m256 data,
+                                    const m256 hi_mask, const m256 lo_mask,
+                                    const m256 bucket_select_mask,
+                                    const u32 hi_bits, const u32 lo_bits,
+                                    const u32 neg_mask,
+                                    const u32 valid_path_mask) {
+    m256 low4bits = set32x8(0xf);
+    m256 data_lo = and256(data, low4bits);
+    m256 data_hi = and256(rshift64_m256(data, 4), low4bits);
+    m256 c_lo = pshufb_m256(lo_mask, data_lo);
+    m256 c_hi = pshufb_m256(hi_mask, data_hi);
+    m256 c = and256(c_lo, c_hi);
+    m256 result = and256(c, bucket_select_mask);
+    u32 nresult = movemask256(eq256(result, zeroes256()));
+    u32 cmp_result = (nresult ^ neg_mask) | valid_path_mask;
+
+    DEBUG_PRINTF("cmp_result %x\n", cmp_result);
+
+    return checkMultipath32(cmp_result, hi_bits, lo_bits);
+}
+
+static really_inline
+int validateMultipathShuftiMask32x16(const m256 data,
+                                     const m256 hi_mask_1, const m256 hi_mask_2,
+                                     const m256 lo_mask_1, const m256 lo_mask_2,
+                                     const m256 bucket_select_mask_hi,
+                                     const m256 bucket_select_mask_lo,
+                                     const u32 hi_bits, const u32 lo_bits,
+                                     const u32 neg_mask,
+                                     const u32 valid_path_mask) {
+    m256 low4bits = set32x8(0xf);
+    m256 data_lo = and256(data, low4bits);
+    m256 data_hi = and256(rshift64_m256(data, 4), low4bits);
+    m256 c_lo_1 = pshufb_m256(lo_mask_1, data_lo);
+    m256 c_lo_2 = pshufb_m256(lo_mask_2, data_lo);
+    m256 c_hi_1 = pshufb_m256(hi_mask_1, data_hi);
+    m256 c_hi_2 = pshufb_m256(hi_mask_2, data_hi);
+    m256 t1 = and256(c_lo_1, c_hi_1);
+    m256 t2 = and256(c_lo_2, c_hi_2);
+    m256 result = or256(and256(t1, bucket_select_mask_lo),
+                        and256(t2, bucket_select_mask_hi));
+    u32 nresult = movemask256(eq256(result, zeroes256()));
+    u32 cmp_result = (nresult ^ neg_mask) | valid_path_mask;
+
+    DEBUG_PRINTF("cmp_result %x\n", cmp_result);
+
+    return checkMultipath32(cmp_result, hi_bits, lo_bits);
+}
+
+static really_inline
+int validateMultipathShuftiMask64(const m256 data_1, const m256 data_2,
+                                  const m256 hi_mask, const m256 lo_mask,
+                                  const m256 bucket_select_mask_1,
+                                  const m256 bucket_select_mask_2,
+                                  const u64a hi_bits, const u64a lo_bits,
+                                  const u64a neg_mask,
+                                  const u64a valid_path_mask) {
+    m256 low4bits = set32x8(0xf);
+    m256 c_lo_1 = pshufb_m256(lo_mask, and256(data_1, low4bits));
+    m256 c_lo_2 = pshufb_m256(lo_mask, and256(data_2, low4bits));
+    m256 c_hi_1 = pshufb_m256(hi_mask,
+                              rshift64_m256(andnot256(low4bits, data_1), 4));
+    m256 c_hi_2 = pshufb_m256(hi_mask,
+                              rshift64_m256(andnot256(low4bits, data_2), 4));
+    m256 t1 = and256(c_lo_1, c_hi_1);
+    m256 t2 = and256(c_lo_2, c_hi_2);
+    m256 nresult_1 = eq256(and256(t1, bucket_select_mask_1), zeroes256());
+    m256 nresult_2 = eq256(and256(t2, bucket_select_mask_2), zeroes256());
+    u64a nresult = (u64a)movemask256(nresult_1) |
+                   (u64a)movemask256(nresult_2) << 32;
+    u64a cmp_result = (nresult ^ neg_mask) | valid_path_mask;
+
+    DEBUG_PRINTF("cmp_result %llx\n", cmp_result);
+
+    return checkMultipath64(cmp_result, hi_bits, lo_bits);
+}
+
 #endif
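
Aside (illustration only, not part of the patch): checkMultipath32() and checkMultipath64() above use a carry-propagation trick. lo_bits marks the least significant bit of each path's field within data, hi_bits the most significant; a carry injected at a field's low end can only reach its high bit if every data bit in between is zero, i.e. if every byte of that path matched. A standalone worked example with two 4-bit fields:

    #include <assert.h>
    #include <stdint.h>

    /* Same arithmetic as checkMultipath32: nonzero iff at least one
     * lo..hi field in data is entirely zero. */
    static int any_field_all_zero(uint32_t data, uint32_t hi, uint32_t lo) {
        uint32_t t = ~(data | hi);
        t += lo;
        return !!(t & ~data & hi);
    }

    int main(void) {
        uint32_t lo = 0x11, hi = 0x88; /* fields: bits 0..3 and bits 4..7 */
        assert(any_field_all_zero(0x00, hi, lo));  /* both paths match */
        assert(any_field_all_zero(0xf0, hi, lo));  /* low field all zero */
        assert(!any_field_all_zero(0x42, hi, lo)); /* a 1 in each field */
        return 0;
    }
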
diff --git a/src/runtime.c b/src/runtime.c
index a2ed10260..5725cf93a 100644
--- a/src/runtime.c
+++ b/src/runtime.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -214,7 +214,7 @@ void pureLiteralBlockExec(const struct RoseEngine *rose,
     scratch->tctxt.groups = rose->initialGroups;
 
     hwlmExec(ftable, buffer, length, 0, roseCallback, scratch,
-             rose->initialGroups);
+             rose->initialGroups & rose->floating_group_mask);
 }
 
 static really_inline
@@ -311,9 +311,10 @@ void runSmallWriteEngine(const struct SmallWriteEngine *smwr,
 }
 
 HS_PUBLIC_API
-hs_error_t hs_scan(const hs_database_t *db, const char *data, unsigned length,
-                   unsigned flags, hs_scratch_t *scratch,
-                   match_event_handler onEvent, void *userCtx) {
+hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data,
+                            unsigned length, unsigned flags,
+                            hs_scratch_t *scratch, match_event_handler onEvent,
+                            void *userCtx) {
     if (unlikely(!scratch || !data)) {
         return HS_INVALID;
     }
@@ -503,8 +504,9 @@ void init_stream(struct hs_stream *s, const struct RoseEngine *rose,
 }
 
 HS_PUBLIC_API
-hs_error_t hs_open_stream(const hs_database_t *db, UNUSED unsigned flags,
-                          hs_stream_t **stream) {
+hs_error_t HS_CDECL hs_open_stream(const hs_database_t *db,
+                                   UNUSED unsigned flags,
+                                   hs_stream_t **stream) {
     if (unlikely(!stream)) {
         return HS_INVALID;
     }
@@ -656,7 +658,8 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
 }
 
 HS_PUBLIC_API
-hs_error_t hs_copy_stream(hs_stream_t **to_id, const hs_stream_t *from_id) {
+hs_error_t HS_CDECL hs_copy_stream(hs_stream_t **to_id,
+                                   const hs_stream_t *from_id) {
     if (!to_id) {
         return HS_INVALID;
     }
@@ -683,11 +686,11 @@ hs_error_t hs_copy_stream(hs_stream_t **to_id, const hs_stream_t *from_id) {
 }
 
 HS_PUBLIC_API
-hs_error_t hs_reset_and_copy_stream(hs_stream_t *to_id,
-                                    const hs_stream_t *from_id,
-                                    hs_scratch_t *scratch,
-                                    match_event_handler onEvent,
-                                    void *context) {
+hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id,
+                                             const hs_stream_t *from_id,
+                                             hs_scratch_t *scratch,
+                                             match_event_handler onEvent,
+                                             void *context) {
     if (!from_id || !from_id->rose) {
         return HS_INVALID;
     }
@@ -762,7 +765,7 @@ void pureLiteralStreamExec(struct hs_stream *stream_state,
     const size_t start = 0;
 
     hwlmExecStreaming(ftable, scratch, len2, start, roseCallback, scratch,
-                      rose->initialGroups);
+                      rose->initialGroups & rose->floating_group_mask);
 
     if (!told_to_stop_matching(scratch) &&
         isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
@@ -906,9 +909,10 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
 }
 
 HS_PUBLIC_API
-hs_error_t hs_scan_stream(hs_stream_t *id, const char *data, unsigned length,
-                          unsigned flags, hs_scratch_t *scratch,
-                          match_event_handler onEvent, void *context) {
+hs_error_t HS_CDECL hs_scan_stream(hs_stream_t *id, const char *data,
+                                   unsigned length, unsigned flags,
+                                   hs_scratch_t *scratch,
+                                   match_event_handler onEvent, void *context) {
     if (unlikely(!id || !scratch || !data ||
                  !validScratch(id->rose, scratch))) {
         return HS_INVALID;
@@ -924,8 +928,9 @@ hs_error_t hs_scan_stream(hs_stream_t *id, const char *data, unsigned length,
 }
 
 HS_PUBLIC_API
-hs_error_t hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
-                           match_event_handler onEvent, void *context) {
+hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
+                                    match_event_handler onEvent,
+                                    void *context) {
     if (!id) {
         return HS_INVALID;
     }
@@ -947,9 +952,10 @@ hs_error_t hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
 }
 
 HS_PUBLIC_API
-hs_error_t hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags,
-                           hs_scratch_t *scratch, match_event_handler onEvent,
-                           void *context) {
+hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags,
+                                    hs_scratch_t *scratch,
+                                    match_event_handler onEvent,
+                                    void *context) {
     if (!id) {
         return HS_INVALID;
     }
@@ -972,7 +978,8 @@ hs_error_t hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags,
 }
 
 HS_PUBLIC_API
-hs_error_t hs_stream_size(const hs_database_t *db, size_t *stream_size) {
+hs_error_t HS_CDECL hs_stream_size(const hs_database_t *db,
+                                   size_t *stream_size) {
     if (!stream_size) {
         return HS_INVALID;
     }
@@ -1019,10 +1026,13 @@ void dumpData(const char *data, size_t len) {
 #endif
 
 HS_PUBLIC_API
-hs_error_t hs_scan_vector(const hs_database_t *db, const char * const * data,
-                          const unsigned int *length, unsigned int count,
-                          UNUSED unsigned int flags, hs_scratch_t *scratch,
-                          match_event_handler onEvent, void *context) {
+hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db,
+                                   const char * const * data,
+                                   const unsigned int *length,
+                                   unsigned int count,
+                                   UNUSED unsigned int flags,
+                                   hs_scratch_t *scratch,
+                                   match_event_handler onEvent, void *context) {
     if (unlikely(!scratch || !data || !length)) {
         return HS_INVALID;
     }
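
Aside (illustration only, not part of the patch): every public entry point in runtime.c now carries an HS_CDECL annotation. The macro comes from the public headers and pins the C calling convention for Windows builds while expanding to nothing elsewhere; presumably something along these lines (a sketch of the idea, not the verbatim header):

    #if defined(_WIN32)
    #define HS_CDECL __cdecl
    #else
    #define HS_CDECL
    #endif
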
hs_scratch_size(const hs_scratch_t *scratch, size_t *size) { +hs_error_t HS_CDECL hs_scratch_size(const hs_scratch_t *scratch, size_t *size) { if (!size || !scratch || !ISALIGNED_CL(scratch) || scratch->magic != SCRATCH_MAGIC) { return HS_INVALID; diff --git a/src/scratch.h b/src/scratch.h index b59dc8d4b..47f8afa87 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -73,8 +73,11 @@ struct catchup_pq { /** \brief Status flag: user requested termination. */ #define STATUS_TERMINATED (1U << 0) -/** \brief Status flag: all possible matches on this stream have - * been raised (i.e. all its exhaustion keys are on.) */ +/** \brief Status flag: it has been determined that it is not possible for this + * stream to raise any more matches. + * + * This may be because all its exhaustion keys are on or for other reasons + * (anchored sections not matching). */ #define STATUS_EXHAUSTED (1U << 1) /** \brief Status flag: Rose requires rebuild as delay literal matched in diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index 108bca8aa..bb933cbe3 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,10 +26,16 @@ * POSSIBILITY OF SUCH DAMAGE. */ +/** + * \file + * \brief Small-write engine build code. + */ + #include "smallwrite/smallwrite_build.h" #include "grey.h" #include "ue2common.h" +#include "compiler/compiler.h" #include "nfa/dfa_min.h" #include "nfa/mcclellancompile.h" #include "nfa/mcclellancompile_util.h" @@ -40,14 +46,18 @@ #include "nfagraph/ng_depth.h" #include "nfagraph/ng_holder.h" #include "nfagraph/ng_mcclellan.h" +#include "nfagraph/ng_reports.h" #include "nfagraph/ng_prune.h" #include "nfagraph/ng_util.h" #include "smallwrite/smallwrite_internal.h" #include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/charreach.h" +#include "util/compare.h" #include "util/compile_context.h" #include "util/container.h" #include "util/make_unique.h" +#include "util/ue2_graph.h" #include "util/ue2string.h" #include "util/verify_types.h" @@ -56,12 +66,55 @@ #include #include +#include + using namespace std; namespace ue2 { -#define LITERAL_MERGE_CHUNK_SIZE 25 #define DFA_MERGE_MAX_STATES 8000 +#define MAX_TRIE_VERTICES 8000 + +struct LitTrieVertexProps { + LitTrieVertexProps() = default; + explicit LitTrieVertexProps(u8 c_in) : c(c_in) {} + size_t index; // managed by ue2_graph + u8 c = 0; //!< character reached on this vertex + flat_set reports; //!< managed reports fired on this vertex +}; + +struct LitTrieEdgeProps { + size_t index; // managed by ue2_graph +}; + +/** + * \brief BGL graph used to store a trie of literals (for later AC construction + * into a DFA). + */ +struct LitTrie + : public ue2_graph { + + LitTrie() : root(add_vertex(*this)) {} + + const vertex_descriptor root; //!< Root vertex for the trie. 
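    // (Each non-root vertex represents a single character; a path from
    // the root spells out a prefix of one or more added literals.)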
+}; + +static +bool is_empty(const LitTrie &trie) { + return num_vertices(trie) <= 1; +} + +static +std::set all_reports(const LitTrie &trie) { + std::set reports; + for (auto v : vertices_range(trie)) { + insert(&reports, trie[v].reports); + } + return reports; +} + +using LitTrieVertex = LitTrie::vertex_descriptor; +using LitTrieEdge = LitTrie::edge_descriptor; namespace { // unnamed @@ -72,9 +125,9 @@ class SmallWriteBuildImpl : public SmallWriteBuild { const CompileContext &cc); // Construct a runtime implementation. - aligned_unique_ptr build(u32 roseQuality) override; + bytecode_ptr build(u32 roseQuality) override; - void add(const NGWrapper &w) override; + void add(const NGHolder &g, const ExpressionInfo &expr) override; void add(const ue2_literal &literal, ReportID r) override; set all_reports() const override; @@ -85,13 +138,15 @@ class SmallWriteBuildImpl : public SmallWriteBuild { const CompileContext &cc; unique_ptr rdfa; - vector > cand_literals; + LitTrie lit_trie; + LitTrie lit_trie_nocase; + size_t num_literals = 0; bool poisoned; }; } // namespace -SmallWriteBuild::~SmallWriteBuild() { } +SmallWriteBuild::~SmallWriteBuild() = default; SmallWriteBuildImpl::SmallWriteBuildImpl(size_t num_patterns, const ReportManager &rm_in, @@ -143,16 +198,16 @@ static bool pruneOverlong(NGHolder &g, const depth &max_depth, const ReportManager &rm) { bool modified = false; - std::vector depths; - calcDepths(g, depths); + auto depths = calcBidiDepths(g); for (auto v : vertices_range(g)) { if (is_special(v, g)) { continue; } const auto &d = depths.at(g[v].index); - depth min_depth = min(d.fromStart.min, d.fromStartDotStar.min); - if (min_depth > max_depth) { + depth min_match_offset = min(d.fromStart.min, d.fromStartDotStar.min) + + min(d.toAccept.min, d.toAcceptEod.min); + if (min_match_offset > max_depth) { clear_vertex(v, g); modified = true; continue; @@ -171,26 +226,41 @@ bool pruneOverlong(NGHolder &g, const depth &max_depth, return modified; } -void SmallWriteBuildImpl::add(const NGWrapper &w) { +void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) { // If the graph is poisoned (i.e. we can't build a SmallWrite version), // we don't even try. 
if (poisoned) { return; } - if (w.som || w.min_length || isVacuous(w)) { /* cannot support in smwr */ + if (expr.som) { + DEBUG_PRINTF("no SOM support in small-write engine\n"); poisoned = true; return; } - DEBUG_PRINTF("w=%p\n", &w); + if (isVacuous(g)) { + DEBUG_PRINTF("no vacuous graph support in small-write engine\n"); + poisoned = true; + return; + } + + if (any_of_in(::ue2::all_reports(g), [&](ReportID id) { + return rm.getReport(id).minLength > 0; + })) { + DEBUG_PRINTF("no min_length extparam support in small-write engine\n"); + poisoned = true; + return; + } + + DEBUG_PRINTF("g=%p\n", &g); // make a copy of the graph so that we can modify it for our purposes - unique_ptr h = cloneHolder(w); + unique_ptr h = cloneHolder(g); pruneOverlong(*h, depth(cc.grey.smallWriteLargestBuffer), rm); - reduceGraph(*h, SOM_NONE, w.utf8, cc); + reduceGraph(*h, SOM_NONE, expr.utf8, cc); if (can_never_match(*h)) { DEBUG_PRINTF("graph can never match in small block\n"); @@ -209,7 +279,7 @@ void SmallWriteBuildImpl::add(const NGWrapper &w) { return; } - if (prune_overlong(*r, cc.grey.smallWriteLargestBuffer)) { + if (clear_deeper_reports(*r, cc.grey.smallWriteLargestBuffer)) { minimize_hopcroft(*r, cc.grey); } @@ -229,101 +299,457 @@ void SmallWriteBuildImpl::add(const NGWrapper &w) { } } +static +bool add_to_trie(const ue2_literal &literal, ReportID report, LitTrie &trie) { + auto u = trie.root; + for (const auto &c : literal) { + auto next = LitTrie::null_vertex(); + for (auto v : adjacent_vertices_range(u, trie)) { + if (trie[v].c == (u8)c.c) { + next = v; + break; + } + } + if (!next) { + next = add_vertex(LitTrieVertexProps((u8)c.c), trie); + add_edge(u, next, trie); + } + u = next; + } + + trie[u].reports.insert(report); + + DEBUG_PRINTF("added '%s' (report %u) to trie, now %zu vertices\n", + escapeString(literal).c_str(), report, num_vertices(trie)); + return num_vertices(trie) <= MAX_TRIE_VERTICES; +} + void SmallWriteBuildImpl::add(const ue2_literal &literal, ReportID r) { // If the graph is poisoned (i.e. we can't build a SmallWrite version), // we don't even try. if (poisoned) { + DEBUG_PRINTF("poisoned\n"); return; } if (literal.length() > cc.grey.smallWriteLargestBuffer) { + DEBUG_PRINTF("exceeded length limit\n"); return; /* too long */ } - cand_literals.push_back(make_pair(literal, r)); + if (++num_literals > cc.grey.smallWriteMaxLiterals) { + DEBUG_PRINTF("exceeded literal limit\n"); + poisoned = true; + return; + } - if (cand_literals.size() > cc.grey.smallWriteMaxLiterals) { + auto &trie = literal.any_nocase() ? lit_trie_nocase : lit_trie; + if (!add_to_trie(literal, r, trie)) { + DEBUG_PRINTF("trie add failed\n"); poisoned = true; } } +namespace { + +/** + * \brief BFS visitor for Aho-Corasick automaton construction. + * + * This is doing two things: + * + * - Computing the failure edges (also called fall or supply edges) for each + * vertex, giving the longest suffix of the path to that point that is also + * a prefix in the trie reached on the same character. The BFS traversal + * makes it possible to build these from earlier failure paths. + * + * - Computing the output function for each vertex, which is done by + * propagating the reports from failure paths as well. This ensures that + * substrings of the current path also report correctly. 
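 *
 * For example, with the literal set {"he", "she", "hers"}, the failure
 * edge of the vertex for "she" points at the vertex for "he" (its suffix
 * "he" is also a prefix in the trie), so the report attached to "he" is
 * propagated to "she": an input matching "she" also reports the substring
 * match of "he".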
+ */ +struct ACVisitor : public boost::default_bfs_visitor { + ACVisitor(LitTrie &trie_in, + map &failure_map_in, + vector &ordering_in) + : mutable_trie(trie_in), failure_map(failure_map_in), + ordering(ordering_in) {} + + LitTrieVertex find_failure_target(LitTrieVertex u, LitTrieVertex v, + const LitTrie &trie) { + assert(u == trie.root || contains(failure_map, u)); + assert(!contains(failure_map, v)); + + const auto &c = trie[v].c; + + while (u != trie.root) { + auto f = failure_map.at(u); + for (auto w : adjacent_vertices_range(f, trie)) { + if (trie[w].c == c) { + return w; + } + } + u = f; + } + + DEBUG_PRINTF("no failure edge\n"); + return LitTrie::null_vertex(); + } + + void tree_edge(LitTrieEdge e, const LitTrie &trie) { + auto u = source(e, trie); + auto v = target(e, trie); + DEBUG_PRINTF("bfs (%zu, %zu) on '%c'\n", trie[u].index, trie[v].index, + trie[v].c); + ordering.push_back(v); + + auto f = find_failure_target(u, v, trie); + + if (f) { + DEBUG_PRINTF("final failure vertex %zu\n", trie[f].index); + failure_map.emplace(v, f); + + // Propagate reports from failure path to ensure we correctly + // report substrings. + insert(&mutable_trie[v].reports, mutable_trie[f].reports); + } else { + DEBUG_PRINTF("final failure vertex root\n"); + failure_map.emplace(v, trie.root); + } + } + +private: + LitTrie &mutable_trie; //!< For setting reports property. + map &failure_map; + vector &ordering; //!< BFS ordering for vertices. +}; +} + +static UNUSED +bool isSaneTrie(const LitTrie &trie) { + CharReach seen; + for (auto u : vertices_range(trie)) { + seen.clear(); + for (auto v : adjacent_vertices_range(u, trie)) { + if (seen.test(trie[v].c)) { + return false; + } + seen.set(trie[v].c); + } + } + return true; +} + +/** + * \brief Turn the given literal trie into an AC automaton by adding additional + * edges and reports. + */ static -void lit_to_graph(NGHolder *h, const ue2_literal &literal, ReportID r) { - NFAVertex u = h->startDs; - for (const auto &c : literal) { - NFAVertex v = add_vertex(*h); - add_edge(u, v, *h); - (*h)[v].char_reach = c; - u = v; +void buildAutomaton(LitTrie &trie, + map &failure_map, + vector &ordering) { + assert(isSaneTrie(trie)); + + // Find our failure transitions and reports. + ACVisitor ac_vis(trie, failure_map, ordering); + boost::breadth_first_search(trie, trie.root, visitor(ac_vis)); + + // Compute missing edges from failure map. + for (auto v : ordering) { + DEBUG_PRINTF("vertex %zu\n", trie[v].index); + CharReach seen; + for (auto w : adjacent_vertices_range(v, trie)) { + DEBUG_PRINTF("edge to %zu with reach 0x%02x\n", trie[w].index, + trie[w].c); + assert(!seen.test(trie[w].c)); + seen.set(trie[w].c); + } + auto parent = failure_map.at(v); + for (auto w : adjacent_vertices_range(parent, trie)) { + if (!seen.test(trie[w].c)) { + add_edge(v, w, trie); + } + } } - (*h)[u].reports.insert(r); - add_edge(u, h->accept, *h); } -bool SmallWriteBuildImpl::determiniseLiterals() { - DEBUG_PRINTF("handling literals\n"); - assert(!poisoned); - assert(cand_literals.size() <= cc.grey.smallWriteMaxLiterals); +static +vector findDistFromRoot(const LitTrie &trie) { + vector dist(num_vertices(trie), UINT32_MAX); + dist[trie[trie.root].index] = 0; + + // BFS to find dist from root. 
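    // (record_distances() with on_tree_edge() assigns dist[target] =
    // dist[source] + 1 for every BFS tree edge, addressing each vertex's
    // slot through its index property -- a hand-rolled BFS from the root
    // would compute exactly the same vector.)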
+ breadth_first_search( + trie, trie.root, + visitor(make_bfs_visitor(record_distances( + make_iterator_property_map(dist.begin(), + get(&LitTrieVertexProps::index, trie)), + boost::on_tree_edge())))); + + return dist; +} - if (cand_literals.empty()) { - return true; /* nothing to do */ +static +vector findDistToAccept(const LitTrie &trie) { + vector dist(num_vertices(trie), UINT32_MAX); + + // Start with all reporting vertices. + deque q; + for (auto v : vertices_range(trie)) { + if (!trie[v].reports.empty()) { + q.push_back(v); + dist[trie[v].index] = 0; + } } - vector > temp_dfas; + // Custom BFS, since we have a pile of sources. + while (!q.empty()) { + auto v = q.front(); + q.pop_front(); + u32 d = dist[trie[v].index]; - for (const auto &cand : cand_literals) { - NGHolder h; - DEBUG_PRINTF("determinising %s\n", dumpString(cand.first).c_str()); - lit_to_graph(&h, cand.first, cand.second); - temp_dfas.push_back(buildMcClellan(h, &rm, cc.grey)); + for (auto u : inv_adjacent_vertices_range(v, trie)) { + auto &u_dist = dist[trie[u].index]; + if (u_dist == UINT32_MAX) { + q.push_back(u); + u_dist = d + 1; + } + } + } - // If we couldn't build a McClellan DFA for this portion, then we - // can't SmallWrite optimize the entire graph, so we can't - // optimize any of it - if (!temp_dfas.back()) { - DEBUG_PRINTF("failed to determinise\n"); - poisoned = true; - return false; + return dist; +} + +/** + * \brief Prune all vertices from the trie that do not lie on a path from root + * to accept of length <= max_depth. + */ +static +void pruneTrie(LitTrie &trie, u32 max_depth) { + DEBUG_PRINTF("pruning trie to %u\n", max_depth); + + auto dist_from_root = findDistFromRoot(trie); + auto dist_to_accept = findDistToAccept(trie); + + vector dead; + for (auto v : vertices_range(trie)) { + if (v == trie.root) { + continue; + } + auto v_index = trie[v].index; + DEBUG_PRINTF("vertex %zu: from_start=%u, to_accept=%u\n", trie[v].index, + dist_from_root[v_index], dist_to_accept[v_index]); + assert(dist_from_root[v_index] != UINT32_MAX); + assert(dist_to_accept[v_index] != UINT32_MAX); + u32 min_path_len = dist_from_root[v_index] + dist_to_accept[v_index]; + if (min_path_len > max_depth) { + DEBUG_PRINTF("pruning vertex %zu (min path len %u)\n", + trie[v].index, min_path_len); + clear_vertex(v, trie); + dead.push_back(v); } } - if (!rdfa && temp_dfas.size() == 1) { - /* no need to merge there is only one dfa */ - rdfa = move(temp_dfas[0]); - return true; + if (dead.empty()) { + return; + } + + for (auto v : dead) { + remove_vertex(v, trie); } - /* do a merge of the new dfas */ + DEBUG_PRINTF("%zu vertices remain\n", num_vertices(trie)); - vector to_merge; + renumber_edges(trie); + renumber_vertices(trie); +} - if (rdfa) {/* also include the existing dfa */ - to_merge.push_back(rdfa.get()); +static +vector getAlphabet(const LitTrie &trie, bool nocase) { + vector esets = {CharReach::dot()}; + for (auto v : vertices_range(trie)) { + if (v == trie.root) { + continue; + } + + CharReach cr; + if (nocase) { + cr.set(mytoupper(trie[v].c)); + cr.set(mytolower(trie[v].c)); + } else { + cr.set(trie[v].c); + } + + for (size_t i = 0; i < esets.size(); i++) { + if (esets[i].count() == 1) { + continue; + } + + CharReach t = cr & esets[i]; + if (t.any() && t != esets[i]) { + esets[i] &= ~t; + esets.push_back(t); + } + } } - for (const auto &d : temp_dfas) { - to_merge.push_back(d.get()); + // For deterministic compiles. 
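    // (Sorting fixes the class order, which would otherwise depend on
    // vertex iteration order. For example, the literals "ab" and "ad"
    // refine the initial "all 256 symbols" class down to {a}, {b}, {d}
    // plus the remainder class, so the DFA sees a 4-symbol alphabet.)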
+ sort(esets.begin(), esets.end()); + return esets; +} + +static +u16 buildAlphabet(const LitTrie &trie, bool nocase, + array &alpha, + array &unalpha) { + const auto &esets = getAlphabet(trie, nocase); + + u16 i = 0; + for (const auto &cr : esets) { + u16 leader = cr.find_first(); + for (size_t s = cr.find_first(); s != cr.npos; s = cr.find_next(s)) { + alpha[s] = i; + } + unalpha[i] = leader; + i++; } - assert(to_merge.size() > 1); + for (u16 j = N_CHARS; j < ALPHABET_SIZE; j++, i++) { + alpha[j] = i; + unalpha[i] = j; + } - while (to_merge.size() > LITERAL_MERGE_CHUNK_SIZE) { - vector small_merge; - small_merge.insert(small_merge.end(), to_merge.begin(), - to_merge.begin() + LITERAL_MERGE_CHUNK_SIZE); + DEBUG_PRINTF("alphabet size %u\n", i); + return i; +} - temp_dfas.push_back( - mergeAllDfas(small_merge, DFA_MERGE_MAX_STATES, &rm, cc.grey)); +/** + * \brief Calculate state mapping, from vertex in trie to state index in BFS + * ordering. + */ +static +unordered_map +makeStateMap(const LitTrie &trie, const vector &ordering) { + unordered_map state_ids; + state_ids.reserve(num_vertices(trie)); + u32 idx = DEAD_STATE + 1; + state_ids.emplace(trie.root, idx++); + for (auto v : ordering) { + state_ids.emplace(v, idx++); + } + assert(state_ids.size() == num_vertices(trie)); + return state_ids; +} - if (!temp_dfas.back()) { - DEBUG_PRINTF("merge failed\n"); - poisoned = true; - return false; +/** \brief Construct a raw_dfa from a literal trie. */ +static +unique_ptr buildDfa(LitTrie &trie, bool nocase) { + DEBUG_PRINTF("trie has %zu states\n", num_vertices(trie)); + + vector ordering; + map failure_map; + buildAutomaton(trie, failure_map, ordering); + + // Construct DFA states in BFS order. + const auto state_ids = makeStateMap(trie, ordering); + + auto rdfa = make_unique(NFA_OUTFIX); + + // Calculate alphabet. + array unalpha; + auto &alpha = rdfa->alpha_remap; + rdfa->alpha_size = buildAlphabet(trie, nocase, alpha, unalpha); + + // Construct states and transitions. + const u16 root_state = state_ids.at(trie.root); + assert(root_state == DEAD_STATE + 1); + rdfa->start_anchored = root_state; + rdfa->start_floating = root_state; + rdfa->states.resize(num_vertices(trie) + 1, dstate(rdfa->alpha_size)); + + // Dead state. + fill(rdfa->states[DEAD_STATE].next.begin(), + rdfa->states[DEAD_STATE].next.end(), DEAD_STATE); + + for (auto u : vertices_range(trie)) { + auto u_state = state_ids.at(u); + DEBUG_PRINTF("state %u\n", u_state); + assert(u_state < rdfa->states.size()); + auto &ds = rdfa->states[u_state]; + ds.reports = trie[u].reports; + if (!ds.reports.empty()) { + DEBUG_PRINTF("reports: %s\n", as_string_list(ds.reports).c_str()); } - to_merge.erase(to_merge.begin(), - to_merge.begin() + LITERAL_MERGE_CHUNK_SIZE); - to_merge.push_back(temp_dfas.back().get()); + // Set daddy state from failure map. + if (u == trie.root) { + ds.daddy = DEAD_STATE; + } else { + assert(contains(failure_map, u)); + ds.daddy = state_ids.at(failure_map.at(u)); + } + + // By default, transition back to the root. + fill(ds.next.begin(), ds.next.end(), root_state); + // TOP should be a self-loop. + ds.next[alpha[TOP]] = u_state; + + // Add in the real transitions. 
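        // (Worked example: for literals "ab" and "ac" the trie is
        // root -> a -> {b, c}, numbered root=1, a=2, b=3, c=4 in BFS
        // order. State 2's row maps sym(b)->3, sym(c)->4 and sym(a)->2,
        // an edge back to itself added via the failure map by
        // buildAutomaton(), with all other symbols going back to the
        // root state.)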
+ for (auto v : adjacent_vertices_range(u, trie)) { + if (v == trie.root) { + continue; + } + auto v_state = state_ids.at(v); + u16 sym = alpha[trie[v].c]; + DEBUG_PRINTF("edge to %u on 0x%02x (sym %u)\n", v_state, + trie[v].c, sym); + assert(sym < ds.next.size()); + assert(ds.next[sym] == root_state); + ds.next[sym] = v_state; + } + } + + return rdfa; +} + +bool SmallWriteBuildImpl::determiniseLiterals() { + DEBUG_PRINTF("handling literals\n"); + assert(!poisoned); + assert(num_literals <= cc.grey.smallWriteMaxLiterals); + + if (is_empty(lit_trie) && is_empty(lit_trie_nocase)) { + DEBUG_PRINTF("no literals\n"); + return true; /* nothing to do */ + } + + vector> dfas; + + if (!is_empty(lit_trie)) { + dfas.push_back(buildDfa(lit_trie, false)); + DEBUG_PRINTF("caseful literal dfa with %zu states\n", + dfas.back()->states.size()); + } + if (!is_empty(lit_trie_nocase)) { + dfas.push_back(buildDfa(lit_trie_nocase, true)); + DEBUG_PRINTF("nocase literal dfa with %zu states\n", + dfas.back()->states.size()); + } + + if (rdfa) { + dfas.push_back(move(rdfa)); + DEBUG_PRINTF("general dfa with %zu states\n", + dfas.back()->states.size()); + } + + // If we only have one DFA, no merging is necessary. + if (dfas.size() == 1) { + DEBUG_PRINTF("only one dfa\n"); + rdfa = move(dfas.front()); + return true; + } + + // Merge all DFAs. + vector to_merge; + for (const auto &d : dfas) { + to_merge.push_back(d.get()); } auto merged = mergeAllDfas(to_merge, DFA_MERGE_MAX_STATES, &rm, cc.grey); @@ -334,11 +760,11 @@ bool SmallWriteBuildImpl::determiniseLiterals() { return false; } - DEBUG_PRINTF("merge succeeded, built %p\n", merged.get()); + DEBUG_PRINTF("merge succeeded, built dfa with %zu states\n", + merged->states.size()); - // Replace our only DFA with the merged one + // Replace our only DFA with the merged one. rdfa = move(merged); - return true; } @@ -385,30 +811,36 @@ bool is_slow(const raw_dfa &rdfa, const set &accel, } static -aligned_unique_ptr getDfa(raw_dfa &rdfa, const CompileContext &cc, - const ReportManager &rm, - set &accel_states) { - aligned_unique_ptr dfa = nullptr; +bytecode_ptr getDfa(raw_dfa &rdfa, const CompileContext &cc, + const ReportManager &rm, bool has_non_literals, + set &accel_states) { + // If we determinised only literals, then we only need to consider the init + // states for acceleration. + bool only_accel_init = !has_non_literals; + bool trust_daddy_states = !has_non_literals; + + bytecode_ptr dfa = nullptr; if (cc.grey.allowSmallWriteSheng) { - dfa = shengCompile(rdfa, cc, rm, &accel_states); + dfa = shengCompile(rdfa, cc, rm, only_accel_init, &accel_states); } if (!dfa) { - dfa = mcclellanCompile(rdfa, cc, rm, &accel_states); + dfa = mcclellanCompile(rdfa, cc, rm, only_accel_init, + trust_daddy_states, &accel_states); } return dfa; } static -aligned_unique_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, - const CompileContext &cc, - const ReportManager &rm, u32 *start_offset, - u32 *small_region) { +bytecode_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, + const CompileContext &cc, const ReportManager &rm, + bool has_non_literals, u32 *start_offset, + u32 *small_region) { *start_offset = remove_leading_dots(rdfa); // Unleash the McClellan! 
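    /* (i.e. compile the merged raw_dfa into an executable engine; getDfa()
     * above prefers a Sheng engine when allowed and falls back to
     * McClellan.) */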
set accel_states; - auto nfa = getDfa(rdfa, cc, rm, accel_states); + auto nfa = getDfa(rdfa, cc, rm, has_non_literals, accel_states); if (!nfa) { DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n"); return nullptr; @@ -420,14 +852,14 @@ aligned_unique_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, if (*small_region <= *start_offset) { return nullptr; } - if (prune_overlong(rdfa, *small_region - *start_offset)) { + if (clear_deeper_reports(rdfa, *small_region - *start_offset)) { minimize_hopcroft(rdfa, cc.grey); if (rdfa.start_anchored == DEAD_STATE) { DEBUG_PRINTF("all patterns pruned out\n"); return nullptr; } - nfa = getDfa(rdfa, cc, rm, accel_states); + nfa = getDfa(rdfa, cc, rm, has_non_literals, accel_states); if (!nfa) { DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n"); assert(0); /* able to build orig dfa but not the trimmed? */ @@ -456,9 +888,10 @@ unique_ptr makeSmallWriteBuilder(size_t num_patterns, return ue2::make_unique(num_patterns, rm, cc); } -aligned_unique_ptr -SmallWriteBuildImpl::build(u32 roseQuality) { - if (!rdfa && cand_literals.empty()) { +bytecode_ptr SmallWriteBuildImpl::build(u32 roseQuality) { + const bool has_literals = !is_empty(lit_trie) || !is_empty(lit_trie_nocase); + const bool has_non_literals = rdfa != nullptr; + if (!rdfa && !has_literals) { DEBUG_PRINTF("no smallwrite engine\n"); poisoned = true; return nullptr; @@ -469,17 +902,34 @@ SmallWriteBuildImpl::build(u32 roseQuality) { return nullptr; } + // We happen to know that if the rose is high quality, we're going to limit + // depth further. + if (roseQuality) { + u32 max_depth = cc.grey.smallWriteLargestBufferBad; + if (!is_empty(lit_trie)) { + pruneTrie(lit_trie, max_depth); + } + if (!is_empty(lit_trie_nocase)) { + pruneTrie(lit_trie_nocase, max_depth); + } + } + if (!determiniseLiterals()) { DEBUG_PRINTF("some literal could not be made into a smallwrite dfa\n"); return nullptr; } + if (!rdfa) { + DEBUG_PRINTF("no dfa, pruned everything away\n"); + return nullptr; + } + DEBUG_PRINTF("building rdfa %p\n", rdfa.get()); u32 start_offset; u32 small_region; - auto nfa = - prepEngine(*rdfa, roseQuality, cc, rm, &start_offset, &small_region); + auto nfa = prepEngine(*rdfa, roseQuality, cc, rm, has_non_literals, + &start_offset, &small_region); if (!nfa) { DEBUG_PRINTF("some smallwrite outfix could not be prepped\n"); /* just skip the smallwrite optimization */ @@ -488,7 +938,7 @@ SmallWriteBuildImpl::build(u32 roseQuality) { } u32 size = sizeof(SmallWriteEngine) + nfa->length; - auto smwr = aligned_zmalloc_unique(size); + auto smwr = make_zeroed_bytecode_ptr(size); smwr->size = size; smwr->start_offset = start_offset; @@ -510,15 +960,11 @@ set SmallWriteBuildImpl::all_reports() const { if (rdfa) { insert(&reports, ::ue2::all_reports(*rdfa)); } - for (const auto &cand : cand_literals) { - reports.insert(cand.second); - } - return reports; -} -size_t smwrSize(const SmallWriteEngine *smwr) { - assert(smwr); - return smwr->size; + insert(&reports, ::ue2::all_reports(lit_trie)); + insert(&reports, ::ue2::all_reports(lit_trie_nocase)); + + return reports; } } // namespace ue2 diff --git a/src/smallwrite/smallwrite_build.h b/src/smallwrite/smallwrite_build.h index 84c6df3a2..648b13db7 100644 --- a/src/smallwrite/smallwrite_build.h +++ b/src/smallwrite/smallwrite_build.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the 
following conditions are met: @@ -30,49 +30,50 @@ #define SMWR_BUILD_H /** - * SmallWrite Build interface. Everything you ever needed to feed literals in - * and get a SmallWriteEngine out. This header should be everything needed by - * the rest of UE2. + * \file + * \brief Small-write engine build interface. + * + * Everything you ever needed to feed literals in and get a SmallWriteEngine + * out. This header should be everything needed by the rest of UE2. */ #include "ue2common.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" +#include "util/noncopyable.h" +#include #include -#include - struct SmallWriteEngine; namespace ue2 { struct CompileContext; struct ue2_literal; -class NGWrapper; -class ReportManager; +class ExpressionInfo; +class NGHolder; +class ReportManager; -// Abstract interface intended for callers from elsewhere in the tree, real -// underlying implementation is SmallWriteBuildImpl in smwr_build_impl.h. -class SmallWriteBuild : boost::noncopyable { +/** + * Abstract interface intended for callers from elsewhere in the tree, real + * underlying implementation is SmallWriteBuildImpl in smwr_build_impl.h. + */ +class SmallWriteBuild : noncopyable { public: - // Destructor virtual ~SmallWriteBuild(); - // Construct a runtime implementation. - virtual ue2::aligned_unique_ptr build(u32 roseQuality) = 0; + virtual bytecode_ptr build(u32 roseQuality) = 0; - virtual void add(const NGWrapper &w) = 0; + virtual void add(const NGHolder &g, const ExpressionInfo &expr) = 0; virtual void add(const ue2_literal &literal, ReportID r) = 0; virtual std::set all_reports() const = 0; }; -// Construct a usable SmallWrite builder. -std::unique_ptr makeSmallWriteBuilder(size_t num_patterns, - const ReportManager &rm, - const CompileContext &cc); - -size_t smwrSize(const SmallWriteEngine *t); +/** \brief Construct a usable SmallWrite builder. */ +std::unique_ptr +makeSmallWriteBuilder(size_t num_patterns, const ReportManager &rm, + const CompileContext &cc); } // namespace ue2 diff --git a/src/som/slot_manager.cpp b/src/som/slot_manager.cpp index b1aa6bf78..3dc74d3da 100644 --- a/src/som/slot_manager.cpp +++ b/src/som/slot_manager.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,9 +26,11 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief SOM Slot Manager. */ + #include "slot_manager.h" #include "slot_manager_internal.h" @@ -245,7 +247,7 @@ u32 SomSlotManager::numSomSlots() const { return nextSomSlot; } -u32 SomSlotManager::addRevNfa(aligned_unique_ptr nfa, u32 maxWidth) { +u32 SomSlotManager::addRevNfa(bytecode_ptr nfa, u32 maxWidth) { u32 rv = verify_u32(rev_nfas.size()); rev_nfas.push_back(move(nfa)); diff --git a/src/som/slot_manager.h b/src/som/slot_manager.h index 971ea3623..ddb105f53 100644 --- a/src/som/slot_manager.h +++ b/src/som/slot_manager.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief SOM Slot Manager. 
*/ @@ -35,12 +36,12 @@ #include "ue2common.h" #include "nfagraph/ng_holder.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" +#include "util/noncopyable.h" #include "util/ue2_containers.h" #include #include -#include struct NFA; @@ -54,7 +55,7 @@ struct SlotCache; /** \brief SOM slot manager. Used to hand out SOM slots and track their * relationships during SOM construction. Also stores reverse NFAs used for * SOM. */ -class SomSlotManager : boost::noncopyable { +class SomSlotManager : noncopyable { public: explicit SomSlotManager(u8 precision); ~SomSlotManager(); @@ -78,11 +79,11 @@ class SomSlotManager : boost::noncopyable { u32 numSomSlots() const; - const std::deque> &getRevNfas() const { + const std::deque> &getRevNfas() const { return rev_nfas; } - u32 addRevNfa(aligned_unique_ptr nfa, u32 maxWidth); + u32 addRevNfa(bytecode_ptr nfa, u32 maxWidth); u32 somHistoryRequired() const { return historyRequired; } @@ -97,7 +98,7 @@ class SomSlotManager : boost::noncopyable { std::unique_ptr cache; /** \brief Reverse NFAs used for SOM support. */ - std::deque> rev_nfas; + std::deque> rev_nfas; /** \brief In streaming mode, the amount of history we've committed to * using for SOM rev NFAs. */ diff --git a/src/som/som.h b/src/som/som.h index 4a3809843..e759cf0a2 100644 --- a/src/som/som.h +++ b/src/som/som.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,17 +26,22 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Common SOM definitions. */ #ifndef UE2_SOM_H #define UE2_SOM_H +namespace ue2 { + /** \brief Enumeration specifying a start of match behaviour. */ enum som_type { SOM_NONE, //!< No SOM required SOM_LEFT //!< Exact leftmost SOM }; +} // namespace ue2 + #endif // UE2_SOM_H diff --git a/src/ue2common.h b/src/ue2common.h index e1f03f721..4bec83155 100644 --- a/src/ue2common.h +++ b/src/ue2common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -189,8 +189,8 @@ typedef u32 ReportID; #define unlikely(x) __builtin_expect(!!(x), 0) #endif #else -#define likely(x) (x) -#define unlikely(x) (x) +#define likely(x) (x) +#define unlikely(x) (x) #endif #if !defined(RELEASE_BUILD) || defined(DEBUG) diff --git a/src/util/alloc.h b/src/util/alloc.h index 191bc387e..de20c8d02 100644 --- a/src/util/alloc.h +++ b/src/util/alloc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Aligned memory alloc/free. */ @@ -51,25 +52,6 @@ void *aligned_zmalloc(size_t size); /** \brief Free a pointer allocated with \ref aligned_zmalloc. */ void aligned_free(void *ptr); -template struct AlignedDeleter { - void operator()(T *ptr) const { aligned_free(ptr); } -}; -template -using aligned_unique_ptr = std::unique_ptr>; - -/** \brief 64-byte aligned, zeroed malloc that returns an appropriately-typed - * aligned_unique_ptr. 
- * - * If the requested size cannot be allocated, throws std::bad_alloc. - */ -template -inline -aligned_unique_ptr aligned_zmalloc_unique(size_t size) { - T* ptr = static_cast(aligned_zmalloc(size)); - assert(ptr); // Guaranteed by aligned_zmalloc. - return aligned_unique_ptr(ptr); -} - /** \brief Internal use only, used by AlignedAllocator. */ void *aligned_malloc_internal(size_t size, size_t align); diff --git a/src/nfa/multishufti.h b/src/util/arch.h similarity index 54% rename from src/nfa/multishufti.h rename to src/util/arch.h index af5784831..c78ee9ced 100644 --- a/src/nfa/multishufti.h +++ b/src/util/arch.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -27,44 +27,60 @@ */ /** \file - * \brief Multishufti: multibyte version of Shufti - * - * Utilises the SSSE3 pshufb shuffle instruction + * \brief Per-platform architecture definitions */ -#ifndef MULTISHUFTI_H -#define MULTISHUFTI_H +#ifndef UTIL_ARCH_H_ +#define UTIL_ARCH_H_ -#include "ue2common.h" -#include "util/simd_types.h" +#if defined(__SSE2__) || defined(_M_X64) || (_M_IX86_FP >= 2) +#define HAVE_SSE2 +#endif -#ifdef __cplusplus -extern "C" -{ +#if defined(__SSE4_1__) || (defined(_WIN32) && defined(__AVX__)) +#define HAVE_SSE41 #endif -const u8 *long_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end, const u8 run_len); +#if defined(__SSE4_2__) || (defined(_WIN32) && defined(__AVX__)) +#define HAVE_SSE42 +#endif -const u8 *longgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end, const u8 run_len); +#if defined(__AVX__) +#define HAVE_AVX +#endif -const u8 *shift_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end, const u8 run_len); +#if defined(__AVX2__) +#define HAVE_AVX2 +#endif -const u8 *shiftgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end, const u8 run_len); +#if defined(__AVX512BW__) +#define HAVE_AVX512 +#endif -const u8 *doubleshift_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end, const u8 run_len, - const u8 run2_len); +/* + * ICC and MSVC don't break out POPCNT or BMI/2 as separate pre-def macros + */ +#if defined(__POPCNT__) || \ + (defined(__INTEL_COMPILER) && defined(__SSE4_2__)) || \ + (defined(_WIN32) && defined(__AVX__)) +#define HAVE_POPCOUNT_INSTR +#endif -const u8 *doubleshiftgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end, const u8 run_len, - const u8 run2_len); +#if defined(__BMI__) || (defined(_WIN32) && defined(__AVX2__)) || \ + (defined(__INTEL_COMPILER) && defined(__AVX2__)) +#define HAVE_BMI +#endif -#ifdef __cplusplus -} +#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__)) || \ + (defined(__INTEL_COMPILER) && defined(__AVX2__)) +#define HAVE_BMI2 #endif +/* + * MSVC uses a different form of inline asm + */ +#if defined(_WIN32) && defined(_MSC_VER) +#define NO_ASM #endif + +#endif // UTIL_ARCH_H_ diff --git a/src/util/bitutils.h b/src/util/bitutils.h index d144e8793..c545ee187 100644 --- a/src/util/bitutils.h +++ b/src/util/bitutils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,37 +35,8 @@ #include 
"ue2common.h" #include "popcount.h" - -#ifdef __cplusplus -# if defined(HAVE_CXX_X86INTRIN_H) -# define USE_X86INTRIN_H -# endif -#else // C, baby -# if defined(HAVE_C_X86INTRIN_H) -# define USE_X86INTRIN_H -# endif -#endif - -#ifdef __cplusplus -# if defined(HAVE_CXX_INTRIN_H) -# define USE_INTRIN_H -# endif -#else // C, baby -# if defined(HAVE_C_INTRIN_H) -# define USE_INTRIN_H -# endif -#endif - -#if defined(USE_X86INTRIN_H) -#include -#elif defined(USE_INTRIN_H) -#include -#endif - -// MSVC has a different form of inline asm -#ifdef _WIN32 -#define NO_ASM -#endif +#include "util/arch.h" +#include "util/intrinsics.h" #define CASE_BIT 0x20 #define CASE_CLEAR 0xdf @@ -269,7 +240,7 @@ u32 findAndClearMSB_64(u64a *v) { static really_inline u32 compress32(u32 x, u32 m) { -#if defined(__BMI2__) +#if defined(HAVE_BMI2) // BMI2 has a single instruction for this operation. return _pext_u32(x, m); #else @@ -304,7 +275,7 @@ u32 compress32(u32 x, u32 m) { static really_inline u64a compress64(u64a x, u64a m) { -#if defined(ARCH_X86_64) && defined(__BMI2__) +#if defined(ARCH_X86_64) && defined(HAVE_BMI2) // BMI2 has a single instruction for this operation. return _pext_u64(x, m); #else @@ -340,7 +311,7 @@ u64a compress64(u64a x, u64a m) { static really_inline u32 expand32(u32 x, u32 m) { -#if defined(__BMI2__) +#if defined(HAVE_BMI2) // BMI2 has a single instruction for this operation. return _pdep_u32(x, m); #else @@ -380,7 +351,7 @@ u32 expand32(u32 x, u32 m) { static really_inline u64a expand64(u64a x, u64a m) { -#if defined(ARCH_X86_64) && defined(__BMI2__) +#if defined(ARCH_X86_64) && defined(HAVE_BMI2) // BMI2 has a single instruction for this operation. return _pdep_u64(x, m); #else @@ -471,13 +442,9 @@ u32 rank_in_mask64(u64a mask, u32 bit) { return popcount64(mask); } -#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__)) -#define HAVE_PEXT -#endif - static really_inline u32 pext32(u32 x, u32 mask) { -#if defined(HAVE_PEXT) +#if defined(HAVE_BMI2) // Intel BMI2 can do this operation in one instruction. return _pext_u32(x, mask); #else @@ -497,7 +464,7 @@ u32 pext32(u32 x, u32 mask) { static really_inline u64a pext64(u64a x, u64a mask) { -#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) +#if defined(HAVE_BMI2) && defined(ARCH_64_BIT) // Intel BMI2 can do this operation in one instruction. 
return _pext_u64(x, mask); #else @@ -515,7 +482,7 @@ u64a pext64(u64a x, u64a mask) { #endif } -#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) +#if defined(HAVE_BMI2) && defined(ARCH_64_BIT) static really_inline u64a pdep64(u64a x, u64a mask) { return _pdep_u64(x, mask); diff --git a/src/util/boundary_reports.h b/src/util/boundary_reports.h index 7ad93ba1e..b2bb1c9b0 100644 --- a/src/util/boundary_reports.h +++ b/src/util/boundary_reports.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,13 +30,13 @@ #define BOUNDARY_REPORTS_H #include "ue2common.h" +#include "util/noncopyable.h" #include -#include namespace ue2 { -struct BoundaryReports : boost::noncopyable { +struct BoundaryReports : noncopyable { std::set report_at_0; /* set of internal reports to fire * unconditionally at offset 0 */ std::set report_at_0_eod; /* set of internal reports to fire diff --git a/src/util/bytecode_ptr.h b/src/util/bytecode_ptr.h new file mode 100644 index 000000000..f1f2e5ef8 --- /dev/null +++ b/src/util/bytecode_ptr.h @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief bytecode_ptr: Smart pointer with unique ownership that knows its + * length and alignment. + */ + +#ifndef UTIL_BYTECODE_PTR_H +#define UTIL_BYTECODE_PTR_H + +#include "util/alloc.h" +#include "util/operators.h" + +#include // std::max +#include +#include +#include // std::logic_error + +namespace ue2 { + +/** + * \brief Smart pointer that knows its length and alignment and behaves like a + * std::unique_ptr -- i.e. it retains unique ownership of the memory region. + * + * This is intended to be used for flat aligned memory regions that will + * eventually end up copied into the Hyperscan bytecode. 
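 *
 * A typical pattern in the build code is to allocate a region with
 * make_zeroed_bytecode_ptr<T>(), fill it in place, then shrink() it to the
 * bytes actually used; size() and align() travel with the pointer, so no
 * side-band bookkeeping is needed when it is finally copied out.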
+ */ +template +class bytecode_ptr : totally_ordered> { +public: + bytecode_ptr() = default; + explicit bytecode_ptr(size_t bytes_in, size_t alignment_in = alignof(T)) + : bytes(bytes_in), alignment(alignment_in) { + // posix_memalign doesn't like us asking for smaller alignment. + size_t mem_align = std::max(alignment, sizeof(void *)); + ptr.reset(static_cast(aligned_malloc_internal(bytes, mem_align))); + if (!ptr) { + throw std::bad_alloc(); + } + } + + bytecode_ptr(std::nullptr_t) {} + + T *get() const { return ptr.get(); } + + T &operator*() { return *ptr; } + const T &operator*() const { return *ptr; } + + T *operator->() { return ptr.get(); } + const T *operator->() const { return ptr.get(); } + + explicit operator bool() const { return ptr != nullptr; } + + /** \brief Move converter for shared_ptr. */ + template ::value>::type> + operator std::shared_ptr() && { + auto d = ptr.get_deleter(); + return std::shared_ptr(ptr.release(), d); + } + + void reset(T *p = nullptr) { ptr.reset(p); } + + T *release() { + auto *p = ptr.release(); + bytes = 0; + alignment = 0; + return p; + } + + void swap(bytecode_ptr &other) { + using std::swap; + swap(ptr, other.ptr); + swap(bytes, other.bytes); + swap(alignment, other.alignment); + } + + /** + * \brief Reduces the apparent size of the memory region. Note that this + * does not reallocate and copy, it just changes the value returned by + * size(). + */ + void shrink(size_t new_size) { + if (new_size > bytes) { + assert(0); + throw std::logic_error("Must shrink to a smaller value"); + } + bytes = new_size; + } + + /** \brief Returns size of the memory region in bytes. */ + size_t size() const { return bytes; } + + /** \brief Returns alignment of the memory region in bytes. */ + size_t align() const { return alignment; } + + bool operator==(const bytecode_ptr &a) const { return ptr == a.ptr; } + bool operator<(const bytecode_ptr &a) const { return ptr < a.ptr; } + +private: + /** \brief Deleter function for std::unique_ptr. */ + template struct deleter { + void operator()(DT *p) const { aligned_free_internal(p); } + }; + + std::unique_ptr> ptr; //!< Underlying pointer. + size_t bytes = 0; //!< Size of memory region in bytes. + size_t alignment = 0; //!< Alignment of memory region in bytes. +}; + +/** + * \brief Constructs a bytecode_ptr with the given size and alignment. + */ +template +inline bytecode_ptr make_bytecode_ptr(size_t size, + size_t align = alignof(T)) { + return bytecode_ptr(size, align); +} + +/** + * \brief Constructs a bytecode_ptr with the given size and alignment and + * fills the memory region with zeroes. + */ +template +inline bytecode_ptr make_zeroed_bytecode_ptr(size_t size, + size_t align = alignof(T)) { + auto ptr = make_bytecode_ptr(size, align); + std::memset(ptr.get(), 0, size); + return ptr; +} + +} // namespace ue2 + +#endif // UTIL_BYTECODE_PTR_H diff --git a/src/util/container.h b/src/util/container.h index e2cfb485e..68f60e99e 100644 --- a/src/util/container.h +++ b/src/util/container.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -89,6 +89,14 @@ auto make_vector_from(const std::pair &range) return std::vector(range.first, range.second); } +/** \brief Sort a sequence container and remove duplicates. 
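 *
 * For example, given std::vector<int> v{3, 1, 3, 2}, sort_and_unique(v)
 * leaves v == {1, 2, 3}.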
*/ +template > +void sort_and_unique(C &container, Compare comp = Compare()) { + std::sort(std::begin(container), std::end(container), comp); + container.erase(std::unique(std::begin(container), std::end(container)), + std::end(container)); +} + /** \brief Returns a set containing the keys in the given associative * container. */ template @@ -194,6 +202,17 @@ void erase_all(C *container, const D &donor) { } } + +template +bool any_of_in(const C &c, Pred p) { + return std::any_of(c.begin(), c.end(), std::move(p)); +} + +template +bool all_of_in(const C &c, Pred p) { + return std::all_of(c.begin(), c.end(), std::move(p)); +} + } // namespace ue2 #ifdef DUMP_SUPPORT diff --git a/src/util/cpuid_flags.c b/src/util/cpuid_flags.c index dba147ee1..c0ab09afb 100644 --- a/src/util/cpuid_flags.c +++ b/src/util/cpuid_flags.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,6 +30,7 @@ #include "ue2common.h" #include "hs_compile.h" // for HS_MODE_ flags #include "hs_internal.h" +#include "util/arch.h" #ifndef _WIN32 #include @@ -55,9 +56,18 @@ #define AVX2 (1 << 5) #define BMI2 (1 << 8) +// Structured Extended Feature Flags Enumeration Leaf EBX values +#define AVX512F (1 << 16) +#define AVX512BW (1 << 30) + // Extended Control Register 0 (XCR0) values #define XCR0_SSE (1 << 1) #define XCR0_AVX (1 << 2) +#define XCR0_OPMASK (1 << 5) // k-regs +#define XCR0_ZMM_Hi256 (1 << 6) // upper 256 bits of ZMM0-ZMM15 +#define XCR0_Hi16_ZMM (1 << 7) // ZMM16-ZMM31 + +#define XCR0_AVX512 (XCR0_OPMASK | XCR0_ZMM_Hi256 | XCR0_Hi16_ZMM) static __inline void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax, @@ -123,6 +133,48 @@ int check_avx2(void) { #endif } +int check_avx512(void) { + /* + * For our purposes, having avx512 really means "can we use AVX512BW?" 
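     *
     * That means the OS must have enabled the AVX-512 register state
     * (opmask regs, upper halves of ZMM0-15, ZMM16-31) via XCR0, and the
     * CPU must advertise both AVX512F and AVX512BW in CPUID leaf 7; the
     * non-ICC path below checks exactly that.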
+ */ +#if defined(__INTEL_COMPILER) + return _may_i_use_cpu_feature(_FEATURE_AVX512BW | _FEATURE_AVX512VL); +#else + unsigned int eax, ebx, ecx, edx; + + cpuid(1, 0, &eax, &ebx, &ecx, &edx); + + /* check XSAVE is enabled by OS */ + if (!(ecx & XSAVE)) { + DEBUG_PRINTF("AVX and XSAVE not supported\n"); + return 0; + } + + /* check that AVX 512 registers are enabled by OS */ + u64a xcr0 = xgetbv(0); + if ((xcr0 & XCR0_AVX512) != XCR0_AVX512) { + DEBUG_PRINTF("AVX512 registers not enabled\n"); + return 0; + } + + /* ECX and EDX contain capability flags */ + ecx = 0; + cpuid(7, 0, &eax, &ebx, &ecx, &edx); + + if (!(ebx & AVX512F)) { + DEBUG_PRINTF("AVX512F (AVX512 Foundation) instructions not enabled\n"); + return 0; + } + + if (ebx & AVX512BW) { + DEBUG_PRINTF("AVX512BW instructions enabled\n"); + return 1; + } + + return 0; +#endif +} + u64a cpuid_flags(void) { u64a cap = 0; @@ -131,10 +183,19 @@ u64a cpuid_flags(void) { cap |= HS_CPU_FEATURES_AVX2; } -#if !defined(__AVX2__) + if (check_avx512()) { + DEBUG_PRINTF("AVX512 enabled\n"); + cap |= HS_CPU_FEATURES_AVX512; + } + +#if !defined(FAT_RUNTIME) && !defined(HAVE_AVX2) cap &= ~HS_CPU_FEATURES_AVX2; #endif +#if !defined(FAT_RUNTIME) && !defined(HAVE_AVX512) + cap &= ~HS_CPU_FEATURES_AVX512; +#endif + return cap; } @@ -167,33 +228,37 @@ struct family_id { * Family Numbers" */ static const struct family_id known_microarch[] = { { 0x6, 0x37, HS_TUNE_FAMILY_SLM }, /* baytrail */ + { 0x6, 0x4A, HS_TUNE_FAMILY_SLM }, /* silvermont */ + { 0x6, 0x4C, HS_TUNE_FAMILY_SLM }, /* silvermont */ { 0x6, 0x4D, HS_TUNE_FAMILY_SLM }, /* avoton, rangley */ + { 0x6, 0x5A, HS_TUNE_FAMILY_SLM }, /* silvermont */ + { 0x6, 0x5D, HS_TUNE_FAMILY_SLM }, /* silvermont */ + + { 0x6, 0x5C, HS_TUNE_FAMILY_GLM }, /* goldmont */ + { 0x6, 0x5F, HS_TUNE_FAMILY_GLM }, /* denverton */ { 0x6, 0x3C, HS_TUNE_FAMILY_HSW }, /* haswell */ { 0x6, 0x45, HS_TUNE_FAMILY_HSW }, /* haswell */ { 0x6, 0x46, HS_TUNE_FAMILY_HSW }, /* haswell */ - { 0x6, 0x3F, HS_TUNE_FAMILY_HSW }, /* haswell */ + { 0x6, 0x3F, HS_TUNE_FAMILY_HSW }, /* haswell Xeon */ - { 0x6, 0x3E, HS_TUNE_FAMILY_IVB }, /* ivybridge */ + { 0x6, 0x3E, HS_TUNE_FAMILY_IVB }, /* ivybridge Xeon */ { 0x6, 0x3A, HS_TUNE_FAMILY_IVB }, /* ivybridge */ { 0x6, 0x2A, HS_TUNE_FAMILY_SNB }, /* sandybridge */ - { 0x6, 0x2D, HS_TUNE_FAMILY_SNB }, /* sandybridge */ + { 0x6, 0x2D, HS_TUNE_FAMILY_SNB }, /* sandybridge Xeon */ { 0x6, 0x3D, HS_TUNE_FAMILY_BDW }, /* broadwell Core-M */ + { 0x6, 0x47, HS_TUNE_FAMILY_BDW }, /* broadwell */ { 0x6, 0x4F, HS_TUNE_FAMILY_BDW }, /* broadwell xeon */ { 0x6, 0x56, HS_TUNE_FAMILY_BDW }, /* broadwell xeon-d */ -// { 0x6, 0x25, HS_TUNE_FAMILY_GENERIC }, /* westmere */ -// { 0x6, 0x2C, HS_TUNE_FAMILY_GENERIC }, /* westmere */ -// { 0x6, 0x2F, HS_TUNE_FAMILY_GENERIC }, /* westmere */ - -// { 0x6, 0x1E, HS_TUNE_FAMILY_GENERIC }, /* nehalem */ -// { 0x6, 0x1A, HS_TUNE_FAMILY_GENERIC }, /* nehalem */ -// { 0x6, 0x2E, HS_TUNE_FAMILY_GENERIC }, /* nehalem */ + { 0x6, 0x4E, HS_TUNE_FAMILY_SKL }, /* Skylake Mobile */ + { 0x6, 0x5E, HS_TUNE_FAMILY_SKL }, /* Skylake Core/E3 Xeon */ + { 0x6, 0x55, HS_TUNE_FAMILY_SKX }, /* Skylake Xeon */ -// { 0x6, 0x17, HS_TUNE_FAMILY_GENERIC }, /* penryn */ -// { 0x6, 0x1D, HS_TUNE_FAMILY_GENERIC }, /* penryn */ + { 0x6, 0x8E, HS_TUNE_FAMILY_SKL }, /* Kabylake Mobile */ + { 0x6, 0x9E, HS_TUNE_FAMILY_SKL }, /* Kabylake desktop */ }; @@ -203,10 +268,13 @@ const char *dumpTune(u32 tune) { #define T_CASE(x) case x: return #x; switch (tune) { T_CASE(HS_TUNE_FAMILY_SLM); + 
T_CASE(HS_TUNE_FAMILY_GLM); T_CASE(HS_TUNE_FAMILY_HSW); T_CASE(HS_TUNE_FAMILY_SNB); T_CASE(HS_TUNE_FAMILY_IVB); T_CASE(HS_TUNE_FAMILY_BDW); + T_CASE(HS_TUNE_FAMILY_SKL); + T_CASE(HS_TUNE_FAMILY_SKX); } #undef T_CASE return "unknown"; diff --git a/src/util/cpuid_flags.h b/src/util/cpuid_flags.h index 8b23d4958..d79c3832f 100644 --- a/src/util/cpuid_flags.h +++ b/src/util/cpuid_flags.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,8 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef CPUID_H_53FFCB14B257C2 -#define CPUID_H_53FFCB14B257C2 +#ifndef UTIL_CPUID_H_ +#define UTIL_CPUID_H_ #include "ue2common.h" @@ -41,6 +41,7 @@ u64a cpuid_flags(void); u32 cpuid_tune(void); +int check_avx512(void); int check_avx2(void); int check_ssse3(void); int check_sse42(void); @@ -50,5 +51,5 @@ int check_popcnt(void); } /* extern "C" */ #endif -#endif /* CPUID_H_53FFCB14B257C2 */ +#endif /* UTIL_CPUID_H_ */ diff --git a/src/util/depth.h b/src/util/depth.h index 977fd0c30..9af1ded88 100644 --- a/src/util/depth.h +++ b/src/util/depth.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,13 +34,13 @@ #define DEPTH_H #include "ue2common.h" +#include "util/hash.h" +#include "util/operators.h" #ifdef DUMP_SUPPORT #include #endif -#include - namespace ue2 { /** @@ -52,11 +52,12 @@ struct DepthOverflowError {}; * \brief Type used to represent depth information; value is either a count, * or the special values "infinity" and "unreachable". */ -class depth { +class depth : totally_ordered { public: - depth() : val(val_unreachable) {} + /** \brief The default depth is special value "unreachable". */ + depth() = default; - depth(u32 v) : val(v) { + explicit depth(u32 v) : val(v) { if (v > max_value()) { DEBUG_PRINTF("depth %u too large to represent!\n", v); throw DepthOverflowError(); @@ -92,11 +93,7 @@ class depth { } bool operator<(const depth &d) const { return val < d.val; } - bool operator>(const depth &d) const { return val > d.val; } - bool operator<=(const depth &d) const { return val <= d.val; } - bool operator>=(const depth &d) const { return val >= d.val; } bool operator==(const depth &d) const { return val == d.val; } - bool operator!=(const depth &d) const { return val != d.val; } // The following comparison operators exist for use against integer types // that are bigger than what we can safely convert to depth (such as those @@ -196,6 +193,29 @@ class depth { return *this; } + depth operator-(s32 d) const { + if (is_unreachable()) { + return unreachable(); + } + if (is_infinite()) { + return infinity(); + } + + s64a rv = val - d; + if (rv < 0 || (u64a)rv >= val_infinity) { + DEBUG_PRINTF("depth %lld too large to represent!\n", rv); + throw DepthOverflowError(); + } + + return depth((u32)rv); + } + + depth operator-=(s32 d) { + depth rv = *this - d; + *this = rv; + return *this; + } + #ifdef DUMP_SUPPORT /** \brief Render as a string, useful for debugging. 
*/ std::string str() const; @@ -209,17 +229,17 @@ class depth { static constexpr u32 val_infinity = (1u << 31) - 1; static constexpr u32 val_unreachable = 1u << 31; - u32 val; + u32 val = val_unreachable; }; /** * \brief Encapsulates a min/max pair. */ -struct DepthMinMax { - depth min; - depth max; +struct DepthMinMax : totally_ordered { + depth min{depth::infinity()}; + depth max{0}; - DepthMinMax() : min(depth::infinity()), max(depth(0)) {} + DepthMinMax() = default; DepthMinMax(const depth &mn, const depth &mx) : min(mn), max(mx) {} bool operator<(const DepthMinMax &b) const { @@ -233,21 +253,15 @@ struct DepthMinMax { return min == b.min && max == b.max; } - bool operator!=(const DepthMinMax &b) const { - return !(*this == b); - } - #ifdef DUMP_SUPPORT /** \brief Render as a string, useful for debugging. */ std::string str() const; #endif + }; inline size_t hash_value(const DepthMinMax &d) { - size_t val = 0; - boost::hash_combine(val, d.min); - boost::hash_combine(val, d.max); - return val; + return hash_all(d.min, d.max); } /** diff --git a/src/util/dump_charclass.cpp b/src/util/dump_charclass.cpp index 4c159ec24..4535777d1 100644 --- a/src/util/dump_charclass.cpp +++ b/src/util/dump_charclass.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -249,15 +249,6 @@ string describeClass(const CharReach &cr, size_t maxLength, return oss.str(); } -string describeClasses(const std::vector &v, size_t maxClassLength, - enum cc_output_t out_type) { - std::ostringstream oss; - for (const auto &cr : v) { - describeClass(oss, cr, maxClassLength, out_type); - } - return oss.str(); -} - // C stdio wrapper void describeClass(FILE *f, const CharReach &cr, size_t maxLength, enum cc_output_t out_type) { diff --git a/src/util/dump_charclass.h b/src/util/dump_charclass.h index 45b707f1e..999641340 100644 --- a/src/util/dump_charclass.h +++ b/src/util/dump_charclass.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,6 +37,7 @@ #include #include +#include #include #include @@ -55,9 +56,16 @@ void describeClass(std::ostream &os, const CharReach &cr, size_t maxLength = 16, std::string describeClass(const CharReach &cr, size_t maxLength = 16, enum cc_output_t out_type = CC_OUT_TEXT); -std::string describeClasses(const std::vector &v, +template +std::string describeClasses(const Container &container, size_t maxClassLength = 16, - enum cc_output_t out_type = CC_OUT_TEXT); + enum cc_output_t out_type = CC_OUT_TEXT) { + std::ostringstream oss; + for (const CharReach &cr : container) { + describeClass(oss, cr, maxClassLength, out_type); + } + return oss.str(); +} void describeClass(FILE *f, const CharReach &cr, size_t maxLength, enum cc_output_t out_type); diff --git a/src/util/dump_util.cpp b/src/util/dump_util.cpp index 5b961367c..782cba7a3 100644 --- a/src/util/dump_util.cpp +++ b/src/util/dump_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +33,8 @@ using 
namespace std; +namespace ue2 { + FILE *fopen_or_throw(const char *path, const char *mode) { FILE *f = fopen(path, mode); if (!f) { @@ -40,3 +42,5 @@ FILE *fopen_or_throw(const char *path, const char *mode) { } return f; } + +} // namespace ue2 diff --git a/src/util/dump_util.h b/src/util/dump_util.h index 487d2e7c3..f5ebe94a5 100644 --- a/src/util/dump_util.h +++ b/src/util/dump_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,9 +31,13 @@ #include +namespace ue2 { + /** * Same as fopen(), but on error throws an exception rather than returning NULL. */ FILE *fopen_or_throw(const char *path, const char *mode); +} // namespace ue2 + #endif diff --git a/src/util/hash.h b/src/util/hash.h index 0b5717729..6f76e43de 100644 --- a/src/util/hash.h +++ b/src/util/hash.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,6 +34,7 @@ #ifndef UTIL_HASH_H #define UTIL_HASH_H +#include #include namespace ue2 { @@ -69,6 +70,15 @@ size_t hash_all(Args&&... args) { return v; } +/** + * \brief Compute the hash of all the elements of any range on which we can + * call std::begin() and std::end(). + */ +template +size_t hash_range(const Range &r) { + return boost::hash_range(std::begin(r), std::end(r)); +} + } // namespace ue2 #endif // UTIL_HASH_H diff --git a/src/util/hash_dynamic_bitset.h b/src/util/hash_dynamic_bitset.h new file mode 100644 index 000000000..315aed34f --- /dev/null +++ b/src/util/hash_dynamic_bitset.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Hashing utility functions. 
+ */ + +#ifndef UTIL_HASH_DYNAMIC_BITSET_H +#define UTIL_HASH_DYNAMIC_BITSET_H + +#include +#include + +#include + +namespace ue2 { + +/** + * \brief An output iterator which calculates the combined hash of all elements + * written to it. + * + * The location to output the hash is provided to the constructor and should + * already be zero initialised. + */ +struct hash_output_it { + using value_type = void; + using difference_type = ptrdiff_t; + using pointer = void *; + using reference = void; + using iterator_category = std::output_iterator_tag; + + hash_output_it(size_t *hash_out = nullptr) : out(hash_out) {} + hash_output_it &operator++() { + return *this; + } + hash_output_it &operator++(int) { + return *this; + } + + struct deref_proxy { + deref_proxy(size_t *hash_out) : out(hash_out) {} + + template + void operator=(const T &val) const { + boost::hash_combine(*out, val); + } + + private: + size_t *out; /* output location of the owning iterator */ + }; + + deref_proxy operator*() { return {out}; } + +private: + size_t *out; /* location to output the hashes to */ +}; + +/* Function object for hashing a dynamic bitset */ +struct hash_dynamic_bitset { + size_t operator()(const boost::dynamic_bitset<> &bs) const { + size_t rv = 0; + to_block_range(bs, hash_output_it(&rv)); + return rv; + } +}; + +} // namespace ue2 + +#endif diff --git a/unit/internal/multiaccel_shift.cpp b/src/util/intrinsics.h similarity index 59% rename from unit/internal/multiaccel_shift.cpp rename to src/util/intrinsics.h index d6019870d..edc4f6efb 100644 --- a/unit/internal/multiaccel_shift.cpp +++ b/src/util/intrinsics.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,56 +26,41 @@ * POSSIBILITY OF SUCH DAMAGE. */ +/** \file + * \brief Wrapper around the compiler supplied intrinsic header + */ + +#ifndef INTRINSICS_H +#define INTRINSICS_H + #include "config.h" -#include "src/ue2common.h" -#include "gtest/gtest.h" -#include "nfa/multiaccel_common.h" +#ifdef __cplusplus +# if defined(HAVE_CXX_X86INTRIN_H) +# define USE_X86INTRIN_H +# endif +#else // C +# if defined(HAVE_C_X86INTRIN_H) +# define USE_X86INTRIN_H +# endif +#endif -/* - * Unit tests for the shifters. - * - * This is a bit messy, as shifters are macros, so we're using macros to test - * other macros. 
- */ +#ifdef __cplusplus +# if defined(HAVE_CXX_INTRIN_H) +# define USE_INTRIN_H +# endif +#else // C +# if defined(HAVE_C_INTRIN_H) +# define USE_INTRIN_H +# endif +#endif -#define TEST_SHIFT(n) \ - do { \ - u64a val = ((u64a) 1 << n) - 1; \ - JOIN(SHIFT, n)(val); \ - ASSERT_EQ(val, 1); \ - } while (0) +#if defined(USE_X86INTRIN_H) +#include +#elif defined(USE_INTRIN_H) +#include +#else +#error no intrinsics file +#endif -TEST(MultiaccelShift, StaticShift) { - TEST_SHIFT(1); - TEST_SHIFT(2); - TEST_SHIFT(3); - TEST_SHIFT(4); - TEST_SHIFT(5); - TEST_SHIFT(6); - TEST_SHIFT(7); - TEST_SHIFT(8); - TEST_SHIFT(10); - TEST_SHIFT(11); - TEST_SHIFT(12); - TEST_SHIFT(13); - TEST_SHIFT(14); - TEST_SHIFT(15); - TEST_SHIFT(16); - TEST_SHIFT(17); - TEST_SHIFT(18); - TEST_SHIFT(19); - TEST_SHIFT(20); - TEST_SHIFT(21); - TEST_SHIFT(22); - TEST_SHIFT(23); - TEST_SHIFT(24); - TEST_SHIFT(25); - TEST_SHIFT(26); - TEST_SHIFT(27); - TEST_SHIFT(28); - TEST_SHIFT(29); - TEST_SHIFT(30); - TEST_SHIFT(31); - TEST_SHIFT(32); -} +#endif // INTRINSICS_H diff --git a/src/util/make_unique.h b/src/util/make_unique.h index 12148af1b..651e8c5cf 100644 --- a/src/util/make_unique.h +++ b/src/util/make_unique.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,9 +39,9 @@ namespace ue2 { #if defined(USE_STD) - using std::make_unique; +using std::make_unique; #else - using boost::make_unique; +using boost::make_unique; #endif } diff --git a/src/util/masked_move.c b/src/util/masked_move.c index ec788db7c..001cd49f2 100644 --- a/src/util/masked_move.c +++ b/src/util/masked_move.c @@ -29,8 +29,9 @@ #include "ue2common.h" #include "masked_move.h" +#include "util/arch.h" -#if defined(__AVX2__) +#if defined(HAVE_AVX2) /* masks for masked moves */ /* magic mask for maskload (vmmaskmovq) - described in UE-2424 */ diff --git a/src/util/masked_move.h b/src/util/masked_move.h index 09276e802..4c877ca9e 100644 --- a/src/util/masked_move.h +++ b/src/util/masked_move.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,7 +29,9 @@ #ifndef MASKED_MOVE_H #define MASKED_MOVE_H -#if defined(__AVX2__) +#include "arch.h" + +#if defined(HAVE_AVX2) #include "unaligned.h" #include "simd_utils.h" @@ -68,7 +70,8 @@ masked_move256_len(const u8 *buf, const u32 len) { u32 end = unaligned_load_u32(buf + len - 4); m256 preshufend = _mm256_broadcastq_epi64(_mm_cvtsi32_si128(end)); m256 v = _mm256_maskload_epi32((const int *)buf, lmask); - m256 shufend = vpshufb(preshufend, loadu256(&mm_shuffle_end[len - 4])); + m256 shufend = pshufb_m256(preshufend, + loadu256(&mm_shuffle_end[len - 4])); m256 target = or256(v, shufend); return target; diff --git a/src/util/math.h b/src/util/math.h new file mode 100644 index 000000000..e18c50277 --- /dev/null +++ b/src/util/math.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UTIL_MATH_H_ +#define UTIL_MATH_H_ + +#include "arch.h" +#include "intrinsics.h" + +#include + +static really_inline +double our_pow(double x, double y) { +#if defined(HAVE_AVX) + /* + * Clear the upper half of AVX registers before calling into the math lib. + * On some versions of glibc this can save thousands of AVX-to-SSE + * transitions. + */ + _mm256_zeroupper(); +#endif + return pow(x, y); +} + +#endif // UTIL_MATH_H_ diff --git a/src/util/multibit_build.cpp b/src/util/multibit_build.cpp index 5fe2d6172..ad6a0d6a6 100644 --- a/src/util/multibit_build.cpp +++ b/src/util/multibit_build.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -155,9 +155,9 @@ void bfs(vector &out, const TreeNode &tree) { /** \brief Construct a sparse iterator over the values in \a bits for a * multibit of size \a total_bits. */ -void mmbBuildSparseIterator(vector &out, - const vector &bits, u32 total_bits) { - assert(out.empty()); +vector mmbBuildSparseIterator(const vector &bits, + u32 total_bits) { + vector out; assert(!bits.empty()); assert(total_bits > 0); assert(total_bits <= MMB_MAX_BITS); @@ -186,6 +186,7 @@ void mmbBuildSparseIterator(vector &out, #endif DEBUG_PRINTF("iter has %zu records\n", out.size()); + return out; } template @@ -272,7 +273,7 @@ void mmbBuildInitRangePlan(u32 total_bits, u32 begin, u32 end, } // Partial block to deal with beginning. 
- block_offset += k1 / MMB_KEY_BITS; + block_offset += (k1 / MMB_KEY_BITS) * sizeof(MMB_TYPE); if (k1 % MMB_KEY_BITS) { u32 idx = k1 / MMB_KEY_BITS; u32 block_end = (idx + 1) * MMB_KEY_BITS; diff --git a/src/util/multibit_build.h b/src/util/multibit_build.h index 951f1fb46..2d7b5fc26 100644 --- a/src/util/multibit_build.h +++ b/src/util/multibit_build.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -61,8 +61,8 @@ u32 mmbit_size(u32 total_bits); /** \brief Construct a sparse iterator over the values in \a bits for a * multibit of size \a total_bits. */ -void mmbBuildSparseIterator(std::vector &out, - const std::vector &bits, u32 total_bits); +std::vector +mmbBuildSparseIterator(const std::vector &bits, u32 total_bits); struct scatter_plan_raw; diff --git a/src/util/noncopyable.h b/src/util/noncopyable.h new file mode 100644 index 000000000..cd4f2e026 --- /dev/null +++ b/src/util/noncopyable.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Class that makes derived classes non-copyable. + */ + +#ifndef UTIL_NONCOPYABLE_H +#define UTIL_NONCOPYABLE_H + +namespace ue2 { + +/** \brief Class that makes derived classes non-copyable. 
*/ +struct noncopyable { + noncopyable() = default; + noncopyable(const noncopyable &) = delete; + noncopyable(noncopyable &&) = default; + noncopyable &operator=(const noncopyable &) = delete; + noncopyable &operator=(noncopyable &&) = default; +}; + +} // namespace ue2 + +#endif // UTIL_NONCOPYABLE_H diff --git a/src/util/operators.h b/src/util/operators.h new file mode 100644 index 000000000..b0a1c1cca --- /dev/null +++ b/src/util/operators.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \brief Ordered operators: provides all the other compare operators for types + * that provide equal and less-than. + * + * This is similar to Boost's totally_ordered class, but much simpler and + * without injecting the boost namespace into ADL lookup. + */ + +#ifndef UTIL_OPERATORS_H +#define UTIL_OPERATORS_H + +namespace ue2 { + +/** + * \brief Ordered operators: provides all the other compare operators for types + * that provide equal and less-than. + * + * Simply inherit from this class with your class name as its template + * parameter. 
+ */ +template +class totally_ordered { +public: + friend bool operator!=(const T &a, const T &b) { return !(a == b); } + friend bool operator<=(const T &a, const T &b) { return !(b < a); } + friend bool operator>(const T &a, const T &b) { return b < a; } + friend bool operator>=(const T &a, const T &b) { return !(a < b); } +}; + +} // namespace + +#endif // UTIL_OPERATORS_H diff --git a/src/util/partitioned_set.h b/src/util/partitioned_set.h index 8f92a8b75..a9e4644d1 100644 --- a/src/util/partitioned_set.h +++ b/src/util/partitioned_set.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,13 +30,13 @@ #define PARTITIONED_SET_H #include "container.h" +#include "noncopyable.h" #include "ue2_containers.h" #include "ue2common.h" #include #include -#include #include namespace ue2 { @@ -53,7 +53,7 @@ static constexpr size_t INVALID_SUBSET = ~(size_t)0; */ template -class partitioned_set : boost::noncopyable { +class partitioned_set : noncopyable { public: class subset { public: diff --git a/src/util/popcount.h b/src/util/popcount.h index d882a6720..eb08f6b1b 100644 --- a/src/util/popcount.h +++ b/src/util/popcount.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,17 +30,11 @@ * \brief Platform specific popcount functions */ -#ifndef POPCOUNT_H_075D843B4545B6 -#define POPCOUNT_H_075D843B4545B6 +#ifndef UTIL_POPCOUNT_H_ +#define UTIL_POPCOUNT_H_ #include "ue2common.h" - -// We have a native popcount where the compiler has defined __POPCNT__. 
-#if defined(__POPCNT__) -#define HAVE_POPCOUNT_INSTR -#elif defined(_WIN32) && defined(__AVX__) // TODO: fix win preproc -#define HAVE_POPCOUNT_INSTR -#endif +#include "util/arch.h" static really_inline u32 popcount32(u32 x) { @@ -76,5 +70,5 @@ u32 popcount64(u64a x) { #endif } -#endif /* POPCOUNT_H_075D843B4545B6 */ +#endif /* UTIL_POPCOUNT_H_ */ diff --git a/src/util/queue_index_factory.h b/src/util/queue_index_factory.h index 1360beef5..e8f7028ec 100644 --- a/src/util/queue_index_factory.h +++ b/src/util/queue_index_factory.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,12 +33,11 @@ #define UTIL_QUEUE_INDEX_FACTORY_H #include "ue2common.h" - -#include +#include "util/noncopyable.h" namespace ue2 { -class QueueIndexFactory : boost::noncopyable { +class QueueIndexFactory : noncopyable { public: QueueIndexFactory() : val(0) {} u32 get_queue() { return val++; } diff --git a/src/util/report.h b/src/util/report.h index 24ecca9d4..a8e233ffd 100644 --- a/src/util/report.h +++ b/src/util/report.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,9 +34,10 @@ #ifndef UTIL_REPORT_H #define UTIL_REPORT_H -#include "util/exhaust.h" // for INVALID_EKEY -#include "order_check.h" #include "ue2common.h" +#include "util/exhaust.h" // for INVALID_EKEY +#include "util/hash.h" +#include "util/order_check.h" #include @@ -195,6 +196,23 @@ bool operator<(const Report &a, const Report &b) { return false; } +inline +bool operator==(const Report &a, const Report &b) { + return a.type == b.type && a.quashSom == b.quashSom && + a.minOffset == b.minOffset && a.maxOffset == b.maxOffset && + a.minLength == b.minLength && a.ekey == b.ekey && + a.offsetAdjust == b.offsetAdjust && a.onmatch == b.onmatch && + a.revNfaIndex == b.revNfaIndex && a.somDistance == b.somDistance && + a.topSquashDistance == b.topSquashDistance; +} + +inline +size_t hash_value(const Report &r) { + return hash_all(r.type, r.quashSom, r.minOffset, r.maxOffset, r.minLength, + r.ekey, r.offsetAdjust, r.onmatch, r.revNfaIndex, + r.somDistance, r.topSquashDistance); +} + static inline Report makeECallback(u32 report, s32 offsetAdjust, u32 ekey) { Report ir(EXTERNAL_CALLBACK, report); diff --git a/src/util/report_manager.cpp b/src/util/report_manager.cpp index 8377ea036..a846eb25e 100644 --- a/src/util/report_manager.cpp +++ b/src/util/report_manager.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,9 +29,12 @@ /** \file * \brief ReportManager: tracks Report structures, exhaustion and dedupe keys. 
*/ -#include "grey.h" + #include "report_manager.h" + +#include "grey.h" #include "ue2common.h" +#include "compiler/compiler.h" #include "nfagraph/ng.h" #include "rose/rose_build.h" #include "util/compile_error.h" @@ -64,7 +67,7 @@ u32 ReportManager::getInternalId(const Report &ir) { u32 size = reportIds.size(); reportIds.push_back(ir); - reportIdToInternalMap[ir] = size; + reportIdToInternalMap.emplace(ir, size); DEBUG_PRINTF("new report %u\n", size); return size; } @@ -171,8 +174,9 @@ u32 ReportManager::getDkey(const Report &r) const { void ReportManager::registerExtReport(ReportID id, const external_report_info &ext) { - if (contains(externalIdMap, id)) { - const external_report_info &eri = externalIdMap.at(id); + auto it = externalIdMap.find(id); + if (it != externalIdMap.end()) { + const external_report_info &eri = it->second; if (eri.highlander != ext.highlander) { /* we have a problem */ ostringstream out; @@ -201,20 +205,21 @@ void ReportManager::registerExtReport(ReportID id, } } -Report ReportManager::getBasicInternalReport(const NGWrapper &g, s32 adj) { +Report ReportManager::getBasicInternalReport(const ExpressionInfo &expr, + s32 adj) { /* validate that we are not violating highlander constraints, this will * throw a CompileError if so. */ - registerExtReport(g.reportId, - external_report_info(g.highlander, g.expressionIndex)); + registerExtReport(expr.report, + external_report_info(expr.highlander, expr.index)); /* create the internal report */ u32 ekey = INVALID_EKEY; - if (g.highlander) { + if (expr.highlander) { /* all patterns with the same report id share an ekey */ - ekey = getExhaustibleKey(g.reportId); + ekey = getExhaustibleKey(expr.report); } - return makeECallback(g.reportId, adj, ekey); + return makeECallback(expr.report, adj, ekey); } void ReportManager::setProgramOffset(ReportID id, u32 programOffset) { diff --git a/src/util/report_manager.h b/src/util/report_manager.h index 0eed2711b..95e14a2c3 100644 --- a/src/util/report_manager.h +++ b/src/util/report_manager.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,18 +36,19 @@ #include "ue2common.h" #include "util/compile_error.h" +#include "util/noncopyable.h" #include "util/report.h" +#include "util/ue2_containers.h" #include #include #include -#include namespace ue2 { struct Grey; class RoseBuild; -class NGWrapper; +class ExpressionInfo; struct external_report_info { external_report_info(bool h, u32 fpi) @@ -57,7 +58,7 @@ struct external_report_info { }; /** \brief Tracks Report structures, exhaustion and dedupe keys. */ -class ReportManager : boost::noncopyable { +class ReportManager : noncopyable { public: explicit ReportManager(const Grey &g); @@ -92,13 +93,13 @@ class ReportManager : boost::noncopyable { const std::vector &reports() const { return reportIds; } /** - * Get a simple internal report corresponding to the wrapper. An ekey will - * be setup as required. + * Get a simple internal report corresponding to the expression. An ekey + * will be setup if required. * * Note: this function may throw a CompileError if constraints on external * match id are violated (mixed highlander status for example). 
*/ - Report getBasicInternalReport(const NGWrapper &g, s32 adj = 0); + Report getBasicInternalReport(const ExpressionInfo &expr, s32 adj = 0); /** \brief Register an external report and validate that we are not * violating highlander constraints (which will cause an exception to be @@ -129,18 +130,18 @@ class ReportManager : boost::noncopyable { std::vector reportIds; /** \brief Mapping from Report to ID (inverse of \ref reportIds - * vector). */ - std::map reportIdToInternalMap; + * vector). */ + unordered_map reportIdToInternalMap; /** \brief Mapping from ReportID to dedupe key. */ - std::map reportIdToDedupeKey; + unordered_map reportIdToDedupeKey; /** \brief Mapping from ReportID to Rose program offset in bytecode. */ - std::map reportIdToProgramOffset; + unordered_map reportIdToProgramOffset; /** \brief Mapping from external match ids to information about that * id. */ - std::map externalIdMap; + unordered_map externalIdMap; /** \brief Mapping from expression index to exhaustion key. */ std::map toExhaustibleKeyMap; diff --git a/src/util/simd_types.h b/src/util/simd_types.h index d6e5d6a3e..962cad6c9 100644 --- a/src/util/simd_types.h +++ b/src/util/simd_types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,52 +30,28 @@ #define SIMD_TYPES_H #include "config.h" +#include "util/arch.h" +#include "util/intrinsics.h" #include "ue2common.h" -// more recent headers are bestest, but only if we can use them -#ifdef __cplusplus -# if defined(HAVE_CXX_X86INTRIN_H) -# define USE_X86INTRIN_H -# endif -#else // C -# if defined(HAVE_C_X86INTRIN_H) -# define USE_X86INTRIN_H -# endif -#endif - -#ifdef __cplusplus -# if defined(HAVE_CXX_INTRIN_H) -# define USE_INTRIN_H -# endif -#else // C -# if defined(HAVE_C_INTRIN_H) -# define USE_INTRIN_H -# endif -#endif - -#if defined(USE_X86INTRIN_H) -#include -#elif defined(USE_INTRIN_H) -#include -#else -#error no intrinsics! -#endif - -#if defined(__SSE2__) || defined(_M_X64) || (_M_IX86_FP >= 2) +#if defined(HAVE_SSE2) typedef __m128i m128; #else typedef struct ALIGN_DIRECTIVE {u64a hi; u64a lo;} m128; #endif -#if defined(__AVX2__) +#if defined(HAVE_AVX2) typedef __m256i m256; #else -typedef ALIGN_AVX_DIRECTIVE struct {m128 lo; m128 hi;} m256; +typedef struct ALIGN_AVX_DIRECTIVE {m128 lo; m128 hi;} m256; #endif -// these should align to 16 and 32 respectively typedef struct {m128 lo; m128 mid; m128 hi;} m384; -typedef struct {m256 lo; m256 hi;} m512; +#if defined(HAVE_AVX512) +typedef __m512i m512; +#else +typedef struct ALIGN_ATTR(64) {m256 lo; m256 hi;} m512; +#endif #endif /* SIMD_TYPES_H */ diff --git a/src/util/simd_utils.c b/src/util/simd_utils.c index 54b5b4baa..25a81412e 100644 --- a/src/util/simd_utils.c +++ b/src/util/simd_utils.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -49,6 +49,7 @@ ALIGN_CL_DIRECTIVE const char vbs_mask_data[] = { /** \brief LUT for the mask1bit functions. 
*/ ALIGN_CL_DIRECTIVE const u8 simd_onebit_masks[] = { + ZEROES_32, ZEROES_32, ZEROES_31, 0x01, ZEROES_32, ZEROES_31, 0x02, ZEROES_32, ZEROES_31, 0x04, ZEROES_32, @@ -57,4 +58,5 @@ ALIGN_CL_DIRECTIVE const u8 simd_onebit_masks[] = { ZEROES_31, 0x20, ZEROES_32, ZEROES_31, 0x40, ZEROES_32, ZEROES_31, 0x80, ZEROES_32, + ZEROES_32, ZEROES_32, }; diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index e8676249a..047cdbab1 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,40 +38,13 @@ #endif #include "config.h" -#include // for memcpy - -// more recent headers are bestest, but only if we can use them -#ifdef __cplusplus -# if defined(HAVE_CXX_X86INTRIN_H) -# define USE_X86INTRIN_H -# endif -#else // C -# if defined(HAVE_C_X86INTRIN_H) -# define USE_X86INTRIN_H -# endif -#endif - -#ifdef __cplusplus -# if defined(HAVE_CXX_INTRIN_H) -# define USE_INTRIN_H -# endif -#else // C -# if defined(HAVE_C_INTRIN_H) -# define USE_INTRIN_H -# endif -#endif - -#if defined(USE_X86INTRIN_H) -#include -#elif defined(USE_INTRIN_H) -#include -#else -#error no intrins! -#endif - #include "ue2common.h" #include "simd_types.h" #include "unaligned.h" +#include "util/arch.h" +#include "util/intrinsics.h" + +#include // for memcpy // Define a common assume_aligned using an appropriate compiler built-in, if // it's available. Note that we need to handle C or C++ compilation. @@ -141,7 +114,7 @@ static really_inline u32 diffrich128(m128 a, m128 b) { * returns a 4-bit mask indicating which 64-bit words contain differences. 
*/ static really_inline u32 diffrich64_128(m128 a, m128 b) { -#if defined(__SSE_41__) +#if defined(HAVE_SSE41) a = _mm_cmpeq_epi64(a, b); return ~(_mm_movemask_ps(_mm_castsi128_ps(a))) & 0x5; #else @@ -150,7 +123,17 @@ static really_inline u32 diffrich64_128(m128 a, m128 b) { #endif } -#define lshift64_m128(a, b) _mm_slli_epi64((a), (b)) +static really_really_inline +m128 lshift64_m128(m128 a, unsigned b) { +#if defined(HAVE__BUILTIN_CONSTANT_P) + if (__builtin_constant_p(b)) { + return _mm_slli_epi64(a, b); + } +#endif + m128 x = _mm_cvtsi32_si128(b); + return _mm_sll_epi64(a, x); +} + #define rshift64_m128(a, b) _mm_srli_epi64((a), (b)) #define eq128(a, b) _mm_cmpeq_epi8((a), (b)) #define movemask128(a) ((u32)_mm_movemask_epi8((a))) @@ -180,25 +163,17 @@ static really_inline u64a movq(const m128 in) { /* another form of movq */ static really_inline m128 load_m128_from_u64a(const u64a *p) { -#if defined(__GNUC__) && !defined(__INTEL_COMPILER) - /* unfortunately _mm_loadl_epi64() is best avoided as it seems to cause - * trouble on some older compilers, possibly because it is misdefined to - * take an m128 as its parameter */ - return _mm_set_epi64((__m64)0ULL, (__m64)*p); -#else - /* ICC doesn't like casting to __m64 */ - return _mm_loadl_epi64((const m128 *)p); -#endif + return _mm_set_epi64x(0LL, *p); } #define rshiftbyte_m128(a, count_immed) _mm_srli_si128(a, count_immed) #define lshiftbyte_m128(a, count_immed) _mm_slli_si128(a, count_immed) -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) // TODO: this entire file needs restructuring - this carveout is awful #define extractlow64from256(a) movq(a.lo) #define extractlow32from256(a) movd(a.lo) -#if defined(__SSE4_1__) +#if defined(HAVE_SSE41) #define extract32from256(a, imm) _mm_extract_epi32((imm >> 2) ? a.hi : a.lo, imm % 4) #define extract64from256(a, imm) _mm_extract_epi64((imm >> 2) ? 
a.hi : a.lo, imm % 2) #else @@ -275,7 +250,7 @@ extern const u8 simd_onebit_masks[]; static really_inline m128 mask1bit128(unsigned int n) { assert(n < sizeof(m128) * 8); - u32 mask_idx = ((n % 8) * 64) + 31; + u32 mask_idx = ((n % 8) * 64) + 95; mask_idx -= n / 8; return loadu128(&simd_onebit_masks[mask_idx]); } @@ -296,7 +271,7 @@ void clearbit128(m128 *ptr, unsigned int n) { static really_inline char testbit128(m128 val, unsigned int n) { const m128 mask = mask1bit128(n); -#if defined(__SSE4_1__) +#if defined(HAVE_SSE41) return !_mm_testz_si128(mask, val); #else return isnonzero128(and128(mask, val)); @@ -307,29 +282,41 @@ char testbit128(m128 val, unsigned int n) { #define palignr(r, l, offset) _mm_alignr_epi8(r, l, offset) static really_inline -m128 pshufb(m128 a, m128 b) { +m128 pshufb_m128(m128 a, m128 b) { m128 result; result = _mm_shuffle_epi8(a, b); return result; } static really_inline -m256 vpshufb(m256 a, m256 b) { -#if defined(__AVX2__) +m256 pshufb_m256(m256 a, m256 b) { +#if defined(HAVE_AVX2) return _mm256_shuffle_epi8(a, b); #else m256 rv; - rv.lo = pshufb(a.lo, b.lo); - rv.hi = pshufb(a.hi, b.hi); + rv.lo = pshufb_m128(a.lo, b.lo); + rv.hi = pshufb_m128(a.hi, b.hi); return rv; #endif } +#if defined(HAVE_AVX512) +static really_inline +m512 pshufb_m512(m512 a, m512 b) { + return _mm512_shuffle_epi8(a, b); +} + +static really_inline +m512 maskz_pshufb_m512(__mmask64 k, m512 a, m512 b) { + return _mm512_maskz_shuffle_epi8(k, a, b); +} +#endif + static really_inline m128 variable_byte_shift_m128(m128 in, s32 amount) { assert(amount >= -16 && amount <= 16); m128 shift_mask = loadu128(vbs_mask_data + 16 - amount); - return pshufb(in, shift_mask); + return pshufb_m128(in, shift_mask); } static really_inline @@ -352,12 +339,28 @@ m128 sub_u8_m128(m128 a, m128 b) { return _mm_sub_epi8(a, b); } +static really_inline +m128 set64x2(u64a hi, u64a lo) { + return _mm_set_epi64x(hi, lo); +} + /**** **** 256-bit Primitives ****/ -#if defined(__AVX2__) -#define lshift64_m256(a, b) _mm256_slli_epi64((a), (b)) +#if defined(HAVE_AVX2) + +static really_really_inline +m256 lshift64_m256(m256 a, unsigned b) { +#if defined(HAVE__BUILTIN_CONSTANT_P) + if (__builtin_constant_p(b)) { + return _mm256_slli_epi64(a, b); + } +#endif + m128 x = _mm_cvtsi32_si128(b); + return _mm256_sll_epi64(a, x); +} + #define rshift64_m256(a, b) _mm256_srli_epi64((a), (b)) static really_inline @@ -375,7 +378,7 @@ m256 set2x128(m128 a) { #else -static really_inline +static really_really_inline m256 lshift64_m256(m256 a, int b) { m256 rv = a; rv.lo = lshift64_m128(rv.lo, b); @@ -421,7 +424,7 @@ m256 set2x128(m128 a) { #endif static really_inline m256 zeroes256(void) { -#if defined(__AVX2__) +#if defined(HAVE_AVX2) return _mm256_setzero_si256(); #else m256 rv = {zeroes128(), zeroes128()}; @@ -430,7 +433,7 @@ static really_inline m256 zeroes256(void) { } static really_inline m256 ones256(void) { -#if defined(__AVX2__) +#if defined(HAVE_AVX2) m256 rv = _mm256_set1_epi8(0xFF); #else m256 rv = {ones128(), ones128()}; @@ -438,7 +441,7 @@ static really_inline m256 ones256(void) { return rv; } -#if defined(__AVX2__) +#if defined(HAVE_AVX2) static really_inline m256 and256(m256 a, m256 b) { return _mm256_and_si256(a, b); } @@ -451,7 +454,7 @@ static really_inline m256 and256(m256 a, m256 b) { } #endif -#if defined(__AVX2__) +#if defined(HAVE_AVX2) static really_inline m256 or256(m256 a, m256 b) { return _mm256_or_si256(a, b); } @@ -464,7 +467,7 @@ static really_inline m256 or256(m256 a, m256 b) { } #endif -#if defined(__AVX2__) 
+#if defined(HAVE_AVX2) static really_inline m256 xor256(m256 a, m256 b) { return _mm256_xor_si256(a, b); } @@ -477,7 +480,7 @@ static really_inline m256 xor256(m256 a, m256 b) { } #endif -#if defined(__AVX2__) +#if defined(HAVE_AVX2) static really_inline m256 not256(m256 a) { return _mm256_xor_si256(a, ones256()); } @@ -490,7 +493,7 @@ static really_inline m256 not256(m256 a) { } #endif -#if defined(__AVX2__) +#if defined(HAVE_AVX2) static really_inline m256 andnot256(m256 a, m256 b) { return _mm256_andnot_si256(a, b); } @@ -504,7 +507,7 @@ static really_inline m256 andnot256(m256 a, m256 b) { #endif static really_inline int diff256(m256 a, m256 b) { -#if defined(__AVX2__) +#if defined(HAVE_AVX2) return !!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(a, b)) ^ (int)-1); #else return diff128(a.lo, b.lo) || diff128(a.hi, b.hi); @@ -512,7 +515,7 @@ static really_inline int diff256(m256 a, m256 b) { } static really_inline int isnonzero256(m256 a) { -#if defined(__AVX2__) +#if defined(HAVE_AVX2) return !!diff256(a, zeroes256()); #else return isnonzero128(or128(a.lo, a.hi)); @@ -524,7 +527,7 @@ static really_inline int isnonzero256(m256 a) { * mask indicating which 32-bit words contain differences. */ static really_inline u32 diffrich256(m256 a, m256 b) { -#if defined(__AVX2__) +#if defined(HAVE_AVX2) a = _mm256_cmpeq_epi32(a, b); return ~(_mm256_movemask_ps(_mm256_castsi256_ps(a))) & 0xFF; #else @@ -548,7 +551,7 @@ static really_inline u32 diffrich64_256(m256 a, m256 b) { // aligned load static really_inline m256 load256(const void *ptr) { assert(ISALIGNED_N(ptr, alignof(m256))); -#if defined(__AVX2__) +#if defined(HAVE_AVX2) return _mm256_load_si256((const m256 *)ptr); #else m256 rv = { load128(ptr), load128((const char *)ptr + 16) }; @@ -558,7 +561,7 @@ static really_inline m256 load256(const void *ptr) { // aligned load of 128-bit value to low and high part of 256-bit value static really_inline m256 load2x128(const void *ptr) { -#if defined(__AVX2__) +#if defined(HAVE_AVX2) return set2x128(load128(ptr)); #else assert(ISALIGNED_N(ptr, alignof(m128))); @@ -575,7 +578,7 @@ static really_inline m256 loadu2x128(const void *ptr) { // aligned store static really_inline void store256(void *ptr, m256 a) { assert(ISALIGNED_N(ptr, alignof(m256))); -#if defined(__AVX2__) +#if defined(HAVE_AVX2) _mm256_store_si256((m256 *)ptr, a); #else ptr = assume_aligned(ptr, 16); @@ -585,7 +588,7 @@ static really_inline void store256(void *ptr, m256 a) { // unaligned load static really_inline m256 loadu256(const void *ptr) { -#if defined(__AVX2__) +#if defined(HAVE_AVX2) return _mm256_loadu_si256((const m256 *)ptr); #else m256 rv = { loadu128(ptr), loadu128((const char *)ptr + 16) }; @@ -595,7 +598,7 @@ static really_inline m256 loadu256(const void *ptr) { // unaligned store static really_inline void storeu256(void *ptr, m256 a) { -#if defined(__AVX2__) +#if defined(HAVE_AVX2) _mm256_storeu_si256((m256 *)ptr, a); #else storeu128(ptr, a.lo); @@ -622,12 +625,24 @@ m256 loadbytes256(const void *ptr, unsigned int n) { static really_inline m256 mask1bit256(unsigned int n) { assert(n < sizeof(m256) * 8); - u32 mask_idx = ((n % 8) * 64) + 31; + u32 mask_idx = ((n % 8) * 64) + 95; mask_idx -= n / 8; return loadu256(&simd_onebit_masks[mask_idx]); } -#if !defined(__AVX2__) +static really_inline +m256 set64x4(u64a hi_1, u64a hi_0, u64a lo_1, u64a lo_0) { +#if defined(HAVE_AVX2) + return _mm256_set_epi64x(hi_1, hi_0, lo_1, lo_0); +#else + m256 rv; + rv.hi = set64x2(hi_1, hi_0); + rv.lo = set64x2(lo_1, lo_0); + return rv; +#endif +} + 
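The change of the `mask1bit` base index from 31 to 95 above is tied to the `simd_onebit_masks` edit in `simd_utils.c`: the table gains two `ZEROES_32` rows (64 zero bytes) at each end so that the 64-byte unaligned load used by the AVX-512 `mask1bit512()` later in this file can start anywhere in the old table without reading out of bounds, and the base offset therefore shifts by the 64 bytes of new leading padding (31 + 64 = 95). A standalone sketch of the indexing, using local stand-in names rather than the library's:

```c
/* Sketch only: models the padded one-bit mask table and checks that the
 * post-patch index arithmetic always yields an in-bounds 64-byte window
 * holding exactly bit n. */
#include <assert.h>
#include <stdint.h>
#include <string.h>

/* 8 rows of 64 bytes (one per bit-in-byte position), padded with 64
 * zero bytes at each end, mirroring the added ZEROES_32 rows. */
static uint8_t masks[64 + 8 * 64 + 64];

int main(void) {
    memset(masks, 0, sizeof(masks));
    for (int k = 0; k < 8; k++) {
        /* each row is ZEROES_31, the byte (1 << k), ZEROES_32 */
        masks[64 + k * 64 + 31] = (uint8_t)(1 << k);
    }
    for (unsigned n = 0; n < 512; n++) {
        /* same arithmetic as mask1bit128/256/512 after this patch */
        uint32_t mask_idx = ((n % 8) * 64) + 95 - (n / 8);
        /* a 64-byte loadu512 from mask_idx stays inside the table... */
        assert(mask_idx + 64 <= sizeof(masks));
        /* ...and the window contains exactly the byte for bit n */
        for (unsigned byte = 0; byte < 64; byte++) {
            uint8_t want = (byte == n / 8) ? (uint8_t)(1 << (n % 8)) : 0;
            assert(masks[mask_idx + byte] == want);
        }
    }
    return 0;
}
```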
+#if !defined(HAVE_AVX2) // switches on bit N in the given vector. static really_inline void setbit256(m256 *ptr, unsigned int n) { @@ -782,7 +797,6 @@ static really_inline m384 andnot384(m384 a, m384 b) { return rv; } -// The shift amount is an immediate static really_really_inline m384 lshift64_m384(m384 a, unsigned b) { m384 rv; @@ -920,42 +934,119 @@ char testbit384(m384 val, unsigned int n) { **** 512-bit Primitives ****/ -static really_inline m512 and512(m512 a, m512 b) { +#define eq512mask(a, b) _mm512_cmpeq_epi8_mask((a), (b)) +#define masked_eq512mask(k, a, b) _mm512_mask_cmpeq_epi8_mask((k), (a), (b)) + +static really_inline +m512 zeroes512(void) { +#if defined(HAVE_AVX512) + return _mm512_setzero_si512(); +#else + m512 rv = {zeroes256(), zeroes256()}; + return rv; +#endif +} + +static really_inline +m512 ones512(void) { +#if defined(HAVE_AVX512) + return _mm512_set1_epi8(0xFF); + //return _mm512_xor_si512(_mm512_setzero_si512(), _mm512_setzero_si512()); +#else + m512 rv = {ones256(), ones256()}; + return rv; +#endif +} + +#if defined(HAVE_AVX512) +static really_inline +m512 set64x8(u8 a) { + return _mm512_set1_epi8(a); +} + +static really_inline +m512 set8x64(u64a a) { + return _mm512_set1_epi64(a); +} + +static really_inline +m512 set4x128(m128 a) { + return _mm512_broadcast_i32x4(a); +} +#endif + +static really_inline +m512 and512(m512 a, m512 b) { +#if defined(HAVE_AVX512) + return _mm512_and_si512(a, b); +#else m512 rv; rv.lo = and256(a.lo, b.lo); rv.hi = and256(a.hi, b.hi); return rv; +#endif } -static really_inline m512 or512(m512 a, m512 b) { +static really_inline +m512 or512(m512 a, m512 b) { +#if defined(HAVE_AVX512) + return _mm512_or_si512(a, b); +#else m512 rv; rv.lo = or256(a.lo, b.lo); rv.hi = or256(a.hi, b.hi); return rv; +#endif } -static really_inline m512 xor512(m512 a, m512 b) { +static really_inline +m512 xor512(m512 a, m512 b) { +#if defined(HAVE_AVX512) + return _mm512_xor_si512(a, b); +#else m512 rv; rv.lo = xor256(a.lo, b.lo); rv.hi = xor256(a.hi, b.hi); return rv; +#endif } -static really_inline m512 not512(m512 a) { +static really_inline +m512 not512(m512 a) { +#if defined(HAVE_AVX512) + return _mm512_xor_si512(a, ones512()); +#else m512 rv; rv.lo = not256(a.lo); rv.hi = not256(a.hi); return rv; +#endif } -static really_inline m512 andnot512(m512 a, m512 b) { +static really_inline +m512 andnot512(m512 a, m512 b) { +#if defined(HAVE_AVX512) + return _mm512_andnot_si512(a, b); +#else m512 rv; rv.lo = andnot256(a.lo, b.lo); rv.hi = andnot256(a.hi, b.hi); return rv; +#endif } -// The shift amount is an immediate +#if defined(HAVE_AVX512) +static really_really_inline +m512 lshift64_m512(m512 a, unsigned b) { +#if defined(HAVE__BUILTIN_CONSTANT_P) + if (__builtin_constant_p(b)) { + return _mm512_slli_epi64(a, b); + } +#endif + m128 x = _mm_cvtsi32_si128(b); + return _mm512_sll_epi64(a, x); +} +#else static really_really_inline m512 lshift64_m512(m512 a, unsigned b) { m512 rv; @@ -963,29 +1054,37 @@ m512 lshift64_m512(m512 a, unsigned b) { rv.hi = lshift64_m256(a.hi, b); return rv; } +#endif -static really_inline m512 zeroes512(void) { - m512 rv = {zeroes256(), zeroes256()}; - return rv; -} +#if defined(HAVE_AVX512) +#define rshift64_m512(a, b) _mm512_srli_epi64((a), (b)) +#define rshift128_m512(a, count_immed) _mm512_bsrli_epi128(a, count_immed) +#endif -static really_inline m512 ones512(void) { - m512 rv = {ones256(), ones256()}; - return rv; -} +#if !defined(_MM_CMPINT_NE) +#define _MM_CMPINT_NE 0x4 +#endif -static really_inline int diff512(m512 a, m512 b) 
{ +static really_inline +int diff512(m512 a, m512 b) { +#if defined(HAVE_AVX512) + return !!_mm512_cmp_epi8_mask(a, b, _MM_CMPINT_NE); +#else return diff256(a.lo, b.lo) || diff256(a.hi, b.hi); +#endif } -static really_inline int isnonzero512(m512 a) { -#if !defined(__AVX2__) +static really_inline +int isnonzero512(m512 a) { +#if defined(HAVE_AVX512) + return diff512(a, zeroes512()); +#elif defined(HAVE_AVX2) + m256 x = or256(a.lo, a.hi); + return !!diff256(x, zeroes256()); +#else m128 x = or128(a.lo.lo, a.lo.hi); m128 y = or128(a.hi.lo, a.hi.hi); return isnonzero128(or128(x, y)); -#else - m256 x = or256(a.lo, a.hi); - return !!diff256(x, zeroes256()); #endif } @@ -993,8 +1092,11 @@ static really_inline int isnonzero512(m512 a) { * "Rich" version of diff512(). Takes two vectors a and b and returns a 16-bit * mask indicating which 32-bit words contain differences. */ -static really_inline u32 diffrich512(m512 a, m512 b) { -#if defined(__AVX2__) +static really_inline +u32 diffrich512(m512 a, m512 b) { +#if defined(HAVE_AVX512) + return _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_NE); +#elif defined(HAVE_AVX2) return diffrich256(a.lo, b.lo) | (diffrich256(a.hi, b.hi) << 8); #else a.lo.lo = _mm_cmpeq_epi32(a.lo.lo, b.lo.lo); @@ -1011,22 +1113,32 @@ static really_inline u32 diffrich512(m512 a, m512 b) { * "Rich" version of diffrich(), 64-bit variant. Takes two vectors a and b and * returns a 16-bit mask indicating which 64-bit words contain differences. */ -static really_inline u32 diffrich64_512(m512 a, m512 b) { +static really_inline +u32 diffrich64_512(m512 a, m512 b) { + //TODO: cmp_epi64? u32 d = diffrich512(a, b); return (d | (d >> 1)) & 0x55555555; } // aligned load -static really_inline m512 load512(const void *ptr) { +static really_inline +m512 load512(const void *ptr) { +#if defined(HAVE_AVX512) + return _mm512_load_si512(ptr); +#else assert(ISALIGNED_N(ptr, alignof(m256))); m512 rv = { load256(ptr), load256((const char *)ptr + 32) }; return rv; +#endif } // aligned store -static really_inline void store512(void *ptr, m512 a) { - assert(ISALIGNED_N(ptr, alignof(m256))); -#if defined(__AVX2__) +static really_inline +void store512(void *ptr, m512 a) { + assert(ISALIGNED_N(ptr, alignof(m512))); +#if defined(HAVE_AVX512) + return _mm512_store_si512(ptr, a); +#elif defined(HAVE_AVX2) m512 *x = (m512 *)ptr; store256(&x->lo, a.lo); store256(&x->hi, a.hi); @@ -1037,11 +1149,28 @@ static really_inline void store512(void *ptr, m512 a) { } // unaligned load -static really_inline m512 loadu512(const void *ptr) { +static really_inline +m512 loadu512(const void *ptr) { +#if defined(HAVE_AVX512) + return _mm512_loadu_si512(ptr); +#else m512 rv = { loadu256(ptr), loadu256((const char *)ptr + 32) }; return rv; +#endif +} + +#if defined(HAVE_AVX512) +static really_inline +m512 loadu_maskz_m512(__mmask64 k, const void *ptr) { + return _mm512_maskz_loadu_epi8(k, ptr); } +static really_inline +m512 loadu_mask_m512(m512 src, __mmask64 k, const void *ptr) { + return _mm512_mask_loadu_epi8(src, k, ptr); +} +#endif + // packed unaligned store of first N bytes static really_inline void storebytes512(void *ptr, m512 a, unsigned int n) { @@ -1058,11 +1187,19 @@ m512 loadbytes512(const void *ptr, unsigned int n) { return a; } +static really_inline +m512 mask1bit512(unsigned int n) { + assert(n < sizeof(m512) * 8); + u32 mask_idx = ((n % 8) * 64) + 95; + mask_idx -= n / 8; + return loadu512(&simd_onebit_masks[mask_idx]); +} + // switches on bit N in the given vector. 
static really_inline void setbit512(m512 *ptr, unsigned int n) { assert(n < sizeof(*ptr) * 8); -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) m128 *sub; if (n < 128) { sub = &ptr->lo.lo; @@ -1074,6 +1211,8 @@ void setbit512(m512 *ptr, unsigned int n) { sub = &ptr->hi.hi; } setbit128(sub, n % 128); +#elif defined(HAVE_AVX512) + *ptr = or512(mask1bit512(n), *ptr); #else m256 *sub; if (n < 256) { @@ -1090,7 +1229,7 @@ void setbit512(m512 *ptr, unsigned int n) { static really_inline void clearbit512(m512 *ptr, unsigned int n) { assert(n < sizeof(*ptr) * 8); -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) m128 *sub; if (n < 128) { sub = &ptr->lo.lo; @@ -1102,6 +1241,8 @@ void clearbit512(m512 *ptr, unsigned int n) { sub = &ptr->hi.hi; } clearbit128(sub, n % 128); +#elif defined(HAVE_AVX512) + *ptr = andnot512(mask1bit512(n), *ptr); #else m256 *sub; if (n < 256) { @@ -1118,7 +1259,7 @@ void clearbit512(m512 *ptr, unsigned int n) { static really_inline char testbit512(m512 val, unsigned int n) { assert(n < sizeof(val) * 8); -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) m128 sub; if (n < 128) { sub = val.lo.lo; @@ -1130,6 +1271,9 @@ char testbit512(m512 val, unsigned int n) { sub = val.hi.hi; } return testbit128(sub, n % 128); +#elif defined(HAVE_AVX512) + const m512 mask = mask1bit512(n); + return !!_mm512_test_epi8_mask(mask, val); #else m256 sub; if (n < 256) { diff --git a/src/nfa/multivermicelli.h b/src/util/small_vector.h similarity index 57% rename from src/nfa/multivermicelli.h rename to src/util/small_vector.h index 55f9b1f28..0b60d8c0f 100644 --- a/src/nfa/multivermicelli.h +++ b/src/util/small_vector.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,37 +26,37 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef MULTIVERMICELLI_H_ -#define MULTIVERMICELLI_H_ +#ifndef UTIL_SMALL_VECTOR_H +#define UTIL_SMALL_VECTOR_H -#ifdef __cplusplus -extern "C" -{ +#include + +#include + +#if BOOST_VERSION >= 105800 +# define HAVE_BOOST_CONTAINER_SMALL_VECTOR #endif -const u8 *long_vermicelliExec(char c, char nocase, const u8 *buf, - const u8 *buf_end, const u8 run_len); +#if defined(HAVE_BOOST_CONTAINER_SMALL_VECTOR) +# include +#endif -const u8 *longgrab_vermicelliExec(char c, char nocase, const u8 *buf, - const u8 *buf_end, const u8 run_len); +namespace ue2 { -const u8 *shift_vermicelliExec(char c, char nocase, const u8 *buf, - const u8 *buf_end, const u8 run_len); +#if defined(HAVE_BOOST_CONTAINER_SMALL_VECTOR) -const u8 *shiftgrab_vermicelliExec(char c, char nocase, const u8 *buf, - const u8 *buf_end, const u8 run_len); +template > +using small_vector = boost::container::small_vector; -const u8 *doubleshift_vermicelliExec(char c, char nocase, const u8 *buf, - const u8 *buf_end, const u8 run_len, - const u8 run2_len); +#else -const u8 *doubleshiftgrab_vermicelliExec(char c, char nocase, const u8 *buf, - const u8 *buf_end, const u8 run_len, - const u8 run2_len); +// Boost version isn't new enough, fall back to just using std::vector. 
+template > +using small_vector = std::vector; -#ifdef __cplusplus -} -#endif +#endif // HAVE_BOOST_CONTAINER_SMALL_VECTOR +} // namespace ue2 -#endif /* MULTIVERMICELLI_H_ */ +#endif // UTIL_SMALL_VECTOR_H diff --git a/src/util/state_compress.c b/src/util/state_compress.c index 2a821dad6..7238849e7 100644 --- a/src/util/state_compress.c +++ b/src/util/state_compress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,6 +31,7 @@ */ #include "config.h" #include "ue2common.h" +#include "arch.h" #include "bitutils.h" #include "unaligned.h" #include "pack_bits.h" @@ -262,7 +263,7 @@ m256 loadcompressed256_32bit(const void *ptr, m256 mvec) { expand32(v[4], m[4]), expand32(v[5], m[5]), expand32(v[6], m[6]), expand32(v[7], m[7]) }; -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) m256 xvec = { .lo = _mm_set_epi32(x[3], x[2], x[1], x[0]), .hi = _mm_set_epi32(x[7], x[6], x[5], x[4]) }; #else @@ -289,7 +290,7 @@ m256 loadcompressed256_64bit(const void *ptr, m256 mvec) { u64a x[4] = { expand64(v[0], m[0]), expand64(v[1], m[1]), expand64(v[2], m[2]), expand64(v[3], m[3]) }; -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) m256 xvec = { .lo = _mm_set_epi64x(x[1], x[0]), .hi = _mm_set_epi64x(x[3], x[2]) }; #else @@ -546,16 +547,21 @@ m512 loadcompressed512_32bit(const void *ptr, m512 mvec) { expand32(v[14], m[14]), expand32(v[15], m[15]) }; m512 xvec; -#if !defined(__AVX2__) - xvec.lo.lo = _mm_set_epi32(x[3], x[2], x[1], x[0]); - xvec.lo.hi = _mm_set_epi32(x[7], x[6], x[5], x[4]); - xvec.hi.lo = _mm_set_epi32(x[11], x[10], x[9], x[8]); - xvec.hi.hi = _mm_set_epi32(x[15], x[14], x[13], x[12]); -#else +#if defined(HAVE_AVX512) + xvec = _mm512_set_epi32(x[15], x[14], x[13], x[12], + x[11], x[10], x[9], x[8], + x[7], x[6], x[5], x[4], + x[3], x[2], x[1], x[0]); +#elif defined(HAVE_AVX2) xvec.lo = _mm256_set_epi32(x[7], x[6], x[5], x[4], x[3], x[2], x[1], x[0]); xvec.hi = _mm256_set_epi32(x[15], x[14], x[13], x[12], x[11], x[10], x[9], x[8]); +#else + xvec.lo.lo = _mm_set_epi32(x[3], x[2], x[1], x[0]); + xvec.lo.hi = _mm_set_epi32(x[7], x[6], x[5], x[4]); + xvec.hi.lo = _mm_set_epi32(x[11], x[10], x[9], x[8]); + xvec.hi.hi = _mm_set_epi32(x[15], x[14], x[13], x[12]); #endif return xvec; } @@ -581,14 +587,17 @@ m512 loadcompressed512_64bit(const void *ptr, m512 mvec) { expand64(v[4], m[4]), expand64(v[5], m[5]), expand64(v[6], m[6]), expand64(v[7], m[7]) }; -#if !defined(__AVX2__) +#if defined(HAVE_AVX512) + m512 xvec = _mm512_set_epi64(x[7], x[6], x[5], x[4], + x[3], x[2], x[1], x[0]); +#elif defined(HAVE_AVX2) + m512 xvec = { .lo = _mm256_set_epi64x(x[3], x[2], x[1], x[0]), + .hi = _mm256_set_epi64x(x[7], x[6], x[5], x[4])}; +#else m512 xvec = { .lo = { _mm_set_epi64x(x[1], x[0]), _mm_set_epi64x(x[3], x[2]) }, .hi = { _mm_set_epi64x(x[5], x[4]), _mm_set_epi64x(x[7], x[6]) } }; -#else - m512 xvec = { .lo = _mm256_set_epi64x(x[3], x[2], x[1], x[0]), - .hi = _mm256_set_epi64x(x[7], x[6], x[5], x[4])}; #endif return xvec; } diff --git a/src/util/target_info.cpp b/src/util/target_info.cpp index 4eadec2d2..3a41e0207 100644 --- a/src/util/target_info.cpp +++ b/src/util/target_info.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that 
the following conditions are met: @@ -46,6 +46,10 @@ bool target_t::can_run_on_code_built_for(const target_t &code_target) const { return false; } + if (!has_avx512() && code_target.has_avx512()) { + return false; + } + return true; } @@ -53,11 +57,15 @@ target_t::target_t(const hs_platform_info &p) : tune(p.tune), cpu_features(p.cpu_features) {} bool target_t::has_avx2(void) const { - return (cpu_features & HS_CPU_FEATURES_AVX2); + return cpu_features & HS_CPU_FEATURES_AVX2; +} + +bool target_t::has_avx512(void) const { + return cpu_features & HS_CPU_FEATURES_AVX512; } bool target_t::is_atom_class(void) const { - return tune == HS_TUNE_FAMILY_SLM; + return tune == HS_TUNE_FAMILY_SLM || tune == HS_TUNE_FAMILY_GLM; } } // namespace ue2 diff --git a/src/util/target_info.h b/src/util/target_info.h index 67b5b7d9e..794b29855 100644 --- a/src/util/target_info.h +++ b/src/util/target_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,6 +40,8 @@ struct target_t { bool has_avx2(void) const; + bool has_avx512(void) const; + bool is_atom_class(void) const; // This asks: can this target (the object) run on code that was built for diff --git a/src/util/ue2_containers.h b/src/util/ue2_containers.h index 5bbf4cfe9..29919c7e1 100644 --- a/src/util/ue2_containers.h +++ b/src/util/ue2_containers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,12 +30,15 @@ #define UTIL_UE2_CONTAINERS_H_ #include "ue2common.h" +#include "util/operators.h" +#include "util/small_vector.h" #include #include #include #include +#include #include #include #include @@ -57,8 +60,8 @@ class iter_wrapper : public boost::iterator_facade, Value, boost::random_access_traversal_tag> { public: - iter_wrapper() {} - explicit iter_wrapper(const WrappedIter &it_in) : it(it_in) {} + iter_wrapper() = default; + explicit iter_wrapper(WrappedIter it_in) : it(std::move(it_in)) {} // Templated copy-constructor to allow for interoperable iterator and // const_iterator. @@ -67,10 +70,10 @@ class iter_wrapper public: template - iter_wrapper(const iter_wrapper &other, + iter_wrapper(iter_wrapper other, typename std::enable_if::value>::type * = nullptr) - : it(other.it) {} + : it(std::move(other.it)) {} WrappedIter get() const { return it; } @@ -90,6 +93,58 @@ class iter_wrapper Value &dereference() const { return *it; } }; +template +class flat_base { +protected: + // Underlying storage is a small vector with local space for one element. + using storage_type = small_vector; + using storage_alloc_type = typename storage_type::allocator_type; + + // Putting our storage and comparator in a tuple allows us to make use of + // the empty base class optimization (if this STL implements it for + // std::tuple). 
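The tuple trick in the comment above is worth spelling out: because `std::less<T>` is an empty class, holding it as a named member costs at least one byte plus padding, whereas packing it into a `std::tuple` alongside the vector lets a standard library that applies the empty base optimization to tuple elements store it for free. A minimal illustration (the type names are local to the example, and the saving depends on the standard library):

```cpp
// Illustrative only: compares the footprint of a comparator held as a
// plain member against one packed into a std::tuple. Whether the tuple
// is smaller depends on the implementation's use of the empty base
// optimization for tuple elements.
#include <functional>
#include <iostream>
#include <tuple>
#include <vector>

struct PlainMember {
    std::vector<int> data;
    std::less<int> comp; // empty class, but still occupies >= 1 byte
};

struct TuplePacked {
    std::tuple<std::vector<int>, std::less<int>> storage;
};

int main() {
    // With EBO, the empty std::less<int> adds nothing to the tuple,
    // so TuplePacked is typically the size of the vector alone.
    std::cout << "vector alone: " << sizeof(std::vector<int>) << '\n'
              << "plain member: " << sizeof(PlainMember) << '\n'
              << "tuple packed: " << sizeof(TuplePacked) << '\n';
    return 0;
}
```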
+    std::tuple<storage_type, Compare> storage;
+
+    flat_base(const Compare &compare, const Allocator &alloc)
+        : storage(storage_type(storage_alloc_type(alloc)), compare) {}
+
+    storage_type &data() { return std::get<0>(this->storage); }
+    const storage_type &data() const { return std::get<0>(this->storage); }
+
+    Compare &comp() { return std::get<1>(this->storage); }
+    const Compare &comp() const { return std::get<1>(this->storage); }
+
+public:
+    // Common member types.
+    using key_compare = Compare;
+
+    Allocator get_allocator() const {
+        return data().get_allocator();
+    }
+
+    key_compare key_comp() const {
+        return comp();
+    }
+
+    // Capacity.
+
+    bool empty() const { return data().empty(); }
+    size_t size() const { return data().size(); }
+    size_t max_size() const { return data().max_size(); }
+
+    // Modifiers.
+
+    void clear() {
+        data().clear();
+    }
+
+    void swap(flat_base &a) {
+        using std::swap;
+        swap(comp(), a.comp());
+        swap(data(), a.data());
+    }
+};
+
 } // namespace flat_detail

 /**
@@ -102,33 +157,35 @@
 */
 template <class T, class Compare = std::less<T>,
          class Allocator = std::allocator<T>>
-class flat_set {
-    // Underlying storage is a sorted std::vector.
-    using StorageT = std::vector<T, Allocator>;
-
-    Compare comp;
-    StorageT data;
+class flat_set
+    : public flat_detail::flat_base<T, Compare, Allocator>,
+      public totally_ordered<flat_set<T, Compare, Allocator>> {
+    using base_type = flat_detail::flat_base<T, Compare, Allocator>;
+    using storage_type = typename base_type::storage_type;
+    using base_type::data;
+    using base_type::comp;

 public:
     // Member types.
     using key_type = T;
     using value_type = T;
-    using size_type = typename StorageT::size_type;
-    using difference_type = typename StorageT::difference_type;
-    using key_compare = Compare;
+    using size_type = typename storage_type::size_type;
+    using difference_type = typename storage_type::difference_type;
+    using key_compare = typename base_type::key_compare;
     using value_compare = Compare;
     using allocator_type = Allocator;
     using reference = value_type &;
     using const_reference = const value_type &;
-    using pointer = typename std::allocator_traits<Allocator>::pointer;
-    using const_pointer = typename std::allocator_traits<Allocator>::const_pointer;
+    using allocator_traits_type = typename std::allocator_traits<Allocator>;
+    using pointer = typename allocator_traits_type::pointer;
+    using const_pointer = typename allocator_traits_type::const_pointer;

     // Iterator types.

-    using iterator = flat_detail::iter_wrapper<typename StorageT::iterator,
+    using iterator = flat_detail::iter_wrapper<typename storage_type::iterator,
                                                value_type>;
     using const_iterator =
-        flat_detail::iter_wrapper<typename StorageT::const_iterator,
+        flat_detail::iter_wrapper<typename storage_type::const_iterator,
                                   const value_type>;

     using reverse_iterator = std::reverse_iterator<iterator>;
@@ -138,19 +195,19 @@ class flat_set {
     flat_set(const Compare &compare = Compare(),
              const Allocator &alloc = Allocator())
-        : comp(compare), data(alloc) {}
+        : base_type(compare, alloc) {}

     template <class InputIt>
     flat_set(InputIt first, InputIt last, const Compare &compare = Compare(),
              const Allocator &alloc = Allocator())
-        : comp(compare), data(alloc) {
+        : flat_set(compare, alloc) {
         insert(first, last);
     }

     flat_set(std::initializer_list<value_type> init,
              const Compare &compare = Compare(),
              const Allocator &alloc = Allocator())
-        : comp(compare), data(alloc) {
+        : flat_set(compare, alloc) {
         insert(init.begin(), init.end());
     }
@@ -159,20 +216,14 @@ class flat_set {
     flat_set &operator=(const flat_set &) = default;
     flat_set &operator=(flat_set &&) = default;

-    // Other members.
-
-    allocator_type get_allocator() const {
-        return data.get_allocator();
-    }
-
     // Iterators.
-    iterator begin() { return iterator(data.begin()); }
-    const_iterator cbegin() const { return const_iterator(data.cbegin()); }
+    iterator begin() { return iterator(data().begin()); }
+    const_iterator cbegin() const { return const_iterator(data().cbegin()); }
     const_iterator begin() const { return cbegin(); }

-    iterator end() { return iterator(data.end()); }
-    const_iterator cend() const { return const_iterator(data.cend()); }
+    iterator end() { return iterator(data().end()); }
+    const_iterator cend() const { return const_iterator(data().cend()); }
     const_iterator end() const { return cend(); }

     reverse_iterator rbegin() { return reverse_iterator(end()); }
@@ -187,22 +238,12 @@ class flat_set {
     }
     const_reverse_iterator rend() const { return crend(); }

-    // Capacity.
-
-    bool empty() const { return data.empty(); }
-    size_t size() const { return data.size(); }
-    size_t max_size() const { return data.max_size(); }
-
     // Modifiers.

-    void clear() {
-        data.clear();
-    }
-
     std::pair<iterator, bool> insert(const value_type &value) {
-        auto it = std::lower_bound(data.begin(), data.end(), value, comp);
-        if (it == data.end() || comp(value, *it)) {
-            return std::make_pair(iterator(data.insert(it, value)), true);
+        auto it = std::lower_bound(data().begin(), data().end(), value, comp());
+        if (it == data().end() || comp()(value, *it)) {
+            return std::make_pair(iterator(data().insert(it, value)), true);
         }
         return std::make_pair(iterator(it), false);
     }
@@ -212,9 +253,9 @@ class flat_set {
     }

     std::pair<iterator, bool> insert(value_type &&value) {
-        auto it = std::lower_bound(data.begin(), data.end(), value, comp);
-        if (it == data.end() || comp(value, *it)) {
-            return std::make_pair(iterator(data.insert(it, std::move(value))),
+        auto it = std::lower_bound(data().begin(), data().end(), value, comp());
+        if (it == data().end() || comp()(value, *it)) {
+            return std::make_pair(iterator(data().insert(it, std::move(value))),
                                   true);
         }
         return std::make_pair(iterator(it), false);
@@ -240,12 +281,12 @@ class flat_set {
         return insert(value_type(std::forward<Args>(args)...));
     }

-    void erase(iterator pos) {
-        data.erase(pos.get());
+    void erase(const_iterator pos) {
+        data().erase(pos.get());
     }

-    void erase(iterator first, iterator last) {
-        data.erase(first.get(), last.get());
+    void erase(const_iterator first, const_iterator last) {
+        data().erase(first.get(), last.get());
     }

     void erase(const key_type &key) {
@@ -255,12 +296,6 @@ class flat_set {
         }
     }

-    void swap(flat_set &a) {
-        using std::swap;
-        swap(comp, a.comp);
-        swap(data, a.data);
-    }
-
     // Lookup.

     size_type count(const value_type &value) const {
@@ -268,61 +303,50 @@ class flat_set {
     }

     iterator find(const value_type &value) {
-        auto it = std::lower_bound(data.begin(), data.end(), value, comp);
-        if (it != data.end() && comp(value, *it)) {
-            it = data.end();
+        auto it = std::lower_bound(data().begin(), data().end(), value, comp());
+        if (it != data().end() && comp()(value, *it)) {
+            it = data().end();
         }
         return iterator(it);
     }

     const_iterator find(const value_type &value) const {
-        auto it = std::lower_bound(data.begin(), data.end(), value, comp);
-        if (it != data.end() && comp(value, *it)) {
-            it = data.end();
+        auto it = std::lower_bound(data().begin(), data().end(), value, comp());
+        if (it != data().end() && comp()(value, *it)) {
+            it = data().end();
         }
         return const_iterator(it);
     }

     // Observers.

-    key_compare key_comp() const {
-        return comp;
-    }
-
     value_compare value_comp() const {
-        return comp;
+        return comp();
     }

-    // Operators.
+    // Operators. All others provided by ue2::totally_ordered.
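The ue2::totally_ordered base referenced here comes from the new util/operators.h header, which is not itself shown in this diff. Under the assumption that it follows the classic CRTP pattern (as boost::totally_ordered, which it replaces elsewhere in this patch, does), a sketch of the idiom that lets a class define only operator== and operator<:

    // Sketch of the totally_ordered idiom: the base synthesises the other
    // four comparisons from the two the derived class provides.
    template <typename T>
    struct totally_ordered {
        friend bool operator!=(const T &a, const T &b) { return !(a == b); }
        friend bool operator>(const T &a, const T &b) { return b < a; }
        friend bool operator<=(const T &a, const T &b) { return !(b < a); }
        friend bool operator>=(const T &a, const T &b) { return !(a < b); }
    };

    struct version : totally_ordered<version> {
        int major = 0, minor = 0;
        friend bool operator==(const version &a, const version &b) {
            return a.major == b.major && a.minor == b.minor;
        }
        friend bool operator<(const version &a, const version &b) {
            return a.major != b.major ? a.major < b.major : a.minor < b.minor;
        }
    };

The friend operators are found by argument-dependent lookup because the base is a base class of the argument type. This is why the hunks below can delete the hand-written !=, <=, > and >= operators from the flat containers.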
     bool operator==(const flat_set &a) const {
-        return data == a.data;
-    }
-    bool operator!=(const flat_set &a) const {
-        return data != a.data;
+        return data() == a.data();
     }
     bool operator<(const flat_set &a) const {
-        return data < a.data;
-    }
-    bool operator<=(const flat_set &a) const {
-        return data <= a.data;
-    }
-    bool operator>(const flat_set &a) const {
-        return data > a.data;
-    }
-    bool operator>=(const flat_set &a) const {
-        return data >= a.data;
+        return data() < a.data();
     }

     // Free swap function for ADL.
     friend void swap(flat_set &a, flat_set &b) {
         a.swap(b);
     }
+
+    // Free hash function.
+    friend size_t hash_value(const flat_set &a) {
+        return boost::hash_range(a.begin(), a.end());
+    }
 };

 /**
  * \brief Map container implemented internally as a sorted vector. Use this
- * rather than std::map for small sets as it's faster, uses less memory and
+ * rather than std::map for small maps as it's faster, uses less memory and
  * incurs less malloc time.
 *
 * Note: we used to use boost::flat_map, but have run into problems with all
@@ -336,7 +360,9 @@ class flat_set {
 */
 template <class Key, class T, class Compare = std::less<Key>,
          class Allocator = std::allocator<std::pair<Key, T>>>
-class flat_map {
+class flat_map
+    : public flat_detail::flat_base<std::pair<Key, T>, Compare, Allocator>,
+      public totally_ordered<flat_map<Key, T, Compare, Allocator>> {
 public:
     // Member types.
     using key_type = Key;
@@ -344,28 +370,29 @@ class flat_map {
     using value_type = std::pair<Key, T>;

 private:
-    // Underlying storage is a sorted std::vector.
-    using storage_type = std::pair<Key, T>;
-    using StorageT = std::vector<storage_type, Allocator>;
-
-    Compare comp;
-    StorageT data;
+    using base_type =
+        flat_detail::flat_base<std::pair<Key, T>, Compare, Allocator>;
+    using keyval_storage_type = std::pair<key_type, mapped_type>;
+    using storage_type = typename base_type::storage_type;
+    using base_type::data;
+    using base_type::comp;

 public:
     // More Member types.
-    using size_type = typename StorageT::size_type;
-    using difference_type = typename StorageT::difference_type;
-    using key_compare = Compare;
+    using size_type = typename storage_type::size_type;
+    using difference_type = typename storage_type::difference_type;
+    using key_compare = typename base_type::key_compare;
     using allocator_type = Allocator;
     using reference = value_type &;
     using const_reference = const value_type &;
-    using pointer = typename std::allocator_traits<Allocator>::pointer;
-    using const_pointer = typename std::allocator_traits<Allocator>::const_pointer;
+    using allocator_traits_type = typename std::allocator_traits<Allocator>;
+    using pointer = typename allocator_traits_type::pointer;
+    using const_pointer = typename allocator_traits_type::const_pointer;

 public:
     using const_iterator =
-        flat_detail::iter_wrapper<typename StorageT::const_iterator,
-                                  const value_type>;
+        flat_detail::iter_wrapper<typename storage_type::const_iterator,
+                                  const keyval_storage_type>;

     using const_reverse_iterator = std::reverse_iterator<const_iterator>;
@@ -377,19 +404,19 @@ class flat_map {
     flat_map(const Compare &compare = Compare(),
              const Allocator &alloc = Allocator())
-        : comp(compare), data(alloc) {}
+        : base_type(compare, alloc) {}

     template <class InputIt>
     flat_map(InputIt first, InputIt last, const Compare &compare = Compare(),
              const Allocator &alloc = Allocator())
-        : comp(compare), data(alloc) {
+        : flat_map(compare, alloc) {
         insert(first, last);
     }

     flat_map(std::initializer_list<value_type> init,
              const Compare &compare = Compare(),
              const Allocator &alloc = Allocator())
-        : comp(compare), data(alloc) {
+        : flat_map(compare, alloc) {
         insert(init.begin(), init.end());
     }
@@ -398,18 +425,12 @@ class flat_map {
     flat_map &operator=(const flat_map &) = default;
     flat_map &operator=(flat_map &&) = default;

-    // Other members.
-
-    allocator_type get_allocator() const {
-        return data.get_allocator();
-    }
-
     // Iterators.
-    const_iterator cbegin() const { return const_iterator(data.cbegin()); }
+    const_iterator cbegin() const { return const_iterator(data().cbegin()); }
     const_iterator begin() const { return cbegin(); }

-    const_iterator cend() const { return const_iterator(data.cend()); }
+    const_iterator cend() const { return const_iterator(data().cend()); }
     const_iterator end() const { return cend(); }

     const_reverse_iterator crbegin() const {
@@ -422,61 +443,55 @@ class flat_map {
     }
     const_reverse_iterator rend() const { return crend(); }

-    // Capacity.
-
-    bool empty() const { return data.empty(); }
-    size_t size() const { return data.size(); }
-    size_t max_size() const { return data.max_size(); }
-
 private:
-    using storage_iterator = typename StorageT::iterator;
-    using storage_const_iterator = typename StorageT::const_iterator;
+    using storage_iterator = typename storage_type::iterator;
+    using storage_const_iterator = typename storage_type::const_iterator;

     storage_iterator data_lower_bound(const key_type &key) {
         return std::lower_bound(
-            data.begin(), data.end(), key,
-            [&](const storage_type &elem, const key_type &k) {
-                return comp(elem.first, k);
+            data().begin(), data().end(), key,
+            [&](const keyval_storage_type &elem, const key_type &k) {
+                return comp()(elem.first, k);
             });
     }

     storage_const_iterator data_lower_bound(const key_type &key) const {
         return std::lower_bound(
-            data.begin(), data.end(), key,
-            [&](const storage_type &elem, const key_type &k) {
-                return comp(elem.first, k);
+            data().begin(), data().end(), key,
+            [&](const keyval_storage_type &elem, const key_type &k) {
+                return comp()(elem.first, k);
             });
     }

     std::pair<storage_iterator, bool> data_insert(const value_type &value) {
         auto it = data_lower_bound(value.first);
-        if (it == data.end() || comp(value.first, it->first)) {
-            return std::make_pair(data.insert(it, value), true);
+        if (it == data().end() || comp()(value.first, it->first)) {
+            return std::make_pair(data().insert(it, value), true);
         }
         return std::make_pair(it, false);
     }

     std::pair<storage_iterator, bool> data_insert(value_type &&value) {
         auto it = data_lower_bound(value.first);
-        if (it == data.end() || comp(value.first, it->first)) {
-            return std::make_pair(data.insert(it, std::move(value)), true);
+        if (it == data().end() || comp()(value.first, it->first)) {
+            return std::make_pair(data().insert(it, std::move(value)), true);
         }
         return std::make_pair(it, false);
     }

     storage_iterator data_find(const key_type &key) {
         auto it = data_lower_bound(key);
-        if (it != data.end() && comp(key, it->first)) {
-            it = data.end();
+        if (it != data().end() && comp()(key, it->first)) {
+            it = data().end();
         }
         return it;
     }

     storage_const_iterator data_find(const key_type &key) const {
         auto it = data_lower_bound(key);
-        if (it != data.end() && comp(key, it->first)) {
-            it = data.end();
+        if (it != data().end() && comp()(key, it->first)) {
+            it = data().end();
         }
         return it;
     }
@@ -484,10 +499,6 @@ class flat_map {
 public:
     // Modifiers.

-    void clear() {
-        data.clear();
-    }
-
     std::pair<iterator, bool> insert(const value_type &value) {
         auto rv = data_insert(value);
         return std::make_pair(iterator(rv.first), rv.second);
     }
@@ -514,17 +525,12 @@ class flat_map {
         return insert(value_type(std::forward<Args>(args)...));
     }

-    void erase(iterator pos) {
-        // Convert to a non-const storage iterator via pointer arithmetic.
-        storage_iterator it = data.begin() + distance(begin(), pos);
-        data.erase(it);
+    void erase(const_iterator pos) {
+        data().erase(pos.get());
     }

-    void erase(iterator first, iterator last) {
-        // Convert to a non-const storage iterator via pointer arithmetic.
-        storage_iterator data_first = data.begin() + distance(begin(), first);
-        storage_iterator data_last = data.begin() + distance(begin(), last);
-        data.erase(data_first, data_last);
+    void erase(const_iterator first, const_iterator last) {
+        data().erase(first.get(), last.get());
     }

     void erase(const key_type &key) {
@@ -534,12 +540,6 @@ class flat_map {
         }
     }

-    void swap(flat_map &a) {
-        using std::swap;
-        swap(comp, a.comp);
-        swap(data, a.data);
-    }
-
     // Lookup.

     size_type count(const key_type &key) const {
@@ -554,7 +554,7 @@ class flat_map {

     mapped_type &at(const key_type &key) {
         auto it = data_find(key);
-        if (it == data.end()) {
+        if (it == data().end()) {
             throw std::out_of_range("element not found");
         }
         return it->second;
@@ -562,7 +562,7 @@ class flat_map {

     const mapped_type &at(const key_type &key) const {
         auto it = data_find(key);
-        if (it == data.end()) {
+        if (it == data().end()) {
             throw std::out_of_range("element not found");
         }
         return it->second;
@@ -575,35 +575,39 @@ class flat_map {

     // Observers.

-    key_compare key_comp() const {
-        return comp;
+    class value_compare {
+        friend class flat_map;
+    protected:
+        Compare c;
+        value_compare(Compare c_in) : c(c_in) {}
+    public:
+        bool operator()(const value_type &lhs, const value_type &rhs) {
+            return c(lhs.first, rhs.first);
+        }
+    };
+
+    value_compare value_comp() const {
+        return value_compare(comp());
     }

-    // Operators.
+    // Operators. All others provided by ue2::totally_ordered.

     bool operator==(const flat_map &a) const {
-        return data == a.data;
-    }
-    bool operator!=(const flat_map &a) const {
-        return data != a.data;
+        return data() == a.data();
     }
     bool operator<(const flat_map &a) const {
-        return data < a.data;
-    }
-    bool operator<=(const flat_map &a) const {
-        return data <= a.data;
-    }
-    bool operator>(const flat_map &a) const {
-        return data > a.data;
-    }
-    bool operator>=(const flat_map &a) const {
-        return data >= a.data;
+        return data() < a.data();
     }

     // Free swap function for ADL.
     friend void swap(flat_map &a, flat_map &b) {
         a.swap(b);
     }
+
+    // Free hash function.
+    friend size_t hash_value(const flat_map &a) {
+        return boost::hash_range(a.begin(), a.end());
+    }
 };

 } // namespace
diff --git a/src/util/ue2_graph.h b/src/util/ue2_graph.h
index 9634b0322..138d7467d 100644
--- a/src/util/ue2_graph.h
+++ b/src/util/ue2_graph.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Intel Corporation
+ * Copyright (c) 2016-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -31,8 +31,9 @@
 #include "ue2common.h"
 #include "util/graph_range.h"
+#include "util/noncopyable.h"
+#include "util/operators.h"

-#include <boost/core/noncopyable.hpp>
 #include <boost/functional/hash.hpp>
 #include <boost/graph/properties.hpp> /* vertex_index_t, ... */
 #include <boost/pending/property.hpp> /* no_property */
@@ -156,7 +157,7 @@ namespace ue2 {

 namespace graph_detail {

-class graph_base : boost::noncopyable {
+class graph_base : noncopyable {
 };

 struct default_edge_property {
@@ -292,7 +293,7 @@ class ue2_graph : graph_detail::graph_base {
     using vertex_bundled = VertexPropertyType;
     using edge_bundled = EdgePropertyType;

-    class vertex_descriptor : boost::totally_ordered<vertex_descriptor> {
+    class vertex_descriptor : totally_ordered<vertex_descriptor> {
     public:
         vertex_descriptor() : p(nullptr), serial(0) { }
         explicit vertex_descriptor(vertex_node *pp)
@@ -324,7 +325,7 @@ class ue2_graph : graph_detail::graph_base {
         friend ue2_graph;
     };

-    class edge_descriptor : boost::totally_ordered<edge_descriptor> {
+    class edge_descriptor : totally_ordered<edge_descriptor> {
     public:
         edge_descriptor() : p(nullptr), serial(0) { }
         explicit edge_descriptor(edge_node *pp) : p(pp), serial(pp->serial) { }
diff --git a/src/util/ue2string.h b/src/util/ue2string.h
index 08b6a5442..a90d47a35 100644
--- a/src/util/ue2string.h
+++ b/src/util/ue2string.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -35,6 +35,7 @@
 #include "ue2common.h"
 #include "util/charreach.h"
+#include "util/hash.h"

 #include <string>
 #include <vector>
@@ -206,6 +207,13 @@ struct ue2_literal {
     std::vector<bool> nocase; /* for trolling value */
 };

+inline
+size_t hash_value(const ue2_literal::elem &elem) {
+    return hash_all(elem.c, elem.nocase);
+}
+
+inline
+size_t hash_value(const ue2_literal &lit) { return hash_range(lit); }

 /// Return a reversed copy of this literal.
 ue2_literal reverse_literal(const ue2_literal &in);
diff --git a/src/util/verify_types.h b/src/util/verify_types.h
index 98c24c997..5833d5ec6 100644
--- a/src/util/verify_types.h
+++ b/src/util/verify_types.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -30,45 +30,59 @@
 #define UTIL_VERIFY_TYPES

 #include "ue2common.h"
+#include "util/compile_error.h"

 #include <cassert>
+#include <type_traits>

 namespace ue2 {

-template <typename Int_T>
-static UNUSED u8 verify_u8(Int_T val) {
-    assert(val == (Int_T)((u8)val)); // there and back again
-    return (u8)(val);
+template <typename To_T, typename From_T>
+To_T verify_cast(From_T val) {
+    static_assert(std::is_integral<To_T>::value,
+                  "Output type must be integral.");
+    static_assert(std::is_integral<From_T>::value ||
+                  std::is_enum<From_T>::value ||
+                  std::is_convertible<From_T, To_T>::value,
+                  "Must be integral or enum type, or convertible to output.");
+
+    To_T conv_val = static_cast<To_T>(val);
+    if (static_cast<From_T>(conv_val) != val) {
+        assert(0);
+        throw ResourceLimitError();
+    }
+
+    return conv_val;
+}
+
+template <typename T>
+s8 verify_s8(T val) {
+    return verify_cast<s8>(val);
 }

-template <typename Int_T>
-static UNUSED s8 verify_s8(Int_T val) {
-    assert(val == (Int_T)((s8)val)); // there and back again
-    return (s8)(val);
+template <typename T>
+u8 verify_u8(T val) {
+    return verify_cast<u8>(val);
 }

-template <typename Int_T>
-static UNUSED s16 verify_s16(Int_T val) {
-    assert(val == (Int_T)((s16)val)); // there and back again
-    return (s16)(val);
+template <typename T>
+s16 verify_s16(T val) {
+    return verify_cast<s16>(val);
 }

-template <typename Int_T>
-static UNUSED u16 verify_u16(Int_T val) {
-    assert(val == (Int_T)((u16)val)); // there and back again
-    return (u16)(val);
+template <typename T>
+u16 verify_u16(T val) {
+    return verify_cast<u16>(val);
 }

-template <typename Int_T>
-static UNUSED s32 verify_s32(Int_T val) {
-    assert(val == (Int_T)((s32)val)); // there and back again
-    return (s32)(val);
+template <typename T>
+s32 verify_s32(T val) {
+    return verify_cast<s32>(val);
 }

-template <typename Int_T>
-static UNUSED u32 verify_u32(Int_T val) {
-    assert(val == (Int_T)((u32)val)); // there and back again
-    return (u32)(val);
+template <typename T>
+u32 verify_u32(T val) {
+    return verify_cast<u32>(val);
 }

 } // namespace ue2
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 049fd3681..61bb00f20 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -1,3 +1,6 @@
+if (WIN32)
+    return()
+endif()
 find_package(Threads)

 # remove some warnings
diff --git a/tools/hsbench/CMakeLists.txt b/tools/hsbench/CMakeLists.txt
index 25a833d08..9b2cde4db 100644
--- a/tools/hsbench/CMakeLists.txt
+++ b/tools/hsbench/CMakeLists.txt
@@ -1,4 +1,8 @@
 include (${CMAKE_MODULE_PATH}/sqlite3.cmake)
+if (NOT SQLITE3_FOUND)
+    message(STATUS "sqlite3 not found, not building hsbench")
+    return()
+endif()

 if (NOT XCODE)
     include_directories(SYSTEM ${SQLITE3_INCLUDE_DIRS})
@@ -7,6 +11,18 @@ else()
     set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -isystem ${SQLITE3_INCLUDE_DIRS}")
 endif()

+# BSD has the _np funcs in a _np header
+CHECK_INCLUDE_FILE_CXX(pthread_np.h HAVE_PTHREAD_NP_H)
+if (HAVE_PTHREAD_NP_H)
+    set (PTHREAD_NP_INC pthread_np.h)
+else ()
+    set (PTHREAD_NP_INC pthread.h)
+endif ()
+
+set (CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -D_GNU_SOURCE")
+set (CMAKE_REQUIRED_LIBRARIES pthread)
+CHECK_CXX_SYMBOL_EXISTS(pthread_setaffinity_np ${PTHREAD_NP_INC} HAVE_DECL_PTHREAD_SETAFFINITY_NP)
+
 CHECK_FUNCTION_EXISTS(malloc_info HAVE_MALLOC_INFO)
 CHECK_FUNCTION_EXISTS(shmget HAVE_SHMGET)
 set(HAVE_SHMGET ${HAVE_SHMGET} CACHE BOOL "shmget()")
diff --git a/tools/hsbench/common.h b/tools/hsbench/common.h
index a4d60021a..efff3f99d 100644
--- a/tools/hsbench/common.h
+++ b/tools/hsbench/common.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Intel Corporation
+ * Copyright (c) 2016-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -38,5 +38,7 @@ extern bool saveDatabases;
 extern bool loadDatabases;
 extern std::string serializePath;
 extern unsigned int somPrecisionMode;
+extern bool forceEditDistance;
+extern unsigned editDistance;

 #endif // COMMON_H
diff --git a/tools/hsbench/data_corpus.cpp b/tools/hsbench/data_corpus.cpp
index 55bfe93af..8e761ec34 100644
--- a/tools/hsbench/data_corpus.cpp
+++ b/tools/hsbench/data_corpus.cpp
@@ -110,7 +110,7 @@ vector readCorpus(const string &filename) {
     if (status != SQLITE_DONE) {
         ostringstream oss;
         oss << "Error retrieving blocks from corpus: "
-            << sqlite3_errstr(status);
+            << sqlite3_errmsg(db);
         status = sqlite3_finalize(statement);
         assert(status == SQLITE_OK);
diff --git a/tools/hsbench/engine_hyperscan.cpp b/tools/hsbench/engine_hyperscan.cpp
index f5abb9faf..9674e5c84 100644
--- a/tools/hsbench/engine_hyperscan.cpp
+++ b/tools/hsbench/engine_hyperscan.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Intel Corporation
+ * Copyright (c) 2016-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -36,7 +36,6 @@
 #include "huge.h"
 #include "timer.h"

-#include "crc32.h"
 #include "database.h"
 #include "hs_compile.h"
 #include "hs_internal.h"
@@ -46,12 +45,15 @@
 #include
 #include
+#include
 #include
 #include
 #include
 #include
 #include
+#include <boost/crc.hpp>
+
 using namespace std;

EngineContext::EngineContext(const
hs_database_t *db) { @@ -230,11 +232,13 @@ string dbSettingsHash(const string &filename, u32 mode) { string info = info_oss.str(); - u32 crc = Crc32c_ComputeBuf(0, info.data(), info.size()); + boost::crc_32_type crc; + + crc.process_bytes(info.data(), info.length()); // return STL string with printable version of digest ostringstream oss; - oss << hex << setw(8) << setfill('0') << crc << dec; + oss << hex << setw(8) << setfill('0') << crc.checksum() << dec; return oss.str(); } @@ -289,6 +293,10 @@ buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode, m.first); return nullptr; } + if (forceEditDistance) { + extparam.flags |= HS_EXT_FLAG_EDIT_DISTANCE; + extparam.edit_distance = editDistance; + } exprs.push_back(expr); ids.push_back(m.first); diff --git a/tools/hsbench/main.cpp b/tools/hsbench/main.cpp index 4298963b9..3153737ee 100644 --- a/tools/hsbench/main.cpp +++ b/tools/hsbench/main.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -56,6 +56,9 @@ #include #ifndef _WIN32 #include +#if defined(HAVE_PTHREAD_NP_H) +#include +#endif #include #endif @@ -72,6 +75,8 @@ bool saveDatabases = false; bool loadDatabases = false; string serializePath(""); unsigned int somPrecisionMode = HS_MODE_SOM_HORIZON_LARGE; +bool forceEditDistance = false; +unsigned editDistance = 0; namespace /* anonymous */ { @@ -120,7 +125,11 @@ class ThreadContext : boost::noncopyable { // Apply processor affinity (if available) to this thread. bool affine(UNUSED int cpu) { #ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP +#if defined(__linux__) cpu_set_t cpuset; +#else // BSD + cpuset_t cpuset; +#endif CPU_ZERO(&cpuset); assert(cpu >= 0 && cpu < CPU_SETSIZE); @@ -164,11 +173,15 @@ void usage(const char *error) { " (default: streaming).\n"); printf(" -V Benchmark in vectored mode" " (default: streaming).\n"); +#ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP printf(" -T CPU,CPU,... Benchmark with threads on these CPUs.\n"); +#endif printf(" -i DIR Don't compile, load from files in DIR" " instead.\n"); printf(" -w DIR After compiling, save to files in DIR.\n"); printf(" -d NUMBER Set SOM precision mode (default: 8 (large)).\n"); + printf(" -E DISTANCE Match all patterns within edit distance" + " DISTANCE.\n"); printf("\n"); printf(" --per-scan Display per-scan Mbit/sec results.\n"); printf(" --echo-matches Display all matches that occur during scan.\n"); @@ -190,8 +203,12 @@ struct BenchmarkSigs { /** Process command-line arguments. Prints usage and exits on error. 
*/
static
void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
-                UNUSED Grey &grey) {
-    const char options[] = "-b:c:Cd:e:G:hi:n:No:p:sT:Vw:z:";
+                UNUSED unique_ptr<Grey> &grey) {
+    const char options[] = "-b:c:Cd:e:E:G:hi:n:No:p:sVw:z:"
+#if HAVE_DECL_PTHREAD_SETAFFINITY_NP
+        "T:" // add the thread flag
+#endif
+    ;
     int in_sigfile = 0;
     int do_per_scan = 0;
     int do_echo_matches = 0;
@@ -237,9 +254,17 @@ void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
         case 'e':
             exprPath.assign(optarg);
             break;
+        case 'E':
+            if (!fromString(optarg, editDistance)) {
+                usage("Couldn't parse argument to -E flag, should be"
+                      " a non-negative integer.");
+                exit(1);
+            }
+            forceEditDistance = true;
+            break;
 #ifndef RELEASE_BUILD
         case 'G':
-            applyGreyOverrides(&grey, string(optarg));
+            applyGreyOverrides(grey.get(), string(optarg));
             break;
 #endif
         case 'h':
@@ -585,6 +610,17 @@ void displayPerScanResults(const vector<unique_ptr<ThreadContext>> &threads,
     printf("\n");
 }

+static
+double fastestResult(const vector<unique_ptr<ThreadContext>> &threads) {
+    double best = threads[0]->results[0].seconds;
+    for (const auto &t : threads) {
+        for (const auto &r : t->results) {
+            best = min(best, r.seconds);
+        }
+    }
+    return best;
+}
+
 static
 u64a byte_size(const vector<DataBlock> &corpus_blocks) {
     u64a total = 0;
@@ -638,8 +674,12 @@ void displayResults(const vector<unique_ptr<ThreadContext>> &threads,
     double blockRate = (double)totalBlocks / (double)totalSecs;
     printf("Overall block rate: %'0.2f blocks/sec\n", blockRate);
-    printf("Overall throughput: %'0.2Lf Mbit/sec\n",
+    printf("Mean throughput: %'0.2Lf Mbit/sec\n",
           calc_mbps(totalSecs, totalBytes));
+
+    double lowestScanTime = fastestResult(threads);
+    printf("Maximum throughput: %'0.2Lf Mbit/sec\n",
+           calc_mbps(lowestScanTime, bytesPerRun));
     printf("\n");

     if (display_per_scan) {
@@ -723,8 +763,10 @@ void runBenchmark(const EngineHyperscan &db,

 /** Main driver. */
 int main(int argc, char *argv[]) {
-    Grey grey;
-
+    unique_ptr<Grey> grey;
+#if !defined(RELEASE_BUILD)
+    grey = make_unique<Grey>();
+#endif
     setlocale(LC_ALL, ""); // use the user's locale

 #ifndef NDEBUG
@@ -742,6 +784,7 @@ int main(int argc, char *argv[]) {
     // known expressions together.
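The new Mean/Maximum throughput split above is plain arithmetic. A sketch of the megabit computation (this mirrors what hsbench's calc_mbps computes, but the helper below is illustrative only, not the tool's actual code):

    #include <cstdint>

    // Mbit/sec = (bytes * 8 bits) / (seconds * 10^6). "Mean" feeds total
    // bytes over total elapsed seconds; "Maximum" feeds the bytes of one
    // run over the single fastest run's time, as found by fastestResult().
    static double mbit_per_sec(double seconds, std::uint64_t bytes) {
        return (static_cast<double>(bytes) * 8.0) / (seconds * 1000000.0);
    }

    int main() {
        // e.g. one 2 MB corpus run whose fastest scan took 4 ms:
        double best = mbit_per_sec(0.004, 2000000); // = 4000 Mbit/sec
        return best > 0 ? 0 : 1;
    }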
if (sigSets.empty()) { SignatureSet sigs; + sigs.reserve(exprMapTemplate.size()); for (auto i : exprMapTemplate | map_keys) { sigs.push_back(i); } @@ -758,14 +801,12 @@ int main(int argc, char *argv[]) { } for (const auto &s : sigSets) { - ExpressionMap exprMap = exprMapTemplate; // copy - - limitBySignature(exprMap, s.sigs); + auto exprMap = limitToSignatures(exprMapTemplate, s.sigs); if (exprMap.empty()) { continue; } - auto engine = buildEngineHyperscan(exprMap, scan_mode, s.name, grey); + auto engine = buildEngineHyperscan(exprMap, scan_mode, s.name, *grey); if (!engine) { printf("Error: expressions failed to compile.\n"); exit(1); diff --git a/tools/hsbench/scripts/CorpusBuilder.py b/tools/hsbench/scripts/CorpusBuilder.py index 5baed2bd5..da2d593f5 100755 --- a/tools/hsbench/scripts/CorpusBuilder.py +++ b/tools/hsbench/scripts/CorpusBuilder.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python ''' A module to construct corpora databases for the Hyperscan benchmarker diff --git a/tools/hsbench/scripts/gutenbergCorpus.py b/tools/hsbench/scripts/gutenbergCorpus.py index fa1b1570d..62752a4d2 100755 --- a/tools/hsbench/scripts/gutenbergCorpus.py +++ b/tools/hsbench/scripts/gutenbergCorpus.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python ''' This script creates a Hyperscan benchmarking corpus database from a supplied diff --git a/tools/hsbench/scripts/linebasedCorpus.py b/tools/hsbench/scripts/linebasedCorpus.py index bde20e398..b27f8674f 100755 --- a/tools/hsbench/scripts/linebasedCorpus.py +++ b/tools/hsbench/scripts/linebasedCorpus.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python ''' Simple script to take a file full of lines of text and push them into a diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index 8b4944447..a7658b26a 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -30,12 +30,41 @@ if(CMAKE_COMPILER_IS_GNUCC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-array-bounds") endif() -add_library(gtest STATIC ${gtest_SOURCES}) - add_definitions(-DGTEST_HAS_PTHREAD=0 -DSRCDIR=${PROJECT_SOURCE_DIR}) +set(unit_hyperscan_SOURCES + ${gtest_SOURCES} + hyperscan/allocators.cpp + hyperscan/arg_checks.cpp + hyperscan/bad_patterns.cpp + hyperscan/bad_patterns.txt + hyperscan/behaviour.cpp + hyperscan/expr_info.cpp + hyperscan/extparam.cpp + hyperscan/identical.cpp + hyperscan/main.cpp + hyperscan/multi.cpp + hyperscan/order.cpp + hyperscan/scratch_op.cpp + hyperscan/scratch_in_use.cpp + hyperscan/serialize.cpp + hyperscan/single.cpp + hyperscan/som.cpp + hyperscan/stream_op.cpp + hyperscan/test_util.cpp + hyperscan/test_util.h + ) +add_executable(unit-hyperscan ${unit_hyperscan_SOURCES}) +if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) +target_link_libraries(unit-hyperscan hs_shared expressionutil) +else() +target_link_libraries(unit-hyperscan hs expressionutil) +endif() + + if (NOT (RELEASE_BUILD OR FAT_RUNTIME)) set(unit_internal_SOURCES + ${gtest_SOURCES} internal/bitfield.cpp internal/bitutils.cpp internal/charreach.cpp @@ -52,8 +81,6 @@ set(unit_internal_SOURCES internal/limex_nfa.cpp internal/masked_move.cpp internal/multi_bit.cpp - internal/multiaccel_matcher.cpp - internal/multiaccel_shift.cpp internal/nfagraph_common.h internal/nfagraph_comp.cpp internal/nfagraph_equivalence.cpp @@ -85,40 +112,13 @@ set(unit_internal_SOURCES internal/util_string.cpp internal/vermicelli.cpp internal/main.cpp -) + ) add_executable(unit-internal ${unit_internal_SOURCES}) -target_link_libraries(unit-internal hs gtest corpusomatic) 
+set_target_properties(unit-internal PROPERTIES COMPILE_FLAGS "${HS_CXX_FLAGS}") +target_link_libraries(unit-internal hs corpusomatic) endif(NOT (RELEASE_BUILD OR FAT_RUNTIME)) -set(unit_hyperscan_SOURCES - hyperscan/allocators.cpp - hyperscan/arg_checks.cpp - hyperscan/bad_patterns.cpp - hyperscan/bad_patterns.txt - hyperscan/behaviour.cpp - hyperscan/expr_info.cpp - hyperscan/extparam.cpp - hyperscan/identical.cpp - hyperscan/main.cpp - hyperscan/multi.cpp - hyperscan/order.cpp - hyperscan/scratch_op.cpp - hyperscan/scratch_in_use.cpp - hyperscan/serialize.cpp - hyperscan/single.cpp - hyperscan/som.cpp - hyperscan/stream_op.cpp - hyperscan/test_util.cpp - hyperscan/test_util.h - ) -add_executable(unit-hyperscan ${unit_hyperscan_SOURCES}) -if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) -target_link_libraries(unit-hyperscan hs_shared gtest expressionutil) -else() -target_link_libraries(unit-hyperscan hs gtest expressionutil) -endif() - # # build target to run unit tests # diff --git a/unit/hyperscan/bad_patterns.txt b/unit/hyperscan/bad_patterns.txt index 1a33210d1..3d6d9db90 100644 --- a/unit/hyperscan/bad_patterns.txt +++ b/unit/hyperscan/bad_patterns.txt @@ -70,8 +70,8 @@ 70:/foo[^\x00-\xff]/ #Pattern can never match. 71:/foo[^\x00-\xff]$/ #Pattern can never match. 72:/\Bd\B/i{min_length=2,min_offset=4,max_offset=54} #Expression has min_length=2 but can only produce matches of length 1 bytes at most. -73:/(((.|aaa)aaaaaa.aaa){14,19}a((a|a{5,6}|aa){3,11}|aa.|a){2}){4}\Z/sm #Pattern is too large. -74:/(((.|aaa)aaaaaa.aaa){14,19}a((a|a{5,6}|aa){3,11}|aa.|a){2}){4}\Z/smL #Pattern is too large. +73:/(((.|aaa)aaaaaa.aaa){14,19}a((a|a{5,6}|aa){3,11}|aa.|a){2}){40}\Z/sm #Pattern is too large. +74:/(((.|aaa)aaaaaa.aaa){14,19}a((a|a{5,6}|aa){3,11}|aa.|a){2}){40}\Z/smL #Pattern is too large. 75:/\B/s8{min_length=1} #Expression has min_length=1 but can only produce matches of length 0 bytes at most. 76:/(f|d|(\b)|i|a\Z)/mHV8{min_length=2,min_offset=9,max_offset=14} #Expression has min_length=2 but can only produce matches of length 1 bytes at most. 77:/(f|e|d{19,}|h\Z|^j|\Aa)/smi{min_length=7,min_offset=8,max_offset=18} #Extended parameter constraints can not be satisfied for any match from this expression. @@ -90,9 +90,9 @@ 91:/a\owibble/ #Value in \o{...} sequence is non-octal or missing braces at index 1. 92:/a\o{wibble/ #Value in \o{...} sequence is non-octal or missing braces at index 1. 93:/a\o{777}/ #Value in \o{...} sequence is too large at index 1. -94:/(*UTF16)foo/ #(*UTF16) not supported at index 2. -95:/(*BSR_UNICODE)abc/ #Unknown control verb at index 2. -96:/a+(*SKIP)b/ #Unknown control verb at index 4. +94:/(*UTF16)foo/ #Unsupported control verb (*UTF16) at index 0. +95:/(*BSR_UNICODE)abc/ #Unsupported control verb (*BSR_UNICODE) at index 0. +96:/a+(*SKIP)b/ #Unknown control verb (*SKIP) at index 2. 97:/foo(*/ #Invalid repeat at index 4. 98:/[:\]:]/ #POSIX named classes are only supported inside a class at index 0. 99:/[[:[:]/ #Invalid POSIX named class at index 1. @@ -130,3 +130,15 @@ 133:/[a[.\].]]/ #Unsupported POSIX collating element at index 2. 134:/[a[=\]=]]/ #Unsupported POSIX collating element at index 2. 135:/[^\D\d]/8W #Pattern can never match. +136:/(*LIMIT_MATCH=1000)foobar/ #Unsupported control verb (*LIMIT_MATCH=1000) at index 0. +137:/(*UTF32)foobar/ #Unsupported control verb (*UTF32) at index 0. +138:/(*UNKNOWNVERB)foobar/ #Unknown control verb (*UNKNOWNVERB) at index 0. +139:/foo(*UTF8)bar/ #(*UTF8) must be at start of expression, encountered at index 5. 
+140:/(?i)(*UTF8)foobar/ #(*UTF8) must be at start of expression, encountered at index 6. +141:/(*@&/ #Unknown control verb at index 2. +142:/abcd/si{edit_distance=4} #Approximate matching patterns that reduce to vacuous patterns are disallowed. +143:/foobar|hatstand/sL{edit_distance=6} #Approximate matching patterns that reduce to vacuous patterns are disallowed. +144:/abc\b/{edit_distance=1} #Zero-width assertions are disallowed for approximate matching. +145:/abc/8{edit_distance=1} #UTF-8 is disallowed for approximate matching. +146:/(*UTF8)abc/{edit_distance=1} #UTF-8 is disallowed for approximate matching. +147:/\b\BMYBt/s{edit_distance=1} #Pattern can never match. diff --git a/unit/hyperscan/expr_info.cpp b/unit/hyperscan/expr_info.cpp index 984104c55..7cc6abd7f 100644 --- a/unit/hyperscan/expr_info.cpp +++ b/unit/hyperscan/expr_info.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -51,6 +51,53 @@ struct expected_info { char matches_only_at_eod; }; +ostream& operator<<(ostream &os, const hs_expr_ext &ext) { + if (!ext.flags) { + return os; + } + bool first = true; + if (ext.flags & HS_EXT_FLAG_MIN_OFFSET) { + if (!first) { + os << ", "; + } + os << "min_offset=" << ext.min_offset; + first = false; + } + if (ext.flags & HS_EXT_FLAG_MAX_OFFSET) { + if (!first) { + os << ", "; + } + os << "max_offset=" << ext.max_offset; + first = false; + } + if (ext.flags & HS_EXT_FLAG_MIN_LENGTH) { + if (!first) { + os << ", "; + } + os << "min_length=" << ext.min_length; + first = false; + } + if (ext.flags & HS_EXT_FLAG_EDIT_DISTANCE) { + if (!first) { + os << ", "; + } + os << "edit_distance=" << ext.edit_distance; + first = false; + } + return os; +} + +// For Google Test. +void PrintTo(const expected_info &ei, ostream *os) { + *os << "expected_info: " + << "pattern=\"" << ei.pattern << "\"" + << ", ext={" << ei.ext << "}" + << ", min=" << ei.min << ", max=" << ei.max + << ", unordered_matches=" << (ei.unordered_matches ? 1 : 0) + << ", matches_at_eod=" << (ei.matches_at_eod ? 1 : 0) + << ", matches_only_at_eod=" << (ei.matches_only_at_eod ? 1 : 0); +} + class ExprInfop : public TestWithParam { }; @@ -124,7 +171,7 @@ TEST_P(ExprInfop, check_ext_null) { free(info); } -static const hs_expr_ext NO_EXT_PARAM = { 0, 0, 0, 0 }; +static const hs_expr_ext NO_EXT_PARAM = { 0, 0, 0, 0, 0 }; static const expected_info ei_test[] = { {"abc", NO_EXT_PARAM, 3, 3, 0, 0, 0}, @@ -167,10 +214,38 @@ static const expected_info ei_test[] = { {"(foo|bar)\\z", NO_EXT_PARAM, 3, 3, 0, 1, 1}, // Some cases with extended parameters. 
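The edit_distance entries below exercise the new approximate matching support and show how it narrows or widens the expression's match-width bounds. For context, this is how a caller requests approximate matching through the extended-parameter API (a sketch, with error handling reduced to the minimum):

    #include <cstring>
    #include <hs.h>

    // Compile "abc.*def" so that matches within edit distance 2 are reported.
    static hs_database_t *build_approx_db() {
        hs_expr_ext ext;
        std::memset(&ext, 0, sizeof(ext));
        ext.flags = HS_EXT_FLAG_EDIT_DISTANCE;
        ext.edit_distance = 2;

        const char *expr = "abc.*def";
        unsigned flags = 0;
        unsigned id = 1;
        const hs_expr_ext *ext_ptr = &ext;

        hs_database_t *db = nullptr;
        hs_compile_error_t *compile_err = nullptr;
        if (hs_compile_ext_multi(&expr, &flags, &id, &ext_ptr, 1,
                                 HS_MODE_BLOCK, nullptr, &db,
                                 &compile_err) != HS_SUCCESS) {
            hs_free_compile_error(compile_err);
            return nullptr;
        }
        return db;
    }

This is the same hs_compile_ext_multi() path the test utilities in this patch use; note that edit_distance is the new fifth field of hs_expr_ext, which is why the aggregate initialisers below gain an extra element.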
- {"^abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0}, 6, 10, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0}, 6, 10, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100}, 100, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 5}, 6, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0}, 6, 10, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0}, 100, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0}, 6, 10, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0}, 100, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 5, 0}, 6, UINT_MAX, 0, 0, 0}, + + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2}, + 10, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2}, + 4, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2}, + 4, 6, 0, 0, 0}, + + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2}, + 10, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2}, + 4, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2}, + 4, 6, 0, 0, 0}, + + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, 7, 0, 0, 0}, + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, 8, 0, 0, 0}, + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 8, 2}, + 8, 8, 0, 0, 0}, + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2}, + 4, 8, 0, 0, 0}, + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2}, + 4, 6, 0, 0, 0}, }; INSTANTIATE_TEST_CASE_P(ExprInfo, ExprInfop, ValuesIn(ei_test)); diff --git a/unit/hyperscan/serialize.cpp b/unit/hyperscan/serialize.cpp index 7e0fcb7ce..3b34abacd 100644 --- a/unit/hyperscan/serialize.cpp +++ b/unit/hyperscan/serialize.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,55 +31,78 @@ */ #include "config.h" -#include -#include -#include - #include "gtest/gtest.h" #include "hs.h" #include "hs_internal.h" #include "test_util.h" +#include +#include +#include + namespace { using namespace std; using namespace testing; static const unsigned validModes[] = { - HS_MODE_STREAM, - HS_MODE_NOSTREAM + HS_MODE_NOSTREAM, + HS_MODE_STREAM | HS_MODE_SOM_HORIZON_LARGE, + HS_MODE_VECTORED }; -class Serializep : public TestWithParam { +static const pattern testPatterns[] = { + pattern("hatstand.*teakettle.*badgerbrush", HS_FLAG_CASELESS, 1000), + pattern("hatstand.*teakettle.*badgerbrush", HS_FLAG_DOTALL, 1001), + pattern("hatstand|teakettle|badgerbrush", 0, 1002), + pattern("^hatstand|teakettle|badgerbrush$", 0, 1003), + pattern("foobar.{10,1000}xyzzy", HS_FLAG_DOTALL, 1004), + pattern("foobar.{2,501}roobar", 0, 1005), + pattern("abc.*def.*ghi", HS_FLAG_SOM_LEFTMOST, 1006), + pattern("(\\p{L}){4}", HS_FLAG_UTF8|HS_FLAG_UCP, 1007), + 
pattern("\\.(exe|pdf|gif|jpg|png|wav|riff|mp4)\\z", 0, 1008) }; +class SerializeP : public TestWithParam> {}; + +static +const char *getModeString(unsigned mode) { + if (mode & HS_MODE_STREAM) { + return "STREAM"; + } + if (mode & HS_MODE_BLOCK) { + return "BLOCK"; + } + if (mode & HS_MODE_VECTORED) { + return "VECTORED"; + } + return "UNKNOWN"; +} + // Check that we can deserialize from a char array at any alignment and the info // is consistent -TEST_P(Serializep, DeserializeFromAnyAlignment) { - const unsigned mode = GetParam(); +TEST_P(SerializeP, DeserializeFromAnyAlignment) { + const unsigned mode = get<0>(GetParam()); + const pattern &pat = get<1>(GetParam()); SCOPED_TRACE(mode); + SCOPED_TRACE(pat); hs_error_t err; - hs_database_t *db = buildDB("hatstand.*teakettle.*badgerbrush", - HS_FLAG_CASELESS, 1000, mode); + hs_database_t *db = buildDB(pat, mode); ASSERT_TRUE(db != nullptr) << "database build failed."; char *original_info = nullptr; err = hs_database_info(db, &original_info); ASSERT_EQ(HS_SUCCESS, err); - const char *mode_string = nullptr; - switch (mode) { - case HS_MODE_STREAM: - mode_string = "STREAM"; - break; - case HS_MODE_NOSTREAM: - mode_string = "BLOCK"; - } + const char *mode_string = getModeString(mode); - ASSERT_NE(nullptr, original_info) << "hs_serialized_database_info returned null."; + ASSERT_NE(nullptr, original_info) + << "hs_serialized_database_info returned null."; ASSERT_STREQ("Version:", string(original_info).substr(0, 8).c_str()); - ASSERT_TRUE(strstr(original_info, mode_string) != nullptr); + ASSERT_TRUE(strstr(original_info, mode_string) != nullptr) + << "Original info \"" << original_info + << "\" does not contain " << mode_string; char *bytes = nullptr; size_t length = 0; @@ -133,31 +156,28 @@ TEST_P(Serializep, DeserializeFromAnyAlignment) { // Check that we can deserialize_at from a char array at any alignment and the // info is consistent -TEST_P(Serializep, DeserializeAtFromAnyAlignment) { - const unsigned mode = GetParam(); +TEST_P(SerializeP, DeserializeAtFromAnyAlignment) { + const unsigned mode = get<0>(GetParam()); + const pattern &pat = get<1>(GetParam()); SCOPED_TRACE(mode); + SCOPED_TRACE(pat); hs_error_t err; - hs_database_t *db = buildDB("hatstand.*teakettle.*badgerbrush", - HS_FLAG_CASELESS, 1000, mode); + hs_database_t *db = buildDB(pat, mode); ASSERT_TRUE(db != nullptr) << "database build failed."; char *original_info; err = hs_database_info(db, &original_info); ASSERT_EQ(HS_SUCCESS, err); - const char *mode_string = nullptr; - switch (mode) { - case HS_MODE_STREAM: - mode_string = "STREAM"; - break; - case HS_MODE_NOSTREAM: - mode_string = "BLOCK"; - } + const char *mode_string = getModeString(mode); - ASSERT_NE(nullptr, original_info) << "hs_serialized_database_info returned null."; + ASSERT_NE(nullptr, original_info) + << "hs_serialized_database_info returned null."; ASSERT_STREQ("Version:", string(original_info).substr(0, 8).c_str()); - ASSERT_TRUE(strstr(original_info, mode_string) != nullptr); + ASSERT_TRUE(strstr(original_info, mode_string) != nullptr) + << "Original info \"" << original_info + << "\" does not contain " << mode_string; char *bytes = nullptr; size_t length = 0; @@ -217,8 +237,8 @@ TEST_P(Serializep, DeserializeAtFromAnyAlignment) { delete[] mem; } -INSTANTIATE_TEST_CASE_P(Serialize, Serializep, - ValuesIn(validModes)); +INSTANTIATE_TEST_CASE_P(Serialize, SerializeP, + Combine(ValuesIn(validModes), ValuesIn(testPatterns))); // Attempt to reproduce the scenario in UE-1946. 
TEST(Serialize, CrossCompileSom) { @@ -226,11 +246,10 @@ TEST(Serialize, CrossCompileSom) { plat.cpu_features = 0; plat.tune = HS_TUNE_FAMILY_GENERIC; - static const char *pattern = "hatstand.*(badgerbrush|teakettle)"; + static const char *pat = "hatstand.*(badgerbrush|teakettle)"; const unsigned mode = HS_MODE_STREAM | HS_MODE_SOM_HORIZON_LARGE; - hs_database_t *db = buildDB(pattern, HS_FLAG_SOM_LEFTMOST, 1000, mode, - &plat); + hs_database_t *db = buildDB(pat, HS_FLAG_SOM_LEFTMOST, 1000, mode, &plat); ASSERT_TRUE(db != nullptr) << "database build failed."; size_t db_len; @@ -275,15 +294,16 @@ static void misaligned_free(void *p) { free(c - 1); } -// make sure that serializing/deserializing to null or an unaligned address fails +// make sure that serializing/deserializing to null or an unaligned address +// fails TEST(Serialize, CompileNullMalloc) { hs_database_t *db; hs_compile_error_t *c_err; - static const char *pattern = "hatstand.*(badgerbrush|teakettle)"; + static const char *pat = "hatstand.*(badgerbrush|teakettle)"; // mallocing null should fail compile hs_set_allocator(null_malloc, nullptr); - hs_error_t err = hs_compile(pattern, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); + hs_error_t err = hs_compile(pat, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); ASSERT_NE(HS_SUCCESS, err); ASSERT_TRUE(db == nullptr); ASSERT_TRUE(c_err != nullptr); @@ -294,14 +314,14 @@ TEST(Serialize, CompileNullMalloc) { TEST(Serialize, CompileErrorAllocator) { hs_database_t *db; hs_compile_error_t *c_err; - static const char *pattern = "hatsta^nd.*(badgerbrush|teakettle)"; + static const char *pat = "hatsta^nd.*(badgerbrush|teakettle)"; // failing to compile should use the misc allocator allocated_count = 0; allocated_count_b = 0; hs_set_allocator(count_malloc_b, count_free_b); hs_set_misc_allocator(count_malloc, count_free); - hs_error_t err = hs_compile(pattern, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); + hs_error_t err = hs_compile(pat, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); ASSERT_NE(HS_SUCCESS, err); ASSERT_TRUE(db == nullptr); ASSERT_TRUE(c_err != nullptr); @@ -315,13 +335,13 @@ TEST(Serialize, CompileErrorAllocator) { TEST(Serialize, AllocatorsUsed) { hs_database_t *db; hs_compile_error_t *c_err; - static const char *pattern = "hatstand.*(badgerbrush|teakettle)"; + static const char *pat = "hatstand.*(badgerbrush|teakettle)"; allocated_count = 0; allocated_count_b = 0; hs_set_allocator(count_malloc_b, count_free_b); hs_set_database_allocator(count_malloc, count_free); - hs_error_t err = hs_compile(pattern, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); + hs_error_t err = hs_compile(pat, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(db != nullptr); ASSERT_TRUE(c_err == nullptr); @@ -344,15 +364,14 @@ TEST(Serialize, AllocatorsUsed) { ASSERT_EQ(0, allocated_count_b); } - TEST(Serialize, CompileUnalignedMalloc) { hs_database_t *db; hs_compile_error_t *c_err; - static const char *pattern = "hatstand.*(badgerbrush|teakettle)"; + static const char *pat = "hatstand.*(badgerbrush|teakettle)"; // unaligned malloc should fail compile hs_set_allocator(misaligned_malloc, misaligned_free); - hs_error_t err = hs_compile(pattern, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); + hs_error_t err = hs_compile(pat, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); ASSERT_NE(HS_SUCCESS, err); ASSERT_TRUE(db == nullptr); ASSERT_TRUE(c_err != nullptr); @@ -363,8 +382,8 @@ TEST(Serialize, CompileUnalignedMalloc) { TEST(Serialize, SerializeNullMalloc) { hs_database_t *db; hs_compile_error_t *c_err; - static const 
char *pattern = "hatstand.*(badgerbrush|teakettle)"; - hs_error_t err = hs_compile(pattern, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); + static const char *pat = "hatstand.*(badgerbrush|teakettle)"; + hs_error_t err = hs_compile(pat, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(db != nullptr); @@ -384,13 +403,14 @@ TEST(Serialize, SerializeNullMalloc) { hs_free_database(db); } -// make sure that serializing/deserializing to null or an unaligned address fails +// make sure that serializing/deserializing to null or an unaligned address +// fails TEST(Serialize, SerializeUnalignedMalloc) { hs_database_t *db; hs_compile_error_t *c_err; - static const char *pattern = "hatstand.*(badgerbrush|teakettle)"; + static const char *pat= "hatstand.*(badgerbrush|teakettle)"; - hs_error_t err = hs_compile(pattern, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); + hs_error_t err = hs_compile(pat, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(db != nullptr); @@ -414,9 +434,9 @@ TEST(Serialize, SerializeUnalignedMalloc) { TEST(Serialize, DeserializeNullMalloc) { hs_database_t *db; hs_compile_error_t *c_err; - static const char *pattern = "hatstand.*(badgerbrush|teakettle)"; + static const char *pat = "hatstand.*(badgerbrush|teakettle)"; - hs_error_t err = hs_compile(pattern, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); + hs_error_t err = hs_compile(pat, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(db != nullptr); @@ -447,9 +467,9 @@ TEST(Serialize, DeserializeNullMalloc) { TEST(Serialize, DeserializeUnalignedMalloc) { hs_database_t *db; hs_compile_error_t *c_err; - static const char *pattern = "hatstand.*(badgerbrush|teakettle)"; + static const char *pat = "hatstand.*(badgerbrush|teakettle)"; - hs_error_t err = hs_compile(pattern, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); + hs_error_t err = hs_compile(pat, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(db != nullptr); @@ -486,9 +506,9 @@ TEST(Serialize, DeserializeUnalignedMalloc) { TEST(Serialize, DeserializeGarbage) { hs_database_t *db; hs_compile_error_t *c_err; - static const char *pattern = "hatstand.*(badgerbrush|teakettle)"; + static const char *pat = "hatstand.*(badgerbrush|teakettle)"; - hs_error_t err = hs_compile(pattern, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); + hs_error_t err = hs_compile(pat, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(db != nullptr); diff --git a/unit/hyperscan/single.cpp b/unit/hyperscan/single.cpp index 029d223ae..01fbfeab5 100644 --- a/unit/hyperscan/single.cpp +++ b/unit/hyperscan/single.cpp @@ -363,7 +363,8 @@ static const unsigned validModes[] = { // Mode bits for switching off various architecture features static const unsigned long long featureMask[] = { ~0ULL, /* native */ - ~HS_CPU_FEATURES_AVX2, /* no avx2 */ + ~(HS_CPU_FEATURES_AVX2 | HS_CPU_FEATURES_AVX512), /* no avx2 */ + ~HS_CPU_FEATURES_AVX512, /* no avx512 */ }; INSTANTIATE_TEST_CASE_P(Single, diff --git a/unit/hyperscan/test_util.cpp b/unit/hyperscan/test_util.cpp index 345b05d00..f6c20a74e 100644 --- a/unit/hyperscan/test_util.cpp +++ b/unit/hyperscan/test_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,32 +26,37 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#include -#include -#include -#include - #include "hs.h" #include "test_util.h" #include "gtest/gtest.h" #include "util/expressions.h" #include "util/ExpressionParser.h" +#include +#include +#include +#include + using namespace std; int record_cb(unsigned id, unsigned long long, unsigned long long to, unsigned, void *ctxt) { CallBackContext *c = (CallBackContext *)ctxt; - c->matches.push_back(MatchRecord(to, id)); + c->matches.emplace_back(to, id); return (int)c->halt; } -std::ostream &operator<< (std::ostream &o, const MatchRecord &m) { +std::ostream &operator<<(std::ostream &o, const MatchRecord &m) { return o << "[" << m.to << ", " << m.id << "]"; } +std::ostream &operator<<(std::ostream &o, const pattern &p) { + return o << "[" << "expr=\"" << p.expression << "\", flags=" << p.flags + << ", id=" << p.id << "]"; +} + hs_database_t *buildDB(const vector &patterns, unsigned int mode, hs_platform_info *plat) { vector expressions; @@ -59,20 +64,20 @@ hs_database_t *buildDB(const vector &patterns, unsigned int mode, vector ids; vector ext; - for (vector::const_iterator it = patterns.begin(); - it != patterns.end(); ++it) { - expressions.push_back(it->expression.c_str()); - flags.push_back(it->flags); - ids.push_back(it->id); - ext.push_back(&it->ext); + for (const auto &pat : patterns) { + expressions.push_back(pat.expression.c_str()); + flags.push_back(pat.flags); + ids.push_back(pat.id); + ext.push_back(&pat.ext); } hs_database_t *db = nullptr; hs_compile_error_t *compile_err = nullptr; hs_error_t err; - err = hs_compile_ext_multi(&expressions[0], &flags[0], &ids[0], &ext[0], - patterns.size(), mode, plat, &db, &compile_err); + err = hs_compile_ext_multi(expressions.data(), flags.data(), ids.data(), + ext.data(), patterns.size(), mode, plat, &db, + &compile_err); if (err != HS_SUCCESS) { return nullptr; @@ -82,15 +87,13 @@ hs_database_t *buildDB(const vector &patterns, unsigned int mode, } hs_database_t *buildDB(const pattern &expr, unsigned int mode) { - return buildDB(vector(1, expr), mode); + return buildDB(vector({expr}), mode); } hs_database_t *buildDB(const char *expression, unsigned int flags, unsigned int id, unsigned int mode, hs_platform_info_t *plat) { - vector patterns; - patterns.push_back(pattern(expression, flags, id)); - return buildDB(patterns, mode, plat); + return buildDB({pattern(expression, flags, id)}, mode, plat); } hs_database_t *buildDB(const char *filename, unsigned int mode, @@ -99,16 +102,14 @@ hs_database_t *buildDB(const char *filename, unsigned int mode, ExpressionMap expressions; loadExpressionsFromFile(filename, expressions); - for (ExpressionMap::iterator it = expressions.begin(); - it != expressions.end(); ++it) { + for (const auto &expr : expressions) { unsigned int flags = 0; string regex; hs_expr_ext ext; - if (!readExpression(it->second, regex, &flags, &ext)) { + if (!readExpression(expr.second, regex, &flags, &ext)) { return nullptr; } - patterns.push_back(pattern(regex, flags | extra_flags, it->first, - ext)); + patterns.emplace_back(regex, flags | extra_flags, expr.first, ext); } return buildDB(patterns, mode); } @@ -145,13 +146,13 @@ hs_database_t *buildDB(const char *filename, unsigned int mode, ExpressionMap expressions; loadExpressionsFromFile(filename, expressions); - for (ExpressionMap::iterator it = expressions.begin(); - it != expressions.end(); ++it) { + for (const auto &expr : expressions) { unsigned int flags = 0; string regex; hs_expr_ext ext; bool must_be_ordered; - if (!readExpression(it->second, regex, &flags, &ext, 
&must_be_ordered)) { + if (!readExpression(expr.second, regex, &flags, &ext, + &must_be_ordered)) { return nullptr; } @@ -159,7 +160,7 @@ hs_database_t *buildDB(const char *filename, unsigned int mode, return nullptr; } - patterns.emplace_back(regex, flags, it->first, ext); + patterns.emplace_back(regex, flags, expr.first, ext); } return buildDB(patterns, mode); } diff --git a/unit/hyperscan/test_util.h b/unit/hyperscan/test_util.h index fad6137c1..efa0570c3 100644 --- a/unit/hyperscan/test_util.h +++ b/unit/hyperscan/test_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,13 +29,13 @@ #ifndef TEST_UTIL_H #define TEST_UTIL_H +#include "hs.h" + #include #include #include #include -#include "hs.h" - #ifndef UNUSED #if defined(_WIN32) || defined(_WIN64) #define UNUSED @@ -53,11 +53,10 @@ struct MatchRecord { int id; }; -std::ostream &operator<< (std::ostream &o, const MatchRecord &m); +std::ostream &operator<<(std::ostream &o, const MatchRecord &m); struct CallBackContext { - CallBackContext() : halt(false) {} - bool halt; + bool halt = false; std::vector matches; void clear() { @@ -79,22 +78,29 @@ int dummy_cb(unsigned, unsigned long long, unsigned long long, unsigned, struct pattern { std::string expression; - unsigned int flags; - unsigned int id; + unsigned int flags = 0; + unsigned int id = 0; hs_expr_ext ext; - pattern(const std::string &expression_in, unsigned int flags_in = 0, - unsigned int id_in = 0) : expression(expression_in), - flags(flags_in), id(id_in) { + // We need a default constructor for combining in parameterised tests. 
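The default constructor called out in the comment just below exists so that pattern can take part in the std::tuple parameters produced by googletest's Combine(), as the reworked serialize tests now require. A reduced sketch of that combination (hypothetical test names, standing in for the SerializeP tests above):

    #include "gtest/gtest.h"
    #include <tuple>

    // Parameter elements must be copyable, and the framework may
    // default-construct tuple members before the real values are copied in.
    class ComboP : public ::testing::TestWithParam<std::tuple<unsigned, int>> {};

    TEST_P(ComboP, CrossProduct) {
        const unsigned mode = std::get<0>(GetParam());
        const int id = std::get<1>(GetParam());
        ASSERT_GT(mode, 0u);
        ASSERT_GT(id, 0);
    }

    INSTANTIATE_TEST_CASE_P(Demo, ComboP,
                            ::testing::Combine(::testing::Values(1u, 2u),
                                               ::testing::Values(100, 200)));

Each test instance then receives one element of the cross product, which is how the serialize tests cover every mode against every pattern.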
+ pattern() { memset(&ext, 0, sizeof(ext)); } - pattern(const std::string &expression_in, unsigned int flags_in, - unsigned int id_in, const hs_expr_ext &ext_in) : - expression(expression_in), flags(flags_in), id(id_in), - ext(ext_in) { } + explicit pattern(std::string expression_in, + unsigned int flags_in = 0, unsigned int id_in = 0) + : expression(std::move(expression_in)), flags(flags_in), id(id_in) { + memset(&ext, 0, sizeof(ext)); + } + + pattern(std::string expression_in, unsigned int flags_in, + unsigned int id_in, hs_expr_ext ext_in) + : expression(std::move(expression_in)), flags(flags_in), id(id_in), + ext(std::move(ext_in)) {} }; +std::ostream &operator<<(std::ostream &o, const pattern &p); + hs_database_t *buildDB(const std::vector &patterns, unsigned int mode, hs_platform_info *plat = nullptr); hs_database_t *buildDB(const pattern &pat, unsigned int mode); diff --git a/unit/internal/bitutils.cpp b/unit/internal/bitutils.cpp index 31aaf17fc..3f7885449 100644 --- a/unit/internal/bitutils.cpp +++ b/unit/internal/bitutils.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,32 +29,33 @@ #include "config.h" #include "gtest/gtest.h" +#include "util/arch.h" #include "util/bitutils.h" #include "util/popcount.h" // open coded implementations to test against static u32 our_clz(u32 x) { - u32 n; - - if (x == 0) return(32); - n = 0; - if (x <= 0x0000FFFF) { n = n + 16; x = x << 16; } - if (x <= 0x00FFFFFF) { n = n + 8; x = x << 8; } - if (x <= 0x0FFFFFFF) { n = n + 4; x = x << 4; } - if (x <= 0x3FFFFFFF) { n = n + 2; x = x << 2; } - if (x <= 0x7FFFFFFF) { n = n + 1; } - return n; + u32 n; + + if (x == 0) return(32); + n = 0; + if (x <= 0x0000FFFF) { n = n + 16; x = x << 16; } + if (x <= 0x00FFFFFF) { n = n + 8; x = x << 8; } + if (x <= 0x0FFFFFFF) { n = n + 4; x = x << 4; } + if (x <= 0x3FFFFFFF) { n = n + 2; x = x << 2; } + if (x <= 0x7FFFFFFF) { n = n + 1; } + return n; } static u32 our_clzll(u64a x) { - // Synthesise from 32-bit variant. - u32 high = x >> 32; - if (high) { - return our_clz(high); - } - return 32 + our_clz(x); + // Synthesise from 32-bit variant. 
+ u32 high = x >> 32; + if (high) { + return our_clz(high); + } + return 32 + our_clz(x); } @@ -437,7 +438,7 @@ TEST(BitUtils, rank_in_mask64) { ASSERT_EQ(31, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 63)); } -#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) +#if defined(HAVE_BMI2) && defined(ARCH_64_BIT) TEST(BitUtils, pdep64) { u64a data = 0xF123456789ABCDEF; ASSERT_EQ(0xfULL, pdep64(data, 0xf)); diff --git a/unit/internal/database.cpp b/unit/internal/database.cpp index cb3e76b50..8f0c1a695 100644 --- a/unit/internal/database.cpp +++ b/unit/internal/database.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +33,7 @@ #include "crc32.h" #include "database.h" #include "ue2common.h" +#include "util/arch.h" #include "util/target_info.h" #include "gtest/gtest.h" @@ -47,10 +48,14 @@ TEST(DB, flagsToPlatform) { p.cpu_features = 0; -#if defined(__AVX2__) +#if defined(HAVE_AVX2) p.cpu_features |= HS_CPU_FEATURES_AVX2; #endif +#if defined(HAVE_AVX512) + p.cpu_features |= HS_CPU_FEATURES_AVX512; +#endif + platform_t pp = target_to_platform(target_t(p)); ASSERT_EQ(pp, hs_current_platform); } diff --git a/unit/internal/depth.cpp b/unit/internal/depth.cpp index a004643b5..ad9ffe388 100644 --- a/unit/internal/depth.cpp +++ b/unit/internal/depth.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -112,9 +112,10 @@ TEST(depth, add_finite) { ASSERT_EQ(depth(900), depth(1000) + s32{-100}); // overflow must throw + depth max_depth(depth::max_value()); depth d; - ASSERT_THROW(d = depth::max_value() + depth(1), DepthOverflowError); - ASSERT_THROW(d = depth::max_value() + 1, DepthOverflowError); + ASSERT_THROW(d = max_depth + depth(1), DepthOverflowError); + ASSERT_THROW(d = max_depth + 1, DepthOverflowError); // underflow must throw ASSERT_THROW(d = depth(0) + s32{-1}, DepthOverflowError); @@ -267,11 +268,11 @@ TEST(depth, unordered_set) { ue2::unordered_set depths; for (const auto &val : finite_values) { - depths.insert(val); + depths.emplace(val); } for (const auto &val : finite_values) { - ASSERT_TRUE(depths.find(val) != depths.end()); + ASSERT_TRUE(depths.find(depth(val)) != depths.end()); } ASSERT_TRUE(depths.find(depth::infinity()) == depths.end()); diff --git a/unit/internal/fdr.cpp b/unit/internal/fdr.cpp index 6116bfdb6..bd0bb4c0c 100644 --- a/unit/internal/fdr.cpp +++ b/unit/internal/fdr.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -96,15 +96,6 @@ struct match { }; extern "C" { -static -hwlmcb_rv_t countCallback(UNUSED size_t start, UNUSED size_t end, u32, - void *ctxt) { - if (ctxt) { - ++*(u32 *)ctxt; - } - - return HWLM_CONTINUE_MATCHING; -} static hwlmcb_rv_t decentCallback(size_t start, size_t end, u32 id, void *ctxt) { @@ -231,42 +222,6 @@ TEST_P(FDRp, MultiLocation) { } } -TEST_P(FDRp, Flood) { - const u32 hint = GetParam(); - SCOPED_TRACE(hint); - - vector lits; - lits.push_back(hwlmLiteral("aaaa", 0, 1)); - lits.push_back(hwlmLiteral("aaaaaaaa", 0, 2)); 
- lits.push_back(hwlmLiteral("baaaaaaaa", 0, 3)); - lits.push_back(hwlmLiteral("aaaaaaaab", 0, 4)); - - auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); - CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); - - const u32 testSize = 1024; - vector data(testSize, 'a'); - - vector matches; - fdrExec(fdr.get(), data.data(), testSize, 0, decentCallback, &matches, - HWLM_ALL_GROUPS); - ASSERT_EQ(testSize - 3 + testSize - 7, matches.size()); - EXPECT_EQ(match(0, 3, 1), matches[0]); - EXPECT_EQ(match(1, 4, 1), matches[1]); - EXPECT_EQ(match(2, 5, 1), matches[2]); - EXPECT_EQ(match(3, 6, 1), matches[3]); - - u32 currentMatch = 4; - for (u32 i = 7; i < testSize; i++, currentMatch += 2) { - EXPECT_TRUE( - (match(i - 3, i, 1) == matches[currentMatch] && - match(i - 7, i, 2) == matches[currentMatch+1]) || - (match(i - 7, i, 2) == matches[currentMatch+1] && - match(i - 3, i, 1) == matches[currentMatch]) - ); - } -} - TEST_P(FDRp, NoRepeat1) { const u32 hint = GetParam(); SCOPED_TRACE(hint); @@ -414,36 +369,6 @@ TEST_P(FDRp, SmallStreaming2) { ASSERT_EQ(expected.size(), matches.size()); } -TEST_P(FDRp, LongLiteral) { - const u32 hint = GetParam(); - SCOPED_TRACE(hint); - size_t sz; - const u8 *data; - vector lits; - - string alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - string alpha4 = alpha+alpha+alpha+alpha; - lits.push_back(hwlmLiteral(alpha4.c_str(), 0,10)); - - auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); - CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); - - u32 count = 0; - - data = (const u8 *)alpha4.c_str(); - sz = alpha4.size(); - - fdrExec(fdr.get(), data, sz, 0, countCallback, &count, HWLM_ALL_GROUPS); - EXPECT_EQ(1U, count); - count = 0; - fdrExec(fdr.get(), data, sz - 1, 0, countCallback, &count, HWLM_ALL_GROUPS); - EXPECT_EQ(0U, count); - count = 0; - fdrExec(fdr.get(), data + 1, sz - 1, 0, countCallback, &count, - HWLM_ALL_GROUPS); - EXPECT_EQ(0U, count); -} - TEST_P(FDRp, moveByteStream) { const u32 hint = GetParam(); SCOPED_TRACE(hint); @@ -458,7 +383,7 @@ TEST_P(FDRp, moveByteStream) { size_t size = fdrSize(fdrTable0.get()); - auto fdrTable = aligned_zmalloc_unique(size); + auto fdrTable = make_bytecode_ptr(size, 64); EXPECT_NE(nullptr, fdrTable); memcpy(fdrTable.get(), fdrTable0.get(), size); @@ -491,7 +416,7 @@ TEST_P(FDRp, Stream1) { vector lits; lits.push_back(hwlmLiteral("f", 0, 0)); - lits.push_back(hwlmLiteral("longsigislong", 0, 1)); + lits.push_back(hwlmLiteral("literal", 0, 1)); auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); @@ -514,7 +439,7 @@ INSTANTIATE_TEST_CASE_P(FDR, FDRp, ValuesIn(getValidFdrEngines())); typedef struct { string pattern; - unsigned char alien; + unsigned char alien; // character not present in pattern } pattern_alien_t; // gtest helper @@ -529,7 +454,6 @@ class FDRpp : public TestWithParam> {}; // not happen if literal is partially (from 1 character up to full literal // length) is out of searched buffer - "too early" and "too late" conditions TEST_P(FDRpp, AlignAndTooEarly) { - const size_t buf_alignment = 32; // Buffer should be big enough to hold two instances of matching literals // (up to 64 bytes each) and room for offset (up to 32 bytes) @@ -538,7 +462,7 @@ TEST_P(FDRpp, AlignAndTooEarly) { const u32 hint = get<0>(GetParam()); SCOPED_TRACE(hint); - // pattern which is used to generate literals of variable size - from 1 to 64 + // pattern which is used to generate literals of variable size - from 1 to 8 const 
string &pattern = get<1>(GetParam()).pattern; const size_t patLen = pattern.size(); const unsigned char alien = get<1>(GetParam()).alien; @@ -551,7 +475,7 @@ TEST_P(FDRpp, AlignAndTooEarly) { vector lits; for (size_t litLen = 1; litLen <= patLen; litLen++) { - // building literal from pattern substring of variable length 1-64 + // building literal from pattern substring of variable length 1-patLen lits.push_back(hwlmLiteral(string(pattern, 0, litLen), 0, 0)); auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); @@ -596,9 +520,9 @@ TEST_P(FDRpp, AlignAndTooEarly) { } static const pattern_alien_t test_pattern[] = { - {"abaabaaabaaabbaaaaabaaaaabbaaaaaaabaabbaaaabaaaaaaaabbbbaaaaaaab", 'x'}, - {"zzzyyzyzyyyyzyyyyyzzzzyyyyyyyyzyyyyyyyzzzzzyzzzzzzzzzyzzyzzzzzzz", (unsigned char)'\x99'}, - {"abcdef lafjk askldfjklf alfqwei9rui 'gldgkjnooiuswfs138746453583", '\0'} + {"abaabaaa", 'x'}, + {"zzzyyzyz", (unsigned char)'\x99'}, + {"abcdef l", '\0'} }; INSTANTIATE_TEST_CASE_P(FDR, FDRpp, Combine(ValuesIn(getValidFdrEngines()), diff --git a/unit/internal/fdr_flood.cpp b/unit/internal/fdr_flood.cpp index 7b00ac4c8..952fffc19 100644 --- a/unit/internal/fdr_flood.cpp +++ b/unit/internal/fdr_flood.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -161,8 +161,8 @@ TEST_P(FDRFloodp, NoMask) { vector lits; // build literals of type "aaaa", "aaab", "baaa" - // of lengths 1, 2, 4, 8, 16, 32, both case-less and case-sensitive - for (int i = 0; i < 6 ; i++) { + // of lengths 1, 2, 4, 8, both case-less and case-sensitive + for (int i = 0; i < 4; i++) { string s(1 << i, c); lits.push_back(hwlmLiteral(s, false, i * 8 + 0)); s[0] = cAlt; @@ -183,13 +183,13 @@ TEST_P(FDRFloodp, NoMask) { Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); - map matchesCounts; + map matchesCounts; hwlm_error_t fdrStatus = fdrExec(fdr.get(), &data[0], dataSize, 0, countCallback, (void *)&matchesCounts, HWLM_ALL_GROUPS); ASSERT_EQ(0, fdrStatus); - for (u8 i = 0; i < 6 ; i++) { + for (u8 i = 0; i < 4; i++) { u32 cnt = dataSize - (1 << i) + 1; ASSERT_EQ(cnt, matchesCounts[i * 8 + 0]); ASSERT_EQ(0, matchesCounts[i * 8 + 1]); @@ -214,7 +214,7 @@ TEST_P(FDRFloodp, NoMask) { 0, countCallback, (void *)&matchesCounts, HWLM_ALL_GROUPS); ASSERT_EQ(0, fdrStatus); - for (u8 i = 0; i < 6 ; i++) { + for (u8 i = 0; i < 4; i++) { u32 cnt = dataSize - (1 << i) + 1; ASSERT_EQ(0, matchesCounts[i * 8 + 0]); ASSERT_EQ(i == 0 ? cnt : 0, matchesCounts[i * 8 + 1]); diff --git a/unit/internal/fdr_loadval.cpp b/unit/internal/fdr_loadval.cpp index 22fee7704..bb5efb5f0 100644 --- a/unit/internal/fdr_loadval.cpp +++ b/unit/internal/fdr_loadval.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,7 +30,7 @@ #include "gtest/gtest.h" #include "fdr/fdr_loadval.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" using namespace std; using namespace testing; @@ -71,7 +71,7 @@ static void fillWithBytes(u8 *ptr, size_t len) { TYPED_TEST(FDR_Loadval, Normal) { // We should be able to do a normal load at any alignment. 
const size_t len = sizeof(TypeParam); - aligned_unique_ptr mem_p = aligned_zmalloc_unique(len + 15); + auto mem_p = make_bytecode_ptr(len + 15, 16); u8 * mem = mem_p.get(); ASSERT_TRUE(ISALIGNED_16(mem)); fillWithBytes(mem, len + 15); @@ -90,7 +90,7 @@ TYPED_TEST(FDR_Loadval, CautiousEverywhere) { // the 'lo' ptr or after the 'hi' ptr. const size_t len = sizeof(TypeParam); - aligned_unique_ptr mem_p = aligned_zmalloc_unique(len + 1); + auto mem_p = make_bytecode_ptr(len + 1, 16); u8 *mem = mem_p.get() + 1; // force unaligned fillWithBytes(mem, len); diff --git a/unit/internal/flat_map.cpp b/unit/internal/flat_map.cpp index 54372dece..6a81bbfed 100644 --- a/unit/internal/flat_map.cpp +++ b/unit/internal/flat_map.cpp @@ -211,6 +211,7 @@ TEST(flat_map, custom_compare) { ASSERT_EQ(10, f.rbegin()->second); ASSERT_TRUE(flat_map_is_sorted(f)); + ASSERT_TRUE(std::is_sorted(f.begin(), f.end(), f.value_comp())); ASSERT_TRUE(flat_map_is_sorted_cmp(f, std::greater())); } @@ -401,3 +402,41 @@ TEST(flat_map, max_size) { flat_map f; ASSERT_LE(1ULL << 24, f.max_size()); } + +TEST(flat_map, hash_value) { + const vector> input = { + {0, 0}, {3, 1}, {76, 2}, {132, 3}, {77, 4}, {99999, 5}, {100, 6}}; + for (size_t len = 0; len < input.size(); len++) { + flat_map f1(input.begin(), input.begin() + len); + flat_map f2(input.rbegin() + input.size() - len, + input.rend()); + EXPECT_EQ(hash_value(f1), hash_value(f2)); + + // Try removing an element. + auto f3 = f1; + EXPECT_EQ(hash_value(f1), hash_value(f3)); + EXPECT_EQ(hash_value(f2), hash_value(f3)); + if (!f3.empty()) { + f3.erase(f3.begin()); + EXPECT_NE(hash_value(f1), hash_value(f3)); + EXPECT_NE(hash_value(f2), hash_value(f3)); + } + + // Try adding an element. + f3 = f1; + EXPECT_EQ(hash_value(f1), hash_value(f3)); + EXPECT_EQ(hash_value(f2), hash_value(f3)); + f3.emplace(32767, 7); + EXPECT_NE(hash_value(f1), hash_value(f3)); + EXPECT_NE(hash_value(f2), hash_value(f3)); + + // Change a value, but not a key. + f3 = f1; + EXPECT_EQ(hash_value(f1), hash_value(f3)); + EXPECT_EQ(hash_value(f2), hash_value(f3)); + f3.erase(77); + f3.emplace(77, 10); + EXPECT_NE(hash_value(f1), hash_value(f3)); + EXPECT_NE(hash_value(f2), hash_value(f3)); + } +} diff --git a/unit/internal/flat_set.cpp b/unit/internal/flat_set.cpp index 7d45cbb20..3bee0edbe 100644 --- a/unit/internal/flat_set.cpp +++ b/unit/internal/flat_set.cpp @@ -392,3 +392,31 @@ TEST(flat_set, max_size) { flat_set f; ASSERT_LE(1ULL << 24, f.max_size()); } + +TEST(flat_set, hash_value) { + const vector input = {0, 15, 3, 1, 20, 32768, + 24000000, 17, 100, 101, 104, 99999}; + for (size_t len = 0; len < input.size(); len++) { + flat_set f1(input.begin(), input.begin() + len); + flat_set f2(input.rbegin() + input.size() - len, input.rend()); + EXPECT_EQ(hash_value(f1), hash_value(f2)); + + // Try removing an element. + auto f3 = f1; + EXPECT_EQ(hash_value(f1), hash_value(f3)); + EXPECT_EQ(hash_value(f2), hash_value(f3)); + if (!f3.empty()) { + f3.erase(f3.begin()); + EXPECT_NE(hash_value(f1), hash_value(f3)); + EXPECT_NE(hash_value(f2), hash_value(f3)); + } + + // Try adding an element. 
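The `hash_value` tests above hinge on one property: a flat container keeps its elements in sorted order, so hashing its underlying storage is independent of insertion order. A small self-contained illustration of that property, using `boost::hash_range` as a stand-in for the library's internal hasher:

```cpp
#include <boost/functional/hash.hpp>

#include <algorithm>
#include <cassert>
#include <vector>

// Hash the sorted contents, mimicking a flat_set's invariant that its
// backing vector is always kept sorted.
static std::size_t hash_sorted(std::vector<unsigned> v) {
    std::sort(v.begin(), v.end());
    return boost::hash_range(v.begin(), v.end());
}

int main() {
    const std::vector<unsigned> fwd = {0, 15, 3, 1, 20, 32768};
    const std::vector<unsigned> rev(fwd.rbegin(), fwd.rend());

    // Same elements, different insertion order: the hashes must agree.
    assert(hash_sorted(fwd) == hash_sorted(rev));
    return 0;
}
```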
+ f3 = f1; + EXPECT_EQ(hash_value(f1), hash_value(f3)); + EXPECT_EQ(hash_value(f2), hash_value(f3)); + f3.insert(32767); + EXPECT_NE(hash_value(f1), hash_value(f3)); + EXPECT_NE(hash_value(f2), hash_value(f3)); + } +} diff --git a/unit/internal/lbr.cpp b/unit/internal/lbr.cpp index e40bda02d..d32f7e8fa 100644 --- a/unit/internal/lbr.cpp +++ b/unit/internal/lbr.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,20 +29,20 @@ #include "config.h" #include "gtest/gtest.h" -#include "util/target_info.h" -#include "util/charreach.h" +#include "grey.h" +#include "hs_compile.h" /* for controlling ssse3 usage */ +#include "compiler/compiler.h" #include "nfa/lbr.h" #include "nfa/nfa_api.h" -#include "nfa/nfa_internal.h" #include "nfa/nfa_api_util.h" +#include "nfa/nfa_internal.h" +#include "nfagraph/ng.h" #include "nfagraph/ng_lbr.h" #include "nfagraph/ng_util.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" +#include "util/charreach.h" #include "util/compile_context.h" -#include "grey.h" -#include "nfagraph/ng.h" -#include "compiler/compiler.h" -#include "hs_compile.h" /* for controlling ssse3 usage */ +#include "util/target_info.h" #include @@ -96,7 +96,8 @@ class LbrTest : public TestWithParam { const CompileContext cc(true, false, target, grey); ReportManager rm(cc.grey); ParsedExpression parsed(0, pattern.c_str(), flags, 0); - unique_ptr g = buildWrapper(rm, cc, parsed); + auto built_expr = buildGraph(rm, cc, parsed); + const auto &g = built_expr.g; ASSERT_TRUE(g != nullptr); clearReports(*g); @@ -109,8 +110,8 @@ class LbrTest : public TestWithParam { nfa = constructLBR(*g, triggers, cc, rm); ASSERT_TRUE(nfa != nullptr); - full_state = aligned_zmalloc_unique(nfa->scratchStateSize); - stream_state = aligned_zmalloc_unique(nfa->streamStateSize); + full_state = make_bytecode_ptr(nfa->scratchStateSize, 64); + stream_state = make_bytecode_ptr(nfa->streamStateSize); } virtual void initQueue() { @@ -151,13 +152,13 @@ class LbrTest : public TestWithParam { unsigned matches; // Compiled NFA structure. - aligned_unique_ptr nfa; + bytecode_ptr nfa; - // Space for full state. - aligned_unique_ptr full_state; + // Aligned space for full state. + bytecode_ptr full_state; // Space for stream state. - aligned_unique_ptr stream_state; + bytecode_ptr stream_state; // Queue structure. 
struct mq q; diff --git a/unit/internal/limex_nfa.cpp b/unit/internal/limex_nfa.cpp index 804fcb1f2..c70ceeae1 100644 --- a/unit/internal/limex_nfa.cpp +++ b/unit/internal/limex_nfa.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,7 +38,7 @@ #include "nfagraph/ng.h" #include "nfagraph/ng_limex.h" #include "nfagraph/ng_util.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/target_info.h" using namespace std; @@ -73,7 +73,8 @@ class LimExModelTest : public TestWithParam { CompileContext cc(false, false, target, Grey()); ReportManager rm(cc.grey); ParsedExpression parsed(0, expr.c_str(), flags, 0); - unique_ptr g = buildWrapper(rm, cc, parsed); + auto built_expr = buildGraph(rm, cc, parsed); + const auto &g = built_expr.g; ASSERT_TRUE(g != nullptr); clearReports(*g); @@ -87,8 +88,8 @@ class LimExModelTest : public TestWithParam { type, cc); ASSERT_TRUE(nfa != nullptr); - full_state = aligned_zmalloc_unique(nfa->scratchStateSize); - stream_state = aligned_zmalloc_unique(nfa->streamStateSize); + full_state = make_bytecode_ptr(nfa->scratchStateSize, 64); + stream_state = make_bytecode_ptr(nfa->streamStateSize); } virtual void initQueue() { @@ -115,13 +116,13 @@ class LimExModelTest : public TestWithParam { unsigned matches; // Compiled NFA structure. - aligned_unique_ptr nfa; + bytecode_ptr nfa; // Space for full state. - aligned_unique_ptr full_state; + bytecode_ptr full_state; // Space for stream state. - aligned_unique_ptr stream_state; + bytecode_ptr stream_state; // Queue structure. struct mq q; @@ -186,8 +187,7 @@ TEST_P(LimExModelTest, CompressExpand) { // Expand state into a new copy and check that it matches the original // uncompressed state. - aligned_unique_ptr state_copy = - aligned_zmalloc_unique(nfa->scratchStateSize); + auto state_copy = make_bytecode_ptr(nfa->scratchStateSize, 64); char *dest = state_copy.get(); memset(dest, 0xff, nfa->scratchStateSize); nfaExpandState(nfa.get(), dest, q.streamState, q.offset, @@ -306,7 +306,8 @@ class LimExReverseTest : public TestWithParam { CompileContext cc(false, false, get_current_target(), Grey()); ReportManager rm(cc.grey); ParsedExpression parsed(0, expr.c_str(), flags, 0); - unique_ptr g = buildWrapper(rm, cc, parsed); + auto built_expr = buildGraph(rm, cc, parsed); + const auto &g = built_expr.g; ASSERT_TRUE(g != nullptr); clearReports(*g); @@ -329,7 +330,7 @@ class LimExReverseTest : public TestWithParam { unsigned matches; // Compiled NFA structure. 
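`make_bytecode_ptr<T>(size, align)` appears throughout these hunks as the replacement for `aligned_zmalloc_unique`. The sketch below shows the general shape of such an owner; the names and the POSIX `posix_memalign` allocator are illustrative assumptions, and ue2's real `bytecode_ptr` in `util/bytecode_ptr.h` differs in detail (it also records size and alignment):

```cpp
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <memory>

struct aligned_deleter {
    // free() is the matching deallocator for posix_memalign() blocks.
    void operator()(void *p) const { free(p); }
};

template <typename T>
using aligned_block_ptr = std::unique_ptr<T, aligned_deleter>;

template <typename T>
aligned_block_ptr<T> make_aligned_block(size_t bytes, size_t align) {
    // posix_memalign requires a power-of-two multiple of sizeof(void *).
    align = std::max(align, sizeof(void *));
    void *p = nullptr;
    if (posix_memalign(&p, align, bytes) != 0) {
        return nullptr;
    }
    memset(p, 0, bytes); // the old zmalloc variants returned zeroed memory
    return aligned_block_ptr<T>(static_cast<T *>(p));
}

// Usage mirroring the tests above, e.g. 64-byte aligned NFA scratch state:
//   auto full_state = make_aligned_block<char>(scratch_size, 64);
```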
- aligned_unique_ptr nfa; + bytecode_ptr nfa; }; INSTANTIATE_TEST_CASE_P(LimExReverse, LimExReverseTest, @@ -365,7 +366,8 @@ class LimExZombieTest : public TestWithParam { CompileContext cc(true, false, get_current_target(), Grey()); ParsedExpression parsed(0, expr.c_str(), flags, 0); ReportManager rm(cc.grey); - unique_ptr g = buildWrapper(rm, cc, parsed); + auto built_expr = buildGraph(rm, cc, parsed); + const auto &g = built_expr.g; ASSERT_TRUE(g != nullptr); clearReports(*g); @@ -379,8 +381,8 @@ class LimExZombieTest : public TestWithParam { type, cc); ASSERT_TRUE(nfa != nullptr); - full_state = aligned_zmalloc_unique(nfa->scratchStateSize); - stream_state = aligned_zmalloc_unique(nfa->streamStateSize); + full_state = make_bytecode_ptr(nfa->scratchStateSize, 64); + stream_state = make_bytecode_ptr(nfa->streamStateSize); } virtual void initQueue() { @@ -407,13 +409,13 @@ class LimExZombieTest : public TestWithParam { unsigned matches; // Compiled NFA structure. - aligned_unique_ptr nfa; + bytecode_ptr nfa; // Space for full state. - aligned_unique_ptr full_state; + bytecode_ptr full_state; // Space for stream state. - aligned_unique_ptr stream_state; + bytecode_ptr stream_state; // Queue structure. struct mq q; diff --git a/unit/internal/main.cpp b/unit/internal/main.cpp index 566ae1a54..15e41d0bf 100644 --- a/unit/internal/main.cpp +++ b/unit/internal/main.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -27,9 +27,10 @@ */ #include "gtest/gtest.h" +#include "hs_common.h" // Driver: run all the tests (defined in other source files in this directory) -int main(int argc, char **argv) { +int HS_CDECL main(int argc, char **argv) { testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); diff --git a/unit/internal/masked_move.cpp b/unit/internal/masked_move.cpp index 6a2d742db..7bd78c504 100644 --- a/unit/internal/masked_move.cpp +++ b/unit/internal/masked_move.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,11 +31,12 @@ #include #include "gtest/gtest.h" +#include "util/arch.h" #include "util/masked_move.h" namespace { -#if defined(__AVX2__) +#if defined(HAVE_AVX2) bool try_mask_len(const u8 *buf, u8 *target, size_t len) { memset(target, 0, 32); diff --git a/unit/internal/multi_bit.cpp b/unit/internal/multi_bit.cpp index 38da1d8ac..2b0c7c797 100644 --- a/unit/internal/multi_bit.cpp +++ b/unit/internal/multi_bit.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,6 +30,7 @@ #include "gtest/gtest.h" #include "ue2common.h" +#include "rose/rose_build_scatter.h" #include "util/compile_error.h" #include "util/make_unique.h" #include "util/multibit.h" @@ -698,7 +699,9 @@ TEST_P(MultiBitTest, InitRangeChunked) { for (u32 n = 2; n <= 10; n++) { u32 chunk_size = test_size / n; - if (chunk_size == 0) break; + if (chunk_size == 0) { + break; + } for (u32 k = 0; k < n; k++) { u32 chunk_begin = k * chunk_size; @@ -723,9 +726,62 @@ TEST_P(MultiBitTest, 
InitRangeChunked) { } } +static +void apply(const scatter_plan_raw &sp, u8 *out) { + for (const auto &e : sp.p_u64a) { + memcpy(out + e.offset, &e.val, sizeof(e.val)); + } + for (const auto &e : sp.p_u32) { + memcpy(out + e.offset, &e.val, sizeof(e.val)); + } + for (const auto &e : sp.p_u16) { + memcpy(out + e.offset, &e.val, sizeof(e.val)); + } + for (const auto &e : sp.p_u8) { + memcpy(out + e.offset, &e.val, sizeof(e.val)); + } +} + +TEST_P(MultiBitTest, InitRangePlanChunked) { + SCOPED_TRACE(test_size); + ASSERT_TRUE(ba != nullptr); + + // Init ranges chunk by chunk. + + for (u32 n = 2; n <= 10; n++) { + u32 chunk_size = test_size / n; + if (chunk_size == 0) { + break; + } + + for (u32 k = 0; k < n; k++) { + u32 chunk_begin = k * chunk_size; + u32 chunk_end = min(test_size, (k + 1) * chunk_size); + + scatter_plan_raw sp; + mmbBuildInitRangePlan(test_size, chunk_begin, chunk_end, &sp); + memset(ba, 0xaa, mmbit_size(test_size)); + apply(sp, ba); + + // First bit set should be chunk_begin. + ASSERT_EQ(chunk_begin, mmbit_iterate(ba, test_size, MMB_INVALID)); + + // All bits in the chunk should be on. + for (u64a i = chunk_begin; i < chunk_end; i += stride) { + SCOPED_TRACE(i); + ASSERT_TRUE(mmbit_isset(ba, test_size, i)); + } + + // Last bit on is chunk_end - 1. + if (chunk_end) { + ASSERT_EQ(MMB_INVALID, mmbit_iterate(ba, test_size, chunk_end - 1)); + } + } + } +} + TEST(MultiBit, SparseIteratorBegin1) { const u32 test_size = 100; - vector it; vector bits; bits.push_back(1); @@ -734,7 +790,7 @@ TEST(MultiBit, SparseIteratorBegin1) { bits.push_back(35); bits.push_back(68); - mmbBuildSparseIterator(it, bits, test_size); + auto it = mmbBuildSparseIterator(bits, test_size); //ASSERT_EQ(4U, it.size()); // Trivial initial test: all bits in 'bits' are on, all others are off @@ -763,7 +819,6 @@ TEST(MultiBit, SparseIteratorBegin1) { TEST(MultiBit, SparseIteratorBegin2) { const u32 test_size = 40000; - vector it; vector bits; bits.push_back(1); @@ -773,7 +828,7 @@ TEST(MultiBit, SparseIteratorBegin2) { bits.push_back(8920); bits.push_back(37000); - mmbBuildSparseIterator(it, bits, test_size); + auto it = mmbBuildSparseIterator(bits, test_size); //ASSERT_EQ(12U, it.size()); // Trivial initial test: all bits in 'bits' are on, all others are off @@ -802,7 +857,6 @@ TEST(MultiBit, SparseIteratorBegin2) { TEST(MultiBit, SparseIteratorNext1) { const u32 test_size = 100; - vector it; vector bits; bits.push_back(1); @@ -811,7 +865,7 @@ TEST(MultiBit, SparseIteratorNext1) { bits.push_back(35); bits.push_back(68); - mmbBuildSparseIterator(it, bits, test_size); + auto it = mmbBuildSparseIterator(bits, test_size); // Trivial initial test: all bits in 'bits' are on, all others are off mmbit_holder ba(test_size); @@ -867,7 +921,6 @@ TEST(MultiBit, SparseIteratorNext1) { TEST(MultiBit, SparseIteratorNext2) { const u32 test_size = 40000; - vector it; vector bits; bits.push_back(1); @@ -882,7 +935,7 @@ TEST(MultiBit, SparseIteratorNext2) { bits.push_back(37000); bits.push_back(39999); - mmbBuildSparseIterator(it, bits, test_size); + auto it = mmbBuildSparseIterator(bits, test_size); // Trivial initial test: all bits in 'bits' are on, all others are off mmbit_holder ba(test_size); @@ -938,7 +991,6 @@ TEST(MultiBit, SparseIteratorNext2) { TEST(MultiBit, SparseIteratorNextSmall) { const u32 test_size = 15; - vector it; vector bits; bits.push_back(1); @@ -948,7 +1000,7 @@ TEST(MultiBit, SparseIteratorNextSmall) { bits.push_back(12); bits.push_back(14); - mmbBuildSparseIterator(it, bits, test_size); + auto it = 
mmbBuildSparseIterator(bits, test_size); // Trivial initial test: all bits in 'bits' are on, all others are off mmbit_holder ba(test_size); @@ -1007,13 +1059,12 @@ TEST_P(MultiBitTest, SparseIteratorBeginAll) { ASSERT_TRUE(ba != nullptr); // Put all our bits into the sparse iterator. - vector it; vector bits; bits.reserve(test_size / stride); for (u64a i = 0; i < test_size; i += stride) { bits.push_back(i); } - mmbBuildSparseIterator(it, bits, test_size); + auto it = mmbBuildSparseIterator(bits, test_size); // Switch all bits on in state. mmbit_clear(ba, test_size); @@ -1047,12 +1098,11 @@ TEST_P(MultiBitTest, SparseIteratorBeginThirds) { } // Put all our bits into the sparse iterator - vector it; vector bits(test_size); for (u32 i = 0; i != test_size; i++) { bits[i] = i; } - mmbBuildSparseIterator(it, bits, test_size); + auto it = mmbBuildSparseIterator(bits, test_size); // Switch every third bits on in state mmbit_clear(ba, test_size); @@ -1082,13 +1132,12 @@ TEST_P(MultiBitTest, SparseIteratorNextAll) { ASSERT_TRUE(ba != nullptr); // Put all our bits into the sparse iterator. - vector it; vector bits; bits.reserve(test_size / stride); for (u64a i = 0; i < test_size; i += stride) { bits.push_back(i); } - mmbBuildSparseIterator(it, bits, test_size); + auto it = mmbBuildSparseIterator(bits, test_size); // Switch all bits on in state mmbit_clear(ba, test_size); @@ -1125,14 +1174,13 @@ TEST_P(MultiBitTest, SparseIteratorNextExactStrided) { // Put all our bits into the sparse iterator and switch them on in the // state. mmbit_clear(ba, test_size); - vector it; vector bits; bits.reserve(test_size / stride); for (u64a i = 0; i < test_size; i += stride) { bits.push_back(i); mmbit_set(ba, test_size, i); } - mmbBuildSparseIterator(it, bits, test_size); + auto it = mmbBuildSparseIterator(bits, test_size); // Iterate over all bits. vector state(mmbit_sparse_iter_state_size(test_size)); @@ -1157,13 +1205,12 @@ TEST_P(MultiBitTest, SparseIteratorNextNone) { ASSERT_TRUE(ba != nullptr); // Put all our bits into the sparse iterator. 
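The repeated `auto it = mmbBuildSparseIterator(bits, test_size);` change swaps an out-parameter for a return value; with C++11 move semantics (and copy elision) this costs nothing extra and lets the result be declared where it is initialised. The same refactor in miniature, with hypothetical names:

```cpp
#include <cstdint>
#include <vector>

struct iter_rec {
    uint32_t key;
    uint32_t offset;
};

// Old style: the caller supplies an empty vector to be filled in.
void build_iter(std::vector<iter_rec> &out, const std::vector<uint32_t> &bits);

// New style: the container is simply returned; it is moved or elided,
// never deep-copied.
std::vector<iter_rec> build_iter(const std::vector<uint32_t> &bits) {
    std::vector<iter_rec> out;
    out.reserve(bits.size());
    for (uint32_t b : bits) {
        out.push_back({b, 0}); // placeholder records
    }
    return out;
}
```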
- vector it; vector bits; bits.reserve(test_size / stride); for (u64a i = 0; i < test_size; i += stride) { bits.push_back(i); } - mmbBuildSparseIterator(it, bits, test_size); + auto it = mmbBuildSparseIterator(bits, test_size); // Switch only the first bit on mmbit_clear(ba, test_size); @@ -1186,13 +1233,12 @@ TEST_P(MultiBitTest, SparseIteratorUnsetAll) { ASSERT_TRUE(ba != nullptr); // Put all our bits into the sparse iterator - vector it; vector bits; bits.reserve(test_size / stride); for (u64a i = 0; i < test_size; i += stride) { bits.push_back(i); } - mmbBuildSparseIterator(it, bits, test_size); + auto it = mmbBuildSparseIterator(bits, test_size); // Switch all bits on mmbit_clear(ba, test_size); @@ -1226,9 +1272,8 @@ TEST_P(MultiBitTest, SparseIteratorUnsetHalves) { odd.push_back(i); } - vector it_even, it_odd; - mmbBuildSparseIterator(it_even, even, test_size); - mmbBuildSparseIterator(it_odd, odd, test_size); + auto it_even = mmbBuildSparseIterator(even, test_size); + auto it_odd = mmbBuildSparseIterator(odd, test_size); // Switch all bits on mmbit_clear(ba, test_size); diff --git a/unit/internal/multiaccel_matcher.cpp b/unit/internal/multiaccel_matcher.cpp deleted file mode 100644 index bdf56ff91..000000000 --- a/unit/internal/multiaccel_matcher.cpp +++ /dev/null @@ -1,301 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - - -extern "C" { -#include "nfa/accel.h" // wrapping in extern C to make sure run_accel works -} - -#include "config.h" -#include "src/ue2common.h" - -#include "gtest/gtest.h" -#include "nfagraph/ng_limex_accel.h" -#include "nfa/accelcompile.h" -#include "nfa/multivermicelli.h" -#include "nfa/multishufti.h" -#include "nfa/multitruffle.h" -#include "util/alloc.h" -#include "util/charreach.h" - -#include -#include -#include -#include -#include - -using namespace ue2; -using namespace std; -using namespace testing; - -// test parameters structure -struct MultiaccelTestParam { - string match_pattern; - u32 match_pattern_start_idx; - u32 match_idx; - bool test_all_offsets; - u8 match_len1; - u8 match_len2; - MultibyteAccelInfo::multiaccel_type type; -}; - -// buffer size is constant -static const u32 BUF_SIZE = 200; - -// strings, out of which CharReach will be generated -static const string VERM_CR = "a"; -static const string V_NC_CR = "aA"; -static const string SHUF_CR = "abcdefghijklmnopqrstuvwxyz"; -static const string TRUF_CR = "\x11\x22\x33\x44\x55\x66\x77\x88\x99"; - -// Parameterized test case for multiaccel patterns. -class MultiaccelTest : public TestWithParam { -protected: - virtual void SetUp() { - // set up is deferred until the actual test, since we can't compile - // any accel schemes unless we know CharReach - const MultiaccelTestParam &p = GetParam(); - - // reserve space in our buffer - buffer = (u8 *)aligned_zmalloc(BUF_SIZE); - - // store the index where we expect to see the match. note that it may - // be different from where the match pattern has started since we may - // have a flooded match (i.e. a match preceded by almost-match) or a - // no-match (in which case "match" index is at the end of the buffer). - match_idx = p.match_idx; - - // make note if we need to test all offsets - sometimes we don't, for - // example when testing partial or no-match. - test_all_offsets = p.test_all_offsets; - } - - char getChar(const CharReach &cr) { - assert(cr.count() > 0); - auto dist = uniform_int_distribution(0, cr.count() - 1); - size_t result = cr.find_nth(dist(prng)); - assert(result != CharReach::npos); - return (char)result; - } - - // char generator - char getChar(const CharReach &cr, bool match) { - return getChar(match ? 
cr : ~cr); - } - - // appends a string with matches/unmatches according to input match pattern - void getMatch(u8 *result, u32 start, const string &pattern, - const CharReach &cr) { - for (const auto &c : pattern) { - result[start++] = getChar(cr, c == '1'); - } - } - - // appends non-matching noise of certain lengths - void getNoise(u8 *result, u32 start, u32 len, const CharReach &cr) { - for (unsigned i = 0; i < len; i++) { - result[start + i] = getChar(cr, false); - } - } - - // deferred buffer generation, as we don't know CharReach before we run the test - void GenerateBuffer(const CharReach &cr) { - const MultiaccelTestParam &p = GetParam(); - - // step 1: fill prefix with non-matching noise - u32 start = 0; - getNoise(buffer, start, p.match_pattern_start_idx, cr); - - // step 2: add a match - start += p.match_pattern_start_idx; - getMatch(buffer, start, p.match_pattern, cr); - - // step 3: fill in the rest of the buffer with non-matching noise - start += p.match_pattern.size(); - getNoise(buffer, start, BUF_SIZE - p.match_pattern.size() - - p.match_pattern_start_idx, cr); - } - - // deferred accel scheme generation, as we don't know CharReach before we run the test - void CompileAccelScheme(const CharReach &cr, AccelAux *aux) { - const MultiaccelTestParam &p = GetParam(); - - AccelInfo ai; - ai.single_stops = cr; // dummy CharReach to prevent red tape accel - ai.ma_len1 = p.match_len1; - ai.ma_len2 = p.match_len2; - ai.multiaccel_stops = cr; - ai.ma_type = p.type; - - buildAccelAux(ai, aux); - - // now, verify we've successfully built our accel scheme, *and* that it's - // a multibyte scheme - ASSERT_TRUE(aux->accel_type >= ACCEL_MLVERM && - aux->accel_type <= ACCEL_MDSGTRUFFLE); - } - - virtual void TearDown() { - aligned_free(buffer); - } - - // We want our tests to be deterministic, so we use a PRNG in the test - // fixture. 
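The deleted fixture's point about determinism is worth keeping in miniature: `std::mt19937` default-constructs with a fixed seed (`mt19937::default_seed`, 5489u), so "random" buffers built this way are identical on every run and failures stay reproducible. A compilable illustration:

```cpp
#include <cassert>
#include <random>

int main() {
    std::mt19937 a; // both engines start from mt19937::default_seed (5489u)
    std::mt19937 b;
    std::uniform_int_distribution<int> dist('a', 'z');

    for (int i = 0; i < 100; i++) {
        assert(dist(a) == dist(b)); // identical streams on every run
    }
    return 0;
}
```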
- mt19937 prng; - - u32 match_idx; - u8 *buffer; - bool test_all_offsets; -}; - -static -void runTest(const u8 *buffer, AccelAux *aux, unsigned match_idx, - bool test_all_offsets) { - const u8 *start = buffer; - const u8 *end = start + BUF_SIZE; - const u8 *match = start + match_idx; - - // comparing indexes into the buffer is easier to understand than pointers - if (test_all_offsets) { - // run_accel can only scan >15 byte buffers - u32 end_offset = min(match_idx, BUF_SIZE - 15); - - for (unsigned offset = 0; offset < end_offset; offset++) { - const u8 *ptr = run_accel(aux, (start + offset), end); - unsigned idx = ptr - start; - ASSERT_EQ(match_idx, idx); - } - } else { - const u8 *ptr = run_accel(aux, start, end); - unsigned idx = ptr - start; - ASSERT_EQ(match_idx, idx); - } -} - -TEST_P(MultiaccelTest, TestVermicelli) { - AccelAux aux = {0}; - CharReach cr(VERM_CR); - - GenerateBuffer(cr); - - CompileAccelScheme(cr, &aux); - - runTest(buffer, &aux, match_idx, test_all_offsets); -} - -TEST_P(MultiaccelTest, TestVermicelliNocase) { - AccelAux aux = {0}; - CharReach cr(V_NC_CR); - - GenerateBuffer(cr); - - CompileAccelScheme(cr, &aux); - - runTest(buffer, &aux, match_idx, test_all_offsets); -} - -TEST_P(MultiaccelTest, TestShufti) { - AccelAux aux = {0}; - CharReach cr(SHUF_CR); - - GenerateBuffer(cr); - - CompileAccelScheme(cr, &aux); - - runTest(buffer, &aux, match_idx, test_all_offsets); -} - -TEST_P(MultiaccelTest, TestTruffle) { - AccelAux aux = {0}; - CharReach cr(TRUF_CR); - - GenerateBuffer(cr); - - CompileAccelScheme(cr, &aux); - - runTest(buffer, &aux, match_idx, test_all_offsets); -} - -static const MultiaccelTestParam multiaccelTests[] = { - // long matcher - - // full, partial, flooded, nomatch - {"11111", 180, 180, true, 5, 0, MultibyteAccelInfo::MAT_LONG}, - {"111", 197, 197, true, 5, 0, MultibyteAccelInfo::MAT_LONG}, - {"1111011111", 177, 182, false, 5, 0, MultibyteAccelInfo::MAT_LONG}, - {"1111011110", 177, 200, false, 5, 0, MultibyteAccelInfo::MAT_LONG}, - - // long-grab matcher - - // full, partial, flooded, nomatch - {"111110", 180, 180, true, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB}, - {"111", 197, 197, true, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB}, - {"11111111110", 177, 182, false, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB}, - {"11110111101", 177, 200, false, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB}, - - // shift matcher - - // full, partial, flooded, nomatch - {"11001", 180, 180, true, 4, 0, MultibyteAccelInfo::MAT_SHIFT}, - {"110", 197, 197, true, 4, 0, MultibyteAccelInfo::MAT_SHIFT}, - {"1001011001", 177, 182, false, 4, 0, MultibyteAccelInfo::MAT_SHIFT}, - {"1101001011", 177, 200, false, 4, 0, MultibyteAccelInfo::MAT_SHIFT}, - - // shift-grab matcher - - // full, partial, flooded, nomatch - {"10111", 180, 180, true, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB}, - {"101", 197, 197, true, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB}, - {"1110010111", 177, 182, false, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB}, - {"1100101100", 177, 200, false, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB}, - - // doubleshift matcher - - // full, partial (one and two shifts), flooded, nomatch - {"110111", 180, 180, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT}, - {"110", 197, 197, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT}, - {"1101", 196, 196, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT}, - {"1100100101", 178, 182, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFT}, - {"1101001101", 177, 200, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFT}, - - // doubleshift-grab matcher - - // full, partial (one and two 
shifts), flooded, nomatch - {"100101", 180, 180, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB}, - {"100", 197, 197, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB}, - {"1011", 196, 196, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB}, - {"11111101101", 177, 182, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB}, - {"1111110111", 177, 200, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB}, -}; - -INSTANTIATE_TEST_CASE_P(Multiaccel, MultiaccelTest, ValuesIn(multiaccelTests)); - -// boring stuff for google test -void PrintTo(const MultiaccelTestParam &p, ::std::ostream *os) { - *os << "MultiaccelTestParam: " << p.match_pattern; -} diff --git a/unit/internal/nfagraph_common.h b/unit/internal/nfagraph_common.h index d3aafc99f..ca5554c44 100644 --- a/unit/internal/nfagraph_common.h +++ b/unit/internal/nfagraph_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,18 +40,19 @@ namespace ue2 { // Helper function: construct a graph from an expression, flags and context. inline -std::unique_ptr constructGraphWithCC(const std::string &expr, - CompileContext &cc, - unsigned flags) { +std::unique_ptr constructGraphWithCC(const std::string &expr, + CompileContext &cc, + unsigned flags) { ReportManager rm(cc.grey); ParsedExpression parsed(0, expr.c_str(), flags, 0); - return buildWrapper(rm, cc, parsed); + auto built_expr = buildGraph(rm, cc, parsed); + return std::move(built_expr.g); } // Helper function: construct a graph from an expression and its flags. inline -std::unique_ptr constructGraph(const std::string &expr, - unsigned flags) { +std::unique_ptr constructGraph(const std::string &expr, + unsigned flags) { CompileContext cc(false, false, get_current_target(), Grey()); return constructGraphWithCC(expr, cc, flags); } diff --git a/unit/internal/nfagraph_comp.cpp b/unit/internal/nfagraph_comp.cpp index 41af3f0ca..61b05a465 100644 --- a/unit/internal/nfagraph_comp.cpp +++ b/unit/internal/nfagraph_comp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,13 +33,8 @@ #include "config.h" #include "gtest/gtest.h" #include "nfagraph_common.h" -#include "grey.h" -#include "hs.h" -#include "compiler/compiler.h" #include "nfagraph/ng.h" -#include "nfagraph/ng_builder.h" #include "nfagraph/ng_calc_components.h" -#include "util/target_info.h" using namespace std; using namespace ue2; @@ -48,7 +43,9 @@ TEST(NFAGraph, CalcComp1) { auto graph = constructGraph("abc|def|ghi", 0); ASSERT_TRUE(graph != nullptr); - deque> comps = calcComponents(*graph); + Grey grey; + grey.calcComponents = true; + auto comps = calcComponents(std::move(graph), grey); ASSERT_EQ(3, comps.size()); } @@ -56,7 +53,9 @@ TEST(NFAGraph, CalcComp2) { auto graph = constructGraph("a|b|c|d|e|f|g|h|i", 0); ASSERT_TRUE(graph != nullptr); - deque> comps = calcComponents(*graph); + Grey grey; + grey.calcComponents = true; + auto comps = calcComponents(std::move(graph), grey); // We should be identifying this as a trivial case and not splitting it. 
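The new `calcComponents(std::move(graph), grey)` calls in the tests below reflect a signature that takes the graph as a by-value `unique_ptr`: a sink parameter that consumes the caller's graph rather than borrowing it. A generic sketch of that convention (the types are placeholders, not the ue2 API):

```cpp
#include <deque>
#include <memory>

struct Graph {};

// Sink parameter: the callee takes ownership, so the caller must move.
std::deque<std::unique_ptr<Graph>> split_components(std::unique_ptr<Graph> g) {
    std::deque<std::unique_ptr<Graph>> comps;
    comps.push_back(std::move(g)); // trivially one component here
    return comps;
}

void demo() {
    std::unique_ptr<Graph> g(new Graph);
    auto comps = split_components(std::move(g));
    // g is now null; the graph lives on inside comps.
}
```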
ASSERT_EQ(1, comps.size()); @@ -67,7 +66,9 @@ TEST(NFAGraph, RecalcComp1) { comps.push_back(constructGraph("abc|def|ghi", 0)); ASSERT_TRUE(comps.back() != nullptr); - recalcComponents(comps); + Grey grey; + grey.calcComponents = true; + recalcComponents(comps, grey); ASSERT_EQ(3, comps.size()); } diff --git a/unit/internal/nfagraph_equivalence.cpp b/unit/internal/nfagraph_equivalence.cpp index 8fda92231..73aec1d7e 100644 --- a/unit/internal/nfagraph_equivalence.cpp +++ b/unit/internal/nfagraph_equivalence.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -54,7 +54,7 @@ TEST(NFAGraph, RemoveEquivalence1) { // The graph should be merged into: a(b|c) CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("(ab|ac)", cc, 0)); + auto graph(constructGraphWithCC("(ab|ac)", cc, 0)); ASSERT_TRUE(graph != nullptr); NGHolder &g = *graph; g.kind = NFA_SUFFIX; @@ -115,7 +115,7 @@ TEST(NFAGraph, RemoveEquivalence2) { // The graph should be merged into: (b|c)a CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("(ba|ca)", cc, 0)); + auto graph(constructGraphWithCC("(ba|ca)", cc, 0)); ASSERT_TRUE(graph != nullptr); NGHolder &g = *graph; g.kind = NFA_SUFFIX; @@ -176,8 +176,7 @@ TEST(NFAGraph, RemoveEquivalence3) { // The graph should be merged into: a(..)+(X|Y) CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("a(..)+X|a(..)+Y", cc, - HS_FLAG_DOTALL)); + auto graph(constructGraphWithCC("a(..)+X|a(..)+Y", cc, HS_FLAG_DOTALL)); ASSERT_TRUE(graph != nullptr); NGHolder &g = *graph; g.kind = NFA_SUFFIX; @@ -266,8 +265,7 @@ TEST(NFAGraph, RemoveEquivalence4) { // The graph should be merged into: (X|Y)(..)+a CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("X(..)+a|Y(..)+a", cc, - HS_FLAG_DOTALL)); + auto graph(constructGraphWithCC("X(..)+a|Y(..)+a", cc, HS_FLAG_DOTALL)); ASSERT_TRUE(graph != nullptr); NGHolder &g = *graph; g.kind = NFA_SUFFIX; @@ -363,8 +361,7 @@ TEST(NFAGraph, RemoveEquivalence5) { // The graph should be merged into: [^\x00]*[\x00] CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("[^\\x00][^\\x00]*[\\x00]", - cc, 0)); + auto graph(constructGraphWithCC("[^\\x00][^\\x00]*[\\x00]", cc, 0)); ASSERT_TRUE(graph != nullptr); NGHolder &g = *graph; g.kind = NFA_PREFIX; @@ -420,7 +417,7 @@ TEST(NFAGraph, RemoveEquivalence5) { TEST(NFAGraph, RemoveEquivalence6) { // Build a small graph with two redundant vertices: ^(.*|.*)a // The graph should be merged into: a - unique_ptr graph(constructGraph("^(.*|.*)a", HS_FLAG_DOTALL)); + auto graph(constructGraph("^(.*|.*)a", HS_FLAG_DOTALL)); ASSERT_TRUE(graph != nullptr); NGHolder &g = *graph; @@ -458,7 +455,7 @@ TEST(NFAGraph, RemoveEquivalence6) { TEST(NFAGraph, RemoveEquivalence7) { // Build a small graph with no redundant vertices: ^.+a // Make sure we don't merge anything - unique_ptr graph(constructGraph("^.+a", HS_FLAG_DOTALL)); + auto graph(constructGraph("^.+a", HS_FLAG_DOTALL)); ASSERT_TRUE(graph != nullptr); NGHolder &g = *graph; diff --git a/unit/internal/nfagraph_find_matches.cpp b/unit/internal/nfagraph_find_matches.cpp index 553d6dc54..cd0cd796e 100644 --- 
a/unit/internal/nfagraph_find_matches.cpp +++ b/unit/internal/nfagraph_find_matches.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -76,7 +76,7 @@ class MatchesTest: public TestWithParam { static const MatchesTestParams matchesTests[] = { // EOD and anchored patterns - // these should produce no matches + // these should produce no matches { "^foobar", "foolish", {}, 0, false, true}, { "^foobar$", "ze foobar", {}, 0, false, true}, { "^foobar$", "foobar ", {}, 0, false, true}, @@ -208,14 +208,25 @@ TEST_P(MatchesTest, Check) { CompileContext cc(false, false, get_current_target(), Grey()); ReportManager rm(cc.grey); ParsedExpression parsed(0, t.pattern.c_str(), t.flags, 0); - auto g = buildWrapper(rm, cc, parsed); + auto built_expr = buildGraph(rm, cc, parsed); + const auto &g = built_expr.g; bool utf8 = (t.flags & HS_FLAG_UTF8) > 0; set> matches; - findMatches(*g, rm, t.input, matches, t.notEod, t.som, utf8); + bool success = findMatches(*g, rm, t.input, matches, 0, t.notEod, utf8); + ASSERT_TRUE(success); set> expected(begin(t.matches), end(t.matches)); + // findMatches returns matches with SOM, so zero them out if not SOM + if (!t.som) { + set> new_matches; + for (auto &m : matches) { + new_matches.emplace(0, m.second); + } + matches.swap(new_matches); + } + ASSERT_EQ(expected, matches) << "Pattern '" << t.pattern << "' against input '" << t.input << "'"; } diff --git a/unit/internal/nfagraph_redundancy.cpp b/unit/internal/nfagraph_redundancy.cpp index be9527fd3..c77045e02 100644 --- a/unit/internal/nfagraph_redundancy.cpp +++ b/unit/internal/nfagraph_redundancy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -53,7 +53,7 @@ TEST(NFAGraph, RemoveRedundancy1) { // The character reachability should be merged into: [ab]c CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("(a|b)c", cc, 0)); + auto graph(constructGraphWithCC("(a|b)c", cc, 0)); ASSERT_TRUE(graph.get() != nullptr); NGHolder &g = *graph; @@ -95,8 +95,7 @@ TEST(NFAGraph, RemoveRedundancy2) { // Build a small graph with a redundant vertex: a.*b?c // The dot-star should swallow the 'b?', leaving a.*c CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("a.*b?c", cc, - HS_FLAG_DOTALL)); + auto graph(constructGraphWithCC("a.*b?c", cc, HS_FLAG_DOTALL)); ASSERT_TRUE(graph.get() != nullptr); NGHolder &g = *graph; @@ -152,8 +151,7 @@ TEST(NFAGraph, RemoveRedundancy2) { TEST(NFAGraph, RemoveRedundancy3) { CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("foobar.*(a|b)?teakettle", - cc, 0)); + auto graph(constructGraphWithCC("foobar.*(a|b)?teakettle", cc, 0)); ASSERT_TRUE(graph.get() != nullptr); unsigned countBefore = num_vertices(*graph); @@ -166,7 +164,7 @@ TEST(NFAGraph, RemoveRedundancy3) { TEST(NFAGraph, RemoveRedundancy4) { CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("foo([A-Z]|a|b|q)", cc, 0)); + auto graph(constructGraphWithCC("foo([A-Z]|a|b|q)", cc, 0)); ASSERT_TRUE(graph.get() != nullptr); 
unsigned countBefore = num_vertices(*graph); @@ -178,8 +176,7 @@ TEST(NFAGraph, RemoveRedundancy4) { TEST(NFAGraph, RemoveRedundancy5) { CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("[0-9]?badgerbrush", - cc, 0)); + auto graph(constructGraphWithCC("[0-9]?badgerbrush", cc, 0)); ASSERT_TRUE(graph.get() != nullptr); unsigned countBefore = num_vertices(*graph); diff --git a/unit/internal/nfagraph_repeat.cpp b/unit/internal/nfagraph_repeat.cpp index b34d12717..941873ece 100644 --- a/unit/internal/nfagraph_repeat.cpp +++ b/unit/internal/nfagraph_repeat.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -66,28 +66,28 @@ struct PureRepeatTest { class NFAPureRepeatTest : public TestWithParam { }; static const PureRepeatTest pureRepeatTests[] = { - { "^.*", 0, depth::infinity() }, - { "^.+", 1, depth::infinity() }, - { "^.", 1, 1 }, - { "^..", 2, 2 }, - { "^.?.", 1, 2 }, - { "^.{1,2}", 1, 2 }, - { "^.{1,3}", 1, 3 }, - { "^.{1,10}", 1, 10 }, - { "^.{1,200}", 1, 200 }, - { "^.{200}", 200, 200 }, - { "^.{0,}", 0, depth::infinity() }, - { "^.{1,}", 1, depth::infinity() }, - { "^.{2,}", 2, depth::infinity() }, - { "^.{10,}", 10, depth::infinity() }, - { "^.{200,}", 200, depth::infinity() }, - { "^.{5000,}", 5000, depth::infinity() }, - { "^.{0,1}", 0, 1 }, - { "^.{0,2}", 0, 2 }, - { "^.{0,100}", 0, 100 }, - { "^.{0,5000}", 0, 5000 }, - { "^x{10}x{20,30}", 30, 40 }, - { "^..?..?..?..?..?", 5, 10 } + { "^.*", depth(0), depth::infinity() }, + { "^.+", depth(1), depth::infinity() }, + { "^.", depth(1), depth(1) }, + { "^..", depth(2), depth(2) }, + { "^.?.", depth(1), depth(2) }, + { "^.{1,2}", depth(1), depth(2) }, + { "^.{1,3}", depth(1), depth(3) }, + { "^.{1,10}", depth(1), depth(10) }, + { "^.{1,200}", depth(1), depth(200) }, + { "^.{200}", depth(200), depth(200) }, + { "^.{0,}", depth(0), depth::infinity() }, + { "^.{1,}", depth(1), depth::infinity() }, + { "^.{2,}", depth(2), depth::infinity() }, + { "^.{10,}", depth(10), depth::infinity() }, + { "^.{200,}", depth(200), depth::infinity() }, + { "^.{5000,}", depth(5000), depth::infinity() }, + { "^.{0,1}", depth(0), depth(1) }, + { "^.{0,2}", depth(0), depth(2) }, + { "^.{0,100}", depth(0), depth(100) }, + { "^.{0,5000}", depth(0), depth(5000) }, + { "^x{10}x{20,30}", depth(30), depth(40) }, + { "^..?..?..?..?..?", depth(5), depth(10) } }; INSTANTIATE_TEST_CASE_P(PureRepeat, NFAPureRepeatTest, diff --git a/unit/internal/nfagraph_width.cpp b/unit/internal/nfagraph_width.cpp index 03508ea84..7ccdca37f 100644 --- a/unit/internal/nfagraph_width.cpp +++ b/unit/internal/nfagraph_width.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -52,26 +52,26 @@ struct WidthTest { class NFAWidthTest : public TestWithParam { }; static const WidthTest widthTests[] = { - { "()", 0, 0 }, - { "a", 1, 1 }, - { "a?b", 1, 2 }, - { "foobar", 6, 6 }, - { "foo(bar)?", 3, 6 }, - { "(a|ab|abc|abcd)", 1, 4 }, - { "foo.*bar", 6, depth::infinity() }, - { "foo(bar)*", 3, depth::infinity() }, - { "foo(bar)+", 6, depth::infinity() }, - { "foo(bar){1,3}", 6, 12 }, - { "(abcd)+", 4, depth::infinity() }, - { 
"foo\\z", 3, 3 }, - { "^foo", 3, 3 }, - { "^foo|bar.*baz", 3, depth::infinity() }, - { "^foobar.*|baz", 3, depth::infinity() }, - { "foo(\\z|bar)", 3, 6 }, - { "foo(|bar\\z)", 3, 6 }, - { "foo.{0,15}bar", 6, 21 }, - { "foo.{0,15}.*bar", 6, depth::infinity() }, - { "(?smi)^(aa[^a]aa$|a|a+\\Z|a)", 1, depth::infinity() } + { "()", depth(0), depth(0) }, + { "a", depth(1), depth(1) }, + { "a?b", depth(1), depth(2) }, + { "foobar", depth(6), depth(6) }, + { "foo(bar)?", depth(3), depth(6) }, + { "(a|ab|abc|abcd)", depth(1), depth(4) }, + { "foo.*bar", depth(6), depth::infinity() }, + { "foo(bar)*", depth(3), depth::infinity() }, + { "foo(bar)+", depth(6), depth::infinity() }, + { "foo(bar){1,3}", depth(6), depth(12) }, + { "(abcd)+", depth(4), depth::infinity() }, + { "foo\\z", depth(3), depth(3) }, + { "^foo", depth(3), depth(3) }, + { "^foo|bar.*baz", depth(3), depth::infinity() }, + { "^foobar.*|baz", depth(3), depth::infinity() }, + { "foo(\\z|bar)", depth(3), depth(6) }, + { "foo(|bar\\z)", depth(3), depth(6) }, + { "foo.{0,15}bar", depth(6), depth(21) }, + { "foo.{0,15}.*bar", depth(6), depth::infinity() }, + { "(?smi)^(aa[^a]aa$|a|a+\\Z|a)", depth(1), depth::infinity() } }; INSTANTIATE_TEST_CASE_P(NFAWidth, NFAWidthTest, ValuesIn(widthTests)); @@ -79,10 +79,10 @@ INSTANTIATE_TEST_CASE_P(NFAWidth, NFAWidthTest, ValuesIn(widthTests)); TEST_P(NFAWidthTest, Check) { const WidthTest &t = GetParam(); SCOPED_TRACE(testing::Message() << "Pattern: " << t.pattern); - unique_ptr w(constructGraph(t.pattern, 0)); + auto g = constructGraph(t.pattern, 0); - ASSERT_EQ(t.minWidth, findMinWidth(*w)); - ASSERT_EQ(t.maxWidth, findMaxWidth(*w)); + ASSERT_EQ(t.minWidth, findMinWidth(*g)); + ASSERT_EQ(t.maxWidth, findMaxWidth(*g)); } // for google test diff --git a/unit/internal/repeat.cpp b/unit/internal/repeat.cpp index 7f245e62f..546d7d4f8 100644 --- a/unit/internal/repeat.cpp +++ b/unit/internal/repeat.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -106,96 +106,96 @@ class RepeatTest : public TestWithParam { static const RepeatTestInfo repeatTests[] = { // Fixed repeats -- ring model - { REPEAT_RING, 2, 2 }, - { REPEAT_RING, 4, 4 }, - { REPEAT_RING, 10, 10 }, - { REPEAT_RING, 16, 16 }, - { REPEAT_RING, 20, 20 }, - { REPEAT_RING, 30, 30 }, - { REPEAT_RING, 50, 50 }, - { REPEAT_RING, 64, 64 }, - { REPEAT_RING, 65, 65 }, - { REPEAT_RING, 100, 100 }, - { REPEAT_RING, 200, 200 }, - { REPEAT_RING, 1000, 1000 }, - { REPEAT_RING, 4100, 4100 }, - { REPEAT_RING, 16000, 16000 }, + { REPEAT_RING, depth(2), depth(2) }, + { REPEAT_RING, depth(4), depth(4) }, + { REPEAT_RING, depth(10), depth(10) }, + { REPEAT_RING, depth(16), depth(16) }, + { REPEAT_RING, depth(20), depth(20) }, + { REPEAT_RING, depth(30), depth(30) }, + { REPEAT_RING, depth(50), depth(50) }, + { REPEAT_RING, depth(64), depth(64) }, + { REPEAT_RING, depth(65), depth(65) }, + { REPEAT_RING, depth(100), depth(100) }, + { REPEAT_RING, depth(200), depth(200) }, + { REPEAT_RING, depth(1000), depth(1000) }, + { REPEAT_RING, depth(4100), depth(4100) }, + { REPEAT_RING, depth(16000), depth(16000) }, // {0, N} repeats -- last model - { REPEAT_LAST, 0, 4 }, - { REPEAT_LAST, 0, 10 }, - { REPEAT_LAST, 0, 20 }, - { REPEAT_LAST, 0, 30 }, - { REPEAT_LAST, 0, 50 }, - { REPEAT_LAST, 0, 100 }, - { REPEAT_LAST, 0, 200 }, - { REPEAT_LAST, 0, 1000 }, - { 
REPEAT_LAST, 0, 16000 }, + { REPEAT_LAST, depth(0), depth(4) }, + { REPEAT_LAST, depth(0), depth(10) }, + { REPEAT_LAST, depth(0), depth(20) }, + { REPEAT_LAST, depth(0), depth(30) }, + { REPEAT_LAST, depth(0), depth(50) }, + { REPEAT_LAST, depth(0), depth(100) }, + { REPEAT_LAST, depth(0), depth(200) }, + { REPEAT_LAST, depth(0), depth(1000) }, + { REPEAT_LAST, depth(0), depth(16000) }, // {0, N} repeats -- ring model (though we use 'last' model in practice) - { REPEAT_RING, 0, 2 }, - { REPEAT_RING, 0, 4 }, - { REPEAT_RING, 0, 10 }, - { REPEAT_RING, 0, 20 }, - { REPEAT_RING, 0, 30 }, - { REPEAT_RING, 0, 50 }, - { REPEAT_RING, 0, 64 }, - { REPEAT_RING, 0, 65 }, - { REPEAT_RING, 0, 100 }, - { REPEAT_RING, 0, 200 }, - { REPEAT_RING, 0, 1000 }, - { REPEAT_RING, 0, 16000 }, + { REPEAT_RING, depth(0), depth(2) }, + { REPEAT_RING, depth(0), depth(4) }, + { REPEAT_RING, depth(0), depth(10) }, + { REPEAT_RING, depth(0), depth(20) }, + { REPEAT_RING, depth(0), depth(30) }, + { REPEAT_RING, depth(0), depth(50) }, + { REPEAT_RING, depth(0), depth(64) }, + { REPEAT_RING, depth(0), depth(65) }, + { REPEAT_RING, depth(0), depth(100) }, + { REPEAT_RING, depth(0), depth(200) }, + { REPEAT_RING, depth(0), depth(1000) }, + { REPEAT_RING, depth(0), depth(16000) }, // {N, M} repeats -- ring model - { REPEAT_RING, 2, 3 }, - { REPEAT_RING, 1, 4 }, - { REPEAT_RING, 5, 10 }, - { REPEAT_RING, 10, 20 }, - { REPEAT_RING, 10, 50 }, - { REPEAT_RING, 50, 60 }, - { REPEAT_RING, 100, 200 }, - { REPEAT_RING, 1, 200 }, - { REPEAT_RING, 10, 16000 }, - { REPEAT_RING, 10000, 16000 }, + { REPEAT_RING, depth(2), depth(3) }, + { REPEAT_RING, depth(1), depth(4) }, + { REPEAT_RING, depth(5), depth(10) }, + { REPEAT_RING, depth(10), depth(20) }, + { REPEAT_RING, depth(10), depth(50) }, + { REPEAT_RING, depth(50), depth(60) }, + { REPEAT_RING, depth(100), depth(200) }, + { REPEAT_RING, depth(1), depth(200) }, + { REPEAT_RING, depth(10), depth(16000) }, + { REPEAT_RING, depth(10000), depth(16000) }, // {N, M} repeats -- range model - { REPEAT_RANGE, 1, 4 }, - { REPEAT_RANGE, 5, 10 }, - { REPEAT_RANGE, 10, 20 }, - { REPEAT_RANGE, 10, 50 }, - { REPEAT_RANGE, 50, 60 }, - { REPEAT_RANGE, 100, 200 }, - { REPEAT_RANGE, 1, 200 }, - { REPEAT_RANGE, 10, 16000 }, - { REPEAT_RANGE, 10000, 16000 }, + { REPEAT_RANGE, depth(1), depth(4) }, + { REPEAT_RANGE, depth(5), depth(10) }, + { REPEAT_RANGE, depth(10), depth(20) }, + { REPEAT_RANGE, depth(10), depth(50) }, + { REPEAT_RANGE, depth(50), depth(60) }, + { REPEAT_RANGE, depth(100), depth(200) }, + { REPEAT_RANGE, depth(1), depth(200) }, + { REPEAT_RANGE, depth(10), depth(16000) }, + { REPEAT_RANGE, depth(10000), depth(16000) }, // {N,M} repeats -- small bitmap model - { REPEAT_BITMAP, 1, 2 }, - { REPEAT_BITMAP, 5, 10 }, - { REPEAT_BITMAP, 10, 20 }, - { REPEAT_BITMAP, 20, 40 }, - { REPEAT_BITMAP, 1, 63 }, - { REPEAT_BITMAP, 50, 63 }, + { REPEAT_BITMAP, depth(1), depth(2) }, + { REPEAT_BITMAP, depth(5), depth(10) }, + { REPEAT_BITMAP, depth(10), depth(20) }, + { REPEAT_BITMAP, depth(20), depth(40) }, + { REPEAT_BITMAP, depth(1), depth(63) }, + { REPEAT_BITMAP, depth(50), depth(63) }, // {N,M} repeats -- trailer model - { REPEAT_TRAILER, 1, 2 }, - { REPEAT_TRAILER, 8, 8 }, - { REPEAT_TRAILER, 0, 8 }, - { REPEAT_TRAILER, 10, 20 }, - { REPEAT_TRAILER, 1, 32 }, - { REPEAT_TRAILER, 64, 64 }, - { REPEAT_TRAILER, 1, 64 }, - { REPEAT_TRAILER, 1, 100 }, - { REPEAT_TRAILER, 1, 2000 }, - { REPEAT_TRAILER, 50, 200 }, - { REPEAT_TRAILER, 50, 1000 }, - { REPEAT_TRAILER, 64, 1024 }, + { REPEAT_TRAILER, depth(1), 
depth(2) }, + { REPEAT_TRAILER, depth(8), depth(8) }, + { REPEAT_TRAILER, depth(0), depth(8) }, + { REPEAT_TRAILER, depth(10), depth(20) }, + { REPEAT_TRAILER, depth(1), depth(32) }, + { REPEAT_TRAILER, depth(64), depth(64) }, + { REPEAT_TRAILER, depth(1), depth(64) }, + { REPEAT_TRAILER, depth(1), depth(100) }, + { REPEAT_TRAILER, depth(1), depth(2000) }, + { REPEAT_TRAILER, depth(50), depth(200) }, + { REPEAT_TRAILER, depth(50), depth(1000) }, + { REPEAT_TRAILER, depth(64), depth(1024) }, // {N,} repeats -- first model - { REPEAT_FIRST, 0, depth::infinity() }, - { REPEAT_FIRST, 1, depth::infinity() }, - { REPEAT_FIRST, 4, depth::infinity() }, - { REPEAT_FIRST, 10, depth::infinity() }, - { REPEAT_FIRST, 50, depth::infinity() }, - { REPEAT_FIRST, 100, depth::infinity() }, - { REPEAT_FIRST, 1000, depth::infinity() }, - { REPEAT_FIRST, 3000, depth::infinity() }, - { REPEAT_FIRST, 10000, depth::infinity() }, + { REPEAT_FIRST, depth(0), depth::infinity() }, + { REPEAT_FIRST, depth(1), depth::infinity() }, + { REPEAT_FIRST, depth(4), depth::infinity() }, + { REPEAT_FIRST, depth(10), depth::infinity() }, + { REPEAT_FIRST, depth(50), depth::infinity() }, + { REPEAT_FIRST, depth(100), depth::infinity() }, + { REPEAT_FIRST, depth(1000), depth::infinity() }, + { REPEAT_FIRST, depth(3000), depth::infinity() }, + { REPEAT_FIRST, depth(10000), depth::infinity() }, // {,} repeats -- always - { REPEAT_ALWAYS, 0, depth::infinity() }, + { REPEAT_ALWAYS, depth(0), depth::infinity() }, }; INSTANTIATE_TEST_CASE_P(Repeat, RepeatTest, ValuesIn(repeatTests)); @@ -508,55 +508,55 @@ const u32 sparsePeriods[] = { static const RepeatTestInfo sparseRepeats[] = { // Fixed repeats - { REPEAT_SPARSE_OPTIMAL_P, 10, 10 }, - { REPEAT_SPARSE_OPTIMAL_P, 20, 20 }, - { REPEAT_SPARSE_OPTIMAL_P, 40, 40 }, - { REPEAT_SPARSE_OPTIMAL_P, 80, 80 }, - { REPEAT_SPARSE_OPTIMAL_P, 100, 100 }, - { REPEAT_SPARSE_OPTIMAL_P, 150, 150 }, - { REPEAT_SPARSE_OPTIMAL_P, 200, 200 }, - { REPEAT_SPARSE_OPTIMAL_P, 250, 250 }, - { REPEAT_SPARSE_OPTIMAL_P, 300, 300 }, - { REPEAT_SPARSE_OPTIMAL_P, 350, 350 }, - { REPEAT_SPARSE_OPTIMAL_P, 400, 400 }, - { REPEAT_SPARSE_OPTIMAL_P, 500, 500 }, - { REPEAT_SPARSE_OPTIMAL_P, 600, 600 }, - { REPEAT_SPARSE_OPTIMAL_P, 800, 800 }, - { REPEAT_SPARSE_OPTIMAL_P, 1000, 1000 }, - { REPEAT_SPARSE_OPTIMAL_P, 1500, 1500 }, - { REPEAT_SPARSE_OPTIMAL_P, 2000, 2000 }, - { REPEAT_SPARSE_OPTIMAL_P, 2500, 2500 }, - { REPEAT_SPARSE_OPTIMAL_P, 3000, 3000 }, - { REPEAT_SPARSE_OPTIMAL_P, 3500, 3500 }, - { REPEAT_SPARSE_OPTIMAL_P, 4000, 4000 }, - { REPEAT_SPARSE_OPTIMAL_P, 4500, 4500 }, - { REPEAT_SPARSE_OPTIMAL_P, 5000, 5000 }, - { REPEAT_SPARSE_OPTIMAL_P, 65534, 65534 }, + { REPEAT_SPARSE_OPTIMAL_P, depth(10), depth(10) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(20), depth(20) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(40), depth(40) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(80), depth(80) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(100), depth(100) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(150), depth(150) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(200), depth(200) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(250), depth(250) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(300), depth(300) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(350), depth(350) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(400), depth(400) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(500), depth(500) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(600), depth(600) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(800), depth(800) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(1000), depth(1000) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(1500), depth(1500) }, 
+ { REPEAT_SPARSE_OPTIMAL_P, depth(2000), depth(2000) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(2500), depth(2500) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(3000), depth(3000) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(3500), depth(3500) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(4000), depth(4000) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(4500), depth(4500) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(5000), depth(5000) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(65534), depth(65534) }, // {N, M} repeats - { REPEAT_SPARSE_OPTIMAL_P, 10, 20 }, - { REPEAT_SPARSE_OPTIMAL_P, 20, 40 }, - { REPEAT_SPARSE_OPTIMAL_P, 40, 80 }, - { REPEAT_SPARSE_OPTIMAL_P, 80, 100 }, - { REPEAT_SPARSE_OPTIMAL_P, 100, 120 }, - { REPEAT_SPARSE_OPTIMAL_P, 150, 180 }, - { REPEAT_SPARSE_OPTIMAL_P, 200, 400 }, - { REPEAT_SPARSE_OPTIMAL_P, 250, 500 }, - { REPEAT_SPARSE_OPTIMAL_P, 300, 400 }, - { REPEAT_SPARSE_OPTIMAL_P, 350, 500 }, - { REPEAT_SPARSE_OPTIMAL_P, 400, 500 }, - { REPEAT_SPARSE_OPTIMAL_P, 500, 600 }, - { REPEAT_SPARSE_OPTIMAL_P, 600, 700 }, - { REPEAT_SPARSE_OPTIMAL_P, 800, 1000 }, - { REPEAT_SPARSE_OPTIMAL_P, 1000, 1200 }, - { REPEAT_SPARSE_OPTIMAL_P, 1500, 1800 }, - { REPEAT_SPARSE_OPTIMAL_P, 2000, 4000 }, - { REPEAT_SPARSE_OPTIMAL_P, 2500, 3000 }, - { REPEAT_SPARSE_OPTIMAL_P, 3000, 3500 }, - { REPEAT_SPARSE_OPTIMAL_P, 3500, 4000 }, - { REPEAT_SPARSE_OPTIMAL_P, 4000, 8000 }, - { REPEAT_SPARSE_OPTIMAL_P, 4500, 8000 }, - { REPEAT_SPARSE_OPTIMAL_P, 5000, 5001 }, - { REPEAT_SPARSE_OPTIMAL_P, 60000, 65534 } + { REPEAT_SPARSE_OPTIMAL_P, depth(10), depth(20) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(20), depth(40) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(40), depth(80) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(80), depth(100) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(100), depth(120) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(150), depth(180) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(200), depth(400) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(250), depth(500) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(300), depth(400) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(350), depth(500) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(400), depth(500) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(500), depth(600) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(600), depth(700) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(800), depth(1000) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(1000), depth(1200) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(1500), depth(1800) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(2000), depth(4000) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(2500), depth(3000) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(3000), depth(3500) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(3500), depth(4000) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(4000), depth(8000) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(4500), depth(8000) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(5000), depth(5001) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(60000), depth(65534) } }; static diff --git a/unit/internal/shuffle.cpp b/unit/internal/shuffle.cpp index a4632c36c..b2316babd 100644 --- a/unit/internal/shuffle.cpp +++ b/unit/internal/shuffle.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,6 +30,7 @@ #include "gtest/gtest.h" +#include "util/arch.h" #include "util/simd_utils.h" #include "nfa/limex_shuffle.h" @@ -164,14 +165,15 @@ TEST(Shuffle, PackedExtract64_3) { template static void build_pshufb_masks_onebit(unsigned int bit, T *permute, T *compare) { - static_assert(sizeof(T) == 
sizeof(m128) || sizeof(T) == sizeof(m256), + static_assert(sizeof(T) == sizeof(m128) || sizeof(T) == sizeof(m256) || + sizeof(T) == sizeof(m512), "should be valid type"); // permute mask has 0x80 in all bytes except the one we care about memset(permute, 0x80, sizeof(*permute)); memset(compare, 0, sizeof(*compare)); char *pmsk = (char *)permute; char *cmsk = (char *)compare; - u8 off = (bit >= 128) ? 0x10 : 0; + u8 off = (bit >= 128) ? (bit >= 256) ? (bit >= 384) ? 0x30 : 0x20 : 0x10 : 0; pmsk[off] = bit/8; cmsk[off] = ~(1 << (bit % 8)); } @@ -194,7 +196,7 @@ TEST(Shuffle, PackedExtract128_1) { } } -#if defined(__AVX2__) +#if defined(HAVE_AVX2) TEST(Shuffle, PackedExtract256_1) { // Try all possible one-bit masks for (unsigned int i = 0; i < 256; i++) { @@ -213,4 +215,24 @@ TEST(Shuffle, PackedExtract256_1) { } } #endif + +#if defined(HAVE_AVX512) +TEST(Shuffle, PackedExtract512_1) { + // Try all possible one-bit masks + for (unsigned int i = 0; i < 512; i++) { + // shuffle a single 1 bit to the front + m512 permute, compare; + build_pshufb_masks_onebit(i, &permute, &compare); + EXPECT_EQ(1U, packedExtract512(setbit(i), permute, compare)); + EXPECT_EQ(1U, packedExtract512(ones512(), permute, compare)); + // we should get zero out of these cases + EXPECT_EQ(0U, packedExtract512(zeroes512(), permute, compare)); + EXPECT_EQ(0U, packedExtract512(not512(setbit(i)), permute, compare)); + // we should get zero out of all the other bit positions + for (unsigned int j = 0; (j != i && j < 512); j++) { + EXPECT_EQ(0U, packedExtract512(setbit(j), permute, compare)); + } + } +} +#endif } // namespace diff --git a/unit/internal/simd_utils.cpp b/unit/internal/simd_utils.cpp index 7b34d92e2..623c2c998 100644 --- a/unit/internal/simd_utils.cpp +++ b/unit/internal/simd_utils.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,7 +29,8 @@ #include "config.h" #include "gtest/gtest.h" -#include "util/alloc.h" +#include "util/arch.h" +#include "util/bytecode_ptr.h" #include "util/make_unique.h" #include "util/simd_utils.h" @@ -142,6 +143,10 @@ void simd_loadbytes(m128 *a, const void *ptr, unsigned i) { *a = loadbytes128(pt void simd_loadbytes(m256 *a, const void *ptr, unsigned i) { *a = loadbytes256(ptr, i); } void simd_loadbytes(m384 *a, const void *ptr, unsigned i) { *a = loadbytes384(ptr, i); } void simd_loadbytes(m512 *a, const void *ptr, unsigned i) { *a = loadbytes512(ptr, i); } +m128 simd_lshift64(const m128 &a, unsigned i) { return lshift64_m128(a, i); } +m256 simd_lshift64(const m256 &a, unsigned i) { return lshift64_m256(a, i); } +m384 simd_lshift64(const m384 &a, unsigned i) { return lshift64_m384(a, i); } +m512 simd_lshift64(const m512 &a, unsigned i) { return lshift64_m512(a, i); } template class SimdUtilsTest : public testing::Test { @@ -539,8 +544,9 @@ TYPED_TEST(SimdUtilsTest, load_store) { a.bytes[i] = (char)(i % 256); } - aligned_unique_ptr mem_ptr = aligned_zmalloc_unique(sizeof(a)); + auto mem_ptr = make_bytecode_ptr(sizeof(a), alignof(TypeParam)); char *mem = mem_ptr.get(); + ASSERT_EQ(0, (size_t)mem % 16U); memset(mem, 0, sizeof(a)); @@ -584,6 +590,65 @@ TYPED_TEST(SimdUtilsTest, loadbytes_storebytes) { } } +TYPED_TEST(SimdUtilsTest, lshift64) { + TypeParam a; + memset(&a, 0x5a, sizeof(a)); + + static constexpr u64a exp_val = 0x5a5a5a5a5a5a5a5aULL; + + union { + TypeParam 
simd; + u64a qword[sizeof(TypeParam) / 8]; + } c; + + for (unsigned s = 0; s < 64; s++) { + c.simd = simd_lshift64(a, s); + + const u64a expected = exp_val << s; + for (size_t i = 0; i < sizeof(c) / 8; i++) { + EXPECT_EQ(expected, c.qword[i]); + } + } + + /* Clang 3.4 on FreeBSD 10 crashes on the following - disable for now */ +#if !(defined(__FreeBSD__) && defined(__clang__) && __clang_major__ == 3) + + // test immediates + u64a expected; + + c.simd = simd_lshift64(a, 1); + expected = exp_val << 1; + for (size_t i = 0; i < sizeof(c) / 8; i++) { + EXPECT_EQ(expected, c.qword[i]); + } + + c.simd = simd_lshift64(a, 2); + expected = exp_val << 2; + for (size_t i = 0; i < sizeof(c) / 8; i++) { + EXPECT_EQ(expected, c.qword[i]); + } + + c.simd = simd_lshift64(a, 7); + expected = exp_val << 7; + for (size_t i = 0; i < sizeof(c) / 8; i++) { + EXPECT_EQ(expected, c.qword[i]); + } + + c.simd = simd_lshift64(a, 31); + expected = exp_val << 31; + for (size_t i = 0; i < sizeof(c) / 8; i++) { + EXPECT_EQ(expected, c.qword[i]); + } +#endif +} + +TEST(SimdUtilsTest, alignment) { + ASSERT_EQ(16, alignof(m128)); + ASSERT_EQ(32, alignof(m256)); + ASSERT_EQ(16, alignof(m384)); + ASSERT_EQ(64, alignof(m512)); +} + TEST(SimdUtilsTest, movq) { m128 simd; @@ -620,7 +685,7 @@ TEST(SimdUtilsTest, set4x32) { ASSERT_EQ(0, memcmp(cmp, &simd, sizeof(simd))); } -#if defined(__AVX2__) +#if defined(HAVE_AVX2) TEST(SimdUtilsTest, set32x8) { char cmp[sizeof(m256)]; diff --git a/unit/internal/utf8_validate.cpp b/unit/internal/utf8_validate.cpp index 6649e6fe9..f570e6b02 100644 --- a/unit/internal/utf8_validate.cpp +++ b/unit/internal/utf8_validate.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -117,6 +117,6 @@ INSTANTIATE_TEST_CASE_P(ValidUtf8, ValidUtf8Test, ValuesIn(valid_utf8_tests)); TEST_P(ValidUtf8Test, check) { const auto &info = GetParam(); - ASSERT_EQ(info.is_valid, isValidUtf8(info.str.c_str())) - << "String is: " << printable(info.str) << std::endl; + SCOPED_TRACE(testing::Message() << "String is: " << printable(info.str)); + ASSERT_EQ(info.is_valid, isValidUtf8(info.str.c_str())); } diff --git a/util/CMakeLists.txt b/util/CMakeLists.txt index c0a6bc212..ea942ef1a 100644 --- a/util/CMakeLists.txt +++ b/util/CMakeLists.txt @@ -2,7 +2,7 @@ CHECK_FUNCTION_EXISTS(mmap HAVE_MMAP) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS} ${HS_CXX_FLAGS}") include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR} ${PROJECT_SOURCE_DIR}) diff --git a/util/ExpressionParser.h b/util/ExpressionParser.h index 992304484..c97c114e7 100644 --- a/util/ExpressionParser.h +++ b/util/ExpressionParser.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,12 +29,14 @@ #ifndef EXPRESSIONPARSER_H #define EXPRESSIONPARSER_H +#include "hs_common.h" + #include struct hs_expr_ext; -bool readExpression(const std::string &line, std::string &expr, - unsigned int *flags, hs_expr_ext *ext, - bool *must_be_ordered = nullptr); +bool HS_CDECL readExpression(const std::string &line, std::string &expr, + unsigned int *flags, hs_expr_ext *ext, + bool 
*must_be_ordered = nullptr); #endif diff --git a/util/ExpressionParser.rl b/util/ExpressionParser.rl index 98ed8daa9..233b70c18 100644 --- a/util/ExpressionParser.rl +++ b/util/ExpressionParser.rl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,7 +48,8 @@ enum ParamKey { PARAM_NONE, PARAM_MIN_OFFSET, PARAM_MAX_OFFSET, - PARAM_MIN_LENGTH + PARAM_MIN_LENGTH, + PARAM_EDIT_DISTANCE }; %%{ @@ -92,6 +93,10 @@ enum ParamKey { ext->flags |= HS_EXT_FLAG_MIN_LENGTH; ext->min_length = num; break; + case PARAM_EDIT_DISTANCE: + ext->flags |= HS_EXT_FLAG_EDIT_DISTANCE; + ext->edit_distance = num; + break; case PARAM_NONE: default: // No key specified, syntax invalid. @@ -110,9 +115,9 @@ void initExt(hs_expr_ext *ext) { ext->max_offset = MAX_OFFSET; } -bool readExpression(const std::string &input, std::string &expr, - unsigned int *flags, hs_expr_ext *ext, - bool *must_be_ordered) { +bool HS_CDECL readExpression(const std::string &input, std::string &expr, + unsigned int *flags, hs_expr_ext *ext, + bool *must_be_ordered) { assert(flags); assert(ext); @@ -151,8 +156,9 @@ bool readExpression(const std::string &input, std::string &expr, %%{ single_flag = [ismW8HPLVO]; param = ('min_offset' @{ key = PARAM_MIN_OFFSET; } | - 'max_offset' @{ key = PARAM_MAX_OFFSET; } | - 'min_length' @{ key = PARAM_MIN_LENGTH; } ); + 'max_offset' @{ key = PARAM_MAX_OFFSET; } | + 'min_length' @{ key = PARAM_MIN_LENGTH; } | + 'edit_distance' @{ key = PARAM_EDIT_DISTANCE; }); value = (digit @accumulateNum)+ >{num = 0;}; param_spec = (' '* param '=' value ' '*) >{ key = PARAM_NONE; } diff --git a/util/expressions.cpp b/util/expressions.cpp index 944c74772..a81e0cd58 100644 --- a/util/expressions.cpp +++ b/util/expressions.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -27,6 +27,10 @@ */ #include "config.h" +#include "expressions.h" + +#include "hs.h" +#include "string_util.h" #include #include @@ -34,7 +38,6 @@ #include #include -#include #include #include #if !defined(_WIN32) @@ -45,9 +48,7 @@ #include #endif -#include "expressions.h" -#include "hs.h" -#include "string_util.h" +#include using namespace std; @@ -90,7 +91,7 @@ void processLine(string &line, unsigned lineNum, //cout << "Inserting expr: id=" << id << ", pcre=" << pcre_str << endl; - bool ins = exprMap.insert(ExpressionMap::value_type(id, pcre_str)).second; + bool ins = exprMap.emplace(id, pcre_str).second; if (!ins) { failLine(lineNum, file, line, "Duplicate ID found."); } @@ -101,7 +102,7 @@ void processLine(string &line, unsigned lineNum, #define S_ISDIR(st_m) (_S_IFDIR & (st_m)) #define S_ISREG(st_m) (_S_IFREG & (st_m)) #endif -void loadExpressionsFromFile(const string &fname, ExpressionMap &exprMap) { +void HS_CDECL loadExpressionsFromFile(const string &fname, ExpressionMap &exprMap) { struct stat st; if (stat(fname.c_str(), &st) != 0) { return; @@ -194,7 +195,7 @@ void loadExpressions(const string &inPath, ExpressionMap &exprMap) { } } #else // windows TODO: improve -void loadExpressions(const string &inPath, ExpressionMap &exprMap) { +void HS_CDECL loadExpressions(const string &inPath, ExpressionMap &exprMap) { // Is our input path a 
file or a directory?
     struct stat st;
     if (stat(inPath.c_str(), &st) != 0) {
@@ -250,8 +251,8 @@ void loadExpressions(const string &inPath, ExpressionMap &exprMap) {
 }
 #endif
 
-void loadSignatureList(const string &inFile,
-                       SignatureSet &signatures) {
+void HS_CDECL loadSignatureList(const string &inFile,
+                                SignatureSet &signatures) {
     ifstream f(inFile.c_str());
     if (!f.good()) {
         cerr << "Can't open file: '" << inFile << "'" << endl;
@@ -278,20 +279,19 @@ void loadSignatureList(const string &inFile,
     }
 }
 
-void limitBySignature(ExpressionMap &exprMap,
-                      const SignatureSet &signatures) {
+ExpressionMap limitToSignatures(const ExpressionMap &exprMap,
+                                const SignatureSet &signatures) {
     ExpressionMap keepers;
-    SignatureSet::const_iterator it, ite;
-    for (it = signatures.begin(), ite = signatures.end(); it != ite; ++it) {
-        ExpressionMap::const_iterator match = exprMap.find(*it);
+    for (auto id : signatures) {
+        auto match = exprMap.find(id);
         if (match == exprMap.end()) {
-            cerr << "Unable to find signature " << *it
+            cerr << "Unable to find signature " << id
                  << " in expression set!" << endl;
             exit(1);
         }
         keepers.insert(*match);
     }
-    exprMap.swap(keepers);
+    return keepers;
 }
diff --git a/util/expressions.h b/util/expressions.h
index 949c9201f..078b99722 100644
--- a/util/expressions.h
+++ b/util/expressions.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -29,27 +29,26 @@
 #ifndef EXPRESSIONS_H
 #define EXPRESSIONS_H
 
+#include "hs_common.h"
+
 #include <map>
 #include <string>
-#include <list>
+#include <vector>
 
-typedef std::map<unsigned, std::string> ExpressionMap;
-typedef std::list<unsigned> SignatureSet;
+using ExpressionMap = std::map<unsigned, std::string>;
+using SignatureSet = std::vector<unsigned>;
 
 // load all of the expressions from the given directory into the given
 // expression map. Exits on failure.
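 //
 // for illustration, a hypothetical driver would compose these helpers as
 // follows (the file names here are made up):
 //
 //     ExpressionMap exprs;
 //     loadExpressions("patterns/", exprs);
 //     SignatureSet sigs;
 //     loadSignatureList("sigids.txt", sigs);
 //     exprs = limitToSignatures(exprs, sigs);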
-void loadExpressions(const std::string &inDir, ExpressionMap &exprMap);
+void HS_CDECL loadExpressions(const std::string &inDir, ExpressionMap &exprMap);
 
-void loadExpressionsFromFile(const std::string &fname, ExpressionMap &exprMap);
+void HS_CDECL loadExpressionsFromFile(const std::string &fname, ExpressionMap &exprMap);
 
 // load a list of signature IDs
-void loadSignatureList(const std::string &inFile, SignatureSet &signatures);
+void HS_CDECL loadSignatureList(const std::string &inFile, SignatureSet &signatures);
 
-// produce a new expression map only containing those signatures in the
-// expression list
-void generateExprMap(const SignatureSet &signatures,
-                     const ExpressionMap &allExprs, ExpressionMap &out);
+// trim expression map to only the given signatures, returning result
+ExpressionMap limitToSignatures(const ExpressionMap &exprMap,
+                                const SignatureSet &signatures);
 
-// trim expression map to only the given signatures (in-place)
-void limitBySignature(ExpressionMap &exprMap, const SignatureSet &signatures);
 
 #endif
diff --git a/util/ng_corpus_generator.cpp b/util/ng_corpus_generator.cpp
index ca7c413ab..19ab7edf2 100644
--- a/util/ng_corpus_generator.cpp
+++ b/util/ng_corpus_generator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -35,6 +35,7 @@
 #include "ng_corpus_generator.h"
 
 #include "ng_corpus_editor.h"
+#include "compiler/compiler.h"
 #include "nfagraph/ng.h"
 #include "nfagraph/ng_util.h"
 #include "ue2common.h"
@@ -48,15 +49,15 @@
 #include <algorithm>
 #include <map>
+#include <memory>
 #include <random>
 #include <set>
 #include <string>
-#include <boost/ptr_container/ptr_vector.hpp>
+#include <vector>
 
 using namespace std;
 using namespace ue2;
-using boost::ptr_vector;
 
 typedef vector<NFAVertex> VertexPath;
 
@@ -139,8 +140,8 @@ void findPaths(const NGHolder &g, CorpusProperties &cProps,
     // limit will evict a random existing one.
     const size_t MAX_OPEN = min((size_t)1000, corpusLimit * 10);
 
-    ptr_vector<VertexPath> open;
-    open.push_back(new VertexPath(1, g.start));
+    vector<unique_ptr<VertexPath>> open;
+    open.push_back(ue2::make_unique<VertexPath>(1, g.start));
 
     ue2::unordered_set<NFAVertex> one_way_in;
     for (const auto &v : vertices_range(g)) {
@@ -152,7 +153,8 @@
     while (!open.empty()) {
         u32 slot = cProps.rand(0, open.size() - 1);
         swap(open.at(slot), open.back());
-        ptr_vector<VertexPath>::auto_type p = open.pop_back();
+        auto p = std::move(open.back());
+        open.pop_back();
         NFAVertex u = p->back();
 
         DEBUG_PRINTF("dequeuing path %s, back %zu\n",
@@ -194,19 +196,19 @@
             // If we've got no further adjacent vertices, re-use p rather than
             // copying it for the next path.
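             // (ownership: below, the final successor takes over p itself via
             // std::move(), standing in for ptr_vector's release(); earlier
             // successors get their own deep copy of the path.)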
-            VertexPath *new_path;
+            unique_ptr<VertexPath> new_path;
             if (boost::next(ai) == ae) {
-                new_path = p.release();
+                new_path = std::move(p);
             } else {
-                new_path = new VertexPath(*p);
+                new_path = make_unique<VertexPath>(*p);
             }
             new_path->push_back(v);
             if (open.size() < MAX_OPEN) {
-                open.push_back(new_path);
+                open.push_back(std::move(new_path));
             } else {
                 u32 victim = cProps.rand(0, open.size() - 1);
-                open.replace(victim, new_path);
+                open[victim] = std::move(new_path);
             }
         }
     }
@@ -218,8 +220,9 @@ namespace {
 /** \brief Concrete implementation */
 class CorpusGeneratorImpl : public CorpusGenerator {
 public:
-    CorpusGeneratorImpl(const NGHolder &graph_in, CorpusProperties &props);
-    ~CorpusGeneratorImpl() {}
+    CorpusGeneratorImpl(const NGHolder &graph_in, const ExpressionInfo &expr_in,
+                        CorpusProperties &props);
+    ~CorpusGeneratorImpl() = default;
 
     void generateCorpus(vector<string> &data);
 
@@ -236,6 +239,9 @@ class CorpusGeneratorImpl : public CorpusGenerator {
      * bytes in length. */
     void addRandom(const min_max &mm, string *out);
 
+    /** \brief Info about this expression. */
+    const ExpressionInfo &expr;
+
     /** \brief The NFA graph we operate over. */
     const NGHolder &graph;
 
@@ -245,9 +251,13 @@
 CorpusGeneratorImpl::CorpusGeneratorImpl(const NGHolder &graph_in,
+                                         const ExpressionInfo &expr_in,
                                          CorpusProperties &props)
-    : graph(graph_in), cProps(props) {
-    // empty
+    : expr(expr_in), graph(graph_in), cProps(props) {
+    // if this pattern is to be matched approximately
+    if (expr.edit_distance && !props.editDistance) {
+        props.editDistance = props.rand(0, expr.edit_distance + 1);
+    }
 }
 
 void CorpusGeneratorImpl::generateCorpus(vector<string> &data) {
@@ -388,8 +398,9 @@ void CorpusGeneratorImpl::newGenerator(vector<string> &outdata) {
 
 /** \brief Concrete implementation for UTF-8 */
 class CorpusGeneratorUtf8 : public CorpusGenerator {
 public:
-    CorpusGeneratorUtf8(const NGHolder &graph_in, CorpusProperties &props);
-    ~CorpusGeneratorUtf8() {}
+    CorpusGeneratorUtf8(const NGHolder &graph_in, const ExpressionInfo &expr_in,
+                        CorpusProperties &props);
+    ~CorpusGeneratorUtf8() = default;
 
     void generateCorpus(vector<string> &data);
 
@@ -406,6 +417,9 @@ class CorpusGeneratorUtf8 : public CorpusGenerator {
      * length. */
     void addRandom(const min_max &mm, vector<unichar> *out);
 
+    /** \brief Info about this expression. */
+    const ExpressionInfo &expr;
+
     /** \brief The NFA graph we operate over. */
     const NGHolder &graph;
 
@@ -415,9 +429,14 @@
 CorpusGeneratorUtf8::CorpusGeneratorUtf8(const NGHolder &graph_in,
+                                         const ExpressionInfo &expr_in,
                                          CorpusProperties &props)
-    : graph(graph_in), cProps(props) {
-    // empty
+    : expr(expr_in), graph(graph_in), cProps(props) {
+    // we do not support UTF-8 for approximate matching
+    if (expr.edit_distance) {
+        throw CorpusGenerationFailure("UTF-8 for edited patterns is not "
+                                      "supported.");
+    }
 }
 
 void CorpusGeneratorUtf8::generateCorpus(vector<string> &data) {
@@ -673,11 +692,12 @@ CorpusGenerator::~CorpusGenerator() {
 }
 
 // External entry point
-unique_ptr<CorpusGenerator> makeCorpusGenerator(const NGWrapper &graph,
+unique_ptr<CorpusGenerator> makeCorpusGenerator(const NGHolder &graph,
+                                                const ExpressionInfo &expr,
                                                 CorpusProperties &props) {
-    if (graph.utf8) {
-        return ue2::make_unique<CorpusGeneratorUtf8>(graph, props);
+    if (expr.utf8) {
+        return ue2::make_unique<CorpusGeneratorUtf8>(graph, expr, props);
     } else {
-        return ue2::make_unique<CorpusGeneratorImpl>(graph, props);
+        return ue2::make_unique<CorpusGeneratorImpl>(graph, expr, props);
     }
 }
diff --git a/util/ng_corpus_generator.h b/util/ng_corpus_generator.h
index a7445ab64..f230a10d0 100644
--- a/util/ng_corpus_generator.h
+++ b/util/ng_corpus_generator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -41,10 +41,17 @@
 namespace ue2 {
 
-class NGWrapper;
+class ExpressionInfo;
+class NGHolder;
 
 } // namespace ue2
 
+struct CorpusGenerationFailure {
+    explicit CorpusGenerationFailure(std::string s) :
+        message(std::move(s)) {}
+    std::string message;
+};
+
 /** \brief Abstract interface to corpus generator tool. */
 class CorpusGenerator {
 public:
@@ -62,6 +69,7 @@ class CorpusGenerator {
 
 /** \brief Build a concrete impl conforming to the \ref CorpusGenerator
 * interface. */
 std::unique_ptr<CorpusGenerator>
-makeCorpusGenerator(const ue2::NGWrapper &graph, CorpusProperties &props);
+makeCorpusGenerator(const ue2::NGHolder &g, const ue2::ExpressionInfo &expr,
+                    CorpusProperties &props);
 
 #endif
diff --git a/util/ng_find_matches.cpp b/util/ng_find_matches.cpp
index 2b3373653..0a1f796f0 100644
--- a/util/ng_find_matches.cpp
+++ b/util/ng_find_matches.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -47,55 +47,744 @@
 using namespace std;
 using namespace ue2;
 
+using MatchSet = set<pair<size_t, size_t>>;
+using StateBitSet = boost::dynamic_bitset<>;
+
 namespace {
 
+/** \brief Max number of states (taking edit distance into account). */
+static constexpr size_t STATE_COUNT_MAX = 15000;
+
+// returns all successors up to a given depth in a vector of sets, indexed by
+// zero-based depth from source vertex
+static
+vector<flat_set<NFAVertex>>
+gatherSuccessorsByDepth(const NGHolder &g, const NFAVertex &src, u32 depth) {
+    assert(depth > 0);
+
+    vector<flat_set<NFAVertex>> result(depth);
+
+    // populate current set of successors
+    for (auto v : adjacent_vertices_range(src, g)) {
+        // ignore self-loops
+        if (src == v) {
+            continue;
+        }
+        DEBUG_PRINTF("Node %zu depth 1\n", g[v].index);
+        result[0].insert(v);
+    }
+
+    for (u32 d = 1; d < depth; d++) {
+        // collect all successors for all current level vertices
+        const auto &cur = result[d - 1];
+        auto &next = result[d];
+        for (auto u : cur) {
+            // don't go past special nodes
+            if (is_special(u, g)) {
+                continue;
+            }
+
+            for (auto v : adjacent_vertices_range(u, g)) {
+                // ignore self-loops
+                if (u == v) {
+                    continue;
+                }
+                DEBUG_PRINTF("Node %zu depth %u\n", g[v].index, d + 1);
+                next.insert(v);
+            }
+        }
+    }
+
+    return result;
+}
+
+// returns all predecessors up to a given depth in a vector of sets, indexed by
+// zero-based depth from source vertex
+static
+vector<flat_set<NFAVertex>>
+gatherPredecessorsByDepth(const NGHolder &g, NFAVertex src, u32 depth) {
+    assert(depth > 0);
+
+    vector<flat_set<NFAVertex>> result(depth);
+
+    // populate current set of predecessors
+    for (auto v : inv_adjacent_vertices_range(src, g)) {
+        // ignore self-loops
+        if (src == v) {
+            continue;
+        }
+        DEBUG_PRINTF("Node %zu depth 1\n", g[v].index);
+        result[0].insert(v);
+    }
+
+    for (u32 d = 1; d < depth; d++) {
+        // collect all predecessors of all current level vertices
+        const auto &cur = result[d - 1];
+        auto &next = result[d];
+        for (auto v : cur) {
+            for (auto u : inv_adjacent_vertices_range(v, g)) {
+                // ignore self-loops
+                if (v == u) {
+                    continue;
+                }
+                DEBUG_PRINTF("Node %zu depth %u\n", g[u].index, d + 1);
+                next.insert(u);
+            }
+        }
+    }
+
+    return result;
+}
+
+// this is a per-vertex, per-shadow level state transition table
+struct GraphCache {
+    GraphCache(u32 dist_in, const NGHolder &g) :
+        size(num_vertices(g)), edit_distance(dist_in)
+    {
+        auto dist_max = edit_distance + 1;
+
+        allocateStateTransitionTable(dist_max);
+        populateTransitionCache(g, dist_max);
+        populateAcceptCache(g, dist_max);
+    }
+
+    void allocateStateTransitionTable(u32 dist_max) {
+        // resize level 1 - per vertex
+        shadow_transitions.resize(size);
+        helper_transitions.resize(size);
+
+        // resize level 2 - per shadow level
+        for (u32 i = 0; i < size; i++) {
+            shadow_transitions[i].resize(dist_max);
+            helper_transitions[i].resize(dist_max);
+
+            // resize level 3 - per vertex
+            for (u32 d = 0; d < dist_max; d++) {
+                shadow_transitions[i][d].resize(size);
+                helper_transitions[i][d].resize(size);
+            }
+        }
+
+        // accept states are indexed by edit distance
+        accept_states.resize(dist_max);
+        accept_eod_states.resize(dist_max);
+
+        // vertex report maps are indexed by edit distance
+        vertex_reports_by_level.resize(dist_max);
+        vertex_eod_reports_by_level.resize(dist_max);
+    }
+
+    /*
+     * certain transitions to helpers are disallowed:
+     * 1. transitions from accept/acceptEod
+     * 2. transitions to accept/acceptEod
+     * 3. from start to startDs
+     * 4. to a virtual/multiline start
+     *
+     * everything else is allowed.
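+     *
+     * (as populated below, moving one shadow level deeper always costs one
+     * edit: helper transitions model inserted or replaced characters, while
+     * cross-level shadow transitions model removals.)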
+ */ + bool canTransitionToHelper(NFAVertex u, NFAVertex v, const NGHolder &g) const { + if (is_any_accept(u, g)) { + return false; + } + if (is_any_accept(v, g)) { + return false; + } + if (u == g.start && v == g.startDs) { + return false; + } + if (is_virtual_start(v, g)) { + return false; + } + return true; + } + + void populateTransitionCache(const NGHolder &g, u32 dist_max) { + // populate mapping of vertex index to vertex + vector idx_to_v(size); + for (auto v : vertices_range(g)) { + idx_to_v[g[v].index] = v; + } + + for (u32 i = 0; i < size; i++) { + auto cur_v = idx_to_v[i]; + + // set up transition tables + auto succs = gatherSuccessorsByDepth(g, cur_v, dist_max); + + assert(succs.size() == dist_max); + + for (u32 d = 0; d < dist_max; d++) { + auto &v_shadows = shadow_transitions[i][d]; + auto cur_v_bit = i; + + // enable transition to next level helper (this handles insertion) + if (d < edit_distance && !is_any_accept(cur_v, g)) { + auto &next_v_helpers = helper_transitions[i][d + 1]; + + next_v_helpers.set(cur_v_bit); + } + + // if vertex has a self-loop, we can also transition to it, + // but only if we're at shadow level 0 + if (edge(cur_v, cur_v, g).second && d == 0) { + v_shadows.set(cur_v_bit); + } + + // populate state transition tables + for (auto v : succs[d]) { + auto v_bit = g[v].index; + + // we cannot transition to startDs on any level other than + // level 0 + if (v != g.startDs || d == 0) { + // this handles direct transitions as well as removals + v_shadows.set(v_bit); + } + + // we can also transition to next-level helper (handles + // replace), provided we meet the criteria + if (d < edit_distance && canTransitionToHelper(cur_v, v, g)) { + auto &next_v_helpers = helper_transitions[i][d + 1]; + + next_v_helpers.set(v_bit); + } + } + } + } + } + + void populateAcceptCache(const NGHolder &g, u32 dist_max) { + // set up accept states masks + StateBitSet accept(size); + accept.set(g[g.accept].index); + StateBitSet accept_eod(size); + accept_eod.set(g[g.acceptEod].index); + + // gather accept and acceptEod states + for (u32 base_dist = 0; base_dist < dist_max; base_dist++) { + auto &states = accept_states[base_dist]; + auto &eod_states = accept_eod_states[base_dist]; + + states.resize(size); + eod_states.resize(size); + + // inspect each vertex + for (u32 i = 0; i < size; i++) { + // inspect all shadow levels from base_dist to dist_max + for (u32 d = 0; d < dist_max - base_dist; d++) { + auto &shadows = shadow_transitions[i][d]; + + // if this state transitions to accept, set its bit + if ((shadows & accept).any()) { + states.set(i); + } + if ((shadows & accept_eod).any()) { + eod_states.set(i); + } + } + } + } + + // populate accepts cache + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + const auto &rs = g[v].reports; + + for (u32 d = 0; d <= edit_distance; d++) { + // add self to report list at all levels + vertex_reports_by_level[d][v].insert(rs.begin(), rs.end()); + } + if (edit_distance == 0) { + // if edit distance is 0, no predecessors will have reports + continue; + } + + auto preds_by_depth = gatherPredecessorsByDepth(g, v, edit_distance); + for (u32 pd = 0; pd < preds_by_depth.size(); pd++) { + const auto &preds = preds_by_depth[pd]; + // for each predecessor, add reports up to maximum edit distance + // for current depth from source vertex + for (auto pred : preds) { + for (u32 d = 0; d < edit_distance - pd; d++) { + vertex_reports_by_level[d][pred].insert(rs.begin(), rs.end()); + } + } + } + } + for (auto v : 
inv_adjacent_vertices_range(g.acceptEod, g)) { + const auto &rs = g[v].reports; + + if (v == g.accept) { + continue; + } + + for (u32 d = 0; d <= edit_distance; d++) { + // add self to report list at all levels + vertex_eod_reports_by_level[d][v].insert(rs.begin(), rs.end()); + } + if (edit_distance == 0) { + // if edit distance is 0, no predecessors will have reports + continue; + } + + auto preds_by_depth = gatherPredecessorsByDepth(g, v, edit_distance); + for (u32 pd = 0; pd < preds_by_depth.size(); pd++) { + const auto &preds = preds_by_depth[pd]; + // for each predecessor, add reports up to maximum edit distance + // for current depth from source vertex + for (auto pred : preds) { + for (u32 d = 0; d < edit_distance - pd; d++) { + vertex_eod_reports_by_level[d][pred].insert(rs.begin(), rs.end()); + } + } + } + } + } + +#ifdef DEBUG + void dumpStateTransitionTable(const NGHolder &g) { + StateBitSet accept(size); + accept.set(g[g.accept].index); + StateBitSet accept_eod(size); + accept_eod.set(g[g.acceptEod].index); + + DEBUG_PRINTF("Dumping state transition tables\n"); + DEBUG_PRINTF("Shadows:\n"); + for (u32 i = 0; i < num_vertices(g); i++) { + DEBUG_PRINTF("%-7s %3u:", "Vertex", i); + for (u32 j = 0; j < num_vertices(g); j++) { + printf("%3i", j); + } + printf("\n"); + for (u32 d = 0; d <= edit_distance; d++) { + DEBUG_PRINTF("%-7s %3u:", "Level", d); + const auto &s = getShadowTransitions(i, d); + for (u32 j = 0; j < num_vertices(g); j++) { + printf("%3i", s.test(j)); + } + printf("\n"); + } + DEBUG_PRINTF("\n"); + } + + DEBUG_PRINTF("Helpers:\n"); + for (u32 i = 0; i < num_vertices(g); i++) { + DEBUG_PRINTF("%-7s %3u:", "Vertex", i); + for (u32 j = 0; j < num_vertices(g); j++) { + printf("%3i", j); + } + printf("\n"); + for (u32 d = 0; d <= edit_distance; d++) { + DEBUG_PRINTF("%-7s %3u:", "Level", d); + const auto &s = getHelperTransitions(i, d); + for (u32 j = 0; j < num_vertices(g); j++) { + printf("%3i", s.test(j)); + } + printf("\n"); + } + DEBUG_PRINTF("\n"); + } + + DEBUG_PRINTF("Accept transitions:\n"); + DEBUG_PRINTF("%-12s", "Vertex idx:"); + for (u32 j = 0; j < num_vertices(g); j++) { + printf("%3i", j); + } + printf("\n"); + for (u32 d = 0; d <= edit_distance; d++) { + DEBUG_PRINTF("%-7s %3u:", "Level", d); + const auto &s = getAcceptTransitions(d); + for (u32 j = 0; j < num_vertices(g); j++) { + printf("%3i", s.test(j)); + } + printf("\n"); + } + DEBUG_PRINTF("\n"); + + DEBUG_PRINTF("Accept EOD transitions:\n"); + DEBUG_PRINTF("%-12s", "Vertex idx:"); + for (u32 j = 0; j < num_vertices(g); j++) { + printf("%3i", j); + } + printf("\n"); + for (u32 d = 0; d <= edit_distance; d++) { + DEBUG_PRINTF("%-7s %3u:", "Level", d); + const auto &s = getAcceptEodTransitions(d); + for (u32 j = 0; j < num_vertices(g); j++) { + printf("%3i", s.test(j)); + } + printf("\n"); + } + DEBUG_PRINTF("\n"); + + DEBUG_PRINTF("%-12s ", "Accepts:"); + for (u32 i = 0; i < num_vertices(g); i++) { + printf("%3i", accept.test(i)); + } + printf("\n"); + + DEBUG_PRINTF("%-12s ", "EOD Accepts:"); + for (u32 i = 0; i < num_vertices(g); i++) { + printf("%3i", accept_eod.test(i)); + } + printf("\n"); + + DEBUG_PRINTF("Reports\n"); + for (auto v : vertices_range(g)) { + for (u32 d = 0; d <= edit_distance; d++) { + const auto &r = vertex_reports_by_level[d][v]; + const auto &e = vertex_eod_reports_by_level[d][v]; + DEBUG_PRINTF("%-7s %3zu %-8s %3zu %-8s %3zu\n", + "Vertex", g[v].index, "rs:", r.size(), "eod:", e.size()); + } + } + printf("\n"); + } +#endif + + const StateBitSet& getShadowTransitions(u32 idx, 
u32 level) const {
+        assert(idx < size);
+        assert(level <= edit_distance);
+        return shadow_transitions[idx][level];
+    }
+    const StateBitSet& getHelperTransitions(u32 idx, u32 level) const {
+        assert(idx < size);
+        assert(level <= edit_distance);
+        return helper_transitions[idx][level];
+    }
+    const StateBitSet& getAcceptTransitions(u32 level) const {
+        assert(level <= edit_distance);
+        return accept_states[level];
+    }
+    const StateBitSet& getAcceptEodTransitions(u32 level) const {
+        assert(level <= edit_distance);
+        return accept_eod_states[level];
+    }
+
+    /*
+     * the bitsets are indexed by vertex and shadow level. each bitset's
+     * length is equal to the total number of vertices in the graph.
+     *
+     * for convenience, helper functions are provided.
+     */
+    vector<vector<StateBitSet>> shadow_transitions;
+    vector<vector<StateBitSet>> helper_transitions;
+
+    // accept states masks, indexed by shadow level
+    vector<StateBitSet> accept_states;
+    vector<StateBitSet> accept_eod_states;
+
+    // map of all reports associated with any vertex, indexed by shadow level
+    vector<map<NFAVertex, flat_set<ReportID>>> vertex_reports_by_level;
+    vector<map<NFAVertex, flat_set<ReportID>>> vertex_eod_reports_by_level;
+
+    u32 size;
+    u32 edit_distance;
+};
+
+
+/*
+ * SOM workflow is expected to be the following:
+ * - Caller calls getActiveStates, which reports SOM for each active state
+ * - Caller calls getSuccessors on each of the active states, which *doesn't*
+ *   report SOM
+ * - Caller decides if the successor state should be activated, and calls
+ *   activateState with SOM set to that of the previous active state (not the
+ *   successor!)
+ * - activateState then resolves any conflicts between SOMs that may arise
+ *   from multiple active states progressing to the same successor
+ */
 struct StateSet {
-    explicit StateSet(size_t sz) : s(sz), som(sz, 0) {}
-    boost::dynamic_bitset<> s; // bitset of states that are on
-    vector<size_t> som; // som value for each state
+    struct State {
+        enum node_type {
+            NODE_SHADOW = 0,
+            NODE_HELPER
+        };
+        State(size_t idx_in, u32 level_in, size_t som_in, node_type type_in) :
+            idx(idx_in), level(level_in), som(som_in), type(type_in) {}
+        size_t idx;
+        u32 level;
+        size_t som;
+        node_type type;
+    };
+
+    // Temporary working data used for step() which we want to keep around
+    // (rather than reallocating vectors all the time).
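+    // active is refilled via getActiveStates() once per scanned byte, and
+    // succ_list is refilled via getSuccessors() for each active state in
+    // turn; see step() below.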
+ struct WorkingData { + vector active; + vector succ_list; + }; + + StateSet(size_t sz, u32 dist_in) : + shadows(dist_in + 1), helpers(dist_in + 1), + shadows_som(dist_in + 1), helpers_som(dist_in + 1), + edit_distance(dist_in) { + for (u32 dist = 0; dist <= dist_in; dist++) { + shadows[dist].resize(sz, false); + helpers[dist].resize(sz, false); + shadows_som[dist].resize(sz, 0); + helpers_som[dist].resize(sz, 0); + } + } + + void reset() { + for (u32 dist = 0; dist <= edit_distance; dist++) { + shadows[dist].reset(); + helpers[dist].reset(); + fill(shadows_som[dist].begin(), shadows_som[dist].end(), 0); + fill(helpers_som[dist].begin(), helpers_som[dist].end(), 0); + } + } + + bool empty() const { + for (u32 dist = 0; dist <= edit_distance; dist++) { + if (shadows[dist].any()) { + return false; + } + if (helpers[dist].any()) { + return false; + } + } + return true; + } + + size_t count() const { + size_t result = 0; + + for (u32 dist = 0; dist <= edit_distance; dist++) { + result += shadows[dist].count(); + result += helpers[dist].count(); + } + + return result; + } + + bool setActive(const State &s) { + switch (s.type) { + case State::NODE_HELPER: + return helpers[s.level].test_set(s.idx); + case State::NODE_SHADOW: + return shadows[s.level].test_set(s.idx); + } + assert(0); + return false; + } + + size_t getCachedSom(const State &s) const { + switch (s.type) { + case State::NODE_HELPER: + return helpers_som[s.level][s.idx]; + case State::NODE_SHADOW: + return shadows_som[s.level][s.idx]; + } + assert(0); + return 0; + } + + void setCachedSom(const State &s, const size_t som_val) { + switch (s.type) { + case State::NODE_HELPER: + helpers_som[s.level][s.idx] = som_val; + break; + case State::NODE_SHADOW: + shadows_som[s.level][s.idx] = som_val; + break; + default: + assert(0); + } + } + +#ifdef DEBUG + void dumpActiveStates() const { + vector states; + getActiveStates(states); + + DEBUG_PRINTF("Dumping active states\n"); + + for (const auto &state : states) { + DEBUG_PRINTF("type: %s idx: %zu level: %u som: %zu\n", + state.type == State::NODE_HELPER ? 
"HELPER" : "SHADOW", + state.idx, state.level, state.som); + } + } +#endif + + void getActiveStates(vector &result) const { + result.clear(); + + for (u32 dist = 0; dist <= edit_distance; dist++) { + // get all shadow vertices (including original graph) + const auto &cur_shadow_vertices = shadows[dist]; + for (size_t id = cur_shadow_vertices.find_first(); + id != cur_shadow_vertices.npos; + id = cur_shadow_vertices.find_next(id)) { + result.emplace_back(id, dist, shadows_som[dist][id], + State::NODE_SHADOW); + } + + // the rest is only valid for edited graphs + if (dist == 0) { + continue; + } + + // get all helper vertices + const auto &cur_helper_vertices = helpers[dist]; + for (size_t id = cur_helper_vertices.find_first(); + id != cur_helper_vertices.npos; + id = cur_helper_vertices.find_next(id)) { + result.emplace_back(id, dist, helpers_som[dist][id], + State::NODE_HELPER); + } + } + + sort_and_unique(result); + } + + // does not return SOM + void getSuccessors(const State &state, const GraphCache &gc, + vector &result) const { + result.clear(); + + // maximum shadow depth that we can go from current level + u32 max_depth = edit_distance - state.level + 1; + + for (u32 d = 0; d < max_depth; d++) { + const auto &shadow_succ = gc.getShadowTransitions(state.idx, d); + for (size_t id = shadow_succ.find_first(); + id != shadow_succ.npos; + id = shadow_succ.find_next(id)) { + auto new_level = state.level + d; + result.emplace_back(id, new_level, 0, State::NODE_SHADOW); + } + + const auto &helper_succ = gc.getHelperTransitions(state.idx, d); + for (size_t id = helper_succ.find_first(); + id != helper_succ.npos; + id = helper_succ.find_next(id)) { + auto new_level = state.level + d; + result.emplace_back(id, new_level, 0, State::NODE_HELPER); + } + } + + sort_and_unique(result); + } + + void getAcceptStates(const GraphCache &gc, vector &result) const { + result.clear(); + + for (u32 dist = 0; dist <= edit_distance; dist++) { + // get all shadow vertices (including original graph) + auto cur_shadow_vertices = shadows[dist]; + cur_shadow_vertices &= gc.getAcceptTransitions(dist); + for (size_t id = cur_shadow_vertices.find_first(); + id != cur_shadow_vertices.npos; + id = cur_shadow_vertices.find_next(id)) { + result.emplace_back(id, dist, shadows_som[dist][id], + State::NODE_SHADOW); + } + auto cur_helper_vertices = helpers[dist]; + cur_helper_vertices &= gc.getAcceptTransitions(dist); + for (size_t id = cur_helper_vertices.find_first(); + id != cur_helper_vertices.npos; + id = cur_helper_vertices.find_next(id)) { + result.emplace_back(id, dist, helpers_som[dist][id], + State::NODE_HELPER); + } + } + + sort_and_unique(result); + } + + void getAcceptEodStates(const GraphCache &gc, vector &result) const { + result.clear(); + + for (u32 dist = 0; dist <= edit_distance; dist++) { + // get all shadow vertices (including original graph) + auto cur_shadow_vertices = shadows[dist]; + cur_shadow_vertices &= gc.getAcceptEodTransitions(dist); + for (size_t id = cur_shadow_vertices.find_first(); + id != cur_shadow_vertices.npos; + id = cur_shadow_vertices.find_next(id)) { + result.emplace_back(id, dist, shadows_som[dist][id], + State::NODE_SHADOW); + } + auto cur_helper_vertices = helpers[dist]; + cur_helper_vertices &= gc.getAcceptEodTransitions(dist); + for (size_t id = cur_helper_vertices.find_first(); + id != cur_helper_vertices.npos; + id = cur_helper_vertices.find_next(id)) { + result.emplace_back(id, dist, helpers_som[dist][id], + State::NODE_HELPER); + } + } + + sort_and_unique(result); + } + + // 
the caller must specify SOM at current offset, and must not attempt to + // resolve SOM inheritance conflicts + void activateState(const State &state) { + size_t cur_som = state.som; + if (setActive(state)) { + size_t cached_som = getCachedSom(state); + cur_som = min(cur_som, cached_som); + } + setCachedSom(state, cur_som); + } + + vector shadows; + vector helpers; + vector> shadows_som; + vector> helpers_som; + u32 edit_distance; }; -using MatchSet = set>; +// for flat_set +bool operator<(const StateSet::State &a, const StateSet::State &b) { + ORDER_CHECK(idx); + ORDER_CHECK(level); + ORDER_CHECK(type); + ORDER_CHECK(som); + return false; +} + +bool operator==(const StateSet::State &a, const StateSet::State &b) { + return a.idx == b.idx && a.level == b.level && a.type == b.type && + a.som == b.som; +} struct fmstate { const size_t num_states; // number of vertices in graph StateSet states; // currently active states StateSet next; // states on after this iteration + GraphCache &gc; vector vertices; // mapping from index to vertex size_t offset = 0; unsigned char cur = 0; unsigned char prev = 0; - const bool som; const bool utf8; const bool allowStartDs; const ReportManager &rm; - boost::dynamic_bitset<> accept; // states leading to accept - boost::dynamic_bitset<> accept_with_eod; // states leading to accept or eod - - fmstate(const NGHolder &g, bool som_in, bool utf8_in, bool aSD_in, - const ReportManager &rm_in) - : num_states(num_vertices(g)), states(num_states), next(num_states), - vertices(num_vertices(g), NGHolder::null_vertex()), som(som_in), - utf8(utf8_in), allowStartDs(aSD_in), rm(rm_in), accept(num_states), - accept_with_eod(num_states) { + fmstate(const NGHolder &g, GraphCache &gc_in, bool utf8_in, bool aSD_in, + const u32 edit_distance, const ReportManager &rm_in) + : num_states(num_vertices(g)), + states(num_states, edit_distance), + next(num_states, edit_distance), + gc(gc_in), vertices(num_vertices(g), NGHolder::null_vertex()), + utf8(utf8_in), allowStartDs(aSD_in), rm(rm_in) { // init states - states.s.set(g[g.start].index); + states.activateState( + StateSet::State {g[g.start].index, 0, 0, + StateSet::State::NODE_SHADOW}); if (allowStartDs) { - states.s.set(g[g.startDs].index); + states.activateState( + StateSet::State {g[g.startDs].index, 0, 0, + StateSet::State::NODE_SHADOW}); } // fill vertex mapping - for (const auto &v : vertices_range(g)) { + for (auto v : vertices_range(g)) { vertices[g[v].index] = v; } - // init accept states - for (const auto &u : inv_adjacent_vertices_range(g.accept, g)) { - accept.set(g[u].index); - } - accept_with_eod = accept; - for (const auto &u : inv_adjacent_vertices_range(g.acceptEod, g)) { - accept_with_eod.set(g[u].index); - } } }; @@ -140,8 +829,7 @@ bool isUtf8CodePoint(const char c) { } static -bool canReach(const NGHolder &g, const NFAEdge &e, - struct fmstate &state) { +bool canReach(const NGHolder &g, const NFAEdge &e, struct fmstate &state) { auto flags = g[e].assert_flags; if (!flags) { return true; @@ -175,97 +863,149 @@ bool canReach(const NGHolder &g, const NFAEdge &e, } static -void getMatches(const NGHolder &g, MatchSet &matches, struct fmstate &state, - bool allowEodMatches) { - auto acc_states = state.states.s; - acc_states &= allowEodMatches ? 
state.accept_with_eod : state.accept; +void getAcceptMatches(const NGHolder &g, MatchSet &matches, + struct fmstate &state, NFAVertex accept_vertex, + vector &active_states) { + assert(accept_vertex == g.accept || accept_vertex == g.acceptEod); + + const bool eod = accept_vertex == g.acceptEod; + if (eod) { + state.states.getAcceptEodStates(state.gc, active_states); + } else { + state.states.getAcceptStates(state.gc, active_states); + } - for (size_t i = acc_states.find_first(); i != acc_states.npos; - i = acc_states.find_next(i)) { - const NFAVertex u = state.vertices[i]; - const size_t &som_offset = state.states.som[i]; + DEBUG_PRINTF("Number of active states: %zu\n", active_states.size()); + + for (const auto &cur : active_states) { + auto u = state.vertices[cur.idx]; // we can't accept anything from startDs in between UTF-8 codepoints if (state.utf8 && u == g.startDs && !isUtf8CodePoint(state.cur)) { continue; } - for (const auto &e : out_edges_range(u, g)) { - NFAVertex v = target(e, g); - if (v == g.accept || (v == g.acceptEod && allowEodMatches)) { - // check edge assertions if we are allowed to reach accept - if (!canReach(g, e, state)) { - continue; - } - DEBUG_PRINTF("match found at %zu\n", state.offset); + const auto &reports = + eod ? state.gc.vertex_eod_reports_by_level[cur.level][u] + : state.gc.vertex_reports_by_level[cur.level][u]; - assert(!g[u].reports.empty()); - for (const auto &report_id : g[u].reports) { - const Report &ri = state.rm.getReport(report_id); + NFAEdge e = edge(u, accept_vertex, g); - DEBUG_PRINTF("report %u has offset adjustment %d\n", - report_id, ri.offsetAdjust); - matches.emplace(som_offset, state.offset + ri.offsetAdjust); - } - } + // we assume edge assertions only exist at level 0 + if (e && !canReach(g, e, state)) { + continue; + } + + DEBUG_PRINTF("%smatch found at %zu\n", eod ? "eod " : "", state.offset); + + assert(!reports.empty()); + for (const auto &report_id : reports) { + const Report &ri = state.rm.getReport(report_id); + + DEBUG_PRINTF("report %u has offset adjustment %d\n", report_id, + ri.offsetAdjust); + DEBUG_PRINTF("match from (i:%zu,l:%u,t:%u): (%zu,%zu)\n", cur.idx, + cur.level, cur.type, cur.som, + state.offset + ri.offsetAdjust); + matches.emplace(cur.som, state.offset + ri.offsetAdjust); } } } static -void step(const NGHolder &g, struct fmstate &state) { - state.next.s.reset(); - - for (size_t i = state.states.s.find_first(); i != state.states.s.npos; - i = state.states.s.find_next(i)) { - const NFAVertex &u = state.vertices[i]; - const size_t &u_som_offset = state.states.som[i]; - - for (const auto &e : out_edges_range(u, g)) { - NFAVertex v = target(e, g); - if (v == g.acceptEod) { - // can't know the future: we don't know if we're at EOD. 
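+// gather matches for g.accept, plus g.acceptEod when EOD matches are allowed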
+void getMatches(const NGHolder &g, MatchSet &matches, struct fmstate &state,
+                StateSet::WorkingData &wd, bool allowEodMatches) {
+    getAcceptMatches(g, matches, state, g.accept, wd.active);
+    if (allowEodMatches) {
+        getAcceptMatches(g, matches, state, g.acceptEod, wd.active);
+    }
+}
+
+static
+void step(const NGHolder &g, fmstate &state, StateSet::WorkingData &wd) {
+    state.next.reset();
+
+    state.states.getActiveStates(wd.active);
+
+    for (const auto &cur : wd.active) {
+        auto u = state.vertices[cur.idx];
+        state.states.getSuccessors(cur, state.gc, wd.succ_list);
+
+        for (auto succ : wd.succ_list) {
+            auto v = state.vertices[succ.idx];
+
+            if (is_any_accept(v, g)) {
                 continue;
             }
-            if (v == g.accept) {
+
+            if (!state.allowStartDs && v == g.startDs) {
                 continue;
             }
-            if (!state.allowStartDs && v == g.startDs) {
+            // GraphCache doesn't differentiate between successors for shadows
+            // and helpers, and StateSet does not know anything about the graph,
+            // so the only place we can do it is here. we can't self-loop on a
+            // startDs if we're startDs's helper, so disallow it.
+            if (u == g.startDs && v == g.startDs &&
+                succ.level != 0 && succ.level == cur.level) {
                 continue;
             }
-            const CharReach &cr = g[v].char_reach;
-            const size_t v_idx = g[v].index;
+            // for the reasons outlined above, also putting this here.
+            // disallow transitions from start to startDs on levels other than zero
+            if (u == g.start && v == g.startDs &&
+                cur.level != 0 && succ.level != 0) {
+                continue;
+            }
-            // check reachability and edge assertions
-            if (cr.test(state.cur) && canReach(g, e, state)) {
-                // if we aren't in SOM mode, just set every SOM to 0
-                if (!state.som) {
-                    state.next.s.set(v_idx);
-                    state.next.som[v_idx] = 0;
-                    continue;
+            bool can_reach = false;
+
+            if (succ.type == StateSet::State::NODE_HELPER) {
+                can_reach = true;
+            } else {
+                // we assume edge assertions only exist on level 0
+                const CharReach &cr = g[v].char_reach;
+                NFAEdge e = edge(u, v, g);
+
+                if (cr.test(state.cur) &&
+                    (!e || canReach(g, e, state))) {
+                    can_reach = true;
                 }
+            }
-                // if this is first vertex since start, use current offset as SOM
+            // log the attempted transition and whether it can be taken
+            DEBUG_PRINTF("reaching %zu->%zu ('%c'->'%c'): %s\n",
+                         g[u].index, g[v].index,
+                         ourisprint(state.prev) ? state.prev : '?',
+                         ourisprint(state.cur) ? state.cur : '?',
+                         can_reach ? "yes" : "no");
+
+            if (can_reach) {
+                // we should use current offset as SOM if:
+                // - we're at level 0 and we're a start vertex
+                // - we're a fake start shadow
                 size_t next_som;
-                if (u == g.start || u == g.startDs || is_virtual_start(u, g)) {
+                bool reset = is_any_start(u, g) && cur.level == 0;
+                reset |= is_virtual_start(u, g) &&
+                         cur.type == StateSet::State::NODE_SHADOW;
+
+                if (reset) {
                     next_som = state.offset;
                 } else {
                     // else, inherit SOM from predecessor
-                    next_som = u_som_offset;
+                    next_som = cur.som;
                 }
+                succ.som = next_som;
-                // check if the vertex is already active
-                // if this vertex is not yet active, use current SOM
-                if (!state.next.s.test(v_idx)) {
-                    state.next.s.set(v_idx);
-                    state.next.som[v_idx] = next_som;
-                } else {
-                    // else, work out leftmost SOM
-                    state.next.som[v_idx] =
-                        min(next_som, state.next.som[v_idx]);
-                }
+                DEBUG_PRINTF("src: idx %zu level: %u som: %zu type: %s\n",
+                             cur.idx, cur.level, cur.som,
+                             cur.type == StateSet::State::NODE_HELPER ? "H" : "S");
+                DEBUG_PRINTF("dst: idx %zu level: %u som: %zu type: %s\n",
+                             succ.idx, succ.level, succ.som,
+                             succ.type == StateSet::State::NODE_HELPER ?
+                                     "H" : "S");
+
+                // activate successor (SOM will be handled by activateState)
+                state.next.activateState(succ);
             }
         }
     }
@@ -311,43 +1051,64 @@ void filterMatches(MatchSet &matches) {
  *
  * Fills \a matches with offsets into the data stream where a match is found.
  */
-void findMatches(const NGHolder &g, const ReportManager &rm,
-                 const string &input, MatchSet &matches, const bool notEod,
-                 const bool som, const bool utf8) {
+bool findMatches(const NGHolder &g, const ReportManager &rm,
+                 const string &input, MatchSet &matches,
+                 const u32 edit_distance, const bool notEod, const bool utf8) {
     assert(hasCorrectlyNumberedVertices(g));
+    // cannot match fuzzy utf8 patterns, this should've been filtered out at
+    // compile time, so make it an assert
+    assert(!edit_distance || !utf8);
+
+    const size_t total_states = num_vertices(g) * (3 * edit_distance + 1);
+    DEBUG_PRINTF("Finding matches (%zu total states)\n", total_states);
+    if (total_states > STATE_COUNT_MAX) {
+        DEBUG_PRINTF("too big\n");
+        return false;
+    }
+
+    GraphCache gc(edit_distance, g);
+#ifdef DEBUG
+    gc.dumpStateTransitionTable(g);
+#endif
 
     const bool allowStartDs = (proper_out_degree(g.startDs, g) > 0);
 
-    struct fmstate state(g, som, utf8, allowStartDs, rm);
+    struct fmstate state(g, gc, utf8, allowStartDs, edit_distance, rm);
+
+    StateSet::WorkingData wd;
 
     for (auto it = input.begin(), ite = input.end(); it != ite; ++it) {
+#ifdef DEBUG
+        state.states.dumpActiveStates();
+#endif
         state.offset = distance(input.begin(), it);
         state.cur = *it;
 
-        step(g, state);
+        step(g, state, wd);
 
-        getMatches(g, matches, state, false);
+        getMatches(g, matches, state, wd, false);
 
-        DEBUG_PRINTF("index %zu, %zu states on\n", state.offset,
-                     state.next.s.count());
-        if (state.next.s.empty()) {
-            if (state.som) {
-                filterMatches(matches);
-            }
-            return;
+        DEBUG_PRINTF("offset %zu, %zu states on\n", state.offset,
+                     state.next.count());
+        if (state.next.empty()) {
+            filterMatches(matches);
+            return true;
        }
         state.states = state.next;
         state.prev = state.cur;
     }
+#ifdef DEBUG
+    state.states.dumpActiveStates();
+#endif
     state.offset = input.size();
     state.cur = 0;
 
     // do additional step to get matches after stream end, this time count eod
     // matches also (or not, if we're in notEod mode)
-    getMatches(g, matches, state, !notEod);
+    DEBUG_PRINTF("Looking for EOD matches\n");
+    getMatches(g, matches, state, wd, !notEod);
-    if (state.som) {
-        filterMatches(matches);
-    }
+    filterMatches(matches);
+    return true;
 }
diff --git a/util/ng_find_matches.h b/util/ng_find_matches.h
index e9e47010f..9860c202e 100644
--- a/util/ng_find_matches.h
+++ b/util/ng_find_matches.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -44,13 +44,18 @@ struct BoundaryReports;
 
 } // namespace ue2
 
-/** \brief Find all matches for a given graph when executed against \a input.
+/**
+ * \brief Find all matches for a given graph when executed against \a input.
  *
- * Fills \a matches with offsets into the data stream where a match is found.
+ * Fills \a matches with offsets into the data stream where a match is found.
+ *
+ * Returns false if this graph is too large to find its matches in reasonable
+ * time.
 */
-void findMatches(const ue2::NGHolder &g, const ue2::ReportManager &rm,
+bool findMatches(const ue2::NGHolder &g, const ue2::ReportManager &rm,
                  const std::string &input, std::set<std::pair<size_t, size_t>> &matches,
-                 const bool notEod, const bool som, const bool utf8);
+                 const unsigned int max_edit_distance, const bool notEod,
+                 const bool utf8);
 
 #endif // NG_FIND_MATCHES_H
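
The new step() above threads a start-of-match (SOM) offset through every transition: SOM is reset to the current scan offset when leaving a start vertex at level 0 (or a virtual-start shadow), inherited from the predecessor otherwise, and activateState() is relied on to keep the leftmost SOM when the same state is activated twice in one step. The following is a minimal sketch of that merge rule in isolation; NextStates and its map are stand-ins, not the real StateSet machinery:

    #include <cstddef>
    #include <unordered_map>

    // Stand-in for the set of states activated during one step: maps a
    // state index to the leftmost start-of-match offset seen for it.
    struct NextStates {
        std::unordered_map<size_t, size_t> som_by_idx;

        // Mirrors the intent of state.next.activateState(succ): the first
        // activation records the successor's SOM; a repeat activation in
        // the same step keeps the smaller (earlier) offset.
        void activateState(size_t idx, size_t som) {
            auto it = som_by_idx.find(idx);
            if (it == som_by_idx.end()) {
                som_by_idx.emplace(idx, som);
            } else if (som < it->second) {
                it->second = som;
            }
        }
    };

This is the same leftmost-SOM rule the removed bitset code expressed as state.next.som[v_idx] = min(next_som, state.next.som[v_idx]).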
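
findMatches() now also rejects oversized simulations up front: with approximate matching, every graph vertex is replicated (3 * edit_distance + 1) times, so the total state count is checked against STATE_COUNT_MAX before any scanning begins and the function returns false rather than running an unbounded simulation. A sketch of that guard, with a hypothetical cap value standing in for the real STATE_COUNT_MAX constant defined in ng_find_matches.cpp:

    #include <cstddef>

    // Hypothetical cap; the real STATE_COUNT_MAX lives in the .cpp file.
    static constexpr size_t kStateCountMax = 1u << 18;

    // Per the formula in findMatches(): each vertex expands into
    // (3 * edit_distance + 1) simulation states. Comparing by division
    // rather than multiplication avoids size_t overflow on huge graphs.
    bool withinStateBudget(size_t num_vertices, unsigned edit_distance) {
        const size_t per_vertex = 3 * static_cast<size_t>(edit_distance) + 1;
        return num_vertices <= kStateCountMax / per_vertex;
    }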
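
With the header change, callers must treat findMatches() as fallible and pass the new max_edit_distance argument. A hypothetical call site is sketched below; constructing the NGHolder and ReportManager is assumed to happen elsewhere in the compiler front end and is out of scope here:

    #include <cstdio>
    #include <set>
    #include <string>
    #include <utility>

    #include "util/ng_find_matches.h"

    // 'g' and 'rm' are assumed to have been built by existing compile-side
    // code; this only demonstrates the updated calling convention.
    void scanOne(const ue2::NGHolder &g, const ue2::ReportManager &rm,
                 const std::string &input) {
        std::set<std::pair<size_t, size_t>> matches;
        if (!findMatches(g, rm, input, matches, /*max_edit_distance=*/1,
                         /*notEod=*/false, /*utf8=*/false)) {
            std::printf("graph too large to simulate, skipping\n");
            return;
        }
        for (const auto &m : matches) {
            // each entry pairs a start-of-match offset with an (adjusted)
            // end offset into the input
            std::printf("match: (%zu, %zu)\n", m.first, m.second);
        }
    }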