diff --git a/CMakeLists.txt b/CMakeLists.txt index d7e07a9a..c6952f41 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1221,11 +1221,17 @@ if (NOT BUILD_STATIC_LIBS) endif () add_subdirectory(util) -add_subdirectory(unit) -if (EXISTS ${CMAKE_SOURCE_DIR}/tools/CMakeLists.txt) +option(BUILD_UNIT "Build Hyperscan unit tests (default TRUE)" TRUE) +if(BUILD_UNIT) + add_subdirectory(unit) +endif() + +option(BUILD_TOOLS "Build Hyperscan tools (default TRUE)" TRUE) +if(EXISTS ${CMAKE_SOURCE_DIR}/tools/CMakeLists.txt AND BUILD_TOOLS) add_subdirectory(tools) endif() + if (EXISTS ${CMAKE_SOURCE_DIR}/chimera/CMakeLists.txt AND BUILD_CHIMERA) add_subdirectory(chimera) endif() @@ -1240,4 +1246,7 @@ if(BUILD_BENCHMARKS) add_subdirectory(benchmarks) endif() -add_subdirectory(doc/dev-reference) +option(BUILD_DOC "Build the Hyperscan documentation (default TRUE)" TRUE) +if(BUILD_DOC) + add_subdirectory(doc/dev-reference) +endif() diff --git a/README.md b/README.md index 2e68d2e6..483b2cad 100644 --- a/README.md +++ b/README.md @@ -146,6 +146,7 @@ export CXX="/usr/pkg/gcc12/bin/g++" ``` In FreeBSD similarly, you might want to install a different compiler. +If you want to use gcc, it is recommended to use gcc12. You will also, as in NetBSD, need to install cmake, sqlite, boost and ragel packages. Using the example of gcc12 from pkg: installing the desired compiler: @@ -164,7 +165,6 @@ the environment variables to point to this compiler: export CC="/usr/local/bin/gcc" export CXX="/usr/local/bin/g++" ``` - A further note in FreeBSD, on the PowerPC and ARM platforms, the gcc12 package installs to a slightly different name, on FreeBSD/ppc, gcc12 will be found using: @@ -175,12 +175,6 @@ export CXX="/usr/local/bin/g++12" Then continue with the build as below. -A note about running in FreeBSD: if you built a dynamically linked binary -with an alternative compiler, the libraries specific to the compiler that -built the binary will probably not be found and the base distro libraries -in /lib will be found instead. Adjust LD_LIBRARY_PATH appropriately. For -example, with gcc12 installed from pkg, one would want to use -```export LD_LIBRARY_PATH=/usr/local/lib/gcc12/``` ## Configure & build diff --git a/benchmarks/benchmarks.cpp b/benchmarks/benchmarks.cpp index 91cab3f8..2890737f 100644 --- a/benchmarks/benchmarks.cpp +++ b/benchmarks/benchmarks.cpp @@ -26,32 +26,30 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#include <iostream> #include <chrono> +#include <cstdlib> #include <cstring> #include <ctime> -#include <cstdlib> -#include <memory> #include <functional> +#include <iostream> +#include <memory> #include "benchmarks.hpp" -#define MAX_LOOPS 1000000000 -#define MAX_MATCHES 5 -#define N 8 +#define MAX_LOOPS 1000000000 +#define MAX_MATCHES 5 +#define N 8 struct hlmMatchEntry { size_t to; u32 id; - hlmMatchEntry(size_t end, u32 identifier) : - to(end), id(identifier) {} + hlmMatchEntry(size_t end, u32 identifier) : to(end), id(identifier) {} }; std::vector<hlmMatchEntry> ctxt; -static -hwlmcb_rv_t hlmSimpleCallback(size_t to, u32 id, - UNUSED struct hs_scratch *scratch) { +static hwlmcb_rv_t hlmSimpleCallback(size_t to, u32 id, + UNUSED struct hs_scratch *scratch) { DEBUG_PRINTF("match @%zu = %u\n", to, id); ctxt.push_back(hlmMatchEntry(to, id)); @@ -59,40 +57,42 @@ hwlmcb_rv_t hlmSimpleCallback(size_t to, u32 id, return HWLM_CONTINUE_MATCHING; } -template <typename InitFunc, typename BenchFunc> -static void run_benchmarks(int size, int loops, int max_matches, bool is_reverse, MicroBenchmark &bench, InitFunc &&init, BenchFunc &&func) { +template <typename InitFunc, typename BenchFunc> +static void run_benchmarks(int size, int loops, int max_matches, + bool is_reverse, MicroBenchmark &bench, + InitFunc &&init, BenchFunc &&func) { init(bench); - double total_sec = 0.0; - u64a total_size = 0; - double bw = 0.0; - double avg_bw = 0.0; + double total_sec = 0.0; double max_bw = 0.0; double avg_time = 0.0; if (max_matches) { + double avg_bw = 0.0; int pos = 0; - for(int j = 0; j < max_matches - 1; j++) { + for (int j = 0; j < max_matches - 1; j++) { bench.buf[pos] = 'b'; - pos = (j+1) *size / max_matches ; + pos = (j + 1) * size / max_matches; bench.buf[pos] = 'a'; u64a actual_size = 0; auto start = std::chrono::steady_clock::now(); - for(int i = 0; i < loops; i++) { + for (int i = 0; i < loops; i++) { const u8 *res = func(bench); - if (is_reverse) - actual_size += bench.buf.data() + size - res; - else - actual_size += res - bench.buf.data(); + if (is_reverse) + actual_size += bench.buf.data() + size - res; + else + actual_size += res - bench.buf.data(); } auto end = std::chrono::steady_clock::now(); - double dt = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count(); + double dt = std::chrono::duration_cast<std::chrono::microseconds>( + end - start) + .count(); total_sec += dt; /*accumulate time, in microseconds*/ /*calculate bandwidth*/ - bw = (actual_size / dt) * 1000000.0 / 1048576.0; - /*std::cout << "act_size = " << act_size << std::endl; - std::cout << "dt = " << dt << std::endl; - std::cout << "bw = " << bw << std::endl;*/ - avg_bw += bw; + double bw = (actual_size / dt) * 1000000.0 / 1048576.0; + /*std::cout << "act_size = " << act_size << std::endl; + std::cout << "dt = " << dt << std::endl; + std::cout << "bw = " << bw << std::endl;*/ + avg_bw += bw; /*convert to MB/s*/ max_bw = std::max(bw, max_bw); /*calculate average time*/ @@ -100,20 +100,22 @@ static void run_benchmarks(int size, int loops, int max_matches, bool is_reverse } avg_time /= max_matches; avg_bw /= max_matches; - total_sec /= 1000000.0; + total_sec /= 1000000.0; /*convert total time from microseconds to seconds*/ - printf(KMAG "%s: %u matches, %u * %u iterations," KBLU " total elapsed time =" RST " %.3f s, " - KBLU "average time per call =" RST " %.3f μs," KBLU " max bandwidth = " RST " %.3f MB/s," KBLU " average bandwidth =" RST " %.3f MB/s \n", + printf("%-18s, %-12d, %-10d, %-6d, %-10.3f, %-9.3f, %-8.3f, %-7.3f\n", bench.label, max_matches, size, loops, total_sec, avg_time, max_bw, avg_bw); } else { + u64a total_size = 0; auto start = std::chrono::steady_clock::now(); for (int i = 0; i < loops; i++) { - const u8 *res = func(bench); + func(bench); } auto end =
std::chrono::steady_clock::now(); - total_sec += std::chrono::duration_cast<std::chrono::microseconds>(end - start).count(); + total_sec += + std::chrono::duration_cast<std::chrono::microseconds>(end - start) + .count(); /*calculate transferred size*/ - total_size = size * loops; + total_size = (u64a)size * (u64a)loops; /*calculate average time*/ avg_time = total_sec / loops; /*convert microseconds to seconds*/ @@ -122,130 +124,139 @@ static void run_benchmarks(int size, int loops, int max_matches, bool is_reverse max_bw = total_size / total_sec; /*convert to MB/s*/ max_bw /= 1048576.0; - printf(KMAG "%s: no matches, %u * %u iterations," KBLU " total elapsed time =" RST " %.3f s, " - KBLU "average time per call =" RST " %.3f μs ," KBLU " bandwidth = " RST " %.3f MB/s \n", - bench.label, size ,loops, total_sec, avg_time, max_bw ); + printf("%-18s, %-12s, %-10d, %-6d, %-10.3f, %-9.3f, %-8.3f, %-7s\n", + bench.label, "0", size, loops, total_sec, avg_time, max_bw, "0"); } } int main(){ - int matches[] = {0, MAX_MATCHES}; + const int matches[] = {0, MAX_MATCHES}; std::vector<size_t> sizes; - for (size_t i = 0; i < N; i++) sizes.push_back(16000 << i*2); - const char charset[] = "aAaAaAaAAAaaaaAAAAaaaaAAAAAAaaaAAaaa"; - + for (size_t i = 0; i < N; i++) + sizes.push_back(16000 << i * 2); + const char charset[] = "aAaAaAaAAAaaaaAAAAaaaaAAAAAAaaaAAaaa"; + printf("%-18s, %-12s, %-10s, %-6s, %-10s, %-9s, %-8s, %-7s\n", "Matcher", + "max_matches", "size", "loops", "total_sec", "avg_time", "max_bw", + "avg_bw"); for (int m = 0; m < 2; m++) { for (size_t i = 0; i < std::size(sizes); i++) { MicroBenchmark bench("Shufti", sizes[i]); - run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench, + run_benchmarks( + sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench, [&](MicroBenchmark &b) { b.chars.set('a'); ue2::shuftiBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); memset(b.buf.data(), 'b', b.size); }, [&](MicroBenchmark &b) { - return shuftiExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size); - } - ); + return shuftiExec(b.lo, b.hi, b.buf.data(), + b.buf.data() + b.size); + }); } for (size_t i = 0; i < std::size(sizes); i++) { MicroBenchmark bench("Reverse Shufti", sizes[i]); - run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench, + run_benchmarks( + sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench, [&](MicroBenchmark &b) { b.chars.set('a'); ue2::shuftiBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); memset(b.buf.data(), 'b', b.size); }, [&](MicroBenchmark &b) { - return rshuftiExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size); - } - ); + return rshuftiExec(b.lo, b.hi, b.buf.data(), + b.buf.data() + b.size); + }); } for (size_t i = 0; i < std::size(sizes); i++) { MicroBenchmark bench("Truffle", sizes[i]); - run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench, + run_benchmarks( + sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench, [&](MicroBenchmark &b) { b.chars.set('a'); ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); memset(b.buf.data(), 'b', b.size); }, [&](MicroBenchmark &b) { - return truffleExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size); - } - ); + return truffleExec(b.lo, b.hi, b.buf.data(), + b.buf.data() + b.size); + }); } for (size_t i = 0; i < std::size(sizes); i++) { MicroBenchmark bench("Reverse Truffle", sizes[i]); - run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench, + run_benchmarks( + sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench, [&](MicroBenchmark &b) { b.chars.set('a'); ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
memset(b.buf.data(), 'b', b.size); }, [&](MicroBenchmark &b) { - return rtruffleExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size); - } - ); + return rtruffleExec(b.lo, b.hi, b.buf.data(), + b.buf.data() + b.size); + }); } for (size_t i = 0; i < std::size(sizes); i++) { MicroBenchmark bench("Vermicelli", sizes[i]); - run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench, + run_benchmarks( + sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench, [&](MicroBenchmark &b) { b.chars.set('a'); ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); memset(b.buf.data(), 'b', b.size); }, [&](MicroBenchmark &b) { - return vermicelliExec('a', 'b', b.buf.data(), b.buf.data() + b.size); - } - ); + return vermicelliExec('a', 'b', b.buf.data(), + b.buf.data() + b.size); + }); } for (size_t i = 0; i < std::size(sizes); i++) { MicroBenchmark bench("Reverse Vermicelli", sizes[i]); - run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench, + run_benchmarks( + sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench, [&](MicroBenchmark &b) { b.chars.set('a'); ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); memset(b.buf.data(), 'b', b.size); }, [&](MicroBenchmark &b) { - return rvermicelliExec('a', 'b', b.buf.data(), b.buf.data() + b.size); - } - ); + return rvermicelliExec('a', 'b', b.buf.data(), + b.buf.data() + b.size); + }); } for (size_t i = 0; i < std::size(sizes); i++) { - //we imitate the noodle unit tests + // we imitate the noodle unit tests std::string str; const size_t char_len = 5; str.resize(char_len + 1); - for (size_t j=0; j < char_len; j++) { - srand (time(NULL)); - int key = rand() % + 36 ; - str[char_len] = charset[key]; - str[char_len + 1] = '\0'; - } + srand(time(NULL)); + for (size_t j = 0; j < char_len; j++) { + int key = rand() % 36; + str[j] = charset[key]; + } + str[char_len] = '\0'; MicroBenchmark bench("Noodle", sizes[i]); - run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench, + run_benchmarks( + sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench, [&](MicroBenchmark &b) { ctxt.clear(); memset(b.buf.data(), 'a', b.size); u32 id = 1000; ue2::hwlmLiteral lit(str, true, id); b.nt = ue2::noodBuildTable(lit); - assert(b.nt != nullptr); + assert(b.nt.get() != nullptr); }, [&](MicroBenchmark &b) { - noodExec(b.nt.get(), b.buf.data(), b.size, 0, hlmSimpleCallback, &b.scratch); + noodExec(b.nt.get(), b.buf.data(), b.size, 0, + hlmSimpleCallback, &b.scratch); return b.buf.data() + b.size; - } - ); + }); } } diff --git a/benchmarks/benchmarks.hpp b/benchmarks/benchmarks.hpp index 974d2234..13f66fa5 100644 --- a/benchmarks/benchmarks.hpp +++ b/benchmarks/benchmarks.hpp @@ -26,44 +26,32 @@ * POSSIBILITY OF SUCH DAMAGE.
*/ +#include "hwlm/hwlm_literal.h" +#include "hwlm/noodle_build.h" +#include "hwlm/noodle_engine.h" +#include "hwlm/noodle_internal.h" #include "nfa/shufti.h" #include "nfa/shufticompile.h" #include "nfa/truffle.h" #include "nfa/trufflecompile.h" #include "nfa/vermicelli.hpp" -#include "hwlm/noodle_build.h" -#include "hwlm/noodle_engine.h" -#include "hwlm/noodle_internal.h" -#include "hwlm/hwlm_literal.h" -#include "util/bytecode_ptr.h" #include "scratch.h" +#include "util/bytecode_ptr.h" -/*define colour control characters*/ -#define RST "\x1B[0m" -#define KRED "\x1B[31m" -#define KGRN "\x1B[32m" -#define KYEL "\x1B[33m" -#define KBLU "\x1B[34m" -#define KMAG "\x1B[35m" -#define KCYN "\x1B[36m" -#define KWHT "\x1B[37m" - -class MicroBenchmark -{ +class MicroBenchmark { public: - char const *label; - size_t size; + char const *label; + size_t size; - // Shufti/Truffle - m128 lo, hi; - ue2::CharReach chars; - std::vector buf; + // Shufti/Truffle + m128 lo, hi; + ue2::CharReach chars; + std::vector buf; - // Noodle - struct hs_scratch scratch; - ue2::bytecode_ptr nt; + // Noodle + struct hs_scratch scratch; + ue2::bytecode_ptr nt; - MicroBenchmark(char const *label_, size_t size_) - :label(label_), size(size_), buf(size_) { - }; + MicroBenchmark(char const *label_, size_t size_) + : label(label_), size(size_), buf(size_){}; }; diff --git a/cmake/osdetection.cmake b/cmake/osdetection.cmake index 3369447a..2cef0b94 100644 --- a/cmake/osdetection.cmake +++ b/cmake/osdetection.cmake @@ -6,10 +6,10 @@ if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") set(FREEBSD true) set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) #FIXME: find a nicer and more general way of doing this - if(CMAKE_C_COMPILER MATCHES "/usr/local/bin/gcc12") - set(CMAKE_BUILD_RPATH "/usr/local/lib/gcc12") - elseif(CMAKE_C_COMPILER MATCHES "/usr/local/bin/gcc13") + if(CMAKE_C_COMPILER MATCHES "/usr/local/bin/gcc13") set(CMAKE_BUILD_RPATH "/usr/local/lib/gcc13") + elseif(ARCH_AARCH64 AND (CMAKE_C_COMPILER MATCHES "/usr/local/bin/gcc12")) + set(CMAKE_BUILD_RPATH "/usr/local/lib/gcc12") endif() endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") diff --git a/doc/dev-reference/CMakeLists.txt b/doc/dev-reference/CMakeLists.txt index 449589f6..6f48e2e4 100644 --- a/doc/dev-reference/CMakeLists.txt +++ b/doc/dev-reference/CMakeLists.txt @@ -19,6 +19,7 @@ else() set(SPHINX_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/_build") set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees") set(SPHINX_HTML_DIR "${CMAKE_CURRENT_BINARY_DIR}/html") +set(SPHINX_MAN_DIR "${CMAKE_CURRENT_BINARY_DIR}/man") configure_file("${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in" "${CMAKE_CURRENT_BINARY_DIR}/conf.py" @ONLY) @@ -32,4 +33,14 @@ add_custom_target(dev-reference "${SPHINX_HTML_DIR}" DEPENDS dev-reference-doxygen COMMENT "Building HTML dev reference with Sphinx") + +add_custom_target(dev-reference-man + ${SPHINX_BUILD} + -b man + -c "${CMAKE_CURRENT_BINARY_DIR}" + -d "${SPHINX_CACHE_DIR}" + "${CMAKE_CURRENT_SOURCE_DIR}" + "${SPHINX_MAN_DIR}" + DEPENDS dev-reference-doxygen + COMMENT "Building man page reference with Sphinx") endif() diff --git a/doc/dev-reference/chimera.rst b/doc/dev-reference/chimera.rst index d35b116f..cb8c84c4 100644 --- a/doc/dev-reference/chimera.rst +++ b/doc/dev-reference/chimera.rst @@ -11,10 +11,10 @@ Introduction ************ Chimera is a software regular expression matching engine that is a hybrid of -Hyperscan and PCRE. The design goals of Chimera are to fully support PCRE -syntax as well as to take advantage of the high performance nature of Hyperscan. 
+Vectorscan and PCRE. The design goals of Chimera are to fully support PCRE +syntax as well as to take advantage of the high performance nature of Vectorscan. -Chimera inherits the design guideline of Hyperscan with C APIs for compilation +Chimera inherits the design guideline of Vectorscan with C APIs for compilation and scanning. The Chimera API itself is composed of two major components: @@ -65,13 +65,13 @@ For a given database, Chimera provides several guarantees: .. note:: Chimera is designed to have the same matching behavior as PCRE, including greedy/ungreedy, capturing, etc. Chimera reports both **start offset** and **end offset** for each match like PCRE. Different - from the fashion of reporting all matches in Hyperscan, Chimera only reports + from the fashion of reporting all matches in Vectorscan, Chimera only reports non-overlapping matches. For example, the pattern :regexp:`/foofoo/` will match ``foofoofoofoo`` at offsets (0, 6) and (6, 12). -.. note:: Since Chimera is a hybrid of Hyperscan and PCRE in order to support +.. note:: Since Chimera is a hybrid of Vectorscan and PCRE in order to support full PCRE syntax, there will be extra performance overhead compared to - Hyperscan-only solution. Please always use Hyperscan for better performance + a Vectorscan-only solution. Please always use Vectorscan for better performance unless you must need full PCRE syntax support. See :ref:`chruntime` for more details @@ -83,12 +83,12 @@ Requirements The PCRE library (http://pcre.org/) version 8.41 is required for Chimera. .. note:: Since Chimera needs to reference PCRE internal function, please place PCRE source - directory under Hyperscan root directory in order to build Chimera. + directory under the Vectorscan root directory in order to build Chimera. -Beside this, both hardware and software requirements of Chimera are the same to Hyperscan. +Besides this, both hardware and software requirements of Chimera are the same as Vectorscan. See :ref:`hardware` and :ref:`software` for more details. -.. note:: Building Hyperscan will automatically generate Chimera library. +.. note:: Building Vectorscan will automatically generate the Chimera library. Currently only static library is supported for Chimera, so please use static build type when configure CMake build options. @@ -119,7 +119,7 @@ databases: Compilation allows the Chimera library to analyze the given pattern(s) and pre-determine how to scan for these patterns in an optimized fashion using -Hyperscan and PCRE. +Vectorscan and PCRE. =============== Pattern Support =============== @@ -134,7 +134,7 @@ Semantics ========= Chimera supports the exact same semantics of PCRE library. Moreover, it supports -multiple simultaneous pattern matching like Hyperscan and the multiple matches +multiple simultaneous pattern matching like Vectorscan and the multiple matches will be reported in order by end offset. .. _chruntime: diff --git a/doc/dev-reference/compilation.rst b/doc/dev-reference/compilation.rst index 6f5541ec..a0ae8c8b 100644 --- a/doc/dev-reference/compilation.rst +++ b/doc/dev-reference/compilation.rst @@ -9,7 +9,7 @@ Compiling Patterns Building a Database ******************* -The Hyperscan compiler API accepts regular expressions and converts them into a +The Vectorscan compiler API accepts regular expressions and converts them into a compiled pattern database that can then be used to scan data. The API provides three functions that compile regular expressions into databases: #.
:c:func:`hs_compile_ext_multi`: compiles an array of expressions as above, but allows :ref:`extparam` to be specified for each expression. -Compilation allows the Hyperscan library to analyze the given pattern(s) and +Compilation allows the Vectorscan library to analyze the given pattern(s) and pre-determine how to scan for these patterns in an optimized fashion that would be far too expensive to compute at run-time. @@ -48,10 +48,10 @@ To compile patterns to be used in streaming mode, the ``mode`` parameter of block mode requires the use of :c:member:`HS_MODE_BLOCK` and vectored mode requires the use of :c:member:`HS_MODE_VECTORED`. A pattern database compiled for one mode (streaming, block or vectored) can only be used in that mode. The -version of Hyperscan used to produce a compiled pattern database must match the -version of Hyperscan used to scan with it. +version of Vectorscan used to produce a compiled pattern database must match the +version of Vectorscan used to scan with it. -Hyperscan provides support for targeting a database at a particular CPU +Vectorscan provides support for targeting a database at a particular CPU platform; see :ref:`instr_specialization` for details. ===================== @@ -75,14 +75,14 @@ characters exist in regular grammar like ``[``, ``]``, ``(``, ``)``, ``{``, While in pure literal case, all these meta characters lost extra meanings expect for that they are just common ASCII codes. -Hyperscan is initially designed to process common regular expressions. It is +Vectorscan is initially designed to process common regular expressions. It is hence embedded with a complex parser to do comprehensive regular grammar interpretation. Particularly, the identification of above meta characters is the basic step for the interpretation of far more complex regular grammars. However in real cases, patterns may not always be regular expressions. They could just be pure literals. Problem will come if the pure literals contain -regular meta characters. Supposing fed directly into traditional Hyperscan +regular meta characters. Supposing fed directly into traditional Vectorscan compile API, all these meta characters will be interpreted in predefined ways, which is unnecessary and the result is totally out of expectation. To avoid such misunderstanding by traditional API, users have to preprocess these @@ -90,7 +90,7 @@ literal patterns by converting the meta characters into some other formats: either by adding a backslash ``\`` before certain meta characters, or by converting all the characters into a hexadecimal representation. -In ``v5.2.0``, Hyperscan introduces 2 new compile APIs for pure literal patterns: +In ``v5.2.0``, Vectorscan introduces 2 new compile APIs for pure literal patterns: #. :c:func:`hs_compile_lit`: compiles a single pure literal into a pattern database. @@ -106,7 +106,7 @@ content directly into these APIs without worrying about writing regular meta characters in their patterns. No preprocessing work is needed any more. For new APIs, the ``length`` of each literal pattern is a newly added parameter. -Hyperscan needs to locate the end position of the input expression via clearly +Vectorscan needs to locate the end position of the input expression via clearly knowing each literal's length, not by simply identifying character ``\0`` of a string. 
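The length-based contract of the literal APIs is easiest to see in code. A minimal sketch, assuming the standard ``hs/hs.h`` header; ``hs_compile_lit`` and the flags are the documented API, while the literal itself is invented for illustration: ::

    #include <stdio.h>
    #include <hs/hs.h>

    int main(void) {
        /* The literal contains '.', '(' and ')' plus an embedded NUL byte;
         * no escaping is needed because the length is passed explicitly. */
        static const char lit[] = "1.0\0(beta)";
        hs_database_t *db = NULL;
        hs_compile_error_t *err = NULL;

        if (hs_compile_lit(lit, HS_FLAG_CASELESS, sizeof(lit) - 1,
                           HS_MODE_BLOCK, NULL, &db, &err) != HS_SUCCESS) {
            fprintf(stderr, "compile failed: %s\n", err->message);
            hs_free_compile_error(err);
            return 1;
        }
        hs_free_database(db);
        return 0;
    }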
@@ -127,19 +127,19 @@ Supported flags: :c:member:`HS_FLAG_CASELESS`, :c:member:`HS_FLAG_SINGLEMATCH`, Pattern Support *************** -Hyperscan supports the pattern syntax used by the PCRE library ("libpcre"), +Vectorscan supports the pattern syntax used by the PCRE library ("libpcre"), described at . However, not all constructs available in libpcre are supported. The use of unsupported constructs will result in compilation errors. -The version of PCRE used to validate Hyperscan's interpretation of this syntax +The version of PCRE used to validate Vectorscan's interpretation of this syntax is 8.41 or above. ==================== Supported Constructs ==================== -The following regex constructs are supported by Hyperscan: +The following regex constructs are supported by Vectorscan: * Literal characters and strings, with all libpcre quoting and character escapes. @@ -177,7 +177,7 @@ The following regex constructs are supported by Hyperscan: :c:member:`HS_FLAG_SINGLEMATCH` flag is on for that pattern. * Lazy modifiers (:regexp:`?` appended to another quantifier, e.g. - :regexp:`\\w+?`) are supported but ignored (as Hyperscan reports all + :regexp:`\\w+?`) are supported but ignored (as Vectorscan reports all matches). * Parenthesization, including the named and unnamed capturing and @@ -219,15 +219,15 @@ The following regex constructs are supported by Hyperscan: .. note:: At this time, not all patterns can be successfully compiled with the :c:member:`HS_FLAG_SOM_LEFTMOST` flag, which enables per-pattern support for :ref:`som`. The patterns that support this flag are a subset of patterns that - can be successfully compiled with Hyperscan; notably, many bounded repeat - forms that can be compiled with Hyperscan without the Start of Match flag + can be successfully compiled with Vectorscan; notably, many bounded repeat + forms that can be compiled with Vectorscan without the Start of Match flag enabled cannot be compiled with the flag enabled. ====================== Unsupported Constructs ====================== -The following regex constructs are not supported by Hyperscan: +The following regex constructs are not supported by Vectorscan: * Backreferences and capturing sub-expressions. * Arbitrary zero-width assertions. @@ -246,32 +246,32 @@ The following regex constructs are not supported by Hyperscan: Semantics ********* -While Hyperscan follows libpcre syntax, it provides different semantics. The +While Vectorscan follows libpcre syntax, it provides different semantics. The major departures from libpcre semantics are motivated by the requirements of streaming and multiple simultaneous pattern matching. The major departures from libpcre semantics are: -#. **Multiple pattern matching**: Hyperscan allows matches to be reported for +#. **Multiple pattern matching**: Vectorscan allows matches to be reported for several patterns simultaneously. This is not equivalent to separating the patterns by :regexp:`|` in libpcre, which evaluates alternations left-to-right. -#. **Lack of ordering**: the multiple matches that Hyperscan produces are not +#. **Lack of ordering**: the multiple matches that Vectorscan produces are not guaranteed to be ordered, although they will always fall within the bounds of the current scan. -#. **End offsets only**: Hyperscan's default behaviour is only to report the end +#. **End offsets only**: Vectorscan's default behaviour is only to report the end offset of a match. Reporting of the start offset can be enabled with per-expression flags at pattern compile time. 
See :ref:`som` for details. #. **"All matches" reported**: scanning :regexp:`/foo.*bar/` against - ``fooxyzbarbar`` will return two matches from Hyperscan -- at the points + ``fooxyzbarbar`` will return two matches from Vectorscan -- at the points corresponding to the ends of ``fooxyzbar`` and ``fooxyzbarbar``. In contrast, libpcre semantics by default would report only one match at ``fooxyzbarbar`` (greedy semantics) or, if non-greedy semantics were switched on, one match at ``fooxyzbar``. This means that switching between greedy and non-greedy - semantics is a no-op in Hyperscan. + semantics is a no-op in Vectorscan. To support libpcre quantifier semantics while accurately reporting streaming matches at the time they occur is impossible. For example, consider the pattern @@ -299,7 +299,7 @@ as in block 3 -- which would constitute a better match for the pattern. Start of Match ============== -In standard operation, Hyperscan will only provide the end offset of a match +In standard operation, Vectorscan will only provide the end offset of a match when the match callback is called. If the :c:member:`HS_FLAG_SOM_LEFTMOST` flag is specified for a particular pattern, then the same set of matches is returned, but each match will also provide the leftmost possible start offset @@ -308,7 +308,7 @@ corresponding to its end offset. Using the SOM flag entails a number of trade-offs and limitations: * Reduced pattern support: For many patterns, tracking SOM is complex and can - result in Hyperscan failing to compile a pattern with a "Pattern too + result in Vectorscan failing to compile a pattern with a "Pattern too large" error, even if the pattern is supported in normal operation. * Increased stream state: At scan time, state space is required to track potential SOM offsets, and this must be stored in persistent stream state in @@ -316,20 +316,20 @@ Using the SOM flag entails a number of trade-offs and limitations: required to match a pattern. * Performance overhead: Similarly, there is generally a performance cost associated with tracking SOM. -* Incompatible features: Some other Hyperscan pattern flags (such as +* Incompatible features: Some other Vectorscan pattern flags (such as :c:member:`HS_FLAG_SINGLEMATCH` and :c:member:`HS_FLAG_PREFILTER`) can not be used in combination with SOM. Specifying them together with :c:member:`HS_FLAG_SOM_LEFTMOST` will result in a compilation error. In streaming mode, the amount of precision delivered by SOM can be controlled -with the SOM horizon flags. These instruct Hyperscan to deliver accurate SOM +with the SOM horizon flags. These instruct Vectorscan to deliver accurate SOM information within a certain distance of the end offset, and return a special start offset of :c:member:`HS_OFFSET_PAST_HORIZON` otherwise. Specifying a small or medium SOM horizon will usually reduce the stream state required for a given database. .. note:: In streaming mode, the start offset returned for a match may refer to - a point in the stream *before* the current block being scanned. Hyperscan + a point in the stream *before* the current block being scanned. Vectorscan provides no facility for accessing earlier blocks; if the calling application needs to inspect historical data, then it must store it itself. @@ -341,7 +341,7 @@ Extended Parameters In some circumstances, more control over the matching behaviour of a pattern is required than can be specified easily using regular expression syntax. 
For -these scenarios, Hyperscan provides the :c:func:`hs_compile_ext_multi` function +these scenarios, Vectorscan provides the :c:func:`hs_compile_ext_multi` function that allows a set of "extended parameters" to be set on a per-pattern basis. Extended parameters are specified using an :c:type:`hs_expr_ext_t` structure, @@ -383,18 +383,18 @@ section. Prefiltering Mode ================= -Hyperscan provides a per-pattern flag, :c:member:`HS_FLAG_PREFILTER`, which can -be used to implement a prefilter for a pattern than Hyperscan would not +Vectorscan provides a per-pattern flag, :c:member:`HS_FLAG_PREFILTER`, which can +be used to implement a prefilter for a pattern that Vectorscan would not ordinarily support. -This flag instructs Hyperscan to compile an "approximate" version of this -pattern for use in a prefiltering application, even if Hyperscan does not +This flag instructs Vectorscan to compile an "approximate" version of this +pattern for use in a prefiltering application, even if Vectorscan does not support the pattern in normal operation. The set of matches returned when this flag is used is guaranteed to be a superset of the matches specified by the non-prefiltering expression. -If the pattern contains pattern constructs not supported by Hyperscan (such as +If the pattern contains pattern constructs not supported by Vectorscan (such as zero-width assertions, back-references or conditional references) these constructs will be replaced internally with broader constructs that may match more often. @@ -404,7 +404,7 @@ back-reference :regexp:`\\1`. In prefiltering mode, this pattern might be approximated by having its back-reference replaced with its referent, forming :regexp:`/\\w+ again \\w+/`. -Furthermore, in prefiltering mode Hyperscan may simplify a pattern that would +Furthermore, in prefiltering mode Vectorscan may simplify a pattern that would otherwise return a "Pattern too large" error at compile time, or for performance reasons (subject to the matching guarantee above). @@ -422,22 +422,22 @@ matches for the pattern. Instruction Set Specialization ****************************** -Hyperscan is able to make use of several modern instruction set features found +Vectorscan is able to make use of several modern instruction set features found on x86 processors to provide improvements in scanning performance. Some of these features are selected when the library is built; for example, -Hyperscan will use the native ``POPCNT`` instruction on processors where it is +Vectorscan will use the native ``POPCNT`` instruction on processors where it is available and the library has been optimized for the host architecture. -.. note:: By default, the Hyperscan runtime is built with the ``-march=native`` +.. note:: By default, the Vectorscan runtime is built with the ``-march=native`` compiler flag and (where possible) will make use of all instructions known by the host's C compiler. -To use some instruction set features, however, Hyperscan must build a +To use some instruction set features, however, Vectorscan must build a specialized database to support them. This means that the target platform must be specified at pattern compile time. -The Hyperscan compiler API functions all accept an optional +The Vectorscan compiler API functions all accept an optional :c:type:`hs_platform_info_t` argument, which describes the target platform for the database to be built. If this argument is NULL, the database will be targeted at the current host platform.
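A short sketch of this targeting flow; ``hs_populate_platform``, the structure fields, and the constants are the documented API, while the pattern and the AVX2 target are arbitrary examples: ::

    #include <string.h>
    #include <hs/hs.h>

    /* Build a database aimed at a generic AVX2 machine rather than the
     * compiling host; hs_populate_platform() would instead fill in the
     * host's own tune/cpu_features values. */
    static hs_database_t *build_for_avx2(void) {
        hs_platform_info_t target;
        memset(&target, 0, sizeof(target));
        target.tune = HS_TUNE_FAMILY_GENERIC;
        target.cpu_features = HS_CPU_FEATURES_AVX2;

        hs_database_t *db = NULL;
        hs_compile_error_t *err = NULL;
        if (hs_compile("foo.*bar", HS_FLAG_DOTALL, HS_MODE_BLOCK,
                       &target, &db, &err) != HS_SUCCESS) {
            hs_free_compile_error(err);
            return NULL;
        }
        return db;
    }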
@@ -467,7 +467,7 @@ See :ref:`api_constants` for the full list of CPU tuning and feature flags. Approximate matching ******************** -Hyperscan provides an experimental approximate matching mode, which will match +Vectorscan provides an experimental approximate matching mode, which will match patterns within a given edit distance. The exact matching behavior is defined as follows: @@ -492,7 +492,7 @@ follows: Here are a few examples of approximate matching: -* Pattern :regexp:`/foo/` can match ``foo`` when using regular Hyperscan +* Pattern :regexp:`/foo/` can match ``foo`` when using regular Vectorscan matching behavior. With approximate matching within edit distance 2, the pattern will produce matches when scanned against ``foo``, ``foooo``, ``f00``, ``f``, and anything else that lies within edit distance 2 of matching corpora @@ -513,7 +513,7 @@ matching support. Here they are, in a nutshell: * Reduced pattern support: * For many patterns, approximate matching is complex and can result in - Hyperscan failing to compile a pattern with a "Pattern too large" error, + Vectorscan failing to compile a pattern with a "Pattern too large" error, even if the pattern is supported in normal operation. * Additionally, some patterns cannot be approximately matched because they reduce to so-called "vacuous" patterns (patterns that match everything). For @@ -548,7 +548,7 @@ Logical Combinations ******************** For situations when a user requires behaviour that depends on the presence or -absence of matches from groups of patterns, Hyperscan provides support for the +absence of matches from groups of patterns, Vectorscan provides support for the logical combination of patterns in a given pattern set, with three operators: ``NOT``, ``AND`` and ``OR``. @@ -561,7 +561,7 @@ offset is *true* if the expression it refers to is *false* at this offset. For example, ``NOT 101`` means that expression 101 has not yet matched at this offset. -A logical combination is passed to Hyperscan at compile time as an expression. +A logical combination is passed to Vectorscan at compile time as an expression. This combination expression will raise matches at every offset where one of its sub-expressions matches and the logical value of the whole expression is *true*. @@ -603,7 +603,7 @@ In a logical combination expression: * Whitespace is ignored. To use a logical combination expression, it must be passed to one of the -Hyperscan compile functions (:c:func:`hs_compile_multi`, +Vectorscan compile functions (:c:func:`hs_compile_multi`, :c:func:`hs_compile_ext_multi`) along with the :c:member:`HS_FLAG_COMBINATION` flag, which identifies the pattern as a logical combination expression. The patterns referred to in the logical combination expression must be compiled together in @@ -613,7 +613,7 @@ When an expression has the :c:member:`HS_FLAG_COMBINATION` flag set, it ignores all other flags except the :c:member:`HS_FLAG_SINGLEMATCH` flag and the :c:member:`HS_FLAG_QUIET` flag. 
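A compile-time sketch of the combination mechanism; the sub-patterns and IDs are invented, while ``hs_compile_multi`` and the flags are the documented API: ::

    #include <hs/hs.h>

    static hs_database_t *build_combination(void) {
        const char *exprs[] = {"abc", "def", "xyz", "(101 & 102) | 103"};
        unsigned int ids[] = {101, 102, 103, 1000};
        unsigned int flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET,
                                HS_FLAG_COMBINATION};

        hs_database_t *db = NULL;
        hs_compile_error_t *err = NULL;
        /* The sub-patterns and the combination that refers to them must be
         * compiled together; HS_FLAG_QUIET suppresses the sub-pattern
         * matches so only the combination id (1000) is reported. */
        if (hs_compile_multi(exprs, flags, ids, 4, HS_MODE_BLOCK,
                             NULL, &db, &err) != HS_SUCCESS) {
            hs_free_compile_error(err);
            return NULL;
        }
        return db;
    }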
-Hyperscan will accept logical combination expressions at compile time that +Vectorscan will accept logical combination expressions at compile time that evaluate to *true* when no patterns have matched, and report the match for combination at end of data if no patterns have matched; for example: :: diff --git a/doc/dev-reference/conf.py.in b/doc/dev-reference/conf.py.in index d0ef371b..298a54b1 100644 --- a/doc/dev-reference/conf.py.in +++ b/doc/dev-reference/conf.py.in @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Hyperscan documentation build configuration file, created by +# Vectorscan documentation build configuration file, created by # sphinx-quickstart on Tue Sep 29 15:59:19 2015. # # This file is execfile()d with the current directory set to its @@ -43,8 +43,8 @@ source_suffix = '.rst' master_doc = 'index' # General information about the project. -project = u'Hyperscan' -copyright = u'2015-2018, Intel Corporation' +project = u'Vectorscan' +copyright = u'2015-2020, Intel Corporation; 2020-2024, VectorCamp; and other contributors' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -202,7 +202,7 @@ latex_elements = { # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - ('index', 'Hyperscan.tex', u'Hyperscan Documentation', + ('index', 'Hyperscan.tex', u'Vectorscan Documentation', u'Intel Corporation', 'manual'), ] @@ -232,8 +232,8 @@ latex_documents = [ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ - ('index', 'hyperscan', u'Hyperscan Documentation', - [u'Intel Corporation'], 1) + ('index', 'vectorscan', u'Vectorscan Documentation', + [u'Intel Corporation'], 7) ] # If true, show URL addresses after external links. @@ -246,8 +246,8 @@ man_pages = [ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'Hyperscan', u'Hyperscan Documentation', - u'Intel Corporation', 'Hyperscan', 'High-performance regular expression matcher.', + ('index', 'Vectorscan', u'Vectorscan Documentation', + u'Intel Corporation; VectorCamp', 'Vectorscan', 'High-performance regular expression matcher.', 'Miscellaneous'), ] diff --git a/doc/dev-reference/getting_started.rst b/doc/dev-reference/getting_started.rst index aaff15ba..57d78211 100644 --- a/doc/dev-reference/getting_started.rst +++ b/doc/dev-reference/getting_started.rst @@ -7,43 +7,41 @@ Getting Started Very Quick Start **************** -#. Clone Hyperscan :: +#. Clone Vectorscan :: - cd - git clone git://github.com/intel/hyperscan + cd + git clone https://github.com/VectorCamp/vectorscan -#. Configure Hyperscan +#. Configure Vectorscan Ensure that you have the correct :ref:`dependencies ` present, and then: :: - cd + cd mkdir cd - cmake [-G ] [options] + cmake [-G ] [options] Known working generators: * ``Unix Makefiles`` --- make-compatible makefiles (default on Linux/FreeBSD/Mac OS X) * ``Ninja`` --- `Ninja `_ build files. - * ``Visual Studio 15 2017`` --- Visual Studio projects - Generators that might work include: + Unsupported generators that might work include: * ``Xcode`` --- OS X Xcode projects. -#. Build Hyperscan +#. 
Build Vectorscan Depending on the generator used: * ``cmake --build .`` --- will build everything * ``make -j`` --- use makefiles in parallel * ``ninja`` --- use Ninja build - * ``MsBuild.exe`` --- use Visual Studio MsBuild * etc. -#. Check Hyperscan +#. Check Vectorscan - Run the Hyperscan unit tests: :: + Run the Vectorscan unit tests: :: bin/unit-hyperscan @@ -55,20 +53,23 @@ Requirements Hardware ======== -Hyperscan will run on x86 processors in 64-bit (Intel\ |reg| 64 Architecture) and -32-bit (IA-32 Architecture) modes. +Vectorscan will run on x86 processors in 64-bit (Intel\ |reg| 64 Architecture) and +32-bit (IA-32 Architecture) modes as well as Arm v8.0+ aarch64, and POWER 8+ ppc64le +machines. Hyperscan is a high performance software library that takes advantage of recent -Intel architecture advances. At a minimum, support for Supplemental Streaming -SIMD Extensions 3 (SSSE3) is required, which should be available on any modern -x86 processor. +architecture advances. -Additionally, Hyperscan can make use of: +Additionally, Vectorscan can make use of: * Intel Streaming SIMD Extensions 4.2 (SSE4.2) * the POPCNT instruction * Bit Manipulation Instructions (BMI, BMI2) * Intel Advanced Vector Extensions 2 (Intel AVX2) + * Arm NEON + * Arm SVE and SVE2 + * Arm SVE2 BITPERM + * IBM Power8/Power9 VSX if present. @@ -79,40 +80,34 @@ These can be determined at library compile time, see :ref:`target_arch`. Software ======== -As a software library, Hyperscan doesn't impose any particular runtime -software requirements, however to build the Hyperscan library we require a -modern C and C++ compiler -- in particular, Hyperscan requires C99 and C++11 +As a software library, Vectorscan doesn't impose any particular runtime +software requirements, however to build the Vectorscan library we require a +modern C and C++ compiler -- in particular, Vectorscan requires C99 and C++17 compiler support. The supported compilers are: - * GCC, v4.8.1 or higher - * Clang, v3.4 or higher (with libstdc++ or libc++) - * Intel C++ Compiler v15 or higher - * Visual C++ 2017 Build Tools + * GCC, v9 or higher + * Clang, v5 or higher (with libstdc++ or libc++) -Examples of operating systems that Hyperscan is known to work on include: +Examples of operating systems that Vectorscan is known to work on include: Linux: -* Ubuntu 14.04 LTS or newer +* Ubuntu 20.04 LTS or newer * RedHat/CentOS 7 or newer +* Fedora 38 or newer +* Debian 10 FreeBSD: * 10.0 or newer -Windows: - -* 8 or newer - Mac OS X: * 10.8 or newer, using XCode/Clang -Hyperscan *may* compile and run on other platforms, but there is no guarantee. -We currently have experimental support for Windows using Intel C++ Compiler -or Visual Studio 2017. +Vectorscan *may* compile and run on other platforms, but there is no guarantee. -In addition, the following software is required for compiling the Hyperscan library: +In addition, the following software is required for compiling the Vectorscan library: ======================================================= =========== ====================================== Dependency Version Notes @@ -132,20 +127,20 @@ Ragel, you may use Cygwin to build it from source. Boost Headers ------------- -Compiling Hyperscan depends on a recent version of the Boost C++ header +Compiling Vectorscan depends on a recent version of the Boost C++ header library. If the Boost libraries are installed on the build machine in the usual paths, CMake will find them. 
If the Boost libraries are not installed, the location of the Boost source tree can be specified during the CMake configuration step using the ``BOOST_ROOT`` variable (described below). Another alternative is to put a copy of (or a symlink to) the boost -subdirectory in ``/include/boost``. +subdirectory in ``/include/boost``. For example: for the Boost-1.59.0 release: :: - ln -s boost_1_59_0/boost /include/boost + ln -s boost_1_59_0/boost /include/boost -As Hyperscan uses the header-only parts of Boost, it is not necessary to +As Vectorscan uses the header-only parts of Boost, it is not necessary to compile the Boost libraries. CMake Configuration @@ -168,11 +163,12 @@ Common options for CMake include: | | Valid options are Debug, Release, RelWithDebInfo, | | | and MinSizeRel. Default is RelWithDebInfo. | +------------------------+----------------------------------------------------+ -| BUILD_SHARED_LIBS | Build Hyperscan as a shared library instead of | +| BUILD_SHARED_LIBS | Build Vectorscan as a shared library instead of | | | the default static library. | +| | Default: Off | +------------------------+----------------------------------------------------+ -| BUILD_STATIC_AND_SHARED| Build both static and shared Hyperscan libs. | -| | Default off. | +| BUILD_STATIC_LIBS | Build Vectorscan as a static library. | +| | Default: On | +------------------------+----------------------------------------------------+ | BOOST_ROOT | Location of Boost source tree. | +------------------------+----------------------------------------------------+ @@ -180,12 +176,64 @@ Common options for CMake include: +------------------------+----------------------------------------------------+ | FAT_RUNTIME | Build the :ref:`fat runtime`. Default | | | true on Linux, not available elsewhere. | +| | Default: Off | ++------------------------+----------------------------------------------------+ +| USE_CPU_NATIVE | Native CPU detection is off by default, however it | +| | is possible to build a performance-oriented non-fat| +| | library tuned to your CPU. | +| | Default: Off | ++------------------------+----------------------------------------------------+ +| SANITIZE | Use libasan sanitizer to detect possible bugs. | +| | Valid options are address, memory and undefined. | ++------------------------+----------------------------------------------------+ +| SIMDE_BACKEND | Enable SIMDe backend. If this is chosen all native | +| | (SSE/AVX/AVX512/Neon/SVE/VSX) backends will be | +| | disabled and a SIMDe SSE4.2 emulation backend will | +| | be enabled. This will enable Vectorscan to build | +| | and run on architectures without SIMD. | +| | Default: Off | ++------------------------+----------------------------------------------------+ +| SIMDE_NATIVE | Enable SIMDe native emulation of x86 SSE4.2 | +| | intrinsics on the building platform. That is, | +| | SSE4.2 intrinsics will be emulated using Neon on | +| | an Arm platform, or VSX on a Power platform, etc. | +| | Default: Off | ++------------------------+----------------------------------------------------+ + +X86 platform specific options include: + ++------------------------+----------------------------------------------------+ +| Variable | Description | ++========================+====================================================+ +| BUILD_AVX2 | Enable code for AVX2. | ++------------------------+----------------------------------------------------+ +| BUILD_AVX512 | Enable code for AVX512. Implies BUILD_AVX2. 
| ++------------------------+----------------------------------------------------+ +| BUILD_AVX512VBMI | Enable code for AVX512 with VBMI extension. Implies| +| | BUILD_AVX512. | ++------------------------+----------------------------------------------------+ + +Arm platform specific options include: + ++------------------------+----------------------------------------------------+ +| Variable | Description | ++========================+====================================================+ +| BUILD_SVE | Enable code for SVE, like on AWS Graviton3 CPUs. | +| | Not much code is ported just for SVE, but enabling | +| | SVE code generation does improve the generated | +| | code; see Benchmarks. | ++------------------------+----------------------------------------------------+ +| BUILD_SVE2 | Enable code for SVE2, implies BUILD_SVE. Most | +| | non-Neon code is written for SVE2. | ++------------------------+----------------------------------------------------+ +| BUILD_SVE2_BITPERM | Enable code for the SVE2_BITPERM hardware feature, | +| | implies BUILD_SVE2. | +------------------------+----------------------------------------------------+ For example, to generate a ``Debug`` build: :: cd - cmake -DCMAKE_BUILD_TYPE=Debug + cmake -DCMAKE_BUILD_TYPE=Debug @@ -193,7 +241,7 @@ Build Type ---------- CMake determines a number of features for a build based on the Build Type. -Hyperscan defaults to ``RelWithDebInfo``, i.e. "release with debugging +Vectorscan defaults to ``RelWithDebInfo``, i.e. "release with debugging information". This is a performance optimized build without runtime assertions but with debug symbols enabled. @@ -201,7 +249,7 @@ The other types of builds are: * ``Release``: as above, but without debug symbols * ``MinSizeRel``: a stripped release build - * ``Debug``: used when developing Hyperscan. Includes runtime assertions + * ``Debug``: used when developing Vectorscan. Includes runtime assertions (which has a large impact on runtime performance), and will also enable some other build features like building internal unit tests. @@ -211,7 +259,7 @@ The other types of builds are: Target Architecture ------------------- -Unless using the :ref:`fat runtime`, by default Hyperscan will be +Unless using the :ref:`fat runtime`, by default Vectorscan will be compiled to target the instruction set of the processor of the machine that being used for compilation. This is done via the use of ``-march=native``. The result of this means that a library built on one machine may not work on a @@ -223,7 +271,7 @@ CMake, or ``CMAKE_C_FLAGS`` and ``CMAKE_CXX_FLAGS`` on the CMake command line. F example, to set the instruction subsets up to ``SSE4.2`` using GCC 4.8: :: cmake -DCMAKE_C_FLAGS="-march=corei7" \ - -DCMAKE_CXX_FLAGS="-march=corei7" + -DCMAKE_CXX_FLAGS="-march=corei7" For more information, refer to :ref:`instr_specialization`. @@ -232,17 +280,17 @@ For more information, refer to :ref:`instr_specialization`. Fat Runtime ----------- -A feature introduced in Hyperscan v4.4 is the ability for the Hyperscan +A feature introduced in Hyperscan v4.4 is the ability for the Vectorscan library to dispatch the most appropriate runtime code for the host processor. -This feature is called the "fat runtime", as a single Hyperscan library +This feature is called the "fat runtime", as a single Vectorscan library contains multiple copies of the runtime code for different instruction sets. .. note:: The fat runtime feature is only available on Linux.
Release builds of - Hyperscan will default to having the fat runtime enabled where supported. + Vectorscan will default to having the fat runtime enabled where supported. -When building the library with the fat runtime, the Hyperscan runtime code +When building the library with the fat runtime, the Vectorscan runtime code will be compiled multiple times for these different instruction sets, and these compiled objects are combined into one library. There are no changes to how user applications are built against this library. @@ -254,11 +302,11 @@ resolved so that the right version of each API function is used. There is no impact on function call performance, as this check and resolution is performed by the ELF loader once when the binary is loaded. -If the Hyperscan library is used on x86 systems without ``SSSE3``, the runtime +If the Vectorscan library is used on x86 systems without ``SSE4.2``, the runtime API functions will resolve to functions that return :c:member:`HS_ARCH_ERROR` instead of potentially executing illegal instructions. The API function :c:func:`hs_valid_platform` can be used by application writers to determine if -the current platform is supported by Hyperscan. +the current platform is supported by Vectorscan. As of this release, the variants of the runtime that are built, and the CPU capability that is required, are the following: @@ -299,6 +347,11 @@ capability that is required, are the following: cmake -DBUILD_AVX512VBMI=on <...> + Vectorscan adds support for Arm processors and SVE, SVE2 and SVE2_BITPERM. + For example: :: + + cmake -DBUILD_SVE=ON -DBUILD_SVE2=ON -DBUILD_SVE2_BITPERM=ON <...> + As the fat runtime requires compiler, libc, and binutils support, at this time it will only be enabled for Linux builds where the compiler supports the `indirect function "ifunc" function attribute diff --git a/doc/dev-reference/index.rst b/doc/dev-reference/index.rst index b5d6a54b..4046a298 100644 --- a/doc/dev-reference/index.rst +++ b/doc/dev-reference/index.rst @@ -1,5 +1,5 @@ ################################################ -Hyperscan |version| Developer's Reference Guide +Vectorscan |version| Developer's Reference Guide ################################################ ------- diff --git a/doc/dev-reference/intro.rst b/doc/dev-reference/intro.rst index 58879aef..71538eb0 100644 --- a/doc/dev-reference/intro.rst +++ b/doc/dev-reference/intro.rst @@ -5,11 +5,11 @@ Introduction ############ -Hyperscan is a software regular expression matching engine designed with +Vectorscan is a software regular expression matching engine designed with high performance and flexibility in mind. It is implemented as a library that exposes a straightforward C API. -The Hyperscan API itself is composed of two major components: +The Vectorscan API itself is composed of two major components: *********** Compilation *********** These functions take a group of regular expressions, along with identifiers and option flags, and compile them into an immutable database that can be used by -the Hyperscan scanning API. This compilation process performs considerable +the Vectorscan scanning API. This compilation process performs considerable analysis and optimization work in order to build a database that will match the given expressions efficiently. @@ -36,8 +36,8 @@ See :ref:`compilation` for more detail. Scanning ******** -Once a Hyperscan database has been created, it can be used to scan data in -memory.
Hyperscan provides several scanning modes, depending on whether the +Once a Vectorscan database has been created, it can be used to scan data in +memory. Vectorscan provides several scanning modes, depending on whether the data to be scanned is available as a single contiguous block, whether it is distributed amongst several blocks in memory at the same time, or whether it is to be scanned as a sequence of blocks in a stream. @@ -45,7 +45,7 @@ to be scanned as a sequence of blocks in a stream. Matches are delivered to the application via a user-supplied callback function that is called synchronously for each match. -For a given database, Hyperscan provides several guarantees: +For a given database, Vectorscan provides several guarantees: * No memory allocations occur at runtime with the exception of two fixed-size allocations, both of which should be done ahead of time for @@ -56,7 +56,7 @@ For a given database, Hyperscan provides several guarantees: call. - **Stream state**: in streaming mode only, some state space is required to store data that persists between scan calls for each stream. This allows - Hyperscan to track matches that span multiple blocks of data. + Vectorscan to track matches that span multiple blocks of data. * The sizes of the scratch space and stream state (in streaming mode) required for a given database are fixed and determined at database compile time. This @@ -64,7 +64,7 @@ For a given database, Hyperscan provides several guarantees: time, and these structures can be pre-allocated if required for performance reasons. -* Any pattern that has successfully been compiled by the Hyperscan compiler can +* Any pattern that has successfully been compiled by the Vectorscan compiler can be scanned against any input. There are no internal resource limits or other limitations at runtime that could cause a scan call to return an error. @@ -74,12 +74,12 @@ See :ref:`runtime` for more detail. Tools ***** -Some utilities for testing and benchmarking Hyperscan are included with the +Some utilities for testing and benchmarking Vectorscan are included with the library. See :ref:`tools` for more information. ************ Example Code ************ -Some simple example code demonstrating the use of the Hyperscan API is -available in the ``examples/`` subdirectory of the Hyperscan distribution. +Some simple example code demonstrating the use of the Vectorscan API is +available in the ``examples/`` subdirectory of the Vectorscan distribution. diff --git a/doc/dev-reference/performance.rst b/doc/dev-reference/performance.rst index 23781bd6..12074ea3 100644 --- a/doc/dev-reference/performance.rst +++ b/doc/dev-reference/performance.rst @@ -4,7 +4,7 @@ Performance Considerations ########################## -Hyperscan supports a wide range of patterns in all three scanning modes. It is +Vectorscan supports a wide range of patterns in all three scanning modes. It is capable of extremely high levels of performance, but certain patterns can reduce performance markedly. @@ -25,7 +25,7 @@ For example, caseless matching of :regexp:`/abc/` can be written as: * :regexp:`/(?i)abc(?-i)/` * :regexp:`/abc/i` -Hyperscan is capable of handling all these constructs. Unless there is a +Vectorscan is capable of handling all these constructs. Unless there is a specific reason otherwise, do not rewrite patterns from one form to another. As another example, matching of :regexp:`/foo(bar|baz)(frotz)?/` can be @@ -41,24 +41,24 @@ Library usage .. tip:: Do not hand-optimize library usage. 
-The Hyperscan library is capable of dealing with small writes, unusually large +The Vectorscan library is capable of dealing with small writes, unusually large and small pattern sets, etc. Unless there is a specific performance problem -with some usage of the library, it is best to use Hyperscan in a simple and +with some usage of the library, it is best to use Vectorscan in a simple and direct fashion. For example, it is unlikely for there to be much benefit in buffering input to the library into larger blocks unless streaming writes are tiny (say, 1-2 bytes at a time). -Unlike many other pattern matching products, Hyperscan will run faster with +Unlike many other pattern matching products, Vectorscan will run faster with small numbers of patterns and slower with large numbers of patterns in a smooth fashion (as opposed to, typically, running at a moderate speed up to some fixed limit then either breaking or running half as fast). -Hyperscan also provides high-throughput matching with a single thread of -control per core; if a database runs at 3.0 Gbps in Hyperscan it means that a +Vectorscan also provides high-throughput matching with a single thread of +control per core; if a database runs at 3.0 Gbps in Vectorscan it means that a 3000-bit block of data will be scanned in 1 microsecond in a single thread of control, not that it is required to scan 22 3000-bit blocks of data in 22 microseconds. Thus, it is not usually necessary to buffer data to supply -Hyperscan with available parallelism. +Vectorscan with available parallelism. ******************** Block-based matching @@ -72,7 +72,7 @@ accumulated before processing, it should be scanned in block rather than in streaming mode. Unnecessary use of streaming mode reduces the number of optimizations that can -be applied in Hyperscan and may make some patterns run slower. +be applied in Vectorscan and may make some patterns run slower. If there is a mixture of 'block' and 'streaming' mode patterns, these should be scanned in separate databases except in the case that the streaming patterns @@ -107,7 +107,7 @@ Allocate scratch ahead of time Scratch allocation is not necessarily a cheap operation. Since it is the first time (after compilation or deserialization) that a pattern database is used, -Hyperscan performs some validation checks inside :c:func:`hs_alloc_scratch` and +Vectorscan performs some validation checks inside :c:func:`hs_alloc_scratch` and must also allocate memory. Therefore, it is important to ensure that :c:func:`hs_alloc_scratch` is not @@ -329,7 +329,7 @@ Consequently, :regexp:`/foo.*bar/L` with a check on start of match values after the callback is considerably more expensive and general than :regexp:`/foo.{300}bar/`. -Similarly, the :c:member:`hs_expr_ext::min_length` extended parameter can be +Similarly, the :cpp:member:`hs_expr_ext::min_length` extended parameter can be used to specify a lower bound on the length of the matches for a pattern. Using this facility may be more lightweight in some circumstances than using the SOM flag and post-confirming match length in the calling application. 
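[Editor's note: a hedged sketch of the ``min_length`` facility discussed above, using the public ``hs_compile_ext_multi()`` API; the pattern, ID, and bound are illustrative only.] ::

    #include <string.h>
    #include <hs/hs.h>

    /* Prefer /foo.*bar/ plus a minimum match length over the heavier
     * SOM machinery when only a length lower bound is needed. */
    static hs_database_t *compile_with_min_length(void) {
        const char *expr = "foo.*bar";
        unsigned int flag = HS_FLAG_DOTALL;
        unsigned int id = 1;

        hs_expr_ext_t ext;
        memset(&ext, 0, sizeof(ext));
        ext.flags = HS_EXT_FLAG_MIN_LENGTH;
        ext.min_length = 306; /* len("foo") + 300 + len("bar") */

        const hs_expr_ext_t *ext_ptr = &ext;
        hs_database_t *db = NULL;
        hs_compile_error_t *compile_err = NULL;
        if (hs_compile_ext_multi(&expr, &flag, &id, &ext_ptr, 1,
                                 HS_MODE_BLOCK, NULL, &db,
                                 &compile_err) != HS_SUCCESS) {
            hs_free_compile_error(compile_err);
            return NULL;
        }
        return db;
    }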
diff --git a/doc/dev-reference/preface.rst b/doc/dev-reference/preface.rst index 68373b7f..5739690f 100644 --- a/doc/dev-reference/preface.rst +++ b/doc/dev-reference/preface.rst @@ -6,35 +6,35 @@ Preface Overview ******** -Hyperscan is a regular expression engine designed to offer high performance, the +Vectorscan is a regular expression engine designed to offer high performance, the ability to match multiple expressions simultaneously and flexibility in scanning operation. Patterns are provided to a compilation interface which generates an immutable pattern database. The scan interface then can be used to scan a target data buffer for the given patterns, returning any matching results from that data -buffer. Hyperscan also provides a streaming mode, in which matches that span +buffer. Vectorscan also provides a streaming mode, in which matches that span several blocks in a stream are detected. -This document is designed to facilitate code-level integration of the Hyperscan +This document is designed to facilitate code-level integration of the Vectorscan library with existing or new applications. -:ref:`intro` is a short overview of the Hyperscan library, with more detail on -the Hyperscan API provided in the subsequent sections: :ref:`compilation` and +:ref:`intro` is a short overview of the Vectorscan library, with more detail on +the Vectorscan API provided in the subsequent sections: :ref:`compilation` and :ref:`runtime`. :ref:`perf` provides details on various factors which may impact the -performance of a Hyperscan integration. +performance of a Vectorscan integration. :ref:`api_constants` and :ref:`api_files` provides a detailed summary of the -Hyperscan Application Programming Interface (API). +Vectorscan Application Programming Interface (API). ******** Audience ******** -This guide is aimed at developers interested in integrating Hyperscan into an -application. For information on building the Hyperscan library, see the Quick +This guide is aimed at developers interested in integrating Vectorscan into an +application. For information on building the Vectorscan library, see the Quick Start Guide. *********** diff --git a/doc/dev-reference/runtime.rst b/doc/dev-reference/runtime.rst index 396521c9..249fd235 100644 --- a/doc/dev-reference/runtime.rst +++ b/doc/dev-reference/runtime.rst @@ -4,7 +4,7 @@ Scanning for Patterns ##################### -Hyperscan provides three different scanning modes, each with its own scan +Vectorscan provides three different scanning modes, each with its own scan function beginning with ``hs_scan``. In addition, streaming mode has a number of other API functions for managing stream state. @@ -33,8 +33,8 @@ See :c:type:`match_event_handler` for more information. Streaming Mode ************** -The core of the Hyperscan streaming runtime API consists of functions to open, -scan, and close Hyperscan data streams: +The core of the Vectorscan streaming runtime API consists of functions to open, +scan, and close Vectorscan data streams: * :c:func:`hs_open_stream`: allocates and initializes a new stream for scanning. @@ -57,14 +57,14 @@ will return immediately with :c:member:`HS_SCAN_TERMINATED`. The caller must still call :c:func:`hs_close_stream` to complete the clean-up process for that stream. 
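[Editor's note: a minimal, hedged sketch of the open/scan/close sequence described above; ``db``, ``scratch``, ``on_match``, and the two writes are assumed inputs for the example, not library fixtures.] ::

    #include <hs/hs.h>

    static hs_error_t scan_in_two_writes(const hs_database_t *db,
                                         hs_scratch_t *scratch,
                                         match_event_handler on_match,
                                         void *ctx) {
        hs_stream_t *stream = NULL;
        hs_error_t err = hs_open_stream(db, 0, &stream);
        if (err != HS_SUCCESS) {
            return err;
        }

        /* A match spanning the two writes is still reported. */
        err = hs_scan_stream(stream, "foo", 3, 0, scratch, on_match, ctx);
        if (err == HS_SUCCESS) {
            err = hs_scan_stream(stream, "bar", 3, 0, scratch, on_match, ctx);
        }

        /* Closing is required even after HS_SCAN_TERMINATED, and may fire
         * end-anchored matches of its own. */
        hs_error_t close_err = hs_close_stream(stream, scratch, on_match, ctx);
        return err != HS_SUCCESS ? err : close_err;
    }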
-Streams exist in the Hyperscan library so that pattern matching state can be +Streams exist in the Vectorscan library so that pattern matching state can be maintained across multiple blocks of target data -- without maintaining this state, it would not be possible to detect patterns that span these blocks of data. This, however, does come at the cost of requiring an amount of storage per-stream (the size of this storage is fixed at compile time), and a slight performance penalty in some cases to manage the state. -While Hyperscan does always support a strict ordering of multiple matches, +While Vectorscan does always support a strict ordering of multiple matches, streaming matches will not be delivered at offsets before the current stream write, with the exception of zero-width asserts, where constructs such as :regexp:`\\b` and :regexp:`$` can cause a match on the final character of a @@ -76,7 +76,7 @@ Stream Management ================= In addition to :c:func:`hs_open_stream`, :c:func:`hs_scan_stream`, and -:c:func:`hs_close_stream`, the Hyperscan API provides a number of other +:c:func:`hs_close_stream`, the Vectorscan API provides a number of other functions for the management of streams: * :c:func:`hs_reset_stream`: resets a stream to its initial state; this is @@ -98,10 +98,10 @@ A stream object is allocated as a fixed size region of memory which has been sized to ensure that no memory allocations are required during scan operations. When the system is under memory pressure, it may be useful to reduce the memory consumed by streams that are not expected to be used soon. The -Hyperscan API provides calls for translating a stream to and from a compressed +Vectorscan API provides calls for translating a stream to and from a compressed representation for this purpose. The compressed representation differs from the full stream object as it does not reserve space for components which are not -required given the current stream state. The Hyperscan API functions for this +required given the current stream state. The Vectorscan API functions for this functionality are: * :c:func:`hs_compress_stream`: fills the provided buffer with a compressed @@ -157,7 +157,7 @@ scanned in block mode. Scratch Space ************* -While scanning data, Hyperscan needs a small amount of temporary memory to store +While scanning data, Vectorscan needs a small amount of temporary memory to store on-the-fly internal data. This amount is unfortunately too large to fit on the stack, particularly for embedded applications, and allocating memory dynamically is too expensive, so a pre-allocated "scratch" space must be provided to the @@ -170,7 +170,7 @@ databases, only a single scratch region is necessary: in this case, calling will ensure that the scratch space is large enough to support scanning against any of the given databases. -While the Hyperscan library is re-entrant, the use of scratch spaces is not. +While the Vectorscan library is re-entrant, the use of scratch spaces is not. For example, if by design it is deemed necessary to run recursive or nested scanning (say, from the match callback function), then an additional scratch space is required for that context. @@ -219,11 +219,11 @@ For example: Custom Allocators ***************** -By default, structures used by Hyperscan at runtime (scratch space, stream +By default, structures used by Vectorscan at runtime (scratch space, stream state, etc) are allocated with the default system allocators, usually ``malloc()`` and ``free()``. 
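[Editor's note: a hedged illustration of the allocator facility introduced in the next paragraph, via ``hs_set_allocator()``; the counting wrapper is invented for the example.] ::

    #include <stdlib.h>
    #include <hs/hs.h>

    static size_t total_allocated; /* illustrative bookkeeping only */

    static void *counting_alloc(size_t size) {
        total_allocated += size;
        return malloc(size);
    }

    static void counting_free(void *ptr) {
        free(ptr);
    }

    /* Route scratch, stream state, database, and misc allocations
     * through the wrappers above. */
    static hs_error_t install_allocator(void) {
        return hs_set_allocator(counting_alloc, counting_free);
    }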
-The Hyperscan API provides a facility for changing this behaviour to support +The Vectorscan API provides a facility for changing this behaviour to support applications that use custom memory allocators. These functions are: diff --git a/doc/dev-reference/serialization.rst b/doc/dev-reference/serialization.rst index 4f884c75..5950e607 100644 --- a/doc/dev-reference/serialization.rst +++ b/doc/dev-reference/serialization.rst @@ -4,7 +4,7 @@ Serialization ############# -For some applications, compiling Hyperscan pattern databases immediately prior +For some applications, compiling Vectorscan pattern databases immediately prior to use is not an appropriate design. Some users may wish to: * Compile pattern databases on a different host; @@ -14,9 +14,9 @@ to use is not an appropriate design. Some users may wish to: * Control the region of memory in which the compiled database is located. -Hyperscan pattern databases are not completely flat in memory: they contain +Vectorscan pattern databases are not completely flat in memory: they contain pointers and have specific alignment requirements. Therefore, they cannot be -copied (or otherwise relocated) directly. To enable these use cases, Hyperscan +copied (or otherwise relocated) directly. To enable these use cases, Vectorscan provides functionality for serializing and deserializing compiled pattern databases. @@ -40,10 +40,10 @@ The API provides the following functions: returns a string containing information about the database. This call is analogous to :c:func:`hs_database_info`. -.. note:: Hyperscan performs both version and platform compatibility checks +.. note:: Vectorscan performs both version and platform compatibility checks upon deserialization. The :c:func:`hs_deserialize_database` and :c:func:`hs_deserialize_database_at` functions will only permit the - deserialization of databases compiled with (a) the same version of Hyperscan + deserialization of databases compiled with (a) the same version of Vectorscan and (b) platform features supported by the current host platform. See :ref:`instr_specialization` for more information on platform specialization. @@ -51,17 +51,17 @@ The API provides the following functions: The Runtime Library =================== -The main Hyperscan library (``libhs``) contains both the compiler and runtime -portions of the library. This means that in order to support the Hyperscan +The main Vectorscan library (``libhs``) contains both the compiler and runtime +portions of the library. This means that in order to support the Vectorscan compiler, which is written in C++, it requires C++ linkage and has a dependency on the C++ standard library. Many embedded applications require only the scanning ("runtime") portion of the -Hyperscan library. In these cases, pattern compilation generally takes place on +Vectorscan library. In these cases, pattern compilation generally takes place on another host, and serialized pattern databases are delivered to the application for use. To support these applications without requiring the C++ dependency, a -runtime-only version of the Hyperscan library, called ``libhs_runtime``, is also +runtime-only version of the Vectorscan library, called ``libhs_runtime``, is also distributed. This library does not depend on the C++ standard library and -provides all Hyperscan functions other that those used to compile databases. +provides all Vectorscan functions other than those used to compile databases.
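[Editor's note: a hedged sketch of the serialize/deserialize round trip described in this section; transport of the byte buffer between hosts is elided.] ::

    #include <stdlib.h>
    #include <hs/hs.h>

    static hs_database_t *round_trip(const hs_database_t *db) {
        char *bytes = NULL;
        size_t length = 0;
        if (hs_serialize_database(db, &bytes, &length) != HS_SUCCESS) {
            return NULL;
        }

        /* ... write bytes/length to a file or socket, read them back ... */

        hs_database_t *out = NULL;
        hs_error_t err = hs_deserialize_database(bytes, length, &out);
        free(bytes); /* the serialized buffer comes from the misc allocator;
                      * plain free() matches the default allocator */
        return err == HS_SUCCESS ? out : NULL;
    }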
diff --git a/doc/dev-reference/tools.rst b/doc/dev-reference/tools.rst index e0465fc6..f6d51515 100644 --- a/doc/dev-reference/tools.rst +++ b/doc/dev-reference/tools.rst @@ -4,14 +4,14 @@ Tools ##### -This section describes the set of utilities included with the Hyperscan library. +This section describes the set of utilities included with the Vectorscan library. ******************** Quick Check: hscheck ******************** -The ``hscheck`` tool allows the user to quickly check whether Hyperscan supports -a group of patterns. If a pattern is rejected by Hyperscan's compiler, the +The ``hscheck`` tool allows the user to quickly check whether Vectorscan supports +a group of patterns. If a pattern is rejected by Vectorscan's compiler, the compile error is provided on standard output. For example, given the following three patterns (the last of which contains a @@ -34,7 +34,7 @@ syntax error) in a file called ``/tmp/test``:: Benchmarker: hsbench ******************** -The ``hsbench`` tool provides an easy way to measure Hyperscan's performance +The ``hsbench`` tool provides an easy way to measure Vectorscan's performance for a particular set of patterns and corpus of data to be scanned. Patterns are supplied in the format described below in @@ -44,7 +44,7 @@ easy control of how a corpus is broken into blocks and streams. .. note:: A group of Python scripts for constructing corpora databases from various input types, such as PCAP network traffic captures or text files, can - be found in the Hyperscan source tree in ``tools/hsbench/scripts``. + be found in the Vectorscan source tree in ``tools/hsbench/scripts``. Running hsbench =============== @@ -56,7 +56,7 @@ produce output like this:: $ hsbench -e /tmp/patterns -c /tmp/corpus.db Signatures: /tmp/patterns - Hyperscan info: Version: 4.3.1 Features: AVX2 Mode: STREAM + Vectorscan info: Version: 5.4.11 Features: AVX2 Mode: STREAM Expression count: 200 Bytecode size: 342,540 bytes Database CRC: 0x6cd6b67c @@ -77,7 +77,7 @@ takes to perform all twenty scans. The number of repeats can be changed with the ``-n`` argument, and the results of each scan will be displayed if the ``--per-scan`` argument is specified. -To benchmark Hyperscan on more than one core, you can supply a list of cores +To benchmark Vectorscan on more than one core, you can supply a list of cores with the ``-T`` argument, which will instruct ``hsbench`` to start one benchmark thread per core given and compute the throughput from the time taken to complete all of them. @@ -91,17 +91,17 @@ Correctness Testing: hscollider ******************************* The ``hscollider`` tool, or Pattern Collider, provides a way to verify -Hyperscan's matching behaviour. It does this by compiling and scanning patterns +Vectorscan's matching behaviour. It does this by compiling and scanning patterns (either singly or in groups) against known corpora and comparing the results against another engine (the "ground truth"). Two sources of ground truth for comparison are available: * The PCRE library (http://pcre.org/). - * An NFA simulation run on Hyperscan's compile-time graph representation. This + * An NFA simulation run on Vectorscan's compile-time graph representation. This is used if PCRE cannot support the pattern or if PCRE execution fails due to a resource limit. 
-Much of Hyperscan's testing infrastructure is built on ``hscollider``, and the +Much of Vectorscan's testing infrastructure is built on ``hscollider``, and the tool is designed to take advantage of multiple cores and provide considerable flexibility in controlling the test. These options are described in the help (``hscollider -h``) and include: @@ -116,11 +116,11 @@ flexibility in controlling the test. These options are described in the help Using hscollider to debug a pattern =================================== -One common use-case for ``hscollider`` is to determine whether Hyperscan will +One common use-case for ``hscollider`` is to determine whether Vectorscan will match a pattern in the expected location, and whether this accords with PCRE's behaviour for the same case. -Here is an example. We put our pattern in a file in Hyperscan's pattern +Here is an example. We put our pattern in a file in Vectorscan's pattern format:: $ cat /tmp/pat @@ -172,7 +172,7 @@ individual matches are displayed in the output:: Total elapsed time: 0.00522815 secs. -We can see from this output that both PCRE and Hyperscan find matches ending at +We can see from this output that both PCRE and Vectorscan find matches ending at offset 33 and 45, and so ``hscollider`` considers this test case to have passed. @@ -180,13 +180,13 @@ passed. corpus alignment 0, and ``-T 1`` instructs us to only use one thread.) .. note:: In default operation, PCRE produces only one match for a scan, unlike - Hyperscan's automata semantics. The ``hscollider`` tool uses libpcre's - "callout" functionality to match Hyperscan's semantics. + Vectorscan's automata semantics. The ``hscollider`` tool uses libpcre's + "callout" functionality to match Vectorscan's semantics. Running a larger scan test ========================== -A set of patterns for testing purposes are distributed with Hyperscan, and these +A set of patterns for testing purposes is distributed with Vectorscan, and these can be tested via ``hscollider`` on an in-tree build. Two CMake targets are provided to do this easily: @@ -202,10 +202,10 @@ Debugging: hsdump ***************** When built in debug mode (using the CMake directive ``CMAKE_BUILD_TYPE`` set to -``Debug``), Hyperscan includes support for dumping information about its +``Debug``), Vectorscan includes support for dumping information about its internals during pattern compilation with the ``hsdump`` tool. -This information is mostly of use to Hyperscan developers familiar with the +This information is mostly of use to Vectorscan developers familiar with the library's internal structure, but can be used to diagnose issues with patterns and provide more information in bug reports. @@ -215,7 +215,7 @@ and provide more information in bug reports. Pattern Format ************** -All of the Hyperscan tools accept patterns in the same format, read from plain +All of the Vectorscan tools accept patterns in the same format, read from plain text files with one pattern per line. Each line looks like this: * ``<id>:/<regex>/<flags>`` For example:: 1:/hatstand.*teakettle/s 2:/(hatstand|teakettle)/iH 3:/^.{10,20}hatstand/m The integer ID is the value that will be reported when a match is found by -Hyperscan and must be unique. +Vectorscan and must be unique. The pattern itself is a regular expression in PCRE syntax; see :ref:`compilation` for more information on supported features.
-The flags are single characters that map to Hyperscan flags as follows: +The flags are single characters that map to Vectorscan flags as follows: ========= ================================= =========== Character API Flag Description @@ -256,7 +256,7 @@ between braces, separated by commas. For example:: 1:/hatstand.*teakettle/s{min_offset=50,max_offset=100} -All Hyperscan tools will accept a pattern file (or a directory containing +All Vectorscan tools will accept a pattern file (or a directory containing pattern files) with the ``-e`` argument. If no further arguments constraining the pattern set are given, all patterns in those files are used. diff --git a/examples/patbench.cc b/examples/patbench.cc index 132e416f..2aefbb69 100644 --- a/examples/patbench.cc +++ b/examples/patbench.cc @@ -202,7 +202,7 @@ struct FiveTuple { unsigned int dstPort; // Construct a FiveTuple from a TCP or UDP packet. - FiveTuple(const struct ip *iphdr) { + explicit FiveTuple(const struct ip *iphdr) { // IP fields protocol = iphdr->ip_p; srcAddr = iphdr->ip_src.s_addr; @@ -391,7 +391,7 @@ public: // Close all open Hyperscan streams (potentially generating any // end-anchored matches) void closeStreams() { - for (auto &stream : streams) { + for (const auto &stream : streams) { hs_error_t err = hs_close_stream(stream, scratch, onMatch, &matchCount); if (err != HS_SUCCESS) { @@ -444,7 +444,7 @@ class Sigdata { public: Sigdata() {} - Sigdata(const char *filename) { + explicit Sigdata(const char *filename) { parseFile(filename, patterns, flags, ids, originals); } @@ -568,7 +568,7 @@ double measure_block_time(Benchmark &bench, unsigned int repeatCount) { } static -double eval_set(Benchmark &bench, Sigdata &sigs, unsigned int mode, +double eval_set(Benchmark &bench, const Sigdata &sigs, unsigned int mode, unsigned repeatCount, Criterion criterion, bool diagnose = true) { double compileTime = 0; @@ -608,8 +608,9 @@ double eval_set(Benchmark &bench, Sigdata &sigs, unsigned int mode, scan_time = measure_stream_time(bench, repeatCount); } size_t bytes = bench.bytes(); - size_t matches = bench.matches(); + if (diagnose) { + size_t matches = bench.matches(); std::ios::fmtflags f(cout.flags()); cout << "Scan time " << std::fixed << std::setprecision(3) << scan_time << " sec, Scanned " << bytes * repeatCount << " bytes, Throughput " diff --git a/examples/pcapscan.cc b/examples/pcapscan.cc index 071de740..2a50b3b9 100644 --- a/examples/pcapscan.cc +++ b/examples/pcapscan.cc @@ -100,15 +100,14 @@ struct FiveTuple { unsigned int dstPort; // Construct a FiveTuple from a TCP or UDP packet. 
- FiveTuple(const struct ip *iphdr) { + explicit FiveTuple(const struct ip *iphdr) { // IP fields protocol = iphdr->ip_p; srcAddr = iphdr->ip_src.s_addr; dstAddr = iphdr->ip_dst.s_addr; // UDP/TCP ports - const struct udphdr *uh = - (const struct udphdr *)(((const char *)iphdr) + (iphdr->ip_hl * 4)); + const struct udphdr *uh = reinterpret_cast<const struct udphdr *>( + reinterpret_cast<const char *>(iphdr) + (iphdr->ip_hl * 4)); srcPort = uh->uh_sport; dstPort = uh->uh_dport; } @@ -137,7 +136,7 @@ static int onMatch(unsigned int id, unsigned long long from, unsigned long long to, unsigned int flags, void *ctx) { // Our context points to a size_t storing the match count - size_t *matches = (size_t *)ctx; + size_t *matches = static_cast<size_t *>(ctx); (*matches)++; return 0; // continue matching } @@ -233,9 +232,8 @@ public: } // Valid TCP or UDP packet - const struct ip *iphdr = (const struct ip *)(pktData - + sizeof(struct ether_header)); - const char *payload = (const char *)pktData + offset; + const struct ip *iphdr = reinterpret_cast<const struct ip *>(pktData + sizeof(struct ether_header)); + const char *payload = reinterpret_cast<const char *>(pktData) + offset; size_t id = stream_map.insert(std::make_pair(FiveTuple(iphdr), stream_map.size())).first->second; @@ -281,7 +279,7 @@ public: // Close all open Hyperscan streams (potentially generating any // end-anchored matches) void closeStreams() { - for (auto &stream : streams) { + for (const auto &stream : streams) { hs_error_t err = hs_close_stream(stream, scratch, onMatch, &matchCount); if (err != HS_SUCCESS) { @@ -575,7 +573,7 @@ int main(int argc, char **argv) { */ static bool payloadOffset(const unsigned char *pkt_data, unsigned int *offset, unsigned int *length) { - const ip *iph = (const ip *)(pkt_data + sizeof(ether_header)); + const ip *iph = reinterpret_cast<const ip *>(pkt_data + sizeof(ether_header)); const tcphdr *th = nullptr; // Ignore packets that aren't IPv4 @@ -594,7 +592,7 @@ static bool payloadOffset(const unsigned char *pkt_data, unsigned int *offset, switch (iph->ip_p) { case IPPROTO_TCP: - th = (const tcphdr *)((const char *)iph + ihlen); + th = reinterpret_cast<const tcphdr *>(reinterpret_cast<const char *>(iph) + ihlen); thlen = th->th_off * 4; break; case IPPROTO_UDP: diff --git a/examples/simplegrep.c b/examples/simplegrep.c index d6bd4b39..cceaa109 100644 --- a/examples/simplegrep.c +++ b/examples/simplegrep.c @@ -67,7 +67,7 @@ * to pass in the pattern that was being searched for so we can print it out. */ static int eventHandler(unsigned int id, unsigned long long from, - unsigned long long to, unsigned int flags, void *ctx) { + unsigned long long to, unsigned int flags, void *ctx) { // cppcheck-suppress constParameterCallback printf("Match for pattern \"%s\" at offset %llu\n", (char *)ctx, to); return 0; } @@ -150,7 +150,7 @@ int main(int argc, char *argv[]) { } char *pattern = argv[1]; - char *inputFN = argv[2]; + const char *inputFN = argv[2]; /* First, we attempt to compile the pattern provided on the command line. * We assume 'DOTALL' semantics, meaning that the '.' meta-character will
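[Editor's note: the ``constParameterCallback`` suppression above exists because the ``match_event_handler`` signature is fixed by the library. A hedged sketch of a terminating callback under that signature; the names are invented for the example.] ::

    #include <hs/hs.h>

    /* Record only the first match end offset; returning non-zero makes
     * the enclosing scan call stop with HS_SCAN_TERMINATED. */
    static int firstMatch(unsigned int id, unsigned long long from,
                          unsigned long long to, unsigned int flags,
                          void *ctx) {
        unsigned long long *first_to = ctx;
        (void)id; (void)from; (void)flags;
        *first_to = to;
        return 1; /* terminate matching */
    }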
diff --git a/libhs.pc.in b/libhs.pc.in index 3ad2b90c..d1e3ffb0 100644 --- a/libhs.pc.in +++ b/libhs.pc.in @@ -4,7 +4,7 @@ libdir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@ includedir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_INCLUDEDIR@ Name: libhs -Description: Intel(R) Hyperscan Library +Description: A portable fork of the high-performance regular expression matching library Version: @HS_VERSION@ Libs: -L${libdir} -lhs Cflags: -I${includedir}/hs diff --git a/simde b/simde index aae22459..416091eb 160000 --- a/simde +++ b/simde @@ -1 +1 @@ -Subproject commit aae22459fa284e9fc2b7d4b8e4571afa0418125f +Subproject commit 416091ebdb9e901b29d026633e73167d6353a0b0 diff --git a/src/compiler/asserts.cpp b/src/compiler/asserts.cpp index b3e89a3a..ae8f5c6c 100644 --- a/src/compiler/asserts.cpp +++ b/src/compiler/asserts.cpp @@ -176,7 +176,8 @@ void replaceAssertVertex(NGHolder &g, NFAVertex t, const ExpressionInfo &expr, auto ecit = edge_cache.find(cache_key); if (ecit == edge_cache.end()) { DEBUG_PRINTF("adding edge %zu %zu\n", g[u].index, g[v].index); - NFAEdge e = add_edge(u, v, g); + NFAEdge e; + std::tie(e, std::ignore) = add_edge(u, v, g); edge_cache.emplace(cache_key, e); g[e].assert_flags = flags; if (++assert_edge_count > MAX_ASSERT_EDGES) { @@ -229,11 +230,12 @@ void checkForMultilineStart(ReportManager &rm, NGHolder &g, /* we need to interpose a dummy dot vertex between v and accept if * required so that ^ doesn't match trailing \n */ - for (const auto &e : out_edges_range(v, g)) { - if (target(e, g) == g.accept) { - dead.emplace_back(e); - } - } + auto deads = [&g=g](const NFAEdge &e) { + return (target(e, g) == g.accept); + }; + const auto &er = out_edges_range(v, g); + std::copy_if(begin(er), end(er), std::back_inserter(dead), deads); + /* assert has been resolved; clear flag */ g[v].assert_flags &= ~POS_FLAG_MULTILINE_START; } diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 35f46b3f..aa8de4ba 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -443,7 +443,7 @@ bytecode_ptr<RoseEngine> generateRoseEngine(NG &ng) { if (!rose) { DEBUG_PRINTF("error building rose\n"); assert(0); - return nullptr; + return bytecode_ptr<RoseEngine>(nullptr); } dumpReportManager(ng.rm, ng.cc.grey); @@ -478,7 +478,7 @@ hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) { DEBUG_PRINTF("db size %zu\n", db_len); DEBUG_PRINTF("db platform %llx\n", platform); - struct hs_database *db = (struct hs_database *)hs_database_alloc(db_len); + struct hs_database *db = static_cast<struct hs_database *>(hs_database_alloc(db_len)); if (hs_check_alloc(db) != HS_SUCCESS) { hs_database_free(db); return nullptr; } @@ -492,7 +492,7 @@ hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) { DEBUG_PRINTF("shift is %zu\n", shift); db->bytecode = offsetof(struct hs_database, bytes) - shift; - char *bytecode = (char *)db + db->bytecode; + char *bytecode = reinterpret_cast<char *>(db) + db->bytecode; assert(ISALIGNED_CL(bytecode)); db->magic = HS_DB_MAGIC; @@ -525,7 +525,7 @@ struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag) { throw CompileError("Internal error."); } - const char *bytecode = (const char *)(rose.get()); + const char *bytecode = reinterpret_cast<const char *>(rose.get()); const platform_t p = target_to_platform(ng.cc.target_info); struct hs_database *db = dbCreate(bytecode, *length, p); if (!db) { diff --git a/src/compiler/error.cpp b/src/compiler/error.cpp index 07db9819..c4252f7c 100644 --- a/src/compiler/error.cpp +++ b/src/compiler/error.cpp @@ -57,15
+57,14 @@ extern const hs_compile_error_t hs_badalloc = { namespace ue2 { hs_compile_error_t *generateCompileError(const string &err, int expression) { - hs_compile_error_t *ret = - (struct hs_compile_error *)hs_misc_alloc(sizeof(hs_compile_error_t)); + hs_compile_error_t *ret = static_cast<hs_compile_error_t *>(hs_misc_alloc(sizeof(hs_compile_error_t))); if (ret) { hs_error_t e = hs_check_alloc(ret); if (e != HS_SUCCESS) { hs_misc_free(ret); return const_cast<hs_compile_error_t *>(&hs_badalloc); } - char *msg = (char *)hs_misc_alloc(err.size() + 1); + char *msg = static_cast<char *>(hs_misc_alloc(err.size() + 1)); if (msg) { e = hs_check_alloc(msg); if (e != HS_SUCCESS) { diff --git a/src/crc32.c b/src/crc32.c index 19c7b7fa..ca5b5fed 100644 --- a/src/crc32.c +++ b/src/crc32.c @@ -542,14 +542,13 @@ u32 crc32c_sb8_64_bit(u32 running_crc, const unsigned char* p_buf, // Main aligned loop, processes eight bytes at a time. - u32 term1, term2; for (size_t li = 0; li < running_length/8; li++) { u32 block = *(const u32 *)p_buf; crc ^= block; p_buf += 4; - term1 = crc_tableil8_o88[crc & 0x000000FF] ^ + u32 term1 = crc_tableil8_o88[crc & 0x000000FF] ^ crc_tableil8_o80[(crc >> 8) & 0x000000FF]; - term2 = crc >> 16; + u32 term2 = crc >> 16; crc = term1 ^ crc_tableil8_o72[term2 & 0x000000FF] ^ crc_tableil8_o64[(term2 >> 8) & 0x000000FF]; diff --git a/src/database.h b/src/database.h index a4d6e4dc..1b94f1b0 100644 --- a/src/database.h +++ b/src/database.h @@ -79,21 +79,18 @@ static UNUSED const platform_t hs_current_platform_no_avx2 = { HS_PLATFORM_NOAVX2 | HS_PLATFORM_NOAVX512 | - HS_PLATFORM_NOAVX512VBMI | - 0, + HS_PLATFORM_NOAVX512VBMI }; static UNUSED const platform_t hs_current_platform_no_avx512 = { HS_PLATFORM_NOAVX512 | - HS_PLATFORM_NOAVX512VBMI | - 0, + HS_PLATFORM_NOAVX512VBMI }; static UNUSED const platform_t hs_current_platform_no_avx512vbmi = { - HS_PLATFORM_NOAVX512VBMI | - 0, + HS_PLATFORM_NOAVX512VBMI }; /* diff --git a/src/dispatcher.c b/src/dispatcher.c index a817e744..e213bbe6 100644 --- a/src/dispatcher.c +++ b/src/dispatcher.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2016-2020, Intel Corporation + * Copyright (c) 2024, VectorCamp PC * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,6 +31,39 @@ #include "hs_common.h" #include "hs_runtime.h" #include "ue2common.h" + +/* Streamlining the dispatch to eliminate runtime checking/branching: + * What we want to do is: the first call to the function will run the resolve + * code and set the static resolved/dispatch pointer to point to the + * correct function. Subsequent calls to the function will go directly to + * the resolved ptr. The simplest way to accomplish this is to + * initially set the pointer to the resolve function. + * To accomplish this in a manner invisible to the user, + * we do involve some rather ugly/confusing macros in here. + * There are four macros that assemble the code for each function + * we want to dispatch in this manner: + * CREATE_DISPATCH + * this generates the declarations for the candidate target functions, + * for the fat_dispatch function pointer, for the resolve_ function, + * points the function pointer to the resolve function, and contains + * most of the definition of the resolve function.
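 * (Editor's illustration, hedged: the names below are hypothetical, but
 * for a function RTYPE foo(int arg) the four macros assemble code of
 * roughly this shape:
 *
 *     static RTYPE resolve_foo(int arg);
 *     static RTYPE (*fat_dispatch_foo)(int arg) = &resolve_foo;
 *     static RTYPE resolve_foo(int arg) {
 *         fat_dispatch_foo = &avx2_foo;        // or whichever variant fits
 *         return (*fat_dispatch_foo)(arg);     // added by CONNECT_ARGS_1
 *     }
 *     HS_PUBLIC_API RTYPE foo(int arg) {       // opened by CONNECT_DISPATCH_2
 *         return (*fat_dispatch_foo)(arg);     // added by CONNECT_ARGS_3
 *     }
 * )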
The very end of the + resolve function is completed by the next macro, because in the + CREATE_DISPATCH macro we have the argument list with the arg declarations, + which is needed to generate correct function signatures, but we + can't generate from this, in a macro, a _call_ to one of those functions. + CONNECT_ARGS_1 + this macro fills in the actual call at the end of the resolve function, + with the correct arg list, hence the name connect args. + CONNECT_DISPATCH_2 + this macro likewise gives us the beginning of the definition of the + actual entry point function (the 'real name' that's called by the user) + but again, in the pass-through call, it cannot invoke the target without + the arg list, which is supplied by the final macro, + CONNECT_ARGS_3 + * + */ + + #if defined(ARCH_IA32) || defined(ARCH_X86_64) #include "util/arch/x86/cpuid_inline.h" #include "util/join.h" @@ -57,30 +91,38 @@ return (RTYPE)HS_ARCH_ERROR; \ } \ \ - /* resolver */ \ - static RTYPE (*JOIN(resolve_, NAME)(void))(__VA_ARGS__) { \ - if (check_avx512vbmi()) { \ - return JOIN(avx512vbmi_, NAME); \ - } \ - if (check_avx512()) { \ - return JOIN(avx512_, NAME); \ - } \ - if (check_avx2()) { \ - return JOIN(avx2_, NAME); \ - } \ - if (check_sse42() && check_popcnt()) { \ - return JOIN(corei7_, NAME); \ - } \ - if (check_ssse3()) { \ - return JOIN(core2_, NAME); \ - } \ - /* anything else is fail */ \ - return JOIN(error_, NAME); \ - } \ + /* dispatch routing pointer for this function */ \ + /* initially point it at the resolve function */ \ + static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__); \ + static RTYPE (* JOIN(fat_dispatch_, NAME))(__VA_ARGS__) = \ + &JOIN(resolve_, NAME); \ \ - /* function */ \ - HS_PUBLIC_API \ - RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME))) + /* resolver */ \ + static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__) { \ + if (check_avx512vbmi()) { \ + fat_dispatch_ ## NAME = &JOIN(avx512vbmi_, NAME); \ + } \ + else if (check_avx512()) { \ + fat_dispatch_ ## NAME = &JOIN(avx512_, NAME); \ + } \ + else if (check_avx2()) { \ + fat_dispatch_ ## NAME = &JOIN(avx2_, NAME); \ + } \ + else if (check_sse42() && check_popcnt()) { \ + fat_dispatch_ ## NAME = &JOIN(corei7_, NAME); \ + } \ + else if (check_ssse3()) { \ + fat_dispatch_ ## NAME = &JOIN(core2_, NAME); \ + } else { \ + /* anything else is fail */ \ + fat_dispatch_ ## NAME = &JOIN(error_, NAME); \ + } \ + + + +/* the rest of the function is completed in the CONNECT_ARGS_1 macro. 
*/ + + #elif defined(ARCH_AARCH64) #include "util/arch/arm/cpuid_inline.h" @@ -97,99 +139,226 @@ return (RTYPE)HS_ARCH_ERROR; \ } \ \ - /* resolver */ \ - static RTYPE (*JOIN(resolve_, NAME)(void))(__VA_ARGS__) { \ - if (check_sve2()) { \ - return JOIN(sve2_, NAME); \ - } \ - if (check_sve()) { \ - return JOIN(sve_, NAME); \ - } \ - if (check_neon()) { \ - return JOIN(neon_, NAME); \ - } \ - /* anything else is fail */ \ - return JOIN(error_, NAME); \ - } \ + /* dispatch routing pointer for this function */ \ + /* initially point it at the resolve function */ \ + static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__); \ + static RTYPE (* JOIN(fat_dispatch_, NAME))(__VA_ARGS__) = \ + &JOIN(resolve_, NAME); \ \ - /* function */ \ - HS_PUBLIC_API \ - RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME))) + /* resolver */ \ + static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__) { \ + if (check_sve2()) { \ + fat_dispatch_ ## NAME = &JOIN(sve2_, NAME); \ + } \ + else if (check_sve()) { \ + fat_dispatch_ ## NAME = &JOIN(sve_, NAME); \ + } \ + else if (check_neon()) { \ + fat_dispatch_ ## NAME = &JOIN(neon_, NAME); \ + } else { \ + /* anything else is fail */ \ + fat_dispatch_ ## NAME = &JOIN(error_, NAME); \ + } \ + + +/* the rest of the function is completed in the CONNECT_ARGS_1 macro. */ + #endif + +#define CONNECT_ARGS_1(RTYPE, NAME, ...) \ + return (*fat_dispatch_ ## NAME)(__VA_ARGS__); \ + } \ + + +#define CONNECT_DISPATCH_2(RTYPE, NAME, ...) \ + /* new function */ \ + HS_PUBLIC_API \ + RTYPE NAME(__VA_ARGS__) { \ + + +#define CONNECT_ARGS_3(RTYPE, NAME, ...) \ + return (*fat_dispatch_ ## NAME)(__VA_ARGS__); \ + } \ + + #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-function" + +/* this gets a bit ugly to compose the static redirect functions, + * as we necessarily need first the typed arg list and then just the arg + * names, twice in a row, to define the redirect function and the + * dispatch function call */ + CREATE_DISPATCH(hs_error_t, hs_scan, const hs_database_t *db, const char *data, unsigned length, unsigned flags, hs_scratch_t *scratch, match_event_handler onEvent, void *userCtx); +CONNECT_ARGS_1(hs_error_t, hs_scan, db, data, length, flags, scratch, onEvent, userCtx); +CONNECT_DISPATCH_2(hs_error_t, hs_scan, const hs_database_t *db, const char *data, + unsigned length, unsigned flags, hs_scratch_t *scratch, + match_event_handler onEvent, void *userCtx); +CONNECT_ARGS_3(hs_error_t, hs_scan, db, data, length, flags, scratch, onEvent, userCtx); CREATE_DISPATCH(hs_error_t, hs_stream_size, const hs_database_t *database, size_t *stream_size); +CONNECT_ARGS_1(hs_error_t, hs_stream_size, database, stream_size); +CONNECT_DISPATCH_2(hs_error_t, hs_stream_size, const hs_database_t *database, + size_t *stream_size); +CONNECT_ARGS_3(hs_error_t, hs_stream_size, database, stream_size); CREATE_DISPATCH(hs_error_t, hs_database_size, const hs_database_t *db, size_t *size); +CONNECT_ARGS_1(hs_error_t, hs_database_size, db, size); +CONNECT_DISPATCH_2(hs_error_t, hs_database_size, const hs_database_t *db, + size_t *size); +CONNECT_ARGS_3(hs_error_t, hs_database_size, db, size); + CREATE_DISPATCH(hs_error_t, dbIsValid, const hs_database_t *db); +CONNECT_ARGS_1(hs_error_t, dbIsValid, db); +CONNECT_DISPATCH_2(hs_error_t, dbIsValid, const hs_database_t *db); +CONNECT_ARGS_3(hs_error_t, dbIsValid, db); + CREATE_DISPATCH(hs_error_t, hs_free_database, hs_database_t *db); +CONNECT_ARGS_1(hs_error_t, 
hs_free_database, db); +CONNECT_DISPATCH_2(hs_error_t, hs_free_database, hs_database_t *db); +CONNECT_ARGS_3(hs_error_t, hs_free_database, db); CREATE_DISPATCH(hs_error_t, hs_open_stream, const hs_database_t *db, unsigned int flags, hs_stream_t **stream); +CONNECT_ARGS_1(hs_error_t, hs_open_stream, db, flags, stream); +CONNECT_DISPATCH_2(hs_error_t, hs_open_stream, const hs_database_t *db, + unsigned int flags, hs_stream_t **stream); +CONNECT_ARGS_3(hs_error_t, hs_open_stream, db, flags, stream); CREATE_DISPATCH(hs_error_t, hs_scan_stream, hs_stream_t *id, const char *data, unsigned int length, unsigned int flags, hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt); +CONNECT_ARGS_1(hs_error_t, hs_scan_stream, id, data, length, flags, scratch, onEvent, ctxt); +CONNECT_DISPATCH_2(hs_error_t, hs_scan_stream, hs_stream_t *id, const char *data, + unsigned int length, unsigned int flags, hs_scratch_t *scratch, + match_event_handler onEvent, void *ctxt); +CONNECT_ARGS_3(hs_error_t, hs_scan_stream, id, data, length, flags, scratch, onEvent, ctxt); CREATE_DISPATCH(hs_error_t, hs_close_stream, hs_stream_t *id, hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt); +CONNECT_ARGS_1(hs_error_t, hs_close_stream, id, scratch, onEvent, ctxt); +CONNECT_DISPATCH_2(hs_error_t, hs_close_stream, hs_stream_t *id, + hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt); +CONNECT_ARGS_3(hs_error_t, hs_close_stream, id, scratch, onEvent, ctxt); CREATE_DISPATCH(hs_error_t, hs_scan_vector, const hs_database_t *db, const char *const *data, const unsigned int *length, unsigned int count, unsigned int flags, hs_scratch_t *scratch, match_event_handler onevent, void *context); +CONNECT_ARGS_1(hs_error_t, hs_scan_vector, db, data, length, count, flags, scratch, onevent, context); +CONNECT_DISPATCH_2(hs_error_t, hs_scan_vector, const hs_database_t *db, + const char *const *data, const unsigned int *length, + unsigned int count, unsigned int flags, hs_scratch_t *scratch, + match_event_handler onevent, void *context); +CONNECT_ARGS_3(hs_error_t, hs_scan_vector, db, data, length, count, flags, scratch, onevent, context); CREATE_DISPATCH(hs_error_t, hs_database_info, const hs_database_t *db, char **info); +CONNECT_ARGS_1(hs_error_t, hs_database_info, db, info); +CONNECT_DISPATCH_2(hs_error_t, hs_database_info, const hs_database_t *db, char **info); +CONNECT_ARGS_3(hs_error_t, hs_database_info, db, info); CREATE_DISPATCH(hs_error_t, hs_copy_stream, hs_stream_t **to_id, const hs_stream_t *from_id); +CONNECT_ARGS_1(hs_error_t, hs_copy_stream, to_id, from_id); +CONNECT_DISPATCH_2(hs_error_t, hs_copy_stream, hs_stream_t **to_id, + const hs_stream_t *from_id); +CONNECT_ARGS_3(hs_error_t, hs_copy_stream, to_id, from_id); CREATE_DISPATCH(hs_error_t, hs_reset_stream, hs_stream_t *id, unsigned int flags, hs_scratch_t *scratch, match_event_handler onEvent, void *context); +CONNECT_ARGS_1(hs_error_t, hs_reset_stream, id, flags, scratch, onEvent, context); +CONNECT_DISPATCH_2(hs_error_t, hs_reset_stream, hs_stream_t *id, + unsigned int flags, hs_scratch_t *scratch, + match_event_handler onEvent, void *context); +CONNECT_ARGS_3(hs_error_t, hs_reset_stream, id, flags, scratch, onEvent, context); CREATE_DISPATCH(hs_error_t, hs_reset_and_copy_stream, hs_stream_t *to_id, const hs_stream_t *from_id, hs_scratch_t *scratch, match_event_handler onEvent, void *context); +CONNECT_ARGS_1(hs_error_t, hs_reset_and_copy_stream, to_id, from_id, scratch, onEvent, context); +CONNECT_DISPATCH_2(hs_error_t, 
hs_reset_and_copy_stream, hs_stream_t *to_id, + const hs_stream_t *from_id, hs_scratch_t *scratch, + match_event_handler onEvent, void *context); +CONNECT_ARGS_3(hs_error_t, hs_reset_and_copy_stream, to_id, from_id, scratch, onEvent, context); CREATE_DISPATCH(hs_error_t, hs_serialize_database, const hs_database_t *db, char **bytes, size_t *length); +CONNECT_ARGS_1(hs_error_t, hs_serialize_database, db, bytes, length); +CONNECT_DISPATCH_2(hs_error_t, hs_serialize_database, const hs_database_t *db, + char **bytes, size_t *length); +CONNECT_ARGS_3(hs_error_t, hs_serialize_database, db, bytes, length); CREATE_DISPATCH(hs_error_t, hs_deserialize_database, const char *bytes, const size_t length, hs_database_t **db); +CONNECT_ARGS_1(hs_error_t, hs_deserialize_database, bytes, length, db); +CONNECT_DISPATCH_2(hs_error_t, hs_deserialize_database, const char *bytes, + const size_t length, hs_database_t **db); +CONNECT_ARGS_3(hs_error_t, hs_deserialize_database, bytes, length, db); CREATE_DISPATCH(hs_error_t, hs_deserialize_database_at, const char *bytes, const size_t length, hs_database_t *db); +CONNECT_ARGS_1(hs_error_t, hs_deserialize_database_at, bytes, length, db); +CONNECT_DISPATCH_2(hs_error_t, hs_deserialize_database_at, const char *bytes, + const size_t length, hs_database_t *db); +CONNECT_ARGS_3(hs_error_t, hs_deserialize_database_at, bytes, length, db); CREATE_DISPATCH(hs_error_t, hs_serialized_database_info, const char *bytes, size_t length, char **info); +CONNECT_ARGS_1(hs_error_t, hs_serialized_database_info, bytes, length, info); +CONNECT_DISPATCH_2(hs_error_t, hs_serialized_database_info, const char *bytes, + size_t length, char **info); +CONNECT_ARGS_3(hs_error_t, hs_serialized_database_info, bytes, length, info); CREATE_DISPATCH(hs_error_t, hs_serialized_database_size, const char *bytes, const size_t length, size_t *deserialized_size); +CONNECT_ARGS_1(hs_error_t, hs_serialized_database_size, bytes, length, deserialized_size); +CONNECT_DISPATCH_2(hs_error_t, hs_serialized_database_size, const char *bytes, + const size_t length, size_t *deserialized_size); +CONNECT_ARGS_3(hs_error_t, hs_serialized_database_size, bytes, length, deserialized_size); CREATE_DISPATCH(hs_error_t, hs_compress_stream, const hs_stream_t *stream, char *buf, size_t buf_space, size_t *used_space); +CONNECT_ARGS_1(hs_error_t, hs_compress_stream, stream, + buf, buf_space, used_space); +CONNECT_DISPATCH_2(hs_error_t, hs_compress_stream, const hs_stream_t *stream, + char *buf, size_t buf_space, size_t *used_space); +CONNECT_ARGS_3(hs_error_t, hs_compress_stream, stream, + buf, buf_space, used_space); CREATE_DISPATCH(hs_error_t, hs_expand_stream, const hs_database_t *db, hs_stream_t **stream, const char *buf,size_t buf_size); +CONNECT_ARGS_1(hs_error_t, hs_expand_stream, db, stream, buf,buf_size); +CONNECT_DISPATCH_2(hs_error_t, hs_expand_stream, const hs_database_t *db, + hs_stream_t **stream, const char *buf,size_t buf_size); +CONNECT_ARGS_3(hs_error_t, hs_expand_stream, db, stream, buf,buf_size); CREATE_DISPATCH(hs_error_t, hs_reset_and_expand_stream, hs_stream_t *to_stream, const char *buf, size_t buf_size, hs_scratch_t *scratch, match_event_handler onEvent, void *context); +CONNECT_ARGS_1(hs_error_t, hs_reset_and_expand_stream, to_stream, + buf, buf_size, scratch, onEvent, context); +CONNECT_DISPATCH_2(hs_error_t, hs_reset_and_expand_stream, hs_stream_t *to_stream, + const char *buf, size_t buf_size, hs_scratch_t *scratch, + match_event_handler onEvent, void *context); +CONNECT_ARGS_3(hs_error_t, 
hs_reset_and_expand_stream, to_stream, + buf, buf_size, scratch, onEvent, context); /** INTERNALS **/ CREATE_DISPATCH(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen); +CONNECT_ARGS_1(u32, Crc32c_ComputeBuf, inCrc32, buf, bufLen); +CONNECT_DISPATCH_2(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen); +CONNECT_ARGS_3(u32, Crc32c_ComputeBuf, inCrc32, buf, bufLen); #pragma GCC diagnostic pop #pragma GCC diagnostic pop + diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index 561e8f98..302487a3 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -298,7 +298,7 @@ void get_conf_stride_4(const u8 *itPtr, UNUSED const u8 *start_ptr, static really_inline void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control, const u32 *confBase, const struct FDR_Runtime_Args *a, - const u8 *ptr, u32 *last_match_id, struct zone *z) { + const u8 *ptr, u32 *last_match_id, const struct zone *z) { const u8 bucket = 8; if (likely(!*conf)) { @@ -333,7 +333,7 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control, } static really_inline -void dumpZoneInfo(UNUSED struct zone *z, UNUSED size_t zone_id) { +void dumpZoneInfo(UNUSED const struct zone *z, UNUSED size_t zone_id) { #ifdef DEBUG DEBUG_PRINTF("zone: zone=%zu, bufPtr=%p\n", zone_id, z->buf); DEBUG_PRINTF("zone: startPtr=%p, endPtr=%p, shift=%u\n", diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index f087e958..63e8b34f 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -127,7 +127,7 @@ void andMask(u8 *dest, const u8 *a, const u8 *b, u32 num_bytes) { } void FDRCompiler::createInitialState(FDR *fdr) { - u8 *start = (u8 *)&fdr->start; + u8 *start = reinterpret_cast<u8 *>(&fdr->start); /* initial state should to be 1 in each slot in the bucket up to bucket * minlen - 1, and 0 thereafter */ @@ -176,7 +176,7 @@ bytecode_ptr<FDR> FDRCompiler::setupFDR() { auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64); assert(fdr); // otherwise would have thrown std::bad_alloc - u8 *fdr_base = (u8 *)fdr.get(); + u8 *fdr_base = reinterpret_cast<u8 *>(fdr.get()); // Write header. fdr->size = size; diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index edd68e0b..f5b95280 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -58,7 +58,7 @@ u64a make_u64a_mask(const vector<u8> &v) { u64a mask = 0; size_t vlen = v.size(); size_t len = std::min(vlen, sizeof(mask)); - unsigned char *m = (unsigned char *)&mask; + u8 *m = reinterpret_cast<u8 *>(&mask); memcpy(m + sizeof(mask) - len, &v[vlen - len], len); return mask; } @@ -159,7 +159,7 @@ bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits, map<u32, vector<LiteralIndex> > res2lits; hwlm_group_t gm = 0; for (LiteralIndex i = 0; i < lits.size(); i++) { - LitInfo & li = tmpLitInfo[i]; + const LitInfo & li = tmpLitInfo[i]; u32 hash = CONF_HASH_CALL(li.v, andmsk, mult, nBits); DEBUG_PRINTF("%016llx --> %u\n", li.v, hash); res2lits[hash].emplace_back(i); @@ -245,10 +245,10 @@ bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits, fdrc->groups = gm; // After the FDRConfirm, we have the lit index array. - u8 *fdrc_base = (u8 *)fdrc.get(); + u8 *fdrc_base = reinterpret_cast<u8 *>(fdrc.get()); u8 *ptr = fdrc_base + sizeof(*fdrc); ptr = ROUNDUP_PTR(ptr, alignof(u32)); - u32 *bitsToLitIndex = (u32 *)ptr; + u32 *bitsToLitIndex = reinterpret_cast<u32 *>(ptr); ptr += bitsToLitIndexSize; // After the lit index array, we have the LitInfo structures themselves, @@ -265,7 +265,7 @@ bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits, LiteralIndex litIdx = *i; // Write LitInfo header.
- LitInfo &finalLI = *(LitInfo *)ptr; + LitInfo &finalLI = *(reinterpret_cast<LitInfo *>(ptr)); finalLI = tmpLitInfo[litIdx]; ptr += sizeof(LitInfo); // String starts directly after LitInfo. @@ -294,9 +294,6 @@ setupFullConfs(const vector<hwlmLiteral> &lits, const EngineDescription &eng, const map<BucketIndex, vector<LiteralIndex>> &bucketToLits, bool make_small) { - unique_ptr<TeddyEngineDescription> teddyDescr = - getTeddyDescription(eng.getID()); - BC2CONF bc2Conf; u32 totalConfirmSize = 0; for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) { @@ -321,7 +318,7 @@ setupFullConfs(const vector<hwlmLiteral> &lits, auto buf = make_zeroed_bytecode_ptr<u8>(totalSize, 64); assert(buf); // otherwise would have thrown std::bad_alloc - u32 *confBase = (u32 *)buf.get(); + u32 *confBase = reinterpret_cast<u32 *>(buf.get()); u8 *ptr = buf.get() + totalConfSwitchSize; assert(ISALIGNED_CL(ptr)); diff --git a/src/fdr/fdr_engine_description.cpp b/src/fdr/fdr_engine_description.cpp index c4f59258..6de09f92 100644 --- a/src/fdr/fdr_engine_description.cpp +++ b/src/fdr/fdr_engine_description.cpp @@ -71,7 +71,7 @@ u32 findDesiredStride(size_t num_lits, size_t min_len, size_t min_len_count) { } else if (num_lits < 5000) { // for larger but not huge sizes, go to stride 2 only if we have at // least minlen 3 - desiredStride = MIN(min_len - 1, 2); + desiredStride = std::min(min_len - 1, 2UL); } } diff --git a/src/fdr/flood_compile.cpp b/src/fdr/flood_compile.cpp index ff805ca3..6811fc95 100644 --- a/src/fdr/flood_compile.cpp +++ b/src/fdr/flood_compile.cpp @@ -208,8 +208,8 @@ bytecode_ptr<u8> setupFDRFloodControl(const vector<hwlmLiteral> &lits, auto buf = make_zeroed_bytecode_ptr<u8>(totalSize, 16); assert(buf); // otherwise would have thrown std::bad_alloc - u32 *floodHeader = (u32 *)buf.get(); - FDRFlood *layoutFlood = (FDRFlood *)(buf.get() + floodHeaderSize); + u32 *floodHeader = reinterpret_cast<u32 *>(buf.get()); + FDRFlood *layoutFlood = reinterpret_cast<FDRFlood *>(buf.get() + floodHeaderSize); u32 currentFloodIndex = 0; for (const auto &m : flood2chars) { diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index e7398b6f..821a69e2 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -328,7 +328,7 @@ bool pack(const vector<hwlmLiteral> &lits, static void initReinforcedTable(u8 *rmsk) { - u64a *mask = (u64a *)rmsk; + u64a *mask = reinterpret_cast<u64a *>(rmsk); fill_n(mask, N_CHARS, 0x00ffffffffffffffULL); } @@ -576,8 +576,8 @@ bytecode_ptr<FDR> TeddyCompiler::build() { auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64); assert(fdr); // otherwise would have thrown std::bad_alloc - Teddy *teddy = (Teddy *)fdr.get(); // ugly - u8 *teddy_base = (u8 *)teddy; + Teddy *teddy = reinterpret_cast<Teddy *>(fdr.get()); // ugly + u8 *teddy_base = reinterpret_cast<u8 *>(teddy); // Write header.
teddy->size = size; @@ -622,7 +622,7 @@ bytecode_ptr<FDR> TeddyCompiler::build() { static bool assignStringsToBuckets( const vector<hwlmLiteral> &lits, - TeddyEngineDescription &eng, + const TeddyEngineDescription &eng, map<BucketIndex, vector<LiteralIndex>> &bucketToLits) { assert(eng.numMasks <= MAX_NUM_MASKS); if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) { diff --git a/src/fdr/teddy_engine_description.cpp b/src/fdr/teddy_engine_description.cpp index 47c79ef4..a544d4e5 100644 --- a/src/fdr/teddy_engine_description.cpp +++ b/src/fdr/teddy_engine_description.cpp @@ -52,14 +52,14 @@ u32 TeddyEngineDescription::getDefaultFloodSuffixLength() const { void getTeddyDescriptions(vector<TeddyEngineDescription> *out) { static const TeddyEngineDef defns[] = { - { 3, 0 | HS_CPU_FEATURES_AVX2, 1, 16, false }, - { 4, 0 | HS_CPU_FEATURES_AVX2, 1, 16, true }, - { 5, 0 | HS_CPU_FEATURES_AVX2, 2, 16, false }, - { 6, 0 | HS_CPU_FEATURES_AVX2, 2, 16, true }, - { 7, 0 | HS_CPU_FEATURES_AVX2, 3, 16, false }, - { 8, 0 | HS_CPU_FEATURES_AVX2, 3, 16, true }, - { 9, 0 | HS_CPU_FEATURES_AVX2, 4, 16, false }, - { 10, 0 | HS_CPU_FEATURES_AVX2, 4, 16, true }, + { 3, HS_CPU_FEATURES_AVX2, 1, 16, false }, + { 4, HS_CPU_FEATURES_AVX2, 1, 16, true }, + { 5, HS_CPU_FEATURES_AVX2, 2, 16, false }, + { 6, HS_CPU_FEATURES_AVX2, 2, 16, true }, + { 7, HS_CPU_FEATURES_AVX2, 3, 16, false }, + { 8, HS_CPU_FEATURES_AVX2, 3, 16, true }, + { 9, HS_CPU_FEATURES_AVX2, 4, 16, false }, + { 10, HS_CPU_FEATURES_AVX2, 4, 16, true }, { 11, 0, 1, 8, false }, { 12, 0, 1, 8, true }, { 13, 0, 2, 8, false }, diff --git a/src/hs.cpp b/src/hs.cpp index 61e46148..22a9043b 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -589,7 +589,7 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, return HS_COMPILER_ERROR; } - hs_expr_info *rv = (hs_expr_info *)hs_misc_alloc(sizeof(*rv)); + hs_expr_info *rv = static_cast<hs_expr_info *>(hs_misc_alloc(sizeof(*rv))); if (!rv) { *error = const_cast<hs_compile_error_t *>(&hs_enomem); return HS_COMPILER_ERROR; } diff --git a/src/hs_valid_platform.c b/src/hs_valid_platform.c index 74a8fc1e..00c612bc 100644 --- a/src/hs_valid_platform.c +++ b/src/hs_valid_platform.c @@ -48,6 +48,8 @@ hs_error_t HS_CDECL hs_valid_platform(void) { return HS_ARCH_ERROR; } #elif !defined(VS_SIMDE_BACKEND) && (defined(ARCH_ARM32) || defined(ARCH_AARCH64)) + // check_neon returns true for now + // cppcheck-suppress knownConditionTrueFalse if (check_neon()) { return HS_SUCCESS; } else {
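[Editor's note: a hedged sketch of the startup probe that pairs with the fat runtime behaviour documented earlier: on unsupported hosts every entry point resolves to a stub returning :c:member:`HS_ARCH_ERROR`, so probe first.] ::

    #include <stdio.h>
    #include <hs/hs.h>

    int main(void) {
        if (hs_valid_platform() != HS_SUCCESS) {
            fprintf(stderr, "host CPU lacks required features\n");
            return 1;
        }
        /* ... safe to compile and scan from here ... */
        return 0;
    }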
diff --git a/src/hwlm/hwlm.c b/src/hwlm/hwlm.c index e50deff7..40349def 100644 --- a/src/hwlm/hwlm.c +++ b/src/hwlm/hwlm.c @@ -170,8 +170,7 @@ void do_accel_streaming(const union AccelAux *aux, const u8 *hbuf, size_t hlen, DEBUG_PRINTF("got %zu/%zu in 2nd buffer\n", delta, len); *start += delta; } else if (hlen) { - UNUSED size_t remaining = offset + ptr2 - found; - DEBUG_PRINTF("got %zu/%zu remaining in 1st buffer\n", remaining, hlen); + DEBUG_PRINTF("got %zu/%zu remaining in 1st buffer\n", offset + ptr2 - found, hlen); } } diff --git a/src/hwlm/hwlm_build.cpp b/src/hwlm/hwlm_build.cpp index 7837819a..bb83849b 100644 --- a/src/hwlm/hwlm_build.cpp +++ b/src/hwlm/hwlm_build.cpp @@ -143,7 +143,7 @@ bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc, } if (!eng) { - return nullptr; + return bytecode_ptr<HWLM>(nullptr); } assert(engSize); @@ -155,6 +155,7 @@ bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc, auto h = make_zeroed_bytecode_ptr<HWLM>(hwlm_len, 64); h->type = proto.engType; + // cppcheck-suppress cstyleCast memcpy(HWLM_DATA(h.get()), eng.get(), engSize); return h; @@ -218,10 +219,12 @@ size_t hwlmSize(const HWLM *h) { switch (h->type) { case HWLM_ENGINE_NOOD: - engSize = noodSize((const noodTable *)HWLM_C_DATA(h)); + // cppcheck-suppress cstyleCast + engSize = noodSize(reinterpret_cast<const noodTable *>(HWLM_C_DATA(h))); break; case HWLM_ENGINE_FDR: - engSize = fdrSize((const FDR *)HWLM_C_DATA(h)); + // cppcheck-suppress cstyleCast + engSize = fdrSize(reinterpret_cast<const FDR *>(HWLM_C_DATA(h))); break; } diff --git a/src/hwlm/noodle_build.cpp b/src/hwlm/noodle_build.cpp index a0128d0a..74dfbd2c 100644 --- a/src/hwlm/noodle_build.cpp +++ b/src/hwlm/noodle_build.cpp @@ -56,7 +56,7 @@ u64a make_u64a_mask(const vector<u8> &v) { u64a mask = 0; size_t len = v.size(); - unsigned char *m = (unsigned char *)&mask; + u8 *m = reinterpret_cast<u8 *>(&mask); DEBUG_PRINTF("making mask len %zu\n", len); memcpy(m, &v[0], len); return mask; diff --git a/src/nfa/accel_dfa_build_strat.cpp b/src/nfa/accel_dfa_build_strat.cpp index 6a848812..4c72bd31 100644 --- a/src/nfa/accel_dfa_build_strat.cpp +++ b/src/nfa/accel_dfa_build_strat.cpp @@ -427,7 +427,7 @@ void accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx, const AccelScheme &info, void *accel_out) { - AccelAux *accel = (AccelAux *)accel_out; + AccelAux *accel = reinterpret_cast<AccelAux *>(accel_out); DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset, info.double_offset); @@ -474,7 +474,8 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx, u8 c1 = info.double_byte.begin()->first & m1; u8 c2 = info.double_byte.begin()->second & m2; #ifdef HAVE_SVE2 - if (vermicelliDoubleMasked16Build(c1, c2, m1, m2, (u8 *)&accel->mdverm16.mask)) { + if (vermicelliDoubleMasked16Build(c1, c2, m1, m2, + reinterpret_cast<u8 *>(&accel->mdverm16.mask))) { accel->accel_type = ACCEL_DVERM16_MASKED; accel->mdverm16.offset = verify_u8(info.double_offset); accel->mdverm16.c1 = c1; @@ -483,8 +484,9 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx, c1, c2); return; } else if (info.double_byte.size() <= 8 && - vermicelliDouble16Build(info.double_byte, (u8 *)&accel->dverm16.mask, - (u8 *)&accel->dverm16.firsts)) { + vermicelliDouble16Build(info.double_byte, + reinterpret_cast<u8 *>(&accel->dverm16.mask), + reinterpret_cast<u8 *>(&accel->dverm16.firsts))) { accel->accel_type = ACCEL_DVERM16; accel->dverm16.offset = verify_u8(info.double_offset); DEBUG_PRINTF("building double16-vermicelli\n"); @@ -504,8 +506,9 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx, } #ifdef HAVE_SVE2 if (info.double_byte.size() <= 8 && - vermicelliDouble16Build(info.double_byte, (u8 *)&accel->dverm16.mask, - (u8 *)&accel->dverm16.firsts)) { + vermicelliDouble16Build(info.double_byte, + reinterpret_cast<u8 *>(&accel->dverm16.mask), + reinterpret_cast<u8 *>(&accel->dverm16.firsts))) { accel->accel_type = ACCEL_DVERM16; accel->dverm16.offset = verify_u8(info.double_offset); DEBUG_PRINTF("building double16-vermicelli\n"); @@ -516,9 +519,11 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx, if (double_byte_ok(info) && shuftiBuildDoubleMasks( - info.double_cr, info.double_byte, (u8 *)&accel->dshufti.lo1, - (u8 *)&accel->dshufti.hi1, (u8 *)&accel->dshufti.lo2, - (u8 *)&accel->dshufti.hi2)) { + info.double_cr, info.double_byte, + reinterpret_cast<u8 *>(&accel->dshufti.lo1), + reinterpret_cast<u8 *>(&accel->dshufti.hi1), + reinterpret_cast<u8 *>(&accel->dshufti.lo2), + reinterpret_cast<u8 *>(&accel->dshufti.hi2))) { accel->accel_type = ACCEL_DSHUFTI; accel->dshufti.offset = verify_u8(info.double_offset); DEBUG_PRINTF("state %hu is double shufti\n", this_idx); @@ -550,7 +555,7 @@ accel_dfa_build_strat::buildAccel(UNUSED
dstate_id_t this_idx, #ifdef HAVE_SVE2 if (info.cr.count() <= 16) { accel->accel_type = ACCEL_VERM16; - vermicelli16Build(info.cr, (u8 *)&accel->verm16.mask); + vermicelli16Build(info.cr, reinterpret_cast(&accel->verm16.mask)); DEBUG_PRINTF("state %hu is vermicelli16\n", this_idx); return; } @@ -563,16 +568,18 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx, } accel->accel_type = ACCEL_SHUFTI; - if (-1 != shuftiBuildMasks(info.cr, (u8 *)&accel->shufti.lo, - (u8 *)&accel->shufti.hi)) { + if (-1 != shuftiBuildMasks(info.cr, + reinterpret_cast(&accel->shufti.lo), + reinterpret_cast(&accel->shufti.hi))) { DEBUG_PRINTF("state %hu is shufti\n", this_idx); return; } assert(!info.cr.none()); accel->accel_type = ACCEL_TRUFFLE; - truffleBuildMasks(info.cr, (u8 *)&accel->truffle.mask1, - (u8 *)&accel->truffle.mask2); + truffleBuildMasks(info.cr, + reinterpret_cast(&accel->truffle.mask1), + reinterpret_cast(&accel->truffle.mask2)); DEBUG_PRINTF("state %hu is truffle\n", this_idx); } diff --git a/src/nfa/accelcompile.cpp b/src/nfa/accelcompile.cpp index e0be910d..5da0df82 100644 --- a/src/nfa/accelcompile.cpp +++ b/src/nfa/accelcompile.cpp @@ -84,8 +84,9 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) { #endif DEBUG_PRINTF("attempting shufti for %zu chars\n", outs); - if (-1 != shuftiBuildMasks(info.single_stops, (u8 *)&aux->shufti.lo, - (u8 *)&aux->shufti.hi)) { + if (-1 != shuftiBuildMasks(info.single_stops, + reinterpret_cast(&aux->shufti.lo), + reinterpret_cast(&aux->shufti.hi))) { aux->accel_type = ACCEL_SHUFTI; aux->shufti.offset = offset; DEBUG_PRINTF("shufti built OK\n"); @@ -98,8 +99,9 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) { DEBUG_PRINTF("building Truffle for %zu chars\n", outs); aux->accel_type = ACCEL_TRUFFLE; aux->truffle.offset = offset; - truffleBuildMasks(info.single_stops, (u8 *)&aux->truffle.mask1, - (u8 *)&aux->truffle.mask2); + truffleBuildMasks(info.single_stops, + reinterpret_cast(&aux->truffle.mask1), + reinterpret_cast(&aux->truffle.mask2)); return; } @@ -219,8 +221,9 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) { c1, c2); return; } else if (outs2 <= 8 && - vermicelliDouble16Build(info.double_stop2, (u8 *)&aux->dverm16.mask, - (u8 *)&aux->dverm16.firsts)) { + vermicelliDouble16Build(info.double_stop2, + reinterpret_cast(&aux->dverm16.mask), + reinterpret_cast(&aux->dverm16.firsts))) { aux->accel_type = ACCEL_DVERM16; aux->dverm16.offset = offset; DEBUG_PRINTF("building double16-vermicelli\n"); @@ -254,9 +257,11 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) { aux->accel_type = ACCEL_DSHUFTI; aux->dshufti.offset = offset; if (shuftiBuildDoubleMasks( - info.double_stop1, info.double_stop2, (u8 *)&aux->dshufti.lo1, - (u8 *)&aux->dshufti.hi1, (u8 *)&aux->dshufti.lo2, - (u8 *)&aux->dshufti.hi2)) { + info.double_stop1, info.double_stop2, + reinterpret_cast(&aux->dshufti.lo1), + reinterpret_cast(&aux->dshufti.hi1), + reinterpret_cast(&aux->dshufti.lo2), + reinterpret_cast(&aux->dshufti.hi2))) { return; } } diff --git a/src/nfa/castle.c b/src/nfa/castle.c index 29208f8d..e6769ac7 100644 --- a/src/nfa/castle.c +++ b/src/nfa/castle.c @@ -94,8 +94,8 @@ char subCastleReportCurrent(const struct Castle *c, struct mq *q, const struct SubCastle *sub = getSubCastle(c, subIdx); const struct RepeatInfo *info = getRepeatInfo(sub); - union RepeatControl *rctrl = getControl(q->state, sub); - char *rstate = (char *)q->streamState + sub->streamStateOffset + + const union RepeatControl *rctrl = getControl(q->state, 
sub); + const char *rstate = (char *)q->streamState + sub->streamStateOffset + info->packedCtrlSize; enum RepeatMatch match = repeatHasMatch(info, rctrl, rstate, offset); @@ -118,10 +118,10 @@ int castleReportCurrent(const struct Castle *c, struct mq *q) { if (c->exclusive) { u8 *active = (u8 *)q->streamState; - u8 *groups = active + c->groupIterOffset; + const u8 *groups = active + c->groupIterOffset; for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { - u8 *cur = active + i * c->activeIdxSize; + const u8 *cur = active + i * c->activeIdxSize; const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); DEBUG_PRINTF("subcastle %u\n", activeIdx); if (subCastleReportCurrent(c, q, @@ -156,8 +156,8 @@ char subCastleInAccept(const struct Castle *c, struct mq *q, } const struct RepeatInfo *info = getRepeatInfo(sub); - union RepeatControl *rctrl = getControl(q->state, sub); - char *rstate = (char *)q->streamState + sub->streamStateOffset + + const union RepeatControl *rctrl = getControl(q->state, sub); + const char *rstate = (char *)q->streamState + sub->streamStateOffset + info->packedCtrlSize; enum RepeatMatch match = repeatHasMatch(info, rctrl, rstate, offset); @@ -180,10 +180,10 @@ char castleInAccept(const struct Castle *c, struct mq *q, if (c->exclusive) { u8 *active = (u8 *)q->streamState; - u8 *groups = active + c->groupIterOffset; + const u8 *groups = active + c->groupIterOffset; for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { - u8 *cur = active + i * c->activeIdxSize; + const u8 *cur = active + i * c->activeIdxSize; const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); DEBUG_PRINTF("subcastle %u\n", activeIdx); if (subCastleInAccept(c, q, report, offset, activeIdx)) { @@ -213,8 +213,8 @@ void subCastleDeactivateStaleSubs(const struct Castle *c, const u64a offset, const struct SubCastle *sub = getSubCastle(c, subIdx); const struct RepeatInfo *info = getRepeatInfo(sub); - union RepeatControl *rctrl = getControl(full_state, sub); - char *rstate = (char *)stream_state + sub->streamStateOffset + + const union RepeatControl *rctrl = getControl(full_state, sub); + const char *rstate = (char *)stream_state + sub->streamStateOffset + info->packedCtrlSize; if (repeatHasMatch(info, rctrl, rstate, offset) == REPEAT_STALE) { @@ -242,10 +242,10 @@ void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset, if (c->exclusive) { u8 *active = (u8 *)stream_state; - u8 *groups = active + c->groupIterOffset; + const u8 *groups = active + c->groupIterOffset; for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { - u8 *cur = active + i * c->activeIdxSize; + const u8 *cur = active + i * c->activeIdxSize; const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); DEBUG_PRINTF("subcastle %u\n", activeIdx); subCastleDeactivateStaleSubs(c, offset, full_state, @@ -329,8 +329,8 @@ void subCastleFindMatch(const struct Castle *c, const u64a begin, size_t *mloc, char *found, const u32 subIdx) { const struct SubCastle *sub = getSubCastle(c, subIdx); const struct RepeatInfo *info = getRepeatInfo(sub); - union RepeatControl *rctrl = getControl(full_state, sub); - char *rstate = (char *)stream_state + sub->streamStateOffset + + const union RepeatControl *rctrl = getControl(full_state, sub); + const char *rstate = (char *)stream_state + sub->streamStateOffset + 
info->packedCtrlSize; u64a match = repeatNextMatch(info, rctrl, rstate, begin); @@ -374,10 +374,10 @@ char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end, if (c->exclusive) { u8 *active = (u8 *)stream_state; - u8 *groups = active + c->groupIterOffset; + const u8 *groups = active + c->groupIterOffset; for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { - u8 *cur = active + i * c->activeIdxSize; + const u8 *cur = active + i * c->activeIdxSize; const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); DEBUG_PRINTF("subcastle %u\n", activeIdx); subCastleFindMatch(c, begin, end, full_state, stream_state, mloc, @@ -386,7 +386,7 @@ char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end, } if (c->exclusive != PURE_EXCLUSIVE) { - u8 *active = (u8 *)stream_state + c->activeOffset; + const u8 *active = (u8 *)stream_state + c->activeOffset; for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { @@ -400,8 +400,8 @@ char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end, } static really_inline -u64a subCastleNextMatch(const struct Castle *c, void *full_state, - void *stream_state, const u64a loc, +u64a subCastleNextMatch(const struct Castle *c, const void *full_state, + const void *stream_state, const u64a loc, const u32 subIdx) { DEBUG_PRINTF("subcastle %u\n", subIdx); const struct SubCastle *sub = getSubCastle(c, subIdx); @@ -489,15 +489,14 @@ char castleMatchLoop(const struct Castle *c, const u64a begin, const u64a end, // full_state (scratch). u64a offset = end; // min offset of next match - u32 activeIdx = 0; mmbit_clear(matching, c->numRepeats); if (c->exclusive) { u8 *active = (u8 *)stream_state; u8 *groups = active + c->groupIterOffset; for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { - u8 *cur = active + i * c->activeIdxSize; - activeIdx = partial_load_u32(cur, c->activeIdxSize); + const u8 *cur = active + i * c->activeIdxSize; + u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); u64a match = subCastleNextMatch(c, full_state, stream_state, loc, activeIdx); set_matching(c, match, groups, matching, c->numGroups, i, @@ -797,7 +796,7 @@ char nfaExecCastle_Q_i(const struct NFA *n, struct mq *q, s64a end, char found = 0; if (c->exclusive) { - u8 *groups = (u8 *)q->streamState + c->groupIterOffset; + const u8 *groups = (u8 *)q->streamState + c->groupIterOffset; found = mmbit_any(groups, c->numGroups); } @@ -864,7 +863,7 @@ char nfaExecCastle_Q_i(const struct NFA *n, struct mq *q, s64a end, } if (c->exclusive) { - u8 *groups = (u8 *)q->streamState + c->groupIterOffset; + const u8 *groups = (u8 *)q->streamState + c->groupIterOffset; if (mmbit_any_precise(groups, c->numGroups)) { return 1; } @@ -884,7 +883,7 @@ char nfaExecCastle_Q2(const struct NFA *n, struct mq *q, s64a end) { } static -s64a castleLastKillLoc(const struct Castle *c, struct mq *q) { +s64a castleLastKillLoc(const struct Castle *c, const struct mq *q) { assert(q_cur_type(q) == MQE_START); assert(q_last_type(q) == MQE_END); s64a sp = q_cur_loc(q); @@ -907,7 +906,6 @@ s64a castleLastKillLoc(const struct Castle *c, struct mq *q) { if (castleRevScan(c, q->history, sp + hlen, ep + hlen, &loc)) { return (s64a)loc - hlen; } - ep = 0; } return sp - 1; /* the repeats are never killed */ @@ -959,7 +957,7 @@ char nfaExecCastle_QR(const struct 
NFA *n, struct mq *q, ReportID report) { char found = 0; if (c->exclusive) { - u8 *groups = (u8 *)q->streamState + c->groupIterOffset; + const u8 *groups = (u8 *)q->streamState + c->groupIterOffset; found = mmbit_any_precise(groups, c->numGroups); } @@ -1007,10 +1005,10 @@ char nfaExecCastle_inAnyAccept(const struct NFA *n, struct mq *q) { if (c->exclusive) { u8 *active = (u8 *)q->streamState; - u8 *groups = active + c->groupIterOffset; + const u8 *groups = active + c->groupIterOffset; for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { - u8 *cur = active + i * c->activeIdxSize; + const u8 *cur = active + i * c->activeIdxSize; const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); DEBUG_PRINTF("subcastle %u\n", activeIdx); const struct SubCastle *sub = getSubCastle(c, activeIdx); @@ -1079,7 +1077,7 @@ void subCastleQueueCompressState(const struct Castle *c, const u32 subIdx, const struct mq *q, const u64a offset) { const struct SubCastle *sub = getSubCastle(c, subIdx); const struct RepeatInfo *info = getRepeatInfo(sub); - union RepeatControl *rctrl = getControl(q->state, sub); + const union RepeatControl *rctrl = getControl(q->state, sub); char *packed = (char *)q->streamState + sub->streamStateOffset; DEBUG_PRINTF("sub %u next match %llu\n", subIdx, repeatNextMatch(info, rctrl, @@ -1100,10 +1098,10 @@ char nfaExecCastle_queueCompressState(const struct NFA *n, const struct mq *q, DEBUG_PRINTF("offset=%llu\n", offset); if (c->exclusive) { u8 *active = (u8 *)q->streamState; - u8 *groups = active + c->groupIterOffset; + const u8 *groups = active + c->groupIterOffset; for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { - u8 *cur = active + i * c->activeIdxSize; + const u8 *cur = active + i * c->activeIdxSize; const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); DEBUG_PRINTF("packing state for sub %u\n", activeIdx); subCastleQueueCompressState(c, activeIdx, q, offset); diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 2b157d1c..18411a85 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -106,25 +106,27 @@ void writeCastleScanEngine(const CharReach &cr, Castle *c) { #ifdef HAVE_SVE2 if (cr.count() <= 16) { c->type = CASTLE_NVERM16; - vermicelli16Build(cr, (u8 *)&c->u.verm16.mask); + vermicelli16Build(cr, reinterpret_cast(&c->u.verm16.mask)); return; } if (negated.count() <= 16) { c->type = CASTLE_VERM16; - vermicelli16Build(negated, (u8 *)&c->u.verm16.mask); + vermicelli16Build(negated, reinterpret_cast(&c->u.verm16.mask)); return; } #endif // HAVE_SVE2 - if (shuftiBuildMasks(negated, (u8 *)&c->u.shuf.mask_lo, - (u8 *)&c->u.shuf.mask_hi) != -1) { + if (shuftiBuildMasks(negated, + reinterpret_cast(&c->u.shuf.mask_lo), + reinterpret_cast(&c->u.shuf.mask_hi)) != -1) { c->type = CASTLE_SHUFTI; return; } c->type = CASTLE_TRUFFLE; - truffleBuildMasks(negated, (u8 *)(u8 *)&c->u.truffle.mask1, - (u8 *)&c->u.truffle.mask2); + truffleBuildMasks(negated, + reinterpret_cast(&c->u.truffle.mask1), + reinterpret_cast(&c->u.truffle.mask2)); } static @@ -227,11 +229,13 @@ vector removeClique(CliqueGraph &cg) { while (!graph_empty(cg)) { const vector &c = cliquesVec.back(); vector dead; - for (const auto &v : vertices_range(cg)) { - if (find(c.begin(), c.end(), cg[v].stateId) != c.end()) { - dead.emplace_back(v); - } - } + + auto deads = [&c=c, &cg=cg](const CliqueVertex &v) { + return (find(c.begin(), 
c.end(), cg[v].stateId) != c.end()); + }; + const auto &vr = vertices_range(cg); + std::copy_if(begin(vr), end(vr), std::back_inserter(dead), deads); + for (const auto &v : dead) { clear_vertex(v, cg); remove_vertex(v, cg); @@ -294,7 +298,7 @@ vector> checkExclusion(u32 &streamStateSize, size_t lower = 0; size_t total = 0; while (lower < trigSize) { - vector vertices; + vector clvertices; unique_ptr cg = make_unique(); vector> min_reset_dist; @@ -302,7 +306,7 @@ vector> checkExclusion(u32 &streamStateSize, // get min reset distance for each repeat for (size_t i = lower; i < upper; i++) { CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg); - vertices.emplace_back(v); + clvertices.emplace_back(v); const vector &tmp_dist = minResetDistToEnd(triggers[i], cr); @@ -311,11 +315,11 @@ vector> checkExclusion(u32 &streamStateSize, // find exclusive pair for each repeat for (size_t i = lower; i < upper; i++) { - CliqueVertex s = vertices[i - lower]; + CliqueVertex s = clvertices[i - lower]; for (size_t j = i + 1; j < upper; j++) { if (findExclusivePair(i, j, lower, min_reset_dist, triggers)) { - CliqueVertex d = vertices[j - lower]; + CliqueVertex d = clvertices[j - lower]; add_edge(s, d, *cg); } } @@ -600,9 +604,9 @@ buildCastle(const CastleProto &proto, nfa->minWidth = verify_u32(minWidth); nfa->maxWidth = maxWidth.is_finite() ? verify_u32(maxWidth) : 0; - char * const base_ptr = (char *)nfa.get() + sizeof(NFA); + char * const base_ptr = reinterpret_cast(nfa.get()) + sizeof(NFA); char *ptr = base_ptr; - Castle *c = (Castle *)ptr; + Castle *c = reinterpret_cast(ptr); c->numRepeats = verify_u32(subs.size()); c->numGroups = exclusiveInfo.numGroups; c->exclusive = verify_s8(exclusive); @@ -613,7 +617,7 @@ buildCastle(const CastleProto &proto, writeCastleScanEngine(cr, c); ptr += sizeof(Castle); - SubCastle *subCastles = ((SubCastle *)(ROUNDUP_PTR(ptr, alignof(u32)))); + SubCastle *subCastles = reinterpret_cast(ROUNDUP_PTR(ptr, alignof(u32))); copy(subs.begin(), subs.end(), subCastles); u32 length = 0; @@ -623,16 +627,16 @@ buildCastle(const CastleProto &proto, SubCastle *sub = &subCastles[i]; sub->repeatInfoOffset = offset; - ptr = (char *)sub + offset; + ptr = reinterpret_cast(sub) + offset; memcpy(ptr, &infos[i], sizeof(RepeatInfo)); if (patchSize[i]) { - RepeatInfo *info = (RepeatInfo *)ptr; - u64a *table = ((u64a *)(ROUNDUP_PTR(((char *)(info) + - sizeof(*info)), alignof(u64a)))); + RepeatInfo *info = reinterpret_cast(ptr); + u64a *table = reinterpret_cast(ROUNDUP_PTR(info + + sizeof(*info), alignof(u64a))); copy(tables.begin() + tableIdx, tables.begin() + tableIdx + patchSize[i], table); - u32 diff = (char *)table - (char *)info + + u32 diff = reinterpret_cast(table) - reinterpret_cast(info) + sizeof(u64a) * patchSize[i]; info->length = diff; length += diff; @@ -655,7 +659,6 @@ buildCastle(const CastleProto &proto, if (!stale_iter.empty()) { c->staleIterOffset = verify_u32(ptr - base_ptr); copy_bytes(ptr, stale_iter); - ptr += byte_length(stale_iter); } return nfa; @@ -922,7 +925,7 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) { u32 min_bound = pr.bounds.min; // always finite if (min_bound == 0) { // Vacuous case, we can only do this once. 
assert(!edge(g.start, g.accept, g).second); - NFAEdge e = add_edge(g.start, g.accept, g); + NFAEdge e = add_edge(g.start, g.accept, g).first; g[e].tops.insert(top); g[u].reports.insert(pr.reports.begin(), pr.reports.end()); min_bound = 1; @@ -931,7 +934,7 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) { for (u32 i = 0; i < min_bound; i++) { NFAVertex v = add_vertex(g); g[v].char_reach = pr.reach; - NFAEdge e = add_edge(u, v, g); + NFAEdge e = add_edge(u, v, g).first; if (u == g.start) { g[e].tops.insert(top); } @@ -950,7 +953,7 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) { if (head != u) { add_edge(head, v, g); } - NFAEdge e = add_edge(u, v, g); + NFAEdge e = add_edge(u, v, g).first; if (u == g.start) { g[e].tops.insert(top); } diff --git a/src/nfa/dfa_min.cpp b/src/nfa/dfa_min.cpp index 2d251a89..158d0aee 100644 --- a/src/nfa/dfa_min.cpp +++ b/src/nfa/dfa_min.cpp @@ -305,6 +305,7 @@ void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) { DEBUG_PRINTF("dfa is empty\n"); } + // cppcheck-suppress unreadVariable UNUSED const size_t states_before = rdfa.states.size(); HopcroftInfo info(rdfa); diff --git a/src/nfa/gough.c b/src/nfa/gough.c index 44acd4c2..f0d5bc64 100644 --- a/src/nfa/gough.c +++ b/src/nfa/gough.c @@ -978,14 +978,14 @@ char nfaExecGough16_initCompressedState(const struct NFA *nfa, u64a offset, char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) { const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); NfaCallback cb = q->cb; - void *ctxt = q->context; u8 s = *(u8 *)q->state; u64a offset = q_cur_offset(q); - struct gough_som_info *som = getSomInfo(q->state); + const struct gough_som_info *som = getSomInfo(q->state); assert(q_cur_type(q) == MQE_START); assert(s); if (s >= m->accept_limit_8) { + void *ctxt = q->context; u32 cached_accept_id = 0; u16 cached_accept_state = 0; u32 cached_accept_som = 0; @@ -1000,16 +1000,16 @@ char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) { char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q) { const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); NfaCallback cb = q->cb; - void *ctxt = q->context; u16 s = *(u16 *)q->state; const struct mstate_aux *aux = get_aux(m, s); u64a offset = q_cur_offset(q); - struct gough_som_info *som = getSomInfo(q->state); + const struct gough_som_info *som = getSomInfo(q->state); assert(q_cur_type(q) == MQE_START); DEBUG_PRINTF("state %hu\n", s); assert(s); if (aux->accept) { + void *ctxt = q->context; u32 cached_accept_id = 0; u16 cached_accept_state = 0; u32 cached_accept_som = 0; diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp index e3f0a710..b4b6b58b 100644 --- a/src/nfa/goughcompile.cpp +++ b/src/nfa/goughcompile.cpp @@ -132,7 +132,7 @@ void GoughSSAVarMin::replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) { } static -void translateRawReports(UNUSED GoughGraph &cfg, UNUSED const raw_som_dfa &raw, +void translateRawReports(UNUSED const GoughGraph &cfg, UNUSED const raw_som_dfa &raw, const flat_map &joins_at_s, UNUSED GoughVertex s, const set &reports_in, @@ -206,10 +206,6 @@ void makeCFG_top_edge(GoughGraph &cfg, const vector &vertices, assert(contains(src_slots, slot_id)); shared_ptr vmin = make_shared(); - if (!vmin) { - assert(0); - throw std::bad_alloc(); - } cfg[e].vars.emplace_back(vmin); final_var = vmin.get(); @@ -321,10 +317,6 @@ void makeCFG_edge(GoughGraph &cfg, const map &som_creators, DEBUG_PRINTF("bypassing min on join %u\n", slot_id); } else { shared_ptr vmin = 
make_shared(); - if (!vmin) { - assert(0); - throw std::bad_alloc(); - } cfg[e].vars.emplace_back(vmin); final_var = vmin.get(); @@ -441,10 +433,11 @@ unique_ptr makeCFG(const raw_som_dfa &raw) { } static +// cppcheck-suppress constParameterReference void copy_propagate_report_set(vector > &rep) { vector >::iterator it = rep.begin(); while (it != rep.end()) { - GoughSSAVar *var = it->second; + const GoughSSAVar *var = it->second; if (!var) { ++it; continue; @@ -546,7 +539,7 @@ void remove_dead(GoughGraph &g) { } while (!queue.empty()) { - GoughSSAVar *v = queue.back(); + const GoughSSAVar *v = queue.back(); queue.pop_back(); for (GoughSSAVar *var : v->get_inputs()) { if (var->seen) { @@ -659,8 +652,8 @@ GoughSSAVar *GoughSSAVarJoin::get_input(const GoughEdge &prev) const { return nullptr; } -const flat_set &GoughSSAVarJoin::get_edges_for_input( - GoughSSAVar *input) const { +// cppcheck-suppress constParameterPointer +const flat_set &GoughSSAVarJoin::get_edges_for_input(GoughSSAVar *input) const { return input_map.at(input); } @@ -811,7 +804,7 @@ private: static void prep_joins_for_generation(const GoughGraph &g, GoughVertex v, - map *edge_info) { + map &edge_info) { DEBUG_PRINTF("writing out joins for %u\n", g[v].state_id); for (const auto &var : g[v].vars) { u32 dest_slot = var->slot; @@ -822,7 +815,7 @@ void prep_joins_for_generation(const GoughGraph &g, GoughVertex v, } for (const GoughEdge &incoming_edge : var_edges.second) { - (*edge_info)[incoming_edge].insert(input, dest_slot); + edge_info[incoming_edge].insert(input, dest_slot); DEBUG_PRINTF("need %u<-%u\n", dest_slot, input); } } @@ -920,7 +913,7 @@ void build_blocks(const GoughGraph &g, } map eji; - prep_joins_for_generation(g, t, &eji); + prep_joins_for_generation(g, t, eji); for (auto &m : eji) { vector &block = (*blocks)[gough_edge_id(g, m.first)]; @@ -1019,7 +1012,7 @@ void update_accel_prog_offset(const gough_build_strat &gbs, verts[gbs.gg[v].state_id] = v; } - for (auto &m : gbs.built_accel) { + for (const auto &m : gbs.built_accel) { gough_accel *ga = m.first; assert(!ga->prog_offset); GoughVertex v = verts[m.second]; @@ -1052,7 +1045,7 @@ bytecode_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, || !cc.streaming); if (!cc.grey.allowGough) { - return nullptr; + return bytecode_ptr(nullptr); } DEBUG_PRINTF("hello world\n"); @@ -1083,11 +1076,12 @@ bytecode_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, auto basic_dfa = mcclellanCompile_i(raw, gbs, cc); assert(basic_dfa); if (!basic_dfa) { - return nullptr; + return bytecode_ptr(nullptr); } - u8 alphaShift - = ((const mcclellan *)getImplNfa(basic_dfa.get()))->alphaShift; + // cppcheck-suppress cstyleCast + const auto nfa = static_cast(getImplNfa(basic_dfa.get())); + u8 alphaShift = nfa->alphaShift; u32 edge_count = (1U << alphaShift) * raw.states.size(); u32 curr_offset = ROUNDUP_N(basic_dfa->length, 4); @@ -1128,8 +1122,8 @@ bytecode_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, u32 gough_size = ROUNDUP_N(curr_offset, 16); auto gough_dfa = make_zeroed_bytecode_ptr(gough_size); - memcpy(gough_dfa.get(), basic_dfa.get(), basic_dfa->length); - memcpy((char *)gough_dfa.get() + haig_offset, &gi, sizeof(gi)); + memcpy(reinterpret_cast(gough_dfa.get()), basic_dfa.get(), basic_dfa->length); + memcpy(reinterpret_cast(gough_dfa.get()) + haig_offset, &gi, sizeof(gi)); if (gough_dfa->type == MCCLELLAN_NFA_16) { gough_dfa->type = GOUGH_NFA_16; } else { @@ -1142,18 +1136,19 @@ bytecode_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, gough_dfa->streamStateSize = 
base_state_size + slot_count * somPrecision; gough_dfa->scratchStateSize = (u32)(16 + scratch_slot_count * sizeof(u64a)); - mcclellan *m = (mcclellan *)getMutableImplNfa(gough_dfa.get()); + // cppcheck-suppress cstyleCast + auto *m = reinterpret_cast(getMutableImplNfa(gough_dfa.get())); m->haig_offset = haig_offset; /* update nfa length, haig_info offset (leave mcclellan length alone) */ gough_dfa->length = gough_size; /* copy in blocks */ - copy_bytes((u8 *)gough_dfa.get() + edge_prog_offset, edge_blocks); + copy_bytes(reinterpret_cast(gough_dfa.get()) + edge_prog_offset, edge_blocks); if (top_prog_offset) { - copy_bytes((u8 *)gough_dfa.get() + top_prog_offset, top_blocks); + copy_bytes(reinterpret_cast(gough_dfa.get()) + top_prog_offset, top_blocks); } - copy_bytes((u8 *)gough_dfa.get() + prog_base_offset, temp_blocks); + copy_bytes(reinterpret_cast(gough_dfa.get()) + prog_base_offset, temp_blocks); return gough_dfa; } @@ -1186,7 +1181,7 @@ AccelScheme gough_build_strat::find_escape_strings(dstate_id_t this_idx) const { void gough_build_strat::buildAccel(dstate_id_t this_idx, const AccelScheme &info, void *accel_out) { assert(mcclellan_build_strat::accelSize() == sizeof(AccelAux)); - gough_accel *accel = (gough_accel *)accel_out; + gough_accel *accel = reinterpret_cast(accel_out); /* build a plain accelaux so we can work out where we can get to */ mcclellan_build_strat::buildAccel(this_idx, info, &accel->accel); DEBUG_PRINTF("state %hu is accel with type %hhu\n", this_idx, @@ -1324,7 +1319,8 @@ void raw_gough_report_info_impl::fillReportLists(NFA *n, size_t base_offset, for (const raw_gough_report_list &r : rl) { ro.emplace_back(base_offset); - gough_report_list *p = (gough_report_list *)((char *)n + base_offset); + u8 * n_ptr = reinterpret_cast(n); + gough_report_list *p = reinterpret_cast(n_ptr + base_offset); u32 i = 0; for (const som_report &sr : r.reports) { diff --git a/src/nfa/goughcompile_reg.cpp b/src/nfa/goughcompile_reg.cpp index 6096d9f7..92131306 100644 --- a/src/nfa/goughcompile_reg.cpp +++ b/src/nfa/goughcompile_reg.cpp @@ -195,7 +195,7 @@ void handle_pending_vars(GoughSSAVar *def, const GoughGraph &g, if (contains(aux.containing_v, var)) { /* def is used by join vertex, value only needs to be live on some * incoming edges */ - GoughSSAVarJoin *vj = (GoughSSAVarJoin *)var; + const GoughSSAVarJoin *vj = reinterpret_cast(var); const flat_set &live_edges = vj->get_edges_for_input(def); for (const auto &e : live_edges) { @@ -279,7 +279,7 @@ set live_during(GoughSSAVar *def, const GoughGraph &g, template void set_initial_slots(const vector &vars, u32 *next_slot) { - for (auto &var : vars) { + for (const auto &var : vars) { assert(var->slot == INVALID_SLOT); var->slot = (*next_slot)++; } @@ -440,7 +440,7 @@ void create_slot_mapping(const GoughGraph &cfg, UNUSED u32 old_slot_count, } static -void update_local_slots(GoughGraph &g, set &locals, +void update_local_slots(GoughGraph &g, const set &locals, u32 local_base) { DEBUG_PRINTF("%zu local variables\n", locals.size()); /* local variables only occur on edges (joins are never local) */ diff --git a/src/nfa/limex.h b/src/nfa/limex.h index 0223604d..03fcca0e 100644 --- a/src/nfa/limex.h +++ b/src/nfa/limex.h @@ -56,7 +56,7 @@ extern "C" char gf_name##_Q(const struct NFA *n, struct mq *q, s64a end); \ char gf_name##_Q2(const struct NFA *n, struct mq *q, s64a end); \ char gf_name##_QR(const struct NFA *n, struct mq *q, ReportID report); \ - char gf_name##_reportCurrent(const struct NFA *n, struct mq *q); \ + char 
gf_name##_reportCurrent(const struct NFA *n, const struct mq *q); \ char gf_name##_inAccept(const struct NFA *n, ReportID report, \ struct mq *q); \ char gf_name##_inAnyAccept(const struct NFA *n, struct mq *q); \ diff --git a/src/nfa/limex_common_impl.h b/src/nfa/limex_common_impl.h index e441945d..48661871 100644 --- a/src/nfa/limex_common_impl.h +++ b/src/nfa/limex_common_impl.h @@ -332,7 +332,7 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx, // UE-1636) need to guard cyclic tug-accepts as well. static really_inline char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, - union RepeatControl *repeat_ctrl, char *repeat_state, + const union RepeatControl *repeat_ctrl, const char *repeat_state, u64a offset, ReportID report) { assert(limex); @@ -382,7 +382,7 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, static really_inline char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, - union RepeatControl *repeat_ctrl, char *repeat_state, + const union RepeatControl *repeat_ctrl, const char *repeat_state, u64a offset) { assert(limex); diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 54859f6a..1e2a0521 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -290,7 +290,7 @@ void maskSetBits(Mask &m, const NFAStateSet &bits) { template bool isMaskZero(Mask &m) { - u8 *m8 = (u8 *)&m; + const u8 *m8 = (u8 *)&m; for (u32 i = 0; i < sizeof(m); i++) { if (m8[i]) { return false; @@ -329,11 +329,11 @@ void buildReachMapping(const build_info &args, vector &reach, // Build a list of vertices with a state index assigned. vector verts; verts.reserve(args.num_states); - for (auto v : vertices_range(h)) { - if (state_ids.at(v) != NO_STATE) { - verts.emplace_back(v); - } - } + auto sidat = [&state_ids=state_ids](const NFAVertex &v) { + return (state_ids.at(v) != NO_STATE); + }; + const auto &vr = vertices_range(h); + std::copy_if(begin(vr), end(vr), std::back_inserter(verts), sidat); // Build a mapping from set-of-states -> reachability. map mapping; @@ -556,7 +556,8 @@ void filterAccelStates(NGHolder &g, const map> &tops, // Similarly, connect (start, startDs) if necessary. if (!edge(g.start, g.startDs, g).second) { - NFAEdge e = add_edge(g.start, g.startDs, g); + NFAEdge e; + std::tie(e, std::ignore) = add_edge(g.start, g.startDs, g); tempEdges.emplace_back(e); // Remove edge later. } @@ -1485,6 +1486,7 @@ u32 buildExceptionMap(const build_info &args, ReportListCache &reports_cache, continue; } u32 j = args.state_ids.at(w); + // j can be NO_STATE if args.state_ids.at(w) returns NO_STATE if (j == NO_STATE) { continue; } @@ -1576,7 +1578,7 @@ u32 findMaxVarShift(const build_info &args, u32 nShifts) { static int getLimexScore(const build_info &args, u32 nShifts) { const NGHolder &h = args.h; - u32 maxVarShift = nShifts; + u32 maxVarShift; int score = 0; score += SHIFT_COST * nShifts; @@ -1704,7 +1706,7 @@ struct Factory { static void allocState(NFA *nfa, u32 repeatscratchStateSize, u32 repeatStreamState) { - implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa); + const implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa); // LimEx NFAs now store the following in state: // 1. state bitvector (always present) @@ -2222,7 +2224,7 @@ struct Factory { static bytecode_ptr generateNfa(const build_info &args) { if (args.num_states > NFATraits::maxStates) { - return nullptr; + return bytecode_ptr(nullptr); } // Build bounded repeat structures. 
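A pattern worth calling out between these hunks: throughout this patch, failure paths that used to `return nullptr;` now return an explicitly constructed empty handle such as `bytecode_ptr<NFA>(nullptr)`. A minimal sketch of the idea follows; `bytecode_ptr_sketch`, `NFA_stub` and `compile_stub` are hypothetical names, not the real `ue2::bytecode_ptr` API, and the assumption that construction from a null handle is explicit is mine (the same edit also reads as simply making the conversion visible to static analysis).

```
#include <cstddef>
#include <memory>

// Move-only handle over compiled bytecode; all constructors are explicit.
template <typename T>
class bytecode_ptr_sketch {
public:
    bytecode_ptr_sketch() = default;
    explicit bytecode_ptr_sketch(std::nullptr_t) {} // empty handle, spelled out
    explicit bytecode_ptr_sketch(T *p) : ptr(p) {}
    explicit operator bool() const { return static_cast<bool>(ptr); }

private:
    std::unique_ptr<T> ptr;
};

struct NFA_stub {}; // stand-in for struct NFA

bytecode_ptr_sketch<NFA_stub> compile_stub(bool ok) {
    if (!ok) {
        // A bare "return nullptr;" would not compile against the explicit
        // constructor; the empty handle must be constructed by name.
        return bytecode_ptr_sketch<NFA_stub>(nullptr);
    }
    return bytecode_ptr_sketch<NFA_stub>(new NFA_stub());
}
```

Once construction is explicit, every failure return has to name the concrete handle type it produces, which is exactly the shape of the surrounding edits.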
@@ -2581,7 +2583,7 @@ bytecode_ptr generate(NGHolder &h, if (!cc.grey.allowLimExNFA) { DEBUG_PRINTF("limex not allowed\n"); - return nullptr; + return bytecode_ptr(nullptr); } // If you ask for a particular type, it had better be an NFA. @@ -2616,7 +2618,7 @@ bytecode_ptr generate(NGHolder &h, if (scores.empty()) { DEBUG_PRINTF("No NFA returned a valid score for this case.\n"); - return nullptr; + return bytecode_ptr(nullptr); } // Sort acceptable models in priority order, lowest score first. @@ -2635,7 +2637,7 @@ bytecode_ptr generate(NGHolder &h, } DEBUG_PRINTF("NFA build failed.\n"); - return nullptr; + return bytecode_ptr(nullptr); } u32 countAccelStates(NGHolder &h, diff --git a/src/nfa/limex_exceptional.h b/src/nfa/limex_exceptional.h index c9de3aed..f472aee4 100644 --- a/src/nfa/limex_exceptional.h +++ b/src/nfa/limex_exceptional.h @@ -302,8 +302,8 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ, } #else // A copy of the estate as an array of GPR-sized chunks. - CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; - CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; + CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; // cppcheck-suppress duplicateExpression + CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; // cppcheck-suppress duplicateExpression #ifdef ESTATE_ON_STACK memcpy(chunks, &estate, sizeof(STATE_T)); #else @@ -311,7 +311,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ, #endif memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T)); - u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)]; + u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)]; // cppcheck-suppress duplicateExpression base_index[0] = 0; for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) { base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]); diff --git a/src/nfa/limex_runtime_impl.h b/src/nfa/limex_runtime_impl.h index 7b89182b..d3ddb100 100644 --- a/src/nfa/limex_runtime_impl.h +++ b/src/nfa/limex_runtime_impl.h @@ -927,7 +927,7 @@ char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state, context); } -char JOIN(LIMEX_API_ROOT, _reportCurrent)(const struct NFA *n, struct mq *q) { +char JOIN(LIMEX_API_ROOT, _reportCurrent)(const struct NFA *n, const struct mq *q) { const IMPL_NFA_T *limex = getImplNfa(n); REPORTCURRENT_FN(limex, q); return 1; @@ -984,9 +984,9 @@ char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa, assert(q->state && q->streamState); const IMPL_NFA_T *limex = getImplNfa(nfa); - union RepeatControl *repeat_ctrl = + const union RepeatControl *repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); - char *repeat_state = q->streamState + limex->stateSize; + const char *repeat_state = q->streamState + limex->stateSize; STATE_T state = *(STATE_T *)q->state; u64a offset = q->offset + q_last_loc(q) + 1; @@ -999,9 +999,9 @@ char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) { assert(q->state && q->streamState); const IMPL_NFA_T *limex = getImplNfa(nfa); - union RepeatControl *repeat_ctrl = + const union RepeatControl *repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); - char *repeat_state = q->streamState + limex->stateSize; + const char *repeat_state = q->streamState + limex->stateSize; STATE_T state = *(STATE_T *)q->state; u64a offset = q->offset + q_last_loc(q) + 1; @@ -1020,9 +1020,9 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( if (limex->repeatCount) { u64a offset = q->offset + loc + 1; - union RepeatControl *repeat_ctrl = + 
const union RepeatControl *repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); - char *repeat_state = q->streamState + limex->stateSize; + const char *repeat_state = q->streamState + limex->stateSize; SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &state); } diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index c731e3f0..0b403af7 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -177,7 +177,7 @@ static mstate_aux *getAux(NFA *n, dstate_id_t i) { assert(isMcClellanType(n->type)); - mcclellan *m = (mcclellan *)getMutableImplNfa(n); + const mcclellan *m = (mcclellan *)getMutableImplNfa(n); mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset); mstate_aux *aux = aux_base + i; @@ -203,7 +203,7 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { continue; } - mstate_aux *aux = getAux(n, succ_table[c_prime]); + const mstate_aux *aux = getAux(n, succ_table[c_prime]); if (aux->accept) { succ_table[c_prime] |= ACCEPT_FLAG; @@ -232,7 +232,7 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { continue; } - mstate_aux *aux = getAux(n, succ_i); + const mstate_aux *aux = getAux(n, succ_i); if (aux->accept) { succ_i |= ACCEPT_FLAG; @@ -262,7 +262,7 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { // check successful transition u16 next = unaligned_load_u16((u8 *)trans); if (next < wide_limit) { - mstate_aux *aux = getAux(n, next); + const mstate_aux *aux = getAux(n, next); if (aux->accept) { next |= ACCEPT_FLAG; } @@ -279,7 +279,7 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { if (next_k >= wide_limit) { continue; } - mstate_aux *aux_k = getAux(n, next_k); + const mstate_aux *aux_k = getAux(n, next_k); if (aux_k->accept) { next_k |= ACCEPT_FLAG; } @@ -362,7 +362,7 @@ struct raw_report_list { raw_report_list(const flat_set &reports_in, const ReportManager &rm, bool do_remap) { if (do_remap) { - for (auto &id : reports_in) { + for (const auto &id : reports_in) { reports.insert(rm.getProgramOffset(id)); } } else { @@ -546,7 +546,7 @@ size_t calcWideRegionSize(const dfa_info &info) { static void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info, const vector &reports, const vector &reports_eod, - vector &reportOffsets) { + const vector &reportOffsets) { const dstate &raw_state = info.states[i]; aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]]; aux->accept_eod = raw_state.reports_eod.empty() ? 0 @@ -631,7 +631,7 @@ bytecode_ptr mcclellanCompile16(dfa_info &info, const CompileContext &cc, if (!allocateFSN16(info, &count_real_states, &wide_limit)) { DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", info.size()); - return nullptr; + return bytecode_ptr(nullptr); } DEBUG_PRINTF("count_real_states: %d\n", count_real_states); @@ -800,8 +800,8 @@ bytecode_ptr mcclellanCompile16(dfa_info &info, const CompileContext &cc, } for (size_t i : order) { - vector &state_chain = info.wide_state_chain[i]; - vector &symbol_chain = info.wide_symbol_chain[i]; + const vector &state_chain = info.wide_state_chain[i]; + const vector &symbol_chain = info.wide_symbol_chain[i]; u16 width = verify_u16(symbol_chain.size()); *(u16 *)(curr_wide_entry + WIDE_WIDTH_OFFSET) = width; @@ -1373,11 +1373,11 @@ bool store_chain_longest(vector> &candidate_chain, /* \brief Generate wide_symbol_chain from wide_state_chain. 
*/ static void generate_symbol_chain(dfa_info &info, vector &chain_tail) { - raw_dfa &rdfa = info.raw; + const raw_dfa &rdfa = info.raw; assert(chain_tail.size() == info.wide_state_chain.size()); for (size_t i = 0; i < info.wide_state_chain.size(); i++) { - vector &state_chain = info.wide_state_chain[i]; + const vector &state_chain = info.wide_state_chain[i]; vector symbol_chain; info.extra[state_chain[0]].wideHead = true; @@ -1385,7 +1385,6 @@ void generate_symbol_chain(dfa_info &info, vector &chain_tail) { for (size_t j = 0; j < width; j++) { dstate_id_t curr_id = state_chain[j]; - dstate_id_t next_id = state_chain[j + 1]; // The last state of the chain doesn't belong to a wide state. info.extra[curr_id].wideState = true; @@ -1394,6 +1393,7 @@ void generate_symbol_chain(dfa_info &info, vector &chain_tail) { if (j == width - 1) { symbol_chain.emplace_back(chain_tail[i]); } else { + dstate_id_t next_id = state_chain[j + 1]; for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) { if (rdfa.states[curr_id].next[sym] == next_id) { symbol_chain.emplace_back(sym); diff --git a/src/nfa/mcsheng_compile.cpp b/src/nfa/mcsheng_compile.cpp index 622362be..0ca31c99 100644 --- a/src/nfa/mcsheng_compile.cpp +++ b/src/nfa/mcsheng_compile.cpp @@ -144,11 +144,11 @@ u8 dfa_info::getAlphaShift() const { static mstate_aux *getAux(NFA *n, dstate_id_t i) { - mcsheng *m = (mcsheng *)getMutableImplNfa(n); - mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset); + const mcsheng *m = reinterpret_cast(getMutableImplNfa(n)); + mstate_aux *aux_base = reinterpret_cast(reinterpret_cast(n) + m->aux_offset); mstate_aux *aux = aux_base + i; - assert((const char *)aux < (const char *)n + m->length); + assert(reinterpret_cast(aux) < reinterpret_cast(n) + m->length); return aux; } @@ -192,8 +192,8 @@ void createShuffleMasks(mcsheng *m, const dfa_info &info, } for (u32 i = 0; i < N_CHARS; i++) { assert(info.alpha_remap[i] != info.alpha_remap[TOP]); - memcpy((u8 *)&m->sheng_masks[i], - (u8 *)masks[info.alpha_remap[i]].data(), sizeof(m128)); + memcpy(reinterpret_cast(&m->sheng_masks[i]), + reinterpret_cast(masks[info.alpha_remap[i]].data()), sizeof(m128)); } m->sheng_end = sheng_end; m->sheng_accel_limit = sheng_end - 1; @@ -223,7 +223,7 @@ void populateBasicInfo(size_t state_size, const dfa_info &info, nfa->type = MCSHENG_NFA_16; } - mcsheng *m = (mcsheng *)getMutableImplNfa(nfa); + mcsheng *m = reinterpret_cast(getMutableImplNfa(nfa)); for (u32 i = 0; i < 256; i++) { m->remap[i] = verify_u8(info.alpha_remap[i]); } @@ -244,11 +244,11 @@ void populateBasicInfo(size_t state_size, const dfa_info &info, static mstate_aux *getAux64(NFA *n, dstate_id_t i) { - mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(n); - mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset); + const mcsheng64 *m = reinterpret_cast(getMutableImplNfa(n)); + mstate_aux *aux_base = reinterpret_cast(reinterpret_cast(n) + m->aux_offset); mstate_aux *aux = aux_base + i; - assert((const char *)aux < (const char *)n + m->length); + assert(reinterpret_cast(aux) < reinterpret_cast(n) + m->length); return aux; } @@ -292,8 +292,8 @@ void createShuffleMasks64(mcsheng64 *m, const dfa_info &info, } for (u32 i = 0; i < N_CHARS; i++) { assert(info.alpha_remap[i] != info.alpha_remap[TOP]); - memcpy((u8 *)&m->sheng_succ_masks[i], - (u8 *)masks[info.alpha_remap[i]].data(), sizeof(m512)); + memcpy(reinterpret_cast(&m->sheng_succ_masks[i]), + reinterpret_cast(masks[info.alpha_remap[i]].data()), sizeof(m512)); } m->sheng_end = sheng_end; m->sheng_accel_limit = 
sheng_end - 1; @@ -323,7 +323,7 @@ void populateBasicInfo64(size_t state_size, const dfa_info &info, nfa->type = MCSHENG_64_NFA_16; } - mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa); + mcsheng64 *m = reinterpret_cast(getMutableImplNfa(nfa)); for (u32 i = 0; i < 256; i++) { m->remap[i] = verify_u8(info.alpha_remap[i]); } @@ -534,7 +534,7 @@ double leakiness(const RdfaGraph &g, dfa_info &info, static dstate_id_t find_sheng_states(dfa_info &info, - map &accel_escape_info, + const map &accel_escape_info, size_t max_sheng_states) { RdfaGraph g(info.raw); auto cyclics = find_vertices_in_cycles(g); @@ -650,7 +650,7 @@ void fill_in_aux_info(NFA *nfa, const dfa_info &info, const vector &reports_eod, u32 report_base_offset, const raw_report_info &ri) { - mcsheng *m = (mcsheng *)getMutableImplNfa(nfa); + mcsheng *m = reinterpret_cast(getMutableImplNfa(nfa)); vector reportOffsets; @@ -667,14 +667,14 @@ void fill_in_aux_info(NFA *nfa, const dfa_info &info, assert(accel_offset <= accel_end_offset); assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); info.strat.buildAccel(i, accel_escape_info.at(i), - (void *)((char *)m + this_aux->accel_offset)); + reinterpret_cast(reinterpret_cast(m) + this_aux->accel_offset)); } } } static u16 get_edge_flags(NFA *nfa, dstate_id_t target_impl_id) { - mstate_aux *aux = getAux(nfa, target_impl_id); + const mstate_aux *aux = getAux(nfa, target_impl_id); u16 flags = 0; if (aux->accept) { @@ -692,7 +692,7 @@ static void fill_in_succ_table_16(NFA *nfa, const dfa_info &info, dstate_id_t sheng_end, UNUSED dstate_id_t sherman_base) { - u16 *succ_table = (u16 *)((char *)nfa + sizeof(NFA) + sizeof(mcsheng)); + u16 *succ_table = reinterpret_cast(reinterpret_cast(nfa) + sizeof(NFA) + sizeof(mcsheng)); u8 alphaShift = info.getAlphaShift(); assert(alphaShift <= 8); @@ -724,7 +724,7 @@ void fill_in_aux_info64(NFA *nfa, const dfa_info &info, const vector &reports_eod, u32 report_base_offset, const raw_report_info &ri) { - mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa); + mcsheng64 *m = reinterpret_cast(getMutableImplNfa(nfa)); vector reportOffsets; @@ -741,14 +741,14 @@ void fill_in_aux_info64(NFA *nfa, const dfa_info &info, assert(accel_offset <= accel_end_offset); assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); info.strat.buildAccel(i, accel_escape_info.at(i), - (void *)((char *)m + this_aux->accel_offset)); + reinterpret_cast(reinterpret_cast(m) + this_aux->accel_offset)); } } } static u16 get_edge_flags64(NFA *nfa, dstate_id_t target_impl_id) { - mstate_aux *aux = getAux64(nfa, target_impl_id); + const mstate_aux *aux = getAux64(nfa, target_impl_id); u16 flags = 0; if (aux->accept) { @@ -766,7 +766,7 @@ static void fill_in_succ_table_64_16(NFA *nfa, const dfa_info &info, dstate_id_t sheng_end, UNUSED dstate_id_t sherman_base) { - u16 *succ_table = (u16 *)((char *)nfa + sizeof(NFA) + sizeof(mcsheng64)); + u16 *succ_table = reinterpret_cast(reinterpret_cast(nfa) + sizeof(NFA) + sizeof(mcsheng64)); u8 alphaShift = info.getAlphaShift(); assert(alphaShift <= 8); @@ -955,9 +955,9 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { } static -void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) { - char *nfa_base = (char *)nfa; - mcsheng *m = (mcsheng *)getMutableImplNfa(nfa); +void fill_in_sherman(NFA *nfa, const dfa_info &info, UNUSED u16 sherman_limit) { + char *nfa_base = reinterpret_cast(nfa); + mcsheng *m = reinterpret_cast(getMutableImplNfa(nfa)); char *sherman_table = nfa_base + m->sherman_offset; 
assert(ISALIGNED_16(sherman_table)); @@ -978,10 +978,10 @@ void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) { assert(len <= 9); dstate_id_t d = info.states[i].daddy; - *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE; - *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len; - *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d); - u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET); + *(reinterpret_cast(curr_sherman_entry + SHERMAN_TYPE_OFFSET)) = SHERMAN_STATE; + *(reinterpret_cast(curr_sherman_entry + SHERMAN_LEN_OFFSET)) = len; + *(reinterpret_cast(curr_sherman_entry + SHERMAN_DADDY_OFFSET)) = info.implId(d); + u8 *chars = reinterpret_cast(curr_sherman_entry + SHERMAN_CHARS_OFFSET); for (u16 s = 0; s < info.impl_alpha_size; s++) { if (info.states[i].next[s] != info.states[d].next[s]) { @@ -989,7 +989,7 @@ void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) { } } - u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len)); + u16 *states = reinterpret_cast(curr_sherman_entry + SHERMAN_STATES_OFFSET(len)); for (u16 s = 0; s < info.impl_alpha_size; s++) { if (info.states[i].next[s] != info.states[d].next[s]) { DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs, @@ -997,7 +997,7 @@ void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) { info.implId(info.states[i].next[s])); u16 entry_val = info.implId(info.states[i].next[s]); entry_val |= get_edge_flags(nfa, entry_val); - unaligned_store_u16((u8 *)states++, entry_val); + unaligned_store_u16(reinterpret_cast(states++), entry_val); } } } @@ -1018,12 +1018,16 @@ bytecode_ptr mcshengCompile16(dfa_info &info, dstate_id_t sheng_end, // Sherman optimization if (info.impl_alpha_size > 16) { +#ifdef DEBUG u16 total_daddy = 0; +#endif // DEBUG for (u32 i = 0; i < info.size(); i++) { find_better_daddy(info, i, is_cyclic_near(info.raw, info.raw.start_anchored), grey); +#ifdef DEBUG total_daddy += info.extra[i].daddytaken; +#endif // DEBUG } DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy, @@ -1035,7 +1039,7 @@ bytecode_ptr mcshengCompile16(dfa_info &info, dstate_id_t sheng_end, if (!allocateImplId16(info, sheng_end, &sherman_limit)) { DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", info.size()); - return nullptr; + return bytecode_ptr(nullptr); } u16 count_real_states = sherman_limit - sheng_end; @@ -1059,7 +1063,7 @@ bytecode_ptr mcshengCompile16(dfa_info &info, dstate_id_t sheng_end, assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); auto nfa = make_zeroed_bytecode_ptr(total_size); - mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get()); + mcsheng *m = reinterpret_cast(getMutableImplNfa(nfa.get())); populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset, accel_escape_info.size(), arb, single, nfa.get()); @@ -1087,7 +1091,7 @@ bytecode_ptr mcshengCompile16(dfa_info &info, dstate_id_t sheng_end, static void fill_in_succ_table_8(NFA *nfa, const dfa_info &info, dstate_id_t sheng_end) { - u8 *succ_table = (u8 *)nfa + sizeof(NFA) + sizeof(mcsheng); + u8 *succ_table = reinterpret_cast(reinterpret_cast(nfa) + sizeof(NFA) + sizeof(mcsheng)); u8 alphaShift = info.getAlphaShift(); assert(alphaShift <= 8); @@ -1109,9 +1113,9 @@ void fill_in_succ_table_8(NFA *nfa, const dfa_info &info, } static -void fill_in_sherman64(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) { - char *nfa_base = (char *)nfa; - mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa); +void 
fill_in_sherman64(NFA *nfa, const dfa_info &info, UNUSED u16 sherman_limit) { + char *nfa_base = reinterpret_cast<char *>(nfa); + mcsheng64 *m = reinterpret_cast<mcsheng64 *>(getMutableImplNfa(nfa)); char *sherman_table = nfa_base + m->sherman_offset; assert(ISALIGNED_16(sherman_table)); @@ -1132,10 +1136,10 @@ void fill_in_sherman64(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) { assert(len <= 9); dstate_id_t d = info.states[i].daddy; - *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE; - *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len; - *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d); - u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET); + *(reinterpret_cast<u8 *>(curr_sherman_entry + SHERMAN_TYPE_OFFSET)) = SHERMAN_STATE; + *(reinterpret_cast<u8 *>(curr_sherman_entry + SHERMAN_LEN_OFFSET)) = len; + *(reinterpret_cast<u16 *>(curr_sherman_entry + SHERMAN_DADDY_OFFSET)) = info.implId(d); + u8 *chars = reinterpret_cast<u8 *>(curr_sherman_entry + SHERMAN_CHARS_OFFSET); for (u16 s = 0; s < info.impl_alpha_size; s++) { if (info.states[i].next[s] != info.states[d].next[s]) { @@ -1143,7 +1147,7 @@ void fill_in_sherman64(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) { } } - u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len)); + u16 *states = reinterpret_cast<u16 *>(curr_sherman_entry + SHERMAN_STATES_OFFSET(len)); for (u16 s = 0; s < info.impl_alpha_size; s++) { if (info.states[i].next[s] != info.states[d].next[s]) { DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs, @@ -1151,7 +1155,7 @@ void fill_in_sherman64(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) { info.implId(info.states[i].next[s])); u16 entry_val = info.implId(info.states[i].next[s]); entry_val |= get_edge_flags64(nfa, entry_val); - unaligned_store_u16((u8 *)states++, entry_val); + unaligned_store_u16(reinterpret_cast<u8 *>(states++), entry_val); } } } @@ -1172,12 +1176,16 @@ bytecode_ptr<NFA> mcsheng64Compile16(dfa_info&info, dstate_id_t sheng_end, // Sherman optimization if (info.impl_alpha_size > 16) { +#ifdef DEBUG u16 total_daddy = 0; +#endif // DEBUG for (u32 i = 0; i < info.size(); i++) { find_better_daddy(info, i, is_cyclic_near(info.raw, info.raw.start_anchored), grey); +#ifdef DEBUG total_daddy += info.extra[i].daddytaken; +#endif // DEBUG } DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy, @@ -1189,7 +1197,7 @@ bytecode_ptr<NFA> mcsheng64Compile16(dfa_info&info, dstate_id_t sheng_end, if (!allocateImplId16(info, sheng_end, &sherman_limit)) { DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", info.size()); - return nullptr; + return bytecode_ptr<NFA>(nullptr); } u16 count_real_states = sherman_limit - sheng_end; @@ -1213,7 +1221,7 @@ bytecode_ptr<NFA> mcsheng64Compile16(dfa_info&info, dstate_id_t sheng_end, assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); - mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa.get()); + mcsheng64 *m = reinterpret_cast<mcsheng64 *>(getMutableImplNfa(nfa.get())); populateBasicInfo64(sizeof(u16), info, total_size, aux_offset, accel_offset, accel_escape_info.size(), arb, single, nfa.get()); @@ -1241,7 +1249,7 @@ bytecode_ptr<NFA> mcsheng64Compile16(dfa_info&info, dstate_id_t sheng_end, static void fill_in_succ_table_64_8(NFA *nfa, const dfa_info &info, dstate_id_t sheng_end) { - u8 *succ_table = (u8 *)nfa + sizeof(NFA) + sizeof(mcsheng64); + u8 *succ_table = reinterpret_cast<u8 *>(reinterpret_cast<char *>(nfa) + sizeof(NFA) + sizeof(mcsheng64)); u8 alphaShift = info.getAlphaShift(); assert(alphaShift <= 8); @@ -1339,7
+1347,7 @@ bytecode_ptr<NFA> mcshengCompile8(dfa_info &info, dstate_id_t sheng_end, assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); - mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get()); + mcsheng *m = reinterpret_cast<mcsheng *>(getMutableImplNfa(nfa.get())); allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8, &m->accept_limit_8); @@ -1392,7 +1400,7 @@ bytecode_ptr<NFA> mcsheng64Compile8(dfa_info &info, dstate_id_t sheng_end, assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); - mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa.get()); + mcsheng64 *m = reinterpret_cast<mcsheng64 *>(getMutableImplNfa(nfa.get())); allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8, &m->accept_limit_8); @@ -1414,7 +1422,7 @@ bytecode_ptr<NFA> mcsheng64Compile8(dfa_info &info, dstate_id_t sheng_end, bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm) { if (!cc.grey.allowMcSheng) { - return nullptr; + return bytecode_ptr<NFA>(nullptr); } mcclellan_build_strat mbs(raw, rm, false); @@ -1430,12 +1438,10 @@ bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc, map<dstate_id_t, AccelScheme> accel_escape_info = info.strat.getAccelInfo(cc.grey); - auto old_states = info.states; dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info, MAX_SHENG_STATES); if (sheng_end <= DEAD_STATE + 1) { - info.states = old_states; - return nullptr; + return bytecode_ptr<NFA>(nullptr); } bytecode_ptr<NFA> nfa; @@ -1447,7 +1453,6 @@ bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc, } if (!nfa) { - info.states = old_states; return nfa; } @@ -1462,12 +1467,12 @@ bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc, bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm) { if (!cc.grey.allowMcSheng) { - return nullptr; + return bytecode_ptr<NFA>(nullptr); } if (!cc.target_info.has_avx512vbmi()) { DEBUG_PRINTF("McSheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n"); - return nullptr; + return bytecode_ptr<NFA>(nullptr); } mcclellan_build_strat mbs(raw, rm, false); @@ -1488,7 +1493,7 @@ bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc, sheng_end64 = find_sheng_states(info, accel_escape_info, MAX_SHENG64_STATES); if (sheng_end64 <= DEAD_STATE + 1) { - return nullptr; + return bytecode_ptr<NFA>(nullptr); } else { using64state = true; } diff --git a/src/nfa/mpv.c b/src/nfa/mpv.c index cba3d159..f84e9af1 100644 --- a/src/nfa/mpv.c +++ b/src/nfa/mpv.c @@ -512,7 +512,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf, verm_restart:; assert(buf[curr] == kp->u.verm.c); - size_t test = curr; + size_t test; if (curr + min_rep < length) { test = curr + min_rep; } else { @@ -534,7 +534,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf, m128 hi = kp->u.shuf.mask_hi; shuf_restart: assert(do_single_shufti(lo, hi, buf[curr])); - size_t test = curr; + size_t test; if (curr + min_rep < length) { test = curr + min_rep; } else { @@ -556,7 +556,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf, const m128 mask1 = kp->u.truffle.mask1; const m128 mask2 = kp->u.truffle.mask2; truffle_restart:; - size_t test = curr; + size_t test; if (curr + min_rep < length) { test = curr + min_rep; } else { @@ -582,7 +582,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf, nverm_restart:; assert(buf[curr] != kp->u.verm.c); - size_t test = curr; + size_t test; if (curr + min_rep < length)
{ test = curr + min_rep; } else { @@ -607,7 +607,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf, } static really_inline -void restartKilo(const struct mpv *m, UNUSED u8 *active, u8 *reporters, +void restartKilo(const struct mpv *m, UNUSED const u8 *active, u8 *reporters, struct mpv_decomp_state *dstate, struct mpv_pq_item *pq, const u8 *buf, u64a prev_limit, size_t buf_length, u32 i) { const struct mpv_kilopuff *kp = (const void *)(m + 1); @@ -1074,7 +1074,7 @@ s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end) { return 0; } else { const struct mpv *m = getImplNfa(nfa); - u8 *reporters = (u8 *)q->state + m->reporter_offset; + const u8 *reporters = (u8 *)q->state + m->reporter_offset; if (mmbit_any_precise(reporters, m->kilo_count)) { DEBUG_PRINTF("next byte\n"); @@ -1087,7 +1087,7 @@ s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end) { next_event = q->items[q->cur].location; } - struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state; + const struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state; struct mpv_pq_item *pq = (struct mpv_pq_item *)(q->state + m->pq_offset); if (s->pq_size) { diff --git a/src/nfa/nfa_api_queue.h b/src/nfa/nfa_api_queue.h index e3579a7e..3f8bca2c 100644 --- a/src/nfa/nfa_api_queue.h +++ b/src/nfa/nfa_api_queue.h @@ -167,7 +167,7 @@ void pushQueueNoMerge(struct mq * restrict q, u32 e, s64a loc) { // We assert that the event is different from its predecessor. If it's a // dupe, you should have used the ordinary pushQueue call. if (q->end) { - UNUSED struct mq_item *prev = &q->items[q->end - 1]; + UNUSED const struct mq_item *prev = &q->items[q->end - 1]; assert(prev->type != e || prev->location != loc); } #endif @@ -251,6 +251,10 @@ void q_skip_forward_to(struct mq *q, s64a min_loc) { // Dump the contents of the given queue. static never_inline UNUSED void debugQueue(const struct mq *q) { + if (q == NULL) { + DEBUG_PRINTF("q=NULL!\n"); + return; + } DEBUG_PRINTF("q=%p, nfa=%p\n", q, q->nfa); DEBUG_PRINTF("q offset=%llu, buf={%p, len=%zu}, history={%p, len=%zu}\n", q->offset, q->buffer, q->length, q->history, q->hlength); diff --git a/src/nfa/nfa_api_util.h b/src/nfa/nfa_api_util.h index affc5f38..1af9c801 100644 --- a/src/nfa/nfa_api_util.h +++ b/src/nfa/nfa_api_util.h @@ -68,7 +68,7 @@ void pushQueueAt(struct mq * restrict q, u32 pos, u32 e, s64a loc) { // We assert that the event is different from its predecessor. If it's a // dupe, you should have used the ordinary pushQueue call. if (q->end) { - UNUSED struct mq_item *prev = &q->items[q->end - 1]; + UNUSED const struct mq_item *prev = &q->items[q->end - 1]; assert(prev->type != e || prev->location != loc); } #endif diff --git a/src/nfa/nfa_internal.h b/src/nfa/nfa_internal.h index 8cc701b6..544867bc 100644 --- a/src/nfa/nfa_internal.h +++ b/src/nfa/nfa_internal.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2024, VectorCamp PC * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without @@ -133,6 +134,7 @@ struct ALIGN_CL_DIRECTIVE NFA { /* Note: implementation (e.g. a LimEx) directly follows struct in memory */ } ; +#ifndef __cplusplus // Accessor macro for the implementation NFA: we do things this way to avoid // type-punning warnings. #define getImplNfa(nfa) \ @@ -140,6 +142,13 @@ struct ALIGN_CL_DIRECTIVE NFA { // Non-const version of the above, used at compile time.
#define getMutableImplNfa(nfa) ((char *)(nfa) + sizeof(struct NFA)) +#else +// Same versions without C casts to avoid Cppcheck warnings +#define getImplNfa(nfa) \ + (reinterpret_cast<const void *>(reinterpret_cast<const char *>(nfa) + sizeof(struct NFA))) + +#define getMutableImplNfa(nfa) (reinterpret_cast<char *>(nfa) + sizeof(struct NFA)) +#endif static really_inline u32 nfaAcceptsEod(const struct NFA *nfa) { return nfa->flags & NFA_ACCEPTS_EOD; } diff --git a/src/nfa/rdfa_graph.h b/src/nfa/rdfa_graph.h index 6d166c2f..d042560d 100644 --- a/src/nfa/rdfa_graph.h +++ b/src/nfa/rdfa_graph.h @@ -45,7 +45,7 @@ struct RdfaEdgeProps { }; struct RdfaGraph : public ue2_graph<RdfaGraph, RdfaVertexProps, RdfaEdgeProps> { - RdfaGraph(const raw_dfa &rdfa); + explicit RdfaGraph(const raw_dfa &rdfa); }; diff --git a/src/nfa/repeat.c b/src/nfa/repeat.c index 5b2e4df4..07d02082 100644 --- a/src/nfa/repeat.c +++ b/src/nfa/repeat.c @@ -785,7 +785,7 @@ enum RepeatMatch repeatHasMatchRange(const struct RepeatInfo *info, if (diff > info->repeatMax) { DEBUG_PRINTF("range list is stale\n"); return REPEAT_STALE; - } else if (diff >= info->repeatMin && diff <= info->repeatMax) { + } else if (diff >= info->repeatMin) { return REPEAT_MATCH; } @@ -836,7 +836,7 @@ enum RepeatMatch repeatHasMatchBitmap(const struct RepeatInfo *info, if (diff > info->repeatMax) { DEBUG_PRINTF("stale\n"); return REPEAT_STALE; - } else if (diff >= info->repeatMin && diff <= info->repeatMax) { + } else if (diff >= info->repeatMin) { return REPEAT_MATCH; } diff --git a/src/nfa/repeatcompile.cpp b/src/nfa/repeatcompile.cpp index 60b51352..d9544675 100644 --- a/src/nfa/repeatcompile.cpp +++ b/src/nfa/repeatcompile.cpp @@ -94,9 +94,6 @@ u32 repeatRecurTable(struct RepeatStateInfo *info, const depth &repeatMax, static u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax, const u32 minPeriod, u32 rv) { - u32 cnt = 0; - u32 patch_bits = 0; - u32 total_size = 0; u32 min = ~0U; u32 patch_len = 0; @@ -105,11 +102,11 @@ u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax, } for (u32 i = minPeriod; i <= rv; i++) { - cnt = ((u32)repeatMax + (i - 1)) / i + 1; + u32 cnt = ((u32)repeatMax + (i - 1)) / i + 1; // no bit packing version - patch_bits = calcPackedBits(info->table[i]); - total_size = (patch_bits + 7U) / 8U * cnt; + u32 patch_bits = calcPackedBits(info->table[i]); + u32 total_size = (patch_bits + 7U) / 8U * cnt; if (total_size < min) { patch_len = i; diff --git a/src/nfa/sheng.c b/src/nfa/sheng.c index 3f36e218..e6b299d4 100644 --- a/src/nfa/sheng.c +++ b/src/nfa/sheng.c @@ -154,7 +154,7 @@ char fireReports(const struct sheng *sh, NfaCallback cb, void *ctxt, return MO_CONTINUE_MATCHING; /* continue execution */ } -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) // Sheng32 static really_inline const struct sheng32 *get_sheng32(const struct NFA *n) { @@ -351,7 +351,7 @@ char fireReports64(const struct sheng64 *sh, NfaCallback cb, void *ctxt, } return MO_CONTINUE_MATCHING; /* continue execution */ } -#endif // end of HAVE_AVX512VBMI +#endif // end of HAVE_AVX512VBMI || HAVE_SVE /* include Sheng function definitions */ #include "sheng_defs.h" @@ -814,7 +814,6 @@ char nfaExecSheng_testEOD(const struct NFA *nfa, const char *state, char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q) { const struct sheng *sh = (const struct sheng *)getImplNfa(n); NfaCallback cb = q->cb; - void *ctxt = q->context; u8 s = *(u8 *)q->state; const struct sstate_aux *aux = get_aux(sh, s); u64a offset = q_cur_offset(q); @@ -823,6 +822,7 @@ char
nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q) { assert(q_cur_type(q) == MQE_START); if (aux->accept) { + void *ctxt = q->context; if (sh->flags & SHENG_FLAG_SINGLE_REPORT) { fireSingleReport(cb, ctxt, sh->report, offset); } else { @@ -871,7 +871,7 @@ char nfaExecSheng_expandState(UNUSED const struct NFA *nfa, void *dest, return 0; } -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) // Sheng32 static really_inline char runSheng32Cb(const struct sheng32 *sh, NfaCallback cb, void *ctxt, @@ -1874,4 +1874,4 @@ char nfaExecSheng64_expandState(UNUSED const struct NFA *nfa, void *dest, *(u8 *)dest = *(const u8 *)src; return 0; } -#endif // end of HAVE_AVX512VBMI +#endif // end of HAVE_AVX512VBMI || HAVE_SVE diff --git a/src/nfa/sheng.h b/src/nfa/sheng.h index 7b90e303..212bd3a4 100644 --- a/src/nfa/sheng.h +++ b/src/nfa/sheng.h @@ -58,7 +58,7 @@ char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer, size_t length, NfaCallback cb, void *context); -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL #define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL @@ -106,8 +106,7 @@ char nfaExecSheng64_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer, size_t length, NfaCallback cb, void *context); - -#else // !HAVE_AVX512VBMI +#else // !HAVE_AVX512VBMI && !HAVE_SVE #define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL #define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL @@ -138,6 +137,7 @@ char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer, #define nfaExecSheng64_testEOD NFA_API_NO_IMPL #define nfaExecSheng64_reportCurrent NFA_API_NO_IMPL #define nfaExecSheng64_B NFA_API_NO_IMPL -#endif // end of HAVE_AVX512VBMI +#endif // end of HAVE_AVX512VBMI || defined(HAVE_SVE) + #endif /* SHENG_H_ */ diff --git a/src/nfa/sheng_defs.h b/src/nfa/sheng_defs.h index 390af752..886af28e 100644 --- a/src/nfa/sheng_defs.h +++ b/src/nfa/sheng_defs.h @@ -52,7 +52,7 @@ u8 hasInterestingStates(const u8 a, const u8 b, const u8 c, const u8 d) { return (a | b | c | d) & (SHENG_STATE_FLAG_MASK); } -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) static really_inline u8 isDeadState32(const u8 a) { return a & SHENG32_STATE_DEAD; @@ -108,7 +108,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define SHENG_IMPL sheng_cod #define DEAD_FUNC isDeadState #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #define SHENG32_IMPL sheng32_cod #define DEAD_FUNC32 isDeadState32 #define ACCEPT_FUNC32 isAcceptState32 @@ -121,7 +121,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef SHENG_IMPL #undef DEAD_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #undef SHENG32_IMPL #undef DEAD_FUNC32 #undef ACCEPT_FUNC32 @@ -135,7 +135,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define SHENG_IMPL sheng_co #define DEAD_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #define SHENG32_IMPL sheng32_co #define DEAD_FUNC32 dummyFunc #define ACCEPT_FUNC32 isAcceptState32 @@ -148,7 +148,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef SHENG_IMPL #undef DEAD_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || 
defined(HAVE_SVE) #undef SHENG32_IMPL #undef DEAD_FUNC32 #undef ACCEPT_FUNC32 @@ -162,7 +162,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define SHENG_IMPL sheng_samd #define DEAD_FUNC isDeadState #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #define SHENG32_IMPL sheng32_samd #define DEAD_FUNC32 isDeadState32 #define ACCEPT_FUNC32 isAcceptState32 @@ -175,7 +175,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef SHENG_IMPL #undef DEAD_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #undef SHENG32_IMPL #undef DEAD_FUNC32 #undef ACCEPT_FUNC32 @@ -189,7 +189,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define SHENG_IMPL sheng_sam #define DEAD_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #define SHENG32_IMPL sheng32_sam #define DEAD_FUNC32 dummyFunc #define ACCEPT_FUNC32 isAcceptState32 @@ -202,7 +202,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef SHENG_IMPL #undef DEAD_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #undef SHENG32_IMPL #undef DEAD_FUNC32 #undef ACCEPT_FUNC32 @@ -216,7 +216,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define SHENG_IMPL sheng_nmd #define DEAD_FUNC isDeadState #define ACCEPT_FUNC dummyFunc -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #define SHENG32_IMPL sheng32_nmd #define DEAD_FUNC32 isDeadState32 #define ACCEPT_FUNC32 dummyFunc @@ -229,7 +229,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef SHENG_IMPL #undef DEAD_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #undef SHENG32_IMPL #undef DEAD_FUNC32 #undef ACCEPT_FUNC32 @@ -243,7 +243,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define SHENG_IMPL sheng_nm #define DEAD_FUNC dummyFunc #define ACCEPT_FUNC dummyFunc -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #define SHENG32_IMPL sheng32_nm #define DEAD_FUNC32 dummyFunc #define ACCEPT_FUNC32 dummyFunc @@ -256,7 +256,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef SHENG_IMPL #undef DEAD_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #undef SHENG32_IMPL #undef DEAD_FUNC32 #undef ACCEPT_FUNC32 @@ -277,7 +277,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC isAccelState #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #define SHENG32_IMPL sheng32_4_coda #define INTERESTING_FUNC32 hasInterestingStates32 #define INNER_DEAD_FUNC32 isDeadState32 @@ -296,7 +296,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 #undef INNER_DEAD_FUNC32 @@ -316,7 +316,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #define SHENG32_IMPL sheng32_4_cod #define INTERESTING_FUNC32 hasInterestingStates32 #define INNER_DEAD_FUNC32 isDeadState32 @@ -339,7 +339,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) +#if 
defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 #undef INNER_DEAD_FUNC32 @@ -363,7 +363,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC isAccelState #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #define SHENG32_IMPL sheng32_4_coa #define INTERESTING_FUNC32 hasInterestingStates32 #define INNER_DEAD_FUNC32 dummyFunc @@ -382,7 +382,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 #undef INNER_DEAD_FUNC32 @@ -402,7 +402,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #define SHENG32_IMPL sheng32_4_co #define INTERESTING_FUNC32 hasInterestingStates32 #define INNER_DEAD_FUNC32 dummyFunc @@ -425,7 +425,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 #undef INNER_DEAD_FUNC32 @@ -449,7 +449,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC isAccelState #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #define SHENG32_IMPL sheng32_4_samda #define INTERESTING_FUNC32 hasInterestingStates32 #define INNER_DEAD_FUNC32 isDeadState32 @@ -468,7 +468,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 #undef INNER_DEAD_FUNC32 @@ -488,7 +488,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #define SHENG32_IMPL sheng32_4_samd #define INTERESTING_FUNC32 hasInterestingStates32 #define INNER_DEAD_FUNC32 isDeadState32 @@ -511,7 +511,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 #undef INNER_DEAD_FUNC32 @@ -535,7 +535,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC isAccelState #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #define SHENG32_IMPL sheng32_4_sama #define INTERESTING_FUNC32 hasInterestingStates32 #define INNER_DEAD_FUNC32 dummyFunc @@ -554,7 +554,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 #undef INNER_DEAD_FUNC32 @@ -574,7 +574,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #define 
SHENG32_IMPL sheng32_4_sam #define INTERESTING_FUNC32 hasInterestingStates32 #define INNER_DEAD_FUNC32 dummyFunc @@ -597,7 +597,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 #undef INNER_DEAD_FUNC32 @@ -623,7 +623,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC isAccelState #define ACCEPT_FUNC dummyFunc -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #define SHENG32_IMPL sheng32_4_nmda #define INTERESTING_FUNC32 dummyFunc4 #define INNER_DEAD_FUNC32 dummyFunc @@ -642,7 +642,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 #undef INNER_DEAD_FUNC32 @@ -662,7 +662,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC dummyFunc -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #define SHENG32_IMPL sheng32_4_nmd #define INTERESTING_FUNC32 dummyFunc4 #define INNER_DEAD_FUNC32 dummyFunc @@ -685,7 +685,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 #undef INNER_DEAD_FUNC32 @@ -712,7 +712,7 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC dummyFunc -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #define SHENG32_IMPL sheng32_4_nm #define INTERESTING_FUNC32 dummyFunc4 #define INNER_DEAD_FUNC32 dummyFunc @@ -735,7 +735,7 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 #undef INNER_DEAD_FUNC32 diff --git a/src/nfa/sheng_impl.h b/src/nfa/sheng_impl.h index 1fa5c831..2c701446 100644 --- a/src/nfa/sheng_impl.h +++ b/src/nfa/sheng_impl.h @@ -96,7 +96,7 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s, return MO_CONTINUE_MATCHING; } -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) static really_inline char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng32 *s, @@ -114,14 +114,28 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt, } DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start)); +#if defined(HAVE_SVE) + const svbool_t lane_pred_32 = svwhilelt_b8(0, 32); + svuint8_t cur_state = svdup_u8(*state); + svuint8_t tbl_mask = svdup_u8((unsigned char)0x1F); + const m512 *masks = s->succ_masks; +#else m512 cur_state = set1_64x8(*state); const m512 *masks = s->succ_masks; +#endif while (likely(cur_buf != end)) { const u8 c = *cur_buf; + +#if defined(HAVE_SVE) + svuint8_t succ_mask = svld1(lane_pred_32, (const u8*)(masks + c)); + cur_state = svtbl(succ_mask, svand_x(svptrue_b8(), tbl_mask, cur_state)); + const u8 tmp = svlastb(lane_pred_32, cur_state); +#else const m512 succ_mask = masks[c]; cur_state = vpermb512(cur_state, succ_mask); const u8 tmp = movd512(cur_state); +#endif DEBUG_PRINTF("c: 
%02hhx '%c'\n", c, ourisprint(c) ? c : '?'); DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG32_STATE_MASK, @@ -153,7 +167,11 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt, } cur_buf++; } +#if defined(HAVE_SVE) + *state = svlastb(lane_pred_32, cur_state); +#else *state = movd512(cur_state); +#endif *scan_end = cur_buf; return MO_CONTINUE_MATCHING; } @@ -175,14 +193,28 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt, } DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start)); +#if defined(HAVE_SVE) + const svbool_t lane_pred_64 = svwhilelt_b8(0, 64); + svuint8_t cur_state = svdup_u8(*state); + svuint8_t tbl_mask = svdup_u8((unsigned char)0x3F); + const m512 *masks = s->succ_masks; +#else m512 cur_state = set1_64x8(*state); const m512 *masks = s->succ_masks; +#endif while (likely(cur_buf != end)) { const u8 c = *cur_buf; + +#if defined(HAVE_SVE) + svuint8_t succ_mask = svld1(lane_pred_64, (const u8*)(masks + c)); + cur_state = svtbl(succ_mask, svand_x(svptrue_b8(), tbl_mask, cur_state)); + const u8 tmp = svlastb(lane_pred_64, cur_state); +#else const m512 succ_mask = masks[c]; cur_state = vpermb512(cur_state, succ_mask); const u8 tmp = movd512(cur_state); +#endif DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?'); DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG64_STATE_MASK, @@ -214,7 +246,11 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt, } cur_buf++; } +#if defined(HAVE_SVE) + *state = svlastb(lane_pred_64, cur_state); +#else *state = movd512(cur_state); +#endif *scan_end = cur_buf; return MO_CONTINUE_MATCHING; } diff --git a/src/nfa/sheng_impl4.h b/src/nfa/sheng_impl4.h index e5d3468f..718c3409 100644 --- a/src/nfa/sheng_impl4.h +++ b/src/nfa/sheng_impl4.h @@ -283,7 +283,7 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s, return MO_CONTINUE_MATCHING; } -#if defined(HAVE_AVX512VBMI) +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) static really_inline char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng32 *s, @@ -320,8 +320,15 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt, return MO_CONTINUE_MATCHING; } +#if defined(HAVE_SVE) + const svbool_t lane_pred_32 = svwhilelt_b8(0, 32); + svuint8_t cur_state = svdup_u8(*state); + svuint8_t tbl_mask = svdup_u8((unsigned char)0x1F); + const m512 *masks = s->succ_masks; +#else m512 cur_state = set1_64x8(*state); const m512 *masks = s->succ_masks; +#endif while (likely(end - cur_buf >= 4)) { const u8 *b1 = cur_buf; @@ -333,6 +340,23 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt, const u8 c3 = *b3; const u8 c4 = *b4; +#if defined(HAVE_SVE) + svuint8_t succ_mask1 = svld1(lane_pred_32, (const u8*)(masks+c1)); + cur_state = svtbl(succ_mask1, svand_x(svptrue_b8(), tbl_mask, cur_state)); + const u8 a1 = svlastb(lane_pred_32, cur_state); + + svuint8_t succ_mask2 = svld1(lane_pred_32, (const u8*)(masks+c2)); + cur_state = svtbl(succ_mask2, svand_x(svptrue_b8(), tbl_mask, cur_state)); + const u8 a2 = svlastb(lane_pred_32, cur_state); + + svuint8_t succ_mask3 = svld1(lane_pred_32, (const u8*)(masks+c3)); + cur_state = svtbl(succ_mask3, svand_x(svptrue_b8(), tbl_mask, cur_state)); + const u8 a3 = svlastb(lane_pred_32, cur_state); + + svuint8_t succ_mask4 = svld1(lane_pred_32, (const u8*)(masks+c4)); + cur_state = svtbl(succ_mask4, svand_x(svptrue_b8(), tbl_mask, cur_state)); + const u8 a4 = svlastb(lane_pred_32, cur_state); +#else const m512 succ_mask1 = masks[c1]; cur_state = vpermb512(cur_state, succ_mask1); const u8 a1 = 
movd512(cur_state); @@ -348,6 +372,7 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt, const m512 succ_mask4 = masks[c4]; cur_state = vpermb512(cur_state, succ_mask4); const u8 a4 = movd512(cur_state); +#endif DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?'); DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG32_STATE_MASK, @@ -517,7 +542,11 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt, }; cur_buf += 4; } +#if defined(HAVE_SVE) + *state = svlastb(lane_pred_32, cur_state); +#else *state = movd512(cur_state); +#endif *scan_end = cur_buf; return MO_CONTINUE_MATCHING; } @@ -541,9 +570,15 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt, *scan_end = end; return MO_CONTINUE_MATCHING; } - +#if defined(HAVE_SVE) + const svbool_t lane_pred_64 = svwhilelt_b8(0, 64); + svuint8_t cur_state = svdup_u8(*state); + svuint8_t tbl_mask = svdup_u8((unsigned char)0x3F); + const m512 *masks = s->succ_masks; +#else m512 cur_state = set1_64x8(*state); const m512 *masks = s->succ_masks; +#endif while (likely(end - cur_buf >= 4)) { const u8 *b1 = cur_buf; @@ -555,6 +590,23 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt, const u8 c3 = *b3; const u8 c4 = *b4; +#if defined(HAVE_SVE) + svuint8_t succ_mask1 = svld1(lane_pred_64, (const u8*)(masks+c1)); + cur_state = svtbl(succ_mask1, svand_x(svptrue_b8(), tbl_mask, cur_state)); + const u8 a1 = svlastb(lane_pred_64, cur_state); + + svuint8_t succ_mask2 = svld1(lane_pred_64, (const u8*)(masks+c2)); + cur_state = svtbl(succ_mask2, svand_x(svptrue_b8(), tbl_mask, cur_state)); + const u8 a2 = svlastb(lane_pred_64, cur_state); + + svuint8_t succ_mask3 = svld1(lane_pred_64, (const u8*)(masks+c3)); + cur_state = svtbl(succ_mask3, svand_x(svptrue_b8(), tbl_mask, cur_state)); + const u8 a3 = svlastb(lane_pred_64, cur_state); + + svuint8_t succ_mask4 = svld1(lane_pred_64, (const u8*)(masks+c4)); + cur_state = svtbl(succ_mask4, svand_x(svptrue_b8(), tbl_mask, cur_state)); + const u8 a4 = svlastb(lane_pred_64, cur_state); +#else const m512 succ_mask1 = masks[c1]; cur_state = vpermb512(cur_state, succ_mask1); const u8 a1 = movd512(cur_state); @@ -570,6 +622,7 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt, const m512 succ_mask4 = masks[c4]; cur_state = vpermb512(cur_state, succ_mask4); const u8 a4 = movd512(cur_state); +#endif DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?'); DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG64_STATE_MASK, @@ -703,7 +756,11 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt, } cur_buf += 4; } +#if defined(HAVE_SVE) + *state = svlastb(lane_pred_64, cur_state); +#else *state = movd512(cur_state); +#endif *scan_end = cur_buf; return MO_CONTINUE_MATCHING; } diff --git a/src/nfa/shengcompile.cpp b/src/nfa/shengcompile.cpp index 055e1971..2c45229a 100644 --- a/src/nfa/shengcompile.cpp +++ b/src/nfa/shengcompile.cpp @@ -99,7 +99,7 @@ struct dfa_info { return next(idx, TOP); } dstate &next(dstate_id_t idx, u16 chr) { - auto &src = (*this)[idx]; + const auto &src = (*this)[idx]; auto next_id = src.next[raw.alpha_remap[chr]]; return states[next_id]; } @@ -109,7 +109,7 @@ struct dfa_info { // if DFA can't die, shift all indices left by 1 return can_die ? 
idx : idx + 1; } - bool isDead(dstate &state) { + bool isDead(const dstate &state) { return raw_id(state.impl_id) == DEAD_STATE; } bool isDead(dstate_id_t idx) { @@ -117,7 +117,7 @@ struct dfa_info { } private: - static bool dfaCanDie(raw_dfa &rdfa) { + static bool dfaCanDie(const raw_dfa &rdfa) { for (unsigned chr = 0; chr < 256; chr++) { for (dstate_id_t state = 0; state < rdfa.states.size(); state++) { auto succ = rdfa.states[state].next[rdfa.alpha_remap[chr]]; @@ -138,7 +138,7 @@ struct raw_report_list { raw_report_list(const flat_set &reports_in, const ReportManager &rm, bool do_remap) { if (do_remap) { - for (auto &id : reports_in) { + for (const auto &id : reports_in) { reports.insert(rm.getProgramOffset(id)); } } else { @@ -334,14 +334,14 @@ void fillAccelOut(const map &accel_escape_info, template static -u8 getShengState(UNUSED dstate &state, UNUSED dfa_info &info, - UNUSED map &accelInfo) { +u8 getShengState(UNUSED const dstate &state, UNUSED dfa_info &info, + UNUSED const map &accelInfo) { return 0; } template <> -u8 getShengState(dstate &state, dfa_info &info, - map &accelInfo) { +u8 getShengState(const dstate &state, dfa_info &info, + const map &accelInfo) { u8 s = state.impl_id; if (!state.reports.empty()) { s |= SHENG_STATE_ACCEPT; @@ -356,8 +356,8 @@ u8 getShengState(dstate &state, dfa_info &info, } template <> -u8 getShengState(dstate &state, dfa_info &info, - map &accelInfo) { +u8 getShengState(const dstate &state, dfa_info &info, + const map &accelInfo) { u8 s = state.impl_id; if (!state.reports.empty()) { s |= SHENG32_STATE_ACCEPT; @@ -372,8 +372,8 @@ u8 getShengState(dstate &state, dfa_info &info, } template <> -u8 getShengState(dstate &state, dfa_info &info, - UNUSED map &accelInfo) { +u8 getShengState(const dstate &state, dfa_info &info, + UNUSED const map &accelInfo) { u8 s = state.impl_id; if (!state.reports.empty()) { s |= SHENG64_STATE_ACCEPT; @@ -409,8 +409,8 @@ void fillAccelAux(struct NFA *n, dfa_info &info, template static -void populateBasicInfo(UNUSED struct NFA *n, UNUSED dfa_info &info, - UNUSED map &accelInfo, +void populateBasicInfo(UNUSED struct NFA *n, UNUSED dfa_info &info, // cppcheck-suppress constParameterPointer + UNUSED const map &accelInfo, UNUSED u32 aux_offset, UNUSED u32 report_offset, UNUSED u32 accel_offset, UNUSED u32 total_size, UNUSED u32 dfa_size) { @@ -418,7 +418,7 @@ void populateBasicInfo(UNUSED struct NFA *n, UNUSED dfa_info &info, template <> void populateBasicInfo(struct NFA *n, dfa_info &info, - map &accelInfo, + const map &accelInfo, u32 aux_offset, u32 report_offset, u32 accel_offset, u32 total_size, u32 dfa_size) { @@ -443,7 +443,7 @@ void populateBasicInfo(struct NFA *n, dfa_info &info, template <> void populateBasicInfo(struct NFA *n, dfa_info &info, - map &accelInfo, + const map &accelInfo, u32 aux_offset, u32 report_offset, u32 accel_offset, u32 total_size, u32 dfa_size) { @@ -468,7 +468,7 @@ void populateBasicInfo(struct NFA *n, dfa_info &info, template <> void populateBasicInfo(struct NFA *n, dfa_info &info, - map &accelInfo, + const map &accelInfo, u32 aux_offset, u32 report_offset, u32 accel_offset, u32 total_size, u32 dfa_size) { @@ -551,19 +551,19 @@ void fillSingleReport(NFA *n, ReportID r_id) { template static -bool createShuffleMasks(UNUSED T *s, UNUSED dfa_info &info, - UNUSED map &accelInfo) { +bool createShuffleMasks(UNUSED T *s, UNUSED dfa_info &info, // cppcheck-suppress constParameterPointer + UNUSED const map &accelInfo) { return true; } template <> bool createShuffleMasks(sheng *s, dfa_info &info, - map 
&accelInfo) { + const map<dstate_id_t, AccelScheme> &accelInfo) { for (u16 chr = 0; chr < 256; chr++) { u8 buf[16] = {0}; for (dstate_id_t idx = 0; idx < info.size(); idx++) { - auto &succ_state = info.next(idx, chr); + const auto &succ_state = info.next(idx, chr); buf[idx] = getShengState<sheng>(succ_state, info, accelInfo); } @@ -577,13 +577,13 @@ bool createShuffleMasks(sheng *s, dfa_info &info, template <> bool createShuffleMasks(sheng32 *s, dfa_info &info, - map<dstate_id_t, AccelScheme> &accelInfo) { + const map<dstate_id_t, AccelScheme> &accelInfo) { for (u16 chr = 0; chr < 256; chr++) { u8 buf[64] = {0}; assert(info.size() <= 32); for (dstate_id_t idx = 0; idx < info.size(); idx++) { - auto &succ_state = info.next(idx, chr); + const auto &succ_state = info.next(idx, chr); buf[idx] = getShengState<sheng32>(succ_state, info, accelInfo); buf[32 + idx] = buf[idx]; @@ -598,13 +598,13 @@ bool createShuffleMasks(sheng32 *s, dfa_info &info, template <> bool createShuffleMasks(sheng64 *s, dfa_info &info, - map<dstate_id_t, AccelScheme> &accelInfo) { + const map<dstate_id_t, AccelScheme> &accelInfo) { for (u16 chr = 0; chr < 256; chr++) { u8 buf[64] = {0}; assert(info.size() <= 64); for (dstate_id_t idx = 0; idx < info.size(); idx++) { - auto &succ_state = info.next(idx, chr); + const auto &succ_state = info.next(idx, chr); if (accelInfo.find(info.raw_id(succ_state.impl_id)) != accelInfo.end()) { @@ -690,7 +690,7 @@ bytecode_ptr<NFA> shengCompile_int(raw_dfa &raw, const CompileContext &cc, } if (!createShuffleMasks((T *)getMutableImplNfa(nfa.get()), info, accelInfo)) { - return nullptr; + return bytecode_ptr<NFA>(nullptr); } return nfa; @@ -701,7 +701,7 @@ bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc, set<dstate_id_t> *accel_states) { if (!cc.grey.allowSheng) { DEBUG_PRINTF("Sheng is not allowed!\n"); - return nullptr; + return bytecode_ptr<NFA>(nullptr); } sheng_build_strat strat(raw, rm, only_accel_init); @@ -716,7 +716,7 @@ bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc, info.can_die ? "can" : "cannot", info.size()); if (info.size() > 16) { DEBUG_PRINTF("Too many states\n"); - return nullptr; + return bytecode_ptr<NFA>(nullptr); } return shengCompile_int<sheng>(raw, cc, accel_states, strat, info); @@ -727,13 +727,20 @@ bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc, set<dstate_id_t> *accel_states) { if (!cc.grey.allowSheng) { DEBUG_PRINTF("Sheng is not allowed!\n"); - return nullptr; + return bytecode_ptr<NFA>(nullptr); } +#ifdef HAVE_SVE + if (svcntb()<32) { + DEBUG_PRINTF("Sheng32 failed, SVE width is too small!\n"); + return bytecode_ptr<NFA>(nullptr); + } +#else if (!cc.target_info.has_avx512vbmi()) { DEBUG_PRINTF("Sheng32 failed, no HS_CPU_FEATURES_AVX512VBMI!\n"); - return nullptr; + return bytecode_ptr<NFA>(nullptr); } +#endif sheng_build_strat strat(raw, rm, only_accel_init); dfa_info info(strat); @@ -748,7 +755,7 @@ bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc, assert(info.size() > 16); if (info.size() > 32) { DEBUG_PRINTF("Too many states\n"); - return nullptr; + return bytecode_ptr<NFA>(nullptr); } return shengCompile_int<sheng32>(raw, cc, accel_states, strat, info); @@ -759,13 +766,20 @@ bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc, set<dstate_id_t> *accel_states) { if (!cc.grey.allowSheng) { DEBUG_PRINTF("Sheng is not allowed!\n"); - return nullptr; + return bytecode_ptr<NFA>(nullptr); } +#ifdef HAVE_SVE + if (svcntb()<64) { + DEBUG_PRINTF("Sheng64 failed, SVE width is too small!\n"); + return bytecode_ptr<NFA>(nullptr); + } +#else if (!cc.target_info.has_avx512vbmi()) { DEBUG_PRINTF("Sheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n"); - return nullptr; + return bytecode_ptr<NFA>(nullptr); } +#endif sheng_build_strat strat(raw, rm, only_accel_init); dfa_info info(strat); @@ -780,13 +794,13 @@ bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc, assert(info.size() > 32); if (info.size() > 64) { DEBUG_PRINTF("Too many states\n"); - return nullptr; + return bytecode_ptr<NFA>(nullptr); } vector<dstate> old_states; old_states = info.states; auto nfa = shengCompile_int<sheng64>(raw, cc, accel_states, strat, info); if (!nfa) { - info.states = old_states; + info.states = old_states; // cppcheck-suppress unreadVariable } return nfa; } diff --git a/src/nfa/shufti_simd.hpp b/src/nfa/shufti_simd.hpp index feeb54ab..bdb0ff9f 100644 --- a/src/nfa/shufti_simd.hpp +++ b/src/nfa/shufti_simd.hpp @@ -264,7 +264,7 @@ const u8 *shuftiDoubleExecReal(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, const u8 *buf_end) { if (buf_end - buf < VECTORSIZE) { - return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, buf, buf_end); + return shuftiFwdSlow(reinterpret_cast<const u8 *>(&mask_lo), reinterpret_cast<const u8 *>(&mask_hi), buf, buf_end); } return shuftiExecReal(mask_lo, mask_hi, buf, buf_end); } @@ -272,7 +272,7 @@ const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, const u8 *buf_end) { if (buf_end - buf < VECTORSIZE) { - return shuftiRevSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, buf, buf_end); + return shuftiRevSlow(reinterpret_cast<const u8 *>(&mask_lo), reinterpret_cast<const u8 *>(&mask_hi), buf, buf_end); } return rshuftiExecReal(mask_lo, mask_hi, buf, buf_end); } diff --git a/src/nfa/tamaramacompile.cpp b/src/nfa/tamaramacompile.cpp index d4dd4ab0..a19ad44a 100644 --- a/src/nfa/tamaramacompile.cpp +++ b/src/nfa/tamaramacompile.cpp @@ -32,6 +32,8 @@ */ #include "config.h" +#include <numeric> + + #include "tamaramacompile.h" @@ -129,14 +131,10 @@ buildTamarama(const TamaInfo &tamaInfo, const u32 queue, sizeof(u32) * subSize
+ 64; // offsets to subengines in bytecode and // padding for subengines - auto subl = [](size_t z, NFA *sub) { return z + (size_t)(ROUNDUP_CL(sub->length)); }; total_size += std::accumulate(tamaInfo.subengines.begin(), tamaInfo.subengines.end(), 0, subl); - // for (const auto &sub : tamaInfo.subengines) { - // total_size += ROUNDUP_CL(sub->length); - // } // use subSize as a sentinel value for no active subengines, // so add one to subSize here diff --git a/src/nfa/truffle_simd.hpp b/src/nfa/truffle_simd.hpp index f7dbc6bb..e63180d0 100644 --- a/src/nfa/truffle_simd.hpp +++ b/src/nfa/truffle_simd.hpp @@ -227,7 +227,7 @@ const u8 *fwdBlock(SuperVector shuf_mask_lo_highclear, SuperVector shuf_ma } template -const u8 *truffleExecReal(m128 &shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, const u8 *buf, const u8 *buf_end) { +const u8 *truffleExecReal(const m128 &shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, const u8 *buf, const u8 *buf_end) { assert(buf && buf_end); assert(buf < buf_end); DEBUG_PRINTF("truffle %p len %zu\n", buf, buf_end - buf); @@ -349,4 +349,4 @@ const u8 *rtruffleExecReal(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highse return buf - 1; } -#endif //HAVE_SVE \ No newline at end of file +#endif //HAVE_SVE diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp index b2a87523..f111b701 100644 --- a/src/nfagraph/ng.cpp +++ b/src/nfagraph/ng.cpp @@ -193,9 +193,6 @@ void reduceGraph(NGHolder &g, som_type som, bool utf8, if (!som) { mergeCyclicDotStars(g); - } - - if (!som) { removeSiblingsOfStartDotStar(g); } } @@ -292,7 +289,7 @@ bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr, // Returns true if all components have been added. static -bool processComponents(NG &ng, ExpressionInfo &expr, +bool processComponents(NG &ng, const ExpressionInfo &expr, deque> &g_comp, const som_type som) { const u32 num_components = g_comp.size(); diff --git a/src/nfagraph/ng_anchored_dots.cpp b/src/nfagraph/ng_anchored_dots.cpp index a2aaeb36..cdb09d49 100644 --- a/src/nfagraph/ng_anchored_dots.cpp +++ b/src/nfagraph/ng_anchored_dots.cpp @@ -166,9 +166,9 @@ void reformAnchoredRepeatsComponent(NGHolder &g, return; } - NFAVertex dotV = NGHolder::null_vertex(); + set otherV; - dotV = findReformable(g, compAnchoredStarts, otherV); + NFAVertex dotV = findReformable(g, compAnchoredStarts, otherV); if (dotV == NGHolder::null_vertex()) { DEBUG_PRINTF("no candidate reformable dot found.\n"); return; @@ -258,7 +258,7 @@ void reformAnchoredRepeatsComponent(NGHolder &g, static void reformUnanchoredRepeatsComponent(NGHolder &g, - set &compAnchoredStarts, + const set &compAnchoredStarts, set &compUnanchoredStarts, set &dead, depth *startBegin, depth *startEnd) { @@ -269,9 +269,9 @@ void reformUnanchoredRepeatsComponent(NGHolder &g, } while (true) { - NFAVertex dotV = NGHolder::null_vertex(); + set otherV; - dotV = findReformable(g, compUnanchoredStarts, otherV); + NFAVertex dotV = findReformable(g, compUnanchoredStarts, otherV); if (dotV == NGHolder::null_vertex()) { DEBUG_PRINTF("no candidate reformable dot found.\n"); return; @@ -488,15 +488,15 @@ void collapseVariableDotRepeat(NGHolder &g, NFAVertex start, // Collect all the other optional dot vertices and the successor vertices // by walking down the graph from initialDot - set dots, succ; - if (!gatherParticipants(g, start, initialDot, dots, succ)) { + set dots, succr; + if (!gatherParticipants(g, start, initialDot, dots, succr)) { DEBUG_PRINTF("gatherParticipants failed\n"); return; } DEBUG_PRINTF("optional dot repeat with %zu 
participants, " "terminating in %zu non-dot nodes\n", - dots.size(), succ.size()); + dots.size(), succr.size()); // Remove all the participants and set the start offset dead.insert(dots.begin(), dots.end()); @@ -512,7 +512,7 @@ void collapseVariableDotRepeat(NGHolder &g, NFAVertex start, assert(startEnd->is_reachable()); // Connect our successor vertices to both start and startDs. - for (auto v : succ) { + for (auto v : succr) { add_edge_if_not_present(g.start, v, g); add_edge_if_not_present(g.startDs, v, g); } @@ -558,7 +558,7 @@ void collapseVariableRepeats(NGHolder &g, depth *startBegin, depth *startEnd) { } static -void addDotsBetween(NGHolder &g, NFAVertex lhs, vector &rhs, +void addDotsBetween(NGHolder &g, NFAVertex lhs, const vector &rhs, depth min_repeat, depth max_repeat) { const bool unbounded = max_repeat.is_infinite(); if (unbounded) { diff --git a/src/nfagraph/ng_asserts.cpp b/src/nfagraph/ng_asserts.cpp index 764ebed1..296ddda1 100644 --- a/src/nfagraph/ng_asserts.cpp +++ b/src/nfagraph/ng_asserts.cpp @@ -92,11 +92,12 @@ static const CharReach CHARREACH_NONWORD_UCP_PRE(CHARREACH_NONWORD); static vector getAsserts(const NGHolder &g) { vector out; - for (const auto &e : edges_range(g)) { - if (g[e].assert_flags) { - out.emplace_back(e); - } - } + auto assertflags = [&g=g](const NFAEdge &e) { + return (g[e].assert_flags); + }; + const auto &er = edges_range(g); + std::copy_if(begin(er), end(er), std::back_inserter(out), assertflags); + return out; } @@ -384,7 +385,10 @@ void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, /* there may already be a different edge from start to eod if so * we need to make it unconditional and alive */ - if (NFAEdge start_eod = edge(u, g.acceptEod, g)) { + NFAEdge start_eod; + bool exists; + std::tie(start_eod, exists) = edge(u, g.acceptEod, g); + if (exists) { g[start_eod].assert_flags = 0; dead->erase(start_eod); } else { @@ -437,7 +441,10 @@ void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, /* there may already be a different edge from start to eod if so * we need to make it unconditional and alive */ - if (NFAEdge start_eod = edge(u, g.acceptEod, g)) { + NFAEdge start_eod; + bool exists; + std::tie(start_eod, exists) = edge(u, g.acceptEod, g); + if (exists) { g[start_eod].assert_flags = 0; dead->erase(start_eod); } else { @@ -496,7 +503,8 @@ void ensureCodePointStart(ReportManager &rm, NGHolder &g, * boundaries. Assert resolution handles the badness coming from asserts. * The only other source of trouble is startDs->accept connections. */ - NFAEdge orig = edge(g.startDs, g.accept, g); + NFAEdge orig; + std::tie(orig, std::ignore) = edge(g.startDs, g.accept, g); if (expr.utf8 && orig) { DEBUG_PRINTF("rectifying %u\n", expr.report); Report ir = rm.getBasicInternalReport(expr); diff --git a/src/nfagraph/ng_edge_redundancy.cpp b/src/nfagraph/ng_edge_redundancy.cpp index e48ebb96..26ee8926 100644 --- a/src/nfagraph/ng_edge_redundancy.cpp +++ b/src/nfagraph/ng_edge_redundancy.cpp @@ -514,17 +514,17 @@ bool removeSiblingsOfStartDotStar(NGHolder &g) { * for SOM mode. 
(see UE-1544) */ bool optimiseVirtualStarts(NGHolder &g) { vector dead; + auto deads = [&g=g](const NFAEdge &e) { + return (!is_any_start(source(e, g), g)); + }; + for (auto v : adjacent_vertices_range(g.startDs, g)) { u32 flags = g[v].assert_flags; if (!(flags & POS_FLAG_VIRTUAL_START)) { continue; } - - for (const auto &e : in_edges_range(v, g)) { - if (!is_any_start(source(e, g), g)) { - dead.emplace_back(e); - } - } + const auto &e = in_edges_range(v, g); + std::copy_if(begin(e), end(e), std::back_inserter(dead), deads); } if (dead.empty()) { diff --git a/src/nfagraph/ng_equivalence.cpp b/src/nfagraph/ng_equivalence.cpp index cddc5d68..5b9e5a49 100644 --- a/src/nfagraph/ng_equivalence.cpp +++ b/src/nfagraph/ng_equivalence.cpp @@ -98,9 +98,9 @@ class ClassInfo { public: struct ClassDepth { ClassDepth() {} - ClassDepth(const NFAVertexDepth &d) + explicit ClassDepth(const NFAVertexDepth &d) : d1(d.fromStart), d2(d.fromStartDotStar) {} - ClassDepth(const NFAVertexRevDepth &rd) + explicit ClassDepth(const NFAVertexRevDepth &rd) : d1(rd.toAccept), d2(rd.toAcceptEod) {} DepthMinMax d1; DepthMinMax d2; @@ -159,7 +159,7 @@ public: return id; } - void append(WorkQueue &other) { + void append(const WorkQueue &other) { for (const auto &e : other) { push(e); } @@ -193,7 +193,7 @@ private: } static -bool outIsIrreducible(NFAVertex &v, const NGHolder &g) { +bool outIsIrreducible(const NFAVertex &v, const NGHolder &g) { unsigned nonSpecialVertices = 0; for (auto w : adjacent_vertices_range(v, g)) { if (!is_special(w, g) && w != v) { @@ -205,7 +205,7 @@ bool outIsIrreducible(NFAVertex &v, const NGHolder &g) { } static -bool inIsIrreducible(NFAVertex &v, const NGHolder &g) { +bool inIsIrreducible(const NFAVertex &v, const NGHolder &g) { unsigned nonSpecialVertices = 0; for (auto u : inv_adjacent_vertices_range(v, g)) { if (!is_special(u, g) && u != v) { @@ -339,9 +339,9 @@ vector partitionGraph(vector> &infos, ClassInfo::ClassDepth depth; if (eq == LEFT_EQUIVALENCE) { - depth = depths[vi->vert_index]; + depth = ClassInfo::ClassDepth(depths[vi->vert_index]); } else { - depth = rdepths[vi->vert_index]; + depth = ClassInfo::ClassDepth(rdepths[vi->vert_index]); } ClassInfo ci(g, *vi, depth, eq); @@ -549,8 +549,8 @@ void mergeClass(vector> &infos, NGHolder &g, pred_info->succ.erase(old_vertex_info); // if edge doesn't exist, create it - NFAEdge e = add_edge_if_not_present(pred_info->v, new_v, g); - + NFAEdge e; + std::tie(e, std::ignore) = add_edge_if_not_present(pred_info->v, new_v, g); // put edge tops, if applicable if (!edgetops.empty()) { assert(g[e].tops.empty() || g[e].tops == edgetops); @@ -560,7 +560,8 @@ void mergeClass(vector> &infos, NGHolder &g, pred_info->succ.insert(new_vertex_info); if (new_v_eod) { - NFAEdge ee = add_edge_if_not_present(pred_info->v, new_v_eod, + NFAEdge ee; + std::tie(ee, std::ignore) = add_edge_if_not_present(pred_info->v, new_v_eod, g); // put edge tops, if applicable diff --git a/src/nfagraph/ng_extparam.cpp b/src/nfagraph/ng_extparam.cpp index 66f8dd26..e0352705 100644 --- a/src/nfagraph/ng_extparam.cpp +++ b/src/nfagraph/ng_extparam.cpp @@ -432,7 +432,7 @@ NFAVertex findSingleCyclic(const NGHolder &g) { } static -bool hasOffsetAdjust(const ReportManager &rm, NGHolder &g, +bool hasOffsetAdjust(const ReportManager &rm, const NGHolder &g, int *adjust) { const auto &reports = all_reports(g); if (reports.empty()) { @@ -509,14 +509,14 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { while (v != cyclic) { DEBUG_PRINTF("vertex %zu\n", g[v].index); width++; 
- auto succ = succs(v, g); - if (contains(succ, cyclic)) { - if (succ.size() == 1) { + auto s = succs(v, g); + if (contains(s, cyclic)) { + if (s.size() == 1) { v = cyclic; - } else if (succ.size() == 2) { + } else if (s.size() == 2) { // Cyclic and jump edge. - succ.erase(cyclic); - NFAVertex v2 = *succ.begin(); + s.erase(cyclic); + NFAVertex v2 = *s.begin(); if (!edge(cyclic, v2, g).second) { DEBUG_PRINTF("bad form\n"); return false; @@ -527,11 +527,11 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { return false; } } else { - if (succ.size() != 1) { + if (s.size() != 1) { DEBUG_PRINTF("bad form\n"); return false; } - v = *succ.begin(); + v = *s.begin(); } } @@ -547,12 +547,12 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { while (!is_any_accept(v, g)) { DEBUG_PRINTF("vertex %zu\n", g[v].index); width++; - auto succ = succs(v, g); - if (succ.size() != 1) { + auto s = succs(v, g); + if (s.size() != 1) { DEBUG_PRINTF("bad form\n"); return false; } - v = *succ.begin(); + v = *s.begin(); } int offsetAdjust = 0; @@ -572,27 +572,28 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { return true; } - vector preds; + vector predcs; vector dead; + auto deads = [&g=g](const NFAEdge &e) { + return (target(e, g) != g.startDs); + }; for (auto u : inv_adjacent_vertices_range(cyclic, g)) { DEBUG_PRINTF("pred %zu\n", g[u].index); if (u == cyclic) { continue; } - preds.emplace_back(u); + predcs.emplace_back(u); // We want to delete the out-edges of each predecessor, but need to // make sure we don't delete the startDs self loop. - for (const auto &e : out_edges_range(u, g)) { - if (target(e, g) != g.startDs) { - dead.emplace_back(e); - } - } + + const auto &e = out_edges_range(u, g); + std::copy_if(begin(e), end(e), std::back_inserter(dead), deads); } remove_edges(dead, g); - assert(!preds.empty()); + assert(!predcs.empty()); const CharReach &cr = g[cyclic].char_reach; @@ -600,14 +601,14 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { v = add_vertex(g); g[v].char_reach = cr; - for (auto u : preds) { + for (auto u : predcs) { add_edge(u, v, g); } - preds.clear(); - preds.emplace_back(v); + predcs.clear(); + predcs.emplace_back(v); } - assert(!preds.empty()); - for (auto u : preds) { + assert(!predcs.empty()); + for (auto u : predcs) { add_edge(u, cyclic, g); } diff --git a/src/nfagraph/ng_fixed_width.cpp b/src/nfagraph/ng_fixed_width.cpp index f901a534..e3940d86 100644 --- a/src/nfagraph/ng_fixed_width.cpp +++ b/src/nfagraph/ng_fixed_width.cpp @@ -66,15 +66,15 @@ bool findMask(const NGHolder &g, vector *mask, bool *anchored, return false; } - set &succs = *anchored ? s_succ : sds_succ; - succs.erase(g.startDs); - if (succs.size() != 1) { + set &succrs = *anchored ? s_succ : sds_succ; + succrs.erase(g.startDs); + if (succrs.size() != 1) { DEBUG_PRINTF("branchy root\n"); return false; } NFAVertex u = *anchored ? 
g.start : g.startDs; - NFAVertex v = *succs.begin(); + NFAVertex v = *succrs.begin(); while (true) { DEBUG_PRINTF("validating vertex %zu\n", g[v].index); diff --git a/src/nfagraph/ng_fuzzy.cpp b/src/nfagraph/ng_fuzzy.cpp index 397e385f..61a3ee12 100644 --- a/src/nfagraph/ng_fuzzy.cpp +++ b/src/nfagraph/ng_fuzzy.cpp @@ -71,13 +71,13 @@ vector> gatherSuccessorsByDepth(const NGHolder &g, continue; } - for (auto succ : adjacent_vertices_range(v, g)) { + for (auto succr : adjacent_vertices_range(v, g)) { // ignore self-loops - if (v == succ) { + if (v == succr) { continue; } DEBUG_PRINTF("Node %zu depth %u\n", g[succ].index, d + 1); - next.insert(succ); + next.insert(succr); } } result[d] = next; @@ -113,13 +113,13 @@ vector> gatherPredecessorsByDepth(const NGHolder &g, for (unsigned d = 1; d < depth; d++) { // collect all successors for all current level vertices for (auto v : cur) { - for (auto pred : inv_adjacent_vertices_range(v, g)) { + for (auto predc : inv_adjacent_vertices_range(v, g)) { // ignore self-loops - if (v == pred) { + if (v == predc) { continue; } DEBUG_PRINTF("Node %zu depth %u\n", g[pred].index, d + 1); - next.insert(pred); + next.insert(predc); } } result[d] = next; @@ -582,11 +582,11 @@ private: // set up all reports bool clone = false; - for (auto &pair : reports_to_vertices) { + for (const auto &pair : reports_to_vertices) { const auto &reports = pair.first; - const auto &vertices = pair.second; + const auto &svertices = pair.second; - for (auto src : vertices) { + for (auto src : svertices) { // get all predecessors up to edit distance auto src_vertices_by_depth = gatherPredecessorsByDepth(g, src, edit_distance); @@ -594,7 +594,8 @@ private: // find which accepts source vertex connects to flat_set targets; for (const auto &accept : accepts) { - NFAEdge e = edge(src, accept, g); + NFAEdge e; + std::tie(e, std::ignore) = edge(src, accept, g); if (e) { targets.insert(accept); } @@ -602,8 +603,8 @@ private: assert(targets.size()); for (unsigned d = 0; d < src_vertices_by_depth.size(); d++) { - const auto &preds = src_vertices_by_depth[d]; - for (auto v : preds) { + const auto &predcs = src_vertices_by_depth[d]; + for (auto v : predcs) { // only clone a node if it already contains reports if (clone && !g[v].reports.empty()) { create_clone(v, reports, edit_distance - d, diff --git a/src/nfagraph/ng_haig.cpp b/src/nfagraph/ng_haig.cpp index ae0033ea..8fae4f07 100644 --- a/src/nfagraph/ng_haig.cpp +++ b/src/nfagraph/ng_haig.cpp @@ -514,12 +514,12 @@ static bool doHaig(const NGHolder &g, som_type som, const vector> &triggers, bool unordered_som, raw_som_dfa *rdfa) { - u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from - a fight */ using StateSet = typename Auto::StateSet; vector nfa_state_map; Auto n(g, som, triggers, unordered_som); try { + u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from + a fight */ if (!determinise(n, rdfa->states, state_limit, &nfa_state_map)) { DEBUG_PRINTF("state limit exceeded\n"); return false; diff --git a/src/nfagraph/ng_lbr.cpp b/src/nfagraph/ng_lbr.cpp index 039eeb3b..a6630a95 100644 --- a/src/nfagraph/ng_lbr.cpp +++ b/src/nfagraph/ng_lbr.cpp @@ -154,7 +154,7 @@ bytecode_ptr buildLbrDot(const CharReach &cr, const depth &repeatMin, const depth &repeatMax, u32 minPeriod, bool is_reset, ReportID report) { if (!cr.all()) { - return nullptr; + return bytecode_ptr(nullptr); } enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, @@ -176,7 +176,7 @@ bytecode_ptr buildLbrVerm(const 
CharReach &cr, const depth &repeatMin, const CharReach escapes(~cr); if (escapes.count() != 1) { - return nullptr; + return bytecode_ptr(nullptr); } enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, @@ -199,7 +199,7 @@ bytecode_ptr buildLbrNVerm(const CharReach &cr, const depth &repeatMin, const CharReach escapes(cr); if (escapes.count() != 1) { - return nullptr; + return bytecode_ptr(nullptr); } enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, @@ -228,7 +228,7 @@ bytecode_ptr buildLbrShuf(const CharReach &cr, const depth &repeatMin, minPeriod, rtype); if (shuftiBuildMasks(~cr, (u8 *)&ls->mask_lo, (u8 *)&ls->mask_hi) == -1) { - return nullptr; + return bytecode_ptr(nullptr); } DEBUG_PRINTF("built shuf lbr\n"); @@ -296,7 +296,7 @@ bytecode_ptr constructLBR(const CharReach &cr, const depth &repeatMin, if (!nfa) { assert(0); - return nullptr; + return bytecode_ptr(nullptr); } return nfa; @@ -307,11 +307,11 @@ bytecode_ptr constructLBR(const CastleProto &proto, const CompileContext &cc, const ReportManager &rm) { if (!cc.grey.allowLbr) { - return nullptr; + return bytecode_ptr(nullptr); } if (proto.repeats.size() != 1) { - return nullptr; + return bytecode_ptr(nullptr); } const PureRepeat &repeat = proto.repeats.begin()->second; @@ -319,7 +319,7 @@ bytecode_ptr constructLBR(const CastleProto &proto, if (repeat.reports.size() != 1) { DEBUG_PRINTF("too many reports\n"); - return nullptr; + return bytecode_ptr(nullptr); } bool is_reset; @@ -346,16 +346,16 @@ bytecode_ptr constructLBR(const NGHolder &g, const CompileContext &cc, const ReportManager &rm) { if (!cc.grey.allowLbr) { - return nullptr; + return bytecode_ptr(nullptr); } PureRepeat repeat; if (!isPureRepeat(g, repeat)) { - return nullptr; + return bytecode_ptr(nullptr); } if (repeat.reports.size() != 1) { DEBUG_PRINTF("too many reports\n"); - return nullptr; + return bytecode_ptr(nullptr); } CastleProto proto(g.kind, repeat); diff --git a/src/nfagraph/ng_lbr_sve.hpp b/src/nfagraph/ng_lbr_sve.hpp index 82df3ea1..80822f86 100644 --- a/src/nfagraph/ng_lbr_sve.hpp +++ b/src/nfagraph/ng_lbr_sve.hpp @@ -39,7 +39,7 @@ bytecode_ptr buildLbrVerm16(const CharReach &cr, const depth &repeatMin, const CharReach escapes(~cr); if (escapes.count() > 16) { - return nullptr; + return bytecode_ptr(nullptr); } enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, @@ -62,7 +62,7 @@ bytecode_ptr buildLbrNVerm16(const CharReach &cr, const depth &repeatMin, const CharReach escapes(cr); if (escapes.count() > 16) { - return nullptr; + return bytecode_ptr(nullptr); } enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp index 27d8c524..dad3cb47 100644 --- a/src/nfagraph/ng_limex.cpp +++ b/src/nfagraph/ng_limex.cpp @@ -342,7 +342,7 @@ void attemptToUseAsStart(const NGHolder &g, NFAVertex u, map> &unhandled_succ_tops, map> &tops_out) { flat_set top_inter = unhandled_succ_tops.at(u); - flat_set succs; + flat_set f_succs; for (NFAVertex v : adjacent_vertices_range(u, g)) { if (!contains(unhandled_succ_tops, v)) { return; @@ -360,7 +360,7 @@ void attemptToUseAsStart(const NGHolder &g, NFAVertex u, set_intersection(top_inter.begin(), top_inter.end(), v_tops.begin(), v_tops.end(), ni_inserter); top_inter = std::move(new_inter); - succs.insert(v); + f_succs.insert(v); } if (top_inter.empty()) { @@ -373,7 +373,7 @@ void attemptToUseAsStart(const NGHolder &g, NFAVertex u, } DEBUG_PRINTF("reusing %zu is a start vertex\n", 
g[u].index); - markTopSuccAsHandled(u, top_inter, succs, tops_out, unhandled_top_succs, + markTopSuccAsHandled(u, top_inter, f_succs, tops_out, unhandled_top_succs, unhandled_succ_tops); } @@ -389,11 +389,11 @@ void reusePredsAsStarts(const NGHolder &g, const map &top_reach, /* create list of candidates first, to avoid issues of iter invalidation */ DEBUG_PRINTF("attempting to reuse vertices for top starts\n"); vector cand_starts; - for (NFAVertex u : unhandled_succ_tops | map_keys) { - if (hasSelfLoop(u, g)) { - cand_starts.emplace_back(u); - } - } + auto cands = [&g=g](const NFAVertex &u) { + return (hasSelfLoop(u, g)); + }; + const auto &u = unhandled_succ_tops | map_keys; + std::copy_if(begin(u), end(u), std::back_inserter(cand_starts), cands); for (NFAVertex u : cand_starts) { if (!contains(unhandled_succ_tops, u)) { @@ -652,7 +652,7 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm, u32 numStates = countStates(state_ids); if (numStates > NFA_MAX_STATES) { DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates); - return nullptr; + return bytecode_ptr(nullptr); } map br_cyclic; @@ -722,14 +722,14 @@ bytecode_ptr constructReversedNFA_i(const NGHolder &h_in, u32 hint, assert(h.kind == NFA_REV_PREFIX); /* triggered, raises internal callbacks */ // Do state numbering. - auto state_ids = numberStates(h, {}); + auto state_ids = numberStates(h, flat_set>>()); // Quick exit: if we've got an embarrassment of riches, i.e. more states // than we can implement in our largest NFA model, bail here. u32 numStates = countStates(state_ids); if (numStates > NFA_MAX_STATES) { DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates); - return nullptr; + return bytecode_ptr(nullptr); } assert(sanityCheckGraph(h, state_ids)); diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp index 8bac753d..0cf6006d 100644 --- a/src/nfagraph/ng_limex_accel.cpp +++ b/src/nfagraph/ng_limex_accel.cpp @@ -62,12 +62,12 @@ namespace ue2 { static void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr, const flat_set &cands, - const flat_set &preds, + const flat_set &f_preds, flat_set *next_cands, flat_set *next_preds, flat_set *friends) { for (auto v : cands) { - if (contains(preds, v)) { + if (contains(f_preds, v)) { continue; } @@ -80,7 +80,7 @@ void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr, } for (auto u : inv_adjacent_vertices_range(v, g)) { - if (!contains(preds, u)) { + if (!contains(f_preds, u)) { DEBUG_PRINTF("bad pred\n"); goto next_cand; } @@ -116,8 +116,8 @@ void findAccelFriends(const NGHolder &g, NFAVertex v, u32 friend_depth = offset + 1; - flat_set preds; - insert(&preds, inv_adjacent_vertices(v, g)); + flat_set f_preds; + insert(&f_preds, inv_adjacent_vertices(v, g)); const CharReach &cr = g[v].char_reach; flat_set cands; @@ -126,9 +126,9 @@ void findAccelFriends(const NGHolder &g, NFAVertex v, flat_set next_preds; flat_set next_cands; for (u32 i = 0; i < friend_depth; i++) { - findAccelFriendGeneration(g, cr, cands, preds, &next_cands, &next_preds, + findAccelFriendGeneration(g, cr, cands, f_preds, &next_cands, &next_preds, friends); - preds.insert(next_preds.begin(), next_preds.end()); + f_preds.insert(next_preds.begin(), next_preds.end()); next_preds.clear(); cands.swap(next_cands); next_cands.clear(); @@ -321,7 +321,7 @@ struct DAccelScheme { bool cd_a = buildDvermMask(a.double_byte); bool cd_b = buildDvermMask(b.double_byte); if (cd_a != cd_b) { - return cd_a > cd_b; + return cd_a; } } @@ -811,11 +811,9 @@ depth_done: 
return true; } } - } // Second option: a two-byte shufti (i.e. less than eight 2-byte // literals) - if (depth > 1) { for (unsigned int i = 0; i < (depth - 1); i++) { if (depthReach[i].count() * depthReach[i+1].count() <= DOUBLE_SHUFTI_LIMIT) { diff --git a/src/nfagraph/ng_literal_analysis.cpp b/src/nfagraph/ng_literal_analysis.cpp index 9fdd8e65..bd9c3ab2 100644 --- a/src/nfagraph/ng_literal_analysis.cpp +++ b/src/nfagraph/ng_literal_analysis.cpp @@ -490,9 +490,9 @@ vector add_reverse_edges_and_index(LitGraph &lg) { const size_t edge_count = num_edges(lg); vector fwd_edges; fwd_edges.reserve(edge_count); - for (const auto &e : edges_range(lg)) { - fwd_edges.push_back(e); - } + + const auto &e = edges_range(lg); + std::copy(begin(e), end(e), std::back_inserter(fwd_edges)); vector rev_map(2 * edge_count); diff --git a/src/nfagraph/ng_literal_analysis.h b/src/nfagraph/ng_literal_analysis.h index 6bb87556..b1e20053 100644 --- a/src/nfagraph/ng_literal_analysis.h +++ b/src/nfagraph/ng_literal_analysis.h @@ -70,7 +70,7 @@ bool bad_mixed_sensitivity(const ue2_literal &s); * Score all the edges in the given graph, returning them in \p scores indexed * by edge_index. */ std::vector scoreEdges(const NGHolder &h, - const flat_set &known_bad = {}); + const flat_set &known_bad = flat_set()); /** Returns a score for a literal set. Lower scores are better. */ u64a scoreSet(const std::set &s); diff --git a/src/nfagraph/ng_literal_component.cpp b/src/nfagraph/ng_literal_component.cpp index 4d3965df..dfda0838 100644 --- a/src/nfagraph/ng_literal_component.cpp +++ b/src/nfagraph/ng_literal_component.cpp @@ -98,7 +98,7 @@ void addToString(string &s, const NGHolder &g, NFAVertex v) { } static -bool splitOffLiteral(NG &ng, NGHolder &g, NFAVertex v, const bool anchored, +bool splitOffLiteral(NG &ng, const NGHolder &g, NFAVertex v, const bool anchored, set &dead) { DEBUG_PRINTF("examine vertex %zu\n", g[v].index); bool nocase = false, casefixed = false; diff --git a/src/nfagraph/ng_mcclellan_internal.h b/src/nfagraph/ng_mcclellan_internal.h index f069d733..0acd8aa5 100644 --- a/src/nfagraph/ng_mcclellan_internal.h +++ b/src/nfagraph/ng_mcclellan_internal.h @@ -94,7 +94,7 @@ void transition_graph(autom &nfa, const std::vector &vByStateId, /* generate top transitions, false -> top = selfloop */ bool top_allowed = is_triggered(graph); - StateSet succ = nfa.dead; + StateSet succr = nfa.dead; for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) { NFAVertex u = vByStateId[i]; @@ -102,7 +102,7 @@ void transition_graph(autom &nfa, const std::vector &vByStateId, if (contains(unused, v)) { continue; } - succ.set(graph[v].index); + succr.set(graph[v].index); } if (top_allowed && !nfa.toppable.test(i)) { @@ -112,15 +112,15 @@ void transition_graph(autom &nfa, const std::vector &vByStateId, } } - StateSet active_squash = succ & squash; + StateSet active_squash = succr & squash; if (active_squash.any()) { for (size_t j = active_squash.find_first(); j != active_squash.npos; j = active_squash.find_next(j)) { - succ &= squash_mask.find(j)->second; + succr &= squash_mask.find(j)->second; } } - for (size_t j = succ.find_first(); j != succ.npos; j = succ.find_next(j)) { + for (size_t j = succr.find_first(); j != succr.npos; j = succr.find_next(j)) { const CharReach &cr = cr_by_index[j]; for (size_t s = cr.find_first(); s != cr.npos; s = cr.find_next(s)) { next[s].set(j); /* already alpha'ed */ diff --git a/src/nfagraph/ng_misc_opt.cpp b/src/nfagraph/ng_misc_opt.cpp index 99c98a81..fa7496c5 100644 --- 
a/src/nfagraph/ng_misc_opt.cpp +++ b/src/nfagraph/ng_misc_opt.cpp @@ -404,19 +404,19 @@ CharReach reduced_cr(NFAVertex v, const NGHolder &g, return v_cr; } - NFAVertex pred = getSoleSourceVertex(g, v); - assert(pred); + NFAVertex s_pred = getSoleSourceVertex(g, v); + assert(s_pred); - /* require pred to be fed by one vertex OR (start + startDS) */ + /* require s_pred to be fed by one vertex OR (start + startDS) */ NFAVertex predpred; - size_t idp = in_degree(pred, g); - if (hasSelfLoop(pred, g)) { + size_t idp = in_degree(s_pred, g); + if (hasSelfLoop(s_pred, g)) { return v_cr; /* not cliche */ } else if (idp == 1) { - predpred = getSoleSourceVertex(g, pred); + predpred = getSoleSourceVertex(g, s_pred); } else if (idp == 2 - && edge(g.start, pred, g).second - && edge(g.startDs, pred, g).second) { + && edge(g.start, s_pred, g).second + && edge(g.startDs, s_pred, g).second) { predpred = g.startDs; } else { return v_cr; /* not cliche */ @@ -425,7 +425,7 @@ CharReach reduced_cr(NFAVertex v, const NGHolder &g, assert(predpred); /* require predpred to be cyclic and its cr to be a superset of - pred and v */ + s_pred and v */ if (!hasSelfLoop(predpred, g)) { return v_cr; /* not cliche */ } @@ -435,7 +435,7 @@ CharReach reduced_cr(NFAVertex v, const NGHolder &g, return v_cr; /* fake cyclic */ } - const CharReach &p_cr = g[pred].char_reach; + const CharReach &p_cr = g[s_pred].char_reach; const CharReach &pp_cr = g[predpred].char_reach; if (!v_cr.isSubsetOf(pp_cr) || !p_cr.isSubsetOf(pp_cr)) { return v_cr; /* not cliche */ @@ -446,7 +446,7 @@ CharReach reduced_cr(NFAVertex v, const NGHolder &g, set v_succ; insert(&v_succ, adjacent_vertices(v, g)); set p_succ; - insert(&p_succ, adjacent_vertices(pred, g)); + insert(&p_succ, adjacent_vertices(s_pred, g)); if (!is_subset_of(v_succ, p_succ)) { DEBUG_PRINTF("fail\n"); @@ -456,7 +456,7 @@ CharReach reduced_cr(NFAVertex v, const NGHolder &g, if (contains(v_succ, g.accept) || contains(v_succ, g.acceptEod)) { /* need to check that reports of v are a subset of p's */ if (!is_subset_of(g[v].reports, - g[pred].reports)) { + g[s_pred].reports)) { DEBUG_PRINTF("fail - reports not subset\n"); return v_cr; /* not cliche */ } diff --git a/src/nfagraph/ng_netflow.cpp b/src/nfagraph/ng_netflow.cpp index b48e33c4..6e65093f 100644 --- a/src/nfagraph/ng_netflow.cpp +++ b/src/nfagraph/ng_netflow.cpp @@ -93,7 +93,8 @@ void addReverseEdges(NGHolder &g, vector &reverseEdge, if (it == allEdges.end()) { // No reverse edge, add one. NFAVertex u = source(fwd, g), v = target(fwd, g); - NFAEdge rev = add_edge(v, u, g); + NFAEdge rev; + std::tie(rev, std::ignore) = add_edge(v, u, g); it = allEdges.insert(make_pair(make_pair(vidx, uidx), rev)).first; // Add to capacity map. u32 revIndex = g[rev].index; diff --git a/src/nfagraph/ng_prune.cpp b/src/nfagraph/ng_prune.cpp index 042807fa..2390b865 100644 --- a/src/nfagraph/ng_prune.cpp +++ b/src/nfagraph/ng_prune.cpp @@ -62,11 +62,13 @@ void pruneUnreachable(NGHolder &g) { && edge(g.accept, g.acceptEod, g).second) { // Trivial case: there are no in-edges to our accepts (other than // accept->acceptEod), so all non-specials are unreachable. - for (auto v : vertices_range(g)) { - if (!is_special(v, g)) { - dead.emplace_back(v); - } - } + + auto deads = [&g=g](const NFAVertex &v) { + return (!is_special(v, g)); + }; + const auto &vr = vertices_range(g); + std::copy_if(begin(vr), end(vr), std::back_inserter(dead), deads); + } else { // Walk a reverse graph from acceptEod with Boost's depth_first_visit // call. 
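The pruneUnreachable hunk above, like the reusePredsAsStarts, getDistancesFromSOM and cleanupPositions hunks elsewhere in this patch, swaps a hand-written filter loop for std::copy_if with a named predicate lambda. A minimal sketch of the idiom, using a plain std::vector in place of Hyperscan's vertices_range(g) and NFAVertex (which are assumed, not reproduced here):

```
#include <algorithm>
#include <cstdio>
#include <iterator>
#include <vector>

// Minimal stand-in for a graph vertex; the real code iterates Hyperscan's
// vertices_range(g) instead of a flat vector.
struct Vertex {
    int index;
    bool self_loop;
};

int main() {
    std::vector<Vertex> graph{{0, false}, {1, true}, {2, false}, {3, true}};

    // Named predicate, as in the patch's "deads"/"cands" lambdas.
    auto has_self_loop = [](const Vertex &v) { return v.self_loop; };

    // copy_if + back_inserter replaces the explicit
    // "for (v : range) { if (pred(v)) out.emplace_back(v); }" loop.
    std::vector<Vertex> cand_starts;
    std::copy_if(graph.begin(), graph.end(),
                 std::back_inserter(cand_starts), has_self_loop);

    for (const Vertex &v : cand_starts) {
        std::printf("candidate vertex %d\n", v.index);
    }
    return 0;
}
```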
@@ -199,17 +201,17 @@ void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm) { } vector dead; + auto deads = [&g=g](const NFAEdge &e) { + return (!is_any_accept(target(e, g), g)); + }; for (auto u : inv_adjacent_vertices_range(g.accept, g)) { if (is_special(u, g)) { continue; } // We can prune any out-edges that aren't accepts - for (const auto &e : out_edges_range(u, g)) { - if (!is_any_accept(target(e, g), g)) { - dead.emplace_back(e); - } - } + const auto &er = out_edges_range(u, g); + std::copy_if(begin(er), end(er), std::back_inserter(dead), deads); } if (dead.empty()) { diff --git a/src/nfagraph/ng_puff.cpp b/src/nfagraph/ng_puff.cpp index 33bb2a9e..2252a7de 100644 --- a/src/nfagraph/ng_puff.cpp +++ b/src/nfagraph/ng_puff.cpp @@ -244,7 +244,7 @@ u32 allowedSquashDistance(const CharReach &cr, u32 min_width, const NGHolder &g, /** Gives a stronger puff trigger when the trigger is connected to a wide * cyclic state (aside from sds) */ static -void improveHead(NGHolder &g, NFAVertex *a, vector *nodes) { +void improveHead(const NGHolder &g, NFAVertex *a, vector *nodes) { DEBUG_PRINTF("attempting to improve puff trigger\n"); assert(!nodes->empty()); const CharReach &puff_cr = g[nodes->back()].char_reach; @@ -263,7 +263,7 @@ void improveHead(NGHolder &g, NFAVertex *a, vector *nodes) { } static -void constructPuff(NGHolder &g, const NFAVertex a, const NFAVertex puffv, +void constructPuff(const NGHolder &g, const NFAVertex a, const NFAVertex puffv, const CharReach &cr, const ReportID report, u32 width, bool fixed_depth, bool unbounded, bool auto_restart, RoseBuild &rose, ReportManager &rm, @@ -361,9 +361,7 @@ bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a, // single report ID on a vertex if (is_match_vertex(a, g)) { DEBUG_PRINTF("stop puffing due to vertex that leads to accept\n"); - if (!nodes.empty()) { - nodes.pop_back(); - } + nodes.pop_back(); break; } } diff --git a/src/nfagraph/ng_redundancy.cpp b/src/nfagraph/ng_redundancy.cpp index 6e89c5c8..594ca21f 100644 --- a/src/nfagraph/ng_redundancy.cpp +++ b/src/nfagraph/ng_redundancy.cpp @@ -307,13 +307,15 @@ void markForRemoval(const NFAVertex v, VertexInfoMap &infoMap, static bool hasInEdgeTops(const NGHolder &g, NFAVertex v) { - NFAEdge e = edge(g.start, v, g); + + NFAEdge e; + std::tie(e, std::ignore) = edge(g.start, v, g); return e && !g[e].tops.empty(); } /** Transform (1), removal of redundant vertices. */ static -bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, +bool doUselessMergePass(const NGHolder &g, const som_type som, VertexInfoMap &infoMap, set &removable) { /* useless merges can be done in any order, no need to take any care with * ordering */ @@ -323,7 +325,7 @@ bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, bool changed = false; for (auto v : vertices_range(g)) { - VertexInfo &info = infoMap[v]; + const VertexInfo &info = infoMap[v]; if (info.isRemoved) { continue; @@ -439,7 +441,7 @@ bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, continue; // Conservatively skip anything with nonzero tops. 
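hasInEdgeTops above now unpacks the (descriptor, bool) pair returned by Boost.Graph's edge() via std::tie, and many hunks in this patch take .first from add_edge() instead of relying on the descriptor's implicit-bool behaviour. A minimal sketch of both calls against a bare boost::adjacency_list; NGHolder and NFAEdge are Hyperscan wrappers and the graph shape here is illustrative only:

```
#include <boost/graph/adjacency_list.hpp>
#include <cstdio>
#include <tuple>

using Graph = boost::adjacency_list<boost::vecS, boost::vecS,
                                    boost::bidirectionalS>;
using Edge = boost::graph_traits<Graph>::edge_descriptor;

int main() {
    Graph g(3);

    // add_edge() returns std::pair<edge_descriptor, bool>; taking .first
    // keeps the descriptor without leaning on implicit conversions.
    Edge e = boost::add_edge(0, 1, g).first;
    (void)e;

    // edge() returns the same pair shape; the bool reports existence.
    Edge found;
    bool exists;
    std::tie(found, exists) = boost::edge(0, 1, g);
    std::printf("0->1 %s\n", exists ? "present" : "absent");

    std::tie(found, exists) = boost::edge(1, 2, g);
    std::printf("1->2 %s\n", exists ? "present" : "absent");
    return 0;
}
```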
} - CharReach &otherReach = g[t].char_reach; + const CharReach &otherReach = g[t].char_reach; if (currReach.isSubsetOf(otherReach)) { DEBUG_PRINTF("removing redundant vertex %zu (keeping %zu)\n", g[v].index, g[t].index); @@ -636,12 +638,12 @@ bool reversePathReachSubset(const NFAEdge &e, const NFAVertex &dom, NFAVertex start = source(e, g); using RevGraph = boost::reverse_graph; - map vertexColor; // Walk the graph backwards from v, examining each node. We fail (return // false) if we encounter a node with reach NOT a subset of domReach, and // we stop searching at dom. try { + map vertexColor; depth_first_visit(RevGraph(g), start, ReachSubsetVisitor(domReach), make_assoc_property_map(vertexColor), @@ -664,12 +666,12 @@ bool forwardPathReachSubset(const NFAEdge &e, const NFAVertex &dom, } NFAVertex start = target(e, g); - map vertexColor; // Walk the graph forward from v, examining each node. We fail (return // false) if we encounter a node with reach NOT a subset of domReach, and // we stop searching at dom. try { + map vertexColor; depth_first_visit(g, start, ReachSubsetVisitor(domReach), make_assoc_property_map(vertexColor), VertexIs(dom)); @@ -748,7 +750,7 @@ u32 findCyclic(const NGHolder &g, vector &cyclic) { } static -void findCyclicDom(NGHolder &g, vector &cyclic, +void findCyclicDom(const NGHolder &g, vector &cyclic, set &dead, som_type som) { auto dominators = findDominators(g); @@ -792,7 +794,7 @@ void findCyclicDom(NGHolder &g, vector &cyclic, } static -void findCyclicPostDom(NGHolder &g, vector &cyclic, +void findCyclicPostDom(const NGHolder &g, vector &cyclic, set &dead) { auto postdominators = findPostDominators(g); diff --git a/src/nfagraph/ng_repeat.cpp b/src/nfagraph/ng_repeat.cpp index a0c2735f..4c5341c8 100644 --- a/src/nfagraph/ng_repeat.cpp +++ b/src/nfagraph/ng_repeat.cpp @@ -393,9 +393,9 @@ void checkReachSubgraphs(const NGHolder &g, vector &rs, unordered_set involved(rsi.vertices.begin(), rsi.vertices.end()); unordered_set tail(involved); // to look for back-edges. - unordered_set pred, succ; - proper_pred(g, rsi.vertices.front(), pred); - proper_succ(g, rsi.vertices.back(), succ); + unordered_set v_pred, v_succ; + proper_pred(g, rsi.vertices.front(), v_pred); + proper_succ(g, rsi.vertices.back(), v_succ); flat_set reports; findFirstReports(g, rsi, reports); @@ -406,7 +406,7 @@ void checkReachSubgraphs(const NGHolder &g, vector &rs, for (auto v : rsi.vertices) { tail.erase(v); // now contains all vertices _after_ this one. - if (vertexIsBad(g, v, involved, tail, pred, succ, reports)) { + if (vertexIsBad(g, v, involved, tail, v_pred, v_succ, reports)) { recalc = true; continue; } @@ -793,10 +793,10 @@ void replaceSubgraphWithSpecial(NGHolder &g, ReachSubgraph &rsi, const unordered_set involved(rsi.vertices.begin(), rsi.vertices.end()); - vector succs; - getSuccessors(g, rsi, &succs); + vector g_succs; + getSuccessors(g, rsi, &g_succs); - unpeelNearEnd(g, rsi, depths, &succs); + unpeelNearEnd(g, rsi, depths, &g_succs); // Create our replacement cyclic state with the same reachability and // report info as the last vertex in our topo-ordered list. @@ -824,7 +824,7 @@ void replaceSubgraphWithSpecial(NGHolder &g, ReachSubgraph &rsi, // Wire cyclic state to tug trigger states built from successors. 
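The vertexColor maps above move inside the try blocks that actually use them, a cppcheck-style variableScope fix: the map is constructed only on the path that needs it and cannot leak state between walks. A small sketch of the same idea, with a hypothetical visit() that signals early termination by throwing, loosely mirroring how these passes abort depth_first_visit:

```
#include <cstdio>
#include <exception>
#include <map>

struct StopVisiting : std::exception {};

// Hypothetical stand-in for the depth-first walk, which bails out of the
// traversal by throwing.
static void visit(int start, std::map<int, int> &color) {
    color[start] = 1;
    if (start == 3) {
        throw StopVisiting();
    }
}

static bool walk_succeeds(int start) {
    try {
        // Declared in the narrowest scope that uses it: a fresh colour map
        // exists only while this walk runs, so no state survives an
        // aborted traversal or bleeds into the next call.
        std::map<int, int> vertexColor;
        visit(start, vertexColor);
    } catch (const StopVisiting &) {
        return false;
    }
    return true;
}

int main() {
    std::printf("start 1: %d\n", walk_succeeds(1) ? 1 : 0);
    std::printf("start 3: %d\n", walk_succeeds(3) ? 1 : 0);
    return 0;
}
```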
vector tugs; - for (auto v : succs) { + for (auto v : g_succs) { buildTugTrigger(g, cyclic, v, involved, depths, tugs); } created.insert(tugs.begin(), tugs.end()); @@ -859,11 +859,9 @@ void replaceSubgraphWithLazySpecial(NGHolder &g, ReachSubgraph &rsi, assert(rsi.repeatMax >= rsi.repeatMin); DEBUG_PRINTF("entry\n"); - - const unordered_set involved(rsi.vertices.begin(), - rsi.vertices.end()); - vector succs; - getSuccessors(g, rsi, &succs); + + vector g_succs; + getSuccessors(g, rsi, &g_succs); // Create our replacement cyclic state with the same reachability and // report info as the last vertex in our topo-ordered list. @@ -892,15 +890,15 @@ void replaceSubgraphWithLazySpecial(NGHolder &g, ReachSubgraph &rsi, // In the rose case, our tug is our cyclic, and it's wired to our // successors (which should be just the accept). vector tugs; - assert(succs.size() == 1); - for (auto v : succs) { + assert(g_succs.size() == 1); + for (auto v : g_succs) { add_edge(cyclic, v, g); } // Wire pos trigger to accept if min repeat is one -- this deals with cases // where we can get a pos and tug trigger on the same byte. if (rsi.repeatMin == depth(1)) { - for (auto v : succs) { + for (auto v : g_succs) { add_edge(pos_trigger, v, g); g[pos_trigger].reports = g[cyclic].reports; } @@ -1144,7 +1142,7 @@ NFAVertex buildTriggerStates(NGHolder &g, const vector &trigger, g[v].char_reach = cr; add_edge(u, v, g); if (u == g.start) { - g[edge(u, v, g)].tops.insert(top); + g[edge(u, v, g).first].tops.insert(top); } u = v; } @@ -1467,9 +1465,9 @@ struct StrawWalker { } if (ai != ae) { DEBUG_PRINTF("more than one succ\n"); - set succs; - insert(&succs, adjacent_vertices(v, g)); - succs.erase(v); + set a_succs; + insert(&a_succs, adjacent_vertices(v, g)); + a_succs.erase(v); for (tie(ai, ae) = adjacent_vertices(v, g); ai != ae; ++ai) { next = *ai; DEBUG_PRINTF("checking %zu\n", g[next].index); @@ -1479,7 +1477,7 @@ struct StrawWalker { set lsuccs; insert(&lsuccs, adjacent_vertices(next, g)); - if (lsuccs != succs) { + if (lsuccs != a_succs) { continue; } @@ -1887,7 +1885,7 @@ void buildFeeder(NGHolder &g, const BoundedRepeatData &rd, * offset. */ static -bool improveLeadingRepeat(NGHolder &g, BoundedRepeatData &rd, +bool improveLeadingRepeat(NGHolder &g, const BoundedRepeatData &rd, unordered_set &created, const vector &all_repeats) { assert(edge(g.startDs, g.startDs, g).second); @@ -1908,9 +1906,9 @@ bool improveLeadingRepeat(NGHolder &g, BoundedRepeatData &rd, } vector straw; - NFAVertex pred = + NFAVertex w_pred = walkStrawToCyclicRev(g, rd.pos_trigger, all_repeats, straw); - if (pred != g.startDs) { + if (w_pred != g.startDs) { DEBUG_PRINTF("straw walk doesn't lead to startDs\n"); return false; } @@ -1958,7 +1956,7 @@ bool improveLeadingRepeat(NGHolder &g, BoundedRepeatData &rd, } static -vector makeOwnStraw(NGHolder &g, BoundedRepeatData &rd, +vector makeOwnStraw(NGHolder &g, const BoundedRepeatData &rd, const vector &straw) { // Straw runs from startDs to our pos trigger. assert(!straw.empty()); @@ -1992,7 +1990,7 @@ vector makeOwnStraw(NGHolder &g, BoundedRepeatData &rd, * rewire the straw to start instead of removing the startDs self-loop. 
*/ static -bool improveLeadingRepeatOutfix(NGHolder &g, BoundedRepeatData &rd, +bool improveLeadingRepeatOutfix(NGHolder &g, const BoundedRepeatData &rd, unordered_set &created, const vector &all_repeats) { assert(g.kind == NFA_OUTFIX); @@ -2013,9 +2011,9 @@ bool improveLeadingRepeatOutfix(NGHolder &g, BoundedRepeatData &rd, } vector straw; - NFAVertex pred = + NFAVertex w_pred = walkStrawToCyclicRev(g, rd.pos_trigger, all_repeats, straw); - if (pred != g.startDs) { + if (w_pred != g.startDs) { DEBUG_PRINTF("straw walk doesn't lead to startDs\n"); return false; } diff --git a/src/nfagraph/ng_restructuring.cpp b/src/nfagraph/ng_restructuring.cpp index 50c4d64d..99f287db 100644 --- a/src/nfagraph/ng_restructuring.cpp +++ b/src/nfagraph/ng_restructuring.cpp @@ -54,8 +54,8 @@ void wireStartToTops(NGHolder &g, const flat_set &tops, vector &tempEdges) { for (NFAVertex v : tops) { assert(!isLeafNode(v, g)); - - const NFAEdge &e = add_edge(g.start, v, g); + auto edge_result = add_edge(g.start, v, g); + const NFAEdge &e = edge_result.first; tempEdges.emplace_back(e); } } diff --git a/src/nfagraph/ng_som.cpp b/src/nfagraph/ng_som.cpp index d40bbdef..ad7a38df 100644 --- a/src/nfagraph/ng_som.cpp +++ b/src/nfagraph/ng_som.cpp @@ -877,18 +877,18 @@ bool beginsWithDotStar(const NGHolder &g) { // We can ignore the successors of start, as matches that begin there will // necessarily have a SOM of 0. - set succ; - insert(&succ, adjacent_vertices(g.startDs, g)); - succ.erase(g.startDs); + set a_succ; + insert(&a_succ, adjacent_vertices(g.startDs, g)); + a_succ.erase(g.startDs); - for (auto v : succ) { + for (auto v : a_succ) { // We want 'dot' states that aren't virtual starts. if (g[v].char_reach.all() && !g[v].assert_flags) { hasDot = true; set dotsucc; insert(&dotsucc, adjacent_vertices(v, g)); - if (dotsucc != succ) { + if (dotsucc != a_succ) { DEBUG_PRINTF("failed dot-star succ check\n"); return false; } @@ -1178,7 +1178,7 @@ void expandGraph(NGHolder &g, unordered_map ®ions, } static -bool doTreePlanningIntl(NGHolder &g, +bool doTreePlanningIntl(const NGHolder &g, const unordered_map ®ions, const map &info, map::const_iterator picked, u32 bad_region, @@ -1293,8 +1293,8 @@ bool doTreePlanningIntl(NGHolder &g, DEBUG_PRINTF("add mapped reporters for region %u\n", it->first); addMappedReporterVertices(it->second, g, copy_to_orig, plan.back().reporters); - } while (it->second.optional && it != info.rend() && - (++it)->first > furthest->first); + } while (it != info.rend() && it->second.optional && + (++it)->first > furthest->first); return true; } @@ -1409,7 +1409,7 @@ bool doSomPlanning(NGHolder &g, bool stuck_in, /* Need to verify how far the lock covers */ u32 bad_region; - NGHolder *ap_pref = plan.back().prefix.get(); + const NGHolder *ap_pref = plan.back().prefix.get(); NGHolder ap_temp; if (hasBigCycles(*ap_pref)) { fillRoughMidfix(&ap_temp, g, regions, info, picked); @@ -1552,7 +1552,7 @@ bool doSomPlanning(NGHolder &g, bool stuck_in, DEBUG_PRINTF("region %u contributes reporters to last plan\n", it->first); addReporterVertices(it->second, g, plan.back().reporters); - } while (it->second.optional && it != info.rend() && + } while (it != info.rend() && it->second.optional && (++it)->first > furthest->first); DEBUG_PRINTF("done!\n"); @@ -1856,7 +1856,7 @@ bool doSomRevNfa(NG &ng, NGHolder &g, const CompileContext &cc) { } static -u32 doSomRevNfaPrefix(NG &ng, const ExpressionInfo &expr, NGHolder &g, +u32 doSomRevNfaPrefix(NG &ng, const ExpressionInfo &expr, const NGHolder &g, const CompileContext &cc) 
{ depth maxWidth = findMaxWidth(g); @@ -2012,7 +2012,7 @@ void setReportOnHaigPrefix(RoseBuild &rose, NGHolder &h) { } static -bool tryHaig(RoseBuild &rose, NGHolder &g, +bool tryHaig(RoseBuild &rose, const NGHolder &g, const unordered_map ®ions, som_type som, u32 somPrecision, map::const_iterator picked, @@ -2444,13 +2444,9 @@ void makeReportsSomPass(ReportManager &rm, NGHolder &g) { } static -bool doLitHaigSom(NG &ng, NGHolder &g, som_type som) { +bool doLitHaigSom(NG &ng, const NGHolder &g, som_type som) { ue2_literal lit; shared_ptr rhs = make_shared(); - if (!rhs) { - assert(0); - throw std::bad_alloc(); - } if (!ng.cc.grey.allowLitHaig) { return false; } @@ -2515,10 +2511,6 @@ bool doHaigLitHaigSom(NG &ng, NGHolder &g, ue2_literal lit; shared_ptr rhs = make_shared(); shared_ptr lhs = make_shared(); - if (!rhs || !lhs) { - assert(0); - throw std::bad_alloc(); - } if (!splitOffBestLiteral(g, regions, &lit, &*lhs, &*rhs, ng.cc)) { return false; @@ -2661,7 +2653,7 @@ bool doHaigLitHaigSom(NG &ng, NGHolder &g, } static -bool doMultiLitHaigSom(NG &ng, NGHolder &g, som_type som) { +bool doMultiLitHaigSom(NG &ng, const NGHolder &g, som_type som) { set lits; shared_ptr rhs = make_shared(); if (!ng.cc.grey.allowLitHaig) { @@ -3135,7 +3127,7 @@ sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const ExpressionInfo &expr, // try a redundancy pass. if (addSomRedundancy(g, depths)) { - depths = getDistancesFromSOM(g); + depths = getDistancesFromSOM(g); // cppcheck-suppress unreadVariable } auto regions = assignRegions(g); diff --git a/src/nfagraph/ng_som_add_redundancy.cpp b/src/nfagraph/ng_som_add_redundancy.cpp index 50b49abd..1b285d96 100644 --- a/src/nfagraph/ng_som_add_redundancy.cpp +++ b/src/nfagraph/ng_som_add_redundancy.cpp @@ -113,9 +113,9 @@ bool forkVertex(NFAVertex v, NGHolder &g, vector &depths, } *numNewVertices += predGroups.size(); - for (auto &group : predGroups) { + for (const auto &group : predGroups) { const depth &predDepth = group.first; - const vector &preds = group.second; + const vector &gspreds = group.second; // Clone v for this depth with all its associated out-edges. u32 clone_idx = depths.size(); // next index to be used @@ -131,8 +131,8 @@ bool forkVertex(NFAVertex v, NGHolder &g, vector &depths, add_edge(clone, target(e, g), g[e], g); } - // Add in-edges from preds in this group. - for (const auto &e : preds) { + // Add in-edges from gspreds in this group. + for (const auto &e : gspreds) { add_edge(source(e, g), clone, g[e], g); } } diff --git a/src/nfagraph/ng_som_util.cpp b/src/nfagraph/ng_som_util.cpp index f2501c76..151e50d1 100644 --- a/src/nfagraph/ng_som_util.cpp +++ b/src/nfagraph/ng_som_util.cpp @@ -58,11 +58,12 @@ vector getDistancesFromSOM(const NGHolder &g_orig) { cloneHolder(g, g_orig, &vmap); vector vstarts; - for (auto v : vertices_range(g)) { - if (is_virtual_start(v, g)) { - vstarts.emplace_back(v); - } - } + auto vstart = [&g=g](const NFAVertex &v) { + return (is_virtual_start(v, g)); + }; + const auto &vr = vertices_range(g); + std::copy_if(begin(vr), end(vr), std::back_inserter(vstarts), vstart); + vstarts.emplace_back(g.startDs); // wire the successors of every virtual start or startDs to g.start. 
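The reordered do/while conditions in doTreePlanningIntl and doSomPlanning above are a genuine correctness fix, not a style change: the old order read it->second before comparing it against info.rend(). Because && short-circuits left to right, putting the range check first guarantees the dereference only happens for a valid iterator. A small sketch of the same hazard on a reverse-iterated map; the names are illustrative only:

```
#include <cstdio>
#include <map>

struct Region {
    bool optional;
};

int main() {
    // Keyed regions, walked in reverse as in the SoM planning loops.
    std::map<int, Region> info{{1, {true}}, {2, {true}}, {3, {false}}};

    auto it = info.rbegin();
    do {
        std::printf("region %d\n", it->first);
        // Correct order: the rend() check must come first. Writing
        // "it->second.optional && it != info.rend()" would dereference
        // the iterator after it has walked off the front of the map.
    } while (++it != info.rend() && it->second.optional);

    return 0;
}
```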
@@ -269,18 +270,6 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g, boost::depth_first_search(c_g, visitor(backEdgeVisitor) .root_vertex(c_g.start)); - for (const auto &e : be) { - NFAVertex s = source(e, c_g); - NFAVertex t = target(e, c_g); - DEBUG_PRINTF("back edge %zu %zu\n", c_g[s].index, c_g[t].index); - if (s != t) { - assert(0); - DEBUG_PRINTF("eek big cycle\n"); - rv = true; /* big cycle -> eek */ - goto exit; - } - } - DEBUG_PRINTF("checking acyclic+selfloop graph\n"); rv = !firstMatchIsFirst(c_g); diff --git a/src/nfagraph/ng_split.cpp b/src/nfagraph/ng_split.cpp index 91a099fc..528b72f7 100644 --- a/src/nfagraph/ng_split.cpp +++ b/src/nfagraph/ng_split.cpp @@ -151,7 +151,8 @@ void splitRHS(const NGHolder &base, const vector &pivots, for (auto pivot : pivots) { assert(contains(*rhs_map, pivot)); - NFAEdge e = add_edge(rhs->start, (*rhs_map)[pivot], *rhs); + auto edge_result = add_edge(rhs->start, (*rhs_map)[pivot], *rhs); + NFAEdge e = edge_result.first; (*rhs)[e].tops.insert(DEFAULT_TOP); } diff --git a/src/nfagraph/ng_squash.cpp b/src/nfagraph/ng_squash.cpp index eba642dc..96b6f80e 100644 --- a/src/nfagraph/ng_squash.cpp +++ b/src/nfagraph/ng_squash.cpp @@ -255,19 +255,19 @@ void buildSquashMask(NFAStateSet &mask, const NGHolder &g, NFAVertex v, } static -void buildSucc(NFAStateSet &succ, const NGHolder &g, NFAVertex v) { +void buildSucc(NFAStateSet &ssucc, const NGHolder &g, NFAVertex v) { for (auto w : adjacent_vertices_range(v, g)) { if (!is_special(w, g)) { - succ.set(g[w].index); + ssucc.set(g[w].index); } } } static -void buildPred(NFAStateSet &pred, const NGHolder &g, NFAVertex v) { +void buildPred(NFAStateSet &spred, const NGHolder &g, NFAVertex v) { for (auto u : inv_adjacent_vertices_range(v, g)) { if (!is_special(u, g)) { - pred.set(g[u].index); + spred.set(g[u].index); } } } @@ -409,19 +409,19 @@ unordered_map findSquashers(const NGHolder &g, DEBUG_PRINTF("state %u is cyclic\n", i); - NFAStateSet mask(numStates), succ(numStates), pred(numStates); + NFAStateSet mask(numStates), ssucc(numStates), spred(numStates); buildSquashMask(mask, g, v, cr, initStates, vByIndex, pdom_tree, som, som_depths, region_map, cache); - buildSucc(succ, g, v); - buildPred(pred, g, v); + buildSucc(ssucc, g, v); + buildPred(spred, g, v); const auto &reports = g[v].reports; - for (size_t j = succ.find_first(); j != succ.npos; - j = succ.find_next(j)) { + for (size_t j = ssucc.find_first(); j != ssucc.npos; + j = ssucc.find_next(j)) { NFAVertex vj = vByIndex[j]; NFAStateSet pred2(numStates); buildPred(pred2, g, vj); - if (pred2 == pred) { + if (pred2 == spred) { DEBUG_PRINTF("adding the sm from %zu to %u's sm\n", j, i); NFAStateSet tmp(numStates); buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree, @@ -430,14 +430,14 @@ unordered_map findSquashers(const NGHolder &g, } } - for (size_t j = pred.find_first(); j != pred.npos; - j = pred.find_next(j)) { + for (size_t j = spred.find_first(); j != spred.npos; + j = spred.find_next(j)) { NFAVertex vj = vByIndex[j]; NFAStateSet succ2(numStates); buildSucc(succ2, g, vj); /* we can use j as a basis for squashing if its succs are a subset * of ours */ - if ((succ2 & ~succ).any()) { + if ((succ2 & ~ssucc).any()) { continue; } @@ -590,7 +590,7 @@ void getHighlanderReporters(const NGHolder &g, const NFAVertex accept, verts.insert(v); next_vertex: - continue; + ; } } diff --git a/src/nfagraph/ng_uncalc_components.cpp b/src/nfagraph/ng_uncalc_components.cpp index a10673e6..92b52c43 100644 --- a/src/nfagraph/ng_uncalc_components.cpp +++ 
b/src/nfagraph/ng_uncalc_components.cpp @@ -196,10 +196,11 @@ u32 commonPrefixLength(const NGHolder &ga, const ranking_info &a_ranking, } a_count++; + NFAEdge b_edge; + bool b_edge_bool; + std::tie(b_edge, b_edge_bool) = edge(b_ranking.at(i), b_ranking.at(sid), gb); - NFAEdge b_edge = edge(b_ranking.at(i), b_ranking.at(sid), gb); - - if (!b_edge) { + if (!b_edge_bool) { max = i; DEBUG_PRINTF("lowering max to %u due to edge %zu->%u\n", max, i, sid); @@ -319,7 +320,7 @@ void mergeNfaComponent(NGHolder &dest, const NGHolder &vic, size_t common_len) { DEBUG_PRINTF("skipping common edge\n"); assert(edge(u, v, dest).second); // Should never merge edges with different top values. - assert(vic[e].tops == dest[edge(u, v, dest)].tops); + assert(vic[e].tops == dest[edge(u, v, dest).first].tops); continue; } else { assert(is_any_accept(v, dest)); @@ -454,8 +455,8 @@ void buildNfaMergeQueue(const vector &cluster, } } - NGHolder &g_i = *(cluster[ci]); - NGHolder &g_j = *(cluster[cj]); + const NGHolder &g_i = *(cluster[ci]); + const NGHolder &g_j = *(cluster[cj]); if (!compatibleStarts(g_i, g_j)) { continue; @@ -505,16 +506,26 @@ bool mergeableStarts(const NGHolder &h1, const NGHolder &h2) { /* TODO: relax top checks if reports match */ // If both graphs have edge (start, accept), the tops must match. - NFAEdge e1_accept = edge(h1.start, h1.accept, h1); - NFAEdge e2_accept = edge(h2.start, h2.accept, h2); - if (e1_accept && e2_accept && h1[e1_accept].tops != h2[e2_accept].tops) { + bool bool_e1_accept; + NFAEdge e1_accept; + NFAEdge e2_accept; + std::tie(e1_accept, bool_e1_accept) = edge(h1.start, h1.accept, h1); + bool bool_e2_accept; + std::tie(e2_accept, bool_e2_accept) = edge(h2.start, h2.accept, h2); + + if (bool_e1_accept && bool_e2_accept && h1[e1_accept].tops != h2[e2_accept].tops) { return false; } // If both graphs have edge (start, acceptEod), the tops must match. 
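mergeableStarts here shows the verbose side of std::tie: each edge() probe needs a pre-declared descriptor plus a bool. Under C++17, structured bindings express the same unpacking in one declaration; a minimal sketch, assuming a C++17 toolchain, which the patch itself does not require (hence its use of std::tie):

```
#include <boost/graph/adjacency_list.hpp>
#include <cstdio>

using Graph = boost::adjacency_list<>;

int main() {
    Graph g(4);
    boost::add_edge(0, 3, g);

    // One declaration per probe, no std::tie boilerplate: bind the
    // (descriptor, bool) pair from edge() straight into named variables.
    auto [e1, ok1] = boost::edge(0, 3, g);
    auto [e2, ok2] = boost::edge(1, 2, g);
    (void)e1;
    (void)e2;

    std::printf("0->3 %s, 1->2 %s\n", ok1 ? "found" : "missing",
                ok2 ? "found" : "missing");
    return 0;
}
```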
- NFAEdge e1_eod = edge(h1.start, h1.acceptEod, h1); - NFAEdge e2_eod = edge(h2.start, h2.acceptEod, h2); - if (e1_eod && e2_eod && h1[e1_eod].tops != h2[e2_eod].tops) { + bool bool_e1_eod; + NFAEdge e1_eod; + NFAEdge e2_eod; + std::tie(e1_eod, bool_e1_eod) = edge(h1.start, h1.acceptEod, h1); + bool bool_e2_eod; + std::tie(e2_eod, bool_e2_eod) = edge(h2.start, h2.acceptEod, h2); + + if (bool_e1_eod && bool_e2_eod && h1[e1_eod].tops != h2[e2_eod].tops) { return false; } diff --git a/src/nfagraph/ng_util.cpp b/src/nfagraph/ng_util.cpp index d6a90146..605f669c 100644 --- a/src/nfagraph/ng_util.cpp +++ b/src/nfagraph/ng_util.cpp @@ -128,7 +128,7 @@ void clone_out_edges(NGHolder &g, NFAVertex source, NFAVertex dest) { if (edge(dest, t, g).second) { continue; } - NFAEdge clone = add_edge(dest, t, g); + NFAEdge clone = add_edge(dest, t, g).first; u32 idx = g[clone].index; g[clone] = g[e]; g[clone].index = idx; @@ -139,7 +139,7 @@ void clone_in_edges(NGHolder &g, NFAVertex s, NFAVertex dest) { for (const auto &e : in_edges_range(s, g)) { NFAVertex ss = source(e, g); assert(!edge(ss, dest, g).second); - NFAEdge clone = add_edge(ss, dest, g); + NFAEdge clone = add_edge(ss, dest, g).first; u32 idx = g[clone].index; g[clone] = g[e]; g[clone].index = idx; @@ -282,9 +282,11 @@ bool can_only_match_at_eod(const NGHolder &g) { } bool matches_everywhere(const NGHolder &h) { - NFAEdge e = edge(h.startDs, h.accept, h); + bool bool_e; + NFAEdge e; + std::tie(e, bool_e) = edge(h.startDs, h.accept, h); - return e && !h[e].assert_flags; + return bool_e && !h[e].assert_flags; } bool is_virtual_start(NFAVertex v, const NGHolder &g) { @@ -409,9 +411,10 @@ void appendLiteral(NGHolder &h, const ue2_literal &s) { DEBUG_PRINTF("adding '%s' to graph\n", dumpString(s).c_str()); vector tail; assert(in_degree(h.acceptEod, h) == 1); - for (auto v : inv_adjacent_vertices_range(h.accept, h)) { - tail.emplace_back(v); - } + + const auto &vr = inv_adjacent_vertices_range(h.accept, h); + std::copy(begin(vr), end(vr), std::back_inserter(tail)); + assert(!tail.empty()); for (auto v : tail) { @@ -572,7 +575,7 @@ void cloneHolder(NGHolder &out, const NGHolder &in) { NFAVertex s = out_mapping[si]; NFAVertex t = out_mapping[ti]; - NFAEdge e2 = add_edge(s, t, out); + NFAEdge e2 = add_edge(s, t, out).first; out[e2] = in[e]; } @@ -713,16 +716,16 @@ u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, assert(delay <= lit.length()); DEBUG_PRINTF("managed delay %u (of max %u)\n", delay, max_delay); - set pred; + set predv; for (auto v : curr) { - insert(&pred, inv_adjacent_vertices_range(v, g)); + insert(&predv, inv_adjacent_vertices_range(v, g)); } clear_in_edges(g.accept, g); clearReports(g); - for (auto v : pred) { - NFAEdge e = add_edge(v, g.accept, g); + for (auto v : predv) { + NFAEdge e = add_edge(v, g.accept, g).first; g[v].reports.insert(0); if (is_triggered(g) && v == g.start) { g[e].tops.insert(DEFAULT_TOP); diff --git a/src/nfagraph/ng_util.h b/src/nfagraph/ng_util.h index a2d0d9b7..34199773 100644 --- a/src/nfagraph/ng_util.h +++ b/src/nfagraph/ng_util.h @@ -314,7 +314,7 @@ void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new); /** Construct a reversed copy of an arbitrary NGHolder, mapping starts to * accepts. */ -void reverseHolder(const NGHolder &g, NGHolder &out); +void reverseHolder(const NGHolder &g_in, NGHolder &g); /** \brief Returns the delay or ~0U if the graph cannot match with * the trailing literal. 
*/ diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 635f9400..451376d9 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -354,10 +354,9 @@ void getSimpleRoseLiterals(const NGHolder &g, bool seeking_anchored, map scores; map> lit_info; - set s; for (auto v : a_dom) { - s = getLiteralSet(g, v, true); /* RHS will take responsibility for any + set s = getLiteralSet(g, v, true); /* RHS will take responsibility for any revisits to the target vertex */ if (s.empty()) { @@ -695,7 +694,7 @@ unique_ptr findBestSplit(const NGHolder &g, } if (seeking_transient) { - for (auto &a : lits) { + for (const auto &a : lits) { a->creates_transient = createsTransientLHS(g, a->vv, *depths, cc.grey); } @@ -704,20 +703,20 @@ unique_ptr findBestSplit(const NGHolder &g, if (last_chance) { const size_t num_verts = num_vertices(g); auto color_map = make_small_color_map(g); - for (auto &a : lits) { + for (const auto &a : lits) { size_t num_reachable = count_reachable(g, a->vv, color_map); double ratio = (double)num_reachable / (double)num_verts; a->split_ratio = ratio > 0.5 ? 1 - ratio : ratio; } } - auto cmp = LitComparator(g, seeking_anchored, seeking_transient, + auto lcmp = LitComparator(g, seeking_anchored, seeking_transient, last_chance); unique_ptr best = std::move(lits.back()); lits.pop_back(); while (!lits.empty()) { - if (cmp(best, lits.back())) { + if (lcmp(best, lits.back())) { best = std::move(lits.back()); } lits.pop_back(); @@ -819,7 +818,7 @@ flat_set poisonEdges(const NGHolder &h, /* poison edges covered by successor literal */ - set > succs; + set > lsuccs; for (const RoseInEdge &ve : ee) { if (vg[target(ve, vg)].type != RIV_LITERAL) { /* nothing to poison in suffixes/outfixes */ @@ -827,15 +826,15 @@ flat_set poisonEdges(const NGHolder &h, assert(is_any_accept_type(vg[target(ve, vg)].type)); continue; } - succs.insert({vg[target(ve, vg)].s, + lsuccs.insert({vg[target(ve, vg)].s, vg[source(ve, vg)].type == RIV_LITERAL}); } - DEBUG_PRINTF("poisoning edges %zu successor literals\n", succs.size()); + DEBUG_PRINTF("poisoning edges %zu successor literals\n", lsuccs.size()); flat_set bad; - for (const auto &p : succs) { + for (const auto &p : lsuccs) { poisonFromSuccessor(h, p.first, p.second, bad); } @@ -1043,11 +1042,6 @@ bool splitRoseEdge(const NGHolder &base_graph, RoseInGraph &vg, shared_ptr lhs = make_shared(); shared_ptr rhs = make_shared(); - if (!lhs || !rhs) { - assert(0); - throw std::bad_alloc(); - } - unordered_map lhs_map; unordered_map rhs_map; @@ -1179,7 +1173,7 @@ bool splitRoseEdge(const NGHolder &base_graph, RoseInGraph &vg, #define MAX_LEN_2_LITERALS_PER_CUT 3 static -bool checkValidNetflowLits(NGHolder &h, const vector &scores, +bool checkValidNetflowLits(const NGHolder &h, const vector &scores, const map> &cut_lits, u32 min_allowed_length) { DEBUG_PRINTF("cut width %zu; min allowed %u\n", cut_lits.size(), @@ -1217,7 +1211,7 @@ bool checkValidNetflowLits(NGHolder &h, const vector &scores, } static -void splitEdgesByCut(NGHolder &h, RoseInGraph &vg, +void splitEdgesByCut(const NGHolder &h, RoseInGraph &vg, const vector &to_cut, const vector &cut, const map> &cut_lits) { @@ -1242,17 +1236,14 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg, DEBUG_PRINTF("splitting on pivot %zu\n", h[pivot].index); unordered_map temp_map; shared_ptr new_lhs = make_shared(); - if (!new_lhs) { - assert(0); - throw std::bad_alloc(); - } splitLHS(h, pivot, new_lhs.get(), &temp_map); /* want to cut off paths to pivot from things other than the pivot - * 
makes a more svelte graphy */ clear_in_edges(temp_map[pivot], *new_lhs); NFAEdge pivot_edge = add_edge(temp_map[prev_v], temp_map[pivot], - *new_lhs); + *new_lhs).first; + if (is_triggered(h) && prev_v == h.start) { (*new_lhs)[pivot_edge].tops.insert(DEFAULT_TOP); } @@ -1327,10 +1318,7 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg, if (!contains(done_rhs, adj)) { unordered_map temp_map; shared_ptr new_rhs = make_shared(); - if (!new_rhs) { - assert(0); - throw std::bad_alloc(); - } + splitRHS(h, adj, new_rhs.get(), &temp_map); remove_edge(new_rhs->start, new_rhs->accept, *new_rhs); remove_edge(new_rhs->start, new_rhs->acceptEod, *new_rhs); @@ -1442,11 +1430,11 @@ bool deanchorIfNeeded(NGHolder &g) { if (succ_v == succ_g) { DEBUG_PRINTF("found ^.*\n"); - for (auto succ : adjacent_vertices_range(g.start, g)) { - if (succ == g.startDs) { + for (auto asucc : adjacent_vertices_range(g.start, g)) { + if (asucc == g.startDs) { continue; } - add_edge(g.startDs, succ, g); + add_edge(g.startDs, asucc, g); } clear_vertex(v, g); remove_vertex(v, g); @@ -1693,18 +1681,18 @@ void removeRedundantLiteralsFromInfix(const NGHolder &h, RoseInGraph &ig, * successor literal. This would require using distinct report ids and also * taking into account overlap of successor literals. */ - set preds; - set succs; + set lpreds; + set lsuccs; for (const RoseInEdge &e : ee) { RoseInVertex u = source(e, ig); assert(ig[u].type == RIV_LITERAL); assert(!ig[u].delay); - preds.insert(ig[u].s); + lpreds.insert(ig[u].s); RoseInVertex v = target(e, ig); assert(ig[v].type == RIV_LITERAL); assert(!ig[v].delay); - succs.insert(ig[v].s); + lsuccs.insert(ig[v].s); if (ig[e].graph_lag) { /* already removed redundant parts of literals */ @@ -1716,9 +1704,9 @@ void removeRedundantLiteralsFromInfix(const NGHolder &h, RoseInGraph &ig, map, u32> > graphs; /* + delay */ - for (const ue2_literal &right : succs) { + for (const ue2_literal &right : lsuccs) { size_t max_overlap = 0; - for (const ue2_literal &left : preds) { + for (const ue2_literal &left : lpreds) { size_t overlap = maxOverlap(left, right, 0); ENSURE_AT_LEAST(&max_overlap, overlap); } @@ -1755,13 +1743,13 @@ void removeRedundantLiteralsFromInfix(const NGHolder &h, RoseInGraph &ig, for (const RoseInEdge &e : ee) { RoseInVertex v = target(e, ig); - const ue2_literal &succ = ig[v].s; - if (!contains(graphs, succ)) { + const ue2_literal &igsucc = ig[v].s; + if (!contains(graphs, igsucc)) { continue; } - ig[e].graph = graphs[succ].first; - ig[e].graph_lag = graphs[succ].second; + ig[e].graph = graphs[igsucc].first; + ig[e].graph_lag = graphs[igsucc].second; if (isStarCliche(*ig[e].graph)) { DEBUG_PRINTF("is a X star!\n"); @@ -1800,9 +1788,9 @@ void removeRedundantLiteralsFromInfixes(RoseInGraph &g, } for (const auto &m : infixes) { - NGHolder *h = m.first; - const auto &edges = m.second; - removeRedundantLiteralsFromInfix(*h, g, edges, cc); + const NGHolder *h = m.first; + const auto &medges = m.second; + removeRedundantLiteralsFromInfix(*h, g, medges, cc); } } @@ -1813,7 +1801,7 @@ void removeRedundantLiterals(RoseInGraph &g, const CompileContext &cc) { } static -RoseInVertex getStart(RoseInGraph &vg) { +RoseInVertex getStart(const RoseInGraph &vg) { for (RoseInVertex v : vertices_range(vg)) { // cppcheck-suppress useStlAlgorithm if (vg[v].type == RIV_START || vg[v].type == RIV_ANCHORED_START) { @@ -1880,7 +1868,7 @@ unique_ptr make_chain(u32 count) { #define SHORT_TRIGGER_LEN 16 static -bool makeTransientFromLongLiteral(NGHolder &h, RoseInGraph &vg, +bool 
makeTransientFromLongLiteral(const NGHolder &h, RoseInGraph &vg, const vector &ee, const CompileContext &cc) { /* check max width and literal lengths to see if possible */ @@ -1963,7 +1951,7 @@ bool makeTransientFromLongLiteral(NGHolder &h, RoseInGraph &vg, static void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, - u32 delay, const vector &preds) { + u32 delay, const vector &lpreds) { assert(delay <= lit.length()); assert(isCorrectlyTopped(g)); DEBUG_PRINTF("adding on '%s' %u\n", dumpString(lit).c_str(), delay); @@ -1979,8 +1967,8 @@ void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, prev = curr; } - for (auto v : preds) { - NFAEdge e = add_edge_if_not_present(v, prev, g); + for (auto v : lpreds) { + NFAEdge e = add_edge_if_not_present(v, prev, g).first; if (v == g.start && is_triggered(g)) { g[e].tops.insert(DEFAULT_TOP); } @@ -1998,11 +1986,11 @@ void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, static void restoreTrailingLiteralStates(NGHolder &g, const vector> &lits) { - vector preds; - insert(&preds, preds.end(), inv_adjacent_vertices(g.accept, g)); + vector vpreds; + insert(&vpreds, vpreds.end(), inv_adjacent_vertices(g.accept, g)); clear_in_edges(g.accept, g); - for (auto v : preds) { + for (auto v : vpreds) { g[v].reports.clear(); /* clear report from old accepts */ } @@ -2010,7 +1998,7 @@ void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit = p.first; u32 delay = p.second; - restoreTrailingLiteralStates(g, lit, delay, preds); + restoreTrailingLiteralStates(g, lit, delay, vpreds); } } @@ -2144,14 +2132,14 @@ void findBetterPrefixes(RoseInGraph &vg, const CompileContext &cc) { /* look for bad prefixes and try to split */ for (const auto &m : prefixes) { NGHolder *h = m.first; - const auto &edges = m.second; + const auto &medges = m.second; depth max_width = findMaxWidth(*h); if (willBeTransient(max_width, cc) || willBeAnchoredTable(max_width, cc.grey)) { continue; } - changed = improvePrefix(*h, vg, edges, cc); + changed = improvePrefix(*h, vg, medges, cc); } } while (changed && gen++ < MAX_FIND_BETTER_PREFIX_GEN); } @@ -2160,7 +2148,7 @@ void findBetterPrefixes(RoseInGraph &vg, const CompileContext &cc) { #define MAX_EXTRACT_STRONG_LITERAL_GRAPHS 10 static -bool extractStrongLiteral(NGHolder &h, RoseInGraph &vg, +bool extractStrongLiteral(const NGHolder &h, RoseInGraph &vg, const vector &ee, const CompileContext &cc) { DEBUG_PRINTF("looking for string literal\n"); @@ -2208,12 +2196,12 @@ void extractStrongLiterals(RoseInGraph &vg, const CompileContext &cc) { for (const auto &m : edges_by_graph) { NGHolder *g = m.first; - const auto &edges = m.second; + const auto &medges = m.second; if (contains(stuck, g)) { DEBUG_PRINTF("already known to be bad\n"); continue; } - bool rv = extractStrongLiteral(*g, vg, edges, cc); + bool rv = extractStrongLiteral(*g, vg, medges, cc); if (rv) { changed = true; } else { @@ -2291,8 +2279,8 @@ void improveWeakInfixes(RoseInGraph &vg, const CompileContext &cc) { for (const auto &m : weak_edges) { NGHolder *h = m.first; - const auto &edges = m.second; - improveInfix(*h, vg, edges, cc); + const auto &medges = m.second; + improveInfix(*h, vg, medges, cc); } } @@ -2304,10 +2292,7 @@ void splitEdgesForSuffix(const NGHolder &base_graph, RoseInGraph &vg, assert(!splitters.empty()); shared_ptr lhs = make_shared(); - if (!lhs) { - assert(0); - throw bad_alloc(); - } + unordered_map v_map; cloneHolder(*lhs, base_graph, &v_map); lhs->kind = NFA_INFIX; @@ -2316,7 +2301,7 @@ void 
splitEdgesForSuffix(const NGHolder &base_graph, RoseInGraph &vg, add_edge(lhs->accept, lhs->acceptEod, *lhs); clearReports(*lhs); for (NFAVertex v : splitters) { - NFAEdge e = add_edge(v_map[v], lhs->accept, *lhs); + NFAEdge e = add_edge(v_map[v], lhs->accept, *lhs).first; if (v == base_graph.start) { (*lhs)[e].tops.insert(DEFAULT_TOP); } @@ -2417,14 +2402,14 @@ bool replaceSuffixWithInfix(const NGHolder &h, RoseInGraph &vg, assert(!by_reports.empty()); /* TODO: how strong a min len do we want here ? */ - u32 min_len = cc.grey.minRoseLiteralLength; - ENSURE_AT_LEAST(&min_len, MIN_SUFFIX_LEN); + u32 rose_min_len = cc.grey.minRoseLiteralLength; + ENSURE_AT_LEAST(&rose_min_len, MIN_SUFFIX_LEN); for (auto &vli : by_reports | map_values) { u64a score = sanitizeAndCompressAndScore(vli.lit); if (vli.lit.empty() - || !validateRoseLiteralSetQuality(vli.lit, score, false, min_len, + || !validateRoseLiteralSetQuality(vli.lit, score, false, rose_min_len, false, false)) { return false; } @@ -2468,8 +2453,8 @@ void avoidSuffixes(RoseInGraph &vg, const CompileContext &cc) { /* look at suffixes and try to split */ for (const auto &m : suffixes) { const NGHolder *h = m.first; - const auto &edges = m.second; - replaceSuffixWithInfix(*h, vg, edges, cc); + const auto &medges = m.second; + replaceSuffixWithInfix(*h, vg, medges, cc); } } @@ -2563,8 +2548,8 @@ void lookForDoubleCut(RoseInGraph &vg, const CompileContext &cc) { for (const auto &m : right_edges) { const NGHolder *h = m.first; - const auto &edges = m.second; - lookForDoubleCut(*h, edges, vg, cc.grey); + const auto &medges = m.second; + lookForDoubleCut(*h, medges, vg, cc.grey); } } @@ -2755,8 +2740,8 @@ void lookForCleanEarlySplits(RoseInGraph &vg, const CompileContext &cc) { for (const auto &m : rightfixes) { const NGHolder *h = m.first; - const auto &edges = m.second; - lookForCleanSplit(*h, edges, vg, cc); + const auto &medges = m.second; + lookForCleanSplit(*h, medges, vg, cc); } prev = std::move(curr); @@ -2815,7 +2800,7 @@ bool tryForEarlyDfa(const NGHolder &h, const CompileContext &cc) { } static -vector> getDfaTriggers(RoseInGraph &vg, +vector> getDfaTriggers(const RoseInGraph &vg, const vector &edges, bool *single_trigger) { vector> triggers; @@ -2879,7 +2864,6 @@ static bool splitForImplementability(RoseInGraph &vg, NGHolder &h, const vector &edges, const CompileContext &cc) { - vector> succ_lits; DEBUG_PRINTF("trying to split %s with %zu vertices on %zu edges\n", to_string(h.kind).c_str(), num_vertices(h), edges.size()); @@ -2888,6 +2872,7 @@ bool splitForImplementability(RoseInGraph &vg, NGHolder &h, } if (!generates_callbacks(h)) { + vector> succ_lits; for (const auto &e : edges) { const auto &lit = vg[target(e, vg)].s; u32 delay = vg[e].graph_lag; @@ -2900,8 +2885,8 @@ bool splitForImplementability(RoseInGraph &vg, NGHolder &h, } unique_ptr split; - bool last_chance = true; if (h.kind == NFA_PREFIX) { + bool last_chance = true; auto depths = calcDepths(h); split = findBestPrefixSplit(h, depths, vg, edges, last_chance, cc); @@ -2938,7 +2923,7 @@ bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, vector> edges_by_graph; for (const RoseInEdge &ve : edges_range(vg)) { if (vg[ve].graph && !vg[ve].dfa) { - auto &h = vg[ve].graph; + const auto &h = vg[ve].graph; edges_by_graph[h].emplace_back(ve); } } @@ -2953,10 +2938,10 @@ bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, continue; } - const auto &edges = m.second; + const auto &medges = m.second; if (tryForEarlyDfa(*h, cc) && - 
doEarlyDfa(rose, vg, *h, medges, final_chance, rm, cc)) { continue; } @@ -2965,7 +2950,7 @@ bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, return false; } - if (splitForImplementability(vg, *h, edges, cc)) { + if (splitForImplementability(vg, *h, medges, cc)) { added_count++; if (added_count > MAX_IMPLEMENTABLE_SPLITS) { DEBUG_PRINTF("added_count hit limit\n"); diff --git a/src/parser/ComponentAlternation.cpp b/src/parser/ComponentAlternation.cpp index 2eee705a..a4521d8f 100644 --- a/src/parser/ComponentAlternation.cpp +++ b/src/parser/ComponentAlternation.cpp @@ -73,7 +73,7 @@ Component *ComponentAlternation::accept(ComponentVisitor &v) { } for (auto i = children.begin(), e = children.end(); i != e; ++i) { - Component *child = i->get(); + const Component *child = i->get(); c = (*i)->accept(v); if (c != child) { // Child has been replaced (new Component pointer) or we've been @@ -109,20 +109,20 @@ void ComponentAlternation::append(unique_ptr<Component> component) { vector<PositionInfo> ComponentAlternation::first() const { // firsts come from all our subcomponents in position order. This will // maintain left-to-right priority order. - vector<PositionInfo> firsts, subfirsts; + vector<PositionInfo> firsts; for (const auto &c : children) { - subfirsts = c->first(); + vector<PositionInfo> subfirsts = c->first(); firsts.insert(firsts.end(), subfirsts.begin(), subfirsts.end()); } return firsts; } vector<PositionInfo> ComponentAlternation::last() const { - vector<PositionInfo> lasts, sublasts; + vector<PositionInfo> lasts; for (const auto &c : children) { - sublasts = c->last(); + vector<PositionInfo> sublasts = c->last(); lasts.insert(lasts.end(), sublasts.begin(), sublasts.end()); } return lasts; } diff --git a/src/parser/ComponentAssertion.cpp b/src/parser/ComponentAssertion.cpp index cadff932..a2b4c24c 100644 --- a/src/parser/ComponentAssertion.cpp +++ b/src/parser/ComponentAssertion.cpp @@ -59,7 +59,7 @@ Component * ComponentAssertion::accept(ComponentVisitor &v) { } for (auto i = children.begin(), e = children.end(); i != e; ++i) { - Component *child = i->get(); + const Component *child = i->get(); c = (*i)->accept(v); if (c != child) { // Child has been replaced (new Component pointer) or we've been diff --git a/src/parser/ComponentAtomicGroup.cpp b/src/parser/ComponentAtomicGroup.cpp index 106f24fc..3630021f 100644 --- a/src/parser/ComponentAtomicGroup.cpp +++ b/src/parser/ComponentAtomicGroup.cpp @@ -51,7 +51,7 @@ Component *ComponentAtomicGroup::accept(ComponentVisitor &v) { } for (auto i = children.begin(), e = children.end(); i != e; ++i) { - Component *child = i->get(); + const Component *child = i->get(); c = (*i)->accept(v); if (c != child) { // Child has been replaced (new Component pointer) or we've been diff --git a/src/parser/ComponentBoundary.cpp b/src/parser/ComponentBoundary.cpp index e8eafc8c..f116cc1b 100644 --- a/src/parser/ComponentBoundary.cpp +++ b/src/parser/ComponentBoundary.cpp @@ -161,26 +161,26 @@ void ComponentBoundary::buildFollowSet(GlushkovBuildState &, bool ComponentBoundary::checkEmbeddedStartAnchor(bool at_start) const { if (at_start) { - return at_start; + return true; } if (m_bound == BEGIN_STRING || m_bound == BEGIN_LINE) { throw ParseError("Embedded start anchors not supported."); } - return at_start; + return false; } bool ComponentBoundary::checkEmbeddedEndAnchor(bool at_end) const { if (at_end) { - return at_end; + return true; } if (m_bound != BEGIN_STRING && m_bound != BEGIN_LINE) { throw ParseError("Embedded end anchors not supported."); } - return at_end; + return false; } } //
namespace diff --git a/src/parser/ComponentCondReference.cpp b/src/parser/ComponentCondReference.cpp index b6ff44db..cfd2a38d 100644 --- a/src/parser/ComponentCondReference.cpp +++ b/src/parser/ComponentCondReference.cpp @@ -79,7 +79,7 @@ Component *ComponentCondReference::accept(ComponentVisitor &v) { } if (kind == CONDITION_ASSERTION) { - Component *a = assertion.get(); + const Component *a = assertion.get(); c = assertion->accept(v); if (c != a) { assertion.reset(c); @@ -87,7 +87,7 @@ Component *ComponentCondReference::accept(ComponentVisitor &v) { } for (auto i = children.begin(), e = children.end(); i != e; ++i) { - Component *child = i->get(); + const Component *child = i->get(); c = (*i)->accept(v); if (c != child) { // Child has been replaced (new Component pointer) or we've been diff --git a/src/parser/ComponentRepeat.cpp b/src/parser/ComponentRepeat.cpp index f4c7e370..05deec54 100644 --- a/src/parser/ComponentRepeat.cpp +++ b/src/parser/ComponentRepeat.cpp @@ -110,7 +110,7 @@ void addBase(Position base, vector &firsts, } static -void checkPositions(vector &v, const GlushkovBuildState &bs) { +void checkPositions(const vector &v, const GlushkovBuildState &bs) { const NFABuilder& builder = bs.getBuilder(); for (const auto &e : v) { // cppcheck-suppress useStlAlgorithm @@ -133,7 +133,7 @@ void ComponentRepeat::notePositions(GlushkovBuildState &bs) { posFirst = bs.getBuilder().numVertices(); sub_comp->notePositions(bs); - u32 copies = m_max < NoLimit ? m_max : MAX(m_min, 1); + u32 copies = (m_max < NoLimit) ? m_max : std::max(m_min, 1U); DEBUG_PRINTF("building %u copies of repeated region\n", copies); m_firsts.clear(); m_lasts.clear(); @@ -321,8 +321,8 @@ void ComponentRepeat::wireRepeats(GlushkovBuildState &bs) { } } - DEBUG_PRINTF("wiring up %d optional repeats\n", copies - m_min); - for (u32 rep = MAX(m_min, 1); rep < copies; rep++) { + DEBUG_PRINTF("wiring up %u optional repeats\n", copies - m_min); + for (u32 rep = std::max(m_min, 1U); rep < copies; rep++) { vector lasts = m_lasts[rep - 1]; if (rep != m_min) { lasts.insert(lasts.end(), optLasts.begin(), optLasts.end()); diff --git a/src/parser/ComponentSequence.cpp b/src/parser/ComponentSequence.cpp index 40f41225..109b4fbb 100644 --- a/src/parser/ComponentSequence.cpp +++ b/src/parser/ComponentSequence.cpp @@ -82,7 +82,7 @@ Component *ComponentSequence::accept(ComponentVisitor &v) { } for (auto i = children.begin(), e = children.end(); i != e; ++i) { - Component *child = i->get(); + const Component *child = i->get(); c = (*i)->accept(v); if (c != child) { // Child has been replaced (new Component pointer) or we've been @@ -157,10 +157,10 @@ void ComponentSequence::finalize() { } vector ComponentSequence::first() const { - vector firsts, subfirsts; + vector firsts; for (const auto &c : children) { - subfirsts = c->first(); + vector subfirsts = c->first(); replaceEpsilons(firsts, subfirsts); if (!c->empty()) { break; @@ -229,12 +229,12 @@ void applyEpsilonVisits(vector &lasts, } vector ComponentSequence::last() const { - vector lasts, sublasts; + vector lasts; vector visits(1); auto i = children.rbegin(), e = children.rend(); for (; i != e; ++i) { - sublasts = (*i)->last(); + vector sublasts = (*i)->last(); applyEpsilonVisits(sublasts, visits); lasts.insert(lasts.end(), sublasts.begin(), sublasts.end()); if ((*i)->empty()) { diff --git a/src/parser/Parser.rl b/src/parser/Parser.rl index e5cbfe2b..ae419ec8 100644 --- a/src/parser/Parser.rl +++ b/src/parser/Parser.rl @@ -224,7 +224,7 @@ u8 decodeCtrl(char raw) { static unichar 
readUtf8CodePoint2c(const char *s) { - auto *ts = (const u8 *)s; + auto *ts = reinterpret_cast(s); assert(ts[0] >= 0xc0 && ts[0] < 0xe0); assert(ts[1] >= 0x80 && ts[1] < 0xc0); unichar val = ts[0] & 0x1f; diff --git a/src/parser/buildstate.cpp b/src/parser/buildstate.cpp index 5bdd0f14..c243f626 100644 --- a/src/parser/buildstate.cpp +++ b/src/parser/buildstate.cpp @@ -243,7 +243,7 @@ Position makeNewlineAssertPos(GlushkovBuildState &bs) { static void generateAccepts(GlushkovBuildStateImpl &bs, const PositionInfo &from, vector *tolist) { - NFABuilder &builder = bs.getBuilder(); + const NFABuilder &builder = bs.getBuilder(); u32 flags = from.flags; bool require_eod = flags & POS_FLAG_WIRE_EOD; @@ -456,11 +456,10 @@ void cleanupPositions(vector &a) { vector out; out.reserve(a.size()); // output should be close to input in size. - for (const auto &p : a) { - if (seen.emplace(p.pos, p.flags).second) { - out.emplace_back(p); // first encounter - } - } + auto seens = [&seen=seen](const PositionInfo &p) { + return (seen.emplace(p.pos, p.flags).second); + }; + std::copy_if(begin(a), end(a), std::back_inserter(out), seens); DEBUG_PRINTF("in %zu; out %zu\n", a.size(), out.size()); a.swap(out); diff --git a/src/parser/logical_combination.cpp b/src/parser/logical_combination.cpp index b75ca34f..bbe41b83 100644 --- a/src/parser/logical_combination.cpp +++ b/src/parser/logical_combination.cpp @@ -260,14 +260,14 @@ void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical, u32 ekey, u64a min_offset, u64a max_offset) { u32 ckey = getCombKey(id); - vector op_stack; vector subid_stack; u32 lkey_start = INVALID_LKEY; // logical operation's lkey - u32 paren = 0; // parentheses u32 digit = (u32)-1; // digit start offset, invalid offset is -1 u32 subid = (u32)-1; u32 i; try { + vector op_stack; + u32 paren = 0; // parentheses for (i = 0; logical[i]; i++) { if (isdigit(logical[i])) { if (digit == (u32)-1) { // new digit start @@ -284,7 +284,7 @@ void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical, if (logical[i] == '(') { paren += 1; } else if (logical[i] == ')') { - if (paren <= 0) { + if (paren == 0) { throw LocatedParseError("Not enough left parentheses"); } paren -= 1; diff --git a/src/parser/ucp_table.cpp b/src/parser/ucp_table.cpp index fc1330fe..77f13009 100644 --- a/src/parser/ucp_table.cpp +++ b/src/parser/ucp_table.cpp @@ -34,14 +34,6 @@ using namespace std; namespace ue2 { -#define UCP_FN(cat) \ -CodePointSet getUcp##cat(void) { \ - CodePointSet rv; \ - for (u32 i = 0; i < ARRAY_LENGTH(ucp_##cat##_def); i += 2) { \ - rv.setRange(ucp_##cat##_def[i], ucp_##cat##_def[i + 1]); \ - } \ - return rv; \ -} struct unicase { unichar base; diff --git a/src/parser/ucp_table.h b/src/parser/ucp_table.h index 269a971c..5f0c9af3 100644 --- a/src/parser/ucp_table.h +++ b/src/parser/ucp_table.h @@ -36,6 +36,15 @@ namespace ue2 { +#define UCP_FN(cat) \ +CodePointSet getUcp##cat(void) { \ + CodePointSet rv; \ + for (u32 i = 0; i < ARRAY_LENGTH(ucp_##cat##_def); i += 2) { \ + rv.setRange(ucp_##cat##_def[i], ucp_##cat##_def[i + 1]); \ + } \ + return rv; \ +} + class CodePointSet; void make_caseless(CodePointSet *cps); bool flip_case(unichar *c); diff --git a/src/rose/block.c b/src/rose/block.c index b3f424cb..60572d49 100644 --- a/src/rose/block.c +++ b/src/rose/block.c @@ -227,7 +227,7 @@ int roseBlockFloating(const struct RoseEngine *t, struct hs_scratch *scratch) { const size_t length = scratch->core_info.len; char *state = scratch->core_info.state; - struct RoseContext *tctxt 
= &scratch->tctxt; + const struct RoseContext *tctxt = &scratch->tctxt; DEBUG_PRINTF("ftable fd=%u fmd %u\n", t->floatingDistance, t->floatingMinDistance); @@ -377,7 +377,7 @@ void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch) { init_for_block(t, scratch, state, is_small_block); - struct RoseContext *tctxt = &scratch->tctxt; + const struct RoseContext *tctxt = &scratch->tctxt; if (is_small_block) { const void *sbtable = getSBLiteralMatcher(t); diff --git a/src/rose/catchup.c b/src/rose/catchup.c index 7a6648da..ad0636b8 100644 --- a/src/rose/catchup.c +++ b/src/rose/catchup.c @@ -679,7 +679,7 @@ hwlmcb_rv_t buildSufPQ(const struct RoseEngine *t, char *state, s64a safe_loc, s64a final_loc, struct hs_scratch *scratch) { assert(scratch->catchup_pq.qm_size <= t->outfixEndQueue); - struct RoseContext *tctxt = &scratch->tctxt; + const struct RoseContext *tctxt = &scratch->tctxt; assert(t->activeArrayCount); assert(scratch->core_info.buf_offset + final_loc diff --git a/src/rose/counting_miracle.h b/src/rose/counting_miracle.h index 602907cb..3bd93509 100644 --- a/src/rose/counting_miracle.h +++ b/src/rose/counting_miracle.h @@ -192,7 +192,7 @@ int roseCountingMiracleOccurs(const struct RoseEngine *t, u32 count = 0; - s64a m_loc = start; + s64a m_loc; if (!cm->shufti) { u8 c = cm->c; diff --git a/src/rose/match.c b/src/rose/match.c index 84d3b1fd..90cbd267 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -68,7 +68,7 @@ void printMatch(const struct core_info *ci, u64a start, u64a end) { hwlmcb_rv_t roseDelayRebuildCallback(size_t end, u32 id, struct hs_scratch *scratch) { - struct RoseContext *tctx = &scratch->tctxt; + const struct RoseContext *tctx = &scratch->tctxt; struct core_info *ci = &scratch->core_info; const struct RoseEngine *t = ci->rose; size_t rb_len = MIN(ci->hlen, t->delayRebuildLength); @@ -109,7 +109,7 @@ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, u64a top_squash_distance, u64a end, char in_catchup) { assert(event == MQE_TOP || event >= MQE_TOP_FIRST); - struct core_info *ci = &scratch->core_info; + const struct core_info *ci = &scratch->core_info; u8 *aa = getActiveLeafArray(t, scratch->core_info.state); u32 aaCount = t->activeArrayCount; @@ -267,7 +267,8 @@ hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, const u32 *programs = getByOffset(t, t->delayProgramOffset); for (u32 it = fatbit_iterate(vicSlot, delay_count, MMB_INVALID); - it != MMB_INVALID; it = fatbit_iterate(vicSlot, delay_count, it)) { + it != MMB_INVALID; it = fatbit_iterate(vicSlot, delay_count, it)) { + // cppcheck-suppress unreadVariable UNUSED rose_group old_groups = tctxt->groups; DEBUG_PRINTF("DELAYED MATCH id=%u offset=%llu\n", it, offset); @@ -296,7 +297,7 @@ hwlmcb_rv_t flushAnchoredLiteralAtLoc(const struct RoseEngine *t, struct hs_scratch *scratch, u32 curr_loc) { struct RoseContext *tctxt = &scratch->tctxt; - struct fatbit *curr_row = getAnchoredLiteralLog(scratch)[curr_loc - 1]; + const struct fatbit *curr_row = getAnchoredLiteralLog(scratch)[curr_loc - 1]; u32 region_width = t->anchored_count; const u32 *programs = getByOffset(t, t->anchoredProgramOffset); @@ -334,7 +335,7 @@ hwlmcb_rv_t flushAnchoredLiteralAtLoc(const struct RoseEngine *t, static really_inline u32 anchored_it_begin(struct hs_scratch *scratch) { - struct RoseContext *tctxt = &scratch->tctxt; + const struct RoseContext *tctxt = &scratch->tctxt; if (tctxt->lastEndOffset >= scratch->anchored_literal_region_len) { return MMB_INVALID; } diff --git a/src/rose/match.h b/src/rose/match.h 
index c03b1ebb..5a4bfa6b 100644 --- a/src/rose/match.h +++ b/src/rose/match.h @@ -254,8 +254,8 @@ void roseFlushLastByteHistory(const struct RoseEngine *t, return; } - struct RoseContext *tctxt = &scratch->tctxt; - struct core_info *ci = &scratch->core_info; + const struct RoseContext *tctxt = &scratch->tctxt; + const struct core_info *ci = &scratch->core_info; /* currEnd is last byte of string + 1 */ if (tctxt->lastEndOffset == ci->buf_offset + ci->len diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index 3f27dba1..78d13e3c 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -131,7 +131,7 @@ RoseVertex createVertex(RoseBuildImpl *build, const RoseVertex parent, /* fill in report information */ g[v].reports.insert(reports.begin(), reports.end()); - RoseEdge e = add_edge(parent, v, g); + RoseEdge e = add_edge(parent, v, g).first; DEBUG_PRINTF("adding edge (%u, %u) to parent\n", minBound, maxBound); g[e].minBound = minBound; @@ -161,7 +161,7 @@ RoseVertex createAnchoredVertex(RoseBuildImpl *build, u32 literalId, DEBUG_PRINTF("created anchored vertex %zu with lit id %u\n", g[v].index, literalId); - RoseEdge e = add_edge(build->anchored_root, v, g); + RoseEdge e = add_edge(build->anchored_root, v, g).first; g[e].minBound = min_offset; g[e].maxBound = max_offset; @@ -308,7 +308,7 @@ void createVertices(RoseBuildImpl *tbi, RoseVertex p = pv.first; - RoseEdge e = add_edge(p, w, g); + RoseEdge e = add_edge(p, w, g).first; DEBUG_PRINTF("adding edge (%u,%u) to parent\n", edge_props.minBound, edge_props.maxBound); g[e].minBound = edge_props.minBound; @@ -346,7 +346,7 @@ void createVertices(RoseBuildImpl *tbi, for (const auto &pv : parents) { const RoseInEdgeProps &edge_props = bd.ig[pv.second]; - RoseEdge e = add_edge(pv.first, g_v, tbi->g); + RoseEdge e = add_edge(pv.first, g_v, tbi->g).first; g[e].minBound = edge_props.minBound; g[e].maxBound = edge_props.maxBound; g[e].history = selectHistory(*tbi, bd, pv.second, e); @@ -354,7 +354,7 @@ void createVertices(RoseBuildImpl *tbi, edge_props.minBound, edge_props.maxBound); } - for (auto &m : created) { + for (const auto &m : created) { tbi->ghost[m.second] = g_v; } } @@ -519,9 +519,9 @@ u32 findRoseAnchorFloatingOverlap(const RoseInEdgeProps &ep, static void findRoseLiteralMask(const NGHolder &h, const u32 lag, vector &msk, - vector &cmp) { + vector &lcmp) { if (lag >= HWLM_MASKLEN) { - msk.clear(); cmp.clear(); + msk.clear(); lcmp.clear(); return; } @@ -533,7 +533,7 @@ void findRoseLiteralMask(const NGHolder &h, const u32 lag, vector &msk, assert(!curr.empty()); msk.assign(HWLM_MASKLEN, 0); - cmp.assign(HWLM_MASKLEN, 0); + lcmp.assign(HWLM_MASKLEN, 0); size_t i = HWLM_MASKLEN - lag - 1; do { if (curr.empty() || contains(curr, h.start) || @@ -550,9 +550,9 @@ void findRoseLiteralMask(const NGHolder &h, const u32 lag, vector &msk, cr |= h[v].char_reach; insert(&next, inv_adjacent_vertices(v, h)); } - make_and_cmp_mask(cr, &msk[i], &cmp[i]); - DEBUG_PRINTF("%zu: reach=%s, msk=%u, cmp=%u\n", i, - describeClass(cr).c_str(), msk.at(i), cmp.at(i)); + make_and_cmp_mask(cr, &msk[i], &lcmp[i]); + DEBUG_PRINTF("%zu: reach=%s, msk=%u, lcmp=%u\n", i, + describeClass(cr).c_str(), msk.at(i), lcmp.at(i)); curr.swap(next); } while (i-- > 0); } @@ -618,18 +618,18 @@ void doRoseLiteralVertex(RoseBuildImpl *tbi, bool use_eod_table, } floating: - vector msk, cmp; + vector msk, lcmp; if (tbi->cc.grey.roseHamsterMasks && in_degree(iv, ig) == 1) { RoseInEdge e = *in_edges(iv, ig).first; if (ig[e].graph) { - 
findRoseLiteralMask(*ig[e].graph, ig[e].graph_lag, msk, cmp); + findRoseLiteralMask(*ig[e].graph, ig[e].graph_lag, msk, lcmp); } } u32 delay = iv_info.delay; rose_literal_table table = use_eod_table ? ROSE_EOD_ANCHORED : ROSE_FLOATING; - u32 literalId = tbi->getLiteralId(iv_info.s, msk, cmp, delay, table); + u32 literalId = tbi->getLiteralId(iv_info.s, msk, lcmp, delay, table); DEBUG_PRINTF("literal=%u (len=%zu, delay=%u, offsets=[%u,%u] '%s')\n", literalId, iv_info.s.length(), delay, iv_info.min_offset, @@ -699,7 +699,7 @@ void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u, g[v].left.graph = eod_leftfix; g[v].left.leftfix_report = report_mapping.second; g[v].left.lag = 0; - RoseEdge e1 = add_edge(u, v, g); + RoseEdge e1 = add_edge(u, v, g).first; g[e1].minBound = 0; g[e1].maxBound = ROSE_BOUND_INF; g[v].min_offset = add_rose_depth(g[u].min_offset, @@ -719,7 +719,7 @@ void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u, g[w].reports = report_mapping.first; g[w].min_offset = g[v].min_offset; g[w].max_offset = g[v].max_offset; - RoseEdge e = add_edge(v, w, g); + RoseEdge e = add_edge(v, w, g).first; g[e].minBound = 0; g[e].maxBound = 0; /* No need to set history as the event is only delivered at the last @@ -795,7 +795,7 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi, g[w].reports = ig[iv].reports; g[w].min_offset = g[u].min_offset; g[w].max_offset = g[u].max_offset; - RoseEdge e = add_edge(u, w, g); + RoseEdge e = add_edge(u, w, g).first; g[e].minBound = 0; g[e].maxBound = 0; g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE; @@ -940,7 +940,7 @@ void shift_accepts_to_end(const RoseInGraph &ig, } static -void populateRoseGraph(RoseBuildImpl *tbi, RoseBuildData &bd) { +void populateRoseGraph(RoseBuildImpl *tbi, const RoseBuildData &bd) { const RoseInGraph &ig = bd.ig; /* add the pattern in to the main rose graph */ @@ -1041,9 +1041,9 @@ bool canImplementGraph(NGHolder &h, bool prefilter, const ReportManager &rm, if (prefilter && cc.grey.prefilterReductions) { // If we're prefiltering, we can have another go with a reduced graph. 
- UNUSED size_t numBefore = num_vertices(h); + UNUSED size_t numBefore = num_vertices(h); // cppcheck-suppress unreadVariable prefilterReductions(h, cc); - UNUSED size_t numAfter = num_vertices(h); + UNUSED size_t numAfter = num_vertices(h); // cppcheck-suppress unreadVariable DEBUG_PRINTF("reduced from %zu to %zu vertices\n", numBefore, numAfter); if (isImplementableNFA(h, &rm, cc)) { @@ -1090,20 +1090,20 @@ bool predsAreDelaySensitive(const RoseInGraph &ig, RoseInVertex v) { static u32 maxAvailableDelay(const ue2_literal &pred_key, const ue2_literal &lit_key) { /* overly conservative if only part of the string is nocase */ - string pred = pred_key.get_string(); + string predk = pred_key.get_string(); string lit = lit_key.get_string(); if (pred_key.any_nocase() || lit_key.any_nocase()) { - upperString(pred); + upperString(predk); upperString(lit); } - string::size_type last = pred.rfind(lit); + string::size_type last = predk.rfind(lit); if (last == string::npos) { return MAX_DELAY; } - u32 raw = pred.size() - last - 1; + u32 raw = predk.size() - last - 1; return MIN(raw, MAX_DELAY); } @@ -1723,7 +1723,7 @@ bool addEodOutfix(RoseBuildImpl &build, const NGHolder &h) { g[v].left.graph = eod_leftfix; g[v].left.leftfix_report = report_mapping.second; g[v].left.lag = 0; - RoseEdge e1 = add_edge(build.anchored_root, v, g); + RoseEdge e1 = add_edge(build.anchored_root, v, g).first; g[e1].minBound = 0; g[e1].maxBound = ROSE_BOUND_INF; g[v].min_offset = findMinWidth(*eod_leftfix); @@ -1741,7 +1741,7 @@ bool addEodOutfix(RoseBuildImpl &build, const NGHolder &h) { g[w].reports = report_mapping.first; g[w].min_offset = g[v].min_offset; g[w].max_offset = g[v].max_offset; - RoseEdge e = add_edge(v, w, g); + RoseEdge e = add_edge(v, w, g).first; g[e].minBound = 0; g[e].maxBound = 0; g[e].history = ROSE_ROLE_HISTORY_NONE; diff --git a/src/rose/rose_build_add_mask.cpp b/src/rose/rose_build_add_mask.cpp index 95c32cba..abfc7178 100644 --- a/src/rose/rose_build_add_mask.cpp +++ b/src/rose/rose_build_add_mask.cpp @@ -131,7 +131,6 @@ void findMaskLiteral(const vector &mask, bool streaming, if (better) { best_begin = begin; best_end = end; - best_len = len; } for (size_t i = best_begin; i < best_end; i++) { @@ -304,31 +303,31 @@ unique_ptr buildMaskLhs(bool anchored, u32 prefix_len, assert(prefix_len); assert(mask.size() >= prefix_len); - NFAVertex pred = anchored ? lhs->start : lhs->startDs; + NFAVertex lpreds = anchored ? lhs->start : lhs->startDs; u32 m_idx = 0; while (prefix_len--) { NFAVertex v = add_vertex(*lhs); (*lhs)[v].char_reach = mask[m_idx++]; - add_edge(pred, v, *lhs); - pred = v; + add_edge(lpreds, v, *lhs); + lpreds = v; } - add_edge(pred, lhs->accept, *lhs); - (*lhs)[pred].reports.insert(0); + add_edge(lpreds, lhs->accept, *lhs); + (*lhs)[lpreds].reports.insert(0); return lhs; } static void buildLiteralMask(const vector &mask, vector &msk, - vector &cmp, u32 delay) { + vector &lcmp, u32 delay) { msk.clear(); - cmp.clear(); + lcmp.clear(); if (mask.size() <= delay) { return; } - // Construct an and/cmp mask from our mask ending at delay positions before + // Construct an and/lcmp mask from our mask ending at delay positions before // the end of the literal, with max length HWLM_MASKLEN. 
auto ite = mask.end() - delay; @@ -336,11 +335,11 @@ void buildLiteralMask(const vector &mask, vector &msk, for (; it != ite; ++it) { msk.emplace_back(0); - cmp.emplace_back(0); - make_and_cmp_mask(*it, &msk.back(), &cmp.back()); + lcmp.emplace_back(0); + make_and_cmp_mask(*it, &msk.back(), &lcmp.back()); } - assert(msk.size() == cmp.size()); + assert(msk.size() == lcmp.size()); assert(msk.size() <= HWLM_MASKLEN); } @@ -394,9 +393,10 @@ bool validateTransientMask(const vector &mask, bool anchored, none_of(begin(lits), end(lits), mixed_sensitivity)); // Build the HWLM literal mask. - vector msk, cmp; + vector msk; if (grey.roseHamsterMasks) { - buildLiteralMask(mask, msk, cmp, delay); + vector lcmp; + buildLiteralMask(mask, msk, lcmp, delay); } // We consider the HWLM mask length to run from the first non-zero byte to @@ -494,9 +494,9 @@ void addTransientMask(RoseBuildImpl &build, const vector &mask, set_report(*mask_graph, mask_report); // Build the HWLM literal mask. - vector msk, cmp; + vector msk, lcmp; if (build.cc.grey.roseHamsterMasks) { - buildLiteralMask(mask, msk, cmp, delay); + buildLiteralMask(mask, msk, lcmp, delay); } /* adjust bounds to be relative to trigger rather than mask */ @@ -530,7 +530,7 @@ void addTransientMask(RoseBuildImpl &build, const vector &mask, const flat_set no_reports; for (const auto &lit : lits) { - u32 lit_id = build.getLiteralId(lit, msk, cmp, delay, table); + u32 lit_id = build.getLiteralId(lit, msk, lcmp, delay, table); const RoseVertex parent = anchored ? build.anchored_root : build.root; bool use_mask = delay || maskIsNeeded(lit, *mask_graph); @@ -543,7 +543,7 @@ void addTransientMask(RoseBuildImpl &build, const vector &mask, g[v].left.leftfix_report = mask_report; } else { // Make sure our edge bounds are correct. - RoseEdge e = edge(parent, v, g); + RoseEdge e = edge(parent, v, g).first; g[e].minBound = 0; g[e].maxBound = anchored ? 0 : ROSE_BOUND_INF; g[e].history = anchored ? 
ROSE_ROLE_HISTORY_ANCH @@ -555,7 +555,7 @@ void addTransientMask(RoseBuildImpl &build, const vector &mask, g[v].max_offset = v_max_offset; if (eod) { - RoseEdge e = add_edge(v, eod_v, g); + RoseEdge e = add_edge(v, eod_v, g).first; g[e].minBound = 0; g[e].maxBound = 0; g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE; @@ -573,19 +573,19 @@ unique_ptr buildMaskRhs(const flat_set &reports, unique_ptr rhs = std::make_unique(NFA_SUFFIX); NGHolder &h = *rhs; - NFAVertex succ = h.accept; + NFAVertex asucc = h.accept; u32 m_idx = mask.size() - 1; while (suffix_len--) { NFAVertex u = add_vertex(h); - if (succ == h.accept) { + if (asucc == h.accept) { h[u].reports.insert(reports.begin(), reports.end()); } h[u].char_reach = mask[m_idx--]; - add_edge(u, succ, h); - succ = u; + add_edge(u, asucc, h); + asucc = u; } - NFAEdge e = add_edge(h.start, succ, h); + NFAEdge e = add_edge(h.start, asucc, h).first; h[e].tops.insert(DEFAULT_TOP); return rhs; diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index 281b39cf..b6c14549 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -350,11 +350,11 @@ public: next[s].wdelay = wdelay; } - nfa_state_set succ; + nfa_state_set gsucc; if (wdelay != in.wdelay) { DEBUG_PRINTF("enabling start\n"); - succ.set(vertexToIndex[g.startDs]); + gsucc.set(vertexToIndex[g.startDs]); } for (size_t i = in.wrap_state.find_first(); i != nfa_state_set::npos; @@ -370,12 +370,12 @@ public: continue; } - succ.set(vertexToIndex[w]); + gsucc.set(vertexToIndex[w]); } } - for (size_t j = succ.find_first(); j != nfa_state_set::npos; - j = succ.find_next(j)) { + for (size_t j = gsucc.find_first(); j != nfa_state_set::npos; + j = gsucc.find_next(j)) { const CharReach &cr = cr_by_index[j]; for (size_t s = cr.find_first(); s != CharReach::npos; s = cr.find_next(s)) { @@ -870,13 +870,13 @@ vector buildAnchoredDfas(RoseBuildImpl &build, } bytecode_ptr -buildAnchoredMatcher(RoseBuildImpl &build, const vector &fragments, +buildAnchoredMatcher(const RoseBuildImpl &build, const vector &fragments, vector &dfas) { const CompileContext &cc = build.cc; if (dfas.empty()) { DEBUG_PRINTF("empty\n"); - return nullptr; + return bytecode_ptr(nullptr); } for (auto &rdfa : dfas) { @@ -899,7 +899,7 @@ buildAnchoredMatcher(RoseBuildImpl &build, const vector &fragments, for (size_t i = 0; i < nfas.size(); i++) { const NFA *nfa = nfas[i].get(); anchored_matcher_info *ami = (anchored_matcher_info *)curr; - char *prev_curr = curr; + const char *prev_curr = curr; curr += sizeof(anchored_matcher_info); diff --git a/src/rose/rose_build_anchored.h b/src/rose/rose_build_anchored.h index 37d268ac..8a25eb58 100644 --- a/src/rose/rose_build_anchored.h +++ b/src/rose/rose_build_anchored.h @@ -60,7 +60,7 @@ std::vector buildAnchoredDfas(RoseBuildImpl &build, * given in litPrograms. 
*/ bytecode_ptr -buildAnchoredMatcher(RoseBuildImpl &build, +buildAnchoredMatcher(const RoseBuildImpl &build, const std::vector &fragments, std::vector &dfas); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index ef3f6fdd..7e3a9bab 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -479,9 +479,9 @@ rose_group RoseBuildImpl::getInitialGroups() const { static bool nfaStuckOn(const NGHolder &g) { assert(!proper_out_degree(g.startDs, g)); - set succ; - insert(&succ, adjacent_vertices(g.start, g)); - succ.erase(g.startDs); + set vsucc; + insert(&vsucc, adjacent_vertices(g.start, g)); + vsucc.erase(g.startDs); set asucc; set tops; @@ -496,7 +496,7 @@ bool nfaStuckOn(const NGHolder &g) { asucc.clear(); insert(&asucc, adjacent_vertices(target(e, g), g)); - if (asucc == succ) { + if (asucc == vsucc) { insert(&done_tops, g[e].tops); } } @@ -534,12 +534,12 @@ void findFixedDepthTops(const RoseGraph &g, const set &triggers, for (const auto &e : pred_by_top) { u32 top = e.first; - const set &preds = e.second; - if (!g[*preds.begin()].fixedOffset()) { + const set &spreds = e.second; + if (!g[*spreds.begin()].fixedOffset()) { continue; } - u32 depth = g[*preds.begin()].min_offset; - for (RoseVertex u : preds) { + u32 depth = g[*spreds.begin()].min_offset; + for (RoseVertex u : spreds) { if (g[u].min_offset != depth || g[u].max_offset != depth) { goto next_top; } @@ -677,7 +677,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, } assert(suff.graph()); - NGHolder &holder = *suff.graph(); + const NGHolder &holder = *suff.graph(); assert(holder.kind == NFA_SUFFIX); const bool oneTop = onlyOneTop(holder); bool compress_state = cc.streaming; @@ -928,12 +928,12 @@ void appendTailToHolder(NGHolder &h, const vector &tail) { static u32 decreaseLag(const RoseBuildImpl &build, NGHolder &h, - const vector &succs) { + const vector &vsuccs) { const RoseGraph &rg = build.g; static const size_t MAX_RESTORE_LEN = 5; vector restored(MAX_RESTORE_LEN); - for (RoseVertex v : succs) { + for (RoseVertex v : vsuccs) { u32 lag = rg[v].left.lag; for (u32 lit_id : rg[v].literals) { u32 delay = build.literals.at(lit_id).delay; @@ -972,7 +972,7 @@ struct eager_info { static bool checkSuitableForEager(bool is_prefix, const left_id &left, const RoseBuildImpl &build, - const vector &succs, + const vector &vsuccs, rose_group squash_mask, rose_group initial_groups, eager_info &ei, const CompileContext &cc) { DEBUG_PRINTF("checking prefix --> %016llx...\n", squash_mask); @@ -990,7 +990,7 @@ bool checkSuitableForEager(bool is_prefix, const left_id &left, } // cppcheck-suppress useStlAlgorithm - for (RoseVertex s : succs) { + for (RoseVertex s : vsuccs) { if (build.isInETable(s) || contains(rg[s].literals, build.eod_event_literal_id)) { return false; /* Ignore EOD related prefixes */ @@ -1009,7 +1009,7 @@ bool checkSuitableForEager(bool is_prefix, const left_id &left, if (!can_die_early(dfa, EAGER_DIE_BEFORE_LIMIT)) { return false; } - ei.new_graph = rg[succs[0]].left.graph; + ei.new_graph = rg[vsuccs[0]].left.graph; } else if (left.graph()) { const NGHolder &g = *left.graph(); if (proper_out_degree(g.startDs, g)) { @@ -1020,7 +1020,7 @@ bool checkSuitableForEager(bool is_prefix, const left_id &left, auto gg = ei.new_graph; gg->kind = NFA_EAGER_PREFIX; - ei.lag_adjust = decreaseLag(build, *gg, succs); + ei.lag_adjust = decreaseLag(build, *gg, vsuccs); if (is_match_vertex(gg->start, *gg)) { return false; /* should not still be vacuous as lag decreased */ 
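A pattern that recurs throughout the rose_build hunks above and below: the patch marks the single-argument `suffix_id` and `left_id` constructors `explicit` (see the src/rose/rose_build_impl.h hunk later in this diff), so conversions that used to happen implicitly must now be spelled out at each call site, e.g. `left_id(g[v].left)` and `suffix_id(g[v].suffix)`. A minimal sketch of the effect, using hypothetical stand-in names (`Info`, `Id`, `lookup`) rather than the real Hyperscan types:

```
#include <cassert>

struct Info {
    int token;
};

struct Id {
    int token;
    // Marked explicit, as this patch does for suffix_id/left_id: an Info
    // no longer converts silently to an Id at call sites.
    explicit Id(const Info &in) : token(in.token) {}
};

int lookup(const Id &id) { return id.token; }

int main() {
    Info info{42};
    // int t = lookup(info);   // fails to compile once the ctor is explicit
    int t = lookup(Id(info));  // the conversion must be spelled out
    assert(t == 42);
    return 0;
}
```

The same motivation applies to the `RoseInstrSetCombination(u32)` and `RoseInstrSetExhaust(u32)` constructors made explicit in the src/rose/rose_build_instructions.h hunk.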
@@ -1048,17 +1048,17 @@ bool checkSuitableForEager(bool is_prefix, const left_id &left, static left_id updateLeftfixWithEager(RoseGraph &g, const eager_info &ei, - const vector &succs) { + const vector &vsuccs) { u32 lag_adjust = ei.lag_adjust; auto gg = ei.new_graph; - for (RoseVertex v : succs) { + for (RoseVertex v : vsuccs) { g[v].left.graph = gg; assert(g[v].left.lag >= lag_adjust); g[v].left.lag -= lag_adjust; DEBUG_PRINTF("added %u literal chars back, new lag %u\n", lag_adjust, g[v].left.lag); } - left_id leftfix = g[succs[0]].left; + left_id leftfix = left_id(g[vsuccs[0]].left); if (leftfix.graph()) { assert(leftfix.graph()->kind == NFA_PREFIX @@ -1103,7 +1103,7 @@ bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi, const map > &infixTriggers, set *no_retrigger_queues, set *eager_queues, const map &eager, - const vector &succs, left_id leftfix) { + const vector &vsuccs, left_id leftfix) { RoseGraph &g = build.g; const CompileContext &cc = build.cc; const ReportManager &rm = build.rm; @@ -1115,7 +1115,7 @@ bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi, if (contains(eager, leftfix)) { eager_queues->insert(qi); - leftfix = updateLeftfixWithEager(g, eager.at(leftfix), succs); + leftfix = updateLeftfixWithEager(g, eager.at(leftfix), vsuccs); } bytecode_ptr nfa; @@ -1163,7 +1163,7 @@ bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi, u32 max_queuelen = UINT32_MAX; if (!prefix) { set lits; - for (RoseVertex v : succs) { + for (RoseVertex v : vsuccs) { for (auto u : inv_adjacent_vertices_range(v, g)) { for (u32 lit_id : g[u].literals) { lits.insert(build.literals.at(lit_id).s); @@ -1192,7 +1192,7 @@ bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi, findCountingMiracleInfo(leftfix, stop, &cm_count, &cm_cr); } - for (RoseVertex v : succs) { + for (RoseVertex v : vsuccs) { bc.leftfix_info.emplace(v, left_build_info(qi, g[v].left.lag, max_width, squash_mask, stop, max_queuelen, cm_count, @@ -1233,7 +1233,7 @@ void updateTops(const RoseGraph &g, const TamaInfo &tamaInfo, const map, u32> &out_top_remap, const bool is_suffix) { u32 i = 0; - for (const auto &n : tamaInfo.subengines) { + for (const auto *n : tamaInfo.subengines) { for (const auto &v : subengines[i].vertices) { if (is_suffix) { tamaProto.add(n, g[v].index, g[v].suffix.top, out_top_remap); @@ -1382,7 +1382,7 @@ void updateExclusiveSuffixProperties(const RoseBuildImpl &build, const vector &exclusive_info, set *no_retrigger_queues) { const RoseGraph &g = build.g; - for (auto &info : exclusive_info) { + for (const auto &info : exclusive_info) { const auto &qi = info.queue; const auto &subengines = info.subengines; bool no_retrigger = true; @@ -1499,7 +1499,7 @@ void findExclusiveInfixes(RoseBuildImpl &build, build_context &bc, } static -bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc, +void buildLeftfixes(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif, set *no_retrigger_queues, set *eager_queues, bool do_prefix) { RoseGraph &g = tbi.g; @@ -1508,7 +1508,7 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc, map> infixTriggers; findInfixTriggers(tbi, &infixTriggers); - insertion_ordered_map> succs; + insertion_ordered_map> lsuccs; if (cc.grey.allowTamarama && cc.streaming && !do_prefix) { findExclusiveInfixes(tbi, bc, qif, infixTriggers, no_retrigger_queues); @@ -1548,7 +1548,7 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc, } } - succs[leftfix].emplace_back(v); + 
lsuccs[leftfix].emplace_back(v); } rose_group initial_groups = tbi.getInitialGroups(); @@ -1556,7 +1556,7 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc, map eager; - for (const auto &m : succs) { + for (const auto &m : lsuccs) { const left_id &leftfix = m.first; const auto &left_succs = m.second; @@ -1577,7 +1577,7 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc, eager.clear(); } - for (const auto &m : succs) { + for (const auto &m : lsuccs) { const left_id &leftfix = m.first; const auto &left_succs = m.second; buildLeftfix(tbi, bc, do_prefix, qif.get_queue(), infixTriggers, @@ -1585,7 +1585,7 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc, leftfix); } - return true; + return ; } static @@ -1597,7 +1597,7 @@ void findSuffixTriggers(const RoseBuildImpl &tbi, continue; } PredTopPair ptp(v, g[v].suffix.top); - (*suffixTriggers)[g[v].suffix].insert(ptp); + (*suffixTriggers)[suffix_id(g[v].suffix)].insert(ptp); } } @@ -1618,7 +1618,7 @@ public: explicit OutfixBuilder(const RoseBuildImpl &build_in) : build(build_in) {} bytecode_ptr operator()(boost::blank&) const { - return nullptr; + return bytecode_ptr(nullptr); }; bytecode_ptr operator()(unique_ptr &rdfa) const { @@ -1632,11 +1632,11 @@ public: build.rm); } - bytecode_ptr operator()(unique_ptr &holder) const { + bytecode_ptr operator()(const unique_ptr &holder) const { const CompileContext &cc = build.cc; const ReportManager &rm = build.rm; - NGHolder &h = *holder; + const NGHolder &h = *holder; assert(h.kind == NFA_OUTFIX); // Build NFA. @@ -1662,10 +1662,10 @@ public: return n; } - bytecode_ptr operator()(UNUSED MpvProto &mpv) const { + bytecode_ptr operator()(UNUSED const MpvProto &mpv) const { // MPV construction handled separately. assert(mpv.puffettes.empty()); - return nullptr; + return bytecode_ptr(nullptr); } private: @@ -1906,8 +1906,8 @@ void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc, // We may have already built this NFA. 
if (contains(suffixes, s)) { - u32 id = suffixes[s]; if (!tbi.isInETable(v)) { + u32 id = suffixes[s]; vertex_map[id].emplace_back(v); } continue; @@ -2065,16 +2065,8 @@ bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif, suffixTriggers.clear(); *leftfixBeginQueue = qif.allocated_count(); - - if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, eager_queues, - true)) { - return false; - } - - if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, eager_queues, - false)) { - return false; - } + buildLeftfixes(tbi, bc, qif, no_retrigger_queues, eager_queues,true); + buildLeftfixes(tbi, bc, qif, no_retrigger_queues, eager_queues,false); return true; } @@ -2247,20 +2239,19 @@ vector buildSuffixEkeyLists(const RoseBuildImpl &build, build_context &bc, for (const auto &e : bc.suffixes) { const suffix_id &s = e.first; - u32 qi = e.second; set ekeys = reportsToEkeys(all_reports(s), build.rm); if (!ekeys.empty()) { + u32 qi = e.second; qi_to_ekeys[qi] = {ekeys.begin(), ekeys.end()}; } } /* for each outfix also build elists */ for (const auto &outfix : build.outfixes) { - u32 qi = outfix.get_queue(); set ekeys = reportsToEkeys(all_reports(outfix), build.rm); - if (!ekeys.empty()) { + u32 qi = outfix.get_queue(); qi_to_ekeys[qi] = {ekeys.begin(), ekeys.end()}; } } @@ -2320,12 +2311,12 @@ bool anyEndfixMpvTriggers(const RoseBuildImpl &build) { if (!g[v].suffix) { continue; } - if (contains(done, g[v].suffix)) { + if (contains(done, suffix_id(g[v].suffix))) { continue; /* already done */ } - done.insert(g[v].suffix); + done.insert(suffix_id(g[v].suffix)); - if (hasMpvTrigger(all_reports(g[v].suffix), build.rm)) { + if (hasMpvTrigger(all_reports(suffix_id(g[v].suffix)), build.rm)) { return true; } } @@ -2386,7 +2377,7 @@ void recordResources(RoseResources &resources, const RoseBuildImpl &build, resources.has_eod = true; break; } - if (g[v].suffix && has_eod_accepts(g[v].suffix)) { + if (g[v].suffix && has_eod_accepts(suffix_id(g[v].suffix))) { resources.has_eod = true; break; } @@ -2471,7 +2462,7 @@ bool hasEodAnchors(const RoseBuildImpl &build, const build_context &bc, DEBUG_PRINTF("literally report eod\n"); return true; } - if (g[v].suffix && has_eod_accepts(g[v].suffix)) { + if (g[v].suffix && has_eod_accepts(suffix_id(g[v].suffix))) { DEBUG_PRINTF("eod suffix\n"); return true; } @@ -2546,7 +2537,7 @@ void writeNfaInfo(const RoseBuildImpl &build, build_context &bc, if (!g[v].suffix) { continue; } - u32 qi = bc.suffixes.at(g[v].suffix); + u32 qi = bc.suffixes.at(suffix_id(g[v].suffix)); assert(qi < infos.size()); if (build.isInETable(v)) { infos.at(qi).eod = 1; @@ -2737,7 +2728,7 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, } static -RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc, +RoseProgram makeLiteralProgram(const RoseBuildImpl &build, const build_context &bc, ProgramBuild &prog_build, u32 lit_id, const vector> &lit_edge_map, bool is_anchored_replay_program) { @@ -2751,7 +2742,7 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc, } static -RoseProgram makeFragmentProgram(const RoseBuildImpl &build, build_context &bc, +RoseProgram makeFragmentProgram(const RoseBuildImpl &build, const build_context &bc, ProgramBuild &prog_build, const vector &lit_ids, const vector> &lit_edge_map) { @@ -2983,9 +2974,10 @@ void buildFragmentPrograms(const RoseBuildImpl &build, pfrag.lit_ids, lit_edge_map); if (pfrag.included_frag_id != INVALID_FRAG_ID && !lit_prog.empty()) { - auto &cfrag = 
fragments[pfrag.included_frag_id]; + const auto &cfrag = fragments[pfrag.included_frag_id]; assert(pfrag.s.length() >= cfrag.s.length() && - !pfrag.s.any_nocase() >= !cfrag.s.any_nocase()); + !pfrag.s.any_nocase() == !cfrag.s.any_nocase()); + /** !pfrag.s.any_nocase() >= !cfrag.s.any_nocase()); **/ u32 child_offset = cfrag.lit_program_offset; DEBUG_PRINTF("child %u offset %u\n", cfrag.fragment_id, child_offset); @@ -3002,9 +2994,10 @@ void buildFragmentPrograms(const RoseBuildImpl &build, pfrag.lit_ids); if (pfrag.included_delay_frag_id != INVALID_FRAG_ID && !rebuild_prog.empty()) { - auto &cfrag = fragments[pfrag.included_delay_frag_id]; - assert(pfrag.s.length() >= cfrag.s.length() && - !pfrag.s.any_nocase() >= !cfrag.s.any_nocase()); + const auto &cfrag = fragments[pfrag.included_delay_frag_id]; + /** assert(pfrag.s.length() >= cfrag.s.length() && **/ + assert(pfrag.s.length() == cfrag.s.length() && + !pfrag.s.any_nocase() != !cfrag.s.any_nocase()); u32 child_offset = cfrag.delay_program_offset; DEBUG_PRINTF("child %u offset %u\n", cfrag.fragment_id, child_offset); @@ -3021,7 +3014,7 @@ void updateLitProtoProgramOffset(vector &fragments, auto &proto = *litProto.hwlmProto; for (auto &lit : proto.lits) { auto fragId = lit.id; - auto &frag = fragments[fragId]; + const auto &frag = fragments[fragId]; if (delay) { DEBUG_PRINTF("delay_program_offset:%u\n", frag.delay_program_offset); @@ -3202,7 +3195,7 @@ set findEngineReports(const RoseBuildImpl &build) { const auto &g = build.g; for (auto v : vertices_range(g)) { if (g[v].suffix) { - insert(&reports, all_reports(g[v].suffix)); + insert(&reports, all_reports(suffix_id(g[v].suffix))); } } @@ -3320,10 +3313,11 @@ void addEodEventProgram(const RoseBuildImpl &build, build_context &bc, // Collect all edges leading into EOD event literal vertices. vector edge_list; + + for (const auto &v : lit_info.vertices) { - for (const auto &e : in_edges_range(v, g)) { - edge_list.emplace_back(e); - } + const auto &er = in_edges_range(v, g); + std::copy(begin(er), end(er), std::back_inserter(edge_list)); } // Sort edge list for determinism, prettiness. 
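The addEodEventProgram change just above, like the cleanupPositions rewrite in src/parser/buildstate.cpp near the top of this diff, replaces a hand-written accumulation loop with a standard algorithm plus `std::back_inserter`. A self-contained sketch of the `copy_if` form, assuming a stand-in `PositionInfo` struct rather than the ue2 type:

```
#include <algorithm>
#include <iterator>
#include <set>
#include <utility>
#include <vector>

struct PositionInfo {
    unsigned pos;
    unsigned flags;
};

int main() {
    std::vector<PositionInfo> a = {{1, 0}, {2, 0}, {1, 0}};
    std::set<std::pair<unsigned, unsigned>> seen;
    std::vector<PositionInfo> out;
    out.reserve(a.size());

    // True only the first time a (pos, flags) pair is inserted, mirroring
    // the seen.emplace(...).second test in the cleanupPositions() hunk.
    auto first_seen = [&seen](const PositionInfo &p) {
        return seen.emplace(p.pos, p.flags).second;
    };
    std::copy_if(a.begin(), a.end(), std::back_inserter(out), first_seen);

    a.swap(out); // a now holds {1,0} and {2,0}; the duplicate is dropped
    return 0;
}
```

Because the predicate is true only on first insertion into the seen set, `copy_if` keeps the first occurrence of each key in input order, matching what the replaced loop did.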
@@ -3660,7 +3654,7 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { prepMpv(*this, bc, &historyRequired, &mpv_as_outfix); proto.outfixBeginQueue = qif.allocated_count(); if (!prepOutfixes(*this, bc, &historyRequired)) { - return nullptr; + return bytecode_ptr(nullptr); } proto.outfixEndQueue = qif.allocated_count(); proto.leftfixBeginQueue = proto.outfixEndQueue; @@ -3671,7 +3665,7 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { /* Note: buildNfas may reduce the lag for vertices that have prefixes */ if (!buildNfas(*this, bc, qif, &no_retrigger_queues, &eager_queues, &proto.leftfixBeginQueue)) { - return nullptr; + return bytecode_ptr(nullptr); } u32 eodNfaIterOffset = buildEodNfaIterator(bc, proto.leftfixBeginQueue); buildCountingMiracles(bc); diff --git a/src/rose/rose_build_castle.cpp b/src/rose/rose_build_castle.cpp index 14b67fdc..94345a43 100644 --- a/src/rose/rose_build_castle.cpp +++ b/src/rose/rose_build_castle.cpp @@ -171,7 +171,6 @@ void renovateCastle(RoseBuildImpl &tbi, CastleProto *castle, return; /* bail - TODO: be less lazy */ } - vector rem_local_cr; u32 ok_count = 0; for (auto it = e.s.end() - g[v].left.lag; it != e.s.end(); ++it) { if (!isSubsetOf(*it, cr)) { @@ -254,11 +253,11 @@ bool unmakeCastles(RoseBuildImpl &tbi) { for (auto v : vertices_range(g)) { const LeftEngInfo &left = g[v].left; if (left.castle && left.castle->repeats.size() > 1) { - left_castles[left].emplace_back(v); + left_castles[left_id(left)].emplace_back(v); } const RoseSuffixInfo &suffix = g[v].suffix; if (suffix.castle && suffix.castle->repeats.size() > 1) { - suffix_castles[suffix].emplace_back(v); + suffix_castles[suffix_id(suffix)].emplace_back(v); } } diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 808d0774..ed06aeb1 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -819,7 +819,7 @@ void RoseBuildImpl::findTransientLeftfixes(void) { continue; } - const left_id &left(g[v].left); + const left_id &left(left_id(g[v].left)); if (::ue2::isAnchored(left) && !isInETable(v)) { /* etable prefixes currently MUST be transient as we do not know @@ -871,7 +871,7 @@ map> findLeftSucc(const RoseBuildImpl &build) { for (auto v : vertices_range(build.g)) { if (build.g[v].left) { const LeftEngInfo &lei = build.g[v].left; - leftfixes[lei].emplace_back(v); + leftfixes[left_id(lei)].emplace_back(v); } } return leftfixes; @@ -1153,10 +1153,10 @@ void findTopTriggerCancels(RoseBuildImpl &build) { for (const auto &r : left_succ) { const left_id &left = r.first; - const vector &succs = r.second; + const vector &rsuccs = r.second; - assert(!succs.empty()); - if (build.isRootSuccessor(*succs.begin())) { + assert(!rsuccs.empty()); + if (build.isRootSuccessor(*rsuccs.begin())) { /* a prefix is never an infix */ continue; } @@ -1165,7 +1165,7 @@ void findTopTriggerCancels(RoseBuildImpl &build) { set rose_edges; set pred_lit_ids; - for (auto v : succs) { + for (auto v : rsuccs) { for (const auto &e : in_edges_range(v, build.g)) { RoseVertex u = source(e, build.g); tops_seen.insert(build.g[e].rose_top); @@ -1221,11 +1221,11 @@ void buildRoseSquashMasks(RoseBuildImpl &tbi) { * successor of the nfa and all the literals */ for (const auto &e : roses) { const left_id &left = e.first; - const vector &succs = e.second; + const vector &rsuccs = e.second; set lit_ids; bool anchored_pred = false; - for (auto v : succs) { + for (auto v : rsuccs) { lit_ids.insert(tbi.g[v].literals.begin(), tbi.g[v].literals.end()); for (auto u : 
inv_adjacent_vertices_range(v, tbi.g)) { anchored_pred |= tbi.isAnchored(u); @@ -1239,7 +1239,7 @@ void buildRoseSquashMasks(RoseBuildImpl &tbi) { if (anchored_pred) { /* infix with pred in anchored table */ u32 min_off = ~0U; u32 max_off = 0U; - for (auto v : succs) { + for (auto v : rsuccs) { for (auto u : inv_adjacent_vertices_range(v, tbi.g)) { min_off = min(min_off, tbi.g[u].min_offset); max_off = max(max_off, tbi.g[u].max_offset); @@ -1259,7 +1259,7 @@ void buildRoseSquashMasks(RoseBuildImpl &tbi) { if (!info.delayed_ids.empty() || !all_of_in(info.vertices, [&](RoseVertex v) { - return left == tbi.g[v].left; })) { + return left == left_id(tbi.g[v].left); })) { DEBUG_PRINTF("group %llu is unsquashable\n", info.group_mask); unsquashable |= info.group_mask; } @@ -1402,7 +1402,7 @@ void addSmallBlockLiteral(RoseBuildImpl &tbi, const simple_anchored_info &sai, g[v].max_offset = sai.max_bound + sai.literal.length(); lit_info.vertices.insert(v); - RoseEdge e = add_edge(anchored_root, v, g); + RoseEdge e = add_edge(anchored_root, v, g).first; g[e].minBound = sai.min_bound; g[e].maxBound = sai.max_bound; } @@ -1426,7 +1426,7 @@ void addSmallBlockLiteral(RoseBuildImpl &tbi, const ue2_literal &lit, g[v].literals.insert(lit_id); g[v].reports = reports; - RoseEdge e = add_edge(tbi.root, v, g); + RoseEdge e = add_edge(tbi.root, v, g).first; g[e].minBound = 0; g[e].maxBound = ROSE_BOUND_INF; g[v].min_offset = 1; diff --git a/src/rose/rose_build_convert.cpp b/src/rose/rose_build_convert.cpp index c89c6ddd..35ffc728 100644 --- a/src/rose/rose_build_convert.cpp +++ b/src/rose/rose_build_convert.cpp @@ -99,7 +99,7 @@ unique_ptr makeFloodProneSuffix(const ue2_literal &s, size_t len, NFAVertex u = h->start; for (auto it = s.begin() + s.length() - len; it != s.end(); ++it) { NFAVertex v = addHolderVertex(*it, *h); - NFAEdge e = add_edge(u, v, *h); + NFAEdge e = add_edge(u, v, *h).first; if (u == h->start) { (*h)[e].tops.insert(DEFAULT_TOP); } @@ -410,7 +410,7 @@ bool handleStartPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, assert(g[e_old].maxBound >= bound_max); setEdgeBounds(g, e_old, bound_min, bound_max); } else { - RoseEdge e_new = add_edge(ar, v, g); + RoseEdge e_new = add_edge(ar, v, g).first; setEdgeBounds(g, e_new, bound_min, bound_max); to_delete->emplace_back(e_old); } @@ -561,10 +561,7 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, DEBUG_PRINTF("woot?\n"); shared_ptr h_new = make_shared(); - if (!h_new) { - assert(0); - throw std::bad_alloc(); - } + unordered_map rhs_map; vector exits_vec; insert(&exits_vec, exits_vec.end(), exits); @@ -606,7 +603,7 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, if (source(e_old, g) == ar) { setEdgeBounds(g, e_old, ri.repeatMin + width, ri.repeatMax + width); } else { - RoseEdge e_new = add_edge(ar, v, g); + RoseEdge e_new = add_edge(ar, v, g).first; setEdgeBounds(g, e_new, ri.repeatMin + width, ri.repeatMax + width); to_delete->emplace_back(e_old); } diff --git a/src/rose/rose_build_dedupe.cpp b/src/rose/rose_build_dedupe.cpp index f788ee1a..49d64c95 100644 --- a/src/rose/rose_build_dedupe.cpp +++ b/src/rose/rose_build_dedupe.cpp @@ -129,7 +129,7 @@ RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &build_in) // Several vertices may share a suffix, so we collect the set of // suffixes first to avoid repeating work. 
if (g[v].suffix) { - suffixes.insert(g[v].suffix); + suffixes.insert(suffix_id(g[v].suffix)); } } diff --git a/src/rose/rose_build_exclusive.cpp b/src/rose/rose_build_exclusive.cpp index 999f769f..d8769026 100644 --- a/src/rose/rose_build_exclusive.cpp +++ b/src/rose/rose_build_exclusive.cpp @@ -119,7 +119,7 @@ bool addPrefixLiterals(NGHolder &h, unordered_set &tailId, for (auto v : adjacent_vertices_range(start, h)) { if (v != h.startDs) { - for (auto &t : tails) { + for (const auto &t : tails) { add_edge(t, v, h); } } @@ -127,7 +127,7 @@ bool addPrefixLiterals(NGHolder &h, unordered_set &tailId, clear_out_edges(start, h); add_edge(h.start, h.start, h); - for (auto &t : heads) { + for (const auto &t : heads) { add_edge(start, t, h); } diff --git a/src/rose/rose_build_groups.cpp b/src/rose/rose_build_groups.cpp index 9aaf349b..28cbe3b0 100644 --- a/src/rose/rose_build_groups.cpp +++ b/src/rose/rose_build_groups.cpp @@ -77,7 +77,7 @@ static bool eligibleForAlwaysOnGroup(const RoseBuildImpl &build, u32 id) { auto eligble = [&](RoseVertex v) { return build.isRootSuccessor(v) - && (!build.g[v].left || !isAnchored(build.g[v].left)); + && (!build.g[v].left || !isAnchored(left_id(build.g[v].left))); }; if (any_of_in(build.literal_info[id].vertices, eligble)) { @@ -210,7 +210,7 @@ void allocateGroupForEvent(RoseBuildImpl &build, u32 group_always_on, bool new_group = !groupCount[group_always_on]; for (RoseVertex v : info.vertices) { - if (build.g[v].left && !isAnchored(build.g[v].left)) { + if (build.g[v].left && !isAnchored(left_id(build.g[v].left))) { new_group = false; } } @@ -277,7 +277,7 @@ void assignGroupsToLiterals(RoseBuildImpl &build) { // Second pass: the other literals. for (u32 id = 0; id < literals.size(); id++) { const rose_literal_id &lit = literals.at(id); - rose_literal_info &info = literal_info[id]; + const rose_literal_info &info = literal_info[id]; if (!requires_group_assignment(lit, info)) { continue; diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index d0ed84df..3aa530fb 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -80,7 +80,7 @@ class SmallWriteBuild; class SomSlotManager; struct suffix_id { - suffix_id(const RoseSuffixInfo &in) + explicit suffix_id(const RoseSuffixInfo &in) : g(in.graph.get()), c(in.castle.get()), d(in.rdfa.get()), h(in.haig.get()), t(in.tamarama.get()), dfa_min_width(in.dfa_min_width), @@ -181,7 +181,7 @@ depth findMaxWidth(const suffix_id &s, u32 top); /** \brief represents an engine to the left of a rose role */ struct left_id { - left_id(const LeftEngInfo &in) + explicit left_id(const LeftEngInfo &in) : g(in.graph.get()), c(in.castle.get()), d(in.dfa.get()), h(in.haig.get()), dfa_min_width(in.dfa_min_width), dfa_max_width(in.dfa_max_width) { diff --git a/src/rose/rose_build_instructions.h b/src/rose/rose_build_instructions.h index f18f4a47..01ff8d88 100644 --- a/src/rose/rose_build_instructions.h +++ b/src/rose/rose_build_instructions.h @@ -2319,7 +2319,7 @@ class RoseInstrSetCombination public: u32 ckey; - RoseInstrSetCombination(u32 ckey_in) : ckey(ckey_in) {} + explicit RoseInstrSetCombination(u32 ckey_in) : ckey(ckey_in) {} bool operator==(const RoseInstrSetCombination &ri) const { return ckey == ri.ckey; @@ -2361,7 +2361,7 @@ class RoseInstrSetExhaust public: u32 ekey; - RoseInstrSetExhaust(u32 ekey_in) : ekey(ekey_in) {} + explicit RoseInstrSetExhaust(u32 ekey_in) : ekey(ekey_in) {} bool operator==(const RoseInstrSetExhaust &ri) const { return ekey == ri.ekey; diff --git 
a/src/rose/rose_build_long_lit.cpp b/src/rose/rose_build_long_lit.cpp index 45a2eb27..943c5b29 100644 --- a/src/rose/rose_build_long_lit.cpp +++ b/src/rose/rose_build_long_lit.cpp @@ -98,8 +98,7 @@ void addToBloomFilter(vector &bloom, const u8 *substr, bool nocase) { const auto hash_functions = { bloomHash_1, bloomHash_2, bloomHash_3 }; for (const auto &hash_func : hash_functions) { - u32 hash = hash_func(substr, nocase); - u32 key = hash & key_mask; + u32 key = hash_func(substr, nocase) & key_mask; DEBUG_PRINTF("set key %u (of %zu)\n", key, bloom.size() * 8); bloom[key / 8] |= 1U << (key % 8); } @@ -193,11 +192,9 @@ vector buildHashTable( } for (const auto &m : hashToLitOffPairs) { - u32 hash = m.first; + u32 bucket = m.first % numEntries; const LitOffsetVector &d = m.second; - u32 bucket = hash % numEntries; - // Placement via linear probing. for (const auto &lit_offset : d) { while (tab[bucket].str_offset != 0) { diff --git a/src/rose/rose_build_lookaround.cpp b/src/rose/rose_build_lookaround.cpp index fecc73b4..7c203be5 100644 --- a/src/rose/rose_build_lookaround.cpp +++ b/src/rose/rose_build_lookaround.cpp @@ -202,10 +202,10 @@ void getForwardReach(const raw_dfa &rdfa, map &look) { } for (unsigned c = 0; c < N_CHARS; c++) { - dstate_id_t succ = ds.next[rdfa.alpha_remap[c]]; - if (succ != DEAD_STATE) { + dstate_id_t dnsucc = ds.next[rdfa.alpha_remap[c]]; + if (dnsucc != DEAD_STATE) { cr.set(c); - next.insert(succ); + next.insert(dnsucc); } } } @@ -280,13 +280,13 @@ void findForwardReach(const RoseGraph &g, const RoseVertex v, return; } rose_look.emplace_back(map()); - getRoseForwardReach(g[t].left, g[e].rose_top, rose_look.back()); + getRoseForwardReach(left_id(g[t].left), g[e].rose_top, rose_look.back()); } if (g[v].suffix) { DEBUG_PRINTF("suffix engine\n"); rose_look.emplace_back(map()); - getSuffixForwardReach(g[v].suffix, g[v].suffix.top, rose_look.back()); + getSuffixForwardReach(suffix_id(g[v].suffix), g[v].suffix.top, rose_look.back()); } combineForwardMasks(rose_look, look); diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index d644bcd5..a14eae2d 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -75,7 +75,7 @@ string dumpMask(const vector &v) { static bool maskFromLeftGraph(const LeftEngInfo &left, vector &msk, - vector &cmp) { + vector &lcmp) { const u32 lag = left.lag; const ReportID report = left.leftfix_report; @@ -111,9 +111,9 @@ bool maskFromLeftGraph(const LeftEngInfo &left, vector &msk, cr |= v_cr; insert(&next, inv_adjacent_vertices(v, h)); } - make_and_cmp_mask(cr, &msk.at(i), &cmp.at(i)); - DEBUG_PRINTF("%zu: reach=%s, msk=%u, cmp=%u\n", i, - describeClass(cr).c_str(), msk[i], cmp[i]); + make_and_cmp_mask(cr, &msk.at(i), &lcmp.at(i)); + DEBUG_PRINTF("%zu: reach=%s, msk=%u, lcmp=%u\n", i, + describeClass(cr).c_str(), msk[i], lcmp[i]); curr.swap(next); } while (i-- > 0); @@ -122,7 +122,7 @@ bool maskFromLeftGraph(const LeftEngInfo &left, vector &msk, static bool maskFromLeftCastle(const LeftEngInfo &left, vector &msk, - vector &cmp) { + vector &lcmp) { const u32 lag = left.lag; const ReportID report = left.leftfix_report; @@ -149,23 +149,23 @@ bool maskFromLeftCastle(const LeftEngInfo &left, vector &msk, u32 len = min_width; u32 end = HWLM_MASKLEN - lag; for (u32 i = end; i > end - min(end, len); i--) { - make_and_cmp_mask(c.reach(), &msk.at(i - 1), &cmp.at(i - 1)); + make_and_cmp_mask(c.reach(), &msk.at(i - 1), &lcmp.at(i - 1)); } return true; } static -bool maskFromLeft(const LeftEngInfo &left, 
vector &msk, vector &cmp) { +bool maskFromLeft(const LeftEngInfo &left, vector &msk, vector &lcmp) { if (left.lag >= HWLM_MASKLEN) { DEBUG_PRINTF("too much lag\n"); return false; } if (left.graph) { - return maskFromLeftGraph(left, msk, cmp); + return maskFromLeftGraph(left, msk, lcmp); } else if (left.castle) { - return maskFromLeftCastle(left, msk, cmp); + return maskFromLeftCastle(left, msk, lcmp); } return false; @@ -173,7 +173,7 @@ bool maskFromLeft(const LeftEngInfo &left, vector &msk, vector &cmp) { static bool maskFromPreds(const RoseBuildImpl &build, const rose_literal_id &id, - const RoseVertex v, vector &msk, vector &cmp) { + const RoseVertex v, vector &msk, vector &lcmp) { const RoseGraph &g = build.g; // For right now, wuss out and only handle cases with one pred. @@ -222,7 +222,7 @@ bool maskFromPreds(const RoseBuildImpl &build, const rose_literal_id &id, ue2_literal::const_iterator it, ite; for (it = u_id.s.begin() + (u_len - u_sublen), ite = u_id.s.end(); it != ite; ++it) { - make_and_cmp_mask(*it, &msk.at(i), &cmp.at(i)); + make_and_cmp_mask(*it, &msk.at(i), &lcmp.at(i)); ++i; } @@ -231,21 +231,21 @@ bool maskFromPreds(const RoseBuildImpl &build, const rose_literal_id &id, static bool addSurroundingMask(const RoseBuildImpl &build, const rose_literal_id &id, - const RoseVertex v, vector &msk, vector &cmp) { + const RoseVertex v, vector &msk, vector &lcmp) { // Start with zero masks. msk.assign(HWLM_MASKLEN, 0); - cmp.assign(HWLM_MASKLEN, 0); + lcmp.assign(HWLM_MASKLEN, 0); const LeftEngInfo &left = build.g[v].left; if (left && left.lag < HWLM_MASKLEN) { - if (maskFromLeft(left, msk, cmp)) { + if (maskFromLeft(left, msk, lcmp)) { DEBUG_PRINTF("mask from a leftfix!\n"); return true; } } if (id.s.length() < HWLM_MASKLEN) { - if (maskFromPreds(build, id, v, msk, cmp)) { + if (maskFromPreds(build, id, v, msk, lcmp)) { DEBUG_PRINTF("mask from preds!\n"); return true; } @@ -255,18 +255,18 @@ bool addSurroundingMask(const RoseBuildImpl &build, const rose_literal_id &id, } static -bool hamsterMaskCombine(vector &msk, vector &cmp, +bool hamsterMaskCombine(vector &msk, vector &lcmp, const vector &v_msk, const vector &v_cmp) { - assert(msk.size() == HWLM_MASKLEN && cmp.size() == HWLM_MASKLEN); + assert(msk.size() == HWLM_MASKLEN && lcmp.size() == HWLM_MASKLEN); assert(v_msk.size() == HWLM_MASKLEN && v_cmp.size() == HWLM_MASKLEN); u8 all_masks = 0; for (size_t i = 0; i < HWLM_MASKLEN; i++) { - u8 filter = ~(cmp[i] ^ v_cmp[i]); + u8 filter = ~(lcmp[i] ^ v_cmp[i]); msk[i] &= v_msk[i]; msk[i] &= filter; - cmp[i] &= filter; + lcmp[i] &= filter; all_masks |= msk[i]; } @@ -278,7 +278,7 @@ bool hamsterMaskCombine(vector &msk, vector &cmp, static bool addSurroundingMask(const RoseBuildImpl &build, const rose_literal_id &id, const rose_literal_info &info, vector &msk, - vector &cmp) { + vector &lcmp) { if (!build.cc.grey.roseHamsterMasks) { return false; } @@ -289,7 +289,7 @@ bool addSurroundingMask(const RoseBuildImpl &build, const rose_literal_id &id, } msk.assign(HWLM_MASKLEN, 0); - cmp.assign(HWLM_MASKLEN, 0); + lcmp.assign(HWLM_MASKLEN, 0); size_t num = 0; vector v_msk, v_cmp; @@ -301,28 +301,28 @@ bool addSurroundingMask(const RoseBuildImpl &build, const rose_literal_id &id, } if (!num++) { - // First (or only) vertex, this becomes the mask/cmp pair. + // First (or only) vertex, this becomes the mask/lcmp pair. msk = v_msk; - cmp = v_cmp; + lcmp = v_cmp; } else { // Multiple vertices with potentially different masks. We combine // them into an 'advisory' mask. 
- if (!hamsterMaskCombine(msk, cmp, v_msk, v_cmp)) { + if (!hamsterMaskCombine(msk, lcmp, v_msk, v_cmp)) { DEBUG_PRINTF("mask went to zero\n"); return false; } } } - normaliseLiteralMask(id.s, msk, cmp); + normaliseLiteralMask(id.s, msk, lcmp); if (msk.empty()) { DEBUG_PRINTF("no mask\n"); return false; } - DEBUG_PRINTF("msk=%s, cmp=%s\n", dumpMask(msk).c_str(), - dumpMask(cmp).c_str()); + DEBUG_PRINTF("msk=%s, lcmp=%s\n", dumpMask(msk).c_str(), + dumpMask(lcmp).c_str()); return true; } @@ -357,13 +357,13 @@ void findMoreLiteralMasks(RoseBuildImpl &build) { const auto &lit = build.literals.at(id); auto &lit_info = build.literal_info.at(id); - vector msk, cmp; - if (!addSurroundingMask(build, lit, lit_info, msk, cmp)) { + vector msk, lcmp; + if (!addSurroundingMask(build, lit, lit_info, msk, lcmp)) { continue; } DEBUG_PRINTF("found surrounding mask for lit_id=%u (%s)\n", id, dumpString(lit.s).c_str()); - u32 new_id = build.getLiteralId(lit.s, msk, cmp, lit.delay, lit.table); + u32 new_id = build.getLiteralId(lit.s, msk, lcmp, lit.delay, lit.table); if (new_id == id) { continue; } @@ -392,7 +392,7 @@ void findMoreLiteralMasks(RoseBuildImpl &build) { // mixed-case is mandatory. static void addLiteralMask(const rose_literal_id &id, vector &msk, - vector &cmp) { + vector &lcmp) { const size_t suffix_len = min(id.s.length(), size_t{HWLM_MASKLEN}); bool mixed_suffix = mixed_sensitivity_in(id.s.end() - suffix_len, id.s.end()); @@ -403,7 +403,7 @@ void addLiteralMask(const rose_literal_id &id, vector &msk, while (msk.size() < HWLM_MASKLEN) { msk.insert(msk.begin(), 0); - cmp.insert(cmp.begin(), 0); + lcmp.insert(lcmp.begin(), 0); } if (!id.msk.empty()) { @@ -413,7 +413,7 @@ void addLiteralMask(const rose_literal_id &id, vector &msk, size_t mand_offset = msk.size() - i - 1; size_t lit_offset = id.msk.size() - i - 1; msk[mand_offset] = id.msk[lit_offset]; - cmp[mand_offset] = id.cmp[lit_offset]; + lcmp[mand_offset] = id.cmp[lit_offset]; } } @@ -425,12 +425,12 @@ void addLiteralMask(const rose_literal_id &id, vector &msk, size_t offset = HWLM_MASKLEN - i - 1; DEBUG_PRINTF("offset %zu must match 0x%02x exactly\n", offset, c.c); - make_and_cmp_mask(c, &msk[offset], &cmp[offset]); + make_and_cmp_mask(c, &msk[offset], &lcmp[offset]); } } } - normaliseLiteralMask(id.s, msk, cmp); + normaliseLiteralMask(id.s, msk, lcmp); } static @@ -477,10 +477,10 @@ bool isNoRunsVertex(const RoseBuildImpl &build, RoseVertex u) { DEBUG_PRINTF("u=%zu is not a root role\n", g[u].index); return false; } + auto edge_result = edge(build.root, u, g); + RoseEdge e = edge_result.first; - RoseEdge e = edge(build.root, u, g); - - if (!e) { + if (!edge_result.second) { DEBUG_PRINTF("u=%zu is not a root role\n", g[u].index); return false; } @@ -638,7 +638,7 @@ u64a literalMinReportOffset(const RoseBuildImpl &build, } if (g[v].suffix) { - depth suffix_width = findMinWidth(g[v].suffix, g[v].suffix.top); + depth suffix_width = findMinWidth(suffix_id(g[v].suffix), g[v].suffix.top); assert(suffix_width.is_reachable()); DEBUG_PRINTF("suffix with width %s\n", suffix_width.str().c_str()); min_offset = min(min_offset, vert_offset + suffix_width); @@ -707,7 +707,7 @@ void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp, lit.s.length()); vector msk = lit.msk; // copy - vector cmp = lit.cmp; // copy + vector lcmp = lit.cmp; // copy bool noruns = isNoRunsFragment(build, f, max_len); DEBUG_PRINTF("fragment is %s\n", noruns ? 
"noruns" : "not noruns"); @@ -723,24 +723,24 @@ void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp, assert(!noruns); } - addLiteralMask(lit, msk, cmp); + addLiteralMask(lit, msk, lcmp); const auto &s_final = lit_final.get_string(); bool nocase = lit_final.any_nocase(); - DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, cmp=%s\n", + DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, lcmp=%s\n", f.fragment_id, escapeString(s_final).c_str(), (int)nocase, - noruns, dumpMask(msk).c_str(), dumpMask(cmp).c_str()); + noruns, dumpMask(msk).c_str(), dumpMask(lcmp).c_str()); - if (!maskIsConsistent(s_final, nocase, msk, cmp)) { - DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); + if (!maskIsConsistent(s_final, nocase, msk, lcmp)) { + DEBUG_PRINTF("msk/lcmp for literal can't match, skipping\n"); return; } const auto &groups = f.groups; mp.lits.emplace_back(std::move(s_final), nocase, noruns, f.fragment_id, - groups, msk, cmp); + groups, msk, lcmp); } static @@ -751,11 +751,11 @@ void addAccelLiteral(MatcherProto &mp, const rose_literal_id &lit, DEBUG_PRINTF("lit='%s' (len %zu)\n", dumpString(s).c_str(), s.length()); vector msk = lit.msk; // copy - vector cmp = lit.cmp; // copy - addLiteralMask(lit, msk, cmp); + vector lcmp = lit.cmp; // copy + addLiteralMask(lit, msk, lcmp); - if (!maskIsConsistent(s.get_string(), s.any_nocase(), msk, cmp)) { - DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); + if (!maskIsConsistent(s.get_string(), s.any_nocase(), msk, lcmp)) { + DEBUG_PRINTF("msk/lcmp for literal can't match, skipping\n"); return; } @@ -764,9 +764,9 @@ void addAccelLiteral(MatcherProto &mp, const rose_literal_id &lit, string s_final = lit.s.get_string(); trim_to_suffix(s_final, max_len); trim_to_suffix(msk, max_len); - trim_to_suffix(cmp, max_len); + trim_to_suffix(lcmp, max_len); - mp.accel_lits.emplace_back(s_final, lit.s.any_nocase(), msk, cmp, + mp.accel_lits.emplace_back(s_final, lit.s.any_nocase(), msk, lcmp, info.group_mask); } @@ -887,9 +887,9 @@ void buildAccel(const RoseBuildImpl &build, } bytecode_ptr -buildHWLMMatcher(const RoseBuildImpl &build, LitProto *litProto) { +buildHWLMMatcher(const RoseBuildImpl &build, const LitProto *litProto) { if (!litProto) { - return nullptr; + return bytecode_ptr(nullptr); } auto hwlm = hwlmBuild(*litProto->hwlmProto, build.cc, build.getInitialGroups()); diff --git a/src/rose/rose_build_matchers.h b/src/rose/rose_build_matchers.h index ef8999ed..37a96c7a 100644 --- a/src/rose/rose_build_matchers.h +++ b/src/rose/rose_build_matchers.h @@ -101,7 +101,7 @@ struct LitProto { }; bytecode_ptr -buildHWLMMatcher(const RoseBuildImpl &build, LitProto *proto); +buildHWLMMatcher(const RoseBuildImpl &build, const LitProto *proto); std::unique_ptr buildFloatingMatcherProto(const RoseBuildImpl &build, diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 69f688c5..8e57f0f2 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -145,7 +145,7 @@ namespace { /** Key used to group sets of leftfixes by the dedupeLeftfixes path. */ struct RoseGroup { RoseGroup(const RoseBuildImpl &build, RoseVertex v) - : left_hash(hashLeftfix(build.g[v].left)), + : left_hash(hashLeftfix(left_id(build.g[v].left))), lag(build.g[v].left.lag), eod_table(build.isInETable(v)) { const RoseGraph &g = build.g; assert(in_degree(v, g) == 1); @@ -262,8 +262,8 @@ bool dedupeLeftfixes(RoseBuildImpl &tbi) { // Scan the rest of the list for dupes. 
for (auto kt = std::next(jt); kt != jte; ++kt) { if (g[v].left == g[*kt].left - || !is_equal(g[v].left, g[v].left.leftfix_report, - g[*kt].left, g[*kt].left.leftfix_report)) { + || !is_equal(left_id(g[v].left), g[v].left.leftfix_report, + left_id(g[*kt].left), g[*kt].left.leftfix_report)) { continue; } @@ -547,8 +547,8 @@ bool checkPrefix(const rose_literal_id &ul, const u32 ulag, static bool hasSameEngineType(const RoseVertexProps &u_prop, const RoseVertexProps &v_prop) { - const left_id u_left = u_prop.left; - const left_id v_left = v_prop.left; + const left_id u_left = left_id(u_prop.left); + const left_id v_left = left_id(v_prop.left); return !u_left.haig() == !v_left.haig() && !u_left.dfa() == !v_left.dfa() @@ -800,9 +800,9 @@ template static never_inline bool checkPredDelays(const RoseBuildImpl &build, const VertexCont &v1, const VertexCont &v2) { - flat_set preds; + flat_set fpreds; for (auto v : v1) { - insert(&preds, inv_adjacent_vertices(v, build.g)); + insert(&fpreds, inv_adjacent_vertices(v, build.g)); } flat_set pred_lits; @@ -817,7 +817,7 @@ bool checkPredDelays(const RoseBuildImpl &build, const VertexCont &v1, insert(&known_good_preds, inv_adjacent_vertices(v, build.g)); } - for (auto u : preds) { + for (auto u : fpreds) { if (!contains(known_good_preds, u)) { insert(&pred_lits, build.g[u].literals); } @@ -1197,8 +1197,8 @@ bool mergeLeftVL_tryMergeCandidate(RoseBuildImpl &build, left_id &r1, assert(!r1.graph() == !r2.graph()); if (r1.graph()) { - NGHolder *h1 = r1.graph(); - NGHolder *h2 = r2.graph(); + const NGHolder *h1 = r1.graph(); + const NGHolder *h2 = r2.graph(); CharReach stop1 = findStopAlphabet(*h1, SOM_NONE); CharReach stop2 = findStopAlphabet(*h2, SOM_NONE); CharReach stopboth = stop1 & stop2; @@ -1349,15 +1349,15 @@ void chunk(vector in, vector> *out, size_t chunk_size) { } static -insertion_ordered_map> get_eng_verts(RoseGraph &g) { +insertion_ordered_map> get_eng_verts(const RoseGraph &g) { insertion_ordered_map> eng_verts; for (auto v : vertices_range(g)) { const auto &left = g[v].left; if (!left) { continue; } - assert(contains(all_reports(left), left.leftfix_report)); - eng_verts[left].emplace_back(v); + assert(contains(all_reports(left_id(left)), left.leftfix_report)); + eng_verts[left_id(left)].emplace_back(v); } return eng_verts; @@ -1547,11 +1547,11 @@ private: static flat_set> get_pred_tops(RoseVertex v, const RoseGraph &g) { - flat_set> preds; + flat_set> fpreds; for (const auto &e : in_edges_range(v, g)) { - preds.emplace(g[source(e, g)].index, g[e].rose_top); + fpreds.emplace(g[source(e, g)].index, g[e].rose_top); } - return preds; + return fpreds; } /** @@ -1603,14 +1603,15 @@ void dedupeLeftfixesVariableLag(RoseBuildImpl &build) { assert(!is_triggered(*left.graph()) || onlyOneTop(*left.graph())); } - auto preds = get_pred_tops(verts.front(), g); + auto vpreds = get_pred_tops(verts.front(), g); for (RoseVertex v : verts) { - if (preds != get_pred_tops(v, g)) { + if (vpreds != get_pred_tops(v, g)) { DEBUG_PRINTF("distinct pred sets\n"); continue; } } - engine_groups[DedupeLeftKey(build, std::move(preds), left)].emplace_back(left); + auto preds_copy = std::move(vpreds); + engine_groups[DedupeLeftKey(build, preds_copy , left)].emplace_back(left); } /* We don't bother chunking as we expect deduping to be successful if the @@ -1697,7 +1698,7 @@ void replaceTops(NGHolder &h, const map &top_mapping) { } static -bool setDistinctTops(NGHolder &h1, const NGHolder &h2, +void setDistinctTops(NGHolder &h1, const NGHolder &h2, map &top_mapping) { flat_set tops1 
= getTops(h1), tops2 = getTops(h2); @@ -1707,7 +1708,7 @@ bool setDistinctTops(NGHolder &h1, const NGHolder &h2, // If our tops don't intersect, we're OK to merge with no changes. if (!has_intersection(tops1, tops2)) { DEBUG_PRINTF("tops don't intersect\n"); - return true; + return; } // Otherwise, we have to renumber the tops in h1 so that they don't overlap @@ -1722,18 +1723,17 @@ bool setDistinctTops(NGHolder &h1, const NGHolder &h2, } replaceTops(h1, top_mapping); - return true; + return; } -bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, +void setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, const deque &verts1) { map top_mapping; - if (!setDistinctTops(h1, h2, top_mapping)) { - return false; - } + + setDistinctTops(h1, h2, top_mapping); if (top_mapping.empty()) { - return true; // No remapping necessary. + return; // No remapping necessary. } for (auto v : verts1) { @@ -1751,19 +1751,17 @@ bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, } } - return true; + return; } static -bool setDistinctSuffixTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, +void setDistinctSuffixTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, const deque &verts1) { map top_mapping; - if (!setDistinctTops(h1, h2, top_mapping)) { - return false; - } + setDistinctTops(h1, h2, top_mapping); if (top_mapping.empty()) { - return true; // No remapping necessary. + return; // No remapping necessary. } for (auto v : verts1) { @@ -1773,7 +1771,7 @@ bool setDistinctSuffixTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, g[v].suffix.top = top_mapping[t]; } - return true; + return; } /** \brief Estimate the number of accel states in the given graph when built as @@ -1847,10 +1845,7 @@ void mergeNfaLeftfixes(RoseBuildImpl &tbi, LeftfixBouquet &roses) { } } - if (!setDistinctRoseTops(g, victim, *r1.graph(), verts2)) { - DEBUG_PRINTF("can't set distinct tops\n"); - continue; // next h2 - } + setDistinctRoseTops(g, victim, *r1.graph(), verts2); assert(victim.kind == r1.graph()->kind); assert(!generates_callbacks(*r1.graph())); @@ -1903,7 +1898,7 @@ void mergeSmallLeftfixes(RoseBuildImpl &tbi) { return; } - RoseGraph &g = tbi.g; + const RoseGraph &g = tbi.g; LeftfixBouquet nfa_leftfixes; @@ -1935,7 +1930,7 @@ void mergeSmallLeftfixes(RoseBuildImpl &tbi) { } assert(left.graph()); - NGHolder &h = *left.graph(); + const NGHolder &h = *left.graph(); /* Ensure that kind on the graph is correct */ assert(h.kind == (tbi.isRootSuccessor(v) ?
NFA_PREFIX : NFA_INFIX)); @@ -2035,7 +2030,7 @@ void mergeCastleLeftfixes(RoseBuildImpl &build) { return; } - RoseGraph &g = build.g; + const RoseGraph &g = build.g; insertion_ordered_map> eng_verts; @@ -2049,7 +2044,7 @@ void mergeCastleLeftfixes(RoseBuildImpl &build) { continue; } - eng_verts[g[v].left].emplace_back(v); + eng_verts[left_id(g[v].left)].emplace_back(v); } map> by_reach; @@ -2065,8 +2060,8 @@ void mergeCastleLeftfixes(RoseBuildImpl &build) { DEBUG_PRINTF("chunked castles into %zu groups\n", chunks.size()); - for (auto &chunk : chunks) { - mergeCastleChunk(build, chunk, eng_verts); + for (auto &cchunk : chunks) { + mergeCastleChunk(build, cchunk, eng_verts); } } @@ -2130,10 +2125,7 @@ void mergeSuffixes(RoseBuildImpl &tbi, SuffixBouquet &suffixes, old_tops[v] = g[v].suffix.top; } - if (!setDistinctSuffixTops(g, victim, *s1.graph(), verts2)) { - DEBUG_PRINTF("can't set distinct tops\n"); - continue; // next h2 - } + setDistinctSuffixTops(g, victim, *s1.graph(), verts2); if (!mergeNfaPair(victim, *s1.graph(), &tbi.rm, tbi.cc)) { DEBUG_PRINTF("merge failed\n"); @@ -2199,7 +2191,7 @@ void mergeAcyclicSuffixes(RoseBuildImpl &tbi) { SuffixBouquet suffixes; - RoseGraph &g = tbi.g; + const RoseGraph &g = tbi.g; for (auto v : vertices_range(g)) { shared_ptr h = g[v].suffix.graph; @@ -2217,7 +2209,7 @@ void mergeAcyclicSuffixes(RoseBuildImpl &tbi) { continue; } - suffixes.insert(g[v].suffix, v); + suffixes.insert(suffix_id(g[v].suffix), v); } deque suff_groups; @@ -2259,7 +2251,7 @@ void mergeSmallSuffixes(RoseBuildImpl &tbi) { return; } - RoseGraph &g = tbi.g; + const RoseGraph &g = tbi.g; SuffixBouquet suffixes; for (auto v : vertices_range(g)) { @@ -2279,7 +2271,7 @@ void mergeSmallSuffixes(RoseBuildImpl &tbi) { continue; } - suffixes.insert(g[v].suffix, v); + suffixes.insert(suffix_id(g[v].suffix), v); } deque suff_groups; @@ -2317,7 +2309,7 @@ void mergeOutfixInfo(OutfixInfo &winner, const OutfixInfo &victim) { } static -map chunkedNfaMerge(RoseBuildImpl &build, +map chunkedNfaMerge(const RoseBuildImpl &build, const vector &nfas) { map merged; @@ -2460,13 +2452,13 @@ void chunkedDfaMerge(vector &dfas, DEBUG_PRINTF("begin merge of %zu dfas\n", dfas.size()); vector out_dfas; - vector chunk; + vector dchunk; for (auto it = begin(dfas), ite = end(dfas); it != ite; ++it) { - chunk.emplace_back(*it); - if (chunk.size() >= DFA_CHUNK_SIZE_MAX || next(it) == ite) { - pairwiseDfaMerge(chunk, dfa_mapping, outfixes, merge_func); - out_dfas.insert(end(out_dfas), begin(chunk), end(chunk)); - chunk.clear(); + dchunk.emplace_back(*it); + if (dchunk.size() >= DFA_CHUNK_SIZE_MAX || next(it) == ite) { + pairwiseDfaMerge(dchunk, dfa_mapping, outfixes, merge_func); + out_dfas.insert(end(out_dfas), begin(dchunk), end(dchunk)); + dchunk.clear(); } } @@ -2809,8 +2801,8 @@ void mergeCastleSuffixes(RoseBuildImpl &build) { eng_verts[c].emplace_back(v); } - for (auto &chunk : by_reach | map_values) { - mergeCastleSuffixChunk(g, chunk, eng_verts); + for (auto &cchunk : by_reach | map_values) { + mergeCastleSuffixChunk(g, cchunk, eng_verts); } } diff --git a/src/rose/rose_build_merge.h b/src/rose/rose_build_merge.h index 6de6c778..e93a977f 100644 --- a/src/rose/rose_build_merge.h +++ b/src/rose/rose_build_merge.h @@ -62,7 +62,7 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, bool mergeableRoseVertices(const RoseBuildImpl &tbi, const std::set &v1, const std::set &v2); -bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, +void setDistinctRoseTops(RoseGraph &g, NGHolder 
&h1, const NGHolder &h2, const std::deque &verts1); } // namespace ue2 diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 98273086..a5225d3b 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -521,8 +521,8 @@ bool roseHasTops(const RoseBuildImpl &build, RoseVertex v) { graph_tops.insert(g[e].rose_top); } } - - return is_subset_of(graph_tops, all_tops(g[v].left)); + + return is_subset_of(graph_tops, all_tops(left_id(g[v].left))); } #endif @@ -869,7 +869,6 @@ u32 roseQuality(const RoseResources &res, const RoseEngine *t) { } if (eod_prefix) { - always_run++; DEBUG_PRINTF("eod prefixes are slow"); return 0; } @@ -1014,16 +1013,16 @@ bool hasOrphanedTops(const RoseBuildImpl &build) { for (auto v : vertices_range(g)) { if (g[v].left) { - set &tops = leftfixes[g[v].left]; if (!build.isRootSuccessor(v)) { // Tops for infixes come from the in-edges. + set &tops = leftfixes[left_id(g[v].left)]; for (const auto &e : in_edges_range(v, g)) { tops.insert(g[e].rose_top); } } } if (g[v].suffix) { - suffixes[g[v].suffix].insert(g[v].suffix.top); + suffixes[suffix_id(g[v].suffix)].insert(g[v].suffix.top); } } diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index bc343a55..a157ad27 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -1920,8 +1920,8 @@ void makeRoleSuffix(const RoseBuildImpl &build, if (!g[v].suffix) { return; } - assert(contains(suffixes, g[v].suffix)); - u32 queue = suffixes.at(g[v].suffix); + assert(contains(suffixes, suffix_id(g[v].suffix))); + u32 queue = suffixes.at(suffix_id(g[v].suffix)); u32 event; assert(contains(engine_info_by_queue, queue)); const auto eng_info = engine_info_by_queue.at(queue); @@ -1993,7 +1993,7 @@ void makeRoleInfixTriggers(const RoseBuildImpl &build, make_pair(g[v].index, g[e].rose_top)); assert(top < MQE_INVALID); } else if (!isMultiTopType(eng_info.type)) { - assert(num_tops(g[v].left) == 1); + assert(num_tops(left_id(g[v].left)) == 1); top = MQE_TOP; } else { top = MQE_TOP_FIRST + g[e].rose_top; @@ -2181,7 +2181,7 @@ void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 lit_id, namespace { struct ProgKey { - ProgKey(const RoseProgram &p) : prog(&p) {} + explicit ProgKey(const RoseProgram &p) : prog(&p) {} bool operator==(const ProgKey &b) const { return RoseProgramEquivalence()(*prog, *b.prog); @@ -2203,7 +2203,7 @@ RoseProgram assembleProgramBlocks(vector &&blocks_in) { ue2_unordered_set seen; for (auto &block : blocks_in) { - if (contains(seen, block)) { + if (contains(seen, ProgKey(block))) { continue; } @@ -2435,9 +2435,8 @@ void addPredBlocksAny(map &pred_blocks, u32 num_states, RoseProgram sparse_program; vector keys; - for (const u32 &key : pred_blocks | map_keys) { - keys.emplace_back(key); - } + const auto &k = pred_blocks | map_keys; + std::copy(begin(k), end(k), std::back_inserter(keys)); const RoseInstruction *end_inst = sparse_program.end_instruction(); auto ri = std::make_unique(num_states, keys, end_inst); diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp index 0404a0c2..a762520d 100644 --- a/src/rose/rose_build_role_aliasing.cpp +++ b/src/rose/rose_build_role_aliasing.cpp @@ -159,13 +159,13 @@ private: }; struct RoseAliasingInfo { - RoseAliasingInfo(const RoseBuildImpl &build) { + explicit RoseAliasingInfo(const RoseBuildImpl &build) { const auto &g = build.g; // Populate reverse leftfix map. 
for (auto v : vertices_range(g)) { if (g[v].left) { - rev_leftfix[g[v].left].insert(v); + rev_leftfix[left_id(g[v].left)].insert(v); } } @@ -260,8 +260,10 @@ bool samePredecessors(RoseVertex a, RoseVertex b, const RoseGraph &g) { // cppcheck-suppress useStlAlgorithm for (const auto &e_a : in_edges_range(a, g)) { - RoseEdge e = edge(source(e_a, g), b, g); - if (!e || g[e].rose_top != g[e_a].rose_top) { + auto edge_result = edge(source(e_a, g), b, g); + RoseEdge e = edge_result.first; + + if (!edge_result.second || g[e].rose_top != g[e_a].rose_top) { DEBUG_PRINTF("bad tops\n"); return false; } @@ -276,7 +278,9 @@ bool hasCommonSuccWithBadBounds(RoseVertex a, RoseVertex b, const RoseGraph &g) { // cppcheck-suppress useStlAlgorithm for (const auto &e_a : out_edges_range(a, g)) { - if (RoseEdge e = edge(b, target(e_a, g), g)) { + auto edge_result = edge(b, target(e_a, g), g); + RoseEdge e = edge_result.first; + if (edge_result.second) { if (g[e_a].maxBound < g[e].minBound || g[e].maxBound < g[e_a].minBound) { return true; @@ -296,7 +300,9 @@ bool hasCommonPredWithBadBounds(RoseVertex a, RoseVertex b, const RoseGraph &g) { // cppcheck-suppress useStlAlgorithm for (const auto &e_a : in_edges_range(a, g)) { - if (RoseEdge e = edge(source(e_a, g), b, g)) { + auto edge_result = edge(source(e_a, g), b, g); + RoseEdge e = edge_result.first; + if (edge_result.second) { if (g[e_a].maxBound < g[e].minBound || g[e].maxBound < g[e_a].minBound) { return true; @@ -706,7 +712,9 @@ bool hasCommonPredWithDiffRoses(RoseVertex a, RoseVertex b, // cppcheck-suppress useStlAlgorithm for (const auto &e_a : in_edges_range(a, g)) { - if (RoseEdge e = edge(source(e_a, g), b, g)) { + auto edge_result = edge(source(e_a, g), b, g); + RoseEdge e = edge_result.first; + if (edge_result.second) { DEBUG_PRINTF("common pred, e_r=%d r_t %u,%u\n", (int)equal_roses, g[e].rose_top, g[e_a].rose_top); if (!equal_roses) { @@ -914,9 +922,9 @@ bool mergeSameCastle(RoseBuildImpl &build, RoseVertex a, RoseVertex b, } } - assert(contains(rai.rev_leftfix[b_left], b)); - rai.rev_leftfix[b_left].erase(b); - rai.rev_leftfix[a_left].insert(b); + assert(contains(rai.rev_leftfix[left_id(b_left)], b)); + rai.rev_leftfix[left_id(b_left)].erase(b); + rai.rev_leftfix[left_id(a_left)].insert(b); a_left.leftfix_report = new_report; b_left.leftfix_report = new_report; @@ -925,7 +933,7 @@ bool mergeSameCastle(RoseBuildImpl &build, RoseVertex a, RoseVertex b, updateEdgeTops(g, a, a_top_map); updateEdgeTops(g, b, b_top_map); - pruneUnusedTops(castle, g, rai.rev_leftfix[a_left]); + pruneUnusedTops(castle, g, rai.rev_leftfix[left_id(a_left)]); return true; } @@ -1033,9 +1041,9 @@ bool attemptRoseCastleMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, b_left.castle = new_castle; assert(a_left == b_left); - rai.rev_leftfix[a_left].insert(a); - rai.rev_leftfix[a_left].insert(b); - pruneUnusedTops(*new_castle, g, rai.rev_leftfix[a_left]); + rai.rev_leftfix[left_id(a_left)].insert(a); + rai.rev_leftfix[left_id(a_left)].insert(b); + pruneUnusedTops(*new_castle, g, rai.rev_leftfix[left_id(a_left)]); return true; } @@ -1086,7 +1094,9 @@ bool attemptRoseCastleMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, // We should be protected from merging common preds with tops leading // to completely different repeats by earlier checks, but just in // case... 
- if (RoseEdge a_edge = edge(source(e, g), a, g)) { + auto edge_result = edge(source(e, g), a, g); + RoseEdge a_edge = edge_result.first; + if (edge_result.second) { u32 a_top = g[a_edge].rose_top; const PureRepeat &a_pr = m_castle->repeats[a_top]; // new report if (pr != a_pr) { @@ -1119,9 +1129,9 @@ bool attemptRoseCastleMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, b_left.leftfix_report = new_report; assert(a_left == b_left); - rai.rev_leftfix[a_left].insert(a); - rai.rev_leftfix[a_left].insert(b); - pruneUnusedTops(*m_castle, g, rai.rev_leftfix[a_left]); + rai.rev_leftfix[left_id(a_left)].insert(a); + rai.rev_leftfix[left_id(a_left)].insert(b); + pruneUnusedTops(*m_castle, g, rai.rev_leftfix[left_id(a_left)]); return true; } @@ -1244,9 +1254,9 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, a_left.graph = new_graph; b_left.graph = new_graph; - rai.rev_leftfix[a_left].insert(a); - rai.rev_leftfix[a_left].insert(b); - pruneUnusedTops(*new_graph, g, rai.rev_leftfix[a_left]); + rai.rev_leftfix[left_id(a_left)].insert(a); + rai.rev_leftfix[left_id(a_left)].insert(b); + pruneUnusedTops(*new_graph, g, rai.rev_leftfix[left_id(a_left)]); return true; } @@ -1265,7 +1275,7 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, DEBUG_PRINTF("attempting merge of roses on vertices %zu and %zu\n", g[a].index, g[b].index); - set &b_verts = rai.rev_leftfix[b_left]; + set &b_verts = rai.rev_leftfix[left_id(b_left)]; set aa; aa.insert(a); @@ -1287,7 +1297,7 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, ReportID new_report = build.getNewNfaReport(); duplicateReport(*b_h, b_left.leftfix_report, new_report); b_left.leftfix_report = new_report; - pruneReportIfUnused(build, b_h, rai.rev_leftfix[b_left_id], b_oldreport); + pruneReportIfUnused(build, b_h, rai.rev_leftfix[left_id(b_left_id)], b_oldreport); NGHolder victim; cloneHolder(victim, *a_h); @@ -1301,12 +1311,7 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, DEBUG_PRINTF("victim %zu states\n", num_vertices(*a_h)); DEBUG_PRINTF("winner %zu states\n", num_vertices(*b_h)); - - if (!setDistinctRoseTops(g, victim, *b_h, deque(1, a))) { - assert(roseHasTops(build, a)); - assert(roseHasTops(build, b)); - return false; - } + setDistinctRoseTops(g, victim, *b_h, deque(1, a)); assert(victim.kind == b_h->kind); assert(!generates_callbacks(*b_h)); @@ -1328,16 +1333,16 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, a_left.graph = b_h; a_left.leftfix_report = new_report; - assert(contains(rai.rev_leftfix[a_left_id], a)); - assert(contains(rai.rev_leftfix[b_left_id], b)); - rai.rev_leftfix[a_left_id].erase(a); - rai.rev_leftfix[b_left_id].insert(a); + assert(contains(rai.rev_leftfix[left_id(a_left_id)], a)); + assert(contains(rai.rev_leftfix[left_id(b_left_id)], b)); + rai.rev_leftfix[left_id(a_left_id)].erase(a); + rai.rev_leftfix[left_id(b_left_id)].insert(a); - pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]); - pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]); + pruneUnusedTops(*a_h, g, rai.rev_leftfix[left_id(a_left_id)]); + pruneUnusedTops(*b_h, g, rai.rev_leftfix[left_id(b_left_id)]); // Prune A's report from its old prefix if it was only used by A. 
- pruneReportIfUnused(build, a_h, rai.rev_leftfix[a_left_id], a_oldreport); + pruneReportIfUnused(build, a_h, rai.rev_leftfix[left_id(a_left_id)], a_oldreport); reduceImplementableGraph(*b_h, SOM_NONE, nullptr, build.cc); @@ -1358,9 +1363,9 @@ bool attemptRoseMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, build.g[a].index, build.g[b].index); assert(a != b); - RoseGraph &g = build.g; - LeftEngInfo &a_left = g[a].left; - LeftEngInfo &b_left = g[b].left; + const RoseGraph &g = build.g; + const LeftEngInfo &a_left = g[a].left; + const LeftEngInfo &b_left = g[b].left; // Trivial case. if (a_left == b_left) { @@ -1608,7 +1613,7 @@ void diamondMergePass(CandidateSet &candidates, RoseBuildImpl &build, vector *dead, bool mergeRoses, RoseAliasingInfo &rai) { DEBUG_PRINTF("begin\n"); - RoseGraph &g = build.g; + const RoseGraph &g = build.g; if (candidates.empty()) { return; @@ -1726,18 +1731,18 @@ void getLeftMergeSiblings(const RoseBuildImpl &build, RoseVertex a, assert(!g[a].literals.empty()); u32 lit_id = *g[a].literals.begin(); const auto &verts = build.literal_info.at(lit_id).vertices; - RoseVertex pred = pickPred(a, g, build); + RoseVertex ppred = pickPred(a, g, build); siblings.clear(); - if (pred == RoseGraph::null_vertex() || build.isAnyStart(pred) || - out_degree(pred, g) > verts.size()) { + if (ppred == RoseGraph::null_vertex() || build.isAnyStart(ppred) || + out_degree(ppred, g) > verts.size()) { // Select sibling from amongst the vertices that share a literal. insert(&siblings, siblings.end(), verts); } else { // Select sibling from amongst the vertices that share a // predecessor. - insert(&siblings, siblings.end(), adjacent_vertices(pred, g)); + insert(&siblings, siblings.end(), adjacent_vertices(ppred, g)); } } @@ -1860,14 +1865,14 @@ void splitByRightProps(const RoseGraph &g, vector> &buckets) { // Successor vector used in make_split_key. We declare it here so we can // reuse storage. - vector succ; + vector vsucc; // Split by {successors, literals, reports}. auto make_split_key = [&](RoseVertex v) { - succ.clear(); - insert(&succ, succ.end(), adjacent_vertices(v, g)); - sort(succ.begin(), succ.end()); - return hash_all(g[v].literals, g[v].reports, succ); + vsucc.clear(); + insert(&vsucc, vsucc.end(), adjacent_vertices(v, g)); + sort(vsucc.begin(), vsucc.end()); + return hash_all(g[v].literals, g[v].reports, vsucc); }; splitAndFilterBuckets(buckets, make_split_key); } @@ -1981,15 +1986,14 @@ bool hasNoDiamondSiblings(const RoseGraph &g, RoseVertex v) { * merge. 
*/ static -void filterDiamondCandidates(RoseGraph &g, CandidateSet &candidates) { +void filterDiamondCandidates(const RoseGraph &g, CandidateSet &candidates) { DEBUG_PRINTF("%zu candidates enter\n", candidates.size()); vector dead; - for (const auto &v : candidates) { - if (hasNoDiamondSiblings(g, v)) { - dead.emplace_back(v); - } - } + auto deads = [&g=g](const RoseVertex &v) { + return (hasNoDiamondSiblings(g, v)); + }; + std::copy_if(begin(candidates), end(candidates), std::back_inserter(dead), deads); for (const auto &v : dead) { candidates.erase(v); @@ -2001,7 +2005,7 @@ void filterDiamondCandidates(RoseGraph &g, CandidateSet &candidates) { void aliasRoles(RoseBuildImpl &build, bool mergeRoses) { const CompileContext &cc = build.cc; - RoseGraph &g = build.g; + const RoseGraph &g = build.g; assert(!hasOrphanedTops(build)); assert(canImplementGraphs(build)); @@ -2150,7 +2154,9 @@ void mergeDupeLeaves(RoseBuildImpl &build) { for (const auto &e : in_edges_range(v, g)) { RoseVertex u = source(e, g); DEBUG_PRINTF("u index=%zu\n", g[u].index); - if (RoseEdge et = edge(u, t, g)) { + auto edge_result = edge(u, t, g); + RoseEdge et = edge_result.first; + if (edge_result.second) { if (g[et].minBound <= g[e].minBound && g[et].maxBound >= g[e].maxBound) { DEBUG_PRINTF("remove more constrained edge\n"); diff --git a/src/rose/rose_build_scatter.cpp b/src/rose/rose_build_scatter.cpp index 87085ae9..f7a457b6 100644 --- a/src/rose/rose_build_scatter.cpp +++ b/src/rose/rose_build_scatter.cpp @@ -111,21 +111,41 @@ void write_out(scatter_full_plan *plan_out, void *aux_out, const scatter_plan_raw &raw, u32 aux_base_offset) { memset(plan_out, 0, sizeof(*plan_out)); -#define DO_CASE(t) \ - if (!raw.p_##t.empty()) { \ - plan_out->s_##t##_offset = aux_base_offset; \ - plan_out->s_##t##_count = raw.p_##t.size(); \ - assert(ISALIGNED_N((char *)aux_out + aux_base_offset, \ - alignof(scatter_unit_##t))); \ - memcpy((char *)aux_out + aux_base_offset, raw.p_##t.data(), \ - byte_length(raw.p_##t)); \ - aux_base_offset += byte_length(raw.p_##t); \ + if (!raw.p_u64a.empty()) { + plan_out->s_u64a_offset = aux_base_offset; + plan_out->s_u64a_count = raw.p_u64a.size(); + assert(ISALIGNED_N(static_cast(aux_out) + aux_base_offset, alignof(scatter_unit_u64a))); + auto *start = reinterpret_cast(raw.p_u64a.data()); + auto *to = static_cast(aux_out) + aux_base_offset; + std::copy(start, start + byte_length(raw.p_u64a), to); + aux_base_offset += byte_length(raw.p_u64a); + } + if (!raw.p_u32.empty()) { + plan_out->s_u32_offset = aux_base_offset; + plan_out->s_u32_count = raw.p_u32.size(); + assert(ISALIGNED_N(static_cast(aux_out) + aux_base_offset, alignof(scatter_unit_u32))); + auto *start = reinterpret_cast(raw.p_u32.data()); + auto *to = static_cast(aux_out) + aux_base_offset; + std::copy(start, start + byte_length(raw.p_u32), to); + aux_base_offset += byte_length(raw.p_u32); + } + if (!raw.p_u16.empty()) { + plan_out->s_u16_offset = aux_base_offset; + plan_out->s_u16_count = raw.p_u16.size(); + assert(ISALIGNED_N(static_cast(aux_out) + aux_base_offset, alignof(scatter_unit_u16))); + auto *start = reinterpret_cast(raw.p_u16.data()); + auto *to = static_cast(aux_out) + aux_base_offset; + std::copy(start, start + byte_length(raw.p_u16), to); + aux_base_offset += byte_length(raw.p_u16); + } + if (!raw.p_u8.empty()) { + plan_out->s_u8_offset = aux_base_offset; + plan_out->s_u8_count = raw.p_u8.size(); + assert(ISALIGNED_N(static_cast(aux_out) + aux_base_offset, alignof(scatter_unit_u8))); + auto *start = 
reinterpret_cast(raw.p_u8.data()); + auto *to = static_cast(aux_out) + aux_base_offset; + std::copy(start, start + byte_length(raw.p_u8), to); } - - DO_CASE(u64a); - DO_CASE(u32); - DO_CASE(u16); - DO_CASE(u8); } } // namespace ue2 diff --git a/src/rose/rose_build_width.cpp b/src/rose/rose_build_width.cpp index 2e29a8be..e534859a 100644 --- a/src/rose/rose_build_width.cpp +++ b/src/rose/rose_build_width.cpp @@ -65,12 +65,11 @@ u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) { const RoseGraph &g = tbi.g; vector table_verts; - - for (auto v : vertices_range(g)) { - if (tbi.hasLiteralInTable(v, table)) { - table_verts.emplace_back(v); - } - } + auto tvs = [&tbi=tbi, &table=table](const RoseVertex &v) { + return (tbi.hasLiteralInTable(v, table)); + }; + const auto &vr = vertices_range(g); + std::copy_if(begin(vr), end(vr), std::back_inserter(table_verts), tvs); set reachable; find_reachable(g, table_verts, &reachable); @@ -96,7 +95,7 @@ u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) { } if (g[v].suffix) { - depth suffix_width = findMinWidth(g[v].suffix, g[v].suffix.top); + depth suffix_width = findMinWidth(suffix_id(g[v].suffix), g[v].suffix.top); assert(suffix_width.is_reachable()); DEBUG_PRINTF("%zu has suffix with top %u (width %s), can fire " "report at %u\n", @@ -146,10 +145,10 @@ u32 findMaxBAWidth(const RoseBuildImpl &tbi) { u64a w = g[v].max_offset; if (g[v].suffix) { - if (has_non_eod_accepts(g[v].suffix)) { + if (has_non_eod_accepts(suffix_id(g[v].suffix))) { return ROSE_BOUND_INF; } - depth suffix_width = findMaxWidth(g[v].suffix, g[v].suffix.top); + depth suffix_width = findMaxWidth(suffix_id(g[v].suffix), g[v].suffix.top); DEBUG_PRINTF("suffix max width for top %u is %s\n", g[v].suffix.top, suffix_width.str().c_str()); assert(suffix_width.is_reachable()); @@ -190,13 +189,12 @@ u32 findMaxBAWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) { table == ROSE_FLOATING ? 
"floating" : "anchored"); vector table_verts; - - for (auto v : vertices_range(g)) { - if ((table == ROSE_FLOATING && tbi.isFloating(v)) - || (table == ROSE_ANCHORED && tbi.isAnchored(v))) { - table_verts.emplace_back(v); - } - } + auto tvs = [&tbi=tbi, &table=table](const RoseVertex &v) { + return ((table == ROSE_FLOATING && tbi.isFloating(v)) + || (table == ROSE_ANCHORED && tbi.isAnchored(v))); + }; + const auto &vr = vertices_range(g); + std::copy_if(begin(vr), end(vr), std::back_inserter(table_verts), tvs); set reachable; find_reachable(g, table_verts, &reachable); @@ -223,11 +221,11 @@ u32 findMaxBAWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) { accept_eod node */ if (g[v].suffix) { - if (has_non_eod_accepts(g[v].suffix)) { + if (has_non_eod_accepts(suffix_id(g[v].suffix))) { DEBUG_PRINTF("has accept\n"); return ROSE_BOUND_INF; } - depth suffix_width = findMaxWidth(g[v].suffix); + depth suffix_width = findMaxWidth(suffix_id(g[v].suffix)); DEBUG_PRINTF("suffix max width %s\n", suffix_width.str().c_str()); assert(suffix_width.is_reachable()); if (!suffix_width.is_finite()) { diff --git a/src/rose/stream.c b/src/rose/stream.c index 26268dd5..b3a91569 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -104,7 +104,7 @@ void runAnchoredTableStream(const struct RoseEngine *t, const void *atable, static really_inline -void saveStreamState(const struct NFA *nfa, struct mq *q, s64a loc) { +void saveStreamState(const struct NFA *nfa, const struct mq *q, s64a loc) { DEBUG_PRINTF("offset=%llu, length=%zu, hlength=%zu, loc=%lld\n", q->offset, q->length, q->hlength, loc); nfaQueueCompressState(nfa, q, loc); @@ -133,7 +133,7 @@ enum MiracleAction roseScanForMiracles(const struct RoseEngine *t, char *state, struct hs_scratch *scratch, u32 qi, const struct LeftNfaInfo *left, const struct NFA *nfa) { - struct core_info *ci = &scratch->core_info; + const struct core_info *ci = &scratch->core_info; const u32 qCount = t->queueCount; struct mq *q = scratch->queues + qi; @@ -211,7 +211,7 @@ char roseCatchUpLeftfix(const struct RoseEngine *t, char *state, const struct LeftNfaInfo *left) { assert(!left->transient); // active roses only - struct core_info *ci = &scratch->core_info; + const struct core_info *ci = &scratch->core_info; const u32 qCount = t->queueCount; struct mq *q = scratch->queues + qi; const struct NFA *nfa = getNfaByQueue(t, qi); @@ -382,7 +382,7 @@ void roseSaveNfaStreamState(const struct RoseEngine *t, char *state, qi = mmbit_iterate(aa, aaCount, qi)) { DEBUG_PRINTF("saving stream state for qi=%u\n", qi); - struct mq *q = queues + qi; + const struct mq *q = queues + qi; // If it's active, it should have an active queue (as we should have // done some work!) 
@@ -517,7 +517,7 @@ void runEagerPrefixesStream(const struct RoseEngine *t, static really_inline int can_never_match(const struct RoseEngine *t, char *state, struct hs_scratch *scratch, size_t length, u64a offset) { - struct RoseContext *tctxt = &scratch->tctxt; + const struct RoseContext *tctxt = &scratch->tctxt; if (tctxt->groups) { DEBUG_PRINTF("still has active groups\n"); diff --git a/src/rose/stream_long_lit.h b/src/rose/stream_long_lit.h index df9b57f4..68adc136 100644 --- a/src/rose/stream_long_lit.h +++ b/src/rose/stream_long_lit.h @@ -332,7 +332,7 @@ void storeLongLiteralState(const struct RoseEngine *t, char *state, return; } - struct core_info *ci = &scratch->core_info; + const struct core_info *ci = &scratch->core_info; const struct RoseLongLitTable *ll_table = getByOffset(t, t->longLitTableOffset); assert(ll_table->maxLen); diff --git a/src/runtime.c b/src/runtime.c index a055e5f4..d91b5672 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -69,7 +69,7 @@ void prefetch_data(const char *data, unsigned length) { static int HS_CDECL null_onEvent(UNUSED unsigned id, UNUSED unsigned long long from, UNUSED unsigned long long to, UNUSED unsigned flags, - UNUSED void *ctxt) { + UNUSED void *ctxt) { // cppcheck-suppress constParameterCallback return 0; } @@ -576,7 +576,7 @@ hs_error_t HS_CDECL hs_open_stream(const hs_database_t *db, static really_inline -void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) { +void rawEodExec(const hs_stream_t *id, hs_scratch_t *scratch) { const struct RoseEngine *rose = id->rose; if (can_stop_matching(scratch)) { @@ -593,7 +593,7 @@ void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) { } static never_inline -void soleOutfixEodExec(hs_stream_t *id, hs_scratch_t *scratch) { +void soleOutfixEodExec(const hs_stream_t *id, hs_scratch_t *scratch) { const struct RoseEngine *t = id->rose; if (can_stop_matching(scratch)) { diff --git a/src/scratch.c b/src/scratch.c index 9f6d77cd..aa15547b 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -83,9 +83,9 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { u32 anchored_literal_fatbit_size = proto->anchored_literal_fatbit_size; u32 som_store_size = proto->som_store_count * sizeof(u64a); - u32 som_attempted_store_size = proto->som_store_count * sizeof(u64a); + u32 som_attempted_store_size = som_store_size; // Same as som_store_size u32 som_now_size = proto->som_fatbit_size; - u32 som_attempted_size = proto->som_fatbit_size; + u32 som_attempted_size = som_now_size; // Same as som_now_size struct hs_scratch *s; struct hs_scratch *s_tmp; diff --git a/src/scratch.h b/src/scratch.h index e3cd9245..e01ccd6b 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -215,12 +215,12 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { /* array of fatbit ptr; TODO: why not an array of fatbits? 
*/ static really_inline -struct fatbit **getAnchoredLiteralLog(struct hs_scratch *scratch) { +struct fatbit **getAnchoredLiteralLog(const struct hs_scratch *scratch) { return scratch->al_log; } static really_inline -struct fatbit **getDelaySlots(struct hs_scratch *scratch) { +struct fatbit **getDelaySlots(const struct hs_scratch *scratch) { return scratch->delay_slots; } diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index df8b73f2..095bd2f9 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -792,7 +792,7 @@ bytecode_ptr getDfa(raw_dfa &rdfa, const CompileContext &cc, bool only_accel_init = !has_non_literals; bool trust_daddy_states = !has_non_literals; - bytecode_ptr dfa = nullptr; + bytecode_ptr dfa = bytecode_ptr(nullptr); if (cc.grey.allowSmallWriteSheng) { dfa = shengCompile(rdfa, cc, rm, only_accel_init, &accel_states); if (!dfa) { @@ -822,27 +822,27 @@ bytecode_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, auto nfa = getDfa(rdfa, cc, rm, has_non_literals, accel_states); if (!nfa) { DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n"); - return nullptr; + return bytecode_ptr(nullptr); } if (is_slow(rdfa, accel_states, roseQuality)) { DEBUG_PRINTF("is slow\n"); *small_region = cc.grey.smallWriteLargestBufferBad; if (*small_region <= *start_offset) { - return nullptr; + return bytecode_ptr(nullptr); } if (clear_deeper_reports(rdfa, *small_region - *start_offset)) { minimize_hopcroft(rdfa, cc.grey); if (rdfa.start_anchored == DEAD_STATE) { DEBUG_PRINTF("all patterns pruned out\n"); - return nullptr; + return bytecode_ptr(nullptr); } nfa = getDfa(rdfa, cc, rm, has_non_literals, accel_states); if (!nfa) { DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n"); assert(0); /* able to build orig dfa but not the trimmed? 
*/ - return nullptr; + return bytecode_ptr(nullptr); } } } else { @@ -853,7 +853,7 @@ bytecode_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, if (nfa->length > cc.grey.limitSmallWriteOutfixSize || nfa->length > cc.grey.limitDFASize) { DEBUG_PRINTF("smallwrite outfix size too large\n"); - return nullptr; /* this is just a soft failure - don't build smwr */ + return bytecode_ptr(nullptr); /* this is just a soft failure - don't build smwr */ } nfa->queueIndex = 0; /* dummy, small write API does not use queue */ @@ -873,12 +873,12 @@ bytecode_ptr SmallWriteBuildImpl::build(u32 roseQuality) { if (dfas.empty() && !has_literals) { DEBUG_PRINTF("no smallwrite engine\n"); poisoned = true; - return nullptr; + return bytecode_ptr(nullptr); } if (poisoned) { DEBUG_PRINTF("some pattern could not be made into a smallwrite dfa\n"); - return nullptr; + return bytecode_ptr(nullptr); } // We happen to know that if the rose is high quality, we're going to limit @@ -906,12 +906,12 @@ bytecode_ptr SmallWriteBuildImpl::build(u32 roseQuality) { if (dfas.empty()) { DEBUG_PRINTF("no dfa, pruned everything away\n"); - return nullptr; + return bytecode_ptr(nullptr); } if (!mergeDfas(dfas, rm, cc)) { dfas.clear(); - return nullptr; + return bytecode_ptr(nullptr); } assert(dfas.size() == 1); @@ -928,7 +928,7 @@ bytecode_ptr SmallWriteBuildImpl::build(u32 roseQuality) { DEBUG_PRINTF("some smallwrite outfix could not be prepped\n"); /* just skip the smallwrite optimization */ poisoned = true; - return nullptr; + return bytecode_ptr(nullptr); } u32 size = sizeof(SmallWriteEngine) + nfa->length; diff --git a/src/som/slot_manager.cpp b/src/som/slot_manager.cpp index 33b8d503..6808ac3c 100644 --- a/src/som/slot_manager.cpp +++ b/src/som/slot_manager.cpp @@ -187,7 +187,7 @@ u32 SomSlotManager::getInitialResetSomSlot(const NGHolder &prefix, find_if(reset.entries.begin(), reset.entries.end(), has_prefix_func) != reset.entries.end(); - for (auto &e : reset.entries) { + for (const auto &e : reset.entries) { u32 temp = 0; /* we don't need to test against sentinels which are identical to * our current one as races don't matter and we know it clears diff --git a/src/som/som_runtime.c b/src/som/som_runtime.c index 1a868efc..3544309c 100644 --- a/src/som/som_runtime.c +++ b/src/som/som_runtime.c @@ -69,8 +69,8 @@ void setSomLoc(struct fatbit *som_set_now, u64a *som_store, u32 som_store_count, } static really_inline -char ok_and_mark_if_write(u8 *som_store_valid, struct fatbit *som_set_now, - u8 *som_store_writable, u32 som_store_count, +char ok_and_mark_if_write(u8 *som_store_valid, const struct fatbit *som_set_now, + const u8 *som_store_writable, u32 som_store_count, u32 loc) { return !mmbit_set(som_store_valid, som_store_count, loc) /* unwritten */ || fatbit_isset(som_set_now, som_store_count, loc) /* write here, need @@ -79,7 +79,7 @@ char ok_and_mark_if_write(u8 *som_store_valid, struct fatbit *som_set_now, } static really_inline -char ok_and_mark_if_unset(u8 *som_store_valid, struct fatbit *som_set_now, +char ok_and_mark_if_unset(u8 *som_store_valid, const struct fatbit *som_set_now, u32 som_store_count, u32 loc) { return !mmbit_set(som_store_valid, som_store_count, loc) /* unwritten */ || fatbit_isset(som_set_now, som_store_count, loc); /* write here, need @@ -512,7 +512,7 @@ int flushStoredSomMatches_i(struct hs_scratch *scratch, u64a offset) { /* fire any reports from the logs and clear them */ if (offset == scratch->deduper.current_report_offset + 1) { struct fatbit *done_log = scratch->deduper.som_log[offset % 2]; - u64a 
*done_starts = scratch->deduper.som_start_log[offset % 2]; + const u64a *done_starts = scratch->deduper.som_start_log[offset % 2]; halt = clearSomLog(scratch, scratch->deduper.current_report_offset - 1, done_log, done_starts); @@ -522,9 +522,9 @@ int flushStoredSomMatches_i(struct hs_scratch *scratch, u64a offset) { u64a f_offset = scratch->deduper.current_report_offset - 1; u64a s_offset = scratch->deduper.current_report_offset; struct fatbit *first_log = scratch->deduper.som_log[f_offset % 2]; - u64a *first_starts = scratch->deduper.som_start_log[f_offset % 2]; + const u64a *first_starts = scratch->deduper.som_start_log[f_offset % 2]; struct fatbit *second_log = scratch->deduper.som_log[s_offset % 2]; - u64a *second_starts = scratch->deduper.som_start_log[s_offset % 2]; + const u64a *second_starts = scratch->deduper.som_start_log[s_offset % 2]; halt = clearSomLog(scratch, f_offset, first_log, first_starts) || clearSomLog(scratch, s_offset, second_log, second_starts); diff --git a/src/som/som_stream.c b/src/som/som_stream.c index 93ab709e..6502d87c 100644 --- a/src/som/som_stream.c +++ b/src/som/som_stream.c @@ -102,7 +102,7 @@ void storeSomToStream(struct hs_scratch *scratch, const u64a offset) { const u32 som_store_count = rose->somLocationCount; assert(som_store_count); // Caller should ensure that we have work to do. - u8 *som_store_valid = (u8 *)ci->state + rose->stateOffsets.somValid; + const u8 *som_store_valid = (u8 *)ci->state + rose->stateOffsets.somValid; char *stream_som_store = ci->state + rose->stateOffsets.somLocation; const u64a *som_store = scratch->som_store; const u8 som_size = rose->somHorizon; diff --git a/src/stream_compress_impl.h b/src/stream_compress_impl.h index f02543ef..a4c3e466 100644 --- a/src/stream_compress_impl.h +++ b/src/stream_compress_impl.h @@ -142,6 +142,7 @@ size_t JOIN(sc_, FN_SUFFIX)(const struct RoseEngine *rose, COPY(stream_body + so->groups, so->groups_size); /* copy the real bits of history */ + // cppcheck-suppress unreadVariable UNUSED u32 hend = so->history + rose->historyRequired; COPY(stream_body + hend - history, history); diff --git a/src/util/alloc.cpp b/src/util/alloc.cpp index 40004932..a6d30b33 100644 --- a/src/util/alloc.cpp +++ b/src/util/alloc.cpp @@ -68,7 +68,8 @@ namespace ue2 { #endif void *aligned_malloc_internal(size_t size, size_t align) { - void *mem; + // cppcheck-suppress cstyleCast + void *mem = nullptr; int rv = posix_memalign(&mem, align, size); if (rv != 0) { DEBUG_PRINTF("posix_memalign returned %d when asked for %zu bytes\n", @@ -104,17 +105,17 @@ void *aligned_zmalloc(size_t size) { const size_t alloc_size = size + HACK_OFFSET; - void *mem = aligned_malloc_internal(alloc_size, 64); + char *mem = static_cast<char *>(aligned_malloc_internal(alloc_size, 64)); if (!mem) { DEBUG_PRINTF("unable to allocate %zu bytes\n", alloc_size); throw std::bad_alloc(); } - DEBUG_PRINTF("alloced %p reporting %p\n", mem, (char *)mem + HACK_OFFSET); + DEBUG_PRINTF("alloced %p reporting %p\n", mem, mem + HACK_OFFSET); assert(ISALIGNED_N(mem, 64)); memset(mem, 0, alloc_size); - return (void *)((char *)mem + HACK_OFFSET); + return reinterpret_cast<void *>(mem + HACK_OFFSET); } /** \brief Free a pointer allocated with \ref aligned_zmalloc.
*/ @@ -123,7 +124,8 @@ void aligned_free(void *ptr) { return; } - void *addr = (void *)((char *)ptr - HACK_OFFSET); + char *addr_c = static_cast(ptr); + void *addr = static_cast(addr_c - HACK_OFFSET); DEBUG_PRINTF("asked to free %p freeing %p\n", ptr, addr); assert(ISALIGNED_N(addr, 64)); diff --git a/src/util/alloc.h b/src/util/alloc.h index 49b4a824..c5dea4c5 100644 --- a/src/util/alloc.h +++ b/src/util/alloc.h @@ -68,7 +68,7 @@ public: AlignedAllocator() noexcept {} template - AlignedAllocator(const AlignedAllocator &) noexcept {} + explicit AlignedAllocator(const AlignedAllocator &) noexcept {} template struct rebind { using other = AlignedAllocator; diff --git a/src/util/arch/common/bitutils.h b/src/util/arch/common/bitutils.h index e5ab0d05..897c9197 100644 --- a/src/util/arch/common/bitutils.h +++ b/src/util/arch/common/bitutils.h @@ -155,13 +155,13 @@ u32 compress32_impl_c(u32 x, u32 m) { return 0; } - u32 mk, mp, mv, t; + u32 mk, mv; x &= m; // clear irrelevant bits mk = ~m << 1; // we will count 0's to right for (u32 i = 0; i < 5; i++) { - mp = mk ^ (mk << 1); + u32 mp = mk ^ (mk << 1); mp ^= mp << 2; mp ^= mp << 4; mp ^= mp << 8; @@ -169,7 +169,7 @@ u32 compress32_impl_c(u32 x, u32 m) { mv = mp & m; // bits to move m = (m ^ mv) | (mv >> (1 << i)); // compress m - t = x & mv; + u32 t = x & mv; x = (x ^ t) | (t >> (1 << i)); // compress x mk = mk & ~mp; } @@ -239,14 +239,14 @@ u32 expand32_impl_c(u32 x, u32 m) { return 0; } - u32 m0, mk, mp, mv, t; + u32 m0, mk, mv; u32 array[5]; m0 = m; // save original mask mk = ~m << 1; // we will count 0's to right for (int i = 0; i < 5; i++) { - mp = mk ^ (mk << 1); // parallel suffix + u32 mp = mk ^ (mk << 1); // parallel suffix mp = mp ^ (mp << 2); mp = mp ^ (mp << 4); mp = mp ^ (mp << 8); @@ -259,7 +259,7 @@ u32 expand32_impl_c(u32 x, u32 m) { for (int i = 4; i >= 0; i--) { mv = array[i]; - t = x << (1 << i); + u32 t = x << (1 << i); x = (x & ~mv) | (t & mv); } @@ -409,7 +409,7 @@ u64a pdep64_impl_c(u64a x, u64a _m) { u64a result = 0x0UL; const u64a mask = 0x8000000000000000UL; u64a m = _m; - u64a c, t; + u64a p; /* The pop-count of the mask gives the number of the bits from @@ -421,8 +421,8 @@ u64a pdep64_impl_c(u64a x, u64a _m) { each mask bit as it is processed. */ while (m != 0) { - c = __builtin_clzl (m); - t = x << (p - c); + u64a c = __builtin_clzl (m); + u64a t = x << (p - c); m ^= (mask >> c); result |= (t & (mask >> c)); p++; diff --git a/src/util/bitfield.h b/src/util/bitfield.h index 14da9967..bb8c9e3b 100644 --- a/src/util/bitfield.h +++ b/src/util/bitfield.h @@ -64,7 +64,7 @@ public: assert(none()); } - bitfield(const boost::dynamic_bitset<> &a) : bits{{0}} { + explicit bitfield(const boost::dynamic_bitset<> &a) : bits{{0}} { assert(a.size() == requested_size); assert(none()); for (auto i = a.find_first(); i != a.npos; i = a.find_next(i)) { @@ -89,9 +89,7 @@ public: /// Set all bits. void setall() { - for (auto &e : bits) { - e = all_ones; - } + std::fill(bits.begin(), bits.end(), all_ones); clear_trailer(); } @@ -102,9 +100,7 @@ public: /// Clear all bits. void clear() { - for (auto &e : bits) { - e = 0; - } + std::fill(bits.begin(), bits.end(), 0); } /// Clear all bits (alias for bitset::clear). 
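The bitfield.h hunk above swaps hand-rolled assignment loops for `std::fill`, which states the intent directly and compiles to the same code. A standalone sketch of the substitution (the block count and `all_ones` value here are illustrative, not the real bitfield internals):

```cpp
#include <algorithm>
#include <array>
#include <cstdint>

int main() {
    std::array<uint64_t, 4> bits{};
    const uint64_t all_ones = ~uint64_t{0};

    std::fill(bits.begin(), bits.end(), all_ones); // setall(); the real code
                                                   // then clears the trailer
    std::fill(bits.begin(), bits.end(), uint64_t{0}); // clear()
    return 0;
}
```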
diff --git a/src/util/bytecode_ptr.h b/src/util/bytecode_ptr.h index f1f2e5ef..ab2b9171 100644 --- a/src/util/bytecode_ptr.h +++ b/src/util/bytecode_ptr.h @@ -66,7 +66,7 @@ public: } } - bytecode_ptr(std::nullptr_t) {} + explicit bytecode_ptr(std::nullptr_t) {} T *get() const { return ptr.get(); } diff --git a/src/util/clique.cpp b/src/util/clique.cpp index 19daed3c..5dbcc1d8 100644 --- a/src/util/clique.cpp +++ b/src/util/clique.cpp @@ -79,7 +79,7 @@ vector findCliqueGroup(CliqueGraph &cg) { // Choose a vertex from the graph u32 id = g[0]; - CliqueVertex &n = vertexMap.at(id); + const CliqueVertex &n = vertexMap.at(id); clique.emplace_back(id); // Corresponding vertex in the original graph set subgraphId(g.begin(), g.end()); diff --git a/src/util/determinise.h b/src/util/determinise.h index cfccd597..ab8f85f9 100644 --- a/src/util/determinise.h +++ b/src/util/determinise.h @@ -102,7 +102,7 @@ bool determinise(Auto &n, std::vector &dstates, size_t state_limit, dstates.emplace_back(ds(alphabet_size)); } - std::vector succs(alphabet_size, n.dead); + std::vector succrs(alphabet_size, n.dead); while (!q.empty()) { auto m = std::move(q.front()); @@ -133,13 +133,13 @@ bool determinise(Auto &n, std::vector &dstates, size_t state_limit, } /* fill in successor states */ - n.transition(curr, &succs[0]); + n.transition(curr, &succrs[0]); for (symbol_t s = 0; s < n.alphasize; s++) { dstate_id_t succ_id; - if (s && succs[s] == succs[s - 1]) { + if (s && succrs[s] == succrs[s - 1]) { succ_id = dstates[curr_id].next[s - 1]; } else { - auto p = dstate_ids.find(succs[s]); + auto p = dstate_ids.find(succrs[s]); if (p != dstate_ids.end()) { // succ[s] is already present succ_id = p->second; if (succ_id > curr_id && !dstates[succ_id].daddy @@ -148,10 +148,10 @@ bool determinise(Auto &n, std::vector &dstates, size_t state_limit, } } else { succ_id = dstate_ids.size(); - dstate_ids.emplace(succs[s], succ_id); + dstate_ids.emplace(succrs[s], succ_id); dstates.emplace_back(ds(alphabet_size)); dstates.back().daddy = n.unalpha[s] < N_CHARS ? curr_id : 0; - q.emplace(succs[s], succ_id); + q.emplace(succrs[s], succ_id); } DEBUG_PRINTF("-->%hu on %02hx\n", succ_id, n.unalpha[s]); diff --git a/src/util/dump_charclass.cpp b/src/util/dump_charclass.cpp index df308dec..2243fcbd 100644 --- a/src/util/dump_charclass.cpp +++ b/src/util/dump_charclass.cpp @@ -178,9 +178,9 @@ size_t describeClassInt(ostream &os, const CharReach &incr, size_t maxLength, // Render charclass as a series of ranges size_t c_start = cr.find_first(); - size_t c = c_start, c_last = 0; + size_t c = c_start; while (c != CharReach::npos) { - c_last = c; + size_t c_last = c; c = cr.find_next(c); if (c != c_last + 1 || c_last == 0xff) { describeRange(os, c_start, c_last, out_type); diff --git a/src/util/flat_containers.h b/src/util/flat_containers.h index 41452eb4..9ba7e232 100644 --- a/src/util/flat_containers.h +++ b/src/util/flat_containers.h @@ -195,10 +195,10 @@ public: // Constructors. - flat_set(const Compare &compare = Compare(), + explicit flat_set(const Compare &compare = Compare(), const Allocator &alloc = Allocator()) : base_type(compare, alloc) {} - + template flat_set(InputIt first, InputIt last, const Compare &compare = Compare(), const Allocator &alloc = Allocator()) @@ -425,7 +425,7 @@ public: // Constructors. 
- flat_map(const Compare &compare = Compare(), + explicit flat_map(const Compare &compare = Compare(), const Allocator &alloc = Allocator()) : base_type(compare, alloc) {} @@ -615,7 +615,7 @@ public: friend class flat_map; protected: Compare c; - value_compare(Compare c_in) : c(c_in) {} + explicit value_compare(Compare c_in) : c(c_in) {} public: bool operator()(const value_type &lhs, const value_type &rhs) { return c(lhs.first, rhs.first); diff --git a/src/util/graph_small_color_map.h b/src/util/graph_small_color_map.h index 249b7153..a85f4b77 100644 --- a/src/util/graph_small_color_map.h +++ b/src/util/graph_small_color_map.h @@ -102,10 +102,10 @@ public: using category = boost::read_write_property_map_tag; small_color_map(size_t n_in, const IndexMap &index_map_in) - : n(n_in), index_map(index_map_in) { - size_t num_bytes = (n + entries_per_byte - 1) / entries_per_byte; - data = std::make_shared>(num_bytes); - fill(small_color::white); + : n(n_in), + index_map(index_map_in), + data(std::make_shared>((n_in + entries_per_byte - 1) / entries_per_byte)) { + fill(small_color::white); } void fill(small_color color) { diff --git a/src/util/hash_dynamic_bitset.h b/src/util/hash_dynamic_bitset.h index 65bc29c3..fecb0c68 100644 --- a/src/util/hash_dynamic_bitset.h +++ b/src/util/hash_dynamic_bitset.h @@ -56,7 +56,7 @@ struct hash_output_it { using reference = void; using iterator_category = std::output_iterator_tag; - hash_output_it(size_t *hash_out = nullptr) : out(hash_out) {} + explicit hash_output_it(size_t *hash_out = nullptr) : out(hash_out) {} hash_output_it &operator++() { return *this; } @@ -65,7 +65,7 @@ struct hash_output_it { } struct deref_proxy { - deref_proxy(size_t *hash_out) : out(hash_out) {} + explicit deref_proxy(size_t *hash_out) : out(hash_out) {} template void operator=(const T &val) const { @@ -76,7 +76,7 @@ struct hash_output_it { size_t *out; /* output location of the owning iterator */ }; - deref_proxy operator*() { return {out}; } + deref_proxy operator*() { return deref_proxy(out); } private: size_t *out; /* location to output the hashes to */ diff --git a/src/util/multibit.h b/src/util/multibit.h index 95261b37..3ec45d6f 100644 --- a/src/util/multibit.h +++ b/src/util/multibit.h @@ -1421,7 +1421,7 @@ uplevel: if (level == 0) { return; // we are done } - u8 *block_ptr = + const u8 *block_ptr = mmbit_get_level_root(bits, level) + key * sizeof(MMB_TYPE); MMB_TYPE real_block = mmb_load(block_ptr); key >>= MMB_KEY_SHIFT; diff --git a/src/util/multibit_build.cpp b/src/util/multibit_build.cpp index 442c528f..bccff22f 100644 --- a/src/util/multibit_build.cpp +++ b/src/util/multibit_build.cpp @@ -256,7 +256,7 @@ void mmbBuildInitRangePlan(u32 total_bits, u32 begin, u32 end, /* handle the multilevel case */ s32 ks = mmbit_keyshift(total_bits); u32 level = 0; - assert(sizeof(MMB_TYPE) == sizeof(u64a)); + assert(sizeof(MMB_TYPE) == sizeof(u64a)); // cppcheck-suppress duplicateExpression if (begin == end) { add_scatter(&out->p_u64a, 0, 0); diff --git a/src/util/multibit_compress.h b/src/util/multibit_compress.h index e7b4fd8e..018bfec2 100644 --- a/src/util/multibit_compress.h +++ b/src/util/multibit_compress.h @@ -167,7 +167,7 @@ char mmbit_decompress(u8 *bits, u32 total_bits, const u8 *comp, comp += sizeof(MMB_TYPE); while (1) { if (key_rem < MMB_KEY_BITS) { - u8 *block_ptr = mmbit_get_level_root(bits, level) + + const u8 *block_ptr = mmbit_get_level_root(bits, level) + key * sizeof(MMB_TYPE); MMB_TYPE block = mmb_load(block_ptr); MMB_TYPE block_1 = block & 
~mmb_mask_zero_to_nocheck(key_rem); diff --git a/src/util/supervector/arch/arm/impl.cpp b/src/util/supervector/arch/arm/impl.cpp index 55f6c55c..17859d61 100644 --- a/src/util/supervector/arch/arm/impl.cpp +++ b/src/util/supervector/arch/arm/impl.cpp @@ -159,13 +159,13 @@ really_inline SuperVector<16>::SuperVector(uint64_t const other) template<> really_inline SuperVector<16> SuperVector<16>::Ones(void) { - return {vdupq_n_u8(0xFF)}; + return SuperVector<16>(vdupq_n_u8(0xFF)); } template<> really_inline SuperVector<16> SuperVector<16>::Zeroes(void) { - return {vdupq_n_u8(0)}; + return SuperVector<16>(vdupq_n_u8(0)); } // Methods @@ -179,37 +179,37 @@ really_inline void SuperVector<16>::operator=(SuperVector<16> const &other) template <> really_inline SuperVector<16> SuperVector<16>::operator&(SuperVector<16> const &b) const { - return {vandq_u8(u.u8x16[0], b.u.u8x16[0])}; + return SuperVector<16>(vandq_u8(u.u8x16[0], b.u.u8x16[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator|(SuperVector<16> const &b) const { - return {vorrq_u8(u.u8x16[0], b.u.u8x16[0])}; + return SuperVector<16>(vorrq_u8(u.u8x16[0], b.u.u8x16[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator^(SuperVector<16> const &b) const { - return {veorq_u8(u.u8x16[0], b.u.u8x16[0])}; + return SuperVector<16>(veorq_u8(u.u8x16[0], b.u.u8x16[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator!() const { - return {vmvnq_u8(u.u8x16[0])}; + return SuperVector<16>(vmvnq_u8(u.u8x16[0])); } template <> really_inline SuperVector<16> SuperVector<16>::opandnot(SuperVector<16> const &b) const { - return {vandq_u8(vmvnq_u8(u.u8x16[0]), b.u.u8x16[0])}; + return SuperVector<16>(vandq_u8(vmvnq_u8(u.u8x16[0]), b.u.u8x16[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator==(SuperVector<16> const &b) const { - return {vceqq_u8(u.u8x16[0], b.u.u8x16[0])}; + return SuperVector<16>(vceqq_u8(u.u8x16[0], b.u.u8x16[0])); } template <> @@ -221,25 +221,25 @@ really_inline SuperVector<16> SuperVector<16>::operator!=(SuperVector<16> const template <> really_inline SuperVector<16> SuperVector<16>::operator>(SuperVector<16> const &b) const { - return {vcgtq_s8(u.s8x16[0], b.u.s8x16[0])}; + return SuperVector<16>(vcgtq_s8(u.s8x16[0], b.u.s8x16[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator>=(SuperVector<16> const &b) const { - return {vcgeq_u8(u.u8x16[0], b.u.u8x16[0])}; + return SuperVector<16>(vcgeq_u8(u.u8x16[0], b.u.u8x16[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator<(SuperVector<16> const &b) const { - return {vcltq_s8(u.s8x16[0], b.u.s8x16[0])}; + return SuperVector<16>(vcltq_s8(u.s8x16[0], b.u.s8x16[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator<=(SuperVector<16> const &b) const { - return {vcgeq_s8(u.s8x16[0], b.u.s8x16[0])}; + return SuperVector<16>(vcgeq_s8(u.s8x16[0], b.u.s8x16[0])); } template <> @@ -274,35 +274,35 @@ template <> template really_inline SuperVector<16> SuperVector<16>::vshl_8_imm() const { - return {vshlq_n_u8(u.u8x16[0], N)}; + return SuperVector<16>(vshlq_n_u8(u.u8x16[0], N)); } template <> template really_inline SuperVector<16> SuperVector<16>::vshl_16_imm() const { - return {vshlq_n_u16(u.u16x8[0], N)}; + return SuperVector<16>(vshlq_n_u16(u.u16x8[0], N)); } template <> template really_inline SuperVector<16> SuperVector<16>::vshl_32_imm() const { - return {vshlq_n_u32(u.u32x4[0], N)}; + return SuperVector<16>(vshlq_n_u32(u.u32x4[0], N)); } template 
<> template really_inline SuperVector<16> SuperVector<16>::vshl_64_imm() const { - return {vshlq_n_u64(u.u64x2[0], N)}; + return SuperVector<16>(vshlq_n_u64(u.u64x2[0], N)); } template <> template really_inline SuperVector<16> SuperVector<16>::vshl_128_imm() const { - return {vextq_u8(vdupq_n_u8(0), u.u8x16[0], 16 - N)}; + return SuperVector<16>(vextq_u8(vdupq_n_u8(0), u.u8x16[0], 16 - N)); } template <> @@ -316,35 +316,35 @@ template <> template really_inline SuperVector<16> SuperVector<16>::vshr_8_imm() const { - return {vshrq_n_u8(u.u8x16[0], N)}; + return SuperVector<16>(vshrq_n_u8(u.u8x16[0], N)); } template <> template really_inline SuperVector<16> SuperVector<16>::vshr_16_imm() const { - return {vshrq_n_u16(u.u16x8[0], N)}; + return SuperVector<16>(vshrq_n_u16(u.u16x8[0], N)); } template <> template really_inline SuperVector<16> SuperVector<16>::vshr_32_imm() const { - return {vshrq_n_u32(u.u32x4[0], N)}; + return SuperVector<16>(vshrq_n_u32(u.u32x4[0], N)); } template <> template really_inline SuperVector<16> SuperVector<16>::vshr_64_imm() const { - return {vshrq_n_u64(u.u64x2[0], N)}; + return SuperVector<16>(vshrq_n_u64(u.u64x2[0], N)); } template <> template really_inline SuperVector<16> SuperVector<16>::vshr_128_imm() const { - return {vextq_u8(u.u8x16[0], vdupq_n_u8(0), N)}; + return SuperVector<16>(vextq_u8(u.u8x16[0], vdupq_n_u8(0), N)); } template <> @@ -376,7 +376,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_8 (uint8_t const N) const if (N == 0) return *this; if (N == 8) return Zeroes(); int8x16_t shift_indices = vdupq_n_s8(N); - return { vshlq_s8(u.s8x16[0], shift_indices) }; + return SuperVector<16>(vshlq_s8(u.s8x16[0], shift_indices)); } template <> @@ -385,7 +385,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_16 (uint8_t const N) const if (N == 0) return *this; if (N == 16) return Zeroes(); int16x8_t shift_indices = vdupq_n_s16(N); - return { vshlq_s16(u.s16x8[0], shift_indices) }; + return SuperVector<16>(vshlq_s16(u.s16x8[0], shift_indices)); } template <> @@ -394,7 +394,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_32 (uint8_t const N) const if (N == 0) return *this; if (N == 32) return Zeroes(); int32x4_t shift_indices = vdupq_n_s32(N); - return { vshlq_s32(u.s32x4[0], shift_indices) }; + return SuperVector<16>(vshlq_s32(u.s32x4[0], shift_indices)); } template <> @@ -403,7 +403,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_64 (uint8_t const N) const if (N == 0) return *this; if (N == 64) return Zeroes(); int64x2_t shift_indices = vdupq_n_s64(N); - return { vshlq_s64(u.s64x2[0], shift_indices) }; + return SuperVector<16>(vshlq_s64(u.s64x2[0], shift_indices)); } template <> @@ -413,11 +413,11 @@ really_inline SuperVector<16> SuperVector<16>::vshl_128(uint8_t const N) const if (N == 16) return Zeroes(); #if defined(HAVE__BUILTIN_CONSTANT_P) if (__builtin_constant_p(N)) { - return {vextq_u8(vdupq_n_u8(0), u.u8x16[0], 16 - N)}; + return SuperVector<16>(vextq_u8(vdupq_n_u8(0), u.u8x16[0], 16 - N)); } #endif SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vextq_u8(vdupq_n_u8(0), v->u.u8x16[0], 16 - n)}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = SuperVector<16>(vextq_u8(vdupq_n_u8(0), v->u.u8x16[0], 16 - n)); }); return result; } @@ -433,7 +433,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_8 (uint8_t const N) const if (N == 0) return *this; if (N == 8) return Zeroes(); 
int8x16_t shift_indices = vdupq_n_s8(-N); - return { vshlq_s8(u.s8x16[0], shift_indices) }; + return SuperVector<16>(vshlq_s8(u.s8x16[0], shift_indices)); } template <> @@ -442,7 +442,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const N) const if (N == 0) return *this; if (N == 16) return Zeroes(); int16x8_t shift_indices = vdupq_n_s16(-N); - return { vshlq_s16(u.s16x8[0], shift_indices) }; + return SuperVector<16>(vshlq_s16(u.s16x8[0], shift_indices)); } template <> @@ -451,7 +451,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const N) const if (N == 0) return *this; if (N == 32) return Zeroes(); int32x4_t shift_indices = vdupq_n_s32(-N); - return { vshlq_s32(u.s32x4[0], shift_indices) }; + return SuperVector<16>(vshlq_s32(u.s32x4[0], shift_indices)); } template <> @@ -460,7 +460,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const N) const if (N == 0) return *this; if (N == 64) return Zeroes(); int64x2_t shift_indices = vdupq_n_s64(-N); - return { vshlq_s64(u.s64x2[0], shift_indices) }; + return SuperVector<16>(vshlq_s64(u.s64x2[0], shift_indices)); } template <> @@ -470,11 +470,11 @@ really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const N) const if (N == 16) return Zeroes(); #if defined(HAVE__BUILTIN_CONSTANT_P) if (__builtin_constant_p(N)) { - return {vextq_u8(u.u8x16[0], vdupq_n_u8(0), N)}; + return SuperVector<16>(vextq_u8(u.u8x16[0], vdupq_n_u8(0), N)); } #endif SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vextq_u8(v->u.u8x16[0], vdupq_n_u8(0), n)}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = SuperVector<16>(vextq_u8(v->u.u8x16[0], vdupq_n_u8(0), n)); }); return result; } @@ -511,7 +511,7 @@ really_inline SuperVector<16> SuperVector<16>::Ones_vshl(uint8_t const N) template <> really_inline SuperVector<16> SuperVector<16>::loadu(void const *ptr) { - return {vld1q_s32((const int32_t *)ptr)}; + return {SuperVector<16>(vld1q_s32(reinterpret_cast(ptr)))}; } template <> @@ -519,7 +519,7 @@ really_inline SuperVector<16> SuperVector<16>::load(void const *ptr) { assert(ISALIGNED_N(ptr, alignof(SuperVector::size))); ptr = vectorscan_assume_aligned(ptr, SuperVector::size); - return {vld1q_s32((const int32_t *)ptr)}; + return {SuperVector<16>(vld1q_s32(reinterpret_cast(ptr)))}; } template <> @@ -537,11 +537,11 @@ really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> &other, in if (offset == 16) return *this; #if defined(HAVE__BUILTIN_CONSTANT_P) if (__builtin_constant_p(offset)) { - return {vextq_u8(other.u.u8x16[0], u.u8x16[0], offset)}; + return SuperVector<16>(vextq_u8(other.u.u8x16[0], u.u8x16[0], offset)); } #endif SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (offset == n) result = {vextq_u8(other.u.u8x16[0], v->u.u8x16[0], n)}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (offset == n) result = SuperVector<16>(vextq_u8(other.u.u8x16[0], v->u.u8x16[0], n)); }); return result; } @@ -549,7 +549,7 @@ template<> template<> really_inline SuperVector<16> SuperVector<16>::pshufb(SuperVector<16> b) { - return {vqtbl1q_u8(u.u8x16[0], b.u.u8x16[0])}; + return SuperVector<16>(vqtbl1q_u8(u.u8x16[0], b.u.u8x16[0])); } template<> diff --git a/src/util/supervector/arch/ppc64el/impl.cpp b/src/util/supervector/arch/ppc64el/impl.cpp index 
diff --git a/src/util/supervector/arch/ppc64el/impl.cpp b/src/util/supervector/arch/ppc64el/impl.cpp
index de7c73fa..3b2cfa6b 100644
--- a/src/util/supervector/arch/ppc64el/impl.cpp
+++ b/src/util/supervector/arch/ppc64el/impl.cpp
@@ -183,13 +183,13 @@ really_inline SuperVector<16>::SuperVector(uint64_t const other)
template<>
really_inline SuperVector<16> SuperVector<16>::Ones(void)
{
-    return { vec_splat_s8(-1)};
+    return SuperVector<16>(vec_splat_s8(-1));
}

template<>
really_inline SuperVector<16> SuperVector<16>::Zeroes(void)
{
-    return { vec_splat_s8(0) };
+    return SuperVector<16>(vec_splat_s8(0));
}

// Methods
@@ -203,38 +203,38 @@ really_inline void SuperVector<16>::operator=(SuperVector<16> const &other)
template <>
really_inline SuperVector<16> SuperVector<16>::operator&(SuperVector<16> const &b) const
{
-    return { vec_and(u.v128[0], b.u.v128[0]) };
+    return SuperVector<16>(vec_and(u.v128[0], b.u.v128[0]));
}

template <>
really_inline SuperVector<16> SuperVector<16>::operator|(SuperVector<16> const &b) const
{
-    return { vec_or(u.v128[0], b.u.v128[0]) };
+    return SuperVector<16>(vec_or(u.v128[0], b.u.v128[0]));
}

template <>
really_inline SuperVector<16> SuperVector<16>::operator^(SuperVector<16> const &b) const
{
-    return { vec_xor(u.v128[0], b.u.v128[0]) };
+    return SuperVector<16>(vec_xor(u.v128[0], b.u.v128[0]));
}

template <>
really_inline SuperVector<16> SuperVector<16>::operator!() const
{
-    return { vec_xor(u.v128[0], u.v128[0]) };
+    return SuperVector<16>(vec_xor(u.v128[0], u.v128[0]));
}

template <>
really_inline SuperVector<16> SuperVector<16>::opandnot(SuperVector<16> const &b) const
{
    int8x16_t not_res = vec_xor(u.s8x16[0], vec_splat_s8(-1));
-    return { vec_and(not_res, b.u.s8x16[0]) };
+    return SuperVector<16>(vec_and(not_res, b.u.s8x16[0]));
}

template <>
really_inline SuperVector<16> SuperVector<16>::operator==(SuperVector<16> const &b) const
{
-    return { vec_cmpeq(u.s8x16[0], b.u.s8x16[0])};
+    return SuperVector<16>(vec_cmpeq(u.s8x16[0], b.u.s8x16[0]));
}

template <>
@@ -246,25 +246,25 @@ really_inline SuperVector<16> SuperVector<16>::operator!=(SuperVector<16> const
template <>
really_inline SuperVector<16> SuperVector<16>::operator>(SuperVector<16> const &b) const
{
-    return { vec_cmpgt(u.s8x16[0], b.u.s8x16[0])};
+    return SuperVector<16>(vec_cmpgt(u.s8x16[0], b.u.s8x16[0]));
}

template <>
really_inline SuperVector<16> SuperVector<16>::operator>=(SuperVector<16> const &b) const
{
-    return { vec_cmpge(u.s8x16[0], b.u.s8x16[0])};
+    return SuperVector<16>(vec_cmpge(u.s8x16[0], b.u.s8x16[0]));
}

template <>
really_inline SuperVector<16> SuperVector<16>::operator<(SuperVector<16> const &b) const
{
-    return { vec_cmpgt(b.u.s8x16[0], u.s8x16[0])};
+    return SuperVector<16>(vec_cmpgt(b.u.s8x16[0], u.s8x16[0]));
}

template <>
really_inline SuperVector<16> SuperVector<16>::operator<=(SuperVector<16> const &b) const
{
-    return { vec_cmpge(b.u.s8x16[0], u.s8x16[0])};
+    return SuperVector<16>(vec_cmpge(b.u.s8x16[0], u.s8x16[0]));
}

template <>
@@ -310,35 +310,35 @@ template <>
template <uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshl_8_imm() const
{
-    return { vec_sl(u.s8x16[0], vec_splat_u8(N)) };
+    return SuperVector<16>(vec_sl(u.s8x16[0], vec_splat_u8(N)));
}

template <>
template <uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshl_16_imm() const
{
-    return { vec_sl(u.s16x8[0], vec_splat_u16(N)) };
+    return SuperVector<16>(vec_sl(u.s16x8[0], vec_splat_u16(N)));
}

template <>
template <uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshl_32_imm() const
{
-    return { vec_sl(u.s32x4[0], vec_splat_u32(N)) };
+    return SuperVector<16>(vec_sl(u.s32x4[0], vec_splat_u32(N)));
}

template <>
template <uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshl_64_imm() const
{
-    return { vec_sl(u.s64x2[0], vec_splats((ulong64_t) N)) };
+    return SuperVector<16>(vec_sl(u.s64x2[0], vec_splats((ulong64_t) N)));
}

template <>
template <uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshl_128_imm() const
{
-    return { vec_sld(u.s8x16[0], vec_splat_s8(0), N)};
+    return SuperVector<16>(vec_sld(u.s8x16[0], vec_splat_s8(0), N));
}

template <>
@@ -352,35 +352,35 @@ template <>
template <uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshr_8_imm() const
{
-    return { vec_sr(u.s8x16[0], vec_splat_u8(N)) };
+    return SuperVector<16>(vec_sr(u.s8x16[0], vec_splat_u8(N)));
}

template <>
template <uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshr_16_imm() const
{
-    return { vec_sr(u.s16x8[0], vec_splat_u16(N)) };
+    return SuperVector<16>(vec_sr(u.s16x8[0], vec_splat_u16(N)));
}

template <>
template <uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshr_32_imm() const
{
-    return { vec_sr(u.s32x4[0], vec_splat_u32(N)) };
+    return SuperVector<16>(vec_sr(u.s32x4[0], vec_splat_u32(N)));
}

template <>
template <uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshr_64_imm() const
{
-    return { vec_sr(u.s64x2[0], vec_splats((ulong64_t)N)) };
+    return SuperVector<16>(vec_sr(u.s64x2[0], vec_splats((ulong64_t)N)));
}

template <>
template <uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshr_128_imm() const
{
-    return { vec_sld(vec_splat_s8(0), u.s8x16[0], 16 - N) };
+    return SuperVector<16>(vec_sld(vec_splat_s8(0), u.s8x16[0], 16 - N));
}

template <>
@@ -411,7 +411,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_8 (uint8_t const N) const
{
    if (N == 0) return *this;
    uint8x16_t shift_indices = vec_splats((uint8_t) N);
-    return { vec_sl(u.u8x16[0], shift_indices) };
+    return SuperVector<16>(vec_sl(u.u8x16[0], shift_indices));
}

template <>
@@ -419,7 +419,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_16 (uint8_t const UNUSED N)
{
    if (N == 0) return *this;
    uint16x8_t shift_indices = vec_splats((uint16_t) N);
-    return { vec_sl(u.u16x8[0], shift_indices) };
+    return SuperVector<16>(vec_sl(u.u16x8[0], shift_indices));
}

template <>
@@ -427,7 +427,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_32 (uint8_t const N) const
{
    if (N == 0) return *this;
    uint32x4_t shift_indices = vec_splats((uint32_t) N);
-    return { vec_sl(u.u32x4[0], shift_indices) };
+    return SuperVector<16>(vec_sl(u.u32x4[0], shift_indices));
}

template <>
@@ -435,7 +435,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_64 (uint8_t const N) const
{
    if (N == 0) return *this;
    uint64x2_t shift_indices = vec_splats((ulong64_t) N);
-    return { vec_sl(u.u64x2[0], shift_indices) };
+    return SuperVector<16>(vec_sl(u.u64x2[0], shift_indices));
}

template <>
@@ -443,7 +443,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_128(uint8_t const N) const
{
    if (N == 0) return *this;
    SuperVector sl{N << 3};
-    return { vec_slo(u.u8x16[0], sl.u.u8x16[0]) };
+    return SuperVector<16>(vec_slo(u.u8x16[0], sl.u.u8x16[0]));
}

template <>
@@ -457,7 +457,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_8 (uint8_t const N) const
{
    if (N == 0) return *this;
    uint8x16_t shift_indices = vec_splats((uint8_t) N);
-    return { vec_sr(u.u8x16[0], shift_indices) };
+    return SuperVector<16>(vec_sr(u.u8x16[0], shift_indices));
}

template <>
@@ -465,7 +465,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const N) const
{
    if (N == 0) return *this;
    uint16x8_t shift_indices = vec_splats((uint16_t) N);
-    return { vec_sr(u.u16x8[0], shift_indices) };
+    return SuperVector<16>(vec_sr(u.u16x8[0], shift_indices));
}

template <>
@@ -473,7 +473,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const N) const
{
    if (N == 0) return *this;
    uint32x4_t shift_indices = vec_splats((uint32_t) N);
-    return { vec_sr(u.u32x4[0], shift_indices) };
+    return SuperVector<16>(vec_sr(u.u32x4[0], shift_indices));
}

template <>
@@ -481,7 +481,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const N) const
{
    if (N == 0) return *this;
    uint64x2_t shift_indices = vec_splats((ulong64_t) N);
-    return { vec_sr(u.u64x2[0], shift_indices) };
+    return SuperVector<16>(vec_sr(u.u64x2[0], shift_indices));
}

template <>
@@ -489,7 +489,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const N) const
{
    if (N == 0) return *this;
    SuperVector sr{N << 3};
-    return { vec_sro(u.u8x16[0], sr.u.u8x16[0]) };
+    return SuperVector<16>(vec_sro(u.u8x16[0], sr.u.u8x16[0]));
}

template <>
@@ -504,7 +504,7 @@ really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const
#if defined(HAVE__BUILTIN_CONSTANT_P)
    if (N == 0) return *this;
    if (__builtin_constant_p(N)) {
-        return { vec_sld(vec_splat_s8(0), u.s8x16[0], 16 - N) };
+        return SuperVector<16>(vec_sld(vec_splat_s8(0), u.s8x16[0], 16 - N));
    }
#endif
    return vshr_128(N);
@@ -516,7 +516,7 @@ really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const
#if defined(HAVE__BUILTIN_CONSTANT_P)
    if (N == 0) return *this;
    if (__builtin_constant_p(N)) {
-        return { vec_sld(u.s8x16[0], vec_splat_s8(0), N)};
+        return SuperVector<16>(vec_sld(u.s8x16[0], vec_splat_s8(0), N));
    }
#endif
    return vshl_128(N);
@@ -537,14 +537,14 @@ really_inline SuperVector<16> SuperVector<16>::Ones_vshl(uint8_t const N)
template <>
really_inline SuperVector<16> SuperVector<16>::loadu(void const *ptr)
{
-    return { vec_xl(0, (const long64_t*)ptr) };
+    return SuperVector<16>(vec_xl(0, (const long64_t*)ptr));
}

template <>
really_inline SuperVector<16> SuperVector<16>::load(void const *ptr)
{
    assert(ISALIGNED_N(ptr, alignof(SuperVector::size)));
-    return { vec_xl(0, (const long64_t*)ptr) };
+    return SuperVector<16>(vec_xl(0, (const long64_t*)ptr));
}

template <>
@@ -562,14 +562,14 @@ really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> &other, in
    if (offset == 16) return *this;
#if defined(HAVE__BUILTIN_CONSTANT_P)
    if (__builtin_constant_p(offset)) {
-        return { vec_sld(u.s8x16[0], other.u.s8x16[0], offset) };
+        return SuperVector<16>(vec_sld(u.s8x16[0], other.u.s8x16[0], offset));
    }
#endif
    uint8x16_t sl = vec_splats((uint8_t) (offset << 3));
    uint8x16_t sr = vec_splats((uint8_t) ((16 - offset) << 3));
    uint8x16_t rhs = vec_slo(u.u8x16[0], sr);
    uint8x16_t lhs = vec_sro(other.u.u8x16[0], sl);
-    return { vec_or(lhs, rhs) };
+    return SuperVector<16>(vec_or(lhs, rhs));
}

template<>
@@ -581,7 +581,7 @@ really_inline SuperVector<16> SuperVector<16>::pshufb(SuperVector<16> b)
    below is the version that is converted from Intel to PPC.  */
    uint8x16_t mask =(uint8x16_t)vec_cmpge(b.u.u8x16[0], vec_splats((uint8_t)0x80));
    uint8x16_t res = vec_perm (u.u8x16[0], u.u8x16[0], b.u.u8x16[0]);
-    return { vec_sel(res, vec_splat_u8(0), mask) };
+    return SuperVector<16>(vec_sel(res, vec_splat_u8(0), mask));
}

template<>
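The last ppc64el hunk touches the PSHUFB emulation: `vec_perm` selects bytes but, unlike x86 `PSHUFB`, does not zero lanes whose control byte has the high bit set, so the code builds a `vec_cmpge` mask and uses `vec_sel` to force those lanes to zero. A scalar model of the intended semantics (illustration only, not part of the patch):

```cpp
#include <array>
#include <cstdint>

// PSHUFB semantics: lane i takes src[ctrl[i] & 0x0F], unless ctrl[i] has its
// high bit set, in which case the lane becomes zero.
std::array<uint8_t, 16> pshufb_model(const std::array<uint8_t, 16> &src,
                                     const std::array<uint8_t, 16> &ctrl) {
    std::array<uint8_t, 16> out{};
    for (int i = 0; i < 16; ++i) {
        out[i] = (ctrl[i] & 0x80) ? 0 : src[ctrl[i] & 0x0F];
    }
    return out;
}
```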
diff --git a/src/util/supervector/arch/x86/impl.cpp b/src/util/supervector/arch/x86/impl.cpp
index b8a75c95..0323d5e5 100644
--- a/src/util/supervector/arch/x86/impl.cpp
+++ b/src/util/supervector/arch/x86/impl.cpp
@@ -113,13 +113,13 @@ really_inline SuperVector<16>::SuperVector(uint64_t const other)
template<>
really_inline SuperVector<16> SuperVector<16>::Ones()
{
-    return {_mm_set1_epi8(0xFF)};
+    return SuperVector<16>(_mm_set1_epi8(0xFF));
}

template<>
really_inline SuperVector<16> SuperVector<16>::Zeroes(void)
{
-    return {_mm_set1_epi8(0)};
+    return SuperVector<16>(_mm_set1_epi8(0));
}

// Methods
@@ -133,37 +133,37 @@ really_inline void SuperVector<16>::operator=(SuperVector<16> const &other)
template <>
really_inline SuperVector<16> SuperVector<16>::operator&(SuperVector<16> const &b) const
{
-    return {_mm_and_si128(u.v128[0], b.u.v128[0])};
+    return SuperVector<16>(_mm_and_si128(u.v128[0], b.u.v128[0]));
}

template <>
really_inline SuperVector<16> SuperVector<16>::operator|(SuperVector<16> const &b) const
{
-    return {_mm_or_si128(u.v128[0], b.u.v128[0])};
+    return SuperVector<16>(_mm_or_si128(u.v128[0], b.u.v128[0]));
}

template <>
really_inline SuperVector<16> SuperVector<16>::operator^(SuperVector<16> const &b) const
{
-    return {_mm_xor_si128(u.v128[0], b.u.v128[0])};
+    return SuperVector<16>(_mm_xor_si128(u.v128[0], b.u.v128[0]));
}

template <>
really_inline SuperVector<16> SuperVector<16>::operator!() const
{
-    return {_mm_xor_si128(u.v128[0], u.v128[0])};
+    return SuperVector<16>(_mm_xor_si128(u.v128[0], u.v128[0]));
}

template <>
really_inline SuperVector<16> SuperVector<16>::opandnot(SuperVector<16> const &b) const
{
-    return {_mm_andnot_si128(u.v128[0], b.u.v128[0])};
+    return SuperVector<16>(_mm_andnot_si128(u.v128[0], b.u.v128[0]));
}

template <>
really_inline SuperVector<16> SuperVector<16>::operator==(SuperVector<16> const &b) const
{
-    return {_mm_cmpeq_epi8(u.v128[0], b.u.v128[0])};
+    return SuperVector<16>(_mm_cmpeq_epi8(u.v128[0], b.u.v128[0]));
}

template <>
@@ -175,13 +175,13 @@ really_inline SuperVector<16> SuperVector<16>::operator!=(SuperVector<16> const
template <>
really_inline SuperVector<16> SuperVector<16>::operator>(SuperVector<16> const &b) const
{
-    return {_mm_cmpgt_epi8(u.v128[0], b.u.v128[0])};
+    return SuperVector<16>(_mm_cmpgt_epi8(u.v128[0], b.u.v128[0]));
}

template <>
really_inline SuperVector<16> SuperVector<16>::operator<(SuperVector<16> const &b) const
{
-    return {_mm_cmplt_epi8(u.v128[0], b.u.v128[0])};
+    return SuperVector<16>(_mm_cmplt_epi8(u.v128[0], b.u.v128[0]));
}

template <>
@@ -235,28 +235,28 @@ template <>
template <uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshl_16_imm() const
{
-    return {_mm_slli_epi16(u.v128[0], N)};
+    return SuperVector<16>(_mm_slli_epi16(u.v128[0], N));
}

template <>
template <uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshl_32_imm() const
{
-    return {_mm_slli_epi32(u.v128[0], N)};
+    return SuperVector<16>(_mm_slli_epi32(u.v128[0], N));
}

template <>
template <uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshl_64_imm() const
{
-    return {_mm_slli_epi64(u.v128[0], N)};
+    return SuperVector<16>(_mm_slli_epi64(u.v128[0], N));
}

template <>
template <uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshl_128_imm() const
{
-    return {_mm_slli_si128(u.v128[0], N)};
+    return SuperVector<16>(_mm_slli_si128(u.v128[0], N));
}

template <>
@@ -277,28 +277,28 @@ template <>
template <uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshr_16_imm() const
{
-    return {_mm_srli_epi16(u.v128[0], N)};
+    return SuperVector<16>(_mm_srli_epi16(u.v128[0], N));
}

template <>
template <uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshr_32_imm() const
{
-    return {_mm_srli_epi32(u.v128[0], N)};
+    return SuperVector<16>(_mm_srli_epi32(u.v128[0], N));
}

template <>
template <uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshr_64_imm() const
{
-    return {_mm_srli_epi64(u.v128[0], N)};
+    return SuperVector<16>(_mm_srli_epi64(u.v128[0], N));
}

template <>
template <uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshr_128_imm() const
{
-    return {_mm_srli_si128(u.v128[0], N)};
+    return SuperVector<16>(_mm_srli_si128(u.v128[0], N));
}

template <>
@@ -333,13 +333,13 @@ really_inline SuperVector<16> SuperVector<16>::vshl_16 (uint8_t const N) const
{
#if defined(HAVE__BUILTIN_CONSTANT_P)
    if (__builtin_constant_p(N)) {
-        return {_mm_slli_epi16(u.v128[0], N)};
+        return SuperVector<16>(_mm_slli_epi16(u.v128[0], N));
    }
#endif
    if (N == 0) return *this;
    if (N == 16) return Zeroes();
    SuperVector result;
-    Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_slli_epi16(v->u.v128[0], n)}; });
+    Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<16>(_mm_slli_epi16(v->u.v128[0], n))}; });
    return result;
}

@@ -348,13 +348,13 @@ really_inline SuperVector<16> SuperVector<16>::vshl_32 (uint8_t const N) const
{
#if defined(HAVE__BUILTIN_CONSTANT_P)
    if (__builtin_constant_p(N)) {
-        return {_mm_slli_epi32(u.v128[0], N)};
+        return SuperVector<16>(_mm_slli_epi32(u.v128[0], N));
    }
#endif
    if (N == 0) return *this;
    if (N == 16) return Zeroes();
    SuperVector result;
-    Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_slli_epi32(v->u.v128[0], n)}; });
+    Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<16>(_mm_slli_epi32(v->u.v128[0], n))}; });
    return result;
}

@@ -363,13 +363,13 @@ really_inline SuperVector<16> SuperVector<16>::vshl_64 (uint8_t const N) const
{
#if defined(HAVE__BUILTIN_CONSTANT_P)
    if (__builtin_constant_p(N)) {
-        return {_mm_slli_epi64(u.v128[0], N)};
+        return SuperVector<16>(_mm_slli_epi64(u.v128[0], N));
    }
#endif
    if (N == 0) return *this;
    if (N == 16) return Zeroes();
    SuperVector result;
-    Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_slli_epi64(v->u.v128[0], n)}; });
+    Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<16>(_mm_slli_epi64(v->u.v128[0], n))}; });
    return result;
}

@@ -378,13 +378,13 @@ really_inline SuperVector<16> SuperVector<16>::vshl_128(uint8_t const N) const
{
#if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND)
    if (__builtin_constant_p(N)) {
-        return {_mm_slli_si128(u.v128[0], N)};
+        return SuperVector<16>(_mm_slli_si128(u.v128[0], N));
    }
#endif
    if (N == 0) return *this;
    if (N == 16) return Zeroes();
    SuperVector result;
-    Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_slli_si128(v->u.v128[0], n)}; });
+    Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<16>(_mm_slli_si128(v->u.v128[0], n))}; });
    return result;
}

@@ -408,13 +408,13 @@ really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const N) const
{
#if defined(HAVE__BUILTIN_CONSTANT_P)
    if (__builtin_constant_p(N)) {
-        return {_mm_srli_epi16(u.v128[0], N)};
+        return SuperVector<16>(_mm_srli_epi16(u.v128[0], N));
    }
#endif
    if (N == 0) return *this;
    if (N == 16) return Zeroes();
    SuperVector result;
-    Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_srli_epi16(v->u.v128[0], n)}; });
+    Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<16>(_mm_srli_epi16(v->u.v128[0], n))}; });
    return result;
}

@@ -423,13 +423,13 @@ really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const N) const
{
#if defined(HAVE__BUILTIN_CONSTANT_P)
    if (__builtin_constant_p(N)) {
-        return {_mm_srli_epi32(u.v128[0], N)};
+        return SuperVector<16>(_mm_srli_epi32(u.v128[0], N));
    }
#endif
    if (N == 0) return *this;
    if (N == 16) return Zeroes();
    SuperVector result;
-    Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_srli_epi32(v->u.v128[0], n)}; });
+    Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<16>(_mm_srli_epi32(v->u.v128[0], n))}; });
    return result;
}

@@ -438,13 +438,13 @@ really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const N) const
{
#if defined(HAVE__BUILTIN_CONSTANT_P)
    if (__builtin_constant_p(N)) {
-        return {_mm_srli_epi64(u.v128[0], N)};
+        return SuperVector<16>(_mm_srli_epi64(u.v128[0], N));
    }
#endif
    if (N == 0) return *this;
    if (N == 16) return Zeroes();
    SuperVector result;
-    Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_srli_epi64(v->u.v128[0], n)}; });
+    Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<16>(_mm_srli_epi64(v->u.v128[0], n))}; });
    return result;
}

@@ -453,13 +453,13 @@ really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const N) const
{
#if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND)
    if (__builtin_constant_p(N)) {
-        return {_mm_srli_si128(u.v128[0], N)};
+        return SuperVector<16>(_mm_srli_si128(u.v128[0], N));
    }
#endif
    if (N == 0) return *this;
    if (N == 16) return Zeroes();
    SuperVector result;
-    Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_srli_si128(v->u.v128[0], n)}; });
+    Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<16>(_mm_srli_si128(v->u.v128[0], n))}; });
    return result;
}

@@ -474,7 +474,7 @@ really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const
{
#if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND)
    if (__builtin_constant_p(N)) {
-        return {_mm_srli_si128(u.v128[0], N)};
+        return SuperVector<16>(_mm_srli_si128(u.v128[0], N));
    }
#endif
    return vshr_128(N);
@@ -485,7 +485,7 @@ really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const
{
#if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND)
    if (__builtin_constant_p(N)) {
-        return {_mm_slli_si128(u.v128[0], N)};
+        return SuperVector<16>(_mm_slli_si128(u.v128[0], N));
    }
#endif
    return vshl_128(N);
@@ -508,7 +508,7 @@ really_inline SuperVector<16> SuperVector<16>::Ones_vshl(uint8_t const N)
template <>
really_inline SuperVector<16> SuperVector<16>::loadu(void const *ptr)
{
-    return _mm_loadu_si128((const m128 *)ptr);
+    return SuperVector<16>(_mm_loadu_si128(reinterpret_cast<const m128 *>(ptr)));
}

template <>
@@ -516,14 +516,14 @@ really_inline SuperVector<16> SuperVector<16>::load(void const *ptr)
{
    assert(ISALIGNED_N(ptr, alignof(SuperVector::size)));
    ptr = vectorscan_assume_aligned(ptr, SuperVector::size);
-    return _mm_load_si128((const m128 *)ptr);
+    return SuperVector<16>(_mm_load_si128(reinterpret_cast<const m128 *>(ptr)));
}

template <>
really_inline SuperVector<16> SuperVector<16>::loadu_maskz(void const *ptr, uint8_t const len)
{
    SuperVector mask = Ones_vshr(16 -len);
-    SuperVector v = _mm_loadu_si128((const m128 *)ptr);
+    SuperVector v = SuperVector<16>(_mm_loadu_si128(reinterpret_cast<const m128 *>(ptr)));
    return mask & v;
}

@@ -535,27 +535,27 @@ really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> &other, in
    if (offset == 16) {
        return *this;
    } else {
-        return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], offset)};
+        return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], offset));
    }
}
#endif
    switch(offset) {
    case 0: return other; break;
-    case 1: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 1)}; break;
-    case 2: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 2)}; break;
-    case 3: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 3)}; break;
-    case 4: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 4)}; break;
-    case 5: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 5)}; break;
-    case 6: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 6)}; break;
-    case 7: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 7)}; break;
-    case 8: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 8)}; break;
-    case 9: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 9)}; break;
-    case 10: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 10)}; break;
-    case 11: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 11)}; break;
-    case 12: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 12)}; break;
-    case 13: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 13)}; break;
-    case 14: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 14)}; break;
-    case 15: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 15)}; break;
+    case 1: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 1)); break;
+    case 2: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 2)); break;
+    case 3: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 3)); break;
+    case 4: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 4)); break;
+    case 5: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 5)); break;
+    case 6: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 6)); break;
+    case 7: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 7)); break;
+    case 8: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 8)); break;
+    case 9: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 9)); break;
+    case 10: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 10)); break;
+    case 11: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 11)); break;
+    case 12: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 12)); break;
+    case 13: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 13)); break;
+    case 14: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 14)); break;
+    case 15: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 15)); break;
    default: break;
    }
    return *this;
@@ -565,7 +565,7 @@ template<>
template<>
really_inline SuperVector<16> SuperVector<16>::pshufb(SuperVector<16> b)
{
-    return {_mm_shuffle_epi8(u.v128[0], b.u.v128[0])};
+    return SuperVector<16>(_mm_shuffle_epi8(u.v128[0], b.u.v128[0]));
}

template<>
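One pattern worth spelling out in the x86 hunks: intrinsics such as `_mm_slli_epi16` and `_mm_slli_si128` require a compile-time immediate, so the runtime-count overloads first try the `__builtin_constant_p` fast path and otherwise fall back to `Unroller`, which instantiates the intrinsic for every possible count and keeps the one that matches. A simplified sketch of that dispatch idea (assumed shape only — the project's real `Unroller` is defined elsewhere in the tree):

```cpp
#include <cstdint>
#include <utility>

// Invoke f with std::integral_constant<uint8_t, 1> .. <uint8_t, 16>, so each
// call site sees a genuine compile-time constant suitable for an
// immediate-operand intrinsic.
template <typename F, std::size_t... Is>
void for_each_count(F &&f, std::index_sequence<Is...>) {
    (f(std::integral_constant<uint8_t, Is + 1>{}), ...);
}

template <typename F>
void dispatch_1_to_16(F &&f) {
    for_each_count(std::forward<F>(f), std::make_index_sequence<16>{});
}

// usage sketch:
//   dispatch_1_to_16([&](auto c) { if (n == c.value) r = shift<c.value>(x); });
```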
@@ -673,13 +673,13 @@ really_inline SuperVector<32>::SuperVector(uint64_t const other)
template<>
really_inline SuperVector<32> SuperVector<32>::Ones(void)
{
-    return {_mm256_set1_epi8(0xFF)};
+    return SuperVector<32>(_mm256_set1_epi8(0xFF));
}

template<>
really_inline SuperVector<32> SuperVector<32>::Zeroes(void)
{
-    return {_mm256_set1_epi8(0)};
+    return SuperVector<32>(_mm256_set1_epi8(0));
}

template <>
@@ -691,37 +691,37 @@ really_inline void SuperVector<32>::operator=(SuperVector<32> const &other)
template <>
really_inline SuperVector<32> SuperVector<32>::operator&(SuperVector<32> const &b) const
{
-    return {_mm256_and_si256(u.v256[0], b.u.v256[0])};
+    return SuperVector<32>(_mm256_and_si256(u.v256[0], b.u.v256[0]));
}

template <>
really_inline SuperVector<32> SuperVector<32>::operator|(SuperVector<32> const &b) const
{
-    return {_mm256_or_si256(u.v256[0], b.u.v256[0])};
+    return SuperVector<32>(_mm256_or_si256(u.v256[0], b.u.v256[0]));
}

template <>
really_inline SuperVector<32> SuperVector<32>::operator^(SuperVector<32> const &b) const
{
-    return {_mm256_xor_si256(u.v256[0], b.u.v256[0])};
+    return SuperVector<32>(_mm256_xor_si256(u.v256[0], b.u.v256[0]));
}

template <>
really_inline SuperVector<32> SuperVector<32>::operator!() const
{
-    return {_mm256_xor_si256(u.v256[0], u.v256[0])};
+    return SuperVector<32>(_mm256_xor_si256(u.v256[0], u.v256[0]));
}

template <>
really_inline SuperVector<32> SuperVector<32>::opandnot(SuperVector<32> const &b) const
{
-    return {_mm256_andnot_si256(u.v256[0], b.u.v256[0])};
+    return SuperVector<32>(_mm256_andnot_si256(u.v256[0], b.u.v256[0]));
}

template <>
really_inline SuperVector<32> SuperVector<32>::operator==(SuperVector<32> const &b) const
{
-    return {_mm256_cmpeq_epi8(u.v256[0], b.u.v256[0])};
+    return SuperVector<32>(_mm256_cmpeq_epi8(u.v256[0], b.u.v256[0]));
}

template <>
@@ -733,7 +733,7 @@ really_inline SuperVector<32> SuperVector<32>::operator!=(SuperVector<32> const
template <>
really_inline SuperVector<32> SuperVector<32>::operator>(SuperVector<32> const &b) const
{
-    return {_mm256_cmpgt_epi8(u.v256[0], b.u.v256[0])};
+    return SuperVector<32>(_mm256_cmpgt_epi8(u.v256[0], b.u.v256[0]));
}

template <>
@@ -793,28 +793,28 @@ template <>
template <uint8_t N>
really_inline SuperVector<32> SuperVector<32>::vshl_16_imm() const
{
-    return {_mm256_slli_epi16(u.v256[0], N)};
+    return {SuperVector<32>(_mm256_slli_epi16(u.v256[0], N))};
}

template <>
template <uint8_t N>
really_inline SuperVector<32> SuperVector<32>::vshl_32_imm() const
{
-    return {_mm256_slli_epi32(u.v256[0], N)};
+    return {SuperVector<32>(_mm256_slli_epi32(u.v256[0], N))};
}

template <>
template <uint8_t N>
really_inline SuperVector<32> SuperVector<32>::vshl_64_imm() const
{
-    return {_mm256_slli_epi64(u.v256[0], N)};
+    return {SuperVector<32>(_mm256_slli_epi64(u.v256[0], N))};
}

template <>
template <uint8_t N>
really_inline SuperVector<32> SuperVector<32>::vshl_128_imm() const
{
-    return {_mm256_slli_si256(u.v256[0], N)};
+    return {SuperVector<32>(_mm256_slli_si256(u.v256[0], N))};
}

template <>
@@ -822,12 +822,12 @@ template <uint8_t N>
really_inline SuperVector<32> SuperVector<32>::vshl_256_imm() const
{
    if (N == 0) return *this;
-    if (N == 16) return {_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0))};
+    if (N == 16) return {SuperVector<32>(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)))};
    if (N == 32) return Zeroes();
    if (N < 16) {
-        return {_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - N)};
+        return {SuperVector<32>(_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - N))};
    } else {
-        return {_mm256_slli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), N - 16)};
+        return {SuperVector<32>(_mm256_slli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), N - 16))};
    }
}

@@ -849,28 +849,28 @@ template <>
template <uint8_t N>
really_inline SuperVector<32> SuperVector<32>::vshr_16_imm() const
{
-    return {_mm256_srli_epi16(u.v256[0], N)};
+    return {SuperVector<32>(_mm256_srli_epi16(u.v256[0], N))};
}

template <>
template <uint8_t N>
really_inline SuperVector<32> SuperVector<32>::vshr_32_imm() const
{
-    return {_mm256_srli_epi32(u.v256[0], N)};
+    return {SuperVector<32>(_mm256_srli_epi32(u.v256[0], N))};
}

template <>
template <uint8_t N>
really_inline SuperVector<32> SuperVector<32>::vshr_64_imm() const
{
-    return {_mm256_srli_epi64(u.v256[0], N)};
+    return {SuperVector<32>(_mm256_srli_epi64(u.v256[0], N))};
}

template <>
template <uint8_t N>
really_inline SuperVector<32> SuperVector<32>::vshr_128_imm() const
{
-    return {_mm256_srli_si256(u.v256[0], N)};
+    return {SuperVector<32>(_mm256_srli_si256(u.v256[0], N))};
}

template <>
@@ -878,12 +878,12 @@ template <uint8_t N>
really_inline SuperVector<32> SuperVector<32>::vshr_256_imm() const
{
    if (N == 0) return *this;
-    if (N == 16) return {_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1))};
+    if (N == 16) return {SuperVector<32>(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)))};
    if (N == 32) return Zeroes();
    if (N < 16) {
-        return {_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - N)};
+        return {SuperVector<32>(_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - N))};
    } else {
-        return {_mm256_srli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), N - 16)};
+        return {SuperVector<32>(_mm256_srli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), N - 16))};
    }
}

@@ -922,7 +922,7 @@ really_inline SuperVector<32> SuperVector<32>::vshl_16 (uint8_t const N) const
    if (N == 0) return *this;
    if (N == 32) return Zeroes();
    SuperVector result;
-    Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_slli_epi16(v->u.v256[0], n)}; });
+    Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_slli_epi16(v->u.v256[0], n))}; });
    return result;
}

@@ -932,7 +932,7 @@ really_inline SuperVector<32> SuperVector<32>::vshl_32 (uint8_t const N) const
    if (N == 0) return *this;
    if (N == 32) return Zeroes();
    SuperVector result;
-    Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_slli_epi32(v->u.v256[0], n)}; });
+    Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_slli_epi32(v->u.v256[0], n))}; });
    return result;
}

@@ -942,7 +942,7 @@ really_inline SuperVector<32> SuperVector<32>::vshl_64 (uint8_t const N) const
    if (N == 0) return *this;
    if (N == 32) return Zeroes();
    SuperVector result;
-    Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_slli_epi64(v->u.v256[0], n)}; });
+    Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_slli_epi64(v->u.v256[0], n))}; });
    return result;
}

@@ -952,7 +952,7 @@ really_inline SuperVector<32> SuperVector<32>::vshl_128(uint8_t const N) const
    if (N == 0) return *this;
    if (N == 32) return Zeroes();
    SuperVector result;
-    Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_slli_si256(v->u.v256[0], n)}; });
+    Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_slli_si256(v->u.v256[0], n))}; });
    return result;
}

@@ -960,16 +960,16 @@ template <>
really_inline SuperVector<32> SuperVector<32>::vshl_256(uint8_t const N) const
{
    if (N == 0) return *this;
-    if (N == 16) return {_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0))};
+    if (N == 16) return {SuperVector<32>(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)))};
    if (N == 32) return Zeroes();
    SuperVector result;
    Unroller<1, 16>::iterator([&,v=this](auto const i) {
        constexpr uint8_t n = i.value;
-        if (N == n) result = {_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - n)};;
+        if (N == n) result = {SuperVector<32>(_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - n))};;
    });
    Unroller<17, 32>::iterator([&,v=this](auto const i) {
        constexpr uint8_t n = i.value;
-        if (N == n) result = {_mm256_slli_si256(_mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), n - 16)};
+        if (N == n) result = {SuperVector<32>(_mm256_slli_si256(_mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), n - 16))};
    });
    return result;
}

@@ -995,7 +995,7 @@ really_inline SuperVector<32> SuperVector<32>::vshr_16 (uint8_t const N) const
    if (N == 0) return *this;
    if (N == 32) return Zeroes();
    SuperVector result;
-    Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_srli_epi16(v->u.v256[0], n)}; });
+    Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_srli_epi16(v->u.v256[0], n))}; });
    return result;
}

@@ -1005,7 +1005,7 @@ really_inline SuperVector<32> SuperVector<32>::vshr_32 (uint8_t const N) const
    if (N == 0) return *this;
    if (N == 32) return Zeroes();
    SuperVector result;
-    Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_srli_epi32(v->u.v256[0], n)}; });
+    Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_srli_epi32(v->u.v256[0], n))}; });
    return result;
}

@@ -1015,7 +1015,7 @@ really_inline SuperVector<32> SuperVector<32>::vshr_64 (uint8_t const N) const
    if (N == 0) return *this;
    if (N == 32) return Zeroes();
    SuperVector result;
-    Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_srli_epi64(v->u.v256[0], n)}; });
+    Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_srli_epi64(v->u.v256[0], n))}; });
    return result;
}

@@ -1025,7 +1025,7 @@ really_inline SuperVector<32> SuperVector<32>::vshr_128(uint8_t const N) const
    if (N == 0) return *this;
    if (N == 32) return Zeroes();
    SuperVector result;
-    Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_srli_si256(v->u.v256[0], n)}; });
+    Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_srli_si256(v->u.v256[0], n))}; });
    return result;
}

@@ -1033,16 +1033,16 @@ template <>
really_inline SuperVector<32> SuperVector<32>::vshr_256(uint8_t const N) const
{
    if (N == 0) return *this;
-    if (N == 16) return {_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1))};
+    if (N == 16) return {SuperVector<32>(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)))};
    if (N == 32) return Zeroes();
    SuperVector result;
    Unroller<1, 16>::iterator([&,v=this](auto const i) {
        constexpr uint8_t n = i.value;
-        if (N == n) result = {_mm256_alignr_epi8(_mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), v->u.v256[0], n)};
+        if (N == n) result = {SuperVector<32>(_mm256_alignr_epi8(_mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), v->u.v256[0], n))};
    });
    Unroller<17, 32>::iterator([&,v=this](auto const i) {
        constexpr uint8_t n = i.value;
-        if (N == n) result = {_mm256_srli_si256(_mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), n - 16)};
+        if (N == n) result = {SuperVector<32>(_mm256_srli_si256(_mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), n - 16))};
    });
    return result;
}

@@ -1060,11 +1060,11 @@ really_inline SuperVector<32> SuperVector<32>::operator>>(uint8_t const N) const
    if (__builtin_constant_p(N)) {
        // As found here: https://stackoverflow.com/questions/25248766/emulating-shifts-on-32-bytes-with-avx
        if (N < 16) {
-            return {_mm256_alignr_epi8(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), u.v256[0], N)};
+            return {SuperVector<32>(_mm256_alignr_epi8(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), u.v256[0], N))};
        } else if (N == 16) {
-            return {_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1))};
+            return {SuperVector<32>(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)))};
        } else {
-            return {_mm256_srli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), N - 16)};
+            return {SuperVector<32>(_mm256_srli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), N - 16))};
        }
    }
#endif
@@ -1078,11 +1078,11 @@ really_inline SuperVector<32> SuperVector<32>::operator<<(uint8_t const N) const
    if (__builtin_constant_p(N)) {
        // As found here: https://stackoverflow.com/questions/25248766/emulating-shifts-on-32-bytes-with-avx
        if (N < 16) {
-            return {_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - N)};
+            return {SuperVector<32>(_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - N))};
        } else if (N == 16) {
-            return {_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0))};
+            return {SuperVector<32>(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)))};
        } else {
-            return {_mm256_slli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), N - 16)};
+            return {SuperVector<32>(_mm256_slli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), N - 16))};
        }
    }
#endif
@@ -1112,7 +1112,7 @@ really_inline SuperVector<32> SuperVector<32>::Ones_vshl(uint8_t const N)
template <>
really_inline SuperVector<32> SuperVector<32>::loadu(void const *ptr)
{
-    return {_mm256_loadu_si256((const m256 *)ptr)};
+    return {SuperVector<32>(_mm256_loadu_si256((const m256 *)ptr))};
}

template <>
@@ -1120,7 +1120,7 @@ really_inline SuperVector<32> SuperVector<32>::load(void const *ptr)
{
    assert(ISALIGNED_N(ptr, alignof(SuperVector::size)));
    ptr = vectorscan_assume_aligned(ptr, SuperVector::size);
-    return {_mm256_load_si256((const m256 *)ptr)};
+    return {SuperVector<32>(_mm256_load_si256((const m256 *)ptr))};
}

template <>
@@ -1128,7 +1128,7 @@ really_inline SuperVector<32> SuperVector<32>::loadu_maskz(void const *ptr, uint
{
#ifdef HAVE_AVX512
    u32 mask = (~0ULL) >> (32 - len);
-    SuperVector<32> v = _mm256_mask_loadu_epi8(Zeroes().u.v256[0], mask, (const m256 *)ptr);
+    SuperVector<32> v = SuperVector<32>(_mm256_mask_loadu_epi8(Zeroes().u.v256[0], mask, (const m256 *)ptr));
    v.print8("v");
    return v;
#else
@@ -1136,7 +1136,7 @@ really_inline SuperVector<32> SuperVector<32>::loadu_maskz(void const *ptr, uint
    SuperVector<32> mask = Ones_vshr(32 -len);
    mask.print8("mask");
    (Ones() >> (32 - len)).print8("mask");
-    SuperVector<32> v = _mm256_loadu_si256((const m256 *)ptr);
+    SuperVector<32> v = SuperVector<32>(_mm256_loadu_si256((const m256 *)ptr));
    v.print8("v");
    return mask & v;
#endif
@@ -1145,49 +1145,49 @@ really_inline SuperVector<32> SuperVector<32>::loadu_maskz(void const *ptr, uint
template<>
really_inline SuperVector<32> SuperVector<32>::alignr(SuperVector<32> &other, int8_t offset)
{
-#if defined(HAVE__BUILTIN_CONSTANT_P) && !(defined(__GNUC__) && (__GNUC__ == 13))
+#if defined(HAVE__BUILTIN_CONSTANT_P) && !(defined(__GNUC__) && ((__GNUC__ == 13) || (__GNUC__ == 14)))
    if (__builtin_constant_p(offset)) {
        if (offset == 16) {
            return *this;
        } else {
-            return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], offset)};
+            return {SuperVector<32>(_mm256_alignr_epi8(u.v256[0], other.u.v256[0], offset))};
        }
    }
#endif
    // As found here: https://stackoverflow.com/questions/8517970/mm-alignr-epi8-palignr-equivalent-in-avx2#8637458
    switch (offset){
-    case 0 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 0), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 0)); break;
-    case 1 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 1), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 1)); break;
-    case 2 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 2), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 2)); break;
-    case 3 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 3), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 3)); break;
-    case 4 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 4), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 4)); break;
-    case 5 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 5), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 5)); break;
-    case 6 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 6), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 6)); break;
-    case 7 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 7), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 7)); break;
-    case 8 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 8), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 8)); break;
-    case 9 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 9), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 9)); break;
-    case 10 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 10), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 10)); break;
-    case 11 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 11), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 11)); break;
-    case 12 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 12), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 12)); break;
-    case 13 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 13), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 13)); break;
-    case 14 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 14), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 14)); break;
-    case 15 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 15), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 15)); break;
-    case 16 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 0), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 0)); break;
-    case 17 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 1), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 1)); break;
-    case 18 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 2), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 2)); break;
-    case 19 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 3), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 3)); break;
-    case 20 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 4), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 4)); break;
-    case 21 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 5), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 5)); break;
-    case 22 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 6), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 6)); break;
-    case 23 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 7), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 7)); break;
-    case 24 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 8), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 8)); break;
-    case 25 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 9), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 9)); break;
-    case 26 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 10), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 10)); break;
-    case 27 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 11), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 11)); break;
-    case 28 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 12), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 12)); break;
-    case 29 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 13), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 13)); break;
-    case 30 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 14), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 14)); break;
-    case 31 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 15), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 15)); break;
+    case 0 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 0), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 0))); break;
+    case 1 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 1), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 1))); break;
+    case 2 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 2), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 2))); break;
+    case 3 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 3), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 3))); break;
+    case 4 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 4), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 4))); break;
+    case 5 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 5), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 5))); break;
+    case 6 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 6), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 6))); break;
+    case 7 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 7), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 7))); break;
+    case 8 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 8), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 8))); break;
+    case 9 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 9), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 9))); break;
+    case 10 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 10), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 10))); break;
+    case 11 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 11), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 11))); break;
+    case 12 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 12), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 12))); break;
+    case 13 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 13), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 13))); break;
+    case 14 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 14), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 14))); break;
+    case 15 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 15), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 15))); break;
+    case 16 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 0), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 0))); break;
+    case 17 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 1), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 1))); break;
+    case 18 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 2), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 2))); break;
+    case 19 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 3), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 3))); break;
+    case 20 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 4), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 4))); break;
+    case 21 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 5), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 5))); break;
+    case 22 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 6), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 6))); break;
+    case 23 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 7), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 7))); break;
+    case 24 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 8), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 8))); break;
+    case 25 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 9), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 9))); break;
+    case 26 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 10), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 10))); break;
+    case 27 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 11), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 11))); break;
+    case 28 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 12), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 12))); break;
+    case 29 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 13), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 13))); break;
+    case 30 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 14), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 14))); break;
+    case 31 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 15), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 15))); break;
    default: break;
    }
    return *this;
@@ -1197,7 +1197,7 @@ template<>
template<>
really_inline SuperVector<32> SuperVector<32>::pshufb(SuperVector<32> b)
{
-    return {_mm256_shuffle_epi8(u.v256[0], b.u.v256[0])};
+    return {SuperVector<32>(_mm256_shuffle_epi8(u.v256[0], b.u.v256[0]))};
}

template<>
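The AVX2 hunks keep the established lane-crossing emulation (see the stackoverflow links retained in the context) and only change how results are wrapped; the added `__GNUC__ == 13` / `__GNUC__ == 14` conditions merely opt those GCC majors out of the `__builtin_constant_p` fast path, leaving the switch fallback. For reference, the emulation itself: `_mm256_alignr_epi8` and the byte shifts operate per 128-bit lane, so a whole-register shift is stitched from a lane swap plus a per-lane alignr. A standalone illustration of the right-shift case (my reading of the same idiom, not project code; requires AVX2):

```cpp
#include <immintrin.h>

// 256-bit right shift by N bytes, valid for 0 < N < 16 (compile with -mavx2).
template <int N>
__m256i srli256(__m256i v) {
    // swapped = [ hi(v), 0 ]: pull the high lane down, zero the new high lane
    __m256i swapped = _mm256_permute2x128_si256(v, v, _MM_SHUFFLE(2, 0, 0, 1));
    // per lane: bytes N..15 come from v's own lane, the top N bytes are
    // refilled from the lane above it (or zero for the top lane)
    return _mm256_alignr_epi8(swapped, v, N);
}
```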
@@ -1313,13 +1313,13 @@ really_inline SuperVector<64>::SuperVector(uint64_t const o)
template<>
really_inline SuperVector<64> SuperVector<64>::Ones(void)
{
-    return {_mm512_set1_epi8(0xFF)};
+    return {SuperVector<64>(_mm512_set1_epi8(0xFF))};
}

template<>
really_inline SuperVector<64> SuperVector<64>::Zeroes(void)
{
-    return {_mm512_set1_epi8(0)};
+    return {SuperVector<64>(_mm512_set1_epi8(0))};
}

// Methods
@@ -1332,31 +1332,31 @@ really_inline void SuperVector<64>::operator=(SuperVector<64> const &o)
template <>
really_inline SuperVector<64> SuperVector<64>::operator&(SuperVector<64> const &b) const
{
-    return {_mm512_and_si512(u.v512[0], b.u.v512[0])};
+    return {SuperVector<64>(_mm512_and_si512(u.v512[0], b.u.v512[0]))};
}

template <>
really_inline SuperVector<64> SuperVector<64>::operator|(SuperVector<64> const &b) const
{
-    return {_mm512_or_si512(u.v512[0], b.u.v512[0])};
+    return {SuperVector<64>(_mm512_or_si512(u.v512[0], b.u.v512[0]))};
}

template <>
really_inline SuperVector<64> SuperVector<64>::operator^(SuperVector<64> const &b) const
{
-    return {_mm512_xor_si512(u.v512[0], b.u.v512[0])};
+    return {SuperVector<64>(_mm512_xor_si512(u.v512[0], b.u.v512[0]))};
}

template <>
really_inline SuperVector<64> SuperVector<64>::operator!() const
{
-    return {_mm512_xor_si512(u.v512[0], u.v512[0])};
+    return {SuperVector<64>(_mm512_xor_si512(u.v512[0], u.v512[0]))};
}

template <>
really_inline SuperVector<64> SuperVector<64>::opandnot(SuperVector<64> const &b) const
{
-    return {_mm512_andnot_si512(u.v512[0], b.u.v512[0])};
+    return {SuperVector<64>(_mm512_andnot_si512(u.v512[0], b.u.v512[0]))};
}

template <>
@@ -1364,7 +1364,7 @@ really_inline SuperVector<64> SuperVector<64>::operator==(SuperVector<64> const
{
    SuperVector<64>::comparemask_type mask = _mm512_cmpeq_epi8_mask(u.v512[0], b.u.v512[0]);
-    return {_mm512_movm_epi8(mask)};
+    return {SuperVector<64>(_mm512_movm_epi8(mask))};
}

template <>
@@ -1372,7 +1372,7 @@ really_inline SuperVector<64> SuperVector<64>::operator!=(SuperVector<64> const
{
    SuperVector<64>::comparemask_type mask = _mm512_cmpneq_epi8_mask(u.v512[0], b.u.v512[0]);
-    return {_mm512_movm_epi8(mask)};
+    return {SuperVector<64>(_mm512_movm_epi8(mask))};
}

template <>
@@ -1380,7 +1380,7 @@ really_inline SuperVector<64> SuperVector<64>::operator>(SuperVector<64> const &
{
    SuperVector<64>::comparemask_type mask = _mm512_cmpgt_epi8_mask(u.v512[0], b.u.v512[0]);
-    return {_mm512_movm_epi8(mask)};
+    return {SuperVector<64>(_mm512_movm_epi8(mask))};
}

template <>
@@ -1388,7 +1388,7 @@ really_inline SuperVector<64> SuperVector<64>::operator<(SuperVector<64> const &
{
    SuperVector<64>::comparemask_type mask = _mm512_cmplt_epi8_mask(u.v512[0], b.u.v512[0]);
-    return {_mm512_movm_epi8(mask)};
+    return {SuperVector<64>(_mm512_movm_epi8(mask))};
}

template <>
@@ -1396,7 +1396,7 @@ really_inline SuperVector<64> SuperVector<64>::operator>=(SuperVector<64> const
{
    SuperVector<64>::comparemask_type mask = _mm512_cmpge_epi8_mask(u.v512[0], b.u.v512[0]);
-    return {_mm512_movm_epi8(mask)};
+    return {SuperVector<64>(_mm512_movm_epi8(mask))};
}

template <>
@@ -1404,7 +1404,7 @@ really_inline SuperVector<64> SuperVector<64>::operator<=(SuperVector<64> const
{
    SuperVector<64>::comparemask_type mask = _mm512_cmple_epi8_mask(u.v512[0], b.u.v512[0]);
-    return {_mm512_movm_epi8(mask)};
+    return {SuperVector<64>(_mm512_movm_epi8(mask))};
}

template <>
@@ -1448,28 +1448,28 @@ template <>
template <uint8_t N>
really_inline SuperVector<64> SuperVector<64>::vshl_16_imm() const
{
-    return {_mm512_slli_epi16(u.v512[0], N)};
+    return {SuperVector<64>(_mm512_slli_epi16(u.v512[0], N))};
}

template <>
template <uint8_t N>
really_inline SuperVector<64> SuperVector<64>::vshl_32_imm() const
{
-    return {_mm512_slli_epi32(u.v512[0], N)};
+    return {SuperVector<64>(_mm512_slli_epi32(u.v512[0], N))};
}

template <>
template <uint8_t N>
really_inline SuperVector<64> SuperVector<64>::vshl_64_imm() const
{
-    return {_mm512_slli_epi64(u.v512[0], N)};
+    return {SuperVector<64>(_mm512_slli_epi64(u.v512[0], N))};
}

template <>
template <uint8_t N>
really_inline SuperVector<64> SuperVector<64>::vshl_128_imm() const
{
-    return {_mm512_bslli_epi128(u.v512[0], N)};
+    return {SuperVector<64>(_mm512_bslli_epi128(u.v512[0], N))};
}

template <>
@@ -1504,28 +1504,28 @@ template <>
template <uint8_t N>
really_inline SuperVector<64> SuperVector<64>::vshr_16_imm() const
{
-    return {_mm512_srli_epi16(u.v512[0], N)};
+    return {SuperVector<64>(_mm512_srli_epi16(u.v512[0], N))};
}

template <>
template <uint8_t N>
really_inline SuperVector<64> SuperVector<64>::vshr_32_imm() const
{
-    return {_mm512_srli_epi32(u.v512[0], N)};
+    return {SuperVector<64>(_mm512_srli_epi32(u.v512[0], N))};
}

template <>
template <uint8_t N>
really_inline SuperVector<64> SuperVector<64>::vshr_64_imm() const
{
-    return {_mm512_srli_epi64(u.v512[0], N)};
+    return {SuperVector<64>(_mm512_srli_epi64(u.v512[0], N))};
}

template <>
template <uint8_t N>
really_inline SuperVector<64> SuperVector<64>::vshr_128_imm() const
{
-    return {_mm512_bsrli_epi128(u.v512[0], N)};
+    return {SuperVector<64>(_mm512_bsrli_epi128(u.v512[0], N))};
}

template <>
@@ -1574,7 +1574,7 @@ really_inline SuperVector<64> SuperVector<64>::vshl_16 (uint8_t const N) const
    if (N == 0) return *this;
    if (N == 64) return Zeroes();
    SuperVector result;
-    Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_slli_epi16(v->u.v512[0], n)}; });
+    Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_slli_epi16(v->u.v512[0], n))}; });
    return result;
}

@@ -1584,7 +1584,7 @@ really_inline SuperVector<64> SuperVector<64>::vshl_32 (uint8_t const N) const
    if (N == 0) return *this;
    if (N == 64) return Zeroes();
    SuperVector result;
-    Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_slli_epi32(v->u.v512[0], n)}; });
+    Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_slli_epi32(v->u.v512[0], n))}; });
    return result;
}

@@ -1594,7 +1594,7 @@ really_inline SuperVector<64> SuperVector<64>::vshl_64 (uint8_t const N) const
    if (N == 0) return *this;
    if (N == 64) return Zeroes();
    SuperVector result;
-    Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_slli_epi64(v->u.v512[0], n)}; });
+    Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_slli_epi64(v->u.v512[0], n))}; });
    return result;
}

@@ -1604,7 +1604,7 @@ really_inline SuperVector<64> SuperVector<64>::vshl_128(uint8_t const N) const
    if (N == 0) return *this;
    if (N == 64) return Zeroes();
    SuperVector result;
-    Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_bslli_epi128(v->u.v512[0], n)}; });
+    Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_bslli_epi128(v->u.v512[0], n))}; });
    return result;
}

@@ -1641,7 +1641,7 @@ really_inline SuperVector<64> SuperVector<64>::vshr_16 (uint8_t const N) const
    if (N == 0) return *this;
    if (N == 64) return Zeroes();
    SuperVector result;
-    Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_srli_epi16(v->u.v512[0], n)}; });
+    Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_srli_epi16(v->u.v512[0], n))}; });
    return result;
}

@@ -1651,7 +1651,7 @@ really_inline SuperVector<64> SuperVector<64>::vshr_32 (uint8_t const N) const
    if (N == 0) return *this;
    if (N == 64) return Zeroes();
    SuperVector result;
-    Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_srli_epi32(v->u.v512[0], n)}; });
+    Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_srli_epi32(v->u.v512[0], n))}; });
    return result;
}

@@ -1661,7 +1661,7 @@ really_inline SuperVector<64> SuperVector<64>::vshr_64 (uint8_t const N) const
    if (N == 0) return *this;
    if (N == 16) return Zeroes();
    SuperVector result;
-    Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_srli_epi64(v->u.v512[0], n)}; });
+    Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_srli_epi64(v->u.v512[0], n))}; });
    return result;
}

@@ -1671,7 +1671,7 @@ really_inline SuperVector<64> SuperVector<64>::vshr_128(uint8_t const N) const
    if (N == 0) return *this;
    if (N == 64) return Zeroes();
    SuperVector result;
-    Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_bsrli_epi128(v->u.v512[0], n)}; });
+    Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_bsrli_epi128(v->u.v512[0], n))}; });
    return result;
}

@@ -1719,18 +1719,18 @@ really_inline SuperVector<64> SuperVector<64>::operator>>(uint8_t const N) const
    if (N == 0) {
        return *this;
    } else if (N < 32) {
-        SuperVector<32> lo256 = u.v256[0];
-        SuperVector<32> hi256 = u.v256[1];
+        SuperVector<32> lo256 = SuperVector<32>(u.v256[0]);
+        SuperVector<32> hi256 = SuperVector<32>(u.v256[1]);
        SuperVector<32> carry = hi256 << (32 - N);
        hi256 = hi256 >> N;
        lo256 = (lo256 >> N) | carry;
-        return SuperVector(lo256, hi256);
+        return SuperVector<64>(lo256, hi256);
    } else if (N == 32) {
-        SuperVector<32> hi256 = u.v256[1];
-        return SuperVector(hi256, SuperVector<32>::Zeroes());
+        SuperVector<32> hi256 = SuperVector<32>(u.v256[1]);
+        return SuperVector<64>(hi256, SuperVector<32>::Zeroes());
    } else if (N < 64) {
-        SuperVector<32> hi256 = u.v256[1];
-        return SuperVector(hi256 >> (N - 32), SuperVector<32>::Zeroes());
+        SuperVector<32> hi256 = SuperVector<32>(u.v256[1]);
+        return SuperVector<64>(hi256 >> (N - 32), SuperVector<32>::Zeroes());
    } else {
        return Zeroes();
    }
@@ -1742,18 +1742,18 @@ really_inline SuperVector<64> SuperVector<64>::operator<<(uint8_t const N) const
    if (N == 0) {
        return *this;
    } else if (N < 32) {
-        SuperVector<32> lo256 = u.v256[0];
-        SuperVector<32> hi256 = u.v256[1];
+        SuperVector<32> lo256 = SuperVector<32>(u.v256[0]);
+        SuperVector<32> hi256 = SuperVector<32>(u.v256[1]);
        SuperVector<32> carry = lo256 >> (32 - N);
        hi256 = (hi256 << N) | carry;
        lo256 = lo256 << N;
-        return SuperVector(lo256, hi256);
+        return SuperVector<64>(lo256, hi256);
    } else if (N == 32) {
-        SuperVector<32> lo256 = u.v256[0];
-        return SuperVector(SuperVector<32>::Zeroes(), lo256);
+        SuperVector<32> lo256 = SuperVector<32>(u.v256[0]);
+        return SuperVector<64>(SuperVector<32>::Zeroes(), lo256);
    } else if (N < 64) {
-        SuperVector<32> lo256 = u.v256[0];
-        return SuperVector(SuperVector<32>::Zeroes(), lo256 << (N - 32));
+        SuperVector<32> lo256 = SuperVector<32>(u.v256[0]);
+        return SuperVector<64>(SuperVector<32>::Zeroes(), lo256 << (N - 32));
    } else {
        return Zeroes();
    }
}
{_mm512_shuffle_epi8(u.v512[0], b.u.v512[0])}; + return {SuperVector<64>(_mm512_shuffle_epi8(u.v512[0], b.u.v512[0]))}; } template<> @@ -1795,37 +1795,37 @@ really_inline SuperVector<64> SuperVector<64>::pshufb_maskz(SuperVector<64> b, u { u64a mask = (~0ULL) >> (64 - len); DEBUG_PRINTF("mask = %016llx\n", mask); - return {_mm512_maskz_shuffle_epi8(mask, u.v512[0], b.u.v512[0])}; + return {SuperVector<64>(_mm512_maskz_shuffle_epi8(mask, u.v512[0], b.u.v512[0]))}; } template<> really_inline SuperVector<64> SuperVector<64>::alignr(SuperVector<64> &l, int8_t offset) { -#if defined(HAVE__BUILTIN_CONSTANT_P) +#if defined(HAVE__BUILTIN_CONSTANT_P) && !(defined(__GNUC__) && (__GNUC__ == 14)) if (__builtin_constant_p(offset)) { if (offset == 16) { return *this; } else { - return {_mm512_alignr_epi8(u.v512[0], l.u.v512[0], offset)}; + return {SuperVector<64>(_mm512_alignr_epi8(u.v512[0], l.u.v512[0], offset))}; } } #endif if(offset == 0) { return *this; } else if (offset < 32){ - SuperVector<32> lo256 = u.v256[0]; - SuperVector<32> hi256 = u.v256[1]; - SuperVector<32> o_lo256 = l.u.v256[0]; - SuperVector<32> carry1 = hi256.alignr(lo256,offset); - SuperVector<32> carry2 = o_lo256.alignr(hi256,offset); - return SuperVector(carry1, carry2); + SuperVector<32> lo256 = SuperVector<32>(u.v256[0]); + SuperVector<32> hi256 = SuperVector<32>(u.v256[1]); + SuperVector<32> o_lo256 = SuperVector<32>(l.u.v256[0]); + SuperVector<32> carry1 = SuperVector<32>(hi256.alignr(lo256,offset)); + SuperVector<32> carry2 = SuperVector<32>(o_lo256.alignr(hi256,offset)); + return SuperVector<64>(carry1, carry2); } else if (offset <= 64){ - SuperVector<32> hi256 = u.v256[1]; - SuperVector<32> o_lo256 = l.u.v256[0]; - SuperVector<32> o_hi256 = l.u.v256[1]; - SuperVector<32> carry1 = o_lo256.alignr(hi256, offset - 32); - SuperVector<32> carry2 = o_hi256.alignr(o_lo256,offset -32); - return SuperVector(carry1, carry2); + SuperVector<32> hi256 = SuperVector<32>(u.v256[1]); + SuperVector<32> o_lo256 = SuperVector<32>(l.u.v256[0]); + SuperVector<32> o_hi256 = SuperVector<32>(l.u.v256[1]); + SuperVector<32> carry1 = SuperVector<32>(o_lo256.alignr(hi256, offset - 32)); + SuperVector<32> carry2 = SuperVector<32>(o_hi256.alignr(o_lo256,offset -32)); + return SuperVector<64>(carry1, carry2); } else { return *this; } diff --git a/src/util/supervector/supervector.hpp b/src/util/supervector/supervector.hpp index 5e2de235..0601b937 100644 --- a/src/util/supervector/supervector.hpp +++ b/src/util/supervector/supervector.hpp @@ -205,21 +205,21 @@ public: constexpr SuperVector() {}; SuperVector(SuperVector const &other) :u(other.u) {}; - SuperVector(typename base_type::type const v); + explicit SuperVector(typename base_type::type const v); template - SuperVector(T const other); + explicit SuperVector(T const other); SuperVector(SuperVector const lo, SuperVector const hi); SuperVector(previous_type const lo, previous_type const hi); - static SuperVector dup_u8 (uint8_t other) { return {other}; }; - static SuperVector dup_s8 (int8_t other) { return {other}; }; + static SuperVector dup_u8 (uint8_t other) { return {SuperVector(other)}; }; + static SuperVector dup_s8 (int8_t other) { return {SuperVector(other)}; }; static SuperVector dup_u16(uint16_t other) { return {other}; }; static SuperVector dup_s16(int16_t other) { return {other}; }; static SuperVector dup_u32(uint32_t other) { return {other}; }; static SuperVector dup_s32(int32_t other) { return {other}; }; - static SuperVector dup_u64(uint64_t other) { return {other}; }; + static 
SuperVector dup_u64(uint64_t other) { return {SuperVector(other)}; }; static SuperVector dup_s64(int64_t other) { return {other}; }; void operator=(SuperVector const &other); diff --git a/src/util/ue2_graph.h b/src/util/ue2_graph.h index bff49566..52910520 100644 --- a/src/util/ue2_graph.h +++ b/src/util/ue2_graph.h @@ -210,7 +210,7 @@ public: * edge() and add_edge(). As we have null_edges and we always allow * parallel edges, the bool component of the return from these functions is * not required. */ - edge_descriptor(const std::pair<edge_descriptor, bool> &tup) + explicit edge_descriptor(const std::pair<edge_descriptor, bool> &tup) : p(tup.first.p), serial(tup.first.serial) { assert(tup.second == (bool)tup.first); } @@ -432,7 +432,7 @@ public: vertex_descriptor> { using super = typename adjacency_iterator::iterator_adaptor_; public: - adjacency_iterator(out_edge_iterator a) : super(std::move(a)) { } + explicit adjacency_iterator(out_edge_iterator a) : super(std::move(a)) { } adjacency_iterator() { } vertex_descriptor dereference() const { @@ -448,7 +448,7 @@ public: vertex_descriptor> { using super = typename inv_adjacency_iterator::iterator_adaptor_; public: - inv_adjacency_iterator(in_edge_iterator a) : super(std::move(a)) { } + explicit inv_adjacency_iterator(in_edge_iterator a) : super(std::move(a)) { } inv_adjacency_iterator() { } vertex_descriptor dereference() const { @@ -793,7 +793,7 @@ public: typedef typename boost::lvalue_property_map_tag category; - prop_map(value_type P_of::*m_in) : member(m_in) { } + explicit prop_map(value_type P_of::*m_in) : member(m_in) { } reference operator[](key_type k) const { return k.raw()->props.*member; diff --git a/tools/hsbench/engine_chimera.h b/tools/hsbench/engine_chimera.h index 187dec8c..52ec1179 100644 --- a/tools/hsbench/engine_chimera.h +++ b/tools/hsbench/engine_chimera.h @@ -66,32 +66,32 @@ public: explicit EngineChimera(ch_database_t *db, CompileCHStats cs); ~EngineChimera(); - std::unique_ptr<EngineContext> makeContext() const; + std::unique_ptr<EngineContext> makeContext() const override; void scan(const char *data, unsigned int len, unsigned int id, - ResultEntry &result, EngineContext &ectx) const; + ResultEntry &result, EngineContext &ectx) const override; void scan_vectored(const char *const *data, const unsigned int *len, unsigned int count, unsigned int streamId, - ResultEntry &result, EngineContext &ectx) const; + ResultEntry &result, EngineContext &ectx) const override; std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx, - unsigned id) const; + unsigned id) const override; void streamClose(std::unique_ptr<EngineStream> stream, - ResultEntry &result) const; + ResultEntry &result) const override; void streamCompressExpand(EngineStream &stream, - std::vector<char> &temp) const; + std::vector<char> &temp) const override; void streamScan(EngineStream &stream, const char *data, unsigned int len, - unsigned int id, ResultEntry &result) const; + unsigned int id, ResultEntry &result) const override; - void printStats() const; + void printStats() const override; - void printCsvStats() const; + void printCsvStats() const override; - void sqlStats(SqlDB &db) const; + void sqlStats(SqlDB &db) const override; private: ch_database_t *db; diff --git a/tools/hsbench/engine_hyperscan.cpp b/tools/hsbench/engine_hyperscan.cpp index 95461de5..218a2d4c 100644 --- a/tools/hsbench/engine_hyperscan.cpp +++ b/tools/hsbench/engine_hyperscan.cpp @@ -132,7 +132,7 @@ void EngineHyperscan::scan(const char *data, unsigned int len, unsigned int id, ResultEntry &result, EngineContext &ectx) const { assert(data); - EngineHSContext &ctx = static_cast<EngineHSContext &>(ectx); +
const EngineHSContext &ctx = static_cast<const EngineHSContext &>(ectx); ScanHSContext sc(id, result, nullptr); auto callback = echo_matches ? onMatchEcho : onMatch; hs_error_t rv = hs_scan(db, data, len, 0, ctx.scratch, callback, &sc); @@ -150,7 +150,7 @@ void EngineHyperscan::scan_vectored(const char *const *data, assert(data); assert(len); - EngineHSContext &ctx = static_cast<EngineHSContext &>(ectx); + const EngineHSContext &ctx = static_cast<const EngineHSContext &>(ectx); ScanHSContext sc(streamId, result, nullptr); auto callback = echo_matches ? onMatchEcho : onMatch; hs_error_t rv = @@ -198,8 +198,8 @@ void EngineHyperscan::streamScan(EngineStream &stream, const char *data, ResultEntry &result) const { assert(data); - auto &s = static_cast<EngineHSStream &>(stream); - EngineHSContext &ctx = *s.ctx; + const auto &s = static_cast<const EngineHSStream &>(stream); + const EngineHSContext &ctx = *s.ctx; ScanHSContext sc(id, result, &s); auto callback = echo_matches ? onMatchEcho : onMatch; @@ -215,7 +215,7 @@ void EngineHyperscan::streamScan(EngineStream &stream, const char *data, void EngineHyperscan::streamCompressExpand(EngineStream &stream, vector<char> &temp) const { size_t used = 0; - auto &s = static_cast<EngineHSStream &>(stream); + const auto &s = static_cast<const EngineHSStream &>(stream); hs_error_t err = hs_compress_stream(s.id, temp.data(), temp.size(), &used); if (err == HS_INSUFFICIENT_SPACE) { @@ -248,7 +248,7 @@ void EngineHyperscan::printStats() const { printf("Signature set: %s\n", compile_stats.sigs_name.c_str()); } printf("Signatures: %s\n", compile_stats.signatures.c_str()); - printf("Hyperscan info: %s\n", compile_stats.db_info.c_str()); + printf("Vectorscan info: %s\n", compile_stats.db_info.c_str()); printf("Expression count: %'zu\n", compile_stats.expressionCount); printf("Bytecode size: %'zu bytes\n", compile_stats.compiledSize); printf("Database CRC: 0x%x\n", compile_stats.crc32); @@ -456,7 +456,7 @@ buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode, if (err == HS_COMPILER_ERROR) { if (compile_err->expression >= 0) { - printf("Compile error for signature #%u: %s\n", + printf("Compile error for signature #%d: %s\n", compile_err->expression, compile_err->message); } else { printf("Compile error: %s\n", compile_err->message); diff --git a/tools/hsbench/engine_hyperscan.h b/tools/hsbench/engine_hyperscan.h index afbdf098..ccbc2fa8 100644 --- a/tools/hsbench/engine_hyperscan.h +++ b/tools/hsbench/engine_hyperscan.h @@ -75,32 +75,32 @@ public: explicit EngineHyperscan(hs_database_t *db, CompileHSStats cs); ~EngineHyperscan(); - std::unique_ptr<EngineContext> makeContext() const; + std::unique_ptr<EngineContext> makeContext() const override; void scan(const char *data, unsigned int len, unsigned int id, - ResultEntry &result, EngineContext &ectx) const; + ResultEntry &result, EngineContext &ectx) const override; void scan_vectored(const char *const *data, const unsigned int *len, unsigned int count, unsigned int streamId, - ResultEntry &result, EngineContext &ectx) const; + ResultEntry &result, EngineContext &ectx) const override; std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx, - unsigned id) const; + unsigned id) const override; void streamClose(std::unique_ptr<EngineStream> stream, - ResultEntry &result) const; + ResultEntry &result) const override; void streamCompressExpand(EngineStream &stream, - std::vector<char> &temp) const; + std::vector<char> &temp) const override; void streamScan(EngineStream &stream, const char *data, unsigned int len, - unsigned int id, ResultEntry &result) const; + unsigned int id, ResultEntry &result) const override; - void printStats() const; + void printStats() const override; - void printCsvStats() const; + void 
printCsvStats() const override; - void sqlStats(SqlDB &db) const; + void sqlStats(SqlDB &db) const override; private: std::vector<std::unique_ptr<PcreDB>> dbs; diff --git a/tools/hsbench/engine_pcre.h b/tools/hsbench/engine_pcre.h index 9569bef4..7ae9147f 100644 --- a/tools/hsbench/engine_pcre.h +++ b/tools/hsbench/engine_pcre.h @@ -74,32 +74,32 @@ public: CompilePCREStats cs, int capture_cnt_in); ~EnginePCRE(); - std::unique_ptr<EngineContext> makeContext() const; + std::unique_ptr<EngineContext> makeContext() const override; void scan(const char *data, unsigned int len, unsigned int id, - ResultEntry &result, EngineContext &ectx) const; + ResultEntry &result, EngineContext &ectx) const override; void scan_vectored(const char *const *data, const unsigned int *len, unsigned int count, unsigned int streamId, - ResultEntry &result, EngineContext &ectx) const; + ResultEntry &result, EngineContext &ectx) const override; std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx, - unsigned id) const; + unsigned id) const override; void streamClose(std::unique_ptr<EngineStream> stream, - ResultEntry &result) const; + ResultEntry &result) const override; void streamCompressExpand(EngineStream &stream, - std::vector<char> &temp) const; + std::vector<char> &temp) const override; void streamScan(EngineStream &stream, const char *data, unsigned int len, - unsigned int id, ResultEntry &result) const; + unsigned int id, ResultEntry &result) const override; - void printStats() const; + void printStats() const override; - void printCsvStats() const; + void printCsvStats() const override; - void sqlStats(SqlDB &db) const; + void sqlStats(SqlDB &db) const override; private: std::vector<std::unique_ptr<PcreDB>> dbs; diff --git a/tools/hsbench/huge.cpp b/tools/hsbench/huge.cpp index 2fa15ebf..2e8db824 100644 --- a/tools/hsbench/huge.cpp +++ b/tools/hsbench/huge.cpp @@ -183,7 +183,7 @@ long gethugepagesize(void) { hpage_size = -1; } else { /* convert from kb to bytes */ - hpage_size = 1024 * hpage_kb; + hpage_size = 1024L * hpage_kb; } return hpage_size; diff --git a/tools/hsbench/main.cpp b/tools/hsbench/main.cpp index 8a78881a..c2366087 100644 --- a/tools/hsbench/main.cpp +++ b/tools/hsbench/main.cpp @@ -227,7 +227,7 @@ struct BenchmarkSigs { /** Process command-line arguments. Prints usage and exits on error. */ static void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets, - UNUSED unique_ptr<Grey> &grey) { + UNUSED const unique_ptr<Grey> &grey) { const char options[] = "-b:c:Cd:e:E:G:hHi:n:No:p:PsS:Vw:z:" #if defined(HAVE_DECL_PTHREAD_SETAFFINITY_NP) "T:" // add the thread flag #endif @@ -465,7 +465,7 @@ void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets, /** Start the global timer. */ static -void startTotalTimer(ThreadContext *ctx) { +void startTotalTimer(const ThreadContext *ctx) { if (ctx->num != 0) { return; // only runs in the first thread } @@ -474,7 +474,7 @@ void startTotalTimer(ThreadContext *ctx) { /** Stop the global timer and calculate totals.
*/ static -void stopTotalTimer(ThreadContext *ctx) { +void stopTotalTimer(const ThreadContext *ctx) { if (ctx->num != 0) { return; // only runs in the first thread } @@ -1013,9 +1013,9 @@ int HS_CDECL main(int argc, char *argv[]) { if (sigSets.empty()) { SignatureSet sigs; sigs.reserve(exprMapTemplate.size()); - for (auto i : exprMapTemplate | map_keys) { - sigs.push_back(i); - } + const auto &i = exprMapTemplate | map_keys; + std::copy(begin(i), end(i), std::back_inserter(sigs)); + sigSets.emplace_back(exprPath, std::move(sigs)); } diff --git a/tools/hscheck/main.cpp b/tools/hscheck/main.cpp index f3e9419a..6ee14930 100644 --- a/tools/hscheck/main.cpp +++ b/tools/hscheck/main.cpp @@ -97,12 +97,12 @@ unsigned int countFailures = 0; class ParsedExpr { public: - ParsedExpr(string regex_in, unsigned int flags_in, hs_expr_ext ext_in) + ParsedExpr(string regex_in, unsigned int flags_in, const hs_expr_ext& ext_in) : regex(regex_in), flags(flags_in), ext(ext_in) {} ~ParsedExpr() {} string regex; unsigned int flags; - hs_expr_ext ext; + const hs_expr_ext& ext; }; typedef map ExprExtMap; @@ -491,7 +491,7 @@ void usage() { } static -void processArgs(int argc, char *argv[], UNUSED unique_ptr &grey) { +void processArgs(int argc, char *argv[], UNUSED const unique_ptr &grey) { const char options[] = "e:E:s:z:hHLNV8G:T:BC"; bool signatureSet = false; int literalFlag = 0; diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index f5577d40..e2196459 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -102,6 +102,7 @@ set(unit_internal_SOURCES internal/rvermicelli.cpp internal/simd_utils.cpp internal/supervector.cpp + internal/sheng.cpp internal/shuffle.cpp internal/shufti.cpp internal/state_compress.cpp diff --git a/unit/hyperscan/logical_combination.cpp b/unit/hyperscan/logical_combination.cpp index 9558948f..20b6e5a1 100644 --- a/unit/hyperscan/logical_combination.cpp +++ b/unit/hyperscan/logical_combination.cpp @@ -45,8 +45,8 @@ TEST(LogicalCombination, SingleComb1) { string data = "abcdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef"; const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", "ijkl[mMn]", "(101 & 102 & 103) | (104 & !105)"}; - unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION}; - unsigned ids[] = {101, 102, 103, 104, 105, 1001}; + const unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION}; + const unsigned ids[] = {101, 102, 103, 104, 105, 1001}; hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -92,9 +92,9 @@ TEST(LogicalCombination, SingleCombQuietSub1) { string data = "abcdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef"; const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", "ijkl[mMn]", "(101 & 102 & 103) | (104 & !105)"}; - unsigned flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET, + const unsigned flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET, 0, HS_FLAG_COMBINATION}; - unsigned ids[] = {101, 102, 103, 104, 105, 1001}; + const unsigned ids[] = {101, 102, 103, 104, 105, 1001}; hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -133,11 +133,11 @@ TEST(LogicalCombination, MultiCombQuietSub1) { const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", "ijkl[mMn]", "(101 & 102 & 103) | (104 & !105)", "!101 & 102", "!(!101 | 102)", "101 & !102"}; - unsigned flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET, + const unsigned flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET, 
HS_FLAG_QUIET, 0, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION}; - unsigned ids[] = {101, 102, 103, 104, 105, 1001, 1002, 1003, 1004}; + const unsigned ids[] = {101, 102, 103, 104, 105, 1001, 1002, 1003, 1004}; hs_error_t err = hs_compile_multi(expr, flags, ids, 9, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -178,13 +178,13 @@ TEST(LogicalCombination, MultiHighlanderCombQuietSub1) { const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", "ijkl[mMn]", "(101 & 102 & 103) | (104 & !105)", "!101 & 102", "!(!101 | 102)", "101 & !102"}; - unsigned flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET, + const unsigned flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET, 0, HS_FLAG_COMBINATION | HS_FLAG_SINGLEMATCH, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION | HS_FLAG_SINGLEMATCH, HS_FLAG_COMBINATION | HS_FLAG_SINGLEMATCH}; - unsigned ids[] = {101, 102, 103, 104, 105, 1001, 1002, 1003, 1004}; + const unsigned ids[] = {101, 102, 103, 104, 105, 1001, 1002, 1003, 1004}; hs_error_t err = hs_compile_multi(expr, flags, ids, 9, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -219,11 +219,11 @@ TEST(LogicalCombination, MultiQuietCombQuietSub1) { const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", "ijkl[mMn]", "(101 & 102 & 103) | (104 & !105)", "!101 & 102", "!(!101 | 102)", "101 & !102"}; - unsigned flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET, + const unsigned flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET, 0, HS_FLAG_COMBINATION | HS_FLAG_QUIET, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION | HS_FLAG_QUIET}; - unsigned ids[] = {101, 102, 103, 104, 105, 1001, 1002, 1003, 1004}; + const unsigned ids[] = {101, 102, 103, 104, 105, 1001, 1002, 1003, 1004}; hs_error_t err = hs_compile_multi(expr, flags, ids, 9, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -255,8 +255,8 @@ TEST(LogicalCombination, SingleComb2) { string data = "abbdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef"; const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", "ijkl[mMn]", "(201 | 202 & 203) & (!204 | 205)"}; - unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION}; - unsigned ids[] = {201, 202, 203, 204, 205, 1002}; + const unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION}; + const unsigned ids[] = {201, 202, 203, 204, 205, 1002}; hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -299,9 +299,9 @@ TEST(LogicalCombination, SingleCombQuietSub2) { string data = "abbdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef"; const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", "ijkl[mMn]", "(201 | 202 & 203) & (!204 | 205)"}; - unsigned flags[] = {0, HS_FLAG_QUIET, HS_FLAG_QUIET, 0, HS_FLAG_QUIET, + const unsigned flags[] = {0, HS_FLAG_QUIET, HS_FLAG_QUIET, 0, HS_FLAG_QUIET, HS_FLAG_COMBINATION}; - unsigned ids[] = {201, 202, 203, 204, 205, 1002}; + const unsigned ids[] = {201, 202, 203, 204, 205, 1002}; hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -339,8 +339,8 @@ TEST(LogicalCombination, SingleComb3) { string data = "abcijklndefxxfoobarrrghabcxdefxteakettleeeeexxxxijklnxxdef"; const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", "ijkl[mMn]", "((301 | 302) & 303) & (304 | 305)"}; - unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION}; - unsigned ids[] = {301, 302, 303, 304, 305, 1003}; + const unsigned flags[] = {0, 0, 0, 0, 0, 
HS_FLAG_COMBINATION}; + const unsigned ids[] = {301, 302, 303, 304, 305, 1003}; hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -387,9 +387,9 @@ TEST(LogicalCombination, SingleCombQuietSub3) { string data = "abcijklndefxxfoobarrrghabcxdefxteakettleeeeexxxxijklnxxdef"; const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", "ijkl[mMn]", "((301 | 302) & 303) & (304 | 305)"}; - unsigned flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, 0, HS_FLAG_QUIET, + const unsigned flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, 0, HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_COMBINATION}; - unsigned ids[] = {301, 302, 303, 304, 305, 1003}; + const unsigned ids[] = {301, 302, 303, 304, 305, 1003}; hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -429,9 +429,9 @@ TEST(LogicalCombination, MultiCombDupSub4) { "ijkl[mMn]", "(201 & 202 & 203) | (204 & !205)", "(201 | 202 & 203) & (!204 | 205)", "((201 | 202) & 203) & (204 | 205)"}; - unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION, + const unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION}; - unsigned ids[] = {201, 202, 203, 204, 205, 1001, 1002, 1003}; + const unsigned ids[] = {201, 202, 203, 204, 205, 1001, 1002, 1003}; hs_error_t err = hs_compile_multi(expr, flags, ids, 8, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -486,10 +486,10 @@ TEST(LogicalCombination, MultiCombQuietDupSub4) { "ijkl[mMn]", "(201 & 202 & 203) | (204 & !205)", "(201 | 202 & 203) & (!204 | 205)", "((201 | 202) & 203) & (204 | 205)"}; - unsigned flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET, 0, + const unsigned flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET, 0, HS_FLAG_QUIET, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION}; - unsigned ids[] = {201, 202, 203, 204, 205, 1001, 1002, 1003}; + const unsigned ids[] = {201, 202, 203, 204, 205, 1001, 1002, 1003}; hs_error_t err = hs_compile_multi(expr, flags, ids, 8, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -546,10 +546,10 @@ TEST(LogicalCombination, MultiCombUniSub5) { "(101 & 102 & 103) | (104 & !105)", "(201 | 202 & 203) & (!204 | 205)", "((301 | 302) & 303) & (304 | 305)"}; - unsigned flags[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + const unsigned flags[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION}; - unsigned ids[] = {101, 102, 103, 104, 105, 201, 202, 203, 204, 205, 301, + const unsigned ids[] = {101, 102, 103, 104, 105, 201, 202, 203, 204, 205, 301, 302, 303, 304, 305, 1001, 1002, 1003}; hs_error_t err = hs_compile_multi(expr, flags, ids, 18, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -636,12 +636,12 @@ TEST(LogicalCombination, MultiCombQuietUniSub5) { "(101 & 102 & 103) | (104 & !105)", "(201 | 202 & 203) & (!204 | 205)", "((301 | 302) & 303) & (304 | 305)"}; - unsigned flags[] = {0, HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET, 0, + const unsigned flags[] = {0, HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET, 0, HS_FLAG_QUIET, 0, HS_FLAG_QUIET, 0, HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET, 0, HS_FLAG_QUIET, 0, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION}; - unsigned ids[] = {101, 102, 103, 104, 105, 201, 202, 203, 204, 205, 301, + const unsigned ids[] = {101, 102, 103, 104, 105, 201, 202, 203, 204, 205, 301, 302, 303, 304, 305, 1001, 1002, 1003}; hs_error_t err = hs_compile_multi(expr, flags, ids, 18, HS_MODE_NOSTREAM, nullptr, &db, 
&compile_err); @@ -702,8 +702,8 @@ TEST(LogicalCombination, SingleCombPurelyNegative6) { string data = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"; const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", "ijkl[mMn]", "(!201 | 202 & 203) & (!204 | 205)"}; - unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION}; - unsigned ids[] = {201, 202, 203, 204, 205, 1002}; + const unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION}; + const unsigned ids[] = {201, 202, 203, 204, 205, 1002}; hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -734,8 +734,8 @@ TEST(LogicalCombination, SingleCombQuietPurelyNegative6) { string data = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"; const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", "ijkl[mMn]", "(!201 | 202 & 203) & (!204 | 205)"}; - unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION | HS_FLAG_QUIET}; - unsigned ids[] = {201, 202, 203, 204, 205, 1002}; + const unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION | HS_FLAG_QUIET}; + const unsigned ids[] = {201, 202, 203, 204, 205, 1002}; hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -775,10 +775,10 @@ TEST(LogicalCombination, MultiCombPurelyNegativeUniSub6) { "(101 & 102 & 103) | (!104 & !105)", "(!201 | 202 & 203) & (!204 | 205)", "((301 | 302) & 303) & (304 | 305)"}; - unsigned flags[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + const unsigned flags[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION}; - unsigned ids[] = {101, 102, 103, 104, 105, 201, 202, 203, 204, 205, 301, + const unsigned ids[] = {101, 102, 103, 104, 105, 201, 202, 203, 204, 205, 301, 302, 303, 304, 305, 1001, 1002, 1003}; hs_error_t err = hs_compile_multi(expr, flags, ids, 18, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -822,11 +822,11 @@ TEST(LogicalCombination, MultiCombPurelyNegativeUniSubEOD6) { "(101 & 102 & 103) | (!104 & !105)", "(!201 | 202 & 203) & (!204 | 205)", "((301 | 302) & 303) & (304 | 305)"}; - unsigned flags[] = {0, 0, 0, 0, 0, 0, HS_FLAG_MULTILINE, + const unsigned flags[] = {0, 0, 0, 0, 0, 0, HS_FLAG_MULTILINE, 0, 0, 0, 0, 0, 0, 0, 0, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION}; - unsigned ids[] = {101, 102, 103, 104, 105, 201, 202, 203, 204, 205, 301, + const unsigned ids[] = {101, 102, 103, 104, 105, 201, 202, 203, 204, 205, 301, 302, 303, 304, 305, 1001, 1002, 1003}; hs_error_t err = hs_compile_multi(expr, flags, ids, 18, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -875,8 +875,8 @@ TEST(LogicalCombination, MultiCombStream1) { "z"}; const char *expr[] = {"abc", "def", "xyz", "zxyz", "101 & 102", "201 & !202"}; - unsigned flags[] = {0, 0, 0, 0, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION}; - unsigned ids[] = {101, 102, 201, 202, 1001, 1002}; + const unsigned flags[] = {0, 0, 0, 0, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION}; + const unsigned ids[] = {101, 102, 201, 202, 1001, 1002}; hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_STREAM, nullptr, &db, &compile_err); diff --git a/unit/hyperscan/multi.cpp b/unit/hyperscan/multi.cpp index 85d8cd25..3a344fe5 100644 --- a/unit/hyperscan/multi.cpp +++ b/unit/hyperscan/multi.cpp @@ -44,8 +44,8 @@ TEST(MMAdaptor, norm_cont1) { // UE-901 CallBackContext c; string data = "aooAaooAbarZ"; const char *expr[] = {"aoo[A-K]", "bar[L-Z]"}; - unsigned flags[] = {0, 0}; - unsigned ids[] = {30, 31}; + 
const unsigned flags[] = {0, 0}; + const unsigned ids[] = {30, 31}; hs_error_t err = hs_compile_multi(expr, flags, ids, 2, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -77,8 +77,8 @@ TEST(MMAdaptor, norm_cont2) { CallBackContext c; string data = "aooAaooAbarZ "; const char *expr[] = {"aoo[A-K][^\n]{16}", "bar[L-Z][^\n]{16}"}; - unsigned flags[] = {0, 0}; - unsigned ids[] = {30, 31}; + const unsigned flags[] = {0, 0}; + const unsigned ids[] = {30, 31}; hs_error_t err = hs_compile_multi(expr, flags, ids, 2, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -110,8 +110,8 @@ TEST(MMAdaptor, norm_halt1) { CallBackContext c; string data = "aooAaooAbarZ"; const char *expr[] = {"aoo[A-K]", "bar[L-Z]"}; - unsigned flags[] = {0, 0}; - unsigned ids[] = {30, 31}; + const unsigned flags[] = {0, 0}; + const unsigned ids[] = {30, 31}; hs_error_t err = hs_compile_multi(expr, flags, ids, 2, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -141,8 +141,8 @@ TEST(MMAdaptor, norm_halt2) { // UE-901 CallBackContext c; string data = "aooAaooAbarZ "; const char *expr[] = {"aoo[A-K][^\n]{16}", "bar[L-Z][^\n]{16}"}; - unsigned flags[] = {0, 0}; - unsigned ids[] = {30, 31}; + const unsigned flags[] = {0, 0}; + const unsigned ids[] = {30, 31}; hs_error_t err = hs_compile_multi(expr, flags, ids, 2, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -172,8 +172,8 @@ TEST(MMAdaptor, high_cont1) { // UE-901 CallBackContext c; string data = "aooAaooAbarZ"; const char *expr[] = {"aoo[A-K]", "bar[L-Z]"}; - unsigned flags[] = {HS_FLAG_SINGLEMATCH, 0}; - unsigned ids[] = {30, 31}; + const unsigned flags[] = {HS_FLAG_SINGLEMATCH, 0}; + const unsigned ids[] = {30, 31}; hs_error_t err = hs_compile_multi(expr, flags, ids, 2, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -204,8 +204,8 @@ TEST(MMAdaptor, high_cont2) { CallBackContext c; string data = "aooAaooAbarZ "; const char *expr[] = {"aoo[A-K][^\n]{16}", "bar[L-Z][^\n]{16}"}; - unsigned flags[] = {HS_FLAG_SINGLEMATCH, 0}; - unsigned ids[] = {30, 31}; + const unsigned flags[] = {HS_FLAG_SINGLEMATCH, 0}; + const unsigned ids[] = {30, 31}; hs_error_t err = hs_compile_multi(expr, flags, ids, 2, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -236,8 +236,8 @@ TEST(MMAdaptor, high_halt1) { CallBackContext c; string data = "aooAaooAbarZ"; const char *expr[] = {"aoo[A-K]", "bar[L-Z]"}; - unsigned flags[] = {HS_FLAG_SINGLEMATCH, 0}; - unsigned ids[] = {30, 31}; + const unsigned flags[] = {HS_FLAG_SINGLEMATCH, 0}; + const unsigned ids[] = {30, 31}; hs_error_t err = hs_compile_multi(expr, flags, ids, 2, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -267,8 +267,8 @@ TEST(MMAdaptor, high_halt2) { CallBackContext c; string data = "aooAaooAbarZbarZaooA "; const char *expr[] = {"aoo[A-K][^\n]{16}", "bar[L-Z][^\n]{16}"}; - unsigned flags[] = {HS_FLAG_SINGLEMATCH, 0}; - unsigned ids[] = {30, 31}; + const unsigned flags[] = {HS_FLAG_SINGLEMATCH, 0}; + const unsigned ids[] = {30, 31}; hs_error_t err = hs_compile_multi(expr, flags, ids, 2, HS_MODE_NOSTREAM, nullptr, &db, &compile_err); @@ -342,7 +342,7 @@ TEST(MMRoseLiteralPath, issue_141) { const char *expr[] = {"/odezhda-dlya-bega/", "kurtki-i-vetrovki-dlya-bega", "futbolki-i-mayki-dlya-bega"}; - unsigned flags[] = {HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, + const unsigned flags[] = {HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH}; hs_error_t err = hs_compile_multi(expr, flags, nullptr, 3, HS_MODE_BLOCK, diff --git a/unit/hyperscan/test_util.cpp 
b/unit/hyperscan/test_util.cpp index f6c20a74..10d23962 100644 --- a/unit/hyperscan/test_util.cpp +++ b/unit/hyperscan/test_util.cpp @@ -58,7 +58,7 @@ std::ostream &operator<<(std::ostream &o, const pattern &p) { } hs_database_t *buildDB(const vector<pattern> &patterns, unsigned int mode, - hs_platform_info *plat) { + const hs_platform_info *plat) { vector<const char *> expressions; vector<unsigned int> flags; vector<unsigned int> ids; @@ -92,7 +92,7 @@ hs_database_t *buildDB(const pattern &expr, unsigned int mode) { hs_database_t *buildDB(const char *expression, unsigned int flags, unsigned int id, unsigned int mode, - hs_platform_info_t *plat) { + const hs_platform_info_t *plat) { return buildDB({pattern(expression, flags, id)}, mode, plat); } diff --git a/unit/hyperscan/test_util.h b/unit/hyperscan/test_util.h index 21862b6b..01fdca0b 100644 --- a/unit/hyperscan/test_util.h +++ b/unit/hyperscan/test_util.h @@ -99,11 +99,11 @@ struct pattern { std::ostream &operator<<(std::ostream &o, const pattern &p); hs_database_t *buildDB(const std::vector<pattern> &patterns, unsigned int mode, - hs_platform_info *plat = nullptr); + const hs_platform_info *plat = nullptr); hs_database_t *buildDB(const pattern &pat, unsigned int mode); hs_database_t *buildDB(const char *expression, unsigned int flags, unsigned int id, unsigned int mode, - hs_platform_info *plat = nullptr); + const hs_platform_info *plat = nullptr); hs_database_t *buildDB(const char *filename, unsigned int mode, unsigned int extra_flags = 0); hs_database_t *buildDB(const char *filename, unsigned int mode, diff --git a/unit/internal/bitutils.cpp b/unit/internal/bitutils.cpp index 8af8f9a4..6adfc2d6 100644 --- a/unit/internal/bitutils.cpp +++ b/unit/internal/bitutils.cpp @@ -62,7 +62,7 @@ u32 our_clzll(u64a x) { TEST(BitUtils, findAndClearLSB32_1) { // test that it can find every single-bit case for (unsigned int i = 0; i < 32; i++) { - u32 input = 1 << i; + u32 input = 1U << i; u32 idx = findAndClearLSB_32(&input); EXPECT_EQ(i, idx); EXPECT_EQ(0U, input); @@ -112,7 +112,7 @@ TEST(BitUtils, findAndClearLSB64_2) { TEST(BitUtils, findAndClearMSB32_1) { // test that it can find every single-bit case for (unsigned int i = 0; i < 32; i++) { - u32 input = 1 << i; + u32 input = 1U << i; u32 idx = findAndClearMSB_32(&input); EXPECT_EQ(i, idx); EXPECT_EQ(0U, input); diff --git a/unit/internal/fdr.cpp b/unit/internal/fdr.cpp index 87ab0974..46f19be7 100644 --- a/unit/internal/fdr.cpp +++ b/unit/internal/fdr.cpp @@ -61,10 +61,10 @@ using namespace ue2; #define CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint) \ { \ auto descr = getTeddyDescription(hint); \ - if (descr && fdr == nullptr) { \ + if (descr && fdr.get() == nullptr) { \ return; /* cannot build Teddy for this set of literals */ \ } else { \ - ASSERT_TRUE(fdr != nullptr); \ + ASSERT_TRUE(fdr.get() != nullptr); \ } \ } #endif @@ -145,7 +145,7 @@ bytecode_ptr<FDR> buildFDREngineHinted(std::vector<hwlmLiteral> &lits, auto proto = fdrBuildProtoHinted(HWLM_ENGINE_FDR, lits, make_small, hint, target, grey); if (!proto) { - return nullptr; + return ue2::bytecode_ptr<FDR>(nullptr); } return fdrBuildTable(*proto, grey); } @@ -156,7 +156,7 @@ bytecode_ptr<FDR> buildFDREngine(std::vector<hwlmLiteral> &lits, const Grey &grey) { auto proto = fdrBuildProto(HWLM_ENGINE_FDR, lits, make_small, target, grey); if (!proto) { - return nullptr; + return bytecode_ptr<FDR>(nullptr); } return fdrBuildTable(*proto, grey); } @@ -421,7 +421,7 @@ TEST_P(FDRp, moveByteStream) { size_t size = fdrSize(fdrTable0.get()); auto fdrTable = make_bytecode_ptr<FDR>(size, 64); - EXPECT_NE(nullptr, fdrTable); + EXPECT_NE(nullptr, fdrTable.get());
memcpy(fdrTable.get(), fdrTable0.get(), size); @@ -706,7 +706,7 @@ TEST(FDR, FDRTermS) { lits.push_back(hwlmLiteral("ff", 0, 1)); auto fdr = buildFDREngine(lits, false, get_current_target(), Grey()); - ASSERT_TRUE(fdr != nullptr); + ASSERT_TRUE(fdr.get() != nullptr); // check matches @@ -729,7 +729,7 @@ TEST(FDR, FDRTermB) { lits.push_back(hwlmLiteral("ff", 0, 1)); auto fdr = buildFDREngine(lits, false, get_current_target(), Grey()); - ASSERT_TRUE(fdr != nullptr); + ASSERT_TRUE(fdr.get() != nullptr); // check matches struct hs_scratch scratch; diff --git a/unit/internal/fdr_flood.cpp b/unit/internal/fdr_flood.cpp index 81afbeaa..a3b0cc96 100644 --- a/unit/internal/fdr_flood.cpp +++ b/unit/internal/fdr_flood.cpp @@ -55,10 +55,10 @@ using namespace ue2; #define CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint) \ { \ auto descr = getTeddyDescription(hint); \ - if (descr && fdr != nullptr) { \ + if (descr && fdr.get() != nullptr) { \ return; \ } else { \ - ASSERT_TRUE(fdr != nullptr); \ + ASSERT_TRUE(fdr.get() != nullptr); \ } \ } #endif @@ -154,7 +154,7 @@ TEST_P(FDRFloodp, NoMask) { struct hs_scratch scratch; scratch.fdr_conf = NULL; - while (1) { + while (c != 255) { SCOPED_TRACE((unsigned int)c); u8 bit = 1 << (c & 0x7); u8 cAlt = c ^ bit; @@ -233,9 +233,7 @@ TEST_P(FDRFloodp, NoMask) { } matchesCounts.clear(); - if (++c == 0) { - break; - } + ++c; } } @@ -248,7 +246,7 @@ TEST_P(FDRFloodp, WithMask) { struct hs_scratch scratch; scratch.fdr_conf = NULL; - while (1) { + while (c != 255) { u8 bit = 1 << (c & 0x7); u8 cAlt = c ^ bit; SCOPED_TRACE((unsigned int)c); @@ -396,9 +394,7 @@ TEST_P(FDRFloodp, WithMask) { } matchesCounts.clear(); - if (++c == '\0') { - break; - } + ++c; } } @@ -414,7 +410,7 @@ TEST_P(FDRFloodp, StreamingMask) { struct hs_scratch scratch; scratch.fdr_conf = NULL; - while (1) { + while (c != 255) { u8 bit = 1 << (c & 0x7); u8 cAlt = c ^ bit; SCOPED_TRACE((unsigned int)c); @@ -488,7 +484,6 @@ TEST_P(FDRFloodp, StreamingMask) { Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); - hwlm_error_t fdrStatus; const u32 cnt4 = dataSize - 4 + 1; for (u32 streamChunk = 1; streamChunk <= 16; streamChunk *= 2) { @@ -496,7 +491,7 @@ TEST_P(FDRFloodp, StreamingMask) { const u8 *d = data.data(); // reference past the end of fake history to allow headroom const u8 *fhist = fake_history.data() + fake_history_size; - fdrStatus = fdrExecStreaming(fdr.get(), fhist, 0, d, streamChunk, 0, + hwlm_error_t fdrStatus = fdrExecStreaming(fdr.get(), fhist, 0, d, streamChunk, 0, countCallback, &scratch, HWLM_ALL_GROUPS); ASSERT_EQ(0, fdrStatus); @@ -549,9 +544,7 @@ TEST_P(FDRFloodp, StreamingMask) { } } - if (++c == '\0') { - break; - } + ++c; } matchesCounts.clear(); } diff --git a/unit/internal/flat_set.cpp b/unit/internal/flat_set.cpp index 10607a6f..174a4771 100644 --- a/unit/internal/flat_set.cpp +++ b/unit/internal/flat_set.cpp @@ -48,9 +48,7 @@ std::ostream &operator<<(std::ostream &os, const flat_set &f) { os << "{"; for (auto it = begin(f); it != end(f); ++it) { os << *it; - if (it != end(f)) { - os << ", "; - } + os << ", "; } os << "}"; return os; diff --git a/unit/internal/insertion_ordered.cpp b/unit/internal/insertion_ordered.cpp index 6026ce1d..2d799aa9 100644 --- a/unit/internal/insertion_ordered.cpp +++ b/unit/internal/insertion_ordered.cpp @@ -149,9 +149,7 @@ std::ostream &operator<<(std::ostream &os, const insertion_ordered_set &s) { os << "{"; for (auto it = begin(s); it != end(s); ++it) { os << *it; - if (it != end(s)) { - os << ", "; - } + os << ", "; } os << "}"; return os; diff --git 
a/unit/internal/lbr.cpp b/unit/internal/lbr.cpp index 2c585ae5..0b782569 100644 --- a/unit/internal/lbr.cpp +++ b/unit/internal/lbr.cpp @@ -98,7 +98,7 @@ protected: ParsedExpression parsed(0, pattern.c_str(), flags, 0); auto built_expr = buildGraph(rm, cc, parsed); const auto &g = built_expr.g; - ASSERT_TRUE(g != nullptr); + ASSERT_TRUE(static_cast(g)); clearReports(*g); rm.setProgramOffset(0, MATCH_REPORT); @@ -106,7 +106,7 @@ protected: /* LBR triggered by dot */ vector> triggers = {{CharReach::dot()}}; nfa = constructLBR(*g, triggers, cc, rm); - ASSERT_TRUE(nfa != nullptr); + ASSERT_TRUE(static_cast(nfa)); full_state = make_bytecode_ptr(nfa->scratchStateSize, 64); stream_state = make_bytecode_ptr(nfa->streamStateSize); diff --git a/unit/internal/limex_nfa.cpp b/unit/internal/limex_nfa.cpp index 28433c96..80b9159b 100644 --- a/unit/internal/limex_nfa.cpp +++ b/unit/internal/limex_nfa.cpp @@ -87,7 +87,7 @@ protected: nfa = constructNFA(*g, &rm, fixed_depth_tops, triggers, compress_state, fast_nfa, type, cc); - ASSERT_TRUE(nfa != nullptr); + ASSERT_TRUE(nfa.get() != nullptr); full_state = make_bytecode_ptr(nfa->scratchStateSize, 64); stream_state = make_bytecode_ptr(nfa->streamStateSize); @@ -134,7 +134,7 @@ INSTANTIATE_TEST_CASE_P( Range((int)LIMEX_NFA_32, (int)LIMEX_NFA_512)); TEST_P(LimExModelTest, StateSize) { - ASSERT_TRUE(nfa != nullptr); + ASSERT_TRUE(nfa.get() != nullptr); hs_platform_info plat; hs_error_t err = hs_populate_platform(&plat); @@ -150,7 +150,7 @@ TEST_P(LimExModelTest, StateSize) { } TEST_P(LimExModelTest, QueueExec) { - ASSERT_TRUE(nfa != nullptr); + ASSERT_TRUE(nfa.get() != nullptr); initQueue(); nfaQueueInitState(nfa.get(), &q); @@ -165,7 +165,7 @@ TEST_P(LimExModelTest, QueueExec) { } TEST_P(LimExModelTest, CompressExpand) { - ASSERT_TRUE(nfa != nullptr); + ASSERT_TRUE(nfa.get() != nullptr); u32 real_state_size = nfa->scratchStateSize; /* Only look at 8 bytes for limex 64 (rather than the padding) */ @@ -197,7 +197,7 @@ TEST_P(LimExModelTest, CompressExpand) { } TEST_P(LimExModelTest, InitCompressedState0) { - ASSERT_TRUE(nfa != nullptr); + ASSERT_TRUE(nfa.get() != nullptr); // 64-bit NFAs assume during compression that they have >= 5 bytes of // compressed NFA state, which isn't true for our 8-state test pattern. We @@ -212,7 +212,7 @@ TEST_P(LimExModelTest, InitCompressedState0) { } TEST_P(LimExModelTest, QueueExecToMatch) { - ASSERT_TRUE(nfa != nullptr); + ASSERT_TRUE(nfa.get() != nullptr); initQueue(); nfaQueueInitState(nfa.get(), &q); @@ -256,7 +256,7 @@ TEST_P(LimExModelTest, QueueExecToMatch) { } TEST_P(LimExModelTest, QueueExecRose) { - ASSERT_TRUE(nfa != nullptr); + ASSERT_TRUE(nfa.get() != nullptr); initQueue(); // For rose, there's no callback or context. 
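The recurring pattern in these unit-test hunks (`fdr.get() != nullptr`, `ASSERT_TRUE(static_cast<bool>(g))`, and the `SuperVector<64>(...)` wrappers earlier in the patch) follows from making one-argument constructors and conversion operators `explicit`. Below is a minimal sketch of what stops compiling and how the call sites adapt; the `handle` type is illustrative only, not the project's actual `bytecode_ptr` or `SuperVector`:

```
#include <cassert>

// Stand-in for a wrapper whose conversions are explicit, as the types in
// this patch become. Names here are assumptions for illustration.
template <typename T>
struct handle {
    T *p = nullptr;
    explicit handle(T *ptr) : p(ptr) {}           // explicit: no {ptr} implicit init
    explicit operator bool() const { return p; }  // explicit: no implicit bool
    T *get() const { return p; }
};

handle<int> make() {
    // return {nullptr};               // ill-formed once the ctor is explicit
    return handle<int>(nullptr);       // must name the type, as the diff does
}

int main() {
    handle<int> h = make();
    // assert(h != nullptr);           // ill-formed: no implicit conversion
    assert(h.get() == nullptr);        // compare the raw pointer instead
    assert(!static_cast<bool>(h));     // or spell out the bool conversion
}
```

Plain boolean tests still work through contextual conversion, which is why `if (!proto)` needs no change while a bare `return nullptr;` has to become `return bytecode_ptr<FDR>(nullptr);`.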
@@ -277,7 +277,7 @@ TEST_P(LimExModelTest, QueueExecRose) { } TEST_P(LimExModelTest, CheckFinalState) { - ASSERT_TRUE(nfa != nullptr); + ASSERT_TRUE(nfa.get() != nullptr); initQueue(); nfaQueueInitState(nfa.get(), &q); @@ -321,7 +321,7 @@ protected: } nfa = constructReversedNFA(g_rev, type, cc); - ASSERT_TRUE(nfa != nullptr); + ASSERT_TRUE(nfa.get() != nullptr); } // NFA type (enum NFAEngineType) @@ -338,7 +338,7 @@ INSTANTIATE_TEST_CASE_P(LimExReverse, LimExReverseTest, Range((int)LIMEX_NFA_32, (int)LIMEX_NFA_512)); TEST_P(LimExReverseTest, BlockExecReverse) { - ASSERT_TRUE(nfa != nullptr); + ASSERT_TRUE(nfa.get() != nullptr); u64a offset = 0; const u8 *buf = (const u8 *)SCAN_DATA.c_str(); @@ -381,7 +381,7 @@ protected: nfa = constructNFA(*g, &rm, fixed_depth_tops, triggers, compress_state, fast_nfa, type, cc); - ASSERT_TRUE(nfa != nullptr); + ASSERT_TRUE(nfa.get() != nullptr); full_state = make_bytecode_ptr(nfa->scratchStateSize, 64); stream_state = make_bytecode_ptr(nfa->streamStateSize); @@ -427,7 +427,7 @@ INSTANTIATE_TEST_CASE_P(LimExZombie, LimExZombieTest, Range((int)LIMEX_NFA_32, (int)LIMEX_NFA_512)); TEST_P(LimExZombieTest, GetZombieStatus) { - ASSERT_TRUE(nfa != nullptr); + ASSERT_TRUE(nfa.get() != nullptr); ASSERT_TRUE(nfa->flags & NFA_ZOMBIE); initQueue(); diff --git a/unit/internal/multi_bit_compress.cpp b/unit/internal/multi_bit_compress.cpp index 14c3f480..5c65d0a9 100644 --- a/unit/internal/multi_bit_compress.cpp +++ b/unit/internal/multi_bit_compress.cpp @@ -46,7 +46,7 @@ UNUSED static void mmbit_display(const u8 *bits, u32 total_bits) { for (u32 i = 0; i < mmbit_size(total_bits); i += 8) { - printf("block %d:", i / 8); + printf("block %u:", i / 8); for (s32 j = 7; j >= 0; j--) { u8 a = (*(bits + i + j)); printf(" %02x", a); @@ -72,7 +72,7 @@ UNUSED static void mmbit_display_comp(const u8 *bits, u32 comp_size) { for (u32 i = 0; i < comp_size; i += 8) { - printf("block %d:", i / 8); + printf("block %u:", i / 8); for (s32 j = 7; j >= 0; j--) { u8 a = (*(bits + i + j)); printf(" %02x", a); @@ -401,7 +401,6 @@ TEST_P(MultiBitCompTest, CompCompressDecompressDense) { TEST(MultiBitComp, CompIntegration1) { // 256 + 1 --> smallest 2-level mmbit - u32 total_size = mmbit_size(257); mmbit_holder ba(257); //-------------------- 1 -----------------------// @@ -516,7 +515,6 @@ TEST(MultiBitComp, CompIntegration1) { TEST(MultiBitComp, CompIntegration2) { // 64^2 + 1 --> smallest 3-level mmbit - u32 total_size = mmbit_size(4097); mmbit_holder ba(4097); //-------------------- 1 -----------------------// @@ -645,7 +643,6 @@ TEST(MultiBitComp, CompIntegration2) { TEST(MultiBitComp, CompIntegration3) { // 64^3 + 1 --> smallest 4-level mmbit - u32 total_size = mmbit_size(262145); mmbit_holder ba(262145); //-------------------- 1 -----------------------// diff --git a/unit/internal/nfagraph_common.h b/unit/internal/nfagraph_common.h index ca5554c4..61ece377 100644 --- a/unit/internal/nfagraph_common.h +++ b/unit/internal/nfagraph_common.h @@ -41,7 +41,7 @@ namespace ue2 { // Helper function: construct a graph from an expression, flags and context. 
inline std::unique_ptr constructGraphWithCC(const std::string &expr, - CompileContext &cc, + const CompileContext &cc, unsigned flags) { ReportManager rm(cc.grey); ParsedExpression parsed(0, expr.c_str(), flags, 0); diff --git a/unit/internal/noodle.cpp b/unit/internal/noodle.cpp index 16c257b8..c1723744 100644 --- a/unit/internal/noodle.cpp +++ b/unit/internal/noodle.cpp @@ -70,7 +70,7 @@ void noodleMatch(const u8 *data, size_t data_len, const char *lit_str, u32 id = 1000; hwlmLiteral lit(std::string(lit_str, lit_len), nocase, id); auto n = noodBuildTable(lit); - ASSERT_TRUE(n != nullptr); + ASSERT_TRUE(static_cast(n)); hwlm_error_t rv; struct hs_scratch scratch; diff --git a/unit/internal/pqueue.cpp b/unit/internal/pqueue.cpp index a0a37990..bd7e4650 100644 --- a/unit/internal/pqueue.cpp +++ b/unit/internal/pqueue.cpp @@ -245,7 +245,7 @@ TEST(pqueue, queue1) { u32 in[] = {1, 2, 3, 4, 5, 6, 7, 8}; u32 expected[] = {4, 5, 6, 7, 8, 3, 2, 1}; u32 temp[ARRAY_LENGTH(in)]; - u32 output[ARRAY_LENGTH(in)]; + u32 output[ARRAY_LENGTH(in)] = {0}; u32 queue_size = 0; u32 i = 0, o = 0; @@ -275,7 +275,7 @@ TEST(pqueue, queue2) { u32 in[] = {8, 7, 6, 5, 4, 3, 2, 1}; u32 expected[] = {8, 7, 6, 5, 4, 3, 2, 1}; u32 temp[ARRAY_LENGTH(in)]; - u32 output[ARRAY_LENGTH(in)]; + u32 output[ARRAY_LENGTH(in)] = {0}; u32 queue_size = 0; u32 i = 0, o = 0; @@ -301,7 +301,7 @@ TEST(pqueue, queue3) { u32 in[] = {1, 8, 2, 7, 3, 6, 4, 5}; u32 expected[] = {8, 7, 6, 4, 5, 3, 2, 1}; u32 temp[ARRAY_LENGTH(in)]; - u32 output[ARRAY_LENGTH(in)]; + u32 output[ARRAY_LENGTH(in)] = {0}; u32 queue_size = 0; u32 i = 0, o = 0; diff --git a/unit/internal/repeat.cpp b/unit/internal/repeat.cpp index 5665a0c3..41a54c5f 100644 --- a/unit/internal/repeat.cpp +++ b/unit/internal/repeat.cpp @@ -277,10 +277,9 @@ TEST_P(RepeatTest, FillRing) { } // We should be able to see matches for all of these (beyond the last top offset). 
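/*
 * A minimal illustration (names hypothetical) of the declaration-narrowing
 * pattern applied in the repeat.cpp, rose_mask.cpp and rose_mask_32.cpp hunks
 * below: moving a variable into the loop that uses it keeps each iteration
 * independently initialized and avoids -Wmaybe-uninitialized style warnings.
 *
 *     u64a v;                          // before: one slot shared by all
 *     for (u32 i = 0; i < n; i++) {    // iterations, value leaks across them
 *         v = base + i;
 *     }
 *
 *     for (u32 i = 0; i < n; i++) {
 *         u64a v = base + i;           // after: scoped to a single iteration
 *     }
 */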
- enum TriggerResult rv; for (u64a i = offset + info.repeatMax; i <= offset + info.repeatMax + info.repeatMin; i++) { - rv = processTugTrigger(&info, ctrl, state, i); + enum TriggerResult rv = processTugTrigger(&info, ctrl, state, i); if (rv == TRIGGER_SUCCESS_CACHE) { rv = TRIGGER_SUCCESS; } @@ -998,16 +997,14 @@ TEST_P(SparseOptimalTest, FillTops) { repeatStore(info, ctrl, state, offset, 0); ASSERT_EQ(offset, repeatLastTop(info, ctrl, state)); - u64a offset2; for (u32 i = min_period; i < patch_count * patch_size; i += min_period) { - offset2 = offset + i; + u64a offset2 = offset + i; repeatStore(info, ctrl, state, offset2, 1); ASSERT_EQ(offset2, repeatLastTop(info, ctrl, state)); } - u64a exit2; for (u32 i = 0; i < patch_count * patch_size; i += min_period) { - exit2 = exit + i; + u64a exit2 = exit + i; for (u32 j = exit2 + info->repeatMin; j <= offset + info->repeatMax; j++) { ASSERT_EQ(REPEAT_MATCH, repeatHasMatch(info, ctrl, state, j)); diff --git a/unit/internal/rose_mask.cpp b/unit/internal/rose_mask.cpp index e6be00f3..ed1af956 100644 --- a/unit/internal/rose_mask.cpp +++ b/unit/internal/rose_mask.cpp @@ -87,12 +87,11 @@ static int initLegalValidMasks(u64a validMasks[]) { */ static int initLegalNegMasks(u64a negMasks[]) { u64a data = 0; - u64a offset; int num = 0; while (data != ONES64) { negMasks[num] = data; num++; - offset = (data | (data +1)) ^ data; + u64a offset = (data | (data +1)) ^ data; data += 0xfeULL * offset + 1; } negMasks[num] = data; diff --git a/unit/internal/rose_mask_32.cpp b/unit/internal/rose_mask_32.cpp index 732f51a0..bb444ead 100644 --- a/unit/internal/rose_mask_32.cpp +++ b/unit/internal/rose_mask_32.cpp @@ -194,10 +194,9 @@ TEST(ValidateMask32, testMask32_3) { u32 valid_mask = ONES32 << (left + right) >> left; for (int i = 0; i < test_len; i++) { const auto &t = testBasic[i]; - int bool_result; for (int j = 0; j < 5000; j++) { u32 neg_mask = neg_mask_rand.Generate(1u << 31); - bool_result = (neg_mask & valid_mask) == + int bool_result = (neg_mask & valid_mask) == (t.neg_mask & valid_mask); EXPECT_EQ(bool_result, validateMask32(t.data.a256, valid_mask, diff --git a/unit/internal/sheng.cpp b/unit/internal/sheng.cpp new file mode 100644 index 00000000..e8e45ac5 --- /dev/null +++ b/unit/internal/sheng.cpp @@ -0,0 +1,709 @@ +/* + * Copyright (c) 2024, Arm ltd + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include "gtest/gtest.h" +#include "nfa/shengcompile.h" +#include "nfa/rdfa.h" +#include "util/bytecode_ptr.h" +#include "util/compile_context.h" +#include "util/report_manager.h" + +extern "C" { + #include "hs_compile.h" + #include "nfa/nfa_api.h" + #include "nfa/nfa_api_queue.h" + #include "nfa/nfa_api_util.h" + #include "nfa/nfa_internal.h" + #include "nfa/rdfa.h" + #include "nfa/sheng.h" + #include "ue2common.h" +} + +namespace { + +struct callback_context { + unsigned int period; + unsigned int match_count; + unsigned int pattern_length; +}; + +int dummy_callback(u64a start, u64a end, ReportID id, void *context) { + (void) context; + printf("callback %llu %llu %u\n", start, end, id); + return 1; /* 0 stops matching, !0 continue */ +} + +int periodic_pattern_callback(u64a start, u64a end, ReportID id, void *raw_context) { + struct callback_context *context = (struct callback_context*) raw_context; + (void) start; + (void) id; + EXPECT_EQ(context->period * context->match_count, end - context->pattern_length); + context->match_count++; + return 1; /* 0 stops matching, !0 continue */ +} + +/** + * @brief Fill the state matrix with a diagonal pattern: accept the Nth character to go to the N+1 state + */ +static void fill_straight_regex_sequence(struct ue2::raw_dfa *dfa, int start_state, int end_state, int state_count) +{ + for (int state = start_state; state < end_state; state++) { + dfa->states[state].next.assign(state_count ,1); + dfa->states[state].next[0] = 2; + dfa->states[state].next[1] = 2; + dfa->states[state].next[state] = state+1; + } +} + +static void init_raw_dfa16(struct ue2::raw_dfa *dfa, const ReportID rID) +{ + dfa->start_anchored = 1; + dfa->start_floating = 1; + dfa->alpha_size = 8; + + int nb_state = 8; + for(int i = 0; i < nb_state; i++) { + struct ue2::dstate state(dfa->alpha_size); + state.next = std::vector(nb_state); + state.daddy = 0; + state.impl_id = i; /* id of the state */ + state.reports = ue2::flat_set(); + state.reports_eod = ue2::flat_set(); + dfa->states.push_back(state); + } + + /* add a report to every accept state */ + dfa->states[7].reports.insert(rID); + + /** + * [a,b][c-e]{3}of + * (1) -a,b-> (2) -c,d,e-> (3) -c,d,e-> (4) -c,d,e-> (5) -o-> (6) -f-> ((7)) + * (0) = dead + */ + + for(int i = 0; i < ue2::ALPHABET_SIZE; i++) { + dfa->alpha_remap[i] = 0; + } + + dfa->alpha_remap['a'] = 0; + dfa->alpha_remap['b'] = 1; + dfa->alpha_remap['c'] = 2; + dfa->alpha_remap['d'] = 3; + dfa->alpha_remap['e'] = 4; + dfa->alpha_remap['o'] = 5; + dfa->alpha_remap['f'] = 6; + dfa->alpha_remap[256] = 7; /* for some reason there's a check that run on dfa->alpha_size-1 */ + + /* a b c d e o f */ + dfa->states[0].next = {0,0,0,0,0,0,0}; + dfa->states[1].next = {2,2,1,1,1,1,1}; /* nothing */ + dfa->states[2].next = {2,2,3,3,3,1,1}; /* [a,b] */ + dfa->states[3].next = {2,2,4,4,4,1,1}; /* [a,b][c-e]{1} */ + dfa->states[4].next = {2,2,5,5,5,1,1}; /* [a,b][c-e]{2} */ + fill_straight_regex_sequence(dfa, 5, 7, 7); /* 
[a,b][c-e]{3}o */ + dfa->states[7].next = {2,2,1,1,1,1,1}; /* [a,b][c-e]{3}of */ +} + +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +/* We need more than 16 states to run sheng32, so make the graph longer */ +static void init_raw_dfa32(struct ue2::raw_dfa *dfa, const ReportID rID) +{ + dfa->start_anchored = 1; + dfa->start_floating = 1; + dfa->alpha_size = 18; + + int nb_state = 18; + for(int i = 0; i < nb_state; i++) { + struct ue2::dstate state(dfa->alpha_size); + state.next = std::vector(nb_state); + state.daddy = 0; + state.impl_id = i; /* id of the state */ + state.reports = ue2::flat_set(); + state.reports_eod = ue2::flat_set(); + dfa->states.push_back(state); + } + + /* add a report to every accept state */ + dfa->states[17].reports.insert(rID); + + /** + * [a,b][c-e]{3}of0123456789 + * (1) -a,b-> (2) -c,d,e-> (3) -c,d,e-> (4) -c,d,e-> (5) -o-> (6) -f-> (7) --> ((17)) + * (0) = dead + */ + + for(int i = 0; i < ue2::ALPHABET_SIZE; i++) { + dfa->alpha_remap[i] = 0; + } + + dfa->alpha_remap['a'] = 0; + dfa->alpha_remap['b'] = 1; + dfa->alpha_remap['c'] = 2; + dfa->alpha_remap['d'] = 3; + dfa->alpha_remap['e'] = 4; + dfa->alpha_remap['o'] = 5; + dfa->alpha_remap['f'] = 6; + // maps 0 to 9 + for (int i = 0; i < 10; i ++) { + dfa->alpha_remap[i + '0'] = i + 7; + } + dfa->alpha_remap[256] = 17; /* for some reason there's a check that run on dfa->alpha_size-1 */ + + /* a b c d e o f 0 1 2 3 4 5 6 7 8 9 */ + dfa->states[0].next = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + dfa->states[1].next = {2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; /* nothing */ + dfa->states[2].next = {2,2,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1}; /* [a,b] */ + dfa->states[3].next = {2,2,4,4,4,1,1,1,1,1,1,1,1,1,1,1,1}; /* [a,b][c-e]{1} */ + dfa->states[4].next = {2,2,5,5,5,1,1,1,1,1,1,1,1,1,1,1,1}; /* [a,b][c-e]{2} */ + fill_straight_regex_sequence(dfa, 5, 17, 17); /* [a,b][c-e]{3}of012345678 */ + dfa->states[17].next = {2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; /* [a,b][c-e]{3}of0123456789 */ +} +#endif /* defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) */ + +typedef ue2::bytecode_ptr (*sheng_compile_ptr)(ue2::raw_dfa&, + const ue2::CompileContext&, + const ue2::ReportManager&, + bool, + std::set*); + +typedef void (*init_raw_dfa_ptr)(struct ue2::raw_dfa*, const ReportID); + + +static inline void init_nfa(struct NFA **out_nfa, sheng_compile_ptr compile_function, init_raw_dfa_ptr init_dfa_function) { + ue2::Grey *g = new ue2::Grey(); + hs_platform_info plat_info = {0, 0, 0, 0}; + ue2::CompileContext *cc = new ue2::CompileContext(false, false, ue2::target_t(plat_info), *g); + ue2::ReportManager *rm = new ue2::ReportManager(*g); + ue2::Report *report = new ue2::Report(ue2::EXTERNAL_CALLBACK, 0); + ReportID rID = rm->getInternalId(*report); + rm->setProgramOffset(0, 0); + + struct ue2::raw_dfa *dfa = new ue2::raw_dfa(ue2::NFA_OUTFIX); + init_dfa_function(dfa, rID); + + *out_nfa = (compile_function(*dfa, *cc, *rm, false, nullptr)).release(); + ASSERT_NE(nullptr, *out_nfa); + + delete report; + delete rm; + delete cc; + delete g; +} + +static void init_nfa16(struct NFA **out_nfa) { + init_nfa(out_nfa, ue2::shengCompile, init_raw_dfa16); +} + +#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) +static void init_nfa32(struct NFA **out_nfa) { + init_nfa(out_nfa, ue2::sheng32Compile, init_raw_dfa32); +} +#endif /* defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) */ + +static char state_buffer; + +static inline void init_sheng_queue(struct mq **out_q, uint8_t *buffer, size_t max_size, void (*init_nfa_func)(struct NFA **out_nfa) ) { + struct NFA* nfa; + 
+    init_nfa_func(&nfa);
+    assert(nfa);
+
+    struct mq *q = new mq();
+
+    memset(q, 0, sizeof(struct mq));
+    q->nfa = nfa;
+    q->state = &state_buffer;
+    q->cb = dummy_callback;
+    q->buffer = buffer;
+    q->length = max_size; /* setting this as the maximum scannable length */
+
+    nfaQueueInitState(nfa, q);
+    pushQueueAt(q, 0, MQE_START, 0);
+    pushQueueAt(q, 1, MQE_END, q->length);
+
+    *out_q = q;
+}
+
+static void init_sheng_queue16(struct mq **out_q, uint8_t *buffer, size_t max_size) {
+    init_sheng_queue(out_q, buffer, max_size, init_nfa16);
+}
+
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
+static void init_sheng_queue32(struct mq **out_q, uint8_t *buffer, size_t max_size) {
+    init_sheng_queue(out_q, buffer, max_size, init_nfa32);
+}
+#endif /* defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) */
+
+static
+void fill_pattern(u8* buf, size_t buffer_size, unsigned int start_offset, unsigned int period, const char *pattern, unsigned int pattern_length) {
+    memset(buf, '_', buffer_size);
+
+    for (unsigned int i = 0; i < buffer_size - 8; i += 8) {
+        /* fill with some junk, including some characters used for valid states, to prevent the use of shufti */
+        memcpy(buf + i, "jgohcxbf", 8);
+    }
+
+    for (unsigned int i = start_offset; i < buffer_size - pattern_length; i += period) {
+        memcpy(buf + i, pattern, pattern_length);
+    }
+}
+
+/* Generate ground truth to compare against */
+struct NFA *get_expected_nfa_header(u8 type, unsigned int length, unsigned int nposition) {
+    struct NFA *expected_nfa_header = new struct NFA();
+    memset(expected_nfa_header, 0, sizeof(struct NFA));
+    expected_nfa_header->length = length;
+    expected_nfa_header->type = type;
+    expected_nfa_header->nPositions = nposition;
+    expected_nfa_header->scratchStateSize = 1;
+    expected_nfa_header->streamStateSize = 1;
+    return expected_nfa_header;
+}
+
+struct NFA *get_expected_nfa16_header() {
+    return get_expected_nfa_header(SHENG_NFA, 4736, 8);
+}
+
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
+struct NFA *get_expected_nfa32_header() {
+    return get_expected_nfa_header(SHENG_NFA_32, 17216, 18);
+}
+#endif /* defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) */
+
+void test_nfa_equal(const NFA& l, const NFA& r)
+{
+    EXPECT_EQ(l.flags, r.flags);
+    EXPECT_EQ(l.length, r.length);
+    EXPECT_EQ(l.type, r.type);
+    EXPECT_EQ(l.rAccelType, r.rAccelType);
+    EXPECT_EQ(l.rAccelOffset, r.rAccelOffset);
+    EXPECT_EQ(l.maxBiAnchoredWidth, r.maxBiAnchoredWidth);
+    EXPECT_EQ(l.rAccelData.dc, r.rAccelData.dc);
+    EXPECT_EQ(l.queueIndex, r.queueIndex);
+    EXPECT_EQ(l.nPositions, r.nPositions);
+    EXPECT_EQ(l.scratchStateSize, r.scratchStateSize);
+    EXPECT_EQ(l.streamStateSize, r.streamStateSize);
+    EXPECT_EQ(l.maxWidth, r.maxWidth);
+    EXPECT_EQ(l.minWidth, r.minWidth);
+    EXPECT_EQ(l.maxOffset, r.maxOffset);
+}
+
+/* Start of actual tests */
+
+/*
+ * Runs shengCompile and compares its outputs to previously recorded outputs.
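+ * The recorded baseline is the header returned by get_expected_nfa16_header().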
+ */
+TEST(Sheng16, std_compile_header) {
+
+    ue2::Grey *g = new ue2::Grey();
+    hs_platform_info plat_info = {0, 0, 0, 0};
+    ue2::CompileContext *cc = new ue2::CompileContext(false, false, ue2::target_t(plat_info), *g);
+    ue2::ReportManager *rm = new ue2::ReportManager(*g);
+    ue2::Report *report = new ue2::Report(ue2::EXTERNAL_CALLBACK, 0);
+    ReportID rID = rm->getInternalId(*report);
+    rm->setProgramOffset(0, 0);
+
+    struct ue2::raw_dfa *dfa = new ue2::raw_dfa(ue2::NFA_OUTFIX);
+    init_raw_dfa16(dfa, rID);
+
+    struct NFA *nfa = (shengCompile(*dfa, *cc, *rm, false)).release();
+    EXPECT_NE(nullptr, nfa);
+
+    EXPECT_NE(0, nfa->length);
+    EXPECT_EQ(SHENG_NFA, nfa->type);
+
+    struct NFA *expected_nfa = get_expected_nfa16_header();
+    test_nfa_equal(*expected_nfa, *nfa);
+
+    delete expected_nfa;
+    delete report;
+    delete rm;
+    delete cc;
+    delete g;
+}
+
+/*
+ * nfaExecSheng_B is the most basic of the sheng variants. It simply calls the core of the algorithm.
+ * We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
+ */
+TEST(Sheng16, std_run_B) {
+    unsigned int pattern_length = 6;
+    unsigned int period = 128;
+    const size_t buf_size = 200;
+    unsigned int expected_matches = buf_size/128 + 1;
+    u8 buf[buf_size];
+    struct callback_context context = {period, 0, pattern_length};
+
+    struct NFA* nfa;
+    init_nfa16(&nfa);
+    ASSERT_NE(nullptr, nfa);
+    fill_pattern(buf, buf_size, 0, period, "acecof", pattern_length);
+    char ret_val;
+    unsigned int offset = 0;
+    unsigned int loop_count = 0;
+    for (; loop_count < expected_matches + 1; loop_count++) {
+        ASSERT_LT(offset, buf_size);
+        ret_val = nfaExecSheng_B(nfa,
+                                 offset,
+                                 buf + offset,
+                                 (s64a) buf_size - offset,
+                                 periodic_pattern_callback,
+                                 &context);
+        offset = (context.match_count - 1) * context.period + context.pattern_length;
+        if(unlikely(ret_val != MO_ALIVE)) {
+            break;
+        }
+    }
+
+    /*check normal return*/
+    EXPECT_EQ(MO_ALIVE, ret_val);
+
+    /*check that we find no additional matches and don't crash when no match is found*/
+    EXPECT_EQ(expected_matches + 1, loop_count);
+
+    /*check that we have all the matches*/
+    EXPECT_EQ(expected_matches, context.match_count);
+}
+
+/*
+ * nfaExecSheng_Q runs like the _B version (callback), but exercises the message queue logic.
+ * We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
+ */
+TEST(Sheng16, std_run_Q) {
+    struct mq *q;
+    unsigned int pattern_length = 6;
+    unsigned int period = 128;
+    const size_t buf_size = 200;
+    unsigned int expected_matches = buf_size/128 + 1;
+    u8 buf[buf_size];
+    struct callback_context context = {period, 0, pattern_length};
+
+    init_sheng_queue16(&q, buf, buf_size);
+    fill_pattern(buf, buf_size, 0, period, "acecof", pattern_length);
+    q->cur = 0;
+    q->items[q->cur].location = 0;
+    q->context = &context;
+    q->cb = periodic_pattern_callback;
+
+    nfaExecSheng_Q(q->nfa, q, (s64a) buf_size);
+    /*check that we have all the matches*/
+    EXPECT_EQ(expected_matches, context.match_count);
+
+    delete q;
+}
+
+/*
+ * nfaExecSheng_Q2 uses the message queue, but stops at match instead of using a callback.
+ * We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
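+ * Each call is expected to return MO_MATCHES_PENDING and stop right after the next periodic match.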
+ */
+TEST(Sheng16, std_run_Q2) {
+    struct mq *q;
+    unsigned int pattern_length = 6;
+    unsigned int period = 128;
+    const size_t buf_size = 200;
+    unsigned int expected_matches = buf_size/128 + 1;
+    u8 buf[buf_size];
+
+    init_sheng_queue16(&q, buf, buf_size);
+    fill_pattern(buf, buf_size, 0, period, "acecof", pattern_length);
+    q->cur = 0;
+    q->items[q->cur].location = 0;
+
+    char ret_val;
+    int location;
+    unsigned int loop_count = 0;
+    do {
+        ret_val = nfaExecSheng_Q2(q->nfa, q, (s64a) buf_size);
+        location = q->items[q->cur].location;
+        loop_count++;
+    } while(likely((ret_val == MO_MATCHES_PENDING) && (location < (int)buf_size) && ((location % period) == pattern_length)));
+
+    /*check that it isn't a spurious match*/
+    EXPECT_EQ(0, (ret_val == MO_MATCHES_PENDING) && ((location % period) != pattern_length));
+
+    /*check that we have all the matches*/
+    EXPECT_EQ(expected_matches, loop_count-1);
+
+    delete q;
+}
+
+/*
+ * The message queue can also run on the "history" buffer. We test it the same way as the normal
+ * buffer, expecting the same behavior.
+ * We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
+ */
+TEST(Sheng16, history_run_Q2) {
+    struct mq *q;
+    unsigned int pattern_length = 6;
+    unsigned int period = 128;
+    const size_t buf_size = 200;
+    unsigned int expected_matches = buf_size/128 + 1;
+    u8 buf[buf_size];
+
+    init_sheng_queue16(&q, buf, buf_size);
+    fill_pattern(buf, buf_size, 0, period, "acecof", pattern_length);
+    q->history = buf;
+    q->hlength = buf_size;
+    q->cur = 0;
+    q->items[q->cur].location = -200;
+
+    char ret_val;
+    int location;
+    unsigned int loop_count = 0;
+    do {
+        ret_val = nfaExecSheng_Q2(q->nfa, q, 0);
+        location = q->items[q->cur].location;
+        loop_count++;
+    } while(likely((ret_val == MO_MATCHES_PENDING) && (location > -(int)buf_size) && (location < 0) && (((buf_size + location) % period) == pattern_length)));
+
+    /*check that it isn't a spurious match*/
+    EXPECT_EQ(0, (ret_val == MO_MATCHES_PENDING) && (((buf_size + location) % period) != pattern_length));
+
+    /*check that we have all the matches*/
+    EXPECT_EQ(expected_matches, loop_count-1);
+
+    delete q;
+}
+
+/**
+ * These tests cover only the basic paths. More tests could cover:
+ * - running from the history buffer into the current buffer in Q2
+ * - running while expecting no match
+ * - nfaExecSheng_QR
+ * - running sheng when it should call an accelerator and confirming that it does
+ */
+
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
+
+/*
+ * Runs sheng32Compile and compares its outputs to previously recorded outputs.
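+ * The recorded baseline is the header returned by get_expected_nfa32_header().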
+ */
+TEST(Sheng32, std_compile_header) {
+#if defined(HAVE_SVE)
+    if(svcntb()<32) {
+        return;
+    }
+#endif
+    ue2::Grey *g = new ue2::Grey();
+    hs_platform_info plat_info = {0, 0, 0, 0};
+    ue2::CompileContext *cc = new ue2::CompileContext(false, false, ue2::target_t(plat_info), *g);
+    ue2::ReportManager *rm = new ue2::ReportManager(*g);
+    ue2::Report *report = new ue2::Report(ue2::EXTERNAL_CALLBACK, 0);
+    ReportID rID = rm->getInternalId(*report);
+    rm->setProgramOffset(0, 0);
+
+    struct ue2::raw_dfa *dfa = new ue2::raw_dfa(ue2::NFA_OUTFIX);
+    init_raw_dfa32(dfa, rID);
+
+    struct NFA *nfa = (sheng32Compile(*dfa, *cc, *rm, false)).release();
+    EXPECT_NE(nullptr, nfa);
+
+    EXPECT_NE(0, nfa->length);
+    EXPECT_EQ(SHENG_NFA_32, nfa->type);
+
+    struct NFA *expected_nfa = get_expected_nfa32_header();
+    test_nfa_equal(*expected_nfa, *nfa);
+
+    delete expected_nfa;
+    delete report;
+    delete rm;
+    delete cc;
+    delete g;
+}
+
+/*
+ * nfaExecSheng32_B is the most basic of the sheng variants. It simply calls the core of the algorithm.
+ * We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
+ */
+TEST(Sheng32, std_run_B) {
+#if defined(HAVE_SVE)
+    if(svcntb()<32) {
+        return;
+    }
+#endif
+    unsigned int pattern_length = 16;
+    unsigned int period = 128;
+    const size_t buf_size = 200;
+    unsigned int expected_matches = buf_size/128 + 1;
+    u8 buf[buf_size];
+    struct callback_context context = {period, 0, pattern_length};
+
+    struct NFA* nfa;
+    init_nfa32(&nfa);
+    ASSERT_NE(nullptr, nfa);
+    fill_pattern(buf, buf_size, 0, period, "acecof0123456789", pattern_length);
+    char ret_val;
+    unsigned int offset = 0;
+    unsigned int loop_count = 0;
+    for (; loop_count < expected_matches + 1; loop_count++) {
+        ASSERT_LT(offset, buf_size);
+        ret_val = nfaExecSheng32_B(nfa,
+                                   offset,
+                                   buf + offset,
+                                   (s64a) buf_size - offset,
+                                   periodic_pattern_callback,
+                                   &context);
+        offset = (context.match_count - 1) * context.period + context.pattern_length;
+        if(unlikely(ret_val != MO_ALIVE)) {
+            break;
+        }
+    }
+
+    /*check normal return*/
+    EXPECT_EQ(MO_ALIVE, ret_val);
+
+    /*check that we find no additional matches and don't crash when no match is found*/
+    EXPECT_EQ(expected_matches + 1, loop_count);
+
+    /*check that we have all the matches*/
+    EXPECT_EQ(expected_matches, context.match_count);
+}
+
+/*
+ * nfaExecSheng32_Q runs like the _B version (callback), but exercises the message queue logic.
+ * We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
+ */
+TEST(Sheng32, std_run_Q) {
+#if defined(HAVE_SVE)
+    if(svcntb()<32) {
+        return;
+    }
+#endif
+    struct mq *q;
+    unsigned int pattern_length = 16;
+    unsigned int period = 128;
+    const size_t buf_size = 200;
+    unsigned int expected_matches = buf_size/128 + 1;
+    u8 buf[buf_size];
+    struct callback_context context = {period, 0, pattern_length};
+
+    init_sheng_queue32(&q, buf, buf_size);
+    fill_pattern(buf, buf_size, 0, period, "acecof0123456789", pattern_length);
+    q->cur = 0;
+    q->items[q->cur].location = 0;
+    q->context = &context;
+    q->cb = periodic_pattern_callback;
+
+    nfaExecSheng32_Q(q->nfa, q, (s64a) buf_size);
+    /*check that we have all the matches*/
+    EXPECT_EQ(expected_matches, context.match_count);
+
+    delete q;
+}
+
+/*
+ * nfaExecSheng32_Q2 uses the message queue, but stops at match instead of using a callback.
+ * We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
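+ * As with Sheng16, each call is expected to return MO_MATCHES_PENDING and stop at the next periodic match.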
+ */
+TEST(Sheng32, std_run_Q2) {
+#if defined(HAVE_SVE)
+    if(svcntb()<32) {
+        return;
+    }
+#endif
+    struct mq *q;
+    unsigned int pattern_length = 16;
+    unsigned int period = 128;
+    const size_t buf_size = 200;
+    unsigned int expected_matches = buf_size/128 + 1;
+    u8 buf[buf_size];
+
+    init_sheng_queue32(&q, buf, buf_size);
+    fill_pattern(buf, buf_size, 0, period, "acecof0123456789", pattern_length);
+    q->cur = 0;
+    q->items[q->cur].location = 0;
+
+    char ret_val;
+    int location;
+    unsigned int loop_count = 0;
+    do {
+        ret_val = nfaExecSheng32_Q2(q->nfa, q, (s64a) buf_size);
+        location = q->items[q->cur].location;
+        loop_count++;
+    } while(likely((ret_val == MO_MATCHES_PENDING) && (location < (int)buf_size) && ((location % period) == pattern_length)));
+
+    /*check that it isn't a spurious match*/
+    EXPECT_EQ(0, (ret_val == MO_MATCHES_PENDING) && ((location % period) != pattern_length));
+
+    /*check that we have all the matches*/
+    EXPECT_EQ(expected_matches, loop_count-1);
+
+    delete q;
+}
+
+/*
+ * The message queue can also run on the "history" buffer. We test it the same way as the normal
+ * buffer, expecting the same behavior.
+ * We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
+ */
+TEST(Sheng32, history_run_Q2) {
+#if defined(HAVE_SVE)
+    if(svcntb()<32) {
+        return;
+    }
+#endif
+    struct mq *q;
+    unsigned int pattern_length = 16;
+    unsigned int period = 128;
+    const size_t buf_size = 200;
+    unsigned int expected_matches = buf_size/128 + 1;
+    u8 buf[buf_size];
+
+    init_sheng_queue32(&q, buf, buf_size);
+    fill_pattern(buf, buf_size, 0, period, "acecof0123456789", pattern_length);
+    q->history = buf;
+    q->hlength = buf_size;
+    q->cur = 0;
+    q->items[q->cur].location = -200;
+
+    char ret_val;
+    int location;
+    unsigned int loop_count = 0;
+    do {
+        ret_val = nfaExecSheng32_Q2(q->nfa, q, 0);
+        location = q->items[q->cur].location;
+        loop_count++;
+    } while(likely((ret_val == MO_MATCHES_PENDING) && (location > -(int)buf_size) && (location < 0) && (((buf_size + location) % period) == pattern_length)));
+
+    /*check that it isn't a spurious match*/
+    EXPECT_EQ(0, (ret_val == MO_MATCHES_PENDING) && (((buf_size + location) % period) != pattern_length));
+
+    /*check that we have all the matches*/
+    EXPECT_EQ(expected_matches, loop_count-1);
+
+    delete q;
+}
+#endif /* defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) */
+
+} /* namespace */
\ No newline at end of file
diff --git a/unit/internal/shuffle.cpp b/unit/internal/shuffle.cpp
index deb85e9f..a812071a 100644
--- a/unit/internal/shuffle.cpp
+++ b/unit/internal/shuffle.cpp
@@ -210,14 +210,14 @@ TEST(Shuffle, PackedExtract_templatized_128_1) {
         SuperVector<16> permute = SuperVector<16>::Zeroes();
         SuperVector<16> compare = SuperVector<16>::Zeroes();
         build_pshufb_masks_onebit(i, &permute.u.v128[0], &compare.u.v128[0]);
-        EXPECT_EQ(1U, packedExtract<16>(setbit<m128>(i), permute, compare));
+        EXPECT_EQ(1U, packedExtract<16>(SuperVector<16>(setbit<m128>(i)), permute, compare));
         EXPECT_EQ(1U, packedExtract<16>(SuperVector<16>::Ones(), permute, compare));
         // we should get zero out of these cases
         EXPECT_EQ(0U, packedExtract<16>(SuperVector<16>::Zeroes(), permute, compare));
-        EXPECT_EQ(0U, packedExtract<16>(not128(setbit<m128>(i)), permute, compare));
+        EXPECT_EQ(0U, packedExtract<16>(SuperVector<16>(not128(setbit<m128>(i))), permute, compare));
         // we should get zero out of all the other bit positions
         for (unsigned int j = 0; (j != i && j < 128); j++) {
-            EXPECT_EQ(0U, packedExtract<16>(setbit<m128>(j), permute, compare));
+            EXPECT_EQ(0U, packedExtract<16>(SuperVector<16>(setbit<m128>(j)), permute, compare));
         }
     }
 }
@@ -251,14 +251,14 @@ TEST(Shuffle, PackedExtract_templatized_256_1) {
         SuperVector<32> permute = SuperVector<32>::Zeroes();
         SuperVector<32> compare = SuperVector<32>::Zeroes();
         build_pshufb_masks_onebit(i, &permute.u.v256[0], &compare.u.v256[0]);
-        EXPECT_EQ(1U, packedExtract<32>(setbit<m256>(i), permute, compare));
+        EXPECT_EQ(1U, packedExtract<32>(SuperVector<32>(setbit<m256>(i)), permute, compare));
         EXPECT_EQ(1U, packedExtract<32>(SuperVector<32>::Ones(), permute, compare));
         // we should get zero out of these cases
         EXPECT_EQ(0U, packedExtract<32>(SuperVector<32>::Zeroes(), permute, compare));
-        EXPECT_EQ(0U, packedExtract<32>(not256(setbit<m256>(i)), permute, compare));
+        EXPECT_EQ(0U, packedExtract<32>(SuperVector<32>(not256(setbit<m256>(i))), permute, compare));
         // we should get zero out of all the other bit positions
         for (unsigned int j = 0; (j != i && j < 256); j++) {
-            EXPECT_EQ(0U, packedExtract<32>(setbit<m256>(j), permute, compare));
+            EXPECT_EQ(0U, packedExtract<32>(SuperVector<32>(setbit<m256>(j)), permute, compare));
         }
     }
 }
@@ -291,14 +291,14 @@ TEST(Shuffle, PackedExtract_templatized_512_1) {
         SuperVector<64> permute = SuperVector<64>::Zeroes();
         SuperVector<64> compare = SuperVector<64>::Zeroes();
         build_pshufb_masks_onebit(i, &permute.u.v512[0], &compare.u.v512[0]);
-        EXPECT_EQ(1U, packedExtract<64>(setbit<m512>(i), permute, compare));
+        EXPECT_EQ(1U, packedExtract<64>(SuperVector<64>(setbit<m512>(i)), permute, compare));
         EXPECT_EQ(1U, packedExtract<64>(SuperVector<64>::Ones(), permute, compare));
         // we should get zero out of these cases
         EXPECT_EQ(0U, packedExtract<64>(SuperVector<64>::Zeroes(), permute, compare));
-        EXPECT_EQ(0U, packedExtract<64>(not512(setbit<m512>(i)), permute, compare));
+        EXPECT_EQ(0U, packedExtract<64>(SuperVector<64>(not512(setbit<m512>(i))), permute, compare));
         // we should get zero out of all the other bit positions
         for (unsigned int j = 0; (j != i && j < 512); j++) {
-            EXPECT_EQ(0U, packedExtract<64>(setbit<m512>(j), permute, compare));
+            EXPECT_EQ(0U, packedExtract<64>(SuperVector<64>(setbit<m512>(j)), permute, compare));
         }
     }
 }
diff --git a/unit/internal/shufti.cpp b/unit/internal/shufti.cpp
index fb8d58a8..a3955267 100644
--- a/unit/internal/shufti.cpp
+++ b/unit/internal/shufti.cpp
@@ -79,8 +79,8 @@ TEST(Shufti, BuildMask2) {
     int ret = shuftiBuildMasks(chars, (u8 *)&lomask, (u8 *)&himask);
     ASSERT_NE(-1, ret);

-    u8 *lo = (u8 *)&lomask;
-    u8 *hi = (u8 *)&himask;
+    const u8 *lo = (u8 *)&lomask;
+    const u8 *hi = (u8 *)&himask;
     ASSERT_TRUE(lo['a' % 16] & hi['a' >> 4]);
     ASSERT_TRUE(lo['B' % 16] & hi['B' >> 4]);
     ASSERT_FALSE(lo['a' % 16] & hi['B' >> 4]);
@@ -100,8 +100,8 @@ TEST(Shufti, BuildMask4) {
     int ret = shuftiBuildMasks(chars, (u8 *)&lomask, (u8 *)&himask);
     ASSERT_NE(-1, ret);

-    u8 *lo = (u8 *)&lomask;
-    u8 *hi = (u8 *)&himask;
+    const u8 *lo = (u8 *)&lomask;
+    const u8 *hi = (u8 *)&himask;
     ASSERT_TRUE(lo['a' % 16] & hi['a' >> 4]);
     ASSERT_TRUE(lo['A' % 16] & hi['A' >> 4]);
     ASSERT_TRUE(lo['b' % 16] & hi['b' >> 4]);
@@ -331,10 +331,10 @@ TEST(DoubleShufti, BuildMask2) {
                                (u8 *)&lo2m, (u8 *)&hi2m);
     ASSERT_TRUE(ret);

-    u8 *lo1 = (u8 *)&lo1m;
-    u8 *lo2 = (u8 *)&lo2m;
-    u8 *hi1 = (u8 *)&hi1m;
-    u8 *hi2 = (u8 *)&hi2m;
+    const u8 *lo1 = (u8 *)&lo1m;
+    const u8 *lo2 = (u8 *)&lo2m;
+    const u8 *hi1 = (u8 *)&hi1m;
+    const u8 *hi2 = (u8 *)&hi2m;
     ASSERT_NE(0xff,
               lo1['a' % 16] | hi1['a' >> 4] | lo2['z' % 16] | hi2['z' >> 4]);
     ASSERT_NE(0xff,
@@ -359,10 +359,10 @@ TEST(DoubleShufti, BuildMask4) {
                                (u8 *)&lo2m, (u8 *)&hi2m);
     ASSERT_TRUE(ret);

-    u8 *lo1 = (u8 *)&lo1m;
-    u8 *lo2 = (u8 *)&lo2m;
-    u8 *hi1 = (u8 *)&hi1m;
-    u8 *hi2 = (u8 *)&hi2m;
+    const u8 *lo1 = (u8 *)&lo1m;
+    const u8 *lo2 = (u8 *)&lo2m;
+    const u8 *hi1 = (u8 *)&hi1m;
+    const u8 *hi2 = (u8 *)&hi2m;
     ASSERT_NE(0xff,
               lo1['a' % 16] | hi1['a' >> 4] | lo2['z' % 16] | hi2['z' >> 4]);
     ASSERT_NE(0xff,
@@ -388,10 +388,10 @@ TEST(DoubleShufti, BuildMask5) {
                                (u8 *)&lo2m, (u8 *)&hi2m);
     ASSERT_TRUE(ret);

-    u8 *lo1 = (u8 *)&lo1m;
-    u8 *lo2 = (u8 *)&lo2m;
-    u8 *hi1 = (u8 *)&hi1m;
-    u8 *hi2 = (u8 *)&hi2m;
+    const u8 *lo1 = (u8 *)&lo1m;
+    const u8 *lo2 = (u8 *)&lo2m;
+    const u8 *hi1 = (u8 *)&hi1m;
+    const u8 *hi2 = (u8 *)&hi2m;
     ASSERT_NE(0xff,
               lo1['a' % 16] | hi1['a' >> 4] | lo2['z' % 16] | hi2['z' >> 4]);
     ASSERT_EQ(0xff,
@@ -426,10 +426,10 @@ TEST(DoubleShufti, BuildMask6) {
                                (u8 *)&lo2m, (u8 *)&hi2m);
     ASSERT_TRUE(ret);

-    u8 *lo1 = (u8 *)&lo1m;
-    u8 *lo2 = (u8 *)&lo2m;
-    u8 *hi1 = (u8 *)&hi1m;
-    u8 *hi2 = (u8 *)&hi2m;
+    const u8 *lo1 = (u8 *)&lo1m;
+    const u8 *lo2 = (u8 *)&lo2m;
+    const u8 *hi1 = (u8 *)&hi1m;
+    const u8 *hi2 = (u8 *)&hi2m;
     ASSERT_NE(0xff,
               lo1['a' % 16] | hi1['a' >> 4] | lo2['z' % 16] | hi2['z' >> 4]);
     ASSERT_NE(0xff,
diff --git a/unit/internal/state_compress.cpp b/unit/internal/state_compress.cpp
index 00423702..58e540a8 100644
--- a/unit/internal/state_compress.cpp
+++ b/unit/internal/state_compress.cpp
@@ -152,7 +152,7 @@ TEST(state_compress, m128_1) {

 TEST(state_compress, m128_2) {
     char buf[sizeof(m128)] = { 0 };
-    char val_raw[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
+    const char val_raw[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
                          '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
     m128 val;
     memcpy(&val, val_raw, sizeof(val));
@@ -180,7 +180,7 @@ TEST(state_compress, m128_2) {
             loadcompressed128(&val_out, &buf, &mask, 0);
             EXPECT_TRUE(!diff128(and128(val, mask), val_out));

-            mask_raw[j] = 0x7f;
+            mask_raw[j] = 0x7f; // cppcheck-suppress unreadVariable
         }
     }
 }
@@ -228,7 +228,7 @@ TEST(state_compress, m256_1) {

 TEST(state_compress, m256_2) {
     char buf[sizeof(m256)] = { 0 };
-    char val_raw[32] = { '0', '1', '2', '3', '4', '5', '6', '7',
+    const char val_raw[32] = { '0', '1', '2', '3', '4', '5', '6', '7',
                          '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
                          'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
                          'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P' };
@@ -258,7 +258,7 @@ TEST(state_compress, m256_2) {
             loadcompressed256(&val_out, &buf, &mask, 0);
             EXPECT_TRUE(!diff256(and256(val, mask), val_out));

-            mask_raw[j] = 0x7f;
+            mask_raw[j] = 0x7f; // cppcheck-suppress unreadVariable
         }
     }
 }
@@ -306,7 +306,7 @@ TEST(state_compress, m384_1) {

 TEST(state_compress, m384_2) {
     char buf[sizeof(m384)] = { 0 };
-    char val_raw[48] = { '0', '1', '2', '3', '4', '5', '6', '7',
+    const char val_raw[48] = { '0', '1', '2', '3', '4', '5', '6', '7',
                          '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
                          'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
                          'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
@@ -338,7 +338,7 @@ TEST(state_compress, m384_2) {
             loadcompressed384(&val_out, &buf, &mask, 0);
             EXPECT_TRUE(!diff384(and384(val, mask), val_out));

-            mask_raw[j] = 0x7f;
+            mask_raw[j] = 0x7f; // cppcheck-suppress unreadVariable
         }
     }
 }
@@ -386,7 +386,7 @@ TEST(state_compress, m512_1) {

 TEST(state_compress, m512_2) {
     char buf[sizeof(m512)] = { 0 };
-    char val_raw[64] = { '0', '1', '2', '3', '4', '5', '6', '7',
+    const char val_raw[64] = { '0', '1', '2', '3', '4', '5', '6', '7',
                          '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
                          'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
                          'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
@@ -419,7 +419,7 @@ TEST(state_compress, m512_2) {
             loadcompressed512(&val_out, &buf, &mask, 0);
             EXPECT_TRUE(!diff512(and512(val, mask), val_out));

-            mask_raw[j] = 0x7f;
+            mask_raw[j] = 0x7f; // cppcheck-suppress unreadVariable
         }
     }
 }
diff --git a/unit/internal/supervector.cpp b/unit/internal/supervector.cpp
index 2432e598..ac3daf2a 100644
--- a/unit/internal/supervector.cpp
+++ b/unit/internal/supervector.cpp
@@ -508,7 +508,7 @@ TEST(SuperVectorUtilsTest,Movemask256c){
     u8 vec2[32] = {0};
     u32 r = rand() % 100 + 1;
     for(int i=0; i<32; i++) {
-        if (r & (1 << i)) {
+        if (r & (1U << i)) {
             vec[i] = 0xff;
         }
     }
diff --git a/util/ExpressionParser.rl b/util/ExpressionParser.rl
index b93f069d..02761b32 100644
--- a/util/ExpressionParser.rl
+++ b/util/ExpressionParser.rl
@@ -152,7 +152,6 @@ bool HS_CDECL readExpression(const std::string &input, std::string &expr,
     UNUSED const char *eof = pe;
     UNUSED const char *ts = p, *te = p;
     int cs;
-    UNUSED int act;

     assert(p);
     assert(pe);
diff --git a/util/cross_compile.cpp b/util/cross_compile.cpp
index 0da620ff..7431d55f 100644
--- a/util/cross_compile.cpp
+++ b/util/cross_compile.cpp
@@ -55,7 +55,6 @@ unique_ptr<hs_platform_info> xcompileReadMode(const char *s) {
     assert(!err);

     string str(s);
-    string mode = str.substr(0, str.find(":"));
     string opt = str.substr(str.find(":")+1, str.npos);

     bool found_mode = false;
diff --git a/util/ng_corpus_editor.cpp b/util/ng_corpus_editor.cpp
index c1149216..c3bfd75f 100644
--- a/util/ng_corpus_editor.cpp
+++ b/util/ng_corpus_editor.cpp
@@ -66,7 +66,7 @@ size_t choosePosition(const SeqT &corpus, CorpusProperties &props) {

 class CorpusEditor {
 public:
-    CorpusEditor(CorpusProperties &p) : props(p) {}
+    explicit CorpusEditor(CorpusProperties &p) : props(p) {}

     // Apply edits to a corpus
     void applyEdits(string &corpus);
@@ -171,7 +171,7 @@ u8 CorpusEditor::chooseByte() {

 class CorpusEditorUtf8 {
 public:
-    CorpusEditorUtf8(CorpusProperties &p) : props(p) {}
+    explicit CorpusEditorUtf8(CorpusProperties &p) : props(p) {}

     // Apply edits to a corpus.
     void applyEdits(vector<unichar> &corpus);
diff --git a/util/ng_corpus_generator.cpp b/util/ng_corpus_generator.cpp
index 1be7112d..e13abb67 100644
--- a/util/ng_corpus_generator.cpp
+++ b/util/ng_corpus_generator.cpp
@@ -223,9 +223,9 @@ class CorpusGeneratorImpl : public CorpusGenerator {
 public:
     CorpusGeneratorImpl(const NGHolder &graph_in, const ExpressionInfo &expr_in,
                         CorpusProperties &props);
-    ~CorpusGeneratorImpl() = default;
+    virtual ~CorpusGeneratorImpl() = default;

-    void generateCorpus(vector<string> &data);
+    void generateCorpus(vector<string> &data) override;

 private:
     unsigned char getRandomChar();
@@ -421,7 +421,7 @@ public:
                        CorpusProperties &props);
     ~CorpusGeneratorUtf8() = default;

-    void generateCorpus(vector<string> &data);
+    void generateCorpus(vector<string> &data) override;

 private:
     unichar getRandomChar();
diff --git a/util/ng_corpus_generator.h b/util/ng_corpus_generator.h
index f230a10d..cd84a9ab 100644
--- a/util/ng_corpus_generator.h
+++ b/util/ng_corpus_generator.h
@@ -47,7 +47,7 @@ class NGHolder;
 } // namespace ue2

 struct CorpusGenerationFailure {
-    explicit CorpusGenerationFailure(const std::string s) :
+    explicit CorpusGenerationFailure(const std::string& s) :
         message(std::move(s)) {}
     std::string message;
 };
diff --git a/util/ng_find_matches.cpp b/util/ng_find_matches.cpp
index c406ee95..0e5958cd 100644
--- a/util/ng_find_matches.cpp
+++ b/util/ng_find_matches.cpp
@@ -861,7 +861,7 @@ bool isUtf8CodePoint(const char c) {
 }

 static
-bool canReach(const NGHolder &g, const NFAEdge &e, struct fmstate &state) {
+bool canReach(const NGHolder &g, const NFAEdge &e, const struct fmstate &state) {
     auto flags = g[e].assert_flags;
     if (!flags) {
         return true;
@@ -896,7 +896,7 @@ bool canReach(const NGHolder &g, const NFAEdge &e, struct fmstate &state) {
 static
 void getAcceptMatches(const NGHolder &g, MatchSet &matches,
-                      struct fmstate &state, NFAVertex accept_vertex,
+                      const struct fmstate &state, NFAVertex accept_vertex,
                       vector &active_states) {
     assert(accept_vertex == g.accept || accept_vertex == g.acceptEod);