Merge branch 'develop' into bugfix-rose-segfault

This commit is contained in:
g. economou 2024-05-16 09:57:58 +03:00 committed by GitHub
commit b03699fade
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
201 changed files with 3022 additions and 2001 deletions

View File

@ -1221,11 +1221,17 @@ if (NOT BUILD_STATIC_LIBS)
endif () endif ()
add_subdirectory(util) add_subdirectory(util)
add_subdirectory(unit)
if (EXISTS ${CMAKE_SOURCE_DIR}/tools/CMakeLists.txt) option(BUILD_UNIT "Build Hyperscan unit tests (default TRUE)" TRUE)
if(BUILD_UNIT)
add_subdirectory(unit)
endif()
option(BUILD_TOOLS "Build Hyperscan tools (default TRUE)" TRUE)
if(EXISTS ${CMAKE_SOURCE_DIR}/tools/CMakeLists.txt AND BUILD_TOOLS)
add_subdirectory(tools) add_subdirectory(tools)
endif() endif()
if (EXISTS ${CMAKE_SOURCE_DIR}/chimera/CMakeLists.txt AND BUILD_CHIMERA) if (EXISTS ${CMAKE_SOURCE_DIR}/chimera/CMakeLists.txt AND BUILD_CHIMERA)
add_subdirectory(chimera) add_subdirectory(chimera)
endif() endif()
@ -1240,4 +1246,7 @@ if(BUILD_BENCHMARKS)
add_subdirectory(benchmarks) add_subdirectory(benchmarks)
endif() endif()
add_subdirectory(doc/dev-reference) option(BUILD_DOC "Build the Hyperscan documentation (default TRUE)" TRUE)
if(BUILD_DOC)
add_subdirectory(doc/dev-reference)
endif()

View File

@ -146,6 +146,7 @@ export CXX="/usr/pkg/gcc12/bin/g++"
``` ```
In FreeBSD similarly, you might want to install a different compiler. In FreeBSD similarly, you might want to install a different compiler.
If you want to use gcc, it is recommended to use gcc12.
You will also, as in NetBSD, need to install cmake, sqlite, boost and ragel packages. You will also, as in NetBSD, need to install cmake, sqlite, boost and ragel packages.
Using the example of gcc12 from pkg: Using the example of gcc12 from pkg:
installing the desired compiler: installing the desired compiler:
@ -164,7 +165,6 @@ the environment variables to point to this compiler:
export CC="/usr/local/bin/gcc" export CC="/usr/local/bin/gcc"
export CXX="/usr/local/bin/g++" export CXX="/usr/local/bin/g++"
``` ```
A further note in FreeBSD, on the PowerPC and ARM platforms, A further note in FreeBSD, on the PowerPC and ARM platforms,
the gcc12 package installs to a slightly different name, on FreeBSD/ppc, the gcc12 package installs to a slightly different name, on FreeBSD/ppc,
gcc12 will be found using: gcc12 will be found using:
@ -175,12 +175,6 @@ export CXX="/usr/local/bin/g++12"
Then continue with the build as below. Then continue with the build as below.
A note about running in FreeBSD: if you built a dynamically linked binary
with an alternative compiler, the libraries specific to the compiler that
built the binary will probably not be found and the base distro libraries
in /lib will be found instead. Adjust LD_LIBRARY_PATH appropriately. For
example, with gcc12 installed from pkg, one would want to use
```export LD_LIBRARY_PATH=/usr/local/lib/gcc12/```
## Configure & build ## Configure & build

View File

@ -26,32 +26,30 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
*/ */
#include <iostream>
#include <chrono> #include <chrono>
#include <cstdlib>
#include <cstring> #include <cstring>
#include <ctime> #include <ctime>
#include <cstdlib>
#include <memory>
#include <functional> #include <functional>
#include <iostream>
#include <memory>
#include "benchmarks.hpp" #include "benchmarks.hpp"
#define MAX_LOOPS 1000000000 #define MAX_LOOPS 1000000000
#define MAX_MATCHES 5 #define MAX_MATCHES 5
#define N 8 #define N 8
struct hlmMatchEntry { struct hlmMatchEntry {
size_t to; size_t to;
u32 id; u32 id;
hlmMatchEntry(size_t end, u32 identifier) : hlmMatchEntry(size_t end, u32 identifier) : to(end), id(identifier) {}
to(end), id(identifier) {}
}; };
std::vector<hlmMatchEntry> ctxt; std::vector<hlmMatchEntry> ctxt;
static static hwlmcb_rv_t hlmSimpleCallback(size_t to, u32 id,
hwlmcb_rv_t hlmSimpleCallback(size_t to, u32 id, UNUSED struct hs_scratch *scratch) {
UNUSED struct hs_scratch *scratch) {
DEBUG_PRINTF("match @%zu = %u\n", to, id); DEBUG_PRINTF("match @%zu = %u\n", to, id);
ctxt.push_back(hlmMatchEntry(to, id)); ctxt.push_back(hlmMatchEntry(to, id));
@ -59,40 +57,42 @@ hwlmcb_rv_t hlmSimpleCallback(size_t to, u32 id,
return HWLM_CONTINUE_MATCHING; return HWLM_CONTINUE_MATCHING;
} }
template<typename InitFunc, typename BenchFunc> template <typename InitFunc, typename BenchFunc>
static void run_benchmarks(int size, int loops, int max_matches, bool is_reverse, MicroBenchmark &bench, InitFunc &&init, BenchFunc &&func) { static void run_benchmarks(int size, int loops, int max_matches,
bool is_reverse, MicroBenchmark &bench,
InitFunc &&init, BenchFunc &&func) {
init(bench); init(bench);
double total_sec = 0.0; double total_sec = 0.0;
u64a total_size = 0;
double bw = 0.0;
double avg_bw = 0.0;
double max_bw = 0.0; double max_bw = 0.0;
double avg_time = 0.0; double avg_time = 0.0;
if (max_matches) { if (max_matches) {
double avg_bw = 0.0;
int pos = 0; int pos = 0;
for(int j = 0; j < max_matches - 1; j++) { for (int j = 0; j < max_matches - 1; j++) {
bench.buf[pos] = 'b'; bench.buf[pos] = 'b';
pos = (j+1) *size / max_matches ; pos = (j + 1) * size / max_matches;
bench.buf[pos] = 'a'; bench.buf[pos] = 'a';
u64a actual_size = 0; u64a actual_size = 0;
auto start = std::chrono::steady_clock::now(); auto start = std::chrono::steady_clock::now();
for(int i = 0; i < loops; i++) { for (int i = 0; i < loops; i++) {
const u8 *res = func(bench); const u8 *res = func(bench);
if (is_reverse) if (is_reverse)
actual_size += bench.buf.data() + size - res; actual_size += bench.buf.data() + size - res;
else else
actual_size += res - bench.buf.data(); actual_size += res - bench.buf.data();
} }
auto end = std::chrono::steady_clock::now(); auto end = std::chrono::steady_clock::now();
double dt = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count(); double dt = std::chrono::duration_cast<std::chrono::microseconds>(
end - start)
.count();
total_sec += dt; total_sec += dt;
/*convert microseconds to seconds*/ /*convert microseconds to seconds*/
/*calculate bandwidth*/ /*calculate bandwidth*/
bw = (actual_size / dt) * 1000000.0 / 1048576.0; double bw = (actual_size / dt) * 1000000.0 / 1048576.0;
/*std::cout << "act_size = " << act_size << std::endl; /*std::cout << "act_size = " << act_size << std::endl;
std::cout << "dt = " << dt << std::endl; std::cout << "dt = " << dt << std::endl;
std::cout << "bw = " << bw << std::endl;*/ std::cout << "bw = " << bw << std::endl;*/
avg_bw += bw; avg_bw += bw;
/*convert to MB/s*/ /*convert to MB/s*/
max_bw = std::max(bw, max_bw); max_bw = std::max(bw, max_bw);
/*calculate average time*/ /*calculate average time*/
@ -100,20 +100,22 @@ static void run_benchmarks(int size, int loops, int max_matches, bool is_reverse
} }
avg_time /= max_matches; avg_time /= max_matches;
avg_bw /= max_matches; avg_bw /= max_matches;
total_sec /= 1000000.0; total_sec /= 1000000.0;
/*convert average time to us*/ /*convert average time to us*/
printf(KMAG "%s: %u matches, %u * %u iterations," KBLU " total elapsed time =" RST " %.3f s, " printf("%-18s, %-12d, %-10d, %-6d, %-10.3f, %-9.3f, %-8.3f, %-7.3f\n",
KBLU "average time per call =" RST " %.3f μs," KBLU " max bandwidth = " RST " %.3f MB/s," KBLU " average bandwidth =" RST " %.3f MB/s \n",
bench.label, max_matches, size ,loops, total_sec, avg_time, max_bw, avg_bw); bench.label, max_matches, size ,loops, total_sec, avg_time, max_bw, avg_bw);
} else { } else {
u64a total_size = 0;
auto start = std::chrono::steady_clock::now(); auto start = std::chrono::steady_clock::now();
for (int i = 0; i < loops; i++) { for (int i = 0; i < loops; i++) {
const u8 *res = func(bench); func(bench);
} }
auto end = std::chrono::steady_clock::now(); auto end = std::chrono::steady_clock::now();
total_sec += std::chrono::duration_cast<std::chrono::microseconds>(end - start).count(); total_sec +=
std::chrono::duration_cast<std::chrono::microseconds>(end - start)
.count();
/*calculate transferred size*/ /*calculate transferred size*/
total_size = size * loops; total_size = (u64a)size * (u64a)loops;
/*calculate average time*/ /*calculate average time*/
avg_time = total_sec / loops; avg_time = total_sec / loops;
/*convert microseconds to seconds*/ /*convert microseconds to seconds*/
@ -122,130 +124,139 @@ static void run_benchmarks(int size, int loops, int max_matches, bool is_reverse
max_bw = total_size / total_sec; max_bw = total_size / total_sec;
/*convert to MB/s*/ /*convert to MB/s*/
max_bw /= 1048576.0; max_bw /= 1048576.0;
printf(KMAG "%s: no matches, %u * %u iterations," KBLU " total elapsed time =" RST " %.3f s, " printf("%-18s, %-12s, %-10d, %-6d, %-10.3f, %-9.3f, %-8.3f, %-7s\n",
KBLU "average time per call =" RST " %.3f μs ," KBLU " bandwidth = " RST " %.3f MB/s \n", bench.label, "0", size, loops, total_sec, avg_time, max_bw, "0");
bench.label, size ,loops, total_sec, avg_time, max_bw );
} }
} }
int main(){ int main(){
int matches[] = {0, MAX_MATCHES}; const int matches[] = {0, MAX_MATCHES};
std::vector<size_t> sizes; std::vector<size_t> sizes;
for (size_t i = 0; i < N; i++) sizes.push_back(16000 << i*2); for (size_t i = 0; i < N; i++)
const char charset[] = "aAaAaAaAAAaaaaAAAAaaaaAAAAAAaaaAAaaa"; sizes.push_back(16000 << i * 2);
const char charset[] = "aAaAaAaAAAaaaaAAAAaaaaAAAAAAaaaAAaaa";
printf("%-18s, %-12s, %-10s, %-6s, %-10s, %-9s, %-8s, %-7s\n", "Matcher",
"max_matches", "size", "loops", "total_sec", "avg_time", "max_bw",
"avg_bw");
for (int m = 0; m < 2; m++) { for (int m = 0; m < 2; m++) {
for (size_t i = 0; i < std::size(sizes); i++) { for (size_t i = 0; i < std::size(sizes); i++) {
MicroBenchmark bench("Shufti", sizes[i]); MicroBenchmark bench("Shufti", sizes[i]);
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench, run_benchmarks(
sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
[&](MicroBenchmark &b) { [&](MicroBenchmark &b) {
b.chars.set('a'); b.chars.set('a');
ue2::shuftiBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); ue2::shuftiBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
memset(b.buf.data(), 'b', b.size); memset(b.buf.data(), 'b', b.size);
}, },
[&](MicroBenchmark &b) { [&](MicroBenchmark &b) {
return shuftiExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size); return shuftiExec(b.lo, b.hi, b.buf.data(),
} b.buf.data() + b.size);
); });
} }
for (size_t i = 0; i < std::size(sizes); i++) { for (size_t i = 0; i < std::size(sizes); i++) {
MicroBenchmark bench("Reverse Shufti", sizes[i]); MicroBenchmark bench("Reverse Shufti", sizes[i]);
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench, run_benchmarks(
sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench,
[&](MicroBenchmark &b) { [&](MicroBenchmark &b) {
b.chars.set('a'); b.chars.set('a');
ue2::shuftiBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); ue2::shuftiBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
memset(b.buf.data(), 'b', b.size); memset(b.buf.data(), 'b', b.size);
}, },
[&](MicroBenchmark &b) { [&](MicroBenchmark &b) {
return rshuftiExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size); return rshuftiExec(b.lo, b.hi, b.buf.data(),
} b.buf.data() + b.size);
); });
} }
for (size_t i = 0; i < std::size(sizes); i++) { for (size_t i = 0; i < std::size(sizes); i++) {
MicroBenchmark bench("Truffle", sizes[i]); MicroBenchmark bench("Truffle", sizes[i]);
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench, run_benchmarks(
sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
[&](MicroBenchmark &b) { [&](MicroBenchmark &b) {
b.chars.set('a'); b.chars.set('a');
ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
memset(b.buf.data(), 'b', b.size); memset(b.buf.data(), 'b', b.size);
}, },
[&](MicroBenchmark &b) { [&](MicroBenchmark &b) {
return truffleExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size); return truffleExec(b.lo, b.hi, b.buf.data(),
} b.buf.data() + b.size);
); });
} }
for (size_t i = 0; i < std::size(sizes); i++) { for (size_t i = 0; i < std::size(sizes); i++) {
MicroBenchmark bench("Reverse Truffle", sizes[i]); MicroBenchmark bench("Reverse Truffle", sizes[i]);
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench, run_benchmarks(
sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench,
[&](MicroBenchmark &b) { [&](MicroBenchmark &b) {
b.chars.set('a'); b.chars.set('a');
ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
memset(b.buf.data(), 'b', b.size); memset(b.buf.data(), 'b', b.size);
}, },
[&](MicroBenchmark &b) { [&](MicroBenchmark &b) {
return rtruffleExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size); return rtruffleExec(b.lo, b.hi, b.buf.data(),
} b.buf.data() + b.size);
); });
} }
for (size_t i = 0; i < std::size(sizes); i++) { for (size_t i = 0; i < std::size(sizes); i++) {
MicroBenchmark bench("Vermicelli", sizes[i]); MicroBenchmark bench("Vermicelli", sizes[i]);
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench, run_benchmarks(
sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
[&](MicroBenchmark &b) { [&](MicroBenchmark &b) {
b.chars.set('a'); b.chars.set('a');
ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
memset(b.buf.data(), 'b', b.size); memset(b.buf.data(), 'b', b.size);
}, },
[&](MicroBenchmark &b) { [&](MicroBenchmark &b) {
return vermicelliExec('a', 'b', b.buf.data(), b.buf.data() + b.size); return vermicelliExec('a', 'b', b.buf.data(),
} b.buf.data() + b.size);
); });
} }
for (size_t i = 0; i < std::size(sizes); i++) { for (size_t i = 0; i < std::size(sizes); i++) {
MicroBenchmark bench("Reverse Vermicelli", sizes[i]); MicroBenchmark bench("Reverse Vermicelli", sizes[i]);
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench, run_benchmarks(
sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench,
[&](MicroBenchmark &b) { [&](MicroBenchmark &b) {
b.chars.set('a'); b.chars.set('a');
ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
memset(b.buf.data(), 'b', b.size); memset(b.buf.data(), 'b', b.size);
}, },
[&](MicroBenchmark &b) { [&](MicroBenchmark &b) {
return rvermicelliExec('a', 'b', b.buf.data(), b.buf.data() + b.size); return rvermicelliExec('a', 'b', b.buf.data(),
} b.buf.data() + b.size);
); });
} }
for (size_t i = 0; i < std::size(sizes); i++) { for (size_t i = 0; i < std::size(sizes); i++) {
//we imitate the noodle unit tests // we imitate the noodle unit tests
std::string str; std::string str;
const size_t char_len = 5; const size_t char_len = 5;
str.resize(char_len + 1); str.resize(char_len + 1);
for (size_t j=0; j < char_len; j++) { for (size_t j = 0; j < char_len; j++) {
srand (time(NULL)); srand(time(NULL));
int key = rand() % + 36 ; int key = rand() % +36;
str[char_len] = charset[key]; str[char_len] = charset[key];
str[char_len + 1] = '\0'; str[char_len + 1] = '\0';
} }
MicroBenchmark bench("Noodle", sizes[i]); MicroBenchmark bench("Noodle", sizes[i]);
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench, run_benchmarks(
sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
[&](MicroBenchmark &b) { [&](MicroBenchmark &b) {
ctxt.clear(); ctxt.clear();
memset(b.buf.data(), 'a', b.size); memset(b.buf.data(), 'a', b.size);
u32 id = 1000; u32 id = 1000;
ue2::hwlmLiteral lit(str, true, id); ue2::hwlmLiteral lit(str, true, id);
b.nt = ue2::noodBuildTable(lit); b.nt = ue2::noodBuildTable(lit);
assert(b.nt != nullptr); assert(b.nt.get() != nullptr);
}, },
[&](MicroBenchmark &b) { [&](MicroBenchmark &b) {
noodExec(b.nt.get(), b.buf.data(), b.size, 0, hlmSimpleCallback, &b.scratch); noodExec(b.nt.get(), b.buf.data(), b.size, 0,
hlmSimpleCallback, &b.scratch);
return b.buf.data() + b.size; return b.buf.data() + b.size;
} });
);
} }
} }

View File

@ -26,44 +26,32 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
*/ */
#include "hwlm/hwlm_literal.h"
#include "hwlm/noodle_build.h"
#include "hwlm/noodle_engine.h"
#include "hwlm/noodle_internal.h"
#include "nfa/shufti.h" #include "nfa/shufti.h"
#include "nfa/shufticompile.h" #include "nfa/shufticompile.h"
#include "nfa/truffle.h" #include "nfa/truffle.h"
#include "nfa/trufflecompile.h" #include "nfa/trufflecompile.h"
#include "nfa/vermicelli.hpp" #include "nfa/vermicelli.hpp"
#include "hwlm/noodle_build.h"
#include "hwlm/noodle_engine.h"
#include "hwlm/noodle_internal.h"
#include "hwlm/hwlm_literal.h"
#include "util/bytecode_ptr.h"
#include "scratch.h" #include "scratch.h"
#include "util/bytecode_ptr.h"
/*define colour control characters*/ class MicroBenchmark {
#define RST "\x1B[0m"
#define KRED "\x1B[31m"
#define KGRN "\x1B[32m"
#define KYEL "\x1B[33m"
#define KBLU "\x1B[34m"
#define KMAG "\x1B[35m"
#define KCYN "\x1B[36m"
#define KWHT "\x1B[37m"
class MicroBenchmark
{
public: public:
char const *label; char const *label;
size_t size; size_t size;
// Shufti/Truffle // Shufti/Truffle
m128 lo, hi; m128 lo, hi;
ue2::CharReach chars; ue2::CharReach chars;
std::vector<u8> buf; std::vector<u8> buf;
// Noodle // Noodle
struct hs_scratch scratch; struct hs_scratch scratch;
ue2::bytecode_ptr<noodTable> nt; ue2::bytecode_ptr<noodTable> nt;
MicroBenchmark(char const *label_, size_t size_) MicroBenchmark(char const *label_, size_t size_)
:label(label_), size(size_), buf(size_) { : label(label_), size(size_), buf(size_){};
};
}; };

View File

@ -6,10 +6,10 @@ if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
set(FREEBSD true) set(FREEBSD true)
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
#FIXME: find a nicer and more general way of doing this #FIXME: find a nicer and more general way of doing this
if(CMAKE_C_COMPILER MATCHES "/usr/local/bin/gcc12") if(CMAKE_C_COMPILER MATCHES "/usr/local/bin/gcc13")
set(CMAKE_BUILD_RPATH "/usr/local/lib/gcc12")
elseif(CMAKE_C_COMPILER MATCHES "/usr/local/bin/gcc13")
set(CMAKE_BUILD_RPATH "/usr/local/lib/gcc13") set(CMAKE_BUILD_RPATH "/usr/local/lib/gcc13")
elseif(ARCH_AARCH64 AND (CMAKE_C_COMPILER MATCHES "/usr/local/bin/gcc12"))
set(CMAKE_BUILD_RPATH "/usr/local/lib/gcc12")
endif() endif()
endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")

View File

@ -19,6 +19,7 @@ else()
set(SPHINX_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/_build") set(SPHINX_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/_build")
set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees") set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees")
set(SPHINX_HTML_DIR "${CMAKE_CURRENT_BINARY_DIR}/html") set(SPHINX_HTML_DIR "${CMAKE_CURRENT_BINARY_DIR}/html")
set(SPHINX_MAN_DIR "${CMAKE_CURRENT_BINARY_DIR}/man")
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in" configure_file("${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in"
"${CMAKE_CURRENT_BINARY_DIR}/conf.py" @ONLY) "${CMAKE_CURRENT_BINARY_DIR}/conf.py" @ONLY)
@ -32,4 +33,14 @@ add_custom_target(dev-reference
"${SPHINX_HTML_DIR}" "${SPHINX_HTML_DIR}"
DEPENDS dev-reference-doxygen DEPENDS dev-reference-doxygen
COMMENT "Building HTML dev reference with Sphinx") COMMENT "Building HTML dev reference with Sphinx")
add_custom_target(dev-reference-man
${SPHINX_BUILD}
-b man
-c "${CMAKE_CURRENT_BINARY_DIR}"
-d "${SPHINX_CACHE_DIR}"
"${CMAKE_CURRENT_SOURCE_DIR}"
"${SPHINX_MAN_DIR}"
DEPENDS dev-reference-doxygen
COMMENT "Building man page reference with Sphinx")
endif() endif()

View File

@ -11,10 +11,10 @@ Introduction
************ ************
Chimera is a software regular expression matching engine that is a hybrid of Chimera is a software regular expression matching engine that is a hybrid of
Hyperscan and PCRE. The design goals of Chimera are to fully support PCRE Vectorscan and PCRE. The design goals of Chimera are to fully support PCRE
syntax as well as to take advantage of the high performance nature of Hyperscan. syntax as well as to take advantage of the high performance nature of Vectorscan.
Chimera inherits the design guideline of Hyperscan with C APIs for compilation Chimera inherits the design guideline of Vectorscan with C APIs for compilation
and scanning. and scanning.
The Chimera API itself is composed of two major components: The Chimera API itself is composed of two major components:
@ -65,13 +65,13 @@ For a given database, Chimera provides several guarantees:
.. note:: Chimera is designed to have the same matching behavior as PCRE, .. note:: Chimera is designed to have the same matching behavior as PCRE,
including greedy/ungreedy, capturing, etc. Chimera reports both including greedy/ungreedy, capturing, etc. Chimera reports both
**start offset** and **end offset** for each match like PCRE. Different **start offset** and **end offset** for each match like PCRE. Different
from the fashion of reporting all matches in Hyperscan, Chimera only reports from the fashion of reporting all matches in Vectorscan, Chimera only reports
non-overlapping matches. For example, the pattern :regexp:`/foofoo/` will non-overlapping matches. For example, the pattern :regexp:`/foofoo/` will
match ``foofoofoofoo`` at offsets (0, 6) and (6, 12). match ``foofoofoofoo`` at offsets (0, 6) and (6, 12).
.. note:: Since Chimera is a hybrid of Hyperscan and PCRE in order to support .. note:: Since Chimera is a hybrid of Vectorscan and PCRE in order to support
full PCRE syntax, there will be extra performance overhead compared to full PCRE syntax, there will be extra performance overhead compared to
Hyperscan-only solution. Please always use Hyperscan for better performance Vectorscan-only solution. Please always use Vectorscan for better performance
unless you need full PCRE syntax support. unless you need full PCRE syntax support.
See :ref:`chruntime` for more details See :ref:`chruntime` for more details
@ -83,12 +83,12 @@ Requirements
The PCRE library (http://pcre.org/) version 8.41 is required for Chimera. The PCRE library (http://pcre.org/) version 8.41 is required for Chimera.
.. note:: Since Chimera needs to reference PCRE internal functions, please place the PCRE source .. note:: Since Chimera needs to reference PCRE internal functions, please place the PCRE source
directory under Hyperscan root directory in order to build Chimera. directory under Vectorscan root directory in order to build Chimera.
Besides this, both hardware and software requirements of Chimera are the same as Hyperscan. Besides this, both hardware and software requirements of Chimera are the same as Vectorscan.
See :ref:`hardware` and :ref:`software` for more details. See :ref:`hardware` and :ref:`software` for more details.
.. note:: Building Hyperscan will automatically generate Chimera library. .. note:: Building Vectorscan will automatically generate Chimera library.
Currently only static library is supported for Chimera, so please Currently only static library is supported for Chimera, so please
use the static build type when configuring CMake build options. use the static build type when configuring CMake build options.
@ -119,7 +119,7 @@ databases:
Compilation allows the Chimera library to analyze the given pattern(s) and Compilation allows the Chimera library to analyze the given pattern(s) and
pre-determine how to scan for these patterns in an optimized fashion using pre-determine how to scan for these patterns in an optimized fashion using
Hyperscan and PCRE. Vectorscan and PCRE.
=============== ===============
Pattern Support Pattern Support
@ -134,7 +134,7 @@ Semantics
========= =========
Chimera supports the exact same semantics of PCRE library. Moreover, it supports Chimera supports the exact same semantics of PCRE library. Moreover, it supports
multiple simultaneous pattern matching like Hyperscan and the multiple matches multiple simultaneous pattern matching like Vectorscan and the multiple matches
will be reported in order by end offset. will be reported in order by end offset.
.. _chruntime: .. _chruntime:

View File

@ -9,7 +9,7 @@ Compiling Patterns
Building a Database Building a Database
******************* *******************
The Hyperscan compiler API accepts regular expressions and converts them into a The Vectorscan compiler API accepts regular expressions and converts them into a
compiled pattern database that can then be used to scan data. compiled pattern database that can then be used to scan data.
The API provides three functions that compile regular expressions into The API provides three functions that compile regular expressions into
@ -24,7 +24,7 @@ databases:
#. :c:func:`hs_compile_ext_multi`: compiles an array of expressions as above, #. :c:func:`hs_compile_ext_multi`: compiles an array of expressions as above,
but allows :ref:`extparam` to be specified for each expression. but allows :ref:`extparam` to be specified for each expression.
Compilation allows the Hyperscan library to analyze the given pattern(s) and Compilation allows the Vectorscan library to analyze the given pattern(s) and
pre-determine how to scan for these patterns in an optimized fashion that would pre-determine how to scan for these patterns in an optimized fashion that would
be far too expensive to compute at run-time. be far too expensive to compute at run-time.
@ -48,10 +48,10 @@ To compile patterns to be used in streaming mode, the ``mode`` parameter of
block mode requires the use of :c:member:`HS_MODE_BLOCK` and vectored mode block mode requires the use of :c:member:`HS_MODE_BLOCK` and vectored mode
requires the use of :c:member:`HS_MODE_VECTORED`. A pattern database compiled requires the use of :c:member:`HS_MODE_VECTORED`. A pattern database compiled
for one mode (streaming, block or vectored) can only be used in that mode. The for one mode (streaming, block or vectored) can only be used in that mode. The
version of Hyperscan used to produce a compiled pattern database must match the version of Vectorscan used to produce a compiled pattern database must match the
version of Hyperscan used to scan with it. version of Vectorscan used to scan with it.
Hyperscan provides support for targeting a database at a particular CPU Vectorscan provides support for targeting a database at a particular CPU
platform; see :ref:`instr_specialization` for details. platform; see :ref:`instr_specialization` for details.
===================== =====================
@ -75,14 +75,14 @@ characters exist in regular grammar like ``[``, ``]``, ``(``, ``)``, ``{``,
While in the pure literal case, all these meta characters lose their extra meanings While in the pure literal case, all these meta characters lose their extra meanings
and are treated as just common ASCII codes. and are treated as just common ASCII codes.
Hyperscan is initially designed to process common regular expressions. It is Vectorscan is initially designed to process common regular expressions. It is
hence embedded with a complex parser to do comprehensive regular grammar hence embedded with a complex parser to do comprehensive regular grammar
interpretation. Particularly, the identification of above meta characters is the interpretation. Particularly, the identification of above meta characters is the
basic step for the interpretation of far more complex regular grammars. basic step for the interpretation of far more complex regular grammars.
However in real cases, patterns may not always be regular expressions. They However in real cases, patterns may not always be regular expressions. They
could just be pure literals. Problem will come if the pure literals contain could just be pure literals. Problem will come if the pure literals contain
regular meta characters. Supposing fed directly into traditional Hyperscan regular meta characters. Supposing fed directly into traditional Vectorscan
compile API, all these meta characters will be interpreted in predefined ways, compile API, all these meta characters will be interpreted in predefined ways,
which is unnecessary and the result is totally out of expectation. To avoid which is unnecessary and the result is totally out of expectation. To avoid
such misunderstanding by traditional API, users have to preprocess these such misunderstanding by traditional API, users have to preprocess these
@ -90,7 +90,7 @@ literal patterns by converting the meta characters into some other formats:
either by adding a backslash ``\`` before certain meta characters, or by either by adding a backslash ``\`` before certain meta characters, or by
converting all the characters into a hexadecimal representation. converting all the characters into a hexadecimal representation.
In ``v5.2.0``, Hyperscan introduces 2 new compile APIs for pure literal patterns: In ``v5.2.0``, Vectorscan introduces 2 new compile APIs for pure literal patterns:
#. :c:func:`hs_compile_lit`: compiles a single pure literal into a pattern #. :c:func:`hs_compile_lit`: compiles a single pure literal into a pattern
database. database.
@ -106,7 +106,7 @@ content directly into these APIs without worrying about writing regular meta
characters in their patterns. No preprocessing work is needed any more. characters in their patterns. No preprocessing work is needed any more.
For new APIs, the ``length`` of each literal pattern is a newly added parameter. For new APIs, the ``length`` of each literal pattern is a newly added parameter.
Hyperscan needs to locate the end position of the input expression via clearly Vectorscan needs to locate the end position of the input expression via clearly
knowing each literal's length, not by simply identifying character ``\0`` of a knowing each literal's length, not by simply identifying character ``\0`` of a
string. string.
@ -127,19 +127,19 @@ Supported flags: :c:member:`HS_FLAG_CASELESS`, :c:member:`HS_FLAG_SINGLEMATCH`,
Pattern Support Pattern Support
*************** ***************
Hyperscan supports the pattern syntax used by the PCRE library ("libpcre"), Vectorscan supports the pattern syntax used by the PCRE library ("libpcre"),
described at <http://www.pcre.org/>. However, not all constructs available in described at <http://www.pcre.org/>. However, not all constructs available in
libpcre are supported. The use of unsupported constructs will result in libpcre are supported. The use of unsupported constructs will result in
compilation errors. compilation errors.
The version of PCRE used to validate Hyperscan's interpretation of this syntax The version of PCRE used to validate Vectorscan's interpretation of this syntax
is 8.41 or above. is 8.41 or above.
==================== ====================
Supported Constructs Supported Constructs
==================== ====================
The following regex constructs are supported by Hyperscan: The following regex constructs are supported by Vectorscan:
* Literal characters and strings, with all libpcre quoting and character * Literal characters and strings, with all libpcre quoting and character
escapes. escapes.
@ -177,7 +177,7 @@ The following regex constructs are supported by Hyperscan:
:c:member:`HS_FLAG_SINGLEMATCH` flag is on for that pattern. :c:member:`HS_FLAG_SINGLEMATCH` flag is on for that pattern.
* Lazy modifiers (:regexp:`?` appended to another quantifier, e.g. * Lazy modifiers (:regexp:`?` appended to another quantifier, e.g.
:regexp:`\\w+?`) are supported but ignored (as Hyperscan reports all :regexp:`\\w+?`) are supported but ignored (as Vectorscan reports all
matches). matches).
* Parenthesization, including the named and unnamed capturing and * Parenthesization, including the named and unnamed capturing and
@ -219,15 +219,15 @@ The following regex constructs are supported by Hyperscan:
.. note:: At this time, not all patterns can be successfully compiled with the .. note:: At this time, not all patterns can be successfully compiled with the
:c:member:`HS_FLAG_SOM_LEFTMOST` flag, which enables per-pattern support for :c:member:`HS_FLAG_SOM_LEFTMOST` flag, which enables per-pattern support for
:ref:`som`. The patterns that support this flag are a subset of patterns that :ref:`som`. The patterns that support this flag are a subset of patterns that
can be successfully compiled with Hyperscan; notably, many bounded repeat can be successfully compiled with Vectorscan; notably, many bounded repeat
forms that can be compiled with Hyperscan without the Start of Match flag forms that can be compiled with Vectorscan without the Start of Match flag
enabled cannot be compiled with the flag enabled. enabled cannot be compiled with the flag enabled.
====================== ======================
Unsupported Constructs Unsupported Constructs
====================== ======================
The following regex constructs are not supported by Hyperscan: The following regex constructs are not supported by Vectorscan:
* Backreferences and capturing sub-expressions. * Backreferences and capturing sub-expressions.
* Arbitrary zero-width assertions. * Arbitrary zero-width assertions.
@ -246,32 +246,32 @@ The following regex constructs are not supported by Hyperscan:
Semantics Semantics
********* *********
While Hyperscan follows libpcre syntax, it provides different semantics. The While Vectorscan follows libpcre syntax, it provides different semantics. The
major departures from libpcre semantics are motivated by the requirements of major departures from libpcre semantics are motivated by the requirements of
streaming and multiple simultaneous pattern matching. streaming and multiple simultaneous pattern matching.
The major departures from libpcre semantics are: The major departures from libpcre semantics are:
#. **Multiple pattern matching**: Hyperscan allows matches to be reported for #. **Multiple pattern matching**: Vectorscan allows matches to be reported for
several patterns simultaneously. This is not equivalent to separating the several patterns simultaneously. This is not equivalent to separating the
patterns by :regexp:`|` in libpcre, which evaluates alternations patterns by :regexp:`|` in libpcre, which evaluates alternations
left-to-right. left-to-right.
#. **Lack of ordering**: the multiple matches that Hyperscan produces are not #. **Lack of ordering**: the multiple matches that Vectorscan produces are not
guaranteed to be ordered, although they will always fall within the bounds of guaranteed to be ordered, although they will always fall within the bounds of
the current scan. the current scan.
#. **End offsets only**: Hyperscan's default behaviour is only to report the end #. **End offsets only**: Vectorscan's default behaviour is only to report the end
offset of a match. Reporting of the start offset can be enabled with offset of a match. Reporting of the start offset can be enabled with
per-expression flags at pattern compile time. See :ref:`som` for details. per-expression flags at pattern compile time. See :ref:`som` for details.
#. **"All matches" reported**: scanning :regexp:`/foo.*bar/` against #. **"All matches" reported**: scanning :regexp:`/foo.*bar/` against
``fooxyzbarbar`` will return two matches from Hyperscan -- at the points ``fooxyzbarbar`` will return two matches from Vectorscan -- at the points
corresponding to the ends of ``fooxyzbar`` and ``fooxyzbarbar``. In contrast, corresponding to the ends of ``fooxyzbar`` and ``fooxyzbarbar``. In contrast,
libpcre semantics by default would report only one match at ``fooxyzbarbar`` libpcre semantics by default would report only one match at ``fooxyzbarbar``
(greedy semantics) or, if non-greedy semantics were switched on, one match at (greedy semantics) or, if non-greedy semantics were switched on, one match at
``fooxyzbar``. This means that switching between greedy and non-greedy ``fooxyzbar``. This means that switching between greedy and non-greedy
semantics is a no-op in Hyperscan. semantics is a no-op in Vectorscan.
To support libpcre quantifier semantics while accurately reporting streaming To support libpcre quantifier semantics while accurately reporting streaming
matches at the time they occur is impossible. For example, consider the pattern matches at the time they occur is impossible. For example, consider the pattern
@ -299,7 +299,7 @@ as in block 3 -- which would constitute a better match for the pattern.
Start of Match Start of Match
============== ==============
In standard operation, Hyperscan will only provide the end offset of a match In standard operation, Vectorscan will only provide the end offset of a match
when the match callback is called. If the :c:member:`HS_FLAG_SOM_LEFTMOST` flag when the match callback is called. If the :c:member:`HS_FLAG_SOM_LEFTMOST` flag
is specified for a particular pattern, then the same set of matches is is specified for a particular pattern, then the same set of matches is
returned, but each match will also provide the leftmost possible start offset returned, but each match will also provide the leftmost possible start offset
@ -308,7 +308,7 @@ corresponding to its end offset.
Using the SOM flag entails a number of trade-offs and limitations: Using the SOM flag entails a number of trade-offs and limitations:
* Reduced pattern support: For many patterns, tracking SOM is complex and can * Reduced pattern support: For many patterns, tracking SOM is complex and can
result in Hyperscan failing to compile a pattern with a "Pattern too result in Vectorscan failing to compile a pattern with a "Pattern too
large" error, even if the pattern is supported in normal operation. large" error, even if the pattern is supported in normal operation.
* Increased stream state: At scan time, state space is required to track * Increased stream state: At scan time, state space is required to track
potential SOM offsets, and this must be stored in persistent stream state in potential SOM offsets, and this must be stored in persistent stream state in
@ -316,20 +316,20 @@ Using the SOM flag entails a number of trade-offs and limitations:
required to match a pattern. required to match a pattern.
* Performance overhead: Similarly, there is generally a performance cost * Performance overhead: Similarly, there is generally a performance cost
associated with tracking SOM. associated with tracking SOM.
* Incompatible features: Some other Hyperscan pattern flags (such as * Incompatible features: Some other Vectorscan pattern flags (such as
:c:member:`HS_FLAG_SINGLEMATCH` and :c:member:`HS_FLAG_PREFILTER`) can not be :c:member:`HS_FLAG_SINGLEMATCH` and :c:member:`HS_FLAG_PREFILTER`) can not be
used in combination with SOM. Specifying them together with used in combination with SOM. Specifying them together with
:c:member:`HS_FLAG_SOM_LEFTMOST` will result in a compilation error. :c:member:`HS_FLAG_SOM_LEFTMOST` will result in a compilation error.
In streaming mode, the amount of precision delivered by SOM can be controlled In streaming mode, the amount of precision delivered by SOM can be controlled
with the SOM horizon flags. These instruct Hyperscan to deliver accurate SOM with the SOM horizon flags. These instruct Vectorscan to deliver accurate SOM
information within a certain distance of the end offset, and return a special information within a certain distance of the end offset, and return a special
start offset of :c:member:`HS_OFFSET_PAST_HORIZON` otherwise. Specifying a start offset of :c:member:`HS_OFFSET_PAST_HORIZON` otherwise. Specifying a
small or medium SOM horizon will usually reduce the stream state required for a small or medium SOM horizon will usually reduce the stream state required for a
given database. given database.
.. note:: In streaming mode, the start offset returned for a match may refer to .. note:: In streaming mode, the start offset returned for a match may refer to
a point in the stream *before* the current block being scanned. Hyperscan a point in the stream *before* the current block being scanned. Vectorscan
provides no facility for accessing earlier blocks; if the calling application provides no facility for accessing earlier blocks; if the calling application
needs to inspect historical data, then it must store it itself. needs to inspect historical data, then it must store it itself.
@ -341,7 +341,7 @@ Extended Parameters
In some circumstances, more control over the matching behaviour of a pattern is In some circumstances, more control over the matching behaviour of a pattern is
required than can be specified easily using regular expression syntax. For required than can be specified easily using regular expression syntax. For
these scenarios, Hyperscan provides the :c:func:`hs_compile_ext_multi` function these scenarios, Vectorscan provides the :c:func:`hs_compile_ext_multi` function
that allows a set of "extended parameters" to be set on a per-pattern basis. that allows a set of "extended parameters" to be set on a per-pattern basis.
Extended parameters are specified using an :c:type:`hs_expr_ext_t` structure, Extended parameters are specified using an :c:type:`hs_expr_ext_t` structure,
@ -383,18 +383,18 @@ section.
Prefiltering Mode Prefiltering Mode
================= =================
Hyperscan provides a per-pattern flag, :c:member:`HS_FLAG_PREFILTER`, which can Vectorscan provides a per-pattern flag, :c:member:`HS_FLAG_PREFILTER`, which can
be used to implement a prefilter for a pattern than Hyperscan would not be used to implement a prefilter for a pattern that Vectorscan would not
ordinarily support. ordinarily support.
This flag instructs Hyperscan to compile an "approximate" version of this This flag instructs Vectorscan to compile an "approximate" version of this
pattern for use in a prefiltering application, even if Hyperscan does not pattern for use in a prefiltering application, even if Vectorscan does not
support the pattern in normal operation. support the pattern in normal operation.
The set of matches returned when this flag is used is guaranteed to be a The set of matches returned when this flag is used is guaranteed to be a
superset of the matches specified by the non-prefiltering expression. superset of the matches specified by the non-prefiltering expression.
If the pattern contains pattern constructs not supported by Hyperscan (such as If the pattern contains pattern constructs not supported by Vectorscan (such as
zero-width assertions, back-references or conditional references) these zero-width assertions, back-references or conditional references) these
constructs will be replaced internally with broader constructs that may match constructs will be replaced internally with broader constructs that may match
more often. more often.
@ -404,7 +404,7 @@ back-reference :regexp:`\\1`. In prefiltering mode, this pattern might be
approximated by having its back-reference replaced with its referent, forming approximated by having its back-reference replaced with its referent, forming
:regexp:`/\\w+ again \\w+/`. :regexp:`/\\w+ again \\w+/`.
Furthermore, in prefiltering mode Hyperscan may simplify a pattern that would Furthermore, in prefiltering mode Vectorscan may simplify a pattern that would
otherwise return a "Pattern too large" error at compile time, or for performance otherwise return a "Pattern too large" error at compile time, or for performance
reasons (subject to the matching guarantee above). reasons (subject to the matching guarantee above).
@ -422,22 +422,22 @@ matches for the pattern.
Instruction Set Specialization Instruction Set Specialization
****************************** ******************************
Hyperscan is able to make use of several modern instruction set features found Vectorscan is able to make use of several modern instruction set features found
on x86 processors to provide improvements in scanning performance. on x86 processors to provide improvements in scanning performance.
Some of these features are selected when the library is built; for example, Some of these features are selected when the library is built; for example,
Hyperscan will use the native ``POPCNT`` instruction on processors where it is Vectorscan will use the native ``POPCNT`` instruction on processors where it is
available and the library has been optimized for the host architecture. available and the library has been optimized for the host architecture.
.. note:: By default, the Hyperscan runtime is built with the ``-march=native`` .. note:: By default, the Vectorscan runtime is built with the ``-march=native``
compiler flag and (where possible) will make use of all instructions known by compiler flag and (where possible) will make use of all instructions known by
the host's C compiler. the host's C compiler.
To use some instruction set features, however, Hyperscan must build a To use some instruction set features, however, Vectorscan must build a
specialized database to support them. This means that the target platform must specialized database to support them. This means that the target platform must
be specified at pattern compile time. be specified at pattern compile time.
The Hyperscan compiler API functions all accept an optional The Vectorscan compiler API functions all accept an optional
:c:type:`hs_platform_info_t` argument, which describes the target platform :c:type:`hs_platform_info_t` argument, which describes the target platform
for the database to be built. If this argument is NULL, the database will be for the database to be built. If this argument is NULL, the database will be
targeted at the current host platform. targeted at the current host platform.
@ -467,7 +467,7 @@ See :ref:`api_constants` for the full list of CPU tuning and feature flags.
Approximate matching Approximate matching
******************** ********************
Hyperscan provides an experimental approximate matching mode, which will match Vectorscan provides an experimental approximate matching mode, which will match
patterns within a given edit distance. The exact matching behavior is defined as patterns within a given edit distance. The exact matching behavior is defined as
follows: follows:
@ -492,7 +492,7 @@ follows:
Here are a few examples of approximate matching: Here are a few examples of approximate matching:
* Pattern :regexp:`/foo/` can match ``foo`` when using regular Hyperscan * Pattern :regexp:`/foo/` can match ``foo`` when using regular Vectorscan
matching behavior. With approximate matching within edit distance 2, the matching behavior. With approximate matching within edit distance 2, the
pattern will produce matches when scanned against ``foo``, ``foooo``, ``f00``, pattern will produce matches when scanned against ``foo``, ``foooo``, ``f00``,
``f``, and anything else that lies within edit distance 2 of matching corpora ``f``, and anything else that lies within edit distance 2 of matching corpora
@ -513,7 +513,7 @@ matching support. Here they are, in a nutshell:
* Reduced pattern support: * Reduced pattern support:
* For many patterns, approximate matching is complex and can result in * For many patterns, approximate matching is complex and can result in
Hyperscan failing to compile a pattern with a "Pattern too large" error, Vectorscan failing to compile a pattern with a "Pattern too large" error,
even if the pattern is supported in normal operation. even if the pattern is supported in normal operation.
* Additionally, some patterns cannot be approximately matched because they * Additionally, some patterns cannot be approximately matched because they
reduce to so-called "vacuous" patterns (patterns that match everything). For reduce to so-called "vacuous" patterns (patterns that match everything). For
@ -548,7 +548,7 @@ Logical Combinations
******************** ********************
For situations when a user requires behaviour that depends on the presence or For situations when a user requires behaviour that depends on the presence or
absence of matches from groups of patterns, Hyperscan provides support for the absence of matches from groups of patterns, Vectorscan provides support for the
logical combination of patterns in a given pattern set, with three operators: logical combination of patterns in a given pattern set, with three operators:
``NOT``, ``AND`` and ``OR``. ``NOT``, ``AND`` and ``OR``.
@ -561,7 +561,7 @@ offset is *true* if the expression it refers to is *false* at this offset.
For example, ``NOT 101`` means that expression 101 has not yet matched at this For example, ``NOT 101`` means that expression 101 has not yet matched at this
offset. offset.
A logical combination is passed to Hyperscan at compile time as an expression. A logical combination is passed to Vectorscan at compile time as an expression.
This combination expression will raise matches at every offset where one of its This combination expression will raise matches at every offset where one of its
sub-expressions matches and the logical value of the whole expression is *true*. sub-expressions matches and the logical value of the whole expression is *true*.
@ -603,7 +603,7 @@ In a logical combination expression:
* Whitespace is ignored. * Whitespace is ignored.
To use a logical combination expression, it must be passed to one of the To use a logical combination expression, it must be passed to one of the
Hyperscan compile functions (:c:func:`hs_compile_multi`, Vectorscan compile functions (:c:func:`hs_compile_multi`,
:c:func:`hs_compile_ext_multi`) along with the :c:member:`HS_FLAG_COMBINATION` flag, :c:func:`hs_compile_ext_multi`) along with the :c:member:`HS_FLAG_COMBINATION` flag,
which identifies the pattern as a logical combination expression. The patterns which identifies the pattern as a logical combination expression. The patterns
referred to in the logical combination expression must be compiled together in referred to in the logical combination expression must be compiled together in
@ -613,7 +613,7 @@ When an expression has the :c:member:`HS_FLAG_COMBINATION` flag set, it ignores
all other flags except the :c:member:`HS_FLAG_SINGLEMATCH` flag and the all other flags except the :c:member:`HS_FLAG_SINGLEMATCH` flag and the
:c:member:`HS_FLAG_QUIET` flag. :c:member:`HS_FLAG_QUIET` flag.
Hyperscan will accept logical combination expressions at compile time that Vectorscan will accept logical combination expressions at compile time that
evaluate to *true* when no patterns have matched, and report the match for evaluate to *true* when no patterns have matched, and report the match for
combination at end of data if no patterns have matched; for example: :: combination at end of data if no patterns have matched; for example: ::

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# Hyperscan documentation build configuration file, created by # Vectorscan documentation build configuration file, created by
# sphinx-quickstart on Tue Sep 29 15:59:19 2015. # sphinx-quickstart on Tue Sep 29 15:59:19 2015.
# #
# This file is execfile()d with the current directory set to its # This file is execfile()d with the current directory set to its
@ -43,8 +43,8 @@ source_suffix = '.rst'
master_doc = 'index' master_doc = 'index'
# General information about the project. # General information about the project.
project = u'Hyperscan' project = u'Vectorscan'
copyright = u'2015-2018, Intel Corporation' copyright = u'2015-2020, Intel Corporation; 2020-2024, VectorCamp; and other contributors'
# The version info for the project you're documenting, acts as replacement for # The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the # |version| and |release|, also used in various other places throughout the
@ -202,7 +202,7 @@ latex_elements = {
# (source start file, target name, title, # (source start file, target name, title,
# author, documentclass [howto, manual, or own class]). # author, documentclass [howto, manual, or own class]).
latex_documents = [ latex_documents = [
('index', 'Hyperscan.tex', u'Hyperscan Documentation', ('index', 'Hyperscan.tex', u'Vectorscan Documentation',
u'Intel Corporation', 'manual'), u'Intel Corporation', 'manual'),
] ]
@ -232,8 +232,8 @@ latex_documents = [
# One entry per manual page. List of tuples # One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section). # (source start file, name, description, authors, manual section).
man_pages = [ man_pages = [
('index', 'hyperscan', u'Hyperscan Documentation', ('index', 'vectorscan', u'Vectorscan Documentation',
[u'Intel Corporation'], 1) [u'Intel Corporation'], 7)
] ]
# If true, show URL addresses after external links. # If true, show URL addresses after external links.
@ -246,8 +246,8 @@ man_pages = [
# (source start file, target name, title, author, # (source start file, target name, title, author,
# dir menu entry, description, category) # dir menu entry, description, category)
texinfo_documents = [ texinfo_documents = [
('index', 'Hyperscan', u'Hyperscan Documentation', ('index', 'Vectorscan', u'Vectorscan Documentation',
u'Intel Corporation', 'Hyperscan', 'High-performance regular expression matcher.', u'Intel Corporation; VectorCamp', 'Vectorscan', 'High-performance regular expression matcher.',
'Miscellaneous'), 'Miscellaneous'),
] ]

View File

@ -7,43 +7,41 @@ Getting Started
Very Quick Start Very Quick Start
**************** ****************
#. Clone Hyperscan :: #. Clone Vectorscan ::
cd <where-you-want-hyperscan-source> cd <where-you-want-vectorscan-source>
git clone git://github.com/intel/hyperscan git clone https://github.com/VectorCamp/vectorscan
#. Configure Hyperscan #. Configure Vectorscan
Ensure that you have the correct :ref:`dependencies <software>` present, Ensure that you have the correct :ref:`dependencies <software>` present,
and then: and then:
:: ::
cd <where-you-want-to-build-hyperscan> cd <where-you-want-to-build-vectorscan>
mkdir <build-dir> mkdir <build-dir>
cd <build-dir> cd <build-dir>
cmake [-G <generator>] [options] <hyperscan-source-path> cmake [-G <generator>] [options] <vectorscan-source-path>
Known working generators: Known working generators:
* ``Unix Makefiles`` --- make-compatible makefiles (default on Linux/FreeBSD/Mac OS X) * ``Unix Makefiles`` --- make-compatible makefiles (default on Linux/FreeBSD/Mac OS X)
* ``Ninja`` --- `Ninja <http://martine.github.io/ninja/>`_ build files. * ``Ninja`` --- `Ninja <http://martine.github.io/ninja/>`_ build files.
* ``Visual Studio 15 2017`` --- Visual Studio projects
Generators that might work include: Unsupported generators that might work include:
* ``Xcode`` --- OS X Xcode projects. * ``Xcode`` --- OS X Xcode projects.
#. Build Hyperscan #. Build Vectorscan
Depending on the generator used: Depending on the generator used:
* ``cmake --build .`` --- will build everything * ``cmake --build .`` --- will build everything
* ``make -j<jobs>`` --- use makefiles in parallel * ``make -j<jobs>`` --- use makefiles in parallel
* ``ninja`` --- use Ninja build * ``ninja`` --- use Ninja build
* ``MsBuild.exe`` --- use Visual Studio MsBuild
* etc. * etc.
#. Check Hyperscan #. Check Vectorscan
Run the Hyperscan unit tests: :: Run the Vectorscan unit tests: ::
bin/unit-hyperscan bin/unit-hyperscan
@ -55,20 +53,23 @@ Requirements
Hardware Hardware
======== ========
Hyperscan will run on x86 processors in 64-bit (Intel\ |reg| 64 Architecture) and Vectorscan will run on x86 processors in 64-bit (Intel\ |reg| 64 Architecture) and
32-bit (IA-32 Architecture) modes. 32-bit (IA-32 Architecture) modes as well as Arm v8.0+ aarch64, and POWER 8+ ppc64le
machines.
Hyperscan is a high performance software library that takes advantage of recent Vectorscan is a high performance software library that takes advantage of recent
Intel architecture advances. At a minimum, support for Supplemental Streaming architecture advances.
SIMD Extensions 3 (SSSE3) is required, which should be available on any modern
x86 processor.
Additionally, Hyperscan can make use of: Additionally, Vectorscan can make use of:
* Intel Streaming SIMD Extensions 4.2 (SSE4.2) * Intel Streaming SIMD Extensions 4.2 (SSE4.2)
* the POPCNT instruction * the POPCNT instruction
* Bit Manipulation Instructions (BMI, BMI2) * Bit Manipulation Instructions (BMI, BMI2)
* Intel Advanced Vector Extensions 2 (Intel AVX2) * Intel Advanced Vector Extensions 2 (Intel AVX2)
* Arm NEON
* Arm SVE and SVE2
* Arm SVE2 BITPERM
* IBM Power8/Power9 VSX
if present. if present.
@ -79,40 +80,34 @@ These can be determined at library compile time, see :ref:`target_arch`.
Software Software
======== ========
As a software library, Hyperscan doesn't impose any particular runtime As a software library, Vectorscan doesn't impose any particular runtime
software requirements, however to build the Hyperscan library we require a software requirements, however to build the Vectorscan library we require a
modern C and C++ compiler -- in particular, Hyperscan requires C99 and C++11 modern C and C++ compiler -- in particular, Vectorscan requires C99 and C++17
compiler support. The supported compilers are: compiler support. The supported compilers are:
* GCC, v4.8.1 or higher * GCC, v9 or higher
* Clang, v3.4 or higher (with libstdc++ or libc++) * Clang, v5 or higher (with libstdc++ or libc++)
* Intel C++ Compiler v15 or higher
* Visual C++ 2017 Build Tools
Examples of operating systems that Hyperscan is known to work on include: Examples of operating systems that Vectorscan is known to work on include:
Linux: Linux:
* Ubuntu 14.04 LTS or newer * Ubuntu 20.04 LTS or newer
* RedHat/CentOS 7 or newer * RedHat/CentOS 7 or newer
* Fedora 38 or newer
* Debian 10
FreeBSD: FreeBSD:
* 10.0 or newer * 10.0 or newer
Windows:
* 8 or newer
Mac OS X: Mac OS X:
* 10.8 or newer, using XCode/Clang * 10.8 or newer, using XCode/Clang
Hyperscan *may* compile and run on other platforms, but there is no guarantee. Vectorscan *may* compile and run on other platforms, but there is no guarantee.
We currently have experimental support for Windows using Intel C++ Compiler
or Visual Studio 2017.
In addition, the following software is required for compiling the Hyperscan library: In addition, the following software is required for compiling the Vectorscan library:
======================================================= =========== ====================================== ======================================================= =========== ======================================
Dependency Version Notes Dependency Version Notes
@ -132,20 +127,20 @@ Ragel, you may use Cygwin to build it from source.
Boost Headers Boost Headers
------------- -------------
Compiling Hyperscan depends on a recent version of the Boost C++ header Compiling Vectorscan depends on a recent version of the Boost C++ header
library. If the Boost libraries are installed on the build machine in the library. If the Boost libraries are installed on the build machine in the
usual paths, CMake will find them. If the Boost libraries are not installed, usual paths, CMake will find them. If the Boost libraries are not installed,
the location of the Boost source tree can be specified during the CMake the location of the Boost source tree can be specified during the CMake
configuration step using the ``BOOST_ROOT`` variable (described below). configuration step using the ``BOOST_ROOT`` variable (described below).
Another alternative is to put a copy of (or a symlink to) the boost Another alternative is to put a copy of (or a symlink to) the boost
subdirectory in ``<hyperscan-source-path>/include/boost``. subdirectory in ``<vectorscan-source-path>/include/boost``.
For example: for the Boost-1.59.0 release: :: For example: for the Boost-1.59.0 release: ::
ln -s boost_1_59_0/boost <hyperscan-source-path>/include/boost ln -s boost_1_59_0/boost <vectorscan-source-path>/include/boost
As Hyperscan uses the header-only parts of Boost, it is not necessary to As Vectorscan uses the header-only parts of Boost, it is not necessary to
compile the Boost libraries. compile the Boost libraries.
CMake Configuration CMake Configuration
@ -168,11 +163,12 @@ Common options for CMake include:
| | Valid options are Debug, Release, RelWithDebInfo, | | | Valid options are Debug, Release, RelWithDebInfo, |
| | and MinSizeRel. Default is RelWithDebInfo. | | | and MinSizeRel. Default is RelWithDebInfo. |
+------------------------+----------------------------------------------------+ +------------------------+----------------------------------------------------+
| BUILD_SHARED_LIBS | Build Hyperscan as a shared library instead of | | BUILD_SHARED_LIBS | Build Vectorscan as a shared library instead of |
| | the default static library. | | | the default static library. |
| | Default: Off |
+------------------------+----------------------------------------------------+ +------------------------+----------------------------------------------------+
| BUILD_STATIC_AND_SHARED| Build both static and shared Hyperscan libs. | | BUILD_STATIC_LIBS | Build Vectorscan as a static library. |
| | Default off. | | | Default: On |
+------------------------+----------------------------------------------------+ +------------------------+----------------------------------------------------+
| BOOST_ROOT | Location of Boost source tree. | | BOOST_ROOT | Location of Boost source tree. |
+------------------------+----------------------------------------------------+ +------------------------+----------------------------------------------------+
@ -180,12 +176,64 @@ Common options for CMake include:
+------------------------+----------------------------------------------------+ +------------------------+----------------------------------------------------+
| FAT_RUNTIME | Build the :ref:`fat runtime<fat_runtime>`. Default | | FAT_RUNTIME | Build the :ref:`fat runtime<fat_runtime>`. Default |
| | true on Linux, not available elsewhere. | | | true on Linux, not available elsewhere. |
| | Default: Off |
+------------------------+----------------------------------------------------+
| USE_CPU_NATIVE | Native CPU detection is off by default, however it |
| | is possible to build a performance-oriented non-fat|
| | library tuned to your CPU. |
| | Default: Off |
+------------------------+----------------------------------------------------+
| SANITIZE | Use libasan sanitizer to detect possible bugs. |
| | Valid options are address, memory and undefined. |
+------------------------+----------------------------------------------------+
| SIMDE_BACKEND | Enable SIMDe backend. If this is chosen all native |
| | (SSE/AVX/AVX512/Neon/SVE/VSX) backends will be |
| | disabled and a SIMDe SSE4.2 emulation backend will |
| | be enabled. This will enable Vectorscan to build |
| | and run on architectures without SIMD. |
| | Default: Off |
+------------------------+----------------------------------------------------+
| SIMDE_NATIVE | Enable SIMDe native emulation of x86 SSE4.2 |
| | intrinsics on the building platform. That is, |
| | SSE4.2 intrinsics will be emulated using Neon on |
| | an Arm platform, or VSX on a Power platform, etc. |
| | Default: Off |
+------------------------+----------------------------------------------------+
X86 platform specific options include:
+------------------------+----------------------------------------------------+
| Variable | Description |
+========================+====================================================+
| BUILD_AVX2 | Enable code for AVX2. |
+------------------------+----------------------------------------------------+
| BUILD_AVX512 | Enable code for AVX512. Implies BUILD_AVX2. |
+------------------------+----------------------------------------------------+
| BUILD_AVX512VBMI | Enable code for AVX512 with VBMI extension. Implies|
| | BUILD_AVX512. |
+------------------------+----------------------------------------------------+
Arm platform specific options include:
+------------------------+----------------------------------------------------+
| Variable | Description |
+========================+====================================================+
| BUILD_SVE | Enable code for SVE, like on AWS Graviton3 CPUs. |
| | Not much code is ported just for SVE, but enabling |
| | SVE code production, does improve code generation, |
| | see Benchmarks. |
+------------------------+----------------------------------------------------+
| BUILD_SVE2 | Enable code for SVE2, implies BUILD_SVE. Most |
| | non-Neon code is written for SVE2. |
+------------------------+----------------------------------------------------+
| BUILD_SVE2_BITPERM | Enable code for SVE2_BITPERM hardware feature, |
| | implies BUILD_SVE2. |
+------------------------+----------------------------------------------------+ +------------------------+----------------------------------------------------+
For example, to generate a ``Debug`` build: :: For example, to generate a ``Debug`` build: ::
cd <build-dir> cd <build-dir>
cmake -DCMAKE_BUILD_TYPE=Debug <hyperscan-source-path> cmake -DCMAKE_BUILD_TYPE=Debug <vectorscan-source-path>
@ -193,7 +241,7 @@ Build Type
---------- ----------
CMake determines a number of features for a build based on the Build Type. CMake determines a number of features for a build based on the Build Type.
Hyperscan defaults to ``RelWithDebInfo``, i.e. "release with debugging Vectorscan defaults to ``RelWithDebInfo``, i.e. "release with debugging
information". This is a performance optimized build without runtime assertions information". This is a performance optimized build without runtime assertions
but with debug symbols enabled. but with debug symbols enabled.
@ -201,7 +249,7 @@ The other types of builds are:
* ``Release``: as above, but without debug symbols * ``Release``: as above, but without debug symbols
* ``MinSizeRel``: a stripped release build * ``MinSizeRel``: a stripped release build
* ``Debug``: used when developing Hyperscan. Includes runtime assertions * ``Debug``: used when developing Vectorscan. Includes runtime assertions
(which has a large impact on runtime performance), and will also enable (which has a large impact on runtime performance), and will also enable
some other build features like building internal unit some other build features like building internal unit
tests. tests.
@ -211,7 +259,7 @@ The other types of builds are:
Target Architecture Target Architecture
------------------- -------------------
Unless using the :ref:`fat runtime<fat_runtime>`, by default Hyperscan will be Unless using the :ref:`fat runtime<fat_runtime>`, by default Vectorscan will be
compiled to target the instruction set of the processor of the machine that compiled to target the instruction set of the processor of the machine that
is being used for compilation. This is done via the use of ``-march=native``. The is being used for compilation. This is done via the use of ``-march=native``. The
result of this means that a library built on one machine may not work on a result of this means that a library built on one machine may not work on a
@ -223,7 +271,7 @@ CMake, or ``CMAKE_C_FLAGS`` and ``CMAKE_CXX_FLAGS`` on the CMake command line. F
example, to set the instruction subsets up to ``SSE4.2`` using GCC 4.8: :: example, to set the instruction subsets up to ``SSE4.2`` using GCC 4.8: ::
cmake -DCMAKE_C_FLAGS="-march=corei7" \ cmake -DCMAKE_C_FLAGS="-march=corei7" \
-DCMAKE_CXX_FLAGS="-march=corei7" <hyperscan-source-path> -DCMAKE_CXX_FLAGS="-march=corei7" <vectorscan-source-path>
For more information, refer to :ref:`instr_specialization`. For more information, refer to :ref:`instr_specialization`.
@ -232,17 +280,17 @@ For more information, refer to :ref:`instr_specialization`.
Fat Runtime Fat Runtime
----------- -----------
A feature introduced in Hyperscan v4.4 is the ability for the Hyperscan A feature introduced in Hyperscan v4.4 is the ability for the Vectorscan
library to dispatch the most appropriate runtime code for the host processor. library to dispatch the most appropriate runtime code for the host processor.
This feature is called the "fat runtime", as a single Hyperscan library This feature is called the "fat runtime", as a single Vectorscan library
contains multiple copies of the runtime code for different instruction sets. contains multiple copies of the runtime code for different instruction sets.
.. note:: .. note::
The fat runtime feature is only available on Linux. Release builds of The fat runtime feature is only available on Linux. Release builds of
Hyperscan will default to having the fat runtime enabled where supported. Vectorscan will default to having the fat runtime enabled where supported.
When building the library with the fat runtime, the Hyperscan runtime code When building the library with the fat runtime, the Vectorscan runtime code
will be compiled multiple times for these different instruction sets, and will be compiled multiple times for these different instruction sets, and
these compiled objects are combined into one library. There are no changes to these compiled objects are combined into one library. There are no changes to
how user applications are built against this library. how user applications are built against this library.
@ -254,11 +302,11 @@ resolved so that the right version of each API function is used. There is no
impact on function call performance, as this check and resolution is performed impact on function call performance, as this check and resolution is performed
by the ELF loader once when the binary is loaded. by the ELF loader once when the binary is loaded.
If the Hyperscan library is used on x86 systems without ``SSSE3``, the runtime If the Vectorscan library is used on x86 systems without ``SSE4.2``, the runtime
API functions will resolve to functions that return :c:member:`HS_ARCH_ERROR` API functions will resolve to functions that return :c:member:`HS_ARCH_ERROR`
instead of potentially executing illegal instructions. The API function instead of potentially executing illegal instructions. The API function
:c:func:`hs_valid_platform` can be used by application writers to determine if :c:func:`hs_valid_platform` can be used by application writers to determine if
the current platform is supported by Hyperscan. the current platform is supported by Vectorscan.
As of this release, the variants of the runtime that are built, and the CPU As of this release, the variants of the runtime that are built, and the CPU
capability that is required, are the following: capability that is required, are the following:
@ -299,6 +347,11 @@ capability that is required, are the following:
cmake -DBUILD_AVX512VBMI=on <...> cmake -DBUILD_AVX512VBMI=on <...>
Vectorscan adds support for Arm processors and SVE, SVE2 and SVE2_BITPERM.
example: ::
cmake -DBUILD_SVE=ON -DBUILD_SVE2=ON -DBUILD_SVE2_BITPERM=ON <...>
As the fat runtime requires compiler, libc, and binutils support, at this time As the fat runtime requires compiler, libc, and binutils support, at this time
it will only be enabled for Linux builds where the compiler supports the it will only be enabled for Linux builds where the compiler supports the
`indirect function "ifunc" function attribute `indirect function "ifunc" function attribute

View File

@ -1,5 +1,5 @@
############################################### ###############################################
Hyperscan |version| Developer's Reference Guide Vectorscan |version| Developer's Reference Guide
############################################### ###############################################
------- -------

View File

@ -5,11 +5,11 @@
Introduction Introduction
############ ############
Hyperscan is a software regular expression matching engine designed with Vectorscan is a software regular expression matching engine designed with
high performance and flexibility in mind. It is implemented as a library that high performance and flexibility in mind. It is implemented as a library that
exposes a straightforward C API. exposes a straightforward C API.
The Hyperscan API itself is composed of two major components: The Vectorscan API itself is composed of two major components:
*********** ***********
Compilation Compilation
@ -17,7 +17,7 @@ Compilation
These functions take a group of regular expressions, along with identifiers and These functions take a group of regular expressions, along with identifiers and
option flags, and compile them into an immutable database that can be used by option flags, and compile them into an immutable database that can be used by
the Hyperscan scanning API. This compilation process performs considerable the Vectorscan scanning API. This compilation process performs considerable
analysis and optimization work in order to build a database that will match the analysis and optimization work in order to build a database that will match the
given expressions efficiently. given expressions efficiently.
@ -36,8 +36,8 @@ See :ref:`compilation` for more detail.
Scanning Scanning
******** ********
Once a Hyperscan database has been created, it can be used to scan data in Once a Vectorscan database has been created, it can be used to scan data in
memory. Hyperscan provides several scanning modes, depending on whether the memory. Vectorscan provides several scanning modes, depending on whether the
data to be scanned is available as a single contiguous block, whether it is data to be scanned is available as a single contiguous block, whether it is
distributed amongst several blocks in memory at the same time, or whether it is distributed amongst several blocks in memory at the same time, or whether it is
to be scanned as a sequence of blocks in a stream. to be scanned as a sequence of blocks in a stream.
@ -45,7 +45,7 @@ to be scanned as a sequence of blocks in a stream.
Matches are delivered to the application via a user-supplied callback function Matches are delivered to the application via a user-supplied callback function
that is called synchronously for each match. that is called synchronously for each match.
For a given database, Hyperscan provides several guarantees: For a given database, Vectorscan provides several guarantees:
* No memory allocations occur at runtime with the exception of two * No memory allocations occur at runtime with the exception of two
fixed-size allocations, both of which should be done ahead of time for fixed-size allocations, both of which should be done ahead of time for
@ -56,7 +56,7 @@ For a given database, Hyperscan provides several guarantees:
call. call.
- **Stream state**: in streaming mode only, some state space is required to - **Stream state**: in streaming mode only, some state space is required to
store data that persists between scan calls for each stream. This allows store data that persists between scan calls for each stream. This allows
Hyperscan to track matches that span multiple blocks of data. Vectorscan to track matches that span multiple blocks of data.
* The sizes of the scratch space and stream state (in streaming mode) required * The sizes of the scratch space and stream state (in streaming mode) required
for a given database are fixed and determined at database compile time. This for a given database are fixed and determined at database compile time. This
@ -64,7 +64,7 @@ For a given database, Hyperscan provides several guarantees:
time, and these structures can be pre-allocated if required for performance time, and these structures can be pre-allocated if required for performance
reasons. reasons.
* Any pattern that has successfully been compiled by the Hyperscan compiler can * Any pattern that has successfully been compiled by the Vectorscan compiler can
be scanned against any input. There are no internal resource limits or other be scanned against any input. There are no internal resource limits or other
limitations at runtime that could cause a scan call to return an error. limitations at runtime that could cause a scan call to return an error.
@ -74,12 +74,12 @@ See :ref:`runtime` for more detail.
Tools Tools
***** *****
Some utilities for testing and benchmarking Hyperscan are included with the Some utilities for testing and benchmarking Vectorscan are included with the
library. See :ref:`tools` for more information. library. See :ref:`tools` for more information.
************ ************
Example Code Example Code
************ ************
Some simple example code demonstrating the use of the Hyperscan API is Some simple example code demonstrating the use of the Vectorscan API is
available in the ``examples/`` subdirectory of the Hyperscan distribution. available in the ``examples/`` subdirectory of the Vectorscan distribution.

View File

@ -4,7 +4,7 @@
Performance Considerations Performance Considerations
########################## ##########################
Hyperscan supports a wide range of patterns in all three scanning modes. It is Vectorscan supports a wide range of patterns in all three scanning modes. It is
capable of extremely high levels of performance, but certain patterns can capable of extremely high levels of performance, but certain patterns can
reduce performance markedly. reduce performance markedly.
@ -25,7 +25,7 @@ For example, caseless matching of :regexp:`/abc/` can be written as:
* :regexp:`/(?i)abc(?-i)/` * :regexp:`/(?i)abc(?-i)/`
* :regexp:`/abc/i` * :regexp:`/abc/i`
Hyperscan is capable of handling all these constructs. Unless there is a Vectorscan is capable of handling all these constructs. Unless there is a
specific reason otherwise, do not rewrite patterns from one form to another. specific reason otherwise, do not rewrite patterns from one form to another.
As another example, matching of :regexp:`/foo(bar|baz)(frotz)?/` can be As another example, matching of :regexp:`/foo(bar|baz)(frotz)?/` can be
@ -41,24 +41,24 @@ Library usage
.. tip:: Do not hand-optimize library usage. .. tip:: Do not hand-optimize library usage.
The Hyperscan library is capable of dealing with small writes, unusually large The Vectorscan library is capable of dealing with small writes, unusually large
and small pattern sets, etc. Unless there is a specific performance problem and small pattern sets, etc. Unless there is a specific performance problem
with some usage of the library, it is best to use Hyperscan in a simple and with some usage of the library, it is best to use Vectorscan in a simple and
direct fashion. For example, it is unlikely for there to be much benefit in direct fashion. For example, it is unlikely for there to be much benefit in
buffering input to the library into larger blocks unless streaming writes are buffering input to the library into larger blocks unless streaming writes are
tiny (say, 1-2 bytes at a time). tiny (say, 1-2 bytes at a time).
Unlike many other pattern matching products, Hyperscan will run faster with Unlike many other pattern matching products, Vectorscan will run faster with
small numbers of patterns and slower with large numbers of patterns in a smooth small numbers of patterns and slower with large numbers of patterns in a smooth
fashion (as opposed to, typically, running at a moderate speed up to some fixed fashion (as opposed to, typically, running at a moderate speed up to some fixed
limit then either breaking or running half as fast). limit then either breaking or running half as fast).
Hyperscan also provides high-throughput matching with a single thread of Vectorscan also provides high-throughput matching with a single thread of
control per core; if a database runs at 3.0 Gbps in Hyperscan it means that a control per core; if a database runs at 3.0 Gbps in Vectorscan it means that a
3000-bit block of data will be scanned in 1 microsecond in a single thread of 3000-bit block of data will be scanned in 1 microsecond in a single thread of
control, not that it is required to scan 22 3000-bit blocks of data in 22 control, not that it is required to scan 22 3000-bit blocks of data in 22
microseconds. Thus, it is not usually necessary to buffer data to supply microseconds. Thus, it is not usually necessary to buffer data to supply
Hyperscan with available parallelism. Vectorscan with available parallelism.
******************** ********************
Block-based matching Block-based matching
@ -72,7 +72,7 @@ accumulated before processing, it should be scanned in block rather than in
streaming mode. streaming mode.
Unnecessary use of streaming mode reduces the number of optimizations that can Unnecessary use of streaming mode reduces the number of optimizations that can
be applied in Hyperscan and may make some patterns run slower. be applied in Vectorscan and may make some patterns run slower.
If there is a mixture of 'block' and 'streaming' mode patterns, these should be If there is a mixture of 'block' and 'streaming' mode patterns, these should be
scanned in separate databases except in the case that the streaming patterns scanned in separate databases except in the case that the streaming patterns
@ -107,7 +107,7 @@ Allocate scratch ahead of time
Scratch allocation is not necessarily a cheap operation. Since it is the first Scratch allocation is not necessarily a cheap operation. Since it is the first
time (after compilation or deserialization) that a pattern database is used, time (after compilation or deserialization) that a pattern database is used,
Hyperscan performs some validation checks inside :c:func:`hs_alloc_scratch` and Vectorscan performs some validation checks inside :c:func:`hs_alloc_scratch` and
must also allocate memory. must also allocate memory.
Therefore, it is important to ensure that :c:func:`hs_alloc_scratch` is not Therefore, it is important to ensure that :c:func:`hs_alloc_scratch` is not
@ -329,7 +329,7 @@ Consequently, :regexp:`/foo.*bar/L` with a check on start of match values after
the callback is considerably more expensive and general than the callback is considerably more expensive and general than
:regexp:`/foo.{300}bar/`. :regexp:`/foo.{300}bar/`.
Similarly, the :c:member:`hs_expr_ext::min_length` extended parameter can be Similarly, the :cpp:member:`hs_expr_ext::min_length` extended parameter can be
used to specify a lower bound on the length of the matches for a pattern. Using used to specify a lower bound on the length of the matches for a pattern. Using
this facility may be more lightweight in some circumstances than using the SOM this facility may be more lightweight in some circumstances than using the SOM
flag and post-confirming match length in the calling application. flag and post-confirming match length in the calling application.

View File

@ -6,35 +6,35 @@ Preface
Overview Overview
******** ********
Hyperscan is a regular expression engine designed to offer high performance, the Vectorscan is a regular expression engine designed to offer high performance, the
ability to match multiple expressions simultaneously and flexibility in ability to match multiple expressions simultaneously and flexibility in
scanning operation. scanning operation.
Patterns are provided to a compilation interface which generates an immutable Patterns are provided to a compilation interface which generates an immutable
pattern database. The scan interface then can be used to scan a target data pattern database. The scan interface then can be used to scan a target data
buffer for the given patterns, returning any matching results from that data buffer for the given patterns, returning any matching results from that data
buffer. Hyperscan also provides a streaming mode, in which matches that span buffer. Vectorscan also provides a streaming mode, in which matches that span
several blocks in a stream are detected. several blocks in a stream are detected.
This document is designed to facilitate code-level integration of the Hyperscan This document is designed to facilitate code-level integration of the Vectorscan
library with existing or new applications. library with existing or new applications.
:ref:`intro` is a short overview of the Hyperscan library, with more detail on :ref:`intro` is a short overview of the Vectorscan library, with more detail on
the Hyperscan API provided in the subsequent sections: :ref:`compilation` and the Vectorscan API provided in the subsequent sections: :ref:`compilation` and
:ref:`runtime`. :ref:`runtime`.
:ref:`perf` provides details on various factors which may impact the :ref:`perf` provides details on various factors which may impact the
performance of a Hyperscan integration. performance of a Vectorscan integration.
:ref:`api_constants` and :ref:`api_files` provide a detailed summary of the :ref:`api_constants` and :ref:`api_files` provide a detailed summary of the
Hyperscan Application Programming Interface (API). Vectorscan Application Programming Interface (API).
******** ********
Audience Audience
******** ********
This guide is aimed at developers interested in integrating Hyperscan into an This guide is aimed at developers interested in integrating Vectorscan into an
application. For information on building the Hyperscan library, see the Quick application. For information on building the Vectorscan library, see the Quick
Start Guide. Start Guide.
*********** ***********

View File

@ -4,7 +4,7 @@
Scanning for Patterns Scanning for Patterns
##################### #####################
Hyperscan provides three different scanning modes, each with its own scan Vectorscan provides three different scanning modes, each with its own scan
function beginning with ``hs_scan``. In addition, streaming mode has a number function beginning with ``hs_scan``. In addition, streaming mode has a number
of other API functions for managing stream state. of other API functions for managing stream state.
@ -33,8 +33,8 @@ See :c:type:`match_event_handler` for more information.
Streaming Mode Streaming Mode
************** **************
The core of the Hyperscan streaming runtime API consists of functions to open, The core of the Vectorscan streaming runtime API consists of functions to open,
scan, and close Hyperscan data streams: scan, and close Vectorscan data streams:
* :c:func:`hs_open_stream`: allocates and initializes a new stream for scanning. * :c:func:`hs_open_stream`: allocates and initializes a new stream for scanning.
@ -57,14 +57,14 @@ will return immediately with :c:member:`HS_SCAN_TERMINATED`. The caller must
still call :c:func:`hs_close_stream` to complete the clean-up process for that still call :c:func:`hs_close_stream` to complete the clean-up process for that
stream. stream.
Streams exist in the Hyperscan library so that pattern matching state can be Streams exist in the Vectorscan library so that pattern matching state can be
maintained across multiple blocks of target data -- without maintaining this maintained across multiple blocks of target data -- without maintaining this
state, it would not be possible to detect patterns that span these blocks of state, it would not be possible to detect patterns that span these blocks of
data. This, however, does come at the cost of requiring an amount of storage data. This, however, does come at the cost of requiring an amount of storage
per-stream (the size of this storage is fixed at compile time), and a slight per-stream (the size of this storage is fixed at compile time), and a slight
performance penalty in some cases to manage the state. performance penalty in some cases to manage the state.
While Hyperscan does always support a strict ordering of multiple matches, While Vectorscan does always support a strict ordering of multiple matches,
streaming matches will not be delivered at offsets before the current stream streaming matches will not be delivered at offsets before the current stream
write, with the exception of zero-width asserts, where constructs such as write, with the exception of zero-width asserts, where constructs such as
:regexp:`\\b` and :regexp:`$` can cause a match on the final character of a :regexp:`\\b` and :regexp:`$` can cause a match on the final character of a
@ -76,7 +76,7 @@ Stream Management
================= =================
In addition to :c:func:`hs_open_stream`, :c:func:`hs_scan_stream`, and In addition to :c:func:`hs_open_stream`, :c:func:`hs_scan_stream`, and
:c:func:`hs_close_stream`, the Hyperscan API provides a number of other :c:func:`hs_close_stream`, the Vectorscan API provides a number of other
functions for the management of streams: functions for the management of streams:
* :c:func:`hs_reset_stream`: resets a stream to its initial state; this is * :c:func:`hs_reset_stream`: resets a stream to its initial state; this is
@ -98,10 +98,10 @@ A stream object is allocated as a fixed size region of memory which has been
sized to ensure that no memory allocations are required during scan sized to ensure that no memory allocations are required during scan
operations. When the system is under memory pressure, it may be useful to reduce operations. When the system is under memory pressure, it may be useful to reduce
the memory consumed by streams that are not expected to be used soon. The the memory consumed by streams that are not expected to be used soon. The
Hyperscan API provides calls for translating a stream to and from a compressed Vectorscan API provides calls for translating a stream to and from a compressed
representation for this purpose. The compressed representation differs from the representation for this purpose. The compressed representation differs from the
full stream object as it does not reserve space for components which are not full stream object as it does not reserve space for components which are not
required given the current stream state. The Hyperscan API functions for this required given the current stream state. The Vectorscan API functions for this
functionality are: functionality are:
* :c:func:`hs_compress_stream`: fills the provided buffer with a compressed * :c:func:`hs_compress_stream`: fills the provided buffer with a compressed
@ -157,7 +157,7 @@ scanned in block mode.
Scratch Space Scratch Space
************* *************
While scanning data, Hyperscan needs a small amount of temporary memory to store While scanning data, Vectorscan needs a small amount of temporary memory to store
on-the-fly internal data. This amount is unfortunately too large to fit on the on-the-fly internal data. This amount is unfortunately too large to fit on the
stack, particularly for embedded applications, and allocating memory dynamically stack, particularly for embedded applications, and allocating memory dynamically
is too expensive, so a pre-allocated "scratch" space must be provided to the is too expensive, so a pre-allocated "scratch" space must be provided to the
@ -170,7 +170,7 @@ databases, only a single scratch region is necessary: in this case, calling
will ensure that the scratch space is large enough to support scanning against will ensure that the scratch space is large enough to support scanning against
any of the given databases. any of the given databases.
While the Hyperscan library is re-entrant, the use of scratch spaces is not. While the Vectorscan library is re-entrant, the use of scratch spaces is not.
For example, if by design it is deemed necessary to run recursive or nested For example, if by design it is deemed necessary to run recursive or nested
scanning (say, from the match callback function), then an additional scratch scanning (say, from the match callback function), then an additional scratch
space is required for that context. space is required for that context.
@ -219,11 +219,11 @@ For example:
Custom Allocators Custom Allocators
***************** *****************
By default, structures used by Hyperscan at runtime (scratch space, stream By default, structures used by Vectorscan at runtime (scratch space, stream
state, etc) are allocated with the default system allocators, usually state, etc) are allocated with the default system allocators, usually
``malloc()`` and ``free()``. ``malloc()`` and ``free()``.
The Hyperscan API provides a facility for changing this behaviour to support The Vectorscan API provides a facility for changing this behaviour to support
applications that use custom memory allocators. applications that use custom memory allocators.
These functions are: These functions are:

View File

@ -4,7 +4,7 @@
Serialization Serialization
############# #############
For some applications, compiling Hyperscan pattern databases immediately prior For some applications, compiling Vectorscan pattern databases immediately prior
to use is not an appropriate design. Some users may wish to: to use is not an appropriate design. Some users may wish to:
* Compile pattern databases on a different host; * Compile pattern databases on a different host;
@ -14,9 +14,9 @@ to use is not an appropriate design. Some users may wish to:
* Control the region of memory in which the compiled database is located. * Control the region of memory in which the compiled database is located.
Hyperscan pattern databases are not completely flat in memory: they contain Vectorscan pattern databases are not completely flat in memory: they contain
pointers and have specific alignment requirements. Therefore, they cannot be pointers and have specific alignment requirements. Therefore, they cannot be
copied (or otherwise relocated) directly. To enable these use cases, Hyperscan copied (or otherwise relocated) directly. To enable these use cases, Vectorscan
provides functionality for serializing and deserializing compiled pattern provides functionality for serializing and deserializing compiled pattern
databases. databases.
@ -40,10 +40,10 @@ The API provides the following functions:
returns a string containing information about the database. This call is returns a string containing information about the database. This call is
analogous to :c:func:`hs_database_info`. analogous to :c:func:`hs_database_info`.
.. note:: Hyperscan performs both version and platform compatibility checks .. note:: Vectorscan performs both version and platform compatibility checks
upon deserialization. The :c:func:`hs_deserialize_database` and upon deserialization. The :c:func:`hs_deserialize_database` and
:c:func:`hs_deserialize_database_at` functions will only permit the :c:func:`hs_deserialize_database_at` functions will only permit the
deserialization of databases compiled with (a) the same version of Hyperscan deserialization of databases compiled with (a) the same version of Vectorscan
and (b) platform features supported by the current host platform. See and (b) platform features supported by the current host platform. See
:ref:`instr_specialization` for more information on platform specialization. :ref:`instr_specialization` for more information on platform specialization.
@ -51,17 +51,17 @@ The API provides the following functions:
The Runtime Library The Runtime Library
=================== ===================
The main Hyperscan library (``libhs``) contains both the compiler and runtime The main Vectorscan library (``libhs``) contains both the compiler and runtime
portions of the library. This means that in order to support the Hyperscan portions of the library. This means that in order to support the Vectorscan
compiler, which is written in C++, it requires C++ linkage and has a compiler, which is written in C++, it requires C++ linkage and has a
dependency on the C++ standard library. dependency on the C++ standard library.
Many embedded applications require only the scanning ("runtime") portion of the Many embedded applications require only the scanning ("runtime") portion of the
Hyperscan library. In these cases, pattern compilation generally takes place on Vectorscan library. In these cases, pattern compilation generally takes place on
another host, and serialized pattern databases are delivered to the application another host, and serialized pattern databases are delivered to the application
for use. for use.
To support these applications without requiring the C++ dependency, a To support these applications without requiring the C++ dependency, a
runtime-only version of the Hyperscan library, called ``libhs_runtime``, is also runtime-only version of the Vectorscan library, called ``libhs_runtime``, is also
distributed. This library does not depend on the C++ standard library and distributed. This library does not depend on the C++ standard library and
provides all Hyperscan functions other that those used to compile databases. provides all Vectorscan functions other than those used to compile databases.

View File

@ -4,14 +4,14 @@
Tools Tools
##### #####
This section describes the set of utilities included with the Hyperscan library. This section describes the set of utilities included with the Vectorscan library.
******************** ********************
Quick Check: hscheck Quick Check: hscheck
******************** ********************
The ``hscheck`` tool allows the user to quickly check whether Hyperscan supports The ``hscheck`` tool allows the user to quickly check whether Vectorscan supports
a group of patterns. If a pattern is rejected by Hyperscan's compiler, the a group of patterns. If a pattern is rejected by Vectorscan's compiler, the
compile error is provided on standard output. compile error is provided on standard output.
For example, given the following three patterns (the last of which contains a For example, given the following three patterns (the last of which contains a
@ -34,7 +34,7 @@ syntax error) in a file called ``/tmp/test``::
Benchmarker: hsbench Benchmarker: hsbench
******************** ********************
The ``hsbench`` tool provides an easy way to measure Hyperscan's performance The ``hsbench`` tool provides an easy way to measure Vectorscan's performance
for a particular set of patterns and corpus of data to be scanned. for a particular set of patterns and corpus of data to be scanned.
Patterns are supplied in the format described below in Patterns are supplied in the format described below in
@ -44,7 +44,7 @@ easy control of how a corpus is broken into blocks and streams.
.. note:: A group of Python scripts for constructing corpora databases from .. note:: A group of Python scripts for constructing corpora databases from
various input types, such as PCAP network traffic captures or text files, can various input types, such as PCAP network traffic captures or text files, can
be found in the Hyperscan source tree in ``tools/hsbench/scripts``. be found in the Vectorscan source tree in ``tools/hsbench/scripts``.
Running hsbench Running hsbench
=============== ===============
@ -56,7 +56,7 @@ produce output like this::
$ hsbench -e /tmp/patterns -c /tmp/corpus.db $ hsbench -e /tmp/patterns -c /tmp/corpus.db
Signatures: /tmp/patterns Signatures: /tmp/patterns
Hyperscan info: Version: 4.3.1 Features: AVX2 Mode: STREAM Vectorscan info: Version: 5.4.11 Features: AVX2 Mode: STREAM
Expression count: 200 Expression count: 200
Bytecode size: 342,540 bytes Bytecode size: 342,540 bytes
Database CRC: 0x6cd6b67c Database CRC: 0x6cd6b67c
@ -77,7 +77,7 @@ takes to perform all twenty scans. The number of repeats can be changed with the
``-n`` argument, and the results of each scan will be displayed if the ``-n`` argument, and the results of each scan will be displayed if the
``--per-scan`` argument is specified. ``--per-scan`` argument is specified.
To benchmark Hyperscan on more than one core, you can supply a list of cores To benchmark Vectorscan on more than one core, you can supply a list of cores
with the ``-T`` argument, which will instruct ``hsbench`` to start one with the ``-T`` argument, which will instruct ``hsbench`` to start one
benchmark thread per core given and compute the throughput from the time taken benchmark thread per core given and compute the throughput from the time taken
to complete all of them. to complete all of them.
@ -91,17 +91,17 @@ Correctness Testing: hscollider
******************************* *******************************
The ``hscollider`` tool, or Pattern Collider, provides a way to verify The ``hscollider`` tool, or Pattern Collider, provides a way to verify
Hyperscan's matching behaviour. It does this by compiling and scanning patterns Vectorscan's matching behaviour. It does this by compiling and scanning patterns
(either singly or in groups) against known corpora and comparing the results (either singly or in groups) against known corpora and comparing the results
against another engine (the "ground truth"). Two sources of ground truth for against another engine (the "ground truth"). Two sources of ground truth for
comparison are available: comparison are available:
* The PCRE library (http://pcre.org/). * The PCRE library (http://pcre.org/).
* An NFA simulation run on Hyperscan's compile-time graph representation. This * An NFA simulation run on Vectorscan's compile-time graph representation. This
is used if PCRE cannot support the pattern or if PCRE execution fails due to is used if PCRE cannot support the pattern or if PCRE execution fails due to
a resource limit. a resource limit.
Much of Hyperscan's testing infrastructure is built on ``hscollider``, and the Much of Vectorscan's testing infrastructure is built on ``hscollider``, and the
tool is designed to take advantage of multiple cores and provide considerable tool is designed to take advantage of multiple cores and provide considerable
flexibility in controlling the test. These options are described in the help flexibility in controlling the test. These options are described in the help
(``hscollider -h``) and include: (``hscollider -h``) and include:
@ -116,11 +116,11 @@ flexibility in controlling the test. These options are described in the help
Using hscollider to debug a pattern Using hscollider to debug a pattern
=================================== ===================================
One common use-case for ``hscollider`` is to determine whether Hyperscan will One common use-case for ``hscollider`` is to determine whether Vectorscan will
match a pattern in the expected location, and whether this accords with PCRE's match a pattern in the expected location, and whether this accords with PCRE's
behaviour for the same case. behaviour for the same case.
Here is an example. We put our pattern in a file in Hyperscan's pattern Here is an example. We put our pattern in a file in Vectorscan's pattern
format:: format::
$ cat /tmp/pat $ cat /tmp/pat
@ -172,7 +172,7 @@ individual matches are displayed in the output::
Total elapsed time: 0.00522815 secs. Total elapsed time: 0.00522815 secs.
We can see from this output that both PCRE and Hyperscan find matches ending at We can see from this output that both PCRE and Vectorscan find matches ending at
offset 33 and 45, and so ``hscollider`` considers this test case to have offset 33 and 45, and so ``hscollider`` considers this test case to have
passed. passed.
@ -180,13 +180,13 @@ passed.
corpus alignment 0, and ``-T 1`` instructs us to only use one thread.) corpus alignment 0, and ``-T 1`` instructs us to only use one thread.)
.. note:: In default operation, PCRE produces only one match for a scan, unlike .. note:: In default operation, PCRE produces only one match for a scan, unlike
Hyperscan's automata semantics. The ``hscollider`` tool uses libpcre's Vectorscan's automata semantics. The ``hscollider`` tool uses libpcre's
"callout" functionality to match Hyperscan's semantics. "callout" functionality to match Vectorscan's semantics.
Running a larger scan test Running a larger scan test
========================== ==========================
A set of patterns for testing purposes are distributed with Hyperscan, and these A set of patterns for testing purposes are distributed with Vectorscan, and these
can be tested via ``hscollider`` on an in-tree build. Two CMake targets are can be tested via ``hscollider`` on an in-tree build. Two CMake targets are
provided to do this easily: provided to do this easily:
@ -202,10 +202,10 @@ Debugging: hsdump
***************** *****************
When built in debug mode (using the CMake directive ``CMAKE_BUILD_TYPE`` set to When built in debug mode (using the CMake directive ``CMAKE_BUILD_TYPE`` set to
``Debug``), Hyperscan includes support for dumping information about its ``Debug``), Vectorscan includes support for dumping information about its
internals during pattern compilation with the ``hsdump`` tool. internals during pattern compilation with the ``hsdump`` tool.
This information is mostly of use to Hyperscan developers familiar with the This information is mostly of use to Vectorscan developers familiar with the
library's internal structure, but can be used to diagnose issues with patterns library's internal structure, but can be used to diagnose issues with patterns
and provide more information in bug reports. and provide more information in bug reports.
@ -215,7 +215,7 @@ and provide more information in bug reports.
Pattern Format Pattern Format
************** **************
All of the Hyperscan tools accept patterns in the same format, read from plain All of the Vectorscan tools accept patterns in the same format, read from plain
text files with one pattern per line. Each line looks like this: text files with one pattern per line. Each line looks like this:
* ``<integer id>:/<regex>/<flags>`` * ``<integer id>:/<regex>/<flags>``
@ -227,12 +227,12 @@ For example::
3:/^.{10,20}hatstand/m 3:/^.{10,20}hatstand/m
The integer ID is the value that will be reported when a match is found by The integer ID is the value that will be reported when a match is found by
Hyperscan and must be unique. Vectorscan and must be unique.
The pattern itself is a regular expression in PCRE syntax; see The pattern itself is a regular expression in PCRE syntax; see
:ref:`compilation` for more information on supported features. :ref:`compilation` for more information on supported features.
The flags are single characters that map to Hyperscan flags as follows: The flags are single characters that map to Vectorscan flags as follows:
========= ================================= =========== ========= ================================= ===========
Character API Flag Description Character API Flag Description
@ -256,7 +256,7 @@ between braces, separated by commas. For example::
1:/hatstand.*teakettle/s{min_offset=50,max_offset=100} 1:/hatstand.*teakettle/s{min_offset=50,max_offset=100}
All Hyperscan tools will accept a pattern file (or a directory containing All Vectorscan tools will accept a pattern file (or a directory containing
pattern files) with the ``-e`` argument. If no further arguments constraining pattern files) with the ``-e`` argument. If no further arguments constraining
the pattern set are given, all patterns in those files are used. the pattern set are given, all patterns in those files are used.

View File

@ -202,7 +202,7 @@ struct FiveTuple {
unsigned int dstPort; unsigned int dstPort;
// Construct a FiveTuple from a TCP or UDP packet. // Construct a FiveTuple from a TCP or UDP packet.
FiveTuple(const struct ip *iphdr) { explicit FiveTuple(const struct ip *iphdr) {
// IP fields // IP fields
protocol = iphdr->ip_p; protocol = iphdr->ip_p;
srcAddr = iphdr->ip_src.s_addr; srcAddr = iphdr->ip_src.s_addr;
@ -389,7 +389,7 @@ public:
// Close all open Hyperscan streams (potentially generating any // Close all open Hyperscan streams (potentially generating any
// end-anchored matches) // end-anchored matches)
void closeStreams() { void closeStreams() {
for (auto &stream : streams) { for (const auto &stream : streams) {
hs_error_t err = hs_error_t err =
hs_close_stream(stream, scratch, onMatch, &matchCount); hs_close_stream(stream, scratch, onMatch, &matchCount);
if (err != HS_SUCCESS) { if (err != HS_SUCCESS) {
@ -442,7 +442,7 @@ class Sigdata {
public: public:
Sigdata() {} Sigdata() {}
Sigdata(const char *filename) { explicit Sigdata(const char *filename) {
parseFile(filename, patterns, flags, ids, originals); parseFile(filename, patterns, flags, ids, originals);
} }
@ -565,7 +565,7 @@ double measure_block_time(Benchmark &bench, unsigned int repeatCount) {
} }
static static
double eval_set(Benchmark &bench, Sigdata &sigs, unsigned int mode, double eval_set(Benchmark &bench, const Sigdata &sigs, unsigned int mode,
unsigned repeatCount, Criterion criterion, unsigned repeatCount, Criterion criterion,
bool diagnose = true) { bool diagnose = true) {
double compileTime = 0; double compileTime = 0;
@ -605,8 +605,9 @@ double eval_set(Benchmark &bench, Sigdata &sigs, unsigned int mode,
scan_time = measure_stream_time(bench, repeatCount); scan_time = measure_stream_time(bench, repeatCount);
} }
size_t bytes = bench.bytes(); size_t bytes = bench.bytes();
size_t matches = bench.matches();
if (diagnose) { if (diagnose) {
size_t matches = bench.matches();
std::ios::fmtflags f(cout.flags()); std::ios::fmtflags f(cout.flags());
cout << "Scan time " << std::fixed << std::setprecision(3) << scan_time cout << "Scan time " << std::fixed << std::setprecision(3) << scan_time
<< " sec, Scanned " << bytes * repeatCount << " bytes, Throughput " << " sec, Scanned " << bytes * repeatCount << " bytes, Throughput "

View File

@ -99,7 +99,7 @@ struct FiveTuple {
unsigned int dstPort; unsigned int dstPort;
// Construct a FiveTuple from a TCP or UDP packet. // Construct a FiveTuple from a TCP or UDP packet.
FiveTuple(const struct ip *iphdr) { explicit FiveTuple(const struct ip *iphdr) {
// IP fields // IP fields
protocol = iphdr->ip_p; protocol = iphdr->ip_p;
srcAddr = iphdr->ip_src.s_addr; srcAddr = iphdr->ip_src.s_addr;
@ -281,7 +281,7 @@ public:
// Close all open Hyperscan streams (potentially generating any // Close all open Hyperscan streams (potentially generating any
// end-anchored matches) // end-anchored matches)
void closeStreams() { void closeStreams() {
for (auto &stream : streams) { for (const auto &stream : streams) {
hs_error_t err = hs_close_stream(stream, scratch, onMatch, hs_error_t err = hs_close_stream(stream, scratch, onMatch,
&matchCount); &matchCount);
if (err != HS_SUCCESS) { if (err != HS_SUCCESS) {

View File

@ -67,7 +67,7 @@
* to pass in the pattern that was being searched for so we can print it out. * to pass in the pattern that was being searched for so we can print it out.
*/ */
static int eventHandler(unsigned int id, unsigned long long from, static int eventHandler(unsigned int id, unsigned long long from,
unsigned long long to, unsigned int flags, void *ctx) { unsigned long long to, unsigned int flags, void *ctx) { // cppcheck-suppress constParameterCallback
printf("Match for pattern \"%s\" at offset %llu\n", (char *)ctx, to); printf("Match for pattern \"%s\" at offset %llu\n", (char *)ctx, to);
return 0; return 0;
} }
@ -150,7 +150,7 @@ int main(int argc, char *argv[]) {
} }
char *pattern = argv[1]; char *pattern = argv[1];
char *inputFN = argv[2]; const char *inputFN = argv[2];
/* First, we attempt to compile the pattern provided on the command line. /* First, we attempt to compile the pattern provided on the command line.
* We assume 'DOTALL' semantics, meaning that the '.' meta-character will * We assume 'DOTALL' semantics, meaning that the '.' meta-character will

View File

@ -4,7 +4,7 @@ libdir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@
includedir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_INCLUDEDIR@ includedir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_INCLUDEDIR@
Name: libhs Name: libhs
Description: Intel(R) Hyperscan Library Description: A portable fork of the high-performance regular expression matching library
Version: @HS_VERSION@ Version: @HS_VERSION@
Libs: -L${libdir} -lhs Libs: -L${libdir} -lhs
Cflags: -I${includedir}/hs Cflags: -I${includedir}/hs

2
simde

@ -1 +1 @@
Subproject commit aae22459fa284e9fc2b7d4b8e4571afa0418125f Subproject commit 416091ebdb9e901b29d026633e73167d6353a0b0

View File

@ -176,7 +176,8 @@ void replaceAssertVertex(NGHolder &g, NFAVertex t, const ExpressionInfo &expr,
auto ecit = edge_cache.find(cache_key); auto ecit = edge_cache.find(cache_key);
if (ecit == edge_cache.end()) { if (ecit == edge_cache.end()) {
DEBUG_PRINTF("adding edge %zu %zu\n", g[u].index, g[v].index); DEBUG_PRINTF("adding edge %zu %zu\n", g[u].index, g[v].index);
NFAEdge e = add_edge(u, v, g); NFAEdge e;
std::tie(e, std::ignore) = add_edge(u, v, g);
edge_cache.emplace(cache_key, e); edge_cache.emplace(cache_key, e);
g[e].assert_flags = flags; g[e].assert_flags = flags;
if (++assert_edge_count > MAX_ASSERT_EDGES) { if (++assert_edge_count > MAX_ASSERT_EDGES) {
@ -229,11 +230,12 @@ void checkForMultilineStart(ReportManager &rm, NGHolder &g,
/* we need to interpose a dummy dot vertex between v and accept if /* we need to interpose a dummy dot vertex between v and accept if
* required so that ^ doesn't match trailing \n */ * required so that ^ doesn't match trailing \n */
for (const auto &e : out_edges_range(v, g)) { auto deads = [&g=g](const NFAEdge &e) {
if (target(e, g) == g.accept) { return (target(e, g) == g.accept);
dead.emplace_back(e); };
} const auto &er = out_edges_range(v, g);
} std::copy_if(begin(er), end(er), std::back_inserter(dead), deads);
/* assert has been resolved; clear flag */ /* assert has been resolved; clear flag */
g[v].assert_flags &= ~POS_FLAG_MULTILINE_START; g[v].assert_flags &= ~POS_FLAG_MULTILINE_START;
} }

View File

@ -443,7 +443,7 @@ bytecode_ptr<RoseEngine> generateRoseEngine(NG &ng) {
if (!rose) { if (!rose) {
DEBUG_PRINTF("error building rose\n"); DEBUG_PRINTF("error building rose\n");
assert(0); assert(0);
return nullptr; return bytecode_ptr<RoseEngine>(nullptr);
} }
dumpReportManager(ng.rm, ng.cc.grey); dumpReportManager(ng.rm, ng.cc.grey);

View File

@ -542,14 +542,13 @@ u32 crc32c_sb8_64_bit(u32 running_crc, const unsigned char* p_buf,
// Main aligned loop, processes eight bytes at a time. // Main aligned loop, processes eight bytes at a time.
u32 term1, term2;
for (size_t li = 0; li < running_length/8; li++) { for (size_t li = 0; li < running_length/8; li++) {
u32 block = *(const u32 *)p_buf; u32 block = *(const u32 *)p_buf;
crc ^= block; crc ^= block;
p_buf += 4; p_buf += 4;
term1 = crc_tableil8_o88[crc & 0x000000FF] ^ u32 term1 = crc_tableil8_o88[crc & 0x000000FF] ^
crc_tableil8_o80[(crc >> 8) & 0x000000FF]; crc_tableil8_o80[(crc >> 8) & 0x000000FF];
term2 = crc >> 16; u32 term2 = crc >> 16;
crc = term1 ^ crc = term1 ^
crc_tableil8_o72[term2 & 0x000000FF] ^ crc_tableil8_o72[term2 & 0x000000FF] ^
crc_tableil8_o64[(term2 >> 8) & 0x000000FF]; crc_tableil8_o64[(term2 >> 8) & 0x000000FF];

View File

@ -79,21 +79,18 @@ static UNUSED
const platform_t hs_current_platform_no_avx2 = { const platform_t hs_current_platform_no_avx2 = {
HS_PLATFORM_NOAVX2 | HS_PLATFORM_NOAVX2 |
HS_PLATFORM_NOAVX512 | HS_PLATFORM_NOAVX512 |
HS_PLATFORM_NOAVX512VBMI | HS_PLATFORM_NOAVX512VBMI
0,
}; };
static UNUSED static UNUSED
const platform_t hs_current_platform_no_avx512 = { const platform_t hs_current_platform_no_avx512 = {
HS_PLATFORM_NOAVX512 | HS_PLATFORM_NOAVX512 |
HS_PLATFORM_NOAVX512VBMI | HS_PLATFORM_NOAVX512VBMI
0,
}; };
static UNUSED static UNUSED
const platform_t hs_current_platform_no_avx512vbmi = { const platform_t hs_current_platform_no_avx512vbmi = {
HS_PLATFORM_NOAVX512VBMI | HS_PLATFORM_NOAVX512VBMI
0,
}; };
/* /*

View File

@ -1,5 +1,6 @@
/* /*
* Copyright (c) 2016-2020, Intel Corporation * Copyright (c) 2016-2020, Intel Corporation
* Copyright (c) 2024, VectorCamp PC
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -30,6 +31,39 @@
#include "hs_common.h" #include "hs_common.h"
#include "hs_runtime.h" #include "hs_runtime.h"
#include "ue2common.h" #include "ue2common.h"
/* Streamlining the dispatch to eliminate runtime checking/branching:
* What we want is for the first call to the function to run the resolve
* code and set the static resolved/dispatch pointer to point to the
* correct function. Subsequent calls to the function will then go directly
* to the resolved ptr. The simplest way to accomplish this is to
* initially set the pointer to the resolve function.
* To accomplish this in a manner invisible to the user,
* we do involve some rather ugly/confusing macros in here.
* There are four macros that assemble the code for each function
* we want to dispatch in this manner:
* CREATE_DISPATCH
* this generates the declarations for the candidate target functions,
* for the fat_dispatch function pointer, for the resolve_ function,
* points the function pointer to the resolve function, and contains
* most of the definition of the resolve function. The very end of the
* resolve function is completed by the next macro, because in the
* CREATE_DISPATCH macro we have the argument list with the arg declarations,
* which is needed to generate correct function signatures, but we
* can't generate from this, in a macro, a _call_ to one of those functions.
* CONNECT_ARGS_1
* this macro fills in the actual call at the end of the resolve function,
* with the correct arg list. hence the name connect args.
* CONNECT_DISPATCH_2
* this macro likewise provides the beginning of the definition of the
* actual entry point function (the 'real name' that's called by the user),
* but again, for the pass-through call, it cannot invoke the target without
* getting the arg list, which is supplied by the final macro,
* CONNECT_ARGS_3
*
*/
#if defined(ARCH_IA32) || defined(ARCH_X86_64) #if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "util/arch/x86/cpuid_inline.h" #include "util/arch/x86/cpuid_inline.h"
#include "util/join.h" #include "util/join.h"
@ -57,30 +91,38 @@
return (RTYPE)HS_ARCH_ERROR; \ return (RTYPE)HS_ARCH_ERROR; \
} \ } \
\ \
/* resolver */ \ /* dispatch routing pointer for this function */ \
static RTYPE (*JOIN(resolve_, NAME)(void))(__VA_ARGS__) { \ /* initially point it at the resolve function */ \
if (check_avx512vbmi()) { \ static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__); \
return JOIN(avx512vbmi_, NAME); \ static RTYPE (* JOIN(fat_dispatch_, NAME))(__VA_ARGS__) = \
} \ &JOIN(resolve_, NAME); \
if (check_avx512()) { \
return JOIN(avx512_, NAME); \
} \
if (check_avx2()) { \
return JOIN(avx2_, NAME); \
} \
if (check_sse42() && check_popcnt()) { \
return JOIN(corei7_, NAME); \
} \
if (check_ssse3()) { \
return JOIN(core2_, NAME); \
} \
/* anything else is fail */ \
return JOIN(error_, NAME); \
} \
\ \
/* function */ \ /* resolver */ \
HS_PUBLIC_API \ static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__) { \
RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME))) if (check_avx512vbmi()) { \
fat_dispatch_ ## NAME = &JOIN(avx512vbmi_, NAME); \
} \
else if (check_avx512()) { \
fat_dispatch_ ## NAME = &JOIN(avx512_, NAME); \
} \
else if (check_avx2()) { \
fat_dispatch_ ## NAME = &JOIN(avx2_, NAME); \
} \
else if (check_sse42() && check_popcnt()) { \
fat_dispatch_ ## NAME = &JOIN(corei7_, NAME); \
} \
else if (check_ssse3()) { \
fat_dispatch_ ## NAME = &JOIN(core2_, NAME); \
} else { \
/* anything else is fail */ \
fat_dispatch_ ## NAME = &JOIN(error_, NAME); \
} \
/* the rest of the function is completed in the CONNECT_ARGS_1 macro. */
#elif defined(ARCH_AARCH64) #elif defined(ARCH_AARCH64)
#include "util/arch/arm/cpuid_inline.h" #include "util/arch/arm/cpuid_inline.h"
@ -97,99 +139,226 @@
return (RTYPE)HS_ARCH_ERROR; \ return (RTYPE)HS_ARCH_ERROR; \
} \ } \
\ \
/* resolver */ \ /* dispatch routing pointer for this function */ \
static RTYPE (*JOIN(resolve_, NAME)(void))(__VA_ARGS__) { \ /* initially point it at the resolve function */ \
if (check_sve2()) { \ static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__); \
return JOIN(sve2_, NAME); \ static RTYPE (* JOIN(fat_dispatch_, NAME))(__VA_ARGS__) = \
} \ &JOIN(resolve_, NAME); \
if (check_sve()) { \
return JOIN(sve_, NAME); \
} \
if (check_neon()) { \
return JOIN(neon_, NAME); \
} \
/* anything else is fail */ \
return JOIN(error_, NAME); \
} \
\ \
/* function */ \ /* resolver */ \
HS_PUBLIC_API \ static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__) { \
RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME))) if (check_sve2()) { \
fat_dispatch_ ## NAME = &JOIN(sve2_, NAME); \
} \
else if (check_sve()) { \
fat_dispatch_ ## NAME = &JOIN(sve_, NAME); \
} \
else if (check_neon()) { \
fat_dispatch_ ## NAME = &JOIN(neon_, NAME); \
} else { \
/* anything else is fail */ \
fat_dispatch_ ## NAME = &JOIN(error_, NAME); \
} \
/* the rest of the function is completed in the CONNECT_ARGS_1 macro. */
#endif #endif
#define CONNECT_ARGS_1(RTYPE, NAME, ...) \
return (*fat_dispatch_ ## NAME)(__VA_ARGS__); \
} \
#define CONNECT_DISPATCH_2(RTYPE, NAME, ...) \
/* new function */ \
HS_PUBLIC_API \
RTYPE NAME(__VA_ARGS__) { \
#define CONNECT_ARGS_3(RTYPE, NAME, ...) \
return (*fat_dispatch_ ## NAME)(__VA_ARGS__); \
} \
#pragma GCC diagnostic push #pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter" #pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic push #pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-function" #pragma GCC diagnostic ignored "-Wunused-function"
/* this gets a bit ugly to compose the static redirect functions,
* as we necessarily need first the typed arg list and then just the arg
* names, twice in a row, to define the redirect function and the
* dispatch function call */
CREATE_DISPATCH(hs_error_t, hs_scan, const hs_database_t *db, const char *data, CREATE_DISPATCH(hs_error_t, hs_scan, const hs_database_t *db, const char *data,
unsigned length, unsigned flags, hs_scratch_t *scratch, unsigned length, unsigned flags, hs_scratch_t *scratch,
match_event_handler onEvent, void *userCtx); match_event_handler onEvent, void *userCtx);
CONNECT_ARGS_1(hs_error_t, hs_scan, db, data, length, flags, scratch, onEvent, userCtx);
CONNECT_DISPATCH_2(hs_error_t, hs_scan, const hs_database_t *db, const char *data,
unsigned length, unsigned flags, hs_scratch_t *scratch,
match_event_handler onEvent, void *userCtx);
CONNECT_ARGS_3(hs_error_t, hs_scan, db, data, length, flags, scratch, onEvent, userCtx);
CREATE_DISPATCH(hs_error_t, hs_stream_size, const hs_database_t *database, CREATE_DISPATCH(hs_error_t, hs_stream_size, const hs_database_t *database,
size_t *stream_size); size_t *stream_size);
CONNECT_ARGS_1(hs_error_t, hs_stream_size, database, stream_size);
CONNECT_DISPATCH_2(hs_error_t, hs_stream_size, const hs_database_t *database,
size_t *stream_size);
CONNECT_ARGS_3(hs_error_t, hs_stream_size, database, stream_size);
CREATE_DISPATCH(hs_error_t, hs_database_size, const hs_database_t *db, CREATE_DISPATCH(hs_error_t, hs_database_size, const hs_database_t *db,
size_t *size); size_t *size);
CONNECT_ARGS_1(hs_error_t, hs_database_size, db, size);
CONNECT_DISPATCH_2(hs_error_t, hs_database_size, const hs_database_t *db,
size_t *size);
CONNECT_ARGS_3(hs_error_t, hs_database_size, db, size);
CREATE_DISPATCH(hs_error_t, dbIsValid, const hs_database_t *db); CREATE_DISPATCH(hs_error_t, dbIsValid, const hs_database_t *db);
CONNECT_ARGS_1(hs_error_t, dbIsValid, db);
CONNECT_DISPATCH_2(hs_error_t, dbIsValid, const hs_database_t *db);
CONNECT_ARGS_3(hs_error_t, dbIsValid, db);
CREATE_DISPATCH(hs_error_t, hs_free_database, hs_database_t *db); CREATE_DISPATCH(hs_error_t, hs_free_database, hs_database_t *db);
CONNECT_ARGS_1(hs_error_t, hs_free_database, db);
CONNECT_DISPATCH_2(hs_error_t, hs_free_database, hs_database_t *db);
CONNECT_ARGS_3(hs_error_t, hs_free_database, db);
CREATE_DISPATCH(hs_error_t, hs_open_stream, const hs_database_t *db, CREATE_DISPATCH(hs_error_t, hs_open_stream, const hs_database_t *db,
unsigned int flags, hs_stream_t **stream); unsigned int flags, hs_stream_t **stream);
CONNECT_ARGS_1(hs_error_t, hs_open_stream, db, flags, stream);
CONNECT_DISPATCH_2(hs_error_t, hs_open_stream, const hs_database_t *db,
unsigned int flags, hs_stream_t **stream);
CONNECT_ARGS_3(hs_error_t, hs_open_stream, db, flags, stream);
CREATE_DISPATCH(hs_error_t, hs_scan_stream, hs_stream_t *id, const char *data, CREATE_DISPATCH(hs_error_t, hs_scan_stream, hs_stream_t *id, const char *data,
unsigned int length, unsigned int flags, hs_scratch_t *scratch, unsigned int length, unsigned int flags, hs_scratch_t *scratch,
match_event_handler onEvent, void *ctxt); match_event_handler onEvent, void *ctxt);
CONNECT_ARGS_1(hs_error_t, hs_scan_stream, id, data, length, flags, scratch, onEvent, ctxt);
CONNECT_DISPATCH_2(hs_error_t, hs_scan_stream, hs_stream_t *id, const char *data,
unsigned int length, unsigned int flags, hs_scratch_t *scratch,
match_event_handler onEvent, void *ctxt);
CONNECT_ARGS_3(hs_error_t, hs_scan_stream, id, data, length, flags, scratch, onEvent, ctxt);
CREATE_DISPATCH(hs_error_t, hs_close_stream, hs_stream_t *id, CREATE_DISPATCH(hs_error_t, hs_close_stream, hs_stream_t *id,
hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt); hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt);
CONNECT_ARGS_1(hs_error_t, hs_close_stream, id, scratch, onEvent, ctxt);
CONNECT_DISPATCH_2(hs_error_t, hs_close_stream, hs_stream_t *id,
hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt);
CONNECT_ARGS_3(hs_error_t, hs_close_stream, id, scratch, onEvent, ctxt);
CREATE_DISPATCH(hs_error_t, hs_scan_vector, const hs_database_t *db, CREATE_DISPATCH(hs_error_t, hs_scan_vector, const hs_database_t *db,
const char *const *data, const unsigned int *length, const char *const *data, const unsigned int *length,
unsigned int count, unsigned int flags, hs_scratch_t *scratch, unsigned int count, unsigned int flags, hs_scratch_t *scratch,
match_event_handler onevent, void *context); match_event_handler onevent, void *context);
CONNECT_ARGS_1(hs_error_t, hs_scan_vector, db, data, length, count, flags, scratch, onevent, context);
CONNECT_DISPATCH_2(hs_error_t, hs_scan_vector, const hs_database_t *db,
const char *const *data, const unsigned int *length,
unsigned int count, unsigned int flags, hs_scratch_t *scratch,
match_event_handler onevent, void *context);
CONNECT_ARGS_3(hs_error_t, hs_scan_vector, db, data, length, count, flags, scratch, onevent, context);
CREATE_DISPATCH(hs_error_t, hs_database_info, const hs_database_t *db, char **info); CREATE_DISPATCH(hs_error_t, hs_database_info, const hs_database_t *db, char **info);
CONNECT_ARGS_1(hs_error_t, hs_database_info, db, info);
CONNECT_DISPATCH_2(hs_error_t, hs_database_info, const hs_database_t *db, char **info);
CONNECT_ARGS_3(hs_error_t, hs_database_info, db, info);
CREATE_DISPATCH(hs_error_t, hs_copy_stream, hs_stream_t **to_id, CREATE_DISPATCH(hs_error_t, hs_copy_stream, hs_stream_t **to_id,
const hs_stream_t *from_id); const hs_stream_t *from_id);
CONNECT_ARGS_1(hs_error_t, hs_copy_stream, to_id, from_id);
CONNECT_DISPATCH_2(hs_error_t, hs_copy_stream, hs_stream_t **to_id,
const hs_stream_t *from_id);
CONNECT_ARGS_3(hs_error_t, hs_copy_stream, to_id, from_id);
CREATE_DISPATCH(hs_error_t, hs_reset_stream, hs_stream_t *id, CREATE_DISPATCH(hs_error_t, hs_reset_stream, hs_stream_t *id,
unsigned int flags, hs_scratch_t *scratch, unsigned int flags, hs_scratch_t *scratch,
match_event_handler onEvent, void *context); match_event_handler onEvent, void *context);
CONNECT_ARGS_1(hs_error_t, hs_reset_stream, id, flags, scratch, onEvent, context);
CONNECT_DISPATCH_2(hs_error_t, hs_reset_stream, hs_stream_t *id,
unsigned int flags, hs_scratch_t *scratch,
match_event_handler onEvent, void *context);
CONNECT_ARGS_3(hs_error_t, hs_reset_stream, id, flags, scratch, onEvent, context);
CREATE_DISPATCH(hs_error_t, hs_reset_and_copy_stream, hs_stream_t *to_id, CREATE_DISPATCH(hs_error_t, hs_reset_and_copy_stream, hs_stream_t *to_id,
const hs_stream_t *from_id, hs_scratch_t *scratch, const hs_stream_t *from_id, hs_scratch_t *scratch,
match_event_handler onEvent, void *context); match_event_handler onEvent, void *context);
CONNECT_ARGS_1(hs_error_t, hs_reset_and_copy_stream, to_id, from_id, scratch, onEvent, context);
CONNECT_DISPATCH_2(hs_error_t, hs_reset_and_copy_stream, hs_stream_t *to_id,
const hs_stream_t *from_id, hs_scratch_t *scratch,
match_event_handler onEvent, void *context);
CONNECT_ARGS_3(hs_error_t, hs_reset_and_copy_stream, to_id, from_id, scratch, onEvent, context);
CREATE_DISPATCH(hs_error_t, hs_serialize_database, const hs_database_t *db, CREATE_DISPATCH(hs_error_t, hs_serialize_database, const hs_database_t *db,
char **bytes, size_t *length); char **bytes, size_t *length);
CONNECT_ARGS_1(hs_error_t, hs_serialize_database, db, bytes, length);
CONNECT_DISPATCH_2(hs_error_t, hs_serialize_database, const hs_database_t *db,
char **bytes, size_t *length);
CONNECT_ARGS_3(hs_error_t, hs_serialize_database, db, bytes, length);
CREATE_DISPATCH(hs_error_t, hs_deserialize_database, const char *bytes, CREATE_DISPATCH(hs_error_t, hs_deserialize_database, const char *bytes,
const size_t length, hs_database_t **db); const size_t length, hs_database_t **db);
CONNECT_ARGS_1(hs_error_t, hs_deserialize_database, bytes, length, db);
CONNECT_DISPATCH_2(hs_error_t, hs_deserialize_database, const char *bytes,
const size_t length, hs_database_t **db);
CONNECT_ARGS_3(hs_error_t, hs_deserialize_database, bytes, length, db);
CREATE_DISPATCH(hs_error_t, hs_deserialize_database_at, const char *bytes, CREATE_DISPATCH(hs_error_t, hs_deserialize_database_at, const char *bytes,
const size_t length, hs_database_t *db); const size_t length, hs_database_t *db);
CONNECT_ARGS_1(hs_error_t, hs_deserialize_database_at, bytes, length, db);
CONNECT_DISPATCH_2(hs_error_t, hs_deserialize_database_at, const char *bytes,
const size_t length, hs_database_t *db);
CONNECT_ARGS_3(hs_error_t, hs_deserialize_database_at, bytes, length, db);
CREATE_DISPATCH(hs_error_t, hs_serialized_database_info, const char *bytes, CREATE_DISPATCH(hs_error_t, hs_serialized_database_info, const char *bytes,
size_t length, char **info); size_t length, char **info);
CONNECT_ARGS_1(hs_error_t, hs_serialized_database_info, bytes, length, info);
CONNECT_DISPATCH_2(hs_error_t, hs_serialized_database_info, const char *bytes,
size_t length, char **info);
CONNECT_ARGS_3(hs_error_t, hs_serialized_database_info, bytes, length, info);
CREATE_DISPATCH(hs_error_t, hs_serialized_database_size, const char *bytes, CREATE_DISPATCH(hs_error_t, hs_serialized_database_size, const char *bytes,
const size_t length, size_t *deserialized_size); const size_t length, size_t *deserialized_size);
CONNECT_ARGS_1(hs_error_t, hs_serialized_database_size, bytes, length, deserialized_size);
CONNECT_DISPATCH_2(hs_error_t, hs_serialized_database_size, const char *bytes,
const size_t length, size_t *deserialized_size);
CONNECT_ARGS_3(hs_error_t, hs_serialized_database_size, bytes, length, deserialized_size);
CREATE_DISPATCH(hs_error_t, hs_compress_stream, const hs_stream_t *stream, CREATE_DISPATCH(hs_error_t, hs_compress_stream, const hs_stream_t *stream,
char *buf, size_t buf_space, size_t *used_space); char *buf, size_t buf_space, size_t *used_space);
CONNECT_ARGS_1(hs_error_t, hs_compress_stream, stream,
buf, buf_space, used_space);
CONNECT_DISPATCH_2(hs_error_t, hs_compress_stream, const hs_stream_t *stream,
char *buf, size_t buf_space, size_t *used_space);
CONNECT_ARGS_3(hs_error_t, hs_compress_stream, stream,
buf, buf_space, used_space);
CREATE_DISPATCH(hs_error_t, hs_expand_stream, const hs_database_t *db, CREATE_DISPATCH(hs_error_t, hs_expand_stream, const hs_database_t *db,
hs_stream_t **stream, const char *buf,size_t buf_size); hs_stream_t **stream, const char *buf,size_t buf_size);
CONNECT_ARGS_1(hs_error_t, hs_expand_stream, db, stream, buf,buf_size);
CONNECT_DISPATCH_2(hs_error_t, hs_expand_stream, const hs_database_t *db,
hs_stream_t **stream, const char *buf,size_t buf_size);
CONNECT_ARGS_3(hs_error_t, hs_expand_stream, db, stream, buf,buf_size);
CREATE_DISPATCH(hs_error_t, hs_reset_and_expand_stream, hs_stream_t *to_stream, CREATE_DISPATCH(hs_error_t, hs_reset_and_expand_stream, hs_stream_t *to_stream,
const char *buf, size_t buf_size, hs_scratch_t *scratch, const char *buf, size_t buf_size, hs_scratch_t *scratch,
match_event_handler onEvent, void *context); match_event_handler onEvent, void *context);
CONNECT_ARGS_1(hs_error_t, hs_reset_and_expand_stream, to_stream,
buf, buf_size, scratch, onEvent, context);
CONNECT_DISPATCH_2(hs_error_t, hs_reset_and_expand_stream, hs_stream_t *to_stream,
const char *buf, size_t buf_size, hs_scratch_t *scratch,
match_event_handler onEvent, void *context);
CONNECT_ARGS_3(hs_error_t, hs_reset_and_expand_stream, to_stream,
buf, buf_size, scratch, onEvent, context);
/** INTERNALS **/ /** INTERNALS **/
CREATE_DISPATCH(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen); CREATE_DISPATCH(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen);
CONNECT_ARGS_1(u32, Crc32c_ComputeBuf, inCrc32, buf, bufLen);
CONNECT_DISPATCH_2(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen);
CONNECT_ARGS_3(u32, Crc32c_ComputeBuf, inCrc32, buf, bufLen);
#pragma GCC diagnostic pop #pragma GCC diagnostic pop
#pragma GCC diagnostic pop #pragma GCC diagnostic pop

View File

@ -298,7 +298,7 @@ void get_conf_stride_4(const u8 *itPtr, UNUSED const u8 *start_ptr,
static really_inline static really_inline
void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control, void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control,
const u32 *confBase, const struct FDR_Runtime_Args *a, const u32 *confBase, const struct FDR_Runtime_Args *a,
const u8 *ptr, u32 *last_match_id, struct zone *z) { const u8 *ptr, u32 *last_match_id, const struct zone *z) {
const u8 bucket = 8; const u8 bucket = 8;
if (likely(!*conf)) { if (likely(!*conf)) {
@ -333,7 +333,7 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control,
} }
static really_inline static really_inline
void dumpZoneInfo(UNUSED struct zone *z, UNUSED size_t zone_id) { void dumpZoneInfo(UNUSED const struct zone *z, UNUSED size_t zone_id) {
#ifdef DEBUG #ifdef DEBUG
DEBUG_PRINTF("zone: zone=%zu, bufPtr=%p\n", zone_id, z->buf); DEBUG_PRINTF("zone: zone=%zu, bufPtr=%p\n", zone_id, z->buf);
DEBUG_PRINTF("zone: startPtr=%p, endPtr=%p, shift=%u\n", DEBUG_PRINTF("zone: startPtr=%p, endPtr=%p, shift=%u\n",

View File

@ -159,7 +159,7 @@ bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
map<u32, vector<LiteralIndex> > res2lits; map<u32, vector<LiteralIndex> > res2lits;
hwlm_group_t gm = 0; hwlm_group_t gm = 0;
for (LiteralIndex i = 0; i < lits.size(); i++) { for (LiteralIndex i = 0; i < lits.size(); i++) {
LitInfo & li = tmpLitInfo[i]; const LitInfo & li = tmpLitInfo[i];
u32 hash = CONF_HASH_CALL(li.v, andmsk, mult, nBits); u32 hash = CONF_HASH_CALL(li.v, andmsk, mult, nBits);
DEBUG_PRINTF("%016llx --> %u\n", li.v, hash); DEBUG_PRINTF("%016llx --> %u\n", li.v, hash);
res2lits[hash].emplace_back(i); res2lits[hash].emplace_back(i);
@ -294,9 +294,6 @@ setupFullConfs(const vector<hwlmLiteral> &lits,
const EngineDescription &eng, const EngineDescription &eng,
const map<BucketIndex, vector<LiteralIndex>> &bucketToLits, const map<BucketIndex, vector<LiteralIndex>> &bucketToLits,
bool make_small) { bool make_small) {
unique_ptr<TeddyEngineDescription> teddyDescr =
getTeddyDescription(eng.getID());
BC2CONF bc2Conf; BC2CONF bc2Conf;
u32 totalConfirmSize = 0; u32 totalConfirmSize = 0;
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) { for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {

View File

@ -71,7 +71,7 @@ u32 findDesiredStride(size_t num_lits, size_t min_len, size_t min_len_count) {
} else if (num_lits < 5000) { } else if (num_lits < 5000) {
// for larger but not huge sizes, go to stride 2 only if we have at // for larger but not huge sizes, go to stride 2 only if we have at
// least minlen 3 // least minlen 3
desiredStride = MIN(min_len - 1, 2); desiredStride = std::min(min_len - 1, 2UL);
} }
} }

View File

@ -622,7 +622,7 @@ bytecode_ptr<FDR> TeddyCompiler::build() {
static static
bool assignStringsToBuckets( bool assignStringsToBuckets(
const vector<hwlmLiteral> &lits, const vector<hwlmLiteral> &lits,
TeddyEngineDescription &eng, const TeddyEngineDescription &eng,
map<BucketIndex, vector<LiteralIndex>> &bucketToLits) { map<BucketIndex, vector<LiteralIndex>> &bucketToLits) {
assert(eng.numMasks <= MAX_NUM_MASKS); assert(eng.numMasks <= MAX_NUM_MASKS);
if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) { if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {

View File

@ -52,14 +52,14 @@ u32 TeddyEngineDescription::getDefaultFloodSuffixLength() const {
void getTeddyDescriptions(vector<TeddyEngineDescription> *out) { void getTeddyDescriptions(vector<TeddyEngineDescription> *out) {
static const TeddyEngineDef defns[] = { static const TeddyEngineDef defns[] = {
{ 3, 0 | HS_CPU_FEATURES_AVX2, 1, 16, false }, { 3, HS_CPU_FEATURES_AVX2, 1, 16, false },
{ 4, 0 | HS_CPU_FEATURES_AVX2, 1, 16, true }, { 4, HS_CPU_FEATURES_AVX2, 1, 16, true },
{ 5, 0 | HS_CPU_FEATURES_AVX2, 2, 16, false }, { 5, HS_CPU_FEATURES_AVX2, 2, 16, false },
{ 6, 0 | HS_CPU_FEATURES_AVX2, 2, 16, true }, { 6, HS_CPU_FEATURES_AVX2, 2, 16, true },
{ 7, 0 | HS_CPU_FEATURES_AVX2, 3, 16, false }, { 7, HS_CPU_FEATURES_AVX2, 3, 16, false },
{ 8, 0 | HS_CPU_FEATURES_AVX2, 3, 16, true }, { 8, HS_CPU_FEATURES_AVX2, 3, 16, true },
{ 9, 0 | HS_CPU_FEATURES_AVX2, 4, 16, false }, { 9, HS_CPU_FEATURES_AVX2, 4, 16, false },
{ 10, 0 | HS_CPU_FEATURES_AVX2, 4, 16, true }, { 10, HS_CPU_FEATURES_AVX2, 4, 16, true },
{ 11, 0, 1, 8, false }, { 11, 0, 1, 8, false },
{ 12, 0, 1, 8, true }, { 12, 0, 1, 8, true },
{ 13, 0, 2, 8, false }, { 13, 0, 2, 8, false },

View File

@ -48,6 +48,8 @@ hs_error_t HS_CDECL hs_valid_platform(void) {
return HS_ARCH_ERROR; return HS_ARCH_ERROR;
} }
#elif !defined(VS_SIMDE_BACKEND) && (defined(ARCH_ARM32) || defined(ARCH_AARCH64)) #elif !defined(VS_SIMDE_BACKEND) && (defined(ARCH_ARM32) || defined(ARCH_AARCH64))
//check_neon returns true for now
// cppcheck-suppress knownConditionTrueFalse
if (check_neon()) { if (check_neon()) {
return HS_SUCCESS; return HS_SUCCESS;
} else { } else {

View File

@ -170,8 +170,7 @@ void do_accel_streaming(const union AccelAux *aux, const u8 *hbuf, size_t hlen,
DEBUG_PRINTF("got %zu/%zu in 2nd buffer\n", delta, len); DEBUG_PRINTF("got %zu/%zu in 2nd buffer\n", delta, len);
*start += delta; *start += delta;
} else if (hlen) { } else if (hlen) {
UNUSED size_t remaining = offset + ptr2 - found; DEBUG_PRINTF("got %zu/%zu remaining in 1st buffer\n", offset + ptr2 - found, hlen);
DEBUG_PRINTF("got %zu/%zu remaining in 1st buffer\n", remaining, hlen);
} }
} }

View File

@ -143,7 +143,7 @@ bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc,
} }
if (!eng) { if (!eng) {
return nullptr; return bytecode_ptr<HWLM>(nullptr);
} }
assert(engSize); assert(engSize);

View File

@ -94,8 +94,8 @@ char subCastleReportCurrent(const struct Castle *c, struct mq *q,
const struct SubCastle *sub = getSubCastle(c, subIdx); const struct SubCastle *sub = getSubCastle(c, subIdx);
const struct RepeatInfo *info = getRepeatInfo(sub); const struct RepeatInfo *info = getRepeatInfo(sub);
union RepeatControl *rctrl = getControl(q->state, sub); const union RepeatControl *rctrl = getControl(q->state, sub);
char *rstate = (char *)q->streamState + sub->streamStateOffset + const char *rstate = (char *)q->streamState + sub->streamStateOffset +
info->packedCtrlSize; info->packedCtrlSize;
enum RepeatMatch match = enum RepeatMatch match =
repeatHasMatch(info, rctrl, rstate, offset); repeatHasMatch(info, rctrl, rstate, offset);
@ -118,10 +118,10 @@ int castleReportCurrent(const struct Castle *c, struct mq *q) {
if (c->exclusive) { if (c->exclusive) {
u8 *active = (u8 *)q->streamState; u8 *active = (u8 *)q->streamState;
u8 *groups = active + c->groupIterOffset; const u8 *groups = active + c->groupIterOffset;
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
u8 *cur = active + i * c->activeIdxSize; const u8 *cur = active + i * c->activeIdxSize;
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
DEBUG_PRINTF("subcastle %u\n", activeIdx); DEBUG_PRINTF("subcastle %u\n", activeIdx);
if (subCastleReportCurrent(c, q, if (subCastleReportCurrent(c, q,
@ -156,8 +156,8 @@ char subCastleInAccept(const struct Castle *c, struct mq *q,
} }
const struct RepeatInfo *info = getRepeatInfo(sub); const struct RepeatInfo *info = getRepeatInfo(sub);
union RepeatControl *rctrl = getControl(q->state, sub); const union RepeatControl *rctrl = getControl(q->state, sub);
char *rstate = (char *)q->streamState + sub->streamStateOffset + const char *rstate = (char *)q->streamState + sub->streamStateOffset +
info->packedCtrlSize; info->packedCtrlSize;
enum RepeatMatch match = enum RepeatMatch match =
repeatHasMatch(info, rctrl, rstate, offset); repeatHasMatch(info, rctrl, rstate, offset);
@ -180,10 +180,10 @@ char castleInAccept(const struct Castle *c, struct mq *q,
if (c->exclusive) { if (c->exclusive) {
u8 *active = (u8 *)q->streamState; u8 *active = (u8 *)q->streamState;
u8 *groups = active + c->groupIterOffset; const u8 *groups = active + c->groupIterOffset;
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
u8 *cur = active + i * c->activeIdxSize; const u8 *cur = active + i * c->activeIdxSize;
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
DEBUG_PRINTF("subcastle %u\n", activeIdx); DEBUG_PRINTF("subcastle %u\n", activeIdx);
if (subCastleInAccept(c, q, report, offset, activeIdx)) { if (subCastleInAccept(c, q, report, offset, activeIdx)) {
@ -213,8 +213,8 @@ void subCastleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
const struct SubCastle *sub = getSubCastle(c, subIdx); const struct SubCastle *sub = getSubCastle(c, subIdx);
const struct RepeatInfo *info = getRepeatInfo(sub); const struct RepeatInfo *info = getRepeatInfo(sub);
union RepeatControl *rctrl = getControl(full_state, sub); const union RepeatControl *rctrl = getControl(full_state, sub);
char *rstate = (char *)stream_state + sub->streamStateOffset + const char *rstate = (char *)stream_state + sub->streamStateOffset +
info->packedCtrlSize; info->packedCtrlSize;
if (repeatHasMatch(info, rctrl, rstate, offset) == REPEAT_STALE) { if (repeatHasMatch(info, rctrl, rstate, offset) == REPEAT_STALE) {
@ -242,10 +242,10 @@ void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
if (c->exclusive) { if (c->exclusive) {
u8 *active = (u8 *)stream_state; u8 *active = (u8 *)stream_state;
u8 *groups = active + c->groupIterOffset; const u8 *groups = active + c->groupIterOffset;
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
u8 *cur = active + i * c->activeIdxSize; const u8 *cur = active + i * c->activeIdxSize;
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
DEBUG_PRINTF("subcastle %u\n", activeIdx); DEBUG_PRINTF("subcastle %u\n", activeIdx);
subCastleDeactivateStaleSubs(c, offset, full_state, subCastleDeactivateStaleSubs(c, offset, full_state,
@ -329,8 +329,8 @@ void subCastleFindMatch(const struct Castle *c, const u64a begin,
size_t *mloc, char *found, const u32 subIdx) { size_t *mloc, char *found, const u32 subIdx) {
const struct SubCastle *sub = getSubCastle(c, subIdx); const struct SubCastle *sub = getSubCastle(c, subIdx);
const struct RepeatInfo *info = getRepeatInfo(sub); const struct RepeatInfo *info = getRepeatInfo(sub);
union RepeatControl *rctrl = getControl(full_state, sub); const union RepeatControl *rctrl = getControl(full_state, sub);
char *rstate = (char *)stream_state + sub->streamStateOffset + const char *rstate = (char *)stream_state + sub->streamStateOffset +
info->packedCtrlSize; info->packedCtrlSize;
u64a match = repeatNextMatch(info, rctrl, rstate, begin); u64a match = repeatNextMatch(info, rctrl, rstate, begin);
@ -374,10 +374,10 @@ char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end,
if (c->exclusive) { if (c->exclusive) {
u8 *active = (u8 *)stream_state; u8 *active = (u8 *)stream_state;
u8 *groups = active + c->groupIterOffset; const u8 *groups = active + c->groupIterOffset;
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
u8 *cur = active + i * c->activeIdxSize; const u8 *cur = active + i * c->activeIdxSize;
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
DEBUG_PRINTF("subcastle %u\n", activeIdx); DEBUG_PRINTF("subcastle %u\n", activeIdx);
subCastleFindMatch(c, begin, end, full_state, stream_state, mloc, subCastleFindMatch(c, begin, end, full_state, stream_state, mloc,
@ -386,7 +386,7 @@ char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end,
} }
if (c->exclusive != PURE_EXCLUSIVE) { if (c->exclusive != PURE_EXCLUSIVE) {
u8 *active = (u8 *)stream_state + c->activeOffset; const u8 *active = (u8 *)stream_state + c->activeOffset;
for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
i != MMB_INVALID; i != MMB_INVALID;
i = mmbit_iterate(active, c->numRepeats, i)) { i = mmbit_iterate(active, c->numRepeats, i)) {
@ -400,8 +400,8 @@ char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end,
} }
static really_inline static really_inline
u64a subCastleNextMatch(const struct Castle *c, void *full_state, u64a subCastleNextMatch(const struct Castle *c, const void *full_state,
void *stream_state, const u64a loc, const void *stream_state, const u64a loc,
const u32 subIdx) { const u32 subIdx) {
DEBUG_PRINTF("subcastle %u\n", subIdx); DEBUG_PRINTF("subcastle %u\n", subIdx);
const struct SubCastle *sub = getSubCastle(c, subIdx); const struct SubCastle *sub = getSubCastle(c, subIdx);
@ -489,15 +489,14 @@ char castleMatchLoop(const struct Castle *c, const u64a begin, const u64a end,
// full_state (scratch). // full_state (scratch).
u64a offset = end; // min offset of next match u64a offset = end; // min offset of next match
u32 activeIdx = 0;
mmbit_clear(matching, c->numRepeats); mmbit_clear(matching, c->numRepeats);
if (c->exclusive) { if (c->exclusive) {
u8 *active = (u8 *)stream_state; u8 *active = (u8 *)stream_state;
u8 *groups = active + c->groupIterOffset; u8 *groups = active + c->groupIterOffset;
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
u8 *cur = active + i * c->activeIdxSize; const u8 *cur = active + i * c->activeIdxSize;
activeIdx = partial_load_u32(cur, c->activeIdxSize); u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
u64a match = subCastleNextMatch(c, full_state, stream_state, u64a match = subCastleNextMatch(c, full_state, stream_state,
loc, activeIdx); loc, activeIdx);
set_matching(c, match, groups, matching, c->numGroups, i, set_matching(c, match, groups, matching, c->numGroups, i,
@ -797,7 +796,7 @@ char nfaExecCastle_Q_i(const struct NFA *n, struct mq *q, s64a end,
char found = 0; char found = 0;
if (c->exclusive) { if (c->exclusive) {
u8 *groups = (u8 *)q->streamState + c->groupIterOffset; const u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
found = mmbit_any(groups, c->numGroups); found = mmbit_any(groups, c->numGroups);
} }
@ -864,7 +863,7 @@ char nfaExecCastle_Q_i(const struct NFA *n, struct mq *q, s64a end,
} }
if (c->exclusive) { if (c->exclusive) {
u8 *groups = (u8 *)q->streamState + c->groupIterOffset; const u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
if (mmbit_any_precise(groups, c->numGroups)) { if (mmbit_any_precise(groups, c->numGroups)) {
return 1; return 1;
} }
@ -884,7 +883,7 @@ char nfaExecCastle_Q2(const struct NFA *n, struct mq *q, s64a end) {
} }
static static
s64a castleLastKillLoc(const struct Castle *c, struct mq *q) { s64a castleLastKillLoc(const struct Castle *c, const struct mq *q) {
assert(q_cur_type(q) == MQE_START); assert(q_cur_type(q) == MQE_START);
assert(q_last_type(q) == MQE_END); assert(q_last_type(q) == MQE_END);
s64a sp = q_cur_loc(q); s64a sp = q_cur_loc(q);
@ -907,7 +906,6 @@ s64a castleLastKillLoc(const struct Castle *c, struct mq *q) {
if (castleRevScan(c, q->history, sp + hlen, ep + hlen, &loc)) { if (castleRevScan(c, q->history, sp + hlen, ep + hlen, &loc)) {
return (s64a)loc - hlen; return (s64a)loc - hlen;
} }
ep = 0;
} }
return sp - 1; /* the repeats are never killed */ return sp - 1; /* the repeats are never killed */
@ -959,7 +957,7 @@ char nfaExecCastle_QR(const struct NFA *n, struct mq *q, ReportID report) {
char found = 0; char found = 0;
if (c->exclusive) { if (c->exclusive) {
u8 *groups = (u8 *)q->streamState + c->groupIterOffset; const u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
found = mmbit_any_precise(groups, c->numGroups); found = mmbit_any_precise(groups, c->numGroups);
} }
@ -1007,10 +1005,10 @@ char nfaExecCastle_inAnyAccept(const struct NFA *n, struct mq *q) {
if (c->exclusive) { if (c->exclusive) {
u8 *active = (u8 *)q->streamState; u8 *active = (u8 *)q->streamState;
u8 *groups = active + c->groupIterOffset; const u8 *groups = active + c->groupIterOffset;
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
u8 *cur = active + i * c->activeIdxSize; const u8 *cur = active + i * c->activeIdxSize;
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
DEBUG_PRINTF("subcastle %u\n", activeIdx); DEBUG_PRINTF("subcastle %u\n", activeIdx);
const struct SubCastle *sub = getSubCastle(c, activeIdx); const struct SubCastle *sub = getSubCastle(c, activeIdx);
@ -1079,7 +1077,7 @@ void subCastleQueueCompressState(const struct Castle *c, const u32 subIdx,
const struct mq *q, const u64a offset) { const struct mq *q, const u64a offset) {
const struct SubCastle *sub = getSubCastle(c, subIdx); const struct SubCastle *sub = getSubCastle(c, subIdx);
const struct RepeatInfo *info = getRepeatInfo(sub); const struct RepeatInfo *info = getRepeatInfo(sub);
union RepeatControl *rctrl = getControl(q->state, sub); const union RepeatControl *rctrl = getControl(q->state, sub);
char *packed = (char *)q->streamState + sub->streamStateOffset; char *packed = (char *)q->streamState + sub->streamStateOffset;
DEBUG_PRINTF("sub %u next match %llu\n", subIdx, DEBUG_PRINTF("sub %u next match %llu\n", subIdx,
repeatNextMatch(info, rctrl, repeatNextMatch(info, rctrl,
@ -1100,10 +1098,10 @@ char nfaExecCastle_queueCompressState(const struct NFA *n, const struct mq *q,
DEBUG_PRINTF("offset=%llu\n", offset); DEBUG_PRINTF("offset=%llu\n", offset);
if (c->exclusive) { if (c->exclusive) {
u8 *active = (u8 *)q->streamState; u8 *active = (u8 *)q->streamState;
u8 *groups = active + c->groupIterOffset; const u8 *groups = active + c->groupIterOffset;
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
u8 *cur = active + i * c->activeIdxSize; const u8 *cur = active + i * c->activeIdxSize;
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
DEBUG_PRINTF("packing state for sub %u\n", activeIdx); DEBUG_PRINTF("packing state for sub %u\n", activeIdx);
subCastleQueueCompressState(c, activeIdx, q, offset); subCastleQueueCompressState(c, activeIdx, q, offset);

View File

@ -227,11 +227,13 @@ vector<u32> removeClique(CliqueGraph &cg) {
while (!graph_empty(cg)) { while (!graph_empty(cg)) {
const vector<u32> &c = cliquesVec.back(); const vector<u32> &c = cliquesVec.back();
vector<CliqueVertex> dead; vector<CliqueVertex> dead;
for (const auto &v : vertices_range(cg)) {
if (find(c.begin(), c.end(), cg[v].stateId) != c.end()) { auto deads = [&c=c, &cg=cg](const CliqueVertex &v) {
dead.emplace_back(v); return (find(c.begin(), c.end(), cg[v].stateId) != c.end());
} };
} const auto &vr = vertices_range(cg);
std::copy_if(begin(vr), end(vr), std::back_inserter(dead), deads);
for (const auto &v : dead) { for (const auto &v : dead) {
clear_vertex(v, cg); clear_vertex(v, cg);
remove_vertex(v, cg); remove_vertex(v, cg);
@ -294,7 +296,7 @@ vector<vector<u32>> checkExclusion(u32 &streamStateSize,
size_t lower = 0; size_t lower = 0;
size_t total = 0; size_t total = 0;
while (lower < trigSize) { while (lower < trigSize) {
vector<CliqueVertex> vertices; vector<CliqueVertex> clvertices;
unique_ptr<CliqueGraph> cg = make_unique<CliqueGraph>(); unique_ptr<CliqueGraph> cg = make_unique<CliqueGraph>();
vector<vector<size_t>> min_reset_dist; vector<vector<size_t>> min_reset_dist;
@ -302,7 +304,7 @@ vector<vector<u32>> checkExclusion(u32 &streamStateSize,
// get min reset distance for each repeat // get min reset distance for each repeat
for (size_t i = lower; i < upper; i++) { for (size_t i = lower; i < upper; i++) {
CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg); CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg);
vertices.emplace_back(v); clvertices.emplace_back(v);
const vector<size_t> &tmp_dist = const vector<size_t> &tmp_dist =
minResetDistToEnd(triggers[i], cr); minResetDistToEnd(triggers[i], cr);
@ -311,11 +313,11 @@ vector<vector<u32>> checkExclusion(u32 &streamStateSize,
// find exclusive pair for each repeat // find exclusive pair for each repeat
for (size_t i = lower; i < upper; i++) { for (size_t i = lower; i < upper; i++) {
CliqueVertex s = vertices[i - lower]; CliqueVertex s = clvertices[i - lower];
for (size_t j = i + 1; j < upper; j++) { for (size_t j = i + 1; j < upper; j++) {
if (findExclusivePair(i, j, lower, min_reset_dist, if (findExclusivePair(i, j, lower, min_reset_dist,
triggers)) { triggers)) {
CliqueVertex d = vertices[j - lower]; CliqueVertex d = clvertices[j - lower];
add_edge(s, d, *cg); add_edge(s, d, *cg);
} }
} }
@ -655,7 +657,8 @@ buildCastle(const CastleProto &proto,
if (!stale_iter.empty()) { if (!stale_iter.empty()) {
c->staleIterOffset = verify_u32(ptr - base_ptr); c->staleIterOffset = verify_u32(ptr - base_ptr);
copy_bytes(ptr, stale_iter); copy_bytes(ptr, stale_iter);
ptr += byte_length(stale_iter); // Removed unused increment operation
// ptr += byte_length(stale_iter);
} }
return nfa; return nfa;
@ -919,7 +922,7 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) {
u32 min_bound = pr.bounds.min; // always finite u32 min_bound = pr.bounds.min; // always finite
if (min_bound == 0) { // Vacuous case, we can only do this once. if (min_bound == 0) { // Vacuous case, we can only do this once.
assert(!edge(g.start, g.accept, g).second); assert(!edge(g.start, g.accept, g).second);
NFAEdge e = add_edge(g.start, g.accept, g); NFAEdge e = add_edge(g.start, g.accept, g).first;
g[e].tops.insert(top); g[e].tops.insert(top);
g[u].reports.insert(pr.reports.begin(), pr.reports.end()); g[u].reports.insert(pr.reports.begin(), pr.reports.end());
min_bound = 1; min_bound = 1;
@ -928,7 +931,7 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) {
for (u32 i = 0; i < min_bound; i++) { for (u32 i = 0; i < min_bound; i++) {
NFAVertex v = add_vertex(g); NFAVertex v = add_vertex(g);
g[v].char_reach = pr.reach; g[v].char_reach = pr.reach;
NFAEdge e = add_edge(u, v, g); NFAEdge e = add_edge(u, v, g).first;
if (u == g.start) { if (u == g.start) {
g[e].tops.insert(top); g[e].tops.insert(top);
} }
@ -947,7 +950,7 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) {
if (head != u) { if (head != u) {
add_edge(head, v, g); add_edge(head, v, g);
} }
NFAEdge e = add_edge(u, v, g); NFAEdge e = add_edge(u, v, g).first;
if (u == g.start) { if (u == g.start) {
g[e].tops.insert(top); g[e].tops.insert(top);
} }

View File

@ -304,6 +304,7 @@ void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) {
DEBUG_PRINTF("dfa is empty\n"); DEBUG_PRINTF("dfa is empty\n");
} }
// cppcheck-suppress unreadVariable
UNUSED const size_t states_before = rdfa.states.size(); UNUSED const size_t states_before = rdfa.states.size();
HopcroftInfo info(rdfa); HopcroftInfo info(rdfa);

View File

@ -978,14 +978,14 @@ char nfaExecGough16_initCompressedState(const struct NFA *nfa, u64a offset,
char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) { char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
NfaCallback cb = q->cb; NfaCallback cb = q->cb;
void *ctxt = q->context;
u8 s = *(u8 *)q->state; u8 s = *(u8 *)q->state;
u64a offset = q_cur_offset(q); u64a offset = q_cur_offset(q);
struct gough_som_info *som = getSomInfo(q->state); const struct gough_som_info *som = getSomInfo(q->state);
assert(q_cur_type(q) == MQE_START); assert(q_cur_type(q) == MQE_START);
assert(s); assert(s);
if (s >= m->accept_limit_8) { if (s >= m->accept_limit_8) {
void *ctxt = q->context;
u32 cached_accept_id = 0; u32 cached_accept_id = 0;
u16 cached_accept_state = 0; u16 cached_accept_state = 0;
u32 cached_accept_som = 0; u32 cached_accept_som = 0;
@ -1000,16 +1000,16 @@ char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) {
char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q) { char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
NfaCallback cb = q->cb; NfaCallback cb = q->cb;
void *ctxt = q->context;
u16 s = *(u16 *)q->state; u16 s = *(u16 *)q->state;
const struct mstate_aux *aux = get_aux(m, s); const struct mstate_aux *aux = get_aux(m, s);
u64a offset = q_cur_offset(q); u64a offset = q_cur_offset(q);
struct gough_som_info *som = getSomInfo(q->state); const struct gough_som_info *som = getSomInfo(q->state);
assert(q_cur_type(q) == MQE_START); assert(q_cur_type(q) == MQE_START);
DEBUG_PRINTF("state %hu\n", s); DEBUG_PRINTF("state %hu\n", s);
assert(s); assert(s);
if (aux->accept) { if (aux->accept) {
void *ctxt = q->context;
u32 cached_accept_id = 0; u32 cached_accept_id = 0;
u16 cached_accept_state = 0; u16 cached_accept_state = 0;
u32 cached_accept_som = 0; u32 cached_accept_som = 0;

View File

@ -132,7 +132,7 @@ void GoughSSAVarMin::replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) {
} }
static static
void translateRawReports(UNUSED GoughGraph &cfg, UNUSED const raw_som_dfa &raw, void translateRawReports(UNUSED const GoughGraph &cfg, UNUSED const raw_som_dfa &raw,
const flat_map<u32, GoughSSAVarJoin *> &joins_at_s, const flat_map<u32, GoughSSAVarJoin *> &joins_at_s,
UNUSED GoughVertex s, UNUSED GoughVertex s,
const set<som_report> &reports_in, const set<som_report> &reports_in,
@ -206,10 +206,6 @@ void makeCFG_top_edge(GoughGraph &cfg, const vector<GoughVertex> &vertices,
assert(contains(src_slots, slot_id)); assert(contains(src_slots, slot_id));
shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>(); shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>();
if (!vmin) {
assert(0);
throw std::bad_alloc();
}
cfg[e].vars.emplace_back(vmin); cfg[e].vars.emplace_back(vmin);
final_var = vmin.get(); final_var = vmin.get();
@ -321,10 +317,6 @@ void makeCFG_edge(GoughGraph &cfg, const map<u32, u32> &som_creators,
DEBUG_PRINTF("bypassing min on join %u\n", slot_id); DEBUG_PRINTF("bypassing min on join %u\n", slot_id);
} else { } else {
shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>(); shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>();
if (!vmin) {
assert(0);
throw std::bad_alloc();
}
cfg[e].vars.emplace_back(vmin); cfg[e].vars.emplace_back(vmin);
final_var = vmin.get(); final_var = vmin.get();
@ -441,10 +433,11 @@ unique_ptr<GoughGraph> makeCFG(const raw_som_dfa &raw) {
} }
static static
// cppcheck-suppress constParameterReference
void copy_propagate_report_set(vector<pair<ReportID, GoughSSAVar *> > &rep) { void copy_propagate_report_set(vector<pair<ReportID, GoughSSAVar *> > &rep) {
vector<pair<ReportID, GoughSSAVar *> >::iterator it = rep.begin(); vector<pair<ReportID, GoughSSAVar *> >::iterator it = rep.begin();
while (it != rep.end()) { while (it != rep.end()) {
GoughSSAVar *var = it->second; const GoughSSAVar *var = it->second;
if (!var) { if (!var) {
++it; ++it;
continue; continue;
@ -546,7 +539,7 @@ void remove_dead(GoughGraph &g) {
} }
while (!queue.empty()) { while (!queue.empty()) {
GoughSSAVar *v = queue.back(); const GoughSSAVar *v = queue.back();
queue.pop_back(); queue.pop_back();
for (GoughSSAVar *var : v->get_inputs()) { for (GoughSSAVar *var : v->get_inputs()) {
if (var->seen) { if (var->seen) {
@ -658,8 +651,8 @@ GoughSSAVar *GoughSSAVarJoin::get_input(const GoughEdge &prev) const {
return nullptr; return nullptr;
} }
const flat_set<GoughEdge> &GoughSSAVarJoin::get_edges_for_input( // cppcheck-suppress constParameterPointer
GoughSSAVar *input) const { const flat_set<GoughEdge> &GoughSSAVarJoin::get_edges_for_input(GoughSSAVar *input) const {
return input_map.at(input); return input_map.at(input);
} }
@ -810,7 +803,7 @@ private:
static static
void prep_joins_for_generation(const GoughGraph &g, GoughVertex v, void prep_joins_for_generation(const GoughGraph &g, GoughVertex v,
map<GoughEdge, edge_join_info> *edge_info) { map<GoughEdge, edge_join_info> &edge_info) {
DEBUG_PRINTF("writing out joins for %u\n", g[v].state_id); DEBUG_PRINTF("writing out joins for %u\n", g[v].state_id);
for (const auto &var : g[v].vars) { for (const auto &var : g[v].vars) {
u32 dest_slot = var->slot; u32 dest_slot = var->slot;
@ -821,7 +814,7 @@ void prep_joins_for_generation(const GoughGraph &g, GoughVertex v,
} }
for (const GoughEdge &incoming_edge : var_edges.second) { for (const GoughEdge &incoming_edge : var_edges.second) {
(*edge_info)[incoming_edge].insert(input, dest_slot); edge_info[incoming_edge].insert(input, dest_slot);
DEBUG_PRINTF("need %u<-%u\n", dest_slot, input); DEBUG_PRINTF("need %u<-%u\n", dest_slot, input);
} }
} }
@ -919,7 +912,7 @@ void build_blocks(const GoughGraph &g,
} }
map<GoughEdge, edge_join_info> eji; map<GoughEdge, edge_join_info> eji;
prep_joins_for_generation(g, t, &eji); prep_joins_for_generation(g, t, eji);
for (auto &m : eji) { for (auto &m : eji) {
vector<gough_ins> &block = (*blocks)[gough_edge_id(g, m.first)]; vector<gough_ins> &block = (*blocks)[gough_edge_id(g, m.first)];
@ -1017,7 +1010,7 @@ void update_accel_prog_offset(const gough_build_strat &gbs,
verts[gbs.gg[v].state_id] = v; verts[gbs.gg[v].state_id] = v;
} }
for (auto &m : gbs.built_accel) { for (const auto &m : gbs.built_accel) {
gough_accel *ga = m.first; gough_accel *ga = m.first;
assert(!ga->prog_offset); assert(!ga->prog_offset);
GoughVertex v = verts[m.second]; GoughVertex v = verts[m.second];
@ -1050,7 +1043,7 @@ bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
|| !cc.streaming); || !cc.streaming);
if (!cc.grey.allowGough) { if (!cc.grey.allowGough) {
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
DEBUG_PRINTF("hello world\n"); DEBUG_PRINTF("hello world\n");
@ -1081,7 +1074,7 @@ bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
auto basic_dfa = mcclellanCompile_i(raw, gbs, cc); auto basic_dfa = mcclellanCompile_i(raw, gbs, cc);
assert(basic_dfa); assert(basic_dfa);
if (!basic_dfa) { if (!basic_dfa) {
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
u8 alphaShift u8 alphaShift

View File

@ -194,7 +194,7 @@ void handle_pending_vars(GoughSSAVar *def, const GoughGraph &g,
if (contains(aux.containing_v, var)) { if (contains(aux.containing_v, var)) {
/* def is used by join vertex, value only needs to be live on some /* def is used by join vertex, value only needs to be live on some
* incoming edges */ * incoming edges */
GoughSSAVarJoin *vj = (GoughSSAVarJoin *)var; const GoughSSAVarJoin *vj = (GoughSSAVarJoin *)var;
const flat_set<GoughEdge> &live_edges const flat_set<GoughEdge> &live_edges
= vj->get_edges_for_input(def); = vj->get_edges_for_input(def);
for (const auto &e : live_edges) { for (const auto &e : live_edges) {
@ -278,7 +278,7 @@ set<const GoughSSAVar *> live_during(GoughSSAVar *def, const GoughGraph &g,
template<typename VarP> template<typename VarP>
void set_initial_slots(const vector<VarP> &vars, u32 *next_slot) { void set_initial_slots(const vector<VarP> &vars, u32 *next_slot) {
for (auto &var : vars) { for (const auto &var : vars) {
assert(var->slot == INVALID_SLOT); assert(var->slot == INVALID_SLOT);
var->slot = (*next_slot)++; var->slot = (*next_slot)++;
} }
@ -438,7 +438,7 @@ void create_slot_mapping(const GoughGraph &cfg, UNUSED u32 old_slot_count,
} }
static static
void update_local_slots(GoughGraph &g, set<GoughSSAVar *> &locals, void update_local_slots(GoughGraph &g, const set<GoughSSAVar *> &locals,
u32 local_base) { u32 local_base) {
DEBUG_PRINTF("%zu local variables\n", locals.size()); DEBUG_PRINTF("%zu local variables\n", locals.size());
/* local variables only occur on edges (joins are never local) */ /* local variables only occur on edges (joins are never local) */

View File

@ -56,7 +56,7 @@ extern "C"
char gf_name##_Q(const struct NFA *n, struct mq *q, s64a end); \ char gf_name##_Q(const struct NFA *n, struct mq *q, s64a end); \
char gf_name##_Q2(const struct NFA *n, struct mq *q, s64a end); \ char gf_name##_Q2(const struct NFA *n, struct mq *q, s64a end); \
char gf_name##_QR(const struct NFA *n, struct mq *q, ReportID report); \ char gf_name##_QR(const struct NFA *n, struct mq *q, ReportID report); \
char gf_name##_reportCurrent(const struct NFA *n, struct mq *q); \ char gf_name##_reportCurrent(const struct NFA *n, const struct mq *q); \
char gf_name##_inAccept(const struct NFA *n, ReportID report, \ char gf_name##_inAccept(const struct NFA *n, ReportID report, \
struct mq *q); \ struct mq *q); \
char gf_name##_inAnyAccept(const struct NFA *n, struct mq *q); \ char gf_name##_inAnyAccept(const struct NFA *n, struct mq *q); \

View File

@ -332,7 +332,7 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx,
// UE-1636) need to guard cyclic tug-accepts as well. // UE-1636) need to guard cyclic tug-accepts as well.
static really_inline static really_inline
char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
union RepeatControl *repeat_ctrl, char *repeat_state, const union RepeatControl *repeat_ctrl, const char *repeat_state,
u64a offset, ReportID report) { u64a offset, ReportID report) {
assert(limex); assert(limex);
@ -382,7 +382,7 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
static really_inline static really_inline
char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
union RepeatControl *repeat_ctrl, char *repeat_state, const union RepeatControl *repeat_ctrl, const char *repeat_state,
u64a offset) { u64a offset) {
assert(limex); assert(limex);

View File

@ -290,7 +290,7 @@ void maskSetBits(Mask &m, const NFAStateSet &bits) {
template<class Mask> template<class Mask>
bool isMaskZero(Mask &m) { bool isMaskZero(Mask &m) {
u8 *m8 = (u8 *)&m; const u8 *m8 = (u8 *)&m;
for (u32 i = 0; i < sizeof(m); i++) { for (u32 i = 0; i < sizeof(m); i++) {
if (m8[i]) { if (m8[i]) {
return false; return false;
@ -329,11 +329,11 @@ void buildReachMapping(const build_info &args, vector<NFAStateSet> &reach,
// Build a list of vertices with a state index assigned. // Build a list of vertices with a state index assigned.
vector<NFAVertex> verts; vector<NFAVertex> verts;
verts.reserve(args.num_states); verts.reserve(args.num_states);
for (auto v : vertices_range(h)) { auto sidat = [&state_ids=state_ids](const NFAVertex &v) {
if (state_ids.at(v) != NO_STATE) { return (state_ids.at(v) != NO_STATE);
verts.emplace_back(v); };
} const auto &vr = vertices_range(h);
} std::copy_if(begin(vr), end(vr), std::back_inserter(verts), sidat);
// Build a mapping from set-of-states -> reachability. // Build a mapping from set-of-states -> reachability.
map<NFAStateSet, CharReach> mapping; map<NFAStateSet, CharReach> mapping;
@ -555,7 +555,8 @@ void filterAccelStates(NGHolder &g, const map<u32, set<NFAVertex>> &tops,
// Similarly, connect (start, startDs) if necessary. // Similarly, connect (start, startDs) if necessary.
if (!edge(g.start, g.startDs, g).second) { if (!edge(g.start, g.startDs, g).second) {
NFAEdge e = add_edge(g.start, g.startDs, g); NFAEdge e;
std::tie(e, std::ignore) = add_edge(g.start, g.startDs, g);
tempEdges.emplace_back(e); // Remove edge later. tempEdges.emplace_back(e); // Remove edge later.
} }
@ -1481,6 +1482,7 @@ u32 buildExceptionMap(const build_info &args, ReportListCache &reports_cache,
continue; continue;
} }
u32 j = args.state_ids.at(w); u32 j = args.state_ids.at(w);
// j can be NO_STATE if args.state_ids.at(w) returns NO_STATE
if (j == NO_STATE) { if (j == NO_STATE) {
continue; continue;
} }
@ -1572,7 +1574,7 @@ u32 findMaxVarShift(const build_info &args, u32 nShifts) {
static static
int getLimexScore(const build_info &args, u32 nShifts) { int getLimexScore(const build_info &args, u32 nShifts) {
const NGHolder &h = args.h; const NGHolder &h = args.h;
u32 maxVarShift = nShifts; u32 maxVarShift;
int score = 0; int score = 0;
score += SHIFT_COST * nShifts; score += SHIFT_COST * nShifts;
@ -1700,7 +1702,7 @@ struct Factory {
static static
void allocState(NFA *nfa, u32 repeatscratchStateSize, void allocState(NFA *nfa, u32 repeatscratchStateSize,
u32 repeatStreamState) { u32 repeatStreamState) {
implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa); const implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa);
// LimEx NFAs now store the following in state: // LimEx NFAs now store the following in state:
// 1. state bitvector (always present) // 1. state bitvector (always present)
@ -2218,7 +2220,7 @@ struct Factory {
static static
bytecode_ptr<NFA> generateNfa(const build_info &args) { bytecode_ptr<NFA> generateNfa(const build_info &args) {
if (args.num_states > NFATraits<dtype>::maxStates) { if (args.num_states > NFATraits<dtype>::maxStates) {
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
// Build bounded repeat structures. // Build bounded repeat structures.
@ -2577,7 +2579,7 @@ bytecode_ptr<NFA> generate(NGHolder &h,
if (!cc.grey.allowLimExNFA) { if (!cc.grey.allowLimExNFA) {
DEBUG_PRINTF("limex not allowed\n"); DEBUG_PRINTF("limex not allowed\n");
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
// If you ask for a particular type, it had better be an NFA. // If you ask for a particular type, it had better be an NFA.
@ -2612,7 +2614,7 @@ bytecode_ptr<NFA> generate(NGHolder &h,
if (scores.empty()) { if (scores.empty()) {
DEBUG_PRINTF("No NFA returned a valid score for this case.\n"); DEBUG_PRINTF("No NFA returned a valid score for this case.\n");
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
// Sort acceptable models in priority order, lowest score first. // Sort acceptable models in priority order, lowest score first.
@ -2631,7 +2633,7 @@ bytecode_ptr<NFA> generate(NGHolder &h,
} }
DEBUG_PRINTF("NFA build failed.\n"); DEBUG_PRINTF("NFA build failed.\n");
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
u32 countAccelStates(NGHolder &h, u32 countAccelStates(NGHolder &h,

View File

@ -302,8 +302,8 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ,
} }
#else #else
// A copy of the estate as an array of GPR-sized chunks. // A copy of the estate as an array of GPR-sized chunks.
CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; // cppcheck-suppress duplicateExpression
CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; // cppcheck-suppress duplicateExpression
#ifdef ESTATE_ON_STACK #ifdef ESTATE_ON_STACK
memcpy(chunks, &estate, sizeof(STATE_T)); memcpy(chunks, &estate, sizeof(STATE_T));
#else #else
@ -311,7 +311,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ,
#endif #endif
memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T)); memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T));
u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)]; u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)]; // cppcheck-suppress duplicateExpression
base_index[0] = 0; base_index[0] = 0;
for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) { for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) {
base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]); base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]);

View File

@ -927,7 +927,7 @@ char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state,
context); context);
} }
char JOIN(LIMEX_API_ROOT, _reportCurrent)(const struct NFA *n, struct mq *q) { char JOIN(LIMEX_API_ROOT, _reportCurrent)(const struct NFA *n, const struct mq *q) {
const IMPL_NFA_T *limex = getImplNfa(n); const IMPL_NFA_T *limex = getImplNfa(n);
REPORTCURRENT_FN(limex, q); REPORTCURRENT_FN(limex, q);
return 1; return 1;
@ -984,9 +984,9 @@ char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa,
assert(q->state && q->streamState); assert(q->state && q->streamState);
const IMPL_NFA_T *limex = getImplNfa(nfa); const IMPL_NFA_T *limex = getImplNfa(nfa);
union RepeatControl *repeat_ctrl = const union RepeatControl *repeat_ctrl =
getRepeatControlBase(q->state, sizeof(STATE_T)); getRepeatControlBase(q->state, sizeof(STATE_T));
char *repeat_state = q->streamState + limex->stateSize; const char *repeat_state = q->streamState + limex->stateSize;
STATE_T state = *(STATE_T *)q->state; STATE_T state = *(STATE_T *)q->state;
u64a offset = q->offset + q_last_loc(q) + 1; u64a offset = q->offset + q_last_loc(q) + 1;
@ -999,9 +999,9 @@ char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
assert(q->state && q->streamState); assert(q->state && q->streamState);
const IMPL_NFA_T *limex = getImplNfa(nfa); const IMPL_NFA_T *limex = getImplNfa(nfa);
union RepeatControl *repeat_ctrl = const union RepeatControl *repeat_ctrl =
getRepeatControlBase(q->state, sizeof(STATE_T)); getRepeatControlBase(q->state, sizeof(STATE_T));
char *repeat_state = q->streamState + limex->stateSize; const char *repeat_state = q->streamState + limex->stateSize;
STATE_T state = *(STATE_T *)q->state; STATE_T state = *(STATE_T *)q->state;
u64a offset = q->offset + q_last_loc(q) + 1; u64a offset = q->offset + q_last_loc(q) + 1;
@ -1020,9 +1020,9 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
if (limex->repeatCount) { if (limex->repeatCount) {
u64a offset = q->offset + loc + 1; u64a offset = q->offset + loc + 1;
union RepeatControl *repeat_ctrl = const union RepeatControl *repeat_ctrl =
getRepeatControlBase(q->state, sizeof(STATE_T)); getRepeatControlBase(q->state, sizeof(STATE_T));
char *repeat_state = q->streamState + limex->stateSize; const char *repeat_state = q->streamState + limex->stateSize;
SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &state); SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &state);
} }

View File

@ -176,7 +176,7 @@ static
mstate_aux *getAux(NFA *n, dstate_id_t i) { mstate_aux *getAux(NFA *n, dstate_id_t i) {
assert(isMcClellanType(n->type)); assert(isMcClellanType(n->type));
mcclellan *m = (mcclellan *)getMutableImplNfa(n); const mcclellan *m = (mcclellan *)getMutableImplNfa(n);
mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset); mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset);
mstate_aux *aux = aux_base + i; mstate_aux *aux = aux_base + i;
@ -202,7 +202,7 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
continue; continue;
} }
mstate_aux *aux = getAux(n, succ_table[c_prime]); const mstate_aux *aux = getAux(n, succ_table[c_prime]);
if (aux->accept) { if (aux->accept) {
succ_table[c_prime] |= ACCEPT_FLAG; succ_table[c_prime] |= ACCEPT_FLAG;
@ -231,7 +231,7 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
continue; continue;
} }
mstate_aux *aux = getAux(n, succ_i); const mstate_aux *aux = getAux(n, succ_i);
if (aux->accept) { if (aux->accept) {
succ_i |= ACCEPT_FLAG; succ_i |= ACCEPT_FLAG;
@ -261,7 +261,7 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
// check successful transition // check successful transition
u16 next = unaligned_load_u16((u8 *)trans); u16 next = unaligned_load_u16((u8 *)trans);
if (next < wide_limit) { if (next < wide_limit) {
mstate_aux *aux = getAux(n, next); const mstate_aux *aux = getAux(n, next);
if (aux->accept) { if (aux->accept) {
next |= ACCEPT_FLAG; next |= ACCEPT_FLAG;
} }
@ -278,7 +278,7 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
if (next_k >= wide_limit) { if (next_k >= wide_limit) {
continue; continue;
} }
mstate_aux *aux_k = getAux(n, next_k); const mstate_aux *aux_k = getAux(n, next_k);
if (aux_k->accept) { if (aux_k->accept) {
next_k |= ACCEPT_FLAG; next_k |= ACCEPT_FLAG;
} }
@ -361,7 +361,7 @@ struct raw_report_list {
raw_report_list(const flat_set<ReportID> &reports_in, raw_report_list(const flat_set<ReportID> &reports_in,
const ReportManager &rm, bool do_remap) { const ReportManager &rm, bool do_remap) {
if (do_remap) { if (do_remap) {
for (auto &id : reports_in) { for (const auto &id : reports_in) {
reports.insert(rm.getProgramOffset(id)); reports.insert(rm.getProgramOffset(id));
} }
} else { } else {
@ -540,7 +540,7 @@ size_t calcWideRegionSize(const dfa_info &info) {
static static
void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info, void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info,
const vector<u32> &reports, const vector<u32> &reports_eod, const vector<u32> &reports, const vector<u32> &reports_eod,
vector<u32> &reportOffsets) { const vector<u32> &reportOffsets) {
const dstate &raw_state = info.states[i]; const dstate &raw_state = info.states[i];
aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]]; aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]];
aux->accept_eod = raw_state.reports_eod.empty() ? 0 aux->accept_eod = raw_state.reports_eod.empty() ? 0
@ -625,7 +625,7 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
if (!allocateFSN16(info, &count_real_states, &wide_limit)) { if (!allocateFSN16(info, &count_real_states, &wide_limit)) {
DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
info.size()); info.size());
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
DEBUG_PRINTF("count_real_states: %d\n", count_real_states); DEBUG_PRINTF("count_real_states: %d\n", count_real_states);
@ -794,8 +794,8 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
} }
for (size_t i : order) { for (size_t i : order) {
vector<dstate_id_t> &state_chain = info.wide_state_chain[i]; const vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
vector<symbol_t> &symbol_chain = info.wide_symbol_chain[i]; const vector<symbol_t> &symbol_chain = info.wide_symbol_chain[i];
u16 width = verify_u16(symbol_chain.size()); u16 width = verify_u16(symbol_chain.size());
*(u16 *)(curr_wide_entry + WIDE_WIDTH_OFFSET) = width; *(u16 *)(curr_wide_entry + WIDE_WIDTH_OFFSET) = width;
@ -1367,11 +1367,11 @@ bool store_chain_longest(vector<vector<dstate_id_t>> &candidate_chain,
/* \brief Generate wide_symbol_chain from wide_state_chain. */ /* \brief Generate wide_symbol_chain from wide_state_chain. */
static static
void generate_symbol_chain(dfa_info &info, vector<symbol_t> &chain_tail) { void generate_symbol_chain(dfa_info &info, vector<symbol_t> &chain_tail) {
raw_dfa &rdfa = info.raw; const raw_dfa &rdfa = info.raw;
assert(chain_tail.size() == info.wide_state_chain.size()); assert(chain_tail.size() == info.wide_state_chain.size());
for (size_t i = 0; i < info.wide_state_chain.size(); i++) { for (size_t i = 0; i < info.wide_state_chain.size(); i++) {
vector<dstate_id_t> &state_chain = info.wide_state_chain[i]; const vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
vector<symbol_t> symbol_chain; vector<symbol_t> symbol_chain;
info.extra[state_chain[0]].wideHead = true; info.extra[state_chain[0]].wideHead = true;
@ -1379,7 +1379,6 @@ void generate_symbol_chain(dfa_info &info, vector<symbol_t> &chain_tail) {
for (size_t j = 0; j < width; j++) { for (size_t j = 0; j < width; j++) {
dstate_id_t curr_id = state_chain[j]; dstate_id_t curr_id = state_chain[j];
dstate_id_t next_id = state_chain[j + 1];
// The last state of the chain doesn't belong to a wide state. // The last state of the chain doesn't belong to a wide state.
info.extra[curr_id].wideState = true; info.extra[curr_id].wideState = true;
@ -1388,6 +1387,7 @@ void generate_symbol_chain(dfa_info &info, vector<symbol_t> &chain_tail) {
if (j == width - 1) { if (j == width - 1) {
symbol_chain.emplace_back(chain_tail[i]); symbol_chain.emplace_back(chain_tail[i]);
} else { } else {
dstate_id_t next_id = state_chain[j + 1];
for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) { for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) {
if (rdfa.states[curr_id].next[sym] == next_id) { if (rdfa.states[curr_id].next[sym] == next_id) {
symbol_chain.emplace_back(sym); symbol_chain.emplace_back(sym);

View File

@ -144,7 +144,7 @@ u8 dfa_info::getAlphaShift() const {
static static
mstate_aux *getAux(NFA *n, dstate_id_t i) { mstate_aux *getAux(NFA *n, dstate_id_t i) {
mcsheng *m = (mcsheng *)getMutableImplNfa(n); const mcsheng *m = (mcsheng *)getMutableImplNfa(n);
mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset); mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset);
mstate_aux *aux = aux_base + i; mstate_aux *aux = aux_base + i;
@ -244,7 +244,7 @@ void populateBasicInfo(size_t state_size, const dfa_info &info,
static static
mstate_aux *getAux64(NFA *n, dstate_id_t i) { mstate_aux *getAux64(NFA *n, dstate_id_t i) {
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(n); const mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(n);
mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset); mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset);
mstate_aux *aux = aux_base + i; mstate_aux *aux = aux_base + i;
@ -534,7 +534,7 @@ double leakiness(const RdfaGraph &g, dfa_info &info,
static static
dstate_id_t find_sheng_states(dfa_info &info, dstate_id_t find_sheng_states(dfa_info &info,
map<dstate_id_t, AccelScheme> &accel_escape_info, const map<dstate_id_t, AccelScheme> &accel_escape_info,
size_t max_sheng_states) { size_t max_sheng_states) {
RdfaGraph g(info.raw); RdfaGraph g(info.raw);
auto cyclics = find_vertices_in_cycles(g); auto cyclics = find_vertices_in_cycles(g);
@ -674,7 +674,7 @@ void fill_in_aux_info(NFA *nfa, const dfa_info &info,
static static
u16 get_edge_flags(NFA *nfa, dstate_id_t target_impl_id) { u16 get_edge_flags(NFA *nfa, dstate_id_t target_impl_id) {
mstate_aux *aux = getAux(nfa, target_impl_id); const mstate_aux *aux = getAux(nfa, target_impl_id);
u16 flags = 0; u16 flags = 0;
if (aux->accept) { if (aux->accept) {
@ -748,7 +748,7 @@ void fill_in_aux_info64(NFA *nfa, const dfa_info &info,
static static
u16 get_edge_flags64(NFA *nfa, dstate_id_t target_impl_id) { u16 get_edge_flags64(NFA *nfa, dstate_id_t target_impl_id) {
mstate_aux *aux = getAux64(nfa, target_impl_id); const mstate_aux *aux = getAux64(nfa, target_impl_id);
u16 flags = 0; u16 flags = 0;
if (aux->accept) { if (aux->accept) {
@ -955,7 +955,7 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
} }
static static
void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) { void fill_in_sherman(NFA *nfa, const dfa_info &info, UNUSED u16 sherman_limit) {
char *nfa_base = (char *)nfa; char *nfa_base = (char *)nfa;
mcsheng *m = (mcsheng *)getMutableImplNfa(nfa); mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);
char *sherman_table = nfa_base + m->sherman_offset; char *sherman_table = nfa_base + m->sherman_offset;
@ -1018,12 +1018,16 @@ bytecode_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end,
// Sherman optimization // Sherman optimization
if (info.impl_alpha_size > 16) { if (info.impl_alpha_size > 16) {
#ifdef DEBUG
u16 total_daddy = 0; u16 total_daddy = 0;
#endif // DEBUG
for (u32 i = 0; i < info.size(); i++) { for (u32 i = 0; i < info.size(); i++) {
find_better_daddy(info, i, find_better_daddy(info, i,
is_cyclic_near(info.raw, info.raw.start_anchored), is_cyclic_near(info.raw, info.raw.start_anchored),
grey); grey);
#ifdef DEBUG
total_daddy += info.extra[i].daddytaken; total_daddy += info.extra[i].daddytaken;
#endif // DEBUG
} }
DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy, DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
@ -1035,7 +1039,7 @@ bytecode_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end,
if (!allocateImplId16(info, sheng_end, &sherman_limit)) { if (!allocateImplId16(info, sheng_end, &sherman_limit)) {
DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
info.size()); info.size());
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
u16 count_real_states = sherman_limit - sheng_end; u16 count_real_states = sherman_limit - sheng_end;
@ -1109,7 +1113,7 @@ void fill_in_succ_table_8(NFA *nfa, const dfa_info &info,
} }
static static
void fill_in_sherman64(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) { void fill_in_sherman64(NFA *nfa, const dfa_info &info, UNUSED u16 sherman_limit) {
char *nfa_base = (char *)nfa; char *nfa_base = (char *)nfa;
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa); mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa);
char *sherman_table = nfa_base + m->sherman_offset; char *sherman_table = nfa_base + m->sherman_offset;
@ -1172,12 +1176,16 @@ bytecode_ptr<NFA> mcsheng64Compile16(dfa_info&info, dstate_id_t sheng_end,
// Sherman optimization // Sherman optimization
if (info.impl_alpha_size > 16) { if (info.impl_alpha_size > 16) {
#ifdef DEBUG
u16 total_daddy = 0; u16 total_daddy = 0;
#endif // DEBUG
for (u32 i = 0; i < info.size(); i++) { for (u32 i = 0; i < info.size(); i++) {
find_better_daddy(info, i, find_better_daddy(info, i,
is_cyclic_near(info.raw, info.raw.start_anchored), is_cyclic_near(info.raw, info.raw.start_anchored),
grey); grey);
#ifdef DEBUG
total_daddy += info.extra[i].daddytaken; total_daddy += info.extra[i].daddytaken;
#endif // DEBUG
} }
DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy, DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
@ -1189,7 +1197,7 @@ bytecode_ptr<NFA> mcsheng64Compile16(dfa_info&info, dstate_id_t sheng_end,
if (!allocateImplId16(info, sheng_end, &sherman_limit)) { if (!allocateImplId16(info, sheng_end, &sherman_limit)) {
DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
info.size()); info.size());
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
u16 count_real_states = sherman_limit - sheng_end; u16 count_real_states = sherman_limit - sheng_end;
@ -1414,7 +1422,7 @@ bytecode_ptr<NFA> mcsheng64Compile8(dfa_info &info, dstate_id_t sheng_end,
bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc, bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
const ReportManager &rm) { const ReportManager &rm) {
if (!cc.grey.allowMcSheng) { if (!cc.grey.allowMcSheng) {
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
mcclellan_build_strat mbs(raw, rm, false); mcclellan_build_strat mbs(raw, rm, false);
@ -1430,12 +1438,10 @@ bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
map<dstate_id_t, AccelScheme> accel_escape_info map<dstate_id_t, AccelScheme> accel_escape_info
= info.strat.getAccelInfo(cc.grey); = info.strat.getAccelInfo(cc.grey);
auto old_states = info.states;
dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info, MAX_SHENG_STATES); dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info, MAX_SHENG_STATES);
if (sheng_end <= DEAD_STATE + 1) { if (sheng_end <= DEAD_STATE + 1) {
info.states = old_states; return bytecode_ptr<NFA>(nullptr);
return nullptr;
} }
bytecode_ptr<NFA> nfa; bytecode_ptr<NFA> nfa;
@ -1447,7 +1453,6 @@ bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
} }
if (!nfa) { if (!nfa) {
info.states = old_states;
return nfa; return nfa;
} }
@ -1462,12 +1467,12 @@ bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc, bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc,
const ReportManager &rm) { const ReportManager &rm) {
if (!cc.grey.allowMcSheng) { if (!cc.grey.allowMcSheng) {
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
if (!cc.target_info.has_avx512vbmi()) { if (!cc.target_info.has_avx512vbmi()) {
DEBUG_PRINTF("McSheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n"); DEBUG_PRINTF("McSheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
mcclellan_build_strat mbs(raw, rm, false); mcclellan_build_strat mbs(raw, rm, false);
@ -1488,7 +1493,7 @@ bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc,
sheng_end64 = find_sheng_states(info, accel_escape_info, MAX_SHENG64_STATES); sheng_end64 = find_sheng_states(info, accel_escape_info, MAX_SHENG64_STATES);
if (sheng_end64 <= DEAD_STATE + 1) { if (sheng_end64 <= DEAD_STATE + 1) {
return nullptr; return bytecode_ptr<NFA>(nullptr);
} else { } else {
using64state = true; using64state = true;
} }

View File

@ -512,7 +512,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
verm_restart:; verm_restart:;
assert(buf[curr] == kp->u.verm.c); assert(buf[curr] == kp->u.verm.c);
size_t test = curr; size_t test;
if (curr + min_rep < length) { if (curr + min_rep < length) {
test = curr + min_rep; test = curr + min_rep;
} else { } else {
@ -534,7 +534,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
m128 hi = kp->u.shuf.mask_hi; m128 hi = kp->u.shuf.mask_hi;
shuf_restart: shuf_restart:
assert(do_single_shufti(lo, hi, buf[curr])); assert(do_single_shufti(lo, hi, buf[curr]));
size_t test = curr; size_t test;
if (curr + min_rep < length) { if (curr + min_rep < length) {
test = curr + min_rep; test = curr + min_rep;
} else { } else {
@ -556,7 +556,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
const m128 mask1 = kp->u.truffle.mask1; const m128 mask1 = kp->u.truffle.mask1;
const m128 mask2 = kp->u.truffle.mask2; const m128 mask2 = kp->u.truffle.mask2;
truffle_restart:; truffle_restart:;
size_t test = curr; size_t test;
if (curr + min_rep < length) { if (curr + min_rep < length) {
test = curr + min_rep; test = curr + min_rep;
} else { } else {
@ -582,7 +582,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
nverm_restart:; nverm_restart:;
assert(buf[curr] != kp->u.verm.c); assert(buf[curr] != kp->u.verm.c);
size_t test = curr; size_t test;
if (curr + min_rep < length) { if (curr + min_rep < length) {
test = curr + min_rep; test = curr + min_rep;
} else { } else {
@ -607,7 +607,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
} }
static really_inline static really_inline
void restartKilo(const struct mpv *m, UNUSED u8 *active, u8 *reporters, void restartKilo(const struct mpv *m, UNUSED const u8 *active, u8 *reporters,
struct mpv_decomp_state *dstate, struct mpv_pq_item *pq, struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
const u8 *buf, u64a prev_limit, size_t buf_length, u32 i) { const u8 *buf, u64a prev_limit, size_t buf_length, u32 i) {
const struct mpv_kilopuff *kp = (const void *)(m + 1); const struct mpv_kilopuff *kp = (const void *)(m + 1);
@ -1074,7 +1074,7 @@ s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end) {
return 0; return 0;
} else { } else {
const struct mpv *m = getImplNfa(nfa); const struct mpv *m = getImplNfa(nfa);
u8 *reporters = (u8 *)q->state + m->reporter_offset; const u8 *reporters = (u8 *)q->state + m->reporter_offset;
if (mmbit_any_precise(reporters, m->kilo_count)) { if (mmbit_any_precise(reporters, m->kilo_count)) {
DEBUG_PRINTF("next byte\n"); DEBUG_PRINTF("next byte\n");
@ -1087,7 +1087,7 @@ s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end) {
next_event = q->items[q->cur].location; next_event = q->items[q->cur].location;
} }
struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state; const struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state;
struct mpv_pq_item *pq struct mpv_pq_item *pq
= (struct mpv_pq_item *)(q->state + m->pq_offset); = (struct mpv_pq_item *)(q->state + m->pq_offset);
if (s->pq_size) { if (s->pq_size) {

View File

@ -167,7 +167,7 @@ void pushQueueNoMerge(struct mq * restrict q, u32 e, s64a loc) {
// We assert that the event is different from its predecessor. If it's a // We assert that the event is different from its predecessor. If it's a
// dupe, you should have used the ordinary pushQueue call. // dupe, you should have used the ordinary pushQueue call.
if (q->end) { if (q->end) {
UNUSED struct mq_item *prev = &q->items[q->end - 1]; UNUSED const struct mq_item *prev = &q->items[q->end - 1];
assert(prev->type != e || prev->location != loc); assert(prev->type != e || prev->location != loc);
} }
#endif #endif
@ -251,6 +251,10 @@ void q_skip_forward_to(struct mq *q, s64a min_loc) {
// Dump the contents of the given queue. // Dump the contents of the given queue.
static never_inline UNUSED static never_inline UNUSED
void debugQueue(const struct mq *q) { void debugQueue(const struct mq *q) {
if (q == nullptr) {
DEBUG_PRINTF("q=NULL!\n");
return;
}
DEBUG_PRINTF("q=%p, nfa=%p\n", q, q->nfa); DEBUG_PRINTF("q=%p, nfa=%p\n", q, q->nfa);
DEBUG_PRINTF("q offset=%llu, buf={%p, len=%zu}, history={%p, len=%zu}\n", DEBUG_PRINTF("q offset=%llu, buf={%p, len=%zu}, history={%p, len=%zu}\n",
q->offset, q->buffer, q->length, q->history, q->hlength); q->offset, q->buffer, q->length, q->history, q->hlength);

View File

@ -68,7 +68,7 @@ void pushQueueAt(struct mq * restrict q, u32 pos, u32 e, s64a loc) {
// We assert that the event is different from its predecessor. If it's a // We assert that the event is different from its predecessor. If it's a
// dupe, you should have used the ordinary pushQueue call. // dupe, you should have used the ordinary pushQueue call.
if (q->end) { if (q->end) {
UNUSED struct mq_item *prev = &q->items[q->end - 1]; UNUSED const struct mq_item *prev = &q->items[q->end - 1];
assert(prev->type != e || prev->location != loc); assert(prev->type != e || prev->location != loc);
} }
#endif #endif

View File

@ -45,7 +45,7 @@ struct RdfaEdgeProps {
}; };
struct RdfaGraph : public ue2_graph<RdfaGraph, RdfaVertexProps, RdfaEdgeProps> { struct RdfaGraph : public ue2_graph<RdfaGraph, RdfaVertexProps, RdfaEdgeProps> {
RdfaGraph(const raw_dfa &rdfa); explicit RdfaGraph(const raw_dfa &rdfa);
}; };

View File

@ -785,7 +785,7 @@ enum RepeatMatch repeatHasMatchRange(const struct RepeatInfo *info,
if (diff > info->repeatMax) { if (diff > info->repeatMax) {
DEBUG_PRINTF("range list is stale\n"); DEBUG_PRINTF("range list is stale\n");
return REPEAT_STALE; return REPEAT_STALE;
} else if (diff >= info->repeatMin && diff <= info->repeatMax) { } else if (diff >= info->repeatMin) {
return REPEAT_MATCH; return REPEAT_MATCH;
} }
@ -836,7 +836,7 @@ enum RepeatMatch repeatHasMatchBitmap(const struct RepeatInfo *info,
if (diff > info->repeatMax) { if (diff > info->repeatMax) {
DEBUG_PRINTF("stale\n"); DEBUG_PRINTF("stale\n");
return REPEAT_STALE; return REPEAT_STALE;
} else if (diff >= info->repeatMin && diff <= info->repeatMax) { } else if (diff >= info->repeatMin) {
return REPEAT_MATCH; return REPEAT_MATCH;
} }

View File

@ -94,9 +94,6 @@ u32 repeatRecurTable(struct RepeatStateInfo *info, const depth &repeatMax,
static static
u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax, u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax,
const u32 minPeriod, u32 rv) { const u32 minPeriod, u32 rv) {
u32 cnt = 0;
u32 patch_bits = 0;
u32 total_size = 0;
u32 min = ~0U; u32 min = ~0U;
u32 patch_len = 0; u32 patch_len = 0;
@ -105,11 +102,11 @@ u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax,
} }
for (u32 i = minPeriod; i <= rv; i++) { for (u32 i = minPeriod; i <= rv; i++) {
cnt = ((u32)repeatMax + (i - 1)) / i + 1; u32 cnt = ((u32)repeatMax + (i - 1)) / i + 1;
// no bit packing version // no bit packing version
patch_bits = calcPackedBits(info->table[i]); u32 patch_bits = calcPackedBits(info->table[i]);
total_size = (patch_bits + 7U) / 8U * cnt; u32 total_size = (patch_bits + 7U) / 8U * cnt;
if (total_size < min) { if (total_size < min) {
patch_len = i; patch_len = i;

View File

@ -154,7 +154,7 @@ char fireReports(const struct sheng *sh, NfaCallback cb, void *ctxt,
return MO_CONTINUE_MATCHING; /* continue execution */ return MO_CONTINUE_MATCHING; /* continue execution */
} }
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
// Sheng32 // Sheng32
static really_inline static really_inline
const struct sheng32 *get_sheng32(const struct NFA *n) { const struct sheng32 *get_sheng32(const struct NFA *n) {
@ -351,7 +351,7 @@ char fireReports64(const struct sheng64 *sh, NfaCallback cb, void *ctxt,
} }
return MO_CONTINUE_MATCHING; /* continue execution */ return MO_CONTINUE_MATCHING; /* continue execution */
} }
#endif // end of HAVE_AVX512VBMI #endif // end of HAVE_AVX512VBMI || HAVE_SVE
/* include Sheng function definitions */ /* include Sheng function definitions */
#include "sheng_defs.h" #include "sheng_defs.h"
@ -814,7 +814,6 @@ char nfaExecSheng_testEOD(const struct NFA *nfa, const char *state,
char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q) { char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q) {
const struct sheng *sh = (const struct sheng *)getImplNfa(n); const struct sheng *sh = (const struct sheng *)getImplNfa(n);
NfaCallback cb = q->cb; NfaCallback cb = q->cb;
void *ctxt = q->context;
u8 s = *(u8 *)q->state; u8 s = *(u8 *)q->state;
const struct sstate_aux *aux = get_aux(sh, s); const struct sstate_aux *aux = get_aux(sh, s);
u64a offset = q_cur_offset(q); u64a offset = q_cur_offset(q);
@ -823,6 +822,7 @@ char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q) {
assert(q_cur_type(q) == MQE_START); assert(q_cur_type(q) == MQE_START);
if (aux->accept) { if (aux->accept) {
void *ctxt = q->context;
if (sh->flags & SHENG_FLAG_SINGLE_REPORT) { if (sh->flags & SHENG_FLAG_SINGLE_REPORT) {
fireSingleReport(cb, ctxt, sh->report, offset); fireSingleReport(cb, ctxt, sh->report, offset);
} else { } else {
@ -871,7 +871,7 @@ char nfaExecSheng_expandState(UNUSED const struct NFA *nfa, void *dest,
return 0; return 0;
} }
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
// Sheng32 // Sheng32
static really_inline static really_inline
char runSheng32Cb(const struct sheng32 *sh, NfaCallback cb, void *ctxt, char runSheng32Cb(const struct sheng32 *sh, NfaCallback cb, void *ctxt,
@ -1874,4 +1874,4 @@ char nfaExecSheng64_expandState(UNUSED const struct NFA *nfa, void *dest,
*(u8 *)dest = *(const u8 *)src; *(u8 *)dest = *(const u8 *)src;
return 0; return 0;
} }
#endif // end of HAVE_AVX512VBMI #endif // end of HAVE_AVX512VBMI || HAVE_SVE

View File

@ -58,7 +58,7 @@ char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer, char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer,
size_t length, NfaCallback cb, void *context); size_t length, NfaCallback cb, void *context);
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL #define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL #define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
@ -106,8 +106,7 @@ char nfaExecSheng64_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer, char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer,
size_t length, NfaCallback cb, void *context); size_t length, NfaCallback cb, void *context);
#else // !HAVE_AVX512VBMI && !HAVE_SVE
#else // !HAVE_AVX512VBMI
#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL #define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL #define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
@ -138,6 +137,7 @@ char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer,
#define nfaExecSheng64_testEOD NFA_API_NO_IMPL #define nfaExecSheng64_testEOD NFA_API_NO_IMPL
#define nfaExecSheng64_reportCurrent NFA_API_NO_IMPL #define nfaExecSheng64_reportCurrent NFA_API_NO_IMPL
#define nfaExecSheng64_B NFA_API_NO_IMPL #define nfaExecSheng64_B NFA_API_NO_IMPL
#endif // end of HAVE_AVX512VBMI #endif // end of HAVE_AVX512VBMI || defined(HAVE_SVE)
#endif /* SHENG_H_ */ #endif /* SHENG_H_ */

View File

@ -52,7 +52,7 @@ u8 hasInterestingStates(const u8 a, const u8 b, const u8 c, const u8 d) {
return (a | b | c | d) & (SHENG_STATE_FLAG_MASK); return (a | b | c | d) & (SHENG_STATE_FLAG_MASK);
} }
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
static really_inline static really_inline
u8 isDeadState32(const u8 a) { u8 isDeadState32(const u8 a) {
return a & SHENG32_STATE_DEAD; return a & SHENG32_STATE_DEAD;
@ -108,7 +108,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define SHENG_IMPL sheng_cod #define SHENG_IMPL sheng_cod
#define DEAD_FUNC isDeadState #define DEAD_FUNC isDeadState
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_cod #define SHENG32_IMPL sheng32_cod
#define DEAD_FUNC32 isDeadState32 #define DEAD_FUNC32 isDeadState32
#define ACCEPT_FUNC32 isAcceptState32 #define ACCEPT_FUNC32 isAcceptState32
@ -121,7 +121,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef SHENG_IMPL #undef SHENG_IMPL
#undef DEAD_FUNC #undef DEAD_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef DEAD_FUNC32 #undef DEAD_FUNC32
#undef ACCEPT_FUNC32 #undef ACCEPT_FUNC32
@ -135,7 +135,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define SHENG_IMPL sheng_co #define SHENG_IMPL sheng_co
#define DEAD_FUNC dummyFunc #define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_co #define SHENG32_IMPL sheng32_co
#define DEAD_FUNC32 dummyFunc #define DEAD_FUNC32 dummyFunc
#define ACCEPT_FUNC32 isAcceptState32 #define ACCEPT_FUNC32 isAcceptState32
@ -148,7 +148,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef SHENG_IMPL #undef SHENG_IMPL
#undef DEAD_FUNC #undef DEAD_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef DEAD_FUNC32 #undef DEAD_FUNC32
#undef ACCEPT_FUNC32 #undef ACCEPT_FUNC32
@ -162,7 +162,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define SHENG_IMPL sheng_samd #define SHENG_IMPL sheng_samd
#define DEAD_FUNC isDeadState #define DEAD_FUNC isDeadState
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_samd #define SHENG32_IMPL sheng32_samd
#define DEAD_FUNC32 isDeadState32 #define DEAD_FUNC32 isDeadState32
#define ACCEPT_FUNC32 isAcceptState32 #define ACCEPT_FUNC32 isAcceptState32
@ -175,7 +175,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef SHENG_IMPL #undef SHENG_IMPL
#undef DEAD_FUNC #undef DEAD_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef DEAD_FUNC32 #undef DEAD_FUNC32
#undef ACCEPT_FUNC32 #undef ACCEPT_FUNC32
@ -189,7 +189,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define SHENG_IMPL sheng_sam #define SHENG_IMPL sheng_sam
#define DEAD_FUNC dummyFunc #define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_sam #define SHENG32_IMPL sheng32_sam
#define DEAD_FUNC32 dummyFunc #define DEAD_FUNC32 dummyFunc
#define ACCEPT_FUNC32 isAcceptState32 #define ACCEPT_FUNC32 isAcceptState32
@ -202,7 +202,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef SHENG_IMPL #undef SHENG_IMPL
#undef DEAD_FUNC #undef DEAD_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef DEAD_FUNC32 #undef DEAD_FUNC32
#undef ACCEPT_FUNC32 #undef ACCEPT_FUNC32
@ -216,7 +216,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define SHENG_IMPL sheng_nmd #define SHENG_IMPL sheng_nmd
#define DEAD_FUNC isDeadState #define DEAD_FUNC isDeadState
#define ACCEPT_FUNC dummyFunc #define ACCEPT_FUNC dummyFunc
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_nmd #define SHENG32_IMPL sheng32_nmd
#define DEAD_FUNC32 isDeadState32 #define DEAD_FUNC32 isDeadState32
#define ACCEPT_FUNC32 dummyFunc #define ACCEPT_FUNC32 dummyFunc
@ -229,7 +229,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef SHENG_IMPL #undef SHENG_IMPL
#undef DEAD_FUNC #undef DEAD_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef DEAD_FUNC32 #undef DEAD_FUNC32
#undef ACCEPT_FUNC32 #undef ACCEPT_FUNC32
@ -243,7 +243,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define SHENG_IMPL sheng_nm #define SHENG_IMPL sheng_nm
#define DEAD_FUNC dummyFunc #define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc #define ACCEPT_FUNC dummyFunc
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_nm #define SHENG32_IMPL sheng32_nm
#define DEAD_FUNC32 dummyFunc #define DEAD_FUNC32 dummyFunc
#define ACCEPT_FUNC32 dummyFunc #define ACCEPT_FUNC32 dummyFunc
@ -256,7 +256,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef SHENG_IMPL #undef SHENG_IMPL
#undef DEAD_FUNC #undef DEAD_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef DEAD_FUNC32 #undef DEAD_FUNC32
#undef ACCEPT_FUNC32 #undef ACCEPT_FUNC32
@ -277,7 +277,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC isAccelState #define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_coda #define SHENG32_IMPL sheng32_4_coda
#define INTERESTING_FUNC32 hasInterestingStates32 #define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 isDeadState32 #define INNER_DEAD_FUNC32 isDeadState32
@ -296,7 +296,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC #undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef INTERESTING_FUNC32 #undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32 #undef INNER_DEAD_FUNC32
@ -316,7 +316,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc #define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_cod #define SHENG32_IMPL sheng32_4_cod
#define INTERESTING_FUNC32 hasInterestingStates32 #define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 isDeadState32 #define INNER_DEAD_FUNC32 isDeadState32
@ -339,7 +339,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC #undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef INTERESTING_FUNC32 #undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32 #undef INNER_DEAD_FUNC32
@ -363,7 +363,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC isAccelState #define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_coa #define SHENG32_IMPL sheng32_4_coa
#define INTERESTING_FUNC32 hasInterestingStates32 #define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 dummyFunc #define INNER_DEAD_FUNC32 dummyFunc
@ -382,7 +382,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC #undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef INTERESTING_FUNC32 #undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32 #undef INNER_DEAD_FUNC32
@ -402,7 +402,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc #define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_co #define SHENG32_IMPL sheng32_4_co
#define INTERESTING_FUNC32 hasInterestingStates32 #define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 dummyFunc #define INNER_DEAD_FUNC32 dummyFunc
@ -425,7 +425,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC #undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef INTERESTING_FUNC32 #undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32 #undef INNER_DEAD_FUNC32
@ -449,7 +449,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC isAccelState #define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_samda #define SHENG32_IMPL sheng32_4_samda
#define INTERESTING_FUNC32 hasInterestingStates32 #define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 isDeadState32 #define INNER_DEAD_FUNC32 isDeadState32
@ -468,7 +468,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC #undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef INTERESTING_FUNC32 #undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32 #undef INNER_DEAD_FUNC32
@ -488,7 +488,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc #define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_samd #define SHENG32_IMPL sheng32_4_samd
#define INTERESTING_FUNC32 hasInterestingStates32 #define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 isDeadState32 #define INNER_DEAD_FUNC32 isDeadState32
@ -511,7 +511,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC #undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef INTERESTING_FUNC32 #undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32 #undef INNER_DEAD_FUNC32
@ -535,7 +535,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC isAccelState #define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_sama #define SHENG32_IMPL sheng32_4_sama
#define INTERESTING_FUNC32 hasInterestingStates32 #define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 dummyFunc #define INNER_DEAD_FUNC32 dummyFunc
@ -554,7 +554,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC #undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef INTERESTING_FUNC32 #undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32 #undef INNER_DEAD_FUNC32
@ -574,7 +574,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc #define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState #define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_sam #define SHENG32_IMPL sheng32_4_sam
#define INTERESTING_FUNC32 hasInterestingStates32 #define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 dummyFunc #define INNER_DEAD_FUNC32 dummyFunc
@ -597,7 +597,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC #undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef INTERESTING_FUNC32 #undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32 #undef INNER_DEAD_FUNC32
@ -623,7 +623,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc #define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC isAccelState #define OUTER_ACCEL_FUNC isAccelState
#define ACCEPT_FUNC dummyFunc #define ACCEPT_FUNC dummyFunc
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_nmda #define SHENG32_IMPL sheng32_4_nmda
#define INTERESTING_FUNC32 dummyFunc4 #define INTERESTING_FUNC32 dummyFunc4
#define INNER_DEAD_FUNC32 dummyFunc #define INNER_DEAD_FUNC32 dummyFunc
@ -642,7 +642,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC #undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef INTERESTING_FUNC32 #undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32 #undef INNER_DEAD_FUNC32
@ -662,7 +662,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc #define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc #define ACCEPT_FUNC dummyFunc
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_nmd #define SHENG32_IMPL sheng32_4_nmd
#define INTERESTING_FUNC32 dummyFunc4 #define INTERESTING_FUNC32 dummyFunc4
#define INNER_DEAD_FUNC32 dummyFunc #define INNER_DEAD_FUNC32 dummyFunc
@ -685,7 +685,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC #undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef INTERESTING_FUNC32 #undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32 #undef INNER_DEAD_FUNC32
@ -712,7 +712,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc #define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc #define ACCEPT_FUNC dummyFunc
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_nm #define SHENG32_IMPL sheng32_4_nm
#define INTERESTING_FUNC32 dummyFunc4 #define INTERESTING_FUNC32 dummyFunc4
#define INNER_DEAD_FUNC32 dummyFunc #define INNER_DEAD_FUNC32 dummyFunc
@ -735,7 +735,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC #undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC #undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL #undef SHENG32_IMPL
#undef INTERESTING_FUNC32 #undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32 #undef INNER_DEAD_FUNC32

View File

@ -96,7 +96,7 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
return MO_CONTINUE_MATCHING; return MO_CONTINUE_MATCHING;
} }
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
static really_inline static really_inline
char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt, char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
const struct sheng32 *s, const struct sheng32 *s,
@ -114,14 +114,28 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
} }
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start)); DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
#if defined(HAVE_SVE)
const svbool_t lane_pred_32 = svwhilelt_b8(0, 32);
svuint8_t cur_state = svdup_u8(*state);
svuint8_t tbl_mask = svdup_u8((unsigned char)0x1F);
const m512 *masks = s->succ_masks;
#else
m512 cur_state = set1_64x8(*state); m512 cur_state = set1_64x8(*state);
const m512 *masks = s->succ_masks; const m512 *masks = s->succ_masks;
#endif
while (likely(cur_buf != end)) { while (likely(cur_buf != end)) {
const u8 c = *cur_buf; const u8 c = *cur_buf;
#if defined(HAVE_SVE)
svuint8_t succ_mask = svld1(lane_pred_32, (const u8*)(masks + c));
cur_state = svtbl(succ_mask, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 tmp = svlastb(lane_pred_32, cur_state);
#else
const m512 succ_mask = masks[c]; const m512 succ_mask = masks[c];
cur_state = vpermb512(cur_state, succ_mask); cur_state = vpermb512(cur_state, succ_mask);
const u8 tmp = movd512(cur_state); const u8 tmp = movd512(cur_state);
#endif
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?'); DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG32_STATE_MASK, DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG32_STATE_MASK,
@ -153,7 +167,11 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
} }
cur_buf++; cur_buf++;
} }
#if defined(HAVE_SVE)
*state = svlastb(lane_pred_32, cur_state);
#else
*state = movd512(cur_state); *state = movd512(cur_state);
#endif
*scan_end = cur_buf; *scan_end = cur_buf;
return MO_CONTINUE_MATCHING; return MO_CONTINUE_MATCHING;
} }
@ -175,14 +193,28 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
} }
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start)); DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
#if defined(HAVE_SVE)
const svbool_t lane_pred_64 = svwhilelt_b8(0, 64);
svuint8_t cur_state = svdup_u8(*state);
svuint8_t tbl_mask = svdup_u8((unsigned char)0x3F);
const m512 *masks = s->succ_masks;
#else
m512 cur_state = set1_64x8(*state); m512 cur_state = set1_64x8(*state);
const m512 *masks = s->succ_masks; const m512 *masks = s->succ_masks;
#endif
while (likely(cur_buf != end)) { while (likely(cur_buf != end)) {
const u8 c = *cur_buf; const u8 c = *cur_buf;
#if defined(HAVE_SVE)
svuint8_t succ_mask = svld1(lane_pred_64, (const u8*)(masks + c));
cur_state = svtbl(succ_mask, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 tmp = svlastb(lane_pred_64, cur_state);
#else
const m512 succ_mask = masks[c]; const m512 succ_mask = masks[c];
cur_state = vpermb512(cur_state, succ_mask); cur_state = vpermb512(cur_state, succ_mask);
const u8 tmp = movd512(cur_state); const u8 tmp = movd512(cur_state);
#endif
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?'); DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG64_STATE_MASK, DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG64_STATE_MASK,
@ -214,7 +246,11 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
} }
cur_buf++; cur_buf++;
} }
#if defined(HAVE_SVE)
*state = svlastb(lane_pred_64, cur_state);
#else
*state = movd512(cur_state); *state = movd512(cur_state);
#endif
*scan_end = cur_buf; *scan_end = cur_buf;
return MO_CONTINUE_MATCHING; return MO_CONTINUE_MATCHING;
} }

View File

@ -283,7 +283,7 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
return MO_CONTINUE_MATCHING; return MO_CONTINUE_MATCHING;
} }
#if defined(HAVE_AVX512VBMI) #if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
static really_inline static really_inline
char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt, char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
const struct sheng32 *s, const struct sheng32 *s,
@ -320,8 +320,15 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
return MO_CONTINUE_MATCHING; return MO_CONTINUE_MATCHING;
} }
#if defined(HAVE_SVE)
const svbool_t lane_pred_32 = svwhilelt_b8(0, 32);
svuint8_t cur_state = svdup_u8(*state);
svuint8_t tbl_mask = svdup_u8((unsigned char)0x1F);
const m512 *masks = s->succ_masks;
#else
m512 cur_state = set1_64x8(*state); m512 cur_state = set1_64x8(*state);
const m512 *masks = s->succ_masks; const m512 *masks = s->succ_masks;
#endif
while (likely(end - cur_buf >= 4)) { while (likely(end - cur_buf >= 4)) {
const u8 *b1 = cur_buf; const u8 *b1 = cur_buf;
@ -333,6 +340,23 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
const u8 c3 = *b3; const u8 c3 = *b3;
const u8 c4 = *b4; const u8 c4 = *b4;
#if defined(HAVE_SVE)
svuint8_t succ_mask1 = svld1(lane_pred_32, (const u8*)(masks+c1));
cur_state = svtbl(succ_mask1, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a1 = svlastb(lane_pred_32, cur_state);
svuint8_t succ_mask2 = svld1(lane_pred_32, (const u8*)(masks+c2));
cur_state = svtbl(succ_mask2, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a2 = svlastb(lane_pred_32, cur_state);
svuint8_t succ_mask3 = svld1(lane_pred_32, (const u8*)(masks+c3));
cur_state = svtbl(succ_mask3, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a3 = svlastb(lane_pred_32, cur_state);
svuint8_t succ_mask4 = svld1(lane_pred_32, (const u8*)(masks+c4));
cur_state = svtbl(succ_mask4, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a4 = svlastb(lane_pred_32, cur_state);
#else
const m512 succ_mask1 = masks[c1]; const m512 succ_mask1 = masks[c1];
cur_state = vpermb512(cur_state, succ_mask1); cur_state = vpermb512(cur_state, succ_mask1);
const u8 a1 = movd512(cur_state); const u8 a1 = movd512(cur_state);
@ -348,6 +372,7 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
const m512 succ_mask4 = masks[c4]; const m512 succ_mask4 = masks[c4];
cur_state = vpermb512(cur_state, succ_mask4); cur_state = vpermb512(cur_state, succ_mask4);
const u8 a4 = movd512(cur_state); const u8 a4 = movd512(cur_state);
#endif
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?'); DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG32_STATE_MASK, DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG32_STATE_MASK,
@ -517,7 +542,11 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
}; };
cur_buf += 4; cur_buf += 4;
} }
#if defined(HAVE_SVE)
*state = svlastb(lane_pred_32, cur_state);
#else
*state = movd512(cur_state); *state = movd512(cur_state);
#endif
*scan_end = cur_buf; *scan_end = cur_buf;
return MO_CONTINUE_MATCHING; return MO_CONTINUE_MATCHING;
} }
@ -541,9 +570,15 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
*scan_end = end; *scan_end = end;
return MO_CONTINUE_MATCHING; return MO_CONTINUE_MATCHING;
} }
#if defined(HAVE_SVE)
const svbool_t lane_pred_64 = svwhilelt_b8(0, 64);
svuint8_t cur_state = svdup_u8(*state);
svuint8_t tbl_mask = svdup_u8((unsigned char)0x3F);
const m512 *masks = s->succ_masks;
#else
m512 cur_state = set1_64x8(*state); m512 cur_state = set1_64x8(*state);
const m512 *masks = s->succ_masks; const m512 *masks = s->succ_masks;
#endif
while (likely(end - cur_buf >= 4)) { while (likely(end - cur_buf >= 4)) {
const u8 *b1 = cur_buf; const u8 *b1 = cur_buf;
@ -555,6 +590,23 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
const u8 c3 = *b3; const u8 c3 = *b3;
const u8 c4 = *b4; const u8 c4 = *b4;
#if defined(HAVE_SVE)
svuint8_t succ_mask1 = svld1(lane_pred_64, (const u8*)(masks+c1));
cur_state = svtbl(succ_mask1, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a1 = svlastb(lane_pred_64, cur_state);
svuint8_t succ_mask2 = svld1(lane_pred_64, (const u8*)(masks+c2));
cur_state = svtbl(succ_mask2, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a2 = svlastb(lane_pred_64, cur_state);
svuint8_t succ_mask3 = svld1(lane_pred_64, (const u8*)(masks+c3));
cur_state = svtbl(succ_mask3, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a3 = svlastb(lane_pred_64, cur_state);
svuint8_t succ_mask4 = svld1(lane_pred_64, (const u8*)(masks+c4));
cur_state = svtbl(succ_mask4, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a4 = svlastb(lane_pred_64, cur_state);
#else
const m512 succ_mask1 = masks[c1]; const m512 succ_mask1 = masks[c1];
cur_state = vpermb512(cur_state, succ_mask1); cur_state = vpermb512(cur_state, succ_mask1);
const u8 a1 = movd512(cur_state); const u8 a1 = movd512(cur_state);
@ -570,6 +622,7 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
const m512 succ_mask4 = masks[c4]; const m512 succ_mask4 = masks[c4];
cur_state = vpermb512(cur_state, succ_mask4); cur_state = vpermb512(cur_state, succ_mask4);
const u8 a4 = movd512(cur_state); const u8 a4 = movd512(cur_state);
#endif
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?'); DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG64_STATE_MASK, DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG64_STATE_MASK,
@ -703,7 +756,11 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
} }
cur_buf += 4; cur_buf += 4;
} }
#if defined(HAVE_SVE)
*state = svlastb(lane_pred_64, cur_state);
#else
*state = movd512(cur_state); *state = movd512(cur_state);
#endif
*scan_end = cur_buf; *scan_end = cur_buf;
return MO_CONTINUE_MATCHING; return MO_CONTINUE_MATCHING;
} }

View File

@ -99,7 +99,7 @@ struct dfa_info {
return next(idx, TOP); return next(idx, TOP);
} }
dstate &next(dstate_id_t idx, u16 chr) { dstate &next(dstate_id_t idx, u16 chr) {
auto &src = (*this)[idx]; const auto &src = (*this)[idx];
auto next_id = src.next[raw.alpha_remap[chr]]; auto next_id = src.next[raw.alpha_remap[chr]];
return states[next_id]; return states[next_id];
} }
@ -109,7 +109,7 @@ struct dfa_info {
// if DFA can't die, shift all indices left by 1 // if DFA can't die, shift all indices left by 1
return can_die ? idx : idx + 1; return can_die ? idx : idx + 1;
} }
bool isDead(dstate &state) { bool isDead(const dstate &state) {
return raw_id(state.impl_id) == DEAD_STATE; return raw_id(state.impl_id) == DEAD_STATE;
} }
bool isDead(dstate_id_t idx) { bool isDead(dstate_id_t idx) {
@ -117,7 +117,7 @@ struct dfa_info {
} }
private: private:
static bool dfaCanDie(raw_dfa &rdfa) { static bool dfaCanDie(const raw_dfa &rdfa) {
for (unsigned chr = 0; chr < 256; chr++) { for (unsigned chr = 0; chr < 256; chr++) {
for (dstate_id_t state = 0; state < rdfa.states.size(); state++) { for (dstate_id_t state = 0; state < rdfa.states.size(); state++) {
auto succ = rdfa.states[state].next[rdfa.alpha_remap[chr]]; auto succ = rdfa.states[state].next[rdfa.alpha_remap[chr]];
@ -138,7 +138,7 @@ struct raw_report_list {
raw_report_list(const flat_set<ReportID> &reports_in, raw_report_list(const flat_set<ReportID> &reports_in,
const ReportManager &rm, bool do_remap) { const ReportManager &rm, bool do_remap) {
if (do_remap) { if (do_remap) {
for (auto &id : reports_in) { for (const auto &id : reports_in) {
reports.insert(rm.getProgramOffset(id)); reports.insert(rm.getProgramOffset(id));
} }
} else { } else {
@ -334,14 +334,14 @@ void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info,
template <typename T> template <typename T>
static static
u8 getShengState(UNUSED dstate &state, UNUSED dfa_info &info, u8 getShengState(UNUSED const dstate &state, UNUSED dfa_info &info,
UNUSED map<dstate_id_t, AccelScheme> &accelInfo) { UNUSED const map<dstate_id_t, AccelScheme> &accelInfo) {
return 0; return 0;
} }
template <> template <>
u8 getShengState<sheng>(dstate &state, dfa_info &info, u8 getShengState<sheng>(const dstate &state, dfa_info &info,
map<dstate_id_t, AccelScheme> &accelInfo) { const map<dstate_id_t, AccelScheme> &accelInfo) {
u8 s = state.impl_id; u8 s = state.impl_id;
if (!state.reports.empty()) { if (!state.reports.empty()) {
s |= SHENG_STATE_ACCEPT; s |= SHENG_STATE_ACCEPT;
@ -356,8 +356,8 @@ u8 getShengState<sheng>(dstate &state, dfa_info &info,
} }
template <> template <>
u8 getShengState<sheng32>(dstate &state, dfa_info &info, u8 getShengState<sheng32>(const dstate &state, dfa_info &info,
map<dstate_id_t, AccelScheme> &accelInfo) { const map<dstate_id_t, AccelScheme> &accelInfo) {
u8 s = state.impl_id; u8 s = state.impl_id;
if (!state.reports.empty()) { if (!state.reports.empty()) {
s |= SHENG32_STATE_ACCEPT; s |= SHENG32_STATE_ACCEPT;
@ -372,8 +372,8 @@ u8 getShengState<sheng32>(dstate &state, dfa_info &info,
} }
template <> template <>
u8 getShengState<sheng64>(dstate &state, dfa_info &info, u8 getShengState<sheng64>(const dstate &state, dfa_info &info,
UNUSED map<dstate_id_t, AccelScheme> &accelInfo) { UNUSED const map<dstate_id_t, AccelScheme> &accelInfo) {
u8 s = state.impl_id; u8 s = state.impl_id;
if (!state.reports.empty()) { if (!state.reports.empty()) {
s |= SHENG64_STATE_ACCEPT; s |= SHENG64_STATE_ACCEPT;
@ -409,8 +409,8 @@ void fillAccelAux(struct NFA *n, dfa_info &info,
template <typename T> template <typename T>
static static
void populateBasicInfo(UNUSED struct NFA *n, UNUSED dfa_info &info, void populateBasicInfo(UNUSED struct NFA *n, UNUSED dfa_info &info, // cppcheck-suppress constParameterPointer
UNUSED map<dstate_id_t, AccelScheme> &accelInfo, UNUSED const map<dstate_id_t, AccelScheme> &accelInfo,
UNUSED u32 aux_offset, UNUSED u32 report_offset, UNUSED u32 aux_offset, UNUSED u32 report_offset,
UNUSED u32 accel_offset, UNUSED u32 total_size, UNUSED u32 accel_offset, UNUSED u32 total_size,
UNUSED u32 dfa_size) { UNUSED u32 dfa_size) {
@ -418,7 +418,7 @@ void populateBasicInfo(UNUSED struct NFA *n, UNUSED dfa_info &info,
template <> template <>
void populateBasicInfo<sheng>(struct NFA *n, dfa_info &info, void populateBasicInfo<sheng>(struct NFA *n, dfa_info &info,
map<dstate_id_t, AccelScheme> &accelInfo, const map<dstate_id_t, AccelScheme> &accelInfo,
u32 aux_offset, u32 report_offset, u32 aux_offset, u32 report_offset,
u32 accel_offset, u32 total_size, u32 accel_offset, u32 total_size,
u32 dfa_size) { u32 dfa_size) {
@ -443,7 +443,7 @@ void populateBasicInfo<sheng>(struct NFA *n, dfa_info &info,
template <> template <>
void populateBasicInfo<sheng32>(struct NFA *n, dfa_info &info, void populateBasicInfo<sheng32>(struct NFA *n, dfa_info &info,
map<dstate_id_t, AccelScheme> &accelInfo, const map<dstate_id_t, AccelScheme> &accelInfo,
u32 aux_offset, u32 report_offset, u32 aux_offset, u32 report_offset,
u32 accel_offset, u32 total_size, u32 accel_offset, u32 total_size,
u32 dfa_size) { u32 dfa_size) {
@ -468,7 +468,7 @@ void populateBasicInfo<sheng32>(struct NFA *n, dfa_info &info,
template <> template <>
void populateBasicInfo<sheng64>(struct NFA *n, dfa_info &info, void populateBasicInfo<sheng64>(struct NFA *n, dfa_info &info,
map<dstate_id_t, AccelScheme> &accelInfo, const map<dstate_id_t, AccelScheme> &accelInfo,
u32 aux_offset, u32 report_offset, u32 aux_offset, u32 report_offset,
u32 accel_offset, u32 total_size, u32 accel_offset, u32 total_size,
u32 dfa_size) { u32 dfa_size) {
@ -551,19 +551,19 @@ void fillSingleReport(NFA *n, ReportID r_id) {
template <typename T> template <typename T>
static static
bool createShuffleMasks(UNUSED T *s, UNUSED dfa_info &info, bool createShuffleMasks(UNUSED T *s, UNUSED dfa_info &info, // cppcheck-suppress constParameterPointer
UNUSED map<dstate_id_t, AccelScheme> &accelInfo) { UNUSED const map<dstate_id_t, AccelScheme> &accelInfo) {
return true; return true;
} }
template <> template <>
bool createShuffleMasks<sheng>(sheng *s, dfa_info &info, bool createShuffleMasks<sheng>(sheng *s, dfa_info &info,
map<dstate_id_t, AccelScheme> &accelInfo) { const map<dstate_id_t, AccelScheme> &accelInfo) {
for (u16 chr = 0; chr < 256; chr++) { for (u16 chr = 0; chr < 256; chr++) {
u8 buf[16] = {0}; u8 buf[16] = {0};
for (dstate_id_t idx = 0; idx < info.size(); idx++) { for (dstate_id_t idx = 0; idx < info.size(); idx++) {
auto &succ_state = info.next(idx, chr); const auto &succ_state = info.next(idx, chr);
buf[idx] = getShengState<sheng>(succ_state, info, accelInfo); buf[idx] = getShengState<sheng>(succ_state, info, accelInfo);
} }
@ -577,13 +577,13 @@ bool createShuffleMasks<sheng>(sheng *s, dfa_info &info,
template <> template <>
bool createShuffleMasks<sheng32>(sheng32 *s, dfa_info &info, bool createShuffleMasks<sheng32>(sheng32 *s, dfa_info &info,
map<dstate_id_t, AccelScheme> &accelInfo) { const map<dstate_id_t, AccelScheme> &accelInfo) {
for (u16 chr = 0; chr < 256; chr++) { for (u16 chr = 0; chr < 256; chr++) {
u8 buf[64] = {0}; u8 buf[64] = {0};
assert(info.size() <= 32); assert(info.size() <= 32);
for (dstate_id_t idx = 0; idx < info.size(); idx++) { for (dstate_id_t idx = 0; idx < info.size(); idx++) {
auto &succ_state = info.next(idx, chr); const auto &succ_state = info.next(idx, chr);
buf[idx] = getShengState<sheng32>(succ_state, info, accelInfo); buf[idx] = getShengState<sheng32>(succ_state, info, accelInfo);
buf[32 + idx] = buf[idx]; buf[32 + idx] = buf[idx];
@ -598,13 +598,13 @@ bool createShuffleMasks<sheng32>(sheng32 *s, dfa_info &info,
template <> template <>
bool createShuffleMasks<sheng64>(sheng64 *s, dfa_info &info, bool createShuffleMasks<sheng64>(sheng64 *s, dfa_info &info,
map<dstate_id_t, AccelScheme> &accelInfo) { const map<dstate_id_t, AccelScheme> &accelInfo) {
for (u16 chr = 0; chr < 256; chr++) { for (u16 chr = 0; chr < 256; chr++) {
u8 buf[64] = {0}; u8 buf[64] = {0};
assert(info.size() <= 64); assert(info.size() <= 64);
for (dstate_id_t idx = 0; idx < info.size(); idx++) { for (dstate_id_t idx = 0; idx < info.size(); idx++) {
auto &succ_state = info.next(idx, chr); const auto &succ_state = info.next(idx, chr);
if (accelInfo.find(info.raw_id(succ_state.impl_id)) if (accelInfo.find(info.raw_id(succ_state.impl_id))
!= accelInfo.end()) { != accelInfo.end()) {
@ -690,7 +690,7 @@ bytecode_ptr<NFA> shengCompile_int(raw_dfa &raw, const CompileContext &cc,
} }
if (!createShuffleMasks<T>((T *)getMutableImplNfa(nfa.get()), info, accelInfo)) { if (!createShuffleMasks<T>((T *)getMutableImplNfa(nfa.get()), info, accelInfo)) {
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
return nfa; return nfa;
@ -701,7 +701,7 @@ bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc,
set<dstate_id_t> *accel_states) { set<dstate_id_t> *accel_states) {
if (!cc.grey.allowSheng) { if (!cc.grey.allowSheng) {
DEBUG_PRINTF("Sheng is not allowed!\n"); DEBUG_PRINTF("Sheng is not allowed!\n");
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
sheng_build_strat strat(raw, rm, only_accel_init); sheng_build_strat strat(raw, rm, only_accel_init);
@ -716,7 +716,7 @@ bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc,
info.can_die ? "can" : "cannot", info.size()); info.can_die ? "can" : "cannot", info.size());
if (info.size() > 16) { if (info.size() > 16) {
DEBUG_PRINTF("Too many states\n"); DEBUG_PRINTF("Too many states\n");
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
return shengCompile_int<sheng>(raw, cc, accel_states, strat, info); return shengCompile_int<sheng>(raw, cc, accel_states, strat, info);
@ -727,13 +727,20 @@ bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc,
set<dstate_id_t> *accel_states) { set<dstate_id_t> *accel_states) {
if (!cc.grey.allowSheng) { if (!cc.grey.allowSheng) {
DEBUG_PRINTF("Sheng is not allowed!\n"); DEBUG_PRINTF("Sheng is not allowed!\n");
return nullptr; bytecode_ptr<NFA>(nullptr);
} }
#ifdef HAVE_SVE
if (svcntb()<32) {
DEBUG_PRINTF("Sheng32 failed, SVE width is too small!\n");
bytecode_ptr<NFA>(nullptr);
}
#else
if (!cc.target_info.has_avx512vbmi()) { if (!cc.target_info.has_avx512vbmi()) {
DEBUG_PRINTF("Sheng32 failed, no HS_CPU_FEATURES_AVX512VBMI!\n"); DEBUG_PRINTF("Sheng32 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
return nullptr; bytecode_ptr<NFA>(nullptr);
} }
#endif
sheng_build_strat strat(raw, rm, only_accel_init); sheng_build_strat strat(raw, rm, only_accel_init);
dfa_info info(strat); dfa_info info(strat);
@ -748,7 +755,7 @@ bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc,
assert(info.size() > 16); assert(info.size() > 16);
if (info.size() > 32) { if (info.size() > 32) {
DEBUG_PRINTF("Too many states\n"); DEBUG_PRINTF("Too many states\n");
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
return shengCompile_int<sheng32>(raw, cc, accel_states, strat, info); return shengCompile_int<sheng32>(raw, cc, accel_states, strat, info);
@ -759,13 +766,20 @@ bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc,
set<dstate_id_t> *accel_states) { set<dstate_id_t> *accel_states) {
if (!cc.grey.allowSheng) { if (!cc.grey.allowSheng) {
DEBUG_PRINTF("Sheng is not allowed!\n"); DEBUG_PRINTF("Sheng is not allowed!\n");
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
#ifdef HAVE_SVE
if (svcntb()<64) {
DEBUG_PRINTF("Sheng64 failed, SVE width is too small!\n");
return bytecode_ptr<NFA>(nullptr);
}
#else
if (!cc.target_info.has_avx512vbmi()) { if (!cc.target_info.has_avx512vbmi()) {
DEBUG_PRINTF("Sheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n"); DEBUG_PRINTF("Sheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
#endif
sheng_build_strat strat(raw, rm, only_accel_init); sheng_build_strat strat(raw, rm, only_accel_init);
dfa_info info(strat); dfa_info info(strat);
@ -780,13 +794,13 @@ bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc,
assert(info.size() > 32); assert(info.size() > 32);
if (info.size() > 64) { if (info.size() > 64) {
DEBUG_PRINTF("Too many states\n"); DEBUG_PRINTF("Too many states\n");
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
vector<dstate> old_states; vector<dstate> old_states;
old_states = info.states; old_states = info.states;
auto nfa = shengCompile_int<sheng64>(raw, cc, accel_states, strat, info); auto nfa = shengCompile_int<sheng64>(raw, cc, accel_states, strat, info);
if (!nfa) { if (!nfa) {
info.states = old_states; info.states = old_states; // cppcheck-suppress unreadVariable
} }
return nfa; return nfa;
} }

View File

@ -32,6 +32,8 @@
*/ */
#include "config.h" #include "config.h"
#include <numeric>
#include "tamaramacompile.h" #include "tamaramacompile.h"
@ -127,9 +129,10 @@ buildTamarama(const TamaInfo &tamaInfo, const u32 queue,
sizeof(u32) * subSize + 64; // offsets to subengines in bytecode and sizeof(u32) * subSize + 64; // offsets to subengines in bytecode and
// padding for subengines // padding for subengines
for (const auto &sub : tamaInfo.subengines) { auto subl = [](size_t z, NFA *sub) {
total_size += ROUNDUP_CL(sub->length); return z + (size_t)(ROUNDUP_CL(sub->length));
} };
total_size += std::accumulate(tamaInfo.subengines.begin(), tamaInfo.subengines.end(), 0, subl);
// use subSize as a sentinel value for no active subengines, // use subSize as a sentinel value for no active subengines,
// so add one to subSize here // so add one to subSize here

View File

@ -227,7 +227,7 @@ const u8 *fwdBlock(SuperVector<S> shuf_mask_lo_highclear, SuperVector<S> shuf_ma
} }
template <uint16_t S> template <uint16_t S>
const u8 *truffleExecReal(m128 &shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, const u8 *buf, const u8 *buf_end) { const u8 *truffleExecReal(const m128 &shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, const u8 *buf, const u8 *buf_end) {
assert(buf && buf_end); assert(buf && buf_end);
assert(buf < buf_end); assert(buf < buf_end);
DEBUG_PRINTF("truffle %p len %zu\n", buf, buf_end - buf); DEBUG_PRINTF("truffle %p len %zu\n", buf, buf_end - buf);
@ -349,4 +349,4 @@ const u8 *rtruffleExecReal(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highse
return buf - 1; return buf - 1;
} }
#endif //HAVE_SVE #endif //HAVE_SVE

View File

@ -193,9 +193,6 @@ void reduceGraph(NGHolder &g, som_type som, bool utf8,
if (!som) { if (!som) {
mergeCyclicDotStars(g); mergeCyclicDotStars(g);
}
if (!som) {
removeSiblingsOfStartDotStar(g); removeSiblingsOfStartDotStar(g);
} }
} }
@ -292,7 +289,7 @@ bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr,
// Returns true if all components have been added. // Returns true if all components have been added.
static static
bool processComponents(NG &ng, ExpressionInfo &expr, bool processComponents(NG &ng, const ExpressionInfo &expr,
deque<unique_ptr<NGHolder>> &g_comp, deque<unique_ptr<NGHolder>> &g_comp,
const som_type som) { const som_type som) {
const u32 num_components = g_comp.size(); const u32 num_components = g_comp.size();

View File

@ -165,9 +165,9 @@ void reformAnchoredRepeatsComponent(NGHolder &g,
return; return;
} }
NFAVertex dotV = NGHolder::null_vertex();
set<NFAVertex> otherV; set<NFAVertex> otherV;
dotV = findReformable(g, compAnchoredStarts, otherV); NFAVertex dotV = findReformable(g, compAnchoredStarts, otherV);
if (dotV == NGHolder::null_vertex()) { if (dotV == NGHolder::null_vertex()) {
DEBUG_PRINTF("no candidate reformable dot found.\n"); DEBUG_PRINTF("no candidate reformable dot found.\n");
return; return;
@ -257,7 +257,7 @@ void reformAnchoredRepeatsComponent(NGHolder &g,
static static
void reformUnanchoredRepeatsComponent(NGHolder &g, void reformUnanchoredRepeatsComponent(NGHolder &g,
set<NFAVertex> &compAnchoredStarts, const set<NFAVertex> &compAnchoredStarts,
set<NFAVertex> &compUnanchoredStarts, set<NFAVertex> &compUnanchoredStarts,
set<NFAVertex> &dead, set<NFAVertex> &dead,
depth *startBegin, depth *startEnd) { depth *startBegin, depth *startEnd) {
@ -268,9 +268,9 @@ void reformUnanchoredRepeatsComponent(NGHolder &g,
} }
while (true) { while (true) {
NFAVertex dotV = NGHolder::null_vertex();
set<NFAVertex> otherV; set<NFAVertex> otherV;
dotV = findReformable(g, compUnanchoredStarts, otherV); NFAVertex dotV = findReformable(g, compUnanchoredStarts, otherV);
if (dotV == NGHolder::null_vertex()) { if (dotV == NGHolder::null_vertex()) {
DEBUG_PRINTF("no candidate reformable dot found.\n"); DEBUG_PRINTF("no candidate reformable dot found.\n");
return; return;
@ -485,15 +485,15 @@ void collapseVariableDotRepeat(NGHolder &g, NFAVertex start,
// Collect all the other optional dot vertices and the successor vertices // Collect all the other optional dot vertices and the successor vertices
// by walking down the graph from initialDot // by walking down the graph from initialDot
set<NFAVertex> dots, succ; set<NFAVertex> dots, succr;
if (!gatherParticipants(g, start, initialDot, dots, succ)) { if (!gatherParticipants(g, start, initialDot, dots, succr)) {
DEBUG_PRINTF("gatherParticipants failed\n"); DEBUG_PRINTF("gatherParticipants failed\n");
return; return;
} }
DEBUG_PRINTF("optional dot repeat with %zu participants, " DEBUG_PRINTF("optional dot repeat with %zu participants, "
"terminating in %zu non-dot nodes\n", "terminating in %zu non-dot nodes\n",
dots.size(), succ.size()); dots.size(), succr.size());
// Remove all the participants and set the start offset // Remove all the participants and set the start offset
dead.insert(dots.begin(), dots.end()); dead.insert(dots.begin(), dots.end());
@ -509,7 +509,7 @@ void collapseVariableDotRepeat(NGHolder &g, NFAVertex start,
assert(startEnd->is_reachable()); assert(startEnd->is_reachable());
// Connect our successor vertices to both start and startDs. // Connect our successor vertices to both start and startDs.
for (auto v : succ) { for (auto v : succr) {
add_edge_if_not_present(g.start, v, g); add_edge_if_not_present(g.start, v, g);
add_edge_if_not_present(g.startDs, v, g); add_edge_if_not_present(g.startDs, v, g);
} }
@ -555,7 +555,7 @@ void collapseVariableRepeats(NGHolder &g, depth *startBegin, depth *startEnd) {
} }
static static
void addDotsBetween(NGHolder &g, NFAVertex lhs, vector<NFAVertex> &rhs, void addDotsBetween(NGHolder &g, NFAVertex lhs, const vector<NFAVertex> &rhs,
depth min_repeat, depth max_repeat) { depth min_repeat, depth max_repeat) {
const bool unbounded = max_repeat.is_infinite(); const bool unbounded = max_repeat.is_infinite();
if (unbounded) { if (unbounded) {

View File

@ -92,11 +92,12 @@ static const CharReach CHARREACH_NONWORD_UCP_PRE(CHARREACH_NONWORD);
static static
vector<NFAEdge> getAsserts(const NGHolder &g) { vector<NFAEdge> getAsserts(const NGHolder &g) {
vector<NFAEdge> out; vector<NFAEdge> out;
for (const auto &e : edges_range(g)) { auto assertflags = [&g=g](const NFAEdge &e) {
if (g[e].assert_flags) { return (g[e].assert_flags);
out.emplace_back(e); };
} const auto &er = edges_range(g);
} std::copy_if(begin(er), end(er), std::back_inserter(out), assertflags);
return out; return out;
} }
@ -384,7 +385,10 @@ void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
/* there may already be a different edge from start to eod if so /* there may already be a different edge from start to eod if so
* we need to make it unconditional and alive * we need to make it unconditional and alive
*/ */
if (NFAEdge start_eod = edge(u, g.acceptEod, g)) { NFAEdge start_eod;
bool exists;
std::tie(start_eod, exists) = edge(u, g.acceptEod, g);
if (exists) {
g[start_eod].assert_flags = 0; g[start_eod].assert_flags = 0;
dead->erase(start_eod); dead->erase(start_eod);
} else { } else {
@ -437,7 +441,10 @@ void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
/* there may already be a different edge from start to eod if so /* there may already be a different edge from start to eod if so
* we need to make it unconditional and alive * we need to make it unconditional and alive
*/ */
if (NFAEdge start_eod = edge(u, g.acceptEod, g)) { NFAEdge start_eod;
bool exists;
std::tie(start_eod, exists) = edge(u, g.acceptEod, g);
if (exists) {
g[start_eod].assert_flags = 0; g[start_eod].assert_flags = 0;
dead->erase(start_eod); dead->erase(start_eod);
} else { } else {
@ -496,7 +503,8 @@ void ensureCodePointStart(ReportManager &rm, NGHolder &g,
* boundaries. Assert resolution handles the badness coming from asserts. * boundaries. Assert resolution handles the badness coming from asserts.
* The only other source of trouble is startDs->accept connections. * The only other source of trouble is startDs->accept connections.
*/ */
NFAEdge orig = edge(g.startDs, g.accept, g); NFAEdge orig;
std::tie(orig, std::ignore) = edge(g.startDs, g.accept, g);
if (expr.utf8 && orig) { if (expr.utf8 && orig) {
DEBUG_PRINTF("rectifying %u\n", expr.report); DEBUG_PRINTF("rectifying %u\n", expr.report);
Report ir = rm.getBasicInternalReport(expr); Report ir = rm.getBasicInternalReport(expr);

View File

@ -512,17 +512,17 @@ bool removeSiblingsOfStartDotStar(NGHolder &g) {
* for SOM mode. (see UE-1544) */ * for SOM mode. (see UE-1544) */
bool optimiseVirtualStarts(NGHolder &g) { bool optimiseVirtualStarts(NGHolder &g) {
vector<NFAEdge> dead; vector<NFAEdge> dead;
auto deads = [&g=g](const NFAEdge &e) {
return (!is_any_start(source(e, g), g));
};
for (auto v : adjacent_vertices_range(g.startDs, g)) { for (auto v : adjacent_vertices_range(g.startDs, g)) {
u32 flags = g[v].assert_flags; u32 flags = g[v].assert_flags;
if (!(flags & POS_FLAG_VIRTUAL_START)) { if (!(flags & POS_FLAG_VIRTUAL_START)) {
continue; continue;
} }
const auto &e = in_edges_range(v, g);
for (const auto &e : in_edges_range(v, g)) { std::copy_if(begin(e), end(e), std::back_inserter(dead), deads);
if (!is_any_start(source(e, g), g)) {
dead.emplace_back(e);
}
}
} }
if (dead.empty()) { if (dead.empty()) {

View File

@ -98,9 +98,9 @@ class ClassInfo {
public: public:
struct ClassDepth { struct ClassDepth {
ClassDepth() {} ClassDepth() {}
ClassDepth(const NFAVertexDepth &d) explicit ClassDepth(const NFAVertexDepth &d)
: d1(d.fromStart), d2(d.fromStartDotStar) {} : d1(d.fromStart), d2(d.fromStartDotStar) {}
ClassDepth(const NFAVertexRevDepth &rd) explicit ClassDepth(const NFAVertexRevDepth &rd)
: d1(rd.toAccept), d2(rd.toAcceptEod) {} : d1(rd.toAccept), d2(rd.toAcceptEod) {}
DepthMinMax d1; DepthMinMax d1;
DepthMinMax d2; DepthMinMax d2;
@ -159,7 +159,7 @@ public:
return id; return id;
} }
void append(WorkQueue &other) { void append(const WorkQueue &other) {
for (const auto &e : other) { for (const auto &e : other) {
push(e); push(e);
} }
@ -193,7 +193,7 @@ private:
} }
static static
bool outIsIrreducible(NFAVertex &v, const NGHolder &g) { bool outIsIrreducible(const NFAVertex &v, const NGHolder &g) {
unsigned nonSpecialVertices = 0; unsigned nonSpecialVertices = 0;
for (auto w : adjacent_vertices_range(v, g)) { for (auto w : adjacent_vertices_range(v, g)) {
if (!is_special(w, g) && w != v) { if (!is_special(w, g) && w != v) {
@ -204,7 +204,7 @@ bool outIsIrreducible(NFAVertex &v, const NGHolder &g) {
} }
static static
bool inIsIrreducible(NFAVertex &v, const NGHolder &g) { bool inIsIrreducible(const NFAVertex &v, const NGHolder &g) {
unsigned nonSpecialVertices = 0; unsigned nonSpecialVertices = 0;
for (auto u : inv_adjacent_vertices_range(v, g)) { for (auto u : inv_adjacent_vertices_range(v, g)) {
if (!is_special(u, g) && u != v) { if (!is_special(u, g) && u != v) {
@ -337,9 +337,9 @@ vector<VertexInfoSet> partitionGraph(vector<unique_ptr<VertexInfo>> &infos,
ClassInfo::ClassDepth depth; ClassInfo::ClassDepth depth;
if (eq == LEFT_EQUIVALENCE) { if (eq == LEFT_EQUIVALENCE) {
depth = depths[vi->vert_index]; depth = ClassInfo::ClassDepth(depths[vi->vert_index]);
} else { } else {
depth = rdepths[vi->vert_index]; depth = ClassInfo::ClassDepth(rdepths[vi->vert_index]);
} }
ClassInfo ci(g, *vi, depth, eq); ClassInfo ci(g, *vi, depth, eq);
@ -547,8 +547,8 @@ void mergeClass(vector<unique_ptr<VertexInfo>> &infos, NGHolder &g,
pred_info->succ.erase(old_vertex_info); pred_info->succ.erase(old_vertex_info);
// if edge doesn't exist, create it // if edge doesn't exist, create it
NFAEdge e = add_edge_if_not_present(pred_info->v, new_v, g); NFAEdge e;
std::tie(e, std::ignore) = add_edge_if_not_present(pred_info->v, new_v, g);
// put edge tops, if applicable // put edge tops, if applicable
if (!edgetops.empty()) { if (!edgetops.empty()) {
assert(g[e].tops.empty() || g[e].tops == edgetops); assert(g[e].tops.empty() || g[e].tops == edgetops);
@ -558,7 +558,8 @@ void mergeClass(vector<unique_ptr<VertexInfo>> &infos, NGHolder &g,
pred_info->succ.insert(new_vertex_info); pred_info->succ.insert(new_vertex_info);
if (new_v_eod) { if (new_v_eod) {
NFAEdge ee = add_edge_if_not_present(pred_info->v, new_v_eod, NFAEdge ee;
std::tie(ee, std::ignore) = add_edge_if_not_present(pred_info->v, new_v_eod,
g); g);
// put edge tops, if applicable // put edge tops, if applicable

View File

@ -430,7 +430,7 @@ NFAVertex findSingleCyclic(const NGHolder &g) {
} }
static static
bool hasOffsetAdjust(const ReportManager &rm, NGHolder &g, bool hasOffsetAdjust(const ReportManager &rm, const NGHolder &g,
int *adjust) { int *adjust) {
const auto &reports = all_reports(g); const auto &reports = all_reports(g);
if (reports.empty()) { if (reports.empty()) {
@ -506,14 +506,14 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) {
while (v != cyclic) { while (v != cyclic) {
DEBUG_PRINTF("vertex %zu\n", g[v].index); DEBUG_PRINTF("vertex %zu\n", g[v].index);
width++; width++;
auto succ = succs(v, g); auto s = succs(v, g);
if (contains(succ, cyclic)) { if (contains(s, cyclic)) {
if (succ.size() == 1) { if (s.size() == 1) {
v = cyclic; v = cyclic;
} else if (succ.size() == 2) { } else if (s.size() == 2) {
// Cyclic and jump edge. // Cyclic and jump edge.
succ.erase(cyclic); s.erase(cyclic);
NFAVertex v2 = *succ.begin(); NFAVertex v2 = *s.begin();
if (!edge(cyclic, v2, g).second) { if (!edge(cyclic, v2, g).second) {
DEBUG_PRINTF("bad form\n"); DEBUG_PRINTF("bad form\n");
return false; return false;
@ -524,11 +524,11 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) {
return false; return false;
} }
} else { } else {
if (succ.size() != 1) { if (s.size() != 1) {
DEBUG_PRINTF("bad form\n"); DEBUG_PRINTF("bad form\n");
return false; return false;
} }
v = *succ.begin(); v = *s.begin();
} }
} }
@ -544,12 +544,12 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) {
while (!is_any_accept(v, g)) { while (!is_any_accept(v, g)) {
DEBUG_PRINTF("vertex %zu\n", g[v].index); DEBUG_PRINTF("vertex %zu\n", g[v].index);
width++; width++;
auto succ = succs(v, g); auto s = succs(v, g);
if (succ.size() != 1) { if (s.size() != 1) {
DEBUG_PRINTF("bad form\n"); DEBUG_PRINTF("bad form\n");
return false; return false;
} }
v = *succ.begin(); v = *s.begin();
} }
int offsetAdjust = 0; int offsetAdjust = 0;
@ -569,27 +569,28 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) {
return true; return true;
} }
vector<NFAVertex> preds; vector<NFAVertex> predcs;
vector<NFAEdge> dead; vector<NFAEdge> dead;
auto deads = [&g=g](const NFAEdge &e) {
return (target(e, g) != g.startDs);
};
for (auto u : inv_adjacent_vertices_range(cyclic, g)) { for (auto u : inv_adjacent_vertices_range(cyclic, g)) {
DEBUG_PRINTF("pred %zu\n", g[u].index); DEBUG_PRINTF("pred %zu\n", g[u].index);
if (u == cyclic) { if (u == cyclic) {
continue; continue;
} }
preds.emplace_back(u); predcs.emplace_back(u);
// We want to delete the out-edges of each predecessor, but need to // We want to delete the out-edges of each predecessor, but need to
// make sure we don't delete the startDs self loop. // make sure we don't delete the startDs self loop.
for (const auto &e : out_edges_range(u, g)) {
if (target(e, g) != g.startDs) { const auto &e = out_edges_range(u, g);
dead.emplace_back(e); std::copy_if(begin(e), end(e), std::back_inserter(dead), deads);
}
}
} }
remove_edges(dead, g); remove_edges(dead, g);
assert(!preds.empty()); assert(!predcs.empty());
const CharReach &cr = g[cyclic].char_reach; const CharReach &cr = g[cyclic].char_reach;
@ -597,14 +598,14 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) {
v = add_vertex(g); v = add_vertex(g);
g[v].char_reach = cr; g[v].char_reach = cr;
for (auto u : preds) { for (auto u : predcs) {
add_edge(u, v, g); add_edge(u, v, g);
} }
preds.clear(); predcs.clear();
preds.emplace_back(v); predcs.emplace_back(v);
} }
assert(!preds.empty()); assert(!predcs.empty());
for (auto u : preds) { for (auto u : predcs) {
add_edge(u, cyclic, g); add_edge(u, cyclic, g);
} }

View File

@ -66,15 +66,15 @@ bool findMask(const NGHolder &g, vector<CharReach> *mask, bool *anchored,
return false; return false;
} }
set<NFAVertex> &succs = *anchored ? s_succ : sds_succ; set<NFAVertex> &succrs = *anchored ? s_succ : sds_succ;
succs.erase(g.startDs); succrs.erase(g.startDs);
if (succs.size() != 1) { if (succrs.size() != 1) {
DEBUG_PRINTF("branchy root\n"); DEBUG_PRINTF("branchy root\n");
return false; return false;
} }
NFAVertex u = *anchored ? g.start : g.startDs; NFAVertex u = *anchored ? g.start : g.startDs;
NFAVertex v = *succs.begin(); NFAVertex v = *succrs.begin();
while (true) { while (true) {
DEBUG_PRINTF("validating vertex %zu\n", g[v].index); DEBUG_PRINTF("validating vertex %zu\n", g[v].index);

View File

@ -71,13 +71,13 @@ vector<flat_set<NFAVertex>> gatherSuccessorsByDepth(const NGHolder &g,
continue; continue;
} }
for (auto succ : adjacent_vertices_range(v, g)) { for (auto succr : adjacent_vertices_range(v, g)) {
// ignore self-loops // ignore self-loops
if (v == succ) { if (v == succr) {
continue; continue;
} }
DEBUG_PRINTF("Node %zu depth %u\n", g[succ].index, d + 1); DEBUG_PRINTF("Node %zu depth %u\n", g[succ].index, d + 1);
next.insert(succ); next.insert(succr);
} }
} }
result[d] = next; result[d] = next;
@ -113,13 +113,13 @@ vector<flat_set<NFAVertex>> gatherPredecessorsByDepth(const NGHolder &g,
for (unsigned d = 1; d < depth; d++) { for (unsigned d = 1; d < depth; d++) {
// collect all successors for all current level vertices // collect all successors for all current level vertices
for (auto v : cur) { for (auto v : cur) {
for (auto pred : inv_adjacent_vertices_range(v, g)) { for (auto predc : inv_adjacent_vertices_range(v, g)) {
// ignore self-loops // ignore self-loops
if (v == pred) { if (v == predc) {
continue; continue;
} }
DEBUG_PRINTF("Node %zu depth %u\n", g[pred].index, d + 1); DEBUG_PRINTF("Node %zu depth %u\n", g[pred].index, d + 1);
next.insert(pred); next.insert(predc);
} }
} }
result[d] = next; result[d] = next;
@ -582,11 +582,11 @@ private:
// set up all reports // set up all reports
bool clone = false; bool clone = false;
for (auto &pair : reports_to_vertices) { for (const auto &pair : reports_to_vertices) {
const auto &reports = pair.first; const auto &reports = pair.first;
const auto &vertices = pair.second; const auto &svertices = pair.second;
for (auto src : vertices) { for (auto src : svertices) {
// get all predecessors up to edit distance // get all predecessors up to edit distance
auto src_vertices_by_depth = auto src_vertices_by_depth =
gatherPredecessorsByDepth(g, src, edit_distance); gatherPredecessorsByDepth(g, src, edit_distance);
@ -594,7 +594,8 @@ private:
// find which accepts source vertex connects to // find which accepts source vertex connects to
flat_set<NFAVertex> targets; flat_set<NFAVertex> targets;
for (const auto &accept : accepts) { for (const auto &accept : accepts) {
NFAEdge e = edge(src, accept, g); NFAEdge e;
std::tie(e, std::ignore) = edge(src, accept, g);
if (e) { if (e) {
targets.insert(accept); targets.insert(accept);
} }
@ -602,8 +603,8 @@ private:
assert(targets.size()); assert(targets.size());
for (unsigned d = 0; d < src_vertices_by_depth.size(); d++) { for (unsigned d = 0; d < src_vertices_by_depth.size(); d++) {
const auto &preds = src_vertices_by_depth[d]; const auto &predcs = src_vertices_by_depth[d];
for (auto v : preds) { for (auto v : predcs) {
// only clone a node if it already contains reports // only clone a node if it already contains reports
if (clone && !g[v].reports.empty()) { if (clone && !g[v].reports.empty()) {
create_clone(v, reports, edit_distance - d, create_clone(v, reports, edit_distance - d,

View File

@ -513,12 +513,12 @@ static
bool doHaig(const NGHolder &g, som_type som, bool doHaig(const NGHolder &g, som_type som,
const vector<vector<CharReach>> &triggers, bool unordered_som, const vector<vector<CharReach>> &triggers, bool unordered_som,
raw_som_dfa *rdfa) { raw_som_dfa *rdfa) {
u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from
a fight */
using StateSet = typename Auto::StateSet; using StateSet = typename Auto::StateSet;
vector<StateSet> nfa_state_map; vector<StateSet> nfa_state_map;
Auto n(g, som, triggers, unordered_som); Auto n(g, som, triggers, unordered_som);
try { try {
u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from
a fight */
if (!determinise(n, rdfa->states, state_limit, &nfa_state_map)) { if (!determinise(n, rdfa->states, state_limit, &nfa_state_map)) {
DEBUG_PRINTF("state limit exceeded\n"); DEBUG_PRINTF("state limit exceeded\n");
return false; return false;

View File

@ -154,7 +154,7 @@ bytecode_ptr<NFA> buildLbrDot(const CharReach &cr, const depth &repeatMin,
const depth &repeatMax, u32 minPeriod, const depth &repeatMax, u32 minPeriod,
bool is_reset, ReportID report) { bool is_reset, ReportID report) {
if (!cr.all()) { if (!cr.all()) {
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
@ -176,7 +176,7 @@ bytecode_ptr<NFA> buildLbrVerm(const CharReach &cr, const depth &repeatMin,
const CharReach escapes(~cr); const CharReach escapes(~cr);
if (escapes.count() != 1) { if (escapes.count() != 1) {
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
@ -199,7 +199,7 @@ bytecode_ptr<NFA> buildLbrNVerm(const CharReach &cr, const depth &repeatMin,
const CharReach escapes(cr); const CharReach escapes(cr);
if (escapes.count() != 1) { if (escapes.count() != 1) {
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
@ -228,7 +228,7 @@ bytecode_ptr<NFA> buildLbrShuf(const CharReach &cr, const depth &repeatMin,
minPeriod, rtype); minPeriod, rtype);
if (shuftiBuildMasks(~cr, (u8 *)&ls->mask_lo, (u8 *)&ls->mask_hi) == -1) { if (shuftiBuildMasks(~cr, (u8 *)&ls->mask_lo, (u8 *)&ls->mask_hi) == -1) {
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
DEBUG_PRINTF("built shuf lbr\n"); DEBUG_PRINTF("built shuf lbr\n");
@ -296,7 +296,7 @@ bytecode_ptr<NFA> constructLBR(const CharReach &cr, const depth &repeatMin,
if (!nfa) { if (!nfa) {
assert(0); assert(0);
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
return nfa; return nfa;
@ -307,11 +307,11 @@ bytecode_ptr<NFA> constructLBR(const CastleProto &proto,
const CompileContext &cc, const CompileContext &cc,
const ReportManager &rm) { const ReportManager &rm) {
if (!cc.grey.allowLbr) { if (!cc.grey.allowLbr) {
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
if (proto.repeats.size() != 1) { if (proto.repeats.size() != 1) {
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
const PureRepeat &repeat = proto.repeats.begin()->second; const PureRepeat &repeat = proto.repeats.begin()->second;
@ -319,7 +319,7 @@ bytecode_ptr<NFA> constructLBR(const CastleProto &proto,
if (repeat.reports.size() != 1) { if (repeat.reports.size() != 1) {
DEBUG_PRINTF("too many reports\n"); DEBUG_PRINTF("too many reports\n");
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
bool is_reset; bool is_reset;
@ -346,16 +346,16 @@ bytecode_ptr<NFA> constructLBR(const NGHolder &g,
const CompileContext &cc, const CompileContext &cc,
const ReportManager &rm) { const ReportManager &rm) {
if (!cc.grey.allowLbr) { if (!cc.grey.allowLbr) {
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
PureRepeat repeat; PureRepeat repeat;
if (!isPureRepeat(g, repeat)) { if (!isPureRepeat(g, repeat)) {
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
if (repeat.reports.size() != 1) { if (repeat.reports.size() != 1) {
DEBUG_PRINTF("too many reports\n"); DEBUG_PRINTF("too many reports\n");
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
CastleProto proto(g.kind, repeat); CastleProto proto(g.kind, repeat);

View File

@ -39,7 +39,7 @@ bytecode_ptr<NFA> buildLbrVerm16(const CharReach &cr, const depth &repeatMin,
const CharReach escapes(~cr); const CharReach escapes(~cr);
if (escapes.count() > 16) { if (escapes.count() > 16) {
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
@ -62,7 +62,7 @@ bytecode_ptr<NFA> buildLbrNVerm16(const CharReach &cr, const depth &repeatMin,
const CharReach escapes(cr); const CharReach escapes(cr);
if (escapes.count() > 16) { if (escapes.count() > 16) {
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,

View File

@ -342,7 +342,7 @@ void attemptToUseAsStart(const NGHolder &g, NFAVertex u,
map<NFAVertex, flat_set<u32>> &unhandled_succ_tops, map<NFAVertex, flat_set<u32>> &unhandled_succ_tops,
map<u32, set<NFAVertex>> &tops_out) { map<u32, set<NFAVertex>> &tops_out) {
flat_set<u32> top_inter = unhandled_succ_tops.at(u); flat_set<u32> top_inter = unhandled_succ_tops.at(u);
flat_set<NFAVertex> succs; flat_set<NFAVertex> f_succs;
for (NFAVertex v : adjacent_vertices_range(u, g)) { for (NFAVertex v : adjacent_vertices_range(u, g)) {
if (!contains(unhandled_succ_tops, v)) { if (!contains(unhandled_succ_tops, v)) {
return; return;
@ -360,7 +360,7 @@ void attemptToUseAsStart(const NGHolder &g, NFAVertex u,
set_intersection(top_inter.begin(), top_inter.end(), set_intersection(top_inter.begin(), top_inter.end(),
v_tops.begin(), v_tops.end(), ni_inserter); v_tops.begin(), v_tops.end(), ni_inserter);
top_inter = std::move(new_inter); top_inter = std::move(new_inter);
succs.insert(v); f_succs.insert(v);
} }
if (top_inter.empty()) { if (top_inter.empty()) {
@ -373,7 +373,7 @@ void attemptToUseAsStart(const NGHolder &g, NFAVertex u,
} }
DEBUG_PRINTF("reusing %zu is a start vertex\n", g[u].index); DEBUG_PRINTF("reusing %zu is a start vertex\n", g[u].index);
markTopSuccAsHandled(u, top_inter, succs, tops_out, unhandled_top_succs, markTopSuccAsHandled(u, top_inter, f_succs, tops_out, unhandled_top_succs,
unhandled_succ_tops); unhandled_succ_tops);
} }
@ -389,11 +389,11 @@ void reusePredsAsStarts(const NGHolder &g, const map<u32, CharReach> &top_reach,
/* create list of candidates first, to avoid issues of iter invalidation */ /* create list of candidates first, to avoid issues of iter invalidation */
DEBUG_PRINTF("attempting to reuse vertices for top starts\n"); DEBUG_PRINTF("attempting to reuse vertices for top starts\n");
vector<NFAVertex> cand_starts; vector<NFAVertex> cand_starts;
for (NFAVertex u : unhandled_succ_tops | map_keys) { auto cands = [&g=g](const NFAVertex &u) {
if (hasSelfLoop(u, g)) { return (hasSelfLoop(u, g));
cand_starts.emplace_back(u); };
} const auto &u = unhandled_succ_tops | map_keys;
} std::copy_if(begin(u), end(u), std::back_inserter(cand_starts), cands);
for (NFAVertex u : cand_starts) { for (NFAVertex u : cand_starts) {
if (!contains(unhandled_succ_tops, u)) { if (!contains(unhandled_succ_tops, u)) {
@ -652,7 +652,7 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm,
u32 numStates = countStates(state_ids); u32 numStates = countStates(state_ids);
if (numStates > NFA_MAX_STATES) { if (numStates > NFA_MAX_STATES) {
DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates); DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates);
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
map<NFAVertex, BoundedRepeatSummary> br_cyclic; map<NFAVertex, BoundedRepeatSummary> br_cyclic;
@ -722,14 +722,14 @@ bytecode_ptr<NFA> constructReversedNFA_i(const NGHolder &h_in, u32 hint,
assert(h.kind == NFA_REV_PREFIX); /* triggered, raises internal callbacks */ assert(h.kind == NFA_REV_PREFIX); /* triggered, raises internal callbacks */
// Do state numbering. // Do state numbering.
auto state_ids = numberStates(h, {}); auto state_ids = numberStates(h, flat_set<graph_detail::vertex_descriptor<ue2_graph<NGHolder, NFAGraphVertexProps, NFAGraphEdgeProps>>>());
// Quick exit: if we've got an embarrassment of riches, i.e. more states // Quick exit: if we've got an embarrassment of riches, i.e. more states
// than we can implement in our largest NFA model, bail here. // than we can implement in our largest NFA model, bail here.
u32 numStates = countStates(state_ids); u32 numStates = countStates(state_ids);
if (numStates > NFA_MAX_STATES) { if (numStates > NFA_MAX_STATES) {
DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates); DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates);
return nullptr; return bytecode_ptr<NFA>(nullptr);
} }
assert(sanityCheckGraph(h, state_ids)); assert(sanityCheckGraph(h, state_ids));

View File

@ -62,12 +62,12 @@ namespace ue2 {
static static
void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr, void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr,
const flat_set<NFAVertex> &cands, const flat_set<NFAVertex> &cands,
const flat_set<NFAVertex> &preds, const flat_set<NFAVertex> &f_preds,
flat_set<NFAVertex> *next_cands, flat_set<NFAVertex> *next_cands,
flat_set<NFAVertex> *next_preds, flat_set<NFAVertex> *next_preds,
flat_set<NFAVertex> *friends) { flat_set<NFAVertex> *friends) {
for (auto v : cands) { for (auto v : cands) {
if (contains(preds, v)) { if (contains(f_preds, v)) {
continue; continue;
} }
@ -80,7 +80,7 @@ void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr,
} }
for (auto u : inv_adjacent_vertices_range(v, g)) { for (auto u : inv_adjacent_vertices_range(v, g)) {
if (!contains(preds, u)) { if (!contains(f_preds, u)) {
DEBUG_PRINTF("bad pred\n"); DEBUG_PRINTF("bad pred\n");
goto next_cand; goto next_cand;
} }
@ -116,8 +116,8 @@ void findAccelFriends(const NGHolder &g, NFAVertex v,
u32 friend_depth = offset + 1; u32 friend_depth = offset + 1;
flat_set<NFAVertex> preds; flat_set<NFAVertex> f_preds;
insert(&preds, inv_adjacent_vertices(v, g)); insert(&f_preds, inv_adjacent_vertices(v, g));
const CharReach &cr = g[v].char_reach; const CharReach &cr = g[v].char_reach;
flat_set<NFAVertex> cands; flat_set<NFAVertex> cands;
@ -126,9 +126,9 @@ void findAccelFriends(const NGHolder &g, NFAVertex v,
flat_set<NFAVertex> next_preds; flat_set<NFAVertex> next_preds;
flat_set<NFAVertex> next_cands; flat_set<NFAVertex> next_cands;
for (u32 i = 0; i < friend_depth; i++) { for (u32 i = 0; i < friend_depth; i++) {
findAccelFriendGeneration(g, cr, cands, preds, &next_cands, &next_preds, findAccelFriendGeneration(g, cr, cands, f_preds, &next_cands, &next_preds,
friends); friends);
preds.insert(next_preds.begin(), next_preds.end()); f_preds.insert(next_preds.begin(), next_preds.end());
next_preds.clear(); next_preds.clear();
cands.swap(next_cands); cands.swap(next_cands);
next_cands.clear(); next_cands.clear();
@ -321,7 +321,7 @@ struct DAccelScheme {
bool cd_a = buildDvermMask(a.double_byte); bool cd_a = buildDvermMask(a.double_byte);
bool cd_b = buildDvermMask(b.double_byte); bool cd_b = buildDvermMask(b.double_byte);
if (cd_a != cd_b) { if (cd_a != cd_b) {
return cd_a > cd_b; return cd_a;
} }
} }
@ -811,11 +811,9 @@ depth_done:
return true; return true;
} }
} }
}
// Second option: a two-byte shufti (i.e. less than eight 2-byte // Second option: a two-byte shufti (i.e. less than eight 2-byte
// literals) // literals)
if (depth > 1) {
for (unsigned int i = 0; i < (depth - 1); i++) { for (unsigned int i = 0; i < (depth - 1); i++) {
if (depthReach[i].count() * depthReach[i+1].count() if (depthReach[i].count() * depthReach[i+1].count()
<= DOUBLE_SHUFTI_LIMIT) { <= DOUBLE_SHUFTI_LIMIT) {

View File

@ -488,9 +488,9 @@ vector<LitEdge> add_reverse_edges_and_index(LitGraph &lg) {
const size_t edge_count = num_edges(lg); const size_t edge_count = num_edges(lg);
vector<LitEdge> fwd_edges; vector<LitEdge> fwd_edges;
fwd_edges.reserve(edge_count); fwd_edges.reserve(edge_count);
for (const auto &e : edges_range(lg)) {
fwd_edges.push_back(e); const auto &e = edges_range(lg);
} std::copy(begin(e), end(e), std::back_inserter(fwd_edges));
vector<LitEdge> rev_map(2 * edge_count); vector<LitEdge> rev_map(2 * edge_count);

View File

@ -70,7 +70,7 @@ bool bad_mixed_sensitivity(const ue2_literal &s);
* Score all the edges in the given graph, returning them in \p scores indexed * Score all the edges in the given graph, returning them in \p scores indexed
* by edge_index. */ * by edge_index. */
std::vector<u64a> scoreEdges(const NGHolder &h, std::vector<u64a> scoreEdges(const NGHolder &h,
const flat_set<NFAEdge> &known_bad = {}); const flat_set<NFAEdge> &known_bad = flat_set<NFAEdge>());
/** Returns a score for a literal set. Lower scores are better. */ /** Returns a score for a literal set. Lower scores are better. */
u64a scoreSet(const std::set<ue2_literal> &s); u64a scoreSet(const std::set<ue2_literal> &s);

View File

@ -98,7 +98,7 @@ void addToString(string &s, const NGHolder &g, NFAVertex v) {
} }
static static
bool splitOffLiteral(NG &ng, NGHolder &g, NFAVertex v, const bool anchored, bool splitOffLiteral(NG &ng, const NGHolder &g, NFAVertex v, const bool anchored,
set<NFAVertex> &dead) { set<NFAVertex> &dead) {
DEBUG_PRINTF("examine vertex %zu\n", g[v].index); DEBUG_PRINTF("examine vertex %zu\n", g[v].index);
bool nocase = false, casefixed = false; bool nocase = false, casefixed = false;

View File

@ -94,7 +94,7 @@ void transition_graph(autom &nfa, const std::vector<NFAVertex> &vByStateId,
/* generate top transitions, false -> top = selfloop */ /* generate top transitions, false -> top = selfloop */
bool top_allowed = is_triggered(graph); bool top_allowed = is_triggered(graph);
StateSet succ = nfa.dead; StateSet succr = nfa.dead;
for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) { for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
NFAVertex u = vByStateId[i]; NFAVertex u = vByStateId[i];
@ -102,7 +102,7 @@ void transition_graph(autom &nfa, const std::vector<NFAVertex> &vByStateId,
if (contains(unused, v)) { if (contains(unused, v)) {
continue; continue;
} }
succ.set(graph[v].index); succr.set(graph[v].index);
} }
if (top_allowed && !nfa.toppable.test(i)) { if (top_allowed && !nfa.toppable.test(i)) {
@ -112,15 +112,15 @@ void transition_graph(autom &nfa, const std::vector<NFAVertex> &vByStateId,
} }
} }
StateSet active_squash = succ & squash; StateSet active_squash = succr & squash;
if (active_squash.any()) { if (active_squash.any()) {
for (size_t j = active_squash.find_first(); j != active_squash.npos; for (size_t j = active_squash.find_first(); j != active_squash.npos;
j = active_squash.find_next(j)) { j = active_squash.find_next(j)) {
succ &= squash_mask.find(j)->second; succr &= squash_mask.find(j)->second;
} }
} }
for (size_t j = succ.find_first(); j != succ.npos; j = succ.find_next(j)) { for (size_t j = succr.find_first(); j != succr.npos; j = succr.find_next(j)) {
const CharReach &cr = cr_by_index[j]; const CharReach &cr = cr_by_index[j];
for (size_t s = cr.find_first(); s != cr.npos; s = cr.find_next(s)) { for (size_t s = cr.find_first(); s != cr.npos; s = cr.find_next(s)) {
next[s].set(j); /* already alpha'ed */ next[s].set(j); /* already alpha'ed */

View File

@ -398,19 +398,19 @@ CharReach reduced_cr(NFAVertex v, const NGHolder &g,
return v_cr; return v_cr;
} }
NFAVertex pred = getSoleSourceVertex(g, v); NFAVertex s_pred = getSoleSourceVertex(g, v);
assert(pred); assert(s_pred);
/* require pred to be fed by one vertex OR (start + startDS) */ /* require s_pred to be fed by one vertex OR (start + startDS) */
NFAVertex predpred; NFAVertex predpred;
size_t idp = in_degree(pred, g); size_t idp = in_degree(s_pred, g);
if (hasSelfLoop(pred, g)) { if (hasSelfLoop(s_pred, g)) {
return v_cr; /* not cliche */ return v_cr; /* not cliche */
} else if (idp == 1) { } else if (idp == 1) {
predpred = getSoleSourceVertex(g, pred); predpred = getSoleSourceVertex(g, s_pred);
} else if (idp == 2 } else if (idp == 2
&& edge(g.start, pred, g).second && edge(g.start, s_pred, g).second
&& edge(g.startDs, pred, g).second) { && edge(g.startDs, s_pred, g).second) {
predpred = g.startDs; predpred = g.startDs;
} else { } else {
return v_cr; /* not cliche */ return v_cr; /* not cliche */
@ -419,7 +419,7 @@ CharReach reduced_cr(NFAVertex v, const NGHolder &g,
assert(predpred); assert(predpred);
/* require predpred to be cyclic and its cr to be a superset of /* require predpred to be cyclic and its cr to be a superset of
pred and v */ s_pred and v */
if (!hasSelfLoop(predpred, g)) { if (!hasSelfLoop(predpred, g)) {
return v_cr; /* not cliche */ return v_cr; /* not cliche */
} }
@ -429,7 +429,7 @@ CharReach reduced_cr(NFAVertex v, const NGHolder &g,
return v_cr; /* fake cyclic */ return v_cr; /* fake cyclic */
} }
const CharReach &p_cr = g[pred].char_reach; const CharReach &p_cr = g[s_pred].char_reach;
const CharReach &pp_cr = g[predpred].char_reach; const CharReach &pp_cr = g[predpred].char_reach;
if (!v_cr.isSubsetOf(pp_cr) || !p_cr.isSubsetOf(pp_cr)) { if (!v_cr.isSubsetOf(pp_cr) || !p_cr.isSubsetOf(pp_cr)) {
return v_cr; /* not cliche */ return v_cr; /* not cliche */
@ -440,7 +440,7 @@ CharReach reduced_cr(NFAVertex v, const NGHolder &g,
set<NFAVertex> v_succ; set<NFAVertex> v_succ;
insert(&v_succ, adjacent_vertices(v, g)); insert(&v_succ, adjacent_vertices(v, g));
set<NFAVertex> p_succ; set<NFAVertex> p_succ;
insert(&p_succ, adjacent_vertices(pred, g)); insert(&p_succ, adjacent_vertices(s_pred, g));
if (!is_subset_of(v_succ, p_succ)) { if (!is_subset_of(v_succ, p_succ)) {
DEBUG_PRINTF("fail\n"); DEBUG_PRINTF("fail\n");
@ -450,7 +450,7 @@ CharReach reduced_cr(NFAVertex v, const NGHolder &g,
if (contains(v_succ, g.accept) || contains(v_succ, g.acceptEod)) { if (contains(v_succ, g.accept) || contains(v_succ, g.acceptEod)) {
/* need to check that reports of v are a subset of p's */ /* need to check that reports of v are a subset of p's */
if (!is_subset_of(g[v].reports, if (!is_subset_of(g[v].reports,
g[pred].reports)) { g[s_pred].reports)) {
DEBUG_PRINTF("fail - reports not subset\n"); DEBUG_PRINTF("fail - reports not subset\n");
return v_cr; /* not cliche */ return v_cr; /* not cliche */
} }

View File

@ -93,7 +93,8 @@ void addReverseEdges(NGHolder &g, vector<NFAEdge> &reverseEdge,
if (it == allEdges.end()) { if (it == allEdges.end()) {
// No reverse edge, add one. // No reverse edge, add one.
NFAVertex u = source(fwd, g), v = target(fwd, g); NFAVertex u = source(fwd, g), v = target(fwd, g);
NFAEdge rev = add_edge(v, u, g); NFAEdge rev;
std::tie(rev, std::ignore) = add_edge(v, u, g);
it = allEdges.insert(make_pair(make_pair(vidx, uidx), rev)).first; it = allEdges.insert(make_pair(make_pair(vidx, uidx), rev)).first;
// Add to capacity map. // Add to capacity map.
u32 revIndex = g[rev].index; u32 revIndex = g[rev].index;

View File

@ -62,11 +62,13 @@ void pruneUnreachable(NGHolder &g) {
&& edge(g.accept, g.acceptEod, g).second) { && edge(g.accept, g.acceptEod, g).second) {
// Trivial case: there are no in-edges to our accepts (other than // Trivial case: there are no in-edges to our accepts (other than
// accept->acceptEod), so all non-specials are unreachable. // accept->acceptEod), so all non-specials are unreachable.
for (auto v : vertices_range(g)) {
if (!is_special(v, g)) { auto deads = [&g=g](const NFAVertex &v) {
dead.emplace_back(v); return (!is_special(v, g));
} };
} const auto &vr = vertices_range(g);
std::copy_if(begin(vr), end(vr), std::back_inserter(dead), deads);
} else { } else {
// Walk a reverse graph from acceptEod with Boost's depth_first_visit // Walk a reverse graph from acceptEod with Boost's depth_first_visit
// call. // call.
@ -199,17 +201,17 @@ void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm) {
} }
vector<NFAEdge> dead; vector<NFAEdge> dead;
auto deads = [&g=g](const NFAEdge &e) {
return (!is_any_accept(target(e, g), g));
};
for (auto u : inv_adjacent_vertices_range(g.accept, g)) { for (auto u : inv_adjacent_vertices_range(g.accept, g)) {
if (is_special(u, g)) { if (is_special(u, g)) {
continue; continue;
} }
// We can prune any out-edges that aren't accepts // We can prune any out-edges that aren't accepts
for (const auto &e : out_edges_range(u, g)) { const auto &er = out_edges_range(u, g);
if (!is_any_accept(target(e, g), g)) { std::copy_if(begin(er), end(er), std::back_inserter(dead), deads);
dead.emplace_back(e);
}
}
} }
if (dead.empty()) { if (dead.empty()) {

View File

@ -241,7 +241,7 @@ u32 allowedSquashDistance(const CharReach &cr, u32 min_width, const NGHolder &g,
/** Gives a stronger puff trigger when the trigger is connected to a wide /** Gives a stronger puff trigger when the trigger is connected to a wide
* cyclic state (aside from sds) */ * cyclic state (aside from sds) */
static static
void improveHead(NGHolder &g, NFAVertex *a, vector<NFAVertex> *nodes) { void improveHead(const NGHolder &g, NFAVertex *a, vector<NFAVertex> *nodes) {
DEBUG_PRINTF("attempting to improve puff trigger\n"); DEBUG_PRINTF("attempting to improve puff trigger\n");
assert(!nodes->empty()); assert(!nodes->empty());
const CharReach &puff_cr = g[nodes->back()].char_reach; const CharReach &puff_cr = g[nodes->back()].char_reach;
@ -260,7 +260,7 @@ void improveHead(NGHolder &g, NFAVertex *a, vector<NFAVertex> *nodes) {
} }
static static
void constructPuff(NGHolder &g, const NFAVertex a, const NFAVertex puffv, void constructPuff(const NGHolder &g, const NFAVertex a, const NFAVertex puffv,
const CharReach &cr, const ReportID report, u32 width, const CharReach &cr, const ReportID report, u32 width,
bool fixed_depth, bool unbounded, bool auto_restart, bool fixed_depth, bool unbounded, bool auto_restart,
RoseBuild &rose, ReportManager &rm, RoseBuild &rose, ReportManager &rm,
@ -358,9 +358,7 @@ bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a,
// single report ID on a vertex // single report ID on a vertex
if (is_match_vertex(a, g)) { if (is_match_vertex(a, g)) {
DEBUG_PRINTF("stop puffing due to vertex that leads to accept\n"); DEBUG_PRINTF("stop puffing due to vertex that leads to accept\n");
if (!nodes.empty()) { nodes.pop_back();
nodes.pop_back();
}
break; break;
} }
} }

View File

@ -307,13 +307,15 @@ void markForRemoval(const NFAVertex v, VertexInfoMap &infoMap,
static static
bool hasInEdgeTops(const NGHolder &g, NFAVertex v) { bool hasInEdgeTops(const NGHolder &g, NFAVertex v) {
NFAEdge e = edge(g.start, v, g);
NFAEdge e;
std::tie(e, std::ignore) = edge(g.start, v, g);
return e && !g[e].tops.empty(); return e && !g[e].tops.empty();
} }
/** Transform (1), removal of redundant vertices. */ /** Transform (1), removal of redundant vertices. */
static static
bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, bool doUselessMergePass(const NGHolder &g, const som_type som, VertexInfoMap &infoMap,
set<NFAVertex> &removable) { set<NFAVertex> &removable) {
/* useless merges can be done in any order, no need to take any care with /* useless merges can be done in any order, no need to take any care with
* ordering */ * ordering */
@ -323,7 +325,7 @@ bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap,
bool changed = false; bool changed = false;
for (auto v : vertices_range(g)) { for (auto v : vertices_range(g)) {
VertexInfo &info = infoMap[v]; const VertexInfo &info = infoMap[v];
if (info.isRemoved) { if (info.isRemoved) {
continue; continue;
@ -439,7 +441,7 @@ bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap,
continue; // Conservatively skip anything with nonzero tops. continue; // Conservatively skip anything with nonzero tops.
} }
CharReach &otherReach = g[t].char_reach; const CharReach &otherReach = g[t].char_reach;
if (currReach.isSubsetOf(otherReach)) { if (currReach.isSubsetOf(otherReach)) {
DEBUG_PRINTF("removing redundant vertex %zu (keeping %zu)\n", DEBUG_PRINTF("removing redundant vertex %zu (keeping %zu)\n",
g[v].index, g[t].index); g[v].index, g[t].index);
@ -636,12 +638,12 @@ bool reversePathReachSubset(const NFAEdge &e, const NFAVertex &dom,
NFAVertex start = source(e, g); NFAVertex start = source(e, g);
using RevGraph = boost::reverse_graph<NGHolder, const NGHolder &>; using RevGraph = boost::reverse_graph<NGHolder, const NGHolder &>;
map<RevGraph::vertex_descriptor, boost::default_color_type> vertexColor;
// Walk the graph backwards from v, examining each node. We fail (return // Walk the graph backwards from v, examining each node. We fail (return
// false) if we encounter a node with reach NOT a subset of domReach, and // false) if we encounter a node with reach NOT a subset of domReach, and
// we stop searching at dom. // we stop searching at dom.
try { try {
map<RevGraph::vertex_descriptor, boost::default_color_type> vertexColor;
depth_first_visit(RevGraph(g), start, depth_first_visit(RevGraph(g), start,
ReachSubsetVisitor(domReach), ReachSubsetVisitor(domReach),
make_assoc_property_map(vertexColor), make_assoc_property_map(vertexColor),
@ -664,12 +666,12 @@ bool forwardPathReachSubset(const NFAEdge &e, const NFAVertex &dom,
} }
NFAVertex start = target(e, g); NFAVertex start = target(e, g);
map<NFAVertex, boost::default_color_type> vertexColor;
// Walk the graph forward from v, examining each node. We fail (return // Walk the graph forward from v, examining each node. We fail (return
// false) if we encounter a node with reach NOT a subset of domReach, and // false) if we encounter a node with reach NOT a subset of domReach, and
// we stop searching at dom. // we stop searching at dom.
try { try {
map<NFAVertex, boost::default_color_type> vertexColor;
depth_first_visit(g, start, ReachSubsetVisitor(domReach), depth_first_visit(g, start, ReachSubsetVisitor(domReach),
make_assoc_property_map(vertexColor), make_assoc_property_map(vertexColor),
VertexIs<NGHolder, NFAVertex>(dom)); VertexIs<NGHolder, NFAVertex>(dom));
@ -745,7 +747,7 @@ u32 findCyclic(const NGHolder &g, vector<bool> &cyclic) {
} }
static static
void findCyclicDom(NGHolder &g, vector<bool> &cyclic, void findCyclicDom(const NGHolder &g, vector<bool> &cyclic,
set<NFAEdge> &dead, som_type som) { set<NFAEdge> &dead, som_type som) {
auto dominators = findDominators(g); auto dominators = findDominators(g);
@ -789,7 +791,7 @@ void findCyclicDom(NGHolder &g, vector<bool> &cyclic,
} }
static static
void findCyclicPostDom(NGHolder &g, vector<bool> &cyclic, void findCyclicPostDom(const NGHolder &g, vector<bool> &cyclic,
set<NFAEdge> &dead) { set<NFAEdge> &dead) {
auto postdominators = findPostDominators(g); auto postdominators = findPostDominators(g);

View File

@ -391,9 +391,9 @@ void checkReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs,
unordered_set<NFAVertex> involved(rsi.vertices.begin(), unordered_set<NFAVertex> involved(rsi.vertices.begin(),
rsi.vertices.end()); rsi.vertices.end());
unordered_set<NFAVertex> tail(involved); // to look for back-edges. unordered_set<NFAVertex> tail(involved); // to look for back-edges.
unordered_set<NFAVertex> pred, succ; unordered_set<NFAVertex> v_pred, v_succ;
proper_pred(g, rsi.vertices.front(), pred); proper_pred(g, rsi.vertices.front(), v_pred);
proper_succ(g, rsi.vertices.back(), succ); proper_succ(g, rsi.vertices.back(), v_succ);
flat_set<ReportID> reports; flat_set<ReportID> reports;
findFirstReports(g, rsi, reports); findFirstReports(g, rsi, reports);
@ -404,7 +404,7 @@ void checkReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs,
for (auto v : rsi.vertices) { for (auto v : rsi.vertices) {
tail.erase(v); // now contains all vertices _after_ this one. tail.erase(v); // now contains all vertices _after_ this one.
if (vertexIsBad(g, v, involved, tail, pred, succ, reports)) { if (vertexIsBad(g, v, involved, tail, v_pred, v_succ, reports)) {
recalc = true; recalc = true;
continue; continue;
} }
@ -788,10 +788,10 @@ void replaceSubgraphWithSpecial(NGHolder &g, ReachSubgraph &rsi,
const unordered_set<NFAVertex> involved(rsi.vertices.begin(), const unordered_set<NFAVertex> involved(rsi.vertices.begin(),
rsi.vertices.end()); rsi.vertices.end());
vector<NFAVertex> succs; vector<NFAVertex> g_succs;
getSuccessors(g, rsi, &succs); getSuccessors(g, rsi, &g_succs);
unpeelNearEnd(g, rsi, depths, &succs); unpeelNearEnd(g, rsi, depths, &g_succs);
// Create our replacement cyclic state with the same reachability and // Create our replacement cyclic state with the same reachability and
// report info as the last vertex in our topo-ordered list. // report info as the last vertex in our topo-ordered list.
@ -819,7 +819,7 @@ void replaceSubgraphWithSpecial(NGHolder &g, ReachSubgraph &rsi,
// Wire cyclic state to tug trigger states built from successors. // Wire cyclic state to tug trigger states built from successors.
vector<NFAVertex> tugs; vector<NFAVertex> tugs;
for (auto v : succs) { for (auto v : g_succs) {
buildTugTrigger(g, cyclic, v, involved, depths, tugs); buildTugTrigger(g, cyclic, v, involved, depths, tugs);
} }
created.insert(tugs.begin(), tugs.end()); created.insert(tugs.begin(), tugs.end());
@ -854,11 +854,9 @@ void replaceSubgraphWithLazySpecial(NGHolder &g, ReachSubgraph &rsi,
assert(rsi.repeatMax >= rsi.repeatMin); assert(rsi.repeatMax >= rsi.repeatMin);
DEBUG_PRINTF("entry\n"); DEBUG_PRINTF("entry\n");
const unordered_set<NFAVertex> involved(rsi.vertices.begin(), vector<NFAVertex> g_succs;
rsi.vertices.end()); getSuccessors(g, rsi, &g_succs);
vector<NFAVertex> succs;
getSuccessors(g, rsi, &succs);
// Create our replacement cyclic state with the same reachability and // Create our replacement cyclic state with the same reachability and
// report info as the last vertex in our topo-ordered list. // report info as the last vertex in our topo-ordered list.
@ -887,15 +885,15 @@ void replaceSubgraphWithLazySpecial(NGHolder &g, ReachSubgraph &rsi,
// In the rose case, our tug is our cyclic, and it's wired to our // In the rose case, our tug is our cyclic, and it's wired to our
// successors (which should be just the accept). // successors (which should be just the accept).
vector<NFAVertex> tugs; vector<NFAVertex> tugs;
assert(succs.size() == 1); assert(g_succs.size() == 1);
for (auto v : succs) { for (auto v : g_succs) {
add_edge(cyclic, v, g); add_edge(cyclic, v, g);
} }
// Wire pos trigger to accept if min repeat is one -- this deals with cases // Wire pos trigger to accept if min repeat is one -- this deals with cases
// where we can get a pos and tug trigger on the same byte. // where we can get a pos and tug trigger on the same byte.
if (rsi.repeatMin == depth(1)) { if (rsi.repeatMin == depth(1)) {
for (auto v : succs) { for (auto v : g_succs) {
add_edge(pos_trigger, v, g); add_edge(pos_trigger, v, g);
g[pos_trigger].reports = g[cyclic].reports; g[pos_trigger].reports = g[cyclic].reports;
} }
@ -1135,7 +1133,7 @@ NFAVertex buildTriggerStates(NGHolder &g, const vector<CharReach> &trigger,
g[v].char_reach = cr; g[v].char_reach = cr;
add_edge(u, v, g); add_edge(u, v, g);
if (u == g.start) { if (u == g.start) {
g[edge(u, v, g)].tops.insert(top); g[edge(u, v, g).first].tops.insert(top);
} }
u = v; u = v;
} }
@ -1456,9 +1454,9 @@ struct StrawWalker {
} }
if (ai != ae) { if (ai != ae) {
DEBUG_PRINTF("more than one succ\n"); DEBUG_PRINTF("more than one succ\n");
set<NFAVertex> succs; set<NFAVertex> a_succs;
insert(&succs, adjacent_vertices(v, g)); insert(&a_succs, adjacent_vertices(v, g));
succs.erase(v); a_succs.erase(v);
for (tie(ai, ae) = adjacent_vertices(v, g); ai != ae; ++ai) { for (tie(ai, ae) = adjacent_vertices(v, g); ai != ae; ++ai) {
next = *ai; next = *ai;
DEBUG_PRINTF("checking %zu\n", g[next].index); DEBUG_PRINTF("checking %zu\n", g[next].index);
@ -1468,7 +1466,7 @@ struct StrawWalker {
set<NFAVertex> lsuccs; set<NFAVertex> lsuccs;
insert(&lsuccs, adjacent_vertices(next, g)); insert(&lsuccs, adjacent_vertices(next, g));
if (lsuccs != succs) { if (lsuccs != a_succs) {
continue; continue;
} }
@ -1874,7 +1872,7 @@ void buildFeeder(NGHolder &g, const BoundedRepeatData &rd,
* offset. * offset.
*/ */
static static
bool improveLeadingRepeat(NGHolder &g, BoundedRepeatData &rd, bool improveLeadingRepeat(NGHolder &g, const BoundedRepeatData &rd,
unordered_set<NFAVertex> &created, unordered_set<NFAVertex> &created,
const vector<BoundedRepeatData> &all_repeats) { const vector<BoundedRepeatData> &all_repeats) {
assert(edge(g.startDs, g.startDs, g).second); assert(edge(g.startDs, g.startDs, g).second);
@ -1895,9 +1893,9 @@ bool improveLeadingRepeat(NGHolder &g, BoundedRepeatData &rd,
} }
vector<NFAVertex> straw; vector<NFAVertex> straw;
NFAVertex pred = NFAVertex w_pred =
walkStrawToCyclicRev(g, rd.pos_trigger, all_repeats, straw); walkStrawToCyclicRev(g, rd.pos_trigger, all_repeats, straw);
if (pred != g.startDs) { if (w_pred != g.startDs) {
DEBUG_PRINTF("straw walk doesn't lead to startDs\n"); DEBUG_PRINTF("straw walk doesn't lead to startDs\n");
return false; return false;
} }
@ -1944,7 +1942,7 @@ bool improveLeadingRepeat(NGHolder &g, BoundedRepeatData &rd,
} }
static static
vector<NFAVertex> makeOwnStraw(NGHolder &g, BoundedRepeatData &rd, vector<NFAVertex> makeOwnStraw(NGHolder &g, const BoundedRepeatData &rd,
const vector<NFAVertex> &straw) { const vector<NFAVertex> &straw) {
// Straw runs from startDs to our pos trigger. // Straw runs from startDs to our pos trigger.
assert(!straw.empty()); assert(!straw.empty());
@ -1978,7 +1976,7 @@ vector<NFAVertex> makeOwnStraw(NGHolder &g, BoundedRepeatData &rd,
* rewire the straw to start instead of removing the startDs self-loop. * rewire the straw to start instead of removing the startDs self-loop.
*/ */
static static
bool improveLeadingRepeatOutfix(NGHolder &g, BoundedRepeatData &rd, bool improveLeadingRepeatOutfix(NGHolder &g, const BoundedRepeatData &rd,
unordered_set<NFAVertex> &created, unordered_set<NFAVertex> &created,
const vector<BoundedRepeatData> &all_repeats) { const vector<BoundedRepeatData> &all_repeats) {
assert(g.kind == NFA_OUTFIX); assert(g.kind == NFA_OUTFIX);
@ -1999,9 +1997,9 @@ bool improveLeadingRepeatOutfix(NGHolder &g, BoundedRepeatData &rd,
} }
vector<NFAVertex> straw; vector<NFAVertex> straw;
NFAVertex pred = NFAVertex w_pred =
walkStrawToCyclicRev(g, rd.pos_trigger, all_repeats, straw); walkStrawToCyclicRev(g, rd.pos_trigger, all_repeats, straw);
if (pred != g.startDs) { if (w_pred != g.startDs) {
DEBUG_PRINTF("straw walk doesn't lead to startDs\n"); DEBUG_PRINTF("straw walk doesn't lead to startDs\n");
return false; return false;
} }

View File

@ -54,8 +54,8 @@ void wireStartToTops(NGHolder &g, const flat_set<NFAVertex> &tops,
vector<NFAEdge> &tempEdges) { vector<NFAEdge> &tempEdges) {
for (NFAVertex v : tops) { for (NFAVertex v : tops) {
assert(!isLeafNode(v, g)); assert(!isLeafNode(v, g));
auto edge_result = add_edge(g.start, v, g);
const NFAEdge &e = add_edge(g.start, v, g); const NFAEdge &e = edge_result.first;
tempEdges.emplace_back(e); tempEdges.emplace_back(e);
} }
} }

View File

@ -876,18 +876,18 @@ bool beginsWithDotStar(const NGHolder &g) {
// We can ignore the successors of start, as matches that begin there will // We can ignore the successors of start, as matches that begin there will
// necessarily have a SOM of 0. // necessarily have a SOM of 0.
set<NFAVertex> succ; set<NFAVertex> a_succ;
insert(&succ, adjacent_vertices(g.startDs, g)); insert(&a_succ, adjacent_vertices(g.startDs, g));
succ.erase(g.startDs); a_succ.erase(g.startDs);
for (auto v : succ) { for (auto v : a_succ) {
// We want 'dot' states that aren't virtual starts. // We want 'dot' states that aren't virtual starts.
if (g[v].char_reach.all() && if (g[v].char_reach.all() &&
!g[v].assert_flags) { !g[v].assert_flags) {
hasDot = true; hasDot = true;
set<NFAVertex> dotsucc; set<NFAVertex> dotsucc;
insert(&dotsucc, adjacent_vertices(v, g)); insert(&dotsucc, adjacent_vertices(v, g));
if (dotsucc != succ) { if (dotsucc != a_succ) {
DEBUG_PRINTF("failed dot-star succ check\n"); DEBUG_PRINTF("failed dot-star succ check\n");
return false; return false;
} }
@ -1177,7 +1177,7 @@ void expandGraph(NGHolder &g, unordered_map<NFAVertex, u32> &regions,
} }
static static
bool doTreePlanningIntl(NGHolder &g, bool doTreePlanningIntl(const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions, const unordered_map<NFAVertex, u32> &regions,
const map<u32, region_info> &info, const map<u32, region_info> &info,
map<u32, region_info>::const_iterator picked, u32 bad_region, map<u32, region_info>::const_iterator picked, u32 bad_region,
@ -1292,8 +1292,8 @@ bool doTreePlanningIntl(NGHolder &g,
DEBUG_PRINTF("add mapped reporters for region %u\n", it->first); DEBUG_PRINTF("add mapped reporters for region %u\n", it->first);
addMappedReporterVertices(it->second, g, copy_to_orig, addMappedReporterVertices(it->second, g, copy_to_orig,
plan.back().reporters); plan.back().reporters);
} while (it->second.optional && it != info.rend() && } while (it != info.rend() && it->second.optional &&
(++it)->first > furthest->first); (++it)->first > furthest->first);
return true; return true;
} }
@ -1408,7 +1408,7 @@ bool doSomPlanning(NGHolder &g, bool stuck_in,
/* Need to verify how far the lock covers */ /* Need to verify how far the lock covers */
u32 bad_region; u32 bad_region;
NGHolder *ap_pref = plan.back().prefix.get(); const NGHolder *ap_pref = plan.back().prefix.get();
NGHolder ap_temp; NGHolder ap_temp;
if (hasBigCycles(*ap_pref)) { if (hasBigCycles(*ap_pref)) {
fillRoughMidfix(&ap_temp, g, regions, info, picked); fillRoughMidfix(&ap_temp, g, regions, info, picked);
@ -1551,7 +1551,7 @@ bool doSomPlanning(NGHolder &g, bool stuck_in,
DEBUG_PRINTF("region %u contributes reporters to last plan\n", DEBUG_PRINTF("region %u contributes reporters to last plan\n",
it->first); it->first);
addReporterVertices(it->second, g, plan.back().reporters); addReporterVertices(it->second, g, plan.back().reporters);
} while (it->second.optional && it != info.rend() && } while (it != info.rend() && it->second.optional &&
(++it)->first > furthest->first); (++it)->first > furthest->first);
DEBUG_PRINTF("done!\n"); DEBUG_PRINTF("done!\n");
@ -1855,7 +1855,7 @@ bool doSomRevNfa(NG &ng, NGHolder &g, const CompileContext &cc) {
} }
static static
u32 doSomRevNfaPrefix(NG &ng, const ExpressionInfo &expr, NGHolder &g, u32 doSomRevNfaPrefix(NG &ng, const ExpressionInfo &expr, const NGHolder &g,
const CompileContext &cc) { const CompileContext &cc) {
depth maxWidth = findMaxWidth(g); depth maxWidth = findMaxWidth(g);
@ -2011,7 +2011,7 @@ void setReportOnHaigPrefix(RoseBuild &rose, NGHolder &h) {
} }
static static
bool tryHaig(RoseBuild &rose, NGHolder &g, bool tryHaig(RoseBuild &rose, const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions, const unordered_map<NFAVertex, u32> &regions,
som_type som, u32 somPrecision, som_type som, u32 somPrecision,
map<u32, region_info>::const_iterator picked, map<u32, region_info>::const_iterator picked,
@ -2442,13 +2442,9 @@ void makeReportsSomPass(ReportManager &rm, NGHolder &g) {
} }
static static
bool doLitHaigSom(NG &ng, NGHolder &g, som_type som) { bool doLitHaigSom(NG &ng, const NGHolder &g, som_type som) {
ue2_literal lit; ue2_literal lit;
shared_ptr<NGHolder> rhs = make_shared<NGHolder>(); shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
if (!rhs) {
assert(0);
throw std::bad_alloc();
}
if (!ng.cc.grey.allowLitHaig) { if (!ng.cc.grey.allowLitHaig) {
return false; return false;
} }
@ -2513,10 +2509,6 @@ bool doHaigLitHaigSom(NG &ng, NGHolder &g,
ue2_literal lit; ue2_literal lit;
shared_ptr<NGHolder> rhs = make_shared<NGHolder>(); shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
shared_ptr<NGHolder> lhs = make_shared<NGHolder>(); shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
if (!rhs || !lhs) {
assert(0);
throw std::bad_alloc();
}
if (!splitOffBestLiteral(g, regions, &lit, &*lhs, &*rhs, ng.cc)) { if (!splitOffBestLiteral(g, regions, &lit, &*lhs, &*rhs, ng.cc)) {
return false; return false;
@ -2659,7 +2651,7 @@ bool doHaigLitHaigSom(NG &ng, NGHolder &g,
} }
static static
bool doMultiLitHaigSom(NG &ng, NGHolder &g, som_type som) { bool doMultiLitHaigSom(NG &ng, const NGHolder &g, som_type som) {
set<ue2_literal> lits; set<ue2_literal> lits;
shared_ptr<NGHolder> rhs = make_shared<NGHolder>(); shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
if (!ng.cc.grey.allowLitHaig) { if (!ng.cc.grey.allowLitHaig) {
@ -3133,7 +3125,7 @@ sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const ExpressionInfo &expr,
// try a redundancy pass. // try a redundancy pass.
if (addSomRedundancy(g, depths)) { if (addSomRedundancy(g, depths)) {
depths = getDistancesFromSOM(g); depths = getDistancesFromSOM(g); // cppcheck-suppress unreadVariable
} }
auto regions = assignRegions(g); auto regions = assignRegions(g);

View File

@ -112,9 +112,9 @@ bool forkVertex(NFAVertex v, NGHolder &g, vector<DepthMinMax> &depths,
} }
*numNewVertices += predGroups.size(); *numNewVertices += predGroups.size();
for (auto &group : predGroups) { for (const auto &group : predGroups) {
const depth &predDepth = group.first; const depth &predDepth = group.first;
const vector<NFAEdge> &preds = group.second; const vector<NFAEdge> &gspreds = group.second;
// Clone v for this depth with all its associated out-edges. // Clone v for this depth with all its associated out-edges.
u32 clone_idx = depths.size(); // next index to be used u32 clone_idx = depths.size(); // next index to be used
@ -130,8 +130,8 @@ bool forkVertex(NFAVertex v, NGHolder &g, vector<DepthMinMax> &depths,
add_edge(clone, target(e, g), g[e], g); add_edge(clone, target(e, g), g[e], g);
} }
// Add in-edges from preds in this group. // Add in-edges from gspreds in this group.
for (const auto &e : preds) { for (const auto &e : gspreds) {
add_edge(source(e, g), clone, g[e], g); add_edge(source(e, g), clone, g[e], g);
} }
} }

View File

@ -58,11 +58,12 @@ vector<DepthMinMax> getDistancesFromSOM(const NGHolder &g_orig) {
cloneHolder(g, g_orig, &vmap); cloneHolder(g, g_orig, &vmap);
vector<NFAVertex> vstarts; vector<NFAVertex> vstarts;
for (auto v : vertices_range(g)) { auto vstart = [&g=g](const NFAVertex &v) {
if (is_virtual_start(v, g)) { return (is_virtual_start(v, g));
vstarts.emplace_back(v); };
} const auto &vr = vertices_range(g);
} std::copy_if(begin(vr), end(vr), std::back_inserter(vstarts), vstart);
vstarts.emplace_back(g.startDs); vstarts.emplace_back(g.startDs);
// wire the successors of every virtual start or startDs to g.start. // wire the successors of every virtual start or startDs to g.start.
@ -267,18 +268,6 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g,
boost::depth_first_search(c_g, visitor(backEdgeVisitor) boost::depth_first_search(c_g, visitor(backEdgeVisitor)
.root_vertex(c_g.start)); .root_vertex(c_g.start));
for (const auto &e : be) {
NFAVertex s = source(e, c_g);
NFAVertex t = target(e, c_g);
DEBUG_PRINTF("back edge %zu %zu\n", c_g[s].index, c_g[t].index);
if (s != t) {
assert(0);
DEBUG_PRINTF("eek big cycle\n");
rv = true; /* big cycle -> eek */
goto exit;
}
}
DEBUG_PRINTF("checking acyclic+selfloop graph\n"); DEBUG_PRINTF("checking acyclic+selfloop graph\n");
rv = !firstMatchIsFirst(c_g); rv = !firstMatchIsFirst(c_g);

View File

@ -151,7 +151,8 @@ void splitRHS(const NGHolder &base, const vector<NFAVertex> &pivots,
for (auto pivot : pivots) { for (auto pivot : pivots) {
assert(contains(*rhs_map, pivot)); assert(contains(*rhs_map, pivot));
NFAEdge e = add_edge(rhs->start, (*rhs_map)[pivot], *rhs); auto edge_result = add_edge(rhs->start, (*rhs_map)[pivot], *rhs);
NFAEdge e = edge_result.first;
(*rhs)[e].tops.insert(DEFAULT_TOP); (*rhs)[e].tops.insert(DEFAULT_TOP);
} }

View File

@ -255,19 +255,19 @@ void buildSquashMask(NFAStateSet &mask, const NGHolder &g, NFAVertex v,
} }
static static
void buildSucc(NFAStateSet &succ, const NGHolder &g, NFAVertex v) { void buildSucc(NFAStateSet &ssucc, const NGHolder &g, NFAVertex v) {
for (auto w : adjacent_vertices_range(v, g)) { for (auto w : adjacent_vertices_range(v, g)) {
if (!is_special(w, g)) { if (!is_special(w, g)) {
succ.set(g[w].index); ssucc.set(g[w].index);
} }
} }
} }
static static
void buildPred(NFAStateSet &pred, const NGHolder &g, NFAVertex v) { void buildPred(NFAStateSet &spred, const NGHolder &g, NFAVertex v) {
for (auto u : inv_adjacent_vertices_range(v, g)) { for (auto u : inv_adjacent_vertices_range(v, g)) {
if (!is_special(u, g)) { if (!is_special(u, g)) {
pred.set(g[u].index); spred.set(g[u].index);
} }
} }
} }
@ -408,19 +408,19 @@ unordered_map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g,
DEBUG_PRINTF("state %u is cyclic\n", i); DEBUG_PRINTF("state %u is cyclic\n", i);
NFAStateSet mask(numStates), succ(numStates), pred(numStates); NFAStateSet mask(numStates), ssucc(numStates), spred(numStates);
buildSquashMask(mask, g, v, cr, initStates, vByIndex, pdom_tree, som, buildSquashMask(mask, g, v, cr, initStates, vByIndex, pdom_tree, som,
som_depths, region_map, cache); som_depths, region_map, cache);
buildSucc(succ, g, v); buildSucc(ssucc, g, v);
buildPred(pred, g, v); buildPred(spred, g, v);
const auto &reports = g[v].reports; const auto &reports = g[v].reports;
for (size_t j = succ.find_first(); j != succ.npos; for (size_t j = ssucc.find_first(); j != ssucc.npos;
j = succ.find_next(j)) { j = ssucc.find_next(j)) {
NFAVertex vj = vByIndex[j]; NFAVertex vj = vByIndex[j];
NFAStateSet pred2(numStates); NFAStateSet pred2(numStates);
buildPred(pred2, g, vj); buildPred(pred2, g, vj);
if (pred2 == pred) { if (pred2 == spred) {
DEBUG_PRINTF("adding the sm from %zu to %u's sm\n", j, i); DEBUG_PRINTF("adding the sm from %zu to %u's sm\n", j, i);
NFAStateSet tmp(numStates); NFAStateSet tmp(numStates);
buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree, buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree,
@ -429,14 +429,14 @@ unordered_map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g,
} }
} }
for (size_t j = pred.find_first(); j != pred.npos; for (size_t j = spred.find_first(); j != spred.npos;
j = pred.find_next(j)) { j = spred.find_next(j)) {
NFAVertex vj = vByIndex[j]; NFAVertex vj = vByIndex[j];
NFAStateSet succ2(numStates); NFAStateSet succ2(numStates);
buildSucc(succ2, g, vj); buildSucc(succ2, g, vj);
/* we can use j as a basis for squashing if its succs are a subset /* we can use j as a basis for squashing if its succs are a subset
* of ours */ * of ours */
if ((succ2 & ~succ).any()) { if ((succ2 & ~ssucc).any()) {
continue; continue;
} }
@ -589,7 +589,7 @@ void getHighlanderReporters(const NGHolder &g, const NFAVertex accept,
verts.insert(v); verts.insert(v);
next_vertex: next_vertex:
continue; ;
} }
} }

View File

@ -196,10 +196,11 @@ u32 commonPrefixLength(const NGHolder &ga, const ranking_info &a_ranking,
} }
a_count++; a_count++;
NFAEdge b_edge;
bool b_edge_bool;
std::tie(b_edge, b_edge_bool) = edge(b_ranking.at(i), b_ranking.at(sid), gb);
NFAEdge b_edge = edge(b_ranking.at(i), b_ranking.at(sid), gb); if (!b_edge_bool) {
if (!b_edge) {
max = i; max = i;
DEBUG_PRINTF("lowering max to %u due to edge %zu->%u\n", DEBUG_PRINTF("lowering max to %u due to edge %zu->%u\n",
max, i, sid); max, i, sid);
@ -319,7 +320,7 @@ void mergeNfaComponent(NGHolder &dest, const NGHolder &vic, size_t common_len) {
DEBUG_PRINTF("skipping common edge\n"); DEBUG_PRINTF("skipping common edge\n");
assert(edge(u, v, dest).second); assert(edge(u, v, dest).second);
// Should never merge edges with different top values. // Should never merge edges with different top values.
assert(vic[e].tops == dest[edge(u, v, dest)].tops); assert(vic[e].tops == dest[edge(u, v, dest).first].tops);
continue; continue;
} else { } else {
assert(is_any_accept(v, dest)); assert(is_any_accept(v, dest));
@ -454,8 +455,8 @@ void buildNfaMergeQueue(const vector<NGHolder *> &cluster,
} }
} }
NGHolder &g_i = *(cluster[ci]); const NGHolder &g_i = *(cluster[ci]);
NGHolder &g_j = *(cluster[cj]); const NGHolder &g_j = *(cluster[cj]);
if (!compatibleStarts(g_i, g_j)) { if (!compatibleStarts(g_i, g_j)) {
continue; continue;
@ -505,16 +506,26 @@ bool mergeableStarts(const NGHolder &h1, const NGHolder &h2) {
/* TODO: relax top checks if reports match */ /* TODO: relax top checks if reports match */
// If both graphs have edge (start, accept), the tops must match. // If both graphs have edge (start, accept), the tops must match.
NFAEdge e1_accept = edge(h1.start, h1.accept, h1); bool bool_e1_accept;
NFAEdge e2_accept = edge(h2.start, h2.accept, h2); NFAEdge e1_accept;
if (e1_accept && e2_accept && h1[e1_accept].tops != h2[e2_accept].tops) { NFAEdge e2_accept;
std::tie(e1_accept, bool_e1_accept) = edge(h1.start, h1.accept, h1);
bool bool_e2_accept;
std::tie(e2_accept, bool_e2_accept) = edge(h2.start, h2.accept, h2);
if (bool_e1_accept && bool_e2_accept && h1[e1_accept].tops != h2[e2_accept].tops) {
return false; return false;
} }
// If both graphs have edge (start, acceptEod), the tops must match. // If both graphs have edge (start, acceptEod), the tops must match.
NFAEdge e1_eod = edge(h1.start, h1.acceptEod, h1); bool bool_e1_eod;
NFAEdge e2_eod = edge(h2.start, h2.acceptEod, h2); NFAEdge e1_eod;
if (e1_eod && e2_eod && h1[e1_eod].tops != h2[e2_eod].tops) { NFAEdge e2_eod;
std::tie(e1_eod, bool_e1_eod) = edge(h1.start, h1.acceptEod, h1);
bool bool_e2_eod;
std::tie(e2_eod, bool_e2_eod) = edge(h2.start, h2.acceptEod, h2);
if (bool_e1_eod && bool_e2_eod && h1[e1_eod].tops != h2[e2_eod].tops) {
return false; return false;
} }

View File

@ -128,7 +128,7 @@ void clone_out_edges(NGHolder &g, NFAVertex source, NFAVertex dest) {
if (edge(dest, t, g).second) { if (edge(dest, t, g).second) {
continue; continue;
} }
NFAEdge clone = add_edge(dest, t, g); NFAEdge clone = add_edge(dest, t, g).first;
u32 idx = g[clone].index; u32 idx = g[clone].index;
g[clone] = g[e]; g[clone] = g[e];
g[clone].index = idx; g[clone].index = idx;
@ -139,7 +139,7 @@ void clone_in_edges(NGHolder &g, NFAVertex s, NFAVertex dest) {
for (const auto &e : in_edges_range(s, g)) { for (const auto &e : in_edges_range(s, g)) {
NFAVertex ss = source(e, g); NFAVertex ss = source(e, g);
assert(!edge(ss, dest, g).second); assert(!edge(ss, dest, g).second);
NFAEdge clone = add_edge(ss, dest, g); NFAEdge clone = add_edge(ss, dest, g).first;
u32 idx = g[clone].index; u32 idx = g[clone].index;
g[clone] = g[e]; g[clone] = g[e];
g[clone].index = idx; g[clone].index = idx;
@ -278,9 +278,11 @@ bool can_only_match_at_eod(const NGHolder &g) {
} }
bool matches_everywhere(const NGHolder &h) { bool matches_everywhere(const NGHolder &h) {
NFAEdge e = edge(h.startDs, h.accept, h); bool bool_e;
NFAEdge e;
std::tie(e, bool_e) = edge(h.startDs, h.accept, h);
return e && !h[e].assert_flags; return bool_e && !h[e].assert_flags;
} }
bool is_virtual_start(NFAVertex v, const NGHolder &g) { bool is_virtual_start(NFAVertex v, const NGHolder &g) {
@ -405,9 +407,10 @@ void appendLiteral(NGHolder &h, const ue2_literal &s) {
DEBUG_PRINTF("adding '%s' to graph\n", dumpString(s).c_str()); DEBUG_PRINTF("adding '%s' to graph\n", dumpString(s).c_str());
vector<NFAVertex> tail; vector<NFAVertex> tail;
assert(in_degree(h.acceptEod, h) == 1); assert(in_degree(h.acceptEod, h) == 1);
for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
tail.emplace_back(v); const auto &vr = inv_adjacent_vertices_range(h.accept, h);
} std::copy(begin(vr), end(vr), std::back_inserter(tail));
assert(!tail.empty()); assert(!tail.empty());
for (auto v : tail) { for (auto v : tail) {
@ -568,7 +571,7 @@ void cloneHolder(NGHolder &out, const NGHolder &in) {
NFAVertex s = out_mapping[si]; NFAVertex s = out_mapping[si];
NFAVertex t = out_mapping[ti]; NFAVertex t = out_mapping[ti];
NFAEdge e2 = add_edge(s, t, out); NFAEdge e2 = add_edge(s, t, out).first;
out[e2] = in[e]; out[e2] = in[e];
} }
@ -709,16 +712,16 @@ u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
assert(delay <= lit.length()); assert(delay <= lit.length());
DEBUG_PRINTF("managed delay %u (of max %u)\n", delay, max_delay); DEBUG_PRINTF("managed delay %u (of max %u)\n", delay, max_delay);
set<NFAVertex> pred; set<NFAVertex> predv;
for (auto v : curr) { for (auto v : curr) {
insert(&pred, inv_adjacent_vertices_range(v, g)); insert(&predv, inv_adjacent_vertices_range(v, g));
} }
clear_in_edges(g.accept, g); clear_in_edges(g.accept, g);
clearReports(g); clearReports(g);
for (auto v : pred) { for (auto v : predv) {
NFAEdge e = add_edge(v, g.accept, g); NFAEdge e = add_edge(v, g.accept, g).first;
g[v].reports.insert(0); g[v].reports.insert(0);
if (is_triggered(g) && v == g.start) { if (is_triggered(g) && v == g.start) {
g[e].tops.insert(DEFAULT_TOP); g[e].tops.insert(DEFAULT_TOP);

View File

@ -314,7 +314,7 @@ void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new);
/** Construct a reversed copy of an arbitrary NGHolder, mapping starts to /** Construct a reversed copy of an arbitrary NGHolder, mapping starts to
* accepts. */ * accepts. */
void reverseHolder(const NGHolder &g, NGHolder &out); void reverseHolder(const NGHolder &g_in, NGHolder &g);
/** \brief Returns the delay or ~0U if the graph cannot match with /** \brief Returns the delay or ~0U if the graph cannot match with
* the trailing literal. */ * the trailing literal. */

View File

@ -348,10 +348,9 @@ void getSimpleRoseLiterals(const NGHolder &g, bool seeking_anchored,
map<NFAVertex, u64a> scores; map<NFAVertex, u64a> scores;
map<NFAVertex, unique_ptr<VertLitInfo>> lit_info; map<NFAVertex, unique_ptr<VertLitInfo>> lit_info;
set<ue2_literal> s;
for (auto v : a_dom) { for (auto v : a_dom) {
s = getLiteralSet(g, v, true); /* RHS will take responsibility for any set<ue2_literal> s = getLiteralSet(g, v, true); /* RHS will take responsibility for any
revisits to the target vertex */ revisits to the target vertex */
if (s.empty()) { if (s.empty()) {
@ -688,7 +687,7 @@ unique_ptr<VertLitInfo> findBestSplit(const NGHolder &g,
} }
if (seeking_transient) { if (seeking_transient) {
for (auto &a : lits) { for (const auto &a : lits) {
a->creates_transient a->creates_transient
= createsTransientLHS(g, a->vv, *depths, cc.grey); = createsTransientLHS(g, a->vv, *depths, cc.grey);
} }
@ -697,20 +696,20 @@ unique_ptr<VertLitInfo> findBestSplit(const NGHolder &g,
if (last_chance) { if (last_chance) {
const size_t num_verts = num_vertices(g); const size_t num_verts = num_vertices(g);
auto color_map = make_small_color_map(g); auto color_map = make_small_color_map(g);
for (auto &a : lits) { for (const auto &a : lits) {
size_t num_reachable = count_reachable(g, a->vv, color_map); size_t num_reachable = count_reachable(g, a->vv, color_map);
double ratio = (double)num_reachable / (double)num_verts; double ratio = (double)num_reachable / (double)num_verts;
a->split_ratio = ratio > 0.5 ? 1 - ratio : ratio; a->split_ratio = ratio > 0.5 ? 1 - ratio : ratio;
} }
} }
auto cmp = LitComparator(g, seeking_anchored, seeking_transient, auto lcmp = LitComparator(g, seeking_anchored, seeking_transient,
last_chance); last_chance);
unique_ptr<VertLitInfo> best = std::move(lits.back()); unique_ptr<VertLitInfo> best = std::move(lits.back());
lits.pop_back(); lits.pop_back();
while (!lits.empty()) { while (!lits.empty()) {
if (cmp(best, lits.back())) { if (lcmp(best, lits.back())) {
best = std::move(lits.back()); best = std::move(lits.back());
} }
lits.pop_back(); lits.pop_back();
@ -812,7 +811,7 @@ flat_set<NFAEdge> poisonEdges(const NGHolder &h,
/* poison edges covered by successor literal */ /* poison edges covered by successor literal */
set<pair<ue2_literal, bool> > succs; set<pair<ue2_literal, bool> > lsuccs;
for (const RoseInEdge &ve : ee) { for (const RoseInEdge &ve : ee) {
if (vg[target(ve, vg)].type != RIV_LITERAL) { if (vg[target(ve, vg)].type != RIV_LITERAL) {
/* nothing to poison in suffixes/outfixes */ /* nothing to poison in suffixes/outfixes */
@ -820,15 +819,15 @@ flat_set<NFAEdge> poisonEdges(const NGHolder &h,
assert(is_any_accept_type(vg[target(ve, vg)].type)); assert(is_any_accept_type(vg[target(ve, vg)].type));
continue; continue;
} }
succs.insert({vg[target(ve, vg)].s, lsuccs.insert({vg[target(ve, vg)].s,
vg[source(ve, vg)].type == RIV_LITERAL}); vg[source(ve, vg)].type == RIV_LITERAL});
} }
DEBUG_PRINTF("poisoning edges %zu successor literals\n", succs.size()); DEBUG_PRINTF("poisoning edges %zu successor literals\n", lsuccs.size());
flat_set<NFAEdge> bad; flat_set<NFAEdge> bad;
for (const auto &p : succs) { for (const auto &p : lsuccs) {
poisonFromSuccessor(h, p.first, p.second, bad); poisonFromSuccessor(h, p.first, p.second, bad);
} }
@ -1036,11 +1035,6 @@ bool splitRoseEdge(const NGHolder &base_graph, RoseInGraph &vg,
shared_ptr<NGHolder> lhs = make_shared<NGHolder>(); shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
shared_ptr<NGHolder> rhs = make_shared<NGHolder>(); shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
if (!lhs || !rhs) {
assert(0);
throw std::bad_alloc();
}
unordered_map<NFAVertex, NFAVertex> lhs_map; unordered_map<NFAVertex, NFAVertex> lhs_map;
unordered_map<NFAVertex, NFAVertex> rhs_map; unordered_map<NFAVertex, NFAVertex> rhs_map;
@ -1172,7 +1166,7 @@ bool splitRoseEdge(const NGHolder &base_graph, RoseInGraph &vg,
#define MAX_LEN_2_LITERALS_PER_CUT 3 #define MAX_LEN_2_LITERALS_PER_CUT 3
static static
bool checkValidNetflowLits(NGHolder &h, const vector<u64a> &scores, bool checkValidNetflowLits(const NGHolder &h, const vector<u64a> &scores,
const map<NFAEdge, set<ue2_literal>> &cut_lits, const map<NFAEdge, set<ue2_literal>> &cut_lits,
u32 min_allowed_length) { u32 min_allowed_length) {
DEBUG_PRINTF("cut width %zu; min allowed %u\n", cut_lits.size(), DEBUG_PRINTF("cut width %zu; min allowed %u\n", cut_lits.size(),
@ -1209,7 +1203,7 @@ bool checkValidNetflowLits(NGHolder &h, const vector<u64a> &scores,
} }
static static
void splitEdgesByCut(NGHolder &h, RoseInGraph &vg, void splitEdgesByCut(const NGHolder &h, RoseInGraph &vg,
const vector<RoseInEdge> &to_cut, const vector<RoseInEdge> &to_cut,
const vector<NFAEdge> &cut, const vector<NFAEdge> &cut,
const map<NFAEdge, set<ue2_literal>> &cut_lits) { const map<NFAEdge, set<ue2_literal>> &cut_lits) {
@ -1234,17 +1228,14 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg,
DEBUG_PRINTF("splitting on pivot %zu\n", h[pivot].index); DEBUG_PRINTF("splitting on pivot %zu\n", h[pivot].index);
unordered_map<NFAVertex, NFAVertex> temp_map; unordered_map<NFAVertex, NFAVertex> temp_map;
shared_ptr<NGHolder> new_lhs = make_shared<NGHolder>(); shared_ptr<NGHolder> new_lhs = make_shared<NGHolder>();
if (!new_lhs) {
assert(0);
throw std::bad_alloc();
}
splitLHS(h, pivot, new_lhs.get(), &temp_map); splitLHS(h, pivot, new_lhs.get(), &temp_map);
/* want to cut off paths to pivot from things other than the pivot - /* want to cut off paths to pivot from things other than the pivot -
* makes a more svelte graphy */ * makes a more svelte graphy */
clear_in_edges(temp_map[pivot], *new_lhs); clear_in_edges(temp_map[pivot], *new_lhs);
NFAEdge pivot_edge = add_edge(temp_map[prev_v], temp_map[pivot], NFAEdge pivot_edge = add_edge(temp_map[prev_v], temp_map[pivot],
*new_lhs); *new_lhs).first;
if (is_triggered(h) && prev_v == h.start) { if (is_triggered(h) && prev_v == h.start) {
(*new_lhs)[pivot_edge].tops.insert(DEFAULT_TOP); (*new_lhs)[pivot_edge].tops.insert(DEFAULT_TOP);
} }
@ -1319,10 +1310,7 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg,
if (!contains(done_rhs, adj)) { if (!contains(done_rhs, adj)) {
unordered_map<NFAVertex, NFAVertex> temp_map; unordered_map<NFAVertex, NFAVertex> temp_map;
shared_ptr<NGHolder> new_rhs = make_shared<NGHolder>(); shared_ptr<NGHolder> new_rhs = make_shared<NGHolder>();
if (!new_rhs) {
assert(0);
throw std::bad_alloc();
}
splitRHS(h, adj, new_rhs.get(), &temp_map); splitRHS(h, adj, new_rhs.get(), &temp_map);
remove_edge(new_rhs->start, new_rhs->accept, *new_rhs); remove_edge(new_rhs->start, new_rhs->accept, *new_rhs);
remove_edge(new_rhs->start, new_rhs->acceptEod, *new_rhs); remove_edge(new_rhs->start, new_rhs->acceptEod, *new_rhs);
@ -1434,11 +1422,11 @@ bool deanchorIfNeeded(NGHolder &g) {
if (succ_v == succ_g) { if (succ_v == succ_g) {
DEBUG_PRINTF("found ^.*\n"); DEBUG_PRINTF("found ^.*\n");
for (auto succ : adjacent_vertices_range(g.start, g)) { for (auto asucc : adjacent_vertices_range(g.start, g)) {
if (succ == g.startDs) { if (asucc == g.startDs) {
continue; continue;
} }
add_edge(g.startDs, succ, g); add_edge(g.startDs, asucc, g);
} }
clear_vertex(v, g); clear_vertex(v, g);
remove_vertex(v, g); remove_vertex(v, g);
@ -1685,18 +1673,18 @@ void removeRedundantLiteralsFromInfix(const NGHolder &h, RoseInGraph &ig,
* successor literal. This would require using distinct report ids and also * successor literal. This would require using distinct report ids and also
* taking into account overlap of successor literals. */ * taking into account overlap of successor literals. */
set<ue2_literal> preds; set<ue2_literal> lpreds;
set<ue2_literal> succs; set<ue2_literal> lsuccs;
for (const RoseInEdge &e : ee) { for (const RoseInEdge &e : ee) {
RoseInVertex u = source(e, ig); RoseInVertex u = source(e, ig);
assert(ig[u].type == RIV_LITERAL); assert(ig[u].type == RIV_LITERAL);
assert(!ig[u].delay); assert(!ig[u].delay);
preds.insert(ig[u].s); lpreds.insert(ig[u].s);
RoseInVertex v = target(e, ig); RoseInVertex v = target(e, ig);
assert(ig[v].type == RIV_LITERAL); assert(ig[v].type == RIV_LITERAL);
assert(!ig[v].delay); assert(!ig[v].delay);
succs.insert(ig[v].s); lsuccs.insert(ig[v].s);
if (ig[e].graph_lag) { if (ig[e].graph_lag) {
/* already removed redundant parts of literals */ /* already removed redundant parts of literals */
@ -1708,9 +1696,9 @@ void removeRedundantLiteralsFromInfix(const NGHolder &h, RoseInGraph &ig,
map<ue2_literal, pair<shared_ptr<NGHolder>, u32> > graphs; /* + delay */ map<ue2_literal, pair<shared_ptr<NGHolder>, u32> > graphs; /* + delay */
for (const ue2_literal &right : succs) { for (const ue2_literal &right : lsuccs) {
size_t max_overlap = 0; size_t max_overlap = 0;
for (const ue2_literal &left : preds) { for (const ue2_literal &left : lpreds) {
size_t overlap = maxOverlap(left, right, 0); size_t overlap = maxOverlap(left, right, 0);
ENSURE_AT_LEAST(&max_overlap, overlap); ENSURE_AT_LEAST(&max_overlap, overlap);
} }
@ -1747,13 +1735,13 @@ void removeRedundantLiteralsFromInfix(const NGHolder &h, RoseInGraph &ig,
for (const RoseInEdge &e : ee) { for (const RoseInEdge &e : ee) {
RoseInVertex v = target(e, ig); RoseInVertex v = target(e, ig);
const ue2_literal &succ = ig[v].s; const ue2_literal &igsucc = ig[v].s;
if (!contains(graphs, succ)) { if (!contains(graphs, igsucc)) {
continue; continue;
} }
ig[e].graph = graphs[succ].first; ig[e].graph = graphs[igsucc].first;
ig[e].graph_lag = graphs[succ].second; ig[e].graph_lag = graphs[igsucc].second;
if (isStarCliche(*ig[e].graph)) { if (isStarCliche(*ig[e].graph)) {
DEBUG_PRINTF("is a X star!\n"); DEBUG_PRINTF("is a X star!\n");
@ -1792,9 +1780,9 @@ void removeRedundantLiteralsFromInfixes(RoseInGraph &g,
} }
for (const auto &m : infixes) { for (const auto &m : infixes) {
NGHolder *h = m.first; const NGHolder *h = m.first;
const auto &edges = m.second; const auto &medges = m.second;
removeRedundantLiteralsFromInfix(*h, g, edges, cc); removeRedundantLiteralsFromInfix(*h, g, medges, cc);
} }
} }
@ -1805,7 +1793,7 @@ void removeRedundantLiterals(RoseInGraph &g, const CompileContext &cc) {
} }
static static
RoseInVertex getStart(RoseInGraph &vg) { RoseInVertex getStart(const RoseInGraph &vg) {
for (RoseInVertex v : vertices_range(vg)) { for (RoseInVertex v : vertices_range(vg)) {
if (vg[v].type == RIV_START || vg[v].type == RIV_ANCHORED_START) { if (vg[v].type == RIV_START || vg[v].type == RIV_ANCHORED_START) {
return v; return v;
@ -1870,7 +1858,7 @@ unique_ptr<NGHolder> make_chain(u32 count) {
#define SHORT_TRIGGER_LEN 16 #define SHORT_TRIGGER_LEN 16
static static
bool makeTransientFromLongLiteral(NGHolder &h, RoseInGraph &vg, bool makeTransientFromLongLiteral(const NGHolder &h, RoseInGraph &vg,
const vector<RoseInEdge> &ee, const vector<RoseInEdge> &ee,
const CompileContext &cc) { const CompileContext &cc) {
/* check max width and literal lengths to see if possible */ /* check max width and literal lengths to see if possible */
@ -1953,7 +1941,7 @@ bool makeTransientFromLongLiteral(NGHolder &h, RoseInGraph &vg,
static static
void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
u32 delay, const vector<NFAVertex> &preds) { u32 delay, const vector<NFAVertex> &lpreds) {
assert(delay <= lit.length()); assert(delay <= lit.length());
assert(isCorrectlyTopped(g)); assert(isCorrectlyTopped(g));
DEBUG_PRINTF("adding on '%s' %u\n", dumpString(lit).c_str(), delay); DEBUG_PRINTF("adding on '%s' %u\n", dumpString(lit).c_str(), delay);
@ -1969,8 +1957,8 @@ void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
prev = curr; prev = curr;
} }
for (auto v : preds) { for (auto v : lpreds) {
NFAEdge e = add_edge_if_not_present(v, prev, g); NFAEdge e = add_edge_if_not_present(v, prev, g).first;
if (v == g.start && is_triggered(g)) { if (v == g.start && is_triggered(g)) {
g[e].tops.insert(DEFAULT_TOP); g[e].tops.insert(DEFAULT_TOP);
} }
@ -1988,11 +1976,11 @@ void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
static static
void restoreTrailingLiteralStates(NGHolder &g, void restoreTrailingLiteralStates(NGHolder &g,
const vector<pair<ue2_literal, u32>> &lits) { const vector<pair<ue2_literal, u32>> &lits) {
vector<NFAVertex> preds; vector<NFAVertex> vpreds;
insert(&preds, preds.end(), inv_adjacent_vertices(g.accept, g)); insert(&vpreds, vpreds.end(), inv_adjacent_vertices(g.accept, g));
clear_in_edges(g.accept, g); clear_in_edges(g.accept, g);
for (auto v : preds) { for (auto v : vpreds) {
g[v].reports.clear(); /* clear report from old accepts */ g[v].reports.clear(); /* clear report from old accepts */
} }
@ -2000,7 +1988,7 @@ void restoreTrailingLiteralStates(NGHolder &g,
const ue2_literal &lit = p.first; const ue2_literal &lit = p.first;
u32 delay = p.second; u32 delay = p.second;
restoreTrailingLiteralStates(g, lit, delay, preds); restoreTrailingLiteralStates(g, lit, delay, vpreds);
} }
} }
@ -2134,14 +2122,14 @@ void findBetterPrefixes(RoseInGraph &vg, const CompileContext &cc) {
/* look for bad prefixes and try to split */ /* look for bad prefixes and try to split */
for (const auto &m : prefixes) { for (const auto &m : prefixes) {
NGHolder *h = m.first; NGHolder *h = m.first;
const auto &edges = m.second; const auto &medges = m.second;
depth max_width = findMaxWidth(*h); depth max_width = findMaxWidth(*h);
if (willBeTransient(max_width, cc) if (willBeTransient(max_width, cc)
|| willBeAnchoredTable(max_width, cc.grey)) { || willBeAnchoredTable(max_width, cc.grey)) {
continue; continue;
} }
changed = improvePrefix(*h, vg, edges, cc); changed = improvePrefix(*h, vg, medges, cc);
} }
} while (changed && gen++ < MAX_FIND_BETTER_PREFIX_GEN); } while (changed && gen++ < MAX_FIND_BETTER_PREFIX_GEN);
} }
@ -2150,7 +2138,7 @@ void findBetterPrefixes(RoseInGraph &vg, const CompileContext &cc) {
#define MAX_EXTRACT_STRONG_LITERAL_GRAPHS 10 #define MAX_EXTRACT_STRONG_LITERAL_GRAPHS 10
static static
bool extractStrongLiteral(NGHolder &h, RoseInGraph &vg, bool extractStrongLiteral(const NGHolder &h, RoseInGraph &vg,
const vector<RoseInEdge> &ee, const vector<RoseInEdge> &ee,
const CompileContext &cc) { const CompileContext &cc) {
DEBUG_PRINTF("looking for string literal\n"); DEBUG_PRINTF("looking for string literal\n");
@ -2198,12 +2186,12 @@ void extractStrongLiterals(RoseInGraph &vg, const CompileContext &cc) {
for (const auto &m : edges_by_graph) { for (const auto &m : edges_by_graph) {
NGHolder *g = m.first; NGHolder *g = m.first;
const auto &edges = m.second; const auto &medges = m.second;
if (contains(stuck, g)) { if (contains(stuck, g)) {
DEBUG_PRINTF("already known to be bad\n"); DEBUG_PRINTF("already known to be bad\n");
continue; continue;
} }
bool rv = extractStrongLiteral(*g, vg, edges, cc); bool rv = extractStrongLiteral(*g, vg, medges, cc);
if (rv) { if (rv) {
changed = true; changed = true;
} else { } else {
@ -2281,8 +2269,8 @@ void improveWeakInfixes(RoseInGraph &vg, const CompileContext &cc) {
for (const auto &m : weak_edges) { for (const auto &m : weak_edges) {
NGHolder *h = m.first; NGHolder *h = m.first;
const auto &edges = m.second; const auto &medges = m.second;
improveInfix(*h, vg, edges, cc); improveInfix(*h, vg, medges, cc);
} }
} }
@ -2294,10 +2282,7 @@ void splitEdgesForSuffix(const NGHolder &base_graph, RoseInGraph &vg,
assert(!splitters.empty()); assert(!splitters.empty());
shared_ptr<NGHolder> lhs = make_shared<NGHolder>(); shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
if (!lhs) {
assert(0);
throw bad_alloc();
}
unordered_map<NFAVertex, NFAVertex> v_map; unordered_map<NFAVertex, NFAVertex> v_map;
cloneHolder(*lhs, base_graph, &v_map); cloneHolder(*lhs, base_graph, &v_map);
lhs->kind = NFA_INFIX; lhs->kind = NFA_INFIX;
@ -2306,7 +2291,7 @@ void splitEdgesForSuffix(const NGHolder &base_graph, RoseInGraph &vg,
add_edge(lhs->accept, lhs->acceptEod, *lhs); add_edge(lhs->accept, lhs->acceptEod, *lhs);
clearReports(*lhs); clearReports(*lhs);
for (NFAVertex v : splitters) { for (NFAVertex v : splitters) {
NFAEdge e = add_edge(v_map[v], lhs->accept, *lhs); NFAEdge e = add_edge(v_map[v], lhs->accept, *lhs).first;
if (v == base_graph.start) { if (v == base_graph.start) {
(*lhs)[e].tops.insert(DEFAULT_TOP); (*lhs)[e].tops.insert(DEFAULT_TOP);
} }
@ -2407,14 +2392,14 @@ bool replaceSuffixWithInfix(const NGHolder &h, RoseInGraph &vg,
assert(!by_reports.empty()); assert(!by_reports.empty());
/* TODO: how strong a min len do we want here ? */ /* TODO: how strong a min len do we want here ? */
u32 min_len = cc.grey.minRoseLiteralLength; u32 rose_min_len = cc.grey.minRoseLiteralLength;
ENSURE_AT_LEAST(&min_len, MIN_SUFFIX_LEN); ENSURE_AT_LEAST(&rose_min_len, MIN_SUFFIX_LEN);
for (auto &vli : by_reports | map_values) { for (auto &vli : by_reports | map_values) {
u64a score = sanitizeAndCompressAndScore(vli.lit); u64a score = sanitizeAndCompressAndScore(vli.lit);
if (vli.lit.empty() if (vli.lit.empty()
|| !validateRoseLiteralSetQuality(vli.lit, score, false, min_len, || !validateRoseLiteralSetQuality(vli.lit, score, false, rose_min_len,
false, false)) { false, false)) {
return false; return false;
} }
@ -2458,8 +2443,8 @@ void avoidSuffixes(RoseInGraph &vg, const CompileContext &cc) {
/* look at suffixes and try to split */ /* look at suffixes and try to split */
for (const auto &m : suffixes) { for (const auto &m : suffixes) {
const NGHolder *h = m.first; const NGHolder *h = m.first;
const auto &edges = m.second; const auto &medges = m.second;
replaceSuffixWithInfix(*h, vg, edges, cc); replaceSuffixWithInfix(*h, vg, medges, cc);
} }
} }
@ -2553,8 +2538,8 @@ void lookForDoubleCut(RoseInGraph &vg, const CompileContext &cc) {
for (const auto &m : right_edges) { for (const auto &m : right_edges) {
const NGHolder *h = m.first; const NGHolder *h = m.first;
const auto &edges = m.second; const auto &medges = m.second;
lookForDoubleCut(*h, edges, vg, cc.grey); lookForDoubleCut(*h, medges, vg, cc.grey);
} }
} }
@ -2745,8 +2730,8 @@ void lookForCleanEarlySplits(RoseInGraph &vg, const CompileContext &cc) {
for (const auto &m : rightfixes) { for (const auto &m : rightfixes) {
const NGHolder *h = m.first; const NGHolder *h = m.first;
const auto &edges = m.second; const auto &medges = m.second;
lookForCleanSplit(*h, edges, vg, cc); lookForCleanSplit(*h, medges, vg, cc);
} }
prev = std::move(curr); prev = std::move(curr);
@ -2805,7 +2790,7 @@ bool tryForEarlyDfa(const NGHolder &h, const CompileContext &cc) {
} }
static static
vector<vector<CharReach>> getDfaTriggers(RoseInGraph &vg, vector<vector<CharReach>> getDfaTriggers(const RoseInGraph &vg,
const vector<RoseInEdge> &edges, const vector<RoseInEdge> &edges,
bool *single_trigger) { bool *single_trigger) {
vector<vector<CharReach>> triggers; vector<vector<CharReach>> triggers;
@ -2868,7 +2853,6 @@ static
bool splitForImplementability(RoseInGraph &vg, NGHolder &h, bool splitForImplementability(RoseInGraph &vg, NGHolder &h,
const vector<RoseInEdge> &edges, const vector<RoseInEdge> &edges,
const CompileContext &cc) { const CompileContext &cc) {
vector<pair<ue2_literal, u32>> succ_lits;
DEBUG_PRINTF("trying to split %s with %zu vertices on %zu edges\n", DEBUG_PRINTF("trying to split %s with %zu vertices on %zu edges\n",
to_string(h.kind).c_str(), num_vertices(h), edges.size()); to_string(h.kind).c_str(), num_vertices(h), edges.size());
@ -2877,6 +2861,7 @@ bool splitForImplementability(RoseInGraph &vg, NGHolder &h,
} }
if (!generates_callbacks(h)) { if (!generates_callbacks(h)) {
vector<pair<ue2_literal, u32>> succ_lits;
for (const auto &e : edges) { for (const auto &e : edges) {
const auto &lit = vg[target(e, vg)].s; const auto &lit = vg[target(e, vg)].s;
u32 delay = vg[e].graph_lag; u32 delay = vg[e].graph_lag;
@ -2889,8 +2874,8 @@ bool splitForImplementability(RoseInGraph &vg, NGHolder &h,
} }
unique_ptr<VertLitInfo> split; unique_ptr<VertLitInfo> split;
bool last_chance = true;
if (h.kind == NFA_PREFIX) { if (h.kind == NFA_PREFIX) {
bool last_chance = true;
auto depths = calcDepths(h); auto depths = calcDepths(h);
split = findBestPrefixSplit(h, depths, vg, edges, last_chance, cc); split = findBestPrefixSplit(h, depths, vg, edges, last_chance, cc);
@ -2927,7 +2912,7 @@ bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes,
vector<RoseInEdge>> edges_by_graph; vector<RoseInEdge>> edges_by_graph;
for (const RoseInEdge &ve : edges_range(vg)) { for (const RoseInEdge &ve : edges_range(vg)) {
if (vg[ve].graph && !vg[ve].dfa) { if (vg[ve].graph && !vg[ve].dfa) {
auto &h = vg[ve].graph; const auto &h = vg[ve].graph;
edges_by_graph[h].emplace_back(ve); edges_by_graph[h].emplace_back(ve);
} }
} }
@ -2942,10 +2927,10 @@ bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes,
continue; continue;
} }
const auto &edges = m.second; const auto &medges = m.second;
if (tryForEarlyDfa(*h, cc) && if (tryForEarlyDfa(*h, cc) &&
doEarlyDfa(rose, vg, *h, edges, final_chance, rm, cc)) { doEarlyDfa(rose, vg, *h, medges, final_chance, rm, cc)) {
continue; continue;
} }
@ -2954,7 +2939,7 @@ bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes,
return false; return false;
} }
if (splitForImplementability(vg, *h, edges, cc)) { if (splitForImplementability(vg, *h, medges, cc)) {
added_count++; added_count++;
if (added_count > MAX_IMPLEMENTABLE_SPLITS) { if (added_count > MAX_IMPLEMENTABLE_SPLITS) {
DEBUG_PRINTF("added_count hit limit\n"); DEBUG_PRINTF("added_count hit limit\n");

View File

@ -73,7 +73,7 @@ Component *ComponentAlternation::accept(ComponentVisitor &v) {
} }
for (auto i = children.begin(), e = children.end(); i != e; ++i) { for (auto i = children.begin(), e = children.end(); i != e; ++i) {
Component *child = i->get(); const Component *child = i->get();
c = (*i)->accept(v); c = (*i)->accept(v);
if (c != child) { if (c != child) {
// Child has been replaced (new Component pointer) or we've been // Child has been replaced (new Component pointer) or we've been
@ -109,20 +109,20 @@ void ComponentAlternation::append(unique_ptr<Component> component) {
vector<PositionInfo> ComponentAlternation::first() const { vector<PositionInfo> ComponentAlternation::first() const {
// firsts come from all our subcomponents in position order. This will // firsts come from all our subcomponents in position order. This will
// maintain left-to-right priority order. // maintain left-to-right priority order.
vector<PositionInfo> firsts, subfirsts; vector<PositionInfo> firsts;
for (const auto &c : children) { for (const auto &c : children) {
subfirsts = c->first(); vector<PositionInfo> subfirsts = c->first();
firsts.insert(firsts.end(), subfirsts.begin(), subfirsts.end()); firsts.insert(firsts.end(), subfirsts.begin(), subfirsts.end());
} }
return firsts; return firsts;
} }
vector<PositionInfo> ComponentAlternation::last() const { vector<PositionInfo> ComponentAlternation::last() const {
vector<PositionInfo> lasts, sublasts; vector<PositionInfo> lasts;
for (const auto &c : children) { for (const auto &c : children) {
sublasts = c->last(); vector<PositionInfo> sublasts = c->last();
lasts.insert(lasts.end(), sublasts.begin(), sublasts.end()); lasts.insert(lasts.end(), sublasts.begin(), sublasts.end());
} }
return lasts; return lasts;

View File

@ -59,7 +59,7 @@ Component * ComponentAssertion::accept(ComponentVisitor &v) {
} }
for (auto i = children.begin(), e = children.end(); i != e; ++i) { for (auto i = children.begin(), e = children.end(); i != e; ++i) {
Component *child = i->get(); const Component *child = i->get();
c = (*i)->accept(v); c = (*i)->accept(v);
if (c != child) { if (c != child) {
// Child has been replaced (new Component pointer) or we've been // Child has been replaced (new Component pointer) or we've been

View File

@ -51,7 +51,7 @@ Component *ComponentAtomicGroup::accept(ComponentVisitor &v) {
} }
for (auto i = children.begin(), e = children.end(); i != e; ++i) { for (auto i = children.begin(), e = children.end(); i != e; ++i) {
Component *child = i->get(); const Component *child = i->get();
c = (*i)->accept(v); c = (*i)->accept(v);
if (c != child) { if (c != child) {
// Child has been replaced (new Component pointer) or we've been // Child has been replaced (new Component pointer) or we've been

View File

@ -161,26 +161,26 @@ void ComponentBoundary::buildFollowSet(GlushkovBuildState &,
bool ComponentBoundary::checkEmbeddedStartAnchor(bool at_start) const { bool ComponentBoundary::checkEmbeddedStartAnchor(bool at_start) const {
if (at_start) { if (at_start) {
return at_start; return true;
} }
if (m_bound == BEGIN_STRING || m_bound == BEGIN_LINE) { if (m_bound == BEGIN_STRING || m_bound == BEGIN_LINE) {
throw ParseError("Embedded start anchors not supported."); throw ParseError("Embedded start anchors not supported.");
} }
return at_start; return false;
} }
bool ComponentBoundary::checkEmbeddedEndAnchor(bool at_end) const { bool ComponentBoundary::checkEmbeddedEndAnchor(bool at_end) const {
if (at_end) { if (at_end) {
return at_end; return true;
} }
if (m_bound != BEGIN_STRING && m_bound != BEGIN_LINE) { if (m_bound != BEGIN_STRING && m_bound != BEGIN_LINE) {
throw ParseError("Embedded end anchors not supported."); throw ParseError("Embedded end anchors not supported.");
} }
return at_end; return false;
} }
} // namespace } // namespace

View File

@ -79,7 +79,7 @@ Component *ComponentCondReference::accept(ComponentVisitor &v) {
} }
if (kind == CONDITION_ASSERTION) { if (kind == CONDITION_ASSERTION) {
Component *a = assertion.get(); const Component *a = assertion.get();
c = assertion->accept(v); c = assertion->accept(v);
if (c != a) { if (c != a) {
assertion.reset(c); assertion.reset(c);
@ -87,7 +87,7 @@ Component *ComponentCondReference::accept(ComponentVisitor &v) {
} }
for (auto i = children.begin(), e = children.end(); i != e; ++i) { for (auto i = children.begin(), e = children.end(); i != e; ++i) {
Component *child = i->get(); const Component *child = i->get();
c = (*i)->accept(v); c = (*i)->accept(v);
if (c != child) { if (c != child) {
// Child has been replaced (new Component pointer) or we've been // Child has been replaced (new Component pointer) or we've been

Some files were not shown because too many files have changed in this diff Show More