mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Merge branch 'develop' into wip-isildur-g-cppcheck-47-48-58
This commit is contained in:
commit
727cff3621
@ -1221,11 +1221,17 @@ if (NOT BUILD_STATIC_LIBS)
|
||||
endif ()
|
||||
|
||||
add_subdirectory(util)
|
||||
add_subdirectory(unit)
|
||||
|
||||
if (EXISTS ${CMAKE_SOURCE_DIR}/tools/CMakeLists.txt)
|
||||
option(BUILD_UNIT "Build Hyperscan unit tests (default TRUE)" TRUE)
|
||||
if(BUILD_UNIT)
|
||||
add_subdirectory(unit)
|
||||
endif()
|
||||
|
||||
option(BUILD_TOOLS "Build Hyperscan tools (default TRUE)" TRUE)
|
||||
if(EXISTS ${CMAKE_SOURCE_DIR}/tools/CMakeLists.txt AND BUILD_TOOLS)
|
||||
add_subdirectory(tools)
|
||||
endif()
|
||||
|
||||
if (EXISTS ${CMAKE_SOURCE_DIR}/chimera/CMakeLists.txt AND BUILD_CHIMERA)
|
||||
add_subdirectory(chimera)
|
||||
endif()
|
||||
@ -1240,4 +1246,7 @@ if(BUILD_BENCHMARKS)
|
||||
add_subdirectory(benchmarks)
|
||||
endif()
|
||||
|
||||
add_subdirectory(doc/dev-reference)
|
||||
option(BUILD_DOC "Build the Hyperscan documentation (default TRUE)" TRUE)
|
||||
if(BUILD_DOC)
|
||||
add_subdirectory(doc/dev-reference)
|
||||
endif()
|
||||
|
@ -146,6 +146,7 @@ export CXX="/usr/pkg/gcc12/bin/g++"
|
||||
```
|
||||
|
||||
In FreeBSD similarly, you might want to install a different compiler.
|
||||
If you want to use gcc, it is recommended to use gcc12.
|
||||
You will also, as in NetBSD, need to install cmake, sqlite, boost and ragel packages.
|
||||
Using the example of gcc12 from pkg:
|
||||
installing the desired compiler:
|
||||
@ -164,7 +165,6 @@ the environment variables to point to this compiler:
|
||||
export CC="/usr/local/bin/gcc"
|
||||
export CXX="/usr/local/bin/g++"
|
||||
```
|
||||
|
||||
A further note in FreeBSD, on the PowerPC and ARM platforms,
|
||||
the gcc12 package installs under a slightly different name. On FreeBSD/ppc,
|
||||
gcc12 will be found using:
|
||||
@ -175,12 +175,6 @@ export CXX="/usr/local/bin/g++12"
|
||||
|
||||
Then continue with the build as below.
|
||||
|
||||
A note about running in FreeBSD: if you built a dynamically linked binary
|
||||
with an alternative compiler, the libraries specific to the compiler that
|
||||
built the binary will probably not be found and the base distro libraries
|
||||
in /lib will be found instead. Adjust LD_LIBRARY_PATH appropriately. For
|
||||
example, with gcc12 installed from pkg, one would want to use
|
||||
```export LD_LIBRARY_PATH=/usr/local/lib/gcc12/```
|
||||
|
||||
## Configure & build
|
||||
|
||||
|
@ -26,32 +26,30 @@
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <ctime>
|
||||
#include <cstdlib>
|
||||
#include <memory>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
|
||||
#include "benchmarks.hpp"
|
||||
|
||||
#define MAX_LOOPS 1000000000
|
||||
#define MAX_MATCHES 5
|
||||
#define N 8
|
||||
#define MAX_LOOPS 1000000000
|
||||
#define MAX_MATCHES 5
|
||||
#define N 8
|
||||
|
||||
struct hlmMatchEntry {
|
||||
size_t to;
|
||||
u32 id;
|
||||
hlmMatchEntry(size_t end, u32 identifier) :
|
||||
to(end), id(identifier) {}
|
||||
hlmMatchEntry(size_t end, u32 identifier) : to(end), id(identifier) {}
|
||||
};
|
||||
|
||||
std::vector<hlmMatchEntry> ctxt;
|
||||
|
||||
static
|
||||
hwlmcb_rv_t hlmSimpleCallback(size_t to, u32 id,
|
||||
UNUSED struct hs_scratch *scratch) {
|
||||
static hwlmcb_rv_t hlmSimpleCallback(size_t to, u32 id,
|
||||
UNUSED struct hs_scratch *scratch) {
|
||||
DEBUG_PRINTF("match @%zu = %u\n", to, id);
|
||||
|
||||
ctxt.push_back(hlmMatchEntry(to, id));
|
||||
@ -59,40 +57,42 @@ hwlmcb_rv_t hlmSimpleCallback(size_t to, u32 id,
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
template<typename InitFunc, typename BenchFunc>
|
||||
static void run_benchmarks(int size, int loops, int max_matches, bool is_reverse, MicroBenchmark &bench, InitFunc &&init, BenchFunc &&func) {
|
||||
template <typename InitFunc, typename BenchFunc>
|
||||
static void run_benchmarks(int size, int loops, int max_matches,
|
||||
bool is_reverse, MicroBenchmark &bench,
|
||||
InitFunc &&init, BenchFunc &&func) {
|
||||
init(bench);
|
||||
double total_sec = 0.0;
|
||||
u64a total_size = 0;
|
||||
double bw = 0.0;
|
||||
double avg_bw = 0.0;
|
||||
double total_sec = 0.0;
|
||||
double max_bw = 0.0;
|
||||
double avg_time = 0.0;
|
||||
if (max_matches) {
|
||||
double avg_bw = 0.0;
|
||||
int pos = 0;
|
||||
for(int j = 0; j < max_matches - 1; j++) {
|
||||
for (int j = 0; j < max_matches - 1; j++) {
|
||||
bench.buf[pos] = 'b';
|
||||
pos = (j+1) *size / max_matches ;
|
||||
pos = (j + 1) * size / max_matches;
|
||||
bench.buf[pos] = 'a';
|
||||
u64a actual_size = 0;
|
||||
auto start = std::chrono::steady_clock::now();
|
||||
for(int i = 0; i < loops; i++) {
|
||||
for (int i = 0; i < loops; i++) {
|
||||
const u8 *res = func(bench);
|
||||
if (is_reverse)
|
||||
actual_size += bench.buf.data() + size - res;
|
||||
else
|
||||
actual_size += res - bench.buf.data();
|
||||
if (is_reverse)
|
||||
actual_size += bench.buf.data() + size - res;
|
||||
else
|
||||
actual_size += res - bench.buf.data();
|
||||
}
|
||||
auto end = std::chrono::steady_clock::now();
|
||||
double dt = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
|
||||
double dt = std::chrono::duration_cast<std::chrono::microseconds>(
|
||||
end - start)
|
||||
.count();
|
||||
total_sec += dt;
|
||||
/*convert microseconds to seconds*/
|
||||
/*calculate bandwidth*/
|
||||
bw = (actual_size / dt) * 1000000.0 / 1048576.0;
|
||||
/*std::cout << "act_size = " << act_size << std::endl;
|
||||
std::cout << "dt = " << dt << std::endl;
|
||||
std::cout << "bw = " << bw << std::endl;*/
|
||||
avg_bw += bw;
|
||||
double bw = (actual_size / dt) * 1000000.0 / 1048576.0;
|
||||
/*std::cout << "act_size = " << act_size << std::endl;
|
||||
std::cout << "dt = " << dt << std::endl;
|
||||
std::cout << "bw = " << bw << std::endl;*/
|
||||
avg_bw += bw;
|
||||
/*convert to MB/s*/
|
||||
max_bw = std::max(bw, max_bw);
|
||||
/*calculate average time*/
|
||||
@ -100,20 +100,22 @@ static void run_benchmarks(int size, int loops, int max_matches, bool is_reverse
|
||||
}
|
||||
avg_time /= max_matches;
|
||||
avg_bw /= max_matches;
|
||||
total_sec /= 1000000.0;
|
||||
total_sec /= 1000000.0;
|
||||
/*convert average time to us*/
|
||||
printf(KMAG "%s: %u matches, %u * %u iterations," KBLU " total elapsed time =" RST " %.3f s, "
|
||||
KBLU "average time per call =" RST " %.3f μs," KBLU " max bandwidth = " RST " %.3f MB/s," KBLU " average bandwidth =" RST " %.3f MB/s \n",
|
||||
printf("%-18s, %-12d, %-10d, %-6d, %-10.3f, %-9.3f, %-8.3f, %-7.3f\n",
|
||||
bench.label, max_matches, size ,loops, total_sec, avg_time, max_bw, avg_bw);
|
||||
} else {
|
||||
u64a total_size = 0;
|
||||
auto start = std::chrono::steady_clock::now();
|
||||
for (int i = 0; i < loops; i++) {
|
||||
const u8 *res = func(bench);
|
||||
func(bench);
|
||||
}
|
||||
auto end = std::chrono::steady_clock::now();
|
||||
total_sec += std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
|
||||
total_sec +=
|
||||
std::chrono::duration_cast<std::chrono::microseconds>(end - start)
|
||||
.count();
|
||||
/*calculate transferred size*/
|
||||
total_size = size * loops;
|
||||
total_size = (u64a)size * (u64a)loops;
|
||||
/*calculate average time*/
|
||||
avg_time = total_sec / loops;
|
||||
/*convert microseconds to seconds*/
|
||||
@ -122,117 +124,126 @@ static void run_benchmarks(int size, int loops, int max_matches, bool is_reverse
|
||||
max_bw = total_size / total_sec;
|
||||
/*convert to MB/s*/
|
||||
max_bw /= 1048576.0;
|
||||
printf(KMAG "%s: no matches, %u * %u iterations," KBLU " total elapsed time =" RST " %.3f s, "
|
||||
KBLU "average time per call =" RST " %.3f μs ," KBLU " bandwidth = " RST " %.3f MB/s \n",
|
||||
bench.label, size ,loops, total_sec, avg_time, max_bw );
|
||||
printf("%-18s, %-12s, %-10d, %-6d, %-10.3f, %-9.3f, %-8.3f, %-7s\n",
|
||||
bench.label, "0", size, loops, total_sec, avg_time, max_bw, "0");
|
||||
}
|
||||
}
|
||||
|
||||
int main(){
|
||||
const int matches[] = {0, MAX_MATCHES};
|
||||
std::vector<size_t> sizes;
|
||||
for (size_t i = 0; i < N; i++) sizes.push_back(16000 << i*2);
|
||||
const char charset[] = "aAaAaAaAAAaaaaAAAAaaaaAAAAAAaaaAAaaa";
|
||||
|
||||
for (size_t i = 0; i < N; i++)
|
||||
sizes.push_back(16000 << i * 2);
|
||||
const char charset[] = "aAaAaAaAAAaaaaAAAAaaaaAAAAAAaaaAAaaa";
|
||||
printf("%-18s, %-12s, %-10s, %-6s, %-10s, %-9s, %-8s, %-7s\n", "Matcher",
|
||||
"max_matches", "size", "loops", "total_sec", "avg_time", "max_bw",
|
||||
"avg_bw");
|
||||
for (int m = 0; m < 2; m++) {
|
||||
for (size_t i = 0; i < std::size(sizes); i++) {
|
||||
MicroBenchmark bench("Shufti", sizes[i]);
|
||||
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
|
||||
run_benchmarks(
|
||||
sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
|
||||
[&](MicroBenchmark &b) {
|
||||
b.chars.set('a');
|
||||
ue2::shuftiBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
|
||||
memset(b.buf.data(), 'b', b.size);
|
||||
},
|
||||
[&](MicroBenchmark &b) {
|
||||
return shuftiExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size);
|
||||
}
|
||||
);
|
||||
return shuftiExec(b.lo, b.hi, b.buf.data(),
|
||||
b.buf.data() + b.size);
|
||||
});
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < std::size(sizes); i++) {
|
||||
MicroBenchmark bench("Reverse Shufti", sizes[i]);
|
||||
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench,
|
||||
run_benchmarks(
|
||||
sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench,
|
||||
[&](MicroBenchmark &b) {
|
||||
b.chars.set('a');
|
||||
ue2::shuftiBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
|
||||
memset(b.buf.data(), 'b', b.size);
|
||||
},
|
||||
[&](MicroBenchmark &b) {
|
||||
return rshuftiExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size);
|
||||
}
|
||||
);
|
||||
return rshuftiExec(b.lo, b.hi, b.buf.data(),
|
||||
b.buf.data() + b.size);
|
||||
});
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < std::size(sizes); i++) {
|
||||
MicroBenchmark bench("Truffle", sizes[i]);
|
||||
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
|
||||
run_benchmarks(
|
||||
sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
|
||||
[&](MicroBenchmark &b) {
|
||||
b.chars.set('a');
|
||||
ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
|
||||
memset(b.buf.data(), 'b', b.size);
|
||||
},
|
||||
[&](MicroBenchmark &b) {
|
||||
return truffleExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size);
|
||||
}
|
||||
);
|
||||
return truffleExec(b.lo, b.hi, b.buf.data(),
|
||||
b.buf.data() + b.size);
|
||||
});
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < std::size(sizes); i++) {
|
||||
MicroBenchmark bench("Reverse Truffle", sizes[i]);
|
||||
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench,
|
||||
run_benchmarks(
|
||||
sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench,
|
||||
[&](MicroBenchmark &b) {
|
||||
b.chars.set('a');
|
||||
ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
|
||||
memset(b.buf.data(), 'b', b.size);
|
||||
},
|
||||
[&](MicroBenchmark &b) {
|
||||
return rtruffleExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size);
|
||||
}
|
||||
);
|
||||
return rtruffleExec(b.lo, b.hi, b.buf.data(),
|
||||
b.buf.data() + b.size);
|
||||
});
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < std::size(sizes); i++) {
|
||||
MicroBenchmark bench("Vermicelli", sizes[i]);
|
||||
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
|
||||
run_benchmarks(
|
||||
sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
|
||||
[&](MicroBenchmark &b) {
|
||||
b.chars.set('a');
|
||||
ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
|
||||
memset(b.buf.data(), 'b', b.size);
|
||||
},
|
||||
[&](MicroBenchmark &b) {
|
||||
return vermicelliExec('a', 'b', b.buf.data(), b.buf.data() + b.size);
|
||||
}
|
||||
);
|
||||
return vermicelliExec('a', 'b', b.buf.data(),
|
||||
b.buf.data() + b.size);
|
||||
});
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < std::size(sizes); i++) {
|
||||
MicroBenchmark bench("Reverse Vermicelli", sizes[i]);
|
||||
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench,
|
||||
run_benchmarks(
|
||||
sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench,
|
||||
[&](MicroBenchmark &b) {
|
||||
b.chars.set('a');
|
||||
ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
|
||||
memset(b.buf.data(), 'b', b.size);
|
||||
},
|
||||
[&](MicroBenchmark &b) {
|
||||
return rvermicelliExec('a', 'b', b.buf.data(), b.buf.data() + b.size);
|
||||
}
|
||||
);
|
||||
return rvermicelliExec('a', 'b', b.buf.data(),
|
||||
b.buf.data() + b.size);
|
||||
});
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < std::size(sizes); i++) {
|
||||
//we imitate the noodle unit tests
|
||||
// we imitate the noodle unit tests
|
||||
std::string str;
|
||||
const size_t char_len = 5;
|
||||
str.resize(char_len + 1);
|
||||
for (size_t j=0; j < char_len; j++) {
|
||||
srand (time(NULL));
|
||||
int key = rand() % + 36 ;
|
||||
for (size_t j = 0; j < char_len; j++) {
|
||||
srand(time(NULL));
|
||||
int key = rand() % +36;
|
||||
str[char_len] = charset[key];
|
||||
str[char_len + 1] = '\0';
|
||||
}
|
||||
|
||||
MicroBenchmark bench("Noodle", sizes[i]);
|
||||
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
|
||||
run_benchmarks(
|
||||
sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
|
||||
[&](MicroBenchmark &b) {
|
||||
ctxt.clear();
|
||||
memset(b.buf.data(), 'a', b.size);
|
||||
@ -242,10 +253,10 @@ int main(){
|
||||
assert(b.nt != nullptr);
|
||||
},
|
||||
[&](MicroBenchmark &b) {
|
||||
noodExec(b.nt.get(), b.buf.data(), b.size, 0, hlmSimpleCallback, &b.scratch);
|
||||
noodExec(b.nt.get(), b.buf.data(), b.size, 0,
|
||||
hlmSimpleCallback, &b.scratch);
|
||||
return b.buf.data() + b.size;
|
||||
}
|
||||
);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -26,44 +26,32 @@
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "hwlm/hwlm_literal.h"
|
||||
#include "hwlm/noodle_build.h"
|
||||
#include "hwlm/noodle_engine.h"
|
||||
#include "hwlm/noodle_internal.h"
|
||||
#include "nfa/shufti.h"
|
||||
#include "nfa/shufticompile.h"
|
||||
#include "nfa/truffle.h"
|
||||
#include "nfa/trufflecompile.h"
|
||||
#include "nfa/vermicelli.hpp"
|
||||
#include "hwlm/noodle_build.h"
|
||||
#include "hwlm/noodle_engine.h"
|
||||
#include "hwlm/noodle_internal.h"
|
||||
#include "hwlm/hwlm_literal.h"
|
||||
#include "util/bytecode_ptr.h"
|
||||
#include "scratch.h"
|
||||
#include "util/bytecode_ptr.h"
|
||||
|
||||
/*define colour control characters*/
|
||||
#define RST "\x1B[0m"
|
||||
#define KRED "\x1B[31m"
|
||||
#define KGRN "\x1B[32m"
|
||||
#define KYEL "\x1B[33m"
|
||||
#define KBLU "\x1B[34m"
|
||||
#define KMAG "\x1B[35m"
|
||||
#define KCYN "\x1B[36m"
|
||||
#define KWHT "\x1B[37m"
|
||||
|
||||
class MicroBenchmark
|
||||
{
|
||||
class MicroBenchmark {
|
||||
public:
|
||||
char const *label;
|
||||
size_t size;
|
||||
char const *label;
|
||||
size_t size;
|
||||
|
||||
// Shufti/Truffle
|
||||
m128 lo, hi;
|
||||
ue2::CharReach chars;
|
||||
std::vector<u8> buf;
|
||||
// Shufti/Truffle
|
||||
m128 lo, hi;
|
||||
ue2::CharReach chars;
|
||||
std::vector<u8> buf;
|
||||
|
||||
// Noodle
|
||||
struct hs_scratch scratch;
|
||||
ue2::bytecode_ptr<noodTable> nt;
|
||||
// Noodle
|
||||
struct hs_scratch scratch;
|
||||
ue2::bytecode_ptr<noodTable> nt;
|
||||
|
||||
MicroBenchmark(char const *label_, size_t size_)
|
||||
:label(label_), size(size_), buf(size_) {
|
||||
};
|
||||
MicroBenchmark(char const *label_, size_t size_)
|
||||
: label(label_), size(size_), buf(size_){};
|
||||
};
|
||||
|
@ -6,10 +6,10 @@ if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
|
||||
set(FREEBSD true)
|
||||
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
|
||||
#FIXME: find a nicer and more general way of doing this
|
||||
if(CMAKE_C_COMPILER MATCHES "/usr/local/bin/gcc12")
|
||||
set(CMAKE_BUILD_RPATH "/usr/local/lib/gcc12")
|
||||
elseif(CMAKE_C_COMPILER MATCHES "/usr/local/bin/gcc13")
|
||||
if(CMAKE_C_COMPILER MATCHES "/usr/local/bin/gcc13")
|
||||
set(CMAKE_BUILD_RPATH "/usr/local/lib/gcc13")
|
||||
elseif(ARCH_AARCH64 AND (CMAKE_C_COMPILER MATCHES "/usr/local/bin/gcc12"))
|
||||
set(CMAKE_BUILD_RPATH "/usr/local/lib/gcc12")
|
||||
endif()
|
||||
endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
|
||||
|
||||
|
@ -19,6 +19,7 @@ else()
|
||||
set(SPHINX_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/_build")
|
||||
set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees")
|
||||
set(SPHINX_HTML_DIR "${CMAKE_CURRENT_BINARY_DIR}/html")
|
||||
set(SPHINX_MAN_DIR "${CMAKE_CURRENT_BINARY_DIR}/man")
|
||||
|
||||
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/conf.py" @ONLY)
|
||||
@ -32,4 +33,14 @@ add_custom_target(dev-reference
|
||||
"${SPHINX_HTML_DIR}"
|
||||
DEPENDS dev-reference-doxygen
|
||||
COMMENT "Building HTML dev reference with Sphinx")
|
||||
|
||||
add_custom_target(dev-reference-man
|
||||
${SPHINX_BUILD}
|
||||
-b man
|
||||
-c "${CMAKE_CURRENT_BINARY_DIR}"
|
||||
-d "${SPHINX_CACHE_DIR}"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}"
|
||||
"${SPHINX_MAN_DIR}"
|
||||
DEPENDS dev-reference-doxygen
|
||||
COMMENT "Building man page reference with Sphinx")
|
||||
endif()
|
||||
|
@ -11,10 +11,10 @@ Introduction
|
||||
************
|
||||
|
||||
Chimera is a software regular expression matching engine that is a hybrid of
|
||||
Hyperscan and PCRE. The design goals of Chimera are to fully support PCRE
|
||||
syntax as well as to take advantage of the high performance nature of Hyperscan.
|
||||
Vectorscan and PCRE. The design goals of Chimera are to fully support PCRE
|
||||
syntax as well as to take advantage of the high performance nature of Vectorscan.
|
||||
|
||||
Chimera inherits the design guideline of Hyperscan with C APIs for compilation
|
||||
Chimera inherits the design guideline of Vectorscan with C APIs for compilation
|
||||
and scanning.
|
||||
|
||||
The Chimera API itself is composed of two major components:
|
||||
@ -65,13 +65,13 @@ For a given database, Chimera provides several guarantees:
|
||||
.. note:: Chimera is designed to have the same matching behavior as PCRE,
|
||||
including greedy/ungreedy, capturing, etc. Chimera reports both
|
||||
**start offset** and **end offset** for each match like PCRE. Different
|
||||
from the fashion of reporting all matches in Hyperscan, Chimera only reports
|
||||
from the fashion of reporting all matches in Vectorscan, Chimera only reports
|
||||
non-overlapping matches. For example, the pattern :regexp:`/foofoo/` will
|
||||
match ``foofoofoofoo`` at offsets (0, 6) and (6, 12).
|
||||
|
||||
.. note:: Since Chimera is a hybrid of Hyperscan and PCRE in order to support
|
||||
.. note:: Since Chimera is a hybrid of Vectorscan and PCRE in order to support
|
||||
full PCRE syntax, there will be extra performance overhead compared to
|
||||
Hyperscan-only solution. Please always use Hyperscan for better performance
|
||||
a Vectorscan-only solution. Please always use Vectorscan for better performance
|
||||
unless you need full PCRE syntax support.
|
||||
|
||||
See :ref:`chruntime` for more details
|
||||
@ -83,12 +83,12 @@ Requirements
|
||||
The PCRE library (http://pcre.org/) version 8.41 is required for Chimera.
|
||||
|
||||
.. note:: Since Chimera needs to reference PCRE internal functions, please place the PCRE source
|
||||
directory under Hyperscan root directory in order to build Chimera.
|
||||
directory under Vectorscan root directory in order to build Chimera.
|
||||
|
||||
Beside this, both hardware and software requirements of Chimera are the same to Hyperscan.
|
||||
Besides this, both the hardware and software requirements of Chimera are the same as those of Vectorscan.
|
||||
See :ref:`hardware` and :ref:`software` for more details.
|
||||
|
||||
.. note:: Building Hyperscan will automatically generate Chimera library.
|
||||
.. note:: Building Vectorscan will automatically generate the Chimera library.
|
||||
Currently only the static library is supported for Chimera, so please
|
||||
use the static build type when configuring CMake build options.
|
||||
|
||||
@ -119,7 +119,7 @@ databases:
|
||||
|
||||
Compilation allows the Chimera library to analyze the given pattern(s) and
|
||||
pre-determine how to scan for these patterns in an optimized fashion using
|
||||
Hyperscan and PCRE.
|
||||
Vectorscan and PCRE.
|
||||
|
||||
===============
|
||||
Pattern Support
|
||||
@ -134,7 +134,7 @@ Semantics
|
||||
=========
|
||||
|
||||
Chimera supports the exact same semantics of PCRE library. Moreover, it supports
|
||||
multiple simultaneous pattern matching like Hyperscan and the multiple matches
|
||||
multiple simultaneous pattern matching like Vectorscan and the multiple matches
|
||||
will be reported in order by end offset.
|
||||
|
||||
.. _chruntime:
|
||||
|
@ -9,7 +9,7 @@ Compiling Patterns
|
||||
Building a Database
|
||||
*******************
|
||||
|
||||
The Hyperscan compiler API accepts regular expressions and converts them into a
|
||||
The Vectorscan compiler API accepts regular expressions and converts them into a
|
||||
compiled pattern database that can then be used to scan data.
|
||||
|
||||
The API provides three functions that compile regular expressions into
|
||||
@ -24,7 +24,7 @@ databases:
|
||||
#. :c:func:`hs_compile_ext_multi`: compiles an array of expressions as above,
|
||||
but allows :ref:`extparam` to be specified for each expression.
|
||||
|
||||
Compilation allows the Hyperscan library to analyze the given pattern(s) and
|
||||
Compilation allows the Vectorscan library to analyze the given pattern(s) and
|
||||
pre-determine how to scan for these patterns in an optimized fashion that would
|
||||
be far too expensive to compute at run-time.
|
||||
|
||||
@ -48,10 +48,10 @@ To compile patterns to be used in streaming mode, the ``mode`` parameter of
|
||||
block mode requires the use of :c:member:`HS_MODE_BLOCK` and vectored mode
|
||||
requires the use of :c:member:`HS_MODE_VECTORED`. A pattern database compiled
|
||||
for one mode (streaming, block or vectored) can only be used in that mode. The
|
||||
version of Hyperscan used to produce a compiled pattern database must match the
|
||||
version of Hyperscan used to scan with it.
|
||||
version of Vectorscan used to produce a compiled pattern database must match the
|
||||
version of Vectorscan used to scan with it.
|
||||
|
||||
Hyperscan provides support for targeting a database at a particular CPU
|
||||
Vectorscan provides support for targeting a database at a particular CPU
|
||||
platform; see :ref:`instr_specialization` for details.
|
||||
|
||||
=====================
|
||||
@ -75,14 +75,14 @@ characters exist in regular grammar like ``[``, ``]``, ``(``, ``)``, ``{``,
|
||||
While in the pure literal case, all these meta characters lose their extra meanings
|
||||
and are treated as just common ASCII codes.
|
||||
|
||||
Hyperscan is initially designed to process common regular expressions. It is
|
||||
Vectorscan was initially designed to process common regular expressions. It is
|
||||
hence embedded with a complex parser to do comprehensive regular grammar
|
||||
interpretation. Particularly, the identification of above meta characters is the
|
||||
basic step for the interpretation of far more complex regular grammars.
|
||||
|
||||
However in real cases, patterns may not always be regular expressions. They
|
||||
could just be pure literals. Problems arise if the pure literals contain
|
||||
regular meta characters. Supposing fed directly into traditional Hyperscan
|
||||
regular meta characters. If they are fed directly into the traditional Vectorscan
|
||||
compile API, all these meta characters will be interpreted in predefined ways,
|
||||
which is unnecessary and the result is totally out of expectation. To avoid
|
||||
such misunderstanding by traditional API, users have to preprocess these
|
||||
@ -90,7 +90,7 @@ literal patterns by converting the meta characters into some other formats:
|
||||
either by adding a backslash ``\`` before certain meta characters, or by
|
||||
converting all the characters into a hexadecimal representation.
|
||||
|
||||
In ``v5.2.0``, Hyperscan introduces 2 new compile APIs for pure literal patterns:
|
||||
In ``v5.2.0``, Vectorscan introduces 2 new compile APIs for pure literal patterns:
|
||||
|
||||
#. :c:func:`hs_compile_lit`: compiles a single pure literal into a pattern
|
||||
database.
|
||||
@ -106,7 +106,7 @@ content directly into these APIs without worrying about writing regular meta
|
||||
characters in their patterns. No preprocessing work is needed any more.
|
||||
|
||||
For new APIs, the ``length`` of each literal pattern is a newly added parameter.
|
||||
Hyperscan needs to locate the end position of the input expression via clearly
|
||||
Vectorscan needs to locate the end position of the input expression via clearly
|
||||
knowing each literal's length, not by simply identifying character ``\0`` of a
|
||||
string.
|
||||
|
||||
@ -127,19 +127,19 @@ Supported flags: :c:member:`HS_FLAG_CASELESS`, :c:member:`HS_FLAG_SINGLEMATCH`,
|
||||
Pattern Support
|
||||
***************
|
||||
|
||||
Hyperscan supports the pattern syntax used by the PCRE library ("libpcre"),
|
||||
Vectorscan supports the pattern syntax used by the PCRE library ("libpcre"),
|
||||
described at <http://www.pcre.org/>. However, not all constructs available in
|
||||
libpcre are supported. The use of unsupported constructs will result in
|
||||
compilation errors.
|
||||
|
||||
The version of PCRE used to validate Hyperscan's interpretation of this syntax
|
||||
The version of PCRE used to validate Vectorscan's interpretation of this syntax
|
||||
is 8.41 or above.
|
||||
|
||||
====================
|
||||
Supported Constructs
|
||||
====================
|
||||
|
||||
The following regex constructs are supported by Hyperscan:
|
||||
The following regex constructs are supported by Vectorscan:
|
||||
|
||||
* Literal characters and strings, with all libpcre quoting and character
|
||||
escapes.
|
||||
@ -177,7 +177,7 @@ The following regex constructs are supported by Hyperscan:
|
||||
:c:member:`HS_FLAG_SINGLEMATCH` flag is on for that pattern.
|
||||
|
||||
* Lazy modifiers (:regexp:`?` appended to another quantifier, e.g.
|
||||
:regexp:`\\w+?`) are supported but ignored (as Hyperscan reports all
|
||||
:regexp:`\\w+?`) are supported but ignored (as Vectorscan reports all
|
||||
matches).
|
||||
|
||||
* Parenthesization, including the named and unnamed capturing and
|
||||
@ -219,15 +219,15 @@ The following regex constructs are supported by Hyperscan:
|
||||
.. note:: At this time, not all patterns can be successfully compiled with the
|
||||
:c:member:`HS_FLAG_SOM_LEFTMOST` flag, which enables per-pattern support for
|
||||
:ref:`som`. The patterns that support this flag are a subset of patterns that
|
||||
can be successfully compiled with Hyperscan; notably, many bounded repeat
|
||||
forms that can be compiled with Hyperscan without the Start of Match flag
|
||||
can be successfully compiled with Vectorscan; notably, many bounded repeat
|
||||
forms that can be compiled with Vectorscan without the Start of Match flag
|
||||
enabled cannot be compiled with the flag enabled.
|
||||
|
||||
======================
|
||||
Unsupported Constructs
|
||||
======================
|
||||
|
||||
The following regex constructs are not supported by Hyperscan:
|
||||
The following regex constructs are not supported by Vectorscan:
|
||||
|
||||
* Backreferences and capturing sub-expressions.
|
||||
* Arbitrary zero-width assertions.
|
||||
@ -246,32 +246,32 @@ The following regex constructs are not supported by Hyperscan:
|
||||
Semantics
|
||||
*********
|
||||
|
||||
While Hyperscan follows libpcre syntax, it provides different semantics. The
|
||||
While Vectorscan follows libpcre syntax, it provides different semantics. The
|
||||
major departures from libpcre semantics are motivated by the requirements of
|
||||
streaming and multiple simultaneous pattern matching.
|
||||
|
||||
The major departures from libpcre semantics are:
|
||||
|
||||
#. **Multiple pattern matching**: Hyperscan allows matches to be reported for
|
||||
#. **Multiple pattern matching**: Vectorscan allows matches to be reported for
|
||||
several patterns simultaneously. This is not equivalent to separating the
|
||||
patterns by :regexp:`|` in libpcre, which evaluates alternations
|
||||
left-to-right.
|
||||
|
||||
#. **Lack of ordering**: the multiple matches that Hyperscan produces are not
|
||||
#. **Lack of ordering**: the multiple matches that Vectorscan produces are not
|
||||
guaranteed to be ordered, although they will always fall within the bounds of
|
||||
the current scan.
|
||||
|
||||
#. **End offsets only**: Hyperscan's default behaviour is only to report the end
|
||||
#. **End offsets only**: Vectorscan's default behaviour is only to report the end
|
||||
offset of a match. Reporting of the start offset can be enabled with
|
||||
per-expression flags at pattern compile time. See :ref:`som` for details.
|
||||
|
||||
#. **"All matches" reported**: scanning :regexp:`/foo.*bar/` against
|
||||
``fooxyzbarbar`` will return two matches from Hyperscan -- at the points
|
||||
``fooxyzbarbar`` will return two matches from Vectorscan -- at the points
|
||||
corresponding to the ends of ``fooxyzbar`` and ``fooxyzbarbar``. In contrast,
|
||||
libpcre semantics by default would report only one match at ``fooxyzbarbar``
|
||||
(greedy semantics) or, if non-greedy semantics were switched on, one match at
|
||||
``fooxyzbar``. This means that switching between greedy and non-greedy
|
||||
semantics is a no-op in Hyperscan.
|
||||
semantics is a no-op in Vectorscan.
|
||||
|
||||
To support libpcre quantifier semantics while accurately reporting streaming
|
||||
matches at the time they occur is impossible. For example, consider the pattern
|
||||
@ -299,7 +299,7 @@ as in block 3 -- which would constitute a better match for the pattern.
|
||||
Start of Match
|
||||
==============
|
||||
|
||||
In standard operation, Hyperscan will only provide the end offset of a match
|
||||
In standard operation, Vectorscan will only provide the end offset of a match
|
||||
when the match callback is called. If the :c:member:`HS_FLAG_SOM_LEFTMOST` flag
|
||||
is specified for a particular pattern, then the same set of matches is
|
||||
returned, but each match will also provide the leftmost possible start offset
|
||||
@ -308,7 +308,7 @@ corresponding to its end offset.
|
||||
Using the SOM flag entails a number of trade-offs and limitations:
|
||||
|
||||
* Reduced pattern support: For many patterns, tracking SOM is complex and can
|
||||
result in Hyperscan failing to compile a pattern with a "Pattern too
|
||||
result in Vectorscan failing to compile a pattern with a "Pattern too
|
||||
large" error, even if the pattern is supported in normal operation.
|
||||
* Increased stream state: At scan time, state space is required to track
|
||||
potential SOM offsets, and this must be stored in persistent stream state in
|
||||
@ -316,20 +316,20 @@ Using the SOM flag entails a number of trade-offs and limitations:
|
||||
required to match a pattern.
|
||||
* Performance overhead: Similarly, there is generally a performance cost
|
||||
associated with tracking SOM.
|
||||
* Incompatible features: Some other Hyperscan pattern flags (such as
|
||||
* Incompatible features: Some other Vectorscan pattern flags (such as
|
||||
:c:member:`HS_FLAG_SINGLEMATCH` and :c:member:`HS_FLAG_PREFILTER`) can not be
|
||||
used in combination with SOM. Specifying them together with
|
||||
:c:member:`HS_FLAG_SOM_LEFTMOST` will result in a compilation error.
|
||||
|
||||
In streaming mode, the amount of precision delivered by SOM can be controlled
|
||||
with the SOM horizon flags. These instruct Hyperscan to deliver accurate SOM
|
||||
with the SOM horizon flags. These instruct Vectorscan to deliver accurate SOM
|
||||
information within a certain distance of the end offset, and return a special
|
||||
start offset of :c:member:`HS_OFFSET_PAST_HORIZON` otherwise. Specifying a
|
||||
small or medium SOM horizon will usually reduce the stream state required for a
|
||||
given database.
|
||||
|
||||
.. note:: In streaming mode, the start offset returned for a match may refer to
|
||||
a point in the stream *before* the current block being scanned. Hyperscan
|
||||
a point in the stream *before* the current block being scanned. Vectorscan
|
||||
provides no facility for accessing earlier blocks; if the calling application
|
||||
needs to inspect historical data, then it must store it itself.
|
||||
|
||||
@ -341,7 +341,7 @@ Extended Parameters
|
||||
|
||||
In some circumstances, more control over the matching behaviour of a pattern is
|
||||
required than can be specified easily using regular expression syntax. For
|
||||
these scenarios, Hyperscan provides the :c:func:`hs_compile_ext_multi` function
|
||||
these scenarios, Vectorscan provides the :c:func:`hs_compile_ext_multi` function
|
||||
that allows a set of "extended parameters" to be set on a per-pattern basis.
|
||||
|
||||
Extended parameters are specified using an :c:type:`hs_expr_ext_t` structure,
|
||||
@ -383,18 +383,18 @@ section.
|
||||
Prefiltering Mode
|
||||
=================
|
||||
|
||||
Hyperscan provides a per-pattern flag, :c:member:`HS_FLAG_PREFILTER`, which can
|
||||
be used to implement a prefilter for a pattern than Hyperscan would not
|
||||
Vectorscan provides a per-pattern flag, :c:member:`HS_FLAG_PREFILTER`, which can
|
||||
be used to implement a prefilter for a pattern that Vectorscan would not
|
||||
ordinarily support.
|
||||
|
||||
This flag instructs Hyperscan to compile an "approximate" version of this
|
||||
pattern for use in a prefiltering application, even if Hyperscan does not
|
||||
This flag instructs Vectorscan to compile an "approximate" version of this
|
||||
pattern for use in a prefiltering application, even if Vectorscan does not
|
||||
support the pattern in normal operation.
|
||||
|
||||
The set of matches returned when this flag is used is guaranteed to be a
|
||||
superset of the matches specified by the non-prefiltering expression.
|
||||
|
||||
If the pattern contains pattern constructs not supported by Hyperscan (such as
|
||||
If the pattern contains pattern constructs not supported by Vectorscan (such as
|
||||
zero-width assertions, back-references or conditional references) these
|
||||
constructs will be replaced internally with broader constructs that may match
|
||||
more often.
|
||||
@ -404,7 +404,7 @@ back-reference :regexp:`\\1`. In prefiltering mode, this pattern might be
|
||||
approximated by having its back-reference replaced with its referent, forming
|
||||
:regexp:`/\\w+ again \\w+/`.
|
||||
|
||||
Furthermore, in prefiltering mode Hyperscan may simplify a pattern that would
|
||||
Furthermore, in prefiltering mode Vectorscan may simplify a pattern that would
|
||||
otherwise return a "Pattern too large" error at compile time, or for performance
|
||||
reasons (subject to the matching guarantee above).
|
||||
|
||||
@ -422,22 +422,22 @@ matches for the pattern.
|
||||
Instruction Set Specialization
|
||||
******************************
|
||||
|
||||
Hyperscan is able to make use of several modern instruction set features found
|
||||
Vectorscan is able to make use of several modern instruction set features found
|
||||
on x86 processors to provide improvements in scanning performance.
|
||||
|
||||
Some of these features are selected when the library is built; for example,
|
||||
Hyperscan will use the native ``POPCNT`` instruction on processors where it is
|
||||
Vectorscan will use the native ``POPCNT`` instruction on processors where it is
|
||||
available and the library has been optimized for the host architecture.
|
||||
|
||||
.. note:: By default, the Hyperscan runtime is built with the ``-march=native``
|
||||
.. note:: By default, the Vectorscan runtime is built with the ``-march=native``
|
||||
compiler flag and (where possible) will make use of all instructions known by
|
||||
the host's C compiler.
|
||||
|
||||
To use some instruction set features, however, Hyperscan must build a
|
||||
To use some instruction set features, however, Vectorscan must build a
|
||||
specialized database to support them. This means that the target platform must
|
||||
be specified at pattern compile time.
|
||||
|
||||
The Hyperscan compiler API functions all accept an optional
|
||||
The Vectorscan compiler API functions all accept an optional
|
||||
:c:type:`hs_platform_info_t` argument, which describes the target platform
|
||||
for the database to be built. If this argument is NULL, the database will be
|
||||
targeted at the current host platform.
|
||||
@ -467,7 +467,7 @@ See :ref:`api_constants` for the full list of CPU tuning and feature flags.
|
||||
Approximate matching
|
||||
********************
|
||||
|
||||
Hyperscan provides an experimental approximate matching mode, which will match
|
||||
Vectorscan provides an experimental approximate matching mode, which will match
|
||||
patterns within a given edit distance. The exact matching behavior is defined as
|
||||
follows:
|
||||
|
||||
@ -492,7 +492,7 @@ follows:
|
||||
|
||||
Here are a few examples of approximate matching:
|
||||
|
||||
* Pattern :regexp:`/foo/` can match ``foo`` when using regular Hyperscan
|
||||
* Pattern :regexp:`/foo/` can match ``foo`` when using regular Vectorscan
|
||||
matching behavior. With approximate matching within edit distance 2, the
|
||||
pattern will produce matches when scanned against ``foo``, ``foooo``, ``f00``,
|
||||
``f``, and anything else that lies within edit distance 2 of matching corpora
|
||||
@ -513,7 +513,7 @@ matching support. Here they are, in a nutshell:
|
||||
* Reduced pattern support:
|
||||
|
||||
* For many patterns, approximate matching is complex and can result in
|
||||
Hyperscan failing to compile a pattern with a "Pattern too large" error,
|
||||
Vectorscan failing to compile a pattern with a "Pattern too large" error,
|
||||
even if the pattern is supported in normal operation.
|
||||
* Additionally, some patterns cannot be approximately matched because they
|
||||
reduce to so-called "vacuous" patterns (patterns that match everything). For
|
||||
@ -548,7 +548,7 @@ Logical Combinations
|
||||
********************
|
||||
|
||||
For situations when a user requires behaviour that depends on the presence or
|
||||
absence of matches from groups of patterns, Hyperscan provides support for the
|
||||
absence of matches from groups of patterns, Vectorscan provides support for the
|
||||
logical combination of patterns in a given pattern set, with three operators:
|
||||
``NOT``, ``AND`` and ``OR``.
|
||||
|
||||
@ -561,7 +561,7 @@ offset is *true* if the expression it refers to is *false* at this offset.
|
||||
For example, ``NOT 101`` means that expression 101 has not yet matched at this
|
||||
offset.
|
||||
|
||||
A logical combination is passed to Hyperscan at compile time as an expression.
|
||||
A logical combination is passed to Vectorscan at compile time as an expression.
|
||||
This combination expression will raise matches at every offset where one of its
|
||||
sub-expressions matches and the logical value of the whole expression is *true*.
|
||||
|
||||
@ -603,7 +603,7 @@ In a logical combination expression:
|
||||
* Whitespace is ignored.
|
||||
|
||||
To use a logical combination expression, it must be passed to one of the
|
||||
Hyperscan compile functions (:c:func:`hs_compile_multi`,
|
||||
Vectorscan compile functions (:c:func:`hs_compile_multi`,
|
||||
:c:func:`hs_compile_ext_multi`) along with the :c:member:`HS_FLAG_COMBINATION` flag,
|
||||
which identifies the pattern as a logical combination expression. The patterns
|
||||
referred to in the logical combination expression must be compiled together in
|
||||
@ -613,7 +613,7 @@ When an expression has the :c:member:`HS_FLAG_COMBINATION` flag set, it ignores
|
||||
all other flags except the :c:member:`HS_FLAG_SINGLEMATCH` flag and the
|
||||
:c:member:`HS_FLAG_QUIET` flag.
|
||||
|
||||
Hyperscan will accept logical combination expressions at compile time that
|
||||
Vectorscan will accept logical combination expressions at compile time that
|
||||
evaluate to *true* when no patterns have matched, and report the match for
|
||||
combination at end of data if no patterns have matched; for example: ::
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Hyperscan documentation build configuration file, created by
|
||||
# Vectorscan documentation build configuration file, created by
|
||||
# sphinx-quickstart on Tue Sep 29 15:59:19 2015.
|
||||
#
|
||||
# This file is execfile()d with the current directory set to its
|
||||
@ -43,8 +43,8 @@ source_suffix = '.rst'
|
||||
master_doc = 'index'
|
||||
|
||||
# General information about the project.
|
||||
project = u'Hyperscan'
|
||||
copyright = u'2015-2018, Intel Corporation'
|
||||
project = u'Vectorscan'
|
||||
copyright = u'2015-2020, Intel Corporation; 2020-2024, VectorCamp; and other contributors'
|
||||
|
||||
# The version info for the project you're documenting, acts as replacement for
|
||||
# |version| and |release|, also used in various other places throughout the
|
||||
@ -202,7 +202,7 @@ latex_elements = {
|
||||
# (source start file, target name, title,
|
||||
# author, documentclass [howto, manual, or own class]).
|
||||
latex_documents = [
|
||||
('index', 'Hyperscan.tex', u'Hyperscan Documentation',
|
||||
('index', 'Hyperscan.tex', u'Vectorscan Documentation',
|
||||
u'Intel Corporation', 'manual'),
|
||||
]
|
||||
|
||||
@ -232,8 +232,8 @@ latex_documents = [
|
||||
# One entry per manual page. List of tuples
|
||||
# (source start file, name, description, authors, manual section).
|
||||
man_pages = [
|
||||
('index', 'hyperscan', u'Hyperscan Documentation',
|
||||
[u'Intel Corporation'], 1)
|
||||
('index', 'vectorscan', u'Vectorscan Documentation',
|
||||
[u'Intel Corporation'], 7)
|
||||
]
|
||||
|
||||
# If true, show URL addresses after external links.
|
||||
@ -246,8 +246,8 @@ man_pages = [
|
||||
# (source start file, target name, title, author,
|
||||
# dir menu entry, description, category)
|
||||
texinfo_documents = [
|
||||
('index', 'Hyperscan', u'Hyperscan Documentation',
|
||||
u'Intel Corporation', 'Hyperscan', 'High-performance regular expression matcher.',
|
||||
('index', 'Vectorscan', u'Vectorscan Documentation',
|
||||
u'Intel Corporation; VectorCamp', 'Vectorscan', 'High-performance regular expression matcher.',
|
||||
'Miscellaneous'),
|
||||
]
|
||||
|
||||
|
@ -7,43 +7,41 @@ Getting Started
|
||||
Very Quick Start
|
||||
****************
|
||||
|
||||
#. Clone Hyperscan ::
|
||||
#. Clone Vectorscan ::
|
||||
|
||||
cd <where-you-want-hyperscan-source>
|
||||
git clone git://github.com/intel/hyperscan
|
||||
cd <where-you-want-vectorscan-source>
|
||||
git clone https://github.com/VectorCamp/vectorscan
|
||||
|
||||
#. Configure Hyperscan
|
||||
#. Configure Vectorscan
|
||||
|
||||
Ensure that you have the correct :ref:`dependencies <software>` present,
|
||||
and then:
|
||||
|
||||
::
|
||||
|
||||
cd <where-you-want-to-build-hyperscan>
|
||||
cd <where-you-want-to-build-vectorscan>
|
||||
mkdir <build-dir>
|
||||
cd <build-dir>
|
||||
cmake [-G <generator>] [options] <hyperscan-source-path>
|
||||
cmake [-G <generator>] [options] <vectorscan-source-path>
|
||||
|
||||
Known working generators:
|
||||
* ``Unix Makefiles`` --- make-compatible makefiles (default on Linux/FreeBSD/Mac OS X)
|
||||
* ``Ninja`` --- `Ninja <http://martine.github.io/ninja/>`_ build files.
|
||||
* ``Visual Studio 15 2017`` --- Visual Studio projects
|
||||
|
||||
Generators that might work include:
|
||||
Unsupported generators that might work include:
|
||||
* ``Xcode`` --- OS X Xcode projects.
|
||||
|
||||
#. Build Hyperscan
|
||||
#. Build Vectorscan
|
||||
|
||||
Depending on the generator used:
|
||||
* ``cmake --build .`` --- will build everything
|
||||
* ``make -j<jobs>`` --- use makefiles in parallel
|
||||
* ``ninja`` --- use Ninja build
|
||||
* ``MsBuild.exe`` --- use Visual Studio MsBuild
|
||||
* etc.
|
||||
|
||||
#. Check Hyperscan
|
||||
#. Check Vectorscan
|
||||
|
||||
Run the Hyperscan unit tests: ::
|
||||
Run the Vectorscan unit tests: ::
|
||||
|
||||
bin/unit-hyperscan
|
||||
|
||||
@ -55,20 +53,23 @@ Requirements
|
||||
Hardware
|
||||
========
|
||||
|
||||
Hyperscan will run on x86 processors in 64-bit (Intel\ |reg| 64 Architecture) and
|
||||
32-bit (IA-32 Architecture) modes.
|
||||
Vectorscan will run on x86 processors in 64-bit (Intel\ |reg| 64 Architecture) and
|
||||
32-bit (IA-32 Architecture) modes as well as Arm v8.0+ aarch64, and POWER 8+ ppc64le
|
||||
machines.
|
||||
|
||||
Vectorscan is a high performance software library that takes advantage of recent
|
||||
Intel architecture advances. At a minimum, support for Supplemental Streaming
|
||||
SIMD Extensions 3 (SSSE3) is required, which should be available on any modern
|
||||
x86 processor.
|
||||
architecture advances.
|
||||
|
||||
Additionally, Hyperscan can make use of:
|
||||
Additionally, Vectorscan can make use of:
|
||||
|
||||
* Intel Streaming SIMD Extensions 4.2 (SSE4.2)
|
||||
* the POPCNT instruction
|
||||
* Bit Manipulation Instructions (BMI, BMI2)
|
||||
* Intel Advanced Vector Extensions 2 (Intel AVX2)
|
||||
* Arm NEON
|
||||
* Arm SVE and SVE2
|
||||
* Arm SVE2 BITPERM
|
||||
* IBM Power8/Power9 VSX
|
||||
|
||||
if present.
|
||||
|
||||
@ -79,40 +80,34 @@ These can be determined at library compile time, see :ref:`target_arch`.
|
||||
Software
|
||||
========
|
||||
|
||||
As a software library, Hyperscan doesn't impose any particular runtime
|
||||
software requirements, however to build the Hyperscan library we require a
|
||||
modern C and C++ compiler -- in particular, Hyperscan requires C99 and C++11
|
||||
As a software library, Vectorscan doesn't impose any particular runtime
|
||||
software requirements, however to build the Vectorscan library we require a
|
||||
modern C and C++ compiler -- in particular, Vectorscan requires C99 and C++17
|
||||
compiler support. The supported compilers are:
|
||||
|
||||
* GCC, v4.8.1 or higher
|
||||
* Clang, v3.4 or higher (with libstdc++ or libc++)
|
||||
* Intel C++ Compiler v15 or higher
|
||||
* Visual C++ 2017 Build Tools
|
||||
* GCC, v9 or higher
|
||||
* Clang, v5 or higher (with libstdc++ or libc++)
|
||||
|
||||
Examples of operating systems that Hyperscan is known to work on include:
|
||||
Examples of operating systems that Vectorscan is known to work on include:
|
||||
|
||||
Linux:
|
||||
|
||||
* Ubuntu 14.04 LTS or newer
|
||||
* Ubuntu 20.04 LTS or newer
|
||||
* RedHat/CentOS 7 or newer
|
||||
* Fedora 38 or newer
|
||||
* Debian 10
|
||||
|
||||
FreeBSD:
|
||||
|
||||
* 10.0 or newer
|
||||
|
||||
Windows:
|
||||
|
||||
* 8 or newer
|
||||
|
||||
Mac OS X:
|
||||
|
||||
* 10.8 or newer, using XCode/Clang
|
||||
|
||||
Hyperscan *may* compile and run on other platforms, but there is no guarantee.
|
||||
We currently have experimental support for Windows using Intel C++ Compiler
|
||||
or Visual Studio 2017.
|
||||
Vectorscan *may* compile and run on other platforms, but there is no guarantee.
|
||||
|
||||
In addition, the following software is required for compiling the Hyperscan library:
|
||||
In addition, the following software is required for compiling the Vectorscan library:
|
||||
|
||||
======================================================= =========== ======================================
|
||||
Dependency Version Notes
|
||||
@ -132,20 +127,20 @@ Ragel, you may use Cygwin to build it from source.
|
||||
Boost Headers
|
||||
-------------
|
||||
|
||||
Compiling Hyperscan depends on a recent version of the Boost C++ header
|
||||
Compiling Vectorscan depends on a recent version of the Boost C++ header
|
||||
library. If the Boost libraries are installed on the build machine in the
|
||||
usual paths, CMake will find them. If the Boost libraries are not installed,
|
||||
the location of the Boost source tree can be specified during the CMake
|
||||
configuration step using the ``BOOST_ROOT`` variable (described below).
|
||||
|
||||
Another alternative is to put a copy of (or a symlink to) the boost
|
||||
subdirectory in ``<hyperscan-source-path>/include/boost``.
|
||||
subdirectory in ``<vectorscan-source-path>/include/boost``.
|
||||
|
||||
For example: for the Boost-1.59.0 release: ::
|
||||
|
||||
ln -s boost_1_59_0/boost <hyperscan-source-path>/include/boost
|
||||
ln -s boost_1_59_0/boost <vectorscan-source-path>/include/boost
|
||||
|
||||
As Hyperscan uses the header-only parts of Boost, it is not necessary to
|
||||
As Vectorscan uses the header-only parts of Boost, it is not necessary to
|
||||
compile the Boost libraries.
|
||||
|
||||
CMake Configuration
|
||||
@ -168,11 +163,12 @@ Common options for CMake include:
|
||||
| | Valid options are Debug, Release, RelWithDebInfo, |
|
||||
| | and MinSizeRel. Default is RelWithDebInfo. |
|
||||
+------------------------+----------------------------------------------------+
|
||||
| BUILD_SHARED_LIBS | Build Hyperscan as a shared library instead of |
|
||||
| BUILD_SHARED_LIBS | Build Vectorscan as a shared library instead of |
|
||||
| | the default static library. |
|
||||
| | Default: Off |
|
||||
+------------------------+----------------------------------------------------+
|
||||
| BUILD_STATIC_AND_SHARED| Build both static and shared Hyperscan libs. |
|
||||
| | Default off. |
|
||||
| BUILD_STATIC_LIBS | Build Vectorscan as a static library. |
|
||||
| | Default: On |
|
||||
+------------------------+----------------------------------------------------+
|
||||
| BOOST_ROOT | Location of Boost source tree. |
|
||||
+------------------------+----------------------------------------------------+
|
||||
@ -180,12 +176,64 @@ Common options for CMake include:
|
||||
+------------------------+----------------------------------------------------+
|
||||
| FAT_RUNTIME | Build the :ref:`fat runtime<fat_runtime>`. Default |
|
||||
| | true on Linux, not available elsewhere. |
|
||||
| | Default: Off |
|
||||
+------------------------+----------------------------------------------------+
|
||||
| USE_CPU_NATIVE | Native CPU detection is off by default, however it |
|
||||
| | is possible to build a performance-oriented non-fat|
|
||||
| | library tuned to your CPU. |
|
||||
| | Default: Off |
|
||||
+------------------------+----------------------------------------------------+
|
||||
| SANITIZE | Use libasan sanitizer to detect possible bugs. |
|
||||
| | Valid options are address, memory and undefined. |
|
||||
+------------------------+----------------------------------------------------+
|
||||
| SIMDE_BACKEND | Enable SIMDe backend. If this is chosen all native |
|
||||
| | (SSE/AVX/AVX512/Neon/SVE/VSX) backends will be |
|
||||
| | disabled and a SIMDe SSE4.2 emulation backend will |
|
||||
| | be enabled. This will enable Vectorscan to build |
|
||||
| | and run on architectures without SIMD. |
|
||||
| | Default: Off |
|
||||
+------------------------+----------------------------------------------------+
|
||||
| SIMDE_NATIVE | Enable SIMDe native emulation of x86 SSE4.2 |
|
||||
| | intrinsics on the building platform. That is, |
|
||||
| | SSE4.2 intrinsics will be emulated using Neon on |
|
||||
| | an Arm platform, or VSX on a Power platform, etc. |
|
||||
| | Default: Off |
|
||||
+------------------------+----------------------------------------------------+
|
||||
|
||||
X86 platform specific options include:
|
||||
|
||||
+------------------------+----------------------------------------------------+
|
||||
| Variable | Description |
|
||||
+========================+====================================================+
|
||||
| BUILD_AVX2 | Enable code for AVX2. |
|
||||
+------------------------+----------------------------------------------------+
|
||||
| BUILD_AVX512 | Enable code for AVX512. Implies BUILD_AVX2. |
|
||||
+------------------------+----------------------------------------------------+
|
||||
| BUILD_AVX512VBMI | Enable code for AVX512 with VBMI extension. Implies|
|
||||
| | BUILD_AVX512. |
|
||||
+------------------------+----------------------------------------------------+
|
||||
|
||||
Arm platform specific options include:
|
||||
|
||||
+------------------------+----------------------------------------------------+
|
||||
| Variable | Description |
|
||||
+========================+====================================================+
|
||||
| BUILD_SVE | Enable code for SVE, like on AWS Graviton3 CPUs. |
|
||||
| | Not much code is ported just for SVE, but enabling |
|
||||
| | SVE code production, does improve code generation, |
|
||||
| | see Benchmarks. |
|
||||
+------------------------+----------------------------------------------------+
|
||||
| BUILD_SVE2 | Enable code for SVE2, implies BUILD_SVE. Most |
|
||||
| | non-Neon code is written for SVE2. |
|
||||
+------------------------+----------------------------------------------------+
|
||||
| BUILD_SVE2_BITPERM | Enable code for SVE2_BITPERM hardware feature, |
|
||||
| | implies BUILD_SVE2. |
|
||||
+------------------------+----------------------------------------------------+
|
||||
|
||||
For example, to generate a ``Debug`` build: ::
|
||||
|
||||
cd <build-dir>
|
||||
cmake -DCMAKE_BUILD_TYPE=Debug <hyperscan-source-path>
|
||||
cmake -DCMAKE_BUILD_TYPE=Debug <vectorscan-source-path>
|
||||
|
||||
|
||||
|
||||
@ -193,7 +241,7 @@ Build Type
|
||||
----------
|
||||
|
||||
CMake determines a number of features for a build based on the Build Type.
|
||||
Hyperscan defaults to ``RelWithDebInfo``, i.e. "release with debugging
|
||||
Vectorscan defaults to ``RelWithDebInfo``, i.e. "release with debugging
|
||||
information". This is a performance optimized build without runtime assertions
|
||||
but with debug symbols enabled.
|
||||
|
||||
@ -201,7 +249,7 @@ The other types of builds are:
|
||||
|
||||
* ``Release``: as above, but without debug symbols
|
||||
* ``MinSizeRel``: a stripped release build
|
||||
* ``Debug``: used when developing Hyperscan. Includes runtime assertions
|
||||
* ``Debug``: used when developing Vectorscan. Includes runtime assertions
|
||||
(which has a large impact on runtime performance), and will also enable
|
||||
some other build features like building internal unit
|
||||
tests.
|
||||
@ -211,7 +259,7 @@ The other types of builds are:
|
||||
Target Architecture
|
||||
-------------------
|
||||
|
||||
Unless using the :ref:`fat runtime<fat_runtime>`, by default Hyperscan will be
|
||||
Unless using the :ref:`fat runtime<fat_runtime>`, by default Vectorscan will be
|
||||
compiled to target the instruction set of the processor of the machine that
|
||||
is being used for compilation. This is done via the use of ``-march=native``. The
|
||||
result of this means that a library built on one machine may not work on a
|
||||
@ -223,7 +271,7 @@ CMake, or ``CMAKE_C_FLAGS`` and ``CMAKE_CXX_FLAGS`` on the CMake command line. F
|
||||
example, to set the instruction subsets up to ``SSE4.2`` using GCC 4.8: ::
|
||||
|
||||
cmake -DCMAKE_C_FLAGS="-march=corei7" \
|
||||
-DCMAKE_CXX_FLAGS="-march=corei7" <hyperscan-source-path>
|
||||
-DCMAKE_CXX_FLAGS="-march=corei7" <vectorscan-source-path>
|
||||
|
||||
For more information, refer to :ref:`instr_specialization`.
|
||||
|
||||
@ -232,17 +280,17 @@ For more information, refer to :ref:`instr_specialization`.
|
||||
Fat Runtime
|
||||
-----------
|
||||
|
||||
A feature introduced in Hyperscan v4.4 is the ability for the Hyperscan
|
||||
A feature introduced in Hyperscan v4.4 is the ability for the Vectorscan
|
||||
library to dispatch the most appropriate runtime code for the host processor.
|
||||
This feature is called the "fat runtime", as a single Hyperscan library
|
||||
This feature is called the "fat runtime", as a single Vectorscan library
|
||||
contains multiple copies of the runtime code for different instruction sets.
|
||||
|
||||
.. note::
|
||||
|
||||
The fat runtime feature is only available on Linux. Release builds of
|
||||
Hyperscan will default to having the fat runtime enabled where supported.
|
||||
Vectorscan will default to having the fat runtime enabled where supported.
|
||||
|
||||
When building the library with the fat runtime, the Hyperscan runtime code
|
||||
When building the library with the fat runtime, the Vectorscan runtime code
|
||||
will be compiled multiple times for these different instruction sets, and
|
||||
these compiled objects are combined into one library. There are no changes to
|
||||
how user applications are built against this library.
|
||||
@ -254,11 +302,11 @@ resolved so that the right version of each API function is used. There is no
|
||||
impact on function call performance, as this check and resolution is performed
|
||||
by the ELF loader once when the binary is loaded.
|
||||
|
||||
If the Hyperscan library is used on x86 systems without ``SSSE3``, the runtime
|
||||
If the Vectorscan library is used on x86 systems without ``SSE4.2``, the runtime
|
||||
API functions will resolve to functions that return :c:member:`HS_ARCH_ERROR`
|
||||
instead of potentially executing illegal instructions. The API function
|
||||
:c:func:`hs_valid_platform` can be used by application writers to determine if
|
||||
the current platform is supported by Hyperscan.
|
||||
the current platform is supported by Vectorscan.
|
||||
|
||||
As of this release, the variants of the runtime that are built, and the CPU
|
||||
capability that is required, are the following:
|
||||
@ -299,6 +347,11 @@ capability that is required, are the following:
|
||||
|
||||
cmake -DBUILD_AVX512VBMI=on <...>
|
||||
|
||||
Vectorscan adds support for Arm processors and SVE, SVE2 and SVE2_BITPERM.
|
||||
example: ::
|
||||
|
||||
cmake -DBUILD_SVE=ON -DBUILD_SVE2=ON -DBUILD_SVE2_BITPERM=ON <...>
|
||||
|
||||
As the fat runtime requires compiler, libc, and binutils support, at this time
|
||||
it will only be enabled for Linux builds where the compiler supports the
|
||||
`indirect function "ifunc" function attribute
|
||||
|
@ -1,5 +1,5 @@
|
||||
################################################
|
||||
Hyperscan |version| Developer's Reference Guide
|
||||
Vectorscan |version| Developer's Reference Guide
|
||||
################################################
|
||||
|
||||
-------
|
||||
|
@ -5,11 +5,11 @@
|
||||
Introduction
|
||||
############
|
||||
|
||||
Hyperscan is a software regular expression matching engine designed with
|
||||
Vectorscan is a software regular expression matching engine designed with
|
||||
high performance and flexibility in mind. It is implemented as a library that
|
||||
exposes a straightforward C API.
|
||||
|
||||
The Hyperscan API itself is composed of two major components:
|
||||
The Vectorscan API itself is composed of two major components:
|
||||
|
||||
***********
|
||||
Compilation
|
||||
@ -17,7 +17,7 @@ Compilation
|
||||
|
||||
These functions take a group of regular expressions, along with identifiers and
|
||||
option flags, and compile them into an immutable database that can be used by
|
||||
the Hyperscan scanning API. This compilation process performs considerable
|
||||
the Vectorscan scanning API. This compilation process performs considerable
|
||||
analysis and optimization work in order to build a database that will match the
|
||||
given expressions efficiently.
|
||||
|
||||
@ -36,8 +36,8 @@ See :ref:`compilation` for more detail.
|
||||
Scanning
|
||||
********
|
||||
|
||||
Once a Hyperscan database has been created, it can be used to scan data in
|
||||
memory. Hyperscan provides several scanning modes, depending on whether the
|
||||
Once a Vectorscan database has been created, it can be used to scan data in
|
||||
memory. Vectorscan provides several scanning modes, depending on whether the
|
||||
data to be scanned is available as a single contiguous block, whether it is
|
||||
distributed amongst several blocks in memory at the same time, or whether it is
|
||||
to be scanned as a sequence of blocks in a stream.
|
||||
@ -45,7 +45,7 @@ to be scanned as a sequence of blocks in a stream.
|
||||
Matches are delivered to the application via a user-supplied callback function
|
||||
that is called synchronously for each match.
|
||||
|
||||
For a given database, Hyperscan provides several guarantees:
|
||||
For a given database, Vectorscan provides several guarantees:
|
||||
|
||||
* No memory allocations occur at runtime with the exception of two
|
||||
fixed-size allocations, both of which should be done ahead of time for
|
||||
@ -56,7 +56,7 @@ For a given database, Hyperscan provides several guarantees:
|
||||
call.
|
||||
- **Stream state**: in streaming mode only, some state space is required to
|
||||
store data that persists between scan calls for each stream. This allows
|
||||
Hyperscan to track matches that span multiple blocks of data.
|
||||
Vectorscan to track matches that span multiple blocks of data.
|
||||
|
||||
* The sizes of the scratch space and stream state (in streaming mode) required
|
||||
for a given database are fixed and determined at database compile time. This
|
||||
@ -64,7 +64,7 @@ For a given database, Hyperscan provides several guarantees:
|
||||
time, and these structures can be pre-allocated if required for performance
|
||||
reasons.
|
||||
|
||||
* Any pattern that has successfully been compiled by the Hyperscan compiler can
|
||||
* Any pattern that has successfully been compiled by the Vectorscan compiler can
|
||||
be scanned against any input. There are no internal resource limits or other
|
||||
limitations at runtime that could cause a scan call to return an error.
|
||||
|
||||
@ -74,12 +74,12 @@ See :ref:`runtime` for more detail.
|
||||
Tools
|
||||
*****
|
||||
|
||||
Some utilities for testing and benchmarking Hyperscan are included with the
|
||||
Some utilities for testing and benchmarking Vectorscan are included with the
|
||||
library. See :ref:`tools` for more information.
|
||||
|
||||
************
|
||||
Example Code
|
||||
************
|
||||
|
||||
Some simple example code demonstrating the use of the Hyperscan API is
|
||||
available in the ``examples/`` subdirectory of the Hyperscan distribution.
|
||||
Some simple example code demonstrating the use of the Vectorscan API is
|
||||
available in the ``examples/`` subdirectory of the Vectorscan distribution.
|
||||
|
@ -4,7 +4,7 @@
|
||||
Performance Considerations
|
||||
##########################
|
||||
|
||||
Hyperscan supports a wide range of patterns in all three scanning modes. It is
|
||||
Vectorscan supports a wide range of patterns in all three scanning modes. It is
|
||||
capable of extremely high levels of performance, but certain patterns can
|
||||
reduce performance markedly.
|
||||
|
||||
@ -25,7 +25,7 @@ For example, caseless matching of :regexp:`/abc/` can be written as:
|
||||
* :regexp:`/(?i)abc(?-i)/`
|
||||
* :regexp:`/abc/i`
|
||||
|
||||
Hyperscan is capable of handling all these constructs. Unless there is a
|
||||
Vectorscan is capable of handling all these constructs. Unless there is a
|
||||
specific reason otherwise, do not rewrite patterns from one form to another.
|
||||
|
||||
As another example, matching of :regexp:`/foo(bar|baz)(frotz)?/` can be
|
||||
@ -41,24 +41,24 @@ Library usage
|
||||
|
||||
.. tip:: Do not hand-optimize library usage.
|
||||
|
||||
The Hyperscan library is capable of dealing with small writes, unusually large
|
||||
The Vectorscan library is capable of dealing with small writes, unusually large
|
||||
and small pattern sets, etc. Unless there is a specific performance problem
|
||||
with some usage of the library, it is best to use Hyperscan in a simple and
|
||||
with some usage of the library, it is best to use Vectorscan in a simple and
|
||||
direct fashion. For example, it is unlikely for there to be much benefit in
|
||||
buffering input to the library into larger blocks unless streaming writes are
|
||||
tiny (say, 1-2 bytes at a time).
|
||||
|
||||
Unlike many other pattern matching products, Hyperscan will run faster with
|
||||
Unlike many other pattern matching products, Vectorscan will run faster with
|
||||
small numbers of patterns and slower with large numbers of patterns in a smooth
|
||||
fashion (as opposed to, typically, running at a moderate speed up to some fixed
|
||||
limit then either breaking or running half as fast).
|
||||
|
||||
Hyperscan also provides high-throughput matching with a single thread of
|
||||
control per core; if a database runs at 3.0 Gbps in Hyperscan it means that a
|
||||
Vectorscan also provides high-throughput matching with a single thread of
|
||||
control per core; if a database runs at 3.0 Gbps in Vectorscan it means that a
|
||||
3000-bit block of data will be scanned in 1 microsecond in a single thread of
|
||||
control, not that it is required to scan 22 3000-bit blocks of data in 22
|
||||
microseconds. Thus, it is not usually necessary to buffer data to supply
|
||||
Hyperscan with available parallelism.
|
||||
Vectorscan with available parallelism.
|
||||
|
||||
********************
|
||||
Block-based matching
|
||||
@ -72,7 +72,7 @@ accumulated before processing, it should be scanned in block rather than in
|
||||
streaming mode.
|
||||
|
||||
Unnecessary use of streaming mode reduces the number of optimizations that can
|
||||
be applied in Hyperscan and may make some patterns run slower.
|
||||
be applied in Vectorscan and may make some patterns run slower.
|
||||
|
||||
If there is a mixture of 'block' and 'streaming' mode patterns, these should be
|
||||
scanned in separate databases except in the case that the streaming patterns
|
||||
@ -107,7 +107,7 @@ Allocate scratch ahead of time
|
||||
|
||||
Scratch allocation is not necessarily a cheap operation. Since it is the first
|
||||
time (after compilation or deserialization) that a pattern database is used,
|
||||
Hyperscan performs some validation checks inside :c:func:`hs_alloc_scratch` and
|
||||
Vectorscan performs some validation checks inside :c:func:`hs_alloc_scratch` and
|
||||
must also allocate memory.
|
||||
|
||||
Therefore, it is important to ensure that :c:func:`hs_alloc_scratch` is not
|
||||
@ -329,7 +329,7 @@ Consequently, :regexp:`/foo.*bar/L` with a check on start of match values after
|
||||
the callback is considerably more expensive and general than
|
||||
:regexp:`/foo.{300}bar/`.
|
||||
|
||||
Similarly, the :c:member:`hs_expr_ext::min_length` extended parameter can be
|
||||
Similarly, the :cpp:member:`hs_expr_ext::min_length` extended parameter can be
|
||||
used to specify a lower bound on the length of the matches for a pattern. Using
|
||||
this facility may be more lightweight in some circumstances than using the SOM
|
||||
flag and post-confirming match length in the calling application.
|
||||
|
@ -6,35 +6,35 @@ Preface
|
||||
Overview
|
||||
********
|
||||
|
||||
Hyperscan is a regular expression engine designed to offer high performance, the
|
||||
Vectorscan is a regular expression engine designed to offer high performance, the
|
||||
ability to match multiple expressions simultaneously and flexibility in
|
||||
scanning operation.
|
||||
|
||||
Patterns are provided to a compilation interface which generates an immutable
|
||||
pattern database. The scan interface then can be used to scan a target data
|
||||
buffer for the given patterns, returning any matching results from that data
|
||||
buffer. Hyperscan also provides a streaming mode, in which matches that span
|
||||
buffer. Vectorscan also provides a streaming mode, in which matches that span
|
||||
several blocks in a stream are detected.
|
||||
|
||||
This document is designed to facilitate code-level integration of the Hyperscan
|
||||
This document is designed to facilitate code-level integration of the Vectorscan
|
||||
library with existing or new applications.
|
||||
|
||||
:ref:`intro` is a short overview of the Hyperscan library, with more detail on
|
||||
the Hyperscan API provided in the subsequent sections: :ref:`compilation` and
|
||||
:ref:`intro` is a short overview of the Vectorscan library, with more detail on
|
||||
the Vectorscan API provided in the subsequent sections: :ref:`compilation` and
|
||||
:ref:`runtime`.
|
||||
|
||||
:ref:`perf` provides details on various factors which may impact the
|
||||
performance of a Hyperscan integration.
|
||||
performance of a Vectorscan integration.
|
||||
|
||||
:ref:`api_constants` and :ref:`api_files` provide a detailed summary of the
|
||||
Hyperscan Application Programming Interface (API).
|
||||
Vectorscan Application Programming Interface (API).
|
||||
|
||||
********
|
||||
Audience
|
||||
********
|
||||
|
||||
This guide is aimed at developers interested in integrating Hyperscan into an
|
||||
application. For information on building the Hyperscan library, see the Quick
|
||||
This guide is aimed at developers interested in integrating Vectorscan into an
|
||||
application. For information on building the Vectorscan library, see the Quick
|
||||
Start Guide.
|
||||
|
||||
***********
|
||||
|
@ -4,7 +4,7 @@
|
||||
Scanning for Patterns
|
||||
#####################
|
||||
|
||||
Hyperscan provides three different scanning modes, each with its own scan
|
||||
Vectorscan provides three different scanning modes, each with its own scan
|
||||
function beginning with ``hs_scan``. In addition, streaming mode has a number
|
||||
of other API functions for managing stream state.
|
||||
|
||||
@ -33,8 +33,8 @@ See :c:type:`match_event_handler` for more information.
|
||||
Streaming Mode
|
||||
**************
|
||||
|
||||
The core of the Hyperscan streaming runtime API consists of functions to open,
|
||||
scan, and close Hyperscan data streams:
|
||||
The core of the Vectorscan streaming runtime API consists of functions to open,
|
||||
scan, and close Vectorscan data streams:
|
||||
|
||||
* :c:func:`hs_open_stream`: allocates and initializes a new stream for scanning.
|
||||
|
||||
@ -57,14 +57,14 @@ will return immediately with :c:member:`HS_SCAN_TERMINATED`. The caller must
|
||||
still call :c:func:`hs_close_stream` to complete the clean-up process for that
|
||||
stream.
|
||||
|
||||
Streams exist in the Hyperscan library so that pattern matching state can be
|
||||
Streams exist in the Vectorscan library so that pattern matching state can be
|
||||
maintained across multiple blocks of target data -- without maintaining this
|
||||
state, it would not be possible to detect patterns that span these blocks of
|
||||
data. This, however, does come at the cost of requiring an amount of storage
|
||||
per-stream (the size of this storage is fixed at compile time), and a slight
|
||||
performance penalty in some cases to manage the state.
|
||||
|
||||
While Hyperscan does always support a strict ordering of multiple matches,
|
||||
While Vectorscan does always support a strict ordering of multiple matches,
|
||||
streaming matches will not be delivered at offsets before the current stream
|
||||
write, with the exception of zero-width asserts, where constructs such as
|
||||
:regexp:`\\b` and :regexp:`$` can cause a match on the final character of a
|
||||
@ -76,7 +76,7 @@ Stream Management
|
||||
=================
|
||||
|
||||
In addition to :c:func:`hs_open_stream`, :c:func:`hs_scan_stream`, and
|
||||
:c:func:`hs_close_stream`, the Hyperscan API provides a number of other
|
||||
:c:func:`hs_close_stream`, the Vectorscan API provides a number of other
|
||||
functions for the management of streams:
|
||||
|
||||
* :c:func:`hs_reset_stream`: resets a stream to its initial state; this is
|
||||
@ -98,10 +98,10 @@ A stream object is allocated as a fixed size region of memory which has been
|
||||
sized to ensure that no memory allocations are required during scan
|
||||
operations. When the system is under memory pressure, it may be useful to reduce
|
||||
the memory consumed by streams that are not expected to be used soon. The
|
||||
Hyperscan API provides calls for translating a stream to and from a compressed
|
||||
Vectorscan API provides calls for translating a stream to and from a compressed
|
||||
representation for this purpose. The compressed representation differs from the
|
||||
full stream object as it does not reserve space for components which are not
|
||||
required given the current stream state. The Hyperscan API functions for this
|
||||
required given the current stream state. The Vectorscan API functions for this
|
||||
functionality are:
|
||||
|
||||
* :c:func:`hs_compress_stream`: fills the provided buffer with a compressed
|
||||
@ -157,7 +157,7 @@ scanned in block mode.
|
||||
Scratch Space
|
||||
*************
|
||||
|
||||
While scanning data, Hyperscan needs a small amount of temporary memory to store
|
||||
While scanning data, Vectorscan needs a small amount of temporary memory to store
|
||||
on-the-fly internal data. This amount is unfortunately too large to fit on the
|
||||
stack, particularly for embedded applications, and allocating memory dynamically
|
||||
is too expensive, so a pre-allocated "scratch" space must be provided to the
|
||||
@ -170,7 +170,7 @@ databases, only a single scratch region is necessary: in this case, calling
|
||||
will ensure that the scratch space is large enough to support scanning against
|
||||
any of the given databases.
|
||||
|
||||
While the Hyperscan library is re-entrant, the use of scratch spaces is not.
|
||||
While the Vectorscan library is re-entrant, the use of scratch spaces is not.
|
||||
For example, if by design it is deemed necessary to run recursive or nested
|
||||
scanning (say, from the match callback function), then an additional scratch
|
||||
space is required for that context.
|
||||
@ -219,11 +219,11 @@ For example:
|
||||
Custom Allocators
|
||||
*****************
|
||||
|
||||
By default, structures used by Hyperscan at runtime (scratch space, stream
|
||||
By default, structures used by Vectorscan at runtime (scratch space, stream
|
||||
state, etc) are allocated with the default system allocators, usually
|
||||
``malloc()`` and ``free()``.
|
||||
|
||||
The Hyperscan API provides a facility for changing this behaviour to support
|
||||
The Vectorscan API provides a facility for changing this behaviour to support
|
||||
applications that use custom memory allocators.
|
||||
|
||||
These functions are:
|
||||
|
@ -4,7 +4,7 @@
|
||||
Serialization
|
||||
#############
|
||||
|
||||
For some applications, compiling Hyperscan pattern databases immediately prior
|
||||
For some applications, compiling Vectorscan pattern databases immediately prior
|
||||
to use is not an appropriate design. Some users may wish to:
|
||||
|
||||
* Compile pattern databases on a different host;
|
||||
@ -14,9 +14,9 @@ to use is not an appropriate design. Some users may wish to:
|
||||
|
||||
* Control the region of memory in which the compiled database is located.
|
||||
|
||||
Hyperscan pattern databases are not completely flat in memory: they contain
|
||||
Vectorscan pattern databases are not completely flat in memory: they contain
|
||||
pointers and have specific alignment requirements. Therefore, they cannot be
|
||||
copied (or otherwise relocated) directly. To enable these use cases, Hyperscan
|
||||
copied (or otherwise relocated) directly. To enable these use cases, Vectorscan
|
||||
provides functionality for serializing and deserializing compiled pattern
|
||||
databases.
|
||||
|
||||
@ -40,10 +40,10 @@ The API provides the following functions:
|
||||
returns a string containing information about the database. This call is
|
||||
analogous to :c:func:`hs_database_info`.
|
||||
|
||||
.. note:: Hyperscan performs both version and platform compatibility checks
|
||||
.. note:: Vectorscan performs both version and platform compatibility checks
|
||||
upon deserialization. The :c:func:`hs_deserialize_database` and
|
||||
:c:func:`hs_deserialize_database_at` functions will only permit the
|
||||
deserialization of databases compiled with (a) the same version of Hyperscan
|
||||
deserialization of databases compiled with (a) the same version of Vectorscan
|
||||
and (b) platform features supported by the current host platform. See
|
||||
:ref:`instr_specialization` for more information on platform specialization.
|
||||
|
||||
@ -51,17 +51,17 @@ The API provides the following functions:
|
||||
The Runtime Library
|
||||
===================
|
||||
|
||||
The main Hyperscan library (``libhs``) contains both the compiler and runtime
|
||||
portions of the library. This means that in order to support the Hyperscan
|
||||
The main Vectorscan library (``libhs``) contains both the compiler and runtime
|
||||
portions of the library. This means that in order to support the Vectorscan
|
||||
compiler, which is written in C++, it requires C++ linkage and has a
|
||||
dependency on the C++ standard library.
|
||||
|
||||
Many embedded applications require only the scanning ("runtime") portion of the
|
||||
Hyperscan library. In these cases, pattern compilation generally takes place on
|
||||
Vectorscan library. In these cases, pattern compilation generally takes place on
|
||||
another host, and serialized pattern databases are delivered to the application
|
||||
for use.
|
||||
|
||||
To support these applications without requiring the C++ dependency, a
|
||||
runtime-only version of the Hyperscan library, called ``libhs_runtime``, is also
|
||||
runtime-only version of the Vectorscan library, called ``libhs_runtime``, is also
|
||||
distributed. This library does not depend on the C++ standard library and
|
||||
provides all Hyperscan functions other that those used to compile databases.
|
||||
provides all Vectorscan functions other than those used to compile databases.
|
||||
|
@ -4,14 +4,14 @@
|
||||
Tools
|
||||
#####
|
||||
|
||||
This section describes the set of utilities included with the Hyperscan library.
|
||||
This section describes the set of utilities included with the Vectorscan library.
|
||||
|
||||
********************
|
||||
Quick Check: hscheck
|
||||
********************
|
||||
|
||||
The ``hscheck`` tool allows the user to quickly check whether Hyperscan supports
|
||||
a group of patterns. If a pattern is rejected by Hyperscan's compiler, the
|
||||
The ``hscheck`` tool allows the user to quickly check whether Vectorscan supports
|
||||
a group of patterns. If a pattern is rejected by Vectorscan's compiler, the
|
||||
compile error is provided on standard output.
|
||||
|
||||
For example, given the following three patterns (the last of which contains a
|
||||
@ -34,7 +34,7 @@ syntax error) in a file called ``/tmp/test``::
|
||||
Benchmarker: hsbench
|
||||
********************
|
||||
|
||||
The ``hsbench`` tool provides an easy way to measure Hyperscan's performance
|
||||
The ``hsbench`` tool provides an easy way to measure Vectorscan's performance
|
||||
for a particular set of patterns and corpus of data to be scanned.
|
||||
|
||||
Patterns are supplied in the format described below in
|
||||
@ -44,7 +44,7 @@ easy control of how a corpus is broken into blocks and streams.
|
||||
|
||||
.. note:: A group of Python scripts for constructing corpora databases from
|
||||
various input types, such as PCAP network traffic captures or text files, can
|
||||
be found in the Hyperscan source tree in ``tools/hsbench/scripts``.
|
||||
be found in the Vectorscan source tree in ``tools/hsbench/scripts``.
|
||||
|
||||
Running hsbench
|
||||
===============
|
||||
@ -56,7 +56,7 @@ produce output like this::
|
||||
$ hsbench -e /tmp/patterns -c /tmp/corpus.db
|
||||
|
||||
Signatures: /tmp/patterns
|
||||
Hyperscan info: Version: 4.3.1 Features: AVX2 Mode: STREAM
|
||||
Vectorscan info: Version: 5.4.11 Features: AVX2 Mode: STREAM
|
||||
Expression count: 200
|
||||
Bytecode size: 342,540 bytes
|
||||
Database CRC: 0x6cd6b67c
|
||||
@ -77,7 +77,7 @@ takes to perform all twenty scans. The number of repeats can be changed with the
|
||||
``-n`` argument, and the results of each scan will be displayed if the
|
||||
``--per-scan`` argument is specified.
|
||||
|
||||
To benchmark Hyperscan on more than one core, you can supply a list of cores
|
||||
To benchmark Vectorscan on more than one core, you can supply a list of cores
|
||||
with the ``-T`` argument, which will instruct ``hsbench`` to start one
|
||||
benchmark thread per core given and compute the throughput from the time taken
|
||||
to complete all of them.
|
||||
@ -91,17 +91,17 @@ Correctness Testing: hscollider
|
||||
*******************************
|
||||
|
||||
The ``hscollider`` tool, or Pattern Collider, provides a way to verify
|
||||
Hyperscan's matching behaviour. It does this by compiling and scanning patterns
|
||||
Vectorscan's matching behaviour. It does this by compiling and scanning patterns
|
||||
(either singly or in groups) against known corpora and comparing the results
|
||||
against another engine (the "ground truth"). Two sources of ground truth for
|
||||
comparison are available:
|
||||
|
||||
* The PCRE library (http://pcre.org/).
|
||||
* An NFA simulation run on Hyperscan's compile-time graph representation. This
|
||||
* An NFA simulation run on Vectorscan's compile-time graph representation. This
|
||||
is used if PCRE cannot support the pattern or if PCRE execution fails due to
|
||||
a resource limit.
|
||||
|
||||
Much of Hyperscan's testing infrastructure is built on ``hscollider``, and the
|
||||
Much of Vectorscan's testing infrastructure is built on ``hscollider``, and the
|
||||
tool is designed to take advantage of multiple cores and provide considerable
|
||||
flexibility in controlling the test. These options are described in the help
|
||||
(``hscollider -h``) and include:
|
||||
@ -116,11 +116,11 @@ flexibility in controlling the test. These options are described in the help
|
||||
Using hscollider to debug a pattern
|
||||
===================================
|
||||
|
||||
One common use-case for ``hscollider`` is to determine whether Hyperscan will
|
||||
One common use-case for ``hscollider`` is to determine whether Vectorscan will
|
||||
match a pattern in the expected location, and whether this accords with PCRE's
|
||||
behaviour for the same case.
|
||||
|
||||
Here is an example. We put our pattern in a file in Hyperscan's pattern
|
||||
Here is an example. We put our pattern in a file in Vectorscan's pattern
|
||||
format::
|
||||
|
||||
$ cat /tmp/pat
|
||||
@ -172,7 +172,7 @@ individual matches are displayed in the output::
|
||||
|
||||
Total elapsed time: 0.00522815 secs.
|
||||
|
||||
We can see from this output that both PCRE and Hyperscan find matches ending at
|
||||
We can see from this output that both PCRE and Vectorscan find matches ending at
|
||||
offset 33 and 45, and so ``hscollider`` considers this test case to have
|
||||
passed.
|
||||
|
||||
@ -180,13 +180,13 @@ passed.
|
||||
corpus alignment 0, and ``-T 1`` instructs us to only use one thread.)
|
||||
|
||||
.. note:: In default operation, PCRE produces only one match for a scan, unlike
|
||||
Hyperscan's automata semantics. The ``hscollider`` tool uses libpcre's
|
||||
"callout" functionality to match Hyperscan's semantics.
|
||||
Vectorscan's automata semantics. The ``hscollider`` tool uses libpcre's
|
||||
"callout" functionality to match Vectorscan's semantics.
|
||||
|
||||
Running a larger scan test
|
||||
==========================
|
||||
|
||||
A set of patterns for testing purposes are distributed with Hyperscan, and these
|
||||
A set of patterns for testing purposes are distributed with Vectorscan, and these
|
||||
can be tested via ``hscollider`` on an in-tree build. Two CMake targets are
|
||||
provided to do this easily:
|
||||
|
||||
@ -202,10 +202,10 @@ Debugging: hsdump
|
||||
*****************
|
||||
|
||||
When built in debug mode (using the CMake directive ``CMAKE_BUILD_TYPE`` set to
|
||||
``Debug``), Hyperscan includes support for dumping information about its
|
||||
``Debug``), Vectorscan includes support for dumping information about its
|
||||
internals during pattern compilation with the ``hsdump`` tool.
|
||||
|
||||
This information is mostly of use to Hyperscan developers familiar with the
|
||||
This information is mostly of use to Vectorscan developers familiar with the
|
||||
library's internal structure, but can be used to diagnose issues with patterns
|
||||
and provide more information in bug reports.
|
||||
|
||||
@ -215,7 +215,7 @@ and provide more information in bug reports.
|
||||
Pattern Format
|
||||
**************
|
||||
|
||||
All of the Hyperscan tools accept patterns in the same format, read from plain
|
||||
All of the Vectorscan tools accept patterns in the same format, read from plain
|
||||
text files with one pattern per line. Each line looks like this:
|
||||
|
||||
* ``<integer id>:/<regex>/<flags>``
|
||||
@ -227,12 +227,12 @@ For example::
|
||||
3:/^.{10,20}hatstand/m
|
||||
|
||||
The integer ID is the value that will be reported when a match is found by
|
||||
Hyperscan and must be unique.
|
||||
Vectorscan and must be unique.
|
||||
|
||||
The pattern itself is a regular expression in PCRE syntax; see
|
||||
:ref:`compilation` for more information on supported features.
|
||||
|
||||
The flags are single characters that map to Hyperscan flags as follows:
|
||||
The flags are single characters that map to Vectorscan flags as follows:
|
||||
|
||||
========= ================================= ===========
|
||||
Character API Flag Description
|
||||
@ -256,7 +256,7 @@ between braces, separated by commas. For example::
|
||||
|
||||
1:/hatstand.*teakettle/s{min_offset=50,max_offset=100}
|
||||
|
||||
All Hyperscan tools will accept a pattern file (or a directory containing
|
||||
All Vectorscan tools will accept a pattern file (or a directory containing
|
||||
pattern files) with the ``-e`` argument. If no further arguments constraining
|
||||
the pattern set are given, all patterns in those files are used.
|
||||
|
||||
|
@ -605,8 +605,9 @@ double eval_set(Benchmark &bench, const Sigdata &sigs, unsigned int mode,
|
||||
scan_time = measure_stream_time(bench, repeatCount);
|
||||
}
|
||||
size_t bytes = bench.bytes();
|
||||
size_t matches = bench.matches();
|
||||
|
||||
if (diagnose) {
|
||||
size_t matches = bench.matches();
|
||||
std::ios::fmtflags f(cout.flags());
|
||||
cout << "Scan time " << std::fixed << std::setprecision(3) << scan_time
|
||||
<< " sec, Scanned " << bytes * repeatCount << " bytes, Throughput "
|
||||
|
@ -4,7 +4,7 @@ libdir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@
|
||||
includedir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_INCLUDEDIR@
|
||||
|
||||
Name: libhs
|
||||
Description: Intel(R) Hyperscan Library
|
||||
Description: A portable fork of the high-performance regular expression matching library
|
||||
Version: @HS_VERSION@
|
||||
Libs: -L${libdir} -lhs
|
||||
Cflags: -I${includedir}/hs
|
||||
|
2
simde
2
simde
@ -1 +1 @@
|
||||
Subproject commit aae22459fa284e9fc2b7d4b8e4571afa0418125f
|
||||
Subproject commit 416091ebdb9e901b29d026633e73167d6353a0b0
|
@ -542,14 +542,13 @@ u32 crc32c_sb8_64_bit(u32 running_crc, const unsigned char* p_buf,
|
||||
|
||||
// Main aligned loop, processes eight bytes at a time.
|
||||
|
||||
u32 term1, term2;
|
||||
for (size_t li = 0; li < running_length/8; li++) {
|
||||
u32 block = *(const u32 *)p_buf;
|
||||
crc ^= block;
|
||||
p_buf += 4;
|
||||
term1 = crc_tableil8_o88[crc & 0x000000FF] ^
|
||||
u32 term1 = crc_tableil8_o88[crc & 0x000000FF] ^
|
||||
crc_tableil8_o80[(crc >> 8) & 0x000000FF];
|
||||
term2 = crc >> 16;
|
||||
u32 term2 = crc >> 16;
|
||||
crc = term1 ^
|
||||
crc_tableil8_o72[term2 & 0x000000FF] ^
|
||||
crc_tableil8_o64[(term2 >> 8) & 0x000000FF];
|
||||
|
@ -79,21 +79,18 @@ static UNUSED
|
||||
const platform_t hs_current_platform_no_avx2 = {
|
||||
HS_PLATFORM_NOAVX2 |
|
||||
HS_PLATFORM_NOAVX512 |
|
||||
HS_PLATFORM_NOAVX512VBMI |
|
||||
0,
|
||||
HS_PLATFORM_NOAVX512VBMI
|
||||
};
|
||||
|
||||
static UNUSED
|
||||
const platform_t hs_current_platform_no_avx512 = {
|
||||
HS_PLATFORM_NOAVX512 |
|
||||
HS_PLATFORM_NOAVX512VBMI |
|
||||
0,
|
||||
HS_PLATFORM_NOAVX512VBMI
|
||||
};
|
||||
|
||||
static UNUSED
|
||||
const platform_t hs_current_platform_no_avx512vbmi = {
|
||||
HS_PLATFORM_NOAVX512VBMI |
|
||||
0,
|
||||
HS_PLATFORM_NOAVX512VBMI
|
||||
};
|
||||
|
||||
/*
|
||||
|
249
src/dispatcher.c
249
src/dispatcher.c
@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2020, Intel Corporation
|
||||
* Copyright (c) 2024, VectorCamp PC
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -30,6 +31,39 @@
|
||||
#include "hs_common.h"
|
||||
#include "hs_runtime.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
/* Streamlining the dispatch to eliminate runtime checking/branching:
|
||||
* What we want to do is, first call to the function will run the resolve
|
||||
* code and set the static resolved/dispatch pointer to point to the
|
||||
* correct function. Subsequent calls to the function will go directly to
|
||||
* the resolved ptr. The simplest way to accomplish this is to
|
||||
* initially set the pointer to the resolve function.
|
||||
* To accomplish this in a manner invisible to the user,
|
||||
* we do involve some rather ugly/confusing macros in here.
|
||||
* There are four macros that assemble the code for each function
|
||||
* we want to dispatch in this manner:
|
||||
* CREATE_DISPATCH
|
||||
* this generates the declarations for the candidate target functions,
|
||||
* for the fat_dispatch function pointer, for the resolve_ function,
|
||||
* points the function pointer to the resolve function, and contains
|
||||
* most of the definition of the resolve function. The very end of the
|
||||
* resolve function is completed by the next macro, because in the
|
||||
* CREATE_DISPATCH macro we have the argument list with the arg declarations,
|
||||
* which is needed to generate correct function signatures, but we
|
||||
* can't generate from this, in a macro, a _call_ to one of those functions.
|
||||
* CONNECT_ARGS_1
|
||||
* this macro fills in the actual call at the end of the resolve function,
|
||||
* with the correct arg list. Hence the name connect args.
|
||||
* CONNECT_DISPATCH_2
|
||||
* this macro likewise gives us the beginning of the definition of the
|
||||
* actual entry point function (the 'real name' that's called by the user)
|
||||
* but again in the pass-through call, cannot invoke the target without
|
||||
* getting the arg list, which is supplied by the final macro,
|
||||
* CONNECT_ARGS_3
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
|
||||
#include "util/arch/x86/cpuid_inline.h"
|
||||
#include "util/join.h"
|
||||
@ -57,30 +91,38 @@
|
||||
return (RTYPE)HS_ARCH_ERROR; \
|
||||
} \
|
||||
\
|
||||
/* resolver */ \
|
||||
static RTYPE (*JOIN(resolve_, NAME)(void))(__VA_ARGS__) { \
|
||||
if (check_avx512vbmi()) { \
|
||||
return JOIN(avx512vbmi_, NAME); \
|
||||
} \
|
||||
if (check_avx512()) { \
|
||||
return JOIN(avx512_, NAME); \
|
||||
} \
|
||||
if (check_avx2()) { \
|
||||
return JOIN(avx2_, NAME); \
|
||||
} \
|
||||
if (check_sse42() && check_popcnt()) { \
|
||||
return JOIN(corei7_, NAME); \
|
||||
} \
|
||||
if (check_ssse3()) { \
|
||||
return JOIN(core2_, NAME); \
|
||||
} \
|
||||
/* anything else is fail */ \
|
||||
return JOIN(error_, NAME); \
|
||||
} \
|
||||
/* dispatch routing pointer for this function */ \
|
||||
/* initially point it at the resolve function */ \
|
||||
static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__); \
|
||||
static RTYPE (* JOIN(fat_dispatch_, NAME))(__VA_ARGS__) = \
|
||||
&JOIN(resolve_, NAME); \
|
||||
\
|
||||
/* function */ \
|
||||
HS_PUBLIC_API \
|
||||
RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME)))
|
||||
/* resolver */ \
|
||||
static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__) { \
|
||||
if (check_avx512vbmi()) { \
|
||||
fat_dispatch_ ## NAME = &JOIN(avx512vbmi_, NAME); \
|
||||
} \
|
||||
else if (check_avx512()) { \
|
||||
fat_dispatch_ ## NAME = &JOIN(avx512_, NAME); \
|
||||
} \
|
||||
else if (check_avx2()) { \
|
||||
fat_dispatch_ ## NAME = &JOIN(avx2_, NAME); \
|
||||
} \
|
||||
else if (check_sse42() && check_popcnt()) { \
|
||||
fat_dispatch_ ## NAME = &JOIN(corei7_, NAME); \
|
||||
} \
|
||||
else if (check_ssse3()) { \
|
||||
fat_dispatch_ ## NAME = &JOIN(core2_, NAME); \
|
||||
} else { \
|
||||
/* anything else is fail */ \
|
||||
fat_dispatch_ ## NAME = &JOIN(error_, NAME); \
|
||||
} \
|
||||
|
||||
|
||||
|
||||
/* the rest of the function is completed in the CONNECT_ARGS_1 macro. */
|
||||
|
||||
|
||||
|
||||
#elif defined(ARCH_AARCH64)
|
||||
#include "util/arch/arm/cpuid_inline.h"
|
||||
@ -97,99 +139,226 @@
|
||||
return (RTYPE)HS_ARCH_ERROR; \
|
||||
} \
|
||||
\
|
||||
/* resolver */ \
|
||||
static RTYPE (*JOIN(resolve_, NAME)(void))(__VA_ARGS__) { \
|
||||
if (check_sve2()) { \
|
||||
return JOIN(sve2_, NAME); \
|
||||
} \
|
||||
if (check_sve()) { \
|
||||
return JOIN(sve_, NAME); \
|
||||
} \
|
||||
if (check_neon()) { \
|
||||
return JOIN(neon_, NAME); \
|
||||
} \
|
||||
/* anything else is fail */ \
|
||||
return JOIN(error_, NAME); \
|
||||
} \
|
||||
/* dispatch routing pointer for this function */ \
|
||||
/* initially point it at the resolve function */ \
|
||||
static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__); \
|
||||
static RTYPE (* JOIN(fat_dispatch_, NAME))(__VA_ARGS__) = \
|
||||
&JOIN(resolve_, NAME); \
|
||||
\
|
||||
/* function */ \
|
||||
HS_PUBLIC_API \
|
||||
RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME)))
|
||||
/* resolver */ \
|
||||
static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__) { \
|
||||
if (check_sve2()) { \
|
||||
fat_dispatch_ ## NAME = &JOIN(sve2_, NAME); \
|
||||
} \
|
||||
else if (check_sve()) { \
|
||||
fat_dispatch_ ## NAME = &JOIN(sve_, NAME); \
|
||||
} \
|
||||
else if (check_neon()) { \
|
||||
fat_dispatch_ ## NAME = &JOIN(neon_, NAME); \
|
||||
} else { \
|
||||
/* anything else is fail */ \
|
||||
fat_dispatch_ ## NAME = &JOIN(error_, NAME); \
|
||||
} \
|
||||
|
||||
|
||||
/* the rest of the function is completed in the CONNECT_ARGS_1 macro. */
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#define CONNECT_ARGS_1(RTYPE, NAME, ...) \
|
||||
return (*fat_dispatch_ ## NAME)(__VA_ARGS__); \
|
||||
} \
|
||||
|
||||
|
||||
#define CONNECT_DISPATCH_2(RTYPE, NAME, ...) \
|
||||
/* new function */ \
|
||||
HS_PUBLIC_API \
|
||||
RTYPE NAME(__VA_ARGS__) { \
|
||||
|
||||
|
||||
#define CONNECT_ARGS_3(RTYPE, NAME, ...) \
|
||||
return (*fat_dispatch_ ## NAME)(__VA_ARGS__); \
|
||||
} \
|
||||
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wunused-parameter"
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||
|
||||
/* this gets a bit ugly to compose the static redirect functions,
|
||||
* as we necessarily need first the typed arg list and then just the arg
|
||||
* names, twice in a row, to define the redirect function and the
|
||||
* dispatch function call */
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_scan, const hs_database_t *db, const char *data,
|
||||
unsigned length, unsigned flags, hs_scratch_t *scratch,
|
||||
match_event_handler onEvent, void *userCtx);
|
||||
CONNECT_ARGS_1(hs_error_t, hs_scan, db, data, length, flags, scratch, onEvent, userCtx);
|
||||
CONNECT_DISPATCH_2(hs_error_t, hs_scan, const hs_database_t *db, const char *data,
|
||||
unsigned length, unsigned flags, hs_scratch_t *scratch,
|
||||
match_event_handler onEvent, void *userCtx);
|
||||
CONNECT_ARGS_3(hs_error_t, hs_scan, db, data, length, flags, scratch, onEvent, userCtx);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_stream_size, const hs_database_t *database,
|
||||
size_t *stream_size);
|
||||
CONNECT_ARGS_1(hs_error_t, hs_stream_size, database, stream_size);
|
||||
CONNECT_DISPATCH_2(hs_error_t, hs_stream_size, const hs_database_t *database,
|
||||
size_t *stream_size);
|
||||
CONNECT_ARGS_3(hs_error_t, hs_stream_size, database, stream_size);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_database_size, const hs_database_t *db,
|
||||
size_t *size);
|
||||
CONNECT_ARGS_1(hs_error_t, hs_database_size, db, size);
|
||||
CONNECT_DISPATCH_2(hs_error_t, hs_database_size, const hs_database_t *db,
|
||||
size_t *size);
|
||||
CONNECT_ARGS_3(hs_error_t, hs_database_size, db, size);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, dbIsValid, const hs_database_t *db);
|
||||
CONNECT_ARGS_1(hs_error_t, dbIsValid, db);
|
||||
CONNECT_DISPATCH_2(hs_error_t, dbIsValid, const hs_database_t *db);
|
||||
CONNECT_ARGS_3(hs_error_t, dbIsValid, db);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_free_database, hs_database_t *db);
|
||||
CONNECT_ARGS_1(hs_error_t, hs_free_database, db);
|
||||
CONNECT_DISPATCH_2(hs_error_t, hs_free_database, hs_database_t *db);
|
||||
CONNECT_ARGS_3(hs_error_t, hs_free_database, db);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_open_stream, const hs_database_t *db,
|
||||
unsigned int flags, hs_stream_t **stream);
|
||||
CONNECT_ARGS_1(hs_error_t, hs_open_stream, db, flags, stream);
|
||||
CONNECT_DISPATCH_2(hs_error_t, hs_open_stream, const hs_database_t *db,
|
||||
unsigned int flags, hs_stream_t **stream);
|
||||
CONNECT_ARGS_3(hs_error_t, hs_open_stream, db, flags, stream);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_scan_stream, hs_stream_t *id, const char *data,
|
||||
unsigned int length, unsigned int flags, hs_scratch_t *scratch,
|
||||
match_event_handler onEvent, void *ctxt);
|
||||
CONNECT_ARGS_1(hs_error_t, hs_scan_stream, id, data, length, flags, scratch, onEvent, ctxt);
|
||||
CONNECT_DISPATCH_2(hs_error_t, hs_scan_stream, hs_stream_t *id, const char *data,
|
||||
unsigned int length, unsigned int flags, hs_scratch_t *scratch,
|
||||
match_event_handler onEvent, void *ctxt);
|
||||
CONNECT_ARGS_3(hs_error_t, hs_scan_stream, id, data, length, flags, scratch, onEvent, ctxt);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_close_stream, hs_stream_t *id,
|
||||
hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt);
|
||||
CONNECT_ARGS_1(hs_error_t, hs_close_stream, id, scratch, onEvent, ctxt);
|
||||
CONNECT_DISPATCH_2(hs_error_t, hs_close_stream, hs_stream_t *id,
|
||||
hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt);
|
||||
CONNECT_ARGS_3(hs_error_t, hs_close_stream, id, scratch, onEvent, ctxt);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_scan_vector, const hs_database_t *db,
|
||||
const char *const *data, const unsigned int *length,
|
||||
unsigned int count, unsigned int flags, hs_scratch_t *scratch,
|
||||
match_event_handler onevent, void *context);
|
||||
CONNECT_ARGS_1(hs_error_t, hs_scan_vector, db, data, length, count, flags, scratch, onevent, context);
|
||||
CONNECT_DISPATCH_2(hs_error_t, hs_scan_vector, const hs_database_t *db,
|
||||
const char *const *data, const unsigned int *length,
|
||||
unsigned int count, unsigned int flags, hs_scratch_t *scratch,
|
||||
match_event_handler onevent, void *context);
|
||||
CONNECT_ARGS_3(hs_error_t, hs_scan_vector, db, data, length, count, flags, scratch, onevent, context);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_database_info, const hs_database_t *db, char **info);
|
||||
CONNECT_ARGS_1(hs_error_t, hs_database_info, db, info);
|
||||
CONNECT_DISPATCH_2(hs_error_t, hs_database_info, const hs_database_t *db, char **info);
|
||||
CONNECT_ARGS_3(hs_error_t, hs_database_info, db, info);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_copy_stream, hs_stream_t **to_id,
|
||||
const hs_stream_t *from_id);
|
||||
CONNECT_ARGS_1(hs_error_t, hs_copy_stream, to_id, from_id);
|
||||
CONNECT_DISPATCH_2(hs_error_t, hs_copy_stream, hs_stream_t **to_id,
|
||||
const hs_stream_t *from_id);
|
||||
CONNECT_ARGS_3(hs_error_t, hs_copy_stream, to_id, from_id);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_reset_stream, hs_stream_t *id,
|
||||
unsigned int flags, hs_scratch_t *scratch,
|
||||
match_event_handler onEvent, void *context);
|
||||
CONNECT_ARGS_1(hs_error_t, hs_reset_stream, id, flags, scratch, onEvent, context);
|
||||
CONNECT_DISPATCH_2(hs_error_t, hs_reset_stream, hs_stream_t *id,
|
||||
unsigned int flags, hs_scratch_t *scratch,
|
||||
match_event_handler onEvent, void *context);
|
||||
CONNECT_ARGS_3(hs_error_t, hs_reset_stream, id, flags, scratch, onEvent, context);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_reset_and_copy_stream, hs_stream_t *to_id,
|
||||
const hs_stream_t *from_id, hs_scratch_t *scratch,
|
||||
match_event_handler onEvent, void *context);
|
||||
CONNECT_ARGS_1(hs_error_t, hs_reset_and_copy_stream, to_id, from_id, scratch, onEvent, context);
|
||||
CONNECT_DISPATCH_2(hs_error_t, hs_reset_and_copy_stream, hs_stream_t *to_id,
|
||||
const hs_stream_t *from_id, hs_scratch_t *scratch,
|
||||
match_event_handler onEvent, void *context);
|
||||
CONNECT_ARGS_3(hs_error_t, hs_reset_and_copy_stream, to_id, from_id, scratch, onEvent, context);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_serialize_database, const hs_database_t *db,
|
||||
char **bytes, size_t *length);
|
||||
CONNECT_ARGS_1(hs_error_t, hs_serialize_database, db, bytes, length);
|
||||
CONNECT_DISPATCH_2(hs_error_t, hs_serialize_database, const hs_database_t *db,
|
||||
char **bytes, size_t *length);
|
||||
CONNECT_ARGS_3(hs_error_t, hs_serialize_database, db, bytes, length);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_deserialize_database, const char *bytes,
|
||||
const size_t length, hs_database_t **db);
|
||||
CONNECT_ARGS_1(hs_error_t, hs_deserialize_database, bytes, length, db);
|
||||
CONNECT_DISPATCH_2(hs_error_t, hs_deserialize_database, const char *bytes,
|
||||
const size_t length, hs_database_t **db);
|
||||
CONNECT_ARGS_3(hs_error_t, hs_deserialize_database, bytes, length, db);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_deserialize_database_at, const char *bytes,
|
||||
const size_t length, hs_database_t *db);
|
||||
CONNECT_ARGS_1(hs_error_t, hs_deserialize_database_at, bytes, length, db);
|
||||
CONNECT_DISPATCH_2(hs_error_t, hs_deserialize_database_at, const char *bytes,
|
||||
const size_t length, hs_database_t *db);
|
||||
CONNECT_ARGS_3(hs_error_t, hs_deserialize_database_at, bytes, length, db);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_serialized_database_info, const char *bytes,
|
||||
size_t length, char **info);
|
||||
CONNECT_ARGS_1(hs_error_t, hs_serialized_database_info, bytes, length, info);
|
||||
CONNECT_DISPATCH_2(hs_error_t, hs_serialized_database_info, const char *bytes,
|
||||
size_t length, char **info);
|
||||
CONNECT_ARGS_3(hs_error_t, hs_serialized_database_info, bytes, length, info);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_serialized_database_size, const char *bytes,
|
||||
const size_t length, size_t *deserialized_size);
|
||||
CONNECT_ARGS_1(hs_error_t, hs_serialized_database_size, bytes, length, deserialized_size);
|
||||
CONNECT_DISPATCH_2(hs_error_t, hs_serialized_database_size, const char *bytes,
|
||||
const size_t length, size_t *deserialized_size);
|
||||
CONNECT_ARGS_3(hs_error_t, hs_serialized_database_size, bytes, length, deserialized_size);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_compress_stream, const hs_stream_t *stream,
|
||||
char *buf, size_t buf_space, size_t *used_space);
|
||||
CONNECT_ARGS_1(hs_error_t, hs_compress_stream, stream,
|
||||
buf, buf_space, used_space);
|
||||
CONNECT_DISPATCH_2(hs_error_t, hs_compress_stream, const hs_stream_t *stream,
|
||||
char *buf, size_t buf_space, size_t *used_space);
|
||||
CONNECT_ARGS_3(hs_error_t, hs_compress_stream, stream,
|
||||
buf, buf_space, used_space);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_expand_stream, const hs_database_t *db,
|
||||
hs_stream_t **stream, const char *buf,size_t buf_size);
|
||||
CONNECT_ARGS_1(hs_error_t, hs_expand_stream, db, stream, buf,buf_size);
|
||||
CONNECT_DISPATCH_2(hs_error_t, hs_expand_stream, const hs_database_t *db,
|
||||
hs_stream_t **stream, const char *buf,size_t buf_size);
|
||||
CONNECT_ARGS_3(hs_error_t, hs_expand_stream, db, stream, buf,buf_size);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_reset_and_expand_stream, hs_stream_t *to_stream,
|
||||
const char *buf, size_t buf_size, hs_scratch_t *scratch,
|
||||
match_event_handler onEvent, void *context);
|
||||
CONNECT_ARGS_1(hs_error_t, hs_reset_and_expand_stream, to_stream,
|
||||
buf, buf_size, scratch, onEvent, context);
|
||||
CONNECT_DISPATCH_2(hs_error_t, hs_reset_and_expand_stream, hs_stream_t *to_stream,
|
||||
const char *buf, size_t buf_size, hs_scratch_t *scratch,
|
||||
match_event_handler onEvent, void *context);
|
||||
CONNECT_ARGS_3(hs_error_t, hs_reset_and_expand_stream, to_stream,
|
||||
buf, buf_size, scratch, onEvent, context);
|
||||
|
||||
/** INTERNALS **/
|
||||
|
||||
CREATE_DISPATCH(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen);
|
||||
CONNECT_ARGS_1(u32, Crc32c_ComputeBuf, inCrc32, buf, bufLen);
|
||||
CONNECT_DISPATCH_2(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen);
|
||||
CONNECT_ARGS_3(u32, Crc32c_ComputeBuf, inCrc32, buf, bufLen);
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
|
@ -298,7 +298,7 @@ void get_conf_stride_4(const u8 *itPtr, UNUSED const u8 *start_ptr,
|
||||
static really_inline
|
||||
void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control,
|
||||
const u32 *confBase, const struct FDR_Runtime_Args *a,
|
||||
const u8 *ptr, u32 *last_match_id, struct zone *z) {
|
||||
const u8 *ptr, u32 *last_match_id, const struct zone *z) {
|
||||
const u8 bucket = 8;
|
||||
|
||||
if (likely(!*conf)) {
|
||||
|
@ -52,14 +52,14 @@ u32 TeddyEngineDescription::getDefaultFloodSuffixLength() const {
|
||||
|
||||
void getTeddyDescriptions(vector<TeddyEngineDescription> *out) {
|
||||
static const TeddyEngineDef defns[] = {
|
||||
{ 3, 0 | HS_CPU_FEATURES_AVX2, 1, 16, false },
|
||||
{ 4, 0 | HS_CPU_FEATURES_AVX2, 1, 16, true },
|
||||
{ 5, 0 | HS_CPU_FEATURES_AVX2, 2, 16, false },
|
||||
{ 6, 0 | HS_CPU_FEATURES_AVX2, 2, 16, true },
|
||||
{ 7, 0 | HS_CPU_FEATURES_AVX2, 3, 16, false },
|
||||
{ 8, 0 | HS_CPU_FEATURES_AVX2, 3, 16, true },
|
||||
{ 9, 0 | HS_CPU_FEATURES_AVX2, 4, 16, false },
|
||||
{ 10, 0 | HS_CPU_FEATURES_AVX2, 4, 16, true },
|
||||
{ 3, HS_CPU_FEATURES_AVX2, 1, 16, false },
|
||||
{ 4, HS_CPU_FEATURES_AVX2, 1, 16, true },
|
||||
{ 5, HS_CPU_FEATURES_AVX2, 2, 16, false },
|
||||
{ 6, HS_CPU_FEATURES_AVX2, 2, 16, true },
|
||||
{ 7, HS_CPU_FEATURES_AVX2, 3, 16, false },
|
||||
{ 8, HS_CPU_FEATURES_AVX2, 3, 16, true },
|
||||
{ 9, HS_CPU_FEATURES_AVX2, 4, 16, false },
|
||||
{ 10, HS_CPU_FEATURES_AVX2, 4, 16, true },
|
||||
{ 11, 0, 1, 8, false },
|
||||
{ 12, 0, 1, 8, true },
|
||||
{ 13, 0, 2, 8, false },
|
||||
|
@ -400,7 +400,7 @@ char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u64a subCastleNextMatch(const struct Castle *c, void *full_state,
|
||||
u64a subCastleNextMatch(const struct Castle *c, const void *full_state,
|
||||
void *stream_state, const u64a loc,
|
||||
const u32 subIdx) {
|
||||
DEBUG_PRINTF("subcastle %u\n", subIdx);
|
||||
@ -489,7 +489,6 @@ char castleMatchLoop(const struct Castle *c, const u64a begin, const u64a end,
|
||||
// full_state (scratch).
|
||||
|
||||
u64a offset = end; // min offset of next match
|
||||
u32 activeIdx = 0;
|
||||
mmbit_clear(matching, c->numRepeats);
|
||||
if (c->exclusive) {
|
||||
u8 *active = (u8 *)stream_state;
|
||||
@ -497,7 +496,7 @@ char castleMatchLoop(const struct Castle *c, const u64a begin, const u64a end,
|
||||
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
||||
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
||||
u8 *cur = active + i * c->activeIdxSize;
|
||||
activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
||||
u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
||||
u64a match = subCastleNextMatch(c, full_state, stream_state,
|
||||
loc, activeIdx);
|
||||
set_matching(c, match, groups, matching, c->numGroups, i,
|
||||
@ -907,7 +906,6 @@ s64a castleLastKillLoc(const struct Castle *c, struct mq *q) {
|
||||
if (castleRevScan(c, q->history, sp + hlen, ep + hlen, &loc)) {
|
||||
return (s64a)loc - hlen;
|
||||
}
|
||||
ep = 0;
|
||||
}
|
||||
|
||||
return sp - 1; /* the repeats are never killed */
|
||||
|
@ -655,7 +655,8 @@ buildCastle(const CastleProto &proto,
|
||||
if (!stale_iter.empty()) {
|
||||
c->staleIterOffset = verify_u32(ptr - base_ptr);
|
||||
copy_bytes(ptr, stale_iter);
|
||||
ptr += byte_length(stale_iter);
|
||||
// Removed unused increment operation
|
||||
// ptr += byte_length(stale_iter);
|
||||
}
|
||||
|
||||
return nfa;
|
||||
|
@ -332,7 +332,7 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx,
|
||||
// UE-1636) need to guard cyclic tug-accepts as well.
|
||||
static really_inline
|
||||
char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
|
||||
union RepeatControl *repeat_ctrl, char *repeat_state,
|
||||
const union RepeatControl *repeat_ctrl, const char *repeat_state,
|
||||
u64a offset, ReportID report) {
|
||||
assert(limex);
|
||||
|
||||
@ -382,7 +382,7 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
|
||||
|
||||
static really_inline
|
||||
char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
|
||||
union RepeatControl *repeat_ctrl, char *repeat_state,
|
||||
const union RepeatControl *repeat_ctrl, const char *repeat_state,
|
||||
u64a offset) {
|
||||
assert(limex);
|
||||
|
||||
|
@ -1572,7 +1572,7 @@ u32 findMaxVarShift(const build_info &args, u32 nShifts) {
|
||||
static
|
||||
int getLimexScore(const build_info &args, u32 nShifts) {
|
||||
const NGHolder &h = args.h;
|
||||
u32 maxVarShift = nShifts;
|
||||
u32 maxVarShift;
|
||||
int score = 0;
|
||||
|
||||
score += SHIFT_COST * nShifts;
|
||||
|
@ -512,7 +512,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
|
||||
|
||||
verm_restart:;
|
||||
assert(buf[curr] == kp->u.verm.c);
|
||||
size_t test = curr;
|
||||
size_t test;
|
||||
if (curr + min_rep < length) {
|
||||
test = curr + min_rep;
|
||||
} else {
|
||||
@ -534,7 +534,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
|
||||
m128 hi = kp->u.shuf.mask_hi;
|
||||
shuf_restart:
|
||||
assert(do_single_shufti(lo, hi, buf[curr]));
|
||||
size_t test = curr;
|
||||
size_t test;
|
||||
if (curr + min_rep < length) {
|
||||
test = curr + min_rep;
|
||||
} else {
|
||||
@ -556,7 +556,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
|
||||
const m128 mask1 = kp->u.truffle.mask1;
|
||||
const m128 mask2 = kp->u.truffle.mask2;
|
||||
truffle_restart:;
|
||||
size_t test = curr;
|
||||
size_t test;
|
||||
if (curr + min_rep < length) {
|
||||
test = curr + min_rep;
|
||||
} else {
|
||||
@ -582,7 +582,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
|
||||
|
||||
nverm_restart:;
|
||||
assert(buf[curr] != kp->u.verm.c);
|
||||
size_t test = curr;
|
||||
size_t test;
|
||||
if (curr + min_rep < length) {
|
||||
test = curr + min_rep;
|
||||
} else {
|
||||
@ -607,7 +607,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void restartKilo(const struct mpv *m, UNUSED u8 *active, u8 *reporters,
|
||||
void restartKilo(const struct mpv *m, UNUSED const u8 *active, u8 *reporters,
|
||||
struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
|
||||
const u8 *buf, u64a prev_limit, size_t buf_length, u32 i) {
|
||||
const struct mpv_kilopuff *kp = (const void *)(m + 1);
|
||||
|
@ -94,9 +94,6 @@ u32 repeatRecurTable(struct RepeatStateInfo *info, const depth &repeatMax,
|
||||
static
|
||||
u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax,
|
||||
const u32 minPeriod, u32 rv) {
|
||||
u32 cnt = 0;
|
||||
u32 patch_bits = 0;
|
||||
u32 total_size = 0;
|
||||
u32 min = ~0U;
|
||||
u32 patch_len = 0;
|
||||
|
||||
@ -105,11 +102,11 @@ u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax,
|
||||
}
|
||||
|
||||
for (u32 i = minPeriod; i <= rv; i++) {
|
||||
cnt = ((u32)repeatMax + (i - 1)) / i + 1;
|
||||
u32 cnt = ((u32)repeatMax + (i - 1)) / i + 1;
|
||||
|
||||
// no bit packing version
|
||||
patch_bits = calcPackedBits(info->table[i]);
|
||||
total_size = (patch_bits + 7U) / 8U * cnt;
|
||||
u32 patch_bits = calcPackedBits(info->table[i]);
|
||||
u32 total_size = (patch_bits + 7U) / 8U * cnt;
|
||||
|
||||
if (total_size < min) {
|
||||
patch_len = i;
|
||||
|
@ -154,7 +154,7 @@ char fireReports(const struct sheng *sh, NfaCallback cb, void *ctxt,
|
||||
return MO_CONTINUE_MATCHING; /* continue execution */
|
||||
}
|
||||
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
// Sheng32
|
||||
static really_inline
|
||||
const struct sheng32 *get_sheng32(const struct NFA *n) {
|
||||
@ -351,7 +351,7 @@ char fireReports64(const struct sheng64 *sh, NfaCallback cb, void *ctxt,
|
||||
}
|
||||
return MO_CONTINUE_MATCHING; /* continue execution */
|
||||
}
|
||||
#endif // end of HAVE_AVX512VBMI
|
||||
#endif // end of HAVE_AVX512VBMI || HAVE_SVE
|
||||
|
||||
/* include Sheng function definitions */
|
||||
#include "sheng_defs.h"
|
||||
@ -871,7 +871,7 @@ char nfaExecSheng_expandState(UNUSED const struct NFA *nfa, void *dest,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
// Sheng32
|
||||
static really_inline
|
||||
char runSheng32Cb(const struct sheng32 *sh, NfaCallback cb, void *ctxt,
|
||||
@ -1874,4 +1874,4 @@ char nfaExecSheng64_expandState(UNUSED const struct NFA *nfa, void *dest,
|
||||
*(u8 *)dest = *(const u8 *)src;
|
||||
return 0;
|
||||
}
|
||||
#endif // end of HAVE_AVX512VBMI
|
||||
#endif // end of HAVE_AVX512VBMI || HAVE_SVE
|
||||
|
@ -58,7 +58,7 @@ char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q);
|
||||
char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer,
|
||||
size_t length, NfaCallback cb, void *context);
|
||||
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
|
||||
#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||
|
||||
@ -106,8 +106,7 @@ char nfaExecSheng64_reportCurrent(const struct NFA *n, struct mq *q);
|
||||
|
||||
char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer,
|
||||
size_t length, NfaCallback cb, void *context);
|
||||
|
||||
#else // !HAVE_AVX512VBMI
|
||||
#else // !HAVE_AVX512VBMI && !HAVE_SVE
|
||||
|
||||
#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
|
||||
#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||
@ -138,6 +137,7 @@ char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer,
|
||||
#define nfaExecSheng64_testEOD NFA_API_NO_IMPL
|
||||
#define nfaExecSheng64_reportCurrent NFA_API_NO_IMPL
|
||||
#define nfaExecSheng64_B NFA_API_NO_IMPL
|
||||
#endif // end of HAVE_AVX512VBMI
|
||||
#endif // end of HAVE_AVX512VBMI || defined(HAVE_SVE)
|
||||
|
||||
|
||||
#endif /* SHENG_H_ */
|
||||
|
@ -52,7 +52,7 @@ u8 hasInterestingStates(const u8 a, const u8 b, const u8 c, const u8 d) {
|
||||
return (a | b | c | d) & (SHENG_STATE_FLAG_MASK);
|
||||
}
|
||||
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
static really_inline
|
||||
u8 isDeadState32(const u8 a) {
|
||||
return a & SHENG32_STATE_DEAD;
|
||||
@ -108,7 +108,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define SHENG_IMPL sheng_cod
|
||||
#define DEAD_FUNC isDeadState
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_cod
|
||||
#define DEAD_FUNC32 isDeadState32
|
||||
#define ACCEPT_FUNC32 isAcceptState32
|
||||
@ -121,7 +121,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef DEAD_FUNC32
|
||||
#undef ACCEPT_FUNC32
|
||||
@ -135,7 +135,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define SHENG_IMPL sheng_co
|
||||
#define DEAD_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_co
|
||||
#define DEAD_FUNC32 dummyFunc
|
||||
#define ACCEPT_FUNC32 isAcceptState32
|
||||
@ -148,7 +148,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef DEAD_FUNC32
|
||||
#undef ACCEPT_FUNC32
|
||||
@ -162,7 +162,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define SHENG_IMPL sheng_samd
|
||||
#define DEAD_FUNC isDeadState
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_samd
|
||||
#define DEAD_FUNC32 isDeadState32
|
||||
#define ACCEPT_FUNC32 isAcceptState32
|
||||
@ -175,7 +175,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef DEAD_FUNC32
|
||||
#undef ACCEPT_FUNC32
|
||||
@ -189,7 +189,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define SHENG_IMPL sheng_sam
|
||||
#define DEAD_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_sam
|
||||
#define DEAD_FUNC32 dummyFunc
|
||||
#define ACCEPT_FUNC32 isAcceptState32
|
||||
@ -202,7 +202,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef DEAD_FUNC32
|
||||
#undef ACCEPT_FUNC32
|
||||
@ -216,7 +216,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define SHENG_IMPL sheng_nmd
|
||||
#define DEAD_FUNC isDeadState
|
||||
#define ACCEPT_FUNC dummyFunc
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_nmd
|
||||
#define DEAD_FUNC32 isDeadState32
|
||||
#define ACCEPT_FUNC32 dummyFunc
|
||||
@ -229,7 +229,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef DEAD_FUNC32
|
||||
#undef ACCEPT_FUNC32
|
||||
@ -243,7 +243,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define SHENG_IMPL sheng_nm
|
||||
#define DEAD_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC dummyFunc
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_nm
|
||||
#define DEAD_FUNC32 dummyFunc
|
||||
#define ACCEPT_FUNC32 dummyFunc
|
||||
@ -256,7 +256,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef DEAD_FUNC32
|
||||
#undef ACCEPT_FUNC32
|
||||
@ -277,7 +277,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC isAccelState
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_coda
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 isDeadState32
|
||||
@ -296,7 +296,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@ -316,7 +316,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_cod
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 isDeadState32
|
||||
@ -339,7 +339,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@ -363,7 +363,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC isAccelState
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_coa
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 dummyFunc
|
||||
@ -382,7 +382,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@ -402,7 +402,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_co
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 dummyFunc
|
||||
@ -425,7 +425,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@ -449,7 +449,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC isAccelState
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_samda
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 isDeadState32
|
||||
@ -468,7 +468,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@ -488,7 +488,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_samd
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 isDeadState32
|
||||
@ -511,7 +511,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@ -535,7 +535,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC isAccelState
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_sama
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 dummyFunc
|
||||
@ -554,7 +554,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@ -574,7 +574,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_sam
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 dummyFunc
|
||||
@ -597,7 +597,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@ -623,7 +623,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC isAccelState
|
||||
#define ACCEPT_FUNC dummyFunc
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_nmda
|
||||
#define INTERESTING_FUNC32 dummyFunc4
|
||||
#define INNER_DEAD_FUNC32 dummyFunc
|
||||
@ -642,7 +642,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@ -662,7 +662,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC dummyFunc
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_nmd
|
||||
#define INTERESTING_FUNC32 dummyFunc4
|
||||
#define INNER_DEAD_FUNC32 dummyFunc
|
||||
@ -685,7 +685,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@ -712,7 +712,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC dummyFunc
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_nm
|
||||
#define INTERESTING_FUNC32 dummyFunc4
|
||||
#define INNER_DEAD_FUNC32 dummyFunc
|
||||
@ -735,7 +735,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
|
@ -96,7 +96,7 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
static really_inline
|
||||
char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
const struct sheng32 *s,
|
||||
@ -114,14 +114,28 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
}
|
||||
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
const svbool_t lane_pred_32 = svwhilelt_b8(0, 32);
|
||||
svuint8_t cur_state = svdup_u8(*state);
|
||||
svuint8_t tbl_mask = svdup_u8((unsigned char)0x1F);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#else
|
||||
m512 cur_state = set1_64x8(*state);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#endif
|
||||
|
||||
while (likely(cur_buf != end)) {
|
||||
const u8 c = *cur_buf;
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
svuint8_t succ_mask = svld1(lane_pred_32, (const u8*)(masks + c));
|
||||
cur_state = svtbl(succ_mask, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 tmp = svlastb(lane_pred_32, cur_state);
|
||||
#else
|
||||
const m512 succ_mask = masks[c];
|
||||
cur_state = vpermb512(cur_state, succ_mask);
|
||||
const u8 tmp = movd512(cur_state);
|
||||
#endif
|
||||
|
||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
|
||||
DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG32_STATE_MASK,
|
||||
@ -153,7 +167,11 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
}
|
||||
cur_buf++;
|
||||
}
|
||||
#if defined(HAVE_SVE)
|
||||
*state = svlastb(lane_pred_32, cur_state);
|
||||
#else
|
||||
*state = movd512(cur_state);
|
||||
#endif
|
||||
*scan_end = cur_buf;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
@ -175,14 +193,28 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
}
|
||||
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
const svbool_t lane_pred_64 = svwhilelt_b8(0, 64);
|
||||
svuint8_t cur_state = svdup_u8(*state);
|
||||
svuint8_t tbl_mask = svdup_u8((unsigned char)0x3F);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#else
|
||||
m512 cur_state = set1_64x8(*state);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#endif
|
||||
|
||||
while (likely(cur_buf != end)) {
|
||||
const u8 c = *cur_buf;
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
svuint8_t succ_mask = svld1(lane_pred_64, (const u8*)(masks + c));
|
||||
cur_state = svtbl(succ_mask, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 tmp = svlastb(lane_pred_64, cur_state);
|
||||
#else
|
||||
const m512 succ_mask = masks[c];
|
||||
cur_state = vpermb512(cur_state, succ_mask);
|
||||
const u8 tmp = movd512(cur_state);
|
||||
#endif
|
||||
|
||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
|
||||
DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG64_STATE_MASK,
|
||||
@ -214,7 +246,11 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
}
|
||||
cur_buf++;
|
||||
}
|
||||
#if defined(HAVE_SVE)
|
||||
*state = svlastb(lane_pred_64, cur_state);
|
||||
#else
|
||||
*state = movd512(cur_state);
|
||||
#endif
|
||||
*scan_end = cur_buf;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
@ -283,7 +283,7 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
static really_inline
|
||||
char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
const struct sheng32 *s,
|
||||
@ -320,8 +320,15 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
const svbool_t lane_pred_32 = svwhilelt_b8(0, 32);
|
||||
svuint8_t cur_state = svdup_u8(*state);
|
||||
svuint8_t tbl_mask = svdup_u8((unsigned char)0x1F);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#else
|
||||
m512 cur_state = set1_64x8(*state);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#endif
|
||||
|
||||
while (likely(end - cur_buf >= 4)) {
|
||||
const u8 *b1 = cur_buf;
|
||||
@ -333,6 +340,23 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
const u8 c3 = *b3;
|
||||
const u8 c4 = *b4;
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
svuint8_t succ_mask1 = svld1(lane_pred_32, (const u8*)(masks+c1));
|
||||
cur_state = svtbl(succ_mask1, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a1 = svlastb(lane_pred_32, cur_state);
|
||||
|
||||
svuint8_t succ_mask2 = svld1(lane_pred_32, (const u8*)(masks+c2));
|
||||
cur_state = svtbl(succ_mask2, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a2 = svlastb(lane_pred_32, cur_state);
|
||||
|
||||
svuint8_t succ_mask3 = svld1(lane_pred_32, (const u8*)(masks+c3));
|
||||
cur_state = svtbl(succ_mask3, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a3 = svlastb(lane_pred_32, cur_state);
|
||||
|
||||
svuint8_t succ_mask4 = svld1(lane_pred_32, (const u8*)(masks+c4));
|
||||
cur_state = svtbl(succ_mask4, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a4 = svlastb(lane_pred_32, cur_state);
|
||||
#else
|
||||
const m512 succ_mask1 = masks[c1];
|
||||
cur_state = vpermb512(cur_state, succ_mask1);
|
||||
const u8 a1 = movd512(cur_state);
|
||||
@ -348,6 +372,7 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
const m512 succ_mask4 = masks[c4];
|
||||
cur_state = vpermb512(cur_state, succ_mask4);
|
||||
const u8 a4 = movd512(cur_state);
|
||||
#endif
|
||||
|
||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
|
||||
DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG32_STATE_MASK,
|
||||
@ -517,7 +542,11 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
};
|
||||
cur_buf += 4;
|
||||
}
|
||||
#if defined(HAVE_SVE)
|
||||
*state = svlastb(lane_pred_32, cur_state);
|
||||
#else
|
||||
*state = movd512(cur_state);
|
||||
#endif
|
||||
*scan_end = cur_buf;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
@ -541,9 +570,15 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
*scan_end = end;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
const svbool_t lane_pred_64 = svwhilelt_b8(0, 64);
|
||||
svuint8_t cur_state = svdup_u8(*state);
|
||||
svuint8_t tbl_mask = svdup_u8((unsigned char)0x3F);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#else
|
||||
m512 cur_state = set1_64x8(*state);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#endif
|
||||
|
||||
while (likely(end - cur_buf >= 4)) {
|
||||
const u8 *b1 = cur_buf;
|
||||
@ -555,6 +590,23 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
const u8 c3 = *b3;
|
||||
const u8 c4 = *b4;
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
svuint8_t succ_mask1 = svld1(lane_pred_64, (const u8*)(masks+c1));
|
||||
cur_state = svtbl(succ_mask1, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a1 = svlastb(lane_pred_64, cur_state);
|
||||
|
||||
svuint8_t succ_mask2 = svld1(lane_pred_64, (const u8*)(masks+c2));
|
||||
cur_state = svtbl(succ_mask2, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a2 = svlastb(lane_pred_64, cur_state);
|
||||
|
||||
svuint8_t succ_mask3 = svld1(lane_pred_64, (const u8*)(masks+c3));
|
||||
cur_state = svtbl(succ_mask3, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a3 = svlastb(lane_pred_64, cur_state);
|
||||
|
||||
svuint8_t succ_mask4 = svld1(lane_pred_64, (const u8*)(masks+c4));
|
||||
cur_state = svtbl(succ_mask4, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a4 = svlastb(lane_pred_64, cur_state);
|
||||
#else
|
||||
const m512 succ_mask1 = masks[c1];
|
||||
cur_state = vpermb512(cur_state, succ_mask1);
|
||||
const u8 a1 = movd512(cur_state);
|
||||
@ -570,6 +622,7 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
const m512 succ_mask4 = masks[c4];
|
||||
cur_state = vpermb512(cur_state, succ_mask4);
|
||||
const u8 a4 = movd512(cur_state);
|
||||
#endif
|
||||
|
||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
|
||||
DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG64_STATE_MASK,
|
||||
@ -703,7 +756,11 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
}
|
||||
cur_buf += 4;
|
||||
}
|
||||
#if defined(HAVE_SVE)
|
||||
*state = svlastb(lane_pred_64, cur_state);
|
||||
#else
|
||||
*state = movd512(cur_state);
|
||||
#endif
|
||||
*scan_end = cur_buf;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
@ -730,10 +730,17 @@ bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
#ifdef HAVE_SVE
|
||||
if (svcntb()<32) {
|
||||
DEBUG_PRINTF("Sheng32 failed, SVE width is too small!\n");
|
||||
return nullptr;
|
||||
}
|
||||
#else
|
||||
if (!cc.target_info.has_avx512vbmi()) {
|
||||
DEBUG_PRINTF("Sheng32 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
|
||||
return nullptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
sheng_build_strat strat(raw, rm, only_accel_init);
|
||||
dfa_info info(strat);
|
||||
@ -762,10 +769,17 @@ bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
#ifdef HAVE_SVE
|
||||
if (svcntb()<64) {
|
||||
DEBUG_PRINTF("Sheng64 failed, SVE width is too small!\n");
|
||||
return nullptr;
|
||||
}
|
||||
#else
|
||||
if (!cc.target_info.has_avx512vbmi()) {
|
||||
DEBUG_PRINTF("Sheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
|
||||
return nullptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
sheng_build_strat strat(raw, rm, only_accel_init);
|
||||
dfa_info info(strat);
|
||||
|
@ -193,9 +193,6 @@ void reduceGraph(NGHolder &g, som_type som, bool utf8,
|
||||
|
||||
if (!som) {
|
||||
mergeCyclicDotStars(g);
|
||||
}
|
||||
|
||||
if (!som) {
|
||||
removeSiblingsOfStartDotStar(g);
|
||||
}
|
||||
}
|
||||
|
@ -165,9 +165,9 @@ void reformAnchoredRepeatsComponent(NGHolder &g,
|
||||
return;
|
||||
}
|
||||
|
||||
NFAVertex dotV = NGHolder::null_vertex();
|
||||
|
||||
set<NFAVertex> otherV;
|
||||
dotV = findReformable(g, compAnchoredStarts, otherV);
|
||||
NFAVertex dotV = findReformable(g, compAnchoredStarts, otherV);
|
||||
if (dotV == NGHolder::null_vertex()) {
|
||||
DEBUG_PRINTF("no candidate reformable dot found.\n");
|
||||
return;
|
||||
@ -268,9 +268,9 @@ void reformUnanchoredRepeatsComponent(NGHolder &g,
|
||||
}
|
||||
|
||||
while (true) {
|
||||
NFAVertex dotV = NGHolder::null_vertex();
|
||||
|
||||
set<NFAVertex> otherV;
|
||||
dotV = findReformable(g, compUnanchoredStarts, otherV);
|
||||
NFAVertex dotV = findReformable(g, compUnanchoredStarts, otherV);
|
||||
if (dotV == NGHolder::null_vertex()) {
|
||||
DEBUG_PRINTF("no candidate reformable dot found.\n");
|
||||
return;
|
||||
|
@ -513,12 +513,12 @@ static
|
||||
bool doHaig(const NGHolder &g, som_type som,
|
||||
const vector<vector<CharReach>> &triggers, bool unordered_som,
|
||||
raw_som_dfa *rdfa) {
|
||||
u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from
|
||||
a fight */
|
||||
using StateSet = typename Auto::StateSet;
|
||||
vector<StateSet> nfa_state_map;
|
||||
Auto n(g, som, triggers, unordered_som);
|
||||
try {
|
||||
u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from
|
||||
a fight */
|
||||
if (!determinise(n, rdfa->states, state_limit, &nfa_state_map)) {
|
||||
DEBUG_PRINTF("state limit exceeded\n");
|
||||
return false;
|
||||
|
@ -321,7 +321,7 @@ struct DAccelScheme {
|
||||
bool cd_a = buildDvermMask(a.double_byte);
|
||||
bool cd_b = buildDvermMask(b.double_byte);
|
||||
if (cd_a != cd_b) {
|
||||
return cd_a > cd_b;
|
||||
return cd_a;
|
||||
}
|
||||
}
|
||||
|
||||
@ -811,11 +811,9 @@ depth_done:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Second option: a two-byte shufti (i.e. less than eight 2-byte
|
||||
// literals)
|
||||
if (depth > 1) {
|
||||
for (unsigned int i = 0; i < (depth - 1); i++) {
|
||||
if (depthReach[i].count() * depthReach[i+1].count()
|
||||
<= DOUBLE_SHUFTI_LIMIT) {
|
||||
|
@ -636,12 +636,12 @@ bool reversePathReachSubset(const NFAEdge &e, const NFAVertex &dom,
|
||||
|
||||
NFAVertex start = source(e, g);
|
||||
using RevGraph = boost::reverse_graph<NGHolder, const NGHolder &>;
|
||||
map<RevGraph::vertex_descriptor, boost::default_color_type> vertexColor;
|
||||
|
||||
// Walk the graph backwards from v, examining each node. We fail (return
|
||||
// false) if we encounter a node with reach NOT a subset of domReach, and
|
||||
// we stop searching at dom.
|
||||
try {
|
||||
map<RevGraph::vertex_descriptor, boost::default_color_type> vertexColor;
|
||||
depth_first_visit(RevGraph(g), start,
|
||||
ReachSubsetVisitor(domReach),
|
||||
make_assoc_property_map(vertexColor),
|
||||
@ -664,12 +664,12 @@ bool forwardPathReachSubset(const NFAEdge &e, const NFAVertex &dom,
|
||||
}
|
||||
|
||||
NFAVertex start = target(e, g);
|
||||
map<NFAVertex, boost::default_color_type> vertexColor;
|
||||
|
||||
// Walk the graph forward from v, examining each node. We fail (return
|
||||
// false) if we encounter a node with reach NOT a subset of domReach, and
|
||||
// we stop searching at dom.
|
||||
try {
|
||||
map<NFAVertex, boost::default_color_type> vertexColor;
|
||||
depth_first_visit(g, start, ReachSubsetVisitor(domReach),
|
||||
make_assoc_property_map(vertexColor),
|
||||
VertexIs<NGHolder, NFAVertex>(dom));
|
||||
|
@ -1292,8 +1292,8 @@ bool doTreePlanningIntl(const NGHolder &g,
|
||||
DEBUG_PRINTF("add mapped reporters for region %u\n", it->first);
|
||||
addMappedReporterVertices(it->second, g, copy_to_orig,
|
||||
plan.back().reporters);
|
||||
} while (it->second.optional && it != info.rend() &&
|
||||
(++it)->first > furthest->first);
|
||||
} while (it != info.rend() && it->second.optional &&
|
||||
(++it)->first > furthest->first);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -1551,7 +1551,7 @@ bool doSomPlanning(NGHolder &g, bool stuck_in,
|
||||
DEBUG_PRINTF("region %u contributes reporters to last plan\n",
|
||||
it->first);
|
||||
addReporterVertices(it->second, g, plan.back().reporters);
|
||||
} while (it->second.optional && it != info.rend() &&
|
||||
} while (it != info.rend() && it->second.optional &&
|
||||
(++it)->first > furthest->first);
|
||||
|
||||
DEBUG_PRINTF("done!\n");
|
||||
|
@ -267,18 +267,6 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g,
|
||||
boost::depth_first_search(c_g, visitor(backEdgeVisitor)
|
||||
.root_vertex(c_g.start));
|
||||
|
||||
for (const auto &e : be) {
|
||||
NFAVertex s = source(e, c_g);
|
||||
NFAVertex t = target(e, c_g);
|
||||
DEBUG_PRINTF("back edge %zu %zu\n", c_g[s].index, c_g[t].index);
|
||||
if (s != t) {
|
||||
assert(0);
|
||||
DEBUG_PRINTF("eek big cycle\n");
|
||||
rv = true; /* big cycle -> eek */
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("checking acyclic+selfloop graph\n");
|
||||
|
||||
rv = !firstMatchIsFirst(c_g);
|
||||
|
@ -589,7 +589,7 @@ void getHighlanderReporters(const NGHolder &g, const NFAVertex accept,
|
||||
|
||||
verts.insert(v);
|
||||
next_vertex:
|
||||
continue;
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -314,7 +314,7 @@ void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new);
|
||||
|
||||
/** Construct a reversed copy of an arbitrary NGHolder, mapping starts to
|
||||
* accepts. */
|
||||
void reverseHolder(const NGHolder &g, NGHolder &out);
|
||||
void reverseHolder(const NGHolder &g_in, NGHolder &g);
|
||||
|
||||
/** \brief Returns the delay or ~0U if the graph cannot match with
|
||||
* the trailing literal. */
|
||||
|
@ -348,10 +348,9 @@ void getSimpleRoseLiterals(const NGHolder &g, bool seeking_anchored,
|
||||
|
||||
map<NFAVertex, u64a> scores;
|
||||
map<NFAVertex, unique_ptr<VertLitInfo>> lit_info;
|
||||
set<ue2_literal> s;
|
||||
|
||||
for (auto v : a_dom) {
|
||||
s = getLiteralSet(g, v, true); /* RHS will take responsibility for any
|
||||
set<ue2_literal> s = getLiteralSet(g, v, true); /* RHS will take responsibility for any
|
||||
revisits to the target vertex */
|
||||
|
||||
if (s.empty()) {
|
||||
@ -2868,7 +2867,6 @@ static
|
||||
bool splitForImplementability(RoseInGraph &vg, NGHolder &h,
|
||||
const vector<RoseInEdge> &edges,
|
||||
const CompileContext &cc) {
|
||||
vector<pair<ue2_literal, u32>> succ_lits;
|
||||
DEBUG_PRINTF("trying to split %s with %zu vertices on %zu edges\n",
|
||||
to_string(h.kind).c_str(), num_vertices(h), edges.size());
|
||||
|
||||
@ -2877,6 +2875,7 @@ bool splitForImplementability(RoseInGraph &vg, NGHolder &h,
|
||||
}
|
||||
|
||||
if (!generates_callbacks(h)) {
|
||||
vector<pair<ue2_literal, u32>> succ_lits;
|
||||
for (const auto &e : edges) {
|
||||
const auto &lit = vg[target(e, vg)].s;
|
||||
u32 delay = vg[e].graph_lag;
|
||||
@ -2889,8 +2888,8 @@ bool splitForImplementability(RoseInGraph &vg, NGHolder &h,
|
||||
}
|
||||
|
||||
unique_ptr<VertLitInfo> split;
|
||||
bool last_chance = true;
|
||||
if (h.kind == NFA_PREFIX) {
|
||||
bool last_chance = true;
|
||||
auto depths = calcDepths(h);
|
||||
|
||||
split = findBestPrefixSplit(h, depths, vg, edges, last_chance, cc);
|
||||
|
@ -109,20 +109,20 @@ void ComponentAlternation::append(unique_ptr<Component> component) {
|
||||
vector<PositionInfo> ComponentAlternation::first() const {
|
||||
// firsts come from all our subcomponents in position order. This will
|
||||
// maintain left-to-right priority order.
|
||||
vector<PositionInfo> firsts, subfirsts;
|
||||
vector<PositionInfo> firsts;
|
||||
|
||||
for (const auto &c : children) {
|
||||
subfirsts = c->first();
|
||||
vector<PositionInfo> subfirsts = c->first();
|
||||
firsts.insert(firsts.end(), subfirsts.begin(), subfirsts.end());
|
||||
}
|
||||
return firsts;
|
||||
}
|
||||
|
||||
vector<PositionInfo> ComponentAlternation::last() const {
|
||||
vector<PositionInfo> lasts, sublasts;
|
||||
vector<PositionInfo> lasts;
|
||||
|
||||
for (const auto &c : children) {
|
||||
sublasts = c->last();
|
||||
vector<PositionInfo> sublasts = c->last();
|
||||
lasts.insert(lasts.end(), sublasts.begin(), sublasts.end());
|
||||
}
|
||||
return lasts;
|
||||
|
@ -320,7 +320,7 @@ void ComponentRepeat::wireRepeats(GlushkovBuildState &bs) {
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("wiring up %d optional repeats\n", copies - m_min);
|
||||
DEBUG_PRINTF("wiring up %u optional repeats\n", copies - m_min);
|
||||
for (u32 rep = MAX(m_min, 1); rep < copies; rep++) {
|
||||
vector<PositionInfo> lasts = m_lasts[rep - 1];
|
||||
if (rep != m_min) {
|
||||
|
@ -157,10 +157,10 @@ void ComponentSequence::finalize() {
|
||||
}
|
||||
|
||||
vector<PositionInfo> ComponentSequence::first() const {
|
||||
vector<PositionInfo> firsts, subfirsts;
|
||||
vector<PositionInfo> firsts;
|
||||
|
||||
for (const auto &c : children) {
|
||||
subfirsts = c->first();
|
||||
vector<PositionInfo> subfirsts = c->first();
|
||||
replaceEpsilons(firsts, subfirsts);
|
||||
if (!c->empty()) {
|
||||
break;
|
||||
@ -229,12 +229,12 @@ void applyEpsilonVisits(vector<PositionInfo> &lasts,
|
||||
}
|
||||
|
||||
vector<PositionInfo> ComponentSequence::last() const {
|
||||
vector<PositionInfo> lasts, sublasts;
|
||||
vector<PositionInfo> lasts;
|
||||
vector<eps_info> visits(1);
|
||||
|
||||
auto i = children.rbegin(), e = children.rend();
|
||||
for (; i != e; ++i) {
|
||||
sublasts = (*i)->last();
|
||||
vector<PositionInfo> sublasts = (*i)->last();
|
||||
applyEpsilonVisits(sublasts, visits);
|
||||
lasts.insert(lasts.end(), sublasts.begin(), sublasts.end());
|
||||
if ((*i)->empty()) {
|
||||
|
@ -260,14 +260,14 @@ void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical,
|
||||
u32 ekey, u64a min_offset,
|
||||
u64a max_offset) {
|
||||
u32 ckey = getCombKey(id);
|
||||
vector<LogicalOperator> op_stack;
|
||||
vector<u32> subid_stack;
|
||||
u32 lkey_start = INVALID_LKEY; // logical operation's lkey
|
||||
u32 paren = 0; // parentheses
|
||||
u32 digit = (u32)-1; // digit start offset, invalid offset is -1
|
||||
u32 subid = (u32)-1;
|
||||
u32 i;
|
||||
try {
|
||||
vector<LogicalOperator> op_stack;
|
||||
u32 paren = 0; // parentheses
|
||||
for (i = 0; logical[i]; i++) {
|
||||
if (isdigit(logical[i])) {
|
||||
if (digit == (u32)-1) { // new digit start
|
||||
@ -284,7 +284,7 @@ void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical,
|
||||
if (logical[i] == '(') {
|
||||
paren += 1;
|
||||
} else if (logical[i] == ')') {
|
||||
if (paren <= 0) {
|
||||
if (paren == 0) {
|
||||
throw LocatedParseError("Not enough left parentheses");
|
||||
}
|
||||
paren -= 1;
|
||||
|
@ -192,7 +192,7 @@ int roseCountingMiracleOccurs(const struct RoseEngine *t,
|
||||
|
||||
u32 count = 0;
|
||||
|
||||
s64a m_loc = start;
|
||||
s64a m_loc;
|
||||
|
||||
if (!cm->shufti) {
|
||||
u8 c = cm->c;
|
||||
|
@ -131,7 +131,6 @@ void findMaskLiteral(const vector<CharReach> &mask, bool streaming,
|
||||
if (better) {
|
||||
best_begin = begin;
|
||||
best_end = end;
|
||||
best_len = len;
|
||||
}
|
||||
|
||||
for (size_t i = best_begin; i < best_end; i++) {
|
||||
@ -393,8 +392,9 @@ bool validateTransientMask(const vector<CharReach> &mask, bool anchored,
|
||||
none_of(begin(lits), end(lits), mixed_sensitivity));
|
||||
|
||||
// Build the HWLM literal mask.
|
||||
vector<u8> msk, cmp;
|
||||
vector<u8> msk;
|
||||
if (grey.roseHamsterMasks) {
|
||||
vector<u8> cmp;
|
||||
buildLiteralMask(mask, msk, cmp, delay);
|
||||
}
|
||||
|
||||
|
@ -2251,10 +2251,9 @@ vector<u32> buildSuffixEkeyLists(const RoseBuildImpl &build, build_context &bc,
|
||||
|
||||
/* for each outfix also build elists */
|
||||
for (const auto &outfix : build.outfixes) {
|
||||
u32 qi = outfix.get_queue();
|
||||
set<u32> ekeys = reportsToEkeys(all_reports(outfix), build.rm);
|
||||
|
||||
if (!ekeys.empty()) {
|
||||
u32 qi = outfix.get_queue();
|
||||
qi_to_ekeys[qi] = {ekeys.begin(), ekeys.end()};
|
||||
}
|
||||
}
|
||||
@ -2975,7 +2974,8 @@ void buildFragmentPrograms(const RoseBuildImpl &build,
|
||||
!lit_prog.empty()) {
|
||||
const auto &cfrag = fragments[pfrag.included_frag_id];
|
||||
assert(pfrag.s.length() >= cfrag.s.length() &&
|
||||
!pfrag.s.any_nocase() >= !cfrag.s.any_nocase());
|
||||
!pfrag.s.any_nocase() == !cfrag.s.any_nocase());
|
||||
/** !pfrag.s.any_nocase() >= !cfrag.s.any_nocase()); **/
|
||||
u32 child_offset = cfrag.lit_program_offset;
|
||||
DEBUG_PRINTF("child %u offset %u\n", cfrag.fragment_id,
|
||||
child_offset);
|
||||
@ -2992,8 +2992,8 @@ void buildFragmentPrograms(const RoseBuildImpl &build,
|
||||
pfrag.lit_ids);
|
||||
if (pfrag.included_delay_frag_id != INVALID_FRAG_ID &&
|
||||
!rebuild_prog.empty()) {
|
||||
const auto &cfrag = fragments[pfrag.included_delay_frag_id];
|
||||
assert(pfrag.s.length() >= cfrag.s.length() &&
|
||||
/** assert(pfrag.s.length() >= cfrag.s.length() && **/
|
||||
assert(pfrag.s.length() == cfrag.s.length() &&
|
||||
!pfrag.s.any_nocase() >= !cfrag.s.any_nocase());
|
||||
u32 child_offset = cfrag.delay_program_offset;
|
||||
DEBUG_PRINTF("child %u offset %u\n", cfrag.fragment_id,
|
||||
|
@ -170,7 +170,6 @@ void renovateCastle(RoseBuildImpl &tbi, CastleProto *castle,
|
||||
return; /* bail - TODO: be less lazy */
|
||||
}
|
||||
|
||||
vector<CharReach> rem_local_cr;
|
||||
u32 ok_count = 0;
|
||||
for (auto it = e.s.end() - g[v].left.lag; it != e.s.end(); ++it) {
|
||||
if (!isSubsetOf(*it, cr)) {
|
||||
|
@ -884,7 +884,7 @@ void buildAccel(const RoseBuildImpl &build,
|
||||
}
|
||||
|
||||
bytecode_ptr<HWLM>
|
||||
buildHWLMMatcher(const RoseBuildImpl &build, LitProto *litProto) {
|
||||
buildHWLMMatcher(const RoseBuildImpl &build, const LitProto *litProto) {
|
||||
if (!litProto) {
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -101,7 +101,7 @@ struct LitProto {
|
||||
};
|
||||
|
||||
bytecode_ptr<HWLM>
|
||||
buildHWLMMatcher(const RoseBuildImpl &build, LitProto *proto);
|
||||
buildHWLMMatcher(const RoseBuildImpl &build, const LitProto *proto);
|
||||
|
||||
std::unique_ptr<LitProto>
|
||||
buildFloatingMatcherProto(const RoseBuildImpl &build,
|
||||
|
@ -1599,7 +1599,8 @@ void dedupeLeftfixesVariableLag(RoseBuildImpl &build) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
engine_groups[DedupeLeftKey(build, std::move(preds), left)].emplace_back(left);
|
||||
auto preds_copy = std::move(preds);
|
||||
engine_groups[DedupeLeftKey(build, preds_copy , left)].emplace_back(left);
|
||||
}
|
||||
|
||||
/* We don't bother chunking as we expect deduping to be successful if the
|
||||
|
@ -1004,9 +1004,9 @@ bool hasOrphanedTops(const RoseBuildImpl &build) {
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (g[v].left) {
|
||||
set<u32> &tops = leftfixes[g[v].left];
|
||||
if (!build.isRootSuccessor(v)) {
|
||||
// Tops for infixes come from the in-edges.
|
||||
set<u32> &tops = leftfixes[g[v].left];
|
||||
for (const auto &e : in_edges_range(v, g)) {
|
||||
tops.insert(g[e].rose_top);
|
||||
}
|
||||
|
@ -104,7 +104,7 @@ void runAnchoredTableStream(const struct RoseEngine *t, const void *atable,
|
||||
|
||||
|
||||
static really_inline
|
||||
void saveStreamState(const struct NFA *nfa, struct mq *q, s64a loc) {
|
||||
void saveStreamState(const struct NFA *nfa, const struct mq *q, s64a loc) {
|
||||
DEBUG_PRINTF("offset=%llu, length=%zu, hlength=%zu, loc=%lld\n",
|
||||
q->offset, q->length, q->hlength, loc);
|
||||
nfaQueueCompressState(nfa, q, loc);
|
||||
|
@ -215,12 +215,12 @@ struct ALIGN_CL_DIRECTIVE hs_scratch {
|
||||
|
||||
/* array of fatbit ptr; TODO: why not an array of fatbits? */
|
||||
static really_inline
|
||||
struct fatbit **getAnchoredLiteralLog(struct hs_scratch *scratch) {
|
||||
struct fatbit **getAnchoredLiteralLog(const struct hs_scratch *scratch) {
|
||||
return scratch->al_log;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
struct fatbit **getDelaySlots(struct hs_scratch *scratch) {
|
||||
struct fatbit **getDelaySlots(const struct hs_scratch *scratch) {
|
||||
return scratch->delay_slots;
|
||||
}
|
||||
|
||||
|
@ -69,8 +69,8 @@ void setSomLoc(struct fatbit *som_set_now, u64a *som_store, u32 som_store_count,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
char ok_and_mark_if_write(u8 *som_store_valid, struct fatbit *som_set_now,
|
||||
u8 *som_store_writable, u32 som_store_count,
|
||||
char ok_and_mark_if_write(u8 *som_store_valid, const struct fatbit *som_set_now,
|
||||
const u8 *som_store_writable, u32 som_store_count,
|
||||
u32 loc) {
|
||||
return !mmbit_set(som_store_valid, som_store_count, loc) /* unwritten */
|
||||
|| fatbit_isset(som_set_now, som_store_count, loc) /* write here, need
|
||||
@ -79,7 +79,7 @@ char ok_and_mark_if_write(u8 *som_store_valid, struct fatbit *som_set_now,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
char ok_and_mark_if_unset(u8 *som_store_valid, struct fatbit *som_set_now,
|
||||
char ok_and_mark_if_unset(u8 *som_store_valid, const struct fatbit *som_set_now,
|
||||
u32 som_store_count, u32 loc) {
|
||||
return !mmbit_set(som_store_valid, som_store_count, loc) /* unwritten */
|
||||
|| fatbit_isset(som_set_now, som_store_count, loc); /* write here, need
|
||||
|
@ -68,7 +68,7 @@ namespace ue2 {
|
||||
#endif
|
||||
|
||||
void *aligned_malloc_internal(size_t size, size_t align) {
|
||||
void *mem;
|
||||
void *mem= nullptr;;
|
||||
int rv = posix_memalign(&mem, align, size);
|
||||
if (rv != 0) {
|
||||
DEBUG_PRINTF("posix_memalign returned %d when asked for %zu bytes\n",
|
||||
|
@ -155,13 +155,13 @@ u32 compress32_impl_c(u32 x, u32 m) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
u32 mk, mp, mv, t;
|
||||
u32 mk, mv;
|
||||
|
||||
x &= m; // clear irrelevant bits
|
||||
|
||||
mk = ~m << 1; // we will count 0's to right
|
||||
for (u32 i = 0; i < 5; i++) {
|
||||
mp = mk ^ (mk << 1);
|
||||
u32 mp = mk ^ (mk << 1);
|
||||
mp ^= mp << 2;
|
||||
mp ^= mp << 4;
|
||||
mp ^= mp << 8;
|
||||
@ -169,7 +169,7 @@ u32 compress32_impl_c(u32 x, u32 m) {
|
||||
|
||||
mv = mp & m; // bits to move
|
||||
m = (m ^ mv) | (mv >> (1 << i)); // compress m
|
||||
t = x & mv;
|
||||
u32 t = x & mv;
|
||||
x = (x ^ t) | (t >> (1 << i)); // compress x
|
||||
mk = mk & ~mp;
|
||||
}
|
||||
@ -239,14 +239,14 @@ u32 expand32_impl_c(u32 x, u32 m) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
u32 m0, mk, mp, mv, t;
|
||||
u32 m0, mk, mv;
|
||||
u32 array[5];
|
||||
|
||||
m0 = m; // save original mask
|
||||
mk = ~m << 1; // we will count 0's to right
|
||||
|
||||
for (int i = 0; i < 5; i++) {
|
||||
mp = mk ^ (mk << 1); // parallel suffix
|
||||
u32 mp = mk ^ (mk << 1); // parallel suffix
|
||||
mp = mp ^ (mp << 2);
|
||||
mp = mp ^ (mp << 4);
|
||||
mp = mp ^ (mp << 8);
|
||||
@ -259,7 +259,7 @@ u32 expand32_impl_c(u32 x, u32 m) {
|
||||
|
||||
for (int i = 4; i >= 0; i--) {
|
||||
mv = array[i];
|
||||
t = x << (1 << i);
|
||||
u32 t = x << (1 << i);
|
||||
x = (x & ~mv) | (t & mv);
|
||||
}
|
||||
|
||||
@ -409,7 +409,7 @@ u64a pdep64_impl_c(u64a x, u64a _m) {
|
||||
u64a result = 0x0UL;
|
||||
const u64a mask = 0x8000000000000000UL;
|
||||
u64a m = _m;
|
||||
u64a c, t;
|
||||
|
||||
u64a p;
|
||||
|
||||
/* The pop-count of the mask gives the number of the bits from
|
||||
@ -421,8 +421,8 @@ u64a pdep64_impl_c(u64a x, u64a _m) {
|
||||
each mask bit as it is processed. */
|
||||
while (m != 0)
|
||||
{
|
||||
c = __builtin_clzl (m);
|
||||
t = x << (p - c);
|
||||
u64a c = __builtin_clzl (m);
|
||||
u64a t = x << (p - c);
|
||||
m ^= (mask >> c);
|
||||
result |= (t & (mask >> c));
|
||||
p++;
|
||||
|
@ -178,9 +178,9 @@ size_t describeClassInt(ostream &os, const CharReach &incr, size_t maxLength,
|
||||
|
||||
// Render charclass as a series of ranges
|
||||
size_t c_start = cr.find_first();
|
||||
size_t c = c_start, c_last = 0;
|
||||
size_t c = c_start;
|
||||
while (c != CharReach::npos) {
|
||||
c_last = c;
|
||||
size_t c_last = c;
|
||||
c = cr.find_next(c);
|
||||
if (c != c_last + 1 || c_last == 0xff) {
|
||||
describeRange(os, c_start, c_last, out_type);
|
||||
|
@ -102,10 +102,10 @@ public:
|
||||
using category = boost::read_write_property_map_tag;
|
||||
|
||||
small_color_map(size_t n_in, const IndexMap &index_map_in)
|
||||
: n(n_in), index_map(index_map_in) {
|
||||
size_t num_bytes = (n + entries_per_byte - 1) / entries_per_byte;
|
||||
data = std::make_shared<std::vector<unsigned char>>(num_bytes);
|
||||
fill(small_color::white);
|
||||
: n(n_in),
|
||||
index_map(index_map_in),
|
||||
data(std::make_shared<std::vector<unsigned char>>((n_in + entries_per_byte - 1) / entries_per_byte)) {
|
||||
fill(small_color::white);
|
||||
}
|
||||
|
||||
void fill(small_color color) {
|
||||
|
@ -1145,7 +1145,7 @@ really_inline SuperVector<32> SuperVector<32>::loadu_maskz(void const *ptr, uint
|
||||
template<>
|
||||
really_inline SuperVector<32> SuperVector<32>::alignr(SuperVector<32> &other, int8_t offset)
|
||||
{
|
||||
#if defined(HAVE__BUILTIN_CONSTANT_P) && !(defined(__GNUC__) && (__GNUC__ == 13))
|
||||
#if defined(HAVE__BUILTIN_CONSTANT_P) && !(defined(__GNUC__) && ((__GNUC__ == 13) || (__GNUC__ == 14)))
|
||||
if (__builtin_constant_p(offset)) {
|
||||
if (offset == 16) {
|
||||
return *this;
|
||||
@ -1801,7 +1801,7 @@ really_inline SuperVector<64> SuperVector<64>::pshufb_maskz(SuperVector<64> b, u
|
||||
template<>
|
||||
really_inline SuperVector<64> SuperVector<64>::alignr(SuperVector<64> &l, int8_t offset)
|
||||
{
|
||||
#if defined(HAVE__BUILTIN_CONSTANT_P)
|
||||
#if defined(HAVE__BUILTIN_CONSTANT_P) && !(defined(__GNUC__) && (__GNUC__ == 14))
|
||||
if (__builtin_constant_p(offset)) {
|
||||
if (offset == 16) {
|
||||
return *this;
|
||||
|
@ -66,32 +66,32 @@ public:
|
||||
explicit EngineChimera(ch_database_t *db, CompileCHStats cs);
|
||||
~EngineChimera();
|
||||
|
||||
std::unique_ptr<EngineContext> makeContext() const;
|
||||
std::unique_ptr<EngineContext> makeContext() const override;
|
||||
|
||||
void scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
ResultEntry &result, EngineContext &ectx) const override;
|
||||
|
||||
void scan_vectored(const char *const *data, const unsigned int *len,
|
||||
unsigned int count, unsigned int streamId,
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
ResultEntry &result, EngineContext &ectx) const override;
|
||||
|
||||
std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
|
||||
unsigned id) const;
|
||||
unsigned id) const override;
|
||||
|
||||
void streamClose(std::unique_ptr<EngineStream> stream,
|
||||
ResultEntry &result) const;
|
||||
ResultEntry &result) const override;
|
||||
|
||||
void streamCompressExpand(EngineStream &stream,
|
||||
std::vector<char> &temp) const;
|
||||
std::vector<char> &temp) const override;
|
||||
|
||||
void streamScan(EngineStream &stream, const char *data, unsigned int len,
|
||||
unsigned int id, ResultEntry &result) const;
|
||||
unsigned int id, ResultEntry &result) const override;
|
||||
|
||||
void printStats() const;
|
||||
void printStats() const override;
|
||||
|
||||
void printCsvStats() const;
|
||||
void printCsvStats() const override;
|
||||
|
||||
void sqlStats(SqlDB &db) const;
|
||||
void sqlStats(SqlDB &db) const override;
|
||||
|
||||
private:
|
||||
ch_database_t *db;
|
||||
|
@ -248,7 +248,7 @@ void EngineHyperscan::printStats() const {
|
||||
printf("Signature set: %s\n", compile_stats.sigs_name.c_str());
|
||||
}
|
||||
printf("Signatures: %s\n", compile_stats.signatures.c_str());
|
||||
printf("Hyperscan info: %s\n", compile_stats.db_info.c_str());
|
||||
printf("Vectorscan info: %s\n", compile_stats.db_info.c_str());
|
||||
printf("Expression count: %'zu\n", compile_stats.expressionCount);
|
||||
printf("Bytecode size: %'zu bytes\n", compile_stats.compiledSize);
|
||||
printf("Database CRC: 0x%x\n", compile_stats.crc32);
|
||||
@ -456,7 +456,7 @@ buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode,
|
||||
|
||||
if (err == HS_COMPILER_ERROR) {
|
||||
if (compile_err->expression >= 0) {
|
||||
printf("Compile error for signature #%u: %s\n",
|
||||
printf("Compile error for signature #%d: %s\n",
|
||||
compile_err->expression, compile_err->message);
|
||||
} else {
|
||||
printf("Compile error: %s\n", compile_err->message);
|
||||
|
@ -75,32 +75,32 @@ public:
|
||||
explicit EngineHyperscan(hs_database_t *db, CompileHSStats cs);
|
||||
~EngineHyperscan();
|
||||
|
||||
std::unique_ptr<EngineContext> makeContext() const;
|
||||
std::unique_ptr<EngineContext> makeContext() const override;
|
||||
|
||||
void scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
ResultEntry &result, EngineContext &ectx) const override;
|
||||
|
||||
void scan_vectored(const char *const *data, const unsigned int *len,
|
||||
unsigned int count, unsigned int streamId,
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
ResultEntry &result, EngineContext &ectx) const override;
|
||||
|
||||
std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
|
||||
unsigned id) const;
|
||||
unsigned id) const override;
|
||||
|
||||
void streamClose(std::unique_ptr<EngineStream> stream,
|
||||
ResultEntry &result) const;
|
||||
ResultEntry &result) const override;
|
||||
|
||||
void streamCompressExpand(EngineStream &stream,
|
||||
std::vector<char> &temp) const;
|
||||
std::vector<char> &temp) const override;
|
||||
|
||||
void streamScan(EngineStream &stream, const char *data, unsigned int len,
|
||||
unsigned int id, ResultEntry &result) const;
|
||||
unsigned int id, ResultEntry &result) const override;
|
||||
|
||||
void printStats() const;
|
||||
void printStats() const override;
|
||||
|
||||
void printCsvStats() const;
|
||||
void printCsvStats() const override;
|
||||
|
||||
void sqlStats(SqlDB &db) const;
|
||||
void sqlStats(SqlDB &db) const override;
|
||||
|
||||
private:
|
||||
hs_database_t *db;
|
||||
|
@ -74,32 +74,32 @@ public:
|
||||
CompilePCREStats cs, int capture_cnt_in);
|
||||
~EnginePCRE();
|
||||
|
||||
std::unique_ptr<EngineContext> makeContext() const;
|
||||
std::unique_ptr<EngineContext> makeContext() const override;
|
||||
|
||||
void scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
ResultEntry &result, EngineContext &ectx) const override;
|
||||
|
||||
void scan_vectored(const char *const *data, const unsigned int *len,
|
||||
unsigned int count, unsigned int streamId,
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
ResultEntry &result, EngineContext &ectx) const override;
|
||||
|
||||
std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
|
||||
unsigned id) const;
|
||||
unsigned id) const override;
|
||||
|
||||
void streamClose(std::unique_ptr<EngineStream> stream,
|
||||
ResultEntry &result) const;
|
||||
ResultEntry &result) const override;
|
||||
|
||||
void streamCompressExpand(EngineStream &stream,
|
||||
std::vector<char> &temp) const;
|
||||
std::vector<char> &temp) const override;
|
||||
|
||||
void streamScan(EngineStream &stream, const char *data, unsigned int len,
|
||||
unsigned int id, ResultEntry &result) const;
|
||||
unsigned int id, ResultEntry &result) const override;
|
||||
|
||||
void printStats() const;
|
||||
void printStats() const override;
|
||||
|
||||
void printCsvStats() const;
|
||||
void printCsvStats() const override;
|
||||
|
||||
void sqlStats(SqlDB &db) const;
|
||||
void sqlStats(SqlDB &db) const override;
|
||||
|
||||
private:
|
||||
std::vector<std::unique_ptr<PcreDB>> dbs;
|
||||
|
@ -465,7 +465,7 @@ void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
|
||||
|
||||
/** Start the global timer. */
|
||||
static
|
||||
void startTotalTimer(ThreadContext *ctx) {
|
||||
void startTotalTimer(const ThreadContext *ctx) {
|
||||
if (ctx->num != 0) {
|
||||
return; // only runs in the first thread
|
||||
}
|
||||
@ -474,7 +474,7 @@ void startTotalTimer(ThreadContext *ctx) {
|
||||
|
||||
/** Stop the global timer and calculate totals. */
|
||||
static
|
||||
void stopTotalTimer(ThreadContext *ctx) {
|
||||
void stopTotalTimer(const ThreadContext *ctx) {
|
||||
if (ctx->num != 0) {
|
||||
return; // only runs in the first thread
|
||||
}
|
||||
|
@ -97,12 +97,12 @@ unsigned int countFailures = 0;
|
||||
|
||||
class ParsedExpr {
|
||||
public:
|
||||
ParsedExpr(string regex_in, unsigned int flags_in, hs_expr_ext ext_in)
|
||||
ParsedExpr(string regex_in, unsigned int flags_in, const hs_expr_ext& ext_in)
|
||||
: regex(regex_in), flags(flags_in), ext(ext_in) {}
|
||||
~ParsedExpr() {}
|
||||
string regex;
|
||||
unsigned int flags;
|
||||
hs_expr_ext ext;
|
||||
const hs_expr_ext& ext;
|
||||
};
|
||||
|
||||
typedef map<unsigned int, ParsedExpr> ExprExtMap;
|
||||
|
@ -102,6 +102,7 @@ set(unit_internal_SOURCES
|
||||
internal/rvermicelli.cpp
|
||||
internal/simd_utils.cpp
|
||||
internal/supervector.cpp
|
||||
internal/sheng.cpp
|
||||
internal/shuffle.cpp
|
||||
internal/shufti.cpp
|
||||
internal/state_compress.cpp
|
||||
|
@ -58,7 +58,7 @@ std::ostream &operator<<(std::ostream &o, const pattern &p) {
|
||||
}
|
||||
|
||||
hs_database_t *buildDB(const vector<pattern> &patterns, unsigned int mode,
|
||||
hs_platform_info *plat) {
|
||||
const hs_platform_info *plat) {
|
||||
vector<const char *> expressions;
|
||||
vector<unsigned int> flags;
|
||||
vector<unsigned int> ids;
|
||||
@ -92,7 +92,7 @@ hs_database_t *buildDB(const pattern &expr, unsigned int mode) {
|
||||
|
||||
hs_database_t *buildDB(const char *expression, unsigned int flags,
|
||||
unsigned int id, unsigned int mode,
|
||||
hs_platform_info_t *plat) {
|
||||
const hs_platform_info_t *plat) {
|
||||
return buildDB({pattern(expression, flags, id)}, mode, plat);
|
||||
}
|
||||
|
||||
|
@ -99,11 +99,11 @@ struct pattern {
|
||||
std::ostream &operator<<(std::ostream &o, const pattern &p);
|
||||
|
||||
hs_database_t *buildDB(const std::vector<pattern> &patterns, unsigned int mode,
|
||||
hs_platform_info *plat = nullptr);
|
||||
const hs_platform_info *plat = nullptr);
|
||||
hs_database_t *buildDB(const pattern &pat, unsigned int mode);
|
||||
hs_database_t *buildDB(const char *expression, unsigned int flags,
|
||||
unsigned int id, unsigned int mode,
|
||||
hs_platform_info *plat = nullptr);
|
||||
const hs_platform_info *plat = nullptr);
|
||||
hs_database_t *buildDB(const char *filename, unsigned int mode,
|
||||
unsigned int extra_flags = 0);
|
||||
hs_database_t *buildDB(const char *filename, unsigned int mode,
|
||||
|
@ -62,7 +62,7 @@ u32 our_clzll(u64a x) {
|
||||
TEST(BitUtils, findAndClearLSB32_1) {
|
||||
// test that it can find every single-bit case
|
||||
for (unsigned int i = 0; i < 32; i++) {
|
||||
u32 input = 1 << i;
|
||||
u32 input = 1U << i;
|
||||
u32 idx = findAndClearLSB_32(&input);
|
||||
EXPECT_EQ(i, idx);
|
||||
EXPECT_EQ(0U, input);
|
||||
@ -112,7 +112,7 @@ TEST(BitUtils, findAndClearLSB64_2) {
|
||||
TEST(BitUtils, findAndClearMSB32_1) {
|
||||
// test that it can find every single-bit case
|
||||
for (unsigned int i = 0; i < 32; i++) {
|
||||
u32 input = 1 << i;
|
||||
u32 input = 1U << i;
|
||||
u32 idx = findAndClearMSB_32(&input);
|
||||
EXPECT_EQ(i, idx);
|
||||
EXPECT_EQ(0U, input);
|
||||
|
@ -488,7 +488,6 @@ TEST_P(FDRFloodp, StreamingMask) {
|
||||
Grey());
|
||||
CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
|
||||
|
||||
hwlm_error_t fdrStatus;
|
||||
const u32 cnt4 = dataSize - 4 + 1;
|
||||
|
||||
for (u32 streamChunk = 1; streamChunk <= 16; streamChunk *= 2) {
|
||||
@ -496,7 +495,7 @@ TEST_P(FDRFloodp, StreamingMask) {
|
||||
const u8 *d = data.data();
|
||||
// reference past the end of fake history to allow headroom
|
||||
const u8 *fhist = fake_history.data() + fake_history_size;
|
||||
fdrStatus = fdrExecStreaming(fdr.get(), fhist, 0, d, streamChunk, 0,
|
||||
hwlm_error_t fdrStatus = fdrExecStreaming(fdr.get(), fhist, 0, d, streamChunk, 0,
|
||||
countCallback, &scratch,
|
||||
HWLM_ALL_GROUPS);
|
||||
ASSERT_EQ(0, fdrStatus);
|
||||
|
@ -46,7 +46,7 @@ UNUSED
|
||||
static
|
||||
void mmbit_display(const u8 *bits, u32 total_bits) {
|
||||
for (u32 i = 0; i < mmbit_size(total_bits); i += 8) {
|
||||
printf("block %d:", i / 8);
|
||||
printf("block %u:", i / 8);
|
||||
for (s32 j = 7; j >= 0; j--) {
|
||||
u8 a = (*(bits + i + j));
|
||||
printf(" %02x", a);
|
||||
@ -72,7 +72,7 @@ UNUSED
|
||||
static
|
||||
void mmbit_display_comp(const u8 *bits, u32 comp_size) {
|
||||
for (u32 i = 0; i < comp_size; i += 8) {
|
||||
printf("block %d:", i / 8);
|
||||
printf("block %u:", i / 8);
|
||||
for (s32 j = 7; j >= 0; j--) {
|
||||
u8 a = (*(bits + i + j));
|
||||
printf(" %02x", a);
|
||||
@ -401,7 +401,7 @@ TEST_P(MultiBitCompTest, CompCompressDecompressDense) {
|
||||
|
||||
TEST(MultiBitComp, CompIntegration1) {
|
||||
// 256 + 1 --> smallest 2-level mmbit
|
||||
u32 total_size = mmbit_size(257);
|
||||
//u32 total_size = mmbit_size(257);
|
||||
mmbit_holder ba(257);
|
||||
|
||||
//-------------------- 1 -----------------------//
|
||||
@ -516,7 +516,7 @@ TEST(MultiBitComp, CompIntegration1) {
|
||||
|
||||
TEST(MultiBitComp, CompIntegration2) {
|
||||
// 64^2 + 1 --> smallest 3-level mmbit
|
||||
u32 total_size = mmbit_size(4097);
|
||||
//u32 total_size = mmbit_size(4097);
|
||||
mmbit_holder ba(4097);
|
||||
|
||||
//-------------------- 1 -----------------------//
|
||||
@ -645,7 +645,7 @@ TEST(MultiBitComp, CompIntegration2) {
|
||||
|
||||
TEST(MultiBitComp, CompIntegration3) {
|
||||
// 64^3 + 1 --> smallest 4-level mmbit
|
||||
u32 total_size = mmbit_size(262145);
|
||||
//u32 total_size = mmbit_size(262145);
|
||||
mmbit_holder ba(262145);
|
||||
|
||||
//-------------------- 1 -----------------------//
|
||||
|
@ -245,7 +245,7 @@ TEST(pqueue, queue1) {
|
||||
u32 in[] = {1, 2, 3, 4, 5, 6, 7, 8};
|
||||
u32 expected[] = {4, 5, 6, 7, 8, 3, 2, 1};
|
||||
u32 temp[ARRAY_LENGTH(in)];
|
||||
u32 output[ARRAY_LENGTH(in)];
|
||||
u32 output[ARRAY_LENGTH(in)] = {0};
|
||||
|
||||
u32 queue_size = 0;
|
||||
u32 i = 0, o = 0;
|
||||
@ -275,7 +275,7 @@ TEST(pqueue, queue2) {
|
||||
u32 in[] = {8, 7, 6, 5, 4, 3, 2, 1};
|
||||
u32 expected[] = {8, 7, 6, 5, 4, 3, 2, 1};
|
||||
u32 temp[ARRAY_LENGTH(in)];
|
||||
u32 output[ARRAY_LENGTH(in)];
|
||||
u32 output[ARRAY_LENGTH(in)] = {0};
|
||||
|
||||
u32 queue_size = 0;
|
||||
u32 i = 0, o = 0;
|
||||
@ -301,7 +301,7 @@ TEST(pqueue, queue3) {
|
||||
u32 in[] = {1, 8, 2, 7, 3, 6, 4, 5};
|
||||
u32 expected[] = {8, 7, 6, 4, 5, 3, 2, 1};
|
||||
u32 temp[ARRAY_LENGTH(in)];
|
||||
u32 output[ARRAY_LENGTH(in)];
|
||||
u32 output[ARRAY_LENGTH(in)] = {0};
|
||||
|
||||
u32 queue_size = 0;
|
||||
u32 i = 0, o = 0;
|
||||
|
@ -277,10 +277,9 @@ TEST_P(RepeatTest, FillRing) {
|
||||
}
|
||||
|
||||
// We should be able to see matches for all of these (beyond the last top offset).
|
||||
enum TriggerResult rv;
|
||||
for (u64a i = offset + info.repeatMax;
|
||||
i <= offset + info.repeatMax + info.repeatMin; i++) {
|
||||
rv = processTugTrigger(&info, ctrl, state, i);
|
||||
enum TriggerResult rv = processTugTrigger(&info, ctrl, state, i);
|
||||
if (rv == TRIGGER_SUCCESS_CACHE) {
|
||||
rv = TRIGGER_SUCCESS;
|
||||
}
|
||||
@ -998,16 +997,14 @@ TEST_P(SparseOptimalTest, FillTops) {
|
||||
repeatStore(info, ctrl, state, offset, 0);
|
||||
ASSERT_EQ(offset, repeatLastTop(info, ctrl, state));
|
||||
|
||||
u64a offset2;
|
||||
for (u32 i = min_period; i < patch_count * patch_size; i += min_period) {
|
||||
offset2 = offset + i;
|
||||
u64a offset2 = offset + i;
|
||||
repeatStore(info, ctrl, state, offset2, 1);
|
||||
ASSERT_EQ(offset2, repeatLastTop(info, ctrl, state));
|
||||
}
|
||||
|
||||
u64a exit2;
|
||||
for (u32 i = 0; i < patch_count * patch_size; i += min_period) {
|
||||
exit2 = exit + i;
|
||||
u64a exit2 = exit + i;
|
||||
for (u32 j = exit2 + info->repeatMin;
|
||||
j <= offset + info->repeatMax; j++) {
|
||||
ASSERT_EQ(REPEAT_MATCH, repeatHasMatch(info, ctrl, state, j));
|
||||
|
@ -87,12 +87,11 @@ static int initLegalValidMasks(u64a validMasks[]) {
|
||||
*/
|
||||
static int initLegalNegMasks(u64a negMasks[]) {
|
||||
u64a data = 0;
|
||||
u64a offset;
|
||||
int num = 0;
|
||||
while (data != ONES64) {
|
||||
negMasks[num] = data;
|
||||
num++;
|
||||
offset = (data | (data +1)) ^ data;
|
||||
u64a offset = (data | (data +1)) ^ data;
|
||||
data += 0xfeULL * offset + 1;
|
||||
}
|
||||
negMasks[num] = data;
|
||||
|
@ -194,10 +194,9 @@ TEST(ValidateMask32, testMask32_3) {
|
||||
u32 valid_mask = ONES32 << (left + right) >> left;
|
||||
for (int i = 0; i < test_len; i++) {
|
||||
const auto &t = testBasic[i];
|
||||
int bool_result;
|
||||
for (int j = 0; j < 5000; j++) {
|
||||
u32 neg_mask = neg_mask_rand.Generate(1u << 31);
|
||||
bool_result = (neg_mask & valid_mask) ==
|
||||
int bool_result = (neg_mask & valid_mask) ==
|
||||
(t.neg_mask & valid_mask);
|
||||
EXPECT_EQ(bool_result, validateMask32(t.data.a256,
|
||||
valid_mask,
|
||||
|
709
unit/internal/sheng.cpp
Normal file
709
unit/internal/sheng.cpp
Normal file
@ -0,0 +1,709 @@
|
||||
/*
|
||||
* Copyright (c) 2024, Arm ltd
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "nfa/shengcompile.h"
|
||||
#include "nfa/rdfa.h"
|
||||
#include "util/bytecode_ptr.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/report_manager.h"
|
||||
|
||||
extern "C" {
|
||||
#include "hs_compile.h"
|
||||
#include "nfa/nfa_api.h"
|
||||
#include "nfa/nfa_api_queue.h"
|
||||
#include "nfa/nfa_api_util.h"
|
||||
#include "nfa/nfa_internal.h"
|
||||
#include "nfa/rdfa.h"
|
||||
#include "nfa/sheng.h"
|
||||
#include "ue2common.h"
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
/* State shared between a Sheng test and periodic_pattern_callback(). */
struct callback_context {
|
||||
/* expected distance between two consecutive matches */
unsigned int period;
|
||||
/* number of matches reported so far; incremented by periodic_pattern_callback() */
unsigned int match_count;
|
||||
/* length of the planted pattern; subtracted from a match's end offset by the callback */
unsigned int pattern_length;
|
||||
};
|
||||
|
||||
/**
 * @brief Match callback that only prints the match it receives.
 *
 * @param start   start offset handed over by the engine (printed)
 * @param end     end offset handed over by the engine (printed)
 * @param id      report id of the match (printed)
 * @param context unused
 * @return always 1, i.e. "keep matching" (0 would stop matching)
 */
int dummy_callback(u64a start, u64a end, ReportID id, void *context) {
    static_cast<void>(context); /* deliberately ignored */
    printf("callback %llu %llu %u\n", start, end, id);
    const int keep_matching = 1; /* 0 stops matching, !0 continue */
    return keep_matching;
}
|
||||
|
||||
/**
 * @brief Match callback that checks matches arrive at a fixed period.
 *
 * @p raw_context must point to a callback_context. The Nth match (counting
 * from zero) is expected to start at N * period, which is verified with a
 * non-fatal EXPECT_EQ against `end - pattern_length`. The match counter is
 * then advanced.
 *
 * @return always 1, i.e. "keep matching" (0 would stop matching)
 */
int periodic_pattern_callback(u64a start, u64a end, ReportID id, void *raw_context) {
    static_cast<void>(start); /* only the end offset is checked */
    static_cast<void>(id);

    struct callback_context *context = static_cast<struct callback_context *>(raw_context);
    EXPECT_EQ(context->period * context->match_count, end - context->pattern_length);
    context->match_count += 1;

    return 1; /* 0 stops matching, !0 continue */
}
|
||||
|
||||
/**
|
||||
* @brief Fill the state matrix with a diagonal pattern: accept the Nth character to go to the N+1 state
|
||||
*/
|
||||
static void fill_straight_regex_sequence(struct ue2::raw_dfa *dfa, int start_state, int end_state, int state_count)
|
||||
{
|
||||
for (int state = start_state; state < end_state; state++) {
|
||||
dfa->states[state].next.assign(state_count ,1);
|
||||
dfa->states[state].next[0] = 2;
|
||||
dfa->states[state].next[1] = 2;
|
||||
dfa->states[state].next[state] = state+1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * @brief Populate @p dfa with the 8-state automaton used by the Sheng16 tests.
 *
 * The automaton recognises [a,b][c-e]{3}of:
 *   (1) -a,b-> (2) -c,d,e-> (3) -c,d,e-> (4) -c,d,e-> (5) -o-> (6) -f-> ((7))
 *   (0) = dead
 * State 7 is the only accept state and is the only one that receives @p rID.
 */
static void init_raw_dfa16(struct ue2::raw_dfa *dfa, const ReportID rID)
{
    const int state_total = 8;

    dfa->start_anchored = 1;
    dfa->start_floating = 1;
    dfa->alpha_size = 8;

    for (int id = 0; id < state_total; ++id) {
        struct ue2::dstate fresh(dfa->alpha_size);
        fresh.next = std::vector<ue2::dstate_id_t>(state_total);
        fresh.daddy = 0;
        fresh.impl_id = id; /* id of the state */
        fresh.reports = ue2::flat_set<ReportID>();
        fresh.reports_eod = ue2::flat_set<ReportID>();
        dfa->states.push_back(fresh);
    }

    /* the accept state is the only one carrying a report */
    dfa->states[7].reports.insert(rID);

    /* start with every input mapped to symbol class 0 ... */
    int symbol = 0;
    while (symbol < ue2::ALPHABET_SIZE) {
        dfa->alpha_remap[symbol] = 0;
        ++symbol;
    }

    /* ... then give each pattern character its own class: a=0, b=1, ..., f=6 */
    const char pattern_chars[] = {'a', 'b', 'c', 'd', 'e', 'o', 'f'};
    for (int cls = 0; cls < 7; ++cls) {
        dfa->alpha_remap[static_cast<unsigned char>(pattern_chars[cls])] = cls;
    }
    /* index 256 gets the last class (alpha_size - 1); the original author
     * notes that a check elsewhere runs on that class */
    dfa->alpha_remap[256] = 7;

    /* transition rows, one column per symbol class:
     *                      a b c d e o f */
    dfa->states[0].next = {0, 0, 0, 0, 0, 0, 0};
    dfa->states[1].next = {2, 2, 1, 1, 1, 1, 1}; /* nothing */
    dfa->states[2].next = {2, 2, 3, 3, 3, 1, 1}; /* [a,b] */
    dfa->states[3].next = {2, 2, 4, 4, 4, 1, 1}; /* [a,b][c-e]{1} */
    dfa->states[4].next = {2, 2, 5, 5, 5, 1, 1}; /* [a,b][c-e]{2} */
    fill_straight_regex_sequence(dfa, 5, 7, 7);  /* [a,b][c-e]{3}o */
    dfa->states[7].next = {2, 2, 1, 1, 1, 1, 1}; /* [a,b][c-e]{3}of */
}
|
||||
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
/**
 * @brief Populate @p dfa with the 18-state automaton used by the Sheng32 tests.
 *
 * sheng32 needs more than 16 states, so the graph is made longer:
 * [a,b][c-e]{3}of0123456789
 *   (1) -a,b-> (2) -c,d,e-> (3) -c,d,e-> (4) -c,d,e-> (5) -o-> (6) -f-> (7) -<numbers>-> ((17))
 *   (0) = dead
 * State 17 is the only accept state and is the only one that receives @p rID.
 */
static void init_raw_dfa32(struct ue2::raw_dfa *dfa, const ReportID rID)
{
    const int state_total = 18;

    dfa->start_anchored = 1;
    dfa->start_floating = 1;
    dfa->alpha_size = 18;

    for (int id = 0; id < state_total; ++id) {
        struct ue2::dstate fresh(dfa->alpha_size);
        fresh.next = std::vector<ue2::dstate_id_t>(state_total);
        fresh.daddy = 0;
        fresh.impl_id = id; /* id of the state */
        fresh.reports = ue2::flat_set<ReportID>();
        fresh.reports_eod = ue2::flat_set<ReportID>();
        dfa->states.push_back(fresh);
    }

    /* the accept state is the only one carrying a report */
    dfa->states[17].reports.insert(rID);

    /* start with every input mapped to symbol class 0 ... */
    int symbol = 0;
    while (symbol < ue2::ALPHABET_SIZE) {
        dfa->alpha_remap[symbol] = 0;
        ++symbol;
    }

    /* ... then give each pattern character its own class: a=0, b=1, ..., f=6 */
    const char pattern_chars[] = {'a', 'b', 'c', 'd', 'e', 'o', 'f'};
    for (int cls = 0; cls < 7; ++cls) {
        dfa->alpha_remap[static_cast<unsigned char>(pattern_chars[cls])] = cls;
    }
    /* digits '0'..'9' take classes 7..16 */
    for (int digit = 0; digit < 10; ++digit) {
        dfa->alpha_remap[digit + '0'] = digit + 7;
    }
    /* index 256 gets the last class (alpha_size - 1); the original author
     * notes that a check elsewhere runs on that class */
    dfa->alpha_remap[256] = 17;

    /* transition rows, one column per symbol class:
     *                       a b c d e o f 0 1 2 3 4 5 6 7 8 9 */
    dfa->states[0].next  = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
    dfa->states[1].next  = {2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; /* nothing */
    dfa->states[2].next  = {2, 2, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; /* [a,b] */
    dfa->states[3].next  = {2, 2, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; /* [a,b][c-e]{1} */
    dfa->states[4].next  = {2, 2, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; /* [a,b][c-e]{2} */
    fill_straight_regex_sequence(dfa, 5, 17, 17);                               /* [a,b][c-e]{3}of012345678 */
    dfa->states[17].next = {2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; /* [a,b][c-e]{3}of0123456789 */
}
#endif /* defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) */
|
||||
|
||||
typedef ue2::bytecode_ptr<NFA> (*sheng_compile_ptr)(ue2::raw_dfa&,
|
||||
const ue2::CompileContext&,
|
||||
const ue2::ReportManager&,
|
||||
bool,
|
||||
std::set<ue2::dstate_id_t>*);
|
||||
|
||||
typedef void (*init_raw_dfa_ptr)(struct ue2::raw_dfa*, const ReportID);
|
||||
|
||||
|
||||
/**
 * @brief Build a raw DFA with @p init_dfa_function and compile it into an NFA
 *        with @p compile_function.
 *
 * All compile-time helpers (Grey, CompileContext, ReportManager, Report and the
 * raw_dfa itself) live on the stack, so they are released on every exit path,
 * including the early return taken when ASSERT_NE fails. The previous version
 * heap-allocated them, never deleted the raw_dfa, and skipped the remaining
 * deletes on assertion failure.
 *
 * @param out_nfa           receives the compiled NFA, released from its
 *                          bytecode_ptr; the caller becomes its owner
 * @param compile_function  Sheng compile entry point to exercise
 * @param init_dfa_function fills the raw DFA and attaches the report id
 */
static inline void init_nfa(struct NFA **out_nfa, sheng_compile_ptr compile_function, init_raw_dfa_ptr init_dfa_function) {
    ue2::Grey g;
    hs_platform_info plat_info = {0, 0, 0, 0};
    ue2::CompileContext cc(false, false, ue2::target_t(plat_info), g);
    ue2::ReportManager rm(g);
    ue2::Report report(ue2::EXTERNAL_CALLBACK, 0);
    ReportID rID = rm.getInternalId(report);
    rm.setProgramOffset(0, 0);

    ue2::raw_dfa dfa(ue2::NFA_OUTFIX);
    init_dfa_function(&dfa, rID);

    /* store the result before asserting so the caller always sees a defined value */
    *out_nfa = (compile_function(dfa, cc, rm, false, nullptr)).release();
    ASSERT_NE(nullptr, *out_nfa);
}
|
||||
|
||||
static void init_nfa16(struct NFA **out_nfa) {
|
||||
init_nfa(out_nfa, ue2::shengCompile, init_raw_dfa16);
|
||||
}
|
||||
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
/* Compile the 18-state test automaton (init_raw_dfa32) with ue2::sheng32Compile. */
static void init_nfa32(struct NFA **out_nfa) {
    const sheng_compile_ptr compiler = ue2::sheng32Compile;
    const init_raw_dfa_ptr dfa_builder = init_raw_dfa32;
    init_nfa(out_nfa, compiler, dfa_builder);
}
#endif /* defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) */
|
||||
|
||||
/* Single byte of state storage; init_sheng_queue() points each queue's `state` field at it. */
static char state_buffer;
|
||||
|
||||
static inline void init_sheng_queue(struct mq **out_q, uint8_t *buffer, size_t max_size, void (*init_nfa_func)(struct NFA **out_nfa) ) {
|
||||
struct NFA* nfa;
|
||||
init_nfa_func(&nfa);
|
||||
assert(nfa);
|
||||
|
||||
struct mq *q = new mq();
|
||||
|
||||
memset(q, 0, sizeof(struct mq));
|
||||
q->nfa = nfa;
|
||||
q->state = &state_buffer;
|
||||
q->cb = dummy_callback;
|
||||
q->buffer = buffer;
|
||||
q->length = max_size; /* setting this as the max length scanable */
|
||||
|
||||
if (nfa != q->nfa) {
|
||||
printf("Something went wrong while initializing sheng.\n");
|
||||
}
|
||||
nfaQueueInitState(nfa, q);
|
||||
pushQueueAt(q, 0, MQE_START, 0);
|
||||
pushQueueAt(q, 1, MQE_END, q->length );
|
||||
|
||||
*out_q = q;
|
||||
}
|
||||
|
||||
/* Queue setup for the 16-state automaton: init_sheng_queue() with init_nfa16 as the NFA builder. */
static void init_sheng_queue16(struct mq **out_q, uint8_t *buffer ,size_t max_size) {
    void (*const nfa_factory)(struct NFA **) = init_nfa16;
    init_sheng_queue(out_q, buffer, max_size, nfa_factory);
}
|
||||
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
/* Queue setup for the 18-state automaton: init_sheng_queue() with init_nfa32 as the NFA builder. */
static void init_sheng_queue32(struct mq **out_q, uint8_t *buffer, size_t max_size) {
    void (*const nfa_factory)(struct NFA **) = init_nfa32;
    init_sheng_queue(out_q, buffer, max_size, nfa_factory);
}
#endif /* defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) */
|
||||
|
||||
static
|
||||
void fill_pattern(u8* buf, size_t buffer_size, unsigned int start_offset, unsigned int period, const char *pattern, unsigned int pattern_length) {
|
||||
memset(buf, '_', buffer_size);
|
||||
|
||||
for (unsigned int i = 0; i < buffer_size - 8; i+= 8) {
|
||||
/* filling with some junk, including some character used for a valid state, to prevent the use of shufti */
|
||||
memcpy(buf + i, "jgohcxbf", 8);
|
||||
}
|
||||
|
||||
for (unsigned int i = start_offset; i < buffer_size - pattern_length; i += period) {
|
||||
memcpy(buf + i, pattern, pattern_length);
|
||||
}
|
||||
}
|
||||
|
||||
/* Generate ground truth to compare to */
|
||||
struct NFA *get_expected_nfa_header(u8 type, unsigned int length, unsigned int nposition) {
|
||||
struct NFA *expected_nfa_header = new struct NFA();
|
||||
memset(expected_nfa_header, 0, sizeof(struct NFA));
|
||||
expected_nfa_header->length = length;
|
||||
expected_nfa_header->type = type;
|
||||
expected_nfa_header->nPositions = nposition;
|
||||
expected_nfa_header->scratchStateSize = 1;
|
||||
expected_nfa_header->streamStateSize = 1;
|
||||
return expected_nfa_header;
|
||||
}
|
||||
|
||||
struct NFA *get_expected_nfa16_header() {
|
||||
return get_expected_nfa_header(SHENG_NFA, 4736, 8);
|
||||
}
|
||||
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
/* Reference header for the 18-state test automaton: SHENG_NFA_32, length 17216, 18 positions. */
struct NFA *get_expected_nfa32_header() {
    const unsigned int recorded_length = 17216;
    const unsigned int recorded_positions = 18;
    return get_expected_nfa_header(SHENG_NFA_32, recorded_length, recorded_positions);
}
#endif /* defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) */
|
||||
|
||||
/*
 * Compare two NFA headers field by field.
 *
 * Every check is a non-fatal EXPECT_EQ, so all mismatching fields are
 * reported in a single run instead of stopping at the first difference.
 * Only the header fields listed below are compared.
 */
void test_nfa_equal(const NFA& l, const NFA& r)
|
||||
{
|
||||
EXPECT_EQ(l.flags, r.flags);
|
||||
EXPECT_EQ(l.length, r.length);
|
||||
EXPECT_EQ(l.type, r.type);
|
||||
EXPECT_EQ(l.rAccelType, r.rAccelType);
|
||||
EXPECT_EQ(l.rAccelOffset, r.rAccelOffset);
|
||||
EXPECT_EQ(l.maxBiAnchoredWidth, r.maxBiAnchoredWidth);
|
||||
EXPECT_EQ(l.rAccelData.dc, r.rAccelData.dc);
|
||||
EXPECT_EQ(l.queueIndex, r.queueIndex);
|
||||
EXPECT_EQ(l.nPositions, r.nPositions);
|
||||
EXPECT_EQ(l.scratchStateSize, r.scratchStateSize);
|
||||
EXPECT_EQ(l.streamStateSize, r.streamStateSize);
|
||||
EXPECT_EQ(l.maxWidth, r.maxWidth);
|
||||
EXPECT_EQ(l.minWidth, r.minWidth);
|
||||
EXPECT_EQ(l.maxOffset, r.maxOffset);
|
||||
}
|
||||
|
||||
/* Start of actual tests */
|
||||
|
||||
/*
|
||||
* Runs shengCompile and compares its outputs to previously recorded outputs.
|
||||
*/
|
||||
TEST(Sheng16, std_compile_header) {
|
||||
|
||||
ue2::Grey *g = new ue2::Grey();
|
||||
hs_platform_info plat_info = {0, 0, 0, 0};
|
||||
ue2::CompileContext *cc = new ue2::CompileContext(false, false, ue2::target_t(plat_info), *g);
|
||||
ue2::ReportManager *rm = new ue2::ReportManager(*g);
|
||||
ue2::Report *report = new ue2::Report(ue2::EXTERNAL_CALLBACK, 0);
|
||||
ReportID rID = rm->getInternalId(*report);
|
||||
rm->setProgramOffset(0, 0);
|
||||
|
||||
struct ue2::raw_dfa *dfa = new ue2::raw_dfa(ue2::NFA_OUTFIX);
|
||||
init_raw_dfa16(dfa, rID);
|
||||
|
||||
struct NFA *nfa = (shengCompile(*dfa, *cc, *rm, false)).release();
|
||||
EXPECT_NE(nullptr, nfa);
|
||||
|
||||
EXPECT_NE(0, nfa->length);
|
||||
EXPECT_EQ(SHENG_NFA, nfa->type);
|
||||
|
||||
struct NFA *expected_nfa = get_expected_nfa16_header();
|
||||
test_nfa_equal(*expected_nfa, *nfa);
|
||||
|
||||
delete expected_nfa;
|
||||
delete report;
|
||||
delete rm;
|
||||
delete cc;
|
||||
delete g;
|
||||
}
|
||||
|
||||
/*
|
||||
* nfaExecSheng_B is the most basic of the sheng variants. It simply calls the core of the algorithm.
|
||||
* We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
|
||||
*/
|
||||
TEST(Sheng16, std_run_B) {
    unsigned int pattern_length = 6;
    unsigned int period = 128;
    const size_t buf_size = 200;
    /* same value as the former literal "buf_size/128 + 1", now tied to period */
    unsigned int expected_matches = buf_size/period + 1;
    u8 buf[buf_size];
    struct callback_context context = {period, 0, pattern_length};

    /*
     * NOTE(review): nfa is obtained from init_nfa16 and never released in this
     * test; how it must be freed depends on init_nfa16 - confirm.
     */
    struct NFA* nfa;
    init_nfa16(&nfa);
    ASSERT_NE(nullptr, nfa);
    fill_pattern(buf, buf_size, 0, period, "acecof", pattern_length);

    char ret_val;
    unsigned int offset = 0;
    unsigned int loop_count = 0;
    /* one call more than the expected number of matches */
    for (; loop_count < expected_matches + 1; loop_count++) {
        ASSERT_LT(offset, buf_size);
        ret_val = nfaExecSheng_B(nfa,
                                 offset,
                                 buf + offset,
                                 (s64a) buf_size - offset,
                                 periodic_pattern_callback,
                                 &context);
        /* next start offset, derived from the match count kept in the callback context */
        offset = (context.match_count - 1) * context.period + context.pattern_length;
        if(unlikely(ret_val != MO_ALIVE)) {
            break;
        }
    }

    /*check normal return*/
    EXPECT_EQ(MO_ALIVE, ret_val);

    /*check that we don't find an additional match nor crash when no matches are found*/
    EXPECT_EQ(expected_matches + 1, loop_count);

    /*check that we have all the matches*/
    EXPECT_EQ(expected_matches, context.match_count);
}
|
||||
|
||||
/*
|
||||
* nfaExecSheng_Q runs like the _B version (callback), but exercises the message queue logic.
|
||||
* We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
|
||||
*/
|
||||
TEST(Sheng16, std_run_Q) {
    /* inputs handed to fill_pattern: 6-byte pattern, period 128, 200-byte buffer */
    const size_t kBufLen = 200;
    unsigned int stride = 128;
    unsigned int pat_len = 6;
    unsigned int want_matches = kBufLen/128 + 1;
    u8 data[kBufLen];
    struct callback_context cb_ctx = {stride, 0, pat_len};
    struct mq *queue;

    init_sheng_queue16(&queue, data, kBufLen);
    fill_pattern(data, kBufLen, 0, stride, "acecof", pat_len);

    /* select queue item 0 and set its location to 0 */
    queue->cur = 0;
    queue->items[queue->cur].location = 0;

    /* install the callback together with the context it is given */
    queue->cb = periodic_pattern_callback;
    queue->context = &cb_ctx;

    nfaExecSheng_Q(queue->nfa, queue, (s64a) kBufLen);

    /* every expected match must have been counted in the callback context */
    EXPECT_EQ(want_matches, cb_ctx.match_count);

    delete queue;
}
|
||||
|
||||
/*
|
||||
* nfaExecSheng_Q2 uses the message queue, but stops at match instead of using a callback.
|
||||
* We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
|
||||
*/
|
||||
TEST(Sheng16, std_run_Q2) {
    /* inputs handed to fill_pattern: 6-byte pattern, period 128, 200-byte buffer */
    const size_t kBufLen = 200;
    unsigned int stride = 128;
    unsigned int pat_len = 6;
    unsigned int want_matches = kBufLen/128 + 1;
    u8 data[kBufLen];
    struct mq *queue;

    init_sheng_queue16(&queue, data, kBufLen);
    fill_pattern(data, kBufLen, 0, stride, "acecof", pat_len);

    /* select queue item 0 and set its location to 0 */
    queue->cur = 0;
    queue->items[queue->cur].location = 0;

    char status;
    int where;
    unsigned int calls = 0;
    for (;;) {
        status = nfaExecSheng_Q2(queue->nfa, queue, (s64a) kBufLen);
        where = queue->items[queue->cur].location;
        calls++;
        /*
         * Carry on only while a match is pending, the reported location is
         * inside the buffer and it sits pat_len bytes into a stride.
         */
        if (unlikely(!((status == MO_MATCHES_PENDING) && (where < (int)kBufLen) && ((where % stride) == pat_len)))) {
            break;
        }
    }

    /* a pending match at any other position would be spurious */
    EXPECT_EQ(0, (status == MO_MATCHES_PENDING) && ((where % stride) != pat_len));

    /* the last call is the one that ended the loop, hence calls-1 matches */
    EXPECT_EQ(want_matches, calls-1);

    delete queue;
}
|
||||
|
||||
/*
|
||||
* The message queue can also run on the "history" buffer. We test it the same way as the normal
|
||||
* buffer, expecting the same behavior.
|
||||
* We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
|
||||
*/
|
||||
TEST(Sheng16, history_run_Q2) {
    /* inputs handed to fill_pattern: 6-byte pattern, period 128, 200-byte buffer */
    const size_t kBufLen = 200;
    unsigned int stride = 128;
    unsigned int pat_len = 6;
    unsigned int want_matches = kBufLen/128 + 1;
    u8 data[kBufLen];
    struct mq *queue;

    init_sheng_queue16(&queue, data, kBufLen);
    fill_pattern(data, kBufLen, 0, stride, "acecof", pat_len);

    /* the same bytes are also installed as the queue's history buffer */
    queue->history = data;
    queue->hlength = kBufLen;

    /* select queue item 0 and give it the negative location -200 */
    queue->cur = 0;
    queue->items[queue->cur].location = -200;

    char status;
    int where;
    unsigned int calls = 0;
    for (;;) {
        status = nfaExecSheng_Q2(queue->nfa, queue, 0);
        where = queue->items[queue->cur].location;
        calls++;
        /*
         * Carry on only while a match is pending, the location is a negative
         * offset greater than -kBufLen and, shifted by kBufLen, it sits
         * pat_len bytes into a stride.
         */
        if (unlikely(!((status == MO_MATCHES_PENDING) && (where > -(int)kBufLen) && (where < 0) && (((kBufLen + where) % stride) == pat_len)))) {
            break;
        }
    }

    /* a pending match at any other position would be spurious */
    EXPECT_EQ(0, (status == MO_MATCHES_PENDING) && (((kBufLen + where) % stride) != pat_len));

    /* the last call is the one that ended the loop, hence calls-1 matches */
    EXPECT_EQ(want_matches, calls-1);

    delete queue;
}
|
||||
|
||||
/**
|
||||
* These tests only cover the basic paths. More tests could cover:
|
||||
* - running from the history buffer into the current buffer in Q2
|
||||
* - running while expecting no match
|
||||
* - nfaExecSheng_QR
|
||||
* - running sheng when it should call an accelerator and confirming that it calls it
|
||||
*/
|
||||
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
|
||||
/*
|
||||
* Runs sheng32Compile and compares its outputs to previously recorded outputs.
|
||||
*/
|
||||
TEST(Sheng32, std_compile_header) {
#if defined(HAVE_SVE)
    /* nothing is exercised (the test passes trivially) when svcntb() reports fewer than 32 bytes */
    if(svcntb()<32) {
        return;
    }
#endif
    /*
     * All compile-time objects live in automatic storage so that they are
     * released on every exit path, including an early return from ASSERT_*.
     */
    ue2::Grey g;
    hs_platform_info plat_info = {0, 0, 0, 0};
    ue2::CompileContext cc(false, false, ue2::target_t(plat_info), g);
    ue2::ReportManager rm(g);
    ue2::Report report(ue2::EXTERNAL_CALLBACK, 0);
    ReportID rID = rm.getInternalId(report);
    rm.setProgramOffset(0, 0);

    ue2::raw_dfa dfa(ue2::NFA_OUTFIX);
    init_raw_dfa32(&dfa, rID);

    /*
     * Keep the owning handle returned by sheng32Compile alive instead of
     * release()-ing it: the bytecode is then freed by its own deleter when
     * the test ends, rather than being leaked.
     */
    auto compiled = sheng32Compile(dfa, cc, rm, false);
    struct NFA *nfa = compiled.get();

    /* fatal check: the assertions below dereference nfa */
    ASSERT_NE(nullptr, nfa);

    EXPECT_NE(0, nfa->length);
    EXPECT_EQ(SHENG_NFA_32, nfa->type);

    /* compare against the previously recorded header */
    struct NFA *expected_nfa = get_expected_nfa32_header();
    test_nfa_equal(*expected_nfa, *nfa);

    delete expected_nfa;
}
|
||||
|
||||
/*
|
||||
* nfaExecSheng32_B is the most basic of the sheng variants. It simply calls the core of the algorithm.
|
||||
* We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
|
||||
*/
|
||||
TEST(Sheng32, std_run_B) {
#if defined(HAVE_SVE)
    /* nothing is exercised (the test passes trivially) when svcntb() reports fewer than 32 bytes */
    if(svcntb()<32) {
        return;
    }
#endif
    unsigned int pattern_length = 16;
    unsigned int period = 128;
    const size_t buf_size = 200;
    /* same value as the former literal "buf_size/128 + 1", now tied to period */
    unsigned int expected_matches = buf_size/period + 1;
    u8 buf[buf_size];
    struct callback_context context = {period, 0, pattern_length};

    /*
     * NOTE(review): nfa is obtained from init_nfa32 and never released in this
     * test; how it must be freed depends on init_nfa32 - confirm.
     */
    struct NFA* nfa;
    init_nfa32(&nfa);
    ASSERT_NE(nullptr, nfa);
    fill_pattern(buf, buf_size, 0, period, "acecof0123456789", pattern_length);

    char ret_val;
    unsigned int offset = 0;
    unsigned int loop_count = 0;
    /* one call more than the expected number of matches */
    for (; loop_count < expected_matches + 1; loop_count++) {
        ASSERT_LT(offset, buf_size);
        ret_val = nfaExecSheng32_B(nfa,
                                   offset,
                                   buf + offset,
                                   (s64a) buf_size - offset,
                                   periodic_pattern_callback,
                                   &context);
        /* next start offset, derived from the match count kept in the callback context */
        offset = (context.match_count - 1) * context.period + context.pattern_length;
        if(unlikely(ret_val != MO_ALIVE)) {
            break;
        }
    }

    /*check normal return*/
    EXPECT_EQ(MO_ALIVE, ret_val);

    /*check that we don't find an additional match nor crash when no matches are found*/
    EXPECT_EQ(expected_matches + 1, loop_count);

    /*check that we have all the matches*/
    EXPECT_EQ(expected_matches, context.match_count);
}
|
||||
|
||||
/*
|
||||
* nfaExecSheng32_Q runs like the _B version (callback), but exercises the message queue logic.
|
||||
* We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
|
||||
*/
|
||||
TEST(Sheng32, std_run_Q) {
#if defined(HAVE_SVE)
    /* nothing is exercised when svcntb() reports fewer than 32 bytes */
    if(svcntb()<32) {
        return;
    }
#endif
    /* inputs handed to fill_pattern: 16-byte pattern, period 128, 200-byte buffer */
    const size_t kBufLen = 200;
    unsigned int stride = 128;
    unsigned int pat_len = 16;
    unsigned int want_matches = kBufLen/128 + 1;
    u8 data[kBufLen];
    struct callback_context cb_ctx = {stride, 0, pat_len};
    struct mq *queue;

    init_sheng_queue32(&queue, data, kBufLen);
    fill_pattern(data, kBufLen, 0, stride, "acecof0123456789", pat_len);

    /* select queue item 0 and set its location to 0 */
    queue->cur = 0;
    queue->items[queue->cur].location = 0;

    /* install the callback together with the context it is given */
    queue->cb = periodic_pattern_callback;
    queue->context = &cb_ctx;

    nfaExecSheng32_Q(queue->nfa, queue, (s64a) kBufLen);

    /* every expected match must have been counted in the callback context */
    EXPECT_EQ(want_matches, cb_ctx.match_count);

    delete queue;
}
|
||||
|
||||
/*
|
||||
* nfaExecSheng32_Q2 uses the message queue, but stops at match instead of using a callback.
|
||||
* We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
|
||||
*/
|
||||
TEST(Sheng32, std_run_Q2) {
#if defined(HAVE_SVE)
    /* nothing is exercised when svcntb() reports fewer than 32 bytes */
    if(svcntb()<32) {
        return;
    }
#endif
    /* inputs handed to fill_pattern: 16-byte pattern, period 128, 200-byte buffer */
    const size_t kBufLen = 200;
    unsigned int stride = 128;
    unsigned int pat_len = 16;
    unsigned int want_matches = kBufLen/128 + 1;
    u8 data[kBufLen];
    struct mq *queue;

    init_sheng_queue32(&queue, data, kBufLen);
    fill_pattern(data, kBufLen, 0, stride, "acecof0123456789", pat_len);

    /* select queue item 0 and set its location to 0 */
    queue->cur = 0;
    queue->items[queue->cur].location = 0;

    char status;
    int where;
    unsigned int calls = 0;
    for (;;) {
        status = nfaExecSheng32_Q2(queue->nfa, queue, (s64a) kBufLen);
        where = queue->items[queue->cur].location;
        calls++;
        /*
         * Carry on only while a match is pending, the reported location is
         * inside the buffer and it sits pat_len bytes into a stride.
         */
        if (unlikely(!((status == MO_MATCHES_PENDING) && (where < (int)kBufLen) && ((where % stride) == pat_len)))) {
            break;
        }
    }

    /* a pending match at any other position would be spurious */
    EXPECT_EQ(0, (status == MO_MATCHES_PENDING) && ((where % stride) != pat_len));

    /* the last call is the one that ended the loop, hence calls-1 matches */
    EXPECT_EQ(want_matches, calls-1);

    delete queue;
}
|
||||
|
||||
/*
|
||||
* The message queue can also run on the "history" buffer. We test it the same way as the normal
|
||||
* buffer, expecting the same behavior.
|
||||
* We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
|
||||
*/
|
||||
TEST(Sheng32, history_run_Q2) {
#if defined(HAVE_SVE)
    /* nothing is exercised when svcntb() reports fewer than 32 bytes */
    if(svcntb()<32) {
        return;
    }
#endif
    /* inputs handed to fill_pattern: 16-byte pattern, period 128, 200-byte buffer */
    const size_t kBufLen = 200;
    unsigned int stride = 128;
    unsigned int pat_len = 16;
    unsigned int want_matches = kBufLen/128 + 1;
    u8 data[kBufLen];
    struct mq *queue;

    init_sheng_queue32(&queue, data, kBufLen);
    fill_pattern(data, kBufLen, 0, stride, "acecof0123456789", pat_len);

    /* the same bytes are also installed as the queue's history buffer */
    queue->history = data;
    queue->hlength = kBufLen;

    /* select queue item 0 and give it the negative location -200 */
    queue->cur = 0;
    queue->items[queue->cur].location = -200;

    char status;
    int where;
    unsigned int calls = 0;
    for (;;) {
        status = nfaExecSheng32_Q2(queue->nfa, queue, 0);
        where = queue->items[queue->cur].location;
        calls++;
        /*
         * Carry on only while a match is pending, the location is a negative
         * offset greater than -kBufLen and, shifted by kBufLen, it sits
         * pat_len bytes into a stride.
         */
        if (unlikely(!((status == MO_MATCHES_PENDING) && (where > -(int)kBufLen) && (where < 0) && (((kBufLen + where) % stride) == pat_len)))) {
            break;
        }
    }

    /* a pending match at any other position would be spurious */
    EXPECT_EQ(0, (status == MO_MATCHES_PENDING) && (((kBufLen + where) % stride) != pat_len));

    /* the last call is the one that ended the loop, hence calls-1 matches */
    EXPECT_EQ(want_matches, calls-1);

    delete queue;
}
|
||||
#endif /* defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) */
|
||||
|
||||
} /* namespace */
|
@ -508,7 +508,7 @@ TEST(SuperVectorUtilsTest,Movemask256c){
|
||||
u8 vec2[32] = {0};
|
||||
u32 r = rand() % 100 + 1;
|
||||
for(int i=0; i<32; i++) {
|
||||
if (r & (1 << i)) {
|
||||
if (r & (1U << i)) {
|
||||
vec[i] = 0xff;
|
||||
}
|
||||
}
|
||||
|
@ -152,7 +152,6 @@ bool HS_CDECL readExpression(const std::string &input, std::string &expr,
|
||||
UNUSED const char *eof = pe;
|
||||
UNUSED const char *ts = p, *te = p;
|
||||
int cs;
|
||||
UNUSED int act;
|
||||
|
||||
assert(p);
|
||||
assert(pe);
|
||||
|
@ -55,7 +55,7 @@ unique_ptr<hs_platform_info> xcompileReadMode(const char *s) {
|
||||
assert(!err);
|
||||
|
||||
string str(s);
|
||||
string mode = str.substr(0, str.find(":"));
|
||||
//string mode = str.substr(0, str.find(":"));
|
||||
string opt = str.substr(str.find(":")+1, str.npos);
|
||||
bool found_mode = false;
|
||||
|
||||
|
@ -223,7 +223,7 @@ public:
|
||||
CorpusProperties &props);
|
||||
~CorpusGeneratorImpl() = default;
|
||||
|
||||
void generateCorpus(vector<string> &data);
|
||||
void generateCorpus(vector<string> &data) override;
|
||||
|
||||
private:
|
||||
unsigned char getRandomChar();
|
||||
@ -419,7 +419,7 @@ public:
|
||||
CorpusProperties &props);
|
||||
~CorpusGeneratorUtf8() = default;
|
||||
|
||||
void generateCorpus(vector<string> &data);
|
||||
void generateCorpus(vector<string> &data) override;
|
||||
|
||||
private:
|
||||
unichar getRandomChar();
|
||||
|
@ -47,7 +47,7 @@ class NGHolder;
|
||||
} // namespace ue2
|
||||
|
||||
struct CorpusGenerationFailure {
|
||||
explicit CorpusGenerationFailure(const std::string s) :
|
||||
explicit CorpusGenerationFailure(const std::string& s) :
|
||||
message(std::move(s)) {}
|
||||
std::string message;
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user