mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Merge branch 'develop' into wip-cppcheck271-part2
This commit is contained in:
commit
22c3e3da6e
@ -1221,11 +1221,17 @@ if (NOT BUILD_STATIC_LIBS)
|
|||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
add_subdirectory(util)
|
add_subdirectory(util)
|
||||||
add_subdirectory(unit)
|
|
||||||
|
|
||||||
if (EXISTS ${CMAKE_SOURCE_DIR}/tools/CMakeLists.txt)
|
option(BUILD_UNIT "Build Hyperscan unit tests (default TRUE)" TRUE)
|
||||||
|
if(BUILD_UNIT)
|
||||||
|
add_subdirectory(unit)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
option(BUILD_TOOLS "Build Hyperscan tools (default TRUE)" TRUE)
|
||||||
|
if(EXISTS ${CMAKE_SOURCE_DIR}/tools/CMakeLists.txt AND BUILD_TOOLS)
|
||||||
add_subdirectory(tools)
|
add_subdirectory(tools)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (EXISTS ${CMAKE_SOURCE_DIR}/chimera/CMakeLists.txt AND BUILD_CHIMERA)
|
if (EXISTS ${CMAKE_SOURCE_DIR}/chimera/CMakeLists.txt AND BUILD_CHIMERA)
|
||||||
add_subdirectory(chimera)
|
add_subdirectory(chimera)
|
||||||
endif()
|
endif()
|
||||||
@ -1240,4 +1246,7 @@ if(BUILD_BENCHMARKS)
|
|||||||
add_subdirectory(benchmarks)
|
add_subdirectory(benchmarks)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
option(BUILD_DOC "Build the Hyperscan documentation (default TRUE)" TRUE)
|
||||||
|
if(BUILD_DOC)
|
||||||
add_subdirectory(doc/dev-reference)
|
add_subdirectory(doc/dev-reference)
|
||||||
|
endif()
|
||||||
|
@ -146,6 +146,7 @@ export CXX="/usr/pkg/gcc12/bin/g++"
|
|||||||
```
|
```
|
||||||
|
|
||||||
In FreeBSD similarly, you might want to install a different compiler.
|
In FreeBSD similarly, you might want to install a different compiler.
|
||||||
|
If you want to use gcc, it is recommended to use gcc12.
|
||||||
You will also, as in NetBSD, need to install cmake, sqlite, boost and ragel packages.
|
You will also, as in NetBSD, need to install cmake, sqlite, boost and ragel packages.
|
||||||
Using the example of gcc12 from pkg:
|
Using the example of gcc12 from pkg:
|
||||||
installing the desired compiler:
|
installing the desired compiler:
|
||||||
@ -164,7 +165,6 @@ the environment variables to point to this compiler:
|
|||||||
export CC="/usr/local/bin/gcc"
|
export CC="/usr/local/bin/gcc"
|
||||||
export CXX="/usr/local/bin/g++"
|
export CXX="/usr/local/bin/g++"
|
||||||
```
|
```
|
||||||
|
|
||||||
A further note in FreeBSD, on the PowerPC and ARM platforms,
|
A further note in FreeBSD, on the PowerPC and ARM platforms,
|
||||||
the gcc12 package installs to a slightly different name, on FreeBSD/ppc,
|
the gcc12 package installs to a slightly different name, on FreeBSD/ppc,
|
||||||
gcc12 will be found using:
|
gcc12 will be found using:
|
||||||
@ -175,12 +175,6 @@ export CXX="/usr/local/bin/g++12"
|
|||||||
|
|
||||||
Then continue with the build as below.
|
Then continue with the build as below.
|
||||||
|
|
||||||
A note about running in FreeBSD: if you built a dynamically linked binary
|
|
||||||
with an alternative compiler, the libraries specific to the compiler that
|
|
||||||
built the binary will probably not be found and the base distro libraries
|
|
||||||
in /lib will be found instead. Adjust LD_LIBRARY_PATH appropriately. For
|
|
||||||
example, with gcc12 installed from pkg, one would want to use
|
|
||||||
```export LD_LIBRARY_PATH=/usr/local/lib/gcc12/```
|
|
||||||
|
|
||||||
## Configure & build
|
## Configure & build
|
||||||
|
|
||||||
|
@ -26,13 +26,13 @@
|
|||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
#include <cstdlib>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <ctime>
|
#include <ctime>
|
||||||
#include <cstdlib>
|
|
||||||
#include <memory>
|
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
#include <iostream>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
#include "benchmarks.hpp"
|
#include "benchmarks.hpp"
|
||||||
|
|
||||||
@ -43,14 +43,12 @@
|
|||||||
struct hlmMatchEntry {
|
struct hlmMatchEntry {
|
||||||
size_t to;
|
size_t to;
|
||||||
u32 id;
|
u32 id;
|
||||||
hlmMatchEntry(size_t end, u32 identifier) :
|
hlmMatchEntry(size_t end, u32 identifier) : to(end), id(identifier) {}
|
||||||
to(end), id(identifier) {}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
std::vector<hlmMatchEntry> ctxt;
|
std::vector<hlmMatchEntry> ctxt;
|
||||||
|
|
||||||
static
|
static hwlmcb_rv_t hlmSimpleCallback(size_t to, u32 id,
|
||||||
hwlmcb_rv_t hlmSimpleCallback(size_t to, u32 id,
|
|
||||||
UNUSED struct hs_scratch *scratch) {
|
UNUSED struct hs_scratch *scratch) {
|
||||||
DEBUG_PRINTF("match @%zu = %u\n", to, id);
|
DEBUG_PRINTF("match @%zu = %u\n", to, id);
|
||||||
|
|
||||||
@ -60,15 +58,15 @@ hwlmcb_rv_t hlmSimpleCallback(size_t to, u32 id,
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename InitFunc, typename BenchFunc>
|
template <typename InitFunc, typename BenchFunc>
|
||||||
static void run_benchmarks(int size, int loops, int max_matches, bool is_reverse, MicroBenchmark &bench, InitFunc &&init, BenchFunc &&func) {
|
static void run_benchmarks(int size, int loops, int max_matches,
|
||||||
|
bool is_reverse, MicroBenchmark &bench,
|
||||||
|
InitFunc &&init, BenchFunc &&func) {
|
||||||
init(bench);
|
init(bench);
|
||||||
double total_sec = 0.0;
|
double total_sec = 0.0;
|
||||||
u64a total_size = 0;
|
|
||||||
double bw = 0.0;
|
|
||||||
double avg_bw = 0.0;
|
|
||||||
double max_bw = 0.0;
|
double max_bw = 0.0;
|
||||||
double avg_time = 0.0;
|
double avg_time = 0.0;
|
||||||
if (max_matches) {
|
if (max_matches) {
|
||||||
|
double avg_bw = 0.0;
|
||||||
int pos = 0;
|
int pos = 0;
|
||||||
for (int j = 0; j < max_matches - 1; j++) {
|
for (int j = 0; j < max_matches - 1; j++) {
|
||||||
bench.buf[pos] = 'b';
|
bench.buf[pos] = 'b';
|
||||||
@ -84,11 +82,13 @@ static void run_benchmarks(int size, int loops, int max_matches, bool is_reverse
|
|||||||
actual_size += res - bench.buf.data();
|
actual_size += res - bench.buf.data();
|
||||||
}
|
}
|
||||||
auto end = std::chrono::steady_clock::now();
|
auto end = std::chrono::steady_clock::now();
|
||||||
double dt = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
|
double dt = std::chrono::duration_cast<std::chrono::microseconds>(
|
||||||
|
end - start)
|
||||||
|
.count();
|
||||||
total_sec += dt;
|
total_sec += dt;
|
||||||
/*convert microseconds to seconds*/
|
/*convert microseconds to seconds*/
|
||||||
/*calculate bandwidth*/
|
/*calculate bandwidth*/
|
||||||
bw = (actual_size / dt) * 1000000.0 / 1048576.0;
|
double bw = (actual_size / dt) * 1000000.0 / 1048576.0;
|
||||||
/*std::cout << "act_size = " << act_size << std::endl;
|
/*std::cout << "act_size = " << act_size << std::endl;
|
||||||
std::cout << "dt = " << dt << std::endl;
|
std::cout << "dt = " << dt << std::endl;
|
||||||
std::cout << "bw = " << bw << std::endl;*/
|
std::cout << "bw = " << bw << std::endl;*/
|
||||||
@ -102,18 +102,20 @@ static void run_benchmarks(int size, int loops, int max_matches, bool is_reverse
|
|||||||
avg_bw /= max_matches;
|
avg_bw /= max_matches;
|
||||||
total_sec /= 1000000.0;
|
total_sec /= 1000000.0;
|
||||||
/*convert average time to us*/
|
/*convert average time to us*/
|
||||||
printf(KMAG "%s: %u matches, %u * %u iterations," KBLU " total elapsed time =" RST " %.3f s, "
|
printf("%-18s, %-12d, %-10d, %-6d, %-10.3f, %-9.3f, %-8.3f, %-7.3f\n",
|
||||||
KBLU "average time per call =" RST " %.3f μs," KBLU " max bandwidth = " RST " %.3f MB/s," KBLU " average bandwidth =" RST " %.3f MB/s \n",
|
|
||||||
bench.label, max_matches, size ,loops, total_sec, avg_time, max_bw, avg_bw);
|
bench.label, max_matches, size ,loops, total_sec, avg_time, max_bw, avg_bw);
|
||||||
} else {
|
} else {
|
||||||
|
u64a total_size = 0;
|
||||||
auto start = std::chrono::steady_clock::now();
|
auto start = std::chrono::steady_clock::now();
|
||||||
for (int i = 0; i < loops; i++) {
|
for (int i = 0; i < loops; i++) {
|
||||||
const u8 *res = func(bench);
|
func(bench);
|
||||||
}
|
}
|
||||||
auto end = std::chrono::steady_clock::now();
|
auto end = std::chrono::steady_clock::now();
|
||||||
total_sec += std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
|
total_sec +=
|
||||||
|
std::chrono::duration_cast<std::chrono::microseconds>(end - start)
|
||||||
|
.count();
|
||||||
/*calculate transferred size*/
|
/*calculate transferred size*/
|
||||||
total_size = size * loops;
|
total_size = (u64a)size * (u64a)loops;
|
||||||
/*calculate average time*/
|
/*calculate average time*/
|
||||||
avg_time = total_sec / loops;
|
avg_time = total_sec / loops;
|
||||||
/*convert microseconds to seconds*/
|
/*convert microseconds to seconds*/
|
||||||
@ -122,101 +124,109 @@ static void run_benchmarks(int size, int loops, int max_matches, bool is_reverse
|
|||||||
max_bw = total_size / total_sec;
|
max_bw = total_size / total_sec;
|
||||||
/*convert to MB/s*/
|
/*convert to MB/s*/
|
||||||
max_bw /= 1048576.0;
|
max_bw /= 1048576.0;
|
||||||
printf(KMAG "%s: no matches, %u * %u iterations," KBLU " total elapsed time =" RST " %.3f s, "
|
printf("%-18s, %-12s, %-10d, %-6d, %-10.3f, %-9.3f, %-8.3f, %-7s\n",
|
||||||
KBLU "average time per call =" RST " %.3f μs ," KBLU " bandwidth = " RST " %.3f MB/s \n",
|
bench.label, "0", size, loops, total_sec, avg_time, max_bw, "0");
|
||||||
bench.label, size ,loops, total_sec, avg_time, max_bw );
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(){
|
int main(){
|
||||||
int matches[] = {0, MAX_MATCHES};
|
const int matches[] = {0, MAX_MATCHES};
|
||||||
std::vector<size_t> sizes;
|
std::vector<size_t> sizes;
|
||||||
for (size_t i = 0; i < N; i++) sizes.push_back(16000 << i*2);
|
for (size_t i = 0; i < N; i++)
|
||||||
|
sizes.push_back(16000 << i * 2);
|
||||||
const char charset[] = "aAaAaAaAAAaaaaAAAAaaaaAAAAAAaaaAAaaa";
|
const char charset[] = "aAaAaAaAAAaaaaAAAAaaaaAAAAAAaaaAAaaa";
|
||||||
|
printf("%-18s, %-12s, %-10s, %-6s, %-10s, %-9s, %-8s, %-7s\n", "Matcher",
|
||||||
|
"max_matches", "size", "loops", "total_sec", "avg_time", "max_bw",
|
||||||
|
"avg_bw");
|
||||||
for (int m = 0; m < 2; m++) {
|
for (int m = 0; m < 2; m++) {
|
||||||
for (size_t i = 0; i < std::size(sizes); i++) {
|
for (size_t i = 0; i < std::size(sizes); i++) {
|
||||||
MicroBenchmark bench("Shufti", sizes[i]);
|
MicroBenchmark bench("Shufti", sizes[i]);
|
||||||
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
|
run_benchmarks(
|
||||||
|
sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
|
||||||
[&](MicroBenchmark &b) {
|
[&](MicroBenchmark &b) {
|
||||||
b.chars.set('a');
|
b.chars.set('a');
|
||||||
ue2::shuftiBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
|
ue2::shuftiBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
|
||||||
memset(b.buf.data(), 'b', b.size);
|
memset(b.buf.data(), 'b', b.size);
|
||||||
},
|
},
|
||||||
[&](MicroBenchmark &b) {
|
[&](MicroBenchmark &b) {
|
||||||
return shuftiExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size);
|
return shuftiExec(b.lo, b.hi, b.buf.data(),
|
||||||
}
|
b.buf.data() + b.size);
|
||||||
);
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < std::size(sizes); i++) {
|
for (size_t i = 0; i < std::size(sizes); i++) {
|
||||||
MicroBenchmark bench("Reverse Shufti", sizes[i]);
|
MicroBenchmark bench("Reverse Shufti", sizes[i]);
|
||||||
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench,
|
run_benchmarks(
|
||||||
|
sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench,
|
||||||
[&](MicroBenchmark &b) {
|
[&](MicroBenchmark &b) {
|
||||||
b.chars.set('a');
|
b.chars.set('a');
|
||||||
ue2::shuftiBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
|
ue2::shuftiBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
|
||||||
memset(b.buf.data(), 'b', b.size);
|
memset(b.buf.data(), 'b', b.size);
|
||||||
},
|
},
|
||||||
[&](MicroBenchmark &b) {
|
[&](MicroBenchmark &b) {
|
||||||
return rshuftiExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size);
|
return rshuftiExec(b.lo, b.hi, b.buf.data(),
|
||||||
}
|
b.buf.data() + b.size);
|
||||||
);
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < std::size(sizes); i++) {
|
for (size_t i = 0; i < std::size(sizes); i++) {
|
||||||
MicroBenchmark bench("Truffle", sizes[i]);
|
MicroBenchmark bench("Truffle", sizes[i]);
|
||||||
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
|
run_benchmarks(
|
||||||
|
sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
|
||||||
[&](MicroBenchmark &b) {
|
[&](MicroBenchmark &b) {
|
||||||
b.chars.set('a');
|
b.chars.set('a');
|
||||||
ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
|
ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
|
||||||
memset(b.buf.data(), 'b', b.size);
|
memset(b.buf.data(), 'b', b.size);
|
||||||
},
|
},
|
||||||
[&](MicroBenchmark &b) {
|
[&](MicroBenchmark &b) {
|
||||||
return truffleExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size);
|
return truffleExec(b.lo, b.hi, b.buf.data(),
|
||||||
}
|
b.buf.data() + b.size);
|
||||||
);
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < std::size(sizes); i++) {
|
for (size_t i = 0; i < std::size(sizes); i++) {
|
||||||
MicroBenchmark bench("Reverse Truffle", sizes[i]);
|
MicroBenchmark bench("Reverse Truffle", sizes[i]);
|
||||||
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench,
|
run_benchmarks(
|
||||||
|
sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench,
|
||||||
[&](MicroBenchmark &b) {
|
[&](MicroBenchmark &b) {
|
||||||
b.chars.set('a');
|
b.chars.set('a');
|
||||||
ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
|
ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
|
||||||
memset(b.buf.data(), 'b', b.size);
|
memset(b.buf.data(), 'b', b.size);
|
||||||
},
|
},
|
||||||
[&](MicroBenchmark &b) {
|
[&](MicroBenchmark &b) {
|
||||||
return rtruffleExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size);
|
return rtruffleExec(b.lo, b.hi, b.buf.data(),
|
||||||
}
|
b.buf.data() + b.size);
|
||||||
);
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < std::size(sizes); i++) {
|
for (size_t i = 0; i < std::size(sizes); i++) {
|
||||||
MicroBenchmark bench("Vermicelli", sizes[i]);
|
MicroBenchmark bench("Vermicelli", sizes[i]);
|
||||||
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
|
run_benchmarks(
|
||||||
|
sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
|
||||||
[&](MicroBenchmark &b) {
|
[&](MicroBenchmark &b) {
|
||||||
b.chars.set('a');
|
b.chars.set('a');
|
||||||
ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
|
ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
|
||||||
memset(b.buf.data(), 'b', b.size);
|
memset(b.buf.data(), 'b', b.size);
|
||||||
},
|
},
|
||||||
[&](MicroBenchmark &b) {
|
[&](MicroBenchmark &b) {
|
||||||
return vermicelliExec('a', 'b', b.buf.data(), b.buf.data() + b.size);
|
return vermicelliExec('a', 'b', b.buf.data(),
|
||||||
}
|
b.buf.data() + b.size);
|
||||||
);
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < std::size(sizes); i++) {
|
for (size_t i = 0; i < std::size(sizes); i++) {
|
||||||
MicroBenchmark bench("Reverse Vermicelli", sizes[i]);
|
MicroBenchmark bench("Reverse Vermicelli", sizes[i]);
|
||||||
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench,
|
run_benchmarks(
|
||||||
|
sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench,
|
||||||
[&](MicroBenchmark &b) {
|
[&](MicroBenchmark &b) {
|
||||||
b.chars.set('a');
|
b.chars.set('a');
|
||||||
ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
|
ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
|
||||||
memset(b.buf.data(), 'b', b.size);
|
memset(b.buf.data(), 'b', b.size);
|
||||||
},
|
},
|
||||||
[&](MicroBenchmark &b) {
|
[&](MicroBenchmark &b) {
|
||||||
return rvermicelliExec('a', 'b', b.buf.data(), b.buf.data() + b.size);
|
return rvermicelliExec('a', 'b', b.buf.data(),
|
||||||
}
|
b.buf.data() + b.size);
|
||||||
);
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < std::size(sizes); i++) {
|
for (size_t i = 0; i < std::size(sizes); i++) {
|
||||||
@ -232,20 +242,21 @@ int main(){
|
|||||||
}
|
}
|
||||||
|
|
||||||
MicroBenchmark bench("Noodle", sizes[i]);
|
MicroBenchmark bench("Noodle", sizes[i]);
|
||||||
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
|
run_benchmarks(
|
||||||
|
sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
|
||||||
[&](MicroBenchmark &b) {
|
[&](MicroBenchmark &b) {
|
||||||
ctxt.clear();
|
ctxt.clear();
|
||||||
memset(b.buf.data(), 'a', b.size);
|
memset(b.buf.data(), 'a', b.size);
|
||||||
u32 id = 1000;
|
u32 id = 1000;
|
||||||
ue2::hwlmLiteral lit(str, true, id);
|
ue2::hwlmLiteral lit(str, true, id);
|
||||||
b.nt = ue2::noodBuildTable(lit);
|
b.nt = ue2::noodBuildTable(lit);
|
||||||
assert(b.nt != nullptr);
|
assert(b.nt.get() != nullptr);
|
||||||
},
|
},
|
||||||
[&](MicroBenchmark &b) {
|
[&](MicroBenchmark &b) {
|
||||||
noodExec(b.nt.get(), b.buf.data(), b.size, 0, hlmSimpleCallback, &b.scratch);
|
noodExec(b.nt.get(), b.buf.data(), b.size, 0,
|
||||||
|
hlmSimpleCallback, &b.scratch);
|
||||||
return b.buf.data() + b.size;
|
return b.buf.data() + b.size;
|
||||||
}
|
});
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -26,30 +26,19 @@
|
|||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "hwlm/hwlm_literal.h"
|
||||||
|
#include "hwlm/noodle_build.h"
|
||||||
|
#include "hwlm/noodle_engine.h"
|
||||||
|
#include "hwlm/noodle_internal.h"
|
||||||
#include "nfa/shufti.h"
|
#include "nfa/shufti.h"
|
||||||
#include "nfa/shufticompile.h"
|
#include "nfa/shufticompile.h"
|
||||||
#include "nfa/truffle.h"
|
#include "nfa/truffle.h"
|
||||||
#include "nfa/trufflecompile.h"
|
#include "nfa/trufflecompile.h"
|
||||||
#include "nfa/vermicelli.hpp"
|
#include "nfa/vermicelli.hpp"
|
||||||
#include "hwlm/noodle_build.h"
|
|
||||||
#include "hwlm/noodle_engine.h"
|
|
||||||
#include "hwlm/noodle_internal.h"
|
|
||||||
#include "hwlm/hwlm_literal.h"
|
|
||||||
#include "util/bytecode_ptr.h"
|
|
||||||
#include "scratch.h"
|
#include "scratch.h"
|
||||||
|
#include "util/bytecode_ptr.h"
|
||||||
|
|
||||||
/*define colour control characters*/
|
class MicroBenchmark {
|
||||||
#define RST "\x1B[0m"
|
|
||||||
#define KRED "\x1B[31m"
|
|
||||||
#define KGRN "\x1B[32m"
|
|
||||||
#define KYEL "\x1B[33m"
|
|
||||||
#define KBLU "\x1B[34m"
|
|
||||||
#define KMAG "\x1B[35m"
|
|
||||||
#define KCYN "\x1B[36m"
|
|
||||||
#define KWHT "\x1B[37m"
|
|
||||||
|
|
||||||
class MicroBenchmark
|
|
||||||
{
|
|
||||||
public:
|
public:
|
||||||
char const *label;
|
char const *label;
|
||||||
size_t size;
|
size_t size;
|
||||||
@ -64,6 +53,5 @@ public:
|
|||||||
ue2::bytecode_ptr<noodTable> nt;
|
ue2::bytecode_ptr<noodTable> nt;
|
||||||
|
|
||||||
MicroBenchmark(char const *label_, size_t size_)
|
MicroBenchmark(char const *label_, size_t size_)
|
||||||
:label(label_), size(size_), buf(size_) {
|
: label(label_), size(size_), buf(size_){};
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
@ -6,10 +6,10 @@ if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
|
|||||||
set(FREEBSD true)
|
set(FREEBSD true)
|
||||||
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
|
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
|
||||||
#FIXME: find a nicer and more general way of doing this
|
#FIXME: find a nicer and more general way of doing this
|
||||||
if(CMAKE_C_COMPILER MATCHES "/usr/local/bin/gcc12")
|
if(CMAKE_C_COMPILER MATCHES "/usr/local/bin/gcc13")
|
||||||
set(CMAKE_BUILD_RPATH "/usr/local/lib/gcc12")
|
|
||||||
elseif(CMAKE_C_COMPILER MATCHES "/usr/local/bin/gcc13")
|
|
||||||
set(CMAKE_BUILD_RPATH "/usr/local/lib/gcc13")
|
set(CMAKE_BUILD_RPATH "/usr/local/lib/gcc13")
|
||||||
|
elseif(ARCH_AARCH64 AND (CMAKE_C_COMPILER MATCHES "/usr/local/bin/gcc12"))
|
||||||
|
set(CMAKE_BUILD_RPATH "/usr/local/lib/gcc12")
|
||||||
endif()
|
endif()
|
||||||
endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
|
endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
|
||||||
|
|
||||||
|
@ -19,6 +19,7 @@ else()
|
|||||||
set(SPHINX_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/_build")
|
set(SPHINX_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/_build")
|
||||||
set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees")
|
set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees")
|
||||||
set(SPHINX_HTML_DIR "${CMAKE_CURRENT_BINARY_DIR}/html")
|
set(SPHINX_HTML_DIR "${CMAKE_CURRENT_BINARY_DIR}/html")
|
||||||
|
set(SPHINX_MAN_DIR "${CMAKE_CURRENT_BINARY_DIR}/man")
|
||||||
|
|
||||||
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in"
|
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in"
|
||||||
"${CMAKE_CURRENT_BINARY_DIR}/conf.py" @ONLY)
|
"${CMAKE_CURRENT_BINARY_DIR}/conf.py" @ONLY)
|
||||||
@ -32,4 +33,14 @@ add_custom_target(dev-reference
|
|||||||
"${SPHINX_HTML_DIR}"
|
"${SPHINX_HTML_DIR}"
|
||||||
DEPENDS dev-reference-doxygen
|
DEPENDS dev-reference-doxygen
|
||||||
COMMENT "Building HTML dev reference with Sphinx")
|
COMMENT "Building HTML dev reference with Sphinx")
|
||||||
|
|
||||||
|
add_custom_target(dev-reference-man
|
||||||
|
${SPHINX_BUILD}
|
||||||
|
-b man
|
||||||
|
-c "${CMAKE_CURRENT_BINARY_DIR}"
|
||||||
|
-d "${SPHINX_CACHE_DIR}"
|
||||||
|
"${CMAKE_CURRENT_SOURCE_DIR}"
|
||||||
|
"${SPHINX_MAN_DIR}"
|
||||||
|
DEPENDS dev-reference-doxygen
|
||||||
|
COMMENT "Building man page reference with Sphinx")
|
||||||
endif()
|
endif()
|
||||||
|
@ -11,10 +11,10 @@ Introduction
|
|||||||
************
|
************
|
||||||
|
|
||||||
Chimera is a software regular expression matching engine that is a hybrid of
|
Chimera is a software regular expression matching engine that is a hybrid of
|
||||||
Hyperscan and PCRE. The design goals of Chimera are to fully support PCRE
|
Vectorscan and PCRE. The design goals of Chimera are to fully support PCRE
|
||||||
syntax as well as to take advantage of the high performance nature of Hyperscan.
|
syntax as well as to take advantage of the high performance nature of Vectorscan.
|
||||||
|
|
||||||
Chimera inherits the design guideline of Hyperscan with C APIs for compilation
|
Chimera inherits the design guideline of Vectorscan with C APIs for compilation
|
||||||
and scanning.
|
and scanning.
|
||||||
|
|
||||||
The Chimera API itself is composed of two major components:
|
The Chimera API itself is composed of two major components:
|
||||||
@ -65,13 +65,13 @@ For a given database, Chimera provides several guarantees:
|
|||||||
.. note:: Chimera is designed to have the same matching behavior as PCRE,
|
.. note:: Chimera is designed to have the same matching behavior as PCRE,
|
||||||
including greedy/ungreedy, capturing, etc. Chimera reports both
|
including greedy/ungreedy, capturing, etc. Chimera reports both
|
||||||
**start offset** and **end offset** for each match like PCRE. Different
|
**start offset** and **end offset** for each match like PCRE. Different
|
||||||
from the fashion of reporting all matches in Hyperscan, Chimera only reports
|
from the fashion of reporting all matches in Vectorscan, Chimera only reports
|
||||||
non-overlapping matches. For example, the pattern :regexp:`/foofoo/` will
|
non-overlapping matches. For example, the pattern :regexp:`/foofoo/` will
|
||||||
match ``foofoofoofoo`` at offsets (0, 6) and (6, 12).
|
match ``foofoofoofoo`` at offsets (0, 6) and (6, 12).
|
||||||
|
|
||||||
.. note:: Since Chimera is a hybrid of Hyperscan and PCRE in order to support
|
.. note:: Since Chimera is a hybrid of Vectorscan and PCRE in order to support
|
||||||
full PCRE syntax, there will be extra performance overhead compared to
|
full PCRE syntax, there will be extra performance overhead compared to
|
||||||
Hyperscan-only solution. Please always use Hyperscan for better performance
|
Vectorscan-only solution. Please always use Vectorscan for better performance
|
||||||
unless you need full PCRE syntax support.
|
unless you need full PCRE syntax support.
|
||||||
|
|
||||||
See :ref:`chruntime` for more details
|
See :ref:`chruntime` for more details
|
||||||
@ -83,12 +83,12 @@ Requirements
|
|||||||
The PCRE library (http://pcre.org/) version 8.41 is required for Chimera.
|
The PCRE library (http://pcre.org/) version 8.41 is required for Chimera.
|
||||||
|
|
||||||
.. note:: Since Chimera needs to reference PCRE internal functions, please place PCRE source
|
.. note:: Since Chimera needs to reference PCRE internal functions, please place PCRE source
|
||||||
directory under Hyperscan root directory in order to build Chimera.
|
directory under Vectorscan root directory in order to build Chimera.
|
||||||
|
|
||||||
Besides this, both the hardware and software requirements of Chimera are the same as those of Hyperscan.
|
Besides this, both the hardware and software requirements of Chimera are the same as those of Vectorscan.
|
||||||
See :ref:`hardware` and :ref:`software` for more details.
|
See :ref:`hardware` and :ref:`software` for more details.
|
||||||
|
|
||||||
.. note:: Building Hyperscan will automatically generate Chimera library.
|
.. note:: Building Vectorscan will automatically generate Chimera library.
|
||||||
Currently only static library is supported for Chimera, so please
|
Currently only static library is supported for Chimera, so please
|
||||||
use static build type when configuring CMake build options.
|
use static build type when configuring CMake build options.
|
||||||
|
|
||||||
@ -119,7 +119,7 @@ databases:
|
|||||||
|
|
||||||
Compilation allows the Chimera library to analyze the given pattern(s) and
|
Compilation allows the Chimera library to analyze the given pattern(s) and
|
||||||
pre-determine how to scan for these patterns in an optimized fashion using
|
pre-determine how to scan for these patterns in an optimized fashion using
|
||||||
Hyperscan and PCRE.
|
Vectorscan and PCRE.
|
||||||
|
|
||||||
===============
|
===============
|
||||||
Pattern Support
|
Pattern Support
|
||||||
@ -134,7 +134,7 @@ Semantics
|
|||||||
=========
|
=========
|
||||||
|
|
||||||
Chimera supports the exact same semantics of PCRE library. Moreover, it supports
|
Chimera supports the exact same semantics of PCRE library. Moreover, it supports
|
||||||
multiple simultaneous pattern matching like Hyperscan and the multiple matches
|
multiple simultaneous pattern matching like Vectorscan and the multiple matches
|
||||||
will be reported in order by end offset.
|
will be reported in order by end offset.
|
||||||
|
|
||||||
.. _chruntime:
|
.. _chruntime:
|
||||||
|
@ -9,7 +9,7 @@ Compiling Patterns
|
|||||||
Building a Database
|
Building a Database
|
||||||
*******************
|
*******************
|
||||||
|
|
||||||
The Hyperscan compiler API accepts regular expressions and converts them into a
|
The Vectorscan compiler API accepts regular expressions and converts them into a
|
||||||
compiled pattern database that can then be used to scan data.
|
compiled pattern database that can then be used to scan data.
|
||||||
|
|
||||||
The API provides three functions that compile regular expressions into
|
The API provides three functions that compile regular expressions into
|
||||||
@ -24,7 +24,7 @@ databases:
|
|||||||
#. :c:func:`hs_compile_ext_multi`: compiles an array of expressions as above,
|
#. :c:func:`hs_compile_ext_multi`: compiles an array of expressions as above,
|
||||||
but allows :ref:`extparam` to be specified for each expression.
|
but allows :ref:`extparam` to be specified for each expression.
|
||||||
|
|
||||||
Compilation allows the Hyperscan library to analyze the given pattern(s) and
|
Compilation allows the Vectorscan library to analyze the given pattern(s) and
|
||||||
pre-determine how to scan for these patterns in an optimized fashion that would
|
pre-determine how to scan for these patterns in an optimized fashion that would
|
||||||
be far too expensive to compute at run-time.
|
be far too expensive to compute at run-time.
|
||||||
|
|
||||||
@ -48,10 +48,10 @@ To compile patterns to be used in streaming mode, the ``mode`` parameter of
|
|||||||
block mode requires the use of :c:member:`HS_MODE_BLOCK` and vectored mode
|
block mode requires the use of :c:member:`HS_MODE_BLOCK` and vectored mode
|
||||||
requires the use of :c:member:`HS_MODE_VECTORED`. A pattern database compiled
|
requires the use of :c:member:`HS_MODE_VECTORED`. A pattern database compiled
|
||||||
for one mode (streaming, block or vectored) can only be used in that mode. The
|
for one mode (streaming, block or vectored) can only be used in that mode. The
|
||||||
version of Hyperscan used to produce a compiled pattern database must match the
|
version of Vectorscan used to produce a compiled pattern database must match the
|
||||||
version of Hyperscan used to scan with it.
|
version of Vectorscan used to scan with it.
|
||||||
|
|
||||||
Hyperscan provides support for targeting a database at a particular CPU
|
Vectorscan provides support for targeting a database at a particular CPU
|
||||||
platform; see :ref:`instr_specialization` for details.
|
platform; see :ref:`instr_specialization` for details.
|
||||||
|
|
||||||
=====================
|
=====================
|
||||||
@ -75,14 +75,14 @@ characters exist in regular grammar like ``[``, ``]``, ``(``, ``)``, ``{``,
|
|||||||
While in pure literal case, all these meta characters lost extra meanings
|
While in pure literal case, all these meta characters lost extra meanings
|
||||||
except that they are just common ASCII codes.
|
except that they are just common ASCII codes.
|
||||||
|
|
||||||
Hyperscan is initially designed to process common regular expressions. It is
|
Vectorscan is initially designed to process common regular expressions. It is
|
||||||
hence embedded with a complex parser to do comprehensive regular grammar
|
hence embedded with a complex parser to do comprehensive regular grammar
|
||||||
interpretation. Particularly, the identification of above meta characters is the
|
interpretation. Particularly, the identification of above meta characters is the
|
||||||
basic step for the interpretation of far more complex regular grammars.
|
basic step for the interpretation of far more complex regular grammars.
|
||||||
|
|
||||||
However in real cases, patterns may not always be regular expressions. They
|
However in real cases, patterns may not always be regular expressions. They
|
||||||
could just be pure literals. Problem will come if the pure literals contain
|
could just be pure literals. Problem will come if the pure literals contain
|
||||||
regular meta characters. Supposing fed directly into traditional Hyperscan
|
regular meta characters. Supposing fed directly into traditional Vectorscan
|
||||||
compile API, all these meta characters will be interpreted in predefined ways,
|
compile API, all these meta characters will be interpreted in predefined ways,
|
||||||
which is unnecessary and the result is totally out of expectation. To avoid
|
which is unnecessary and the result is totally out of expectation. To avoid
|
||||||
such misunderstanding by traditional API, users have to preprocess these
|
such misunderstanding by traditional API, users have to preprocess these
|
||||||
@ -90,7 +90,7 @@ literal patterns by converting the meta characters into some other formats:
|
|||||||
either by adding a backslash ``\`` before certain meta characters, or by
|
either by adding a backslash ``\`` before certain meta characters, or by
|
||||||
converting all the characters into a hexadecimal representation.
|
converting all the characters into a hexadecimal representation.
|
||||||
|
|
||||||
In ``v5.2.0``, Hyperscan introduces 2 new compile APIs for pure literal patterns:
|
In ``v5.2.0``, Vectorscan introduces 2 new compile APIs for pure literal patterns:
|
||||||
|
|
||||||
#. :c:func:`hs_compile_lit`: compiles a single pure literal into a pattern
|
#. :c:func:`hs_compile_lit`: compiles a single pure literal into a pattern
|
||||||
database.
|
database.
|
||||||
@ -106,7 +106,7 @@ content directly into these APIs without worrying about writing regular meta
|
|||||||
characters in their patterns. No preprocessing work is needed any more.
|
characters in their patterns. No preprocessing work is needed any more.
|
||||||
|
|
||||||
For new APIs, the ``length`` of each literal pattern is a newly added parameter.
|
For new APIs, the ``length`` of each literal pattern is a newly added parameter.
|
||||||
Hyperscan needs to locate the end position of the input expression via clearly
|
Vectorscan needs to locate the end position of the input expression via clearly
|
||||||
knowing each literal's length, not by simply identifying character ``\0`` of a
|
knowing each literal's length, not by simply identifying character ``\0`` of a
|
||||||
string.
|
string.
|
||||||
|
|
||||||
@ -127,19 +127,19 @@ Supported flags: :c:member:`HS_FLAG_CASELESS`, :c:member:`HS_FLAG_SINGLEMATCH`,
|
|||||||
Pattern Support
|
Pattern Support
|
||||||
***************
|
***************
|
||||||
|
|
||||||
Hyperscan supports the pattern syntax used by the PCRE library ("libpcre"),
|
Vectorscan supports the pattern syntax used by the PCRE library ("libpcre"),
|
||||||
described at <http://www.pcre.org/>. However, not all constructs available in
|
described at <http://www.pcre.org/>. However, not all constructs available in
|
||||||
libpcre are supported. The use of unsupported constructs will result in
|
libpcre are supported. The use of unsupported constructs will result in
|
||||||
compilation errors.
|
compilation errors.
|
||||||
|
|
||||||
The version of PCRE used to validate Hyperscan's interpretation of this syntax
|
The version of PCRE used to validate Vectorscan's interpretation of this syntax
|
||||||
is 8.41 or above.
|
is 8.41 or above.
|
||||||
|
|
||||||
====================
|
====================
|
||||||
Supported Constructs
|
Supported Constructs
|
||||||
====================
|
====================
|
||||||
|
|
||||||
The following regex constructs are supported by Hyperscan:
|
The following regex constructs are supported by Vectorscan:
|
||||||
|
|
||||||
* Literal characters and strings, with all libpcre quoting and character
|
* Literal characters and strings, with all libpcre quoting and character
|
||||||
escapes.
|
escapes.
|
||||||
@ -177,7 +177,7 @@ The following regex constructs are supported by Hyperscan:
|
|||||||
:c:member:`HS_FLAG_SINGLEMATCH` flag is on for that pattern.
|
:c:member:`HS_FLAG_SINGLEMATCH` flag is on for that pattern.
|
||||||
|
|
||||||
* Lazy modifiers (:regexp:`?` appended to another quantifier, e.g.
|
* Lazy modifiers (:regexp:`?` appended to another quantifier, e.g.
|
||||||
:regexp:`\\w+?`) are supported but ignored (as Hyperscan reports all
|
:regexp:`\\w+?`) are supported but ignored (as Vectorscan reports all
|
||||||
matches).
|
matches).
|
||||||
|
|
||||||
* Parenthesization, including the named and unnamed capturing and
|
* Parenthesization, including the named and unnamed capturing and
|
||||||
@ -219,15 +219,15 @@ The following regex constructs are supported by Hyperscan:
|
|||||||
.. note:: At this time, not all patterns can be successfully compiled with the
|
.. note:: At this time, not all patterns can be successfully compiled with the
|
||||||
:c:member:`HS_FLAG_SOM_LEFTMOST` flag, which enables per-pattern support for
|
:c:member:`HS_FLAG_SOM_LEFTMOST` flag, which enables per-pattern support for
|
||||||
:ref:`som`. The patterns that support this flag are a subset of patterns that
|
:ref:`som`. The patterns that support this flag are a subset of patterns that
|
||||||
can be successfully compiled with Hyperscan; notably, many bounded repeat
|
can be successfully compiled with Vectorscan; notably, many bounded repeat
|
||||||
forms that can be compiled with Hyperscan without the Start of Match flag
|
forms that can be compiled with Vectorscan without the Start of Match flag
|
||||||
enabled cannot be compiled with the flag enabled.
|
enabled cannot be compiled with the flag enabled.
|
||||||
|
|
||||||
======================
|
======================
|
||||||
Unsupported Constructs
|
Unsupported Constructs
|
||||||
======================
|
======================
|
||||||
|
|
||||||
The following regex constructs are not supported by Hyperscan:
|
The following regex constructs are not supported by Vectorscan:
|
||||||
|
|
||||||
* Backreferences and capturing sub-expressions.
|
* Backreferences and capturing sub-expressions.
|
||||||
* Arbitrary zero-width assertions.
|
* Arbitrary zero-width assertions.
|
||||||
@ -246,32 +246,32 @@ The following regex constructs are not supported by Hyperscan:
|
|||||||
Semantics
|
Semantics
|
||||||
*********
|
*********
|
||||||
|
|
||||||
While Hyperscan follows libpcre syntax, it provides different semantics. The
|
While Vectorscan follows libpcre syntax, it provides different semantics. The
|
||||||
major departures from libpcre semantics are motivated by the requirements of
|
major departures from libpcre semantics are motivated by the requirements of
|
||||||
streaming and multiple simultaneous pattern matching.
|
streaming and multiple simultaneous pattern matching.
|
||||||
|
|
||||||
The major departures from libpcre semantics are:
|
The major departures from libpcre semantics are:
|
||||||
|
|
||||||
#. **Multiple pattern matching**: Hyperscan allows matches to be reported for
|
#. **Multiple pattern matching**: Vectorscan allows matches to be reported for
|
||||||
several patterns simultaneously. This is not equivalent to separating the
|
several patterns simultaneously. This is not equivalent to separating the
|
||||||
patterns by :regexp:`|` in libpcre, which evaluates alternations
|
patterns by :regexp:`|` in libpcre, which evaluates alternations
|
||||||
left-to-right.
|
left-to-right.
|
||||||
|
|
||||||
#. **Lack of ordering**: the multiple matches that Hyperscan produces are not
|
#. **Lack of ordering**: the multiple matches that Vectorscan produces are not
|
||||||
guaranteed to be ordered, although they will always fall within the bounds of
|
guaranteed to be ordered, although they will always fall within the bounds of
|
||||||
the current scan.
|
the current scan.
|
||||||
|
|
||||||
#. **End offsets only**: Hyperscan's default behaviour is only to report the end
|
#. **End offsets only**: Vectorscan's default behaviour is only to report the end
|
||||||
offset of a match. Reporting of the start offset can be enabled with
|
offset of a match. Reporting of the start offset can be enabled with
|
||||||
per-expression flags at pattern compile time. See :ref:`som` for details.
|
per-expression flags at pattern compile time. See :ref:`som` for details.
|
||||||
|
|
||||||
#. **"All matches" reported**: scanning :regexp:`/foo.*bar/` against
|
#. **"All matches" reported**: scanning :regexp:`/foo.*bar/` against
|
||||||
``fooxyzbarbar`` will return two matches from Hyperscan -- at the points
|
``fooxyzbarbar`` will return two matches from Vectorscan -- at the points
|
||||||
corresponding to the ends of ``fooxyzbar`` and ``fooxyzbarbar``. In contrast,
|
corresponding to the ends of ``fooxyzbar`` and ``fooxyzbarbar``. In contrast,
|
||||||
libpcre semantics by default would report only one match at ``fooxyzbarbar``
|
libpcre semantics by default would report only one match at ``fooxyzbarbar``
|
||||||
(greedy semantics) or, if non-greedy semantics were switched on, one match at
|
(greedy semantics) or, if non-greedy semantics were switched on, one match at
|
||||||
``fooxyzbar``. This means that switching between greedy and non-greedy
|
``fooxyzbar``. This means that switching between greedy and non-greedy
|
||||||
semantics is a no-op in Hyperscan.
|
semantics is a no-op in Vectorscan.
|
||||||
|
|
||||||
To support libpcre quantifier semantics while accurately reporting streaming
|
To support libpcre quantifier semantics while accurately reporting streaming
|
||||||
matches at the time they occur is impossible. For example, consider the pattern
|
matches at the time they occur is impossible. For example, consider the pattern
|
||||||
@ -299,7 +299,7 @@ as in block 3 -- which would constitute a better match for the pattern.
|
|||||||
Start of Match
|
Start of Match
|
||||||
==============
|
==============
|
||||||
|
|
||||||
In standard operation, Hyperscan will only provide the end offset of a match
|
In standard operation, Vectorscan will only provide the end offset of a match
|
||||||
when the match callback is called. If the :c:member:`HS_FLAG_SOM_LEFTMOST` flag
|
when the match callback is called. If the :c:member:`HS_FLAG_SOM_LEFTMOST` flag
|
||||||
is specified for a particular pattern, then the same set of matches is
|
is specified for a particular pattern, then the same set of matches is
|
||||||
returned, but each match will also provide the leftmost possible start offset
|
returned, but each match will also provide the leftmost possible start offset
|
||||||
@ -308,7 +308,7 @@ corresponding to its end offset.
|
|||||||
Using the SOM flag entails a number of trade-offs and limitations:
|
Using the SOM flag entails a number of trade-offs and limitations:
|
||||||
|
|
||||||
* Reduced pattern support: For many patterns, tracking SOM is complex and can
|
* Reduced pattern support: For many patterns, tracking SOM is complex and can
|
||||||
result in Hyperscan failing to compile a pattern with a "Pattern too
|
result in Vectorscan failing to compile a pattern with a "Pattern too
|
||||||
large" error, even if the pattern is supported in normal operation.
|
large" error, even if the pattern is supported in normal operation.
|
||||||
* Increased stream state: At scan time, state space is required to track
|
* Increased stream state: At scan time, state space is required to track
|
||||||
potential SOM offsets, and this must be stored in persistent stream state in
|
potential SOM offsets, and this must be stored in persistent stream state in
|
||||||
@ -316,20 +316,20 @@ Using the SOM flag entails a number of trade-offs and limitations:
|
|||||||
required to match a pattern.
|
required to match a pattern.
|
||||||
* Performance overhead: Similarly, there is generally a performance cost
|
* Performance overhead: Similarly, there is generally a performance cost
|
||||||
associated with tracking SOM.
|
associated with tracking SOM.
|
||||||
* Incompatible features: Some other Hyperscan pattern flags (such as
|
* Incompatible features: Some other Vectorscan pattern flags (such as
|
||||||
:c:member:`HS_FLAG_SINGLEMATCH` and :c:member:`HS_FLAG_PREFILTER`) can not be
|
:c:member:`HS_FLAG_SINGLEMATCH` and :c:member:`HS_FLAG_PREFILTER`) can not be
|
||||||
used in combination with SOM. Specifying them together with
|
used in combination with SOM. Specifying them together with
|
||||||
:c:member:`HS_FLAG_SOM_LEFTMOST` will result in a compilation error.
|
:c:member:`HS_FLAG_SOM_LEFTMOST` will result in a compilation error.
|
||||||
|
|
||||||
In streaming mode, the amount of precision delivered by SOM can be controlled
|
In streaming mode, the amount of precision delivered by SOM can be controlled
|
||||||
with the SOM horizon flags. These instruct Hyperscan to deliver accurate SOM
|
with the SOM horizon flags. These instruct Vectorscan to deliver accurate SOM
|
||||||
information within a certain distance of the end offset, and return a special
|
information within a certain distance of the end offset, and return a special
|
||||||
start offset of :c:member:`HS_OFFSET_PAST_HORIZON` otherwise. Specifying a
|
start offset of :c:member:`HS_OFFSET_PAST_HORIZON` otherwise. Specifying a
|
||||||
small or medium SOM horizon will usually reduce the stream state required for a
|
small or medium SOM horizon will usually reduce the stream state required for a
|
||||||
given database.
|
given database.
|
||||||
|
|
||||||
.. note:: In streaming mode, the start offset returned for a match may refer to
|
.. note:: In streaming mode, the start offset returned for a match may refer to
|
||||||
a point in the stream *before* the current block being scanned. Hyperscan
|
a point in the stream *before* the current block being scanned. Vectorscan
|
||||||
provides no facility for accessing earlier blocks; if the calling application
|
provides no facility for accessing earlier blocks; if the calling application
|
||||||
needs to inspect historical data, then it must store it itself.
|
needs to inspect historical data, then it must store it itself.
|
||||||
|
|
||||||
@ -341,7 +341,7 @@ Extended Parameters
|
|||||||
|
|
||||||
In some circumstances, more control over the matching behaviour of a pattern is
|
In some circumstances, more control over the matching behaviour of a pattern is
|
||||||
required than can be specified easily using regular expression syntax. For
|
required than can be specified easily using regular expression syntax. For
|
||||||
these scenarios, Hyperscan provides the :c:func:`hs_compile_ext_multi` function
|
these scenarios, Vectorscan provides the :c:func:`hs_compile_ext_multi` function
|
||||||
that allows a set of "extended parameters" to be set on a per-pattern basis.
|
that allows a set of "extended parameters" to be set on a per-pattern basis.
|
||||||
|
|
||||||
Extended parameters are specified using an :c:type:`hs_expr_ext_t` structure,
|
Extended parameters are specified using an :c:type:`hs_expr_ext_t` structure,
|
||||||
@ -383,18 +383,18 @@ section.
|
|||||||
Prefiltering Mode
|
Prefiltering Mode
|
||||||
=================
|
=================
|
||||||
|
|
||||||
Hyperscan provides a per-pattern flag, :c:member:`HS_FLAG_PREFILTER`, which can
|
Vectorscan provides a per-pattern flag, :c:member:`HS_FLAG_PREFILTER`, which can
|
||||||
be used to implement a prefilter for a pattern than Hyperscan would not
|
be used to implement a prefilter for a pattern that Vectorscan would not
|
||||||
ordinarily support.
|
ordinarily support.
|
||||||
|
|
||||||
This flag instructs Hyperscan to compile an "approximate" version of this
|
This flag instructs Vectorscan to compile an "approximate" version of this
|
||||||
pattern for use in a prefiltering application, even if Hyperscan does not
|
pattern for use in a prefiltering application, even if Vectorscan does not
|
||||||
support the pattern in normal operation.
|
support the pattern in normal operation.
|
||||||
|
|
||||||
The set of matches returned when this flag is used is guaranteed to be a
|
The set of matches returned when this flag is used is guaranteed to be a
|
||||||
superset of the matches specified by the non-prefiltering expression.
|
superset of the matches specified by the non-prefiltering expression.
|
||||||
|
|
||||||
If the pattern contains pattern constructs not supported by Hyperscan (such as
|
If the pattern contains pattern constructs not supported by Vectorscan (such as
|
||||||
zero-width assertions, back-references or conditional references) these
|
zero-width assertions, back-references or conditional references) these
|
||||||
constructs will be replaced internally with broader constructs that may match
|
constructs will be replaced internally with broader constructs that may match
|
||||||
more often.
|
more often.
|
||||||
@ -404,7 +404,7 @@ back-reference :regexp:`\\1`. In prefiltering mode, this pattern might be
|
|||||||
approximated by having its back-reference replaced with its referent, forming
|
approximated by having its back-reference replaced with its referent, forming
|
||||||
:regexp:`/\\w+ again \\w+/`.
|
:regexp:`/\\w+ again \\w+/`.
|
||||||
|
|
||||||
Furthermore, in prefiltering mode Hyperscan may simplify a pattern that would
|
Furthermore, in prefiltering mode Vectorscan may simplify a pattern that would
|
||||||
otherwise return a "Pattern too large" error at compile time, or for performance
|
otherwise return a "Pattern too large" error at compile time, or for performance
|
||||||
reasons (subject to the matching guarantee above).
|
reasons (subject to the matching guarantee above).
|
||||||
|
|
||||||
@ -422,22 +422,22 @@ matches for the pattern.
|
|||||||
Instruction Set Specialization
|
Instruction Set Specialization
|
||||||
******************************
|
******************************
|
||||||
|
|
||||||
Hyperscan is able to make use of several modern instruction set features found
|
Vectorscan is able to make use of several modern instruction set features found
|
||||||
on x86 processors to provide improvements in scanning performance.
|
on x86 processors to provide improvements in scanning performance.
|
||||||
|
|
||||||
Some of these features are selected when the library is built; for example,
|
Some of these features are selected when the library is built; for example,
|
||||||
Hyperscan will use the native ``POPCNT`` instruction on processors where it is
|
Vectorscan will use the native ``POPCNT`` instruction on processors where it is
|
||||||
available and the library has been optimized for the host architecture.
|
available and the library has been optimized for the host architecture.
|
||||||
|
|
||||||
.. note:: By default, the Hyperscan runtime is built with the ``-march=native``
|
.. note:: By default, the Vectorscan runtime is built with the ``-march=native``
|
||||||
compiler flag and (where possible) will make use of all instructions known by
|
compiler flag and (where possible) will make use of all instructions known by
|
||||||
the host's C compiler.
|
the host's C compiler.
|
||||||
|
|
||||||
To use some instruction set features, however, Hyperscan must build a
|
To use some instruction set features, however, Vectorscan must build a
|
||||||
specialized database to support them. This means that the target platform must
|
specialized database to support them. This means that the target platform must
|
||||||
be specified at pattern compile time.
|
be specified at pattern compile time.
|
||||||
|
|
||||||
The Hyperscan compiler API functions all accept an optional
|
The Vectorscan compiler API functions all accept an optional
|
||||||
:c:type:`hs_platform_info_t` argument, which describes the target platform
|
:c:type:`hs_platform_info_t` argument, which describes the target platform
|
||||||
for the database to be built. If this argument is NULL, the database will be
|
for the database to be built. If this argument is NULL, the database will be
|
||||||
targeted at the current host platform.
|
targeted at the current host platform.
|
||||||
@ -467,7 +467,7 @@ See :ref:`api_constants` for the full list of CPU tuning and feature flags.
|
|||||||
Approximate matching
|
Approximate matching
|
||||||
********************
|
********************
|
||||||
|
|
||||||
Hyperscan provides an experimental approximate matching mode, which will match
|
Vectorscan provides an experimental approximate matching mode, which will match
|
||||||
patterns within a given edit distance. The exact matching behavior is defined as
|
patterns within a given edit distance. The exact matching behavior is defined as
|
||||||
follows:
|
follows:
|
||||||
|
|
||||||
@ -492,7 +492,7 @@ follows:
|
|||||||
|
|
||||||
Here are a few examples of approximate matching:
|
Here are a few examples of approximate matching:
|
||||||
|
|
||||||
* Pattern :regexp:`/foo/` can match ``foo`` when using regular Hyperscan
|
* Pattern :regexp:`/foo/` can match ``foo`` when using regular Vectorscan
|
||||||
matching behavior. With approximate matching within edit distance 2, the
|
matching behavior. With approximate matching within edit distance 2, the
|
||||||
pattern will produce matches when scanned against ``foo``, ``foooo``, ``f00``,
|
pattern will produce matches when scanned against ``foo``, ``foooo``, ``f00``,
|
||||||
``f``, and anything else that lies within edit distance 2 of matching corpora
|
``f``, and anything else that lies within edit distance 2 of matching corpora
|
||||||
@ -513,7 +513,7 @@ matching support. Here they are, in a nutshell:
|
|||||||
* Reduced pattern support:
|
* Reduced pattern support:
|
||||||
|
|
||||||
* For many patterns, approximate matching is complex and can result in
|
* For many patterns, approximate matching is complex and can result in
|
||||||
Hyperscan failing to compile a pattern with a "Pattern too large" error,
|
Vectorscan failing to compile a pattern with a "Pattern too large" error,
|
||||||
even if the pattern is supported in normal operation.
|
even if the pattern is supported in normal operation.
|
||||||
* Additionally, some patterns cannot be approximately matched because they
|
* Additionally, some patterns cannot be approximately matched because they
|
||||||
reduce to so-called "vacuous" patterns (patterns that match everything). For
|
reduce to so-called "vacuous" patterns (patterns that match everything). For
|
||||||
@ -548,7 +548,7 @@ Logical Combinations
|
|||||||
********************
|
********************
|
||||||
|
|
||||||
For situations when a user requires behaviour that depends on the presence or
|
For situations when a user requires behaviour that depends on the presence or
|
||||||
absence of matches from groups of patterns, Hyperscan provides support for the
|
absence of matches from groups of patterns, Vectorscan provides support for the
|
||||||
logical combination of patterns in a given pattern set, with three operators:
|
logical combination of patterns in a given pattern set, with three operators:
|
||||||
``NOT``, ``AND`` and ``OR``.
|
``NOT``, ``AND`` and ``OR``.
|
||||||
|
|
||||||
@ -561,7 +561,7 @@ offset is *true* if the expression it refers to is *false* at this offset.
|
|||||||
For example, ``NOT 101`` means that expression 101 has not yet matched at this
|
For example, ``NOT 101`` means that expression 101 has not yet matched at this
|
||||||
offset.
|
offset.
|
||||||
|
|
||||||
A logical combination is passed to Hyperscan at compile time as an expression.
|
A logical combination is passed to Vectorscan at compile time as an expression.
|
||||||
This combination expression will raise matches at every offset where one of its
|
This combination expression will raise matches at every offset where one of its
|
||||||
sub-expressions matches and the logical value of the whole expression is *true*.
|
sub-expressions matches and the logical value of the whole expression is *true*.
|
||||||
|
|
||||||
@ -603,7 +603,7 @@ In a logical combination expression:
|
|||||||
* Whitespace is ignored.
|
* Whitespace is ignored.
|
||||||
|
|
||||||
To use a logical combination expression, it must be passed to one of the
|
To use a logical combination expression, it must be passed to one of the
|
||||||
Hyperscan compile functions (:c:func:`hs_compile_multi`,
|
Vectorscan compile functions (:c:func:`hs_compile_multi`,
|
||||||
:c:func:`hs_compile_ext_multi`) along with the :c:member:`HS_FLAG_COMBINATION` flag,
|
:c:func:`hs_compile_ext_multi`) along with the :c:member:`HS_FLAG_COMBINATION` flag,
|
||||||
which identifies the pattern as a logical combination expression. The patterns
|
which identifies the pattern as a logical combination expression. The patterns
|
||||||
referred to in the logical combination expression must be compiled together in
|
referred to in the logical combination expression must be compiled together in
|
||||||
@ -613,7 +613,7 @@ When an expression has the :c:member:`HS_FLAG_COMBINATION` flag set, it ignores
|
|||||||
all other flags except the :c:member:`HS_FLAG_SINGLEMATCH` flag and the
|
all other flags except the :c:member:`HS_FLAG_SINGLEMATCH` flag and the
|
||||||
:c:member:`HS_FLAG_QUIET` flag.
|
:c:member:`HS_FLAG_QUIET` flag.
|
||||||
|
|
||||||
Hyperscan will accept logical combination expressions at compile time that
|
Vectorscan will accept logical combination expressions at compile time that
|
||||||
evaluate to *true* when no patterns have matched, and report the match for
|
evaluate to *true* when no patterns have matched, and report the match for
|
||||||
combination at end of data if no patterns have matched; for example: ::
|
combination at end of data if no patterns have matched; for example: ::
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# Hyperscan documentation build configuration file, created by
|
# Vectorscan documentation build configuration file, created by
|
||||||
# sphinx-quickstart on Tue Sep 29 15:59:19 2015.
|
# sphinx-quickstart on Tue Sep 29 15:59:19 2015.
|
||||||
#
|
#
|
||||||
# This file is execfile()d with the current directory set to its
|
# This file is execfile()d with the current directory set to its
|
||||||
@ -43,8 +43,8 @@ source_suffix = '.rst'
|
|||||||
master_doc = 'index'
|
master_doc = 'index'
|
||||||
|
|
||||||
# General information about the project.
|
# General information about the project.
|
||||||
project = u'Hyperscan'
|
project = u'Vectorscan'
|
||||||
copyright = u'2015-2018, Intel Corporation'
|
copyright = u'2015-2020, Intel Corporation; 2020-2024, VectorCamp; and other contributors'
|
||||||
|
|
||||||
# The version info for the project you're documenting, acts as replacement for
|
# The version info for the project you're documenting, acts as replacement for
|
||||||
# |version| and |release|, also used in various other places throughout the
|
# |version| and |release|, also used in various other places throughout the
|
||||||
@ -202,7 +202,7 @@ latex_elements = {
|
|||||||
# (source start file, target name, title,
|
# (source start file, target name, title,
|
||||||
# author, documentclass [howto, manual, or own class]).
|
# author, documentclass [howto, manual, or own class]).
|
||||||
latex_documents = [
|
latex_documents = [
|
||||||
('index', 'Hyperscan.tex', u'Hyperscan Documentation',
|
('index', 'Hyperscan.tex', u'Vectorscan Documentation',
|
||||||
u'Intel Corporation', 'manual'),
|
u'Intel Corporation', 'manual'),
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -232,8 +232,8 @@ latex_documents = [
|
|||||||
# One entry per manual page. List of tuples
|
# One entry per manual page. List of tuples
|
||||||
# (source start file, name, description, authors, manual section).
|
# (source start file, name, description, authors, manual section).
|
||||||
man_pages = [
|
man_pages = [
|
||||||
('index', 'hyperscan', u'Hyperscan Documentation',
|
('index', 'vectorscan', u'Vectorscan Documentation',
|
||||||
[u'Intel Corporation'], 1)
|
[u'Intel Corporation'], 7)
|
||||||
]
|
]
|
||||||
|
|
||||||
# If true, show URL addresses after external links.
|
# If true, show URL addresses after external links.
|
||||||
@ -246,8 +246,8 @@ man_pages = [
|
|||||||
# (source start file, target name, title, author,
|
# (source start file, target name, title, author,
|
||||||
# dir menu entry, description, category)
|
# dir menu entry, description, category)
|
||||||
texinfo_documents = [
|
texinfo_documents = [
|
||||||
('index', 'Hyperscan', u'Hyperscan Documentation',
|
('index', 'Vectorscan', u'Vectorscan Documentation',
|
||||||
u'Intel Corporation', 'Hyperscan', 'High-performance regular expression matcher.',
|
u'Intel Corporation; VectorCamp', 'Vectorscan', 'High-performance regular expression matcher.',
|
||||||
'Miscellaneous'),
|
'Miscellaneous'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -7,43 +7,41 @@ Getting Started
|
|||||||
Very Quick Start
|
Very Quick Start
|
||||||
****************
|
****************
|
||||||
|
|
||||||
#. Clone Hyperscan ::
|
#. Clone Vectorscan ::
|
||||||
|
|
||||||
cd <where-you-want-hyperscan-source>
|
cd <where-you-want-vectorscan-source>
|
||||||
git clone git://github.com/intel/hyperscan
|
git clone https://github.com/VectorCamp/vectorscan
|
||||||
|
|
||||||
#. Configure Hyperscan
|
#. Configure Vectorscan
|
||||||
|
|
||||||
Ensure that you have the correct :ref:`dependencies <software>` present,
|
Ensure that you have the correct :ref:`dependencies <software>` present,
|
||||||
and then:
|
and then:
|
||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
cd <where-you-want-to-build-hyperscan>
|
cd <where-you-want-to-build-vectorscan>
|
||||||
mkdir <build-dir>
|
mkdir <build-dir>
|
||||||
cd <build-dir>
|
cd <build-dir>
|
||||||
cmake [-G <generator>] [options] <hyperscan-source-path>
|
cmake [-G <generator>] [options] <vectorscan-source-path>
|
||||||
|
|
||||||
Known working generators:
|
Known working generators:
|
||||||
* ``Unix Makefiles`` --- make-compatible makefiles (default on Linux/FreeBSD/Mac OS X)
|
* ``Unix Makefiles`` --- make-compatible makefiles (default on Linux/FreeBSD/Mac OS X)
|
||||||
* ``Ninja`` --- `Ninja <http://martine.github.io/ninja/>`_ build files.
|
* ``Ninja`` --- `Ninja <http://martine.github.io/ninja/>`_ build files.
|
||||||
* ``Visual Studio 15 2017`` --- Visual Studio projects
|
|
||||||
|
|
||||||
Generators that might work include:
|
Unsupported generators that might work include:
|
||||||
* ``Xcode`` --- OS X Xcode projects.
|
* ``Xcode`` --- OS X Xcode projects.
|
||||||
|
|
||||||
#. Build Hyperscan
|
#. Build Vectorscan
|
||||||
|
|
||||||
Depending on the generator used:
|
Depending on the generator used:
|
||||||
* ``cmake --build .`` --- will build everything
|
* ``cmake --build .`` --- will build everything
|
||||||
* ``make -j<jobs>`` --- use makefiles in parallel
|
* ``make -j<jobs>`` --- use makefiles in parallel
|
||||||
* ``ninja`` --- use Ninja build
|
* ``ninja`` --- use Ninja build
|
||||||
* ``MsBuild.exe`` --- use Visual Studio MsBuild
|
|
||||||
* etc.
|
* etc.
|
||||||
|
|
||||||
#. Check Hyperscan
|
#. Check Vectorscan
|
||||||
|
|
||||||
Run the Hyperscan unit tests: ::
|
Run the Vectorscan unit tests: ::
|
||||||
|
|
||||||
bin/unit-hyperscan
|
bin/unit-hyperscan
|
||||||
|
|
||||||
@ -55,20 +53,23 @@ Requirements
|
|||||||
Hardware
|
Hardware
|
||||||
========
|
========
|
||||||
|
|
||||||
Hyperscan will run on x86 processors in 64-bit (Intel\ |reg| 64 Architecture) and
|
Vectorscan will run on x86 processors in 64-bit (Intel\ |reg| 64 Architecture) and
|
||||||
32-bit (IA-32 Architecture) modes.
|
32-bit (IA-32 Architecture) modes as well as Arm v8.0+ aarch64, and POWER 8+ ppc64le
|
||||||
|
machines.
|
||||||
|
|
||||||
Hyperscan is a high performance software library that takes advantage of recent
|
Vectorscan is a high performance software library that takes advantage of recent
|
||||||
Intel architecture advances. At a minimum, support for Supplemental Streaming
|
architecture advances.
|
||||||
SIMD Extensions 3 (SSSE3) is required, which should be available on any modern
|
|
||||||
x86 processor.
|
|
||||||
|
|
||||||
Additionally, Hyperscan can make use of:
|
Additionally, Vectorscan can make use of:
|
||||||
|
|
||||||
* Intel Streaming SIMD Extensions 4.2 (SSE4.2)
|
* Intel Streaming SIMD Extensions 4.2 (SSE4.2)
|
||||||
* the POPCNT instruction
|
* the POPCNT instruction
|
||||||
* Bit Manipulation Instructions (BMI, BMI2)
|
* Bit Manipulation Instructions (BMI, BMI2)
|
||||||
* Intel Advanced Vector Extensions 2 (Intel AVX2)
|
* Intel Advanced Vector Extensions 2 (Intel AVX2)
|
||||||
|
* Arm NEON
|
||||||
|
* Arm SVE and SVE2
|
||||||
|
* Arm SVE2 BITPERM
|
||||||
|
* IBM Power8/Power9 VSX
|
||||||
|
|
||||||
if present.
|
if present.
|
||||||
|
|
||||||
@ -79,40 +80,34 @@ These can be determined at library compile time, see :ref:`target_arch`.
|
|||||||
Software
|
Software
|
||||||
========
|
========
|
||||||
|
|
||||||
As a software library, Hyperscan doesn't impose any particular runtime
|
As a software library, Vectorscan doesn't impose any particular runtime
|
||||||
software requirements, however to build the Hyperscan library we require a
|
software requirements, however to build the Vectorscan library we require a
|
||||||
modern C and C++ compiler -- in particular, Hyperscan requires C99 and C++11
|
modern C and C++ compiler -- in particular, Vectorscan requires C99 and C++17
|
||||||
compiler support. The supported compilers are:
|
compiler support. The supported compilers are:
|
||||||
|
|
||||||
* GCC, v4.8.1 or higher
|
* GCC, v9 or higher
|
||||||
* Clang, v3.4 or higher (with libstdc++ or libc++)
|
* Clang, v5 or higher (with libstdc++ or libc++)
|
||||||
* Intel C++ Compiler v15 or higher
|
|
||||||
* Visual C++ 2017 Build Tools
|
|
||||||
|
|
||||||
Examples of operating systems that Hyperscan is known to work on include:
|
Examples of operating systems that Vectorscan is known to work on include:
|
||||||
|
|
||||||
Linux:
|
Linux:
|
||||||
|
|
||||||
* Ubuntu 14.04 LTS or newer
|
* Ubuntu 20.04 LTS or newer
|
||||||
* RedHat/CentOS 7 or newer
|
* RedHat/CentOS 7 or newer
|
||||||
|
* Fedora 38 or newer
|
||||||
|
* Debian 10
|
||||||
|
|
||||||
FreeBSD:
|
FreeBSD:
|
||||||
|
|
||||||
* 10.0 or newer
|
* 10.0 or newer
|
||||||
|
|
||||||
Windows:
|
|
||||||
|
|
||||||
* 8 or newer
|
|
||||||
|
|
||||||
Mac OS X:
|
Mac OS X:
|
||||||
|
|
||||||
* 10.8 or newer, using XCode/Clang
|
* 10.8 or newer, using XCode/Clang
|
||||||
|
|
||||||
Hyperscan *may* compile and run on other platforms, but there is no guarantee.
|
Vectorscan *may* compile and run on other platforms, but there is no guarantee.
|
||||||
We currently have experimental support for Windows using Intel C++ Compiler
|
|
||||||
or Visual Studio 2017.
|
|
||||||
|
|
||||||
In addition, the following software is required for compiling the Hyperscan library:
|
In addition, the following software is required for compiling the Vectorscan library:
|
||||||
|
|
||||||
======================================================= =========== ======================================
|
======================================================= =========== ======================================
|
||||||
Dependency Version Notes
|
Dependency Version Notes
|
||||||
@ -132,20 +127,20 @@ Ragel, you may use Cygwin to build it from source.
|
|||||||
Boost Headers
|
Boost Headers
|
||||||
-------------
|
-------------
|
||||||
|
|
||||||
Compiling Hyperscan depends on a recent version of the Boost C++ header
|
Compiling Vectorscan depends on a recent version of the Boost C++ header
|
||||||
library. If the Boost libraries are installed on the build machine in the
|
library. If the Boost libraries are installed on the build machine in the
|
||||||
usual paths, CMake will find them. If the Boost libraries are not installed,
|
usual paths, CMake will find them. If the Boost libraries are not installed,
|
||||||
the location of the Boost source tree can be specified during the CMake
|
the location of the Boost source tree can be specified during the CMake
|
||||||
configuration step using the ``BOOST_ROOT`` variable (described below).
|
configuration step using the ``BOOST_ROOT`` variable (described below).
|
||||||
|
|
||||||
Another alternative is to put a copy of (or a symlink to) the boost
|
Another alternative is to put a copy of (or a symlink to) the boost
|
||||||
subdirectory in ``<hyperscan-source-path>/include/boost``.
|
subdirectory in ``<vectorscan-source-path>/include/boost``.
|
||||||
|
|
||||||
For example: for the Boost-1.59.0 release: ::
|
For example: for the Boost-1.59.0 release: ::
|
||||||
|
|
||||||
ln -s boost_1_59_0/boost <hyperscan-source-path>/include/boost
|
ln -s boost_1_59_0/boost <vectorscan-source-path>/include/boost
|
||||||
|
|
||||||
As Hyperscan uses the header-only parts of Boost, it is not necessary to
|
As Vectorscan uses the header-only parts of Boost, it is not necessary to
|
||||||
compile the Boost libraries.
|
compile the Boost libraries.
|
||||||
|
|
||||||
CMake Configuration
|
CMake Configuration
|
||||||
@ -168,11 +163,12 @@ Common options for CMake include:
|
|||||||
| | Valid options are Debug, Release, RelWithDebInfo, |
|
| | Valid options are Debug, Release, RelWithDebInfo, |
|
||||||
| | and MinSizeRel. Default is RelWithDebInfo. |
|
| | and MinSizeRel. Default is RelWithDebInfo. |
|
||||||
+------------------------+----------------------------------------------------+
|
+------------------------+----------------------------------------------------+
|
||||||
| BUILD_SHARED_LIBS | Build Hyperscan as a shared library instead of |
|
| BUILD_SHARED_LIBS | Build Vectorscan as a shared library instead of |
|
||||||
| | the default static library. |
|
| | the default static library. |
|
||||||
|
| | Default: Off |
|
||||||
+------------------------+----------------------------------------------------+
|
+------------------------+----------------------------------------------------+
|
||||||
| BUILD_STATIC_AND_SHARED| Build both static and shared Hyperscan libs. |
|
| BUILD_STATIC_LIBS | Build Vectorscan as a static library. |
|
||||||
| | Default off. |
|
| | Default: On |
|
||||||
+------------------------+----------------------------------------------------+
|
+------------------------+----------------------------------------------------+
|
||||||
| BOOST_ROOT | Location of Boost source tree. |
|
| BOOST_ROOT | Location of Boost source tree. |
|
||||||
+------------------------+----------------------------------------------------+
|
+------------------------+----------------------------------------------------+
|
||||||
@ -180,12 +176,64 @@ Common options for CMake include:
|
|||||||
+------------------------+----------------------------------------------------+
|
+------------------------+----------------------------------------------------+
|
||||||
| FAT_RUNTIME | Build the :ref:`fat runtime<fat_runtime>`. Default |
|
| FAT_RUNTIME | Build the :ref:`fat runtime<fat_runtime>`. Default |
|
||||||
| | true on Linux, not available elsewhere. |
|
| | true on Linux, not available elsewhere. |
|
||||||
|
| | Default: Off |
|
||||||
|
+------------------------+----------------------------------------------------+
|
||||||
|
| USE_CPU_NATIVE | Native CPU detection is off by default, however it |
|
||||||
|
| | is possible to build a performance-oriented non-fat|
|
||||||
|
| | library tuned to your CPU. |
|
||||||
|
| | Default: Off |
|
||||||
|
+------------------------+----------------------------------------------------+
|
||||||
|
| SANITIZE | Use libasan sanitizer to detect possible bugs. |
|
||||||
|
| | Valid options are address, memory and undefined. |
|
||||||
|
+------------------------+----------------------------------------------------+
|
||||||
|
| SIMDE_BACKEND | Enable SIMDe backend. If this is chosen all native |
|
||||||
|
| | (SSE/AVX/AVX512/Neon/SVE/VSX) backends will be |
|
||||||
|
| | disabled and a SIMDe SSE4.2 emulation backend will |
|
||||||
|
| | be enabled. This will enable Vectorscan to build |
|
||||||
|
| | and run on architectures without SIMD. |
|
||||||
|
| | Default: Off |
|
||||||
|
+------------------------+----------------------------------------------------+
|
||||||
|
| SIMDE_NATIVE | Enable SIMDe native emulation of x86 SSE4.2 |
|
||||||
|
| | intrinsics on the building platform. That is, |
|
||||||
|
| | SSE4.2 intrinsics will be emulated using Neon on |
|
||||||
|
| | an Arm platform, or VSX on a Power platform, etc. |
|
||||||
|
| | Default: Off |
|
||||||
|
+------------------------+----------------------------------------------------+
|
||||||
|
|
||||||
|
X86 platform specific options include:
|
||||||
|
|
||||||
|
+------------------------+----------------------------------------------------+
|
||||||
|
| Variable | Description |
|
||||||
|
+========================+====================================================+
|
||||||
|
| BUILD_AVX2 | Enable code for AVX2. |
|
||||||
|
+------------------------+----------------------------------------------------+
|
||||||
|
| BUILD_AVX512 | Enable code for AVX512. Implies BUILD_AVX2. |
|
||||||
|
+------------------------+----------------------------------------------------+
|
||||||
|
| BUILD_AVX512VBMI | Enable code for AVX512 with VBMI extension. Implies|
|
||||||
|
| | BUILD_AVX512. |
|
||||||
|
+------------------------+----------------------------------------------------+
|
||||||
|
|
||||||
|
Arm platform specific options include:
|
||||||
|
|
||||||
|
+------------------------+----------------------------------------------------+
|
||||||
|
| Variable | Description |
|
||||||
|
+========================+====================================================+
|
||||||
|
| BUILD_SVE | Enable code for SVE, like on AWS Graviton3 CPUs. |
|
||||||
|
| | Not much code is ported just for SVE, but enabling |
|
||||||
|
| | SVE code production does improve code generation,  |
|
||||||
|
| | see Benchmarks. |
|
||||||
|
+------------------------+----------------------------------------------------+
|
||||||
|
| BUILD_SVE2 | Enable code for SVE2, implies BUILD_SVE. Most |
|
||||||
|
| | non-Neon code is written for SVE2. |
|
||||||
|
+------------------------+----------------------------------------------------+
|
||||||
|
| BUILD_SVE2_BITPERM | Enable code for SVE2_BITPERM hardware feature, |
|
||||||
|
| | implies BUILD_SVE2. |
|
||||||
+------------------------+----------------------------------------------------+
|
+------------------------+----------------------------------------------------+
|
||||||
|
|
||||||
For example, to generate a ``Debug`` build: ::
|
For example, to generate a ``Debug`` build: ::
|
||||||
|
|
||||||
cd <build-dir>
|
cd <build-dir>
|
||||||
cmake -DCMAKE_BUILD_TYPE=Debug <hyperscan-source-path>
|
cmake -DCMAKE_BUILD_TYPE=Debug <vectorscan-source-path>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -193,7 +241,7 @@ Build Type
|
|||||||
----------
|
----------
|
||||||
|
|
||||||
CMake determines a number of features for a build based on the Build Type.
|
CMake determines a number of features for a build based on the Build Type.
|
||||||
Hyperscan defaults to ``RelWithDebInfo``, i.e. "release with debugging
|
Vectorscan defaults to ``RelWithDebInfo``, i.e. "release with debugging
|
||||||
information". This is a performance optimized build without runtime assertions
|
information". This is a performance optimized build without runtime assertions
|
||||||
but with debug symbols enabled.
|
but with debug symbols enabled.
|
||||||
|
|
||||||
@ -201,7 +249,7 @@ The other types of builds are:
|
|||||||
|
|
||||||
* ``Release``: as above, but without debug symbols
|
* ``Release``: as above, but without debug symbols
|
||||||
* ``MinSizeRel``: a stripped release build
|
* ``MinSizeRel``: a stripped release build
|
||||||
* ``Debug``: used when developing Hyperscan. Includes runtime assertions
|
* ``Debug``: used when developing Vectorscan. Includes runtime assertions
|
||||||
(which has a large impact on runtime performance), and will also enable
|
(which has a large impact on runtime performance), and will also enable
|
||||||
some other build features like building internal unit
|
some other build features like building internal unit
|
||||||
tests.
|
tests.
|
||||||
@ -211,7 +259,7 @@ The other types of builds are:
|
|||||||
Target Architecture
|
Target Architecture
|
||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
Unless using the :ref:`fat runtime<fat_runtime>`, by default Hyperscan will be
|
Unless using the :ref:`fat runtime<fat_runtime>`, by default Vectorscan will be
|
||||||
compiled to target the instruction set of the processor of the machine that
|
compiled to target the instruction set of the processor of the machine that
|
||||||
being used for compilation. This is done via the use of ``-march=native``. The
|
is being used for compilation. This is done via the use of ``-march=native``. The
|
||||||
result of this means that a library built on one machine may not work on a
|
result of this means that a library built on one machine may not work on a
|
||||||
@ -223,7 +271,7 @@ CMake, or ``CMAKE_C_FLAGS`` and ``CMAKE_CXX_FLAGS`` on the CMake command line. F
|
|||||||
example, to set the instruction subsets up to ``SSE4.2`` using GCC 4.8: ::
|
example, to set the instruction subsets up to ``SSE4.2`` using GCC 4.8: ::
|
||||||
|
|
||||||
cmake -DCMAKE_C_FLAGS="-march=corei7" \
|
cmake -DCMAKE_C_FLAGS="-march=corei7" \
|
||||||
-DCMAKE_CXX_FLAGS="-march=corei7" <hyperscan-source-path>
|
-DCMAKE_CXX_FLAGS="-march=corei7" <vectorscan-source-path>
|
||||||
|
|
||||||
For more information, refer to :ref:`instr_specialization`.
|
For more information, refer to :ref:`instr_specialization`.
|
||||||
|
|
||||||
@ -232,17 +280,17 @@ For more information, refer to :ref:`instr_specialization`.
|
|||||||
Fat Runtime
|
Fat Runtime
|
||||||
-----------
|
-----------
|
||||||
|
|
||||||
A feature introduced in Hyperscan v4.4 is the ability for the Hyperscan
|
A feature introduced in Hyperscan v4.4 is the ability for the Vectorscan
|
||||||
library to dispatch the most appropriate runtime code for the host processor.
|
library to dispatch the most appropriate runtime code for the host processor.
|
||||||
This feature is called the "fat runtime", as a single Hyperscan library
|
This feature is called the "fat runtime", as a single Vectorscan library
|
||||||
contains multiple copies of the runtime code for different instruction sets.
|
contains multiple copies of the runtime code for different instruction sets.
|
||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
|
|
||||||
The fat runtime feature is only available on Linux. Release builds of
|
The fat runtime feature is only available on Linux. Release builds of
|
||||||
Hyperscan will default to having the fat runtime enabled where supported.
|
Vectorscan will default to having the fat runtime enabled where supported.
|
||||||
|
|
||||||
When building the library with the fat runtime, the Hyperscan runtime code
|
When building the library with the fat runtime, the Vectorscan runtime code
|
||||||
will be compiled multiple times for these different instruction sets, and
|
will be compiled multiple times for these different instruction sets, and
|
||||||
these compiled objects are combined into one library. There are no changes to
|
these compiled objects are combined into one library. There are no changes to
|
||||||
how user applications are built against this library.
|
how user applications are built against this library.
|
||||||
@ -254,11 +302,11 @@ resolved so that the right version of each API function is used. There is no
|
|||||||
impact on function call performance, as this check and resolution is performed
|
impact on function call performance, as this check and resolution is performed
|
||||||
by the ELF loader once when the binary is loaded.
|
by the ELF loader once when the binary is loaded.
|
||||||
|
|
||||||
If the Hyperscan library is used on x86 systems without ``SSSE3``, the runtime
|
If the Vectorscan library is used on x86 systems without ``SSE4.2``, the runtime
|
||||||
API functions will resolve to functions that return :c:member:`HS_ARCH_ERROR`
|
API functions will resolve to functions that return :c:member:`HS_ARCH_ERROR`
|
||||||
instead of potentially executing illegal instructions. The API function
|
instead of potentially executing illegal instructions. The API function
|
||||||
:c:func:`hs_valid_platform` can be used by application writers to determine if
|
:c:func:`hs_valid_platform` can be used by application writers to determine if
|
||||||
the current platform is supported by Hyperscan.
|
the current platform is supported by Vectorscan.
|
||||||
|
|
||||||
As of this release, the variants of the runtime that are built, and the CPU
|
As of this release, the variants of the runtime that are built, and the CPU
|
||||||
capability that is required, are the following:
|
capability that is required, are the following:
|
||||||
@ -299,6 +347,11 @@ capability that is required, are the following:
|
|||||||
|
|
||||||
cmake -DBUILD_AVX512VBMI=on <...>
|
cmake -DBUILD_AVX512VBMI=on <...>
|
||||||
|
|
||||||
|
Vectorscan adds support for Arm processors, including SVE, SVE2 and SVE2_BITPERM.
|
||||||
|
example: ::
|
||||||
|
|
||||||
|
cmake -DBUILD_SVE=ON -DBUILD_SVE2=ON -DBUILD_SVE2_BITPERM=ON <...>
|
||||||
|
|
||||||
As the fat runtime requires compiler, libc, and binutils support, at this time
|
As the fat runtime requires compiler, libc, and binutils support, at this time
|
||||||
it will only be enabled for Linux builds where the compiler supports the
|
it will only be enabled for Linux builds where the compiler supports the
|
||||||
`indirect function "ifunc" function attribute
|
`indirect function "ifunc" function attribute
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
###############################################
|
###############################################
|
||||||
Hyperscan |version| Developer's Reference Guide
|
Vectorscan |version| Developer's Reference Guide
|
||||||
###############################################
|
###############################################
|
||||||
|
|
||||||
-------
|
-------
|
||||||
|
@ -5,11 +5,11 @@
|
|||||||
Introduction
|
Introduction
|
||||||
############
|
############
|
||||||
|
|
||||||
Hyperscan is a software regular expression matching engine designed with
|
Vectorscan is a software regular expression matching engine designed with
|
||||||
high performance and flexibility in mind. It is implemented as a library that
|
high performance and flexibility in mind. It is implemented as a library that
|
||||||
exposes a straightforward C API.
|
exposes a straightforward C API.
|
||||||
|
|
||||||
The Hyperscan API itself is composed of two major components:
|
The Vectorscan API itself is composed of two major components:
|
||||||
|
|
||||||
***********
|
***********
|
||||||
Compilation
|
Compilation
|
||||||
@ -17,7 +17,7 @@ Compilation
|
|||||||
|
|
||||||
These functions take a group of regular expressions, along with identifiers and
|
These functions take a group of regular expressions, along with identifiers and
|
||||||
option flags, and compile them into an immutable database that can be used by
|
option flags, and compile them into an immutable database that can be used by
|
||||||
the Hyperscan scanning API. This compilation process performs considerable
|
the Vectorscan scanning API. This compilation process performs considerable
|
||||||
analysis and optimization work in order to build a database that will match the
|
analysis and optimization work in order to build a database that will match the
|
||||||
given expressions efficiently.
|
given expressions efficiently.
|
||||||
|
|
||||||
@ -36,8 +36,8 @@ See :ref:`compilation` for more detail.
|
|||||||
Scanning
|
Scanning
|
||||||
********
|
********
|
||||||
|
|
||||||
Once a Hyperscan database has been created, it can be used to scan data in
|
Once a Vectorscan database has been created, it can be used to scan data in
|
||||||
memory. Hyperscan provides several scanning modes, depending on whether the
|
memory. Vectorscan provides several scanning modes, depending on whether the
|
||||||
data to be scanned is available as a single contiguous block, whether it is
|
data to be scanned is available as a single contiguous block, whether it is
|
||||||
distributed amongst several blocks in memory at the same time, or whether it is
|
distributed amongst several blocks in memory at the same time, or whether it is
|
||||||
to be scanned as a sequence of blocks in a stream.
|
to be scanned as a sequence of blocks in a stream.
|
||||||
@ -45,7 +45,7 @@ to be scanned as a sequence of blocks in a stream.
|
|||||||
Matches are delivered to the application via a user-supplied callback function
|
Matches are delivered to the application via a user-supplied callback function
|
||||||
that is called synchronously for each match.
|
that is called synchronously for each match.
|
||||||
|
|
||||||
For a given database, Hyperscan provides several guarantees:
|
For a given database, Vectorscan provides several guarantees:
|
||||||
|
|
||||||
* No memory allocations occur at runtime with the exception of two
|
* No memory allocations occur at runtime with the exception of two
|
||||||
fixed-size allocations, both of which should be done ahead of time for
|
fixed-size allocations, both of which should be done ahead of time for
|
||||||
@ -56,7 +56,7 @@ For a given database, Hyperscan provides several guarantees:
|
|||||||
call.
|
call.
|
||||||
- **Stream state**: in streaming mode only, some state space is required to
|
- **Stream state**: in streaming mode only, some state space is required to
|
||||||
store data that persists between scan calls for each stream. This allows
|
store data that persists between scan calls for each stream. This allows
|
||||||
Hyperscan to track matches that span multiple blocks of data.
|
Vectorscan to track matches that span multiple blocks of data.
|
||||||
|
|
||||||
* The sizes of the scratch space and stream state (in streaming mode) required
|
* The sizes of the scratch space and stream state (in streaming mode) required
|
||||||
for a given database are fixed and determined at database compile time. This
|
for a given database are fixed and determined at database compile time. This
|
||||||
@ -64,7 +64,7 @@ For a given database, Hyperscan provides several guarantees:
|
|||||||
time, and these structures can be pre-allocated if required for performance
|
time, and these structures can be pre-allocated if required for performance
|
||||||
reasons.
|
reasons.
|
||||||
|
|
||||||
* Any pattern that has successfully been compiled by the Hyperscan compiler can
|
* Any pattern that has successfully been compiled by the Vectorscan compiler can
|
||||||
be scanned against any input. There are no internal resource limits or other
|
be scanned against any input. There are no internal resource limits or other
|
||||||
limitations at runtime that could cause a scan call to return an error.
|
limitations at runtime that could cause a scan call to return an error.
|
||||||
|
|
||||||
@ -74,12 +74,12 @@ See :ref:`runtime` for more detail.
|
|||||||
Tools
|
Tools
|
||||||
*****
|
*****
|
||||||
|
|
||||||
Some utilities for testing and benchmarking Hyperscan are included with the
|
Some utilities for testing and benchmarking Vectorscan are included with the
|
||||||
library. See :ref:`tools` for more information.
|
library. See :ref:`tools` for more information.
|
||||||
|
|
||||||
************
|
************
|
||||||
Example Code
|
Example Code
|
||||||
************
|
************
|
||||||
|
|
||||||
Some simple example code demonstrating the use of the Hyperscan API is
|
Some simple example code demonstrating the use of the Vectorscan API is
|
||||||
available in the ``examples/`` subdirectory of the Hyperscan distribution.
|
available in the ``examples/`` subdirectory of the Vectorscan distribution.
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
Performance Considerations
|
Performance Considerations
|
||||||
##########################
|
##########################
|
||||||
|
|
||||||
Hyperscan supports a wide range of patterns in all three scanning modes. It is
|
Vectorscan supports a wide range of patterns in all three scanning modes. It is
|
||||||
capable of extremely high levels of performance, but certain patterns can
|
capable of extremely high levels of performance, but certain patterns can
|
||||||
reduce performance markedly.
|
reduce performance markedly.
|
||||||
|
|
||||||
@ -25,7 +25,7 @@ For example, caseless matching of :regexp:`/abc/` can be written as:
|
|||||||
* :regexp:`/(?i)abc(?-i)/`
|
* :regexp:`/(?i)abc(?-i)/`
|
||||||
* :regexp:`/abc/i`
|
* :regexp:`/abc/i`
|
||||||
|
|
||||||
Hyperscan is capable of handling all these constructs. Unless there is a
|
Vectorscan is capable of handling all these constructs. Unless there is a
|
||||||
specific reason otherwise, do not rewrite patterns from one form to another.
|
specific reason otherwise, do not rewrite patterns from one form to another.
|
||||||
|
|
||||||
As another example, matching of :regexp:`/foo(bar|baz)(frotz)?/` can be
|
As another example, matching of :regexp:`/foo(bar|baz)(frotz)?/` can be
|
||||||
@ -41,24 +41,24 @@ Library usage
|
|||||||
|
|
||||||
.. tip:: Do not hand-optimize library usage.
|
.. tip:: Do not hand-optimize library usage.
|
||||||
|
|
||||||
The Hyperscan library is capable of dealing with small writes, unusually large
|
The Vectorscan library is capable of dealing with small writes, unusually large
|
||||||
and small pattern sets, etc. Unless there is a specific performance problem
|
and small pattern sets, etc. Unless there is a specific performance problem
|
||||||
with some usage of the library, it is best to use Hyperscan in a simple and
|
with some usage of the library, it is best to use Vectorscan in a simple and
|
||||||
direct fashion. For example, it is unlikely for there to be much benefit in
|
direct fashion. For example, it is unlikely for there to be much benefit in
|
||||||
buffering input to the library into larger blocks unless streaming writes are
|
buffering input to the library into larger blocks unless streaming writes are
|
||||||
tiny (say, 1-2 bytes at a time).
|
tiny (say, 1-2 bytes at a time).
|
||||||
|
|
||||||
Unlike many other pattern matching products, Hyperscan will run faster with
|
Unlike many other pattern matching products, Vectorscan will run faster with
|
||||||
small numbers of patterns and slower with large numbers of patterns in a smooth
|
small numbers of patterns and slower with large numbers of patterns in a smooth
|
||||||
fashion (as opposed to, typically, running at a moderate speed up to some fixed
|
fashion (as opposed to, typically, running at a moderate speed up to some fixed
|
||||||
limit then either breaking or running half as fast).
|
limit then either breaking or running half as fast).
|
||||||
|
|
||||||
Hyperscan also provides high-throughput matching with a single thread of
|
Vectorscan also provides high-throughput matching with a single thread of
|
||||||
control per core; if a database runs at 3.0 Gbps in Hyperscan it means that a
|
control per core; if a database runs at 3.0 Gbps in Vectorscan it means that a
|
||||||
3000-bit block of data will be scanned in 1 microsecond in a single thread of
|
3000-bit block of data will be scanned in 1 microsecond in a single thread of
|
||||||
control, not that it is required to scan 22 3000-bit blocks of data in 22
|
control, not that it is required to scan 22 3000-bit blocks of data in 22
|
||||||
microseconds. Thus, it is not usually necessary to buffer data to supply
|
microseconds. Thus, it is not usually necessary to buffer data to supply
|
||||||
Hyperscan with available parallelism.
|
Vectorscan with available parallelism.
|
||||||
|
|
||||||
********************
|
********************
|
||||||
Block-based matching
|
Block-based matching
|
||||||
@ -72,7 +72,7 @@ accumulated before processing, it should be scanned in block rather than in
|
|||||||
streaming mode.
|
streaming mode.
|
||||||
|
|
||||||
Unnecessary use of streaming mode reduces the number of optimizations that can
|
Unnecessary use of streaming mode reduces the number of optimizations that can
|
||||||
be applied in Hyperscan and may make some patterns run slower.
|
be applied in Vectorscan and may make some patterns run slower.
|
||||||
|
|
||||||
If there is a mixture of 'block' and 'streaming' mode patterns, these should be
|
If there is a mixture of 'block' and 'streaming' mode patterns, these should be
|
||||||
scanned in separate databases except in the case that the streaming patterns
|
scanned in separate databases except in the case that the streaming patterns
|
||||||
@ -107,7 +107,7 @@ Allocate scratch ahead of time
|
|||||||
|
|
||||||
Scratch allocation is not necessarily a cheap operation. Since it is the first
|
Scratch allocation is not necessarily a cheap operation. Since it is the first
|
||||||
time (after compilation or deserialization) that a pattern database is used,
|
time (after compilation or deserialization) that a pattern database is used,
|
||||||
Hyperscan performs some validation checks inside :c:func:`hs_alloc_scratch` and
|
Vectorscan performs some validation checks inside :c:func:`hs_alloc_scratch` and
|
||||||
must also allocate memory.
|
must also allocate memory.
|
||||||
|
|
||||||
Therefore, it is important to ensure that :c:func:`hs_alloc_scratch` is not
|
Therefore, it is important to ensure that :c:func:`hs_alloc_scratch` is not
|
||||||
@ -329,7 +329,7 @@ Consequently, :regexp:`/foo.*bar/L` with a check on start of match values after
|
|||||||
the callback is considerably more expensive and general than
|
the callback is considerably more expensive and general than
|
||||||
:regexp:`/foo.{300}bar/`.
|
:regexp:`/foo.{300}bar/`.
|
||||||
|
|
||||||
Similarly, the :c:member:`hs_expr_ext::min_length` extended parameter can be
|
Similarly, the :cpp:member:`hs_expr_ext::min_length` extended parameter can be
|
||||||
used to specify a lower bound on the length of the matches for a pattern. Using
|
used to specify a lower bound on the length of the matches for a pattern. Using
|
||||||
this facility may be more lightweight in some circumstances than using the SOM
|
this facility may be more lightweight in some circumstances than using the SOM
|
||||||
flag and post-confirming match length in the calling application.
|
flag and post-confirming match length in the calling application.
|
||||||
|
@ -6,35 +6,35 @@ Preface
|
|||||||
Overview
|
Overview
|
||||||
********
|
********
|
||||||
|
|
||||||
Hyperscan is a regular expression engine designed to offer high performance, the
|
Vectorscan is a regular expression engine designed to offer high performance, the
|
||||||
ability to match multiple expressions simultaneously and flexibility in
|
ability to match multiple expressions simultaneously and flexibility in
|
||||||
scanning operation.
|
scanning operation.
|
||||||
|
|
||||||
Patterns are provided to a compilation interface which generates an immutable
|
Patterns are provided to a compilation interface which generates an immutable
|
||||||
pattern database. The scan interface then can be used to scan a target data
|
pattern database. The scan interface then can be used to scan a target data
|
||||||
buffer for the given patterns, returning any matching results from that data
|
buffer for the given patterns, returning any matching results from that data
|
||||||
buffer. Hyperscan also provides a streaming mode, in which matches that span
|
buffer. Vectorscan also provides a streaming mode, in which matches that span
|
||||||
several blocks in a stream are detected.
|
several blocks in a stream are detected.
|
||||||
|
|
||||||
This document is designed to facilitate code-level integration of the Hyperscan
|
This document is designed to facilitate code-level integration of the Vectorscan
|
||||||
library with existing or new applications.
|
library with existing or new applications.
|
||||||
|
|
||||||
:ref:`intro` is a short overview of the Hyperscan library, with more detail on
|
:ref:`intro` is a short overview of the Vectorscan library, with more detail on
|
||||||
the Hyperscan API provided in the subsequent sections: :ref:`compilation` and
|
the Vectorscan API provided in the subsequent sections: :ref:`compilation` and
|
||||||
:ref:`runtime`.
|
:ref:`runtime`.
|
||||||
|
|
||||||
:ref:`perf` provides details on various factors which may impact the
|
:ref:`perf` provides details on various factors which may impact the
|
||||||
performance of a Hyperscan integration.
|
performance of a Vectorscan integration.
|
||||||
|
|
||||||
:ref:`api_constants` and :ref:`api_files` provides a detailed summary of the
|
:ref:`api_constants` and :ref:`api_files` provide a detailed summary of the
|
||||||
Hyperscan Application Programming Interface (API).
|
Vectorscan Application Programming Interface (API).
|
||||||
|
|
||||||
********
|
********
|
||||||
Audience
|
Audience
|
||||||
********
|
********
|
||||||
|
|
||||||
This guide is aimed at developers interested in integrating Hyperscan into an
|
This guide is aimed at developers interested in integrating Vectorscan into an
|
||||||
application. For information on building the Hyperscan library, see the Quick
|
application. For information on building the Vectorscan library, see the Quick
|
||||||
Start Guide.
|
Start Guide.
|
||||||
|
|
||||||
***********
|
***********
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
Scanning for Patterns
|
Scanning for Patterns
|
||||||
#####################
|
#####################
|
||||||
|
|
||||||
Hyperscan provides three different scanning modes, each with its own scan
|
Vectorscan provides three different scanning modes, each with its own scan
|
||||||
function beginning with ``hs_scan``. In addition, streaming mode has a number
|
function beginning with ``hs_scan``. In addition, streaming mode has a number
|
||||||
of other API functions for managing stream state.
|
of other API functions for managing stream state.
|
||||||
|
|
||||||
@ -33,8 +33,8 @@ See :c:type:`match_event_handler` for more information.
|
|||||||
Streaming Mode
|
Streaming Mode
|
||||||
**************
|
**************
|
||||||
|
|
||||||
The core of the Hyperscan streaming runtime API consists of functions to open,
|
The core of the Vectorscan streaming runtime API consists of functions to open,
|
||||||
scan, and close Hyperscan data streams:
|
scan, and close Vectorscan data streams:
|
||||||
|
|
||||||
* :c:func:`hs_open_stream`: allocates and initializes a new stream for scanning.
|
* :c:func:`hs_open_stream`: allocates and initializes a new stream for scanning.
|
||||||
|
|
||||||
@ -57,14 +57,14 @@ will return immediately with :c:member:`HS_SCAN_TERMINATED`. The caller must
|
|||||||
still call :c:func:`hs_close_stream` to complete the clean-up process for that
|
still call :c:func:`hs_close_stream` to complete the clean-up process for that
|
||||||
stream.
|
stream.
|
||||||
|
|
||||||
Streams exist in the Hyperscan library so that pattern matching state can be
|
Streams exist in the Vectorscan library so that pattern matching state can be
|
||||||
maintained across multiple blocks of target data -- without maintaining this
|
maintained across multiple blocks of target data -- without maintaining this
|
||||||
state, it would not be possible to detect patterns that span these blocks of
|
state, it would not be possible to detect patterns that span these blocks of
|
||||||
data. This, however, does come at the cost of requiring an amount of storage
|
data. This, however, does come at the cost of requiring an amount of storage
|
||||||
per-stream (the size of this storage is fixed at compile time), and a slight
|
per-stream (the size of this storage is fixed at compile time), and a slight
|
||||||
performance penalty in some cases to manage the state.
|
performance penalty in some cases to manage the state.
|
||||||
|
|
||||||
While Hyperscan does always support a strict ordering of multiple matches,
|
While Vectorscan does always support a strict ordering of multiple matches,
|
||||||
streaming matches will not be delivered at offsets before the current stream
|
streaming matches will not be delivered at offsets before the current stream
|
||||||
write, with the exception of zero-width asserts, where constructs such as
|
write, with the exception of zero-width asserts, where constructs such as
|
||||||
:regexp:`\\b` and :regexp:`$` can cause a match on the final character of a
|
:regexp:`\\b` and :regexp:`$` can cause a match on the final character of a
|
||||||
@ -76,7 +76,7 @@ Stream Management
|
|||||||
=================
|
=================
|
||||||
|
|
||||||
In addition to :c:func:`hs_open_stream`, :c:func:`hs_scan_stream`, and
|
In addition to :c:func:`hs_open_stream`, :c:func:`hs_scan_stream`, and
|
||||||
:c:func:`hs_close_stream`, the Hyperscan API provides a number of other
|
:c:func:`hs_close_stream`, the Vectorscan API provides a number of other
|
||||||
functions for the management of streams:
|
functions for the management of streams:
|
||||||
|
|
||||||
* :c:func:`hs_reset_stream`: resets a stream to its initial state; this is
|
* :c:func:`hs_reset_stream`: resets a stream to its initial state; this is
|
||||||
@ -98,10 +98,10 @@ A stream object is allocated as a fixed size region of memory which has been
|
|||||||
sized to ensure that no memory allocations are required during scan
|
sized to ensure that no memory allocations are required during scan
|
||||||
operations. When the system is under memory pressure, it may be useful to reduce
|
operations. When the system is under memory pressure, it may be useful to reduce
|
||||||
the memory consumed by streams that are not expected to be used soon. The
|
the memory consumed by streams that are not expected to be used soon. The
|
||||||
Hyperscan API provides calls for translating a stream to and from a compressed
|
Vectorscan API provides calls for translating a stream to and from a compressed
|
||||||
representation for this purpose. The compressed representation differs from the
|
representation for this purpose. The compressed representation differs from the
|
||||||
full stream object as it does not reserve space for components which are not
|
full stream object as it does not reserve space for components which are not
|
||||||
required given the current stream state. The Hyperscan API functions for this
|
required given the current stream state. The Vectorscan API functions for this
|
||||||
functionality are:
|
functionality are:
|
||||||
|
|
||||||
* :c:func:`hs_compress_stream`: fills the provided buffer with a compressed
|
* :c:func:`hs_compress_stream`: fills the provided buffer with a compressed
|
||||||
@ -157,7 +157,7 @@ scanned in block mode.
|
|||||||
Scratch Space
|
Scratch Space
|
||||||
*************
|
*************
|
||||||
|
|
||||||
While scanning data, Hyperscan needs a small amount of temporary memory to store
|
While scanning data, Vectorscan needs a small amount of temporary memory to store
|
||||||
on-the-fly internal data. This amount is unfortunately too large to fit on the
|
on-the-fly internal data. This amount is unfortunately too large to fit on the
|
||||||
stack, particularly for embedded applications, and allocating memory dynamically
|
stack, particularly for embedded applications, and allocating memory dynamically
|
||||||
is too expensive, so a pre-allocated "scratch" space must be provided to the
|
is too expensive, so a pre-allocated "scratch" space must be provided to the
|
||||||
@ -170,7 +170,7 @@ databases, only a single scratch region is necessary: in this case, calling
|
|||||||
will ensure that the scratch space is large enough to support scanning against
|
will ensure that the scratch space is large enough to support scanning against
|
||||||
any of the given databases.
|
any of the given databases.
|
||||||
|
|
||||||
While the Hyperscan library is re-entrant, the use of scratch spaces is not.
|
While the Vectorscan library is re-entrant, the use of scratch spaces is not.
|
||||||
For example, if by design it is deemed necessary to run recursive or nested
|
For example, if by design it is deemed necessary to run recursive or nested
|
||||||
scanning (say, from the match callback function), then an additional scratch
|
scanning (say, from the match callback function), then an additional scratch
|
||||||
space is required for that context.
|
space is required for that context.
|
||||||
@ -219,11 +219,11 @@ For example:
|
|||||||
Custom Allocators
|
Custom Allocators
|
||||||
*****************
|
*****************
|
||||||
|
|
||||||
By default, structures used by Hyperscan at runtime (scratch space, stream
|
By default, structures used by Vectorscan at runtime (scratch space, stream
|
||||||
state, etc) are allocated with the default system allocators, usually
|
state, etc) are allocated with the default system allocators, usually
|
||||||
``malloc()`` and ``free()``.
|
``malloc()`` and ``free()``.
|
||||||
|
|
||||||
The Hyperscan API provides a facility for changing this behaviour to support
|
The Vectorscan API provides a facility for changing this behaviour to support
|
||||||
applications that use custom memory allocators.
|
applications that use custom memory allocators.
|
||||||
|
|
||||||
These functions are:
|
These functions are:
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
Serialization
|
Serialization
|
||||||
#############
|
#############
|
||||||
|
|
||||||
For some applications, compiling Hyperscan pattern databases immediately prior
|
For some applications, compiling Vectorscan pattern databases immediately prior
|
||||||
to use is not an appropriate design. Some users may wish to:
|
to use is not an appropriate design. Some users may wish to:
|
||||||
|
|
||||||
* Compile pattern databases on a different host;
|
* Compile pattern databases on a different host;
|
||||||
@ -14,9 +14,9 @@ to use is not an appropriate design. Some users may wish to:
|
|||||||
|
|
||||||
* Control the region of memory in which the compiled database is located.
|
* Control the region of memory in which the compiled database is located.
|
||||||
|
|
||||||
Hyperscan pattern databases are not completely flat in memory: they contain
|
Vectorscan pattern databases are not completely flat in memory: they contain
|
||||||
pointers and have specific alignment requirements. Therefore, they cannot be
|
pointers and have specific alignment requirements. Therefore, they cannot be
|
||||||
copied (or otherwise relocated) directly. To enable these use cases, Hyperscan
|
copied (or otherwise relocated) directly. To enable these use cases, Vectorscan
|
||||||
provides functionality for serializing and deserializing compiled pattern
|
provides functionality for serializing and deserializing compiled pattern
|
||||||
databases.
|
databases.
|
||||||
|
|
||||||
@ -40,10 +40,10 @@ The API provides the following functions:
|
|||||||
returns a string containing information about the database. This call is
|
returns a string containing information about the database. This call is
|
||||||
analogous to :c:func:`hs_database_info`.
|
analogous to :c:func:`hs_database_info`.
|
||||||
|
|
||||||
.. note:: Hyperscan performs both version and platform compatibility checks
|
.. note:: Vectorscan performs both version and platform compatibility checks
|
||||||
upon deserialization. The :c:func:`hs_deserialize_database` and
|
upon deserialization. The :c:func:`hs_deserialize_database` and
|
||||||
:c:func:`hs_deserialize_database_at` functions will only permit the
|
:c:func:`hs_deserialize_database_at` functions will only permit the
|
||||||
deserialization of databases compiled with (a) the same version of Hyperscan
|
deserialization of databases compiled with (a) the same version of Vectorscan
|
||||||
and (b) platform features supported by the current host platform. See
|
and (b) platform features supported by the current host platform. See
|
||||||
:ref:`instr_specialization` for more information on platform specialization.
|
:ref:`instr_specialization` for more information on platform specialization.
|
||||||
|
|
||||||
@ -51,17 +51,17 @@ The API provides the following functions:
|
|||||||
The Runtime Library
|
The Runtime Library
|
||||||
===================
|
===================
|
||||||
|
|
||||||
The main Hyperscan library (``libhs``) contains both the compiler and runtime
|
The main Vectorscan library (``libhs``) contains both the compiler and runtime
|
||||||
portions of the library. This means that in order to support the Hyperscan
|
portions of the library. This means that in order to support the Vectorscan
|
||||||
compiler, which is written in C++, it requires C++ linkage and has a
|
compiler, which is written in C++, it requires C++ linkage and has a
|
||||||
dependency on the C++ standard library.
|
dependency on the C++ standard library.
|
||||||
|
|
||||||
Many embedded applications require only the scanning ("runtime") portion of the
|
Many embedded applications require only the scanning ("runtime") portion of the
|
||||||
Hyperscan library. In these cases, pattern compilation generally takes place on
|
Vectorscan library. In these cases, pattern compilation generally takes place on
|
||||||
another host, and serialized pattern databases are delivered to the application
|
another host, and serialized pattern databases are delivered to the application
|
||||||
for use.
|
for use.
|
||||||
|
|
||||||
To support these applications without requiring the C++ dependency, a
|
To support these applications without requiring the C++ dependency, a
|
||||||
runtime-only version of the Hyperscan library, called ``libhs_runtime``, is also
|
runtime-only version of the Vectorscan library, called ``libhs_runtime``, is also
|
||||||
distributed. This library does not depend on the C++ standard library and
|
distributed. This library does not depend on the C++ standard library and
|
||||||
provides all Hyperscan functions other than those used to compile databases.
|
provides all Vectorscan functions other than those used to compile databases.
|
||||||
|
@ -4,14 +4,14 @@
|
|||||||
Tools
|
Tools
|
||||||
#####
|
#####
|
||||||
|
|
||||||
This section describes the set of utilities included with the Hyperscan library.
|
This section describes the set of utilities included with the Vectorscan library.
|
||||||
|
|
||||||
********************
|
********************
|
||||||
Quick Check: hscheck
|
Quick Check: hscheck
|
||||||
********************
|
********************
|
||||||
|
|
||||||
The ``hscheck`` tool allows the user to quickly check whether Hyperscan supports
|
The ``hscheck`` tool allows the user to quickly check whether Vectorscan supports
|
||||||
a group of patterns. If a pattern is rejected by Hyperscan's compiler, the
|
a group of patterns. If a pattern is rejected by Vectorscan's compiler, the
|
||||||
compile error is provided on standard output.
|
compile error is provided on standard output.
|
||||||
|
|
||||||
For example, given the following three patterns (the last of which contains a
|
For example, given the following three patterns (the last of which contains a
|
||||||
@ -34,7 +34,7 @@ syntax error) in a file called ``/tmp/test``::
|
|||||||
Benchmarker: hsbench
|
Benchmarker: hsbench
|
||||||
********************
|
********************
|
||||||
|
|
||||||
The ``hsbench`` tool provides an easy way to measure Hyperscan's performance
|
The ``hsbench`` tool provides an easy way to measure Vectorscan's performance
|
||||||
for a particular set of patterns and corpus of data to be scanned.
|
for a particular set of patterns and corpus of data to be scanned.
|
||||||
|
|
||||||
Patterns are supplied in the format described below in
|
Patterns are supplied in the format described below in
|
||||||
@ -44,7 +44,7 @@ easy control of how a corpus is broken into blocks and streams.
|
|||||||
|
|
||||||
.. note:: A group of Python scripts for constructing corpora databases from
|
.. note:: A group of Python scripts for constructing corpora databases from
|
||||||
various input types, such as PCAP network traffic captures or text files, can
|
various input types, such as PCAP network traffic captures or text files, can
|
||||||
be found in the Hyperscan source tree in ``tools/hsbench/scripts``.
|
be found in the Vectorscan source tree in ``tools/hsbench/scripts``.
|
||||||
|
|
||||||
Running hsbench
|
Running hsbench
|
||||||
===============
|
===============
|
||||||
@ -56,7 +56,7 @@ produce output like this::
|
|||||||
$ hsbench -e /tmp/patterns -c /tmp/corpus.db
|
$ hsbench -e /tmp/patterns -c /tmp/corpus.db
|
||||||
|
|
||||||
Signatures: /tmp/patterns
|
Signatures: /tmp/patterns
|
||||||
Hyperscan info: Version: 4.3.1 Features: AVX2 Mode: STREAM
|
Vectorscan info: Version: 5.4.11 Features: AVX2 Mode: STREAM
|
||||||
Expression count: 200
|
Expression count: 200
|
||||||
Bytecode size: 342,540 bytes
|
Bytecode size: 342,540 bytes
|
||||||
Database CRC: 0x6cd6b67c
|
Database CRC: 0x6cd6b67c
|
||||||
@ -77,7 +77,7 @@ takes to perform all twenty scans. The number of repeats can be changed with the
|
|||||||
``-n`` argument, and the results of each scan will be displayed if the
|
``-n`` argument, and the results of each scan will be displayed if the
|
||||||
``--per-scan`` argument is specified.
|
``--per-scan`` argument is specified.
|
||||||
|
|
||||||
To benchmark Hyperscan on more than one core, you can supply a list of cores
|
To benchmark Vectorscan on more than one core, you can supply a list of cores
|
||||||
with the ``-T`` argument, which will instruct ``hsbench`` to start one
|
with the ``-T`` argument, which will instruct ``hsbench`` to start one
|
||||||
benchmark thread per core given and compute the throughput from the time taken
|
benchmark thread per core given and compute the throughput from the time taken
|
||||||
to complete all of them.
|
to complete all of them.
|
||||||
@ -91,17 +91,17 @@ Correctness Testing: hscollider
|
|||||||
*******************************
|
*******************************
|
||||||
|
|
||||||
The ``hscollider`` tool, or Pattern Collider, provides a way to verify
|
The ``hscollider`` tool, or Pattern Collider, provides a way to verify
|
||||||
Hyperscan's matching behaviour. It does this by compiling and scanning patterns
|
Vectorscan's matching behaviour. It does this by compiling and scanning patterns
|
||||||
(either singly or in groups) against known corpora and comparing the results
|
(either singly or in groups) against known corpora and comparing the results
|
||||||
against another engine (the "ground truth"). Two sources of ground truth for
|
against another engine (the "ground truth"). Two sources of ground truth for
|
||||||
comparison are available:
|
comparison are available:
|
||||||
|
|
||||||
* The PCRE library (http://pcre.org/).
|
* The PCRE library (http://pcre.org/).
|
||||||
* An NFA simulation run on Hyperscan's compile-time graph representation. This
|
* An NFA simulation run on Vectorscan's compile-time graph representation. This
|
||||||
is used if PCRE cannot support the pattern or if PCRE execution fails due to
|
is used if PCRE cannot support the pattern or if PCRE execution fails due to
|
||||||
a resource limit.
|
a resource limit.
|
||||||
|
|
||||||
Much of Hyperscan's testing infrastructure is built on ``hscollider``, and the
|
Much of Vectorscan's testing infrastructure is built on ``hscollider``, and the
|
||||||
tool is designed to take advantage of multiple cores and provide considerable
|
tool is designed to take advantage of multiple cores and provide considerable
|
||||||
flexibility in controlling the test. These options are described in the help
|
flexibility in controlling the test. These options are described in the help
|
||||||
(``hscollider -h``) and include:
|
(``hscollider -h``) and include:
|
||||||
@ -116,11 +116,11 @@ flexibility in controlling the test. These options are described in the help
|
|||||||
Using hscollider to debug a pattern
|
Using hscollider to debug a pattern
|
||||||
===================================
|
===================================
|
||||||
|
|
||||||
One common use-case for ``hscollider`` is to determine whether Hyperscan will
|
One common use-case for ``hscollider`` is to determine whether Vectorscan will
|
||||||
match a pattern in the expected location, and whether this accords with PCRE's
|
match a pattern in the expected location, and whether this accords with PCRE's
|
||||||
behaviour for the same case.
|
behaviour for the same case.
|
||||||
|
|
||||||
Here is an example. We put our pattern in a file in Hyperscan's pattern
|
Here is an example. We put our pattern in a file in Vectorscan's pattern
|
||||||
format::
|
format::
|
||||||
|
|
||||||
$ cat /tmp/pat
|
$ cat /tmp/pat
|
||||||
@ -172,7 +172,7 @@ individual matches are displayed in the output::
|
|||||||
|
|
||||||
Total elapsed time: 0.00522815 secs.
|
Total elapsed time: 0.00522815 secs.
|
||||||
|
|
||||||
We can see from this output that both PCRE and Hyperscan find matches ending at
|
We can see from this output that both PCRE and Vectorscan find matches ending at
|
||||||
offset 33 and 45, and so ``hscollider`` considers this test case to have
|
offset 33 and 45, and so ``hscollider`` considers this test case to have
|
||||||
passed.
|
passed.
|
||||||
|
|
||||||
@ -180,13 +180,13 @@ passed.
|
|||||||
corpus alignment 0, and ``-T 1`` instructs us to only use one thread.)
|
corpus alignment 0, and ``-T 1`` instructs us to only use one thread.)
|
||||||
|
|
||||||
.. note:: In default operation, PCRE produces only one match for a scan, unlike
|
.. note:: In default operation, PCRE produces only one match for a scan, unlike
|
||||||
Hyperscan's automata semantics. The ``hscollider`` tool uses libpcre's
|
Vectorscan's automata semantics. The ``hscollider`` tool uses libpcre's
|
||||||
"callout" functionality to match Hyperscan's semantics.
|
"callout" functionality to match Vectorscan's semantics.
|
||||||
|
|
||||||
Running a larger scan test
|
Running a larger scan test
|
||||||
==========================
|
==========================
|
||||||
|
|
||||||
A set of patterns for testing purposes are distributed with Hyperscan, and these
|
A set of patterns for testing purposes are distributed with Vectorscan, and these
|
||||||
can be tested via ``hscollider`` on an in-tree build. Two CMake targets are
|
can be tested via ``hscollider`` on an in-tree build. Two CMake targets are
|
||||||
provided to do this easily:
|
provided to do this easily:
|
||||||
|
|
||||||
@ -202,10 +202,10 @@ Debugging: hsdump
|
|||||||
*****************
|
*****************
|
||||||
|
|
||||||
When built in debug mode (using the CMake directive ``CMAKE_BUILD_TYPE`` set to
|
When built in debug mode (using the CMake directive ``CMAKE_BUILD_TYPE`` set to
|
||||||
``Debug``), Hyperscan includes support for dumping information about its
|
``Debug``), Vectorscan includes support for dumping information about its
|
||||||
internals during pattern compilation with the ``hsdump`` tool.
|
internals during pattern compilation with the ``hsdump`` tool.
|
||||||
|
|
||||||
This information is mostly of use to Hyperscan developers familiar with the
|
This information is mostly of use to Vectorscan developers familiar with the
|
||||||
library's internal structure, but can be used to diagnose issues with patterns
|
library's internal structure, but can be used to diagnose issues with patterns
|
||||||
and provide more information in bug reports.
|
and provide more information in bug reports.
|
||||||
|
|
||||||
@ -215,7 +215,7 @@ and provide more information in bug reports.
|
|||||||
Pattern Format
|
Pattern Format
|
||||||
**************
|
**************
|
||||||
|
|
||||||
All of the Hyperscan tools accept patterns in the same format, read from plain
|
All of the Vectorscan tools accept patterns in the same format, read from plain
|
||||||
text files with one pattern per line. Each line looks like this:
|
text files with one pattern per line. Each line looks like this:
|
||||||
|
|
||||||
* ``<integer id>:/<regex>/<flags>``
|
* ``<integer id>:/<regex>/<flags>``
|
||||||
@ -227,12 +227,12 @@ For example::
|
|||||||
3:/^.{10,20}hatstand/m
|
3:/^.{10,20}hatstand/m
|
||||||
|
|
||||||
The integer ID is the value that will be reported when a match is found by
|
The integer ID is the value that will be reported when a match is found by
|
||||||
Hyperscan and must be unique.
|
Vectorscan and must be unique.
|
||||||
|
|
||||||
The pattern itself is a regular expression in PCRE syntax; see
|
The pattern itself is a regular expression in PCRE syntax; see
|
||||||
:ref:`compilation` for more information on supported features.
|
:ref:`compilation` for more information on supported features.
|
||||||
|
|
||||||
The flags are single characters that map to Hyperscan flags as follows:
|
The flags are single characters that map to Vectorscan flags as follows:
|
||||||
|
|
||||||
========= ================================= ===========
|
========= ================================= ===========
|
||||||
Character API Flag Description
|
Character API Flag Description
|
||||||
@ -256,7 +256,7 @@ between braces, separated by commas. For example::
|
|||||||
|
|
||||||
1:/hatstand.*teakettle/s{min_offset=50,max_offset=100}
|
1:/hatstand.*teakettle/s{min_offset=50,max_offset=100}
|
||||||
|
|
||||||
All Hyperscan tools will accept a pattern file (or a directory containing
|
All Vectorscan tools will accept a pattern file (or a directory containing
|
||||||
pattern files) with the ``-e`` argument. If no further arguments constraining
|
pattern files) with the ``-e`` argument. If no further arguments constraining
|
||||||
the pattern set are given, all patterns in those files are used.
|
the pattern set are given, all patterns in those files are used.
|
||||||
|
|
||||||
|
@ -202,7 +202,7 @@ struct FiveTuple {
|
|||||||
unsigned int dstPort;
|
unsigned int dstPort;
|
||||||
|
|
||||||
// Construct a FiveTuple from a TCP or UDP packet.
|
// Construct a FiveTuple from a TCP or UDP packet.
|
||||||
FiveTuple(const struct ip *iphdr) {
|
explicit FiveTuple(const struct ip *iphdr) {
|
||||||
// IP fields
|
// IP fields
|
||||||
protocol = iphdr->ip_p;
|
protocol = iphdr->ip_p;
|
||||||
srcAddr = iphdr->ip_src.s_addr;
|
srcAddr = iphdr->ip_src.s_addr;
|
||||||
@ -391,7 +391,7 @@ public:
|
|||||||
// Close all open Hyperscan streams (potentially generating any
|
// Close all open Hyperscan streams (potentially generating any
|
||||||
// end-anchored matches)
|
// end-anchored matches)
|
||||||
void closeStreams() {
|
void closeStreams() {
|
||||||
for (auto &stream : streams) {
|
for (const auto &stream : streams) {
|
||||||
hs_error_t err =
|
hs_error_t err =
|
||||||
hs_close_stream(stream, scratch, onMatch, &matchCount);
|
hs_close_stream(stream, scratch, onMatch, &matchCount);
|
||||||
if (err != HS_SUCCESS) {
|
if (err != HS_SUCCESS) {
|
||||||
@ -444,7 +444,7 @@ class Sigdata {
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
Sigdata() {}
|
Sigdata() {}
|
||||||
Sigdata(const char *filename) {
|
explicit Sigdata(const char *filename) {
|
||||||
parseFile(filename, patterns, flags, ids, originals);
|
parseFile(filename, patterns, flags, ids, originals);
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -568,7 +568,7 @@ double measure_block_time(Benchmark &bench, unsigned int repeatCount) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
double eval_set(Benchmark &bench, Sigdata &sigs, unsigned int mode,
|
double eval_set(Benchmark &bench, const Sigdata &sigs, unsigned int mode,
|
||||||
unsigned repeatCount, Criterion criterion,
|
unsigned repeatCount, Criterion criterion,
|
||||||
bool diagnose = true) {
|
bool diagnose = true) {
|
||||||
double compileTime = 0;
|
double compileTime = 0;
|
||||||
@ -608,8 +608,9 @@ double eval_set(Benchmark &bench, Sigdata &sigs, unsigned int mode,
|
|||||||
scan_time = measure_stream_time(bench, repeatCount);
|
scan_time = measure_stream_time(bench, repeatCount);
|
||||||
}
|
}
|
||||||
size_t bytes = bench.bytes();
|
size_t bytes = bench.bytes();
|
||||||
size_t matches = bench.matches();
|
|
||||||
if (diagnose) {
|
if (diagnose) {
|
||||||
|
size_t matches = bench.matches();
|
||||||
std::ios::fmtflags f(cout.flags());
|
std::ios::fmtflags f(cout.flags());
|
||||||
cout << "Scan time " << std::fixed << std::setprecision(3) << scan_time
|
cout << "Scan time " << std::fixed << std::setprecision(3) << scan_time
|
||||||
<< " sec, Scanned " << bytes * repeatCount << " bytes, Throughput "
|
<< " sec, Scanned " << bytes * repeatCount << " bytes, Throughput "
|
||||||
|
@ -100,15 +100,14 @@ struct FiveTuple {
|
|||||||
unsigned int dstPort;
|
unsigned int dstPort;
|
||||||
|
|
||||||
// Construct a FiveTuple from a TCP or UDP packet.
|
// Construct a FiveTuple from a TCP or UDP packet.
|
||||||
FiveTuple(const struct ip *iphdr) {
|
explicit FiveTuple(const struct ip *iphdr) {
|
||||||
// IP fields
|
// IP fields
|
||||||
protocol = iphdr->ip_p;
|
protocol = iphdr->ip_p;
|
||||||
srcAddr = iphdr->ip_src.s_addr;
|
srcAddr = iphdr->ip_src.s_addr;
|
||||||
dstAddr = iphdr->ip_dst.s_addr;
|
dstAddr = iphdr->ip_dst.s_addr;
|
||||||
|
|
||||||
// UDP/TCP ports
|
// UDP/TCP ports
|
||||||
const struct udphdr *uh =
|
// ip_hl * 4 is a byte offset, so advance a char pointer first and cast afterwards; adding it to a udphdr pointer would scale the offset by sizeof(udphdr).
const struct udphdr *uh = reinterpret_cast<const struct udphdr *>(reinterpret_cast<const char *>(iphdr) + (iphdr->ip_hl * 4));
|
||||||
(const struct udphdr *)(((const char *)iphdr) + (iphdr->ip_hl * 4));
|
|
||||||
srcPort = uh->uh_sport;
|
srcPort = uh->uh_sport;
|
||||||
dstPort = uh->uh_dport;
|
dstPort = uh->uh_dport;
|
||||||
}
|
}
|
||||||
@ -137,7 +136,7 @@ static
|
|||||||
int onMatch(unsigned int id, unsigned long long from, unsigned long long to,
|
int onMatch(unsigned int id, unsigned long long from, unsigned long long to,
|
||||||
unsigned int flags, void *ctx) {
|
unsigned int flags, void *ctx) {
|
||||||
// Our context points to a size_t storing the match count
|
// Our context points to a size_t storing the match count
|
||||||
size_t *matches = (size_t *)ctx;
|
size_t *matches = static_cast<size_t *>(ctx);
|
||||||
(*matches)++;
|
(*matches)++;
|
||||||
return 0; // continue matching
|
return 0; // continue matching
|
||||||
}
|
}
|
||||||
@ -233,9 +232,8 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Valid TCP or UDP packet
|
// Valid TCP or UDP packet
|
||||||
const struct ip *iphdr = (const struct ip *)(pktData
|
// The Ethernet header length is a byte offset: add it to the raw packet pointer before casting, otherwise it is scaled by sizeof(struct ip).
const struct ip *iphdr = reinterpret_cast<const struct ip *>(pktData + sizeof(struct ether_header));
|
||||||
+ sizeof(struct ether_header));
|
const char *payload = reinterpret_cast<const char *>(pktData) + offset;
|
||||||
const char *payload = (const char *)pktData + offset;
|
|
||||||
|
|
||||||
size_t id = stream_map.insert(std::make_pair(FiveTuple(iphdr),
|
size_t id = stream_map.insert(std::make_pair(FiveTuple(iphdr),
|
||||||
stream_map.size())).first->second;
|
stream_map.size())).first->second;
|
||||||
@ -281,7 +279,7 @@ public:
|
|||||||
// Close all open Hyperscan streams (potentially generating any
|
// Close all open Hyperscan streams (potentially generating any
|
||||||
// end-anchored matches)
|
// end-anchored matches)
|
||||||
void closeStreams() {
|
void closeStreams() {
|
||||||
for (auto &stream : streams) {
|
for (const auto &stream : streams) {
|
||||||
hs_error_t err = hs_close_stream(stream, scratch, onMatch,
|
hs_error_t err = hs_close_stream(stream, scratch, onMatch,
|
||||||
&matchCount);
|
&matchCount);
|
||||||
if (err != HS_SUCCESS) {
|
if (err != HS_SUCCESS) {
|
||||||
@ -575,7 +573,7 @@ int main(int argc, char **argv) {
|
|||||||
*/
|
*/
|
||||||
static bool payloadOffset(const unsigned char *pkt_data, unsigned int *offset,
|
static bool payloadOffset(const unsigned char *pkt_data, unsigned int *offset,
|
||||||
unsigned int *length) {
|
unsigned int *length) {
|
||||||
const ip *iph = (const ip *)(pkt_data + sizeof(ether_header));
|
// Skip the Ethernet header in bytes on the unsigned char pointer, then cast; adding after the cast would scale the offset by sizeof(ip).
const ip *iph = reinterpret_cast<const ip *>(pkt_data + sizeof(ether_header));
|
||||||
const tcphdr *th = nullptr;
|
const tcphdr *th = nullptr;
|
||||||
|
|
||||||
// Ignore packets that aren't IPv4
|
// Ignore packets that aren't IPv4
|
||||||
@ -594,7 +592,7 @@ static bool payloadOffset(const unsigned char *pkt_data, unsigned int *offset,
|
|||||||
|
|
||||||
switch (iph->ip_p) {
|
switch (iph->ip_p) {
|
||||||
case IPPROTO_TCP:
|
case IPPROTO_TCP:
|
||||||
th = (const tcphdr *)((const char *)iph + ihlen);
|
// ihlen is a byte count, so do the arithmetic on a char pointer before casting to tcphdr; adding to a tcphdr pointer would scale it by sizeof(tcphdr).
th = reinterpret_cast<const tcphdr *>(reinterpret_cast<const char *>(iph) + ihlen);
|
||||||
thlen = th->th_off * 4;
|
thlen = th->th_off * 4;
|
||||||
break;
|
break;
|
||||||
case IPPROTO_UDP:
|
case IPPROTO_UDP:
|
||||||
|
@ -67,7 +67,7 @@
|
|||||||
* to pass in the pattern that was being searched for so we can print it out.
|
* to pass in the pattern that was being searched for so we can print it out.
|
||||||
*/
|
*/
|
||||||
static int eventHandler(unsigned int id, unsigned long long from,
|
static int eventHandler(unsigned int id, unsigned long long from,
|
||||||
unsigned long long to, unsigned int flags, void *ctx) {
|
unsigned long long to, unsigned int flags, void *ctx) { // cppcheck-suppress constParameterCallback
|
||||||
printf("Match for pattern \"%s\" at offset %llu\n", (char *)ctx, to);
|
printf("Match for pattern \"%s\" at offset %llu\n", (char *)ctx, to);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -150,7 +150,7 @@ int main(int argc, char *argv[]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
char *pattern = argv[1];
|
char *pattern = argv[1];
|
||||||
char *inputFN = argv[2];
|
const char *inputFN = argv[2];
|
||||||
|
|
||||||
/* First, we attempt to compile the pattern provided on the command line.
|
/* First, we attempt to compile the pattern provided on the command line.
|
||||||
* We assume 'DOTALL' semantics, meaning that the '.' meta-character will
|
* We assume 'DOTALL' semantics, meaning that the '.' meta-character will
|
||||||
|
@ -4,7 +4,7 @@ libdir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@
|
|||||||
includedir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_INCLUDEDIR@
|
includedir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_INCLUDEDIR@
|
||||||
|
|
||||||
Name: libhs
|
Name: libhs
|
||||||
Description: Intel(R) Hyperscan Library
|
Description: A portable fork of the high-performance regular expression matching library
|
||||||
Version: @HS_VERSION@
|
Version: @HS_VERSION@
|
||||||
Libs: -L${libdir} -lhs
|
Libs: -L${libdir} -lhs
|
||||||
Cflags: -I${includedir}/hs
|
Cflags: -I${includedir}/hs
|
||||||
|
2
simde
2
simde
@ -1 +1 @@
|
|||||||
Subproject commit aae22459fa284e9fc2b7d4b8e4571afa0418125f
|
Subproject commit 416091ebdb9e901b29d026633e73167d6353a0b0
|
@ -176,7 +176,8 @@ void replaceAssertVertex(NGHolder &g, NFAVertex t, const ExpressionInfo &expr,
|
|||||||
auto ecit = edge_cache.find(cache_key);
|
auto ecit = edge_cache.find(cache_key);
|
||||||
if (ecit == edge_cache.end()) {
|
if (ecit == edge_cache.end()) {
|
||||||
DEBUG_PRINTF("adding edge %zu %zu\n", g[u].index, g[v].index);
|
DEBUG_PRINTF("adding edge %zu %zu\n", g[u].index, g[v].index);
|
||||||
NFAEdge e = add_edge(u, v, g);
|
NFAEdge e;
|
||||||
|
std::tie(e, std::ignore) = add_edge(u, v, g);
|
||||||
edge_cache.emplace(cache_key, e);
|
edge_cache.emplace(cache_key, e);
|
||||||
g[e].assert_flags = flags;
|
g[e].assert_flags = flags;
|
||||||
if (++assert_edge_count > MAX_ASSERT_EDGES) {
|
if (++assert_edge_count > MAX_ASSERT_EDGES) {
|
||||||
@ -229,11 +230,12 @@ void checkForMultilineStart(ReportManager &rm, NGHolder &g,
|
|||||||
|
|
||||||
/* we need to interpose a dummy dot vertex between v and accept if
|
/* we need to interpose a dummy dot vertex between v and accept if
|
||||||
* required so that ^ doesn't match trailing \n */
|
* required so that ^ doesn't match trailing \n */
|
||||||
for (const auto &e : out_edges_range(v, g)) {
|
auto deads = [&g=g](const NFAEdge &e) {
|
||||||
if (target(e, g) == g.accept) {
|
return (target(e, g) == g.accept);
|
||||||
dead.emplace_back(e);
|
};
|
||||||
}
|
const auto &er = out_edges_range(v, g);
|
||||||
}
|
std::copy_if(begin(er), end(er), std::back_inserter(dead), deads);
|
||||||
|
|
||||||
/* assert has been resolved; clear flag */
|
/* assert has been resolved; clear flag */
|
||||||
g[v].assert_flags &= ~POS_FLAG_MULTILINE_START;
|
g[v].assert_flags &= ~POS_FLAG_MULTILINE_START;
|
||||||
}
|
}
|
||||||
|
@ -443,7 +443,7 @@ bytecode_ptr<RoseEngine> generateRoseEngine(NG &ng) {
|
|||||||
if (!rose) {
|
if (!rose) {
|
||||||
DEBUG_PRINTF("error building rose\n");
|
DEBUG_PRINTF("error building rose\n");
|
||||||
assert(0);
|
assert(0);
|
||||||
return nullptr;
|
return bytecode_ptr<RoseEngine>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
dumpReportManager(ng.rm, ng.cc.grey);
|
dumpReportManager(ng.rm, ng.cc.grey);
|
||||||
@ -478,7 +478,7 @@ hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) {
|
|||||||
DEBUG_PRINTF("db size %zu\n", db_len);
|
DEBUG_PRINTF("db size %zu\n", db_len);
|
||||||
DEBUG_PRINTF("db platform %llx\n", platform);
|
DEBUG_PRINTF("db platform %llx\n", platform);
|
||||||
|
|
||||||
struct hs_database *db = (struct hs_database *)hs_database_alloc(db_len);
|
struct hs_database *db = static_cast<struct hs_database *>(hs_database_alloc(db_len));
|
||||||
if (hs_check_alloc(db) != HS_SUCCESS) {
|
if (hs_check_alloc(db) != HS_SUCCESS) {
|
||||||
hs_database_free(db);
|
hs_database_free(db);
|
||||||
return nullptr;
|
return nullptr;
|
||||||
@ -492,7 +492,7 @@ hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) {
|
|||||||
DEBUG_PRINTF("shift is %zu\n", shift);
|
DEBUG_PRINTF("shift is %zu\n", shift);
|
||||||
|
|
||||||
db->bytecode = offsetof(struct hs_database, bytes) - shift;
|
db->bytecode = offsetof(struct hs_database, bytes) - shift;
|
||||||
char *bytecode = (char *)db + db->bytecode;
|
char *bytecode = reinterpret_cast<char *>(db) + db->bytecode;
|
||||||
assert(ISALIGNED_CL(bytecode));
|
assert(ISALIGNED_CL(bytecode));
|
||||||
|
|
||||||
db->magic = HS_DB_MAGIC;
|
db->magic = HS_DB_MAGIC;
|
||||||
@ -525,7 +525,7 @@ struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag) {
|
|||||||
throw CompileError("Internal error.");
|
throw CompileError("Internal error.");
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *bytecode = (const char *)(rose.get());
|
const char *bytecode = reinterpret_cast<const char *>(rose.get());
|
||||||
const platform_t p = target_to_platform(ng.cc.target_info);
|
const platform_t p = target_to_platform(ng.cc.target_info);
|
||||||
struct hs_database *db = dbCreate(bytecode, *length, p);
|
struct hs_database *db = dbCreate(bytecode, *length, p);
|
||||||
if (!db) {
|
if (!db) {
|
||||||
|
@ -57,15 +57,14 @@ extern const hs_compile_error_t hs_badalloc = {
|
|||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
hs_compile_error_t *generateCompileError(const string &err, int expression) {
|
hs_compile_error_t *generateCompileError(const string &err, int expression) {
|
||||||
hs_compile_error_t *ret =
|
hs_compile_error_t *ret = static_cast<struct hs_compile_error *>(hs_misc_alloc(sizeof(hs_compile_error_t)));
|
||||||
(struct hs_compile_error *)hs_misc_alloc(sizeof(hs_compile_error_t));
|
|
||||||
if (ret) {
|
if (ret) {
|
||||||
hs_error_t e = hs_check_alloc(ret);
|
hs_error_t e = hs_check_alloc(ret);
|
||||||
if (e != HS_SUCCESS) {
|
if (e != HS_SUCCESS) {
|
||||||
hs_misc_free(ret);
|
hs_misc_free(ret);
|
||||||
return const_cast<hs_compile_error_t *>(&hs_badalloc);
|
return const_cast<hs_compile_error_t *>(&hs_badalloc);
|
||||||
}
|
}
|
||||||
char *msg = (char *)hs_misc_alloc(err.size() + 1);
|
char *msg = static_cast<char *>(hs_misc_alloc(err.size() + 1));
|
||||||
if (msg) {
|
if (msg) {
|
||||||
e = hs_check_alloc(msg);
|
e = hs_check_alloc(msg);
|
||||||
if (e != HS_SUCCESS) {
|
if (e != HS_SUCCESS) {
|
||||||
|
@ -542,14 +542,13 @@ u32 crc32c_sb8_64_bit(u32 running_crc, const unsigned char* p_buf,
|
|||||||
|
|
||||||
// Main aligned loop, processes eight bytes at a time.
|
// Main aligned loop, processes eight bytes at a time.
|
||||||
|
|
||||||
u32 term1, term2;
|
|
||||||
for (size_t li = 0; li < running_length/8; li++) {
|
for (size_t li = 0; li < running_length/8; li++) {
|
||||||
u32 block = *(const u32 *)p_buf;
|
u32 block = *(const u32 *)p_buf;
|
||||||
crc ^= block;
|
crc ^= block;
|
||||||
p_buf += 4;
|
p_buf += 4;
|
||||||
term1 = crc_tableil8_o88[crc & 0x000000FF] ^
|
u32 term1 = crc_tableil8_o88[crc & 0x000000FF] ^
|
||||||
crc_tableil8_o80[(crc >> 8) & 0x000000FF];
|
crc_tableil8_o80[(crc >> 8) & 0x000000FF];
|
||||||
term2 = crc >> 16;
|
u32 term2 = crc >> 16;
|
||||||
crc = term1 ^
|
crc = term1 ^
|
||||||
crc_tableil8_o72[term2 & 0x000000FF] ^
|
crc_tableil8_o72[term2 & 0x000000FF] ^
|
||||||
crc_tableil8_o64[(term2 >> 8) & 0x000000FF];
|
crc_tableil8_o64[(term2 >> 8) & 0x000000FF];
|
||||||
|
@ -79,21 +79,18 @@ static UNUSED
|
|||||||
const platform_t hs_current_platform_no_avx2 = {
|
const platform_t hs_current_platform_no_avx2 = {
|
||||||
HS_PLATFORM_NOAVX2 |
|
HS_PLATFORM_NOAVX2 |
|
||||||
HS_PLATFORM_NOAVX512 |
|
HS_PLATFORM_NOAVX512 |
|
||||||
HS_PLATFORM_NOAVX512VBMI |
|
HS_PLATFORM_NOAVX512VBMI
|
||||||
0,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static UNUSED
|
static UNUSED
|
||||||
const platform_t hs_current_platform_no_avx512 = {
|
const platform_t hs_current_platform_no_avx512 = {
|
||||||
HS_PLATFORM_NOAVX512 |
|
HS_PLATFORM_NOAVX512 |
|
||||||
HS_PLATFORM_NOAVX512VBMI |
|
HS_PLATFORM_NOAVX512VBMI
|
||||||
0,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static UNUSED
|
static UNUSED
|
||||||
const platform_t hs_current_platform_no_avx512vbmi = {
|
const platform_t hs_current_platform_no_avx512vbmi = {
|
||||||
HS_PLATFORM_NOAVX512VBMI |
|
HS_PLATFORM_NOAVX512VBMI
|
||||||
0,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
249
src/dispatcher.c
249
src/dispatcher.c
@ -1,5 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016-2020, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
|
* Copyright (c) 2024, VectorCamp PC
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -30,6 +31,39 @@
|
|||||||
#include "hs_common.h"
|
#include "hs_common.h"
|
||||||
#include "hs_runtime.h"
|
#include "hs_runtime.h"
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
|
|
||||||
|
/* Streamlining the dispatch to eliminate runtime checking/branching:
|
||||||
|
* What we want to do is, first call to the function will run the resolve
|
||||||
|
* code and set the static resolved/dispatch pointer to point to the
|
||||||
|
* correct function. Subsequent calls to the function will go directly to
|
||||||
|
* the resolved ptr. The simplest way to accomplish this is to
|
||||||
|
* initially set the pointer to the resolve function.
|
||||||
|
* To accomplish this in a manner invisible to the user,
|
||||||
|
* we do involve some rather ugly/confusing macros in here.
|
||||||
|
* There are four macros that assemble the code for each function
|
||||||
|
* we want to dispatch in this manner:
|
||||||
|
* CREATE_DISPATCH
|
||||||
|
* this generates the declarations for the candidate target functions,
|
||||||
|
* for the fat_dispatch function pointer, for the resolve_ function,
|
||||||
|
* points the function pointer to the resolve function, and contains
|
||||||
|
* most of the definition of the resolve function. The very end of the
|
||||||
|
* resolve function is completed by the next macro, because in the
|
||||||
|
* CREATE_DISPATCH macro we have the argument list with the arg declarations,
|
||||||
|
* which is needed to generate correct function signatures, but we
|
||||||
|
* can't generate from this, in a macro, a _call_ to one of those functions.
|
||||||
|
* CONNECT_ARGS_1
|
||||||
|
* this macro fills in the actual call at the end of the resolve function,
|
||||||
|
* with the correct arg list. hence the name connect args.
|
||||||
|
* CONNECT_DISPATCH_2
|
||||||
|
* this macro likewise gives us the beginning of the definition of the
|
||||||
|
* actual entry point function (the 'real name' that's called by the user)
|
||||||
|
* but again in the pass-through call, cannot invoke the target without
|
||||||
|
* getting the arg list, which is supplied by the final macro,
|
||||||
|
* CONNECT_ARGS_3
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
|
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
|
||||||
#include "util/arch/x86/cpuid_inline.h"
|
#include "util/arch/x86/cpuid_inline.h"
|
||||||
#include "util/join.h"
|
#include "util/join.h"
|
||||||
@ -57,30 +91,38 @@
|
|||||||
return (RTYPE)HS_ARCH_ERROR; \
|
return (RTYPE)HS_ARCH_ERROR; \
|
||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
/* resolver */ \
|
/* dispatch routing pointer for this function */ \
|
||||||
static RTYPE (*JOIN(resolve_, NAME)(void))(__VA_ARGS__) { \
|
/* initially point it at the resolve function */ \
|
||||||
if (check_avx512vbmi()) { \
|
static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__); \
|
||||||
return JOIN(avx512vbmi_, NAME); \
|
static RTYPE (* JOIN(fat_dispatch_, NAME))(__VA_ARGS__) = \
|
||||||
} \
|
&JOIN(resolve_, NAME); \
|
||||||
if (check_avx512()) { \
|
|
||||||
return JOIN(avx512_, NAME); \
|
|
||||||
} \
|
|
||||||
if (check_avx2()) { \
|
|
||||||
return JOIN(avx2_, NAME); \
|
|
||||||
} \
|
|
||||||
if (check_sse42() && check_popcnt()) { \
|
|
||||||
return JOIN(corei7_, NAME); \
|
|
||||||
} \
|
|
||||||
if (check_ssse3()) { \
|
|
||||||
return JOIN(core2_, NAME); \
|
|
||||||
} \
|
|
||||||
/* anything else is fail */ \
|
|
||||||
return JOIN(error_, NAME); \
|
|
||||||
} \
|
|
||||||
\
|
\
|
||||||
/* function */ \
|
/* resolver */ \
|
||||||
HS_PUBLIC_API \
|
static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__) { \
|
||||||
RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME)))
|
if (check_avx512vbmi()) { \
|
||||||
|
fat_dispatch_ ## NAME = &JOIN(avx512vbmi_, NAME); \
|
||||||
|
} \
|
||||||
|
else if (check_avx512()) { \
|
||||||
|
fat_dispatch_ ## NAME = &JOIN(avx512_, NAME); \
|
||||||
|
} \
|
||||||
|
else if (check_avx2()) { \
|
||||||
|
fat_dispatch_ ## NAME = &JOIN(avx2_, NAME); \
|
||||||
|
} \
|
||||||
|
else if (check_sse42() && check_popcnt()) { \
|
||||||
|
fat_dispatch_ ## NAME = &JOIN(corei7_, NAME); \
|
||||||
|
} \
|
||||||
|
else if (check_ssse3()) { \
|
||||||
|
fat_dispatch_ ## NAME = &JOIN(core2_, NAME); \
|
||||||
|
} else { \
|
||||||
|
/* anything else is fail */ \
|
||||||
|
fat_dispatch_ ## NAME = &JOIN(error_, NAME); \
|
||||||
|
} \
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* the rest of the function is completed in the CONNECT_ARGS_1 macro. */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#elif defined(ARCH_AARCH64)
|
#elif defined(ARCH_AARCH64)
|
||||||
#include "util/arch/arm/cpuid_inline.h"
|
#include "util/arch/arm/cpuid_inline.h"
|
||||||
@ -97,99 +139,226 @@
|
|||||||
return (RTYPE)HS_ARCH_ERROR; \
|
return (RTYPE)HS_ARCH_ERROR; \
|
||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
/* resolver */ \
|
/* dispatch routing pointer for this function */ \
|
||||||
static RTYPE (*JOIN(resolve_, NAME)(void))(__VA_ARGS__) { \
|
/* initially point it at the resolve function */ \
|
||||||
if (check_sve2()) { \
|
static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__); \
|
||||||
return JOIN(sve2_, NAME); \
|
static RTYPE (* JOIN(fat_dispatch_, NAME))(__VA_ARGS__) = \
|
||||||
} \
|
&JOIN(resolve_, NAME); \
|
||||||
if (check_sve()) { \
|
|
||||||
return JOIN(sve_, NAME); \
|
|
||||||
} \
|
|
||||||
if (check_neon()) { \
|
|
||||||
return JOIN(neon_, NAME); \
|
|
||||||
} \
|
|
||||||
/* anything else is fail */ \
|
|
||||||
return JOIN(error_, NAME); \
|
|
||||||
} \
|
|
||||||
\
|
\
|
||||||
/* function */ \
|
/* resolver */ \
|
||||||
HS_PUBLIC_API \
|
static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__) { \
|
||||||
RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME)))
|
if (check_sve2()) { \
|
||||||
|
fat_dispatch_ ## NAME = &JOIN(sve2_, NAME); \
|
||||||
|
} \
|
||||||
|
else if (check_sve()) { \
|
||||||
|
fat_dispatch_ ## NAME = &JOIN(sve_, NAME); \
|
||||||
|
} \
|
||||||
|
else if (check_neon()) { \
|
||||||
|
fat_dispatch_ ## NAME = &JOIN(neon_, NAME); \
|
||||||
|
} else { \
|
||||||
|
/* anything else is fail */ \
|
||||||
|
fat_dispatch_ ## NAME = &JOIN(error_, NAME); \
|
||||||
|
} \
|
||||||
|
|
||||||
|
|
||||||
|
/* the rest of the function is completed in the CONNECT_ARGS_1 macro. */
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#define CONNECT_ARGS_1(RTYPE, NAME, ...) \
|
||||||
|
return (*fat_dispatch_ ## NAME)(__VA_ARGS__); \
|
||||||
|
} \
|
||||||
|
|
||||||
|
|
||||||
|
#define CONNECT_DISPATCH_2(RTYPE, NAME, ...) \
|
||||||
|
/* new function */ \
|
||||||
|
HS_PUBLIC_API \
|
||||||
|
RTYPE NAME(__VA_ARGS__) { \
|
||||||
|
|
||||||
|
|
||||||
|
#define CONNECT_ARGS_3(RTYPE, NAME, ...) \
|
||||||
|
return (*fat_dispatch_ ## NAME)(__VA_ARGS__); \
|
||||||
|
} \
|
||||||
|
|
||||||
|
|
||||||
#pragma GCC diagnostic push
|
#pragma GCC diagnostic push
|
||||||
#pragma GCC diagnostic ignored "-Wunused-parameter"
|
#pragma GCC diagnostic ignored "-Wunused-parameter"
|
||||||
#pragma GCC diagnostic push
|
#pragma GCC diagnostic push
|
||||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||||
|
|
||||||
|
/* this gets a bit ugly to compose the static redirect functions,
|
||||||
|
* as we necessarily need first the typed arg list and then just the arg
|
||||||
|
* names, twice in a row, to define the redirect function and the
|
||||||
|
* dispatch function call */
|
||||||
|
|
||||||
CREATE_DISPATCH(hs_error_t, hs_scan, const hs_database_t *db, const char *data,
|
CREATE_DISPATCH(hs_error_t, hs_scan, const hs_database_t *db, const char *data,
|
||||||
unsigned length, unsigned flags, hs_scratch_t *scratch,
|
unsigned length, unsigned flags, hs_scratch_t *scratch,
|
||||||
match_event_handler onEvent, void *userCtx);
|
match_event_handler onEvent, void *userCtx);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_scan, db, data, length, flags, scratch, onEvent, userCtx);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_scan, const hs_database_t *db, const char *data,
|
||||||
|
unsigned length, unsigned flags, hs_scratch_t *scratch,
|
||||||
|
match_event_handler onEvent, void *userCtx);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_scan, db, data, length, flags, scratch, onEvent, userCtx);
|
||||||
|
|
||||||
CREATE_DISPATCH(hs_error_t, hs_stream_size, const hs_database_t *database,
|
CREATE_DISPATCH(hs_error_t, hs_stream_size, const hs_database_t *database,
|
||||||
size_t *stream_size);
|
size_t *stream_size);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_stream_size, database, stream_size);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_stream_size, const hs_database_t *database,
|
||||||
|
size_t *stream_size);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_stream_size, database, stream_size);
|
||||||
|
|
||||||
CREATE_DISPATCH(hs_error_t, hs_database_size, const hs_database_t *db,
|
CREATE_DISPATCH(hs_error_t, hs_database_size, const hs_database_t *db,
|
||||||
size_t *size);
|
size_t *size);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_database_size, db, size);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_database_size, const hs_database_t *db,
|
||||||
|
size_t *size);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_database_size, db, size);
|
||||||
|
|
||||||
CREATE_DISPATCH(hs_error_t, dbIsValid, const hs_database_t *db);
|
CREATE_DISPATCH(hs_error_t, dbIsValid, const hs_database_t *db);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, dbIsValid, db);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, dbIsValid, const hs_database_t *db);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, dbIsValid, db);
|
||||||
|
|
||||||
CREATE_DISPATCH(hs_error_t, hs_free_database, hs_database_t *db);
|
CREATE_DISPATCH(hs_error_t, hs_free_database, hs_database_t *db);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_free_database, db);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_free_database, hs_database_t *db);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_free_database, db);
|
||||||
|
|
||||||
CREATE_DISPATCH(hs_error_t, hs_open_stream, const hs_database_t *db,
|
CREATE_DISPATCH(hs_error_t, hs_open_stream, const hs_database_t *db,
|
||||||
unsigned int flags, hs_stream_t **stream);
|
unsigned int flags, hs_stream_t **stream);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_open_stream, db, flags, stream);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_open_stream, const hs_database_t *db,
|
||||||
|
unsigned int flags, hs_stream_t **stream);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_open_stream, db, flags, stream);
|
||||||
|
|
||||||
CREATE_DISPATCH(hs_error_t, hs_scan_stream, hs_stream_t *id, const char *data,
|
CREATE_DISPATCH(hs_error_t, hs_scan_stream, hs_stream_t *id, const char *data,
|
||||||
unsigned int length, unsigned int flags, hs_scratch_t *scratch,
|
unsigned int length, unsigned int flags, hs_scratch_t *scratch,
|
||||||
match_event_handler onEvent, void *ctxt);
|
match_event_handler onEvent, void *ctxt);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_scan_stream, id, data, length, flags, scratch, onEvent, ctxt);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_scan_stream, hs_stream_t *id, const char *data,
|
||||||
|
unsigned int length, unsigned int flags, hs_scratch_t *scratch,
|
||||||
|
match_event_handler onEvent, void *ctxt);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_scan_stream, id, data, length, flags, scratch, onEvent, ctxt);
|
||||||
|
|
||||||
CREATE_DISPATCH(hs_error_t, hs_close_stream, hs_stream_t *id,
|
CREATE_DISPATCH(hs_error_t, hs_close_stream, hs_stream_t *id,
|
||||||
hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt);
|
hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_close_stream, id, scratch, onEvent, ctxt);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_close_stream, hs_stream_t *id,
|
||||||
|
hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_close_stream, id, scratch, onEvent, ctxt);
|
||||||
|
|
||||||
CREATE_DISPATCH(hs_error_t, hs_scan_vector, const hs_database_t *db,
|
CREATE_DISPATCH(hs_error_t, hs_scan_vector, const hs_database_t *db,
|
||||||
const char *const *data, const unsigned int *length,
|
const char *const *data, const unsigned int *length,
|
||||||
unsigned int count, unsigned int flags, hs_scratch_t *scratch,
|
unsigned int count, unsigned int flags, hs_scratch_t *scratch,
|
||||||
match_event_handler onevent, void *context);
|
match_event_handler onevent, void *context);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_scan_vector, db, data, length, count, flags, scratch, onevent, context);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_scan_vector, const hs_database_t *db,
|
||||||
|
const char *const *data, const unsigned int *length,
|
||||||
|
unsigned int count, unsigned int flags, hs_scratch_t *scratch,
|
||||||
|
match_event_handler onevent, void *context);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_scan_vector, db, data, length, count, flags, scratch, onevent, context);
|
||||||
|
|
||||||
CREATE_DISPATCH(hs_error_t, hs_database_info, const hs_database_t *db, char **info);
|
CREATE_DISPATCH(hs_error_t, hs_database_info, const hs_database_t *db, char **info);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_database_info, db, info);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_database_info, const hs_database_t *db, char **info);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_database_info, db, info);
|
||||||
|
|
||||||
CREATE_DISPATCH(hs_error_t, hs_copy_stream, hs_stream_t **to_id,
|
CREATE_DISPATCH(hs_error_t, hs_copy_stream, hs_stream_t **to_id,
|
||||||
const hs_stream_t *from_id);
|
const hs_stream_t *from_id);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_copy_stream, to_id, from_id);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_copy_stream, hs_stream_t **to_id,
|
||||||
|
const hs_stream_t *from_id);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_copy_stream, to_id, from_id);
|
||||||
|
|
||||||
CREATE_DISPATCH(hs_error_t, hs_reset_stream, hs_stream_t *id,
|
CREATE_DISPATCH(hs_error_t, hs_reset_stream, hs_stream_t *id,
|
||||||
unsigned int flags, hs_scratch_t *scratch,
|
unsigned int flags, hs_scratch_t *scratch,
|
||||||
match_event_handler onEvent, void *context);
|
match_event_handler onEvent, void *context);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_reset_stream, id, flags, scratch, onEvent, context);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_reset_stream, hs_stream_t *id,
|
||||||
|
unsigned int flags, hs_scratch_t *scratch,
|
||||||
|
match_event_handler onEvent, void *context);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_reset_stream, id, flags, scratch, onEvent, context);
|
||||||
|
|
||||||
CREATE_DISPATCH(hs_error_t, hs_reset_and_copy_stream, hs_stream_t *to_id,
|
CREATE_DISPATCH(hs_error_t, hs_reset_and_copy_stream, hs_stream_t *to_id,
|
||||||
const hs_stream_t *from_id, hs_scratch_t *scratch,
|
const hs_stream_t *from_id, hs_scratch_t *scratch,
|
||||||
match_event_handler onEvent, void *context);
|
match_event_handler onEvent, void *context);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_reset_and_copy_stream, to_id, from_id, scratch, onEvent, context);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_reset_and_copy_stream, hs_stream_t *to_id,
|
||||||
|
const hs_stream_t *from_id, hs_scratch_t *scratch,
|
||||||
|
match_event_handler onEvent, void *context);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_reset_and_copy_stream, to_id, from_id, scratch, onEvent, context);
|
||||||
|
|
||||||
CREATE_DISPATCH(hs_error_t, hs_serialize_database, const hs_database_t *db,
|
CREATE_DISPATCH(hs_error_t, hs_serialize_database, const hs_database_t *db,
|
||||||
char **bytes, size_t *length);
|
char **bytes, size_t *length);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_serialize_database, db, bytes, length);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_serialize_database, const hs_database_t *db,
|
||||||
|
char **bytes, size_t *length);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_serialize_database, db, bytes, length);
|
||||||
|
|
||||||
CREATE_DISPATCH(hs_error_t, hs_deserialize_database, const char *bytes,
|
CREATE_DISPATCH(hs_error_t, hs_deserialize_database, const char *bytes,
|
||||||
const size_t length, hs_database_t **db);
|
const size_t length, hs_database_t **db);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_deserialize_database, bytes, length, db);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_deserialize_database, const char *bytes,
|
||||||
|
const size_t length, hs_database_t **db);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_deserialize_database, bytes, length, db);
|
||||||
|
|
||||||
CREATE_DISPATCH(hs_error_t, hs_deserialize_database_at, const char *bytes,
|
CREATE_DISPATCH(hs_error_t, hs_deserialize_database_at, const char *bytes,
|
||||||
const size_t length, hs_database_t *db);
|
const size_t length, hs_database_t *db);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_deserialize_database_at, bytes, length, db);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_deserialize_database_at, const char *bytes,
|
||||||
|
const size_t length, hs_database_t *db);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_deserialize_database_at, bytes, length, db);
|
||||||
|
|
||||||
CREATE_DISPATCH(hs_error_t, hs_serialized_database_info, const char *bytes,
|
CREATE_DISPATCH(hs_error_t, hs_serialized_database_info, const char *bytes,
|
||||||
size_t length, char **info);
|
size_t length, char **info);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_serialized_database_info, bytes, length, info);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_serialized_database_info, const char *bytes,
|
||||||
|
size_t length, char **info);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_serialized_database_info, bytes, length, info);
|
||||||
|
|
||||||
CREATE_DISPATCH(hs_error_t, hs_serialized_database_size, const char *bytes,
|
CREATE_DISPATCH(hs_error_t, hs_serialized_database_size, const char *bytes,
|
||||||
const size_t length, size_t *deserialized_size);
|
const size_t length, size_t *deserialized_size);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_serialized_database_size, bytes, length, deserialized_size);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_serialized_database_size, const char *bytes,
|
||||||
|
const size_t length, size_t *deserialized_size);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_serialized_database_size, bytes, length, deserialized_size);
|
||||||
|
|
||||||
CREATE_DISPATCH(hs_error_t, hs_compress_stream, const hs_stream_t *stream,
|
CREATE_DISPATCH(hs_error_t, hs_compress_stream, const hs_stream_t *stream,
|
||||||
char *buf, size_t buf_space, size_t *used_space);
|
char *buf, size_t buf_space, size_t *used_space);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_compress_stream, stream,
|
||||||
|
buf, buf_space, used_space);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_compress_stream, const hs_stream_t *stream,
|
||||||
|
char *buf, size_t buf_space, size_t *used_space);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_compress_stream, stream,
|
||||||
|
buf, buf_space, used_space);
|
||||||
|
|
||||||
CREATE_DISPATCH(hs_error_t, hs_expand_stream, const hs_database_t *db,
|
CREATE_DISPATCH(hs_error_t, hs_expand_stream, const hs_database_t *db,
|
||||||
hs_stream_t **stream, const char *buf,size_t buf_size);
|
hs_stream_t **stream, const char *buf,size_t buf_size);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_expand_stream, db, stream, buf,buf_size);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_expand_stream, const hs_database_t *db,
|
||||||
|
hs_stream_t **stream, const char *buf,size_t buf_size);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_expand_stream, db, stream, buf,buf_size);
|
||||||
|
|
||||||
CREATE_DISPATCH(hs_error_t, hs_reset_and_expand_stream, hs_stream_t *to_stream,
|
CREATE_DISPATCH(hs_error_t, hs_reset_and_expand_stream, hs_stream_t *to_stream,
|
||||||
const char *buf, size_t buf_size, hs_scratch_t *scratch,
|
const char *buf, size_t buf_size, hs_scratch_t *scratch,
|
||||||
match_event_handler onEvent, void *context);
|
match_event_handler onEvent, void *context);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_reset_and_expand_stream, to_stream,
|
||||||
|
buf, buf_size, scratch, onEvent, context);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_reset_and_expand_stream, hs_stream_t *to_stream,
|
||||||
|
const char *buf, size_t buf_size, hs_scratch_t *scratch,
|
||||||
|
match_event_handler onEvent, void *context);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_reset_and_expand_stream, to_stream,
|
||||||
|
buf, buf_size, scratch, onEvent, context);
|
||||||
|
|
||||||
/** INTERNALS **/
|
/** INTERNALS **/
|
||||||
|
|
||||||
CREATE_DISPATCH(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen);
|
CREATE_DISPATCH(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen);
|
||||||
|
CONNECT_ARGS_1(u32, Crc32c_ComputeBuf, inCrc32, buf, bufLen);
|
||||||
|
CONNECT_DISPATCH_2(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen);
|
||||||
|
CONNECT_ARGS_3(u32, Crc32c_ComputeBuf, inCrc32, buf, bufLen);
|
||||||
|
|
||||||
#pragma GCC diagnostic pop
|
#pragma GCC diagnostic pop
|
||||||
#pragma GCC diagnostic pop
|
#pragma GCC diagnostic pop
|
||||||
|
|
||||||
|
@ -298,7 +298,7 @@ void get_conf_stride_4(const u8 *itPtr, UNUSED const u8 *start_ptr,
|
|||||||
static really_inline
|
static really_inline
|
||||||
void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control,
|
void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control,
|
||||||
const u32 *confBase, const struct FDR_Runtime_Args *a,
|
const u32 *confBase, const struct FDR_Runtime_Args *a,
|
||||||
const u8 *ptr, u32 *last_match_id, struct zone *z) {
|
const u8 *ptr, u32 *last_match_id, const struct zone *z) {
|
||||||
const u8 bucket = 8;
|
const u8 bucket = 8;
|
||||||
|
|
||||||
if (likely(!*conf)) {
|
if (likely(!*conf)) {
|
||||||
@ -333,7 +333,7 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
void dumpZoneInfo(UNUSED struct zone *z, UNUSED size_t zone_id) {
|
void dumpZoneInfo(UNUSED const struct zone *z, UNUSED size_t zone_id) {
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
DEBUG_PRINTF("zone: zone=%zu, bufPtr=%p\n", zone_id, z->buf);
|
DEBUG_PRINTF("zone: zone=%zu, bufPtr=%p\n", zone_id, z->buf);
|
||||||
DEBUG_PRINTF("zone: startPtr=%p, endPtr=%p, shift=%u\n",
|
DEBUG_PRINTF("zone: startPtr=%p, endPtr=%p, shift=%u\n",
|
||||||
|
@ -127,7 +127,7 @@ void andMask(u8 *dest, const u8 *a, const u8 *b, u32 num_bytes) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void FDRCompiler::createInitialState(FDR *fdr) {
|
void FDRCompiler::createInitialState(FDR *fdr) {
|
||||||
u8 *start = (u8 *)&fdr->start;
|
u8 *start = reinterpret_cast<u8 *>(&fdr->start);
|
||||||
|
|
||||||
/* initial state should be 1 in each slot in the bucket up to bucket
|
/* initial state should be 1 in each slot in the bucket up to bucket
|
||||||
* minlen - 1, and 0 thereafter */
|
* minlen - 1, and 0 thereafter */
|
||||||
@ -176,7 +176,7 @@ bytecode_ptr<FDR> FDRCompiler::setupFDR() {
|
|||||||
auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64);
|
auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64);
|
||||||
assert(fdr); // otherwise would have thrown std::bad_alloc
|
assert(fdr); // otherwise would have thrown std::bad_alloc
|
||||||
|
|
||||||
u8 *fdr_base = (u8 *)fdr.get();
|
u8 *fdr_base = reinterpret_cast<u8 *>(fdr.get());
|
||||||
|
|
||||||
// Write header.
|
// Write header.
|
||||||
fdr->size = size;
|
fdr->size = size;
|
||||||
|
@ -58,7 +58,7 @@ u64a make_u64a_mask(const vector<u8> &v) {
|
|||||||
u64a mask = 0;
|
u64a mask = 0;
|
||||||
size_t vlen = v.size();
|
size_t vlen = v.size();
|
||||||
size_t len = std::min(vlen, sizeof(mask));
|
size_t len = std::min(vlen, sizeof(mask));
|
||||||
unsigned char *m = (unsigned char *)&mask;
|
u8 *m = reinterpret_cast<u8 *>(&mask);
|
||||||
memcpy(m + sizeof(mask) - len, &v[vlen - len], len);
|
memcpy(m + sizeof(mask) - len, &v[vlen - len], len);
|
||||||
return mask;
|
return mask;
|
||||||
}
|
}
|
||||||
@ -159,7 +159,7 @@ bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
|
|||||||
map<u32, vector<LiteralIndex> > res2lits;
|
map<u32, vector<LiteralIndex> > res2lits;
|
||||||
hwlm_group_t gm = 0;
|
hwlm_group_t gm = 0;
|
||||||
for (LiteralIndex i = 0; i < lits.size(); i++) {
|
for (LiteralIndex i = 0; i < lits.size(); i++) {
|
||||||
LitInfo & li = tmpLitInfo[i];
|
const LitInfo & li = tmpLitInfo[i];
|
||||||
u32 hash = CONF_HASH_CALL(li.v, andmsk, mult, nBits);
|
u32 hash = CONF_HASH_CALL(li.v, andmsk, mult, nBits);
|
||||||
DEBUG_PRINTF("%016llx --> %u\n", li.v, hash);
|
DEBUG_PRINTF("%016llx --> %u\n", li.v, hash);
|
||||||
res2lits[hash].emplace_back(i);
|
res2lits[hash].emplace_back(i);
|
||||||
@ -245,10 +245,10 @@ bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
|
|||||||
fdrc->groups = gm;
|
fdrc->groups = gm;
|
||||||
|
|
||||||
// After the FDRConfirm, we have the lit index array.
|
// After the FDRConfirm, we have the lit index array.
|
||||||
u8 *fdrc_base = (u8 *)fdrc.get();
|
u8 *fdrc_base = reinterpret_cast<u8 *>(fdrc.get());
|
||||||
u8 *ptr = fdrc_base + sizeof(*fdrc);
|
u8 *ptr = fdrc_base + sizeof(*fdrc);
|
||||||
ptr = ROUNDUP_PTR(ptr, alignof(u32));
|
ptr = ROUNDUP_PTR(ptr, alignof(u32));
|
||||||
u32 *bitsToLitIndex = (u32 *)ptr;
|
u32 *bitsToLitIndex = reinterpret_cast<u32 *>(ptr);
|
||||||
ptr += bitsToLitIndexSize;
|
ptr += bitsToLitIndexSize;
|
||||||
|
|
||||||
// After the lit index array, we have the LitInfo structures themselves,
|
// After the lit index array, we have the LitInfo structures themselves,
|
||||||
@ -265,7 +265,7 @@ bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
|
|||||||
LiteralIndex litIdx = *i;
|
LiteralIndex litIdx = *i;
|
||||||
|
|
||||||
// Write LitInfo header.
|
// Write LitInfo header.
|
||||||
LitInfo &finalLI = *(LitInfo *)ptr;
|
LitInfo &finalLI = *(reinterpret_cast<LitInfo *>(ptr));
|
||||||
finalLI = tmpLitInfo[litIdx];
|
finalLI = tmpLitInfo[litIdx];
|
||||||
|
|
||||||
ptr += sizeof(LitInfo); // String starts directly after LitInfo.
|
ptr += sizeof(LitInfo); // String starts directly after LitInfo.
|
||||||
@ -294,9 +294,6 @@ setupFullConfs(const vector<hwlmLiteral> &lits,
|
|||||||
const EngineDescription &eng,
|
const EngineDescription &eng,
|
||||||
const map<BucketIndex, vector<LiteralIndex>> &bucketToLits,
|
const map<BucketIndex, vector<LiteralIndex>> &bucketToLits,
|
||||||
bool make_small) {
|
bool make_small) {
|
||||||
unique_ptr<TeddyEngineDescription> teddyDescr =
|
|
||||||
getTeddyDescription(eng.getID());
|
|
||||||
|
|
||||||
BC2CONF bc2Conf;
|
BC2CONF bc2Conf;
|
||||||
u32 totalConfirmSize = 0;
|
u32 totalConfirmSize = 0;
|
||||||
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
|
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
|
||||||
@ -321,7 +318,7 @@ setupFullConfs(const vector<hwlmLiteral> &lits,
|
|||||||
auto buf = make_zeroed_bytecode_ptr<u8>(totalSize, 64);
|
auto buf = make_zeroed_bytecode_ptr<u8>(totalSize, 64);
|
||||||
assert(buf); // otherwise would have thrown std::bad_alloc
|
assert(buf); // otherwise would have thrown std::bad_alloc
|
||||||
|
|
||||||
u32 *confBase = (u32 *)buf.get();
|
u32 *confBase = reinterpret_cast<u32 *>(buf.get());
|
||||||
u8 *ptr = buf.get() + totalConfSwitchSize;
|
u8 *ptr = buf.get() + totalConfSwitchSize;
|
||||||
assert(ISALIGNED_CL(ptr));
|
assert(ISALIGNED_CL(ptr));
|
||||||
|
|
||||||
|
@ -71,7 +71,7 @@ u32 findDesiredStride(size_t num_lits, size_t min_len, size_t min_len_count) {
|
|||||||
} else if (num_lits < 5000) {
|
} else if (num_lits < 5000) {
|
||||||
// for larger but not huge sizes, go to stride 2 only if we have at
|
// for larger but not huge sizes, go to stride 2 only if we have at
|
||||||
// least minlen 3
|
// least minlen 3
|
||||||
desiredStride = MIN(min_len - 1, 2);
|
desiredStride = std::min(min_len - 1, 2UL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -208,8 +208,8 @@ bytecode_ptr<u8> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
|
|||||||
auto buf = make_zeroed_bytecode_ptr<u8>(totalSize, 16);
|
auto buf = make_zeroed_bytecode_ptr<u8>(totalSize, 16);
|
||||||
assert(buf); // otherwise would have thrown std::bad_alloc
|
assert(buf); // otherwise would have thrown std::bad_alloc
|
||||||
|
|
||||||
u32 *floodHeader = (u32 *)buf.get();
|
u32 *floodHeader = reinterpret_cast<u32 *>(buf.get());
|
||||||
FDRFlood *layoutFlood = (FDRFlood *)(buf.get() + floodHeaderSize);
|
FDRFlood *layoutFlood = reinterpret_cast<FDRFlood *>(buf.get() + floodHeaderSize);
|
||||||
|
|
||||||
u32 currentFloodIndex = 0;
|
u32 currentFloodIndex = 0;
|
||||||
for (const auto &m : flood2chars) {
|
for (const auto &m : flood2chars) {
|
||||||
|
@ -328,7 +328,7 @@ bool pack(const vector<hwlmLiteral> &lits,
|
|||||||
|
|
||||||
static
|
static
|
||||||
void initReinforcedTable(u8 *rmsk) {
|
void initReinforcedTable(u8 *rmsk) {
|
||||||
u64a *mask = (u64a *)rmsk;
|
u64a *mask = reinterpret_cast<u64a *>(rmsk);
|
||||||
fill_n(mask, N_CHARS, 0x00ffffffffffffffULL);
|
fill_n(mask, N_CHARS, 0x00ffffffffffffffULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -576,8 +576,8 @@ bytecode_ptr<FDR> TeddyCompiler::build() {
|
|||||||
|
|
||||||
auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64);
|
auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64);
|
||||||
assert(fdr); // otherwise would have thrown std::bad_alloc
|
assert(fdr); // otherwise would have thrown std::bad_alloc
|
||||||
Teddy *teddy = (Teddy *)fdr.get(); // ugly
|
Teddy *teddy = reinterpret_cast<Teddy *>(fdr.get()); // ugly
|
||||||
u8 *teddy_base = (u8 *)teddy;
|
u8 *teddy_base = reinterpret_cast<u8 *>(teddy);
|
||||||
|
|
||||||
// Write header.
|
// Write header.
|
||||||
teddy->size = size;
|
teddy->size = size;
|
||||||
@ -622,7 +622,7 @@ bytecode_ptr<FDR> TeddyCompiler::build() {
|
|||||||
static
|
static
|
||||||
bool assignStringsToBuckets(
|
bool assignStringsToBuckets(
|
||||||
const vector<hwlmLiteral> &lits,
|
const vector<hwlmLiteral> &lits,
|
||||||
TeddyEngineDescription &eng,
|
const TeddyEngineDescription &eng,
|
||||||
map<BucketIndex, vector<LiteralIndex>> &bucketToLits) {
|
map<BucketIndex, vector<LiteralIndex>> &bucketToLits) {
|
||||||
assert(eng.numMasks <= MAX_NUM_MASKS);
|
assert(eng.numMasks <= MAX_NUM_MASKS);
|
||||||
if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
|
if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
|
||||||
|
@ -52,14 +52,14 @@ u32 TeddyEngineDescription::getDefaultFloodSuffixLength() const {
|
|||||||
|
|
||||||
void getTeddyDescriptions(vector<TeddyEngineDescription> *out) {
|
void getTeddyDescriptions(vector<TeddyEngineDescription> *out) {
|
||||||
static const TeddyEngineDef defns[] = {
|
static const TeddyEngineDef defns[] = {
|
||||||
{ 3, 0 | HS_CPU_FEATURES_AVX2, 1, 16, false },
|
{ 3, HS_CPU_FEATURES_AVX2, 1, 16, false },
|
||||||
{ 4, 0 | HS_CPU_FEATURES_AVX2, 1, 16, true },
|
{ 4, HS_CPU_FEATURES_AVX2, 1, 16, true },
|
||||||
{ 5, 0 | HS_CPU_FEATURES_AVX2, 2, 16, false },
|
{ 5, HS_CPU_FEATURES_AVX2, 2, 16, false },
|
||||||
{ 6, 0 | HS_CPU_FEATURES_AVX2, 2, 16, true },
|
{ 6, HS_CPU_FEATURES_AVX2, 2, 16, true },
|
||||||
{ 7, 0 | HS_CPU_FEATURES_AVX2, 3, 16, false },
|
{ 7, HS_CPU_FEATURES_AVX2, 3, 16, false },
|
||||||
{ 8, 0 | HS_CPU_FEATURES_AVX2, 3, 16, true },
|
{ 8, HS_CPU_FEATURES_AVX2, 3, 16, true },
|
||||||
{ 9, 0 | HS_CPU_FEATURES_AVX2, 4, 16, false },
|
{ 9, HS_CPU_FEATURES_AVX2, 4, 16, false },
|
||||||
{ 10, 0 | HS_CPU_FEATURES_AVX2, 4, 16, true },
|
{ 10, HS_CPU_FEATURES_AVX2, 4, 16, true },
|
||||||
{ 11, 0, 1, 8, false },
|
{ 11, 0, 1, 8, false },
|
||||||
{ 12, 0, 1, 8, true },
|
{ 12, 0, 1, 8, true },
|
||||||
{ 13, 0, 2, 8, false },
|
{ 13, 0, 2, 8, false },
|
||||||
|
@ -589,7 +589,7 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
|
|||||||
return HS_COMPILER_ERROR;
|
return HS_COMPILER_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
hs_expr_info *rv = (hs_expr_info *)hs_misc_alloc(sizeof(*rv));
|
hs_expr_info *rv = static_cast<hs_expr_info *>(hs_misc_alloc(sizeof(*rv)));
|
||||||
if (!rv) {
|
if (!rv) {
|
||||||
*error = const_cast<hs_compile_error_t *>(&hs_enomem);
|
*error = const_cast<hs_compile_error_t *>(&hs_enomem);
|
||||||
return HS_COMPILER_ERROR;
|
return HS_COMPILER_ERROR;
|
||||||
|
@ -48,6 +48,8 @@ hs_error_t HS_CDECL hs_valid_platform(void) {
|
|||||||
return HS_ARCH_ERROR;
|
return HS_ARCH_ERROR;
|
||||||
}
|
}
|
||||||
#elif !defined(VS_SIMDE_BACKEND) && (defined(ARCH_ARM32) || defined(ARCH_AARCH64))
|
#elif !defined(VS_SIMDE_BACKEND) && (defined(ARCH_ARM32) || defined(ARCH_AARCH64))
|
||||||
|
//check_neon returns true for now
|
||||||
|
// cppcheck-suppress knownConditionTrueFalse
|
||||||
if (check_neon()) {
|
if (check_neon()) {
|
||||||
return HS_SUCCESS;
|
return HS_SUCCESS;
|
||||||
} else {
|
} else {
|
||||||
|
@ -170,8 +170,7 @@ void do_accel_streaming(const union AccelAux *aux, const u8 *hbuf, size_t hlen,
|
|||||||
DEBUG_PRINTF("got %zu/%zu in 2nd buffer\n", delta, len);
|
DEBUG_PRINTF("got %zu/%zu in 2nd buffer\n", delta, len);
|
||||||
*start += delta;
|
*start += delta;
|
||||||
} else if (hlen) {
|
} else if (hlen) {
|
||||||
UNUSED size_t remaining = offset + ptr2 - found;
|
DEBUG_PRINTF("got %zu/%zu remaining in 1st buffer\n", offset + ptr2 - found, hlen);
|
||||||
DEBUG_PRINTF("got %zu/%zu remaining in 1st buffer\n", remaining, hlen);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -143,7 +143,7 @@ bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!eng) {
|
if (!eng) {
|
||||||
return nullptr;
|
return bytecode_ptr<HWLM>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(engSize);
|
assert(engSize);
|
||||||
@ -155,6 +155,7 @@ bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc,
|
|||||||
auto h = make_zeroed_bytecode_ptr<HWLM>(hwlm_len, 64);
|
auto h = make_zeroed_bytecode_ptr<HWLM>(hwlm_len, 64);
|
||||||
|
|
||||||
h->type = proto.engType;
|
h->type = proto.engType;
|
||||||
|
// cppcheck-suppress cstyleCast
|
||||||
memcpy(HWLM_DATA(h.get()), eng.get(), engSize);
|
memcpy(HWLM_DATA(h.get()), eng.get(), engSize);
|
||||||
|
|
||||||
return h;
|
return h;
|
||||||
@ -218,10 +219,12 @@ size_t hwlmSize(const HWLM *h) {
|
|||||||
|
|
||||||
switch (h->type) {
|
switch (h->type) {
|
||||||
case HWLM_ENGINE_NOOD:
|
case HWLM_ENGINE_NOOD:
|
||||||
engSize = noodSize((const noodTable *)HWLM_C_DATA(h));
|
// cppcheck-suppress cstyleCast
|
||||||
|
engSize = noodSize(reinterpret_cast<const noodTable *>(HWLM_C_DATA(h)));
|
||||||
break;
|
break;
|
||||||
case HWLM_ENGINE_FDR:
|
case HWLM_ENGINE_FDR:
|
||||||
engSize = fdrSize((const FDR *)HWLM_C_DATA(h));
|
// cppcheck-suppress cstyleCast
|
||||||
|
engSize = fdrSize(reinterpret_cast<const FDR *>(HWLM_C_DATA(h)));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -56,7 +56,7 @@ u64a make_u64a_mask(const vector<u8> &v) {
|
|||||||
|
|
||||||
u64a mask = 0;
|
u64a mask = 0;
|
||||||
size_t len = v.size();
|
size_t len = v.size();
|
||||||
unsigned char *m = (unsigned char *)&mask;
|
u8 *m = reinterpret_cast<u8 *>(&mask);
|
||||||
DEBUG_PRINTF("making mask len %zu\n", len);
|
DEBUG_PRINTF("making mask len %zu\n", len);
|
||||||
memcpy(m, &v[0], len);
|
memcpy(m, &v[0], len);
|
||||||
return mask;
|
return mask;
|
||||||
|
@ -427,7 +427,7 @@ void
|
|||||||
accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
||||||
const AccelScheme &info,
|
const AccelScheme &info,
|
||||||
void *accel_out) {
|
void *accel_out) {
|
||||||
AccelAux *accel = (AccelAux *)accel_out;
|
AccelAux *accel = reinterpret_cast<AccelAux *>(accel_out);
|
||||||
|
|
||||||
DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset,
|
DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset,
|
||||||
info.double_offset);
|
info.double_offset);
|
||||||
@ -474,7 +474,8 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
|||||||
u8 c1 = info.double_byte.begin()->first & m1;
|
u8 c1 = info.double_byte.begin()->first & m1;
|
||||||
u8 c2 = info.double_byte.begin()->second & m2;
|
u8 c2 = info.double_byte.begin()->second & m2;
|
||||||
#ifdef HAVE_SVE2
|
#ifdef HAVE_SVE2
|
||||||
if (vermicelliDoubleMasked16Build(c1, c2, m1, m2, (u8 *)&accel->mdverm16.mask)) {
|
if (vermicelliDoubleMasked16Build(c1, c2, m1, m2,
|
||||||
|
reinterpret_cast<u8 *>(&accel->mdverm16.mask))) {
|
||||||
accel->accel_type = ACCEL_DVERM16_MASKED;
|
accel->accel_type = ACCEL_DVERM16_MASKED;
|
||||||
accel->mdverm16.offset = verify_u8(info.double_offset);
|
accel->mdverm16.offset = verify_u8(info.double_offset);
|
||||||
accel->mdverm16.c1 = c1;
|
accel->mdverm16.c1 = c1;
|
||||||
@ -483,8 +484,9 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
|||||||
c1, c2);
|
c1, c2);
|
||||||
return;
|
return;
|
||||||
} else if (info.double_byte.size() <= 8 &&
|
} else if (info.double_byte.size() <= 8 &&
|
||||||
vermicelliDouble16Build(info.double_byte, (u8 *)&accel->dverm16.mask,
|
vermicelliDouble16Build(info.double_byte,
|
||||||
(u8 *)&accel->dverm16.firsts)) {
|
reinterpret_cast<u8 *>(&accel->dverm16.mask),
|
||||||
|
reinterpret_cast<u8 *>(&accel->dverm16.firsts))) {
|
||||||
accel->accel_type = ACCEL_DVERM16;
|
accel->accel_type = ACCEL_DVERM16;
|
||||||
accel->dverm16.offset = verify_u8(info.double_offset);
|
accel->dverm16.offset = verify_u8(info.double_offset);
|
||||||
DEBUG_PRINTF("building double16-vermicelli\n");
|
DEBUG_PRINTF("building double16-vermicelli\n");
|
||||||
@ -504,8 +506,9 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
|||||||
}
|
}
|
||||||
#ifdef HAVE_SVE2
|
#ifdef HAVE_SVE2
|
||||||
if (info.double_byte.size() <= 8 &&
|
if (info.double_byte.size() <= 8 &&
|
||||||
vermicelliDouble16Build(info.double_byte, (u8 *)&accel->dverm16.mask,
|
vermicelliDouble16Build(info.double_byte,
|
||||||
(u8 *)&accel->dverm16.firsts)) {
|
reinterpret_cast<u8 *>(&accel->dverm16.mask),
|
||||||
|
reinterpret_cast<u8 *>(&accel->dverm16.firsts))) {
|
||||||
accel->accel_type = ACCEL_DVERM16;
|
accel->accel_type = ACCEL_DVERM16;
|
||||||
accel->dverm16.offset = verify_u8(info.double_offset);
|
accel->dverm16.offset = verify_u8(info.double_offset);
|
||||||
DEBUG_PRINTF("building double16-vermicelli\n");
|
DEBUG_PRINTF("building double16-vermicelli\n");
|
||||||
@ -516,9 +519,11 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
|||||||
|
|
||||||
if (double_byte_ok(info) &&
|
if (double_byte_ok(info) &&
|
||||||
shuftiBuildDoubleMasks(
|
shuftiBuildDoubleMasks(
|
||||||
info.double_cr, info.double_byte, (u8 *)&accel->dshufti.lo1,
|
info.double_cr, info.double_byte,
|
||||||
(u8 *)&accel->dshufti.hi1, (u8 *)&accel->dshufti.lo2,
|
reinterpret_cast<u8 *>(&accel->dshufti.lo1),
|
||||||
(u8 *)&accel->dshufti.hi2)) {
|
reinterpret_cast<u8 *>(&accel->dshufti.hi1),
|
||||||
|
reinterpret_cast<u8 *>(&accel->dshufti.lo2),
|
||||||
|
reinterpret_cast<u8 *>(&accel->dshufti.hi2))) {
|
||||||
accel->accel_type = ACCEL_DSHUFTI;
|
accel->accel_type = ACCEL_DSHUFTI;
|
||||||
accel->dshufti.offset = verify_u8(info.double_offset);
|
accel->dshufti.offset = verify_u8(info.double_offset);
|
||||||
DEBUG_PRINTF("state %hu is double shufti\n", this_idx);
|
DEBUG_PRINTF("state %hu is double shufti\n", this_idx);
|
||||||
@ -550,7 +555,7 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
|||||||
#ifdef HAVE_SVE2
|
#ifdef HAVE_SVE2
|
||||||
if (info.cr.count() <= 16) {
|
if (info.cr.count() <= 16) {
|
||||||
accel->accel_type = ACCEL_VERM16;
|
accel->accel_type = ACCEL_VERM16;
|
||||||
vermicelli16Build(info.cr, (u8 *)&accel->verm16.mask);
|
vermicelli16Build(info.cr, reinterpret_cast<u8 *>(&accel->verm16.mask));
|
||||||
DEBUG_PRINTF("state %hu is vermicelli16\n", this_idx);
|
DEBUG_PRINTF("state %hu is vermicelli16\n", this_idx);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -563,16 +568,18 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
accel->accel_type = ACCEL_SHUFTI;
|
accel->accel_type = ACCEL_SHUFTI;
|
||||||
if (-1 != shuftiBuildMasks(info.cr, (u8 *)&accel->shufti.lo,
|
if (-1 != shuftiBuildMasks(info.cr,
|
||||||
(u8 *)&accel->shufti.hi)) {
|
reinterpret_cast<u8 *>(&accel->shufti.lo),
|
||||||
|
reinterpret_cast<u8 *>(&accel->shufti.hi))) {
|
||||||
DEBUG_PRINTF("state %hu is shufti\n", this_idx);
|
DEBUG_PRINTF("state %hu is shufti\n", this_idx);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(!info.cr.none());
|
assert(!info.cr.none());
|
||||||
accel->accel_type = ACCEL_TRUFFLE;
|
accel->accel_type = ACCEL_TRUFFLE;
|
||||||
truffleBuildMasks(info.cr, (u8 *)&accel->truffle.mask1,
|
truffleBuildMasks(info.cr,
|
||||||
(u8 *)&accel->truffle.mask2);
|
reinterpret_cast<u8 *>(&accel->truffle.mask1),
|
||||||
|
reinterpret_cast<u8 *>(&accel->truffle.mask2));
|
||||||
DEBUG_PRINTF("state %hu is truffle\n", this_idx);
|
DEBUG_PRINTF("state %hu is truffle\n", this_idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -84,8 +84,9 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
DEBUG_PRINTF("attempting shufti for %zu chars\n", outs);
|
DEBUG_PRINTF("attempting shufti for %zu chars\n", outs);
|
||||||
if (-1 != shuftiBuildMasks(info.single_stops, (u8 *)&aux->shufti.lo,
|
if (-1 != shuftiBuildMasks(info.single_stops,
|
||||||
(u8 *)&aux->shufti.hi)) {
|
reinterpret_cast<u8 *>(&aux->shufti.lo),
|
||||||
|
reinterpret_cast<u8 *>(&aux->shufti.hi))) {
|
||||||
aux->accel_type = ACCEL_SHUFTI;
|
aux->accel_type = ACCEL_SHUFTI;
|
||||||
aux->shufti.offset = offset;
|
aux->shufti.offset = offset;
|
||||||
DEBUG_PRINTF("shufti built OK\n");
|
DEBUG_PRINTF("shufti built OK\n");
|
||||||
@ -98,8 +99,9 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) {
|
|||||||
DEBUG_PRINTF("building Truffle for %zu chars\n", outs);
|
DEBUG_PRINTF("building Truffle for %zu chars\n", outs);
|
||||||
aux->accel_type = ACCEL_TRUFFLE;
|
aux->accel_type = ACCEL_TRUFFLE;
|
||||||
aux->truffle.offset = offset;
|
aux->truffle.offset = offset;
|
||||||
truffleBuildMasks(info.single_stops, (u8 *)&aux->truffle.mask1,
|
truffleBuildMasks(info.single_stops,
|
||||||
(u8 *)&aux->truffle.mask2);
|
reinterpret_cast<u8 *>(&aux->truffle.mask1),
|
||||||
|
reinterpret_cast<u8 *>(&aux->truffle.mask2));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -219,8 +221,9 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
|
|||||||
c1, c2);
|
c1, c2);
|
||||||
return;
|
return;
|
||||||
} else if (outs2 <= 8 &&
|
} else if (outs2 <= 8 &&
|
||||||
vermicelliDouble16Build(info.double_stop2, (u8 *)&aux->dverm16.mask,
|
vermicelliDouble16Build(info.double_stop2,
|
||||||
(u8 *)&aux->dverm16.firsts)) {
|
reinterpret_cast<u8 *>(&aux->dverm16.mask),
|
||||||
|
reinterpret_cast<u8 *>(&aux->dverm16.firsts))) {
|
||||||
aux->accel_type = ACCEL_DVERM16;
|
aux->accel_type = ACCEL_DVERM16;
|
||||||
aux->dverm16.offset = offset;
|
aux->dverm16.offset = offset;
|
||||||
DEBUG_PRINTF("building double16-vermicelli\n");
|
DEBUG_PRINTF("building double16-vermicelli\n");
|
||||||
@ -254,9 +257,11 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
|
|||||||
aux->accel_type = ACCEL_DSHUFTI;
|
aux->accel_type = ACCEL_DSHUFTI;
|
||||||
aux->dshufti.offset = offset;
|
aux->dshufti.offset = offset;
|
||||||
if (shuftiBuildDoubleMasks(
|
if (shuftiBuildDoubleMasks(
|
||||||
info.double_stop1, info.double_stop2, (u8 *)&aux->dshufti.lo1,
|
info.double_stop1, info.double_stop2,
|
||||||
(u8 *)&aux->dshufti.hi1, (u8 *)&aux->dshufti.lo2,
|
reinterpret_cast<u8 *>(&aux->dshufti.lo1),
|
||||||
(u8 *)&aux->dshufti.hi2)) {
|
reinterpret_cast<u8 *>(&aux->dshufti.hi1),
|
||||||
|
reinterpret_cast<u8 *>(&aux->dshufti.lo2),
|
||||||
|
reinterpret_cast<u8 *>(&aux->dshufti.hi2))) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -94,8 +94,8 @@ char subCastleReportCurrent(const struct Castle *c, struct mq *q,
|
|||||||
const struct SubCastle *sub = getSubCastle(c, subIdx);
|
const struct SubCastle *sub = getSubCastle(c, subIdx);
|
||||||
const struct RepeatInfo *info = getRepeatInfo(sub);
|
const struct RepeatInfo *info = getRepeatInfo(sub);
|
||||||
|
|
||||||
union RepeatControl *rctrl = getControl(q->state, sub);
|
const union RepeatControl *rctrl = getControl(q->state, sub);
|
||||||
char *rstate = (char *)q->streamState + sub->streamStateOffset +
|
const char *rstate = (char *)q->streamState + sub->streamStateOffset +
|
||||||
info->packedCtrlSize;
|
info->packedCtrlSize;
|
||||||
enum RepeatMatch match =
|
enum RepeatMatch match =
|
||||||
repeatHasMatch(info, rctrl, rstate, offset);
|
repeatHasMatch(info, rctrl, rstate, offset);
|
||||||
@ -118,10 +118,10 @@ int castleReportCurrent(const struct Castle *c, struct mq *q) {
|
|||||||
|
|
||||||
if (c->exclusive) {
|
if (c->exclusive) {
|
||||||
u8 *active = (u8 *)q->streamState;
|
u8 *active = (u8 *)q->streamState;
|
||||||
u8 *groups = active + c->groupIterOffset;
|
const u8 *groups = active + c->groupIterOffset;
|
||||||
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
||||||
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
||||||
u8 *cur = active + i * c->activeIdxSize;
|
const u8 *cur = active + i * c->activeIdxSize;
|
||||||
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
||||||
DEBUG_PRINTF("subcastle %u\n", activeIdx);
|
DEBUG_PRINTF("subcastle %u\n", activeIdx);
|
||||||
if (subCastleReportCurrent(c, q,
|
if (subCastleReportCurrent(c, q,
|
||||||
@ -156,8 +156,8 @@ char subCastleInAccept(const struct Castle *c, struct mq *q,
|
|||||||
}
|
}
|
||||||
const struct RepeatInfo *info = getRepeatInfo(sub);
|
const struct RepeatInfo *info = getRepeatInfo(sub);
|
||||||
|
|
||||||
union RepeatControl *rctrl = getControl(q->state, sub);
|
const union RepeatControl *rctrl = getControl(q->state, sub);
|
||||||
char *rstate = (char *)q->streamState + sub->streamStateOffset +
|
const char *rstate = (char *)q->streamState + sub->streamStateOffset +
|
||||||
info->packedCtrlSize;
|
info->packedCtrlSize;
|
||||||
enum RepeatMatch match =
|
enum RepeatMatch match =
|
||||||
repeatHasMatch(info, rctrl, rstate, offset);
|
repeatHasMatch(info, rctrl, rstate, offset);
|
||||||
@ -180,10 +180,10 @@ char castleInAccept(const struct Castle *c, struct mq *q,
|
|||||||
|
|
||||||
if (c->exclusive) {
|
if (c->exclusive) {
|
||||||
u8 *active = (u8 *)q->streamState;
|
u8 *active = (u8 *)q->streamState;
|
||||||
u8 *groups = active + c->groupIterOffset;
|
const u8 *groups = active + c->groupIterOffset;
|
||||||
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
||||||
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
||||||
u8 *cur = active + i * c->activeIdxSize;
|
const u8 *cur = active + i * c->activeIdxSize;
|
||||||
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
||||||
DEBUG_PRINTF("subcastle %u\n", activeIdx);
|
DEBUG_PRINTF("subcastle %u\n", activeIdx);
|
||||||
if (subCastleInAccept(c, q, report, offset, activeIdx)) {
|
if (subCastleInAccept(c, q, report, offset, activeIdx)) {
|
||||||
@ -213,8 +213,8 @@ void subCastleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
|
|||||||
const struct SubCastle *sub = getSubCastle(c, subIdx);
|
const struct SubCastle *sub = getSubCastle(c, subIdx);
|
||||||
const struct RepeatInfo *info = getRepeatInfo(sub);
|
const struct RepeatInfo *info = getRepeatInfo(sub);
|
||||||
|
|
||||||
union RepeatControl *rctrl = getControl(full_state, sub);
|
const union RepeatControl *rctrl = getControl(full_state, sub);
|
||||||
char *rstate = (char *)stream_state + sub->streamStateOffset +
|
const char *rstate = (char *)stream_state + sub->streamStateOffset +
|
||||||
info->packedCtrlSize;
|
info->packedCtrlSize;
|
||||||
|
|
||||||
if (repeatHasMatch(info, rctrl, rstate, offset) == REPEAT_STALE) {
|
if (repeatHasMatch(info, rctrl, rstate, offset) == REPEAT_STALE) {
|
||||||
@ -242,10 +242,10 @@ void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
|
|||||||
|
|
||||||
if (c->exclusive) {
|
if (c->exclusive) {
|
||||||
u8 *active = (u8 *)stream_state;
|
u8 *active = (u8 *)stream_state;
|
||||||
u8 *groups = active + c->groupIterOffset;
|
const u8 *groups = active + c->groupIterOffset;
|
||||||
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
||||||
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
||||||
u8 *cur = active + i * c->activeIdxSize;
|
const u8 *cur = active + i * c->activeIdxSize;
|
||||||
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
||||||
DEBUG_PRINTF("subcastle %u\n", activeIdx);
|
DEBUG_PRINTF("subcastle %u\n", activeIdx);
|
||||||
subCastleDeactivateStaleSubs(c, offset, full_state,
|
subCastleDeactivateStaleSubs(c, offset, full_state,
|
||||||
@ -329,8 +329,8 @@ void subCastleFindMatch(const struct Castle *c, const u64a begin,
|
|||||||
size_t *mloc, char *found, const u32 subIdx) {
|
size_t *mloc, char *found, const u32 subIdx) {
|
||||||
const struct SubCastle *sub = getSubCastle(c, subIdx);
|
const struct SubCastle *sub = getSubCastle(c, subIdx);
|
||||||
const struct RepeatInfo *info = getRepeatInfo(sub);
|
const struct RepeatInfo *info = getRepeatInfo(sub);
|
||||||
union RepeatControl *rctrl = getControl(full_state, sub);
|
const union RepeatControl *rctrl = getControl(full_state, sub);
|
||||||
char *rstate = (char *)stream_state + sub->streamStateOffset +
|
const char *rstate = (char *)stream_state + sub->streamStateOffset +
|
||||||
info->packedCtrlSize;
|
info->packedCtrlSize;
|
||||||
|
|
||||||
u64a match = repeatNextMatch(info, rctrl, rstate, begin);
|
u64a match = repeatNextMatch(info, rctrl, rstate, begin);
|
||||||
@ -374,10 +374,10 @@ char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end,
|
|||||||
|
|
||||||
if (c->exclusive) {
|
if (c->exclusive) {
|
||||||
u8 *active = (u8 *)stream_state;
|
u8 *active = (u8 *)stream_state;
|
||||||
u8 *groups = active + c->groupIterOffset;
|
const u8 *groups = active + c->groupIterOffset;
|
||||||
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
||||||
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
||||||
u8 *cur = active + i * c->activeIdxSize;
|
const u8 *cur = active + i * c->activeIdxSize;
|
||||||
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
||||||
DEBUG_PRINTF("subcastle %u\n", activeIdx);
|
DEBUG_PRINTF("subcastle %u\n", activeIdx);
|
||||||
subCastleFindMatch(c, begin, end, full_state, stream_state, mloc,
|
subCastleFindMatch(c, begin, end, full_state, stream_state, mloc,
|
||||||
@ -386,7 +386,7 @@ char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (c->exclusive != PURE_EXCLUSIVE) {
|
if (c->exclusive != PURE_EXCLUSIVE) {
|
||||||
u8 *active = (u8 *)stream_state + c->activeOffset;
|
const u8 *active = (u8 *)stream_state + c->activeOffset;
|
||||||
for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
|
for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
|
||||||
i != MMB_INVALID;
|
i != MMB_INVALID;
|
||||||
i = mmbit_iterate(active, c->numRepeats, i)) {
|
i = mmbit_iterate(active, c->numRepeats, i)) {
|
||||||
@ -400,8 +400,8 @@ char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
u64a subCastleNextMatch(const struct Castle *c, void *full_state,
|
u64a subCastleNextMatch(const struct Castle *c, const void *full_state,
|
||||||
void *stream_state, const u64a loc,
|
const void *stream_state, const u64a loc,
|
||||||
const u32 subIdx) {
|
const u32 subIdx) {
|
||||||
DEBUG_PRINTF("subcastle %u\n", subIdx);
|
DEBUG_PRINTF("subcastle %u\n", subIdx);
|
||||||
const struct SubCastle *sub = getSubCastle(c, subIdx);
|
const struct SubCastle *sub = getSubCastle(c, subIdx);
|
||||||
@ -489,15 +489,14 @@ char castleMatchLoop(const struct Castle *c, const u64a begin, const u64a end,
|
|||||||
// full_state (scratch).
|
// full_state (scratch).
|
||||||
|
|
||||||
u64a offset = end; // min offset of next match
|
u64a offset = end; // min offset of next match
|
||||||
u32 activeIdx = 0;
|
|
||||||
mmbit_clear(matching, c->numRepeats);
|
mmbit_clear(matching, c->numRepeats);
|
||||||
if (c->exclusive) {
|
if (c->exclusive) {
|
||||||
u8 *active = (u8 *)stream_state;
|
u8 *active = (u8 *)stream_state;
|
||||||
u8 *groups = active + c->groupIterOffset;
|
u8 *groups = active + c->groupIterOffset;
|
||||||
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
||||||
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
||||||
u8 *cur = active + i * c->activeIdxSize;
|
const u8 *cur = active + i * c->activeIdxSize;
|
||||||
activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
||||||
u64a match = subCastleNextMatch(c, full_state, stream_state,
|
u64a match = subCastleNextMatch(c, full_state, stream_state,
|
||||||
loc, activeIdx);
|
loc, activeIdx);
|
||||||
set_matching(c, match, groups, matching, c->numGroups, i,
|
set_matching(c, match, groups, matching, c->numGroups, i,
|
||||||
@ -797,7 +796,7 @@ char nfaExecCastle_Q_i(const struct NFA *n, struct mq *q, s64a end,
|
|||||||
|
|
||||||
char found = 0;
|
char found = 0;
|
||||||
if (c->exclusive) {
|
if (c->exclusive) {
|
||||||
u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
|
const u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
|
||||||
found = mmbit_any(groups, c->numGroups);
|
found = mmbit_any(groups, c->numGroups);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -864,7 +863,7 @@ char nfaExecCastle_Q_i(const struct NFA *n, struct mq *q, s64a end,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (c->exclusive) {
|
if (c->exclusive) {
|
||||||
u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
|
const u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
|
||||||
if (mmbit_any_precise(groups, c->numGroups)) {
|
if (mmbit_any_precise(groups, c->numGroups)) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@ -884,7 +883,7 @@ char nfaExecCastle_Q2(const struct NFA *n, struct mq *q, s64a end) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
s64a castleLastKillLoc(const struct Castle *c, struct mq *q) {
|
s64a castleLastKillLoc(const struct Castle *c, const struct mq *q) {
|
||||||
assert(q_cur_type(q) == MQE_START);
|
assert(q_cur_type(q) == MQE_START);
|
||||||
assert(q_last_type(q) == MQE_END);
|
assert(q_last_type(q) == MQE_END);
|
||||||
s64a sp = q_cur_loc(q);
|
s64a sp = q_cur_loc(q);
|
||||||
@ -907,7 +906,6 @@ s64a castleLastKillLoc(const struct Castle *c, struct mq *q) {
|
|||||||
if (castleRevScan(c, q->history, sp + hlen, ep + hlen, &loc)) {
|
if (castleRevScan(c, q->history, sp + hlen, ep + hlen, &loc)) {
|
||||||
return (s64a)loc - hlen;
|
return (s64a)loc - hlen;
|
||||||
}
|
}
|
||||||
ep = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return sp - 1; /* the repeats are never killed */
|
return sp - 1; /* the repeats are never killed */
|
||||||
@ -959,7 +957,7 @@ char nfaExecCastle_QR(const struct NFA *n, struct mq *q, ReportID report) {
|
|||||||
|
|
||||||
char found = 0;
|
char found = 0;
|
||||||
if (c->exclusive) {
|
if (c->exclusive) {
|
||||||
u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
|
const u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
|
||||||
found = mmbit_any_precise(groups, c->numGroups);
|
found = mmbit_any_precise(groups, c->numGroups);
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -1007,10 +1005,10 @@ char nfaExecCastle_inAnyAccept(const struct NFA *n, struct mq *q) {
|
|||||||
|
|
||||||
if (c->exclusive) {
|
if (c->exclusive) {
|
||||||
u8 *active = (u8 *)q->streamState;
|
u8 *active = (u8 *)q->streamState;
|
||||||
u8 *groups = active + c->groupIterOffset;
|
const u8 *groups = active + c->groupIterOffset;
|
||||||
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
||||||
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
||||||
u8 *cur = active + i * c->activeIdxSize;
|
const u8 *cur = active + i * c->activeIdxSize;
|
||||||
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
||||||
DEBUG_PRINTF("subcastle %u\n", activeIdx);
|
DEBUG_PRINTF("subcastle %u\n", activeIdx);
|
||||||
const struct SubCastle *sub = getSubCastle(c, activeIdx);
|
const struct SubCastle *sub = getSubCastle(c, activeIdx);
|
||||||
@ -1079,7 +1077,7 @@ void subCastleQueueCompressState(const struct Castle *c, const u32 subIdx,
|
|||||||
const struct mq *q, const u64a offset) {
|
const struct mq *q, const u64a offset) {
|
||||||
const struct SubCastle *sub = getSubCastle(c, subIdx);
|
const struct SubCastle *sub = getSubCastle(c, subIdx);
|
||||||
const struct RepeatInfo *info = getRepeatInfo(sub);
|
const struct RepeatInfo *info = getRepeatInfo(sub);
|
||||||
union RepeatControl *rctrl = getControl(q->state, sub);
|
const union RepeatControl *rctrl = getControl(q->state, sub);
|
||||||
char *packed = (char *)q->streamState + sub->streamStateOffset;
|
char *packed = (char *)q->streamState + sub->streamStateOffset;
|
||||||
DEBUG_PRINTF("sub %u next match %llu\n", subIdx,
|
DEBUG_PRINTF("sub %u next match %llu\n", subIdx,
|
||||||
repeatNextMatch(info, rctrl,
|
repeatNextMatch(info, rctrl,
|
||||||
@ -1100,10 +1098,10 @@ char nfaExecCastle_queueCompressState(const struct NFA *n, const struct mq *q,
|
|||||||
DEBUG_PRINTF("offset=%llu\n", offset);
|
DEBUG_PRINTF("offset=%llu\n", offset);
|
||||||
if (c->exclusive) {
|
if (c->exclusive) {
|
||||||
u8 *active = (u8 *)q->streamState;
|
u8 *active = (u8 *)q->streamState;
|
||||||
u8 *groups = active + c->groupIterOffset;
|
const u8 *groups = active + c->groupIterOffset;
|
||||||
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
||||||
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
||||||
u8 *cur = active + i * c->activeIdxSize;
|
const u8 *cur = active + i * c->activeIdxSize;
|
||||||
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
||||||
DEBUG_PRINTF("packing state for sub %u\n", activeIdx);
|
DEBUG_PRINTF("packing state for sub %u\n", activeIdx);
|
||||||
subCastleQueueCompressState(c, activeIdx, q, offset);
|
subCastleQueueCompressState(c, activeIdx, q, offset);
|
||||||
|
@ -106,25 +106,27 @@ void writeCastleScanEngine(const CharReach &cr, Castle *c) {
|
|||||||
#ifdef HAVE_SVE2
|
#ifdef HAVE_SVE2
|
||||||
if (cr.count() <= 16) {
|
if (cr.count() <= 16) {
|
||||||
c->type = CASTLE_NVERM16;
|
c->type = CASTLE_NVERM16;
|
||||||
vermicelli16Build(cr, (u8 *)&c->u.verm16.mask);
|
vermicelli16Build(cr, reinterpret_cast<u8 *>(&c->u.verm16.mask));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (negated.count() <= 16) {
|
if (negated.count() <= 16) {
|
||||||
c->type = CASTLE_VERM16;
|
c->type = CASTLE_VERM16;
|
||||||
vermicelli16Build(negated, (u8 *)&c->u.verm16.mask);
|
vermicelli16Build(negated, reinterpret_cast<u8 *>(&c->u.verm16.mask));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#endif // HAVE_SVE2
|
#endif // HAVE_SVE2
|
||||||
|
|
||||||
if (shuftiBuildMasks(negated, (u8 *)&c->u.shuf.mask_lo,
|
if (shuftiBuildMasks(negated,
|
||||||
(u8 *)&c->u.shuf.mask_hi) != -1) {
|
reinterpret_cast<u8 *>(&c->u.shuf.mask_lo),
|
||||||
|
reinterpret_cast<u8 *>(&c->u.shuf.mask_hi)) != -1) {
|
||||||
c->type = CASTLE_SHUFTI;
|
c->type = CASTLE_SHUFTI;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
c->type = CASTLE_TRUFFLE;
|
c->type = CASTLE_TRUFFLE;
|
||||||
truffleBuildMasks(negated, (u8 *)(u8 *)&c->u.truffle.mask1,
|
truffleBuildMasks(negated,
|
||||||
(u8 *)&c->u.truffle.mask2);
|
reinterpret_cast<u8 *>(&c->u.truffle.mask1),
|
||||||
|
reinterpret_cast<u8 *>(&c->u.truffle.mask2));
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -227,11 +229,13 @@ vector<u32> removeClique(CliqueGraph &cg) {
|
|||||||
while (!graph_empty(cg)) {
|
while (!graph_empty(cg)) {
|
||||||
const vector<u32> &c = cliquesVec.back();
|
const vector<u32> &c = cliquesVec.back();
|
||||||
vector<CliqueVertex> dead;
|
vector<CliqueVertex> dead;
|
||||||
for (const auto &v : vertices_range(cg)) {
|
|
||||||
if (find(c.begin(), c.end(), cg[v].stateId) != c.end()) {
|
auto deads = [&c=c, &cg=cg](const CliqueVertex &v) {
|
||||||
dead.emplace_back(v);
|
return (find(c.begin(), c.end(), cg[v].stateId) != c.end());
|
||||||
}
|
};
|
||||||
}
|
const auto &vr = vertices_range(cg);
|
||||||
|
std::copy_if(begin(vr), end(vr), std::back_inserter(dead), deads);
|
||||||
|
|
||||||
for (const auto &v : dead) {
|
for (const auto &v : dead) {
|
||||||
clear_vertex(v, cg);
|
clear_vertex(v, cg);
|
||||||
remove_vertex(v, cg);
|
remove_vertex(v, cg);
|
||||||
@ -294,7 +298,7 @@ vector<vector<u32>> checkExclusion(u32 &streamStateSize,
|
|||||||
size_t lower = 0;
|
size_t lower = 0;
|
||||||
size_t total = 0;
|
size_t total = 0;
|
||||||
while (lower < trigSize) {
|
while (lower < trigSize) {
|
||||||
vector<CliqueVertex> vertices;
|
vector<CliqueVertex> clvertices;
|
||||||
unique_ptr<CliqueGraph> cg = make_unique<CliqueGraph>();
|
unique_ptr<CliqueGraph> cg = make_unique<CliqueGraph>();
|
||||||
|
|
||||||
vector<vector<size_t>> min_reset_dist;
|
vector<vector<size_t>> min_reset_dist;
|
||||||
@ -302,7 +306,7 @@ vector<vector<u32>> checkExclusion(u32 &streamStateSize,
|
|||||||
// get min reset distance for each repeat
|
// get min reset distance for each repeat
|
||||||
for (size_t i = lower; i < upper; i++) {
|
for (size_t i = lower; i < upper; i++) {
|
||||||
CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg);
|
CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg);
|
||||||
vertices.emplace_back(v);
|
clvertices.emplace_back(v);
|
||||||
|
|
||||||
const vector<size_t> &tmp_dist =
|
const vector<size_t> &tmp_dist =
|
||||||
minResetDistToEnd(triggers[i], cr);
|
minResetDistToEnd(triggers[i], cr);
|
||||||
@ -311,11 +315,11 @@ vector<vector<u32>> checkExclusion(u32 &streamStateSize,
|
|||||||
|
|
||||||
// find exclusive pair for each repeat
|
// find exclusive pair for each repeat
|
||||||
for (size_t i = lower; i < upper; i++) {
|
for (size_t i = lower; i < upper; i++) {
|
||||||
CliqueVertex s = vertices[i - lower];
|
CliqueVertex s = clvertices[i - lower];
|
||||||
for (size_t j = i + 1; j < upper; j++) {
|
for (size_t j = i + 1; j < upper; j++) {
|
||||||
if (findExclusivePair(i, j, lower, min_reset_dist,
|
if (findExclusivePair(i, j, lower, min_reset_dist,
|
||||||
triggers)) {
|
triggers)) {
|
||||||
CliqueVertex d = vertices[j - lower];
|
CliqueVertex d = clvertices[j - lower];
|
||||||
add_edge(s, d, *cg);
|
add_edge(s, d, *cg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -600,9 +604,9 @@ buildCastle(const CastleProto &proto,
|
|||||||
nfa->minWidth = verify_u32(minWidth);
|
nfa->minWidth = verify_u32(minWidth);
|
||||||
nfa->maxWidth = maxWidth.is_finite() ? verify_u32(maxWidth) : 0;
|
nfa->maxWidth = maxWidth.is_finite() ? verify_u32(maxWidth) : 0;
|
||||||
|
|
||||||
char * const base_ptr = (char *)nfa.get() + sizeof(NFA);
|
char * const base_ptr = reinterpret_cast<char *>(nfa.get()) + sizeof(NFA);
|
||||||
char *ptr = base_ptr;
|
char *ptr = base_ptr;
|
||||||
Castle *c = (Castle *)ptr;
|
Castle *c = reinterpret_cast<Castle *>(ptr);
|
||||||
c->numRepeats = verify_u32(subs.size());
|
c->numRepeats = verify_u32(subs.size());
|
||||||
c->numGroups = exclusiveInfo.numGroups;
|
c->numGroups = exclusiveInfo.numGroups;
|
||||||
c->exclusive = verify_s8(exclusive);
|
c->exclusive = verify_s8(exclusive);
|
||||||
@ -613,7 +617,7 @@ buildCastle(const CastleProto &proto,
|
|||||||
writeCastleScanEngine(cr, c);
|
writeCastleScanEngine(cr, c);
|
||||||
|
|
||||||
ptr += sizeof(Castle);
|
ptr += sizeof(Castle);
|
||||||
SubCastle *subCastles = ((SubCastle *)(ROUNDUP_PTR(ptr, alignof(u32))));
|
SubCastle *subCastles = reinterpret_cast<SubCastle *>(ROUNDUP_PTR(ptr, alignof(u32)));
|
||||||
copy(subs.begin(), subs.end(), subCastles);
|
copy(subs.begin(), subs.end(), subCastles);
|
||||||
|
|
||||||
u32 length = 0;
|
u32 length = 0;
|
||||||
@ -623,16 +627,16 @@ buildCastle(const CastleProto &proto,
|
|||||||
SubCastle *sub = &subCastles[i];
|
SubCastle *sub = &subCastles[i];
|
||||||
sub->repeatInfoOffset = offset;
|
sub->repeatInfoOffset = offset;
|
||||||
|
|
||||||
ptr = (char *)sub + offset;
|
ptr = reinterpret_cast<char *>(sub) + offset;
|
||||||
memcpy(ptr, &infos[i], sizeof(RepeatInfo));
|
memcpy(ptr, &infos[i], sizeof(RepeatInfo));
|
||||||
|
|
||||||
if (patchSize[i]) {
|
if (patchSize[i]) {
|
||||||
RepeatInfo *info = (RepeatInfo *)ptr;
|
RepeatInfo *info = reinterpret_cast<RepeatInfo *>(ptr);
|
||||||
u64a *table = ((u64a *)(ROUNDUP_PTR(((char *)(info) +
|
u64a *table = reinterpret_cast<u64a *>(ROUNDUP_PTR(info +
|
||||||
sizeof(*info)), alignof(u64a))));
|
sizeof(*info), alignof(u64a)));
|
||||||
copy(tables.begin() + tableIdx,
|
copy(tables.begin() + tableIdx,
|
||||||
tables.begin() + tableIdx + patchSize[i], table);
|
tables.begin() + tableIdx + patchSize[i], table);
|
||||||
u32 diff = (char *)table - (char *)info +
|
u32 diff = reinterpret_cast<ptrdiff_t>(table) - reinterpret_cast<ptrdiff_t>(info) +
|
||||||
sizeof(u64a) * patchSize[i];
|
sizeof(u64a) * patchSize[i];
|
||||||
info->length = diff;
|
info->length = diff;
|
||||||
length += diff;
|
length += diff;
|
||||||
@ -655,7 +659,6 @@ buildCastle(const CastleProto &proto,
|
|||||||
if (!stale_iter.empty()) {
|
if (!stale_iter.empty()) {
|
||||||
c->staleIterOffset = verify_u32(ptr - base_ptr);
|
c->staleIterOffset = verify_u32(ptr - base_ptr);
|
||||||
copy_bytes(ptr, stale_iter);
|
copy_bytes(ptr, stale_iter);
|
||||||
ptr += byte_length(stale_iter);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return nfa;
|
return nfa;
|
||||||
@ -922,7 +925,7 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) {
|
|||||||
u32 min_bound = pr.bounds.min; // always finite
|
u32 min_bound = pr.bounds.min; // always finite
|
||||||
if (min_bound == 0) { // Vacuous case, we can only do this once.
|
if (min_bound == 0) { // Vacuous case, we can only do this once.
|
||||||
assert(!edge(g.start, g.accept, g).second);
|
assert(!edge(g.start, g.accept, g).second);
|
||||||
NFAEdge e = add_edge(g.start, g.accept, g);
|
NFAEdge e = add_edge(g.start, g.accept, g).first;
|
||||||
g[e].tops.insert(top);
|
g[e].tops.insert(top);
|
||||||
g[u].reports.insert(pr.reports.begin(), pr.reports.end());
|
g[u].reports.insert(pr.reports.begin(), pr.reports.end());
|
||||||
min_bound = 1;
|
min_bound = 1;
|
||||||
@ -931,7 +934,7 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) {
|
|||||||
for (u32 i = 0; i < min_bound; i++) {
|
for (u32 i = 0; i < min_bound; i++) {
|
||||||
NFAVertex v = add_vertex(g);
|
NFAVertex v = add_vertex(g);
|
||||||
g[v].char_reach = pr.reach;
|
g[v].char_reach = pr.reach;
|
||||||
NFAEdge e = add_edge(u, v, g);
|
NFAEdge e = add_edge(u, v, g).first;
|
||||||
if (u == g.start) {
|
if (u == g.start) {
|
||||||
g[e].tops.insert(top);
|
g[e].tops.insert(top);
|
||||||
}
|
}
|
||||||
@ -950,7 +953,7 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) {
|
|||||||
if (head != u) {
|
if (head != u) {
|
||||||
add_edge(head, v, g);
|
add_edge(head, v, g);
|
||||||
}
|
}
|
||||||
NFAEdge e = add_edge(u, v, g);
|
NFAEdge e = add_edge(u, v, g).first;
|
||||||
if (u == g.start) {
|
if (u == g.start) {
|
||||||
g[e].tops.insert(top);
|
g[e].tops.insert(top);
|
||||||
}
|
}
|
||||||
|
@ -305,6 +305,7 @@ void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) {
|
|||||||
DEBUG_PRINTF("dfa is empty\n");
|
DEBUG_PRINTF("dfa is empty\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cppcheck-suppress unreadVariable
|
||||||
UNUSED const size_t states_before = rdfa.states.size();
|
UNUSED const size_t states_before = rdfa.states.size();
|
||||||
|
|
||||||
HopcroftInfo info(rdfa);
|
HopcroftInfo info(rdfa);
|
||||||
|
@ -978,14 +978,14 @@ char nfaExecGough16_initCompressedState(const struct NFA *nfa, u64a offset,
|
|||||||
char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) {
|
char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) {
|
||||||
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
|
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
|
||||||
NfaCallback cb = q->cb;
|
NfaCallback cb = q->cb;
|
||||||
void *ctxt = q->context;
|
|
||||||
u8 s = *(u8 *)q->state;
|
u8 s = *(u8 *)q->state;
|
||||||
u64a offset = q_cur_offset(q);
|
u64a offset = q_cur_offset(q);
|
||||||
struct gough_som_info *som = getSomInfo(q->state);
|
const struct gough_som_info *som = getSomInfo(q->state);
|
||||||
assert(q_cur_type(q) == MQE_START);
|
assert(q_cur_type(q) == MQE_START);
|
||||||
assert(s);
|
assert(s);
|
||||||
|
|
||||||
if (s >= m->accept_limit_8) {
|
if (s >= m->accept_limit_8) {
|
||||||
|
void *ctxt = q->context;
|
||||||
u32 cached_accept_id = 0;
|
u32 cached_accept_id = 0;
|
||||||
u16 cached_accept_state = 0;
|
u16 cached_accept_state = 0;
|
||||||
u32 cached_accept_som = 0;
|
u32 cached_accept_som = 0;
|
||||||
@ -1000,16 +1000,16 @@ char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) {
|
|||||||
char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q) {
|
char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q) {
|
||||||
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
|
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
|
||||||
NfaCallback cb = q->cb;
|
NfaCallback cb = q->cb;
|
||||||
void *ctxt = q->context;
|
|
||||||
u16 s = *(u16 *)q->state;
|
u16 s = *(u16 *)q->state;
|
||||||
const struct mstate_aux *aux = get_aux(m, s);
|
const struct mstate_aux *aux = get_aux(m, s);
|
||||||
u64a offset = q_cur_offset(q);
|
u64a offset = q_cur_offset(q);
|
||||||
struct gough_som_info *som = getSomInfo(q->state);
|
const struct gough_som_info *som = getSomInfo(q->state);
|
||||||
assert(q_cur_type(q) == MQE_START);
|
assert(q_cur_type(q) == MQE_START);
|
||||||
DEBUG_PRINTF("state %hu\n", s);
|
DEBUG_PRINTF("state %hu\n", s);
|
||||||
assert(s);
|
assert(s);
|
||||||
|
|
||||||
if (aux->accept) {
|
if (aux->accept) {
|
||||||
|
void *ctxt = q->context;
|
||||||
u32 cached_accept_id = 0;
|
u32 cached_accept_id = 0;
|
||||||
u16 cached_accept_state = 0;
|
u16 cached_accept_state = 0;
|
||||||
u32 cached_accept_som = 0;
|
u32 cached_accept_som = 0;
|
||||||
|
@ -132,7 +132,7 @@ void GoughSSAVarMin::replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void translateRawReports(UNUSED GoughGraph &cfg, UNUSED const raw_som_dfa &raw,
|
void translateRawReports(UNUSED const GoughGraph &cfg, UNUSED const raw_som_dfa &raw,
|
||||||
const flat_map<u32, GoughSSAVarJoin *> &joins_at_s,
|
const flat_map<u32, GoughSSAVarJoin *> &joins_at_s,
|
||||||
UNUSED GoughVertex s,
|
UNUSED GoughVertex s,
|
||||||
const set<som_report> &reports_in,
|
const set<som_report> &reports_in,
|
||||||
@ -206,10 +206,6 @@ void makeCFG_top_edge(GoughGraph &cfg, const vector<GoughVertex> &vertices,
|
|||||||
assert(contains(src_slots, slot_id));
|
assert(contains(src_slots, slot_id));
|
||||||
|
|
||||||
shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>();
|
shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>();
|
||||||
if (!vmin) {
|
|
||||||
assert(0);
|
|
||||||
throw std::bad_alloc();
|
|
||||||
}
|
|
||||||
cfg[e].vars.emplace_back(vmin);
|
cfg[e].vars.emplace_back(vmin);
|
||||||
final_var = vmin.get();
|
final_var = vmin.get();
|
||||||
|
|
||||||
@ -321,10 +317,6 @@ void makeCFG_edge(GoughGraph &cfg, const map<u32, u32> &som_creators,
|
|||||||
DEBUG_PRINTF("bypassing min on join %u\n", slot_id);
|
DEBUG_PRINTF("bypassing min on join %u\n", slot_id);
|
||||||
} else {
|
} else {
|
||||||
shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>();
|
shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>();
|
||||||
if (!vmin) {
|
|
||||||
assert(0);
|
|
||||||
throw std::bad_alloc();
|
|
||||||
}
|
|
||||||
cfg[e].vars.emplace_back(vmin);
|
cfg[e].vars.emplace_back(vmin);
|
||||||
final_var = vmin.get();
|
final_var = vmin.get();
|
||||||
|
|
||||||
@ -441,10 +433,11 @@ unique_ptr<GoughGraph> makeCFG(const raw_som_dfa &raw) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
|
// cppcheck-suppress constParameterReference
|
||||||
void copy_propagate_report_set(vector<pair<ReportID, GoughSSAVar *> > &rep) {
|
void copy_propagate_report_set(vector<pair<ReportID, GoughSSAVar *> > &rep) {
|
||||||
vector<pair<ReportID, GoughSSAVar *> >::iterator it = rep.begin();
|
vector<pair<ReportID, GoughSSAVar *> >::iterator it = rep.begin();
|
||||||
while (it != rep.end()) {
|
while (it != rep.end()) {
|
||||||
GoughSSAVar *var = it->second;
|
const GoughSSAVar *var = it->second;
|
||||||
if (!var) {
|
if (!var) {
|
||||||
++it;
|
++it;
|
||||||
continue;
|
continue;
|
||||||
@ -546,7 +539,7 @@ void remove_dead(GoughGraph &g) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
while (!queue.empty()) {
|
while (!queue.empty()) {
|
||||||
GoughSSAVar *v = queue.back();
|
const GoughSSAVar *v = queue.back();
|
||||||
queue.pop_back();
|
queue.pop_back();
|
||||||
for (GoughSSAVar *var : v->get_inputs()) {
|
for (GoughSSAVar *var : v->get_inputs()) {
|
||||||
if (var->seen) {
|
if (var->seen) {
|
||||||
@ -659,8 +652,8 @@ GoughSSAVar *GoughSSAVarJoin::get_input(const GoughEdge &prev) const {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
const flat_set<GoughEdge> &GoughSSAVarJoin::get_edges_for_input(
|
// cppcheck-suppress constParameterPointer
|
||||||
GoughSSAVar *input) const {
|
const flat_set<GoughEdge> &GoughSSAVarJoin::get_edges_for_input(GoughSSAVar *input) const {
|
||||||
return input_map.at(input);
|
return input_map.at(input);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -811,7 +804,7 @@ private:
|
|||||||
|
|
||||||
static
|
static
|
||||||
void prep_joins_for_generation(const GoughGraph &g, GoughVertex v,
|
void prep_joins_for_generation(const GoughGraph &g, GoughVertex v,
|
||||||
map<GoughEdge, edge_join_info> *edge_info) {
|
map<GoughEdge, edge_join_info> &edge_info) {
|
||||||
DEBUG_PRINTF("writing out joins for %u\n", g[v].state_id);
|
DEBUG_PRINTF("writing out joins for %u\n", g[v].state_id);
|
||||||
for (const auto &var : g[v].vars) {
|
for (const auto &var : g[v].vars) {
|
||||||
u32 dest_slot = var->slot;
|
u32 dest_slot = var->slot;
|
||||||
@ -822,7 +815,7 @@ void prep_joins_for_generation(const GoughGraph &g, GoughVertex v,
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (const GoughEdge &incoming_edge : var_edges.second) {
|
for (const GoughEdge &incoming_edge : var_edges.second) {
|
||||||
(*edge_info)[incoming_edge].insert(input, dest_slot);
|
edge_info[incoming_edge].insert(input, dest_slot);
|
||||||
DEBUG_PRINTF("need %u<-%u\n", dest_slot, input);
|
DEBUG_PRINTF("need %u<-%u\n", dest_slot, input);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -920,7 +913,7 @@ void build_blocks(const GoughGraph &g,
|
|||||||
}
|
}
|
||||||
|
|
||||||
map<GoughEdge, edge_join_info> eji;
|
map<GoughEdge, edge_join_info> eji;
|
||||||
prep_joins_for_generation(g, t, &eji);
|
prep_joins_for_generation(g, t, eji);
|
||||||
|
|
||||||
for (auto &m : eji) {
|
for (auto &m : eji) {
|
||||||
vector<gough_ins> &block = (*blocks)[gough_edge_id(g, m.first)];
|
vector<gough_ins> &block = (*blocks)[gough_edge_id(g, m.first)];
|
||||||
@ -1019,7 +1012,7 @@ void update_accel_prog_offset(const gough_build_strat &gbs,
|
|||||||
verts[gbs.gg[v].state_id] = v;
|
verts[gbs.gg[v].state_id] = v;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto &m : gbs.built_accel) {
|
for (const auto &m : gbs.built_accel) {
|
||||||
gough_accel *ga = m.first;
|
gough_accel *ga = m.first;
|
||||||
assert(!ga->prog_offset);
|
assert(!ga->prog_offset);
|
||||||
GoughVertex v = verts[m.second];
|
GoughVertex v = verts[m.second];
|
||||||
@ -1052,7 +1045,7 @@ bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
|
|||||||
|| !cc.streaming);
|
|| !cc.streaming);
|
||||||
|
|
||||||
if (!cc.grey.allowGough) {
|
if (!cc.grey.allowGough) {
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("hello world\n");
|
DEBUG_PRINTF("hello world\n");
|
||||||
@ -1083,11 +1076,12 @@ bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
|
|||||||
auto basic_dfa = mcclellanCompile_i(raw, gbs, cc);
|
auto basic_dfa = mcclellanCompile_i(raw, gbs, cc);
|
||||||
assert(basic_dfa);
|
assert(basic_dfa);
|
||||||
if (!basic_dfa) {
|
if (!basic_dfa) {
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
u8 alphaShift
|
// cppcheck-suppress cstyleCast
|
||||||
= ((const mcclellan *)getImplNfa(basic_dfa.get()))->alphaShift;
|
const auto nfa = static_cast<const mcclellan *>(getImplNfa(basic_dfa.get()));
|
||||||
|
u8 alphaShift = nfa->alphaShift;
|
||||||
u32 edge_count = (1U << alphaShift) * raw.states.size();
|
u32 edge_count = (1U << alphaShift) * raw.states.size();
|
||||||
|
|
||||||
u32 curr_offset = ROUNDUP_N(basic_dfa->length, 4);
|
u32 curr_offset = ROUNDUP_N(basic_dfa->length, 4);
|
||||||
@ -1128,8 +1122,8 @@ bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
|
|||||||
u32 gough_size = ROUNDUP_N(curr_offset, 16);
|
u32 gough_size = ROUNDUP_N(curr_offset, 16);
|
||||||
auto gough_dfa = make_zeroed_bytecode_ptr<NFA>(gough_size);
|
auto gough_dfa = make_zeroed_bytecode_ptr<NFA>(gough_size);
|
||||||
|
|
||||||
memcpy(gough_dfa.get(), basic_dfa.get(), basic_dfa->length);
|
memcpy(reinterpret_cast<char *>(gough_dfa.get()), basic_dfa.get(), basic_dfa->length);
|
||||||
memcpy((char *)gough_dfa.get() + haig_offset, &gi, sizeof(gi));
|
memcpy(reinterpret_cast<char *>(gough_dfa.get()) + haig_offset, &gi, sizeof(gi));
|
||||||
if (gough_dfa->type == MCCLELLAN_NFA_16) {
|
if (gough_dfa->type == MCCLELLAN_NFA_16) {
|
||||||
gough_dfa->type = GOUGH_NFA_16;
|
gough_dfa->type = GOUGH_NFA_16;
|
||||||
} else {
|
} else {
|
||||||
@ -1142,18 +1136,19 @@ bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
|
|||||||
gough_dfa->streamStateSize = base_state_size + slot_count * somPrecision;
|
gough_dfa->streamStateSize = base_state_size + slot_count * somPrecision;
|
||||||
gough_dfa->scratchStateSize = (u32)(16 + scratch_slot_count * sizeof(u64a));
|
gough_dfa->scratchStateSize = (u32)(16 + scratch_slot_count * sizeof(u64a));
|
||||||
|
|
||||||
mcclellan *m = (mcclellan *)getMutableImplNfa(gough_dfa.get());
|
// cppcheck-suppress cstyleCast
|
||||||
|
auto *m = reinterpret_cast<mcclellan *>(getMutableImplNfa(gough_dfa.get()));
|
||||||
m->haig_offset = haig_offset;
|
m->haig_offset = haig_offset;
|
||||||
|
|
||||||
/* update nfa length, haig_info offset (leave mcclellan length alone) */
|
/* update nfa length, haig_info offset (leave mcclellan length alone) */
|
||||||
gough_dfa->length = gough_size;
|
gough_dfa->length = gough_size;
|
||||||
|
|
||||||
/* copy in blocks */
|
/* copy in blocks */
|
||||||
copy_bytes((u8 *)gough_dfa.get() + edge_prog_offset, edge_blocks);
|
copy_bytes(reinterpret_cast<u8 *>(gough_dfa.get()) + edge_prog_offset, edge_blocks);
|
||||||
if (top_prog_offset) {
|
if (top_prog_offset) {
|
||||||
copy_bytes((u8 *)gough_dfa.get() + top_prog_offset, top_blocks);
|
copy_bytes(reinterpret_cast<u8 *>(gough_dfa.get()) + top_prog_offset, top_blocks);
|
||||||
}
|
}
|
||||||
copy_bytes((u8 *)gough_dfa.get() + prog_base_offset, temp_blocks);
|
copy_bytes(reinterpret_cast<u8 *>(gough_dfa.get()) + prog_base_offset, temp_blocks);
|
||||||
|
|
||||||
return gough_dfa;
|
return gough_dfa;
|
||||||
}
|
}
|
||||||
@ -1186,7 +1181,7 @@ AccelScheme gough_build_strat::find_escape_strings(dstate_id_t this_idx) const {
|
|||||||
void gough_build_strat::buildAccel(dstate_id_t this_idx, const AccelScheme &info,
|
void gough_build_strat::buildAccel(dstate_id_t this_idx, const AccelScheme &info,
|
||||||
void *accel_out) {
|
void *accel_out) {
|
||||||
assert(mcclellan_build_strat::accelSize() == sizeof(AccelAux));
|
assert(mcclellan_build_strat::accelSize() == sizeof(AccelAux));
|
||||||
gough_accel *accel = (gough_accel *)accel_out;
|
gough_accel *accel = reinterpret_cast<gough_accel *>(accel_out);
|
||||||
/* build a plain accelaux so we can work out where we can get to */
|
/* build a plain accelaux so we can work out where we can get to */
|
||||||
mcclellan_build_strat::buildAccel(this_idx, info, &accel->accel);
|
mcclellan_build_strat::buildAccel(this_idx, info, &accel->accel);
|
||||||
DEBUG_PRINTF("state %hu is accel with type %hhu\n", this_idx,
|
DEBUG_PRINTF("state %hu is accel with type %hhu\n", this_idx,
|
||||||
@ -1324,7 +1319,8 @@ void raw_gough_report_info_impl::fillReportLists(NFA *n, size_t base_offset,
|
|||||||
for (const raw_gough_report_list &r : rl) {
|
for (const raw_gough_report_list &r : rl) {
|
||||||
ro.emplace_back(base_offset);
|
ro.emplace_back(base_offset);
|
||||||
|
|
||||||
gough_report_list *p = (gough_report_list *)((char *)n + base_offset);
|
u8 * n_ptr = reinterpret_cast<u8 *>(n);
|
||||||
|
gough_report_list *p = reinterpret_cast<gough_report_list *>(n_ptr + base_offset);
|
||||||
u32 i = 0;
|
u32 i = 0;
|
||||||
|
|
||||||
for (const som_report &sr : r.reports) {
|
for (const som_report &sr : r.reports) {
|
||||||
|
@ -195,7 +195,7 @@ void handle_pending_vars(GoughSSAVar *def, const GoughGraph &g,
|
|||||||
if (contains(aux.containing_v, var)) {
|
if (contains(aux.containing_v, var)) {
|
||||||
/* def is used by join vertex, value only needs to be live on some
|
/* def is used by join vertex, value only needs to be live on some
|
||||||
* incoming edges */
|
* incoming edges */
|
||||||
GoughSSAVarJoin *vj = (GoughSSAVarJoin *)var;
|
const GoughSSAVarJoin *vj = reinterpret_cast<const GoughSSAVarJoin *>(var);
|
||||||
const flat_set<GoughEdge> &live_edges
|
const flat_set<GoughEdge> &live_edges
|
||||||
= vj->get_edges_for_input(def);
|
= vj->get_edges_for_input(def);
|
||||||
for (const auto &e : live_edges) {
|
for (const auto &e : live_edges) {
|
||||||
@ -279,7 +279,7 @@ set<const GoughSSAVar *> live_during(GoughSSAVar *def, const GoughGraph &g,
|
|||||||
|
|
||||||
template<typename VarP>
|
template<typename VarP>
|
||||||
void set_initial_slots(const vector<VarP> &vars, u32 *next_slot) {
|
void set_initial_slots(const vector<VarP> &vars, u32 *next_slot) {
|
||||||
for (auto &var : vars) {
|
for (const auto &var : vars) {
|
||||||
assert(var->slot == INVALID_SLOT);
|
assert(var->slot == INVALID_SLOT);
|
||||||
var->slot = (*next_slot)++;
|
var->slot = (*next_slot)++;
|
||||||
}
|
}
|
||||||
@ -440,7 +440,7 @@ void create_slot_mapping(const GoughGraph &cfg, UNUSED u32 old_slot_count,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void update_local_slots(GoughGraph &g, set<GoughSSAVar *> &locals,
|
void update_local_slots(GoughGraph &g, const set<GoughSSAVar *> &locals,
|
||||||
u32 local_base) {
|
u32 local_base) {
|
||||||
DEBUG_PRINTF("%zu local variables\n", locals.size());
|
DEBUG_PRINTF("%zu local variables\n", locals.size());
|
||||||
/* local variables only occur on edges (joins are never local) */
|
/* local variables only occur on edges (joins are never local) */
|
||||||
|
@ -56,7 +56,7 @@ extern "C"
|
|||||||
char gf_name##_Q(const struct NFA *n, struct mq *q, s64a end); \
|
char gf_name##_Q(const struct NFA *n, struct mq *q, s64a end); \
|
||||||
char gf_name##_Q2(const struct NFA *n, struct mq *q, s64a end); \
|
char gf_name##_Q2(const struct NFA *n, struct mq *q, s64a end); \
|
||||||
char gf_name##_QR(const struct NFA *n, struct mq *q, ReportID report); \
|
char gf_name##_QR(const struct NFA *n, struct mq *q, ReportID report); \
|
||||||
char gf_name##_reportCurrent(const struct NFA *n, struct mq *q); \
|
char gf_name##_reportCurrent(const struct NFA *n, const struct mq *q); \
|
||||||
char gf_name##_inAccept(const struct NFA *n, ReportID report, \
|
char gf_name##_inAccept(const struct NFA *n, ReportID report, \
|
||||||
struct mq *q); \
|
struct mq *q); \
|
||||||
char gf_name##_inAnyAccept(const struct NFA *n, struct mq *q); \
|
char gf_name##_inAnyAccept(const struct NFA *n, struct mq *q); \
|
||||||
|
@ -332,7 +332,7 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx,
|
|||||||
// UE-1636) need to guard cyclic tug-accepts as well.
|
// UE-1636) need to guard cyclic tug-accepts as well.
|
||||||
static really_inline
|
static really_inline
|
||||||
char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
|
char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
|
||||||
union RepeatControl *repeat_ctrl, char *repeat_state,
|
const union RepeatControl *repeat_ctrl, const char *repeat_state,
|
||||||
u64a offset, ReportID report) {
|
u64a offset, ReportID report) {
|
||||||
assert(limex);
|
assert(limex);
|
||||||
|
|
||||||
@ -382,7 +382,7 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
|
|||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
|
char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
|
||||||
union RepeatControl *repeat_ctrl, char *repeat_state,
|
const union RepeatControl *repeat_ctrl, const char *repeat_state,
|
||||||
u64a offset) {
|
u64a offset) {
|
||||||
assert(limex);
|
assert(limex);
|
||||||
|
|
||||||
|
@ -290,7 +290,7 @@ void maskSetBits(Mask &m, const NFAStateSet &bits) {
|
|||||||
|
|
||||||
template<class Mask>
|
template<class Mask>
|
||||||
bool isMaskZero(Mask &m) {
|
bool isMaskZero(Mask &m) {
|
||||||
u8 *m8 = (u8 *)&m;
|
const u8 *m8 = (u8 *)&m;
|
||||||
for (u32 i = 0; i < sizeof(m); i++) {
|
for (u32 i = 0; i < sizeof(m); i++) {
|
||||||
if (m8[i]) {
|
if (m8[i]) {
|
||||||
return false;
|
return false;
|
||||||
@ -329,11 +329,11 @@ void buildReachMapping(const build_info &args, vector<NFAStateSet> &reach,
|
|||||||
// Build a list of vertices with a state index assigned.
|
// Build a list of vertices with a state index assigned.
|
||||||
vector<NFAVertex> verts;
|
vector<NFAVertex> verts;
|
||||||
verts.reserve(args.num_states);
|
verts.reserve(args.num_states);
|
||||||
for (auto v : vertices_range(h)) {
|
auto sidat = [&state_ids=state_ids](const NFAVertex &v) {
|
||||||
if (state_ids.at(v) != NO_STATE) {
|
return (state_ids.at(v) != NO_STATE);
|
||||||
verts.emplace_back(v);
|
};
|
||||||
}
|
const auto &vr = vertices_range(h);
|
||||||
}
|
std::copy_if(begin(vr), end(vr), std::back_inserter(verts), sidat);
|
||||||
|
|
||||||
// Build a mapping from set-of-states -> reachability.
|
// Build a mapping from set-of-states -> reachability.
|
||||||
map<NFAStateSet, CharReach> mapping;
|
map<NFAStateSet, CharReach> mapping;
|
||||||
@ -556,7 +556,8 @@ void filterAccelStates(NGHolder &g, const map<u32, set<NFAVertex>> &tops,
|
|||||||
|
|
||||||
// Similarly, connect (start, startDs) if necessary.
|
// Similarly, connect (start, startDs) if necessary.
|
||||||
if (!edge(g.start, g.startDs, g).second) {
|
if (!edge(g.start, g.startDs, g).second) {
|
||||||
NFAEdge e = add_edge(g.start, g.startDs, g);
|
NFAEdge e;
|
||||||
|
std::tie(e, std::ignore) = add_edge(g.start, g.startDs, g);
|
||||||
tempEdges.emplace_back(e); // Remove edge later.
|
tempEdges.emplace_back(e); // Remove edge later.
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1485,6 +1486,7 @@ u32 buildExceptionMap(const build_info &args, ReportListCache &reports_cache,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
u32 j = args.state_ids.at(w);
|
u32 j = args.state_ids.at(w);
|
||||||
|
// j can be NO_STATE if args.state_ids.at(w) returns NO_STATE
|
||||||
if (j == NO_STATE) {
|
if (j == NO_STATE) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -1576,7 +1578,7 @@ u32 findMaxVarShift(const build_info &args, u32 nShifts) {
|
|||||||
static
|
static
|
||||||
int getLimexScore(const build_info &args, u32 nShifts) {
|
int getLimexScore(const build_info &args, u32 nShifts) {
|
||||||
const NGHolder &h = args.h;
|
const NGHolder &h = args.h;
|
||||||
u32 maxVarShift = nShifts;
|
u32 maxVarShift;
|
||||||
int score = 0;
|
int score = 0;
|
||||||
|
|
||||||
score += SHIFT_COST * nShifts;
|
score += SHIFT_COST * nShifts;
|
||||||
@ -1704,7 +1706,7 @@ struct Factory {
|
|||||||
static
|
static
|
||||||
void allocState(NFA *nfa, u32 repeatscratchStateSize,
|
void allocState(NFA *nfa, u32 repeatscratchStateSize,
|
||||||
u32 repeatStreamState) {
|
u32 repeatStreamState) {
|
||||||
implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa);
|
const implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa);
|
||||||
|
|
||||||
// LimEx NFAs now store the following in state:
|
// LimEx NFAs now store the following in state:
|
||||||
// 1. state bitvector (always present)
|
// 1. state bitvector (always present)
|
||||||
@ -2222,7 +2224,7 @@ struct Factory {
|
|||||||
static
|
static
|
||||||
bytecode_ptr<NFA> generateNfa(const build_info &args) {
|
bytecode_ptr<NFA> generateNfa(const build_info &args) {
|
||||||
if (args.num_states > NFATraits<dtype>::maxStates) {
|
if (args.num_states > NFATraits<dtype>::maxStates) {
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build bounded repeat structures.
|
// Build bounded repeat structures.
|
||||||
@ -2581,7 +2583,7 @@ bytecode_ptr<NFA> generate(NGHolder &h,
|
|||||||
|
|
||||||
if (!cc.grey.allowLimExNFA) {
|
if (!cc.grey.allowLimExNFA) {
|
||||||
DEBUG_PRINTF("limex not allowed\n");
|
DEBUG_PRINTF("limex not allowed\n");
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
// If you ask for a particular type, it had better be an NFA.
|
// If you ask for a particular type, it had better be an NFA.
|
||||||
@ -2616,7 +2618,7 @@ bytecode_ptr<NFA> generate(NGHolder &h,
|
|||||||
|
|
||||||
if (scores.empty()) {
|
if (scores.empty()) {
|
||||||
DEBUG_PRINTF("No NFA returned a valid score for this case.\n");
|
DEBUG_PRINTF("No NFA returned a valid score for this case.\n");
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sort acceptable models in priority order, lowest score first.
|
// Sort acceptable models in priority order, lowest score first.
|
||||||
@ -2635,7 +2637,7 @@ bytecode_ptr<NFA> generate(NGHolder &h,
|
|||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("NFA build failed.\n");
|
DEBUG_PRINTF("NFA build failed.\n");
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 countAccelStates(NGHolder &h,
|
u32 countAccelStates(NGHolder &h,
|
||||||
|
@ -302,8 +302,8 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ,
|
|||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
// A copy of the estate as an array of GPR-sized chunks.
|
// A copy of the estate as an array of GPR-sized chunks.
|
||||||
CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
|
CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; // cppcheck-suppress duplicateExpression
|
||||||
CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
|
CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; // cppcheck-suppress duplicateExpression
|
||||||
#ifdef ESTATE_ON_STACK
|
#ifdef ESTATE_ON_STACK
|
||||||
memcpy(chunks, &estate, sizeof(STATE_T));
|
memcpy(chunks, &estate, sizeof(STATE_T));
|
||||||
#else
|
#else
|
||||||
@ -311,7 +311,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ,
|
|||||||
#endif
|
#endif
|
||||||
memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T));
|
memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T));
|
||||||
|
|
||||||
u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)];
|
u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)]; // cppcheck-suppress duplicateExpression
|
||||||
base_index[0] = 0;
|
base_index[0] = 0;
|
||||||
for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) {
|
for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) {
|
||||||
base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]);
|
base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]);
|
||||||
|
@ -927,7 +927,7 @@ char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state,
|
|||||||
context);
|
context);
|
||||||
}
|
}
|
||||||
|
|
||||||
char JOIN(LIMEX_API_ROOT, _reportCurrent)(const struct NFA *n, struct mq *q) {
|
char JOIN(LIMEX_API_ROOT, _reportCurrent)(const struct NFA *n, const struct mq *q) {
|
||||||
const IMPL_NFA_T *limex = getImplNfa(n);
|
const IMPL_NFA_T *limex = getImplNfa(n);
|
||||||
REPORTCURRENT_FN(limex, q);
|
REPORTCURRENT_FN(limex, q);
|
||||||
return 1;
|
return 1;
|
||||||
@ -984,9 +984,9 @@ char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa,
|
|||||||
assert(q->state && q->streamState);
|
assert(q->state && q->streamState);
|
||||||
|
|
||||||
const IMPL_NFA_T *limex = getImplNfa(nfa);
|
const IMPL_NFA_T *limex = getImplNfa(nfa);
|
||||||
union RepeatControl *repeat_ctrl =
|
const union RepeatControl *repeat_ctrl =
|
||||||
getRepeatControlBase(q->state, sizeof(STATE_T));
|
getRepeatControlBase(q->state, sizeof(STATE_T));
|
||||||
char *repeat_state = q->streamState + limex->stateSize;
|
const char *repeat_state = q->streamState + limex->stateSize;
|
||||||
STATE_T state = *(STATE_T *)q->state;
|
STATE_T state = *(STATE_T *)q->state;
|
||||||
u64a offset = q->offset + q_last_loc(q) + 1;
|
u64a offset = q->offset + q_last_loc(q) + 1;
|
||||||
|
|
||||||
@ -999,9 +999,9 @@ char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
|
|||||||
assert(q->state && q->streamState);
|
assert(q->state && q->streamState);
|
||||||
|
|
||||||
const IMPL_NFA_T *limex = getImplNfa(nfa);
|
const IMPL_NFA_T *limex = getImplNfa(nfa);
|
||||||
union RepeatControl *repeat_ctrl =
|
const union RepeatControl *repeat_ctrl =
|
||||||
getRepeatControlBase(q->state, sizeof(STATE_T));
|
getRepeatControlBase(q->state, sizeof(STATE_T));
|
||||||
char *repeat_state = q->streamState + limex->stateSize;
|
const char *repeat_state = q->streamState + limex->stateSize;
|
||||||
STATE_T state = *(STATE_T *)q->state;
|
STATE_T state = *(STATE_T *)q->state;
|
||||||
u64a offset = q->offset + q_last_loc(q) + 1;
|
u64a offset = q->offset + q_last_loc(q) + 1;
|
||||||
|
|
||||||
@ -1020,9 +1020,9 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
|
|||||||
|
|
||||||
if (limex->repeatCount) {
|
if (limex->repeatCount) {
|
||||||
u64a offset = q->offset + loc + 1;
|
u64a offset = q->offset + loc + 1;
|
||||||
union RepeatControl *repeat_ctrl =
|
const union RepeatControl *repeat_ctrl =
|
||||||
getRepeatControlBase(q->state, sizeof(STATE_T));
|
getRepeatControlBase(q->state, sizeof(STATE_T));
|
||||||
char *repeat_state = q->streamState + limex->stateSize;
|
const char *repeat_state = q->streamState + limex->stateSize;
|
||||||
SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &state);
|
SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -177,7 +177,7 @@ static
|
|||||||
mstate_aux *getAux(NFA *n, dstate_id_t i) {
|
mstate_aux *getAux(NFA *n, dstate_id_t i) {
|
||||||
assert(isMcClellanType(n->type));
|
assert(isMcClellanType(n->type));
|
||||||
|
|
||||||
mcclellan *m = (mcclellan *)getMutableImplNfa(n);
|
const mcclellan *m = (mcclellan *)getMutableImplNfa(n);
|
||||||
mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset);
|
mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset);
|
||||||
|
|
||||||
mstate_aux *aux = aux_base + i;
|
mstate_aux *aux = aux_base + i;
|
||||||
@ -203,7 +203,7 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
mstate_aux *aux = getAux(n, succ_table[c_prime]);
|
const mstate_aux *aux = getAux(n, succ_table[c_prime]);
|
||||||
|
|
||||||
if (aux->accept) {
|
if (aux->accept) {
|
||||||
succ_table[c_prime] |= ACCEPT_FLAG;
|
succ_table[c_prime] |= ACCEPT_FLAG;
|
||||||
@ -232,7 +232,7 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
mstate_aux *aux = getAux(n, succ_i);
|
const mstate_aux *aux = getAux(n, succ_i);
|
||||||
|
|
||||||
if (aux->accept) {
|
if (aux->accept) {
|
||||||
succ_i |= ACCEPT_FLAG;
|
succ_i |= ACCEPT_FLAG;
|
||||||
@ -262,7 +262,7 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
|
|||||||
// check successful transition
|
// check successful transition
|
||||||
u16 next = unaligned_load_u16((u8 *)trans);
|
u16 next = unaligned_load_u16((u8 *)trans);
|
||||||
if (next < wide_limit) {
|
if (next < wide_limit) {
|
||||||
mstate_aux *aux = getAux(n, next);
|
const mstate_aux *aux = getAux(n, next);
|
||||||
if (aux->accept) {
|
if (aux->accept) {
|
||||||
next |= ACCEPT_FLAG;
|
next |= ACCEPT_FLAG;
|
||||||
}
|
}
|
||||||
@ -279,7 +279,7 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
|
|||||||
if (next_k >= wide_limit) {
|
if (next_k >= wide_limit) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
mstate_aux *aux_k = getAux(n, next_k);
|
const mstate_aux *aux_k = getAux(n, next_k);
|
||||||
if (aux_k->accept) {
|
if (aux_k->accept) {
|
||||||
next_k |= ACCEPT_FLAG;
|
next_k |= ACCEPT_FLAG;
|
||||||
}
|
}
|
||||||
@ -362,7 +362,7 @@ struct raw_report_list {
|
|||||||
raw_report_list(const flat_set<ReportID> &reports_in,
|
raw_report_list(const flat_set<ReportID> &reports_in,
|
||||||
const ReportManager &rm, bool do_remap) {
|
const ReportManager &rm, bool do_remap) {
|
||||||
if (do_remap) {
|
if (do_remap) {
|
||||||
for (auto &id : reports_in) {
|
for (const auto &id : reports_in) {
|
||||||
reports.insert(rm.getProgramOffset(id));
|
reports.insert(rm.getProgramOffset(id));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -546,7 +546,7 @@ size_t calcWideRegionSize(const dfa_info &info) {
|
|||||||
static
|
static
|
||||||
void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info,
|
void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info,
|
||||||
const vector<u32> &reports, const vector<u32> &reports_eod,
|
const vector<u32> &reports, const vector<u32> &reports_eod,
|
||||||
vector<u32> &reportOffsets) {
|
const vector<u32> &reportOffsets) {
|
||||||
const dstate &raw_state = info.states[i];
|
const dstate &raw_state = info.states[i];
|
||||||
aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]];
|
aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]];
|
||||||
aux->accept_eod = raw_state.reports_eod.empty() ? 0
|
aux->accept_eod = raw_state.reports_eod.empty() ? 0
|
||||||
@ -631,7 +631,7 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
|
|||||||
if (!allocateFSN16(info, &count_real_states, &wide_limit)) {
|
if (!allocateFSN16(info, &count_real_states, &wide_limit)) {
|
||||||
DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
|
DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
|
||||||
info.size());
|
info.size());
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("count_real_states: %d\n", count_real_states);
|
DEBUG_PRINTF("count_real_states: %d\n", count_real_states);
|
||||||
@ -800,8 +800,8 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i : order) {
|
for (size_t i : order) {
|
||||||
vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
|
const vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
|
||||||
vector<symbol_t> &symbol_chain = info.wide_symbol_chain[i];
|
const vector<symbol_t> &symbol_chain = info.wide_symbol_chain[i];
|
||||||
|
|
||||||
u16 width = verify_u16(symbol_chain.size());
|
u16 width = verify_u16(symbol_chain.size());
|
||||||
*(u16 *)(curr_wide_entry + WIDE_WIDTH_OFFSET) = width;
|
*(u16 *)(curr_wide_entry + WIDE_WIDTH_OFFSET) = width;
|
||||||
@ -1373,11 +1373,11 @@ bool store_chain_longest(vector<vector<dstate_id_t>> &candidate_chain,
|
|||||||
/* \brief Generate wide_symbol_chain from wide_state_chain. */
|
/* \brief Generate wide_symbol_chain from wide_state_chain. */
|
||||||
static
|
static
|
||||||
void generate_symbol_chain(dfa_info &info, vector<symbol_t> &chain_tail) {
|
void generate_symbol_chain(dfa_info &info, vector<symbol_t> &chain_tail) {
|
||||||
raw_dfa &rdfa = info.raw;
|
const raw_dfa &rdfa = info.raw;
|
||||||
assert(chain_tail.size() == info.wide_state_chain.size());
|
assert(chain_tail.size() == info.wide_state_chain.size());
|
||||||
|
|
||||||
for (size_t i = 0; i < info.wide_state_chain.size(); i++) {
|
for (size_t i = 0; i < info.wide_state_chain.size(); i++) {
|
||||||
vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
|
const vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
|
||||||
vector<symbol_t> symbol_chain;
|
vector<symbol_t> symbol_chain;
|
||||||
|
|
||||||
info.extra[state_chain[0]].wideHead = true;
|
info.extra[state_chain[0]].wideHead = true;
|
||||||
@ -1385,7 +1385,6 @@ void generate_symbol_chain(dfa_info &info, vector<symbol_t> &chain_tail) {
|
|||||||
|
|
||||||
for (size_t j = 0; j < width; j++) {
|
for (size_t j = 0; j < width; j++) {
|
||||||
dstate_id_t curr_id = state_chain[j];
|
dstate_id_t curr_id = state_chain[j];
|
||||||
dstate_id_t next_id = state_chain[j + 1];
|
|
||||||
|
|
||||||
// The last state of the chain doesn't belong to a wide state.
|
// The last state of the chain doesn't belong to a wide state.
|
||||||
info.extra[curr_id].wideState = true;
|
info.extra[curr_id].wideState = true;
|
||||||
@ -1394,6 +1393,7 @@ void generate_symbol_chain(dfa_info &info, vector<symbol_t> &chain_tail) {
|
|||||||
if (j == width - 1) {
|
if (j == width - 1) {
|
||||||
symbol_chain.emplace_back(chain_tail[i]);
|
symbol_chain.emplace_back(chain_tail[i]);
|
||||||
} else {
|
} else {
|
||||||
|
dstate_id_t next_id = state_chain[j + 1];
|
||||||
for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) {
|
for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) {
|
||||||
if (rdfa.states[curr_id].next[sym] == next_id) {
|
if (rdfa.states[curr_id].next[sym] == next_id) {
|
||||||
symbol_chain.emplace_back(sym);
|
symbol_chain.emplace_back(sym);
|
||||||
|
@ -144,11 +144,11 @@ u8 dfa_info::getAlphaShift() const {
|
|||||||
|
|
||||||
static
|
static
|
||||||
mstate_aux *getAux(NFA *n, dstate_id_t i) {
|
mstate_aux *getAux(NFA *n, dstate_id_t i) {
|
||||||
mcsheng *m = (mcsheng *)getMutableImplNfa(n);
|
const mcsheng *m = reinterpret_cast<const mcsheng *>(getMutableImplNfa(n));
|
||||||
mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset);
|
mstate_aux *aux_base = reinterpret_cast<mstate_aux *>(reinterpret_cast<u8 *>(n) + m->aux_offset);
|
||||||
|
|
||||||
mstate_aux *aux = aux_base + i;
|
mstate_aux *aux = aux_base + i;
|
||||||
assert((const char *)aux < (const char *)n + m->length);
|
assert(reinterpret_cast<const char *>(aux) < reinterpret_cast<const char *>(n) + m->length);
|
||||||
return aux;
|
return aux;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -192,8 +192,8 @@ void createShuffleMasks(mcsheng *m, const dfa_info &info,
|
|||||||
}
|
}
|
||||||
for (u32 i = 0; i < N_CHARS; i++) {
|
for (u32 i = 0; i < N_CHARS; i++) {
|
||||||
assert(info.alpha_remap[i] != info.alpha_remap[TOP]);
|
assert(info.alpha_remap[i] != info.alpha_remap[TOP]);
|
||||||
memcpy((u8 *)&m->sheng_masks[i],
|
memcpy(reinterpret_cast<u8 *>(&m->sheng_masks[i]),
|
||||||
(u8 *)masks[info.alpha_remap[i]].data(), sizeof(m128));
|
reinterpret_cast<u8 *>(masks[info.alpha_remap[i]].data()), sizeof(m128));
|
||||||
}
|
}
|
||||||
m->sheng_end = sheng_end;
|
m->sheng_end = sheng_end;
|
||||||
m->sheng_accel_limit = sheng_end - 1;
|
m->sheng_accel_limit = sheng_end - 1;
|
||||||
@ -223,7 +223,7 @@ void populateBasicInfo(size_t state_size, const dfa_info &info,
|
|||||||
nfa->type = MCSHENG_NFA_16;
|
nfa->type = MCSHENG_NFA_16;
|
||||||
}
|
}
|
||||||
|
|
||||||
mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);
|
mcsheng *m = reinterpret_cast<mcsheng *>(getMutableImplNfa(nfa));
|
||||||
for (u32 i = 0; i < 256; i++) {
|
for (u32 i = 0; i < 256; i++) {
|
||||||
m->remap[i] = verify_u8(info.alpha_remap[i]);
|
m->remap[i] = verify_u8(info.alpha_remap[i]);
|
||||||
}
|
}
|
||||||
@ -244,11 +244,11 @@ void populateBasicInfo(size_t state_size, const dfa_info &info,
|
|||||||
|
|
||||||
static
|
static
|
||||||
mstate_aux *getAux64(NFA *n, dstate_id_t i) {
|
mstate_aux *getAux64(NFA *n, dstate_id_t i) {
|
||||||
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(n);
|
const mcsheng64 *m = reinterpret_cast<const mcsheng64 *>(getMutableImplNfa(n));
|
||||||
mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset);
|
mstate_aux *aux_base = reinterpret_cast<mstate_aux *>(reinterpret_cast<u8 *>(n) + m->aux_offset);
|
||||||
|
|
||||||
mstate_aux *aux = aux_base + i;
|
mstate_aux *aux = aux_base + i;
|
||||||
assert((const char *)aux < (const char *)n + m->length);
|
assert(reinterpret_cast<const char *>(aux) < reinterpret_cast<const char *>(n) + m->length);
|
||||||
return aux;
|
return aux;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -292,8 +292,8 @@ void createShuffleMasks64(mcsheng64 *m, const dfa_info &info,
|
|||||||
}
|
}
|
||||||
for (u32 i = 0; i < N_CHARS; i++) {
|
for (u32 i = 0; i < N_CHARS; i++) {
|
||||||
assert(info.alpha_remap[i] != info.alpha_remap[TOP]);
|
assert(info.alpha_remap[i] != info.alpha_remap[TOP]);
|
||||||
memcpy((u8 *)&m->sheng_succ_masks[i],
|
memcpy(reinterpret_cast<u8 *>(&m->sheng_succ_masks[i]),
|
||||||
(u8 *)masks[info.alpha_remap[i]].data(), sizeof(m512));
|
reinterpret_cast<u8 *>(masks[info.alpha_remap[i]].data()), sizeof(m512));
|
||||||
}
|
}
|
||||||
m->sheng_end = sheng_end;
|
m->sheng_end = sheng_end;
|
||||||
m->sheng_accel_limit = sheng_end - 1;
|
m->sheng_accel_limit = sheng_end - 1;
|
||||||
@ -323,7 +323,7 @@ void populateBasicInfo64(size_t state_size, const dfa_info &info,
|
|||||||
nfa->type = MCSHENG_64_NFA_16;
|
nfa->type = MCSHENG_64_NFA_16;
|
||||||
}
|
}
|
||||||
|
|
||||||
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa);
|
mcsheng64 *m = reinterpret_cast<mcsheng64 *>(getMutableImplNfa(nfa));
|
||||||
for (u32 i = 0; i < 256; i++) {
|
for (u32 i = 0; i < 256; i++) {
|
||||||
m->remap[i] = verify_u8(info.alpha_remap[i]);
|
m->remap[i] = verify_u8(info.alpha_remap[i]);
|
||||||
}
|
}
|
||||||
@ -534,7 +534,7 @@ double leakiness(const RdfaGraph &g, dfa_info &info,
|
|||||||
|
|
||||||
static
|
static
|
||||||
dstate_id_t find_sheng_states(dfa_info &info,
|
dstate_id_t find_sheng_states(dfa_info &info,
|
||||||
map<dstate_id_t, AccelScheme> &accel_escape_info,
|
const map<dstate_id_t, AccelScheme> &accel_escape_info,
|
||||||
size_t max_sheng_states) {
|
size_t max_sheng_states) {
|
||||||
RdfaGraph g(info.raw);
|
RdfaGraph g(info.raw);
|
||||||
auto cyclics = find_vertices_in_cycles(g);
|
auto cyclics = find_vertices_in_cycles(g);
|
||||||
@ -650,7 +650,7 @@ void fill_in_aux_info(NFA *nfa, const dfa_info &info,
|
|||||||
const vector<u32> &reports_eod,
|
const vector<u32> &reports_eod,
|
||||||
u32 report_base_offset,
|
u32 report_base_offset,
|
||||||
const raw_report_info &ri) {
|
const raw_report_info &ri) {
|
||||||
mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);
|
mcsheng *m = reinterpret_cast<mcsheng *>(getMutableImplNfa(nfa));
|
||||||
|
|
||||||
vector<u32> reportOffsets;
|
vector<u32> reportOffsets;
|
||||||
|
|
||||||
@ -667,14 +667,14 @@ void fill_in_aux_info(NFA *nfa, const dfa_info &info,
|
|||||||
assert(accel_offset <= accel_end_offset);
|
assert(accel_offset <= accel_end_offset);
|
||||||
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
||||||
info.strat.buildAccel(i, accel_escape_info.at(i),
|
info.strat.buildAccel(i, accel_escape_info.at(i),
|
||||||
(void *)((char *)m + this_aux->accel_offset));
|
reinterpret_cast<void *>(reinterpret_cast<char *>(m) + this_aux->accel_offset));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
u16 get_edge_flags(NFA *nfa, dstate_id_t target_impl_id) {
|
u16 get_edge_flags(NFA *nfa, dstate_id_t target_impl_id) {
|
||||||
mstate_aux *aux = getAux(nfa, target_impl_id);
|
const mstate_aux *aux = getAux(nfa, target_impl_id);
|
||||||
u16 flags = 0;
|
u16 flags = 0;
|
||||||
|
|
||||||
if (aux->accept) {
|
if (aux->accept) {
|
||||||
@ -692,7 +692,7 @@ static
|
|||||||
void fill_in_succ_table_16(NFA *nfa, const dfa_info &info,
|
void fill_in_succ_table_16(NFA *nfa, const dfa_info &info,
|
||||||
dstate_id_t sheng_end,
|
dstate_id_t sheng_end,
|
||||||
UNUSED dstate_id_t sherman_base) {
|
UNUSED dstate_id_t sherman_base) {
|
||||||
u16 *succ_table = (u16 *)((char *)nfa + sizeof(NFA) + sizeof(mcsheng));
|
u16 *succ_table = reinterpret_cast<u16 *>(reinterpret_cast<char *>(nfa) + sizeof(NFA) + sizeof(mcsheng));
|
||||||
|
|
||||||
u8 alphaShift = info.getAlphaShift();
|
u8 alphaShift = info.getAlphaShift();
|
||||||
assert(alphaShift <= 8);
|
assert(alphaShift <= 8);
|
||||||
@ -724,7 +724,7 @@ void fill_in_aux_info64(NFA *nfa, const dfa_info &info,
|
|||||||
const vector<u32> &reports_eod,
|
const vector<u32> &reports_eod,
|
||||||
u32 report_base_offset,
|
u32 report_base_offset,
|
||||||
const raw_report_info &ri) {
|
const raw_report_info &ri) {
|
||||||
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa);
|
mcsheng64 *m = reinterpret_cast<mcsheng64 *>(getMutableImplNfa(nfa));
|
||||||
|
|
||||||
vector<u32> reportOffsets;
|
vector<u32> reportOffsets;
|
||||||
|
|
||||||
@ -741,14 +741,14 @@ void fill_in_aux_info64(NFA *nfa, const dfa_info &info,
|
|||||||
assert(accel_offset <= accel_end_offset);
|
assert(accel_offset <= accel_end_offset);
|
||||||
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
||||||
info.strat.buildAccel(i, accel_escape_info.at(i),
|
info.strat.buildAccel(i, accel_escape_info.at(i),
|
||||||
(void *)((char *)m + this_aux->accel_offset));
|
reinterpret_cast<void *>(reinterpret_cast<char *>(m) + this_aux->accel_offset));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
u16 get_edge_flags64(NFA *nfa, dstate_id_t target_impl_id) {
|
u16 get_edge_flags64(NFA *nfa, dstate_id_t target_impl_id) {
|
||||||
mstate_aux *aux = getAux64(nfa, target_impl_id);
|
const mstate_aux *aux = getAux64(nfa, target_impl_id);
|
||||||
u16 flags = 0;
|
u16 flags = 0;
|
||||||
|
|
||||||
if (aux->accept) {
|
if (aux->accept) {
|
||||||
@ -766,7 +766,7 @@ static
|
|||||||
void fill_in_succ_table_64_16(NFA *nfa, const dfa_info &info,
|
void fill_in_succ_table_64_16(NFA *nfa, const dfa_info &info,
|
||||||
dstate_id_t sheng_end,
|
dstate_id_t sheng_end,
|
||||||
UNUSED dstate_id_t sherman_base) {
|
UNUSED dstate_id_t sherman_base) {
|
||||||
u16 *succ_table = (u16 *)((char *)nfa + sizeof(NFA) + sizeof(mcsheng64));
|
u16 *succ_table = reinterpret_cast<u16 *>(reinterpret_cast<char *>(nfa) + sizeof(NFA) + sizeof(mcsheng64));
|
||||||
|
|
||||||
u8 alphaShift = info.getAlphaShift();
|
u8 alphaShift = info.getAlphaShift();
|
||||||
assert(alphaShift <= 8);
|
assert(alphaShift <= 8);
|
||||||
@ -955,9 +955,9 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
|
void fill_in_sherman(NFA *nfa, const dfa_info &info, UNUSED u16 sherman_limit) {
|
||||||
char *nfa_base = (char *)nfa;
|
char *nfa_base = reinterpret_cast<char *>(nfa);
|
||||||
mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);
|
mcsheng *m = reinterpret_cast<mcsheng *>(getMutableImplNfa(nfa));
|
||||||
char *sherman_table = nfa_base + m->sherman_offset;
|
char *sherman_table = nfa_base + m->sherman_offset;
|
||||||
|
|
||||||
assert(ISALIGNED_16(sherman_table));
|
assert(ISALIGNED_16(sherman_table));
|
||||||
@ -978,10 +978,10 @@ void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
|
|||||||
assert(len <= 9);
|
assert(len <= 9);
|
||||||
dstate_id_t d = info.states[i].daddy;
|
dstate_id_t d = info.states[i].daddy;
|
||||||
|
|
||||||
*(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE;
|
*(reinterpret_cast<u8 *>(curr_sherman_entry + SHERMAN_TYPE_OFFSET)) = SHERMAN_STATE;
|
||||||
*(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len;
|
*(reinterpret_cast<u8 *>(curr_sherman_entry + SHERMAN_LEN_OFFSET)) = len;
|
||||||
*(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d);
|
*(reinterpret_cast<u16 *>(curr_sherman_entry + SHERMAN_DADDY_OFFSET)) = info.implId(d);
|
||||||
u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET);
|
u8 *chars = reinterpret_cast<u8 *>(curr_sherman_entry + SHERMAN_CHARS_OFFSET);
|
||||||
|
|
||||||
for (u16 s = 0; s < info.impl_alpha_size; s++) {
|
for (u16 s = 0; s < info.impl_alpha_size; s++) {
|
||||||
if (info.states[i].next[s] != info.states[d].next[s]) {
|
if (info.states[i].next[s] != info.states[d].next[s]) {
|
||||||
@ -989,7 +989,7 @@ void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
|
u16 *states = reinterpret_cast<u16 *>(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
|
||||||
for (u16 s = 0; s < info.impl_alpha_size; s++) {
|
for (u16 s = 0; s < info.impl_alpha_size; s++) {
|
||||||
if (info.states[i].next[s] != info.states[d].next[s]) {
|
if (info.states[i].next[s] != info.states[d].next[s]) {
|
||||||
DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs,
|
DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs,
|
||||||
@ -997,7 +997,7 @@ void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
|
|||||||
info.implId(info.states[i].next[s]));
|
info.implId(info.states[i].next[s]));
|
||||||
u16 entry_val = info.implId(info.states[i].next[s]);
|
u16 entry_val = info.implId(info.states[i].next[s]);
|
||||||
entry_val |= get_edge_flags(nfa, entry_val);
|
entry_val |= get_edge_flags(nfa, entry_val);
|
||||||
unaligned_store_u16((u8 *)states++, entry_val);
|
unaligned_store_u16(reinterpret_cast<u8 *>(states++), entry_val);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1018,12 +1018,16 @@ bytecode_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end,
|
|||||||
|
|
||||||
// Sherman optimization
|
// Sherman optimization
|
||||||
if (info.impl_alpha_size > 16) {
|
if (info.impl_alpha_size > 16) {
|
||||||
|
#ifdef DEBUG
|
||||||
u16 total_daddy = 0;
|
u16 total_daddy = 0;
|
||||||
|
#endif // DEBUG
|
||||||
for (u32 i = 0; i < info.size(); i++) {
|
for (u32 i = 0; i < info.size(); i++) {
|
||||||
find_better_daddy(info, i,
|
find_better_daddy(info, i,
|
||||||
is_cyclic_near(info.raw, info.raw.start_anchored),
|
is_cyclic_near(info.raw, info.raw.start_anchored),
|
||||||
grey);
|
grey);
|
||||||
|
#ifdef DEBUG
|
||||||
total_daddy += info.extra[i].daddytaken;
|
total_daddy += info.extra[i].daddytaken;
|
||||||
|
#endif // DEBUG
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
|
DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
|
||||||
@ -1035,7 +1039,7 @@ bytecode_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end,
|
|||||||
if (!allocateImplId16(info, sheng_end, &sherman_limit)) {
|
if (!allocateImplId16(info, sheng_end, &sherman_limit)) {
|
||||||
DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
|
DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
|
||||||
info.size());
|
info.size());
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
u16 count_real_states = sherman_limit - sheng_end;
|
u16 count_real_states = sherman_limit - sheng_end;
|
||||||
|
|
||||||
@ -1059,7 +1063,7 @@ bytecode_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end,
|
|||||||
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
||||||
|
|
||||||
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
||||||
mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get());
|
mcsheng *m = reinterpret_cast<mcsheng *>(getMutableImplNfa(nfa.get()));
|
||||||
|
|
||||||
populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset,
|
populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset,
|
||||||
accel_escape_info.size(), arb, single, nfa.get());
|
accel_escape_info.size(), arb, single, nfa.get());
|
||||||
@ -1087,7 +1091,7 @@ bytecode_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end,
|
|||||||
static
|
static
|
||||||
void fill_in_succ_table_8(NFA *nfa, const dfa_info &info,
|
void fill_in_succ_table_8(NFA *nfa, const dfa_info &info,
|
||||||
dstate_id_t sheng_end) {
|
dstate_id_t sheng_end) {
|
||||||
u8 *succ_table = (u8 *)nfa + sizeof(NFA) + sizeof(mcsheng);
|
u8 *succ_table = reinterpret_cast<u8 *>(reinterpret_cast<char *>(nfa) + sizeof(NFA) + sizeof(mcsheng));
|
||||||
|
|
||||||
u8 alphaShift = info.getAlphaShift();
|
u8 alphaShift = info.getAlphaShift();
|
||||||
assert(alphaShift <= 8);
|
assert(alphaShift <= 8);
|
||||||
@ -1109,9 +1113,9 @@ void fill_in_succ_table_8(NFA *nfa, const dfa_info &info,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void fill_in_sherman64(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
|
void fill_in_sherman64(NFA *nfa, const dfa_info &info, UNUSED u16 sherman_limit) {
|
||||||
char *nfa_base = (char *)nfa;
|
char *nfa_base = reinterpret_cast<char *>(nfa);
|
||||||
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa);
|
mcsheng *m = reinterpret_cast<mcsheng *>(getMutableImplNfa(nfa));
|
||||||
char *sherman_table = nfa_base + m->sherman_offset;
|
char *sherman_table = nfa_base + m->sherman_offset;
|
||||||
|
|
||||||
assert(ISALIGNED_16(sherman_table));
|
assert(ISALIGNED_16(sherman_table));
|
||||||
@ -1132,10 +1136,10 @@ void fill_in_sherman64(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
|
|||||||
assert(len <= 9);
|
assert(len <= 9);
|
||||||
dstate_id_t d = info.states[i].daddy;
|
dstate_id_t d = info.states[i].daddy;
|
||||||
|
|
||||||
*(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE;
|
*(reinterpret_cast<u8 *>(curr_sherman_entry + SHERMAN_TYPE_OFFSET)) = SHERMAN_STATE;
|
||||||
*(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len;
|
*(reinterpret_cast<u8 *>(curr_sherman_entry + SHERMAN_LEN_OFFSET)) = len;
|
||||||
*(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d);
|
*(reinterpret_cast<u16 *>(curr_sherman_entry + SHERMAN_DADDY_OFFSET)) = info.implId(d);
|
||||||
u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET);
|
u8 *chars = reinterpret_cast<u8 *>(curr_sherman_entry + SHERMAN_CHARS_OFFSET);
|
||||||
|
|
||||||
for (u16 s = 0; s < info.impl_alpha_size; s++) {
|
for (u16 s = 0; s < info.impl_alpha_size; s++) {
|
||||||
if (info.states[i].next[s] != info.states[d].next[s]) {
|
if (info.states[i].next[s] != info.states[d].next[s]) {
|
||||||
@ -1143,7 +1147,7 @@ void fill_in_sherman64(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
|
u16 *states = reinterpret_cast<u16 *>(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
|
||||||
for (u16 s = 0; s < info.impl_alpha_size; s++) {
|
for (u16 s = 0; s < info.impl_alpha_size; s++) {
|
||||||
if (info.states[i].next[s] != info.states[d].next[s]) {
|
if (info.states[i].next[s] != info.states[d].next[s]) {
|
||||||
DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs,
|
DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs,
|
||||||
@ -1151,7 +1155,7 @@ void fill_in_sherman64(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
|
|||||||
info.implId(info.states[i].next[s]));
|
info.implId(info.states[i].next[s]));
|
||||||
u16 entry_val = info.implId(info.states[i].next[s]);
|
u16 entry_val = info.implId(info.states[i].next[s]);
|
||||||
entry_val |= get_edge_flags64(nfa, entry_val);
|
entry_val |= get_edge_flags64(nfa, entry_val);
|
||||||
unaligned_store_u16((u8 *)states++, entry_val);
|
unaligned_store_u16(reinterpret_cast<u8 *>(states++), entry_val);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1172,12 +1176,16 @@ bytecode_ptr<NFA> mcsheng64Compile16(dfa_info&info, dstate_id_t sheng_end,
|
|||||||
|
|
||||||
// Sherman optimization
|
// Sherman optimization
|
||||||
if (info.impl_alpha_size > 16) {
|
if (info.impl_alpha_size > 16) {
|
||||||
|
#ifdef DEBUG
|
||||||
u16 total_daddy = 0;
|
u16 total_daddy = 0;
|
||||||
|
#endif // DEBUG
|
||||||
for (u32 i = 0; i < info.size(); i++) {
|
for (u32 i = 0; i < info.size(); i++) {
|
||||||
find_better_daddy(info, i,
|
find_better_daddy(info, i,
|
||||||
is_cyclic_near(info.raw, info.raw.start_anchored),
|
is_cyclic_near(info.raw, info.raw.start_anchored),
|
||||||
grey);
|
grey);
|
||||||
|
#ifdef DEBUG
|
||||||
total_daddy += info.extra[i].daddytaken;
|
total_daddy += info.extra[i].daddytaken;
|
||||||
|
#endif // DEBUG
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
|
DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
|
||||||
@ -1189,7 +1197,7 @@ bytecode_ptr<NFA> mcsheng64Compile16(dfa_info&info, dstate_id_t sheng_end,
|
|||||||
if (!allocateImplId16(info, sheng_end, &sherman_limit)) {
|
if (!allocateImplId16(info, sheng_end, &sherman_limit)) {
|
||||||
DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
|
DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
|
||||||
info.size());
|
info.size());
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
u16 count_real_states = sherman_limit - sheng_end;
|
u16 count_real_states = sherman_limit - sheng_end;
|
||||||
|
|
||||||
@ -1213,7 +1221,7 @@ bytecode_ptr<NFA> mcsheng64Compile16(dfa_info&info, dstate_id_t sheng_end,
|
|||||||
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
||||||
|
|
||||||
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
||||||
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa.get());
|
mcsheng64 *m = reinterpret_cast<mcsheng64 *>(getMutableImplNfa(nfa.get()));
|
||||||
|
|
||||||
populateBasicInfo64(sizeof(u16), info, total_size, aux_offset, accel_offset,
|
populateBasicInfo64(sizeof(u16), info, total_size, aux_offset, accel_offset,
|
||||||
accel_escape_info.size(), arb, single, nfa.get());
|
accel_escape_info.size(), arb, single, nfa.get());
|
||||||
@ -1241,7 +1249,7 @@ bytecode_ptr<NFA> mcsheng64Compile16(dfa_info&info, dstate_id_t sheng_end,
|
|||||||
static
|
static
|
||||||
void fill_in_succ_table_64_8(NFA *nfa, const dfa_info &info,
|
void fill_in_succ_table_64_8(NFA *nfa, const dfa_info &info,
|
||||||
dstate_id_t sheng_end) {
|
dstate_id_t sheng_end) {
|
||||||
u8 *succ_table = (u8 *)nfa + sizeof(NFA) + sizeof(mcsheng64);
|
u8 *succ_table = reinterpret_cast<u8 *>(reinterpret_cast<char *>(nfa) + sizeof(NFA) + sizeof(mcsheng));
|
||||||
|
|
||||||
u8 alphaShift = info.getAlphaShift();
|
u8 alphaShift = info.getAlphaShift();
|
||||||
assert(alphaShift <= 8);
|
assert(alphaShift <= 8);
|
||||||
@ -1339,7 +1347,7 @@ bytecode_ptr<NFA> mcshengCompile8(dfa_info &info, dstate_id_t sheng_end,
|
|||||||
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
||||||
|
|
||||||
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
||||||
mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get());
|
mcsheng *m = reinterpret_cast<mcsheng *>(getMutableImplNfa(nfa.get()));
|
||||||
|
|
||||||
allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8,
|
allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8,
|
||||||
&m->accept_limit_8);
|
&m->accept_limit_8);
|
||||||
@ -1392,7 +1400,7 @@ bytecode_ptr<NFA> mcsheng64Compile8(dfa_info &info, dstate_id_t sheng_end,
|
|||||||
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
||||||
|
|
||||||
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
||||||
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa.get());
|
mcsheng64 *m = reinterpret_cast<mcsheng64 *>(getMutableImplNfa(nfa.get()));
|
||||||
|
|
||||||
allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8,
|
allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8,
|
||||||
&m->accept_limit_8);
|
&m->accept_limit_8);
|
||||||
@ -1414,7 +1422,7 @@ bytecode_ptr<NFA> mcsheng64Compile8(dfa_info &info, dstate_id_t sheng_end,
|
|||||||
bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
||||||
const ReportManager &rm) {
|
const ReportManager &rm) {
|
||||||
if (!cc.grey.allowMcSheng) {
|
if (!cc.grey.allowMcSheng) {
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
mcclellan_build_strat mbs(raw, rm, false);
|
mcclellan_build_strat mbs(raw, rm, false);
|
||||||
@ -1430,12 +1438,10 @@ bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
|||||||
|
|
||||||
map<dstate_id_t, AccelScheme> accel_escape_info
|
map<dstate_id_t, AccelScheme> accel_escape_info
|
||||||
= info.strat.getAccelInfo(cc.grey);
|
= info.strat.getAccelInfo(cc.grey);
|
||||||
auto old_states = info.states;
|
|
||||||
dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info, MAX_SHENG_STATES);
|
dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info, MAX_SHENG_STATES);
|
||||||
|
|
||||||
if (sheng_end <= DEAD_STATE + 1) {
|
if (sheng_end <= DEAD_STATE + 1) {
|
||||||
info.states = old_states;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
return nullptr;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bytecode_ptr<NFA> nfa;
|
bytecode_ptr<NFA> nfa;
|
||||||
@ -1447,7 +1453,6 @@ bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!nfa) {
|
if (!nfa) {
|
||||||
info.states = old_states;
|
|
||||||
return nfa;
|
return nfa;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1462,12 +1467,12 @@ bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
|||||||
bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc,
|
bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc,
|
||||||
const ReportManager &rm) {
|
const ReportManager &rm) {
|
||||||
if (!cc.grey.allowMcSheng) {
|
if (!cc.grey.allowMcSheng) {
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!cc.target_info.has_avx512vbmi()) {
|
if (!cc.target_info.has_avx512vbmi()) {
|
||||||
DEBUG_PRINTF("McSheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
|
DEBUG_PRINTF("McSheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
mcclellan_build_strat mbs(raw, rm, false);
|
mcclellan_build_strat mbs(raw, rm, false);
|
||||||
@ -1488,7 +1493,7 @@ bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc,
|
|||||||
sheng_end64 = find_sheng_states(info, accel_escape_info, MAX_SHENG64_STATES);
|
sheng_end64 = find_sheng_states(info, accel_escape_info, MAX_SHENG64_STATES);
|
||||||
|
|
||||||
if (sheng_end64 <= DEAD_STATE + 1) {
|
if (sheng_end64 <= DEAD_STATE + 1) {
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
} else {
|
} else {
|
||||||
using64state = true;
|
using64state = true;
|
||||||
}
|
}
|
||||||
|
@ -512,7 +512,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
|
|||||||
|
|
||||||
verm_restart:;
|
verm_restart:;
|
||||||
assert(buf[curr] == kp->u.verm.c);
|
assert(buf[curr] == kp->u.verm.c);
|
||||||
size_t test = curr;
|
size_t test;
|
||||||
if (curr + min_rep < length) {
|
if (curr + min_rep < length) {
|
||||||
test = curr + min_rep;
|
test = curr + min_rep;
|
||||||
} else {
|
} else {
|
||||||
@ -534,7 +534,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
|
|||||||
m128 hi = kp->u.shuf.mask_hi;
|
m128 hi = kp->u.shuf.mask_hi;
|
||||||
shuf_restart:
|
shuf_restart:
|
||||||
assert(do_single_shufti(lo, hi, buf[curr]));
|
assert(do_single_shufti(lo, hi, buf[curr]));
|
||||||
size_t test = curr;
|
size_t test;
|
||||||
if (curr + min_rep < length) {
|
if (curr + min_rep < length) {
|
||||||
test = curr + min_rep;
|
test = curr + min_rep;
|
||||||
} else {
|
} else {
|
||||||
@ -556,7 +556,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
|
|||||||
const m128 mask1 = kp->u.truffle.mask1;
|
const m128 mask1 = kp->u.truffle.mask1;
|
||||||
const m128 mask2 = kp->u.truffle.mask2;
|
const m128 mask2 = kp->u.truffle.mask2;
|
||||||
truffle_restart:;
|
truffle_restart:;
|
||||||
size_t test = curr;
|
size_t test;
|
||||||
if (curr + min_rep < length) {
|
if (curr + min_rep < length) {
|
||||||
test = curr + min_rep;
|
test = curr + min_rep;
|
||||||
} else {
|
} else {
|
||||||
@ -582,7 +582,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
|
|||||||
|
|
||||||
nverm_restart:;
|
nverm_restart:;
|
||||||
assert(buf[curr] != kp->u.verm.c);
|
assert(buf[curr] != kp->u.verm.c);
|
||||||
size_t test = curr;
|
size_t test;
|
||||||
if (curr + min_rep < length) {
|
if (curr + min_rep < length) {
|
||||||
test = curr + min_rep;
|
test = curr + min_rep;
|
||||||
} else {
|
} else {
|
||||||
@ -607,7 +607,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
void restartKilo(const struct mpv *m, UNUSED u8 *active, u8 *reporters,
|
void restartKilo(const struct mpv *m, UNUSED const u8 *active, u8 *reporters,
|
||||||
struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
|
struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
|
||||||
const u8 *buf, u64a prev_limit, size_t buf_length, u32 i) {
|
const u8 *buf, u64a prev_limit, size_t buf_length, u32 i) {
|
||||||
const struct mpv_kilopuff *kp = (const void *)(m + 1);
|
const struct mpv_kilopuff *kp = (const void *)(m + 1);
|
||||||
@ -1074,7 +1074,7 @@ s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end) {
|
|||||||
return 0;
|
return 0;
|
||||||
} else {
|
} else {
|
||||||
const struct mpv *m = getImplNfa(nfa);
|
const struct mpv *m = getImplNfa(nfa);
|
||||||
u8 *reporters = (u8 *)q->state + m->reporter_offset;
|
const u8 *reporters = (u8 *)q->state + m->reporter_offset;
|
||||||
|
|
||||||
if (mmbit_any_precise(reporters, m->kilo_count)) {
|
if (mmbit_any_precise(reporters, m->kilo_count)) {
|
||||||
DEBUG_PRINTF("next byte\n");
|
DEBUG_PRINTF("next byte\n");
|
||||||
@ -1087,7 +1087,7 @@ s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end) {
|
|||||||
next_event = q->items[q->cur].location;
|
next_event = q->items[q->cur].location;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state;
|
const struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state;
|
||||||
struct mpv_pq_item *pq
|
struct mpv_pq_item *pq
|
||||||
= (struct mpv_pq_item *)(q->state + m->pq_offset);
|
= (struct mpv_pq_item *)(q->state + m->pq_offset);
|
||||||
if (s->pq_size) {
|
if (s->pq_size) {
|
||||||
|
@ -167,7 +167,7 @@ void pushQueueNoMerge(struct mq * restrict q, u32 e, s64a loc) {
|
|||||||
// We assert that the event is different from its predecessor. If it's a
|
// We assert that the event is different from its predecessor. If it's a
|
||||||
// dupe, you should have used the ordinary pushQueue call.
|
// dupe, you should have used the ordinary pushQueue call.
|
||||||
if (q->end) {
|
if (q->end) {
|
||||||
UNUSED struct mq_item *prev = &q->items[q->end - 1];
|
UNUSED const struct mq_item *prev = &q->items[q->end - 1];
|
||||||
assert(prev->type != e || prev->location != loc);
|
assert(prev->type != e || prev->location != loc);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -251,6 +251,10 @@ void q_skip_forward_to(struct mq *q, s64a min_loc) {
|
|||||||
// Dump the contents of the given queue.
|
// Dump the contents of the given queue.
|
||||||
static never_inline UNUSED
|
static never_inline UNUSED
|
||||||
void debugQueue(const struct mq *q) {
|
void debugQueue(const struct mq *q) {
|
||||||
|
if (q == nullptr) {
|
||||||
|
DEBUG_PRINTF("q=NULL!\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
DEBUG_PRINTF("q=%p, nfa=%p\n", q, q->nfa);
|
DEBUG_PRINTF("q=%p, nfa=%p\n", q, q->nfa);
|
||||||
DEBUG_PRINTF("q offset=%llu, buf={%p, len=%zu}, history={%p, len=%zu}\n",
|
DEBUG_PRINTF("q offset=%llu, buf={%p, len=%zu}, history={%p, len=%zu}\n",
|
||||||
q->offset, q->buffer, q->length, q->history, q->hlength);
|
q->offset, q->buffer, q->length, q->history, q->hlength);
|
||||||
|
@ -68,7 +68,7 @@ void pushQueueAt(struct mq * restrict q, u32 pos, u32 e, s64a loc) {
|
|||||||
// We assert that the event is different from its predecessor. If it's a
|
// We assert that the event is different from its predecessor. If it's a
|
||||||
// dupe, you should have used the ordinary pushQueue call.
|
// dupe, you should have used the ordinary pushQueue call.
|
||||||
if (q->end) {
|
if (q->end) {
|
||||||
UNUSED struct mq_item *prev = &q->items[q->end - 1];
|
UNUSED const struct mq_item *prev = &q->items[q->end - 1];
|
||||||
assert(prev->type != e || prev->location != loc);
|
assert(prev->type != e || prev->location != loc);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2020, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
|
* Copyright (c) 2024, VectorCamp PC
|
||||||
* Copyright (c) 2021, Arm Limited
|
* Copyright (c) 2021, Arm Limited
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
@ -133,6 +134,7 @@ struct ALIGN_CL_DIRECTIVE NFA {
|
|||||||
/* Note: implementation (e.g. a LimEx) directly follows struct in memory */
|
/* Note: implementation (e.g. a LimEx) directly follows struct in memory */
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
|
#ifndef __cplusplus
|
||||||
// Accessor macro for the implementation NFA: we do things this way to avoid
|
// Accessor macro for the implementation NFA: we do things this way to avoid
|
||||||
// type-punning warnings.
|
// type-punning warnings.
|
||||||
#define getImplNfa(nfa) \
|
#define getImplNfa(nfa) \
|
||||||
@ -140,6 +142,13 @@ struct ALIGN_CL_DIRECTIVE NFA {
|
|||||||
|
|
||||||
// Non-const version of the above, used at compile time.
|
// Non-const version of the above, used at compile time.
|
||||||
#define getMutableImplNfa(nfa) ((char *)(nfa) + sizeof(struct NFA))
|
#define getMutableImplNfa(nfa) ((char *)(nfa) + sizeof(struct NFA))
|
||||||
|
#else
|
||||||
|
// Same versions without C casts to avoid Cppcheck warnings
|
||||||
|
#define getImplNfa(nfa) \
|
||||||
|
(reinterpret_cast<const void *>(reinterpret_cast<const char *>(nfa) + sizeof(struct NFA)))
|
||||||
|
|
||||||
|
#define getMutableImplNfa(nfa) (reinterpret_cast<char *>(nfa) + sizeof(struct NFA))
|
||||||
|
#endif
|
||||||
|
|
||||||
static really_inline u32 nfaAcceptsEod(const struct NFA *nfa) {
|
static really_inline u32 nfaAcceptsEod(const struct NFA *nfa) {
|
||||||
return nfa->flags & NFA_ACCEPTS_EOD;
|
return nfa->flags & NFA_ACCEPTS_EOD;
|
||||||
|
@ -45,7 +45,7 @@ struct RdfaEdgeProps {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct RdfaGraph : public ue2_graph<RdfaGraph, RdfaVertexProps, RdfaEdgeProps> {
|
struct RdfaGraph : public ue2_graph<RdfaGraph, RdfaVertexProps, RdfaEdgeProps> {
|
||||||
RdfaGraph(const raw_dfa &rdfa);
|
explicit RdfaGraph(const raw_dfa &rdfa);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -785,7 +785,7 @@ enum RepeatMatch repeatHasMatchRange(const struct RepeatInfo *info,
|
|||||||
if (diff > info->repeatMax) {
|
if (diff > info->repeatMax) {
|
||||||
DEBUG_PRINTF("range list is stale\n");
|
DEBUG_PRINTF("range list is stale\n");
|
||||||
return REPEAT_STALE;
|
return REPEAT_STALE;
|
||||||
} else if (diff >= info->repeatMin && diff <= info->repeatMax) {
|
} else if (diff >= info->repeatMin) {
|
||||||
return REPEAT_MATCH;
|
return REPEAT_MATCH;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -836,7 +836,7 @@ enum RepeatMatch repeatHasMatchBitmap(const struct RepeatInfo *info,
|
|||||||
if (diff > info->repeatMax) {
|
if (diff > info->repeatMax) {
|
||||||
DEBUG_PRINTF("stale\n");
|
DEBUG_PRINTF("stale\n");
|
||||||
return REPEAT_STALE;
|
return REPEAT_STALE;
|
||||||
} else if (diff >= info->repeatMin && diff <= info->repeatMax) {
|
} else if (diff >= info->repeatMin) {
|
||||||
return REPEAT_MATCH;
|
return REPEAT_MATCH;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -94,9 +94,6 @@ u32 repeatRecurTable(struct RepeatStateInfo *info, const depth &repeatMax,
|
|||||||
static
|
static
|
||||||
u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax,
|
u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax,
|
||||||
const u32 minPeriod, u32 rv) {
|
const u32 minPeriod, u32 rv) {
|
||||||
u32 cnt = 0;
|
|
||||||
u32 patch_bits = 0;
|
|
||||||
u32 total_size = 0;
|
|
||||||
u32 min = ~0U;
|
u32 min = ~0U;
|
||||||
u32 patch_len = 0;
|
u32 patch_len = 0;
|
||||||
|
|
||||||
@ -105,11 +102,11 @@ u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax,
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (u32 i = minPeriod; i <= rv; i++) {
|
for (u32 i = minPeriod; i <= rv; i++) {
|
||||||
cnt = ((u32)repeatMax + (i - 1)) / i + 1;
|
u32 cnt = ((u32)repeatMax + (i - 1)) / i + 1;
|
||||||
|
|
||||||
// no bit packing version
|
// no bit packing version
|
||||||
patch_bits = calcPackedBits(info->table[i]);
|
u32 patch_bits = calcPackedBits(info->table[i]);
|
||||||
total_size = (patch_bits + 7U) / 8U * cnt;
|
u32 total_size = (patch_bits + 7U) / 8U * cnt;
|
||||||
|
|
||||||
if (total_size < min) {
|
if (total_size < min) {
|
||||||
patch_len = i;
|
patch_len = i;
|
||||||
|
@ -154,7 +154,7 @@ char fireReports(const struct sheng *sh, NfaCallback cb, void *ctxt,
|
|||||||
return MO_CONTINUE_MATCHING; /* continue execution */
|
return MO_CONTINUE_MATCHING; /* continue execution */
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
// Sheng32
|
// Sheng32
|
||||||
static really_inline
|
static really_inline
|
||||||
const struct sheng32 *get_sheng32(const struct NFA *n) {
|
const struct sheng32 *get_sheng32(const struct NFA *n) {
|
||||||
@ -351,7 +351,7 @@ char fireReports64(const struct sheng64 *sh, NfaCallback cb, void *ctxt,
|
|||||||
}
|
}
|
||||||
return MO_CONTINUE_MATCHING; /* continue execution */
|
return MO_CONTINUE_MATCHING; /* continue execution */
|
||||||
}
|
}
|
||||||
#endif // end of HAVE_AVX512VBMI
|
#endif // end of HAVE_AVX512VBMI || HAVE_SVE
|
||||||
|
|
||||||
/* include Sheng function definitions */
|
/* include Sheng function definitions */
|
||||||
#include "sheng_defs.h"
|
#include "sheng_defs.h"
|
||||||
@ -814,7 +814,6 @@ char nfaExecSheng_testEOD(const struct NFA *nfa, const char *state,
|
|||||||
char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q) {
|
char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q) {
|
||||||
const struct sheng *sh = (const struct sheng *)getImplNfa(n);
|
const struct sheng *sh = (const struct sheng *)getImplNfa(n);
|
||||||
NfaCallback cb = q->cb;
|
NfaCallback cb = q->cb;
|
||||||
void *ctxt = q->context;
|
|
||||||
u8 s = *(u8 *)q->state;
|
u8 s = *(u8 *)q->state;
|
||||||
const struct sstate_aux *aux = get_aux(sh, s);
|
const struct sstate_aux *aux = get_aux(sh, s);
|
||||||
u64a offset = q_cur_offset(q);
|
u64a offset = q_cur_offset(q);
|
||||||
@ -823,6 +822,7 @@ char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q) {
|
|||||||
assert(q_cur_type(q) == MQE_START);
|
assert(q_cur_type(q) == MQE_START);
|
||||||
|
|
||||||
if (aux->accept) {
|
if (aux->accept) {
|
||||||
|
void *ctxt = q->context;
|
||||||
if (sh->flags & SHENG_FLAG_SINGLE_REPORT) {
|
if (sh->flags & SHENG_FLAG_SINGLE_REPORT) {
|
||||||
fireSingleReport(cb, ctxt, sh->report, offset);
|
fireSingleReport(cb, ctxt, sh->report, offset);
|
||||||
} else {
|
} else {
|
||||||
@ -871,7 +871,7 @@ char nfaExecSheng_expandState(UNUSED const struct NFA *nfa, void *dest,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
// Sheng32
|
// Sheng32
|
||||||
static really_inline
|
static really_inline
|
||||||
char runSheng32Cb(const struct sheng32 *sh, NfaCallback cb, void *ctxt,
|
char runSheng32Cb(const struct sheng32 *sh, NfaCallback cb, void *ctxt,
|
||||||
@ -1874,4 +1874,4 @@ char nfaExecSheng64_expandState(UNUSED const struct NFA *nfa, void *dest,
|
|||||||
*(u8 *)dest = *(const u8 *)src;
|
*(u8 *)dest = *(const u8 *)src;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
#endif // end of HAVE_AVX512VBMI
|
#endif // end of HAVE_AVX512VBMI || HAVE_SVE
|
||||||
|
@ -58,7 +58,7 @@ char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q);
|
|||||||
char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer,
|
char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer,
|
||||||
size_t length, NfaCallback cb, void *context);
|
size_t length, NfaCallback cb, void *context);
|
||||||
|
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
|
#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
|
||||||
#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||||
|
|
||||||
@ -106,8 +106,7 @@ char nfaExecSheng64_reportCurrent(const struct NFA *n, struct mq *q);
|
|||||||
|
|
||||||
char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer,
|
char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer,
|
||||||
size_t length, NfaCallback cb, void *context);
|
size_t length, NfaCallback cb, void *context);
|
||||||
|
#else // !HAVE_AVX512VBMI && !HAVE_SVE
|
||||||
#else // !HAVE_AVX512VBMI
|
|
||||||
|
|
||||||
#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
|
#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
|
||||||
#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||||
@ -138,6 +137,7 @@ char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer,
|
|||||||
#define nfaExecSheng64_testEOD NFA_API_NO_IMPL
|
#define nfaExecSheng64_testEOD NFA_API_NO_IMPL
|
||||||
#define nfaExecSheng64_reportCurrent NFA_API_NO_IMPL
|
#define nfaExecSheng64_reportCurrent NFA_API_NO_IMPL
|
||||||
#define nfaExecSheng64_B NFA_API_NO_IMPL
|
#define nfaExecSheng64_B NFA_API_NO_IMPL
|
||||||
#endif // end of HAVE_AVX512VBMI
|
#endif // end of HAVE_AVX512VBMI || defined(HAVE_SVE)
|
||||||
|
|
||||||
|
|
||||||
#endif /* SHENG_H_ */
|
#endif /* SHENG_H_ */
|
||||||
|
@ -52,7 +52,7 @@ u8 hasInterestingStates(const u8 a, const u8 b, const u8 c, const u8 d) {
|
|||||||
return (a | b | c | d) & (SHENG_STATE_FLAG_MASK);
|
return (a | b | c | d) & (SHENG_STATE_FLAG_MASK);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
static really_inline
|
static really_inline
|
||||||
u8 isDeadState32(const u8 a) {
|
u8 isDeadState32(const u8 a) {
|
||||||
return a & SHENG32_STATE_DEAD;
|
return a & SHENG32_STATE_DEAD;
|
||||||
@ -108,7 +108,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define SHENG_IMPL sheng_cod
|
#define SHENG_IMPL sheng_cod
|
||||||
#define DEAD_FUNC isDeadState
|
#define DEAD_FUNC isDeadState
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#define SHENG32_IMPL sheng32_cod
|
#define SHENG32_IMPL sheng32_cod
|
||||||
#define DEAD_FUNC32 isDeadState32
|
#define DEAD_FUNC32 isDeadState32
|
||||||
#define ACCEPT_FUNC32 isAcceptState32
|
#define ACCEPT_FUNC32 isAcceptState32
|
||||||
@ -121,7 +121,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
#undef DEAD_FUNC
|
#undef DEAD_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#undef SHENG32_IMPL
|
#undef SHENG32_IMPL
|
||||||
#undef DEAD_FUNC32
|
#undef DEAD_FUNC32
|
||||||
#undef ACCEPT_FUNC32
|
#undef ACCEPT_FUNC32
|
||||||
@ -135,7 +135,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define SHENG_IMPL sheng_co
|
#define SHENG_IMPL sheng_co
|
||||||
#define DEAD_FUNC dummyFunc
|
#define DEAD_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#define SHENG32_IMPL sheng32_co
|
#define SHENG32_IMPL sheng32_co
|
||||||
#define DEAD_FUNC32 dummyFunc
|
#define DEAD_FUNC32 dummyFunc
|
||||||
#define ACCEPT_FUNC32 isAcceptState32
|
#define ACCEPT_FUNC32 isAcceptState32
|
||||||
@ -148,7 +148,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
#undef DEAD_FUNC
|
#undef DEAD_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#undef SHENG32_IMPL
|
#undef SHENG32_IMPL
|
||||||
#undef DEAD_FUNC32
|
#undef DEAD_FUNC32
|
||||||
#undef ACCEPT_FUNC32
|
#undef ACCEPT_FUNC32
|
||||||
@ -162,7 +162,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define SHENG_IMPL sheng_samd
|
#define SHENG_IMPL sheng_samd
|
||||||
#define DEAD_FUNC isDeadState
|
#define DEAD_FUNC isDeadState
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#define SHENG32_IMPL sheng32_samd
|
#define SHENG32_IMPL sheng32_samd
|
||||||
#define DEAD_FUNC32 isDeadState32
|
#define DEAD_FUNC32 isDeadState32
|
||||||
#define ACCEPT_FUNC32 isAcceptState32
|
#define ACCEPT_FUNC32 isAcceptState32
|
||||||
@ -175,7 +175,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
#undef DEAD_FUNC
|
#undef DEAD_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#undef SHENG32_IMPL
|
#undef SHENG32_IMPL
|
||||||
#undef DEAD_FUNC32
|
#undef DEAD_FUNC32
|
||||||
#undef ACCEPT_FUNC32
|
#undef ACCEPT_FUNC32
|
||||||
@ -189,7 +189,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define SHENG_IMPL sheng_sam
|
#define SHENG_IMPL sheng_sam
|
||||||
#define DEAD_FUNC dummyFunc
|
#define DEAD_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#define SHENG32_IMPL sheng32_sam
|
#define SHENG32_IMPL sheng32_sam
|
||||||
#define DEAD_FUNC32 dummyFunc
|
#define DEAD_FUNC32 dummyFunc
|
||||||
#define ACCEPT_FUNC32 isAcceptState32
|
#define ACCEPT_FUNC32 isAcceptState32
|
||||||
@ -202,7 +202,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
#undef DEAD_FUNC
|
#undef DEAD_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#undef SHENG32_IMPL
|
#undef SHENG32_IMPL
|
||||||
#undef DEAD_FUNC32
|
#undef DEAD_FUNC32
|
||||||
#undef ACCEPT_FUNC32
|
#undef ACCEPT_FUNC32
|
||||||
@ -216,7 +216,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define SHENG_IMPL sheng_nmd
|
#define SHENG_IMPL sheng_nmd
|
||||||
#define DEAD_FUNC isDeadState
|
#define DEAD_FUNC isDeadState
|
||||||
#define ACCEPT_FUNC dummyFunc
|
#define ACCEPT_FUNC dummyFunc
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#define SHENG32_IMPL sheng32_nmd
|
#define SHENG32_IMPL sheng32_nmd
|
||||||
#define DEAD_FUNC32 isDeadState32
|
#define DEAD_FUNC32 isDeadState32
|
||||||
#define ACCEPT_FUNC32 dummyFunc
|
#define ACCEPT_FUNC32 dummyFunc
|
||||||
@ -229,7 +229,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
#undef DEAD_FUNC
|
#undef DEAD_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#undef SHENG32_IMPL
|
#undef SHENG32_IMPL
|
||||||
#undef DEAD_FUNC32
|
#undef DEAD_FUNC32
|
||||||
#undef ACCEPT_FUNC32
|
#undef ACCEPT_FUNC32
|
||||||
@ -243,7 +243,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define SHENG_IMPL sheng_nm
|
#define SHENG_IMPL sheng_nm
|
||||||
#define DEAD_FUNC dummyFunc
|
#define DEAD_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC dummyFunc
|
#define ACCEPT_FUNC dummyFunc
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#define SHENG32_IMPL sheng32_nm
|
#define SHENG32_IMPL sheng32_nm
|
||||||
#define DEAD_FUNC32 dummyFunc
|
#define DEAD_FUNC32 dummyFunc
|
||||||
#define ACCEPT_FUNC32 dummyFunc
|
#define ACCEPT_FUNC32 dummyFunc
|
||||||
@ -256,7 +256,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
#undef DEAD_FUNC
|
#undef DEAD_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#undef SHENG32_IMPL
|
#undef SHENG32_IMPL
|
||||||
#undef DEAD_FUNC32
|
#undef DEAD_FUNC32
|
||||||
#undef ACCEPT_FUNC32
|
#undef ACCEPT_FUNC32
|
||||||
@ -277,7 +277,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define INNER_ACCEL_FUNC isAccelState
|
#define INNER_ACCEL_FUNC isAccelState
|
||||||
#define OUTER_ACCEL_FUNC dummyFunc
|
#define OUTER_ACCEL_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#define SHENG32_IMPL sheng32_4_coda
|
#define SHENG32_IMPL sheng32_4_coda
|
||||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||||
#define INNER_DEAD_FUNC32 isDeadState32
|
#define INNER_DEAD_FUNC32 isDeadState32
|
||||||
@ -296,7 +296,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef INNER_ACCEL_FUNC
|
#undef INNER_ACCEL_FUNC
|
||||||
#undef OUTER_ACCEL_FUNC
|
#undef OUTER_ACCEL_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#undef SHENG32_IMPL
|
#undef SHENG32_IMPL
|
||||||
#undef INTERESTING_FUNC32
|
#undef INTERESTING_FUNC32
|
||||||
#undef INNER_DEAD_FUNC32
|
#undef INNER_DEAD_FUNC32
|
||||||
@ -316,7 +316,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define INNER_ACCEL_FUNC dummyFunc
|
#define INNER_ACCEL_FUNC dummyFunc
|
||||||
#define OUTER_ACCEL_FUNC dummyFunc
|
#define OUTER_ACCEL_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#define SHENG32_IMPL sheng32_4_cod
|
#define SHENG32_IMPL sheng32_4_cod
|
||||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||||
#define INNER_DEAD_FUNC32 isDeadState32
|
#define INNER_DEAD_FUNC32 isDeadState32
|
||||||
@ -339,7 +339,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef INNER_ACCEL_FUNC
|
#undef INNER_ACCEL_FUNC
|
||||||
#undef OUTER_ACCEL_FUNC
|
#undef OUTER_ACCEL_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#undef SHENG32_IMPL
|
#undef SHENG32_IMPL
|
||||||
#undef INTERESTING_FUNC32
|
#undef INTERESTING_FUNC32
|
||||||
#undef INNER_DEAD_FUNC32
|
#undef INNER_DEAD_FUNC32
|
||||||
@ -363,7 +363,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define INNER_ACCEL_FUNC isAccelState
|
#define INNER_ACCEL_FUNC isAccelState
|
||||||
#define OUTER_ACCEL_FUNC dummyFunc
|
#define OUTER_ACCEL_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#define SHENG32_IMPL sheng32_4_coa
|
#define SHENG32_IMPL sheng32_4_coa
|
||||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||||
#define INNER_DEAD_FUNC32 dummyFunc
|
#define INNER_DEAD_FUNC32 dummyFunc
|
||||||
@ -382,7 +382,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef INNER_ACCEL_FUNC
|
#undef INNER_ACCEL_FUNC
|
||||||
#undef OUTER_ACCEL_FUNC
|
#undef OUTER_ACCEL_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#undef SHENG32_IMPL
|
#undef SHENG32_IMPL
|
||||||
#undef INTERESTING_FUNC32
|
#undef INTERESTING_FUNC32
|
||||||
#undef INNER_DEAD_FUNC32
|
#undef INNER_DEAD_FUNC32
|
||||||
@ -402,7 +402,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define INNER_ACCEL_FUNC dummyFunc
|
#define INNER_ACCEL_FUNC dummyFunc
|
||||||
#define OUTER_ACCEL_FUNC dummyFunc
|
#define OUTER_ACCEL_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#define SHENG32_IMPL sheng32_4_co
|
#define SHENG32_IMPL sheng32_4_co
|
||||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||||
#define INNER_DEAD_FUNC32 dummyFunc
|
#define INNER_DEAD_FUNC32 dummyFunc
|
||||||
@ -425,7 +425,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef INNER_ACCEL_FUNC
|
#undef INNER_ACCEL_FUNC
|
||||||
#undef OUTER_ACCEL_FUNC
|
#undef OUTER_ACCEL_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#undef SHENG32_IMPL
|
#undef SHENG32_IMPL
|
||||||
#undef INTERESTING_FUNC32
|
#undef INTERESTING_FUNC32
|
||||||
#undef INNER_DEAD_FUNC32
|
#undef INNER_DEAD_FUNC32
|
||||||
@ -449,7 +449,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define INNER_ACCEL_FUNC isAccelState
|
#define INNER_ACCEL_FUNC isAccelState
|
||||||
#define OUTER_ACCEL_FUNC dummyFunc
|
#define OUTER_ACCEL_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#define SHENG32_IMPL sheng32_4_samda
|
#define SHENG32_IMPL sheng32_4_samda
|
||||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||||
#define INNER_DEAD_FUNC32 isDeadState32
|
#define INNER_DEAD_FUNC32 isDeadState32
|
||||||
@ -468,7 +468,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef INNER_ACCEL_FUNC
|
#undef INNER_ACCEL_FUNC
|
||||||
#undef OUTER_ACCEL_FUNC
|
#undef OUTER_ACCEL_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#undef SHENG32_IMPL
|
#undef SHENG32_IMPL
|
||||||
#undef INTERESTING_FUNC32
|
#undef INTERESTING_FUNC32
|
||||||
#undef INNER_DEAD_FUNC32
|
#undef INNER_DEAD_FUNC32
|
||||||
@ -488,7 +488,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define INNER_ACCEL_FUNC dummyFunc
|
#define INNER_ACCEL_FUNC dummyFunc
|
||||||
#define OUTER_ACCEL_FUNC dummyFunc
|
#define OUTER_ACCEL_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#define SHENG32_IMPL sheng32_4_samd
|
#define SHENG32_IMPL sheng32_4_samd
|
||||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||||
#define INNER_DEAD_FUNC32 isDeadState32
|
#define INNER_DEAD_FUNC32 isDeadState32
|
||||||
@ -511,7 +511,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef INNER_ACCEL_FUNC
|
#undef INNER_ACCEL_FUNC
|
||||||
#undef OUTER_ACCEL_FUNC
|
#undef OUTER_ACCEL_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#undef SHENG32_IMPL
|
#undef SHENG32_IMPL
|
||||||
#undef INTERESTING_FUNC32
|
#undef INTERESTING_FUNC32
|
||||||
#undef INNER_DEAD_FUNC32
|
#undef INNER_DEAD_FUNC32
|
||||||
@ -535,7 +535,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define INNER_ACCEL_FUNC isAccelState
|
#define INNER_ACCEL_FUNC isAccelState
|
||||||
#define OUTER_ACCEL_FUNC dummyFunc
|
#define OUTER_ACCEL_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#define SHENG32_IMPL sheng32_4_sama
|
#define SHENG32_IMPL sheng32_4_sama
|
||||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||||
#define INNER_DEAD_FUNC32 dummyFunc
|
#define INNER_DEAD_FUNC32 dummyFunc
|
||||||
@ -554,7 +554,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef INNER_ACCEL_FUNC
|
#undef INNER_ACCEL_FUNC
|
||||||
#undef OUTER_ACCEL_FUNC
|
#undef OUTER_ACCEL_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#undef SHENG32_IMPL
|
#undef SHENG32_IMPL
|
||||||
#undef INTERESTING_FUNC32
|
#undef INTERESTING_FUNC32
|
||||||
#undef INNER_DEAD_FUNC32
|
#undef INNER_DEAD_FUNC32
|
||||||
@ -574,7 +574,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define INNER_ACCEL_FUNC dummyFunc
|
#define INNER_ACCEL_FUNC dummyFunc
|
||||||
#define OUTER_ACCEL_FUNC dummyFunc
|
#define OUTER_ACCEL_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#define SHENG32_IMPL sheng32_4_sam
|
#define SHENG32_IMPL sheng32_4_sam
|
||||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||||
#define INNER_DEAD_FUNC32 dummyFunc
|
#define INNER_DEAD_FUNC32 dummyFunc
|
||||||
@ -597,7 +597,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef INNER_ACCEL_FUNC
|
#undef INNER_ACCEL_FUNC
|
||||||
#undef OUTER_ACCEL_FUNC
|
#undef OUTER_ACCEL_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#undef SHENG32_IMPL
|
#undef SHENG32_IMPL
|
||||||
#undef INTERESTING_FUNC32
|
#undef INTERESTING_FUNC32
|
||||||
#undef INNER_DEAD_FUNC32
|
#undef INNER_DEAD_FUNC32
|
||||||
@ -623,7 +623,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define INNER_ACCEL_FUNC dummyFunc
|
#define INNER_ACCEL_FUNC dummyFunc
|
||||||
#define OUTER_ACCEL_FUNC isAccelState
|
#define OUTER_ACCEL_FUNC isAccelState
|
||||||
#define ACCEPT_FUNC dummyFunc
|
#define ACCEPT_FUNC dummyFunc
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#define SHENG32_IMPL sheng32_4_nmda
|
#define SHENG32_IMPL sheng32_4_nmda
|
||||||
#define INTERESTING_FUNC32 dummyFunc4
|
#define INTERESTING_FUNC32 dummyFunc4
|
||||||
#define INNER_DEAD_FUNC32 dummyFunc
|
#define INNER_DEAD_FUNC32 dummyFunc
|
||||||
@ -642,7 +642,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef INNER_ACCEL_FUNC
|
#undef INNER_ACCEL_FUNC
|
||||||
#undef OUTER_ACCEL_FUNC
|
#undef OUTER_ACCEL_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#undef SHENG32_IMPL
|
#undef SHENG32_IMPL
|
||||||
#undef INTERESTING_FUNC32
|
#undef INTERESTING_FUNC32
|
||||||
#undef INNER_DEAD_FUNC32
|
#undef INNER_DEAD_FUNC32
|
||||||
@ -662,7 +662,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define INNER_ACCEL_FUNC dummyFunc
|
#define INNER_ACCEL_FUNC dummyFunc
|
||||||
#define OUTER_ACCEL_FUNC dummyFunc
|
#define OUTER_ACCEL_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC dummyFunc
|
#define ACCEPT_FUNC dummyFunc
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#define SHENG32_IMPL sheng32_4_nmd
|
#define SHENG32_IMPL sheng32_4_nmd
|
||||||
#define INTERESTING_FUNC32 dummyFunc4
|
#define INTERESTING_FUNC32 dummyFunc4
|
||||||
#define INNER_DEAD_FUNC32 dummyFunc
|
#define INNER_DEAD_FUNC32 dummyFunc
|
||||||
@ -685,7 +685,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef INNER_ACCEL_FUNC
|
#undef INNER_ACCEL_FUNC
|
||||||
#undef OUTER_ACCEL_FUNC
|
#undef OUTER_ACCEL_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#undef SHENG32_IMPL
|
#undef SHENG32_IMPL
|
||||||
#undef INTERESTING_FUNC32
|
#undef INTERESTING_FUNC32
|
||||||
#undef INNER_DEAD_FUNC32
|
#undef INNER_DEAD_FUNC32
|
||||||
@ -712,7 +712,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define INNER_ACCEL_FUNC dummyFunc
|
#define INNER_ACCEL_FUNC dummyFunc
|
||||||
#define OUTER_ACCEL_FUNC dummyFunc
|
#define OUTER_ACCEL_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC dummyFunc
|
#define ACCEPT_FUNC dummyFunc
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#define SHENG32_IMPL sheng32_4_nm
|
#define SHENG32_IMPL sheng32_4_nm
|
||||||
#define INTERESTING_FUNC32 dummyFunc4
|
#define INTERESTING_FUNC32 dummyFunc4
|
||||||
#define INNER_DEAD_FUNC32 dummyFunc
|
#define INNER_DEAD_FUNC32 dummyFunc
|
||||||
@ -735,7 +735,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef INNER_ACCEL_FUNC
|
#undef INNER_ACCEL_FUNC
|
||||||
#undef OUTER_ACCEL_FUNC
|
#undef OUTER_ACCEL_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
#undef SHENG32_IMPL
|
#undef SHENG32_IMPL
|
||||||
#undef INTERESTING_FUNC32
|
#undef INTERESTING_FUNC32
|
||||||
#undef INNER_DEAD_FUNC32
|
#undef INNER_DEAD_FUNC32
|
||||||
|
@ -96,7 +96,7 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
|
|||||||
return MO_CONTINUE_MATCHING;
|
return MO_CONTINUE_MATCHING;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
static really_inline
|
static really_inline
|
||||||
char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||||
const struct sheng32 *s,
|
const struct sheng32 *s,
|
||||||
@ -114,14 +114,28 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
|||||||
}
|
}
|
||||||
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
|
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
|
||||||
|
|
||||||
|
#if defined(HAVE_SVE)
|
||||||
|
const svbool_t lane_pred_32 = svwhilelt_b8(0, 32);
|
||||||
|
svuint8_t cur_state = svdup_u8(*state);
|
||||||
|
svuint8_t tbl_mask = svdup_u8((unsigned char)0x1F);
|
||||||
|
const m512 *masks = s->succ_masks;
|
||||||
|
#else
|
||||||
m512 cur_state = set1_64x8(*state);
|
m512 cur_state = set1_64x8(*state);
|
||||||
const m512 *masks = s->succ_masks;
|
const m512 *masks = s->succ_masks;
|
||||||
|
#endif
|
||||||
|
|
||||||
while (likely(cur_buf != end)) {
|
while (likely(cur_buf != end)) {
|
||||||
const u8 c = *cur_buf;
|
const u8 c = *cur_buf;
|
||||||
|
|
||||||
|
#if defined(HAVE_SVE)
|
||||||
|
svuint8_t succ_mask = svld1(lane_pred_32, (const u8*)(masks + c));
|
||||||
|
cur_state = svtbl(succ_mask, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||||
|
const u8 tmp = svlastb(lane_pred_32, cur_state);
|
||||||
|
#else
|
||||||
const m512 succ_mask = masks[c];
|
const m512 succ_mask = masks[c];
|
||||||
cur_state = vpermb512(cur_state, succ_mask);
|
cur_state = vpermb512(cur_state, succ_mask);
|
||||||
const u8 tmp = movd512(cur_state);
|
const u8 tmp = movd512(cur_state);
|
||||||
|
#endif
|
||||||
|
|
||||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
|
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
|
||||||
DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG32_STATE_MASK,
|
DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG32_STATE_MASK,
|
||||||
@ -153,7 +167,11 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
|||||||
}
|
}
|
||||||
cur_buf++;
|
cur_buf++;
|
||||||
}
|
}
|
||||||
|
#if defined(HAVE_SVE)
|
||||||
|
*state = svlastb(lane_pred_32, cur_state);
|
||||||
|
#else
|
||||||
*state = movd512(cur_state);
|
*state = movd512(cur_state);
|
||||||
|
#endif
|
||||||
*scan_end = cur_buf;
|
*scan_end = cur_buf;
|
||||||
return MO_CONTINUE_MATCHING;
|
return MO_CONTINUE_MATCHING;
|
||||||
}
|
}
|
||||||
@ -175,14 +193,28 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
|||||||
}
|
}
|
||||||
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
|
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
|
||||||
|
|
||||||
|
#if defined(HAVE_SVE)
|
||||||
|
const svbool_t lane_pred_64 = svwhilelt_b8(0, 64);
|
||||||
|
svuint8_t cur_state = svdup_u8(*state);
|
||||||
|
svuint8_t tbl_mask = svdup_u8((unsigned char)0x3F);
|
||||||
|
const m512 *masks = s->succ_masks;
|
||||||
|
#else
|
||||||
m512 cur_state = set1_64x8(*state);
|
m512 cur_state = set1_64x8(*state);
|
||||||
const m512 *masks = s->succ_masks;
|
const m512 *masks = s->succ_masks;
|
||||||
|
#endif
|
||||||
|
|
||||||
while (likely(cur_buf != end)) {
|
while (likely(cur_buf != end)) {
|
||||||
const u8 c = *cur_buf;
|
const u8 c = *cur_buf;
|
||||||
|
|
||||||
|
#if defined(HAVE_SVE)
|
||||||
|
svuint8_t succ_mask = svld1(lane_pred_64, (const u8*)(masks + c));
|
||||||
|
cur_state = svtbl(succ_mask, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||||
|
const u8 tmp = svlastb(lane_pred_64, cur_state);
|
||||||
|
#else
|
||||||
const m512 succ_mask = masks[c];
|
const m512 succ_mask = masks[c];
|
||||||
cur_state = vpermb512(cur_state, succ_mask);
|
cur_state = vpermb512(cur_state, succ_mask);
|
||||||
const u8 tmp = movd512(cur_state);
|
const u8 tmp = movd512(cur_state);
|
||||||
|
#endif
|
||||||
|
|
||||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
|
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
|
||||||
DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG64_STATE_MASK,
|
DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG64_STATE_MASK,
|
||||||
@ -214,7 +246,11 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
|||||||
}
|
}
|
||||||
cur_buf++;
|
cur_buf++;
|
||||||
}
|
}
|
||||||
|
#if defined(HAVE_SVE)
|
||||||
|
*state = svlastb(lane_pred_64, cur_state);
|
||||||
|
#else
|
||||||
*state = movd512(cur_state);
|
*state = movd512(cur_state);
|
||||||
|
#endif
|
||||||
*scan_end = cur_buf;
|
*scan_end = cur_buf;
|
||||||
return MO_CONTINUE_MATCHING;
|
return MO_CONTINUE_MATCHING;
|
||||||
}
|
}
|
||||||
|
@ -283,7 +283,7 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
|
|||||||
return MO_CONTINUE_MATCHING;
|
return MO_CONTINUE_MATCHING;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||||
static really_inline
|
static really_inline
|
||||||
char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||||
const struct sheng32 *s,
|
const struct sheng32 *s,
|
||||||
@ -320,8 +320,15 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
|||||||
return MO_CONTINUE_MATCHING;
|
return MO_CONTINUE_MATCHING;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(HAVE_SVE)
|
||||||
|
const svbool_t lane_pred_32 = svwhilelt_b8(0, 32);
|
||||||
|
svuint8_t cur_state = svdup_u8(*state);
|
||||||
|
svuint8_t tbl_mask = svdup_u8((unsigned char)0x1F);
|
||||||
|
const m512 *masks = s->succ_masks;
|
||||||
|
#else
|
||||||
m512 cur_state = set1_64x8(*state);
|
m512 cur_state = set1_64x8(*state);
|
||||||
const m512 *masks = s->succ_masks;
|
const m512 *masks = s->succ_masks;
|
||||||
|
#endif
|
||||||
|
|
||||||
while (likely(end - cur_buf >= 4)) {
|
while (likely(end - cur_buf >= 4)) {
|
||||||
const u8 *b1 = cur_buf;
|
const u8 *b1 = cur_buf;
|
||||||
@ -333,6 +340,23 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
|||||||
const u8 c3 = *b3;
|
const u8 c3 = *b3;
|
||||||
const u8 c4 = *b4;
|
const u8 c4 = *b4;
|
||||||
|
|
||||||
|
#if defined(HAVE_SVE)
|
||||||
|
svuint8_t succ_mask1 = svld1(lane_pred_32, (const u8*)(masks+c1));
|
||||||
|
cur_state = svtbl(succ_mask1, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||||
|
const u8 a1 = svlastb(lane_pred_32, cur_state);
|
||||||
|
|
||||||
|
svuint8_t succ_mask2 = svld1(lane_pred_32, (const u8*)(masks+c2));
|
||||||
|
cur_state = svtbl(succ_mask2, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||||
|
const u8 a2 = svlastb(lane_pred_32, cur_state);
|
||||||
|
|
||||||
|
svuint8_t succ_mask3 = svld1(lane_pred_32, (const u8*)(masks+c3));
|
||||||
|
cur_state = svtbl(succ_mask3, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||||
|
const u8 a3 = svlastb(lane_pred_32, cur_state);
|
||||||
|
|
||||||
|
svuint8_t succ_mask4 = svld1(lane_pred_32, (const u8*)(masks+c4));
|
||||||
|
cur_state = svtbl(succ_mask4, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||||
|
const u8 a4 = svlastb(lane_pred_32, cur_state);
|
||||||
|
#else
|
||||||
const m512 succ_mask1 = masks[c1];
|
const m512 succ_mask1 = masks[c1];
|
||||||
cur_state = vpermb512(cur_state, succ_mask1);
|
cur_state = vpermb512(cur_state, succ_mask1);
|
||||||
const u8 a1 = movd512(cur_state);
|
const u8 a1 = movd512(cur_state);
|
||||||
@ -348,6 +372,7 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
|||||||
const m512 succ_mask4 = masks[c4];
|
const m512 succ_mask4 = masks[c4];
|
||||||
cur_state = vpermb512(cur_state, succ_mask4);
|
cur_state = vpermb512(cur_state, succ_mask4);
|
||||||
const u8 a4 = movd512(cur_state);
|
const u8 a4 = movd512(cur_state);
|
||||||
|
#endif
|
||||||
|
|
||||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
|
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
|
||||||
DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG32_STATE_MASK,
|
DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG32_STATE_MASK,
|
||||||
@ -517,7 +542,11 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
|||||||
};
|
};
|
||||||
cur_buf += 4;
|
cur_buf += 4;
|
||||||
}
|
}
|
||||||
|
#if defined(HAVE_SVE)
|
||||||
|
*state = svlastb(lane_pred_32, cur_state);
|
||||||
|
#else
|
||||||
*state = movd512(cur_state);
|
*state = movd512(cur_state);
|
||||||
|
#endif
|
||||||
*scan_end = cur_buf;
|
*scan_end = cur_buf;
|
||||||
return MO_CONTINUE_MATCHING;
|
return MO_CONTINUE_MATCHING;
|
||||||
}
|
}
|
||||||
@ -541,9 +570,15 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
|||||||
*scan_end = end;
|
*scan_end = end;
|
||||||
return MO_CONTINUE_MATCHING;
|
return MO_CONTINUE_MATCHING;
|
||||||
}
|
}
|
||||||
|
#if defined(HAVE_SVE)
|
||||||
|
const svbool_t lane_pred_64 = svwhilelt_b8(0, 64);
|
||||||
|
svuint8_t cur_state = svdup_u8(*state);
|
||||||
|
svuint8_t tbl_mask = svdup_u8((unsigned char)0x3F);
|
||||||
|
const m512 *masks = s->succ_masks;
|
||||||
|
#else
|
||||||
m512 cur_state = set1_64x8(*state);
|
m512 cur_state = set1_64x8(*state);
|
||||||
const m512 *masks = s->succ_masks;
|
const m512 *masks = s->succ_masks;
|
||||||
|
#endif
|
||||||
|
|
||||||
while (likely(end - cur_buf >= 4)) {
|
while (likely(end - cur_buf >= 4)) {
|
||||||
const u8 *b1 = cur_buf;
|
const u8 *b1 = cur_buf;
|
||||||
@ -555,6 +590,23 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
|||||||
const u8 c3 = *b3;
|
const u8 c3 = *b3;
|
||||||
const u8 c4 = *b4;
|
const u8 c4 = *b4;
|
||||||
|
|
||||||
|
#if defined(HAVE_SVE)
|
||||||
|
svuint8_t succ_mask1 = svld1(lane_pred_64, (const u8*)(masks+c1));
|
||||||
|
cur_state = svtbl(succ_mask1, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||||
|
const u8 a1 = svlastb(lane_pred_64, cur_state);
|
||||||
|
|
||||||
|
svuint8_t succ_mask2 = svld1(lane_pred_64, (const u8*)(masks+c2));
|
||||||
|
cur_state = svtbl(succ_mask2, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||||
|
const u8 a2 = svlastb(lane_pred_64, cur_state);
|
||||||
|
|
||||||
|
svuint8_t succ_mask3 = svld1(lane_pred_64, (const u8*)(masks+c3));
|
||||||
|
cur_state = svtbl(succ_mask3, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||||
|
const u8 a3 = svlastb(lane_pred_64, cur_state);
|
||||||
|
|
||||||
|
svuint8_t succ_mask4 = svld1(lane_pred_64, (const u8*)(masks+c4));
|
||||||
|
cur_state = svtbl(succ_mask4, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||||
|
const u8 a4 = svlastb(lane_pred_64, cur_state);
|
||||||
|
#else
|
||||||
const m512 succ_mask1 = masks[c1];
|
const m512 succ_mask1 = masks[c1];
|
||||||
cur_state = vpermb512(cur_state, succ_mask1);
|
cur_state = vpermb512(cur_state, succ_mask1);
|
||||||
const u8 a1 = movd512(cur_state);
|
const u8 a1 = movd512(cur_state);
|
||||||
@ -570,6 +622,7 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
|||||||
const m512 succ_mask4 = masks[c4];
|
const m512 succ_mask4 = masks[c4];
|
||||||
cur_state = vpermb512(cur_state, succ_mask4);
|
cur_state = vpermb512(cur_state, succ_mask4);
|
||||||
const u8 a4 = movd512(cur_state);
|
const u8 a4 = movd512(cur_state);
|
||||||
|
#endif
|
||||||
|
|
||||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
|
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
|
||||||
DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG64_STATE_MASK,
|
DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG64_STATE_MASK,
|
||||||
@ -703,7 +756,11 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
|||||||
}
|
}
|
||||||
cur_buf += 4;
|
cur_buf += 4;
|
||||||
}
|
}
|
||||||
|
#if defined(HAVE_SVE)
|
||||||
|
*state = svlastb(lane_pred_64, cur_state);
|
||||||
|
#else
|
||||||
*state = movd512(cur_state);
|
*state = movd512(cur_state);
|
||||||
|
#endif
|
||||||
*scan_end = cur_buf;
|
*scan_end = cur_buf;
|
||||||
return MO_CONTINUE_MATCHING;
|
return MO_CONTINUE_MATCHING;
|
||||||
}
|
}
|
||||||
|
@ -99,7 +99,7 @@ struct dfa_info {
|
|||||||
return next(idx, TOP);
|
return next(idx, TOP);
|
||||||
}
|
}
|
||||||
dstate &next(dstate_id_t idx, u16 chr) {
|
dstate &next(dstate_id_t idx, u16 chr) {
|
||||||
auto &src = (*this)[idx];
|
const auto &src = (*this)[idx];
|
||||||
auto next_id = src.next[raw.alpha_remap[chr]];
|
auto next_id = src.next[raw.alpha_remap[chr]];
|
||||||
return states[next_id];
|
return states[next_id];
|
||||||
}
|
}
|
||||||
@ -109,7 +109,7 @@ struct dfa_info {
|
|||||||
// if DFA can't die, shift all indices left by 1
|
// if DFA can't die, shift all indices left by 1
|
||||||
return can_die ? idx : idx + 1;
|
return can_die ? idx : idx + 1;
|
||||||
}
|
}
|
||||||
bool isDead(dstate &state) {
|
bool isDead(const dstate &state) {
|
||||||
return raw_id(state.impl_id) == DEAD_STATE;
|
return raw_id(state.impl_id) == DEAD_STATE;
|
||||||
}
|
}
|
||||||
bool isDead(dstate_id_t idx) {
|
bool isDead(dstate_id_t idx) {
|
||||||
@ -117,7 +117,7 @@ struct dfa_info {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static bool dfaCanDie(raw_dfa &rdfa) {
|
static bool dfaCanDie(const raw_dfa &rdfa) {
|
||||||
for (unsigned chr = 0; chr < 256; chr++) {
|
for (unsigned chr = 0; chr < 256; chr++) {
|
||||||
for (dstate_id_t state = 0; state < rdfa.states.size(); state++) {
|
for (dstate_id_t state = 0; state < rdfa.states.size(); state++) {
|
||||||
auto succ = rdfa.states[state].next[rdfa.alpha_remap[chr]];
|
auto succ = rdfa.states[state].next[rdfa.alpha_remap[chr]];
|
||||||
@ -138,7 +138,7 @@ struct raw_report_list {
|
|||||||
raw_report_list(const flat_set<ReportID> &reports_in,
|
raw_report_list(const flat_set<ReportID> &reports_in,
|
||||||
const ReportManager &rm, bool do_remap) {
|
const ReportManager &rm, bool do_remap) {
|
||||||
if (do_remap) {
|
if (do_remap) {
|
||||||
for (auto &id : reports_in) {
|
for (const auto &id : reports_in) {
|
||||||
reports.insert(rm.getProgramOffset(id));
|
reports.insert(rm.getProgramOffset(id));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -334,14 +334,14 @@ void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info,
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static
|
static
|
||||||
u8 getShengState(UNUSED dstate &state, UNUSED dfa_info &info,
|
u8 getShengState(UNUSED const dstate &state, UNUSED dfa_info &info,
|
||||||
UNUSED map<dstate_id_t, AccelScheme> &accelInfo) {
|
UNUSED const map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
u8 getShengState<sheng>(dstate &state, dfa_info &info,
|
u8 getShengState<sheng>(const dstate &state, dfa_info &info,
|
||||||
map<dstate_id_t, AccelScheme> &accelInfo) {
|
const map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||||
u8 s = state.impl_id;
|
u8 s = state.impl_id;
|
||||||
if (!state.reports.empty()) {
|
if (!state.reports.empty()) {
|
||||||
s |= SHENG_STATE_ACCEPT;
|
s |= SHENG_STATE_ACCEPT;
|
||||||
@ -356,8 +356,8 @@ u8 getShengState<sheng>(dstate &state, dfa_info &info,
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
u8 getShengState<sheng32>(dstate &state, dfa_info &info,
|
u8 getShengState<sheng32>(const dstate &state, dfa_info &info,
|
||||||
map<dstate_id_t, AccelScheme> &accelInfo) {
|
const map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||||
u8 s = state.impl_id;
|
u8 s = state.impl_id;
|
||||||
if (!state.reports.empty()) {
|
if (!state.reports.empty()) {
|
||||||
s |= SHENG32_STATE_ACCEPT;
|
s |= SHENG32_STATE_ACCEPT;
|
||||||
@ -372,8 +372,8 @@ u8 getShengState<sheng32>(dstate &state, dfa_info &info,
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
u8 getShengState<sheng64>(dstate &state, dfa_info &info,
|
u8 getShengState<sheng64>(const dstate &state, dfa_info &info,
|
||||||
UNUSED map<dstate_id_t, AccelScheme> &accelInfo) {
|
UNUSED const map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||||
u8 s = state.impl_id;
|
u8 s = state.impl_id;
|
||||||
if (!state.reports.empty()) {
|
if (!state.reports.empty()) {
|
||||||
s |= SHENG64_STATE_ACCEPT;
|
s |= SHENG64_STATE_ACCEPT;
|
||||||
@ -409,8 +409,8 @@ void fillAccelAux(struct NFA *n, dfa_info &info,
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static
|
static
|
||||||
void populateBasicInfo(UNUSED struct NFA *n, UNUSED dfa_info &info,
|
void populateBasicInfo(UNUSED struct NFA *n, UNUSED dfa_info &info, // cppcheck-suppress constParameterPointer
|
||||||
UNUSED map<dstate_id_t, AccelScheme> &accelInfo,
|
UNUSED const map<dstate_id_t, AccelScheme> &accelInfo,
|
||||||
UNUSED u32 aux_offset, UNUSED u32 report_offset,
|
UNUSED u32 aux_offset, UNUSED u32 report_offset,
|
||||||
UNUSED u32 accel_offset, UNUSED u32 total_size,
|
UNUSED u32 accel_offset, UNUSED u32 total_size,
|
||||||
UNUSED u32 dfa_size) {
|
UNUSED u32 dfa_size) {
|
||||||
@ -418,7 +418,7 @@ void populateBasicInfo(UNUSED struct NFA *n, UNUSED dfa_info &info,
|
|||||||
|
|
||||||
template <>
|
template <>
|
||||||
void populateBasicInfo<sheng>(struct NFA *n, dfa_info &info,
|
void populateBasicInfo<sheng>(struct NFA *n, dfa_info &info,
|
||||||
map<dstate_id_t, AccelScheme> &accelInfo,
|
const map<dstate_id_t, AccelScheme> &accelInfo,
|
||||||
u32 aux_offset, u32 report_offset,
|
u32 aux_offset, u32 report_offset,
|
||||||
u32 accel_offset, u32 total_size,
|
u32 accel_offset, u32 total_size,
|
||||||
u32 dfa_size) {
|
u32 dfa_size) {
|
||||||
@ -443,7 +443,7 @@ void populateBasicInfo<sheng>(struct NFA *n, dfa_info &info,
|
|||||||
|
|
||||||
template <>
|
template <>
|
||||||
void populateBasicInfo<sheng32>(struct NFA *n, dfa_info &info,
|
void populateBasicInfo<sheng32>(struct NFA *n, dfa_info &info,
|
||||||
map<dstate_id_t, AccelScheme> &accelInfo,
|
const map<dstate_id_t, AccelScheme> &accelInfo,
|
||||||
u32 aux_offset, u32 report_offset,
|
u32 aux_offset, u32 report_offset,
|
||||||
u32 accel_offset, u32 total_size,
|
u32 accel_offset, u32 total_size,
|
||||||
u32 dfa_size) {
|
u32 dfa_size) {
|
||||||
@ -468,7 +468,7 @@ void populateBasicInfo<sheng32>(struct NFA *n, dfa_info &info,
|
|||||||
|
|
||||||
template <>
|
template <>
|
||||||
void populateBasicInfo<sheng64>(struct NFA *n, dfa_info &info,
|
void populateBasicInfo<sheng64>(struct NFA *n, dfa_info &info,
|
||||||
map<dstate_id_t, AccelScheme> &accelInfo,
|
const map<dstate_id_t, AccelScheme> &accelInfo,
|
||||||
u32 aux_offset, u32 report_offset,
|
u32 aux_offset, u32 report_offset,
|
||||||
u32 accel_offset, u32 total_size,
|
u32 accel_offset, u32 total_size,
|
||||||
u32 dfa_size) {
|
u32 dfa_size) {
|
||||||
@ -551,19 +551,19 @@ void fillSingleReport(NFA *n, ReportID r_id) {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static
|
static
|
||||||
bool createShuffleMasks(UNUSED T *s, UNUSED dfa_info &info,
|
bool createShuffleMasks(UNUSED T *s, UNUSED dfa_info &info, // cppcheck-suppress constParameterPointer
|
||||||
UNUSED map<dstate_id_t, AccelScheme> &accelInfo) {
|
UNUSED const map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
bool createShuffleMasks<sheng>(sheng *s, dfa_info &info,
|
bool createShuffleMasks<sheng>(sheng *s, dfa_info &info,
|
||||||
map<dstate_id_t, AccelScheme> &accelInfo) {
|
const map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||||
for (u16 chr = 0; chr < 256; chr++) {
|
for (u16 chr = 0; chr < 256; chr++) {
|
||||||
u8 buf[16] = {0};
|
u8 buf[16] = {0};
|
||||||
|
|
||||||
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
|
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
|
||||||
auto &succ_state = info.next(idx, chr);
|
const auto &succ_state = info.next(idx, chr);
|
||||||
|
|
||||||
buf[idx] = getShengState<sheng>(succ_state, info, accelInfo);
|
buf[idx] = getShengState<sheng>(succ_state, info, accelInfo);
|
||||||
}
|
}
|
||||||
@ -577,13 +577,13 @@ bool createShuffleMasks<sheng>(sheng *s, dfa_info &info,
|
|||||||
|
|
||||||
template <>
|
template <>
|
||||||
bool createShuffleMasks<sheng32>(sheng32 *s, dfa_info &info,
|
bool createShuffleMasks<sheng32>(sheng32 *s, dfa_info &info,
|
||||||
map<dstate_id_t, AccelScheme> &accelInfo) {
|
const map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||||
for (u16 chr = 0; chr < 256; chr++) {
|
for (u16 chr = 0; chr < 256; chr++) {
|
||||||
u8 buf[64] = {0};
|
u8 buf[64] = {0};
|
||||||
|
|
||||||
assert(info.size() <= 32);
|
assert(info.size() <= 32);
|
||||||
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
|
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
|
||||||
auto &succ_state = info.next(idx, chr);
|
const auto &succ_state = info.next(idx, chr);
|
||||||
|
|
||||||
buf[idx] = getShengState<sheng32>(succ_state, info, accelInfo);
|
buf[idx] = getShengState<sheng32>(succ_state, info, accelInfo);
|
||||||
buf[32 + idx] = buf[idx];
|
buf[32 + idx] = buf[idx];
|
||||||
@ -598,13 +598,13 @@ bool createShuffleMasks<sheng32>(sheng32 *s, dfa_info &info,
|
|||||||
|
|
||||||
template <>
|
template <>
|
||||||
bool createShuffleMasks<sheng64>(sheng64 *s, dfa_info &info,
|
bool createShuffleMasks<sheng64>(sheng64 *s, dfa_info &info,
|
||||||
map<dstate_id_t, AccelScheme> &accelInfo) {
|
const map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||||
for (u16 chr = 0; chr < 256; chr++) {
|
for (u16 chr = 0; chr < 256; chr++) {
|
||||||
u8 buf[64] = {0};
|
u8 buf[64] = {0};
|
||||||
|
|
||||||
assert(info.size() <= 64);
|
assert(info.size() <= 64);
|
||||||
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
|
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
|
||||||
auto &succ_state = info.next(idx, chr);
|
const auto &succ_state = info.next(idx, chr);
|
||||||
|
|
||||||
if (accelInfo.find(info.raw_id(succ_state.impl_id))
|
if (accelInfo.find(info.raw_id(succ_state.impl_id))
|
||||||
!= accelInfo.end()) {
|
!= accelInfo.end()) {
|
||||||
@ -690,7 +690,7 @@ bytecode_ptr<NFA> shengCompile_int(raw_dfa &raw, const CompileContext &cc,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!createShuffleMasks<T>((T *)getMutableImplNfa(nfa.get()), info, accelInfo)) {
|
if (!createShuffleMasks<T>((T *)getMutableImplNfa(nfa.get()), info, accelInfo)) {
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
return nfa;
|
return nfa;
|
||||||
@ -701,7 +701,7 @@ bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc,
|
|||||||
set<dstate_id_t> *accel_states) {
|
set<dstate_id_t> *accel_states) {
|
||||||
if (!cc.grey.allowSheng) {
|
if (!cc.grey.allowSheng) {
|
||||||
DEBUG_PRINTF("Sheng is not allowed!\n");
|
DEBUG_PRINTF("Sheng is not allowed!\n");
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
sheng_build_strat strat(raw, rm, only_accel_init);
|
sheng_build_strat strat(raw, rm, only_accel_init);
|
||||||
@ -716,7 +716,7 @@ bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc,
|
|||||||
info.can_die ? "can" : "cannot", info.size());
|
info.can_die ? "can" : "cannot", info.size());
|
||||||
if (info.size() > 16) {
|
if (info.size() > 16) {
|
||||||
DEBUG_PRINTF("Too many states\n");
|
DEBUG_PRINTF("Too many states\n");
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
return shengCompile_int<sheng>(raw, cc, accel_states, strat, info);
|
return shengCompile_int<sheng>(raw, cc, accel_states, strat, info);
|
||||||
@ -727,13 +727,20 @@ bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc,
|
|||||||
set<dstate_id_t> *accel_states) {
|
set<dstate_id_t> *accel_states) {
|
||||||
if (!cc.grey.allowSheng) {
|
if (!cc.grey.allowSheng) {
|
||||||
DEBUG_PRINTF("Sheng is not allowed!\n");
|
DEBUG_PRINTF("Sheng is not allowed!\n");
|
||||||
return nullptr;
|
bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_SVE
|
||||||
|
if (svcntb()<32) {
|
||||||
|
DEBUG_PRINTF("Sheng32 failed, SVE width is too small!\n");
|
||||||
|
bytecode_ptr<NFA>(nullptr);
|
||||||
|
}
|
||||||
|
#else
|
||||||
if (!cc.target_info.has_avx512vbmi()) {
|
if (!cc.target_info.has_avx512vbmi()) {
|
||||||
DEBUG_PRINTF("Sheng32 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
|
DEBUG_PRINTF("Sheng32 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
|
||||||
return nullptr;
|
bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
sheng_build_strat strat(raw, rm, only_accel_init);
|
sheng_build_strat strat(raw, rm, only_accel_init);
|
||||||
dfa_info info(strat);
|
dfa_info info(strat);
|
||||||
@ -748,7 +755,7 @@ bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc,
|
|||||||
assert(info.size() > 16);
|
assert(info.size() > 16);
|
||||||
if (info.size() > 32) {
|
if (info.size() > 32) {
|
||||||
DEBUG_PRINTF("Too many states\n");
|
DEBUG_PRINTF("Too many states\n");
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
return shengCompile_int<sheng32>(raw, cc, accel_states, strat, info);
|
return shengCompile_int<sheng32>(raw, cc, accel_states, strat, info);
|
||||||
@ -759,13 +766,20 @@ bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc,
|
|||||||
set<dstate_id_t> *accel_states) {
|
set<dstate_id_t> *accel_states) {
|
||||||
if (!cc.grey.allowSheng) {
|
if (!cc.grey.allowSheng) {
|
||||||
DEBUG_PRINTF("Sheng is not allowed!\n");
|
DEBUG_PRINTF("Sheng is not allowed!\n");
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_SVE
|
||||||
|
if (svcntb()<64) {
|
||||||
|
DEBUG_PRINTF("Sheng64 failed, SVE width is too small!\n");
|
||||||
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
|
}
|
||||||
|
#else
|
||||||
if (!cc.target_info.has_avx512vbmi()) {
|
if (!cc.target_info.has_avx512vbmi()) {
|
||||||
DEBUG_PRINTF("Sheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
|
DEBUG_PRINTF("Sheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
sheng_build_strat strat(raw, rm, only_accel_init);
|
sheng_build_strat strat(raw, rm, only_accel_init);
|
||||||
dfa_info info(strat);
|
dfa_info info(strat);
|
||||||
@ -780,13 +794,13 @@ bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc,
|
|||||||
assert(info.size() > 32);
|
assert(info.size() > 32);
|
||||||
if (info.size() > 64) {
|
if (info.size() > 64) {
|
||||||
DEBUG_PRINTF("Too many states\n");
|
DEBUG_PRINTF("Too many states\n");
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
vector<dstate> old_states;
|
vector<dstate> old_states;
|
||||||
old_states = info.states;
|
old_states = info.states;
|
||||||
auto nfa = shengCompile_int<sheng64>(raw, cc, accel_states, strat, info);
|
auto nfa = shengCompile_int<sheng64>(raw, cc, accel_states, strat, info);
|
||||||
if (!nfa) {
|
if (!nfa) {
|
||||||
info.states = old_states;
|
info.states = old_states; // cppcheck-suppress unreadVariable
|
||||||
}
|
}
|
||||||
return nfa;
|
return nfa;
|
||||||
}
|
}
|
||||||
|
@ -264,7 +264,7 @@ const u8 *shuftiDoubleExecReal(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128
|
|||||||
const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
|
const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
|
||||||
const u8 *buf_end) {
|
const u8 *buf_end) {
|
||||||
if (buf_end - buf < VECTORSIZE) {
|
if (buf_end - buf < VECTORSIZE) {
|
||||||
return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, buf, buf_end);
|
return shuftiFwdSlow(reinterpret_cast<const u8 *>(&mask_lo), reinterpret_cast<const u8 *>(&mask_hi), buf, buf_end);
|
||||||
}
|
}
|
||||||
return shuftiExecReal<VECTORSIZE>(mask_lo, mask_hi, buf, buf_end);
|
return shuftiExecReal<VECTORSIZE>(mask_lo, mask_hi, buf, buf_end);
|
||||||
}
|
}
|
||||||
@ -272,7 +272,7 @@ const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
|
|||||||
const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
|
const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
|
||||||
const u8 *buf_end) {
|
const u8 *buf_end) {
|
||||||
if (buf_end - buf < VECTORSIZE) {
|
if (buf_end - buf < VECTORSIZE) {
|
||||||
return shuftiRevSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, buf, buf_end);
|
return shuftiRevSlow(reinterpret_cast<const u8 *>(&mask_lo), reinterpret_cast<const u8 *>(&mask_hi), buf, buf_end);
|
||||||
}
|
}
|
||||||
return rshuftiExecReal<VECTORSIZE>(mask_lo, mask_hi, buf, buf_end);
|
return rshuftiExecReal<VECTORSIZE>(mask_lo, mask_hi, buf, buf_end);
|
||||||
}
|
}
|
||||||
|
@ -32,6 +32,8 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
#include <numeric>
|
||||||
|
|
||||||
|
|
||||||
#include "tamaramacompile.h"
|
#include "tamaramacompile.h"
|
||||||
|
|
||||||
@ -129,14 +131,10 @@ buildTamarama(const TamaInfo &tamaInfo, const u32 queue,
|
|||||||
sizeof(u32) * subSize + 64; // offsets to subengines in bytecode and
|
sizeof(u32) * subSize + 64; // offsets to subengines in bytecode and
|
||||||
// padding for subengines
|
// padding for subengines
|
||||||
|
|
||||||
|
|
||||||
auto subl = [](size_t z, NFA *sub) {
|
auto subl = [](size_t z, NFA *sub) {
|
||||||
return z + (size_t)(ROUNDUP_CL(sub->length));
|
return z + (size_t)(ROUNDUP_CL(sub->length));
|
||||||
};
|
};
|
||||||
total_size += std::accumulate(tamaInfo.subengines.begin(), tamaInfo.subengines.end(), 0, subl);
|
total_size += std::accumulate(tamaInfo.subengines.begin(), tamaInfo.subengines.end(), 0, subl);
|
||||||
// for (const auto &sub : tamaInfo.subengines) {
|
|
||||||
// total_size += ROUNDUP_CL(sub->length);
|
|
||||||
// }
|
|
||||||
|
|
||||||
// use subSize as a sentinel value for no active subengines,
|
// use subSize as a sentinel value for no active subengines,
|
||||||
// so add one to subSize here
|
// so add one to subSize here
|
||||||
|
@ -227,7 +227,7 @@ const u8 *fwdBlock(SuperVector<S> shuf_mask_lo_highclear, SuperVector<S> shuf_ma
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <uint16_t S>
|
template <uint16_t S>
|
||||||
const u8 *truffleExecReal(m128 &shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, const u8 *buf, const u8 *buf_end) {
|
const u8 *truffleExecReal(const m128 &shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, const u8 *buf, const u8 *buf_end) {
|
||||||
assert(buf && buf_end);
|
assert(buf && buf_end);
|
||||||
assert(buf < buf_end);
|
assert(buf < buf_end);
|
||||||
DEBUG_PRINTF("truffle %p len %zu\n", buf, buf_end - buf);
|
DEBUG_PRINTF("truffle %p len %zu\n", buf, buf_end - buf);
|
||||||
|
@ -193,9 +193,6 @@ void reduceGraph(NGHolder &g, som_type som, bool utf8,
|
|||||||
|
|
||||||
if (!som) {
|
if (!som) {
|
||||||
mergeCyclicDotStars(g);
|
mergeCyclicDotStars(g);
|
||||||
}
|
|
||||||
|
|
||||||
if (!som) {
|
|
||||||
removeSiblingsOfStartDotStar(g);
|
removeSiblingsOfStartDotStar(g);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -292,7 +289,7 @@ bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr,
|
|||||||
|
|
||||||
// Returns true if all components have been added.
|
// Returns true if all components have been added.
|
||||||
static
|
static
|
||||||
bool processComponents(NG &ng, ExpressionInfo &expr,
|
bool processComponents(NG &ng, const ExpressionInfo &expr,
|
||||||
deque<unique_ptr<NGHolder>> &g_comp,
|
deque<unique_ptr<NGHolder>> &g_comp,
|
||||||
const som_type som) {
|
const som_type som) {
|
||||||
const u32 num_components = g_comp.size();
|
const u32 num_components = g_comp.size();
|
||||||
|
@ -166,9 +166,9 @@ void reformAnchoredRepeatsComponent(NGHolder &g,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
NFAVertex dotV = NGHolder::null_vertex();
|
|
||||||
set<NFAVertex> otherV;
|
set<NFAVertex> otherV;
|
||||||
dotV = findReformable(g, compAnchoredStarts, otherV);
|
NFAVertex dotV = findReformable(g, compAnchoredStarts, otherV);
|
||||||
if (dotV == NGHolder::null_vertex()) {
|
if (dotV == NGHolder::null_vertex()) {
|
||||||
DEBUG_PRINTF("no candidate reformable dot found.\n");
|
DEBUG_PRINTF("no candidate reformable dot found.\n");
|
||||||
return;
|
return;
|
||||||
@ -258,7 +258,7 @@ void reformAnchoredRepeatsComponent(NGHolder &g,
|
|||||||
|
|
||||||
static
|
static
|
||||||
void reformUnanchoredRepeatsComponent(NGHolder &g,
|
void reformUnanchoredRepeatsComponent(NGHolder &g,
|
||||||
set<NFAVertex> &compAnchoredStarts,
|
const set<NFAVertex> &compAnchoredStarts,
|
||||||
set<NFAVertex> &compUnanchoredStarts,
|
set<NFAVertex> &compUnanchoredStarts,
|
||||||
set<NFAVertex> &dead,
|
set<NFAVertex> &dead,
|
||||||
depth *startBegin, depth *startEnd) {
|
depth *startBegin, depth *startEnd) {
|
||||||
@ -269,9 +269,9 @@ void reformUnanchoredRepeatsComponent(NGHolder &g,
|
|||||||
}
|
}
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
NFAVertex dotV = NGHolder::null_vertex();
|
|
||||||
set<NFAVertex> otherV;
|
set<NFAVertex> otherV;
|
||||||
dotV = findReformable(g, compUnanchoredStarts, otherV);
|
NFAVertex dotV = findReformable(g, compUnanchoredStarts, otherV);
|
||||||
if (dotV == NGHolder::null_vertex()) {
|
if (dotV == NGHolder::null_vertex()) {
|
||||||
DEBUG_PRINTF("no candidate reformable dot found.\n");
|
DEBUG_PRINTF("no candidate reformable dot found.\n");
|
||||||
return;
|
return;
|
||||||
@ -488,15 +488,15 @@ void collapseVariableDotRepeat(NGHolder &g, NFAVertex start,
|
|||||||
|
|
||||||
// Collect all the other optional dot vertices and the successor vertices
|
// Collect all the other optional dot vertices and the successor vertices
|
||||||
// by walking down the graph from initialDot
|
// by walking down the graph from initialDot
|
||||||
set<NFAVertex> dots, succ;
|
set<NFAVertex> dots, succr;
|
||||||
if (!gatherParticipants(g, start, initialDot, dots, succ)) {
|
if (!gatherParticipants(g, start, initialDot, dots, succr)) {
|
||||||
DEBUG_PRINTF("gatherParticipants failed\n");
|
DEBUG_PRINTF("gatherParticipants failed\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("optional dot repeat with %zu participants, "
|
DEBUG_PRINTF("optional dot repeat with %zu participants, "
|
||||||
"terminating in %zu non-dot nodes\n",
|
"terminating in %zu non-dot nodes\n",
|
||||||
dots.size(), succ.size());
|
dots.size(), succr.size());
|
||||||
|
|
||||||
// Remove all the participants and set the start offset
|
// Remove all the participants and set the start offset
|
||||||
dead.insert(dots.begin(), dots.end());
|
dead.insert(dots.begin(), dots.end());
|
||||||
@ -512,7 +512,7 @@ void collapseVariableDotRepeat(NGHolder &g, NFAVertex start,
|
|||||||
assert(startEnd->is_reachable());
|
assert(startEnd->is_reachable());
|
||||||
|
|
||||||
// Connect our successor vertices to both start and startDs.
|
// Connect our successor vertices to both start and startDs.
|
||||||
for (auto v : succ) {
|
for (auto v : succr) {
|
||||||
add_edge_if_not_present(g.start, v, g);
|
add_edge_if_not_present(g.start, v, g);
|
||||||
add_edge_if_not_present(g.startDs, v, g);
|
add_edge_if_not_present(g.startDs, v, g);
|
||||||
}
|
}
|
||||||
@ -558,7 +558,7 @@ void collapseVariableRepeats(NGHolder &g, depth *startBegin, depth *startEnd) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void addDotsBetween(NGHolder &g, NFAVertex lhs, vector<NFAVertex> &rhs,
|
void addDotsBetween(NGHolder &g, NFAVertex lhs, const vector<NFAVertex> &rhs,
|
||||||
depth min_repeat, depth max_repeat) {
|
depth min_repeat, depth max_repeat) {
|
||||||
const bool unbounded = max_repeat.is_infinite();
|
const bool unbounded = max_repeat.is_infinite();
|
||||||
if (unbounded) {
|
if (unbounded) {
|
||||||
|
@ -92,11 +92,12 @@ static const CharReach CHARREACH_NONWORD_UCP_PRE(CHARREACH_NONWORD);
|
|||||||
static
|
static
|
||||||
vector<NFAEdge> getAsserts(const NGHolder &g) {
|
vector<NFAEdge> getAsserts(const NGHolder &g) {
|
||||||
vector<NFAEdge> out;
|
vector<NFAEdge> out;
|
||||||
for (const auto &e : edges_range(g)) {
|
auto assertflags = [&g=g](const NFAEdge &e) {
|
||||||
if (g[e].assert_flags) {
|
return (g[e].assert_flags);
|
||||||
out.emplace_back(e);
|
};
|
||||||
}
|
const auto &er = edges_range(g);
|
||||||
}
|
std::copy_if(begin(er), end(er), std::back_inserter(out), assertflags);
|
||||||
|
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -384,7 +385,10 @@ void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
|
|||||||
/* there may already be a different edge from start to eod if so
|
/* there may already be a different edge from start to eod if so
|
||||||
* we need to make it unconditional and alive
|
* we need to make it unconditional and alive
|
||||||
*/
|
*/
|
||||||
if (NFAEdge start_eod = edge(u, g.acceptEod, g)) {
|
NFAEdge start_eod;
|
||||||
|
bool exists;
|
||||||
|
std::tie(start_eod, exists) = edge(u, g.acceptEod, g);
|
||||||
|
if (exists) {
|
||||||
g[start_eod].assert_flags = 0;
|
g[start_eod].assert_flags = 0;
|
||||||
dead->erase(start_eod);
|
dead->erase(start_eod);
|
||||||
} else {
|
} else {
|
||||||
@ -437,7 +441,10 @@ void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
|
|||||||
/* there may already be a different edge from start to eod if so
|
/* there may already be a different edge from start to eod if so
|
||||||
* we need to make it unconditional and alive
|
* we need to make it unconditional and alive
|
||||||
*/
|
*/
|
||||||
if (NFAEdge start_eod = edge(u, g.acceptEod, g)) {
|
NFAEdge start_eod;
|
||||||
|
bool exists;
|
||||||
|
std::tie(start_eod, exists) = edge(u, g.acceptEod, g);
|
||||||
|
if (exists) {
|
||||||
g[start_eod].assert_flags = 0;
|
g[start_eod].assert_flags = 0;
|
||||||
dead->erase(start_eod);
|
dead->erase(start_eod);
|
||||||
} else {
|
} else {
|
||||||
@ -496,7 +503,8 @@ void ensureCodePointStart(ReportManager &rm, NGHolder &g,
|
|||||||
* boundaries. Assert resolution handles the badness coming from asserts.
|
* boundaries. Assert resolution handles the badness coming from asserts.
|
||||||
* The only other source of trouble is startDs->accept connections.
|
* The only other source of trouble is startDs->accept connections.
|
||||||
*/
|
*/
|
||||||
NFAEdge orig = edge(g.startDs, g.accept, g);
|
NFAEdge orig;
|
||||||
|
std::tie(orig, std::ignore) = edge(g.startDs, g.accept, g);
|
||||||
if (expr.utf8 && orig) {
|
if (expr.utf8 && orig) {
|
||||||
DEBUG_PRINTF("rectifying %u\n", expr.report);
|
DEBUG_PRINTF("rectifying %u\n", expr.report);
|
||||||
Report ir = rm.getBasicInternalReport(expr);
|
Report ir = rm.getBasicInternalReport(expr);
|
||||||
|
@ -514,17 +514,17 @@ bool removeSiblingsOfStartDotStar(NGHolder &g) {
|
|||||||
* for SOM mode. (see UE-1544) */
|
* for SOM mode. (see UE-1544) */
|
||||||
bool optimiseVirtualStarts(NGHolder &g) {
|
bool optimiseVirtualStarts(NGHolder &g) {
|
||||||
vector<NFAEdge> dead;
|
vector<NFAEdge> dead;
|
||||||
|
auto deads = [&g=g](const NFAEdge &e) {
|
||||||
|
return (!is_any_start(source(e, g), g));
|
||||||
|
};
|
||||||
|
|
||||||
for (auto v : adjacent_vertices_range(g.startDs, g)) {
|
for (auto v : adjacent_vertices_range(g.startDs, g)) {
|
||||||
u32 flags = g[v].assert_flags;
|
u32 flags = g[v].assert_flags;
|
||||||
if (!(flags & POS_FLAG_VIRTUAL_START)) {
|
if (!(flags & POS_FLAG_VIRTUAL_START)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
const auto &e = in_edges_range(v, g);
|
||||||
for (const auto &e : in_edges_range(v, g)) {
|
std::copy_if(begin(e), end(e), std::back_inserter(dead), deads);
|
||||||
if (!is_any_start(source(e, g), g)) {
|
|
||||||
dead.emplace_back(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dead.empty()) {
|
if (dead.empty()) {
|
||||||
|
@ -98,9 +98,9 @@ class ClassInfo {
|
|||||||
public:
|
public:
|
||||||
struct ClassDepth {
|
struct ClassDepth {
|
||||||
ClassDepth() {}
|
ClassDepth() {}
|
||||||
ClassDepth(const NFAVertexDepth &d)
|
explicit ClassDepth(const NFAVertexDepth &d)
|
||||||
: d1(d.fromStart), d2(d.fromStartDotStar) {}
|
: d1(d.fromStart), d2(d.fromStartDotStar) {}
|
||||||
ClassDepth(const NFAVertexRevDepth &rd)
|
explicit ClassDepth(const NFAVertexRevDepth &rd)
|
||||||
: d1(rd.toAccept), d2(rd.toAcceptEod) {}
|
: d1(rd.toAccept), d2(rd.toAcceptEod) {}
|
||||||
DepthMinMax d1;
|
DepthMinMax d1;
|
||||||
DepthMinMax d2;
|
DepthMinMax d2;
|
||||||
@ -159,7 +159,7 @@ public:
|
|||||||
return id;
|
return id;
|
||||||
}
|
}
|
||||||
|
|
||||||
void append(WorkQueue &other) {
|
void append(const WorkQueue &other) {
|
||||||
for (const auto &e : other) {
|
for (const auto &e : other) {
|
||||||
push(e);
|
push(e);
|
||||||
}
|
}
|
||||||
@ -193,7 +193,7 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
bool outIsIrreducible(NFAVertex &v, const NGHolder &g) {
|
bool outIsIrreducible(const NFAVertex &v, const NGHolder &g) {
|
||||||
unsigned nonSpecialVertices = 0;
|
unsigned nonSpecialVertices = 0;
|
||||||
for (auto w : adjacent_vertices_range(v, g)) {
|
for (auto w : adjacent_vertices_range(v, g)) {
|
||||||
if (!is_special(w, g) && w != v) {
|
if (!is_special(w, g) && w != v) {
|
||||||
@ -205,7 +205,7 @@ bool outIsIrreducible(NFAVertex &v, const NGHolder &g) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
bool inIsIrreducible(NFAVertex &v, const NGHolder &g) {
|
bool inIsIrreducible(const NFAVertex &v, const NGHolder &g) {
|
||||||
unsigned nonSpecialVertices = 0;
|
unsigned nonSpecialVertices = 0;
|
||||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||||
if (!is_special(u, g) && u != v) {
|
if (!is_special(u, g) && u != v) {
|
||||||
@ -339,9 +339,9 @@ vector<VertexInfoSet> partitionGraph(vector<unique_ptr<VertexInfo>> &infos,
|
|||||||
ClassInfo::ClassDepth depth;
|
ClassInfo::ClassDepth depth;
|
||||||
|
|
||||||
if (eq == LEFT_EQUIVALENCE) {
|
if (eq == LEFT_EQUIVALENCE) {
|
||||||
depth = depths[vi->vert_index];
|
depth = ClassInfo::ClassDepth(depths[vi->vert_index]);
|
||||||
} else {
|
} else {
|
||||||
depth = rdepths[vi->vert_index];
|
depth = ClassInfo::ClassDepth(rdepths[vi->vert_index]);
|
||||||
}
|
}
|
||||||
ClassInfo ci(g, *vi, depth, eq);
|
ClassInfo ci(g, *vi, depth, eq);
|
||||||
|
|
||||||
@ -549,8 +549,8 @@ void mergeClass(vector<unique_ptr<VertexInfo>> &infos, NGHolder &g,
|
|||||||
pred_info->succ.erase(old_vertex_info);
|
pred_info->succ.erase(old_vertex_info);
|
||||||
|
|
||||||
// if edge doesn't exist, create it
|
// if edge doesn't exist, create it
|
||||||
NFAEdge e = add_edge_if_not_present(pred_info->v, new_v, g);
|
NFAEdge e;
|
||||||
|
std::tie(e, std::ignore) = add_edge_if_not_present(pred_info->v, new_v, g);
|
||||||
// put edge tops, if applicable
|
// put edge tops, if applicable
|
||||||
if (!edgetops.empty()) {
|
if (!edgetops.empty()) {
|
||||||
assert(g[e].tops.empty() || g[e].tops == edgetops);
|
assert(g[e].tops.empty() || g[e].tops == edgetops);
|
||||||
@ -560,7 +560,8 @@ void mergeClass(vector<unique_ptr<VertexInfo>> &infos, NGHolder &g,
|
|||||||
pred_info->succ.insert(new_vertex_info);
|
pred_info->succ.insert(new_vertex_info);
|
||||||
|
|
||||||
if (new_v_eod) {
|
if (new_v_eod) {
|
||||||
NFAEdge ee = add_edge_if_not_present(pred_info->v, new_v_eod,
|
NFAEdge ee;
|
||||||
|
std::tie(ee, std::ignore) = add_edge_if_not_present(pred_info->v, new_v_eod,
|
||||||
g);
|
g);
|
||||||
|
|
||||||
// put edge tops, if applicable
|
// put edge tops, if applicable
|
||||||
|
@ -432,7 +432,7 @@ NFAVertex findSingleCyclic(const NGHolder &g) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
bool hasOffsetAdjust(const ReportManager &rm, NGHolder &g,
|
bool hasOffsetAdjust(const ReportManager &rm, const NGHolder &g,
|
||||||
int *adjust) {
|
int *adjust) {
|
||||||
const auto &reports = all_reports(g);
|
const auto &reports = all_reports(g);
|
||||||
if (reports.empty()) {
|
if (reports.empty()) {
|
||||||
@ -509,14 +509,14 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) {
|
|||||||
while (v != cyclic) {
|
while (v != cyclic) {
|
||||||
DEBUG_PRINTF("vertex %zu\n", g[v].index);
|
DEBUG_PRINTF("vertex %zu\n", g[v].index);
|
||||||
width++;
|
width++;
|
||||||
auto succ = succs(v, g);
|
auto s = succs(v, g);
|
||||||
if (contains(succ, cyclic)) {
|
if (contains(s, cyclic)) {
|
||||||
if (succ.size() == 1) {
|
if (s.size() == 1) {
|
||||||
v = cyclic;
|
v = cyclic;
|
||||||
} else if (succ.size() == 2) {
|
} else if (s.size() == 2) {
|
||||||
// Cyclic and jump edge.
|
// Cyclic and jump edge.
|
||||||
succ.erase(cyclic);
|
s.erase(cyclic);
|
||||||
NFAVertex v2 = *succ.begin();
|
NFAVertex v2 = *s.begin();
|
||||||
if (!edge(cyclic, v2, g).second) {
|
if (!edge(cyclic, v2, g).second) {
|
||||||
DEBUG_PRINTF("bad form\n");
|
DEBUG_PRINTF("bad form\n");
|
||||||
return false;
|
return false;
|
||||||
@ -527,11 +527,11 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (succ.size() != 1) {
|
if (s.size() != 1) {
|
||||||
DEBUG_PRINTF("bad form\n");
|
DEBUG_PRINTF("bad form\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
v = *succ.begin();
|
v = *s.begin();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -547,12 +547,12 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) {
|
|||||||
while (!is_any_accept(v, g)) {
|
while (!is_any_accept(v, g)) {
|
||||||
DEBUG_PRINTF("vertex %zu\n", g[v].index);
|
DEBUG_PRINTF("vertex %zu\n", g[v].index);
|
||||||
width++;
|
width++;
|
||||||
auto succ = succs(v, g);
|
auto s = succs(v, g);
|
||||||
if (succ.size() != 1) {
|
if (s.size() != 1) {
|
||||||
DEBUG_PRINTF("bad form\n");
|
DEBUG_PRINTF("bad form\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
v = *succ.begin();
|
v = *s.begin();
|
||||||
}
|
}
|
||||||
|
|
||||||
int offsetAdjust = 0;
|
int offsetAdjust = 0;
|
||||||
@ -572,27 +572,28 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<NFAVertex> preds;
|
vector<NFAVertex> predcs;
|
||||||
vector<NFAEdge> dead;
|
vector<NFAEdge> dead;
|
||||||
|
auto deads = [&g=g](const NFAEdge &e) {
|
||||||
|
return (target(e, g) != g.startDs);
|
||||||
|
};
|
||||||
for (auto u : inv_adjacent_vertices_range(cyclic, g)) {
|
for (auto u : inv_adjacent_vertices_range(cyclic, g)) {
|
||||||
DEBUG_PRINTF("pred %zu\n", g[u].index);
|
DEBUG_PRINTF("pred %zu\n", g[u].index);
|
||||||
if (u == cyclic) {
|
if (u == cyclic) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
preds.emplace_back(u);
|
predcs.emplace_back(u);
|
||||||
|
|
||||||
// We want to delete the out-edges of each predecessor, but need to
|
// We want to delete the out-edges of each predecessor, but need to
|
||||||
// make sure we don't delete the startDs self loop.
|
// make sure we don't delete the startDs self loop.
|
||||||
for (const auto &e : out_edges_range(u, g)) {
|
|
||||||
if (target(e, g) != g.startDs) {
|
const auto &e = out_edges_range(u, g);
|
||||||
dead.emplace_back(e);
|
std::copy_if(begin(e), end(e), std::back_inserter(dead), deads);
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
remove_edges(dead, g);
|
remove_edges(dead, g);
|
||||||
|
|
||||||
assert(!preds.empty());
|
assert(!predcs.empty());
|
||||||
|
|
||||||
const CharReach &cr = g[cyclic].char_reach;
|
const CharReach &cr = g[cyclic].char_reach;
|
||||||
|
|
||||||
@ -600,14 +601,14 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) {
|
|||||||
v = add_vertex(g);
|
v = add_vertex(g);
|
||||||
g[v].char_reach = cr;
|
g[v].char_reach = cr;
|
||||||
|
|
||||||
for (auto u : preds) {
|
for (auto u : predcs) {
|
||||||
add_edge(u, v, g);
|
add_edge(u, v, g);
|
||||||
}
|
}
|
||||||
preds.clear();
|
predcs.clear();
|
||||||
preds.emplace_back(v);
|
predcs.emplace_back(v);
|
||||||
}
|
}
|
||||||
assert(!preds.empty());
|
assert(!predcs.empty());
|
||||||
for (auto u : preds) {
|
for (auto u : predcs) {
|
||||||
add_edge(u, cyclic, g);
|
add_edge(u, cyclic, g);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -66,15 +66,15 @@ bool findMask(const NGHolder &g, vector<CharReach> *mask, bool *anchored,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
set<NFAVertex> &succs = *anchored ? s_succ : sds_succ;
|
set<NFAVertex> &succrs = *anchored ? s_succ : sds_succ;
|
||||||
succs.erase(g.startDs);
|
succrs.erase(g.startDs);
|
||||||
if (succs.size() != 1) {
|
if (succrs.size() != 1) {
|
||||||
DEBUG_PRINTF("branchy root\n");
|
DEBUG_PRINTF("branchy root\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
NFAVertex u = *anchored ? g.start : g.startDs;
|
NFAVertex u = *anchored ? g.start : g.startDs;
|
||||||
NFAVertex v = *succs.begin();
|
NFAVertex v = *succrs.begin();
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
DEBUG_PRINTF("validating vertex %zu\n", g[v].index);
|
DEBUG_PRINTF("validating vertex %zu\n", g[v].index);
|
||||||
|
@ -71,13 +71,13 @@ vector<flat_set<NFAVertex>> gatherSuccessorsByDepth(const NGHolder &g,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto succ : adjacent_vertices_range(v, g)) {
|
for (auto succr : adjacent_vertices_range(v, g)) {
|
||||||
// ignore self-loops
|
// ignore self-loops
|
||||||
if (v == succ) {
|
if (v == succr) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
DEBUG_PRINTF("Node %zu depth %u\n", g[succ].index, d + 1);
|
DEBUG_PRINTF("Node %zu depth %u\n", g[succ].index, d + 1);
|
||||||
next.insert(succ);
|
next.insert(succr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
result[d] = next;
|
result[d] = next;
|
||||||
@ -113,13 +113,13 @@ vector<flat_set<NFAVertex>> gatherPredecessorsByDepth(const NGHolder &g,
|
|||||||
for (unsigned d = 1; d < depth; d++) {
|
for (unsigned d = 1; d < depth; d++) {
|
||||||
// collect all successors for all current level vertices
|
// collect all successors for all current level vertices
|
||||||
for (auto v : cur) {
|
for (auto v : cur) {
|
||||||
for (auto pred : inv_adjacent_vertices_range(v, g)) {
|
for (auto predc : inv_adjacent_vertices_range(v, g)) {
|
||||||
// ignore self-loops
|
// ignore self-loops
|
||||||
if (v == pred) {
|
if (v == predc) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
DEBUG_PRINTF("Node %zu depth %u\n", g[pred].index, d + 1);
|
DEBUG_PRINTF("Node %zu depth %u\n", g[pred].index, d + 1);
|
||||||
next.insert(pred);
|
next.insert(predc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
result[d] = next;
|
result[d] = next;
|
||||||
@ -582,11 +582,11 @@ private:
|
|||||||
|
|
||||||
// set up all reports
|
// set up all reports
|
||||||
bool clone = false;
|
bool clone = false;
|
||||||
for (auto &pair : reports_to_vertices) {
|
for (const auto &pair : reports_to_vertices) {
|
||||||
const auto &reports = pair.first;
|
const auto &reports = pair.first;
|
||||||
const auto &vertices = pair.second;
|
const auto &svertices = pair.second;
|
||||||
|
|
||||||
for (auto src : vertices) {
|
for (auto src : svertices) {
|
||||||
// get all predecessors up to edit distance
|
// get all predecessors up to edit distance
|
||||||
auto src_vertices_by_depth =
|
auto src_vertices_by_depth =
|
||||||
gatherPredecessorsByDepth(g, src, edit_distance);
|
gatherPredecessorsByDepth(g, src, edit_distance);
|
||||||
@ -594,7 +594,8 @@ private:
|
|||||||
// find which accepts source vertex connects to
|
// find which accepts source vertex connects to
|
||||||
flat_set<NFAVertex> targets;
|
flat_set<NFAVertex> targets;
|
||||||
for (const auto &accept : accepts) {
|
for (const auto &accept : accepts) {
|
||||||
NFAEdge e = edge(src, accept, g);
|
NFAEdge e;
|
||||||
|
std::tie(e, std::ignore) = edge(src, accept, g);
|
||||||
if (e) {
|
if (e) {
|
||||||
targets.insert(accept);
|
targets.insert(accept);
|
||||||
}
|
}
|
||||||
@ -602,8 +603,8 @@ private:
|
|||||||
assert(targets.size());
|
assert(targets.size());
|
||||||
|
|
||||||
for (unsigned d = 0; d < src_vertices_by_depth.size(); d++) {
|
for (unsigned d = 0; d < src_vertices_by_depth.size(); d++) {
|
||||||
const auto &preds = src_vertices_by_depth[d];
|
const auto &predcs = src_vertices_by_depth[d];
|
||||||
for (auto v : preds) {
|
for (auto v : predcs) {
|
||||||
// only clone a node if it already contains reports
|
// only clone a node if it already contains reports
|
||||||
if (clone && !g[v].reports.empty()) {
|
if (clone && !g[v].reports.empty()) {
|
||||||
create_clone(v, reports, edit_distance - d,
|
create_clone(v, reports, edit_distance - d,
|
||||||
|
@ -514,12 +514,12 @@ static
|
|||||||
bool doHaig(const NGHolder &g, som_type som,
|
bool doHaig(const NGHolder &g, som_type som,
|
||||||
const vector<vector<CharReach>> &triggers, bool unordered_som,
|
const vector<vector<CharReach>> &triggers, bool unordered_som,
|
||||||
raw_som_dfa *rdfa) {
|
raw_som_dfa *rdfa) {
|
||||||
u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from
|
|
||||||
a fight */
|
|
||||||
using StateSet = typename Auto::StateSet;
|
using StateSet = typename Auto::StateSet;
|
||||||
vector<StateSet> nfa_state_map;
|
vector<StateSet> nfa_state_map;
|
||||||
Auto n(g, som, triggers, unordered_som);
|
Auto n(g, som, triggers, unordered_som);
|
||||||
try {
|
try {
|
||||||
|
u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from
|
||||||
|
a fight */
|
||||||
if (!determinise(n, rdfa->states, state_limit, &nfa_state_map)) {
|
if (!determinise(n, rdfa->states, state_limit, &nfa_state_map)) {
|
||||||
DEBUG_PRINTF("state limit exceeded\n");
|
DEBUG_PRINTF("state limit exceeded\n");
|
||||||
return false;
|
return false;
|
||||||
|
@ -154,7 +154,7 @@ bytecode_ptr<NFA> buildLbrDot(const CharReach &cr, const depth &repeatMin,
|
|||||||
const depth &repeatMax, u32 minPeriod,
|
const depth &repeatMax, u32 minPeriod,
|
||||||
bool is_reset, ReportID report) {
|
bool is_reset, ReportID report) {
|
||||||
if (!cr.all()) {
|
if (!cr.all()) {
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
|
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
|
||||||
@ -176,7 +176,7 @@ bytecode_ptr<NFA> buildLbrVerm(const CharReach &cr, const depth &repeatMin,
|
|||||||
const CharReach escapes(~cr);
|
const CharReach escapes(~cr);
|
||||||
|
|
||||||
if (escapes.count() != 1) {
|
if (escapes.count() != 1) {
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
|
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
|
||||||
@ -199,7 +199,7 @@ bytecode_ptr<NFA> buildLbrNVerm(const CharReach &cr, const depth &repeatMin,
|
|||||||
const CharReach escapes(cr);
|
const CharReach escapes(cr);
|
||||||
|
|
||||||
if (escapes.count() != 1) {
|
if (escapes.count() != 1) {
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
|
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
|
||||||
@ -228,7 +228,7 @@ bytecode_ptr<NFA> buildLbrShuf(const CharReach &cr, const depth &repeatMin,
|
|||||||
minPeriod, rtype);
|
minPeriod, rtype);
|
||||||
|
|
||||||
if (shuftiBuildMasks(~cr, (u8 *)&ls->mask_lo, (u8 *)&ls->mask_hi) == -1) {
|
if (shuftiBuildMasks(~cr, (u8 *)&ls->mask_lo, (u8 *)&ls->mask_hi) == -1) {
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("built shuf lbr\n");
|
DEBUG_PRINTF("built shuf lbr\n");
|
||||||
@ -296,7 +296,7 @@ bytecode_ptr<NFA> constructLBR(const CharReach &cr, const depth &repeatMin,
|
|||||||
|
|
||||||
if (!nfa) {
|
if (!nfa) {
|
||||||
assert(0);
|
assert(0);
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
return nfa;
|
return nfa;
|
||||||
@ -307,11 +307,11 @@ bytecode_ptr<NFA> constructLBR(const CastleProto &proto,
|
|||||||
const CompileContext &cc,
|
const CompileContext &cc,
|
||||||
const ReportManager &rm) {
|
const ReportManager &rm) {
|
||||||
if (!cc.grey.allowLbr) {
|
if (!cc.grey.allowLbr) {
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (proto.repeats.size() != 1) {
|
if (proto.repeats.size() != 1) {
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
const PureRepeat &repeat = proto.repeats.begin()->second;
|
const PureRepeat &repeat = proto.repeats.begin()->second;
|
||||||
@ -319,7 +319,7 @@ bytecode_ptr<NFA> constructLBR(const CastleProto &proto,
|
|||||||
|
|
||||||
if (repeat.reports.size() != 1) {
|
if (repeat.reports.size() != 1) {
|
||||||
DEBUG_PRINTF("too many reports\n");
|
DEBUG_PRINTF("too many reports\n");
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_reset;
|
bool is_reset;
|
||||||
@ -346,16 +346,16 @@ bytecode_ptr<NFA> constructLBR(const NGHolder &g,
|
|||||||
const CompileContext &cc,
|
const CompileContext &cc,
|
||||||
const ReportManager &rm) {
|
const ReportManager &rm) {
|
||||||
if (!cc.grey.allowLbr) {
|
if (!cc.grey.allowLbr) {
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
PureRepeat repeat;
|
PureRepeat repeat;
|
||||||
if (!isPureRepeat(g, repeat)) {
|
if (!isPureRepeat(g, repeat)) {
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
if (repeat.reports.size() != 1) {
|
if (repeat.reports.size() != 1) {
|
||||||
DEBUG_PRINTF("too many reports\n");
|
DEBUG_PRINTF("too many reports\n");
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
CastleProto proto(g.kind, repeat);
|
CastleProto proto(g.kind, repeat);
|
||||||
|
@ -39,7 +39,7 @@ bytecode_ptr<NFA> buildLbrVerm16(const CharReach &cr, const depth &repeatMin,
|
|||||||
const CharReach escapes(~cr);
|
const CharReach escapes(~cr);
|
||||||
|
|
||||||
if (escapes.count() > 16) {
|
if (escapes.count() > 16) {
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
|
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
|
||||||
@ -62,7 +62,7 @@ bytecode_ptr<NFA> buildLbrNVerm16(const CharReach &cr, const depth &repeatMin,
|
|||||||
const CharReach escapes(cr);
|
const CharReach escapes(cr);
|
||||||
|
|
||||||
if (escapes.count() > 16) {
|
if (escapes.count() > 16) {
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
|
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
|
||||||
|
@ -342,7 +342,7 @@ void attemptToUseAsStart(const NGHolder &g, NFAVertex u,
|
|||||||
map<NFAVertex, flat_set<u32>> &unhandled_succ_tops,
|
map<NFAVertex, flat_set<u32>> &unhandled_succ_tops,
|
||||||
map<u32, set<NFAVertex>> &tops_out) {
|
map<u32, set<NFAVertex>> &tops_out) {
|
||||||
flat_set<u32> top_inter = unhandled_succ_tops.at(u);
|
flat_set<u32> top_inter = unhandled_succ_tops.at(u);
|
||||||
flat_set<NFAVertex> succs;
|
flat_set<NFAVertex> f_succs;
|
||||||
for (NFAVertex v : adjacent_vertices_range(u, g)) {
|
for (NFAVertex v : adjacent_vertices_range(u, g)) {
|
||||||
if (!contains(unhandled_succ_tops, v)) {
|
if (!contains(unhandled_succ_tops, v)) {
|
||||||
return;
|
return;
|
||||||
@ -360,7 +360,7 @@ void attemptToUseAsStart(const NGHolder &g, NFAVertex u,
|
|||||||
set_intersection(top_inter.begin(), top_inter.end(),
|
set_intersection(top_inter.begin(), top_inter.end(),
|
||||||
v_tops.begin(), v_tops.end(), ni_inserter);
|
v_tops.begin(), v_tops.end(), ni_inserter);
|
||||||
top_inter = std::move(new_inter);
|
top_inter = std::move(new_inter);
|
||||||
succs.insert(v);
|
f_succs.insert(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (top_inter.empty()) {
|
if (top_inter.empty()) {
|
||||||
@ -373,7 +373,7 @@ void attemptToUseAsStart(const NGHolder &g, NFAVertex u,
|
|||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("reusing %zu is a start vertex\n", g[u].index);
|
DEBUG_PRINTF("reusing %zu is a start vertex\n", g[u].index);
|
||||||
markTopSuccAsHandled(u, top_inter, succs, tops_out, unhandled_top_succs,
|
markTopSuccAsHandled(u, top_inter, f_succs, tops_out, unhandled_top_succs,
|
||||||
unhandled_succ_tops);
|
unhandled_succ_tops);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -389,11 +389,11 @@ void reusePredsAsStarts(const NGHolder &g, const map<u32, CharReach> &top_reach,
|
|||||||
/* create list of candidates first, to avoid issues of iter invalidation */
|
/* create list of candidates first, to avoid issues of iter invalidation */
|
||||||
DEBUG_PRINTF("attempting to reuse vertices for top starts\n");
|
DEBUG_PRINTF("attempting to reuse vertices for top starts\n");
|
||||||
vector<NFAVertex> cand_starts;
|
vector<NFAVertex> cand_starts;
|
||||||
for (NFAVertex u : unhandled_succ_tops | map_keys) {
|
auto cands = [&g=g](const NFAVertex &u) {
|
||||||
if (hasSelfLoop(u, g)) {
|
return (hasSelfLoop(u, g));
|
||||||
cand_starts.emplace_back(u);
|
};
|
||||||
}
|
const auto &u = unhandled_succ_tops | map_keys;
|
||||||
}
|
std::copy_if(begin(u), end(u), std::back_inserter(cand_starts), cands);
|
||||||
|
|
||||||
for (NFAVertex u : cand_starts) {
|
for (NFAVertex u : cand_starts) {
|
||||||
if (!contains(unhandled_succ_tops, u)) {
|
if (!contains(unhandled_succ_tops, u)) {
|
||||||
@ -652,7 +652,7 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm,
|
|||||||
u32 numStates = countStates(state_ids);
|
u32 numStates = countStates(state_ids);
|
||||||
if (numStates > NFA_MAX_STATES) {
|
if (numStates > NFA_MAX_STATES) {
|
||||||
DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates);
|
DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates);
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
map<NFAVertex, BoundedRepeatSummary> br_cyclic;
|
map<NFAVertex, BoundedRepeatSummary> br_cyclic;
|
||||||
@ -722,14 +722,14 @@ bytecode_ptr<NFA> constructReversedNFA_i(const NGHolder &h_in, u32 hint,
|
|||||||
assert(h.kind == NFA_REV_PREFIX); /* triggered, raises internal callbacks */
|
assert(h.kind == NFA_REV_PREFIX); /* triggered, raises internal callbacks */
|
||||||
|
|
||||||
// Do state numbering.
|
// Do state numbering.
|
||||||
auto state_ids = numberStates(h, {});
|
auto state_ids = numberStates(h, flat_set<graph_detail::vertex_descriptor<ue2_graph<NGHolder, NFAGraphVertexProps, NFAGraphEdgeProps>>>());
|
||||||
|
|
||||||
// Quick exit: if we've got an embarrassment of riches, i.e. more states
|
// Quick exit: if we've got an embarrassment of riches, i.e. more states
|
||||||
// than we can implement in our largest NFA model, bail here.
|
// than we can implement in our largest NFA model, bail here.
|
||||||
u32 numStates = countStates(state_ids);
|
u32 numStates = countStates(state_ids);
|
||||||
if (numStates > NFA_MAX_STATES) {
|
if (numStates > NFA_MAX_STATES) {
|
||||||
DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates);
|
DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates);
|
||||||
return nullptr;
|
return bytecode_ptr<NFA>(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(sanityCheckGraph(h, state_ids));
|
assert(sanityCheckGraph(h, state_ids));
|
||||||
|
@ -62,12 +62,12 @@ namespace ue2 {
|
|||||||
static
|
static
|
||||||
void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr,
|
void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr,
|
||||||
const flat_set<NFAVertex> &cands,
|
const flat_set<NFAVertex> &cands,
|
||||||
const flat_set<NFAVertex> &preds,
|
const flat_set<NFAVertex> &f_preds,
|
||||||
flat_set<NFAVertex> *next_cands,
|
flat_set<NFAVertex> *next_cands,
|
||||||
flat_set<NFAVertex> *next_preds,
|
flat_set<NFAVertex> *next_preds,
|
||||||
flat_set<NFAVertex> *friends) {
|
flat_set<NFAVertex> *friends) {
|
||||||
for (auto v : cands) {
|
for (auto v : cands) {
|
||||||
if (contains(preds, v)) {
|
if (contains(f_preds, v)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -80,7 +80,7 @@ void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr,
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||||
if (!contains(preds, u)) {
|
if (!contains(f_preds, u)) {
|
||||||
DEBUG_PRINTF("bad pred\n");
|
DEBUG_PRINTF("bad pred\n");
|
||||||
goto next_cand;
|
goto next_cand;
|
||||||
}
|
}
|
||||||
@ -116,8 +116,8 @@ void findAccelFriends(const NGHolder &g, NFAVertex v,
|
|||||||
|
|
||||||
u32 friend_depth = offset + 1;
|
u32 friend_depth = offset + 1;
|
||||||
|
|
||||||
flat_set<NFAVertex> preds;
|
flat_set<NFAVertex> f_preds;
|
||||||
insert(&preds, inv_adjacent_vertices(v, g));
|
insert(&f_preds, inv_adjacent_vertices(v, g));
|
||||||
const CharReach &cr = g[v].char_reach;
|
const CharReach &cr = g[v].char_reach;
|
||||||
|
|
||||||
flat_set<NFAVertex> cands;
|
flat_set<NFAVertex> cands;
|
||||||
@ -126,9 +126,9 @@ void findAccelFriends(const NGHolder &g, NFAVertex v,
|
|||||||
flat_set<NFAVertex> next_preds;
|
flat_set<NFAVertex> next_preds;
|
||||||
flat_set<NFAVertex> next_cands;
|
flat_set<NFAVertex> next_cands;
|
||||||
for (u32 i = 0; i < friend_depth; i++) {
|
for (u32 i = 0; i < friend_depth; i++) {
|
||||||
findAccelFriendGeneration(g, cr, cands, preds, &next_cands, &next_preds,
|
findAccelFriendGeneration(g, cr, cands, f_preds, &next_cands, &next_preds,
|
||||||
friends);
|
friends);
|
||||||
preds.insert(next_preds.begin(), next_preds.end());
|
f_preds.insert(next_preds.begin(), next_preds.end());
|
||||||
next_preds.clear();
|
next_preds.clear();
|
||||||
cands.swap(next_cands);
|
cands.swap(next_cands);
|
||||||
next_cands.clear();
|
next_cands.clear();
|
||||||
@ -321,7 +321,7 @@ struct DAccelScheme {
|
|||||||
bool cd_a = buildDvermMask(a.double_byte);
|
bool cd_a = buildDvermMask(a.double_byte);
|
||||||
bool cd_b = buildDvermMask(b.double_byte);
|
bool cd_b = buildDvermMask(b.double_byte);
|
||||||
if (cd_a != cd_b) {
|
if (cd_a != cd_b) {
|
||||||
return cd_a > cd_b;
|
return cd_a;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -811,11 +811,9 @@ depth_done:
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Second option: a two-byte shufti (i.e. less than eight 2-byte
|
// Second option: a two-byte shufti (i.e. less than eight 2-byte
|
||||||
// literals)
|
// literals)
|
||||||
if (depth > 1) {
|
|
||||||
for (unsigned int i = 0; i < (depth - 1); i++) {
|
for (unsigned int i = 0; i < (depth - 1); i++) {
|
||||||
if (depthReach[i].count() * depthReach[i+1].count()
|
if (depthReach[i].count() * depthReach[i+1].count()
|
||||||
<= DOUBLE_SHUFTI_LIMIT) {
|
<= DOUBLE_SHUFTI_LIMIT) {
|
||||||
|
@ -490,9 +490,9 @@ vector<LitEdge> add_reverse_edges_and_index(LitGraph &lg) {
|
|||||||
const size_t edge_count = num_edges(lg);
|
const size_t edge_count = num_edges(lg);
|
||||||
vector<LitEdge> fwd_edges;
|
vector<LitEdge> fwd_edges;
|
||||||
fwd_edges.reserve(edge_count);
|
fwd_edges.reserve(edge_count);
|
||||||
for (const auto &e : edges_range(lg)) {
|
|
||||||
fwd_edges.push_back(e);
|
const auto &e = edges_range(lg);
|
||||||
}
|
std::copy(begin(e), end(e), std::back_inserter(fwd_edges));
|
||||||
|
|
||||||
vector<LitEdge> rev_map(2 * edge_count);
|
vector<LitEdge> rev_map(2 * edge_count);
|
||||||
|
|
||||||
|
@ -70,7 +70,7 @@ bool bad_mixed_sensitivity(const ue2_literal &s);
|
|||||||
* Score all the edges in the given graph, returning them in \p scores indexed
|
* Score all the edges in the given graph, returning them in \p scores indexed
|
||||||
* by edge_index. */
|
* by edge_index. */
|
||||||
std::vector<u64a> scoreEdges(const NGHolder &h,
|
std::vector<u64a> scoreEdges(const NGHolder &h,
|
||||||
const flat_set<NFAEdge> &known_bad = {});
|
const flat_set<NFAEdge> &known_bad = flat_set<NFAEdge>());
|
||||||
|
|
||||||
/** Returns a score for a literal set. Lower scores are better. */
|
/** Returns a score for a literal set. Lower scores are better. */
|
||||||
u64a scoreSet(const std::set<ue2_literal> &s);
|
u64a scoreSet(const std::set<ue2_literal> &s);
|
||||||
|
@ -98,7 +98,7 @@ void addToString(string &s, const NGHolder &g, NFAVertex v) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
bool splitOffLiteral(NG &ng, NGHolder &g, NFAVertex v, const bool anchored,
|
bool splitOffLiteral(NG &ng, const NGHolder &g, NFAVertex v, const bool anchored,
|
||||||
set<NFAVertex> &dead) {
|
set<NFAVertex> &dead) {
|
||||||
DEBUG_PRINTF("examine vertex %zu\n", g[v].index);
|
DEBUG_PRINTF("examine vertex %zu\n", g[v].index);
|
||||||
bool nocase = false, casefixed = false;
|
bool nocase = false, casefixed = false;
|
||||||
|
@ -94,7 +94,7 @@ void transition_graph(autom &nfa, const std::vector<NFAVertex> &vByStateId,
|
|||||||
/* generate top transitions, false -> top = selfloop */
|
/* generate top transitions, false -> top = selfloop */
|
||||||
bool top_allowed = is_triggered(graph);
|
bool top_allowed = is_triggered(graph);
|
||||||
|
|
||||||
StateSet succ = nfa.dead;
|
StateSet succr = nfa.dead;
|
||||||
for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
|
for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
|
||||||
NFAVertex u = vByStateId[i];
|
NFAVertex u = vByStateId[i];
|
||||||
|
|
||||||
@ -102,7 +102,7 @@ void transition_graph(autom &nfa, const std::vector<NFAVertex> &vByStateId,
|
|||||||
if (contains(unused, v)) {
|
if (contains(unused, v)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
succ.set(graph[v].index);
|
succr.set(graph[v].index);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (top_allowed && !nfa.toppable.test(i)) {
|
if (top_allowed && !nfa.toppable.test(i)) {
|
||||||
@ -112,15 +112,15 @@ void transition_graph(autom &nfa, const std::vector<NFAVertex> &vByStateId,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
StateSet active_squash = succ & squash;
|
StateSet active_squash = succr & squash;
|
||||||
if (active_squash.any()) {
|
if (active_squash.any()) {
|
||||||
for (size_t j = active_squash.find_first(); j != active_squash.npos;
|
for (size_t j = active_squash.find_first(); j != active_squash.npos;
|
||||||
j = active_squash.find_next(j)) {
|
j = active_squash.find_next(j)) {
|
||||||
succ &= squash_mask.find(j)->second;
|
succr &= squash_mask.find(j)->second;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t j = succ.find_first(); j != succ.npos; j = succ.find_next(j)) {
|
for (size_t j = succr.find_first(); j != succr.npos; j = succr.find_next(j)) {
|
||||||
const CharReach &cr = cr_by_index[j];
|
const CharReach &cr = cr_by_index[j];
|
||||||
for (size_t s = cr.find_first(); s != cr.npos; s = cr.find_next(s)) {
|
for (size_t s = cr.find_first(); s != cr.npos; s = cr.find_next(s)) {
|
||||||
next[s].set(j); /* already alpha'ed */
|
next[s].set(j); /* already alpha'ed */
|
||||||
|
@ -404,19 +404,19 @@ CharReach reduced_cr(NFAVertex v, const NGHolder &g,
|
|||||||
return v_cr;
|
return v_cr;
|
||||||
}
|
}
|
||||||
|
|
||||||
NFAVertex pred = getSoleSourceVertex(g, v);
|
NFAVertex s_pred = getSoleSourceVertex(g, v);
|
||||||
assert(pred);
|
assert(s_pred);
|
||||||
|
|
||||||
/* require pred to be fed by one vertex OR (start + startDS) */
|
/* require s_pred to be fed by one vertex OR (start + startDS) */
|
||||||
NFAVertex predpred;
|
NFAVertex predpred;
|
||||||
size_t idp = in_degree(pred, g);
|
size_t idp = in_degree(s_pred, g);
|
||||||
if (hasSelfLoop(pred, g)) {
|
if (hasSelfLoop(s_pred, g)) {
|
||||||
return v_cr; /* not cliche */
|
return v_cr; /* not cliche */
|
||||||
} else if (idp == 1) {
|
} else if (idp == 1) {
|
||||||
predpred = getSoleSourceVertex(g, pred);
|
predpred = getSoleSourceVertex(g, s_pred);
|
||||||
} else if (idp == 2
|
} else if (idp == 2
|
||||||
&& edge(g.start, pred, g).second
|
&& edge(g.start, s_pred, g).second
|
||||||
&& edge(g.startDs, pred, g).second) {
|
&& edge(g.startDs, s_pred, g).second) {
|
||||||
predpred = g.startDs;
|
predpred = g.startDs;
|
||||||
} else {
|
} else {
|
||||||
return v_cr; /* not cliche */
|
return v_cr; /* not cliche */
|
||||||
@ -425,7 +425,7 @@ CharReach reduced_cr(NFAVertex v, const NGHolder &g,
|
|||||||
assert(predpred);
|
assert(predpred);
|
||||||
|
|
||||||
/* require predpred to be cyclic and its cr to be a superset of
|
/* require predpred to be cyclic and its cr to be a superset of
|
||||||
pred and v */
|
s_pred and v */
|
||||||
if (!hasSelfLoop(predpred, g)) {
|
if (!hasSelfLoop(predpred, g)) {
|
||||||
return v_cr; /* not cliche */
|
return v_cr; /* not cliche */
|
||||||
}
|
}
|
||||||
@ -435,7 +435,7 @@ CharReach reduced_cr(NFAVertex v, const NGHolder &g,
|
|||||||
return v_cr; /* fake cyclic */
|
return v_cr; /* fake cyclic */
|
||||||
}
|
}
|
||||||
|
|
||||||
const CharReach &p_cr = g[pred].char_reach;
|
const CharReach &p_cr = g[s_pred].char_reach;
|
||||||
const CharReach &pp_cr = g[predpred].char_reach;
|
const CharReach &pp_cr = g[predpred].char_reach;
|
||||||
if (!v_cr.isSubsetOf(pp_cr) || !p_cr.isSubsetOf(pp_cr)) {
|
if (!v_cr.isSubsetOf(pp_cr) || !p_cr.isSubsetOf(pp_cr)) {
|
||||||
return v_cr; /* not cliche */
|
return v_cr; /* not cliche */
|
||||||
@ -446,7 +446,7 @@ CharReach reduced_cr(NFAVertex v, const NGHolder &g,
|
|||||||
set<NFAVertex> v_succ;
|
set<NFAVertex> v_succ;
|
||||||
insert(&v_succ, adjacent_vertices(v, g));
|
insert(&v_succ, adjacent_vertices(v, g));
|
||||||
set<NFAVertex> p_succ;
|
set<NFAVertex> p_succ;
|
||||||
insert(&p_succ, adjacent_vertices(pred, g));
|
insert(&p_succ, adjacent_vertices(s_pred, g));
|
||||||
|
|
||||||
if (!is_subset_of(v_succ, p_succ)) {
|
if (!is_subset_of(v_succ, p_succ)) {
|
||||||
DEBUG_PRINTF("fail\n");
|
DEBUG_PRINTF("fail\n");
|
||||||
@ -456,7 +456,7 @@ CharReach reduced_cr(NFAVertex v, const NGHolder &g,
|
|||||||
if (contains(v_succ, g.accept) || contains(v_succ, g.acceptEod)) {
|
if (contains(v_succ, g.accept) || contains(v_succ, g.acceptEod)) {
|
||||||
/* need to check that reports of v are a subset of p's */
|
/* need to check that reports of v are a subset of p's */
|
||||||
if (!is_subset_of(g[v].reports,
|
if (!is_subset_of(g[v].reports,
|
||||||
g[pred].reports)) {
|
g[s_pred].reports)) {
|
||||||
DEBUG_PRINTF("fail - reports not subset\n");
|
DEBUG_PRINTF("fail - reports not subset\n");
|
||||||
return v_cr; /* not cliche */
|
return v_cr; /* not cliche */
|
||||||
}
|
}
|
||||||
|
@ -93,7 +93,8 @@ void addReverseEdges(NGHolder &g, vector<NFAEdge> &reverseEdge,
|
|||||||
if (it == allEdges.end()) {
|
if (it == allEdges.end()) {
|
||||||
// No reverse edge, add one.
|
// No reverse edge, add one.
|
||||||
NFAVertex u = source(fwd, g), v = target(fwd, g);
|
NFAVertex u = source(fwd, g), v = target(fwd, g);
|
||||||
NFAEdge rev = add_edge(v, u, g);
|
NFAEdge rev;
|
||||||
|
std::tie(rev, std::ignore) = add_edge(v, u, g);
|
||||||
it = allEdges.insert(make_pair(make_pair(vidx, uidx), rev)).first;
|
it = allEdges.insert(make_pair(make_pair(vidx, uidx), rev)).first;
|
||||||
// Add to capacity map.
|
// Add to capacity map.
|
||||||
u32 revIndex = g[rev].index;
|
u32 revIndex = g[rev].index;
|
||||||
|
@ -62,11 +62,13 @@ void pruneUnreachable(NGHolder &g) {
|
|||||||
&& edge(g.accept, g.acceptEod, g).second) {
|
&& edge(g.accept, g.acceptEod, g).second) {
|
||||||
// Trivial case: there are no in-edges to our accepts (other than
|
// Trivial case: there are no in-edges to our accepts (other than
|
||||||
// accept->acceptEod), so all non-specials are unreachable.
|
// accept->acceptEod), so all non-specials are unreachable.
|
||||||
for (auto v : vertices_range(g)) {
|
|
||||||
if (!is_special(v, g)) {
|
auto deads = [&g=g](const NFAVertex &v) {
|
||||||
dead.emplace_back(v);
|
return (!is_special(v, g));
|
||||||
}
|
};
|
||||||
}
|
const auto &vr = vertices_range(g);
|
||||||
|
std::copy_if(begin(vr), end(vr), std::back_inserter(dead), deads);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// Walk a reverse graph from acceptEod with Boost's depth_first_visit
|
// Walk a reverse graph from acceptEod with Boost's depth_first_visit
|
||||||
// call.
|
// call.
|
||||||
@ -199,17 +201,17 @@ void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
vector<NFAEdge> dead;
|
vector<NFAEdge> dead;
|
||||||
|
auto deads = [&g=g](const NFAEdge &e) {
|
||||||
|
return (!is_any_accept(target(e, g), g));
|
||||||
|
};
|
||||||
for (auto u : inv_adjacent_vertices_range(g.accept, g)) {
|
for (auto u : inv_adjacent_vertices_range(g.accept, g)) {
|
||||||
if (is_special(u, g)) {
|
if (is_special(u, g)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We can prune any out-edges that aren't accepts
|
// We can prune any out-edges that aren't accepts
|
||||||
for (const auto &e : out_edges_range(u, g)) {
|
const auto &er = out_edges_range(u, g);
|
||||||
if (!is_any_accept(target(e, g), g)) {
|
std::copy_if(begin(er), end(er), std::back_inserter(dead), deads);
|
||||||
dead.emplace_back(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dead.empty()) {
|
if (dead.empty()) {
|
||||||
|
@ -244,7 +244,7 @@ u32 allowedSquashDistance(const CharReach &cr, u32 min_width, const NGHolder &g,
|
|||||||
/** Gives a stronger puff trigger when the trigger is connected to a wide
|
/** Gives a stronger puff trigger when the trigger is connected to a wide
|
||||||
* cyclic state (aside from sds) */
|
* cyclic state (aside from sds) */
|
||||||
static
|
static
|
||||||
void improveHead(NGHolder &g, NFAVertex *a, vector<NFAVertex> *nodes) {
|
void improveHead(const NGHolder &g, NFAVertex *a, vector<NFAVertex> *nodes) {
|
||||||
DEBUG_PRINTF("attempting to improve puff trigger\n");
|
DEBUG_PRINTF("attempting to improve puff trigger\n");
|
||||||
assert(!nodes->empty());
|
assert(!nodes->empty());
|
||||||
const CharReach &puff_cr = g[nodes->back()].char_reach;
|
const CharReach &puff_cr = g[nodes->back()].char_reach;
|
||||||
@ -263,7 +263,7 @@ void improveHead(NGHolder &g, NFAVertex *a, vector<NFAVertex> *nodes) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void constructPuff(NGHolder &g, const NFAVertex a, const NFAVertex puffv,
|
void constructPuff(const NGHolder &g, const NFAVertex a, const NFAVertex puffv,
|
||||||
const CharReach &cr, const ReportID report, u32 width,
|
const CharReach &cr, const ReportID report, u32 width,
|
||||||
bool fixed_depth, bool unbounded, bool auto_restart,
|
bool fixed_depth, bool unbounded, bool auto_restart,
|
||||||
RoseBuild &rose, ReportManager &rm,
|
RoseBuild &rose, ReportManager &rm,
|
||||||
@ -361,9 +361,7 @@ bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a,
|
|||||||
// single report ID on a vertex
|
// single report ID on a vertex
|
||||||
if (is_match_vertex(a, g)) {
|
if (is_match_vertex(a, g)) {
|
||||||
DEBUG_PRINTF("stop puffing due to vertex that leads to accept\n");
|
DEBUG_PRINTF("stop puffing due to vertex that leads to accept\n");
|
||||||
if (!nodes.empty()) {
|
|
||||||
nodes.pop_back();
|
nodes.pop_back();
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -307,13 +307,15 @@ void markForRemoval(const NFAVertex v, VertexInfoMap &infoMap,
|
|||||||
|
|
||||||
static
|
static
|
||||||
bool hasInEdgeTops(const NGHolder &g, NFAVertex v) {
|
bool hasInEdgeTops(const NGHolder &g, NFAVertex v) {
|
||||||
NFAEdge e = edge(g.start, v, g);
|
|
||||||
|
NFAEdge e;
|
||||||
|
std::tie(e, std::ignore) = edge(g.start, v, g);
|
||||||
return e && !g[e].tops.empty();
|
return e && !g[e].tops.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Transform (1), removal of redundant vertices. */
|
/** Transform (1), removal of redundant vertices. */
|
||||||
static
|
static
|
||||||
bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap,
|
bool doUselessMergePass(const NGHolder &g, const som_type som, VertexInfoMap &infoMap,
|
||||||
set<NFAVertex> &removable) {
|
set<NFAVertex> &removable) {
|
||||||
/* useless merges can be done in any order, no need to take any care with
|
/* useless merges can be done in any order, no need to take any care with
|
||||||
* ordering */
|
* ordering */
|
||||||
@ -323,7 +325,7 @@ bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap,
|
|||||||
|
|
||||||
bool changed = false;
|
bool changed = false;
|
||||||
for (auto v : vertices_range(g)) {
|
for (auto v : vertices_range(g)) {
|
||||||
VertexInfo &info = infoMap[v];
|
const VertexInfo &info = infoMap[v];
|
||||||
|
|
||||||
if (info.isRemoved) {
|
if (info.isRemoved) {
|
||||||
continue;
|
continue;
|
||||||
@ -439,7 +441,7 @@ bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap,
|
|||||||
continue; // Conservatively skip anything with nonzero tops.
|
continue; // Conservatively skip anything with nonzero tops.
|
||||||
}
|
}
|
||||||
|
|
||||||
CharReach &otherReach = g[t].char_reach;
|
const CharReach &otherReach = g[t].char_reach;
|
||||||
if (currReach.isSubsetOf(otherReach)) {
|
if (currReach.isSubsetOf(otherReach)) {
|
||||||
DEBUG_PRINTF("removing redundant vertex %zu (keeping %zu)\n",
|
DEBUG_PRINTF("removing redundant vertex %zu (keeping %zu)\n",
|
||||||
g[v].index, g[t].index);
|
g[v].index, g[t].index);
|
||||||
@ -636,12 +638,12 @@ bool reversePathReachSubset(const NFAEdge &e, const NFAVertex &dom,
|
|||||||
|
|
||||||
NFAVertex start = source(e, g);
|
NFAVertex start = source(e, g);
|
||||||
using RevGraph = boost::reverse_graph<NGHolder, const NGHolder &>;
|
using RevGraph = boost::reverse_graph<NGHolder, const NGHolder &>;
|
||||||
map<RevGraph::vertex_descriptor, boost::default_color_type> vertexColor;
|
|
||||||
|
|
||||||
// Walk the graph backwards from v, examining each node. We fail (return
|
// Walk the graph backwards from v, examining each node. We fail (return
|
||||||
// false) if we encounter a node with reach NOT a subset of domReach, and
|
// false) if we encounter a node with reach NOT a subset of domReach, and
|
||||||
// we stop searching at dom.
|
// we stop searching at dom.
|
||||||
try {
|
try {
|
||||||
|
map<RevGraph::vertex_descriptor, boost::default_color_type> vertexColor;
|
||||||
depth_first_visit(RevGraph(g), start,
|
depth_first_visit(RevGraph(g), start,
|
||||||
ReachSubsetVisitor(domReach),
|
ReachSubsetVisitor(domReach),
|
||||||
make_assoc_property_map(vertexColor),
|
make_assoc_property_map(vertexColor),
|
||||||
@ -664,12 +666,12 @@ bool forwardPathReachSubset(const NFAEdge &e, const NFAVertex &dom,
|
|||||||
}
|
}
|
||||||
|
|
||||||
NFAVertex start = target(e, g);
|
NFAVertex start = target(e, g);
|
||||||
map<NFAVertex, boost::default_color_type> vertexColor;
|
|
||||||
|
|
||||||
// Walk the graph forward from v, examining each node. We fail (return
|
// Walk the graph forward from v, examining each node. We fail (return
|
||||||
// false) if we encounter a node with reach NOT a subset of domReach, and
|
// false) if we encounter a node with reach NOT a subset of domReach, and
|
||||||
// we stop searching at dom.
|
// we stop searching at dom.
|
||||||
try {
|
try {
|
||||||
|
map<NFAVertex, boost::default_color_type> vertexColor;
|
||||||
depth_first_visit(g, start, ReachSubsetVisitor(domReach),
|
depth_first_visit(g, start, ReachSubsetVisitor(domReach),
|
||||||
make_assoc_property_map(vertexColor),
|
make_assoc_property_map(vertexColor),
|
||||||
VertexIs<NGHolder, NFAVertex>(dom));
|
VertexIs<NGHolder, NFAVertex>(dom));
|
||||||
@ -748,7 +750,7 @@ u32 findCyclic(const NGHolder &g, vector<bool> &cyclic) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void findCyclicDom(NGHolder &g, vector<bool> &cyclic,
|
void findCyclicDom(const NGHolder &g, vector<bool> &cyclic,
|
||||||
set<NFAEdge> &dead, som_type som) {
|
set<NFAEdge> &dead, som_type som) {
|
||||||
auto dominators = findDominators(g);
|
auto dominators = findDominators(g);
|
||||||
|
|
||||||
@ -792,7 +794,7 @@ void findCyclicDom(NGHolder &g, vector<bool> &cyclic,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void findCyclicPostDom(NGHolder &g, vector<bool> &cyclic,
|
void findCyclicPostDom(const NGHolder &g, vector<bool> &cyclic,
|
||||||
set<NFAEdge> &dead) {
|
set<NFAEdge> &dead) {
|
||||||
auto postdominators = findPostDominators(g);
|
auto postdominators = findPostDominators(g);
|
||||||
|
|
||||||
|
@ -393,9 +393,9 @@ void checkReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs,
|
|||||||
unordered_set<NFAVertex> involved(rsi.vertices.begin(),
|
unordered_set<NFAVertex> involved(rsi.vertices.begin(),
|
||||||
rsi.vertices.end());
|
rsi.vertices.end());
|
||||||
unordered_set<NFAVertex> tail(involved); // to look for back-edges.
|
unordered_set<NFAVertex> tail(involved); // to look for back-edges.
|
||||||
unordered_set<NFAVertex> pred, succ;
|
unordered_set<NFAVertex> v_pred, v_succ;
|
||||||
proper_pred(g, rsi.vertices.front(), pred);
|
proper_pred(g, rsi.vertices.front(), v_pred);
|
||||||
proper_succ(g, rsi.vertices.back(), succ);
|
proper_succ(g, rsi.vertices.back(), v_succ);
|
||||||
|
|
||||||
flat_set<ReportID> reports;
|
flat_set<ReportID> reports;
|
||||||
findFirstReports(g, rsi, reports);
|
findFirstReports(g, rsi, reports);
|
||||||
@ -406,7 +406,7 @@ void checkReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs,
|
|||||||
for (auto v : rsi.vertices) {
|
for (auto v : rsi.vertices) {
|
||||||
tail.erase(v); // now contains all vertices _after_ this one.
|
tail.erase(v); // now contains all vertices _after_ this one.
|
||||||
|
|
||||||
if (vertexIsBad(g, v, involved, tail, pred, succ, reports)) {
|
if (vertexIsBad(g, v, involved, tail, v_pred, v_succ, reports)) {
|
||||||
recalc = true;
|
recalc = true;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -793,10 +793,10 @@ void replaceSubgraphWithSpecial(NGHolder &g, ReachSubgraph &rsi,
|
|||||||
|
|
||||||
const unordered_set<NFAVertex> involved(rsi.vertices.begin(),
|
const unordered_set<NFAVertex> involved(rsi.vertices.begin(),
|
||||||
rsi.vertices.end());
|
rsi.vertices.end());
|
||||||
vector<NFAVertex> succs;
|
vector<NFAVertex> g_succs;
|
||||||
getSuccessors(g, rsi, &succs);
|
getSuccessors(g, rsi, &g_succs);
|
||||||
|
|
||||||
unpeelNearEnd(g, rsi, depths, &succs);
|
unpeelNearEnd(g, rsi, depths, &g_succs);
|
||||||
|
|
||||||
// Create our replacement cyclic state with the same reachability and
|
// Create our replacement cyclic state with the same reachability and
|
||||||
// report info as the last vertex in our topo-ordered list.
|
// report info as the last vertex in our topo-ordered list.
|
||||||
@ -824,7 +824,7 @@ void replaceSubgraphWithSpecial(NGHolder &g, ReachSubgraph &rsi,
|
|||||||
|
|
||||||
// Wire cyclic state to tug trigger states built from successors.
|
// Wire cyclic state to tug trigger states built from successors.
|
||||||
vector<NFAVertex> tugs;
|
vector<NFAVertex> tugs;
|
||||||
for (auto v : succs) {
|
for (auto v : g_succs) {
|
||||||
buildTugTrigger(g, cyclic, v, involved, depths, tugs);
|
buildTugTrigger(g, cyclic, v, involved, depths, tugs);
|
||||||
}
|
}
|
||||||
created.insert(tugs.begin(), tugs.end());
|
created.insert(tugs.begin(), tugs.end());
|
||||||
@ -860,10 +860,8 @@ void replaceSubgraphWithLazySpecial(NGHolder &g, ReachSubgraph &rsi,
|
|||||||
|
|
||||||
DEBUG_PRINTF("entry\n");
|
DEBUG_PRINTF("entry\n");
|
||||||
|
|
||||||
const unordered_set<NFAVertex> involved(rsi.vertices.begin(),
|
vector<NFAVertex> g_succs;
|
||||||
rsi.vertices.end());
|
getSuccessors(g, rsi, &g_succs);
|
||||||
vector<NFAVertex> succs;
|
|
||||||
getSuccessors(g, rsi, &succs);
|
|
||||||
|
|
||||||
// Create our replacement cyclic state with the same reachability and
|
// Create our replacement cyclic state with the same reachability and
|
||||||
// report info as the last vertex in our topo-ordered list.
|
// report info as the last vertex in our topo-ordered list.
|
||||||
@ -892,15 +890,15 @@ void replaceSubgraphWithLazySpecial(NGHolder &g, ReachSubgraph &rsi,
|
|||||||
// In the rose case, our tug is our cyclic, and it's wired to our
|
// In the rose case, our tug is our cyclic, and it's wired to our
|
||||||
// successors (which should be just the accept).
|
// successors (which should be just the accept).
|
||||||
vector<NFAVertex> tugs;
|
vector<NFAVertex> tugs;
|
||||||
assert(succs.size() == 1);
|
assert(g_succs.size() == 1);
|
||||||
for (auto v : succs) {
|
for (auto v : g_succs) {
|
||||||
add_edge(cyclic, v, g);
|
add_edge(cyclic, v, g);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wire pos trigger to accept if min repeat is one -- this deals with cases
|
// Wire pos trigger to accept if min repeat is one -- this deals with cases
|
||||||
// where we can get a pos and tug trigger on the same byte.
|
// where we can get a pos and tug trigger on the same byte.
|
||||||
if (rsi.repeatMin == depth(1)) {
|
if (rsi.repeatMin == depth(1)) {
|
||||||
for (auto v : succs) {
|
for (auto v : g_succs) {
|
||||||
add_edge(pos_trigger, v, g);
|
add_edge(pos_trigger, v, g);
|
||||||
g[pos_trigger].reports = g[cyclic].reports;
|
g[pos_trigger].reports = g[cyclic].reports;
|
||||||
}
|
}
|
||||||
@ -1144,7 +1142,7 @@ NFAVertex buildTriggerStates(NGHolder &g, const vector<CharReach> &trigger,
|
|||||||
g[v].char_reach = cr;
|
g[v].char_reach = cr;
|
||||||
add_edge(u, v, g);
|
add_edge(u, v, g);
|
||||||
if (u == g.start) {
|
if (u == g.start) {
|
||||||
g[edge(u, v, g)].tops.insert(top);
|
g[edge(u, v, g).first].tops.insert(top);
|
||||||
}
|
}
|
||||||
u = v;
|
u = v;
|
||||||
}
|
}
|
||||||
@ -1467,9 +1465,9 @@ struct StrawWalker {
|
|||||||
}
|
}
|
||||||
if (ai != ae) {
|
if (ai != ae) {
|
||||||
DEBUG_PRINTF("more than one succ\n");
|
DEBUG_PRINTF("more than one succ\n");
|
||||||
set<NFAVertex> succs;
|
set<NFAVertex> a_succs;
|
||||||
insert(&succs, adjacent_vertices(v, g));
|
insert(&a_succs, adjacent_vertices(v, g));
|
||||||
succs.erase(v);
|
a_succs.erase(v);
|
||||||
for (tie(ai, ae) = adjacent_vertices(v, g); ai != ae; ++ai) {
|
for (tie(ai, ae) = adjacent_vertices(v, g); ai != ae; ++ai) {
|
||||||
next = *ai;
|
next = *ai;
|
||||||
DEBUG_PRINTF("checking %zu\n", g[next].index);
|
DEBUG_PRINTF("checking %zu\n", g[next].index);
|
||||||
@ -1479,7 +1477,7 @@ struct StrawWalker {
|
|||||||
set<NFAVertex> lsuccs;
|
set<NFAVertex> lsuccs;
|
||||||
insert(&lsuccs, adjacent_vertices(next, g));
|
insert(&lsuccs, adjacent_vertices(next, g));
|
||||||
|
|
||||||
if (lsuccs != succs) {
|
if (lsuccs != a_succs) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1887,7 +1885,7 @@ void buildFeeder(NGHolder &g, const BoundedRepeatData &rd,
|
|||||||
* offset.
|
* offset.
|
||||||
*/
|
*/
|
||||||
static
|
static
|
||||||
bool improveLeadingRepeat(NGHolder &g, BoundedRepeatData &rd,
|
bool improveLeadingRepeat(NGHolder &g, const BoundedRepeatData &rd,
|
||||||
unordered_set<NFAVertex> &created,
|
unordered_set<NFAVertex> &created,
|
||||||
const vector<BoundedRepeatData> &all_repeats) {
|
const vector<BoundedRepeatData> &all_repeats) {
|
||||||
assert(edge(g.startDs, g.startDs, g).second);
|
assert(edge(g.startDs, g.startDs, g).second);
|
||||||
@ -1908,9 +1906,9 @@ bool improveLeadingRepeat(NGHolder &g, BoundedRepeatData &rd,
|
|||||||
}
|
}
|
||||||
|
|
||||||
vector<NFAVertex> straw;
|
vector<NFAVertex> straw;
|
||||||
NFAVertex pred =
|
NFAVertex w_pred =
|
||||||
walkStrawToCyclicRev(g, rd.pos_trigger, all_repeats, straw);
|
walkStrawToCyclicRev(g, rd.pos_trigger, all_repeats, straw);
|
||||||
if (pred != g.startDs) {
|
if (w_pred != g.startDs) {
|
||||||
DEBUG_PRINTF("straw walk doesn't lead to startDs\n");
|
DEBUG_PRINTF("straw walk doesn't lead to startDs\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -1958,7 +1956,7 @@ bool improveLeadingRepeat(NGHolder &g, BoundedRepeatData &rd,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
vector<NFAVertex> makeOwnStraw(NGHolder &g, BoundedRepeatData &rd,
|
vector<NFAVertex> makeOwnStraw(NGHolder &g, const BoundedRepeatData &rd,
|
||||||
const vector<NFAVertex> &straw) {
|
const vector<NFAVertex> &straw) {
|
||||||
// Straw runs from startDs to our pos trigger.
|
// Straw runs from startDs to our pos trigger.
|
||||||
assert(!straw.empty());
|
assert(!straw.empty());
|
||||||
@ -1992,7 +1990,7 @@ vector<NFAVertex> makeOwnStraw(NGHolder &g, BoundedRepeatData &rd,
|
|||||||
* rewire the straw to start instead of removing the startDs self-loop.
|
* rewire the straw to start instead of removing the startDs self-loop.
|
||||||
*/
|
*/
|
||||||
static
|
static
|
||||||
bool improveLeadingRepeatOutfix(NGHolder &g, BoundedRepeatData &rd,
|
bool improveLeadingRepeatOutfix(NGHolder &g, const BoundedRepeatData &rd,
|
||||||
unordered_set<NFAVertex> &created,
|
unordered_set<NFAVertex> &created,
|
||||||
const vector<BoundedRepeatData> &all_repeats) {
|
const vector<BoundedRepeatData> &all_repeats) {
|
||||||
assert(g.kind == NFA_OUTFIX);
|
assert(g.kind == NFA_OUTFIX);
|
||||||
@ -2013,9 +2011,9 @@ bool improveLeadingRepeatOutfix(NGHolder &g, BoundedRepeatData &rd,
|
|||||||
}
|
}
|
||||||
|
|
||||||
vector<NFAVertex> straw;
|
vector<NFAVertex> straw;
|
||||||
NFAVertex pred =
|
NFAVertex w_pred =
|
||||||
walkStrawToCyclicRev(g, rd.pos_trigger, all_repeats, straw);
|
walkStrawToCyclicRev(g, rd.pos_trigger, all_repeats, straw);
|
||||||
if (pred != g.startDs) {
|
if (w_pred != g.startDs) {
|
||||||
DEBUG_PRINTF("straw walk doesn't lead to startDs\n");
|
DEBUG_PRINTF("straw walk doesn't lead to startDs\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -54,8 +54,8 @@ void wireStartToTops(NGHolder &g, const flat_set<NFAVertex> &tops,
|
|||||||
vector<NFAEdge> &tempEdges) {
|
vector<NFAEdge> &tempEdges) {
|
||||||
for (NFAVertex v : tops) {
|
for (NFAVertex v : tops) {
|
||||||
assert(!isLeafNode(v, g));
|
assert(!isLeafNode(v, g));
|
||||||
|
auto edge_result = add_edge(g.start, v, g);
|
||||||
const NFAEdge &e = add_edge(g.start, v, g);
|
const NFAEdge &e = edge_result.first;
|
||||||
tempEdges.emplace_back(e);
|
tempEdges.emplace_back(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -877,18 +877,18 @@ bool beginsWithDotStar(const NGHolder &g) {
|
|||||||
// We can ignore the successors of start, as matches that begin there will
|
// We can ignore the successors of start, as matches that begin there will
|
||||||
// necessarily have a SOM of 0.
|
// necessarily have a SOM of 0.
|
||||||
|
|
||||||
set<NFAVertex> succ;
|
set<NFAVertex> a_succ;
|
||||||
insert(&succ, adjacent_vertices(g.startDs, g));
|
insert(&a_succ, adjacent_vertices(g.startDs, g));
|
||||||
succ.erase(g.startDs);
|
a_succ.erase(g.startDs);
|
||||||
|
|
||||||
for (auto v : succ) {
|
for (auto v : a_succ) {
|
||||||
// We want 'dot' states that aren't virtual starts.
|
// We want 'dot' states that aren't virtual starts.
|
||||||
if (g[v].char_reach.all() &&
|
if (g[v].char_reach.all() &&
|
||||||
!g[v].assert_flags) {
|
!g[v].assert_flags) {
|
||||||
hasDot = true;
|
hasDot = true;
|
||||||
set<NFAVertex> dotsucc;
|
set<NFAVertex> dotsucc;
|
||||||
insert(&dotsucc, adjacent_vertices(v, g));
|
insert(&dotsucc, adjacent_vertices(v, g));
|
||||||
if (dotsucc != succ) {
|
if (dotsucc != a_succ) {
|
||||||
DEBUG_PRINTF("failed dot-star succ check\n");
|
DEBUG_PRINTF("failed dot-star succ check\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -1178,7 +1178,7 @@ void expandGraph(NGHolder &g, unordered_map<NFAVertex, u32> ®ions,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
bool doTreePlanningIntl(NGHolder &g,
|
bool doTreePlanningIntl(const NGHolder &g,
|
||||||
const unordered_map<NFAVertex, u32> ®ions,
|
const unordered_map<NFAVertex, u32> ®ions,
|
||||||
const map<u32, region_info> &info,
|
const map<u32, region_info> &info,
|
||||||
map<u32, region_info>::const_iterator picked, u32 bad_region,
|
map<u32, region_info>::const_iterator picked, u32 bad_region,
|
||||||
@ -1293,7 +1293,7 @@ bool doTreePlanningIntl(NGHolder &g,
|
|||||||
DEBUG_PRINTF("add mapped reporters for region %u\n", it->first);
|
DEBUG_PRINTF("add mapped reporters for region %u\n", it->first);
|
||||||
addMappedReporterVertices(it->second, g, copy_to_orig,
|
addMappedReporterVertices(it->second, g, copy_to_orig,
|
||||||
plan.back().reporters);
|
plan.back().reporters);
|
||||||
} while (it->second.optional && it != info.rend() &&
|
} while (it != info.rend() && it->second.optional &&
|
||||||
(++it)->first > furthest->first);
|
(++it)->first > furthest->first);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@ -1409,7 +1409,7 @@ bool doSomPlanning(NGHolder &g, bool stuck_in,
|
|||||||
|
|
||||||
/* Need to verify how far the lock covers */
|
/* Need to verify how far the lock covers */
|
||||||
u32 bad_region;
|
u32 bad_region;
|
||||||
NGHolder *ap_pref = plan.back().prefix.get();
|
const NGHolder *ap_pref = plan.back().prefix.get();
|
||||||
NGHolder ap_temp;
|
NGHolder ap_temp;
|
||||||
if (hasBigCycles(*ap_pref)) {
|
if (hasBigCycles(*ap_pref)) {
|
||||||
fillRoughMidfix(&ap_temp, g, regions, info, picked);
|
fillRoughMidfix(&ap_temp, g, regions, info, picked);
|
||||||
@ -1552,7 +1552,7 @@ bool doSomPlanning(NGHolder &g, bool stuck_in,
|
|||||||
DEBUG_PRINTF("region %u contributes reporters to last plan\n",
|
DEBUG_PRINTF("region %u contributes reporters to last plan\n",
|
||||||
it->first);
|
it->first);
|
||||||
addReporterVertices(it->second, g, plan.back().reporters);
|
addReporterVertices(it->second, g, plan.back().reporters);
|
||||||
} while (it->second.optional && it != info.rend() &&
|
} while (it != info.rend() && it->second.optional &&
|
||||||
(++it)->first > furthest->first);
|
(++it)->first > furthest->first);
|
||||||
|
|
||||||
DEBUG_PRINTF("done!\n");
|
DEBUG_PRINTF("done!\n");
|
||||||
@ -1856,7 +1856,7 @@ bool doSomRevNfa(NG &ng, NGHolder &g, const CompileContext &cc) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
u32 doSomRevNfaPrefix(NG &ng, const ExpressionInfo &expr, NGHolder &g,
|
u32 doSomRevNfaPrefix(NG &ng, const ExpressionInfo &expr, const NGHolder &g,
|
||||||
const CompileContext &cc) {
|
const CompileContext &cc) {
|
||||||
depth maxWidth = findMaxWidth(g);
|
depth maxWidth = findMaxWidth(g);
|
||||||
|
|
||||||
@ -2012,7 +2012,7 @@ void setReportOnHaigPrefix(RoseBuild &rose, NGHolder &h) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
bool tryHaig(RoseBuild &rose, NGHolder &g,
|
bool tryHaig(RoseBuild &rose, const NGHolder &g,
|
||||||
const unordered_map<NFAVertex, u32> ®ions,
|
const unordered_map<NFAVertex, u32> ®ions,
|
||||||
som_type som, u32 somPrecision,
|
som_type som, u32 somPrecision,
|
||||||
map<u32, region_info>::const_iterator picked,
|
map<u32, region_info>::const_iterator picked,
|
||||||
@ -2444,13 +2444,9 @@ void makeReportsSomPass(ReportManager &rm, NGHolder &g) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
bool doLitHaigSom(NG &ng, NGHolder &g, som_type som) {
|
bool doLitHaigSom(NG &ng, const NGHolder &g, som_type som) {
|
||||||
ue2_literal lit;
|
ue2_literal lit;
|
||||||
shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
|
shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
|
||||||
if (!rhs) {
|
|
||||||
assert(0);
|
|
||||||
throw std::bad_alloc();
|
|
||||||
}
|
|
||||||
if (!ng.cc.grey.allowLitHaig) {
|
if (!ng.cc.grey.allowLitHaig) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -2515,10 +2511,6 @@ bool doHaigLitHaigSom(NG &ng, NGHolder &g,
|
|||||||
ue2_literal lit;
|
ue2_literal lit;
|
||||||
shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
|
shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
|
||||||
shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
|
shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
|
||||||
if (!rhs || !lhs) {
|
|
||||||
assert(0);
|
|
||||||
throw std::bad_alloc();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!splitOffBestLiteral(g, regions, &lit, &*lhs, &*rhs, ng.cc)) {
|
if (!splitOffBestLiteral(g, regions, &lit, &*lhs, &*rhs, ng.cc)) {
|
||||||
return false;
|
return false;
|
||||||
@ -2661,7 +2653,7 @@ bool doHaigLitHaigSom(NG &ng, NGHolder &g,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
bool doMultiLitHaigSom(NG &ng, NGHolder &g, som_type som) {
|
bool doMultiLitHaigSom(NG &ng, const NGHolder &g, som_type som) {
|
||||||
set<ue2_literal> lits;
|
set<ue2_literal> lits;
|
||||||
shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
|
shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
|
||||||
if (!ng.cc.grey.allowLitHaig) {
|
if (!ng.cc.grey.allowLitHaig) {
|
||||||
@ -3135,7 +3127,7 @@ sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const ExpressionInfo &expr,
|
|||||||
|
|
||||||
// try a redundancy pass.
|
// try a redundancy pass.
|
||||||
if (addSomRedundancy(g, depths)) {
|
if (addSomRedundancy(g, depths)) {
|
||||||
depths = getDistancesFromSOM(g);
|
depths = getDistancesFromSOM(g); // cppcheck-suppress unreadVariable
|
||||||
}
|
}
|
||||||
|
|
||||||
auto regions = assignRegions(g);
|
auto regions = assignRegions(g);
|
||||||
|
@ -113,9 +113,9 @@ bool forkVertex(NFAVertex v, NGHolder &g, vector<DepthMinMax> &depths,
|
|||||||
}
|
}
|
||||||
*numNewVertices += predGroups.size();
|
*numNewVertices += predGroups.size();
|
||||||
|
|
||||||
for (auto &group : predGroups) {
|
for (const auto &group : predGroups) {
|
||||||
const depth &predDepth = group.first;
|
const depth &predDepth = group.first;
|
||||||
const vector<NFAEdge> &preds = group.second;
|
const vector<NFAEdge> &gspreds = group.second;
|
||||||
|
|
||||||
// Clone v for this depth with all its associated out-edges.
|
// Clone v for this depth with all its associated out-edges.
|
||||||
u32 clone_idx = depths.size(); // next index to be used
|
u32 clone_idx = depths.size(); // next index to be used
|
||||||
@ -131,8 +131,8 @@ bool forkVertex(NFAVertex v, NGHolder &g, vector<DepthMinMax> &depths,
|
|||||||
add_edge(clone, target(e, g), g[e], g);
|
add_edge(clone, target(e, g), g[e], g);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add in-edges from preds in this group.
|
// Add in-edges from gspreds in this group.
|
||||||
for (const auto &e : preds) {
|
for (const auto &e : gspreds) {
|
||||||
add_edge(source(e, g), clone, g[e], g);
|
add_edge(source(e, g), clone, g[e], g);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -58,11 +58,12 @@ vector<DepthMinMax> getDistancesFromSOM(const NGHolder &g_orig) {
|
|||||||
cloneHolder(g, g_orig, &vmap);
|
cloneHolder(g, g_orig, &vmap);
|
||||||
|
|
||||||
vector<NFAVertex> vstarts;
|
vector<NFAVertex> vstarts;
|
||||||
for (auto v : vertices_range(g)) {
|
auto vstart = [&g=g](const NFAVertex &v) {
|
||||||
if (is_virtual_start(v, g)) {
|
return (is_virtual_start(v, g));
|
||||||
vstarts.emplace_back(v);
|
};
|
||||||
}
|
const auto &vr = vertices_range(g);
|
||||||
}
|
std::copy_if(begin(vr), end(vr), std::back_inserter(vstarts), vstart);
|
||||||
|
|
||||||
vstarts.emplace_back(g.startDs);
|
vstarts.emplace_back(g.startDs);
|
||||||
|
|
||||||
// wire the successors of every virtual start or startDs to g.start.
|
// wire the successors of every virtual start or startDs to g.start.
|
||||||
@ -269,18 +270,6 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g,
|
|||||||
boost::depth_first_search(c_g, visitor(backEdgeVisitor)
|
boost::depth_first_search(c_g, visitor(backEdgeVisitor)
|
||||||
.root_vertex(c_g.start));
|
.root_vertex(c_g.start));
|
||||||
|
|
||||||
for (const auto &e : be) {
|
|
||||||
NFAVertex s = source(e, c_g);
|
|
||||||
NFAVertex t = target(e, c_g);
|
|
||||||
DEBUG_PRINTF("back edge %zu %zu\n", c_g[s].index, c_g[t].index);
|
|
||||||
if (s != t) {
|
|
||||||
assert(0);
|
|
||||||
DEBUG_PRINTF("eek big cycle\n");
|
|
||||||
rv = true; /* big cycle -> eek */
|
|
||||||
goto exit;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
DEBUG_PRINTF("checking acyclic+selfloop graph\n");
|
DEBUG_PRINTF("checking acyclic+selfloop graph\n");
|
||||||
|
|
||||||
rv = !firstMatchIsFirst(c_g);
|
rv = !firstMatchIsFirst(c_g);
|
||||||
|
@ -151,7 +151,8 @@ void splitRHS(const NGHolder &base, const vector<NFAVertex> &pivots,
|
|||||||
|
|
||||||
for (auto pivot : pivots) {
|
for (auto pivot : pivots) {
|
||||||
assert(contains(*rhs_map, pivot));
|
assert(contains(*rhs_map, pivot));
|
||||||
NFAEdge e = add_edge(rhs->start, (*rhs_map)[pivot], *rhs);
|
auto edge_result = add_edge(rhs->start, (*rhs_map)[pivot], *rhs);
|
||||||
|
NFAEdge e = edge_result.first;
|
||||||
(*rhs)[e].tops.insert(DEFAULT_TOP);
|
(*rhs)[e].tops.insert(DEFAULT_TOP);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -255,19 +255,19 @@ void buildSquashMask(NFAStateSet &mask, const NGHolder &g, NFAVertex v,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void buildSucc(NFAStateSet &succ, const NGHolder &g, NFAVertex v) {
|
void buildSucc(NFAStateSet &ssucc, const NGHolder &g, NFAVertex v) {
|
||||||
for (auto w : adjacent_vertices_range(v, g)) {
|
for (auto w : adjacent_vertices_range(v, g)) {
|
||||||
if (!is_special(w, g)) {
|
if (!is_special(w, g)) {
|
||||||
succ.set(g[w].index);
|
ssucc.set(g[w].index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void buildPred(NFAStateSet &pred, const NGHolder &g, NFAVertex v) {
|
void buildPred(NFAStateSet &spred, const NGHolder &g, NFAVertex v) {
|
||||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||||
if (!is_special(u, g)) {
|
if (!is_special(u, g)) {
|
||||||
pred.set(g[u].index);
|
spred.set(g[u].index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -409,19 +409,19 @@ unordered_map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g,
|
|||||||
|
|
||||||
DEBUG_PRINTF("state %u is cyclic\n", i);
|
DEBUG_PRINTF("state %u is cyclic\n", i);
|
||||||
|
|
||||||
NFAStateSet mask(numStates), succ(numStates), pred(numStates);
|
NFAStateSet mask(numStates), ssucc(numStates), spred(numStates);
|
||||||
buildSquashMask(mask, g, v, cr, initStates, vByIndex, pdom_tree, som,
|
buildSquashMask(mask, g, v, cr, initStates, vByIndex, pdom_tree, som,
|
||||||
som_depths, region_map, cache);
|
som_depths, region_map, cache);
|
||||||
buildSucc(succ, g, v);
|
buildSucc(ssucc, g, v);
|
||||||
buildPred(pred, g, v);
|
buildPred(spred, g, v);
|
||||||
const auto &reports = g[v].reports;
|
const auto &reports = g[v].reports;
|
||||||
|
|
||||||
for (size_t j = succ.find_first(); j != succ.npos;
|
for (size_t j = ssucc.find_first(); j != ssucc.npos;
|
||||||
j = succ.find_next(j)) {
|
j = ssucc.find_next(j)) {
|
||||||
NFAVertex vj = vByIndex[j];
|
NFAVertex vj = vByIndex[j];
|
||||||
NFAStateSet pred2(numStates);
|
NFAStateSet pred2(numStates);
|
||||||
buildPred(pred2, g, vj);
|
buildPred(pred2, g, vj);
|
||||||
if (pred2 == pred) {
|
if (pred2 == spred) {
|
||||||
DEBUG_PRINTF("adding the sm from %zu to %u's sm\n", j, i);
|
DEBUG_PRINTF("adding the sm from %zu to %u's sm\n", j, i);
|
||||||
NFAStateSet tmp(numStates);
|
NFAStateSet tmp(numStates);
|
||||||
buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree,
|
buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree,
|
||||||
@ -430,14 +430,14 @@ unordered_map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t j = pred.find_first(); j != pred.npos;
|
for (size_t j = spred.find_first(); j != spred.npos;
|
||||||
j = pred.find_next(j)) {
|
j = spred.find_next(j)) {
|
||||||
NFAVertex vj = vByIndex[j];
|
NFAVertex vj = vByIndex[j];
|
||||||
NFAStateSet succ2(numStates);
|
NFAStateSet succ2(numStates);
|
||||||
buildSucc(succ2, g, vj);
|
buildSucc(succ2, g, vj);
|
||||||
/* we can use j as a basis for squashing if its succs are a subset
|
/* we can use j as a basis for squashing if its succs are a subset
|
||||||
* of ours */
|
* of ours */
|
||||||
if ((succ2 & ~succ).any()) {
|
if ((succ2 & ~ssucc).any()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -590,7 +590,7 @@ void getHighlanderReporters(const NGHolder &g, const NFAVertex accept,
|
|||||||
|
|
||||||
verts.insert(v);
|
verts.insert(v);
|
||||||
next_vertex:
|
next_vertex:
|
||||||
continue;
|
;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user