mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
MCSHENG64: extend to 64-state based on mcsheng
This commit is contained in:
parent
f06e19e6cb
commit
0102f03c9c
1333
src/nfa/mcsheng.c
1333
src/nfa/mcsheng.c
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -80,5 +80,78 @@ char nfaExecMcSheng16_expandState(const struct NFA *nfa, void *dest,
|
|||||||
|
|
||||||
#define nfaExecMcSheng16_B_Reverse NFA_API_NO_IMPL
|
#define nfaExecMcSheng16_B_Reverse NFA_API_NO_IMPL
|
||||||
#define nfaExecMcSheng16_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
#define nfaExecMcSheng16_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
/* 64-8 bit Sheng-McClellan hybrid */
|
||||||
|
char nfaExecMcSheng64_8_testEOD(const struct NFA *nfa, const char *state,
|
||||||
|
const char *streamState, u64a offset,
|
||||||
|
NfaCallback callback, void *context);
|
||||||
|
char nfaExecMcSheng64_8_Q(const struct NFA *n, struct mq *q, s64a end);
|
||||||
|
char nfaExecMcSheng64_8_Q2(const struct NFA *n, struct mq *q, s64a end);
|
||||||
|
char nfaExecMcSheng64_8_QR(const struct NFA *n, struct mq *q, ReportID report);
|
||||||
|
char nfaExecMcSheng64_8_reportCurrent(const struct NFA *n, struct mq *q);
|
||||||
|
char nfaExecMcSheng64_8_inAccept(const struct NFA *n, ReportID report,
|
||||||
|
struct mq *q);
|
||||||
|
char nfaExecMcSheng64_8_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||||
|
char nfaExecMcSheng64_8_queueInitState(const struct NFA *n, struct mq *q);
|
||||||
|
char nfaExecMcSheng64_8_initCompressedState(const struct NFA *n, u64a offset,
|
||||||
|
void *state, u8 key);
|
||||||
|
char nfaExecMcSheng64_8_queueCompressState(const struct NFA *nfa,
|
||||||
|
const struct mq *q, s64a loc);
|
||||||
|
char nfaExecMcSheng64_8_expandState(const struct NFA *nfa, void *dest,
|
||||||
|
const void *src, u64a offset, u8 key);
|
||||||
|
|
||||||
|
#define nfaExecMcSheng64_8_B_Reverse NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||||
|
|
||||||
|
/* 64-16 bit Sheng-McClellan hybrid */
|
||||||
|
char nfaExecMcSheng64_16_testEOD(const struct NFA *nfa, const char *state,
|
||||||
|
const char *streamState, u64a offset,
|
||||||
|
NfaCallback callback, void *context);
|
||||||
|
char nfaExecMcSheng64_16_Q(const struct NFA *n, struct mq *q, s64a end);
|
||||||
|
char nfaExecMcSheng64_16_Q2(const struct NFA *n, struct mq *q, s64a end);
|
||||||
|
char nfaExecMcSheng64_16_QR(const struct NFA *n, struct mq *q, ReportID report);
|
||||||
|
char nfaExecMcSheng64_16_reportCurrent(const struct NFA *n, struct mq *q);
|
||||||
|
char nfaExecMcSheng64_16_inAccept(const struct NFA *n, ReportID report,
|
||||||
|
struct mq *q);
|
||||||
|
char nfaExecMcSheng64_16_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||||
|
char nfaExecMcSheng64_16_queueInitState(const struct NFA *n, struct mq *q);
|
||||||
|
char nfaExecMcSheng64_16_initCompressedState(const struct NFA *n, u64a offset,
|
||||||
|
void *state, u8 key);
|
||||||
|
char nfaExecMcSheng64_16_queueCompressState(const struct NFA *nfa,
|
||||||
|
const struct mq *q, s64a loc);
|
||||||
|
char nfaExecMcSheng64_16_expandState(const struct NFA *nfa, void *dest,
|
||||||
|
const void *src, u64a offset, u8 key);
|
||||||
|
#define nfaExecMcSheng64_16_B_Reverse NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||||
|
#else // !HAVE_AVX512VBMI
|
||||||
|
#define nfaExecMcSheng64_8_B_Reverse NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_Q NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_Q2 NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_QR NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_inAccept NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_inAnyAccept NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_queueInitState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_queueCompressState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_expandState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_initCompressedState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_testEOD NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_reportCurrent NFA_API_NO_IMPL
|
||||||
|
|
||||||
|
#define nfaExecMcSheng64_16_B_Reverse NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_Q NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_Q2 NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_QR NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_inAccept NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_inAnyAccept NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_queueInitState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_queueCompressState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_expandState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_initCompressedState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_testEOD NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_reportCurrent NFA_API_NO_IMPL
|
||||||
|
|
||||||
|
#endif //end of HAVE_AVX512VBM
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -64,7 +64,6 @@
|
|||||||
#include <set>
|
#include <set>
|
||||||
#include <deque>
|
#include <deque>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <boost/range/adaptor/map.hpp>
|
#include <boost/range/adaptor/map.hpp>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
@ -244,6 +243,108 @@ void populateBasicInfo(size_t state_size, const dfa_info &info,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
static
|
||||||
|
mstate_aux *getAux64(NFA *n, dstate_id_t i) {
|
||||||
|
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(n);
|
||||||
|
mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset);
|
||||||
|
|
||||||
|
mstate_aux *aux = aux_base + i;
|
||||||
|
assert((const char *)aux < (const char *)n + m->length);
|
||||||
|
return aux;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void createShuffleMasks64(mcsheng64 *m, const dfa_info &info,
|
||||||
|
dstate_id_t sheng_end,
|
||||||
|
const map<dstate_id_t, AccelScheme> &accel_escape_info) {
|
||||||
|
DEBUG_PRINTF("using first %hu states for a sheng\n", sheng_end);
|
||||||
|
assert(sheng_end > DEAD_STATE + 1);
|
||||||
|
assert(sheng_end <= sizeof(m512) + 1);
|
||||||
|
vector<array<u8, sizeof(m512)>> masks;
|
||||||
|
masks.resize(info.alpha_size);
|
||||||
|
/* -1 to avoid wasting a slot as we do not include dead state */
|
||||||
|
vector<dstate_id_t> raw_ids;
|
||||||
|
raw_ids.resize(sheng_end - 1);
|
||||||
|
for (dstate_id_t s = DEAD_STATE + 1; s < info.states.size(); s++) {
|
||||||
|
assert(info.implId(s)); /* should not map to DEAD_STATE */
|
||||||
|
if (info.is_sheng(s)) {
|
||||||
|
raw_ids[info.extra[s].sheng_id] = s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (u32 i = 0; i < info.alpha_size; i++) {
|
||||||
|
if (i == info.alpha_remap[TOP]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
auto &mask = masks[i];
|
||||||
|
assert(sizeof(mask) == sizeof(m512));
|
||||||
|
mask.fill(0);
|
||||||
|
|
||||||
|
for (dstate_id_t sheng_id = 0; sheng_id < sheng_end - 1; sheng_id++) {
|
||||||
|
dstate_id_t raw_id = raw_ids[sheng_id];
|
||||||
|
dstate_id_t next_id = info.implId(info.states[raw_id].next[i]);
|
||||||
|
if (next_id == DEAD_STATE) {
|
||||||
|
next_id = sheng_end - 1;
|
||||||
|
} else if (next_id < sheng_end) {
|
||||||
|
next_id--;
|
||||||
|
}
|
||||||
|
DEBUG_PRINTF("%hu: %u->next %hu\n", sheng_id, i, next_id);
|
||||||
|
mask[sheng_id] = verify_u8(next_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (u32 i = 0; i < N_CHARS; i++) {
|
||||||
|
assert(info.alpha_remap[i] != info.alpha_remap[TOP]);
|
||||||
|
memcpy((u8 *)&m->sheng_succ_masks[i],
|
||||||
|
(u8 *)masks[info.alpha_remap[i]].data(), sizeof(m512));
|
||||||
|
}
|
||||||
|
m->sheng_end = sheng_end;
|
||||||
|
m->sheng_accel_limit = sheng_end - 1;
|
||||||
|
|
||||||
|
for (dstate_id_t s : raw_ids) {
|
||||||
|
if (contains(accel_escape_info, s)) {
|
||||||
|
LIMIT_TO_AT_MOST(&m->sheng_accel_limit, info.extra[s].sheng_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void populateBasicInfo64(size_t state_size, const dfa_info &info,
|
||||||
|
u32 total_size, u32 aux_offset, u32 accel_offset,
|
||||||
|
u32 accel_count, ReportID arb, bool single, NFA *nfa) {
|
||||||
|
assert(state_size == sizeof(u16) || state_size == sizeof(u8));
|
||||||
|
|
||||||
|
nfa->length = total_size;
|
||||||
|
nfa->nPositions = info.states.size();
|
||||||
|
|
||||||
|
nfa->scratchStateSize = verify_u32(state_size);
|
||||||
|
nfa->streamStateSize = verify_u32(state_size);
|
||||||
|
|
||||||
|
if (state_size == sizeof(u8)) {
|
||||||
|
nfa->type = MCSHENG_64_NFA_8;
|
||||||
|
} else {
|
||||||
|
nfa->type = MCSHENG_64_NFA_16;
|
||||||
|
}
|
||||||
|
|
||||||
|
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa);
|
||||||
|
for (u32 i = 0; i < 256; i++) {
|
||||||
|
m->remap[i] = verify_u8(info.alpha_remap[i]);
|
||||||
|
}
|
||||||
|
m->alphaShift = info.getAlphaShift();
|
||||||
|
m->length = total_size;
|
||||||
|
m->aux_offset = aux_offset;
|
||||||
|
m->accel_offset = accel_offset;
|
||||||
|
m->arb_report = arb;
|
||||||
|
m->state_count = verify_u16(info.size());
|
||||||
|
m->start_anchored = info.implId(info.raw.start_anchored);
|
||||||
|
m->start_floating = info.implId(info.raw.start_floating);
|
||||||
|
m->has_accel = accel_count ? 1 : 0;
|
||||||
|
|
||||||
|
if (single) {
|
||||||
|
m->flags |= MCSHENG_FLAG_SINGLE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static
|
static
|
||||||
size_t calcShermanRegionSize(const dfa_info &info) {
|
size_t calcShermanRegionSize(const dfa_info &info) {
|
||||||
size_t rv = 0;
|
size_t rv = 0;
|
||||||
@ -272,7 +373,7 @@ void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info,
|
|||||||
/* returns false on error */
|
/* returns false on error */
|
||||||
static
|
static
|
||||||
bool allocateImplId16(dfa_info &info, dstate_id_t sheng_end,
|
bool allocateImplId16(dfa_info &info, dstate_id_t sheng_end,
|
||||||
dstate_id_t *sherman_base) {
|
dstate_id_t *sherman_base) {
|
||||||
info.states[0].impl_id = 0; /* dead is always 0 */
|
info.states[0].impl_id = 0; /* dead is always 0 */
|
||||||
|
|
||||||
vector<dstate_id_t> norm;
|
vector<dstate_id_t> norm;
|
||||||
@ -382,6 +483,7 @@ CharReach get_edge_reach(dstate_id_t u, dstate_id_t v, const dfa_info &info) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define MAX_SHENG_STATES 16
|
#define MAX_SHENG_STATES 16
|
||||||
|
#define MAX_SHENG64_STATES 64
|
||||||
#define MAX_SHENG_LEAKINESS 0.05
|
#define MAX_SHENG_LEAKINESS 0.05
|
||||||
|
|
||||||
using LeakinessCache = ue2_unordered_map<pair<RdfaVertex, u32>, double>;
|
using LeakinessCache = ue2_unordered_map<pair<RdfaVertex, u32>, double>;
|
||||||
@ -435,7 +537,8 @@ double leakiness(const RdfaGraph &g, dfa_info &info,
|
|||||||
|
|
||||||
static
|
static
|
||||||
dstate_id_t find_sheng_states(dfa_info &info,
|
dstate_id_t find_sheng_states(dfa_info &info,
|
||||||
map<dstate_id_t, AccelScheme> &accel_escape_info) {
|
map<dstate_id_t, AccelScheme> &accel_escape_info,
|
||||||
|
size_t max_sheng_states) {
|
||||||
RdfaGraph g(info.raw);
|
RdfaGraph g(info.raw);
|
||||||
auto cyclics = find_vertices_in_cycles(g);
|
auto cyclics = find_vertices_in_cycles(g);
|
||||||
|
|
||||||
@ -470,7 +573,7 @@ dstate_id_t find_sheng_states(dfa_info &info,
|
|||||||
flat_set<dstate_id_t> considered = { DEAD_STATE };
|
flat_set<dstate_id_t> considered = { DEAD_STATE };
|
||||||
bool seen_back_edge = false;
|
bool seen_back_edge = false;
|
||||||
while (!to_consider.empty()
|
while (!to_consider.empty()
|
||||||
&& sheng_states.size() < MAX_SHENG_STATES) {
|
&& sheng_states.size() < max_sheng_states) {
|
||||||
auto v = to_consider.front();
|
auto v = to_consider.front();
|
||||||
to_consider.pop_front();
|
to_consider.pop_front();
|
||||||
if (!considered.insert(g[v].index).second) {
|
if (!considered.insert(g[v].index).second) {
|
||||||
@ -616,6 +719,82 @@ void fill_in_succ_table_16(NFA *nfa, const dfa_info &info,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
static
|
||||||
|
void fill_in_aux_info64(NFA *nfa, const dfa_info &info,
|
||||||
|
const map<dstate_id_t, AccelScheme> &accel_escape_info,
|
||||||
|
u32 accel_offset, UNUSED u32 accel_end_offset,
|
||||||
|
const vector<u32> &reports,
|
||||||
|
const vector<u32> &reports_eod,
|
||||||
|
u32 report_base_offset,
|
||||||
|
const raw_report_info &ri) {
|
||||||
|
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa);
|
||||||
|
|
||||||
|
vector<u32> reportOffsets;
|
||||||
|
|
||||||
|
ri.fillReportLists(nfa, report_base_offset, reportOffsets);
|
||||||
|
|
||||||
|
for (u32 i = 0; i < info.size(); i++) {
|
||||||
|
u16 impl_id = info.implId(i);
|
||||||
|
mstate_aux *this_aux = getAux64(nfa, impl_id);
|
||||||
|
|
||||||
|
fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets);
|
||||||
|
if (contains(accel_escape_info, i)) {
|
||||||
|
this_aux->accel_offset = accel_offset;
|
||||||
|
accel_offset += info.strat.accelSize();
|
||||||
|
assert(accel_offset <= accel_end_offset);
|
||||||
|
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
||||||
|
info.strat.buildAccel(i, accel_escape_info.at(i),
|
||||||
|
(void *)((char *)m + this_aux->accel_offset));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
u16 get_edge_flags64(NFA *nfa, dstate_id_t target_impl_id) {
|
||||||
|
mstate_aux *aux = getAux64(nfa, target_impl_id);
|
||||||
|
u16 flags = 0;
|
||||||
|
|
||||||
|
if (aux->accept) {
|
||||||
|
flags |= ACCEPT_FLAG;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (aux->accel_offset) {
|
||||||
|
flags |= ACCEL_FLAG;
|
||||||
|
}
|
||||||
|
|
||||||
|
return flags;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void fill_in_succ_table_64_16(NFA *nfa, const dfa_info &info,
|
||||||
|
dstate_id_t sheng_end,
|
||||||
|
UNUSED dstate_id_t sherman_base) {
|
||||||
|
u16 *succ_table = (u16 *)((char *)nfa + sizeof(NFA) + sizeof(mcsheng64));
|
||||||
|
|
||||||
|
u8 alphaShift = info.getAlphaShift();
|
||||||
|
assert(alphaShift <= 8);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < info.size(); i++) {
|
||||||
|
if (!info.is_normal(i)) {
|
||||||
|
assert(info.implId(i) < sheng_end || info.is_sherman(i));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(info.implId(i) < sherman_base);
|
||||||
|
u16 normal_id = verify_u16(info.implId(i) - sheng_end);
|
||||||
|
|
||||||
|
for (size_t s = 0; s < info.impl_alpha_size; s++) {
|
||||||
|
dstate_id_t raw_succ = info.states[i].next[s];
|
||||||
|
u16 &entry = succ_table[((size_t)normal_id << alphaShift) + s];
|
||||||
|
|
||||||
|
entry = info.implId(raw_succ);
|
||||||
|
entry |= get_edge_flags64(nfa, entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#define MAX_SHERMAN_LIST_LEN 8
|
#define MAX_SHERMAN_LIST_LEN 8
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -934,6 +1113,162 @@ void fill_in_succ_table_8(NFA *nfa, const dfa_info &info,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
static
|
||||||
|
void fill_in_sherman64(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
|
||||||
|
char *nfa_base = (char *)nfa;
|
||||||
|
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa);
|
||||||
|
char *sherman_table = nfa_base + m->sherman_offset;
|
||||||
|
|
||||||
|
assert(ISALIGNED_16(sherman_table));
|
||||||
|
for (size_t i = 0; i < info.size(); i++) {
|
||||||
|
if (!info.is_sherman(i)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
u16 fs = verify_u16(info.implId(i));
|
||||||
|
DEBUG_PRINTF("building sherman %zu impl %hu\n", i, fs);
|
||||||
|
|
||||||
|
assert(fs >= sherman_limit);
|
||||||
|
|
||||||
|
char *curr_sherman_entry
|
||||||
|
= sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE;
|
||||||
|
assert(curr_sherman_entry <= nfa_base + m->length);
|
||||||
|
|
||||||
|
u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken);
|
||||||
|
assert(len <= 9);
|
||||||
|
dstate_id_t d = info.states[i].daddy;
|
||||||
|
|
||||||
|
*(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE;
|
||||||
|
*(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len;
|
||||||
|
*(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d);
|
||||||
|
u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET);
|
||||||
|
|
||||||
|
for (u16 s = 0; s < info.impl_alpha_size; s++) {
|
||||||
|
if (info.states[i].next[s] != info.states[d].next[s]) {
|
||||||
|
*(chars++) = (u8)s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
|
||||||
|
for (u16 s = 0; s < info.impl_alpha_size; s++) {
|
||||||
|
if (info.states[i].next[s] != info.states[d].next[s]) {
|
||||||
|
DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs,
|
||||||
|
info.implId(d),
|
||||||
|
info.implId(info.states[i].next[s]));
|
||||||
|
u16 entry_val = info.implId(info.states[i].next[s]);
|
||||||
|
entry_val |= get_edge_flags64(nfa, entry_val);
|
||||||
|
unaligned_store_u16((u8 *)states++, entry_val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
bytecode_ptr<NFA> mcsheng64Compile16(dfa_info&info, dstate_id_t sheng_end,
|
||||||
|
const map<dstate_id_t, AccelScheme>&accel_escape_info,
|
||||||
|
const Grey &grey) {
|
||||||
|
DEBUG_PRINTF("building mcsheng 64-16\n");
|
||||||
|
|
||||||
|
vector<u32> reports; /* index in ri for the appropriate report list */
|
||||||
|
vector<u32> reports_eod; /* as above */
|
||||||
|
ReportID arb;
|
||||||
|
u8 single;
|
||||||
|
|
||||||
|
assert(info.getAlphaShift() <= 8);
|
||||||
|
|
||||||
|
// Sherman optimization
|
||||||
|
if (info.impl_alpha_size > 16) {
|
||||||
|
u16 total_daddy = 0;
|
||||||
|
for (u32 i = 0; i < info.size(); i++) {
|
||||||
|
find_better_daddy(info, i,
|
||||||
|
is_cyclic_near(info.raw, info.raw.start_anchored),
|
||||||
|
grey);
|
||||||
|
total_daddy += info.extra[i].daddytaken;
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
|
||||||
|
info.size() * info.impl_alpha_size, info.size(),
|
||||||
|
info.impl_alpha_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
u16 sherman_limit;
|
||||||
|
if (!allocateImplId16(info, sheng_end, &sherman_limit)) {
|
||||||
|
DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
|
||||||
|
info.size());
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
u16 count_real_states = sherman_limit - sheng_end;
|
||||||
|
|
||||||
|
auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
|
||||||
|
|
||||||
|
size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16)
|
||||||
|
* count_real_states;
|
||||||
|
|
||||||
|
size_t aux_size = sizeof(mstate_aux) * info.size();
|
||||||
|
|
||||||
|
size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng64) + tran_size);
|
||||||
|
size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
|
||||||
|
size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
|
||||||
|
+ ri->getReportListSize(), 32);
|
||||||
|
size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size);
|
||||||
|
size_t sherman_size = calcShermanRegionSize(info);
|
||||||
|
|
||||||
|
size_t total_size = sherman_offset + sherman_size;
|
||||||
|
|
||||||
|
accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
|
||||||
|
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
||||||
|
|
||||||
|
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
||||||
|
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa.get());
|
||||||
|
|
||||||
|
populateBasicInfo64(sizeof(u16), info, total_size, aux_offset, accel_offset,
|
||||||
|
accel_escape_info.size(), arb, single, nfa.get());
|
||||||
|
createShuffleMasks64(m, info, sheng_end, accel_escape_info);
|
||||||
|
|
||||||
|
/* copy in the mc header information */
|
||||||
|
m->sherman_offset = sherman_offset;
|
||||||
|
m->sherman_end = total_size;
|
||||||
|
m->sherman_limit = sherman_limit;
|
||||||
|
|
||||||
|
DEBUG_PRINTF("%hu sheng, %hu norm, %zu total\n", sheng_end,
|
||||||
|
count_real_states, info.size());
|
||||||
|
|
||||||
|
fill_in_aux_info64(nfa.get(), info, accel_escape_info, accel_offset,
|
||||||
|
sherman_offset - sizeof(NFA), reports, reports_eod,
|
||||||
|
aux_offset + aux_size, *ri);
|
||||||
|
|
||||||
|
fill_in_succ_table_64_16(nfa.get(), info, sheng_end, sherman_limit);
|
||||||
|
|
||||||
|
fill_in_sherman64(nfa.get(), info, sherman_limit);
|
||||||
|
|
||||||
|
return nfa;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void fill_in_succ_table_64_8(NFA *nfa, const dfa_info &info,
|
||||||
|
dstate_id_t sheng_end) {
|
||||||
|
u8 *succ_table = (u8 *)nfa + sizeof(NFA) + sizeof(mcsheng64);
|
||||||
|
|
||||||
|
u8 alphaShift = info.getAlphaShift();
|
||||||
|
assert(alphaShift <= 8);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < info.size(); i++) {
|
||||||
|
assert(!info.is_sherman(i));
|
||||||
|
if (!info.is_normal(i)) {
|
||||||
|
assert(info.implId(i) < sheng_end);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
u8 normal_id = verify_u8(info.implId(i) - sheng_end);
|
||||||
|
|
||||||
|
for (size_t s = 0; s < info.impl_alpha_size; s++) {
|
||||||
|
dstate_id_t raw_succ = info.states[i].next[s];
|
||||||
|
succ_table[((size_t)normal_id << alphaShift) + s]
|
||||||
|
= info.implId(raw_succ);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static
|
static
|
||||||
void allocateImplId8(dfa_info &info, dstate_id_t sheng_end,
|
void allocateImplId8(dfa_info &info, dstate_id_t sheng_end,
|
||||||
const map<dstate_id_t, AccelScheme> &accel_escape_info,
|
const map<dstate_id_t, AccelScheme> &accel_escape_info,
|
||||||
@ -1031,6 +1366,60 @@ bytecode_ptr<NFA> mcshengCompile8(dfa_info &info, dstate_id_t sheng_end,
|
|||||||
return nfa;
|
return nfa;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
static
|
||||||
|
bytecode_ptr<NFA> mcsheng64Compile8(dfa_info &info, dstate_id_t sheng_end,
|
||||||
|
const map<dstate_id_t, AccelScheme> &accel_escape_info) {
|
||||||
|
DEBUG_PRINTF("building mcsheng 64-8\n");
|
||||||
|
|
||||||
|
vector<u32> reports;
|
||||||
|
vector<u32> reports_eod;
|
||||||
|
ReportID arb;
|
||||||
|
u8 single;
|
||||||
|
|
||||||
|
auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
|
||||||
|
|
||||||
|
size_t normal_count = info.size() - sheng_end;
|
||||||
|
|
||||||
|
size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * normal_count;
|
||||||
|
size_t aux_size = sizeof(mstate_aux) * info.size();
|
||||||
|
size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng64) + tran_size);
|
||||||
|
size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
|
||||||
|
size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
|
||||||
|
+ ri->getReportListSize(), 32);
|
||||||
|
size_t total_size = accel_offset + accel_size;
|
||||||
|
|
||||||
|
DEBUG_PRINTF("aux_size %zu\n", aux_size);
|
||||||
|
DEBUG_PRINTF("aux_offset %zu\n", aux_offset);
|
||||||
|
DEBUG_PRINTF("rl size %u\n", ri->getReportListSize());
|
||||||
|
DEBUG_PRINTF("accel_size %zu\n", accel_size);
|
||||||
|
DEBUG_PRINTF("accel_offset %zu\n", accel_offset);
|
||||||
|
DEBUG_PRINTF("total_size %zu\n", total_size);
|
||||||
|
|
||||||
|
accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
|
||||||
|
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
||||||
|
|
||||||
|
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
||||||
|
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa.get());
|
||||||
|
|
||||||
|
allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8,
|
||||||
|
&m->accept_limit_8);
|
||||||
|
|
||||||
|
populateBasicInfo64(sizeof(u8), info, total_size, aux_offset, accel_offset,
|
||||||
|
accel_escape_info.size(), arb, single, nfa.get());
|
||||||
|
createShuffleMasks64(m, info, sheng_end, accel_escape_info);
|
||||||
|
|
||||||
|
fill_in_aux_info64(nfa.get(), info, accel_escape_info, accel_offset,
|
||||||
|
total_size - sizeof(NFA), reports, reports_eod,
|
||||||
|
aux_offset + aux_size, *ri);
|
||||||
|
|
||||||
|
fill_in_succ_table_64_8(nfa.get(), info, sheng_end);
|
||||||
|
DEBUG_PRINTF("rl size %zu\n", ri->size());
|
||||||
|
|
||||||
|
return nfa;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
||||||
const ReportManager &rm) {
|
const ReportManager &rm) {
|
||||||
if (!cc.grey.allowMcSheng) {
|
if (!cc.grey.allowMcSheng) {
|
||||||
@ -1050,19 +1439,79 @@ bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
|||||||
|
|
||||||
map<dstate_id_t, AccelScheme> accel_escape_info
|
map<dstate_id_t, AccelScheme> accel_escape_info
|
||||||
= info.strat.getAccelInfo(cc.grey);
|
= info.strat.getAccelInfo(cc.grey);
|
||||||
|
auto old_states = info.states;
|
||||||
|
dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info, MAX_SHENG_STATES);
|
||||||
|
|
||||||
dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info);
|
|
||||||
if (sheng_end <= DEAD_STATE + 1) {
|
if (sheng_end <= DEAD_STATE + 1) {
|
||||||
|
info.states = old_states;
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
bytecode_ptr<NFA> nfa;
|
bytecode_ptr<NFA> nfa;
|
||||||
|
|
||||||
if (!using8bit) {
|
if (!using8bit) {
|
||||||
nfa = mcshengCompile16(info, sheng_end, accel_escape_info, cc.grey);
|
nfa = mcshengCompile16(info, sheng_end, accel_escape_info, cc.grey);
|
||||||
} else {
|
} else {
|
||||||
nfa = mcshengCompile8(info, sheng_end, accel_escape_info);
|
nfa = mcshengCompile8(info, sheng_end, accel_escape_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!nfa) {
|
||||||
|
info.states = old_states;
|
||||||
|
return nfa;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (has_eod_reports) {
|
||||||
|
nfa->flags |= NFA_ACCEPTS_EOD;
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUG_PRINTF("compile done\n");
|
||||||
|
return nfa;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc,
|
||||||
|
const ReportManager &rm) {
|
||||||
|
if (!cc.grey.allowMcSheng) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
mcclellan_build_strat mbs(raw, rm, false);
|
||||||
|
dfa_info info(mbs);
|
||||||
|
bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256;
|
||||||
|
|
||||||
|
if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
|
||||||
|
* mode with our semantics */
|
||||||
|
raw.stripExtraEodReports();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool has_eod_reports = raw.hasEodReports();
|
||||||
|
|
||||||
|
map<dstate_id_t, AccelScheme> accel_escape_info
|
||||||
|
= info.strat.getAccelInfo(cc.grey);
|
||||||
|
bool using64state = false; /*default flag*/
|
||||||
|
dstate_id_t sheng_end64;
|
||||||
|
sheng_end64 = find_sheng_states(info, accel_escape_info, MAX_SHENG64_STATES);
|
||||||
|
|
||||||
|
if (sheng_end64 <= DEAD_STATE + 1) {
|
||||||
|
return nullptr;
|
||||||
|
} else {
|
||||||
|
using64state = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bytecode_ptr<NFA> nfa;
|
||||||
|
|
||||||
|
if (using64state) {
|
||||||
|
assert((sheng_end64 > 17) && (sheng_end64 <= 65));
|
||||||
|
if (!using8bit) {
|
||||||
|
nfa = mcsheng64Compile16(info, sheng_end64, accel_escape_info, cc.grey);
|
||||||
|
} else {
|
||||||
|
assert(using8bit);
|
||||||
|
nfa = mcsheng64Compile8(info, sheng_end64, accel_escape_info);
|
||||||
|
assert(nfa);
|
||||||
|
assert(nfa->type == MCSHENG_64_NFA_8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!nfa) {
|
if (!nfa) {
|
||||||
return nfa;
|
return nfa;
|
||||||
}
|
}
|
||||||
@ -1074,6 +1523,7 @@ bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
|||||||
DEBUG_PRINTF("compile done\n");
|
DEBUG_PRINTF("compile done\n");
|
||||||
return nfa;
|
return nfa;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
bool has_accel_mcsheng(const NFA *) {
|
bool has_accel_mcsheng(const NFA *) {
|
||||||
return true; /* consider the sheng region as accelerated */
|
return true; /* consider the sheng region as accelerated */
|
||||||
|
@ -42,7 +42,8 @@ struct raw_dfa;
|
|||||||
|
|
||||||
bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
||||||
const ReportManager &rm);
|
const ReportManager &rm);
|
||||||
|
bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc,
|
||||||
|
const ReportManager &rm);
|
||||||
bool has_accel_mcsheng(const NFA *nfa);
|
bool has_accel_mcsheng(const NFA *nfa);
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -41,3 +41,16 @@ const u64a mcsheng_pext_mask[8] = {
|
|||||||
0x00ff00000000000f,
|
0x00ff00000000000f,
|
||||||
0xff0000000000000f,
|
0xff0000000000000f,
|
||||||
};
|
};
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
const u64a mcsheng64_pext_mask[8] = {
|
||||||
|
0, /* dummy */
|
||||||
|
0x000000000000ff3f,
|
||||||
|
0x0000000000ff003f,
|
||||||
|
0x00000000ff00003f,
|
||||||
|
0x000000ff0000003f,
|
||||||
|
0x0000ff000000003f,
|
||||||
|
0x00ff00000000003f,
|
||||||
|
0xff0000000000003f,
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016-2017, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -174,6 +174,126 @@ void describeEdge(FILE *f, const mcsheng *m, const u16 *t, u16 i) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
static
|
||||||
|
const mstate_aux *getAux64(const NFA *n, dstate_id_t i) {
|
||||||
|
auto *m = (const mcsheng64 *)getImplNfa(n);
|
||||||
|
auto *aux_base = (const mstate_aux *)((const char *)n + m->aux_offset);
|
||||||
|
|
||||||
|
const mstate_aux *aux = aux_base + i;
|
||||||
|
|
||||||
|
assert((const char *)aux < (const char *)n + m->length);
|
||||||
|
return aux;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void next_states64(const NFA *n, u16 s, u16 *t) {
|
||||||
|
const mcsheng64 *m = (const mcsheng64 *)getImplNfa(n);
|
||||||
|
const mstate_aux *aux = getAux64(n, s);
|
||||||
|
const u32 as = m->alphaShift;
|
||||||
|
assert(s != DEAD_STATE);
|
||||||
|
|
||||||
|
if (s < m->sheng_end) {
|
||||||
|
for (u16 c = 0; c < N_CHARS; c++) {
|
||||||
|
u8 sheng_s = s - 1;
|
||||||
|
auto trans_for_c = (const char *)&m->sheng_succ_masks[c];
|
||||||
|
assert(sheng_s < sizeof(m512));
|
||||||
|
u8 raw_succ = trans_for_c[sheng_s];
|
||||||
|
if (raw_succ == m->sheng_end - 1) {
|
||||||
|
t[c] = DEAD_STATE;
|
||||||
|
} else if (raw_succ < m->sheng_end) {
|
||||||
|
t[c] = raw_succ + 1;
|
||||||
|
} else {
|
||||||
|
t[c] = raw_succ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (n->type == MCSHENG_64_NFA_8) {
|
||||||
|
const u8 *succ_table = (const u8 *)((const char *)m + sizeof(mcsheng64));
|
||||||
|
for (u16 c = 0; c < N_CHARS; c++) {
|
||||||
|
u32 normal_id = s - m->sheng_end;
|
||||||
|
t[c] = succ_table[(normal_id << as) + m->remap[c]];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
u16 base_s = s;
|
||||||
|
const char *winfo_base = (const char *)n + m->sherman_offset;
|
||||||
|
const char *state_base
|
||||||
|
= winfo_base + SHERMAN_FIXED_SIZE * (s - m->sherman_limit);
|
||||||
|
|
||||||
|
if (s >= m->sherman_limit) {
|
||||||
|
base_s = unaligned_load_u16(state_base + SHERMAN_DADDY_OFFSET);
|
||||||
|
assert(base_s >= m->sheng_end);
|
||||||
|
}
|
||||||
|
|
||||||
|
const u16 *succ_table = (const u16 *)((const char *)m
|
||||||
|
+ sizeof(mcsheng64));
|
||||||
|
for (u16 c = 0; c < N_CHARS; c++) {
|
||||||
|
u32 normal_id = base_s - m->sheng_end;
|
||||||
|
t[c] = succ_table[(normal_id << as) + m->remap[c]];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (s >= m->sherman_limit) {
|
||||||
|
UNUSED char type = *(state_base + SHERMAN_TYPE_OFFSET);
|
||||||
|
assert(type == SHERMAN_STATE);
|
||||||
|
u8 len = *(const u8 *)(SHERMAN_LEN_OFFSET + state_base);
|
||||||
|
const char *chars = state_base + SHERMAN_CHARS_OFFSET;
|
||||||
|
const u16 *states = (const u16 *)(state_base
|
||||||
|
+ SHERMAN_STATES_OFFSET(len));
|
||||||
|
|
||||||
|
for (u8 i = 0; i < len; i++) {
|
||||||
|
for (u16 c = 0; c < N_CHARS; c++) {
|
||||||
|
if (m->remap[c] == chars[i]) {
|
||||||
|
t[c] = unaligned_load_u16((const u8*)&states[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (u16 c = 0; c < N_CHARS; c++) {
|
||||||
|
t[c] &= STATE_MASK;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
t[TOP] = aux->top & STATE_MASK;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void describeEdge64(FILE *f, const mcsheng64 *m, const u16 *t, u16 i) {
|
||||||
|
for (u16 s = 0; s < N_CHARS; s++) {
|
||||||
|
if (!t[s]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
u16 ss;
|
||||||
|
for (ss = 0; ss < s; ss++) {
|
||||||
|
if (t[s] == t[ss]) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ss != s) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
CharReach reach;
|
||||||
|
for (ss = s; ss < 256; ss++) {
|
||||||
|
if (t[s] == t[ss]) {
|
||||||
|
reach.set(ss);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(f, "%u -> %u [ ", i, t[s]);
|
||||||
|
if (i < m->sheng_end && t[s] < m->sheng_end) {
|
||||||
|
fprintf(f, "color = red, fontcolor = red ");
|
||||||
|
}
|
||||||
|
fprintf(f, "label = \"");
|
||||||
|
describeClass(f, reach, 5, CC_OUT_DOT);
|
||||||
|
|
||||||
|
fprintf(f, "\" ];\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static
|
static
|
||||||
void dumpAccelDot(FILE *f, u16 i, const union AccelAux *accel) {
|
void dumpAccelDot(FILE *f, u16 i, const union AccelAux *accel) {
|
||||||
switch(accel->accel_type) {
|
switch(accel->accel_type) {
|
||||||
@ -256,6 +376,68 @@ void describeNode(const NFA *n, const mcsheng *m, u16 i, FILE *f) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
static
|
||||||
|
void describeNode64(const NFA *n, const mcsheng64 *m, u16 i, FILE *f) {
|
||||||
|
const mstate_aux *aux = getAux64(n, i);
|
||||||
|
|
||||||
|
bool isSherman = m->sherman_limit && i >= m->sherman_limit;
|
||||||
|
|
||||||
|
fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, "
|
||||||
|
"label = \"%u%s\" ]; \n", i, i, isSherman ? "w":"");
|
||||||
|
|
||||||
|
if (aux->accel_offset) {
|
||||||
|
dumpAccelDot(f, i, (const union AccelAux *)
|
||||||
|
((const char *)m + aux->accel_offset));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i && i < m->sheng_end) {
|
||||||
|
fprintf(f, "%u [color = red, fontcolor = red]; \n", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (aux->accept_eod) {
|
||||||
|
fprintf(f, "%u [ color = darkorchid ];\n", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (aux->accept) {
|
||||||
|
fprintf(f, "%u [ shape = doublecircle ];\n", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (aux->top && aux->top != i) {
|
||||||
|
fprintf(f, "%u -> %u [color = darkgoldenrod weight=0.1 ]\n", i,
|
||||||
|
aux->top);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i == m->start_anchored) {
|
||||||
|
fprintf(f, "STARTA -> %u [color = blue ]\n", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i == m->start_floating) {
|
||||||
|
fprintf(f, "STARTF -> %u [color = red ]\n", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isSherman) {
|
||||||
|
const char *winfo_base = (const char *)n + m->sherman_offset;
|
||||||
|
const char *state_base
|
||||||
|
= winfo_base + SHERMAN_FIXED_SIZE * (i - m->sherman_limit);
|
||||||
|
assert(state_base < (const char *)m + m->length - sizeof(NFA));
|
||||||
|
UNUSED u8 type = *(const u8 *)(state_base + SHERMAN_TYPE_OFFSET);
|
||||||
|
assert(type == SHERMAN_STATE);
|
||||||
|
fprintf(f, "%u [ fillcolor = lightblue style=filled ];\n", i);
|
||||||
|
u16 daddy = *(const u16 *)(state_base + SHERMAN_DADDY_OFFSET);
|
||||||
|
if (daddy) {
|
||||||
|
fprintf(f, "%u -> %u [ color=royalblue style=dashed weight=0.1]\n",
|
||||||
|
i, daddy);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i && i < m->sheng_end) {
|
||||||
|
fprintf(f, "subgraph cluster_sheng { %u } \n", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static
|
static
|
||||||
void dumpDotPreambleDfa(FILE *f) {
|
void dumpDotPreambleDfa(FILE *f) {
|
||||||
dumpDotPreamble(f);
|
dumpDotPreamble(f);
|
||||||
@ -392,6 +574,133 @@ void dump_text_8(const NFA *nfa, FILE *f) {
|
|||||||
dumpTextReverse(nfa, f);
|
dumpTextReverse(nfa, f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
static
|
||||||
|
void dump64_dot_16(const NFA *nfa, FILE *f) {
|
||||||
|
auto *m = (const mcsheng64 *)getImplNfa(nfa);
|
||||||
|
|
||||||
|
dumpDotPreambleDfa(f);
|
||||||
|
|
||||||
|
for (u16 i = 1; i < m->state_count; i++) {
|
||||||
|
describeNode64(nfa, m, i, f);
|
||||||
|
|
||||||
|
u16 t[ALPHABET_SIZE];
|
||||||
|
|
||||||
|
next_states64(nfa, i, t);
|
||||||
|
|
||||||
|
describeEdge64(f, m, t, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(f, "}\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dump64_dot_8(const NFA *nfa, FILE *f) {
|
||||||
|
auto m = (const mcsheng64 *)getImplNfa(nfa);
|
||||||
|
|
||||||
|
dumpDotPreambleDfa(f);
|
||||||
|
|
||||||
|
for (u16 i = 1; i < m->state_count; i++) {
|
||||||
|
describeNode64(nfa, m, i, f);
|
||||||
|
|
||||||
|
u16 t[ALPHABET_SIZE];
|
||||||
|
|
||||||
|
next_states64(nfa, i, t);
|
||||||
|
|
||||||
|
describeEdge64(f, m, t, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(f, "}\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dumpAccelMasks64(FILE *f, const mcsheng64 *m, const mstate_aux *aux) {
|
||||||
|
fprintf(f, "\n");
|
||||||
|
fprintf(f, "Acceleration\n");
|
||||||
|
fprintf(f, "------------\n");
|
||||||
|
|
||||||
|
for (u16 i = 0; i < m->state_count; i++) {
|
||||||
|
if (!aux[i].accel_offset) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto accel = (const AccelAux *)((const char *)m + aux[i].accel_offset);
|
||||||
|
fprintf(f, "%05hu ", i);
|
||||||
|
dumpAccelInfo(f, *accel);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void describeAlphabet64(FILE *f, const mcsheng64 *m) {
|
||||||
|
map<u8, CharReach> rev;
|
||||||
|
|
||||||
|
for (u16 i = 0; i < N_CHARS; i++) {
|
||||||
|
rev[m->remap[i]].clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (u16 i = 0; i < N_CHARS; i++) {
|
||||||
|
rev[m->remap[i]].set(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
map<u8, CharReach>::const_iterator it;
|
||||||
|
fprintf(f, "\nAlphabet\n");
|
||||||
|
for (it = rev.begin(); it != rev.end(); ++it) {
|
||||||
|
fprintf(f, "%3hhu: ", it->first);
|
||||||
|
describeClass(f, it->second, 10240, CC_OUT_TEXT);
|
||||||
|
fprintf(f, "\n");
|
||||||
|
}
|
||||||
|
fprintf(f, "\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dumpCommonHeader64(FILE *f, const mcsheng64 *m) {
|
||||||
|
fprintf(f, "report: %u, states: %u, length: %u\n", m->arb_report,
|
||||||
|
m->state_count, m->length);
|
||||||
|
fprintf(f, "astart: %hu, fstart: %hu\n", m->start_anchored,
|
||||||
|
m->start_floating);
|
||||||
|
fprintf(f, "single accept: %d, has_accel: %d\n",
|
||||||
|
!!(int)m->flags & MCSHENG_FLAG_SINGLE, m->has_accel);
|
||||||
|
fprintf(f, "sheng_end: %hu\n", m->sheng_end);
|
||||||
|
fprintf(f, "sheng_accel_limit: %hu\n", m->sheng_accel_limit);
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dump64_text_8(const NFA *nfa, FILE *f) {
|
||||||
|
auto m = (const mcsheng64 *)getImplNfa(nfa);
|
||||||
|
auto aux = (const mstate_aux *)((const char *)nfa + m->aux_offset);
|
||||||
|
|
||||||
|
fprintf(f, "mcsheng 64-8\n");
|
||||||
|
dumpCommonHeader64(f, m);
|
||||||
|
fprintf(f, "accel_limit: %hu, accept_limit %hu\n", m->accel_limit_8,
|
||||||
|
m->accept_limit_8);
|
||||||
|
fprintf(f, "\n");
|
||||||
|
|
||||||
|
describeAlphabet64(f, m);
|
||||||
|
dumpAccelMasks64(f, m, aux);
|
||||||
|
|
||||||
|
fprintf(f, "\n");
|
||||||
|
dumpTextReverse(nfa, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dump64_text_16(const NFA *nfa, FILE *f) {
|
||||||
|
auto *m = (const mcsheng64 *)getImplNfa(nfa);
|
||||||
|
auto *aux = (const mstate_aux *)((const char *)nfa + m->aux_offset);
|
||||||
|
|
||||||
|
fprintf(f, "mcsheng 64-16\n");
|
||||||
|
dumpCommonHeader64(f, m);
|
||||||
|
fprintf(f, "sherman_limit: %d, sherman_end: %d\n", (int)m->sherman_limit,
|
||||||
|
(int)m->sherman_end);
|
||||||
|
fprintf(f, "\n");
|
||||||
|
|
||||||
|
describeAlphabet64(f, m);
|
||||||
|
dumpAccelMasks64(f, m, aux);
|
||||||
|
|
||||||
|
fprintf(f, "\n");
|
||||||
|
dumpTextReverse(nfa, f);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void nfaExecMcSheng16_dump(const NFA *nfa, const string &base) {
|
void nfaExecMcSheng16_dump(const NFA *nfa, const string &base) {
|
||||||
assert(nfa->type == MCSHENG_NFA_16);
|
assert(nfa->type == MCSHENG_NFA_16);
|
||||||
dump_text_16(nfa, StdioFile(base + ".txt", "w"));
|
dump_text_16(nfa, StdioFile(base + ".txt", "w"));
|
||||||
@ -404,4 +713,20 @@ void nfaExecMcSheng8_dump(const NFA *nfa, const string &base) {
|
|||||||
dump_dot_8(nfa, StdioFile(base + ".dot", "w"));
|
dump_dot_8(nfa, StdioFile(base + ".dot", "w"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void nfaExecMcSheng64_16_dump(UNUSED const NFA *nfa, UNUSED const string &base) {
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
assert(nfa->type == MCSHENG_64_NFA_16);
|
||||||
|
dump64_text_16(nfa, StdioFile(base + ".txt", "w"));
|
||||||
|
dump64_dot_16(nfa, StdioFile(base + ".dot", "w"));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void nfaExecMcSheng64_8_dump(UNUSED const NFA *nfa, UNUSED const string &base) {
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
assert(nfa->type == MCSHENG_64_NFA_8);
|
||||||
|
dump64_text_8(nfa, StdioFile(base + ".txt", "w"));
|
||||||
|
dump64_dot_8(nfa, StdioFile(base + ".dot", "w"));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -42,7 +42,8 @@ namespace ue2 {
|
|||||||
|
|
||||||
void nfaExecMcSheng8_dump(const struct NFA *nfa, const std::string &base);
|
void nfaExecMcSheng8_dump(const struct NFA *nfa, const std::string &base);
|
||||||
void nfaExecMcSheng16_dump(const struct NFA *nfa, const std::string &base);
|
void nfaExecMcSheng16_dump(const struct NFA *nfa, const std::string &base);
|
||||||
|
void nfaExecMcSheng64_8_dump(const struct NFA *nfa, const std::string &base);
|
||||||
|
void nfaExecMcSheng64_16_dump(const struct NFA *nfa, const std::string &base);
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
#endif // DUMP_SUPPORT
|
#endif // DUMP_SUPPORT
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016-2018, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -92,4 +92,35 @@ struct mcsheng {
|
|||||||
* representing the data from a u64a. */
|
* representing the data from a u64a. */
|
||||||
extern const u64a mcsheng_pext_mask[8];
|
extern const u64a mcsheng_pext_mask[8];
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
struct mcsheng64 {
|
||||||
|
u16 state_count; /**< total number of states */
|
||||||
|
u32 length; /**< length of dfa in bytes */
|
||||||
|
u16 start_anchored; /**< anchored start state */
|
||||||
|
u16 start_floating; /**< floating start state */
|
||||||
|
u32 aux_offset; /**< offset of the aux structures relative to the start of
|
||||||
|
* the nfa structure */
|
||||||
|
u32 sherman_offset; /**< offset of array of sherman state offsets the
|
||||||
|
* state_info structures relative to the start of the
|
||||||
|
* nfa structure */
|
||||||
|
u32 sherman_end; /**< offset of the end of the state_info structures
|
||||||
|
* relative to the start of the nfa structure */
|
||||||
|
u16 sheng_end; /**< first non-sheng state */
|
||||||
|
u16 sheng_accel_limit; /**< first sheng accel state. state given in terms of
|
||||||
|
* internal sheng ids */
|
||||||
|
u16 accel_limit_8; /**< 8 bit, lowest accelerable state */
|
||||||
|
u16 accept_limit_8; /**< 8 bit, lowest accept state */
|
||||||
|
u16 sherman_limit; /**< lowest sherman state */
|
||||||
|
u8 alphaShift;
|
||||||
|
u8 flags;
|
||||||
|
u8 has_accel; /**< 1 iff there are any accel plans */
|
||||||
|
u8 remap[256]; /**< remaps characters to a smaller alphabet */
|
||||||
|
ReportID arb_report; /**< one of the accepts that this dfa may raise */
|
||||||
|
u32 accel_offset; /**< offset of accel structures from start of McClellan */
|
||||||
|
m512 sheng_succ_masks[N_CHARS];
|
||||||
|
};
|
||||||
|
|
||||||
|
extern const u64a mcsheng64_pext_mask[8];
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -78,6 +78,8 @@
|
|||||||
DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \
|
DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \
|
||||||
DISPATCH_CASE(SHENG_NFA_32, Sheng32, dbnt_func); \
|
DISPATCH_CASE(SHENG_NFA_32, Sheng32, dbnt_func); \
|
||||||
DISPATCH_CASE(SHENG_NFA_64, Sheng64, dbnt_func); \
|
DISPATCH_CASE(SHENG_NFA_64, Sheng64, dbnt_func); \
|
||||||
|
DISPATCH_CASE(MCSHENG_64_NFA_8, McSheng64_8, dbnt_func); \
|
||||||
|
DISPATCH_CASE(MCSHENG_64_NFA_16, McSheng64_16, dbnt_func); \
|
||||||
default: \
|
default: \
|
||||||
assert(0); \
|
assert(0); \
|
||||||
}
|
}
|
||||||
|
@ -478,6 +478,37 @@ const nfa_dispatch_fn NFATraits<SHENG_NFA_64>::has_repeats_other_than_firsts = d
|
|||||||
const char *NFATraits<SHENG_NFA_64>::name = "Sheng 64";
|
const char *NFATraits<SHENG_NFA_64>::name = "Sheng 64";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
template<> struct NFATraits<MCSHENG_64_NFA_8> {
|
||||||
|
UNUSED static const char *name;
|
||||||
|
static const NFACategory category = NFA_OTHER;
|
||||||
|
static const u32 stateAlign = 1;
|
||||||
|
static const bool fast = true;
|
||||||
|
static const nfa_dispatch_fn has_accel;
|
||||||
|
static const nfa_dispatch_fn has_repeats;
|
||||||
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
|
};
|
||||||
|
const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_8>::has_accel = has_accel_mcsheng;
|
||||||
|
const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_8>::has_repeats = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_8>::has_repeats_other_than_firsts = dispatch_false;
|
||||||
|
#if defined(DUMP_SUPPORT)
|
||||||
|
const char *NFATraits<MCSHENG_64_NFA_8>::name = "Shengy64 McShengFace 8";
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template<> struct NFATraits<MCSHENG_64_NFA_16> {
|
||||||
|
UNUSED static const char *name;
|
||||||
|
static const NFACategory category = NFA_OTHER;
|
||||||
|
static const u32 stateAlign = 2;
|
||||||
|
static const bool fast = true;
|
||||||
|
static const nfa_dispatch_fn has_accel;
|
||||||
|
static const nfa_dispatch_fn has_repeats;
|
||||||
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
|
};
|
||||||
|
const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_16>::has_accel = has_accel_mcsheng;
|
||||||
|
const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_16>::has_repeats = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_16>::has_repeats_other_than_firsts = dispatch_false;
|
||||||
|
#if defined(DUMP_SUPPORT)
|
||||||
|
const char *NFATraits<MCSHENG_64_NFA_16>::name = "Shengy64 McShengFace 16";
|
||||||
|
#endif
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
#if defined(DUMP_SUPPORT)
|
#if defined(DUMP_SUPPORT)
|
||||||
|
@ -83,6 +83,8 @@ namespace ue2 {
|
|||||||
DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \
|
DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \
|
||||||
DISPATCH_CASE(SHENG_NFA_32, Sheng32, dbnt_func); \
|
DISPATCH_CASE(SHENG_NFA_32, Sheng32, dbnt_func); \
|
||||||
DISPATCH_CASE(SHENG_NFA_64, Sheng64, dbnt_func); \
|
DISPATCH_CASE(SHENG_NFA_64, Sheng64, dbnt_func); \
|
||||||
|
DISPATCH_CASE(MCSHENG_64_NFA_8, McSheng64_8, dbnt_func); \
|
||||||
|
DISPATCH_CASE(MCSHENG_64_NFA_16, McSheng64_16, dbnt_func); \
|
||||||
default: \
|
default: \
|
||||||
assert(0); \
|
assert(0); \
|
||||||
}
|
}
|
||||||
|
@ -74,6 +74,8 @@ enum NFAEngineType {
|
|||||||
MCSHENG_NFA_16, /**< magic pseudo nfa */
|
MCSHENG_NFA_16, /**< magic pseudo nfa */
|
||||||
SHENG_NFA_32, /**< magic pseudo nfa */
|
SHENG_NFA_32, /**< magic pseudo nfa */
|
||||||
SHENG_NFA_64, /**< magic pseudo nfa */
|
SHENG_NFA_64, /**< magic pseudo nfa */
|
||||||
|
MCSHENG_64_NFA_8, /**< magic pseudo nfa */
|
||||||
|
MCSHENG_64_NFA_16, /**< magic pseudo nfa */
|
||||||
/** \brief bogus NFA - not used */
|
/** \brief bogus NFA - not used */
|
||||||
INVALID_NFA
|
INVALID_NFA
|
||||||
};
|
};
|
||||||
@ -150,7 +152,12 @@ static really_inline int isMcClellanType(u8 t) {
|
|||||||
/** \brief True if the given type (from NFA::type) is a Sheng-McClellan hybrid
|
/** \brief True if the given type (from NFA::type) is a Sheng-McClellan hybrid
|
||||||
* DFA. */
|
* DFA. */
|
||||||
static really_inline int isShengMcClellanType(u8 t) {
|
static really_inline int isShengMcClellanType(u8 t) {
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
return t == MCSHENG_64_NFA_8 || t == MCSHENG_64_NFA_16 || t == MCSHENG_NFA_8 ||
|
||||||
|
t == MCSHENG_NFA_16;
|
||||||
|
#else
|
||||||
return t == MCSHENG_NFA_8 || t == MCSHENG_NFA_16;
|
return t == MCSHENG_NFA_8 || t == MCSHENG_NFA_16;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \brief True if the given type (from NFA::type) is a Gough DFA. */
|
/** \brief True if the given type (from NFA::type) is a Gough DFA. */
|
||||||
|
@ -632,6 +632,7 @@ bytecode_ptr<NFA> getDfa(raw_dfa &rdfa, bool is_transient,
|
|||||||
* bytecode and that they are usually run on small blocks */
|
* bytecode and that they are usually run on small blocks */
|
||||||
dfa = mcshengCompile(rdfa, cc, rm);
|
dfa = mcshengCompile(rdfa, cc, rm);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HAVE_AVX512VBMI)
|
#if defined(HAVE_AVX512VBMI)
|
||||||
if (!dfa) {
|
if (!dfa) {
|
||||||
dfa = sheng32Compile(rdfa, cc, rm, false);
|
dfa = sheng32Compile(rdfa, cc, rm, false);
|
||||||
@ -639,6 +640,9 @@ bytecode_ptr<NFA> getDfa(raw_dfa &rdfa, bool is_transient,
|
|||||||
if (!dfa) {
|
if (!dfa) {
|
||||||
dfa = sheng64Compile(rdfa, cc, rm, false);
|
dfa = sheng64Compile(rdfa, cc, rm, false);
|
||||||
}
|
}
|
||||||
|
if (!dfa && !is_transient) {
|
||||||
|
dfa = mcshengCompile64(rdfa, cc, rm);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
if (!dfa) {
|
if (!dfa) {
|
||||||
// Sheng wasn't successful, so unleash McClellan!
|
// Sheng wasn't successful, so unleash McClellan!
|
||||||
|
@ -138,6 +138,12 @@ m128 lshift64_m128(m128 a, unsigned b) {
|
|||||||
#define eq128(a, b) _mm_cmpeq_epi8((a), (b))
|
#define eq128(a, b) _mm_cmpeq_epi8((a), (b))
|
||||||
#define movemask128(a) ((u32)_mm_movemask_epi8((a)))
|
#define movemask128(a) ((u32)_mm_movemask_epi8((a)))
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512)
|
||||||
|
static really_inline m128 cast512to128(const m512 in) {
|
||||||
|
return _mm512_castsi512_si128(in);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static really_inline m128 set16x8(u8 c) {
|
static really_inline m128 set16x8(u8 c) {
|
||||||
return _mm_set1_epi8(c);
|
return _mm_set1_epi8(c);
|
||||||
}
|
}
|
||||||
@ -156,6 +162,12 @@ static really_inline u32 movd512(const m512 in) {
|
|||||||
// so we use 2-step convertions to work around.
|
// so we use 2-step convertions to work around.
|
||||||
return _mm_cvtsi128_si32(_mm512_castsi512_si128(in));
|
return _mm_cvtsi128_si32(_mm512_castsi512_si128(in));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static really_inline u64a movq512(const m512 in) {
|
||||||
|
// NOTE: seems AVX512 doesn't support _mm512_cvtsi512_si64(in),
|
||||||
|
// so we use 2-step convertions to work around.
|
||||||
|
return _mm_cvtsi128_si64(_mm512_castsi512_si128(in));
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static really_inline u64a movq(const m128 in) {
|
static really_inline u64a movq(const m128 in) {
|
||||||
@ -1000,6 +1012,11 @@ m512 set8x64(u64a a) {
|
|||||||
return _mm512_set1_epi64(a);
|
return _mm512_set1_epi64(a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m512 set16x32(u32 a) {
|
||||||
|
return _mm512_set1_epi32(a);
|
||||||
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
m512 set512_64(u64a hi_3, u64a hi_2, u64a hi_1, u64a hi_0,
|
m512 set512_64(u64a hi_3, u64a hi_2, u64a hi_1, u64a hi_0,
|
||||||
u64a lo_3, u64a lo_2, u64a lo_1, u64a lo_0) {
|
u64a lo_3, u64a lo_2, u64a lo_1, u64a lo_0) {
|
||||||
@ -1017,6 +1034,26 @@ static really_inline
|
|||||||
m512 set4x128(m128 a) {
|
m512 set4x128(m128 a) {
|
||||||
return _mm512_broadcast_i32x4(a);
|
return _mm512_broadcast_i32x4(a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m512 sadd_u8_m512(m512 a, m512 b) {
|
||||||
|
return _mm512_adds_epu8(a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m512 max_u8_m512(m512 a, m512 b) {
|
||||||
|
return _mm512_max_epu8(a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m512 min_u8_m512(m512 a, m512 b) {
|
||||||
|
return _mm512_min_epu8(a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m512 sub_u8_m512(m512 a, m512 b) {
|
||||||
|
return _mm512_sub_epi8(a, b);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
|
Loading…
x
Reference in New Issue
Block a user