mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-11-16 09:21:52 +03:00
Merge branch 'develop' into wip-isildur-g-cppcheck66
This commit is contained in:
@@ -400,7 +400,7 @@ char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u64a subCastleNextMatch(const struct Castle *c, void *full_state,
|
||||
u64a subCastleNextMatch(const struct Castle *c, const void *full_state,
|
||||
void *stream_state, const u64a loc,
|
||||
const u32 subIdx) {
|
||||
DEBUG_PRINTF("subcastle %u\n", subIdx);
|
||||
@@ -489,7 +489,6 @@ char castleMatchLoop(const struct Castle *c, const u64a begin, const u64a end,
|
||||
// full_state (scratch).
|
||||
|
||||
u64a offset = end; // min offset of next match
|
||||
u32 activeIdx = 0;
|
||||
mmbit_clear(matching, c->numRepeats);
|
||||
if (c->exclusive) {
|
||||
u8 *active = (u8 *)stream_state;
|
||||
@@ -497,7 +496,7 @@ char castleMatchLoop(const struct Castle *c, const u64a begin, const u64a end,
|
||||
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
||||
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
||||
u8 *cur = active + i * c->activeIdxSize;
|
||||
activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
||||
u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
||||
u64a match = subCastleNextMatch(c, full_state, stream_state,
|
||||
loc, activeIdx);
|
||||
set_matching(c, match, groups, matching, c->numGroups, i,
|
||||
@@ -907,7 +906,6 @@ s64a castleLastKillLoc(const struct Castle *c, struct mq *q) {
|
||||
if (castleRevScan(c, q->history, sp + hlen, ep + hlen, &loc)) {
|
||||
return (s64a)loc - hlen;
|
||||
}
|
||||
ep = 0;
|
||||
}
|
||||
|
||||
return sp - 1; /* the repeats are never killed */
|
||||
|
||||
@@ -655,7 +655,8 @@ buildCastle(const CastleProto &proto,
|
||||
if (!stale_iter.empty()) {
|
||||
c->staleIterOffset = verify_u32(ptr - base_ptr);
|
||||
copy_bytes(ptr, stale_iter);
|
||||
ptr += byte_length(stale_iter);
|
||||
// ptr is not advanced past stale_iter here: nothing after this point reads it.
|
||||
// ptr += byte_length(stale_iter);
|
||||
}
|
||||
|
||||
return nfa;
|
||||
|
||||
@@ -1017,7 +1017,7 @@ void update_accel_prog_offset(const gough_build_strat &gbs,
|
||||
verts[gbs.gg[v].state_id] = v;
|
||||
}
|
||||
|
||||
for (auto &m : gbs.built_accel) {
|
||||
for (const auto &m : gbs.built_accel) {
|
||||
gough_accel *ga = m.first;
|
||||
assert(!ga->prog_offset);
|
||||
GoughVertex v = verts[m.second];
|
||||
|
||||
@@ -438,7 +438,7 @@ void create_slot_mapping(const GoughGraph &cfg, UNUSED u32 old_slot_count,
|
||||
}
|
||||
|
||||
static
|
||||
void update_local_slots(GoughGraph &g, set<GoughSSAVar *> &locals,
|
||||
void update_local_slots(GoughGraph &g, const set<GoughSSAVar *> &locals,
|
||||
u32 local_base) {
|
||||
DEBUG_PRINTF("%zu local variables\n", locals.size());
|
||||
/* local variables only occur on edges (joins are never local) */
|
||||
|
||||
@@ -332,7 +332,7 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx,
|
||||
// UE-1636) need to guard cyclic tug-accepts as well.
|
||||
static really_inline
|
||||
char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
|
||||
union RepeatControl *repeat_ctrl, char *repeat_state,
|
||||
const union RepeatControl *repeat_ctrl, const char *repeat_state,
|
||||
u64a offset, ReportID report) {
|
||||
assert(limex);
|
||||
|
||||
@@ -382,7 +382,7 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
|
||||
|
||||
static really_inline
|
||||
char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
|
||||
union RepeatControl *repeat_ctrl, char *repeat_state,
|
||||
const union RepeatControl *repeat_ctrl, const char *repeat_state,
|
||||
u64a offset) {
|
||||
assert(limex);
|
||||
|
||||
|
||||
@@ -1481,6 +1481,7 @@ u32 buildExceptionMap(const build_info &args, ReportListCache &reports_cache,
|
||||
continue;
|
||||
}
|
||||
u32 j = args.state_ids.at(w);
|
||||
// state_ids may map w to NO_STATE; skip w in that case.
|
||||
if (j == NO_STATE) {
|
||||
continue;
|
||||
}
|
||||
@@ -1572,7 +1573,7 @@ u32 findMaxVarShift(const build_info &args, u32 nShifts) {
|
||||
static
|
||||
int getLimexScore(const build_info &args, u32 nShifts) {
|
||||
const NGHolder &h = args.h;
|
||||
u32 maxVarShift = nShifts;
|
||||
u32 maxVarShift;
|
||||
int score = 0;
|
||||
|
||||
score += SHIFT_COST * nShifts;
|
||||
|
||||
@@ -361,7 +361,7 @@ struct raw_report_list {
|
||||
raw_report_list(const flat_set<ReportID> &reports_in,
|
||||
const ReportManager &rm, bool do_remap) {
|
||||
if (do_remap) {
|
||||
for (auto &id : reports_in) {
|
||||
for (const auto &id : reports_in) {
|
||||
reports.insert(rm.getProgramOffset(id));
|
||||
}
|
||||
} else {
|
||||
@@ -540,7 +540,7 @@ size_t calcWideRegionSize(const dfa_info &info) {
|
||||
static
|
||||
void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info,
|
||||
const vector<u32> &reports, const vector<u32> &reports_eod,
|
||||
vector<u32> &reportOffsets) {
|
||||
const vector<u32> &reportOffsets) {
|
||||
const dstate &raw_state = info.states[i];
|
||||
aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]];
|
||||
aux->accept_eod = raw_state.reports_eod.empty() ? 0
|
||||
@@ -794,8 +794,8 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
|
||||
}
|
||||
|
||||
for (size_t i : order) {
|
||||
vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
|
||||
vector<symbol_t> &symbol_chain = info.wide_symbol_chain[i];
|
||||
const vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
|
||||
const vector<symbol_t> &symbol_chain = info.wide_symbol_chain[i];
|
||||
|
||||
u16 width = verify_u16(symbol_chain.size());
|
||||
*(u16 *)(curr_wide_entry + WIDE_WIDTH_OFFSET) = width;
|
||||
@@ -1367,11 +1367,11 @@ bool store_chain_longest(vector<vector<dstate_id_t>> &candidate_chain,
|
||||
/* \brief Generate wide_symbol_chain from wide_state_chain. */
|
||||
static
|
||||
void generate_symbol_chain(dfa_info &info, vector<symbol_t> &chain_tail) {
|
||||
raw_dfa &rdfa = info.raw;
|
||||
const raw_dfa &rdfa = info.raw;
|
||||
assert(chain_tail.size() == info.wide_state_chain.size());
|
||||
|
||||
for (size_t i = 0; i < info.wide_state_chain.size(); i++) {
|
||||
vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
|
||||
const vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
|
||||
vector<symbol_t> symbol_chain;
|
||||
|
||||
info.extra[state_chain[0]].wideHead = true;
|
||||
|
||||
@@ -955,7 +955,7 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
|
||||
}
|
||||
|
||||
static
|
||||
void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
|
||||
void fill_in_sherman(NFA *nfa, const dfa_info &info, UNUSED u16 sherman_limit) {
|
||||
char *nfa_base = (char *)nfa;
|
||||
mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);
|
||||
char *sherman_table = nfa_base + m->sherman_offset;
|
||||
@@ -1109,7 +1109,7 @@ void fill_in_succ_table_8(NFA *nfa, const dfa_info &info,
|
||||
}
|
||||
|
||||
static
|
||||
void fill_in_sherman64(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
|
||||
void fill_in_sherman64(NFA *nfa, const dfa_info &info, UNUSED u16 sherman_limit) {
|
||||
char *nfa_base = (char *)nfa;
|
||||
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa);
|
||||
char *sherman_table = nfa_base + m->sherman_offset;
|
||||
|
||||
@@ -512,7 +512,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
|
||||
|
||||
verm_restart:;
|
||||
assert(buf[curr] == kp->u.verm.c);
|
||||
size_t test = curr;
|
||||
size_t test;
|
||||
if (curr + min_rep < length) {
|
||||
test = curr + min_rep;
|
||||
} else {
|
||||
@@ -534,7 +534,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
|
||||
m128 hi = kp->u.shuf.mask_hi;
|
||||
shuf_restart:
|
||||
assert(do_single_shufti(lo, hi, buf[curr]));
|
||||
size_t test = curr;
|
||||
size_t test;
|
||||
if (curr + min_rep < length) {
|
||||
test = curr + min_rep;
|
||||
} else {
|
||||
@@ -556,7 +556,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
|
||||
const m128 mask1 = kp->u.truffle.mask1;
|
||||
const m128 mask2 = kp->u.truffle.mask2;
|
||||
truffle_restart:;
|
||||
size_t test = curr;
|
||||
size_t test;
|
||||
if (curr + min_rep < length) {
|
||||
test = curr + min_rep;
|
||||
} else {
|
||||
@@ -582,7 +582,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
|
||||
|
||||
nverm_restart:;
|
||||
assert(buf[curr] != kp->u.verm.c);
|
||||
size_t test = curr;
|
||||
size_t test;
|
||||
if (curr + min_rep < length) {
|
||||
test = curr + min_rep;
|
||||
} else {
|
||||
@@ -607,7 +607,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void restartKilo(const struct mpv *m, UNUSED u8 *active, u8 *reporters,
|
||||
void restartKilo(const struct mpv *m, UNUSED const u8 *active, u8 *reporters,
|
||||
struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
|
||||
const u8 *buf, u64a prev_limit, size_t buf_length, u32 i) {
|
||||
const struct mpv_kilopuff *kp = (const void *)(m + 1);
|
||||
|
||||
@@ -785,7 +785,7 @@ enum RepeatMatch repeatHasMatchRange(const struct RepeatInfo *info,
|
||||
if (diff > info->repeatMax) {
|
||||
DEBUG_PRINTF("range list is stale\n");
|
||||
return REPEAT_STALE;
|
||||
} else if (diff >= info->repeatMin && diff <= info->repeatMax) {
|
||||
} else if (diff >= info->repeatMin) {
|
||||
return REPEAT_MATCH;
|
||||
}
|
||||
|
||||
@@ -836,7 +836,7 @@ enum RepeatMatch repeatHasMatchBitmap(const struct RepeatInfo *info,
|
||||
if (diff > info->repeatMax) {
|
||||
DEBUG_PRINTF("stale\n");
|
||||
return REPEAT_STALE;
|
||||
} else if (diff >= info->repeatMin && diff <= info->repeatMax) {
|
||||
} else if (diff >= info->repeatMin) {
|
||||
return REPEAT_MATCH;
|
||||
}
|
||||
|
||||
|
||||
@@ -94,9 +94,6 @@ u32 repeatRecurTable(struct RepeatStateInfo *info, const depth &repeatMax,
|
||||
static
|
||||
u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax,
|
||||
const u32 minPeriod, u32 rv) {
|
||||
u32 cnt = 0;
|
||||
u32 patch_bits = 0;
|
||||
u32 total_size = 0;
|
||||
u32 min = ~0U;
|
||||
u32 patch_len = 0;
|
||||
|
||||
@@ -105,11 +102,11 @@ u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax,
|
||||
}
|
||||
|
||||
for (u32 i = minPeriod; i <= rv; i++) {
|
||||
cnt = ((u32)repeatMax + (i - 1)) / i + 1;
|
||||
u32 cnt = ((u32)repeatMax + (i - 1)) / i + 1;
|
||||
|
||||
// no bit packing version
|
||||
patch_bits = calcPackedBits(info->table[i]);
|
||||
total_size = (patch_bits + 7U) / 8U * cnt;
|
||||
u32 patch_bits = calcPackedBits(info->table[i]);
|
||||
u32 total_size = (patch_bits + 7U) / 8U * cnt;
|
||||
|
||||
if (total_size < min) {
|
||||
patch_len = i;
|
||||
|
||||
@@ -154,7 +154,7 @@ char fireReports(const struct sheng *sh, NfaCallback cb, void *ctxt,
|
||||
return MO_CONTINUE_MATCHING; /* continue execution */
|
||||
}
|
||||
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
// Sheng32
|
||||
static really_inline
|
||||
const struct sheng32 *get_sheng32(const struct NFA *n) {
|
||||
@@ -351,7 +351,7 @@ char fireReports64(const struct sheng64 *sh, NfaCallback cb, void *ctxt,
|
||||
}
|
||||
return MO_CONTINUE_MATCHING; /* continue execution */
|
||||
}
|
||||
#endif // end of HAVE_AVX512VBMI
|
||||
#endif // end of HAVE_AVX512VBMI || HAVE_SVE
|
||||
|
||||
/* include Sheng function definitions */
|
||||
#include "sheng_defs.h"
|
||||
@@ -871,7 +871,7 @@ char nfaExecSheng_expandState(UNUSED const struct NFA *nfa, void *dest,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
// Sheng32
|
||||
static really_inline
|
||||
char runSheng32Cb(const struct sheng32 *sh, NfaCallback cb, void *ctxt,
|
||||
@@ -1874,4 +1874,4 @@ char nfaExecSheng64_expandState(UNUSED const struct NFA *nfa, void *dest,
|
||||
*(u8 *)dest = *(const u8 *)src;
|
||||
return 0;
|
||||
}
|
||||
#endif // end of HAVE_AVX512VBMI
|
||||
#endif // end of HAVE_AVX512VBMI || HAVE_SVE
|
||||
|
||||
@@ -58,7 +58,7 @@ char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q);
|
||||
char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer,
|
||||
size_t length, NfaCallback cb, void *context);
|
||||
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
|
||||
#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||
|
||||
@@ -106,8 +106,7 @@ char nfaExecSheng64_reportCurrent(const struct NFA *n, struct mq *q);
|
||||
|
||||
char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer,
|
||||
size_t length, NfaCallback cb, void *context);
|
||||
|
||||
#else // !HAVE_AVX512VBMI
|
||||
#else // !HAVE_AVX512VBMI && !HAVE_SVE
|
||||
|
||||
#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
|
||||
#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||
@@ -138,6 +137,7 @@ char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer,
|
||||
#define nfaExecSheng64_testEOD NFA_API_NO_IMPL
|
||||
#define nfaExecSheng64_reportCurrent NFA_API_NO_IMPL
|
||||
#define nfaExecSheng64_B NFA_API_NO_IMPL
|
||||
#endif // end of HAVE_AVX512VBMI
|
||||
#endif // end of HAVE_AVX512VBMI || HAVE_SVE
|
||||
|
||||
|
||||
#endif /* SHENG_H_ */
|
||||
|
||||
@@ -52,7 +52,7 @@ u8 hasInterestingStates(const u8 a, const u8 b, const u8 c, const u8 d) {
|
||||
return (a | b | c | d) & (SHENG_STATE_FLAG_MASK);
|
||||
}
|
||||
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
static really_inline
|
||||
u8 isDeadState32(const u8 a) {
|
||||
return a & SHENG32_STATE_DEAD;
|
||||
@@ -108,7 +108,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define SHENG_IMPL sheng_cod
|
||||
#define DEAD_FUNC isDeadState
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_cod
|
||||
#define DEAD_FUNC32 isDeadState32
|
||||
#define ACCEPT_FUNC32 isAcceptState32
|
||||
@@ -121,7 +121,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef DEAD_FUNC32
|
||||
#undef ACCEPT_FUNC32
|
||||
@@ -135,7 +135,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define SHENG_IMPL sheng_co
|
||||
#define DEAD_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_co
|
||||
#define DEAD_FUNC32 dummyFunc
|
||||
#define ACCEPT_FUNC32 isAcceptState32
|
||||
@@ -148,7 +148,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef DEAD_FUNC32
|
||||
#undef ACCEPT_FUNC32
|
||||
@@ -162,7 +162,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define SHENG_IMPL sheng_samd
|
||||
#define DEAD_FUNC isDeadState
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_samd
|
||||
#define DEAD_FUNC32 isDeadState32
|
||||
#define ACCEPT_FUNC32 isAcceptState32
|
||||
@@ -175,7 +175,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef DEAD_FUNC32
|
||||
#undef ACCEPT_FUNC32
|
||||
@@ -189,7 +189,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define SHENG_IMPL sheng_sam
|
||||
#define DEAD_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_sam
|
||||
#define DEAD_FUNC32 dummyFunc
|
||||
#define ACCEPT_FUNC32 isAcceptState32
|
||||
@@ -202,7 +202,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef DEAD_FUNC32
|
||||
#undef ACCEPT_FUNC32
|
||||
@@ -216,7 +216,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define SHENG_IMPL sheng_nmd
|
||||
#define DEAD_FUNC isDeadState
|
||||
#define ACCEPT_FUNC dummyFunc
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_nmd
|
||||
#define DEAD_FUNC32 isDeadState32
|
||||
#define ACCEPT_FUNC32 dummyFunc
|
||||
@@ -229,7 +229,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef DEAD_FUNC32
|
||||
#undef ACCEPT_FUNC32
|
||||
@@ -243,7 +243,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define SHENG_IMPL sheng_nm
|
||||
#define DEAD_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC dummyFunc
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_nm
|
||||
#define DEAD_FUNC32 dummyFunc
|
||||
#define ACCEPT_FUNC32 dummyFunc
|
||||
@@ -256,7 +256,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef DEAD_FUNC32
|
||||
#undef ACCEPT_FUNC32
|
||||
@@ -277,7 +277,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC isAccelState
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_coda
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 isDeadState32
|
||||
@@ -296,7 +296,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@@ -316,7 +316,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_cod
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 isDeadState32
|
||||
@@ -339,7 +339,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@@ -363,7 +363,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC isAccelState
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_coa
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 dummyFunc
|
||||
@@ -382,7 +382,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@@ -402,7 +402,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_co
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 dummyFunc
|
||||
@@ -425,7 +425,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@@ -449,7 +449,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC isAccelState
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_samda
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 isDeadState32
|
||||
@@ -468,7 +468,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@@ -488,7 +488,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_samd
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 isDeadState32
|
||||
@@ -511,7 +511,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@@ -535,7 +535,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC isAccelState
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_sama
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 dummyFunc
|
||||
@@ -554,7 +554,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@@ -574,7 +574,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_sam
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 dummyFunc
|
||||
@@ -597,7 +597,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@@ -623,7 +623,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC isAccelState
|
||||
#define ACCEPT_FUNC dummyFunc
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_nmda
|
||||
#define INTERESTING_FUNC32 dummyFunc4
|
||||
#define INNER_DEAD_FUNC32 dummyFunc
|
||||
@@ -642,7 +642,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@@ -662,7 +662,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC dummyFunc
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_nmd
|
||||
#define INTERESTING_FUNC32 dummyFunc4
|
||||
#define INNER_DEAD_FUNC32 dummyFunc
|
||||
@@ -685,7 +685,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@@ -712,7 +712,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC dummyFunc
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_nm
|
||||
#define INTERESTING_FUNC32 dummyFunc4
|
||||
#define INNER_DEAD_FUNC32 dummyFunc
|
||||
@@ -735,7 +735,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
|
||||
@@ -96,7 +96,7 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
static really_inline
|
||||
char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
const struct sheng32 *s,
|
||||
@@ -114,14 +114,28 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
}
|
||||
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
const svbool_t lane_pred_32 = svwhilelt_b8(0, 32);
|
||||
svuint8_t cur_state = svdup_u8(*state);
|
||||
svuint8_t tbl_mask = svdup_u8((unsigned char)0x1F);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#else
|
||||
m512 cur_state = set1_64x8(*state);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#endif
|
||||
|
||||
while (likely(cur_buf != end)) {
|
||||
const u8 c = *cur_buf;
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
svuint8_t succ_mask = svld1(lane_pred_32, (const u8*)(masks + c));
|
||||
cur_state = svtbl(succ_mask, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 tmp = svlastb(lane_pred_32, cur_state);
|
||||
#else
|
||||
const m512 succ_mask = masks[c];
|
||||
cur_state = vpermb512(cur_state, succ_mask);
|
||||
const u8 tmp = movd512(cur_state);
|
||||
#endif
|
||||
|
||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
|
||||
DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG32_STATE_MASK,
|
||||
@@ -153,7 +167,11 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
}
|
||||
cur_buf++;
|
||||
}
|
||||
#if defined(HAVE_SVE)
|
||||
*state = svlastb(lane_pred_32, cur_state);
|
||||
#else
|
||||
*state = movd512(cur_state);
|
||||
#endif
|
||||
*scan_end = cur_buf;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
@@ -175,14 +193,28 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
}
|
||||
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
const svbool_t lane_pred_64 = svwhilelt_b8(0, 64);
|
||||
svuint8_t cur_state = svdup_u8(*state);
|
||||
svuint8_t tbl_mask = svdup_u8((unsigned char)0x3F);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#else
|
||||
m512 cur_state = set1_64x8(*state);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#endif
|
||||
|
||||
while (likely(cur_buf != end)) {
|
||||
const u8 c = *cur_buf;
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
svuint8_t succ_mask = svld1(lane_pred_64, (const u8*)(masks + c));
|
||||
cur_state = svtbl(succ_mask, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 tmp = svlastb(lane_pred_64, cur_state);
|
||||
#else
|
||||
const m512 succ_mask = masks[c];
|
||||
cur_state = vpermb512(cur_state, succ_mask);
|
||||
const u8 tmp = movd512(cur_state);
|
||||
#endif
|
||||
|
||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
|
||||
DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG64_STATE_MASK,
|
||||
@@ -214,7 +246,11 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
}
|
||||
cur_buf++;
|
||||
}
|
||||
#if defined(HAVE_SVE)
|
||||
*state = svlastb(lane_pred_64, cur_state);
|
||||
#else
|
||||
*state = movd512(cur_state);
|
||||
#endif
|
||||
*scan_end = cur_buf;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
@@ -283,7 +283,7 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
static really_inline
|
||||
char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
const struct sheng32 *s,
|
||||
@@ -320,8 +320,15 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
const svbool_t lane_pred_32 = svwhilelt_b8(0, 32);
|
||||
svuint8_t cur_state = svdup_u8(*state);
|
||||
svuint8_t tbl_mask = svdup_u8((unsigned char)0x1F);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#else
|
||||
m512 cur_state = set1_64x8(*state);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#endif
|
||||
|
||||
while (likely(end - cur_buf >= 4)) {
|
||||
const u8 *b1 = cur_buf;
|
||||
@@ -333,6 +340,23 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
const u8 c3 = *b3;
|
||||
const u8 c4 = *b4;
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
svuint8_t succ_mask1 = svld1(lane_pred_32, (const u8*)(masks+c1));
|
||||
cur_state = svtbl(succ_mask1, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a1 = svlastb(lane_pred_32, cur_state);
|
||||
|
||||
svuint8_t succ_mask2 = svld1(lane_pred_32, (const u8*)(masks+c2));
|
||||
cur_state = svtbl(succ_mask2, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a2 = svlastb(lane_pred_32, cur_state);
|
||||
|
||||
svuint8_t succ_mask3 = svld1(lane_pred_32, (const u8*)(masks+c3));
|
||||
cur_state = svtbl(succ_mask3, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a3 = svlastb(lane_pred_32, cur_state);
|
||||
|
||||
svuint8_t succ_mask4 = svld1(lane_pred_32, (const u8*)(masks+c4));
|
||||
cur_state = svtbl(succ_mask4, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a4 = svlastb(lane_pred_32, cur_state);
|
||||
#else
|
||||
const m512 succ_mask1 = masks[c1];
|
||||
cur_state = vpermb512(cur_state, succ_mask1);
|
||||
const u8 a1 = movd512(cur_state);
|
||||
@@ -348,6 +372,7 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
const m512 succ_mask4 = masks[c4];
|
||||
cur_state = vpermb512(cur_state, succ_mask4);
|
||||
const u8 a4 = movd512(cur_state);
|
||||
#endif
|
||||
|
||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
|
||||
DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG32_STATE_MASK,
|
||||
@@ -517,7 +542,11 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
};
|
||||
cur_buf += 4;
|
||||
}
|
||||
#if defined(HAVE_SVE)
|
||||
*state = svlastb(lane_pred_32, cur_state);
|
||||
#else
|
||||
*state = movd512(cur_state);
|
||||
#endif
|
||||
*scan_end = cur_buf;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
@@ -541,9 +570,15 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
*scan_end = end;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
const svbool_t lane_pred_64 = svwhilelt_b8(0, 64);
|
||||
svuint8_t cur_state = svdup_u8(*state);
|
||||
svuint8_t tbl_mask = svdup_u8((unsigned char)0x3F);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#else
|
||||
m512 cur_state = set1_64x8(*state);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#endif
|
||||
|
||||
while (likely(end - cur_buf >= 4)) {
|
||||
const u8 *b1 = cur_buf;
|
||||
@@ -555,6 +590,23 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
const u8 c3 = *b3;
|
||||
const u8 c4 = *b4;
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
svuint8_t succ_mask1 = svld1(lane_pred_64, (const u8*)(masks+c1));
|
||||
cur_state = svtbl(succ_mask1, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a1 = svlastb(lane_pred_64, cur_state);
|
||||
|
||||
svuint8_t succ_mask2 = svld1(lane_pred_64, (const u8*)(masks+c2));
|
||||
cur_state = svtbl(succ_mask2, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a2 = svlastb(lane_pred_64, cur_state);
|
||||
|
||||
svuint8_t succ_mask3 = svld1(lane_pred_64, (const u8*)(masks+c3));
|
||||
cur_state = svtbl(succ_mask3, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a3 = svlastb(lane_pred_64, cur_state);
|
||||
|
||||
svuint8_t succ_mask4 = svld1(lane_pred_64, (const u8*)(masks+c4));
|
||||
cur_state = svtbl(succ_mask4, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a4 = svlastb(lane_pred_64, cur_state);
|
||||
#else
|
||||
const m512 succ_mask1 = masks[c1];
|
||||
cur_state = vpermb512(cur_state, succ_mask1);
|
||||
const u8 a1 = movd512(cur_state);
|
||||
@@ -570,6 +622,7 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
const m512 succ_mask4 = masks[c4];
|
||||
cur_state = vpermb512(cur_state, succ_mask4);
|
||||
const u8 a4 = movd512(cur_state);
|
||||
#endif
|
||||
|
||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
|
||||
DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG64_STATE_MASK,
|
||||
@@ -703,7 +756,11 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
}
|
||||
cur_buf += 4;
|
||||
}
|
||||
#if defined(HAVE_SVE)
|
||||
*state = svlastb(lane_pred_64, cur_state);
|
||||
#else
|
||||
*state = movd512(cur_state);
|
||||
#endif
|
||||
*scan_end = cur_buf;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
@@ -99,7 +99,7 @@ struct dfa_info {
|
||||
return next(idx, TOP);
|
||||
}
|
||||
dstate &next(dstate_id_t idx, u16 chr) {
|
||||
auto &src = (*this)[idx];
|
||||
const auto &src = (*this)[idx];
|
||||
auto next_id = src.next[raw.alpha_remap[chr]];
|
||||
return states[next_id];
|
||||
}
|
||||
@@ -109,7 +109,7 @@ struct dfa_info {
|
||||
// if DFA can't die, shift all indices left by 1
|
||||
return can_die ? idx : idx + 1;
|
||||
}
|
||||
bool isDead(dstate &state) {
|
||||
bool isDead(const dstate &state) {
|
||||
return raw_id(state.impl_id) == DEAD_STATE;
|
||||
}
|
||||
bool isDead(dstate_id_t idx) {
|
||||
@@ -117,7 +117,7 @@ struct dfa_info {
|
||||
}
|
||||
|
||||
private:
|
||||
static bool dfaCanDie(raw_dfa &rdfa) {
|
||||
static bool dfaCanDie(const raw_dfa &rdfa) {
|
||||
for (unsigned chr = 0; chr < 256; chr++) {
|
||||
for (dstate_id_t state = 0; state < rdfa.states.size(); state++) {
|
||||
auto succ = rdfa.states[state].next[rdfa.alpha_remap[chr]];
|
||||
@@ -138,7 +138,7 @@ struct raw_report_list {
|
||||
raw_report_list(const flat_set<ReportID> &reports_in,
|
||||
const ReportManager &rm, bool do_remap) {
|
||||
if (do_remap) {
|
||||
for (auto &id : reports_in) {
|
||||
for (const auto &id : reports_in) {
|
||||
reports.insert(rm.getProgramOffset(id));
|
||||
}
|
||||
} else {
|
||||
@@ -730,10 +730,17 @@ bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
#ifdef HAVE_SVE
|
||||
if (svcntb()<32) {
|
||||
DEBUG_PRINTF("Sheng32 failed, SVE width is too small!\n");
|
||||
return nullptr;
|
||||
}
|
||||
#else
|
||||
if (!cc.target_info.has_avx512vbmi()) {
|
||||
DEBUG_PRINTF("Sheng32 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
|
||||
return nullptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
sheng_build_strat strat(raw, rm, only_accel_init);
|
||||
dfa_info info(strat);
|
||||
@@ -762,10 +769,17 @@ bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
#ifdef HAVE_SVE
|
||||
if (svcntb()<64) {
|
||||
DEBUG_PRINTF("Sheng64 failed, SVE width is too small!\n");
|
||||
return nullptr;
|
||||
}
|
||||
#else
|
||||
if (!cc.target_info.has_avx512vbmi()) {
|
||||
DEBUG_PRINTF("Sheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
|
||||
return nullptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
sheng_build_strat strat(raw, rm, only_accel_init);
|
||||
dfa_info info(strat);
|
||||
|
||||
@@ -227,7 +227,7 @@ const u8 *fwdBlock(SuperVector<S> shuf_mask_lo_highclear, SuperVector<S> shuf_ma
|
||||
}
|
||||
|
||||
template <uint16_t S>
|
||||
const u8 *truffleExecReal(m128 &shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, const u8 *buf, const u8 *buf_end) {
|
||||
const u8 *truffleExecReal(const m128 &shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, const u8 *buf, const u8 *buf_end) {
|
||||
assert(buf && buf_end);
|
||||
assert(buf < buf_end);
|
||||
DEBUG_PRINTF("truffle %p len %zu\n", buf, buf_end - buf);
|
||||
@@ -349,4 +349,4 @@ const u8 *rtruffleExecReal(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highse
|
||||
|
||||
return buf - 1;
|
||||
}
|
||||
#endif //HAVE_SVE
|
||||
#endif //HAVE_SVE
|
||||
|
||||
Reference in New Issue
Block a user