mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2026-01-02 06:34:41 +03:00
Merge branch 'develop' into wip-cppcheck271-part2
This commit is contained in:
@@ -427,7 +427,7 @@ void
|
||||
accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
||||
const AccelScheme &info,
|
||||
void *accel_out) {
|
||||
AccelAux *accel = (AccelAux *)accel_out;
|
||||
AccelAux *accel = reinterpret_cast<AccelAux *>(accel_out);
|
||||
|
||||
DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset,
|
||||
info.double_offset);
|
||||
@@ -474,7 +474,8 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
||||
u8 c1 = info.double_byte.begin()->first & m1;
|
||||
u8 c2 = info.double_byte.begin()->second & m2;
|
||||
#ifdef HAVE_SVE2
|
||||
if (vermicelliDoubleMasked16Build(c1, c2, m1, m2, (u8 *)&accel->mdverm16.mask)) {
|
||||
if (vermicelliDoubleMasked16Build(c1, c2, m1, m2,
|
||||
reinterpret_cast<u8 *>(&accel->mdverm16.mask))) {
|
||||
accel->accel_type = ACCEL_DVERM16_MASKED;
|
||||
accel->mdverm16.offset = verify_u8(info.double_offset);
|
||||
accel->mdverm16.c1 = c1;
|
||||
@@ -483,8 +484,9 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
||||
c1, c2);
|
||||
return;
|
||||
} else if (info.double_byte.size() <= 8 &&
|
||||
vermicelliDouble16Build(info.double_byte, (u8 *)&accel->dverm16.mask,
|
||||
(u8 *)&accel->dverm16.firsts)) {
|
||||
vermicelliDouble16Build(info.double_byte,
|
||||
reinterpret_cast<u8 *>(&accel->dverm16.mask),
|
||||
reinterpret_cast<u8 *>(&accel->dverm16.firsts))) {
|
||||
accel->accel_type = ACCEL_DVERM16;
|
||||
accel->dverm16.offset = verify_u8(info.double_offset);
|
||||
DEBUG_PRINTF("building double16-vermicelli\n");
|
||||
@@ -504,8 +506,9 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
||||
}
|
||||
#ifdef HAVE_SVE2
|
||||
if (info.double_byte.size() <= 8 &&
|
||||
vermicelliDouble16Build(info.double_byte, (u8 *)&accel->dverm16.mask,
|
||||
(u8 *)&accel->dverm16.firsts)) {
|
||||
vermicelliDouble16Build(info.double_byte,
|
||||
reinterpret_cast<u8 *>(&accel->dverm16.mask),
|
||||
reinterpret_cast<u8 *>(&accel->dverm16.firsts))) {
|
||||
accel->accel_type = ACCEL_DVERM16;
|
||||
accel->dverm16.offset = verify_u8(info.double_offset);
|
||||
DEBUG_PRINTF("building double16-vermicelli\n");
|
||||
@@ -516,9 +519,11 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
||||
|
||||
if (double_byte_ok(info) &&
|
||||
shuftiBuildDoubleMasks(
|
||||
info.double_cr, info.double_byte, (u8 *)&accel->dshufti.lo1,
|
||||
(u8 *)&accel->dshufti.hi1, (u8 *)&accel->dshufti.lo2,
|
||||
(u8 *)&accel->dshufti.hi2)) {
|
||||
info.double_cr, info.double_byte,
|
||||
reinterpret_cast<u8 *>(&accel->dshufti.lo1),
|
||||
reinterpret_cast<u8 *>(&accel->dshufti.hi1),
|
||||
reinterpret_cast<u8 *>(&accel->dshufti.lo2),
|
||||
reinterpret_cast<u8 *>(&accel->dshufti.hi2))) {
|
||||
accel->accel_type = ACCEL_DSHUFTI;
|
||||
accel->dshufti.offset = verify_u8(info.double_offset);
|
||||
DEBUG_PRINTF("state %hu is double shufti\n", this_idx);
|
||||
@@ -550,7 +555,7 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
||||
#ifdef HAVE_SVE2
|
||||
if (info.cr.count() <= 16) {
|
||||
accel->accel_type = ACCEL_VERM16;
|
||||
vermicelli16Build(info.cr, (u8 *)&accel->verm16.mask);
|
||||
vermicelli16Build(info.cr, reinterpret_cast<u8 *>(&accel->verm16.mask));
|
||||
DEBUG_PRINTF("state %hu is vermicelli16\n", this_idx);
|
||||
return;
|
||||
}
|
||||
@@ -563,16 +568,18 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
||||
}
|
||||
|
||||
accel->accel_type = ACCEL_SHUFTI;
|
||||
if (-1 != shuftiBuildMasks(info.cr, (u8 *)&accel->shufti.lo,
|
||||
(u8 *)&accel->shufti.hi)) {
|
||||
if (-1 != shuftiBuildMasks(info.cr,
|
||||
reinterpret_cast<u8 *>(&accel->shufti.lo),
|
||||
reinterpret_cast<u8 *>(&accel->shufti.hi))) {
|
||||
DEBUG_PRINTF("state %hu is shufti\n", this_idx);
|
||||
return;
|
||||
}
|
||||
|
||||
assert(!info.cr.none());
|
||||
accel->accel_type = ACCEL_TRUFFLE;
|
||||
truffleBuildMasks(info.cr, (u8 *)&accel->truffle.mask1,
|
||||
(u8 *)&accel->truffle.mask2);
|
||||
truffleBuildMasks(info.cr,
|
||||
reinterpret_cast<u8 *>(&accel->truffle.mask1),
|
||||
reinterpret_cast<u8 *>(&accel->truffle.mask2));
|
||||
DEBUG_PRINTF("state %hu is truffle\n", this_idx);
|
||||
}
|
||||
|
||||
|
||||
@@ -84,8 +84,9 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) {
|
||||
#endif
|
||||
|
||||
DEBUG_PRINTF("attempting shufti for %zu chars\n", outs);
|
||||
if (-1 != shuftiBuildMasks(info.single_stops, (u8 *)&aux->shufti.lo,
|
||||
(u8 *)&aux->shufti.hi)) {
|
||||
if (-1 != shuftiBuildMasks(info.single_stops,
|
||||
reinterpret_cast<u8 *>(&aux->shufti.lo),
|
||||
reinterpret_cast<u8 *>(&aux->shufti.hi))) {
|
||||
aux->accel_type = ACCEL_SHUFTI;
|
||||
aux->shufti.offset = offset;
|
||||
DEBUG_PRINTF("shufti built OK\n");
|
||||
@@ -98,8 +99,9 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) {
|
||||
DEBUG_PRINTF("building Truffle for %zu chars\n", outs);
|
||||
aux->accel_type = ACCEL_TRUFFLE;
|
||||
aux->truffle.offset = offset;
|
||||
truffleBuildMasks(info.single_stops, (u8 *)&aux->truffle.mask1,
|
||||
(u8 *)&aux->truffle.mask2);
|
||||
truffleBuildMasks(info.single_stops,
|
||||
reinterpret_cast<u8 *>(&aux->truffle.mask1),
|
||||
reinterpret_cast<u8 *>(&aux->truffle.mask2));
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -219,8 +221,9 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
|
||||
c1, c2);
|
||||
return;
|
||||
} else if (outs2 <= 8 &&
|
||||
vermicelliDouble16Build(info.double_stop2, (u8 *)&aux->dverm16.mask,
|
||||
(u8 *)&aux->dverm16.firsts)) {
|
||||
vermicelliDouble16Build(info.double_stop2,
|
||||
reinterpret_cast<u8 *>(&aux->dverm16.mask),
|
||||
reinterpret_cast<u8 *>(&aux->dverm16.firsts))) {
|
||||
aux->accel_type = ACCEL_DVERM16;
|
||||
aux->dverm16.offset = offset;
|
||||
DEBUG_PRINTF("building double16-vermicelli\n");
|
||||
@@ -254,9 +257,11 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
|
||||
aux->accel_type = ACCEL_DSHUFTI;
|
||||
aux->dshufti.offset = offset;
|
||||
if (shuftiBuildDoubleMasks(
|
||||
info.double_stop1, info.double_stop2, (u8 *)&aux->dshufti.lo1,
|
||||
(u8 *)&aux->dshufti.hi1, (u8 *)&aux->dshufti.lo2,
|
||||
(u8 *)&aux->dshufti.hi2)) {
|
||||
info.double_stop1, info.double_stop2,
|
||||
reinterpret_cast<u8 *>(&aux->dshufti.lo1),
|
||||
reinterpret_cast<u8 *>(&aux->dshufti.hi1),
|
||||
reinterpret_cast<u8 *>(&aux->dshufti.lo2),
|
||||
reinterpret_cast<u8 *>(&aux->dshufti.hi2))) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -94,8 +94,8 @@ char subCastleReportCurrent(const struct Castle *c, struct mq *q,
|
||||
const struct SubCastle *sub = getSubCastle(c, subIdx);
|
||||
const struct RepeatInfo *info = getRepeatInfo(sub);
|
||||
|
||||
union RepeatControl *rctrl = getControl(q->state, sub);
|
||||
char *rstate = (char *)q->streamState + sub->streamStateOffset +
|
||||
const union RepeatControl *rctrl = getControl(q->state, sub);
|
||||
const char *rstate = (char *)q->streamState + sub->streamStateOffset +
|
||||
info->packedCtrlSize;
|
||||
enum RepeatMatch match =
|
||||
repeatHasMatch(info, rctrl, rstate, offset);
|
||||
@@ -118,10 +118,10 @@ int castleReportCurrent(const struct Castle *c, struct mq *q) {
|
||||
|
||||
if (c->exclusive) {
|
||||
u8 *active = (u8 *)q->streamState;
|
||||
u8 *groups = active + c->groupIterOffset;
|
||||
const u8 *groups = active + c->groupIterOffset;
|
||||
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
||||
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
||||
u8 *cur = active + i * c->activeIdxSize;
|
||||
const u8 *cur = active + i * c->activeIdxSize;
|
||||
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
||||
DEBUG_PRINTF("subcastle %u\n", activeIdx);
|
||||
if (subCastleReportCurrent(c, q,
|
||||
@@ -156,8 +156,8 @@ char subCastleInAccept(const struct Castle *c, struct mq *q,
|
||||
}
|
||||
const struct RepeatInfo *info = getRepeatInfo(sub);
|
||||
|
||||
union RepeatControl *rctrl = getControl(q->state, sub);
|
||||
char *rstate = (char *)q->streamState + sub->streamStateOffset +
|
||||
const union RepeatControl *rctrl = getControl(q->state, sub);
|
||||
const char *rstate = (char *)q->streamState + sub->streamStateOffset +
|
||||
info->packedCtrlSize;
|
||||
enum RepeatMatch match =
|
||||
repeatHasMatch(info, rctrl, rstate, offset);
|
||||
@@ -180,10 +180,10 @@ char castleInAccept(const struct Castle *c, struct mq *q,
|
||||
|
||||
if (c->exclusive) {
|
||||
u8 *active = (u8 *)q->streamState;
|
||||
u8 *groups = active + c->groupIterOffset;
|
||||
const u8 *groups = active + c->groupIterOffset;
|
||||
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
||||
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
||||
u8 *cur = active + i * c->activeIdxSize;
|
||||
const u8 *cur = active + i * c->activeIdxSize;
|
||||
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
||||
DEBUG_PRINTF("subcastle %u\n", activeIdx);
|
||||
if (subCastleInAccept(c, q, report, offset, activeIdx)) {
|
||||
@@ -213,8 +213,8 @@ void subCastleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
|
||||
const struct SubCastle *sub = getSubCastle(c, subIdx);
|
||||
const struct RepeatInfo *info = getRepeatInfo(sub);
|
||||
|
||||
union RepeatControl *rctrl = getControl(full_state, sub);
|
||||
char *rstate = (char *)stream_state + sub->streamStateOffset +
|
||||
const union RepeatControl *rctrl = getControl(full_state, sub);
|
||||
const char *rstate = (char *)stream_state + sub->streamStateOffset +
|
||||
info->packedCtrlSize;
|
||||
|
||||
if (repeatHasMatch(info, rctrl, rstate, offset) == REPEAT_STALE) {
|
||||
@@ -242,10 +242,10 @@ void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
|
||||
|
||||
if (c->exclusive) {
|
||||
u8 *active = (u8 *)stream_state;
|
||||
u8 *groups = active + c->groupIterOffset;
|
||||
const u8 *groups = active + c->groupIterOffset;
|
||||
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
||||
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
||||
u8 *cur = active + i * c->activeIdxSize;
|
||||
const u8 *cur = active + i * c->activeIdxSize;
|
||||
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
||||
DEBUG_PRINTF("subcastle %u\n", activeIdx);
|
||||
subCastleDeactivateStaleSubs(c, offset, full_state,
|
||||
@@ -329,8 +329,8 @@ void subCastleFindMatch(const struct Castle *c, const u64a begin,
|
||||
size_t *mloc, char *found, const u32 subIdx) {
|
||||
const struct SubCastle *sub = getSubCastle(c, subIdx);
|
||||
const struct RepeatInfo *info = getRepeatInfo(sub);
|
||||
union RepeatControl *rctrl = getControl(full_state, sub);
|
||||
char *rstate = (char *)stream_state + sub->streamStateOffset +
|
||||
const union RepeatControl *rctrl = getControl(full_state, sub);
|
||||
const char *rstate = (char *)stream_state + sub->streamStateOffset +
|
||||
info->packedCtrlSize;
|
||||
|
||||
u64a match = repeatNextMatch(info, rctrl, rstate, begin);
|
||||
@@ -374,10 +374,10 @@ char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end,
|
||||
|
||||
if (c->exclusive) {
|
||||
u8 *active = (u8 *)stream_state;
|
||||
u8 *groups = active + c->groupIterOffset;
|
||||
const u8 *groups = active + c->groupIterOffset;
|
||||
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
||||
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
||||
u8 *cur = active + i * c->activeIdxSize;
|
||||
const u8 *cur = active + i * c->activeIdxSize;
|
||||
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
||||
DEBUG_PRINTF("subcastle %u\n", activeIdx);
|
||||
subCastleFindMatch(c, begin, end, full_state, stream_state, mloc,
|
||||
@@ -386,7 +386,7 @@ char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end,
|
||||
}
|
||||
|
||||
if (c->exclusive != PURE_EXCLUSIVE) {
|
||||
u8 *active = (u8 *)stream_state + c->activeOffset;
|
||||
const u8 *active = (u8 *)stream_state + c->activeOffset;
|
||||
for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
|
||||
i != MMB_INVALID;
|
||||
i = mmbit_iterate(active, c->numRepeats, i)) {
|
||||
@@ -400,8 +400,8 @@ char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u64a subCastleNextMatch(const struct Castle *c, void *full_state,
|
||||
void *stream_state, const u64a loc,
|
||||
u64a subCastleNextMatch(const struct Castle *c, const void *full_state,
|
||||
const void *stream_state, const u64a loc,
|
||||
const u32 subIdx) {
|
||||
DEBUG_PRINTF("subcastle %u\n", subIdx);
|
||||
const struct SubCastle *sub = getSubCastle(c, subIdx);
|
||||
@@ -489,15 +489,14 @@ char castleMatchLoop(const struct Castle *c, const u64a begin, const u64a end,
|
||||
// full_state (scratch).
|
||||
|
||||
u64a offset = end; // min offset of next match
|
||||
u32 activeIdx = 0;
|
||||
mmbit_clear(matching, c->numRepeats);
|
||||
if (c->exclusive) {
|
||||
u8 *active = (u8 *)stream_state;
|
||||
u8 *groups = active + c->groupIterOffset;
|
||||
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
||||
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
||||
u8 *cur = active + i * c->activeIdxSize;
|
||||
activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
||||
const u8 *cur = active + i * c->activeIdxSize;
|
||||
u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
||||
u64a match = subCastleNextMatch(c, full_state, stream_state,
|
||||
loc, activeIdx);
|
||||
set_matching(c, match, groups, matching, c->numGroups, i,
|
||||
@@ -797,7 +796,7 @@ char nfaExecCastle_Q_i(const struct NFA *n, struct mq *q, s64a end,
|
||||
|
||||
char found = 0;
|
||||
if (c->exclusive) {
|
||||
u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
|
||||
const u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
|
||||
found = mmbit_any(groups, c->numGroups);
|
||||
}
|
||||
|
||||
@@ -864,7 +863,7 @@ char nfaExecCastle_Q_i(const struct NFA *n, struct mq *q, s64a end,
|
||||
}
|
||||
|
||||
if (c->exclusive) {
|
||||
u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
|
||||
const u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
|
||||
if (mmbit_any_precise(groups, c->numGroups)) {
|
||||
return 1;
|
||||
}
|
||||
@@ -884,7 +883,7 @@ char nfaExecCastle_Q2(const struct NFA *n, struct mq *q, s64a end) {
|
||||
}
|
||||
|
||||
static
|
||||
s64a castleLastKillLoc(const struct Castle *c, struct mq *q) {
|
||||
s64a castleLastKillLoc(const struct Castle *c, const struct mq *q) {
|
||||
assert(q_cur_type(q) == MQE_START);
|
||||
assert(q_last_type(q) == MQE_END);
|
||||
s64a sp = q_cur_loc(q);
|
||||
@@ -907,7 +906,6 @@ s64a castleLastKillLoc(const struct Castle *c, struct mq *q) {
|
||||
if (castleRevScan(c, q->history, sp + hlen, ep + hlen, &loc)) {
|
||||
return (s64a)loc - hlen;
|
||||
}
|
||||
ep = 0;
|
||||
}
|
||||
|
||||
return sp - 1; /* the repeats are never killed */
|
||||
@@ -959,7 +957,7 @@ char nfaExecCastle_QR(const struct NFA *n, struct mq *q, ReportID report) {
|
||||
|
||||
char found = 0;
|
||||
if (c->exclusive) {
|
||||
u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
|
||||
const u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
|
||||
found = mmbit_any_precise(groups, c->numGroups);
|
||||
|
||||
}
|
||||
@@ -1007,10 +1005,10 @@ char nfaExecCastle_inAnyAccept(const struct NFA *n, struct mq *q) {
|
||||
|
||||
if (c->exclusive) {
|
||||
u8 *active = (u8 *)q->streamState;
|
||||
u8 *groups = active + c->groupIterOffset;
|
||||
const u8 *groups = active + c->groupIterOffset;
|
||||
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
||||
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
||||
u8 *cur = active + i * c->activeIdxSize;
|
||||
const u8 *cur = active + i * c->activeIdxSize;
|
||||
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
||||
DEBUG_PRINTF("subcastle %u\n", activeIdx);
|
||||
const struct SubCastle *sub = getSubCastle(c, activeIdx);
|
||||
@@ -1079,7 +1077,7 @@ void subCastleQueueCompressState(const struct Castle *c, const u32 subIdx,
|
||||
const struct mq *q, const u64a offset) {
|
||||
const struct SubCastle *sub = getSubCastle(c, subIdx);
|
||||
const struct RepeatInfo *info = getRepeatInfo(sub);
|
||||
union RepeatControl *rctrl = getControl(q->state, sub);
|
||||
const union RepeatControl *rctrl = getControl(q->state, sub);
|
||||
char *packed = (char *)q->streamState + sub->streamStateOffset;
|
||||
DEBUG_PRINTF("sub %u next match %llu\n", subIdx,
|
||||
repeatNextMatch(info, rctrl,
|
||||
@@ -1100,10 +1098,10 @@ char nfaExecCastle_queueCompressState(const struct NFA *n, const struct mq *q,
|
||||
DEBUG_PRINTF("offset=%llu\n", offset);
|
||||
if (c->exclusive) {
|
||||
u8 *active = (u8 *)q->streamState;
|
||||
u8 *groups = active + c->groupIterOffset;
|
||||
const u8 *groups = active + c->groupIterOffset;
|
||||
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
||||
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
||||
u8 *cur = active + i * c->activeIdxSize;
|
||||
const u8 *cur = active + i * c->activeIdxSize;
|
||||
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
||||
DEBUG_PRINTF("packing state for sub %u\n", activeIdx);
|
||||
subCastleQueueCompressState(c, activeIdx, q, offset);
|
||||
|
||||
@@ -106,25 +106,27 @@ void writeCastleScanEngine(const CharReach &cr, Castle *c) {
|
||||
#ifdef HAVE_SVE2
|
||||
if (cr.count() <= 16) {
|
||||
c->type = CASTLE_NVERM16;
|
||||
vermicelli16Build(cr, (u8 *)&c->u.verm16.mask);
|
||||
vermicelli16Build(cr, reinterpret_cast<u8 *>(&c->u.verm16.mask));
|
||||
return;
|
||||
}
|
||||
if (negated.count() <= 16) {
|
||||
c->type = CASTLE_VERM16;
|
||||
vermicelli16Build(negated, (u8 *)&c->u.verm16.mask);
|
||||
vermicelli16Build(negated, reinterpret_cast<u8 *>(&c->u.verm16.mask));
|
||||
return;
|
||||
}
|
||||
#endif // HAVE_SVE2
|
||||
|
||||
if (shuftiBuildMasks(negated, (u8 *)&c->u.shuf.mask_lo,
|
||||
(u8 *)&c->u.shuf.mask_hi) != -1) {
|
||||
if (shuftiBuildMasks(negated,
|
||||
reinterpret_cast<u8 *>(&c->u.shuf.mask_lo),
|
||||
reinterpret_cast<u8 *>(&c->u.shuf.mask_hi)) != -1) {
|
||||
c->type = CASTLE_SHUFTI;
|
||||
return;
|
||||
}
|
||||
|
||||
c->type = CASTLE_TRUFFLE;
|
||||
truffleBuildMasks(negated, (u8 *)(u8 *)&c->u.truffle.mask1,
|
||||
(u8 *)&c->u.truffle.mask2);
|
||||
truffleBuildMasks(negated,
|
||||
reinterpret_cast<u8 *>(&c->u.truffle.mask1),
|
||||
reinterpret_cast<u8 *>(&c->u.truffle.mask2));
|
||||
}
|
||||
|
||||
static
|
||||
@@ -227,11 +229,13 @@ vector<u32> removeClique(CliqueGraph &cg) {
|
||||
while (!graph_empty(cg)) {
|
||||
const vector<u32> &c = cliquesVec.back();
|
||||
vector<CliqueVertex> dead;
|
||||
for (const auto &v : vertices_range(cg)) {
|
||||
if (find(c.begin(), c.end(), cg[v].stateId) != c.end()) {
|
||||
dead.emplace_back(v);
|
||||
}
|
||||
}
|
||||
|
||||
auto deads = [&c=c, &cg=cg](const CliqueVertex &v) {
|
||||
return (find(c.begin(), c.end(), cg[v].stateId) != c.end());
|
||||
};
|
||||
const auto &vr = vertices_range(cg);
|
||||
std::copy_if(begin(vr), end(vr), std::back_inserter(dead), deads);
|
||||
|
||||
for (const auto &v : dead) {
|
||||
clear_vertex(v, cg);
|
||||
remove_vertex(v, cg);
|
||||
@@ -294,7 +298,7 @@ vector<vector<u32>> checkExclusion(u32 &streamStateSize,
|
||||
size_t lower = 0;
|
||||
size_t total = 0;
|
||||
while (lower < trigSize) {
|
||||
vector<CliqueVertex> vertices;
|
||||
vector<CliqueVertex> clvertices;
|
||||
unique_ptr<CliqueGraph> cg = make_unique<CliqueGraph>();
|
||||
|
||||
vector<vector<size_t>> min_reset_dist;
|
||||
@@ -302,7 +306,7 @@ vector<vector<u32>> checkExclusion(u32 &streamStateSize,
|
||||
// get min reset distance for each repeat
|
||||
for (size_t i = lower; i < upper; i++) {
|
||||
CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg);
|
||||
vertices.emplace_back(v);
|
||||
clvertices.emplace_back(v);
|
||||
|
||||
const vector<size_t> &tmp_dist =
|
||||
minResetDistToEnd(triggers[i], cr);
|
||||
@@ -311,11 +315,11 @@ vector<vector<u32>> checkExclusion(u32 &streamStateSize,
|
||||
|
||||
// find exclusive pair for each repeat
|
||||
for (size_t i = lower; i < upper; i++) {
|
||||
CliqueVertex s = vertices[i - lower];
|
||||
CliqueVertex s = clvertices[i - lower];
|
||||
for (size_t j = i + 1; j < upper; j++) {
|
||||
if (findExclusivePair(i, j, lower, min_reset_dist,
|
||||
triggers)) {
|
||||
CliqueVertex d = vertices[j - lower];
|
||||
CliqueVertex d = clvertices[j - lower];
|
||||
add_edge(s, d, *cg);
|
||||
}
|
||||
}
|
||||
@@ -600,9 +604,9 @@ buildCastle(const CastleProto &proto,
|
||||
nfa->minWidth = verify_u32(minWidth);
|
||||
nfa->maxWidth = maxWidth.is_finite() ? verify_u32(maxWidth) : 0;
|
||||
|
||||
char * const base_ptr = (char *)nfa.get() + sizeof(NFA);
|
||||
char * const base_ptr = reinterpret_cast<char *>(nfa.get()) + sizeof(NFA);
|
||||
char *ptr = base_ptr;
|
||||
Castle *c = (Castle *)ptr;
|
||||
Castle *c = reinterpret_cast<Castle *>(ptr);
|
||||
c->numRepeats = verify_u32(subs.size());
|
||||
c->numGroups = exclusiveInfo.numGroups;
|
||||
c->exclusive = verify_s8(exclusive);
|
||||
@@ -613,7 +617,7 @@ buildCastle(const CastleProto &proto,
|
||||
writeCastleScanEngine(cr, c);
|
||||
|
||||
ptr += sizeof(Castle);
|
||||
SubCastle *subCastles = ((SubCastle *)(ROUNDUP_PTR(ptr, alignof(u32))));
|
||||
SubCastle *subCastles = reinterpret_cast<SubCastle *>(ROUNDUP_PTR(ptr, alignof(u32)));
|
||||
copy(subs.begin(), subs.end(), subCastles);
|
||||
|
||||
u32 length = 0;
|
||||
@@ -623,16 +627,16 @@ buildCastle(const CastleProto &proto,
|
||||
SubCastle *sub = &subCastles[i];
|
||||
sub->repeatInfoOffset = offset;
|
||||
|
||||
ptr = (char *)sub + offset;
|
||||
ptr = reinterpret_cast<char *>(sub) + offset;
|
||||
memcpy(ptr, &infos[i], sizeof(RepeatInfo));
|
||||
|
||||
if (patchSize[i]) {
|
||||
RepeatInfo *info = (RepeatInfo *)ptr;
|
||||
u64a *table = ((u64a *)(ROUNDUP_PTR(((char *)(info) +
|
||||
sizeof(*info)), alignof(u64a))));
|
||||
RepeatInfo *info = reinterpret_cast<RepeatInfo *>(ptr);
|
||||
u64a *table = reinterpret_cast<u64a *>(ROUNDUP_PTR(reinterpret_cast<char *>(info) +
|
||||
sizeof(*info), alignof(u64a)));
|
||||
copy(tables.begin() + tableIdx,
|
||||
tables.begin() + tableIdx + patchSize[i], table);
|
||||
u32 diff = (char *)table - (char *)info +
|
||||
u32 diff = reinterpret_cast<ptrdiff_t>(table) - reinterpret_cast<ptrdiff_t>(info) +
|
||||
sizeof(u64a) * patchSize[i];
|
||||
info->length = diff;
|
||||
length += diff;
|
||||
@@ -655,7 +659,6 @@ buildCastle(const CastleProto &proto,
|
||||
if (!stale_iter.empty()) {
|
||||
c->staleIterOffset = verify_u32(ptr - base_ptr);
|
||||
copy_bytes(ptr, stale_iter);
|
||||
ptr += byte_length(stale_iter);
|
||||
}
|
||||
|
||||
return nfa;
|
||||
@@ -922,7 +925,7 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) {
|
||||
u32 min_bound = pr.bounds.min; // always finite
|
||||
if (min_bound == 0) { // Vacuous case, we can only do this once.
|
||||
assert(!edge(g.start, g.accept, g).second);
|
||||
NFAEdge e = add_edge(g.start, g.accept, g);
|
||||
NFAEdge e = add_edge(g.start, g.accept, g).first;
|
||||
g[e].tops.insert(top);
|
||||
g[u].reports.insert(pr.reports.begin(), pr.reports.end());
|
||||
min_bound = 1;
|
||||
@@ -931,7 +934,7 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) {
|
||||
for (u32 i = 0; i < min_bound; i++) {
|
||||
NFAVertex v = add_vertex(g);
|
||||
g[v].char_reach = pr.reach;
|
||||
NFAEdge e = add_edge(u, v, g);
|
||||
NFAEdge e = add_edge(u, v, g).first;
|
||||
if (u == g.start) {
|
||||
g[e].tops.insert(top);
|
||||
}
|
||||
@@ -950,7 +953,7 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) {
|
||||
if (head != u) {
|
||||
add_edge(head, v, g);
|
||||
}
|
||||
NFAEdge e = add_edge(u, v, g);
|
||||
NFAEdge e = add_edge(u, v, g).first;
|
||||
if (u == g.start) {
|
||||
g[e].tops.insert(top);
|
||||
}
|
||||
|
||||
@@ -305,6 +305,7 @@ void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) {
|
||||
DEBUG_PRINTF("dfa is empty\n");
|
||||
}
|
||||
|
||||
// cppcheck-suppress unreadVariable
|
||||
UNUSED const size_t states_before = rdfa.states.size();
|
||||
|
||||
HopcroftInfo info(rdfa);
|
||||
|
||||
@@ -978,14 +978,14 @@ char nfaExecGough16_initCompressedState(const struct NFA *nfa, u64a offset,
|
||||
char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) {
|
||||
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
|
||||
NfaCallback cb = q->cb;
|
||||
void *ctxt = q->context;
|
||||
u8 s = *(u8 *)q->state;
|
||||
u64a offset = q_cur_offset(q);
|
||||
struct gough_som_info *som = getSomInfo(q->state);
|
||||
const struct gough_som_info *som = getSomInfo(q->state);
|
||||
assert(q_cur_type(q) == MQE_START);
|
||||
assert(s);
|
||||
|
||||
if (s >= m->accept_limit_8) {
|
||||
void *ctxt = q->context;
|
||||
u32 cached_accept_id = 0;
|
||||
u16 cached_accept_state = 0;
|
||||
u32 cached_accept_som = 0;
|
||||
@@ -1000,16 +1000,16 @@ char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) {
|
||||
char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q) {
|
||||
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
|
||||
NfaCallback cb = q->cb;
|
||||
void *ctxt = q->context;
|
||||
u16 s = *(u16 *)q->state;
|
||||
const struct mstate_aux *aux = get_aux(m, s);
|
||||
u64a offset = q_cur_offset(q);
|
||||
struct gough_som_info *som = getSomInfo(q->state);
|
||||
const struct gough_som_info *som = getSomInfo(q->state);
|
||||
assert(q_cur_type(q) == MQE_START);
|
||||
DEBUG_PRINTF("state %hu\n", s);
|
||||
assert(s);
|
||||
|
||||
if (aux->accept) {
|
||||
void *ctxt = q->context;
|
||||
u32 cached_accept_id = 0;
|
||||
u16 cached_accept_state = 0;
|
||||
u32 cached_accept_som = 0;
|
||||
|
||||
@@ -132,7 +132,7 @@ void GoughSSAVarMin::replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) {
|
||||
}
|
||||
|
||||
static
|
||||
void translateRawReports(UNUSED GoughGraph &cfg, UNUSED const raw_som_dfa &raw,
|
||||
void translateRawReports(UNUSED const GoughGraph &cfg, UNUSED const raw_som_dfa &raw,
|
||||
const flat_map<u32, GoughSSAVarJoin *> &joins_at_s,
|
||||
UNUSED GoughVertex s,
|
||||
const set<som_report> &reports_in,
|
||||
@@ -206,10 +206,6 @@ void makeCFG_top_edge(GoughGraph &cfg, const vector<GoughVertex> &vertices,
|
||||
assert(contains(src_slots, slot_id));
|
||||
|
||||
shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>();
|
||||
if (!vmin) {
|
||||
assert(0);
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
cfg[e].vars.emplace_back(vmin);
|
||||
final_var = vmin.get();
|
||||
|
||||
@@ -321,10 +317,6 @@ void makeCFG_edge(GoughGraph &cfg, const map<u32, u32> &som_creators,
|
||||
DEBUG_PRINTF("bypassing min on join %u\n", slot_id);
|
||||
} else {
|
||||
shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>();
|
||||
if (!vmin) {
|
||||
assert(0);
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
cfg[e].vars.emplace_back(vmin);
|
||||
final_var = vmin.get();
|
||||
|
||||
@@ -441,10 +433,11 @@ unique_ptr<GoughGraph> makeCFG(const raw_som_dfa &raw) {
|
||||
}
|
||||
|
||||
static
|
||||
// cppcheck-suppress constParameterReference
|
||||
void copy_propagate_report_set(vector<pair<ReportID, GoughSSAVar *> > &rep) {
|
||||
vector<pair<ReportID, GoughSSAVar *> >::iterator it = rep.begin();
|
||||
while (it != rep.end()) {
|
||||
GoughSSAVar *var = it->second;
|
||||
const GoughSSAVar *var = it->second;
|
||||
if (!var) {
|
||||
++it;
|
||||
continue;
|
||||
@@ -546,7 +539,7 @@ void remove_dead(GoughGraph &g) {
|
||||
}
|
||||
|
||||
while (!queue.empty()) {
|
||||
GoughSSAVar *v = queue.back();
|
||||
const GoughSSAVar *v = queue.back();
|
||||
queue.pop_back();
|
||||
for (GoughSSAVar *var : v->get_inputs()) {
|
||||
if (var->seen) {
|
||||
@@ -659,8 +652,8 @@ GoughSSAVar *GoughSSAVarJoin::get_input(const GoughEdge &prev) const {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const flat_set<GoughEdge> &GoughSSAVarJoin::get_edges_for_input(
|
||||
GoughSSAVar *input) const {
|
||||
// cppcheck-suppress constParameterPointer
|
||||
const flat_set<GoughEdge> &GoughSSAVarJoin::get_edges_for_input(GoughSSAVar *input) const {
|
||||
return input_map.at(input);
|
||||
}
|
||||
|
||||
@@ -811,7 +804,7 @@ private:
|
||||
|
||||
static
|
||||
void prep_joins_for_generation(const GoughGraph &g, GoughVertex v,
|
||||
map<GoughEdge, edge_join_info> *edge_info) {
|
||||
map<GoughEdge, edge_join_info> &edge_info) {
|
||||
DEBUG_PRINTF("writing out joins for %u\n", g[v].state_id);
|
||||
for (const auto &var : g[v].vars) {
|
||||
u32 dest_slot = var->slot;
|
||||
@@ -822,7 +815,7 @@ void prep_joins_for_generation(const GoughGraph &g, GoughVertex v,
|
||||
}
|
||||
|
||||
for (const GoughEdge &incoming_edge : var_edges.second) {
|
||||
(*edge_info)[incoming_edge].insert(input, dest_slot);
|
||||
edge_info[incoming_edge].insert(input, dest_slot);
|
||||
DEBUG_PRINTF("need %u<-%u\n", dest_slot, input);
|
||||
}
|
||||
}
|
||||
@@ -920,7 +913,7 @@ void build_blocks(const GoughGraph &g,
|
||||
}
|
||||
|
||||
map<GoughEdge, edge_join_info> eji;
|
||||
prep_joins_for_generation(g, t, &eji);
|
||||
prep_joins_for_generation(g, t, eji);
|
||||
|
||||
for (auto &m : eji) {
|
||||
vector<gough_ins> &block = (*blocks)[gough_edge_id(g, m.first)];
|
||||
@@ -1019,7 +1012,7 @@ void update_accel_prog_offset(const gough_build_strat &gbs,
|
||||
verts[gbs.gg[v].state_id] = v;
|
||||
}
|
||||
|
||||
for (auto &m : gbs.built_accel) {
|
||||
for (const auto &m : gbs.built_accel) {
|
||||
gough_accel *ga = m.first;
|
||||
assert(!ga->prog_offset);
|
||||
GoughVertex v = verts[m.second];
|
||||
@@ -1052,7 +1045,7 @@ bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
|
||||
|| !cc.streaming);
|
||||
|
||||
if (!cc.grey.allowGough) {
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("hello world\n");
|
||||
@@ -1083,11 +1076,12 @@ bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
|
||||
auto basic_dfa = mcclellanCompile_i(raw, gbs, cc);
|
||||
assert(basic_dfa);
|
||||
if (!basic_dfa) {
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
|
||||
u8 alphaShift
|
||||
= ((const mcclellan *)getImplNfa(basic_dfa.get()))->alphaShift;
|
||||
// cppcheck-suppress cstyleCast
|
||||
const auto nfa = static_cast<const mcclellan *>(getImplNfa(basic_dfa.get()));
|
||||
u8 alphaShift = nfa->alphaShift;
|
||||
u32 edge_count = (1U << alphaShift) * raw.states.size();
|
||||
|
||||
u32 curr_offset = ROUNDUP_N(basic_dfa->length, 4);
|
||||
@@ -1128,8 +1122,8 @@ bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
|
||||
u32 gough_size = ROUNDUP_N(curr_offset, 16);
|
||||
auto gough_dfa = make_zeroed_bytecode_ptr<NFA>(gough_size);
|
||||
|
||||
memcpy(gough_dfa.get(), basic_dfa.get(), basic_dfa->length);
|
||||
memcpy((char *)gough_dfa.get() + haig_offset, &gi, sizeof(gi));
|
||||
memcpy(reinterpret_cast<char *>(gough_dfa.get()), basic_dfa.get(), basic_dfa->length);
|
||||
memcpy(reinterpret_cast<char *>(gough_dfa.get()) + haig_offset, &gi, sizeof(gi));
|
||||
if (gough_dfa->type == MCCLELLAN_NFA_16) {
|
||||
gough_dfa->type = GOUGH_NFA_16;
|
||||
} else {
|
||||
@@ -1142,18 +1136,19 @@ bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
|
||||
gough_dfa->streamStateSize = base_state_size + slot_count * somPrecision;
|
||||
gough_dfa->scratchStateSize = (u32)(16 + scratch_slot_count * sizeof(u64a));
|
||||
|
||||
mcclellan *m = (mcclellan *)getMutableImplNfa(gough_dfa.get());
|
||||
// cppcheck-suppress cstyleCast
|
||||
auto *m = reinterpret_cast<mcclellan *>(getMutableImplNfa(gough_dfa.get()));
|
||||
m->haig_offset = haig_offset;
|
||||
|
||||
/* update nfa length, haig_info offset (leave mcclellan length alone) */
|
||||
gough_dfa->length = gough_size;
|
||||
|
||||
/* copy in blocks */
|
||||
copy_bytes((u8 *)gough_dfa.get() + edge_prog_offset, edge_blocks);
|
||||
copy_bytes(reinterpret_cast<u8 *>(gough_dfa.get()) + edge_prog_offset, edge_blocks);
|
||||
if (top_prog_offset) {
|
||||
copy_bytes((u8 *)gough_dfa.get() + top_prog_offset, top_blocks);
|
||||
copy_bytes(reinterpret_cast<u8 *>(gough_dfa.get()) + top_prog_offset, top_blocks);
|
||||
}
|
||||
copy_bytes((u8 *)gough_dfa.get() + prog_base_offset, temp_blocks);
|
||||
copy_bytes(reinterpret_cast<u8 *>(gough_dfa.get()) + prog_base_offset, temp_blocks);
|
||||
|
||||
return gough_dfa;
|
||||
}
|
||||
@@ -1186,7 +1181,7 @@ AccelScheme gough_build_strat::find_escape_strings(dstate_id_t this_idx) const {
|
||||
void gough_build_strat::buildAccel(dstate_id_t this_idx, const AccelScheme &info,
|
||||
void *accel_out) {
|
||||
assert(mcclellan_build_strat::accelSize() == sizeof(AccelAux));
|
||||
gough_accel *accel = (gough_accel *)accel_out;
|
||||
gough_accel *accel = reinterpret_cast<gough_accel *>(accel_out);
|
||||
/* build a plain accelaux so we can work out where we can get to */
|
||||
mcclellan_build_strat::buildAccel(this_idx, info, &accel->accel);
|
||||
DEBUG_PRINTF("state %hu is accel with type %hhu\n", this_idx,
|
||||
@@ -1324,7 +1319,8 @@ void raw_gough_report_info_impl::fillReportLists(NFA *n, size_t base_offset,
|
||||
for (const raw_gough_report_list &r : rl) {
|
||||
ro.emplace_back(base_offset);
|
||||
|
||||
gough_report_list *p = (gough_report_list *)((char *)n + base_offset);
|
||||
u8 * n_ptr = reinterpret_cast<u8 *>(n);
|
||||
gough_report_list *p = reinterpret_cast<gough_report_list *>(n_ptr + base_offset);
|
||||
u32 i = 0;
|
||||
|
||||
for (const som_report &sr : r.reports) {
|
||||
|
||||
@@ -195,7 +195,7 @@ void handle_pending_vars(GoughSSAVar *def, const GoughGraph &g,
|
||||
if (contains(aux.containing_v, var)) {
|
||||
/* def is used by join vertex, value only needs to be live on some
|
||||
* incoming edges */
|
||||
GoughSSAVarJoin *vj = (GoughSSAVarJoin *)var;
|
||||
const GoughSSAVarJoin *vj = reinterpret_cast<const GoughSSAVarJoin *>(var);
|
||||
const flat_set<GoughEdge> &live_edges
|
||||
= vj->get_edges_for_input(def);
|
||||
for (const auto &e : live_edges) {
|
||||
@@ -279,7 +279,7 @@ set<const GoughSSAVar *> live_during(GoughSSAVar *def, const GoughGraph &g,
|
||||
|
||||
template<typename VarP>
|
||||
void set_initial_slots(const vector<VarP> &vars, u32 *next_slot) {
|
||||
for (auto &var : vars) {
|
||||
for (const auto &var : vars) {
|
||||
assert(var->slot == INVALID_SLOT);
|
||||
var->slot = (*next_slot)++;
|
||||
}
|
||||
@@ -440,7 +440,7 @@ void create_slot_mapping(const GoughGraph &cfg, UNUSED u32 old_slot_count,
|
||||
}
|
||||
|
||||
static
|
||||
void update_local_slots(GoughGraph &g, set<GoughSSAVar *> &locals,
|
||||
void update_local_slots(GoughGraph &g, const set<GoughSSAVar *> &locals,
|
||||
u32 local_base) {
|
||||
DEBUG_PRINTF("%zu local variables\n", locals.size());
|
||||
/* local variables only occur on edges (joins are never local) */
|
||||
|
||||
@@ -56,7 +56,7 @@ extern "C"
|
||||
char gf_name##_Q(const struct NFA *n, struct mq *q, s64a end); \
|
||||
char gf_name##_Q2(const struct NFA *n, struct mq *q, s64a end); \
|
||||
char gf_name##_QR(const struct NFA *n, struct mq *q, ReportID report); \
|
||||
char gf_name##_reportCurrent(const struct NFA *n, struct mq *q); \
|
||||
char gf_name##_reportCurrent(const struct NFA *n, const struct mq *q); \
|
||||
char gf_name##_inAccept(const struct NFA *n, ReportID report, \
|
||||
struct mq *q); \
|
||||
char gf_name##_inAnyAccept(const struct NFA *n, struct mq *q); \
|
||||
|
||||
@@ -332,7 +332,7 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx,
|
||||
// UE-1636) need to guard cyclic tug-accepts as well.
|
||||
static really_inline
|
||||
char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
|
||||
union RepeatControl *repeat_ctrl, char *repeat_state,
|
||||
const union RepeatControl *repeat_ctrl, const char *repeat_state,
|
||||
u64a offset, ReportID report) {
|
||||
assert(limex);
|
||||
|
||||
@@ -382,7 +382,7 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
|
||||
|
||||
static really_inline
|
||||
char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
|
||||
union RepeatControl *repeat_ctrl, char *repeat_state,
|
||||
const union RepeatControl *repeat_ctrl, const char *repeat_state,
|
||||
u64a offset) {
|
||||
assert(limex);
|
||||
|
||||
|
||||
@@ -290,7 +290,7 @@ void maskSetBits(Mask &m, const NFAStateSet &bits) {
|
||||
|
||||
template<class Mask>
|
||||
bool isMaskZero(Mask &m) {
|
||||
u8 *m8 = (u8 *)&m;
|
||||
const u8 *m8 = (u8 *)&m;
|
||||
for (u32 i = 0; i < sizeof(m); i++) {
|
||||
if (m8[i]) {
|
||||
return false;
|
||||
@@ -329,11 +329,11 @@ void buildReachMapping(const build_info &args, vector<NFAStateSet> &reach,
|
||||
// Build a list of vertices with a state index assigned.
|
||||
vector<NFAVertex> verts;
|
||||
verts.reserve(args.num_states);
|
||||
for (auto v : vertices_range(h)) {
|
||||
if (state_ids.at(v) != NO_STATE) {
|
||||
verts.emplace_back(v);
|
||||
}
|
||||
}
|
||||
auto sidat = [&state_ids=state_ids](const NFAVertex &v) {
|
||||
return (state_ids.at(v) != NO_STATE);
|
||||
};
|
||||
const auto &vr = vertices_range(h);
|
||||
std::copy_if(begin(vr), end(vr), std::back_inserter(verts), sidat);
|
||||
|
||||
// Build a mapping from set-of-states -> reachability.
|
||||
map<NFAStateSet, CharReach> mapping;
|
||||
@@ -556,7 +556,8 @@ void filterAccelStates(NGHolder &g, const map<u32, set<NFAVertex>> &tops,
|
||||
|
||||
// Similarly, connect (start, startDs) if necessary.
|
||||
if (!edge(g.start, g.startDs, g).second) {
|
||||
NFAEdge e = add_edge(g.start, g.startDs, g);
|
||||
NFAEdge e;
|
||||
std::tie(e, std::ignore) = add_edge(g.start, g.startDs, g);
|
||||
tempEdges.emplace_back(e); // Remove edge later.
|
||||
}
|
||||
|
||||
@@ -1485,6 +1486,7 @@ u32 buildExceptionMap(const build_info &args, ReportListCache &reports_cache,
|
||||
continue;
|
||||
}
|
||||
u32 j = args.state_ids.at(w);
|
||||
// j can be NO_STATE if args.state_ids.at(w) returns NO_STATE
|
||||
if (j == NO_STATE) {
|
||||
continue;
|
||||
}
|
||||
@@ -1576,7 +1578,7 @@ u32 findMaxVarShift(const build_info &args, u32 nShifts) {
|
||||
static
|
||||
int getLimexScore(const build_info &args, u32 nShifts) {
|
||||
const NGHolder &h = args.h;
|
||||
u32 maxVarShift = nShifts;
|
||||
u32 maxVarShift;
|
||||
int score = 0;
|
||||
|
||||
score += SHIFT_COST * nShifts;
|
||||
@@ -1704,7 +1706,7 @@ struct Factory {
|
||||
static
|
||||
void allocState(NFA *nfa, u32 repeatscratchStateSize,
|
||||
u32 repeatStreamState) {
|
||||
implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa);
|
||||
const implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa);
|
||||
|
||||
// LimEx NFAs now store the following in state:
|
||||
// 1. state bitvector (always present)
|
||||
@@ -2222,7 +2224,7 @@ struct Factory {
|
||||
static
|
||||
bytecode_ptr<NFA> generateNfa(const build_info &args) {
|
||||
if (args.num_states > NFATraits<dtype>::maxStates) {
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
|
||||
// Build bounded repeat structures.
|
||||
@@ -2581,7 +2583,7 @@ bytecode_ptr<NFA> generate(NGHolder &h,
|
||||
|
||||
if (!cc.grey.allowLimExNFA) {
|
||||
DEBUG_PRINTF("limex not allowed\n");
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
|
||||
// If you ask for a particular type, it had better be an NFA.
|
||||
@@ -2616,7 +2618,7 @@ bytecode_ptr<NFA> generate(NGHolder &h,
|
||||
|
||||
if (scores.empty()) {
|
||||
DEBUG_PRINTF("No NFA returned a valid score for this case.\n");
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
|
||||
// Sort acceptable models in priority order, lowest score first.
|
||||
@@ -2635,7 +2637,7 @@ bytecode_ptr<NFA> generate(NGHolder &h,
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("NFA build failed.\n");
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
|
||||
u32 countAccelStates(NGHolder &h,
|
||||
|
||||
@@ -302,8 +302,8 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ,
|
||||
}
|
||||
#else
|
||||
// A copy of the estate as an array of GPR-sized chunks.
|
||||
CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
|
||||
CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
|
||||
CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; // cppcheck-suppress duplicateExpression
|
||||
CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; // cppcheck-suppress duplicateExpression
|
||||
#ifdef ESTATE_ON_STACK
|
||||
memcpy(chunks, &estate, sizeof(STATE_T));
|
||||
#else
|
||||
@@ -311,7 +311,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ,
|
||||
#endif
|
||||
memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T));
|
||||
|
||||
u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)];
|
||||
u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)]; // cppcheck-suppress duplicateExpression
|
||||
base_index[0] = 0;
|
||||
for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) {
|
||||
base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]);
|
||||
|
||||
@@ -927,7 +927,7 @@ char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state,
|
||||
context);
|
||||
}
|
||||
|
||||
char JOIN(LIMEX_API_ROOT, _reportCurrent)(const struct NFA *n, struct mq *q) {
|
||||
char JOIN(LIMEX_API_ROOT, _reportCurrent)(const struct NFA *n, const struct mq *q) {
|
||||
const IMPL_NFA_T *limex = getImplNfa(n);
|
||||
REPORTCURRENT_FN(limex, q);
|
||||
return 1;
|
||||
@@ -984,9 +984,9 @@ char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa,
|
||||
assert(q->state && q->streamState);
|
||||
|
||||
const IMPL_NFA_T *limex = getImplNfa(nfa);
|
||||
union RepeatControl *repeat_ctrl =
|
||||
const union RepeatControl *repeat_ctrl =
|
||||
getRepeatControlBase(q->state, sizeof(STATE_T));
|
||||
char *repeat_state = q->streamState + limex->stateSize;
|
||||
const char *repeat_state = q->streamState + limex->stateSize;
|
||||
STATE_T state = *(STATE_T *)q->state;
|
||||
u64a offset = q->offset + q_last_loc(q) + 1;
|
||||
|
||||
@@ -999,9 +999,9 @@ char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
|
||||
assert(q->state && q->streamState);
|
||||
|
||||
const IMPL_NFA_T *limex = getImplNfa(nfa);
|
||||
union RepeatControl *repeat_ctrl =
|
||||
const union RepeatControl *repeat_ctrl =
|
||||
getRepeatControlBase(q->state, sizeof(STATE_T));
|
||||
char *repeat_state = q->streamState + limex->stateSize;
|
||||
const char *repeat_state = q->streamState + limex->stateSize;
|
||||
STATE_T state = *(STATE_T *)q->state;
|
||||
u64a offset = q->offset + q_last_loc(q) + 1;
|
||||
|
||||
@@ -1020,9 +1020,9 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
|
||||
|
||||
if (limex->repeatCount) {
|
||||
u64a offset = q->offset + loc + 1;
|
||||
union RepeatControl *repeat_ctrl =
|
||||
const union RepeatControl *repeat_ctrl =
|
||||
getRepeatControlBase(q->state, sizeof(STATE_T));
|
||||
char *repeat_state = q->streamState + limex->stateSize;
|
||||
const char *repeat_state = q->streamState + limex->stateSize;
|
||||
SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &state);
|
||||
}
|
||||
|
||||
|
||||
@@ -177,7 +177,7 @@ static
|
||||
mstate_aux *getAux(NFA *n, dstate_id_t i) {
|
||||
assert(isMcClellanType(n->type));
|
||||
|
||||
mcclellan *m = (mcclellan *)getMutableImplNfa(n);
|
||||
const mcclellan *m = (mcclellan *)getMutableImplNfa(n);
|
||||
mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset);
|
||||
|
||||
mstate_aux *aux = aux_base + i;
|
||||
@@ -203,7 +203,7 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
|
||||
continue;
|
||||
}
|
||||
|
||||
mstate_aux *aux = getAux(n, succ_table[c_prime]);
|
||||
const mstate_aux *aux = getAux(n, succ_table[c_prime]);
|
||||
|
||||
if (aux->accept) {
|
||||
succ_table[c_prime] |= ACCEPT_FLAG;
|
||||
@@ -232,7 +232,7 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
|
||||
continue;
|
||||
}
|
||||
|
||||
mstate_aux *aux = getAux(n, succ_i);
|
||||
const mstate_aux *aux = getAux(n, succ_i);
|
||||
|
||||
if (aux->accept) {
|
||||
succ_i |= ACCEPT_FLAG;
|
||||
@@ -262,7 +262,7 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
|
||||
// check successful transition
|
||||
u16 next = unaligned_load_u16((u8 *)trans);
|
||||
if (next < wide_limit) {
|
||||
mstate_aux *aux = getAux(n, next);
|
||||
const mstate_aux *aux = getAux(n, next);
|
||||
if (aux->accept) {
|
||||
next |= ACCEPT_FLAG;
|
||||
}
|
||||
@@ -279,7 +279,7 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
|
||||
if (next_k >= wide_limit) {
|
||||
continue;
|
||||
}
|
||||
mstate_aux *aux_k = getAux(n, next_k);
|
||||
const mstate_aux *aux_k = getAux(n, next_k);
|
||||
if (aux_k->accept) {
|
||||
next_k |= ACCEPT_FLAG;
|
||||
}
|
||||
@@ -362,7 +362,7 @@ struct raw_report_list {
|
||||
raw_report_list(const flat_set<ReportID> &reports_in,
|
||||
const ReportManager &rm, bool do_remap) {
|
||||
if (do_remap) {
|
||||
for (auto &id : reports_in) {
|
||||
for (const auto &id : reports_in) {
|
||||
reports.insert(rm.getProgramOffset(id));
|
||||
}
|
||||
} else {
|
||||
@@ -546,7 +546,7 @@ size_t calcWideRegionSize(const dfa_info &info) {
|
||||
static
|
||||
void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info,
|
||||
const vector<u32> &reports, const vector<u32> &reports_eod,
|
||||
vector<u32> &reportOffsets) {
|
||||
const vector<u32> &reportOffsets) {
|
||||
const dstate &raw_state = info.states[i];
|
||||
aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]];
|
||||
aux->accept_eod = raw_state.reports_eod.empty() ? 0
|
||||
@@ -631,7 +631,7 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
|
||||
if (!allocateFSN16(info, &count_real_states, &wide_limit)) {
|
||||
DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
|
||||
info.size());
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("count_real_states: %d\n", count_real_states);
|
||||
@@ -800,8 +800,8 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
|
||||
}
|
||||
|
||||
for (size_t i : order) {
|
||||
vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
|
||||
vector<symbol_t> &symbol_chain = info.wide_symbol_chain[i];
|
||||
const vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
|
||||
const vector<symbol_t> &symbol_chain = info.wide_symbol_chain[i];
|
||||
|
||||
u16 width = verify_u16(symbol_chain.size());
|
||||
*(u16 *)(curr_wide_entry + WIDE_WIDTH_OFFSET) = width;
|
||||
@@ -1373,11 +1373,11 @@ bool store_chain_longest(vector<vector<dstate_id_t>> &candidate_chain,
|
||||
/* \brief Generate wide_symbol_chain from wide_state_chain. */
|
||||
static
|
||||
void generate_symbol_chain(dfa_info &info, vector<symbol_t> &chain_tail) {
|
||||
raw_dfa &rdfa = info.raw;
|
||||
const raw_dfa &rdfa = info.raw;
|
||||
assert(chain_tail.size() == info.wide_state_chain.size());
|
||||
|
||||
for (size_t i = 0; i < info.wide_state_chain.size(); i++) {
|
||||
vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
|
||||
const vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
|
||||
vector<symbol_t> symbol_chain;
|
||||
|
||||
info.extra[state_chain[0]].wideHead = true;
|
||||
@@ -1385,7 +1385,6 @@ void generate_symbol_chain(dfa_info &info, vector<symbol_t> &chain_tail) {
|
||||
|
||||
for (size_t j = 0; j < width; j++) {
|
||||
dstate_id_t curr_id = state_chain[j];
|
||||
dstate_id_t next_id = state_chain[j + 1];
|
||||
|
||||
// The last state of the chain doesn't belong to a wide state.
|
||||
info.extra[curr_id].wideState = true;
|
||||
@@ -1394,6 +1393,7 @@ void generate_symbol_chain(dfa_info &info, vector<symbol_t> &chain_tail) {
|
||||
if (j == width - 1) {
|
||||
symbol_chain.emplace_back(chain_tail[i]);
|
||||
} else {
|
||||
dstate_id_t next_id = state_chain[j + 1];
|
||||
for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) {
|
||||
if (rdfa.states[curr_id].next[sym] == next_id) {
|
||||
symbol_chain.emplace_back(sym);
|
||||
|
||||
@@ -144,11 +144,11 @@ u8 dfa_info::getAlphaShift() const {
|
||||
|
||||
static
|
||||
mstate_aux *getAux(NFA *n, dstate_id_t i) {
|
||||
mcsheng *m = (mcsheng *)getMutableImplNfa(n);
|
||||
mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset);
|
||||
const mcsheng *m = reinterpret_cast<const mcsheng *>(getMutableImplNfa(n));
|
||||
mstate_aux *aux_base = reinterpret_cast<mstate_aux *>(reinterpret_cast<u8 *>(n) + m->aux_offset);
|
||||
|
||||
mstate_aux *aux = aux_base + i;
|
||||
assert((const char *)aux < (const char *)n + m->length);
|
||||
assert(reinterpret_cast<const char *>(aux) < reinterpret_cast<const char *>(n) + m->length);
|
||||
return aux;
|
||||
}
|
||||
|
||||
@@ -192,8 +192,8 @@ void createShuffleMasks(mcsheng *m, const dfa_info &info,
|
||||
}
|
||||
for (u32 i = 0; i < N_CHARS; i++) {
|
||||
assert(info.alpha_remap[i] != info.alpha_remap[TOP]);
|
||||
memcpy((u8 *)&m->sheng_masks[i],
|
||||
(u8 *)masks[info.alpha_remap[i]].data(), sizeof(m128));
|
||||
memcpy(reinterpret_cast<u8 *>(&m->sheng_masks[i]),
|
||||
reinterpret_cast<u8 *>(masks[info.alpha_remap[i]].data()), sizeof(m128));
|
||||
}
|
||||
m->sheng_end = sheng_end;
|
||||
m->sheng_accel_limit = sheng_end - 1;
|
||||
@@ -223,7 +223,7 @@ void populateBasicInfo(size_t state_size, const dfa_info &info,
|
||||
nfa->type = MCSHENG_NFA_16;
|
||||
}
|
||||
|
||||
mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);
|
||||
mcsheng *m = reinterpret_cast<mcsheng *>(getMutableImplNfa(nfa));
|
||||
for (u32 i = 0; i < 256; i++) {
|
||||
m->remap[i] = verify_u8(info.alpha_remap[i]);
|
||||
}
|
||||
@@ -244,11 +244,11 @@ void populateBasicInfo(size_t state_size, const dfa_info &info,
|
||||
|
||||
static
|
||||
mstate_aux *getAux64(NFA *n, dstate_id_t i) {
|
||||
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(n);
|
||||
mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset);
|
||||
const mcsheng64 *m = reinterpret_cast<const mcsheng64 *>(getMutableImplNfa(n));
|
||||
mstate_aux *aux_base = reinterpret_cast<mstate_aux *>(reinterpret_cast<u8 *>(n) + m->aux_offset);
|
||||
|
||||
mstate_aux *aux = aux_base + i;
|
||||
assert((const char *)aux < (const char *)n + m->length);
|
||||
assert(reinterpret_cast<const char *>(aux) < reinterpret_cast<const char *>(n) + m->length);
|
||||
return aux;
|
||||
}
|
||||
|
||||
@@ -292,8 +292,8 @@ void createShuffleMasks64(mcsheng64 *m, const dfa_info &info,
|
||||
}
|
||||
for (u32 i = 0; i < N_CHARS; i++) {
|
||||
assert(info.alpha_remap[i] != info.alpha_remap[TOP]);
|
||||
memcpy((u8 *)&m->sheng_succ_masks[i],
|
||||
(u8 *)masks[info.alpha_remap[i]].data(), sizeof(m512));
|
||||
memcpy(reinterpret_cast<u8 *>(&m->sheng_succ_masks[i]),
|
||||
reinterpret_cast<u8 *>(masks[info.alpha_remap[i]].data()), sizeof(m512));
|
||||
}
|
||||
m->sheng_end = sheng_end;
|
||||
m->sheng_accel_limit = sheng_end - 1;
|
||||
@@ -323,7 +323,7 @@ void populateBasicInfo64(size_t state_size, const dfa_info &info,
|
||||
nfa->type = MCSHENG_64_NFA_16;
|
||||
}
|
||||
|
||||
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa);
|
||||
mcsheng64 *m = reinterpret_cast<mcsheng64 *>(getMutableImplNfa(nfa));
|
||||
for (u32 i = 0; i < 256; i++) {
|
||||
m->remap[i] = verify_u8(info.alpha_remap[i]);
|
||||
}
|
||||
@@ -534,7 +534,7 @@ double leakiness(const RdfaGraph &g, dfa_info &info,
|
||||
|
||||
static
|
||||
dstate_id_t find_sheng_states(dfa_info &info,
|
||||
map<dstate_id_t, AccelScheme> &accel_escape_info,
|
||||
const map<dstate_id_t, AccelScheme> &accel_escape_info,
|
||||
size_t max_sheng_states) {
|
||||
RdfaGraph g(info.raw);
|
||||
auto cyclics = find_vertices_in_cycles(g);
|
||||
@@ -650,7 +650,7 @@ void fill_in_aux_info(NFA *nfa, const dfa_info &info,
|
||||
const vector<u32> &reports_eod,
|
||||
u32 report_base_offset,
|
||||
const raw_report_info &ri) {
|
||||
mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);
|
||||
mcsheng *m = reinterpret_cast<mcsheng *>(getMutableImplNfa(nfa));
|
||||
|
||||
vector<u32> reportOffsets;
|
||||
|
||||
@@ -667,14 +667,14 @@ void fill_in_aux_info(NFA *nfa, const dfa_info &info,
|
||||
assert(accel_offset <= accel_end_offset);
|
||||
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
||||
info.strat.buildAccel(i, accel_escape_info.at(i),
|
||||
(void *)((char *)m + this_aux->accel_offset));
|
||||
reinterpret_cast<void *>(reinterpret_cast<char *>(m) + this_aux->accel_offset));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
u16 get_edge_flags(NFA *nfa, dstate_id_t target_impl_id) {
|
||||
mstate_aux *aux = getAux(nfa, target_impl_id);
|
||||
const mstate_aux *aux = getAux(nfa, target_impl_id);
|
||||
u16 flags = 0;
|
||||
|
||||
if (aux->accept) {
|
||||
@@ -692,7 +692,7 @@ static
|
||||
void fill_in_succ_table_16(NFA *nfa, const dfa_info &info,
|
||||
dstate_id_t sheng_end,
|
||||
UNUSED dstate_id_t sherman_base) {
|
||||
u16 *succ_table = (u16 *)((char *)nfa + sizeof(NFA) + sizeof(mcsheng));
|
||||
u16 *succ_table = reinterpret_cast<u16 *>(reinterpret_cast<char *>(nfa) + sizeof(NFA) + sizeof(mcsheng));
|
||||
|
||||
u8 alphaShift = info.getAlphaShift();
|
||||
assert(alphaShift <= 8);
|
||||
@@ -724,7 +724,7 @@ void fill_in_aux_info64(NFA *nfa, const dfa_info &info,
|
||||
const vector<u32> &reports_eod,
|
||||
u32 report_base_offset,
|
||||
const raw_report_info &ri) {
|
||||
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa);
|
||||
mcsheng64 *m = reinterpret_cast<mcsheng64 *>(getMutableImplNfa(nfa));
|
||||
|
||||
vector<u32> reportOffsets;
|
||||
|
||||
@@ -741,14 +741,14 @@ void fill_in_aux_info64(NFA *nfa, const dfa_info &info,
|
||||
assert(accel_offset <= accel_end_offset);
|
||||
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
||||
info.strat.buildAccel(i, accel_escape_info.at(i),
|
||||
(void *)((char *)m + this_aux->accel_offset));
|
||||
reinterpret_cast<void *>(reinterpret_cast<char *>(m) + this_aux->accel_offset));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
u16 get_edge_flags64(NFA *nfa, dstate_id_t target_impl_id) {
|
||||
mstate_aux *aux = getAux64(nfa, target_impl_id);
|
||||
const mstate_aux *aux = getAux64(nfa, target_impl_id);
|
||||
u16 flags = 0;
|
||||
|
||||
if (aux->accept) {
|
||||
@@ -766,7 +766,7 @@ static
|
||||
void fill_in_succ_table_64_16(NFA *nfa, const dfa_info &info,
|
||||
dstate_id_t sheng_end,
|
||||
UNUSED dstate_id_t sherman_base) {
|
||||
u16 *succ_table = (u16 *)((char *)nfa + sizeof(NFA) + sizeof(mcsheng64));
|
||||
u16 *succ_table = reinterpret_cast<u16 *>(reinterpret_cast<char *>(nfa) + sizeof(NFA) + sizeof(mcsheng64));
|
||||
|
||||
u8 alphaShift = info.getAlphaShift();
|
||||
assert(alphaShift <= 8);
|
||||
@@ -955,9 +955,9 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
|
||||
}
|
||||
|
||||
static
|
||||
void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
|
||||
char *nfa_base = (char *)nfa;
|
||||
mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);
|
||||
void fill_in_sherman(NFA *nfa, const dfa_info &info, UNUSED u16 sherman_limit) {
|
||||
char *nfa_base = reinterpret_cast<char *>(nfa);
|
||||
mcsheng *m = reinterpret_cast<mcsheng *>(getMutableImplNfa(nfa));
|
||||
char *sherman_table = nfa_base + m->sherman_offset;
|
||||
|
||||
assert(ISALIGNED_16(sherman_table));
|
||||
@@ -978,10 +978,10 @@ void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
|
||||
assert(len <= 9);
|
||||
dstate_id_t d = info.states[i].daddy;
|
||||
|
||||
*(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE;
|
||||
*(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len;
|
||||
*(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d);
|
||||
u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET);
|
||||
*(reinterpret_cast<u8 *>(curr_sherman_entry + SHERMAN_TYPE_OFFSET)) = SHERMAN_STATE;
|
||||
*(reinterpret_cast<u8 *>(curr_sherman_entry + SHERMAN_LEN_OFFSET)) = len;
|
||||
*(reinterpret_cast<u16 *>(curr_sherman_entry + SHERMAN_DADDY_OFFSET)) = info.implId(d);
|
||||
u8 *chars = reinterpret_cast<u8 *>(curr_sherman_entry + SHERMAN_CHARS_OFFSET);
|
||||
|
||||
for (u16 s = 0; s < info.impl_alpha_size; s++) {
|
||||
if (info.states[i].next[s] != info.states[d].next[s]) {
|
||||
@@ -989,7 +989,7 @@ void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
|
||||
}
|
||||
}
|
||||
|
||||
u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
|
||||
u16 *states = reinterpret_cast<u16 *>(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
|
||||
for (u16 s = 0; s < info.impl_alpha_size; s++) {
|
||||
if (info.states[i].next[s] != info.states[d].next[s]) {
|
||||
DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs,
|
||||
@@ -997,7 +997,7 @@ void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
|
||||
info.implId(info.states[i].next[s]));
|
||||
u16 entry_val = info.implId(info.states[i].next[s]);
|
||||
entry_val |= get_edge_flags(nfa, entry_val);
|
||||
unaligned_store_u16((u8 *)states++, entry_val);
|
||||
unaligned_store_u16(reinterpret_cast<u8 *>(states++), entry_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1018,12 +1018,16 @@ bytecode_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end,
|
||||
|
||||
// Sherman optimization
|
||||
if (info.impl_alpha_size > 16) {
|
||||
#ifdef DEBUG
|
||||
u16 total_daddy = 0;
|
||||
#endif // DEBUG
|
||||
for (u32 i = 0; i < info.size(); i++) {
|
||||
find_better_daddy(info, i,
|
||||
is_cyclic_near(info.raw, info.raw.start_anchored),
|
||||
grey);
|
||||
#ifdef DEBUG
|
||||
total_daddy += info.extra[i].daddytaken;
|
||||
#endif // DEBUG
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
|
||||
@@ -1035,7 +1039,7 @@ bytecode_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end,
|
||||
if (!allocateImplId16(info, sheng_end, &sherman_limit)) {
|
||||
DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
|
||||
info.size());
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
u16 count_real_states = sherman_limit - sheng_end;
|
||||
|
||||
@@ -1059,7 +1063,7 @@ bytecode_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end,
|
||||
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
||||
|
||||
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
||||
mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get());
|
||||
mcsheng *m = reinterpret_cast<mcsheng *>(getMutableImplNfa(nfa.get()));
|
||||
|
||||
populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset,
|
||||
accel_escape_info.size(), arb, single, nfa.get());
|
||||
@@ -1087,7 +1091,7 @@ bytecode_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end,
|
||||
static
|
||||
void fill_in_succ_table_8(NFA *nfa, const dfa_info &info,
|
||||
dstate_id_t sheng_end) {
|
||||
u8 *succ_table = (u8 *)nfa + sizeof(NFA) + sizeof(mcsheng);
|
||||
u8 *succ_table = reinterpret_cast<u8 *>(reinterpret_cast<char *>(nfa) + sizeof(NFA) + sizeof(mcsheng));
|
||||
|
||||
u8 alphaShift = info.getAlphaShift();
|
||||
assert(alphaShift <= 8);
|
||||
@@ -1109,9 +1113,9 @@ void fill_in_succ_table_8(NFA *nfa, const dfa_info &info,
|
||||
}
|
||||
|
||||
static
|
||||
void fill_in_sherman64(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
|
||||
char *nfa_base = (char *)nfa;
|
||||
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa);
|
||||
void fill_in_sherman64(NFA *nfa, const dfa_info &info, UNUSED u16 sherman_limit) {
|
||||
char *nfa_base = reinterpret_cast<char *>(nfa);
|
||||
mcsheng *m = reinterpret_cast<mcsheng *>(getMutableImplNfa(nfa));
|
||||
char *sherman_table = nfa_base + m->sherman_offset;
|
||||
|
||||
assert(ISALIGNED_16(sherman_table));
|
||||
@@ -1132,10 +1136,10 @@ void fill_in_sherman64(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
|
||||
assert(len <= 9);
|
||||
dstate_id_t d = info.states[i].daddy;
|
||||
|
||||
*(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE;
|
||||
*(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len;
|
||||
*(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d);
|
||||
u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET);
|
||||
*(reinterpret_cast<u8 *>(curr_sherman_entry + SHERMAN_TYPE_OFFSET)) = SHERMAN_STATE;
|
||||
*(reinterpret_cast<u8 *>(curr_sherman_entry + SHERMAN_LEN_OFFSET)) = len;
|
||||
*(reinterpret_cast<u16 *>(curr_sherman_entry + SHERMAN_DADDY_OFFSET)) = info.implId(d);
|
||||
u8 *chars = reinterpret_cast<u8 *>(curr_sherman_entry + SHERMAN_CHARS_OFFSET);
|
||||
|
||||
for (u16 s = 0; s < info.impl_alpha_size; s++) {
|
||||
if (info.states[i].next[s] != info.states[d].next[s]) {
|
||||
@@ -1143,7 +1147,7 @@ void fill_in_sherman64(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
|
||||
}
|
||||
}
|
||||
|
||||
u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
|
||||
u16 *states = reinterpret_cast<u16 *>(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
|
||||
for (u16 s = 0; s < info.impl_alpha_size; s++) {
|
||||
if (info.states[i].next[s] != info.states[d].next[s]) {
|
||||
DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs,
|
||||
@@ -1151,7 +1155,7 @@ void fill_in_sherman64(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
|
||||
info.implId(info.states[i].next[s]));
|
||||
u16 entry_val = info.implId(info.states[i].next[s]);
|
||||
entry_val |= get_edge_flags64(nfa, entry_val);
|
||||
unaligned_store_u16((u8 *)states++, entry_val);
|
||||
unaligned_store_u16(reinterpret_cast<u8 *>(states++), entry_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1172,12 +1176,16 @@ bytecode_ptr<NFA> mcsheng64Compile16(dfa_info&info, dstate_id_t sheng_end,
|
||||
|
||||
// Sherman optimization
|
||||
if (info.impl_alpha_size > 16) {
|
||||
#ifdef DEBUG
|
||||
u16 total_daddy = 0;
|
||||
#endif // DEBUG
|
||||
for (u32 i = 0; i < info.size(); i++) {
|
||||
find_better_daddy(info, i,
|
||||
is_cyclic_near(info.raw, info.raw.start_anchored),
|
||||
grey);
|
||||
#ifdef DEBUG
|
||||
total_daddy += info.extra[i].daddytaken;
|
||||
#endif // DEBUG
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
|
||||
@@ -1189,7 +1197,7 @@ bytecode_ptr<NFA> mcsheng64Compile16(dfa_info&info, dstate_id_t sheng_end,
|
||||
if (!allocateImplId16(info, sheng_end, &sherman_limit)) {
|
||||
DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
|
||||
info.size());
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
u16 count_real_states = sherman_limit - sheng_end;
|
||||
|
||||
@@ -1213,7 +1221,7 @@ bytecode_ptr<NFA> mcsheng64Compile16(dfa_info&info, dstate_id_t sheng_end,
|
||||
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
||||
|
||||
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
||||
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa.get());
|
||||
mcsheng64 *m = reinterpret_cast<mcsheng64 *>(getMutableImplNfa(nfa.get()));
|
||||
|
||||
populateBasicInfo64(sizeof(u16), info, total_size, aux_offset, accel_offset,
|
||||
accel_escape_info.size(), arb, single, nfa.get());
|
||||
@@ -1241,7 +1249,7 @@ bytecode_ptr<NFA> mcsheng64Compile16(dfa_info&info, dstate_id_t sheng_end,
|
||||
static
|
||||
void fill_in_succ_table_64_8(NFA *nfa, const dfa_info &info,
|
||||
dstate_id_t sheng_end) {
|
||||
u8 *succ_table = (u8 *)nfa + sizeof(NFA) + sizeof(mcsheng64);
|
||||
// The 8-bit successor table sits right after the generic NFA header and the
// mcsheng64 implementation header, so the offset must use sizeof(mcsheng64)
// (not sizeof(mcsheng)) in this 64-state variant.
u8 *succ_table = reinterpret_cast<u8 *>(reinterpret_cast<char *>(nfa) + sizeof(NFA) + sizeof(mcsheng64));
|
||||
|
||||
u8 alphaShift = info.getAlphaShift();
|
||||
assert(alphaShift <= 8);
|
||||
@@ -1339,7 +1347,7 @@ bytecode_ptr<NFA> mcshengCompile8(dfa_info &info, dstate_id_t sheng_end,
|
||||
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
||||
|
||||
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
||||
mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get());
|
||||
mcsheng *m = reinterpret_cast<mcsheng *>(getMutableImplNfa(nfa.get()));
|
||||
|
||||
allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8,
|
||||
&m->accept_limit_8);
|
||||
@@ -1392,7 +1400,7 @@ bytecode_ptr<NFA> mcsheng64Compile8(dfa_info &info, dstate_id_t sheng_end,
|
||||
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
||||
|
||||
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
||||
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa.get());
|
||||
mcsheng64 *m = reinterpret_cast<mcsheng64 *>(getMutableImplNfa(nfa.get()));
|
||||
|
||||
allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8,
|
||||
&m->accept_limit_8);
|
||||
@@ -1414,7 +1422,7 @@ bytecode_ptr<NFA> mcsheng64Compile8(dfa_info &info, dstate_id_t sheng_end,
|
||||
bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
||||
const ReportManager &rm) {
|
||||
if (!cc.grey.allowMcSheng) {
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
|
||||
mcclellan_build_strat mbs(raw, rm, false);
|
||||
@@ -1430,12 +1438,10 @@ bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
||||
|
||||
map<dstate_id_t, AccelScheme> accel_escape_info
|
||||
= info.strat.getAccelInfo(cc.grey);
|
||||
auto old_states = info.states;
|
||||
dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info, MAX_SHENG_STATES);
|
||||
|
||||
if (sheng_end <= DEAD_STATE + 1) {
|
||||
info.states = old_states;
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
|
||||
bytecode_ptr<NFA> nfa;
|
||||
@@ -1447,7 +1453,6 @@ bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
||||
}
|
||||
|
||||
if (!nfa) {
|
||||
info.states = old_states;
|
||||
return nfa;
|
||||
}
|
||||
|
||||
@@ -1462,12 +1467,12 @@ bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
||||
bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc,
|
||||
const ReportManager &rm) {
|
||||
if (!cc.grey.allowMcSheng) {
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
|
||||
if (!cc.target_info.has_avx512vbmi()) {
|
||||
DEBUG_PRINTF("McSheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
|
||||
mcclellan_build_strat mbs(raw, rm, false);
|
||||
@@ -1488,7 +1493,7 @@ bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc,
|
||||
sheng_end64 = find_sheng_states(info, accel_escape_info, MAX_SHENG64_STATES);
|
||||
|
||||
if (sheng_end64 <= DEAD_STATE + 1) {
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
} else {
|
||||
using64state = true;
|
||||
}
|
||||
|
||||
@@ -512,7 +512,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
|
||||
|
||||
verm_restart:;
|
||||
assert(buf[curr] == kp->u.verm.c);
|
||||
size_t test = curr;
|
||||
size_t test;
|
||||
if (curr + min_rep < length) {
|
||||
test = curr + min_rep;
|
||||
} else {
|
||||
@@ -534,7 +534,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
|
||||
m128 hi = kp->u.shuf.mask_hi;
|
||||
shuf_restart:
|
||||
assert(do_single_shufti(lo, hi, buf[curr]));
|
||||
size_t test = curr;
|
||||
size_t test;
|
||||
if (curr + min_rep < length) {
|
||||
test = curr + min_rep;
|
||||
} else {
|
||||
@@ -556,7 +556,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
|
||||
const m128 mask1 = kp->u.truffle.mask1;
|
||||
const m128 mask2 = kp->u.truffle.mask2;
|
||||
truffle_restart:;
|
||||
size_t test = curr;
|
||||
size_t test;
|
||||
if (curr + min_rep < length) {
|
||||
test = curr + min_rep;
|
||||
} else {
|
||||
@@ -582,7 +582,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
|
||||
|
||||
nverm_restart:;
|
||||
assert(buf[curr] != kp->u.verm.c);
|
||||
size_t test = curr;
|
||||
size_t test;
|
||||
if (curr + min_rep < length) {
|
||||
test = curr + min_rep;
|
||||
} else {
|
||||
@@ -607,7 +607,7 @@ size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void restartKilo(const struct mpv *m, UNUSED u8 *active, u8 *reporters,
|
||||
void restartKilo(const struct mpv *m, UNUSED const u8 *active, u8 *reporters,
|
||||
struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
|
||||
const u8 *buf, u64a prev_limit, size_t buf_length, u32 i) {
|
||||
const struct mpv_kilopuff *kp = (const void *)(m + 1);
|
||||
@@ -1074,7 +1074,7 @@ s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end) {
|
||||
return 0;
|
||||
} else {
|
||||
const struct mpv *m = getImplNfa(nfa);
|
||||
u8 *reporters = (u8 *)q->state + m->reporter_offset;
|
||||
const u8 *reporters = (u8 *)q->state + m->reporter_offset;
|
||||
|
||||
if (mmbit_any_precise(reporters, m->kilo_count)) {
|
||||
DEBUG_PRINTF("next byte\n");
|
||||
@@ -1087,7 +1087,7 @@ s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end) {
|
||||
next_event = q->items[q->cur].location;
|
||||
}
|
||||
|
||||
struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state;
|
||||
const struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state;
|
||||
struct mpv_pq_item *pq
|
||||
= (struct mpv_pq_item *)(q->state + m->pq_offset);
|
||||
if (s->pq_size) {
|
||||
|
||||
@@ -167,7 +167,7 @@ void pushQueueNoMerge(struct mq * restrict q, u32 e, s64a loc) {
|
||||
// We assert that the event is different from its predecessor. If it's a
|
||||
// dupe, you should have used the ordinary pushQueue call.
|
||||
if (q->end) {
|
||||
UNUSED struct mq_item *prev = &q->items[q->end - 1];
|
||||
UNUSED const struct mq_item *prev = &q->items[q->end - 1];
|
||||
assert(prev->type != e || prev->location != loc);
|
||||
}
|
||||
#endif
|
||||
@@ -251,6 +251,10 @@ void q_skip_forward_to(struct mq *q, s64a min_loc) {
|
||||
// Dump the contents of the given queue.
|
||||
static never_inline UNUSED
|
||||
void debugQueue(const struct mq *q) {
|
||||
if (q == nullptr) {
|
||||
DEBUG_PRINTF("q=NULL!\n");
|
||||
return;
|
||||
}
|
||||
DEBUG_PRINTF("q=%p, nfa=%p\n", q, q->nfa);
|
||||
DEBUG_PRINTF("q offset=%llu, buf={%p, len=%zu}, history={%p, len=%zu}\n",
|
||||
q->offset, q->buffer, q->length, q->history, q->hlength);
|
||||
|
||||
@@ -68,7 +68,7 @@ void pushQueueAt(struct mq * restrict q, u32 pos, u32 e, s64a loc) {
|
||||
// We assert that the event is different from its predecessor. If it's a
|
||||
// dupe, you should have used the ordinary pushQueue call.
|
||||
if (q->end) {
|
||||
UNUSED struct mq_item *prev = &q->items[q->end - 1];
|
||||
UNUSED const struct mq_item *prev = &q->items[q->end - 1];
|
||||
assert(prev->type != e || prev->location != loc);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2020, Intel Corporation
|
||||
* Copyright (c) 2024, VectorCamp PC
|
||||
* Copyright (c) 2021, Arm Limited
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@@ -133,6 +134,7 @@ struct ALIGN_CL_DIRECTIVE NFA {
|
||||
/* Note: implementation (e.g. a LimEx) directly follows struct in memory */
|
||||
} ;
|
||||
|
||||
#ifndef __cplusplus
|
||||
// Accessor macro for the implementation NFA: we do things this way to avoid
|
||||
// type-punning warnings.
|
||||
#define getImplNfa(nfa) \
|
||||
@@ -140,6 +142,13 @@ struct ALIGN_CL_DIRECTIVE NFA {
|
||||
|
||||
// Non-const version of the above, used at compile time.
|
||||
#define getMutableImplNfa(nfa) ((char *)(nfa) + sizeof(struct NFA))
|
||||
#else
|
||||
// Same versions without C casts to avoid Cppcheck warnings
|
||||
#define getImplNfa(nfa) \
|
||||
(reinterpret_cast<const void *>(reinterpret_cast<const char *>(nfa) + sizeof(struct NFA)))
|
||||
|
||||
#define getMutableImplNfa(nfa) (reinterpret_cast<char *>(nfa) + sizeof(struct NFA))
|
||||
#endif
|
||||
|
||||
static really_inline u32 nfaAcceptsEod(const struct NFA *nfa) {
|
||||
return nfa->flags & NFA_ACCEPTS_EOD;
|
||||
|
||||
@@ -45,7 +45,7 @@ struct RdfaEdgeProps {
|
||||
};
|
||||
|
||||
struct RdfaGraph : public ue2_graph<RdfaGraph, RdfaVertexProps, RdfaEdgeProps> {
|
||||
RdfaGraph(const raw_dfa &rdfa);
|
||||
explicit RdfaGraph(const raw_dfa &rdfa);
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -785,7 +785,7 @@ enum RepeatMatch repeatHasMatchRange(const struct RepeatInfo *info,
|
||||
if (diff > info->repeatMax) {
|
||||
DEBUG_PRINTF("range list is stale\n");
|
||||
return REPEAT_STALE;
|
||||
} else if (diff >= info->repeatMin && diff <= info->repeatMax) {
|
||||
} else if (diff >= info->repeatMin) {
|
||||
return REPEAT_MATCH;
|
||||
}
|
||||
|
||||
@@ -836,7 +836,7 @@ enum RepeatMatch repeatHasMatchBitmap(const struct RepeatInfo *info,
|
||||
if (diff > info->repeatMax) {
|
||||
DEBUG_PRINTF("stale\n");
|
||||
return REPEAT_STALE;
|
||||
} else if (diff >= info->repeatMin && diff <= info->repeatMax) {
|
||||
} else if (diff >= info->repeatMin) {
|
||||
return REPEAT_MATCH;
|
||||
}
|
||||
|
||||
|
||||
@@ -94,9 +94,6 @@ u32 repeatRecurTable(struct RepeatStateInfo *info, const depth &repeatMax,
|
||||
static
|
||||
u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax,
|
||||
const u32 minPeriod, u32 rv) {
|
||||
u32 cnt = 0;
|
||||
u32 patch_bits = 0;
|
||||
u32 total_size = 0;
|
||||
u32 min = ~0U;
|
||||
u32 patch_len = 0;
|
||||
|
||||
@@ -105,11 +102,11 @@ u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax,
|
||||
}
|
||||
|
||||
for (u32 i = minPeriod; i <= rv; i++) {
|
||||
cnt = ((u32)repeatMax + (i - 1)) / i + 1;
|
||||
u32 cnt = ((u32)repeatMax + (i - 1)) / i + 1;
|
||||
|
||||
// no bit packing version
|
||||
patch_bits = calcPackedBits(info->table[i]);
|
||||
total_size = (patch_bits + 7U) / 8U * cnt;
|
||||
u32 patch_bits = calcPackedBits(info->table[i]);
|
||||
u32 total_size = (patch_bits + 7U) / 8U * cnt;
|
||||
|
||||
if (total_size < min) {
|
||||
patch_len = i;
|
||||
|
||||
@@ -154,7 +154,7 @@ char fireReports(const struct sheng *sh, NfaCallback cb, void *ctxt,
|
||||
return MO_CONTINUE_MATCHING; /* continue execution */
|
||||
}
|
||||
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
// Sheng32
|
||||
static really_inline
|
||||
const struct sheng32 *get_sheng32(const struct NFA *n) {
|
||||
@@ -351,7 +351,7 @@ char fireReports64(const struct sheng64 *sh, NfaCallback cb, void *ctxt,
|
||||
}
|
||||
return MO_CONTINUE_MATCHING; /* continue execution */
|
||||
}
|
||||
#endif // end of HAVE_AVX512VBMI
|
||||
#endif // end of HAVE_AVX512VBMI || HAVE_SVE
|
||||
|
||||
/* include Sheng function definitions */
|
||||
#include "sheng_defs.h"
|
||||
@@ -814,7 +814,6 @@ char nfaExecSheng_testEOD(const struct NFA *nfa, const char *state,
|
||||
char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q) {
|
||||
const struct sheng *sh = (const struct sheng *)getImplNfa(n);
|
||||
NfaCallback cb = q->cb;
|
||||
void *ctxt = q->context;
|
||||
u8 s = *(u8 *)q->state;
|
||||
const struct sstate_aux *aux = get_aux(sh, s);
|
||||
u64a offset = q_cur_offset(q);
|
||||
@@ -823,6 +822,7 @@ char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q) {
|
||||
assert(q_cur_type(q) == MQE_START);
|
||||
|
||||
if (aux->accept) {
|
||||
void *ctxt = q->context;
|
||||
if (sh->flags & SHENG_FLAG_SINGLE_REPORT) {
|
||||
fireSingleReport(cb, ctxt, sh->report, offset);
|
||||
} else {
|
||||
@@ -871,7 +871,7 @@ char nfaExecSheng_expandState(UNUSED const struct NFA *nfa, void *dest,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
// Sheng32
|
||||
static really_inline
|
||||
char runSheng32Cb(const struct sheng32 *sh, NfaCallback cb, void *ctxt,
|
||||
@@ -1874,4 +1874,4 @@ char nfaExecSheng64_expandState(UNUSED const struct NFA *nfa, void *dest,
|
||||
*(u8 *)dest = *(const u8 *)src;
|
||||
return 0;
|
||||
}
|
||||
#endif // end of HAVE_AVX512VBMI
|
||||
#endif // end of HAVE_AVX512VBMI || HAVE_SVE
|
||||
|
||||
@@ -58,7 +58,7 @@ char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q);
|
||||
char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer,
|
||||
size_t length, NfaCallback cb, void *context);
|
||||
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
|
||||
#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||
|
||||
@@ -106,8 +106,7 @@ char nfaExecSheng64_reportCurrent(const struct NFA *n, struct mq *q);
|
||||
|
||||
char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer,
|
||||
size_t length, NfaCallback cb, void *context);
|
||||
|
||||
#else // !HAVE_AVX512VBMI
|
||||
#else // !HAVE_AVX512VBMI && !HAVE_SVE
|
||||
|
||||
#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
|
||||
#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||
@@ -138,6 +137,7 @@ char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer,
|
||||
#define nfaExecSheng64_testEOD NFA_API_NO_IMPL
|
||||
#define nfaExecSheng64_reportCurrent NFA_API_NO_IMPL
|
||||
#define nfaExecSheng64_B NFA_API_NO_IMPL
|
||||
#endif // end of HAVE_AVX512VBMI
|
||||
#endif // end of HAVE_AVX512VBMI || defined(HAVE_SVE)
|
||||
|
||||
|
||||
#endif /* SHENG_H_ */
|
||||
|
||||
@@ -52,7 +52,7 @@ u8 hasInterestingStates(const u8 a, const u8 b, const u8 c, const u8 d) {
|
||||
return (a | b | c | d) & (SHENG_STATE_FLAG_MASK);
|
||||
}
|
||||
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
static really_inline
|
||||
u8 isDeadState32(const u8 a) {
|
||||
return a & SHENG32_STATE_DEAD;
|
||||
@@ -108,7 +108,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define SHENG_IMPL sheng_cod
|
||||
#define DEAD_FUNC isDeadState
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_cod
|
||||
#define DEAD_FUNC32 isDeadState32
|
||||
#define ACCEPT_FUNC32 isAcceptState32
|
||||
@@ -121,7 +121,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef DEAD_FUNC32
|
||||
#undef ACCEPT_FUNC32
|
||||
@@ -135,7 +135,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define SHENG_IMPL sheng_co
|
||||
#define DEAD_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_co
|
||||
#define DEAD_FUNC32 dummyFunc
|
||||
#define ACCEPT_FUNC32 isAcceptState32
|
||||
@@ -148,7 +148,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef DEAD_FUNC32
|
||||
#undef ACCEPT_FUNC32
|
||||
@@ -162,7 +162,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define SHENG_IMPL sheng_samd
|
||||
#define DEAD_FUNC isDeadState
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_samd
|
||||
#define DEAD_FUNC32 isDeadState32
|
||||
#define ACCEPT_FUNC32 isAcceptState32
|
||||
@@ -175,7 +175,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef DEAD_FUNC32
|
||||
#undef ACCEPT_FUNC32
|
||||
@@ -189,7 +189,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define SHENG_IMPL sheng_sam
|
||||
#define DEAD_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_sam
|
||||
#define DEAD_FUNC32 dummyFunc
|
||||
#define ACCEPT_FUNC32 isAcceptState32
|
||||
@@ -202,7 +202,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef DEAD_FUNC32
|
||||
#undef ACCEPT_FUNC32
|
||||
@@ -216,7 +216,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define SHENG_IMPL sheng_nmd
|
||||
#define DEAD_FUNC isDeadState
|
||||
#define ACCEPT_FUNC dummyFunc
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_nmd
|
||||
#define DEAD_FUNC32 isDeadState32
|
||||
#define ACCEPT_FUNC32 dummyFunc
|
||||
@@ -229,7 +229,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef DEAD_FUNC32
|
||||
#undef ACCEPT_FUNC32
|
||||
@@ -243,7 +243,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define SHENG_IMPL sheng_nm
|
||||
#define DEAD_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC dummyFunc
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_nm
|
||||
#define DEAD_FUNC32 dummyFunc
|
||||
#define ACCEPT_FUNC32 dummyFunc
|
||||
@@ -256,7 +256,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef DEAD_FUNC32
|
||||
#undef ACCEPT_FUNC32
|
||||
@@ -277,7 +277,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC isAccelState
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_coda
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 isDeadState32
|
||||
@@ -296,7 +296,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@@ -316,7 +316,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_cod
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 isDeadState32
|
||||
@@ -339,7 +339,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@@ -363,7 +363,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC isAccelState
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_coa
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 dummyFunc
|
||||
@@ -382,7 +382,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@@ -402,7 +402,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_co
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 dummyFunc
|
||||
@@ -425,7 +425,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@@ -449,7 +449,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC isAccelState
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_samda
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 isDeadState32
|
||||
@@ -468,7 +468,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@@ -488,7 +488,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_samd
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 isDeadState32
|
||||
@@ -511,7 +511,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@@ -535,7 +535,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC isAccelState
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_sama
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 dummyFunc
|
||||
@@ -554,7 +554,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@@ -574,7 +574,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_sam
|
||||
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||
#define INNER_DEAD_FUNC32 dummyFunc
|
||||
@@ -597,7 +597,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@@ -623,7 +623,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC isAccelState
|
||||
#define ACCEPT_FUNC dummyFunc
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_nmda
|
||||
#define INTERESTING_FUNC32 dummyFunc4
|
||||
#define INNER_DEAD_FUNC32 dummyFunc
|
||||
@@ -642,7 +642,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@@ -662,7 +662,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC dummyFunc
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_nmd
|
||||
#define INTERESTING_FUNC32 dummyFunc4
|
||||
#define INNER_DEAD_FUNC32 dummyFunc
|
||||
@@ -685,7 +685,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
@@ -712,7 +712,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC dummyFunc
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#define SHENG32_IMPL sheng32_4_nm
|
||||
#define INTERESTING_FUNC32 dummyFunc4
|
||||
#define INNER_DEAD_FUNC32 dummyFunc
|
||||
@@ -735,7 +735,7 @@ u8 dummyFunc(UNUSED const u8 a) {
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
#undef SHENG32_IMPL
|
||||
#undef INTERESTING_FUNC32
|
||||
#undef INNER_DEAD_FUNC32
|
||||
|
||||
@@ -96,7 +96,7 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
static really_inline
|
||||
char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
const struct sheng32 *s,
|
||||
@@ -114,14 +114,28 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
}
|
||||
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
const svbool_t lane_pred_32 = svwhilelt_b8(0, 32);
|
||||
svuint8_t cur_state = svdup_u8(*state);
|
||||
svuint8_t tbl_mask = svdup_u8((unsigned char)0x1F);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#else
|
||||
m512 cur_state = set1_64x8(*state);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#endif
|
||||
|
||||
while (likely(cur_buf != end)) {
|
||||
const u8 c = *cur_buf;
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
svuint8_t succ_mask = svld1(lane_pred_32, (const u8*)(masks + c));
|
||||
cur_state = svtbl(succ_mask, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 tmp = svlastb(lane_pred_32, cur_state);
|
||||
#else
|
||||
const m512 succ_mask = masks[c];
|
||||
cur_state = vpermb512(cur_state, succ_mask);
|
||||
const u8 tmp = movd512(cur_state);
|
||||
#endif
|
||||
|
||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
|
||||
DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG32_STATE_MASK,
|
||||
@@ -153,7 +167,11 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
}
|
||||
cur_buf++;
|
||||
}
|
||||
#if defined(HAVE_SVE)
|
||||
*state = svlastb(lane_pred_32, cur_state);
|
||||
#else
|
||||
*state = movd512(cur_state);
|
||||
#endif
|
||||
*scan_end = cur_buf;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
@@ -175,14 +193,28 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
}
|
||||
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
const svbool_t lane_pred_64 = svwhilelt_b8(0, 64);
|
||||
svuint8_t cur_state = svdup_u8(*state);
|
||||
svuint8_t tbl_mask = svdup_u8((unsigned char)0x3F);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#else
|
||||
m512 cur_state = set1_64x8(*state);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#endif
|
||||
|
||||
while (likely(cur_buf != end)) {
|
||||
const u8 c = *cur_buf;
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
svuint8_t succ_mask = svld1(lane_pred_64, (const u8*)(masks + c));
|
||||
cur_state = svtbl(succ_mask, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 tmp = svlastb(lane_pred_64, cur_state);
|
||||
#else
|
||||
const m512 succ_mask = masks[c];
|
||||
cur_state = vpermb512(cur_state, succ_mask);
|
||||
const u8 tmp = movd512(cur_state);
|
||||
#endif
|
||||
|
||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
|
||||
DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG64_STATE_MASK,
|
||||
@@ -214,7 +246,11 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
}
|
||||
cur_buf++;
|
||||
}
|
||||
#if defined(HAVE_SVE)
|
||||
*state = svlastb(lane_pred_64, cur_state);
|
||||
#else
|
||||
*state = movd512(cur_state);
|
||||
#endif
|
||||
*scan_end = cur_buf;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
@@ -283,7 +283,7 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
|
||||
static really_inline
|
||||
char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
const struct sheng32 *s,
|
||||
@@ -320,8 +320,15 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
const svbool_t lane_pred_32 = svwhilelt_b8(0, 32);
|
||||
svuint8_t cur_state = svdup_u8(*state);
|
||||
svuint8_t tbl_mask = svdup_u8((unsigned char)0x1F);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#else
|
||||
m512 cur_state = set1_64x8(*state);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#endif
|
||||
|
||||
while (likely(end - cur_buf >= 4)) {
|
||||
const u8 *b1 = cur_buf;
|
||||
@@ -333,6 +340,23 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
const u8 c3 = *b3;
|
||||
const u8 c4 = *b4;
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
svuint8_t succ_mask1 = svld1(lane_pred_32, (const u8*)(masks+c1));
|
||||
cur_state = svtbl(succ_mask1, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a1 = svlastb(lane_pred_32, cur_state);
|
||||
|
||||
svuint8_t succ_mask2 = svld1(lane_pred_32, (const u8*)(masks+c2));
|
||||
cur_state = svtbl(succ_mask2, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a2 = svlastb(lane_pred_32, cur_state);
|
||||
|
||||
svuint8_t succ_mask3 = svld1(lane_pred_32, (const u8*)(masks+c3));
|
||||
cur_state = svtbl(succ_mask3, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a3 = svlastb(lane_pred_32, cur_state);
|
||||
|
||||
svuint8_t succ_mask4 = svld1(lane_pred_32, (const u8*)(masks+c4));
|
||||
cur_state = svtbl(succ_mask4, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a4 = svlastb(lane_pred_32, cur_state);
|
||||
#else
|
||||
const m512 succ_mask1 = masks[c1];
|
||||
cur_state = vpermb512(cur_state, succ_mask1);
|
||||
const u8 a1 = movd512(cur_state);
|
||||
@@ -348,6 +372,7 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
const m512 succ_mask4 = masks[c4];
|
||||
cur_state = vpermb512(cur_state, succ_mask4);
|
||||
const u8 a4 = movd512(cur_state);
|
||||
#endif
|
||||
|
||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
|
||||
DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG32_STATE_MASK,
|
||||
@@ -517,7 +542,11 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
};
|
||||
cur_buf += 4;
|
||||
}
|
||||
#if defined(HAVE_SVE)
|
||||
*state = svlastb(lane_pred_32, cur_state);
|
||||
#else
|
||||
*state = movd512(cur_state);
|
||||
#endif
|
||||
*scan_end = cur_buf;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
@@ -541,9 +570,15 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
*scan_end = end;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
const svbool_t lane_pred_64 = svwhilelt_b8(0, 64);
|
||||
svuint8_t cur_state = svdup_u8(*state);
|
||||
svuint8_t tbl_mask = svdup_u8((unsigned char)0x3F);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#else
|
||||
m512 cur_state = set1_64x8(*state);
|
||||
const m512 *masks = s->succ_masks;
|
||||
#endif
|
||||
|
||||
while (likely(end - cur_buf >= 4)) {
|
||||
const u8 *b1 = cur_buf;
|
||||
@@ -555,6 +590,23 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
const u8 c3 = *b3;
|
||||
const u8 c4 = *b4;
|
||||
|
||||
#if defined(HAVE_SVE)
|
||||
svuint8_t succ_mask1 = svld1(lane_pred_64, (const u8*)(masks+c1));
|
||||
cur_state = svtbl(succ_mask1, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a1 = svlastb(lane_pred_64, cur_state);
|
||||
|
||||
svuint8_t succ_mask2 = svld1(lane_pred_64, (const u8*)(masks+c2));
|
||||
cur_state = svtbl(succ_mask2, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a2 = svlastb(lane_pred_64, cur_state);
|
||||
|
||||
svuint8_t succ_mask3 = svld1(lane_pred_64, (const u8*)(masks+c3));
|
||||
cur_state = svtbl(succ_mask3, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a3 = svlastb(lane_pred_64, cur_state);
|
||||
|
||||
svuint8_t succ_mask4 = svld1(lane_pred_64, (const u8*)(masks+c4));
|
||||
cur_state = svtbl(succ_mask4, svand_x(svptrue_b8(), tbl_mask, cur_state));
|
||||
const u8 a4 = svlastb(lane_pred_64, cur_state);
|
||||
#else
|
||||
const m512 succ_mask1 = masks[c1];
|
||||
cur_state = vpermb512(cur_state, succ_mask1);
|
||||
const u8 a1 = movd512(cur_state);
|
||||
@@ -570,6 +622,7 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
const m512 succ_mask4 = masks[c4];
|
||||
cur_state = vpermb512(cur_state, succ_mask4);
|
||||
const u8 a4 = movd512(cur_state);
|
||||
#endif
|
||||
|
||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
|
||||
DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG64_STATE_MASK,
|
||||
@@ -703,7 +756,11 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||
}
|
||||
cur_buf += 4;
|
||||
}
|
||||
#if defined(HAVE_SVE)
|
||||
*state = svlastb(lane_pred_64, cur_state);
|
||||
#else
|
||||
*state = movd512(cur_state);
|
||||
#endif
|
||||
*scan_end = cur_buf;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
@@ -99,7 +99,7 @@ struct dfa_info {
|
||||
return next(idx, TOP);
|
||||
}
|
||||
dstate &next(dstate_id_t idx, u16 chr) {
|
||||
auto &src = (*this)[idx];
|
||||
const auto &src = (*this)[idx];
|
||||
auto next_id = src.next[raw.alpha_remap[chr]];
|
||||
return states[next_id];
|
||||
}
|
||||
@@ -109,7 +109,7 @@ struct dfa_info {
|
||||
// if DFA can't die, shift all indices left by 1
|
||||
return can_die ? idx : idx + 1;
|
||||
}
|
||||
bool isDead(dstate &state) {
|
||||
bool isDead(const dstate &state) {
|
||||
return raw_id(state.impl_id) == DEAD_STATE;
|
||||
}
|
||||
bool isDead(dstate_id_t idx) {
|
||||
@@ -117,7 +117,7 @@ struct dfa_info {
|
||||
}
|
||||
|
||||
private:
|
||||
static bool dfaCanDie(raw_dfa &rdfa) {
|
||||
static bool dfaCanDie(const raw_dfa &rdfa) {
|
||||
for (unsigned chr = 0; chr < 256; chr++) {
|
||||
for (dstate_id_t state = 0; state < rdfa.states.size(); state++) {
|
||||
auto succ = rdfa.states[state].next[rdfa.alpha_remap[chr]];
|
||||
@@ -138,7 +138,7 @@ struct raw_report_list {
|
||||
raw_report_list(const flat_set<ReportID> &reports_in,
|
||||
const ReportManager &rm, bool do_remap) {
|
||||
if (do_remap) {
|
||||
for (auto &id : reports_in) {
|
||||
for (const auto &id : reports_in) {
|
||||
reports.insert(rm.getProgramOffset(id));
|
||||
}
|
||||
} else {
|
||||
@@ -334,14 +334,14 @@ void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info,
|
||||
|
||||
template <typename T>
|
||||
static
|
||||
u8 getShengState(UNUSED dstate &state, UNUSED dfa_info &info,
|
||||
UNUSED map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
u8 getShengState(UNUSED const dstate &state, UNUSED dfa_info &info,
|
||||
UNUSED const map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <>
|
||||
u8 getShengState<sheng>(dstate &state, dfa_info &info,
|
||||
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
u8 getShengState<sheng>(const dstate &state, dfa_info &info,
|
||||
const map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
u8 s = state.impl_id;
|
||||
if (!state.reports.empty()) {
|
||||
s |= SHENG_STATE_ACCEPT;
|
||||
@@ -356,8 +356,8 @@ u8 getShengState<sheng>(dstate &state, dfa_info &info,
|
||||
}
|
||||
|
||||
template <>
|
||||
u8 getShengState<sheng32>(dstate &state, dfa_info &info,
|
||||
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
u8 getShengState<sheng32>(const dstate &state, dfa_info &info,
|
||||
const map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
u8 s = state.impl_id;
|
||||
if (!state.reports.empty()) {
|
||||
s |= SHENG32_STATE_ACCEPT;
|
||||
@@ -372,8 +372,8 @@ u8 getShengState<sheng32>(dstate &state, dfa_info &info,
|
||||
}
|
||||
|
||||
template <>
|
||||
u8 getShengState<sheng64>(dstate &state, dfa_info &info,
|
||||
UNUSED map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
u8 getShengState<sheng64>(const dstate &state, dfa_info &info,
|
||||
UNUSED const map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
u8 s = state.impl_id;
|
||||
if (!state.reports.empty()) {
|
||||
s |= SHENG64_STATE_ACCEPT;
|
||||
@@ -409,8 +409,8 @@ void fillAccelAux(struct NFA *n, dfa_info &info,
|
||||
|
||||
template <typename T>
|
||||
static
|
||||
void populateBasicInfo(UNUSED struct NFA *n, UNUSED dfa_info &info,
|
||||
UNUSED map<dstate_id_t, AccelScheme> &accelInfo,
|
||||
void populateBasicInfo(UNUSED struct NFA *n, UNUSED dfa_info &info, // cppcheck-suppress constParameterPointer
|
||||
UNUSED const map<dstate_id_t, AccelScheme> &accelInfo,
|
||||
UNUSED u32 aux_offset, UNUSED u32 report_offset,
|
||||
UNUSED u32 accel_offset, UNUSED u32 total_size,
|
||||
UNUSED u32 dfa_size) {
|
||||
@@ -418,7 +418,7 @@ void populateBasicInfo(UNUSED struct NFA *n, UNUSED dfa_info &info,
|
||||
|
||||
template <>
|
||||
void populateBasicInfo<sheng>(struct NFA *n, dfa_info &info,
|
||||
map<dstate_id_t, AccelScheme> &accelInfo,
|
||||
const map<dstate_id_t, AccelScheme> &accelInfo,
|
||||
u32 aux_offset, u32 report_offset,
|
||||
u32 accel_offset, u32 total_size,
|
||||
u32 dfa_size) {
|
||||
@@ -443,7 +443,7 @@ void populateBasicInfo<sheng>(struct NFA *n, dfa_info &info,
|
||||
|
||||
template <>
|
||||
void populateBasicInfo<sheng32>(struct NFA *n, dfa_info &info,
|
||||
map<dstate_id_t, AccelScheme> &accelInfo,
|
||||
const map<dstate_id_t, AccelScheme> &accelInfo,
|
||||
u32 aux_offset, u32 report_offset,
|
||||
u32 accel_offset, u32 total_size,
|
||||
u32 dfa_size) {
|
||||
@@ -468,7 +468,7 @@ void populateBasicInfo<sheng32>(struct NFA *n, dfa_info &info,
|
||||
|
||||
template <>
|
||||
void populateBasicInfo<sheng64>(struct NFA *n, dfa_info &info,
|
||||
map<dstate_id_t, AccelScheme> &accelInfo,
|
||||
const map<dstate_id_t, AccelScheme> &accelInfo,
|
||||
u32 aux_offset, u32 report_offset,
|
||||
u32 accel_offset, u32 total_size,
|
||||
u32 dfa_size) {
|
||||
@@ -551,19 +551,19 @@ void fillSingleReport(NFA *n, ReportID r_id) {
|
||||
|
||||
template <typename T>
|
||||
static
|
||||
bool createShuffleMasks(UNUSED T *s, UNUSED dfa_info &info,
|
||||
UNUSED map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
bool createShuffleMasks(UNUSED T *s, UNUSED dfa_info &info, // cppcheck-suppress constParameterPointer
|
||||
UNUSED const map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool createShuffleMasks<sheng>(sheng *s, dfa_info &info,
|
||||
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
const map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
for (u16 chr = 0; chr < 256; chr++) {
|
||||
u8 buf[16] = {0};
|
||||
|
||||
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
|
||||
auto &succ_state = info.next(idx, chr);
|
||||
const auto &succ_state = info.next(idx, chr);
|
||||
|
||||
buf[idx] = getShengState<sheng>(succ_state, info, accelInfo);
|
||||
}
|
||||
@@ -577,13 +577,13 @@ bool createShuffleMasks<sheng>(sheng *s, dfa_info &info,
|
||||
|
||||
template <>
|
||||
bool createShuffleMasks<sheng32>(sheng32 *s, dfa_info &info,
|
||||
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
const map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
for (u16 chr = 0; chr < 256; chr++) {
|
||||
u8 buf[64] = {0};
|
||||
|
||||
assert(info.size() <= 32);
|
||||
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
|
||||
auto &succ_state = info.next(idx, chr);
|
||||
const auto &succ_state = info.next(idx, chr);
|
||||
|
||||
buf[idx] = getShengState<sheng32>(succ_state, info, accelInfo);
|
||||
buf[32 + idx] = buf[idx];
|
||||
@@ -598,13 +598,13 @@ bool createShuffleMasks<sheng32>(sheng32 *s, dfa_info &info,
|
||||
|
||||
template <>
|
||||
bool createShuffleMasks<sheng64>(sheng64 *s, dfa_info &info,
|
||||
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
const map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
for (u16 chr = 0; chr < 256; chr++) {
|
||||
u8 buf[64] = {0};
|
||||
|
||||
assert(info.size() <= 64);
|
||||
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
|
||||
auto &succ_state = info.next(idx, chr);
|
||||
const auto &succ_state = info.next(idx, chr);
|
||||
|
||||
if (accelInfo.find(info.raw_id(succ_state.impl_id))
|
||||
!= accelInfo.end()) {
|
||||
@@ -690,7 +690,7 @@ bytecode_ptr<NFA> shengCompile_int(raw_dfa &raw, const CompileContext &cc,
|
||||
}
|
||||
|
||||
if (!createShuffleMasks<T>((T *)getMutableImplNfa(nfa.get()), info, accelInfo)) {
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
|
||||
return nfa;
|
||||
@@ -701,7 +701,7 @@ bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc,
|
||||
set<dstate_id_t> *accel_states) {
|
||||
if (!cc.grey.allowSheng) {
|
||||
DEBUG_PRINTF("Sheng is not allowed!\n");
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
|
||||
sheng_build_strat strat(raw, rm, only_accel_init);
|
||||
@@ -716,7 +716,7 @@ bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc,
|
||||
info.can_die ? "can" : "cannot", info.size());
|
||||
if (info.size() > 16) {
|
||||
DEBUG_PRINTF("Too many states\n");
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
|
||||
return shengCompile_int<sheng>(raw, cc, accel_states, strat, info);
|
||||
@@ -727,13 +727,20 @@ bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc,
|
||||
set<dstate_id_t> *accel_states) {
|
||||
if (!cc.grey.allowSheng) {
|
||||
DEBUG_PRINTF("Sheng is not allowed!\n");
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
|
||||
#ifdef HAVE_SVE
|
||||
if (svcntb()<32) {
|
||||
DEBUG_PRINTF("Sheng32 failed, SVE width is too small!\n");
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
#else
|
||||
if (!cc.target_info.has_avx512vbmi()) {
|
||||
DEBUG_PRINTF("Sheng32 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
sheng_build_strat strat(raw, rm, only_accel_init);
|
||||
dfa_info info(strat);
|
||||
@@ -748,7 +755,7 @@ bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc,
|
||||
assert(info.size() > 16);
|
||||
if (info.size() > 32) {
|
||||
DEBUG_PRINTF("Too many states\n");
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
|
||||
return shengCompile_int<sheng32>(raw, cc, accel_states, strat, info);
|
||||
@@ -759,13 +766,20 @@ bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc,
|
||||
set<dstate_id_t> *accel_states) {
|
||||
if (!cc.grey.allowSheng) {
|
||||
DEBUG_PRINTF("Sheng is not allowed!\n");
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
|
||||
#ifdef HAVE_SVE
|
||||
if (svcntb()<64) {
|
||||
DEBUG_PRINTF("Sheng64 failed, SVE width is too small!\n");
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
#else
|
||||
if (!cc.target_info.has_avx512vbmi()) {
|
||||
DEBUG_PRINTF("Sheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
sheng_build_strat strat(raw, rm, only_accel_init);
|
||||
dfa_info info(strat);
|
||||
@@ -780,13 +794,13 @@ bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc,
|
||||
assert(info.size() > 32);
|
||||
if (info.size() > 64) {
|
||||
DEBUG_PRINTF("Too many states\n");
|
||||
return nullptr;
|
||||
return bytecode_ptr<NFA>(nullptr);
|
||||
}
|
||||
vector<dstate> old_states;
|
||||
old_states = info.states;
|
||||
auto nfa = shengCompile_int<sheng64>(raw, cc, accel_states, strat, info);
|
||||
if (!nfa) {
|
||||
info.states = old_states;
|
||||
info.states = old_states; // cppcheck-suppress unreadVariable
|
||||
}
|
||||
return nfa;
|
||||
}
|
||||
|
||||
@@ -264,7 +264,7 @@ const u8 *shuftiDoubleExecReal(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128
|
||||
const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
|
||||
const u8 *buf_end) {
|
||||
if (buf_end - buf < VECTORSIZE) {
|
||||
return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, buf, buf_end);
|
||||
return shuftiFwdSlow(reinterpret_cast<const u8 *>(&mask_lo), reinterpret_cast<const u8 *>(&mask_hi), buf, buf_end);
|
||||
}
|
||||
return shuftiExecReal<VECTORSIZE>(mask_lo, mask_hi, buf, buf_end);
|
||||
}
|
||||
@@ -272,7 +272,7 @@ const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
|
||||
const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
|
||||
const u8 *buf_end) {
|
||||
if (buf_end - buf < VECTORSIZE) {
|
||||
return shuftiRevSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, buf, buf_end);
|
||||
return shuftiRevSlow(reinterpret_cast<const u8 *>(&mask_lo), reinterpret_cast<const u8 *>(&mask_hi), buf, buf_end);
|
||||
}
|
||||
return rshuftiExecReal<VECTORSIZE>(mask_lo, mask_hi, buf, buf_end);
|
||||
}
|
||||
|
||||
@@ -32,6 +32,8 @@
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include <numeric>
|
||||
|
||||
|
||||
#include "tamaramacompile.h"
|
||||
|
||||
@@ -129,14 +131,10 @@ buildTamarama(const TamaInfo &tamaInfo, const u32 queue,
|
||||
sizeof(u32) * subSize + 64; // offsets to subengines in bytecode and
|
||||
// padding for subengines
|
||||
|
||||
|
||||
auto subl = [](size_t z, NFA *sub) {
|
||||
return z + (size_t)(ROUNDUP_CL(sub->length));
|
||||
};
|
||||
total_size += std::accumulate(tamaInfo.subengines.begin(), tamaInfo.subengines.end(), size_t{0}, subl);
|
||||
// for (const auto &sub : tamaInfo.subengines) {
|
||||
// total_size += ROUNDUP_CL(sub->length);
|
||||
// }
|
||||
|
||||
// use subSize as a sentinel value for no active subengines,
|
||||
// so add one to subSize here
|
||||
|
||||
@@ -227,7 +227,7 @@ const u8 *fwdBlock(SuperVector<S> shuf_mask_lo_highclear, SuperVector<S> shuf_ma
|
||||
}
|
||||
|
||||
template <uint16_t S>
|
||||
const u8 *truffleExecReal(m128 &shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, const u8 *buf, const u8 *buf_end) {
|
||||
const u8 *truffleExecReal(const m128 &shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, const u8 *buf, const u8 *buf_end) {
|
||||
assert(buf && buf_end);
|
||||
assert(buf < buf_end);
|
||||
DEBUG_PRINTF("truffle %p len %zu\n", buf, buf_end - buf);
|
||||
@@ -349,4 +349,4 @@ const u8 *rtruffleExecReal(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highse
|
||||
|
||||
return buf - 1;
|
||||
}
|
||||
#endif //HAVE_SVE
|
||||
#endif //HAVE_SVE
|
||||
|
||||
Reference in New Issue
Block a user