mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
rose: Extend the interpreter to handle more work
- Use program for EOD sparse iterator
- Use program for literal sparse iterator
- Eliminate RoseRole, RosePred, RoseVertexProps::role
- Small performance optimizations
This commit is contained in:
parent
9cb2233589
commit
d67c7583ea
@ -82,8 +82,8 @@ void init_state_for_block(const struct RoseEngine *t, u8 *state) {
|
||||
assert(t);
|
||||
assert(state);
|
||||
|
||||
DEBUG_PRINTF("init for Rose %p with %u roles (%u with state indices)\n",
|
||||
t, t->roleCount, t->rolesWithStateCount);
|
||||
DEBUG_PRINTF("init for Rose %p with %u state indices\n", t,
|
||||
t->rolesWithStateCount);
|
||||
|
||||
// Rose is guaranteed 8-aligned state
|
||||
assert(ISALIGNED_N(state, 8));
|
||||
|
@ -113,12 +113,11 @@ int roseEodRunIterator(const struct RoseEngine *t, u8 *state, u64a offset,
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
const struct RoseRole *roleTable = getRoleTable(t);
|
||||
const struct RosePred *predTable = getPredTable(t);
|
||||
const struct RoseIterMapping *iterMapBase
|
||||
= getByOffset(t, t->eodIterMapOffset);
|
||||
DEBUG_PRINTF("running eod iterator at offset %u\n", t->eodIterOffset);
|
||||
|
||||
const u32 *programTable = getByOffset(t, t->eodProgramTableOffset);
|
||||
const struct mmbit_sparse_iter *it = getByOffset(t, t->eodIterOffset);
|
||||
assert(ISALIGNED(iterMapBase));
|
||||
assert(ISALIGNED(programTable));
|
||||
assert(ISALIGNED(it));
|
||||
|
||||
// Sparse iterator state was allocated earlier
|
||||
@ -133,50 +132,17 @@ int roseEodRunIterator(const struct RoseEngine *t, u8 *state, u64a offset,
|
||||
|
||||
fatbit_clear(handled_roles);
|
||||
|
||||
int work_done = 0; // not read from in this path.
|
||||
|
||||
for (; i != MMB_INVALID;
|
||||
i = mmbit_sparse_iter_next(role_state, numStates, i, &idx, it, s)) {
|
||||
DEBUG_PRINTF("pred state %u (iter idx=%u) is on\n", i, idx);
|
||||
const struct RoseIterMapping *iterMap = iterMapBase + idx;
|
||||
const struct RoseIterRole *roles = getByOffset(t, iterMap->offset);
|
||||
assert(ISALIGNED(roles));
|
||||
|
||||
DEBUG_PRINTF("%u roles to consider\n", iterMap->count);
|
||||
for (u32 j = 0; j != iterMap->count; j++) {
|
||||
u32 role = roles[j].role;
|
||||
assert(role < t->roleCount);
|
||||
DEBUG_PRINTF("checking role %u, pred %u:\n", role, roles[j].pred);
|
||||
const struct RoseRole *tr = roleTable + role;
|
||||
|
||||
if (fatbit_isset(handled_roles, t->roleCount, role)) {
|
||||
DEBUG_PRINTF("role %u already handled by the walk, skip\n",
|
||||
role);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Special case: if this role is a trivial case (pred type simple)
|
||||
// we don't need to check any history and we already know the pred
|
||||
// role is on.
|
||||
if (tr->flags & ROSE_ROLE_PRED_SIMPLE) {
|
||||
DEBUG_PRINTF("pred type is simple, no need for checks\n");
|
||||
} else {
|
||||
assert(roles[j].pred < t->predCount);
|
||||
const struct RosePred *tp = predTable + roles[j].pred;
|
||||
if (!roseCheckPredHistory(tp, offset)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* mark role as handled so we don't touch it again in this walk */
|
||||
fatbit_set(handled_roles, t->roleCount, role);
|
||||
|
||||
u64a som = 0;
|
||||
int work_done = 0;
|
||||
hwlmcb_rv_t rv =
|
||||
roseRunRoleProgram(t, tr->programOffset, offset, &som,
|
||||
&(scratch->tctxt), &work_done);
|
||||
if (rv == HWLM_TERMINATE_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
u32 programOffset = programTable[idx];
|
||||
u64a som = 0;
|
||||
if (roseRunRoleProgram(t, programOffset, offset, &som,
|
||||
&(scratch->tctxt),
|
||||
&work_done) == HWLM_TERMINATE_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -80,8 +80,8 @@ void roseInitState(const struct RoseEngine *t, u8 *state) {
|
||||
assert(t);
|
||||
assert(state);
|
||||
|
||||
DEBUG_PRINTF("init for Rose %p with %u roles (%u with state indices)\n",
|
||||
t, t->roleCount, t->rolesWithStateCount);
|
||||
DEBUG_PRINTF("init for Rose %p with %u state indices)\n", t,
|
||||
t->rolesWithStateCount);
|
||||
|
||||
// Rose is guaranteed 8-aligned state
|
||||
assert(ISALIGNED_N(state, 8));
|
||||
|
@ -1107,10 +1107,10 @@ char roseCheckRootBounds(u64a end, u32 min_bound, u32 max_bound) {
|
||||
}
|
||||
|
||||
#define PROGRAM_CASE(name) \
|
||||
case ROSE_ROLE_INSTR_##name: { \
|
||||
DEBUG_PRINTF("instruction: " #name " (%u)\n", ROSE_ROLE_INSTR_##name); \
|
||||
const struct ROSE_ROLE_STRUCT_##name *ri = \
|
||||
(const struct ROSE_ROLE_STRUCT_##name *)pc;
|
||||
case ROSE_INSTR_##name: { \
|
||||
DEBUG_PRINTF("instruction: " #name " (%u)\n", ROSE_INSTR_##name); \
|
||||
const struct ROSE_STRUCT_##name *ri = \
|
||||
(const struct ROSE_STRUCT_##name *)pc;
|
||||
|
||||
#define PROGRAM_NEXT_INSTRUCTION \
|
||||
pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \
|
||||
@ -1121,26 +1121,28 @@ static really_inline
|
||||
hwlmcb_rv_t roseRunRoleProgram_i(const struct RoseEngine *t, u32 programOffset,
|
||||
u64a end, u64a *som, struct RoseContext *tctxt,
|
||||
char in_anchored, int *work_done) {
|
||||
assert(programOffset);
|
||||
|
||||
DEBUG_PRINTF("program begins at offset %u\n", programOffset);
|
||||
|
||||
assert(programOffset);
|
||||
assert(programOffset < t->size);
|
||||
|
||||
const char *pc = getByOffset(t, programOffset);
|
||||
|
||||
assert(*(const u8 *)pc != ROSE_ROLE_INSTR_END);
|
||||
assert(*(const u8 *)pc != ROSE_INSTR_END);
|
||||
|
||||
for (;;) {
|
||||
assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN));
|
||||
u8 code = *(const u8 *)pc;
|
||||
assert(code <= ROSE_ROLE_INSTR_END);
|
||||
assert(code <= ROSE_INSTR_END);
|
||||
|
||||
switch ((enum RoseRoleInstructionCode)code) {
|
||||
switch ((enum RoseInstructionCode)code) {
|
||||
PROGRAM_CASE(ANCHORED_DELAY) {
|
||||
if (in_anchored && end > t->floatingMinLiteralMatchOffset) {
|
||||
DEBUG_PRINTF("delay until playback\n");
|
||||
update_depth(tctxt, ri->depth);
|
||||
tctxt->groups |= ri->groups;
|
||||
*work_done = 1;
|
||||
assert(ri->done_jump); // must progress
|
||||
pc += ri->done_jump;
|
||||
continue;
|
||||
}
|
||||
@ -1151,16 +1153,29 @@ hwlmcb_rv_t roseRunRoleProgram_i(const struct RoseEngine *t, u32 programOffset,
|
||||
struct core_info *ci = &tctxtToScratch(tctxt)->core_info;
|
||||
if (end != ci->buf_offset + ci->len) {
|
||||
DEBUG_PRINTF("should only match at end of data\n");
|
||||
assert(ri->fail_jump); // must progress
|
||||
pc += ri->fail_jump;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_ROOT_BOUNDS) {
|
||||
PROGRAM_CASE(CHECK_BOUNDS) {
|
||||
if (!in_anchored &&
|
||||
!roseCheckRootBounds(end, ri->min_bound, ri->max_bound)) {
|
||||
DEBUG_PRINTF("failed root bounds check\n");
|
||||
assert(ri->fail_jump); // must progress
|
||||
pc += ri->fail_jump;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_NOT_HANDLED) {
|
||||
struct fatbit *handled = tctxtToScratch(tctxt)->handled_roles;
|
||||
if (fatbit_set(handled, t->handledKeyCount, ri->key)) {
|
||||
DEBUG_PRINTF("key %u already set\n", ri->key);
|
||||
assert(ri->fail_jump); // must progress
|
||||
pc += ri->fail_jump;
|
||||
continue;
|
||||
}
|
||||
@ -1170,6 +1185,7 @@ hwlmcb_rv_t roseRunRoleProgram_i(const struct RoseEngine *t, u32 programOffset,
|
||||
PROGRAM_CASE(CHECK_LOOKAROUND) {
|
||||
if (!roseCheckLookaround(t, ri->index, ri->count, end, tctxt)) {
|
||||
DEBUG_PRINTF("failed lookaround check\n");
|
||||
assert(ri->fail_jump); // must progress
|
||||
pc += ri->fail_jump;
|
||||
continue;
|
||||
}
|
||||
@ -1180,6 +1196,7 @@ hwlmcb_rv_t roseRunRoleProgram_i(const struct RoseEngine *t, u32 programOffset,
|
||||
if (!roseTestLeftfix(t, ri->queue, ri->lag, ri->report, end,
|
||||
tctxt)) {
|
||||
DEBUG_PRINTF("failed lookaround check\n");
|
||||
assert(ri->fail_jump); // must progress
|
||||
pc += ri->fail_jump;
|
||||
continue;
|
||||
}
|
||||
@ -1334,12 +1351,9 @@ hwlmcb_rv_t roseWalkSparseIterator(const struct RoseEngine *t,
|
||||
struct RoseContext *tctxt) {
|
||||
/* assert(!tctxt->in_anchored); */
|
||||
/* assert(!tctxt->in_anch_playback); */
|
||||
const struct RoseRole *roleTable = getRoleTable(t);
|
||||
const struct RosePred *predTable = getPredTable(t);
|
||||
const struct RoseIterMapping *iterMapBase
|
||||
= getByOffset(t, tl->iterMapOffset);
|
||||
const u32 *iterProgram = getByOffset(t, tl->iterProgramOffset);
|
||||
const struct mmbit_sparse_iter *it = getByOffset(t, tl->iterOffset);
|
||||
assert(ISALIGNED(iterMapBase));
|
||||
assert(ISALIGNED(iterProgram));
|
||||
assert(ISALIGNED(it));
|
||||
|
||||
// Sparse iterator state was allocated earlier
|
||||
@ -1356,50 +1370,19 @@ hwlmcb_rv_t roseWalkSparseIterator(const struct RoseEngine *t,
|
||||
fatbit_clear(handled_roles);
|
||||
|
||||
for (; i != MMB_INVALID;
|
||||
i = mmbit_sparse_iter_next(role_state, numStates, i, &idx, it, s)) {
|
||||
DEBUG_PRINTF("pred state %u (iter idx=%u) is on\n", i, idx);
|
||||
const struct RoseIterMapping *iterMap = iterMapBase + idx;
|
||||
const struct RoseIterRole *roles = getByOffset(t, iterMap->offset);
|
||||
assert(ISALIGNED(roles));
|
||||
i = mmbit_sparse_iter_next(role_state, numStates, i, &idx, it, s)) {
|
||||
u32 programOffset = iterProgram[idx];
|
||||
DEBUG_PRINTF("pred state %u (iter idx=%u) is on -> program %u\n", i,
|
||||
idx, programOffset);
|
||||
|
||||
DEBUG_PRINTF("%u roles to consider\n", iterMap->count);
|
||||
for (u32 j = 0; j != iterMap->count; j++) {
|
||||
u32 role = roles[j].role;
|
||||
assert(role < t->roleCount);
|
||||
DEBUG_PRINTF("checking role %u, pred %u:\n", role, roles[j].pred);
|
||||
const struct RoseRole *tr = roleTable + role;
|
||||
// If this bit is switched on in the sparse iterator, it must be
|
||||
// driving a program.
|
||||
assert(programOffset);
|
||||
|
||||
if (fatbit_isset(handled_roles, t->roleCount, role)) {
|
||||
DEBUG_PRINTF("role %u already handled by the walk, skip\n",
|
||||
role);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Special case: if this role is a trivial case (pred type simple)
|
||||
// we don't need to check any history and we already know the pred
|
||||
// role is on.
|
||||
if (tr->flags & ROSE_ROLE_PRED_SIMPLE) {
|
||||
DEBUG_PRINTF("pred type is simple, no need for further"
|
||||
" checks\n");
|
||||
} else {
|
||||
assert(roles[j].pred < t->predCount);
|
||||
const struct RosePred *tp = predTable + roles[j].pred;
|
||||
if (!roseCheckPredHistory(tp, end)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* mark role as handled so we don't touch it again in this walk */
|
||||
fatbit_set(handled_roles, t->roleCount, role);
|
||||
|
||||
if (!tr->programOffset) {
|
||||
continue;
|
||||
}
|
||||
u64a som = 0ULL;
|
||||
if (roseRunRoleProgram_i(t, tr->programOffset, end, &som, tctxt, 0,
|
||||
&work_done) == HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
u64a som = 0ULL;
|
||||
if (roseRunRoleProgram_i(t, programOffset, end, &som, tctxt, 0,
|
||||
&work_done) == HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -269,30 +269,6 @@ void update_depth(struct RoseContext *tctxt, u8 depth) {
|
||||
tctxt->depth = d;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
int roseCheckHistoryAnch(const struct RosePred *tp, u64a end) {
|
||||
DEBUG_PRINTF("end %llu min %u max %u\n", end, tp->minBound, tp->maxBound);
|
||||
if (tp->maxBound == ROSE_BOUND_INF) {
|
||||
return end >= tp->minBound;
|
||||
} else {
|
||||
return end >= tp->minBound && end <= tp->maxBound;
|
||||
}
|
||||
}
|
||||
|
||||
// Check that a predecessor's history requirements are satisfied.
|
||||
static really_inline
|
||||
int roseCheckPredHistory(const struct RosePred *tp, u64a end) {
|
||||
DEBUG_PRINTF("pred type %u\n", tp->historyCheck);
|
||||
|
||||
if (tp->historyCheck == ROSE_ROLE_HISTORY_ANCH) {
|
||||
return roseCheckHistoryAnch(tp, end);
|
||||
}
|
||||
|
||||
assert(tp->historyCheck == ROSE_ROLE_HISTORY_NONE ||
|
||||
tp->historyCheck == ROSE_ROLE_HISTORY_LAST_BYTE);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Note: uses the stashed sparse iter state; cannot be called from
|
||||
* anybody else who is using it */
|
||||
static rose_inline
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -78,77 +78,6 @@ string to_string(nfa_kind k) {
|
||||
return "?";
|
||||
}
|
||||
|
||||
// Get the RoseRole associated with a given vertex in the build graph from the
|
||||
// RoseEngine.
|
||||
static
|
||||
const RoseRole *getRoseRole(const RoseBuildImpl &build,
|
||||
const RoseEngine *engine, RoseVertex v) {
|
||||
if (!engine) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
u32 role_idx = build.g[v].role;
|
||||
if (role_idx == MO_INVALID_IDX) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const RoseRole *roles = getRoleTable(engine);
|
||||
return &roles[role_idx];
|
||||
}
|
||||
|
||||
#define SKIP_CASE(name) \
|
||||
case ROSE_ROLE_INSTR_##name: { \
|
||||
const auto *ri = (const struct ROSE_ROLE_STRUCT_##name *)pc; \
|
||||
pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \
|
||||
break; \
|
||||
}
|
||||
|
||||
template<int Opcode, class Struct>
|
||||
const Struct *
|
||||
findInstruction(const RoseEngine *t, const RoseRole *role) {
|
||||
if (!role->programOffset) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const char *pc = (const char *)t + role->programOffset;
|
||||
for (;;) {
|
||||
u8 code = *(const u8 *)pc;
|
||||
assert(code <= ROSE_ROLE_INSTR_END);
|
||||
if (code == Opcode) {
|
||||
return (const Struct *)pc;
|
||||
}
|
||||
// Skip to the next instruction.
|
||||
switch (code) {
|
||||
SKIP_CASE(ANCHORED_DELAY)
|
||||
SKIP_CASE(CHECK_ONLY_EOD)
|
||||
SKIP_CASE(CHECK_ROOT_BOUNDS)
|
||||
SKIP_CASE(CHECK_LEFTFIX)
|
||||
SKIP_CASE(CHECK_LOOKAROUND)
|
||||
SKIP_CASE(SOM_ADJUST)
|
||||
SKIP_CASE(SOM_LEFTFIX)
|
||||
SKIP_CASE(TRIGGER_INFIX)
|
||||
SKIP_CASE(TRIGGER_SUFFIX)
|
||||
SKIP_CASE(REPORT)
|
||||
SKIP_CASE(REPORT_CHAIN)
|
||||
SKIP_CASE(REPORT_EOD)
|
||||
SKIP_CASE(REPORT_SOM_INT)
|
||||
SKIP_CASE(REPORT_SOM)
|
||||
SKIP_CASE(REPORT_SOM_KNOWN)
|
||||
SKIP_CASE(SET_STATE)
|
||||
SKIP_CASE(SET_GROUPS)
|
||||
case ROSE_ROLE_INSTR_END:
|
||||
return nullptr;
|
||||
default:
|
||||
assert(0);
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
#undef SKIP_CASE
|
||||
|
||||
namespace {
|
||||
|
||||
class RoseGraphWriter {
|
||||
@ -174,7 +103,7 @@ public:
|
||||
}
|
||||
|
||||
os << "[label=\"";
|
||||
os << "role=" << g[v].role << "[i" << g[v].idx <<"]\\n";
|
||||
os << "idx=" << g[v].idx <<"\\n";
|
||||
|
||||
for (u32 lit_id : g[v].literals) {
|
||||
writeLiteral(os, lit_id);
|
||||
@ -198,34 +127,23 @@ public:
|
||||
os << " (rep=" << as_string_list(g[v].reports) << ")";
|
||||
}
|
||||
|
||||
const RoseRole *r = getRoseRole(v);
|
||||
|
||||
if (g[v].suffix) {
|
||||
os << "\\nSUFFIX (TOP " << g[v].suffix.top;
|
||||
if (r) {
|
||||
const auto *ri =
|
||||
findInstruction<ROSE_ROLE_INSTR_TRIGGER_SUFFIX,
|
||||
ROSE_ROLE_STRUCT_TRIGGER_SUFFIX>(t, r);
|
||||
if (ri) {
|
||||
os << ", Q" << ri->queue;
|
||||
}
|
||||
} else {
|
||||
// Can't dump the queue number, but we can identify the suffix.
|
||||
if (g[v].suffix.graph) {
|
||||
os << ", graph=" << g[v].suffix.graph.get()
|
||||
<< " " << to_string(g[v].suffix.graph->kind);
|
||||
}
|
||||
if (g[v].suffix.castle) {
|
||||
os << ", castle=" << g[v].suffix.castle.get();
|
||||
}
|
||||
if (g[v].suffix.rdfa) {
|
||||
os << ", dfa=" << g[v].suffix.rdfa.get();
|
||||
}
|
||||
if (g[v].suffix.haig) {
|
||||
os << ", haig=" << g[v].suffix.haig.get();
|
||||
}
|
||||
|
||||
// Can't dump the queue number, but we can identify the suffix.
|
||||
if (g[v].suffix.graph) {
|
||||
os << ", graph=" << g[v].suffix.graph.get() << " "
|
||||
<< to_string(g[v].suffix.graph->kind);
|
||||
}
|
||||
if (g[v].suffix.castle) {
|
||||
os << ", castle=" << g[v].suffix.castle.get();
|
||||
}
|
||||
if (g[v].suffix.rdfa) {
|
||||
os << ", dfa=" << g[v].suffix.rdfa.get();
|
||||
}
|
||||
if (g[v].suffix.haig) {
|
||||
os << ", haig=" << g[v].suffix.haig.get();
|
||||
}
|
||||
|
||||
os << ")";
|
||||
}
|
||||
|
||||
@ -247,15 +165,6 @@ public:
|
||||
build.isRootSuccessor(v) ? "PREFIX" : "INFIX";
|
||||
os << "\\nROSE " << roseKind;
|
||||
os << " (";
|
||||
if (r) {
|
||||
const auto *ri =
|
||||
findInstruction<ROSE_ROLE_INSTR_CHECK_LEFTFIX,
|
||||
ROSE_ROLE_STRUCT_CHECK_LEFTFIX>(t, r);
|
||||
if (ri) {
|
||||
os << "Q" << ri->queue << ", ";
|
||||
}
|
||||
}
|
||||
|
||||
os << "report " << g[v].left.leftfix_report << ")";
|
||||
|
||||
if (g[v].left.graph) {
|
||||
@ -348,10 +257,6 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
const RoseRole *getRoseRole(RoseVertex v) const {
|
||||
return ue2::getRoseRole(build, t, v);
|
||||
}
|
||||
|
||||
set<RoseVertex> ghost;
|
||||
const RoseBuildImpl &build;
|
||||
const RoseEngine *t;
|
||||
@ -383,7 +288,7 @@ namespace {
|
||||
struct CompareVertexRole {
|
||||
explicit CompareVertexRole(const RoseGraph &g_in) : g(g_in) {}
|
||||
inline bool operator()(const RoseVertex &a, const RoseVertex &b) const {
|
||||
return g[a].role < g[b].role;
|
||||
return g[a].idx < g[b].idx;
|
||||
}
|
||||
private:
|
||||
const RoseGraph &g;
|
||||
@ -483,7 +388,7 @@ void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) {
|
||||
|
||||
for (RoseVertex v : verts) {
|
||||
// role info
|
||||
os << " Role " << g[v].role << ": depth=" << depths.at(v)
|
||||
os << " Index " << g[v].idx << ": depth=" << depths.at(v)
|
||||
<< ", groups=0x" << hex << setw(16) << setfill('0')
|
||||
<< g[v].groups << dec;
|
||||
|
||||
@ -497,14 +402,14 @@ void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) {
|
||||
os << ", max_offset=" << g[v].max_offset << endl;
|
||||
// pred info
|
||||
for (const auto &ie : in_edges_range(v, g)) {
|
||||
os << " Predecessor role=";
|
||||
u32 predRole = g[source(ie, g)].role;
|
||||
if (predRole == MO_INVALID_IDX) {
|
||||
const auto &u = source(ie, g);
|
||||
os << " Predecessor idx=";
|
||||
if (u == build.root) {
|
||||
os << "ROOT";
|
||||
} else if (predRole == g[build.anchored_root].role) {
|
||||
} else if (u == build.anchored_root) {
|
||||
os << "ANCHORED_ROOT";
|
||||
} else {
|
||||
os << predRole;
|
||||
os << g[u].idx;
|
||||
}
|
||||
os << ": bounds [" << g[ie].minBound << ", ";
|
||||
if (g[ie].maxBound == ROSE_BOUND_INF) {
|
||||
@ -589,70 +494,6 @@ void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) {
|
||||
dumpTestLiterals(base + "rose_smallblock_test_literals.txt", lits);
|
||||
}
|
||||
|
||||
static
|
||||
CharReach bitvectorToReach(const u8 *reach) {
|
||||
CharReach cr;
|
||||
|
||||
for (size_t i = 0; i < 256; i++) {
|
||||
if (reach[i / 8] & (1U << (i % 8))) {
|
||||
cr.set(i);
|
||||
|
||||
}
|
||||
}
|
||||
return cr;
|
||||
}
|
||||
|
||||
static
|
||||
void dumpRoseLookaround(const RoseBuildImpl &build, const RoseEngine *t,
|
||||
const Grey &grey, const string &filename) {
|
||||
stringstream ss;
|
||||
ss << grey.dumpPath << filename;
|
||||
ofstream os(ss.str());
|
||||
|
||||
const RoseGraph &g = build.g;
|
||||
|
||||
const u8 *base = (const u8 *)t;
|
||||
const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset);
|
||||
const u8 *reach_base = base + t->lookaroundReachOffset;
|
||||
|
||||
for (RoseVertex v : vertices_range(g)) {
|
||||
const RoseRole *role = getRoseRole(build, t, v);
|
||||
if (!role) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto *ri =
|
||||
findInstruction<ROSE_ROLE_INSTR_CHECK_LOOKAROUND,
|
||||
ROSE_ROLE_STRUCT_CHECK_LOOKAROUND>(t, role);
|
||||
if (!ri) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const u32 look_idx = ri->index;
|
||||
const u32 look_count = ri->count;
|
||||
|
||||
os << "Role " << g[v].role << endl;
|
||||
os << " literals: " << as_string_list(g[v].literals) << endl;
|
||||
os << " lookaround: index=" << look_idx << ", count=" << look_count
|
||||
<< endl;
|
||||
|
||||
const s8 *look = look_base + look_idx;
|
||||
const s8 *look_end = look + look_count;
|
||||
const u8 *reach = reach_base + look_idx * REACH_BITVECTOR_LEN;
|
||||
|
||||
for (; look < look_end; look++, reach += REACH_BITVECTOR_LEN) {
|
||||
os << " " << std::setw(4) << std::setfill(' ') << int{*look}
|
||||
<< ": ";
|
||||
describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT);
|
||||
os << endl;
|
||||
}
|
||||
|
||||
os << endl;
|
||||
}
|
||||
|
||||
os.close();
|
||||
}
|
||||
|
||||
void dumpRose(const RoseBuild &build_base, const RoseEngine *t,
|
||||
const Grey &grey) {
|
||||
if (!grey.dumpFlags) {
|
||||
@ -692,9 +533,6 @@ void dumpRose(const RoseBuild &build_base, const RoseEngine *t,
|
||||
f = fopen((grey.dumpPath + "/rose_struct.txt").c_str(), "w");
|
||||
roseDumpStructRaw(t, f);
|
||||
fclose(f);
|
||||
|
||||
// Lookaround tables.
|
||||
dumpRoseLookaround(build, t, grey, "rose_lookaround.txt");
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
@ -89,12 +89,10 @@ RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, SomSlotManager &ssm_in,
|
||||
next_nfa_report(0) {
|
||||
// add root vertices to graph
|
||||
g[root].idx = vertexIndex++;
|
||||
g[root].role = MO_INVALID_IDX;
|
||||
g[root].min_offset = 0;
|
||||
g[root].max_offset = 0;
|
||||
|
||||
g[anchored_root].idx = vertexIndex++;
|
||||
g[anchored_root].role = MO_INVALID_IDX;
|
||||
g[anchored_root].min_offset = 0;
|
||||
g[anchored_root].max_offset = 0;
|
||||
}
|
||||
@ -194,7 +192,7 @@ bool RoseBuildImpl::hasLiteralInTable(RoseVertex v,
|
||||
bool RoseBuildImpl::hasNoFloatingRoots() const {
|
||||
for (auto v : adjacent_vertices_range(root, g)) {
|
||||
if (isFloating(v)) {
|
||||
DEBUG_PRINTF("direct floating root %u\n", g[v].role);
|
||||
DEBUG_PRINTF("direct floating root %zu\n", g[v].idx);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -202,7 +200,7 @@ bool RoseBuildImpl::hasNoFloatingRoots() const {
|
||||
/* need to check if the anchored_root has any literals which are too deep */
|
||||
for (auto v : adjacent_vertices_range(anchored_root, g)) {
|
||||
if (isFloating(v)) {
|
||||
DEBUG_PRINTF("indirect floating root %u\n", g[v].role);
|
||||
DEBUG_PRINTF("indirect floating root %zu\n", g[v].idx);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -40,7 +40,9 @@
|
||||
#include "nfa/nfa_build_util.h"
|
||||
#include "nfa/nfa_dump_api.h"
|
||||
#include "nfa/nfa_internal.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/multibit_internal.h"
|
||||
#include "util/multibit.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
@ -114,159 +116,78 @@ const HWLM *getSmallBlockMatcher(const RoseEngine *t) {
|
||||
return (const HWLM *)loadFromByteCodeOffset(t, t->sbmatcherOffset);
|
||||
}
|
||||
|
||||
static
|
||||
const RosePred *getPredTable(const RoseEngine *t, u32 *count) {
|
||||
*count = t->predCount;
|
||||
return (const RosePred *)loadFromByteCodeOffset(t, t->predOffset);
|
||||
}
|
||||
|
||||
static
|
||||
u32 literalsWithDepth(const RoseEngine *t, u8 depth) {
|
||||
u32 n = 0;
|
||||
const RoseLiteral *tl = getLiteralTable(t);
|
||||
const RoseLiteral *tl_end = tl + t->literalCount;
|
||||
|
||||
for (; tl != tl_end; ++tl) {
|
||||
if (tl->minDepth == depth) {
|
||||
n++;
|
||||
}
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
static
|
||||
u32 literalsWithDirectReports(const RoseEngine *t) {
|
||||
return t->totalNumLiterals - t->literalCount;
|
||||
}
|
||||
|
||||
template<typename member_type_ptr>
|
||||
template<typename Predicate>
|
||||
static
|
||||
u32 literalsWithProp(const RoseEngine *t, member_type_ptr prop) {
|
||||
u32 n = 0;
|
||||
size_t literalsWithPredicate(const RoseEngine *t, Predicate pred) {
|
||||
const RoseLiteral *tl = getLiteralTable(t);
|
||||
const RoseLiteral *tl_end = tl + t->literalCount;
|
||||
|
||||
for (; tl != tl_end; ++tl) {
|
||||
if (tl->*prop) {
|
||||
n++;
|
||||
}
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
template<typename member_type>
|
||||
static
|
||||
u32 rolesWithPropValue(const RoseEngine *t, member_type RoseRole::*prop,
|
||||
member_type value) {
|
||||
u32 n = 0;
|
||||
const RoseRole *tr = getRoleTable(t);
|
||||
const RoseRole *tr_end = tr + t->roleCount;
|
||||
|
||||
for (; tr != tr_end; ++tr) {
|
||||
if (tr->*prop == value) {
|
||||
n++;
|
||||
}
|
||||
}
|
||||
return n;
|
||||
return count_if(tl, tl_end, pred);
|
||||
}
|
||||
|
||||
static
|
||||
u32 literalsInGroups(const RoseEngine *t, u32 from, u32 to) {
|
||||
u32 n = 0;
|
||||
const RoseLiteral *tl = getLiteralTable(t);
|
||||
const RoseLiteral *tl_end = tl + t->literalCount;
|
||||
size_t literalsWithDepth(const RoseEngine *t, u8 depth) {
|
||||
return literalsWithPredicate(
|
||||
t, [&depth](const RoseLiteral &l) { return l.minDepth == depth; });
|
||||
}
|
||||
|
||||
static
|
||||
size_t literalsInGroups(const RoseEngine *t, u32 from, u32 to) {
|
||||
rose_group mask = ~((1ULL << from) - 1);
|
||||
if (to < 64) {
|
||||
mask &= ((1ULL << to) - 1);
|
||||
}
|
||||
|
||||
for (; tl != tl_end; ++tl) {
|
||||
if (tl->groups & mask) {
|
||||
n++;
|
||||
}
|
||||
}
|
||||
return n;
|
||||
return literalsWithPredicate(
|
||||
t, [&mask](const RoseLiteral &l) { return l.groups & mask; });
|
||||
}
|
||||
|
||||
static
|
||||
u32 rolesWithFlag(const RoseEngine *t, u32 flag) {
|
||||
u32 n = 0;
|
||||
const RoseRole *tr = getRoleTable(t);
|
||||
const RoseRole *tr_end = tr + t->roleCount;
|
||||
CharReach bitvectorToReach(const u8 *reach) {
|
||||
CharReach cr;
|
||||
|
||||
for (size_t i = 0; i < 256; i++) {
|
||||
if (reach[i / 8] & (1U << (i % 8))) {
|
||||
cr.set(i);
|
||||
|
||||
for (; tr != tr_end; ++tr) {
|
||||
if (tr->flags & flag) {
|
||||
n++;
|
||||
}
|
||||
}
|
||||
return n;
|
||||
return cr;
|
||||
}
|
||||
|
||||
#define HANDLE_CASE(name) \
|
||||
case ROSE_ROLE_INSTR_##name: { \
|
||||
const auto *ri = (const struct ROSE_ROLE_STRUCT_##name *)pc; \
|
||||
pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \
|
||||
break; \
|
||||
}
|
||||
|
||||
static
|
||||
u32 rolesWithInstr(const RoseEngine *t,
|
||||
enum RoseRoleInstructionCode find_code) {
|
||||
u32 n = 0;
|
||||
const RoseRole *tr = getRoleTable(t);
|
||||
const RoseRole *tr_end = tr + t->roleCount;
|
||||
void dumpLookaround(ofstream &os, const RoseEngine *t,
|
||||
const ROSE_STRUCT_CHECK_LOOKAROUND *ri) {
|
||||
assert(ri);
|
||||
|
||||
for (; tr != tr_end; ++tr) {
|
||||
if (!tr->programOffset) {
|
||||
continue;
|
||||
}
|
||||
const u8 *base = (const u8 *)t;
|
||||
const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset);
|
||||
const u8 *reach_base = base + t->lookaroundReachOffset;
|
||||
|
||||
const char *pc = (const char *)t + tr->programOffset;
|
||||
for (;;) {
|
||||
u8 code = *(const u8 *)pc;
|
||||
assert(code <= ROSE_ROLE_INSTR_END);
|
||||
if (code == find_code) {
|
||||
n++;
|
||||
goto next_role;
|
||||
}
|
||||
switch (code) {
|
||||
HANDLE_CASE(CHECK_ONLY_EOD)
|
||||
HANDLE_CASE(CHECK_ROOT_BOUNDS)
|
||||
HANDLE_CASE(CHECK_LOOKAROUND)
|
||||
HANDLE_CASE(CHECK_LEFTFIX)
|
||||
HANDLE_CASE(ANCHORED_DELAY)
|
||||
HANDLE_CASE(SOM_ADJUST)
|
||||
HANDLE_CASE(SOM_LEFTFIX)
|
||||
HANDLE_CASE(TRIGGER_INFIX)
|
||||
HANDLE_CASE(TRIGGER_SUFFIX)
|
||||
HANDLE_CASE(REPORT)
|
||||
HANDLE_CASE(REPORT_CHAIN)
|
||||
HANDLE_CASE(REPORT_EOD)
|
||||
HANDLE_CASE(REPORT_SOM_INT)
|
||||
HANDLE_CASE(REPORT_SOM)
|
||||
HANDLE_CASE(REPORT_SOM_KNOWN)
|
||||
HANDLE_CASE(SET_STATE)
|
||||
HANDLE_CASE(SET_GROUPS)
|
||||
case ROSE_ROLE_INSTR_END:
|
||||
goto next_role;
|
||||
default:
|
||||
assert(0);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
next_role:;
|
||||
const s8 *look = look_base + ri->index;
|
||||
const s8 *look_end = look + ri->count;
|
||||
const u8 *reach = reach_base + ri->index * REACH_BITVECTOR_LEN;
|
||||
|
||||
os << " contents:" << endl;
|
||||
|
||||
for (; look < look_end; look++, reach += REACH_BITVECTOR_LEN) {
|
||||
os << " " << std::setw(4) << std::setfill(' ') << int{*look}
|
||||
<< ": ";
|
||||
describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT);
|
||||
os << endl;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
#undef HANDLE_CASE
|
||||
|
||||
#define PROGRAM_CASE(name) \
|
||||
case ROSE_ROLE_INSTR_##name: { \
|
||||
case ROSE_INSTR_##name: { \
|
||||
os << " " << std::setw(4) << std::setfill('0') << (pc - pc_base) \
|
||||
<< ": " #name " (" << (int)ROSE_ROLE_INSTR_##name << ")" << endl; \
|
||||
const auto *ri = (const struct ROSE_ROLE_STRUCT_##name *)pc;
|
||||
<< ": " #name " (" << (int)ROSE_INSTR_##name << ")" << endl; \
|
||||
const auto *ri = (const struct ROSE_STRUCT_##name *)pc;
|
||||
|
||||
#define PROGRAM_NEXT_INSTRUCTION \
|
||||
pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \
|
||||
@ -274,11 +195,11 @@ u32 rolesWithInstr(const RoseEngine *t,
|
||||
}
|
||||
|
||||
static
|
||||
void dumpRoleProgram(ofstream &os, const char *pc) {
|
||||
void dumpRoleProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
const char *pc_base = pc;
|
||||
for (;;) {
|
||||
u8 code = *(const u8 *)pc;
|
||||
assert(code <= ROSE_ROLE_INSTR_END);
|
||||
assert(code <= ROSE_INSTR_END);
|
||||
switch (code) {
|
||||
PROGRAM_CASE(ANCHORED_DELAY) {
|
||||
os << " depth " << u32{ri->depth} << endl;
|
||||
@ -293,17 +214,24 @@ void dumpRoleProgram(ofstream &os, const char *pc) {
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_ROOT_BOUNDS) {
|
||||
PROGRAM_CASE(CHECK_BOUNDS) {
|
||||
os << " min_bound " << ri->min_bound << endl;
|
||||
os << " max_bound " << ri->max_bound << endl;
|
||||
os << " fail_jump +" << ri->fail_jump << endl;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_NOT_HANDLED) {
|
||||
os << " key " << ri->key << endl;
|
||||
os << " fail_jump +" << ri->fail_jump << endl;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_LOOKAROUND) {
|
||||
os << " index " << ri->index << endl;
|
||||
os << " count " << ri->count << endl;
|
||||
os << " fail_jump +" << ri->fail_jump << endl;
|
||||
dumpLookaround(os, t, ri);
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
@ -396,26 +324,27 @@ void dumpRoleProgram(ofstream &os, const char *pc) {
|
||||
#undef PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
static
|
||||
void dumpRoseRolePrograms(const RoseEngine *t, const string &filename) {
|
||||
ofstream os(filename);
|
||||
void dumpSparseIterPrograms(ofstream &os, const RoseEngine *t, u32 iterOffset,
|
||||
u32 programTableOffset) {
|
||||
const auto *it =
|
||||
(const mmbit_sparse_iter *)loadFromByteCodeOffset(t, iterOffset);
|
||||
const u32 *programTable =
|
||||
(const u32 *)loadFromByteCodeOffset(t, programTableOffset);
|
||||
|
||||
const RoseRole *roles = getRoleTable(t);
|
||||
const char *base = (const char *)t;
|
||||
// Construct a full multibit.
|
||||
const u32 total_bits = t->rolesWithStateCount;
|
||||
const vector<u8> bits(mmbit_size(total_bits), u8{0xff});
|
||||
|
||||
for (u32 i = 0; i < t->roleCount; i++) {
|
||||
const RoseRole *role = &roles[i];
|
||||
os << "Role " << i << endl;
|
||||
|
||||
if (!role->programOffset) {
|
||||
os << " <no program>" << endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
dumpRoleProgram(os, base + role->programOffset);
|
||||
os << endl;
|
||||
struct mmbit_sparse_state s[MAX_SPARSE_ITER_STATES];
|
||||
u32 idx = 0;
|
||||
for (u32 i = mmbit_sparse_iter_begin(bits.data(), total_bits, &idx, it, s);
|
||||
i != MMB_INVALID;
|
||||
i = mmbit_sparse_iter_next(bits.data(), total_bits, i, &idx, it, s)) {
|
||||
u32 programOffset = programTable[idx];
|
||||
os << "Sparse Iter Program " << idx << " triggered by state " << i
|
||||
<< " @ " << programOffset << ":" << endl;
|
||||
dumpRoleProgram(os, t, (const char *)t + programOffset);
|
||||
}
|
||||
|
||||
os.close();
|
||||
}
|
||||
|
||||
static
|
||||
@ -427,12 +356,23 @@ void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) {
|
||||
|
||||
for (u32 i = 0; i < t->literalCount; i++) {
|
||||
const RoseLiteral *lit = &lits[i];
|
||||
if (!lit->rootProgramOffset) {
|
||||
continue;
|
||||
os << "Literal " << i << endl;
|
||||
os << "---------------" << endl;
|
||||
|
||||
if (lit->rootProgramOffset) {
|
||||
os << "Root Program @ " << lit->rootProgramOffset << ":" << endl;
|
||||
dumpRoleProgram(os, t, base + lit->rootProgramOffset);
|
||||
} else {
|
||||
os << "<No Root Program>" << endl;
|
||||
}
|
||||
|
||||
if (lit->iterOffset != ROSE_OFFSET_INVALID) {
|
||||
dumpSparseIterPrograms(os, t, lit->iterOffset,
|
||||
lit->iterProgramOffset);
|
||||
} else {
|
||||
os << "<No Sparse Iter Programs>" << endl;
|
||||
}
|
||||
|
||||
os << "Literal " << i << endl;
|
||||
dumpRoleProgram(os, base + lit->rootProgramOffset);
|
||||
os << endl;
|
||||
}
|
||||
|
||||
@ -440,37 +380,17 @@ void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) {
|
||||
}
|
||||
|
||||
static
|
||||
const char *historyName(RoseRoleHistory h) {
|
||||
switch (h) {
|
||||
case ROSE_ROLE_HISTORY_NONE:
|
||||
return "history none";
|
||||
case ROSE_ROLE_HISTORY_ANCH:
|
||||
return "history anch";
|
||||
case ROSE_ROLE_HISTORY_LAST_BYTE:
|
||||
return "history last_byte";
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
||||
}
|
||||
void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) {
|
||||
ofstream os(filename);
|
||||
|
||||
static
|
||||
void dumpPreds(FILE *f, const RoseEngine *t) {
|
||||
map<RoseRoleHistory, u32> counts;
|
||||
|
||||
u32 predCount = 0;
|
||||
const RosePred *tp = getPredTable(t, &predCount);
|
||||
const RosePred *tp_end = tp + predCount;
|
||||
|
||||
for (; tp != tp_end; ++tp) {
|
||||
assert(tp->historyCheck < ROSE_ROLE_HISTORY_INVALID);
|
||||
counts[(RoseRoleHistory)tp->historyCheck] += 1;
|
||||
if (t->eodIterOffset) {
|
||||
dumpSparseIterPrograms(os, t, t->eodIterOffset,
|
||||
t->eodProgramTableOffset);
|
||||
} else {
|
||||
os << "<No EOD Iter Programs>" << endl;
|
||||
}
|
||||
|
||||
for (map<RoseRoleHistory, u32>::const_iterator it = counts.begin(),
|
||||
ite = counts.end();
|
||||
it != ite; ++it) {
|
||||
fprintf(f, " - %-18s: %u\n", historyName(it->first), it->second);
|
||||
}
|
||||
os.close();
|
||||
}
|
||||
|
||||
static
|
||||
@ -805,16 +725,12 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
|
||||
sbtable ? hwlmSize(sbtable) : 0, t->smallBlockDistance);
|
||||
fprintf(f, " - literal table : %zu bytes\n",
|
||||
t->literalCount * sizeof(RoseLiteral));
|
||||
fprintf(f, " - role table : %zu bytes\n",
|
||||
t->roleCount * sizeof(RoseRole));
|
||||
fprintf(f, " - pred table : %zu bytes\n",
|
||||
t->predCount * sizeof(RosePred));
|
||||
fprintf(f, " - role state table : %zu bytes\n",
|
||||
t->rolesWithStateCount * sizeof(u32));
|
||||
fprintf(f, " - nfa info table : %u bytes\n",
|
||||
t->anchoredReportMapOffset - t->nfaInfoOffset);
|
||||
fprintf(f, " - lookaround table : %u bytes\n",
|
||||
t->predOffset - t->lookaroundTableOffset);
|
||||
t->nfaInfoOffset - t->lookaroundTableOffset);
|
||||
fprintf(f, " - lookaround reach : %u bytes\n",
|
||||
t->lookaroundTableOffset - t->lookaroundReachOffset);
|
||||
|
||||
@ -839,46 +755,30 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
|
||||
fprintf(f, "\n");
|
||||
|
||||
fprintf(f, "initial groups : 0x%016llx\n", t->initialGroups);
|
||||
fprintf(f, "handled key count : %u\n", t->handledKeyCount);
|
||||
fprintf(f, "\n");
|
||||
|
||||
fprintf(f, "number of literals : %u\n", t->totalNumLiterals);
|
||||
fprintf(f, " - delayed : %u\n", t->delay_count);
|
||||
fprintf(f, " - direct report : %u\n",
|
||||
literalsWithDirectReports(t));
|
||||
fprintf(f, " - that squash group : %u\n",
|
||||
literalsWithProp(t, &RoseLiteral::squashesGroup));
|
||||
fprintf(f, " - that squash group : %zu\n",
|
||||
literalsWithPredicate(
|
||||
t, [](const RoseLiteral &l) { return l.squashesGroup != 0; }));
|
||||
fprintf(f, " - with benefits : %u\n", t->nonbenefits_base_id);
|
||||
|
||||
u32 group_weak_end = t->group_weak_end;
|
||||
fprintf(f, " - with root program : %zu\n",
|
||||
literalsWithPredicate(t, [](const RoseLiteral &l) {
|
||||
return l.rootProgramOffset != 0;
|
||||
}));
|
||||
fprintf(f, " - with sparse iter : %zu\n",
|
||||
literalsWithPredicate(t, [](const RoseLiteral &l) {
|
||||
return l.iterOffset != ROSE_OFFSET_INVALID;
|
||||
}));
|
||||
fprintf(f, " - in groups ::\n");
|
||||
fprintf(f, " + weak : %u\n",
|
||||
literalsInGroups(t, 0, group_weak_end));
|
||||
fprintf(f, " + general : %u\n",
|
||||
literalsInGroups(t, group_weak_end, sizeof(u64a) * 8));
|
||||
fprintf(f, "number of roles : %u\n", t->roleCount);
|
||||
fprintf(f, " - with state index : %u\n", t->rolesWithStateCount);
|
||||
fprintf(f, " - with leftfix nfa : %u\n",
|
||||
rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_LEFTFIX));
|
||||
fprintf(f, " - with suffix nfa : %u\n",
|
||||
rolesWithInstr(t, ROSE_ROLE_INSTR_TRIGGER_SUFFIX));
|
||||
fprintf(f, " - with lookaround : %u\n",
|
||||
rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_LOOKAROUND));
|
||||
fprintf(f, " - with reports : %u\n",
|
||||
rolesWithInstr(t, ROSE_ROLE_INSTR_REPORT));
|
||||
fprintf(f, " - with som reports : %u\n",
|
||||
rolesWithInstr(t, ROSE_ROLE_INSTR_REPORT_SOM_INT));
|
||||
fprintf(f, " - match only at end : %u\n",
|
||||
rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_ONLY_EOD));
|
||||
fprintf(f, " + anchored : %u\n", t->anchoredMatches);
|
||||
|
||||
fprintf(f, " - simple preds : %u\n",
|
||||
rolesWithFlag(t, ROSE_ROLE_PRED_SIMPLE));
|
||||
fprintf(f, " - bound root preds : %u\n",
|
||||
rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS));
|
||||
fprintf(f, " - 'any' preds : %u\n",
|
||||
rolesWithFlag(t, ROSE_ROLE_PRED_ANY));
|
||||
fprintf(f, "number of preds : %u\n", t->predCount);
|
||||
dumpPreds(f, t);
|
||||
fprintf(f, " + weak : %zu\n",
|
||||
literalsInGroups(t, 0, t->group_weak_end));
|
||||
fprintf(f, " + general : %zu\n",
|
||||
literalsInGroups(t, t->group_weak_end, sizeof(u64a) * 8));
|
||||
|
||||
u32 depth1 = literalsWithDepth(t, 1);
|
||||
u32 depth2 = literalsWithDepth(t, 2);
|
||||
@ -977,16 +877,13 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
|
||||
DUMP_U32(t, activeArrayCount);
|
||||
DUMP_U32(t, activeLeftCount);
|
||||
DUMP_U32(t, queueCount);
|
||||
DUMP_U32(t, roleOffset);
|
||||
DUMP_U32(t, roleCount);
|
||||
DUMP_U32(t, predOffset);
|
||||
DUMP_U32(t, predCount);
|
||||
DUMP_U32(t, handledKeyCount);
|
||||
DUMP_U32(t, leftOffset);
|
||||
DUMP_U32(t, roseCount);
|
||||
DUMP_U32(t, lookaroundTableOffset);
|
||||
DUMP_U32(t, lookaroundReachOffset);
|
||||
DUMP_U32(t, eodIterOffset);
|
||||
DUMP_U32(t, eodIterMapOffset);
|
||||
DUMP_U32(t, eodProgramTableOffset);
|
||||
DUMP_U32(t, lastByteHistoryIterOffset);
|
||||
DUMP_U32(t, minWidth);
|
||||
DUMP_U32(t, minWidthExcludingBoundaries);
|
||||
@ -1048,52 +945,15 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
|
||||
fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine));
|
||||
}
|
||||
|
||||
static
|
||||
void roseDumpPredStructRaw(const RoseEngine *t, FILE *f) {
|
||||
u32 pred_count = 0;
|
||||
const RosePred *pred_table = getPredTable(t, &pred_count);
|
||||
fprintf(f, "pred_count = %u\n", pred_count);
|
||||
if (!pred_table) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (const RosePred *p = pred_table; p < pred_table + pred_count; p++) {
|
||||
fprintf(f, "pred[%zu] = {\n", p - pred_table);
|
||||
DUMP_U32(p, role);
|
||||
DUMP_U32(p, minBound);
|
||||
DUMP_U32(p, maxBound);
|
||||
DUMP_U8(p, historyCheck);
|
||||
fprintf(f, "}\n");
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void roseDumpRoleStructRaw(const RoseEngine *t, FILE *f) {
|
||||
const RoseRole *tr = getRoleTable(t);
|
||||
const RoseRole *tr_end = tr + t->roleCount;
|
||||
fprintf(f, "role_count = %zd\n", tr_end - tr);
|
||||
if (!tr) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (const RoseRole *p = tr; p < tr_end; p++) {
|
||||
fprintf(f, "role[%zu] = {\n", p - tr);
|
||||
DUMP_U32(p, flags);
|
||||
DUMP_U32(p, programOffset);
|
||||
fprintf(f, "}\n");
|
||||
}
|
||||
}
|
||||
|
||||
void roseDumpComponents(const RoseEngine *t, bool dump_raw, const string &base) {
|
||||
void roseDumpComponents(const RoseEngine *t, bool dump_raw,
|
||||
const string &base) {
|
||||
dumpComponentInfo(t, base);
|
||||
dumpNfas(t, dump_raw, base);
|
||||
dumpAnchored(t, base);
|
||||
dumpRevComponentInfo(t, base);
|
||||
dumpRevNfas(t, dump_raw, base);
|
||||
|
||||
// Role programs.
|
||||
dumpRoseRolePrograms(t, base + "/rose_role_programs.txt");
|
||||
dumpRoseLitPrograms(t, base + "/rose_lit_root_programs.txt");
|
||||
dumpRoseLitPrograms(t, base + "/rose_lit_programs.txt");
|
||||
dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt");
|
||||
}
|
||||
|
||||
void roseDumpInternals(const RoseEngine *t, const string &base) {
|
||||
@ -1139,14 +999,6 @@ void roseDumpInternals(const RoseEngine *t, const string &base) {
|
||||
roseDumpStructRaw(t, f);
|
||||
fclose(f);
|
||||
|
||||
f = fopen((base + "/rose_preds.txt").c_str(), "w");
|
||||
roseDumpPredStructRaw(t, f);
|
||||
fclose(f);
|
||||
|
||||
f = fopen((base + "/rose_roles.txt").c_str(), "w");
|
||||
roseDumpRoleStructRaw(t, f);
|
||||
fclose(f);
|
||||
|
||||
roseDumpComponents(t, true, base);
|
||||
}
|
||||
|
||||
|
@ -39,7 +39,7 @@
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "rose_build.h"
|
||||
#include "rose_internal.h" /* role history, etc */
|
||||
#include "rose_internal.h"
|
||||
#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
|
||||
#include "util/charreach.h"
|
||||
#include "util/depth.h"
|
||||
@ -65,6 +65,14 @@ enum rose_literal_table {
|
||||
ROSE_EVENT //!< "literal-like" events, such as EOD
|
||||
};
|
||||
|
||||
/** \brief Edge history types. */
|
||||
enum RoseRoleHistory {
|
||||
ROSE_ROLE_HISTORY_NONE, //!< no special history
|
||||
ROSE_ROLE_HISTORY_ANCH, //!< previous role is at a fixed offset
|
||||
ROSE_ROLE_HISTORY_LAST_BYTE, //!< previous role can only match at EOD
|
||||
ROSE_ROLE_HISTORY_INVALID //!< history not yet assigned
|
||||
};
|
||||
|
||||
#include "util/order_check.h"
|
||||
|
||||
/** \brief Provides information about the (pre|in)fix engine to the left of a
|
||||
@ -140,9 +148,6 @@ struct RoseVertexProps {
|
||||
/** \brief Report IDs to fire. */
|
||||
flat_set<ReportID> reports;
|
||||
|
||||
/** \brief Role ID for this vertex. These are what end up in the bytecode. */
|
||||
u32 role = ~u32{0};
|
||||
|
||||
/** \brief Bitmask of groups that this role sets. */
|
||||
rose_group groups = 0;
|
||||
|
||||
|
@ -73,18 +73,55 @@ ReportID literalToReport(u32 id) {
|
||||
return id & ~LITERAL_DR_FLAG;
|
||||
}
|
||||
|
||||
// Structure representing a literal. Each literal may have many roles.
|
||||
/** \brief Structure representing a literal. */
|
||||
struct RoseLiteral {
|
||||
u32 rootProgramOffset; // role program to run for root roles.
|
||||
u32 iterOffset; // offset of sparse iterator, relative to rose
|
||||
u32 iterMapOffset; // offset of the iter mapping table, relative to rose
|
||||
rose_group groups; // bitset of groups that cause this literal to fire.
|
||||
u8 minDepth; // the minimum of this literal's roles' depths (for depths > 1)
|
||||
u8 squashesGroup; /**< literal switches off its group behind it if it sets a
|
||||
* role */
|
||||
u32 delay_mask; /**< bit set indicates that the literal inserts a delayed
|
||||
* match at the given offset */
|
||||
u32 delayIdsOffset; // offset to array of ids to poke in the delay structure
|
||||
/**
|
||||
* \brief Role program to run unconditionally when this literal is seen.
|
||||
*
|
||||
* Offset is relative to RoseEngine, or zero for no program.
|
||||
*/
|
||||
u32 rootProgramOffset;
|
||||
|
||||
/**
|
||||
* \brief Offset of sparse iterator (mmbit_sparse_iter pointer) over
|
||||
* predecessor states.
|
||||
*
|
||||
* Offset is relative to RoseEngine, set to ROSE_OFFSET_INVALID for no
|
||||
* iterator.
|
||||
*/
|
||||
u32 iterOffset;
|
||||
|
||||
/**
|
||||
* \brief Table of role programs to run when triggered by the sparse
|
||||
* iterator, indexed by dense sparse iter index.
|
||||
*
|
||||
* Offset is relative to RoseEngine, zero for no programs.
|
||||
*/
|
||||
u32 iterProgramOffset;
|
||||
|
||||
/** \brief Bitset of groups that cause this literal to fire. */
|
||||
rose_group groups;
|
||||
|
||||
/**
|
||||
* \brief The minimum depth of this literal in the Rose graph (for depths
|
||||
* greater than 1).
|
||||
*/
|
||||
u8 minDepth;
|
||||
|
||||
/**
|
||||
* \brief True if this literal switches off its group behind it when it
|
||||
* sets a role.
|
||||
*/
|
||||
u8 squashesGroup;
|
||||
|
||||
/**
|
||||
* \brief Bitset which indicates that the literal inserts a delayed
|
||||
* match at the given offset.
|
||||
*/
|
||||
u32 delay_mask;
|
||||
|
||||
/** \brief Offset to array of ids to poke in the delay structure. */
|
||||
u32 delayIdsOffset;
|
||||
};
|
||||
|
||||
/* Allocation of Rose literal ids
|
||||
@ -179,15 +216,6 @@ struct RoseLiteral {
|
||||
* terminals.
|
||||
*/
|
||||
|
||||
// We have different types of role history storage.
|
||||
enum RoseRoleHistory {
|
||||
ROSE_ROLE_HISTORY_NONE, // I'm sorry, I don't recall.
|
||||
ROSE_ROLE_HISTORY_ANCH, // used when previous role is at a fixed offset
|
||||
ROSE_ROLE_HISTORY_LAST_BYTE, /* used when previous role can only match at the
|
||||
* last byte of a stream */
|
||||
ROSE_ROLE_HISTORY_INVALID // history not yet assigned
|
||||
};
|
||||
|
||||
struct RoseCountingMiracle {
|
||||
char shufti; /** 1: count shufti class; 0: count a single character */
|
||||
u8 count; /** minimum number of occurrences for the counting
|
||||
@ -225,15 +253,6 @@ struct NfaInfo {
|
||||
* matches */
|
||||
};
|
||||
|
||||
/* We allow different types of role-predecessor relationships. These are stored
|
||||
* in with the flags */
|
||||
#define ROSE_ROLE_PRED_SIMPLE (1U << 21) /**< single [0,inf] pred, no
|
||||
* offset tracking */
|
||||
#define ROSE_ROLE_PRED_ANY (1U << 23) /**< any of our preds can match */
|
||||
|
||||
#define ROSE_ROLE_PRED_CLEAR_MASK \
|
||||
(~(ROSE_ROLE_PRED_SIMPLE | ROSE_ROLE_PRED_ANY))
|
||||
|
||||
#define MAX_STORED_LEFTFIX_LAG 127 /* max leftfix lag that we can store in one
|
||||
* whole byte (OWB) (streaming only). Other
|
||||
* values in OWB are reserved for zombie
|
||||
@ -241,33 +260,6 @@ struct NfaInfo {
|
||||
#define OWB_ZOMBIE_ALWAYS_YES 128 /* nfa will always answer yes to any rose
|
||||
* prefix checks */
|
||||
|
||||
// Structure representing a literal role.
|
||||
struct RoseRole {
|
||||
u32 flags;
|
||||
u32 programOffset; /**< offset to program to run. */
|
||||
};
|
||||
|
||||
// Structure representing a predecessor relationship
|
||||
struct RosePred {
|
||||
u32 role; // index of predecessor role
|
||||
u32 minBound; // min bound on distance from pred (_ANCH ->absolute offset)
|
||||
u32 maxBound; /* max bound on distance from pred, or ROSE_BOUND_INF
|
||||
* (_ANCH -> absolute offset ) */
|
||||
u8 historyCheck; // from enum RoseRoleHistory
|
||||
};
|
||||
|
||||
// Structure mapping between the dense index produced by the literal sparse
|
||||
// iterator and a list of roles.
|
||||
struct RoseIterMapping {
|
||||
u32 offset; // offset into iter role table
|
||||
u32 count; // number of roles
|
||||
};
|
||||
|
||||
struct RoseIterRole {
|
||||
u32 role;
|
||||
u32 pred;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Rose state offsets.
|
||||
*
|
||||
@ -376,8 +368,6 @@ struct RoseBoundaryReports {
|
||||
// 1c. eod-anchored literal matcher table
|
||||
// 1d. small block table
|
||||
// 2. array of RoseLiteral (literalCount entries)
|
||||
// 3. array of RoseRole (roleCount entries)
|
||||
// 4. array of RosePred (predCount entries)
|
||||
// 8. array of NFA offsets, one per queue
|
||||
// 9. array of state offsets, one per queue (+)
|
||||
// 10. array of role ids for the set of all root roles
|
||||
@ -447,10 +437,10 @@ struct RoseEngine {
|
||||
u32 activeArrayCount; //number of nfas tracked in the active array
|
||||
u32 activeLeftCount; //number of nfas tracked in the active rose array
|
||||
u32 queueCount; /**< number of nfa queues */
|
||||
u32 roleOffset; // offset of RoseRole array (bytes)
|
||||
u32 roleCount; // number of RoseRole entries
|
||||
u32 predOffset; // offset of RosePred array (bytes)
|
||||
u32 predCount; // number of RosePred entries
|
||||
|
||||
/** \brief Number of keys used by CHECK_SET_HANDLED instructions in role
|
||||
* programs. Used to size the handled_roles fatbit in scratch. */
|
||||
u32 handledKeyCount;
|
||||
|
||||
u32 leftOffset;
|
||||
u32 roseCount;
|
||||
@ -459,7 +449,7 @@ struct RoseEngine {
|
||||
* bytes each) */
|
||||
|
||||
u32 eodIterOffset; // or 0 if no eod iterator
|
||||
u32 eodIterMapOffset;
|
||||
u32 eodProgramTableOffset;
|
||||
|
||||
u32 lastByteHistoryIterOffset; // if non-zero
|
||||
|
||||
@ -614,22 +604,6 @@ const struct RoseLiteral *getLiteralTable(const struct RoseEngine *t) {
|
||||
return tl;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct RoseRole *getRoleTable(const struct RoseEngine *t) {
|
||||
const struct RoseRole *r
|
||||
= (const struct RoseRole *)((const char *)t + t->roleOffset);
|
||||
assert(ISALIGNED_N(r, 4));
|
||||
return r;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct RosePred *getPredTable(const struct RoseEngine *t) {
|
||||
const struct RosePred *p
|
||||
= (const struct RosePred *)((const char *)t + t->predOffset);
|
||||
assert(ISALIGNED_N(p, 4));
|
||||
return p;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct LeftNfaInfo *getLeftTable(const struct RoseEngine *t) {
|
||||
const struct LeftNfaInfo *r
|
||||
|
@ -40,54 +40,61 @@
|
||||
#define ROSE_INSTR_MIN_ALIGN 8U
|
||||
|
||||
/** \brief Role program instruction opcodes. */
|
||||
enum RoseRoleInstructionCode {
|
||||
ROSE_ROLE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher.
|
||||
ROSE_ROLE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD.
|
||||
ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS, //!< Bounds on distance from root.
|
||||
ROSE_ROLE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check.
|
||||
ROSE_ROLE_INSTR_CHECK_LEFTFIX, //!< Leftfix must be in accept state.
|
||||
ROSE_ROLE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM.
|
||||
ROSE_ROLE_INSTR_SOM_LEFTFIX, //!< Acquire SOM from a leftfix engine.
|
||||
ROSE_ROLE_INSTR_TRIGGER_INFIX, //!< Trigger an infix engine.
|
||||
ROSE_ROLE_INSTR_TRIGGER_SUFFIX, //!< Trigger a suffix engine.
|
||||
ROSE_ROLE_INSTR_REPORT, //!< Fire an ordinary report.
|
||||
ROSE_ROLE_INSTR_REPORT_CHAIN, //!< Fire a chained report (MPV).
|
||||
ROSE_ROLE_INSTR_REPORT_EOD, //!< Fire a callback at EOD time.
|
||||
ROSE_ROLE_INSTR_REPORT_SOM_INT, //!< Manipulate SOM only.
|
||||
ROSE_ROLE_INSTR_REPORT_SOM, //!< Manipulate SOM and report.
|
||||
ROSE_ROLE_INSTR_REPORT_SOM_KNOWN, //!< Rose role knows its SOM offset.
|
||||
ROSE_ROLE_INSTR_SET_STATE, //!< Switch a state index on.
|
||||
ROSE_ROLE_INSTR_SET_GROUPS, //!< Set some literal group bits.
|
||||
ROSE_ROLE_INSTR_END //!< End of program.
|
||||
enum RoseInstructionCode {
|
||||
ROSE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher.
|
||||
ROSE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD.
|
||||
ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance from offset 0.
|
||||
ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled".
|
||||
ROSE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check.
|
||||
ROSE_INSTR_CHECK_LEFTFIX, //!< Leftfix must be in accept state.
|
||||
ROSE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM.
|
||||
ROSE_INSTR_SOM_LEFTFIX, //!< Acquire SOM from a leftfix engine.
|
||||
ROSE_INSTR_TRIGGER_INFIX, //!< Trigger an infix engine.
|
||||
ROSE_INSTR_TRIGGER_SUFFIX, //!< Trigger a suffix engine.
|
||||
ROSE_INSTR_REPORT, //!< Fire an ordinary report.
|
||||
ROSE_INSTR_REPORT_CHAIN, //!< Fire a chained report (MPV).
|
||||
ROSE_INSTR_REPORT_EOD, //!< Fire a callback at EOD time.
|
||||
ROSE_INSTR_REPORT_SOM_INT, //!< Manipulate SOM only.
|
||||
ROSE_INSTR_REPORT_SOM, //!< Manipulate SOM and report.
|
||||
ROSE_INSTR_REPORT_SOM_KNOWN, //!< Rose role knows its SOM offset.
|
||||
ROSE_INSTR_SET_STATE, //!< Switch a state index on.
|
||||
ROSE_INSTR_SET_GROUPS, //!< Set some literal group bits.
|
||||
ROSE_INSTR_END //!< End of program.
|
||||
};
|
||||
|
||||
struct ROSE_ROLE_STRUCT_ANCHORED_DELAY {
|
||||
struct ROSE_STRUCT_ANCHORED_DELAY {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u8 depth; //!< Depth for this state.
|
||||
rose_group groups; //!< Bitmask.
|
||||
u32 done_jump; //!< Jump forward this many bytes if successful.
|
||||
};
|
||||
|
||||
struct ROSE_ROLE_STRUCT_CHECK_ONLY_EOD {
|
||||
struct ROSE_STRUCT_CHECK_ONLY_EOD {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
struct ROSE_ROLE_STRUCT_CHECK_ROOT_BOUNDS {
|
||||
struct ROSE_STRUCT_CHECK_BOUNDS {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u32 min_bound; //!< Min distance from zero.
|
||||
u32 max_bound; //!< Max distance from zero (or ROSE_BOUND_INF).
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
struct ROSE_ROLE_STRUCT_CHECK_LOOKAROUND {
|
||||
struct ROSE_STRUCT_CHECK_NOT_HANDLED {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u32 key; //!< Key in the "handled_roles" fatbit in scratch.
|
||||
u32 fail_jump; //!< Jump forward this many bytes if we have seen key before.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_CHECK_LOOKAROUND {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u32 index;
|
||||
u32 count;
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
struct ROSE_ROLE_STRUCT_CHECK_LEFTFIX {
|
||||
struct ROSE_STRUCT_CHECK_LEFTFIX {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u32 queue; //!< Queue of leftfix to check.
|
||||
u32 lag; //!< Lag of leftfix for this case.
|
||||
@ -95,72 +102,72 @@ struct ROSE_ROLE_STRUCT_CHECK_LEFTFIX {
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
struct ROSE_ROLE_STRUCT_SOM_ADJUST {
|
||||
struct ROSE_STRUCT_SOM_ADJUST {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u32 distance; //!< Distance to EOM.
|
||||
};
|
||||
|
||||
struct ROSE_ROLE_STRUCT_SOM_LEFTFIX {
|
||||
struct ROSE_STRUCT_SOM_LEFTFIX {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u32 queue; //!< Queue index of leftfix providing SOM.
|
||||
u32 lag; //!< Lag of leftfix for this case.
|
||||
};
|
||||
|
||||
struct ROSE_ROLE_STRUCT_TRIGGER_INFIX {
|
||||
struct ROSE_STRUCT_TRIGGER_INFIX {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u8 cancel; //!< Cancels previous top event.
|
||||
u32 queue; //!< Queue index of infix.
|
||||
u32 event; //!< Queue event, from MQE_*.
|
||||
};
|
||||
|
||||
struct ROSE_ROLE_STRUCT_TRIGGER_SUFFIX {
|
||||
struct ROSE_STRUCT_TRIGGER_SUFFIX {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u32 queue; //!< Queue index of suffix.
|
||||
u32 event; //!< Queue event, from MQE_*.
|
||||
};
|
||||
|
||||
struct ROSE_ROLE_STRUCT_REPORT {
|
||||
struct ROSE_STRUCT_REPORT {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
struct ROSE_ROLE_STRUCT_REPORT_CHAIN {
|
||||
struct ROSE_STRUCT_REPORT_CHAIN {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
struct ROSE_ROLE_STRUCT_REPORT_EOD {
|
||||
struct ROSE_STRUCT_REPORT_EOD {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
struct ROSE_ROLE_STRUCT_REPORT_SOM_INT {
|
||||
struct ROSE_STRUCT_REPORT_SOM_INT {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
struct ROSE_ROLE_STRUCT_REPORT_SOM {
|
||||
struct ROSE_STRUCT_REPORT_SOM {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
struct ROSE_ROLE_STRUCT_REPORT_SOM_KNOWN {
|
||||
struct ROSE_STRUCT_REPORT_SOM_KNOWN {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
struct ROSE_ROLE_STRUCT_SET_STATE {
|
||||
struct ROSE_STRUCT_SET_STATE {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u8 depth; //!< Depth for this state.
|
||||
u32 index; //!< State index in multibit.
|
||||
};
|
||||
|
||||
struct ROSE_ROLE_STRUCT_SET_GROUPS {
|
||||
struct ROSE_STRUCT_SET_GROUPS {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
rose_group groups; //!< Bitmask.
|
||||
};
|
||||
|
||||
struct ROSE_ROLE_STRUCT_END {
|
||||
struct ROSE_STRUCT_END {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
};
|
||||
|
||||
|
@ -172,15 +172,6 @@ const struct internal_report *getInternalReport(const struct RoseEngine *t,
|
||||
return reports + intId;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct RoseRole *getRoleByOffset(const struct RoseEngine *t, u32 offset) {
|
||||
const struct RoseRole *tr = (const void *)((const char *)t + offset);
|
||||
|
||||
assert((size_t)(tr - getRoleTable(t)) < t->roleCount);
|
||||
DEBUG_PRINTF("get root role %zu\n", tr - getRoleTable(t));
|
||||
return tr;
|
||||
}
|
||||
|
||||
#define ANCHORED_MATCH_SENTINEL (~0U)
|
||||
|
||||
static really_inline
|
||||
|
@ -90,7 +90,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
|
||||
+ bStateSize + tStateSize
|
||||
+ fullStateSize + 63 /* cacheline padding */
|
||||
+ nfa_context_size
|
||||
+ fatbit_size(proto->roleCount) /* handled roles */
|
||||
+ fatbit_size(proto->handledKeyCount) /* handled roles */
|
||||
+ fatbit_size(queueCount) /* active queue array */
|
||||
+ 2 * fatbit_size(deduperCount) /* need odd and even logs */
|
||||
+ 2 * fatbit_size(deduperCount) /* ditto som logs */
|
||||
@ -192,7 +192,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
|
||||
current += fatbit_size(queueCount);
|
||||
|
||||
s->handled_roles = (struct fatbit *)current;
|
||||
current += fatbit_size(proto->roleCount);
|
||||
current += fatbit_size(proto->handledKeyCount);
|
||||
|
||||
s->deduper.log[0] = (struct fatbit *)current;
|
||||
current += fatbit_size(deduperCount);
|
||||
@ -312,9 +312,9 @@ hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) {
|
||||
proto->delay_count = rose->delay_count;
|
||||
}
|
||||
|
||||
if (rose->roleCount > proto->roleCount) {
|
||||
if (rose->handledKeyCount > proto->handledKeyCount) {
|
||||
resize = 1;
|
||||
proto->roleCount = rose->roleCount;
|
||||
proto->handledKeyCount = rose->handledKeyCount;
|
||||
}
|
||||
|
||||
if (rose->tStateSize > proto->tStateSize) {
|
||||
|
@ -180,7 +180,7 @@ struct ALIGN_CL_DIRECTIVE hs_scratch {
|
||||
u32 delay_count;
|
||||
u32 scratchSize;
|
||||
u8 ALIGN_DIRECTIVE fdr_temp_buf[FDR_TEMP_BUF_SIZE];
|
||||
u32 roleCount;
|
||||
u32 handledKeyCount;
|
||||
struct fatbit *handled_roles; /**< mmbit of ROLES (not states) already
|
||||
* handled by this literal */
|
||||
u64a *som_store; /**< array of som locations */
|
||||
|
Loading…
x
Reference in New Issue
Block a user