From d67c7583eac87a4ca98f3ef01dcbbba3a66766d6 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 4 Dec 2015 16:17:28 +1100 Subject: [PATCH] rose: Extend the interpreter to handle more work - Use program for EOD sparse iterator - Use program for literal sparse iterator - Eliminate RoseRole, RosePred, RoseVertexProps::role - Small performance optimizations --- src/rose/block.c | 4 +- src/rose/eod.c | 58 +- src/rose/init.c | 4 +- src/rose/match.c | 97 ++- src/rose/match.h | 24 - src/rose/rose_build_bytecode.cpp | 1105 +++++++++++++----------------- src/rose/rose_build_dump.cpp | 206 +----- src/rose/rose_build_misc.cpp | 6 +- src/rose/rose_dump.cpp | 376 +++------- src/rose/rose_graph.h | 13 +- src/rose/rose_internal.h | 132 ++-- src/rose/rose_program.h | 81 ++- src/rose/runtime.h | 9 - src/scratch.c | 8 +- src/scratch.h | 2 +- 15 files changed, 766 insertions(+), 1359 deletions(-) diff --git a/src/rose/block.c b/src/rose/block.c index ae7d5545..cfcb8341 100644 --- a/src/rose/block.c +++ b/src/rose/block.c @@ -82,8 +82,8 @@ void init_state_for_block(const struct RoseEngine *t, u8 *state) { assert(t); assert(state); - DEBUG_PRINTF("init for Rose %p with %u roles (%u with state indices)\n", - t, t->roleCount, t->rolesWithStateCount); + DEBUG_PRINTF("init for Rose %p with %u state indices\n", t, + t->rolesWithStateCount); // Rose is guaranteed 8-aligned state assert(ISALIGNED_N(state, 8)); diff --git a/src/rose/eod.c b/src/rose/eod.c index 60bf2ea2..dec07b54 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -113,12 +113,11 @@ int roseEodRunIterator(const struct RoseEngine *t, u8 *state, u64a offset, return MO_CONTINUE_MATCHING; } - const struct RoseRole *roleTable = getRoleTable(t); - const struct RosePred *predTable = getPredTable(t); - const struct RoseIterMapping *iterMapBase - = getByOffset(t, t->eodIterMapOffset); + DEBUG_PRINTF("running eod iterator at offset %u\n", t->eodIterOffset); + + const u32 *programTable = getByOffset(t, t->eodProgramTableOffset); 
const struct mmbit_sparse_iter *it = getByOffset(t, t->eodIterOffset); - assert(ISALIGNED(iterMapBase)); + assert(ISALIGNED(programTable)); assert(ISALIGNED(it)); // Sparse iterator state was allocated earlier @@ -133,50 +132,17 @@ int roseEodRunIterator(const struct RoseEngine *t, u8 *state, u64a offset, fatbit_clear(handled_roles); + int work_done = 0; // not read from in this path. + for (; i != MMB_INVALID; i = mmbit_sparse_iter_next(role_state, numStates, i, &idx, it, s)) { DEBUG_PRINTF("pred state %u (iter idx=%u) is on\n", i, idx); - const struct RoseIterMapping *iterMap = iterMapBase + idx; - const struct RoseIterRole *roles = getByOffset(t, iterMap->offset); - assert(ISALIGNED(roles)); - - DEBUG_PRINTF("%u roles to consider\n", iterMap->count); - for (u32 j = 0; j != iterMap->count; j++) { - u32 role = roles[j].role; - assert(role < t->roleCount); - DEBUG_PRINTF("checking role %u, pred %u:\n", role, roles[j].pred); - const struct RoseRole *tr = roleTable + role; - - if (fatbit_isset(handled_roles, t->roleCount, role)) { - DEBUG_PRINTF("role %u already handled by the walk, skip\n", - role); - continue; - } - - // Special case: if this role is a trivial case (pred type simple) - // we don't need to check any history and we already know the pred - // role is on. 
- if (tr->flags & ROSE_ROLE_PRED_SIMPLE) { - DEBUG_PRINTF("pred type is simple, no need for checks\n"); - } else { - assert(roles[j].pred < t->predCount); - const struct RosePred *tp = predTable + roles[j].pred; - if (!roseCheckPredHistory(tp, offset)) { - continue; - } - } - - /* mark role as handled so we don't touch it again in this walk */ - fatbit_set(handled_roles, t->roleCount, role); - - u64a som = 0; - int work_done = 0; - hwlmcb_rv_t rv = - roseRunRoleProgram(t, tr->programOffset, offset, &som, - &(scratch->tctxt), &work_done); - if (rv == HWLM_TERMINATE_MATCHING) { - return MO_HALT_MATCHING; - } + u32 programOffset = programTable[idx]; + u64a som = 0; + if (roseRunRoleProgram(t, programOffset, offset, &som, + &(scratch->tctxt), + &work_done) == HWLM_TERMINATE_MATCHING) { + return MO_HALT_MATCHING; } } diff --git a/src/rose/init.c b/src/rose/init.c index c2eccd40..d2f85f2c 100644 --- a/src/rose/init.c +++ b/src/rose/init.c @@ -80,8 +80,8 @@ void roseInitState(const struct RoseEngine *t, u8 *state) { assert(t); assert(state); - DEBUG_PRINTF("init for Rose %p with %u roles (%u with state indices)\n", - t, t->roleCount, t->rolesWithStateCount); + DEBUG_PRINTF("init for Rose %p with %u state indices)\n", t, + t->rolesWithStateCount); // Rose is guaranteed 8-aligned state assert(ISALIGNED_N(state, 8)); diff --git a/src/rose/match.c b/src/rose/match.c index ac995866..591abcfb 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -1107,10 +1107,10 @@ char roseCheckRootBounds(u64a end, u32 min_bound, u32 max_bound) { } #define PROGRAM_CASE(name) \ - case ROSE_ROLE_INSTR_##name: { \ - DEBUG_PRINTF("instruction: " #name " (%u)\n", ROSE_ROLE_INSTR_##name); \ - const struct ROSE_ROLE_STRUCT_##name *ri = \ - (const struct ROSE_ROLE_STRUCT_##name *)pc; + case ROSE_INSTR_##name: { \ + DEBUG_PRINTF("instruction: " #name " (%u)\n", ROSE_INSTR_##name); \ + const struct ROSE_STRUCT_##name *ri = \ + (const struct ROSE_STRUCT_##name *)pc; #define PROGRAM_NEXT_INSTRUCTION \ 
pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ @@ -1121,26 +1121,28 @@ static really_inline hwlmcb_rv_t roseRunRoleProgram_i(const struct RoseEngine *t, u32 programOffset, u64a end, u64a *som, struct RoseContext *tctxt, char in_anchored, int *work_done) { - assert(programOffset); - DEBUG_PRINTF("program begins at offset %u\n", programOffset); + assert(programOffset); + assert(programOffset < t->size); + const char *pc = getByOffset(t, programOffset); - assert(*(const u8 *)pc != ROSE_ROLE_INSTR_END); + assert(*(const u8 *)pc != ROSE_INSTR_END); for (;;) { assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN)); u8 code = *(const u8 *)pc; - assert(code <= ROSE_ROLE_INSTR_END); + assert(code <= ROSE_INSTR_END); - switch ((enum RoseRoleInstructionCode)code) { + switch ((enum RoseInstructionCode)code) { PROGRAM_CASE(ANCHORED_DELAY) { if (in_anchored && end > t->floatingMinLiteralMatchOffset) { DEBUG_PRINTF("delay until playback\n"); update_depth(tctxt, ri->depth); tctxt->groups |= ri->groups; *work_done = 1; + assert(ri->done_jump); // must progress pc += ri->done_jump; continue; } @@ -1151,16 +1153,29 @@ hwlmcb_rv_t roseRunRoleProgram_i(const struct RoseEngine *t, u32 programOffset, struct core_info *ci = &tctxtToScratch(tctxt)->core_info; if (end != ci->buf_offset + ci->len) { DEBUG_PRINTF("should only match at end of data\n"); + assert(ri->fail_jump); // must progress pc += ri->fail_jump; continue; } } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(CHECK_ROOT_BOUNDS) { + PROGRAM_CASE(CHECK_BOUNDS) { if (!in_anchored && !roseCheckRootBounds(end, ri->min_bound, ri->max_bound)) { DEBUG_PRINTF("failed root bounds check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_NOT_HANDLED) { + struct fatbit *handled = tctxtToScratch(tctxt)->handled_roles; + if (fatbit_set(handled, t->handledKeyCount, ri->key)) { + DEBUG_PRINTF("key %u already set\n", ri->key); + assert(ri->fail_jump); // must progress 
pc += ri->fail_jump; continue; } @@ -1170,6 +1185,7 @@ hwlmcb_rv_t roseRunRoleProgram_i(const struct RoseEngine *t, u32 programOffset, PROGRAM_CASE(CHECK_LOOKAROUND) { if (!roseCheckLookaround(t, ri->index, ri->count, end, tctxt)) { DEBUG_PRINTF("failed lookaround check\n"); + assert(ri->fail_jump); // must progress pc += ri->fail_jump; continue; } @@ -1180,6 +1196,7 @@ hwlmcb_rv_t roseRunRoleProgram_i(const struct RoseEngine *t, u32 programOffset, if (!roseTestLeftfix(t, ri->queue, ri->lag, ri->report, end, tctxt)) { DEBUG_PRINTF("failed lookaround check\n"); + assert(ri->fail_jump); // must progress pc += ri->fail_jump; continue; } @@ -1334,12 +1351,9 @@ hwlmcb_rv_t roseWalkSparseIterator(const struct RoseEngine *t, struct RoseContext *tctxt) { /* assert(!tctxt->in_anchored); */ /* assert(!tctxt->in_anch_playback); */ - const struct RoseRole *roleTable = getRoleTable(t); - const struct RosePred *predTable = getPredTable(t); - const struct RoseIterMapping *iterMapBase - = getByOffset(t, tl->iterMapOffset); + const u32 *iterProgram = getByOffset(t, tl->iterProgramOffset); const struct mmbit_sparse_iter *it = getByOffset(t, tl->iterOffset); - assert(ISALIGNED(iterMapBase)); + assert(ISALIGNED(iterProgram)); assert(ISALIGNED(it)); // Sparse iterator state was allocated earlier @@ -1356,50 +1370,19 @@ hwlmcb_rv_t roseWalkSparseIterator(const struct RoseEngine *t, fatbit_clear(handled_roles); for (; i != MMB_INVALID; - i = mmbit_sparse_iter_next(role_state, numStates, i, &idx, it, s)) { - DEBUG_PRINTF("pred state %u (iter idx=%u) is on\n", i, idx); - const struct RoseIterMapping *iterMap = iterMapBase + idx; - const struct RoseIterRole *roles = getByOffset(t, iterMap->offset); - assert(ISALIGNED(roles)); + i = mmbit_sparse_iter_next(role_state, numStates, i, &idx, it, s)) { + u32 programOffset = iterProgram[idx]; + DEBUG_PRINTF("pred state %u (iter idx=%u) is on -> program %u\n", i, + idx, programOffset); - DEBUG_PRINTF("%u roles to consider\n", iterMap->count); - for 
(u32 j = 0; j != iterMap->count; j++) { - u32 role = roles[j].role; - assert(role < t->roleCount); - DEBUG_PRINTF("checking role %u, pred %u:\n", role, roles[j].pred); - const struct RoseRole *tr = roleTable + role; + // If this bit is switched on in the sparse iterator, it must be + // driving a program. + assert(programOffset); - if (fatbit_isset(handled_roles, t->roleCount, role)) { - DEBUG_PRINTF("role %u already handled by the walk, skip\n", - role); - continue; - } - - // Special case: if this role is a trivial case (pred type simple) - // we don't need to check any history and we already know the pred - // role is on. - if (tr->flags & ROSE_ROLE_PRED_SIMPLE) { - DEBUG_PRINTF("pred type is simple, no need for further" - " checks\n"); - } else { - assert(roles[j].pred < t->predCount); - const struct RosePred *tp = predTable + roles[j].pred; - if (!roseCheckPredHistory(tp, end)) { - continue; - } - } - - /* mark role as handled so we don't touch it again in this walk */ - fatbit_set(handled_roles, t->roleCount, role); - - if (!tr->programOffset) { - continue; - } - u64a som = 0ULL; - if (roseRunRoleProgram_i(t, tr->programOffset, end, &som, tctxt, 0, - &work_done) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } + u64a som = 0ULL; + if (roseRunRoleProgram_i(t, programOffset, end, &som, tctxt, 0, + &work_done) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; } } diff --git a/src/rose/match.h b/src/rose/match.h index a39bebf3..6bcf781e 100644 --- a/src/rose/match.h +++ b/src/rose/match.h @@ -269,30 +269,6 @@ void update_depth(struct RoseContext *tctxt, u8 depth) { tctxt->depth = d; } -static really_inline -int roseCheckHistoryAnch(const struct RosePred *tp, u64a end) { - DEBUG_PRINTF("end %llu min %u max %u\n", end, tp->minBound, tp->maxBound); - if (tp->maxBound == ROSE_BOUND_INF) { - return end >= tp->minBound; - } else { - return end >= tp->minBound && end <= tp->maxBound; - } -} - -// Check that a predecessor's history 
requirements are satisfied. -static really_inline -int roseCheckPredHistory(const struct RosePred *tp, u64a end) { - DEBUG_PRINTF("pred type %u\n", tp->historyCheck); - - if (tp->historyCheck == ROSE_ROLE_HISTORY_ANCH) { - return roseCheckHistoryAnch(tp, end); - } - - assert(tp->historyCheck == ROSE_ROLE_HISTORY_NONE || - tp->historyCheck == ROSE_ROLE_HISTORY_LAST_BYTE); - return 1; -} - /* Note: uses the stashed sparse iter state; cannot be called from * anybody else who is using it */ static rose_inline diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 978d413d..6b6e443f 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -165,52 +165,53 @@ struct left_build_info { }; /** \brief Role instruction model used at compile time. */ -class RoleInstruction { +class RoseInstruction { public: - RoleInstruction() { + RoseInstruction() { memset(&u, 0, sizeof(u)); - u.end.code = ROSE_ROLE_INSTR_END; + u.end.code = ROSE_INSTR_END; } - explicit RoleInstruction(enum RoseRoleInstructionCode c) { + explicit RoseInstruction(enum RoseInstructionCode c) { memset(&u, 0, sizeof(u)); u.end.code = c; } - bool operator<(const RoleInstruction &a) const { + bool operator<(const RoseInstruction &a) const { return memcmp(&u, &a.u, sizeof(u)) < 0; } - bool operator==(const RoleInstruction &a) const { + bool operator==(const RoseInstruction &a) const { return memcmp(&u, &a.u, sizeof(u)) == 0; } - enum RoseRoleInstructionCode code() const { + enum RoseInstructionCode code() const { // Note that this sort of type-punning (relying on identical initial // layout) is explicitly allowed by the C++11 standard. 
- return (enum RoseRoleInstructionCode)u.end.code; + return (enum RoseInstructionCode)u.end.code; } const void *get() const { switch (code()) { - case ROSE_ROLE_INSTR_CHECK_ONLY_EOD: return &u.checkOnlyEod; - case ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS: return &u.checkRootBounds; - case ROSE_ROLE_INSTR_CHECK_LOOKAROUND: return &u.checkLookaround; - case ROSE_ROLE_INSTR_CHECK_LEFTFIX: return &u.checkLeftfix; - case ROSE_ROLE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay; - case ROSE_ROLE_INSTR_SOM_ADJUST: return &u.somAdjust; - case ROSE_ROLE_INSTR_SOM_LEFTFIX: return &u.somLeftfix; - case ROSE_ROLE_INSTR_TRIGGER_INFIX: return &u.triggerInfix; - case ROSE_ROLE_INSTR_TRIGGER_SUFFIX: return &u.triggerSuffix; - case ROSE_ROLE_INSTR_REPORT: return &u.report; - case ROSE_ROLE_INSTR_REPORT_CHAIN: return &u.reportChain; - case ROSE_ROLE_INSTR_REPORT_EOD: return &u.reportEod; - case ROSE_ROLE_INSTR_REPORT_SOM_INT: return &u.reportSomInt; - case ROSE_ROLE_INSTR_REPORT_SOM: return &u.reportSom; - case ROSE_ROLE_INSTR_REPORT_SOM_KNOWN: return &u.reportSomKnown; - case ROSE_ROLE_INSTR_SET_STATE: return &u.setState; - case ROSE_ROLE_INSTR_SET_GROUPS: return &u.setGroups; - case ROSE_ROLE_INSTR_END: return &u.end; + case ROSE_INSTR_CHECK_ONLY_EOD: return &u.checkOnlyEod; + case ROSE_INSTR_CHECK_BOUNDS: return &u.checkBounds; + case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled; + case ROSE_INSTR_CHECK_LOOKAROUND: return &u.checkLookaround; + case ROSE_INSTR_CHECK_LEFTFIX: return &u.checkLeftfix; + case ROSE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay; + case ROSE_INSTR_SOM_ADJUST: return &u.somAdjust; + case ROSE_INSTR_SOM_LEFTFIX: return &u.somLeftfix; + case ROSE_INSTR_TRIGGER_INFIX: return &u.triggerInfix; + case ROSE_INSTR_TRIGGER_SUFFIX: return &u.triggerSuffix; + case ROSE_INSTR_REPORT: return &u.report; + case ROSE_INSTR_REPORT_CHAIN: return &u.reportChain; + case ROSE_INSTR_REPORT_EOD: return &u.reportEod; + case ROSE_INSTR_REPORT_SOM_INT: return &u.reportSomInt; + 
case ROSE_INSTR_REPORT_SOM: return &u.reportSom; + case ROSE_INSTR_REPORT_SOM_KNOWN: return &u.reportSomKnown; + case ROSE_INSTR_SET_STATE: return &u.setState; + case ROSE_INSTR_SET_GROUPS: return &u.setGroups; + case ROSE_INSTR_END: return &u.end; } assert(0); return &u.end; @@ -218,85 +219,84 @@ public: size_t length() const { switch (code()) { - case ROSE_ROLE_INSTR_CHECK_ONLY_EOD: return sizeof(u.checkOnlyEod); - case ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS: return sizeof(u.checkRootBounds); - case ROSE_ROLE_INSTR_CHECK_LOOKAROUND: return sizeof(u.checkLookaround); - case ROSE_ROLE_INSTR_CHECK_LEFTFIX: return sizeof(u.checkLeftfix); - case ROSE_ROLE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay); - case ROSE_ROLE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust); - case ROSE_ROLE_INSTR_SOM_LEFTFIX: return sizeof(u.somLeftfix); - case ROSE_ROLE_INSTR_TRIGGER_INFIX: return sizeof(u.triggerInfix); - case ROSE_ROLE_INSTR_TRIGGER_SUFFIX: return sizeof(u.triggerSuffix); - case ROSE_ROLE_INSTR_REPORT: return sizeof(u.report); - case ROSE_ROLE_INSTR_REPORT_CHAIN: return sizeof(u.reportChain); - case ROSE_ROLE_INSTR_REPORT_EOD: return sizeof(u.reportEod); - case ROSE_ROLE_INSTR_REPORT_SOM_INT: return sizeof(u.reportSomInt); - case ROSE_ROLE_INSTR_REPORT_SOM: return sizeof(u.reportSom); - case ROSE_ROLE_INSTR_REPORT_SOM_KNOWN: return sizeof(u.reportSomKnown); - case ROSE_ROLE_INSTR_SET_STATE: return sizeof(u.setState); - case ROSE_ROLE_INSTR_SET_GROUPS: return sizeof(u.setGroups); - case ROSE_ROLE_INSTR_END: return sizeof(u.end); + case ROSE_INSTR_CHECK_ONLY_EOD: return sizeof(u.checkOnlyEod); + case ROSE_INSTR_CHECK_BOUNDS: return sizeof(u.checkBounds); + case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled); + case ROSE_INSTR_CHECK_LOOKAROUND: return sizeof(u.checkLookaround); + case ROSE_INSTR_CHECK_LEFTFIX: return sizeof(u.checkLeftfix); + case ROSE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay); + case ROSE_INSTR_SOM_ADJUST: return 
sizeof(u.somAdjust); + case ROSE_INSTR_SOM_LEFTFIX: return sizeof(u.somLeftfix); + case ROSE_INSTR_TRIGGER_INFIX: return sizeof(u.triggerInfix); + case ROSE_INSTR_TRIGGER_SUFFIX: return sizeof(u.triggerSuffix); + case ROSE_INSTR_REPORT: return sizeof(u.report); + case ROSE_INSTR_REPORT_CHAIN: return sizeof(u.reportChain); + case ROSE_INSTR_REPORT_EOD: return sizeof(u.reportEod); + case ROSE_INSTR_REPORT_SOM_INT: return sizeof(u.reportSomInt); + case ROSE_INSTR_REPORT_SOM: return sizeof(u.reportSom); + case ROSE_INSTR_REPORT_SOM_KNOWN: return sizeof(u.reportSomKnown); + case ROSE_INSTR_SET_STATE: return sizeof(u.setState); + case ROSE_INSTR_SET_GROUPS: return sizeof(u.setGroups); + case ROSE_INSTR_END: return sizeof(u.end); } return 0; } union { - ROSE_ROLE_STRUCT_CHECK_ONLY_EOD checkOnlyEod; - ROSE_ROLE_STRUCT_CHECK_ROOT_BOUNDS checkRootBounds; - ROSE_ROLE_STRUCT_CHECK_LOOKAROUND checkLookaround; - ROSE_ROLE_STRUCT_CHECK_LEFTFIX checkLeftfix; - ROSE_ROLE_STRUCT_ANCHORED_DELAY anchoredDelay; - ROSE_ROLE_STRUCT_SOM_ADJUST somAdjust; - ROSE_ROLE_STRUCT_SOM_LEFTFIX somLeftfix; - ROSE_ROLE_STRUCT_TRIGGER_INFIX triggerInfix; - ROSE_ROLE_STRUCT_TRIGGER_SUFFIX triggerSuffix; - ROSE_ROLE_STRUCT_REPORT report; - ROSE_ROLE_STRUCT_REPORT_CHAIN reportChain; - ROSE_ROLE_STRUCT_REPORT_EOD reportEod; - ROSE_ROLE_STRUCT_REPORT_SOM_INT reportSomInt; - ROSE_ROLE_STRUCT_REPORT_SOM reportSom; - ROSE_ROLE_STRUCT_REPORT_SOM_KNOWN reportSomKnown; - ROSE_ROLE_STRUCT_SET_STATE setState; - ROSE_ROLE_STRUCT_SET_GROUPS setGroups; - ROSE_ROLE_STRUCT_END end; + ROSE_STRUCT_CHECK_ONLY_EOD checkOnlyEod; + ROSE_STRUCT_CHECK_BOUNDS checkBounds; + ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled; + ROSE_STRUCT_CHECK_LOOKAROUND checkLookaround; + ROSE_STRUCT_CHECK_LEFTFIX checkLeftfix; + ROSE_STRUCT_ANCHORED_DELAY anchoredDelay; + ROSE_STRUCT_SOM_ADJUST somAdjust; + ROSE_STRUCT_SOM_LEFTFIX somLeftfix; + ROSE_STRUCT_TRIGGER_INFIX triggerInfix; + ROSE_STRUCT_TRIGGER_SUFFIX triggerSuffix; + 
ROSE_STRUCT_REPORT report; + ROSE_STRUCT_REPORT_CHAIN reportChain; + ROSE_STRUCT_REPORT_EOD reportEod; + ROSE_STRUCT_REPORT_SOM_INT reportSomInt; + ROSE_STRUCT_REPORT_SOM reportSom; + ROSE_STRUCT_REPORT_SOM_KNOWN reportSomKnown; + ROSE_STRUCT_SET_STATE setState; + ROSE_STRUCT_SET_GROUPS setGroups; + ROSE_STRUCT_END end; } u; }; struct build_context : boost::noncopyable { - /** \brief Rose Role information. - * These entries are filled in by a number of functions as other tables are - * created. - */ - vector roleTable; - - /** \brief Role program mapping, keyed by index in roleTable. */ - vector> rolePrograms; - /** \brief minimum depth in number of hops from root/anchored root. */ map depths; /** \brief information about engines to the left of a vertex */ map leftfix_info; + /** \brief mapping from suffix to queue index. */ + map suffixes; + + /** \brief Mapping from vertex to key, for vertices with a + * CHECK_NOT_HANDLED instruction. */ + ue2::unordered_map handledKeys; + /** \brief Number of roles with a state bit. - * This set by buildInitialRoleTable() and should be constant throughout + * + * This is set by assignStateIndices() and should be constant throughout * the rest of the compile. */ size_t numStates = 0; - // Very simple cache from sparse iter to offset, used when building up - // iterators in early misc. + /** \brief Very simple cache from sparse iter to offset, used when building + * up iterators in early misc. */ map, u32> iterCache; - /** \brief maps RoseRole index to a list of RosePred indices */ - map > rolePredecessors; + /** \brief LookEntry list cache, so that we don't have to go scanning + * through the full list to find cases we've used already. */ + ue2::unordered_map, size_t> lookaround_cache; /** \brief Lookaround table for Rose roles. */ vector lookaround; - /** \brief Map from literal final ID to a set of non-root role IDs. */ - ue2::unordered_map> litNonRootRoles; - /** \brief State indices, for those roles that have them. 
*/ ue2::unordered_map roleStateIndices; @@ -401,18 +401,6 @@ const NFA *add_nfa_to_blob(build_context &bc, NFA &nfa) { return n; } -/* vertex ordered by their role index */ -static -vector get_ordered_verts(const RoseGraph &g) { - vector verts; - insert(&verts, verts.end(), vertices_range(g)); - sort(verts.begin(), verts.end(), - [&g](const RoseVertex &a, const RoseVertex &b) { - return g[a].role < g[b].role; - }); - return verts; -} - static u32 countRosePrefixes(const vector &roses) { u32 num = 0; @@ -1255,9 +1243,8 @@ bool prepOutfixes(RoseBuildImpl &tbi, build_context &bc, } static -void findSuffixes(const RoseBuildImpl &tbi, QueueIndexFactory &qif, - map *suffixes) { - const RoseGraph &g = tbi.g; +void assignSuffixQueues(RoseBuildImpl &build, build_context &bc) { + const RoseGraph &g = build.g; for (auto v : vertices_range(g)) { if (!g[v].suffix) { @@ -1269,13 +1256,13 @@ void findSuffixes(const RoseBuildImpl &tbi, QueueIndexFactory &qif, DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].idx, s.graph()); // We may have already built this NFA. - if (contains(*suffixes, s)) { + if (contains(bc.suffixes, s)) { continue; } - u32 queue = qif.get_queue(); + u32 queue = build.qif.get_queue(); DEBUG_PRINTF("assigning %p to queue %u\n", s.graph(), queue); - suffixes->insert(make_pair(s, queue)); + bc.suffixes.emplace(s, queue); } } @@ -1300,7 +1287,6 @@ void setSuffixProperties(NFA &n, const suffix_id &suff, static bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc, - map *suffixes, set *no_retrigger_queues) { map > suffixTriggers; findSuffixTriggers(tbi, &suffixTriggers); @@ -1309,7 +1295,7 @@ bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc, // (unique) queue indices, so that we call add_nfa_to_blob in the same // order. 
vector> ordered; - for (const auto &e : *suffixes) { + for (const auto &e : bc.suffixes) { ordered.emplace_back(e.second, e.first); } sort(begin(ordered), end(ordered)); @@ -1404,11 +1390,10 @@ void buildCountingMiracles(RoseBuildImpl &build, build_context &bc) { static bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif, - map *suffixes, set *no_retrigger_queues, u32 *leftfixBeginQueue) { - findSuffixes(tbi, qif, suffixes); + assignSuffixQueues(tbi, bc); - if (!buildSuffixes(tbi, bc, suffixes, no_retrigger_queues)) { + if (!buildSuffixes(tbi, bc, no_retrigger_queues)) { return false; } @@ -2242,24 +2227,6 @@ u32 buildLastByteIter(const RoseGraph &g, build_context &bc) { return addIteratorToTable(bc, iter); } -#ifdef DEBUG -static -const char *describeHistory(RoseRoleHistory history) { - switch (history) { - case ROSE_ROLE_HISTORY_NONE: - return "NONE"; - case ROSE_ROLE_HISTORY_ANCH: - return "ANCH (previous role at fixed offset)"; - case ROSE_ROLE_HISTORY_LAST_BYTE: - return "LAST_BYTE (previous role matches only at EOD)"; - case ROSE_ROLE_HISTORY_INVALID: - return "INVALID"; - } - assert(0); - return "UNKNOWN"; -} -#endif - static void enforceEngineSizeLimit(const NFA *n, const size_t nfa_size, const Grey &grey) { // Global limit. 
@@ -2308,13 +2275,12 @@ u32 findMinFloatingLiteralMatch(const RoseBuildImpl &tbi) { static void buildSuffixEkeyLists(const RoseBuildImpl &tbi, build_context &bc, const QueueIndexFactory &qif, - const map &suffixes, vector *out) { out->resize(qif.allocated_count()); map > qi_to_ekeys; /* for determinism */ - for (const auto &e : suffixes) { + for (const auto &e : bc.suffixes) { const suffix_id &s = e.first; u32 qi = e.second; set ekeys = reportsToEkeys(all_reports(s), tbi.rm); @@ -2396,7 +2362,6 @@ bool hasInternalReport(const set &reports, const ReportManager &rm) { static void populateNfaInfoBasics(const RoseBuildImpl &build, const build_context &bc, const vector &outfixes, - const map &suffixes, const vector &ekeyListOffsets, const set &no_retrigger_queues, NfaInfo *infos) { @@ -2422,7 +2387,7 @@ void populateNfaInfoBasics(const RoseBuildImpl &build, const build_context &bc, } // Mark suffixes that only trigger external reports. - for (const auto &e : suffixes) { + for (const auto &e : bc.suffixes) { const suffix_id &s = e.first; u32 qi = e.second; @@ -2437,7 +2402,7 @@ void populateNfaInfoBasics(const RoseBuildImpl &build, const build_context &bc, if (!g[v].suffix) { continue; } - u32 qi = suffixes.at(g[v].suffix); + u32 qi = bc.suffixes.at(g[v].suffix); if (build.isInETable(v)) { infos[qi].eod = 1; } @@ -2622,46 +2587,61 @@ getLiteralInfoByFinalId(const RoseBuildImpl &build, u32 final_id) { * fail_jump/done_jump targets set correctly. 
*/ static -vector -flattenRoleProgram(const vector> &program) { - vector out; +vector +flattenRoleProgram(const vector> &programs) { + vector out; vector offsets; // offset of each instruction (bytes) vector targets; // jump target for each instruction + DEBUG_PRINTF("%zu programs\n", programs.size()); + size_t curr_offset = 0; - for (const auto &prog : program) { - for (const auto &ri : prog) { + for (const auto &program : programs) { + DEBUG_PRINTF("program with %zu instructions\n", program.size()); + for (const auto &ri : program) { out.push_back(ri); offsets.push_back(curr_offset); curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); } - for (size_t i = 0; i < prog.size(); i++) { + for (size_t i = 0; i < program.size(); i++) { targets.push_back(curr_offset); } } // Add an END instruction. - out.emplace_back(ROSE_ROLE_INSTR_END); + out.emplace_back(ROSE_INSTR_END); offsets.push_back(curr_offset); targets.push_back(curr_offset); + assert(targets.size() == out.size()); + assert(offsets.size() == out.size()); + for (size_t i = 0; i < out.size(); i++) { auto &ri = out[i]; switch (ri.code()) { - case ROSE_ROLE_INSTR_ANCHORED_DELAY: + case ROSE_INSTR_ANCHORED_DELAY: + assert(targets[i] > offsets[i]); // jumps always progress ri.u.anchoredDelay.done_jump = targets[i] - offsets[i]; break; - case ROSE_ROLE_INSTR_CHECK_ONLY_EOD: + case ROSE_INSTR_CHECK_ONLY_EOD: + assert(targets[i] > offsets[i]); ri.u.checkOnlyEod.fail_jump = targets[i] - offsets[i]; break; - case ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS: - ri.u.checkRootBounds.fail_jump = targets[i] - offsets[i]; + case ROSE_INSTR_CHECK_BOUNDS: + assert(targets[i] > offsets[i]); + ri.u.checkBounds.fail_jump = targets[i] - offsets[i]; break; - case ROSE_ROLE_INSTR_CHECK_LOOKAROUND: + case ROSE_INSTR_CHECK_NOT_HANDLED: + assert(targets[i] > offsets[i]); + ri.u.checkNotHandled.fail_jump = targets[i] - offsets[i]; + break; + case ROSE_INSTR_CHECK_LOOKAROUND: + assert(targets[i] > offsets[i]); ri.u.checkLookaround.fail_jump 
= targets[i] - offsets[i]; break; - case ROSE_ROLE_INSTR_CHECK_LEFTFIX: + case ROSE_INSTR_CHECK_LEFTFIX: + assert(targets[i] > offsets[i]); ri.u.checkLeftfix.fail_jump = targets[i] - offsets[i]; break; default: @@ -2673,8 +2653,9 @@ flattenRoleProgram(const vector> &program) { } static -u32 writeRoleProgram(build_context &bc, vector &program) { +u32 writeRoleProgram(build_context &bc, vector &program) { DEBUG_PRINTF("writing %zu instructions\n", program.size()); + u32 programOffset = 0; for (const auto &ri : program) { u32 offset = @@ -2685,52 +2666,10 @@ u32 writeRoleProgram(build_context &bc, vector &program) { programOffset = offset; } } + DEBUG_PRINTF("program begins at offset %u\n", programOffset); return programOffset; } -static -void buildRootRolePrograms(const RoseBuildImpl &build, build_context &bc, - vector &literalTable) { - for (u32 id = 0; id < literalTable.size(); id++) { - DEBUG_PRINTF("lit %u\n", id); - const auto &lit_info = **getLiteralInfoByFinalId(build, id).begin(); - - flat_set root_roles; // with programs to run. 
- - for (RoseVertex v : lit_info.vertices) { - if (!build.isRootSuccessor(v)) { - continue; - } - if (build.hasDirectFinalId(v)) { - DEBUG_PRINTF("[skip root role %u as direct]\n", - build.g[v].role); - continue; - } - DEBUG_PRINTF("root role %u\n", build.g[v].role); - root_roles.insert(build.g[v].role); - } - - vector> root_prog; - for (const auto &role : root_roles) { - assert(role < bc.rolePrograms.size()); - const auto &role_prog = bc.rolePrograms[role]; - if (role_prog.empty()) { - continue; - } - root_prog.push_back(role_prog); - } - - RoseLiteral &tl = literalTable[id]; - if (root_prog.empty()) { - tl.rootProgramOffset = 0; - continue; - } - - auto final_program = flattenRoleProgram(root_prog); - tl.rootProgramOffset = writeRoleProgram(bc, final_program); - } -} - static void buildActiveLeftIter(const vector &leftTable, vector &out) { @@ -2780,27 +2719,6 @@ bool hasEodAnchors(const RoseBuildImpl &tbi, const build_context &bc, return false; } -static -void fetchEodAnchors(map > &eods, - const RoseGraph &g) { - for (auto v : vertices_range(g)) { - if (!g[v].eod_accept) { - continue; - } - - DEBUG_PRINTF("vertex %zu (with %zu preds) fires on EOD\n", g[v].idx, - in_degree(v, g)); - - assert(!g[v].reports.empty()); - for (const auto r : g[v].reports) { - // In-edges go into eod list. 
- for (const auto &e : in_edges_range(v, g)) { - eods[r].push_back(e); - } - } - } -} - /* creates (and adds to rose) a sparse iterator visiting pred states/roles, * returns a pair: * - the offset of the itermap @@ -2808,10 +2726,12 @@ void fetchEodAnchors(map > &eods, */ static pair addPredSparseIter(build_context &bc, - const map > &predStates) { + const map &predPrograms) { vector keys; - for (u32 k : predStates | map_keys) { - keys.push_back(k); + vector programTable; + for (const auto &elem : predPrograms) { + keys.push_back(elem.first); + programTable.push_back(elem.second); } vector iter; @@ -2819,121 +2739,10 @@ pair addPredSparseIter(build_context &bc, assert(!iter.empty()); DEBUG_PRINTF("iter size = %zu\n", iter.size()); - // Build mapping tables and add to iter table u32 iterOffset = addIteratorToTable(bc, iter); - - vector itermap; - for (const auto &p : predStates) { - u32 iterRoleOffset = add_to_engine_blob(bc, p.second.begin(), - p.second.end()); - itermap.push_back(RoseIterMapping()); - itermap.back().offset = iterRoleOffset; - itermap.back().count = verify_u32(p.second.size()); - } - u32 iterMapOffset = add_to_engine_blob(bc, itermap.begin(), itermap.end()); - - return make_pair(iterMapOffset, iterOffset); -} - -static -void createPred(const RoseBuildImpl &tbi, build_context &bc, - const RoseEdge &e, vector &predTable) { - const RoseGraph &g = tbi.g; - - DEBUG_PRINTF("building pred %zu of type %s\n", predTable.size(), - describeHistory(g[e].history)); - RoseVertex u = source(e, g); - RoseVertex v = target(e, g); - - u32 lit_length = 0; - if (!g[v].eod_accept) { - // Use the minimum literal length. - lit_length = verify_u32(tbi.minLiteralLen(v)); - } - - bc.rolePredecessors[g[v].role].push_back(verify_u32(predTable.size())); - - predTable.push_back(RosePred()); - RosePred &tp = predTable.back(); - memset(&tp, 0, sizeof(tp)); - tp.role = g[u].role; - tp.minBound = g[e].minBound + lit_length; - tp.maxBound = g[e].maxBound == ROSE_BOUND_INF ? 
ROSE_BOUND_INF - : g[e].maxBound + lit_length; - - // Find the history scheme appropriate to this edge. Note that these may be - // updated later, as the history collected by the predecessor role is - // dependent on all its out edges. - tp.historyCheck = g[e].history; - if (tp.historyCheck == ROSE_ROLE_HISTORY_ANCH) { - assert(g[u].max_offset != ROSE_BOUND_INF); - /* pred role does not need to know about history scheme */ - DEBUG_PRINTF("absing (%u,%u + %u) u%u/%zu v%u/%zu\n", tp.minBound, - tp.maxBound, g[u].max_offset, g[u].role, g[u].idx, - g[v].role, g[v].idx); - tp.minBound += g[u].max_offset; /* make absolute */ - if (tp.maxBound != ROSE_BOUND_INF) { - tp.maxBound += g[u].max_offset; /* make absolute */ - } - } - - if (tp.historyCheck == ROSE_ROLE_HISTORY_NONE) { - tp.minBound = 0; - } - - DEBUG_PRINTF("built pred %zu of %u %u %hhu:%s\n", predTable.size() - 1, - tp.minBound, tp.maxBound, tp.historyCheck, - describeHistory((RoseRoleHistory)tp.historyCheck)); -} - -/* returns a pair containing the iter map offset and iter offset */ -static -pair buildEodAnchorRoles(RoseBuildImpl &tbi, build_context &bc, - vector &predTable) { - const RoseGraph &g = tbi.g; - map > eods; - fetchEodAnchors(eods, g); - - if (eods.empty()) { - DEBUG_PRINTF("no EOD anchors\n"); - return {0, 0}; - } - - // pred state id -> role/pred entries - map > predStates; - - for (const auto &er : eods) { - // Create a role to fire this particular report. - DEBUG_PRINTF("creating EOD accept role %zu for report %u\n", - bc.roleTable.size(), er.first); - bc.roleTable.push_back(RoseRole()); - RoseRole &tr = bc.roleTable.back(); - memset(&tr, 0, sizeof(tr)); - - bc.rolePrograms.push_back({}); - auto &program = bc.rolePrograms.back(); - auto ri = RoleInstruction(ROSE_ROLE_INSTR_REPORT_EOD); - ri.u.report.report = er.first; - program.push_back(ri); - - // Collect the state IDs of this report's vertices to add to the EOD - // sparse iterator, creating pred entries appropriately. 
- for (const auto &e : er.second) { - RoseVertex v = source(e, g); - DEBUG_PRINTF("vertex %zu has role %u\n", g[v].idx, g[v].role); - assert(contains(bc.roleStateIndices, v)); - u32 predStateIdx = bc.roleStateIndices.at(v); - - createPred(tbi, bc, e, predTable); - RoseIterRole ir = { - (u32)(bc.roleTable.size() - 1), - (u32)(predTable.size() - 1) - }; - predStates[predStateIdx].push_back(ir); - } - } - - return addPredSparseIter(bc, predStates); + u32 programTableOffset = + add_to_engine_blob(bc, begin(programTable), end(programTable)); + return make_pair(programTableOffset, iterOffset); } static @@ -3087,8 +2896,7 @@ bool onlyAtEod(const RoseBuildImpl &tbi, RoseVertex v) { static void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, - vector &program, - ue2::unordered_map, size_t> &lookaround_cache) { + vector &program) { if (!build.cc.grey.roseLookaroundMasks) { return; } @@ -3113,20 +2921,20 @@ void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, DEBUG_PRINTF("role has lookaround\n"); u32 look_idx; - auto it = lookaround_cache.find(look); - if (it != lookaround_cache.end()) { + auto it = bc.lookaround_cache.find(look); + if (it != bc.lookaround_cache.end()) { DEBUG_PRINTF("reusing look at idx %zu\n", it->second); look_idx = verify_u32(it->second); } else { size_t idx = bc.lookaround.size(); - lookaround_cache.emplace(look, idx); + bc.lookaround_cache.emplace(look, idx); insert(&bc.lookaround, bc.lookaround.end(), look); DEBUG_PRINTF("adding look at idx %zu\n", idx); look_idx = verify_u32(idx); } u32 look_count = verify_u32(look.size()); - auto ri = RoleInstruction(ROSE_ROLE_INSTR_CHECK_LOOKAROUND); + auto ri = RoseInstruction(ROSE_INSTR_CHECK_LOOKAROUND); ri.u.checkLookaround.index = look_idx; ri.u.checkLookaround.count = look_count; program.push_back(ri); @@ -3134,7 +2942,7 @@ void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, static void makeRoleCheckLeftfix(RoseBuildImpl &build, 
build_context &bc, RoseVertex v, - vector &program) { + vector &program) { auto it = bc.leftfix_info.find(v); if (it == end(bc.leftfix_info)) { return; @@ -3147,7 +2955,7 @@ void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v, assert(!build.cc.streaming || build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG); - auto ri = RoleInstruction(ROSE_ROLE_INSTR_CHECK_LEFTFIX); + auto ri = RoseInstruction(ROSE_INSTR_CHECK_LEFTFIX); ri.u.checkLeftfix.queue = lni.queue; ri.u.checkLeftfix.lag = build.g[v].left.lag; ri.u.checkLeftfix.report = build.g[v].left.leftfix_report; @@ -3156,7 +2964,7 @@ void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v, static void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc, - RoseVertex v, vector &program) { + RoseVertex v, vector &program) { // Only relevant for roles that can be triggered by the anchored table. if (!build.isAnchored(v)) { return; @@ -3165,7 +2973,7 @@ void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc, // TODO: also limit to matches that can occur after // floatingMinLiteralMatchOffset. 
- auto ri = RoleInstruction(ROSE_ROLE_INSTR_ANCHORED_DELAY); + auto ri = RoseInstruction(ROSE_INSTR_ANCHORED_DELAY); ri.u.anchoredDelay.depth = (u8)min(254U, bc.depths.at(v)); ri.u.anchoredDelay.groups = build.g[v].groups; program.push_back(ri); @@ -3173,7 +2981,7 @@ void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc, static void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, - vector &program) { + vector &program) { const auto &g = build.g; /* we are a suffaig - need to update role to provide som to the @@ -3182,13 +2990,13 @@ void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, if (g[v].left.tracksSom()) { assert(contains(bc.leftfix_info, v)); const left_build_info &lni = bc.leftfix_info.at(v); - auto ri = RoleInstruction(ROSE_ROLE_INSTR_SOM_LEFTFIX); + auto ri = RoseInstruction(ROSE_INSTR_SOM_LEFTFIX); ri.u.somLeftfix.queue = lni.queue; ri.u.somLeftfix.lag = g[v].left.lag; program.push_back(ri); has_som = true; } else if (g[v].som_adjust) { - auto ri = RoleInstruction(ROSE_ROLE_INSTR_SOM_ADJUST); + auto ri = RoseInstruction(ROSE_INSTR_SOM_ADJUST); ri.u.somAdjust.distance = g[v].som_adjust; program.push_back(ri); has_som = true; @@ -3199,19 +3007,17 @@ void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, assert(id < build.rm.numReports()); const Report &ir = build.rm.getReport(id); if (isInternalSomReport(ir)) { - auto ri = - RoleInstruction(has_som ? ROSE_ROLE_INSTR_REPORT_SOM - : ROSE_ROLE_INSTR_REPORT_SOM_INT); + auto ri = RoseInstruction(has_som ? ROSE_INSTR_REPORT_SOM + : ROSE_INSTR_REPORT_SOM_INT); ri.u.report.report = id; program.push_back(ri); } else if (ir.type == INTERNAL_ROSE_CHAIN) { - auto ri = RoleInstruction(ROSE_ROLE_INSTR_REPORT_CHAIN); + auto ri = RoseInstruction(ROSE_INSTR_REPORT_CHAIN); ri.u.report.report = id; program.push_back(ri); } else { - auto ri = - RoleInstruction(has_som ? 
ROSE_ROLE_INSTR_REPORT_SOM_KNOWN - : ROSE_ROLE_INSTR_REPORT); + auto ri = RoseInstruction(has_som ? ROSE_INSTR_REPORT_SOM_KNOWN + : ROSE_INSTR_REPORT); ri.u.report.report = id; program.push_back(ri); } @@ -3220,14 +3026,13 @@ void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, static void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v, - const map &suffixes, - vector &program) { + vector &program) { const auto &g = build.g; if (!g[v].suffix) { return; } - assert(contains(suffixes, g[v].suffix)); - u32 qi = suffixes.at(g[v].suffix); + assert(contains(bc.suffixes, g[v].suffix)); + u32 qi = bc.suffixes.at(g[v].suffix); assert(contains(bc.engineOffsets, qi)); const NFA *nfa = get_nfa_from_blob(bc, qi); u32 suffixEvent; @@ -3242,7 +3047,7 @@ void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v, assert(!g[v].suffix.graph || onlyOneTop(*g[v].suffix.graph)); suffixEvent = MQE_TOP; } - auto ri = RoleInstruction(ROSE_ROLE_INSTR_TRIGGER_SUFFIX); + auto ri = RoseInstruction(ROSE_INSTR_TRIGGER_SUFFIX); ri.u.triggerSuffix.queue = qi; ri.u.triggerSuffix.event = suffixEvent; program.push_back(ri); @@ -3250,21 +3055,21 @@ void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v, static void makeRoleGroups(const rose_group &groups, - vector &program) { + vector &program) { if (!groups) { return; } - auto ri = RoleInstruction(ROSE_ROLE_INSTR_SET_GROUPS); + auto ri = RoseInstruction(ROSE_INSTR_SET_GROUPS); ri.u.setGroups.groups = groups; program.push_back(ri); } static void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, - RoseVertex u, vector &program) { + RoseVertex u, vector &program) { const auto &g = build.g; - vector infix_program; + vector infix_program; for (const auto &e : out_edges_range(u, g)) { RoseVertex v = target(e, g); @@ -3290,7 +3095,7 @@ void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, assert(top < MQE_INVALID); } - auto ri = 
RoleInstruction(ROSE_ROLE_INSTR_TRIGGER_INFIX); + auto ri = RoseInstruction(ROSE_INSTR_TRIGGER_INFIX); ri.u.triggerInfix.queue = lbi.queue; ri.u.triggerInfix.event = top; ri.u.triggerInfix.cancel = g[e].rose_cancel_prev_top; @@ -3312,165 +3117,28 @@ void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, } static -void makeRoleSetState(RoseBuildImpl &build, build_context &bc, RoseVertex v, - vector &program, - u32 *nextStateIndex) { - const auto &g = build.g; - - // Leaf nodes don't need state indices, as they don't have successors. - if (isLeafNode(v, g)) { +void makeRoleSetState(const build_context &bc, RoseVertex v, + vector &program) { + // We only need this instruction if a state index has been assigned to this + // vertex. + auto it = bc.roleStateIndices.find(v); + if (it == end(bc.roleStateIndices)) { return; } - /* TODO: also don't need a state index if all edges are nfa based */ - - u32 idx = (*nextStateIndex)++; - auto ri = RoleInstruction(ROSE_ROLE_INSTR_SET_STATE); + u32 idx = it->second; + auto ri = RoseInstruction(ROSE_INSTR_SET_STATE); ri.u.setState.index = idx; ri.u.setState.depth = (u8)min(254U, bc.depths.at(v)); program.push_back(ri); - bc.roleStateIndices.emplace(v, idx); } static -void createRoleEntry(RoseBuildImpl &tbi, build_context &bc, - RoseVertex v, vector &roleTable, - ue2::unordered_map, size_t> &lookaround_cache, - const map &suffixes, u32 *nextStateIndex) { - RoseGraph &g = tbi.g; - - // set role ID in the graph where we can find it later - u32 roleId = verify_u32(roleTable.size()); - g[v].role = roleId; - // track id if it's a nonroot role for use in buildSparseIter - if (!tbi.isRootSuccessor(v)) { - for (const auto &lit_id : g[v].literals) { - u32 final_id = tbi.literal_info.at(lit_id).final_id; - bc.litNonRootRoles[final_id].insert(roleId); - } - } - - roleTable.push_back(RoseRole()); - RoseRole &tr = roleTable.back(); - memset(&tr, 0, sizeof(tr)); - - DEBUG_PRINTF("creating role %u for i%zu, eod %u, s (%p,%p)\n", 
roleId, - g[v].idx, (u32)g[v].eod_accept, g[v].suffix.graph.get(), - g[v].suffix.haig.get()); - - // Build role program. - - assert(bc.rolePrograms.size() == roleId); - bc.rolePrograms.push_back({}); - vector &program = bc.rolePrograms.back(); - - // First, add program instructions that enforce preconditions without - // effects. - - makeRoleAnchoredDelay(tbi, bc, v, program); - - if (onlyAtEod(tbi, v)) { - DEBUG_PRINTF("only at eod\n"); - program.push_back(RoleInstruction(ROSE_ROLE_INSTR_CHECK_ONLY_EOD)); - } - - makeRoleLookaround(tbi, bc, v, program, lookaround_cache); - makeRoleCheckLeftfix(tbi, bc, v, program); - - // Next, we can add program instructions that have effects. - - makeRoleReports(tbi, bc, v, program); - makeRoleInfixTriggers(tbi, bc, v, program); - makeRoleSuffix(tbi, bc, v, suffixes, program); - makeRoleSetState(tbi, bc, v, program, nextStateIndex); - makeRoleGroups(g[v].groups, program); -} - -static -void writeRolePrograms(build_context &bc) { - assert(bc.roleTable.size() == bc.rolePrograms.size()); - - for (size_t i = 0; i < bc.roleTable.size(); i++) { - auto &role = bc.roleTable[i]; - auto &program = bc.rolePrograms[i]; - - if (program.empty()) { - role.programOffset = 0; - continue; - } - - // Safety check: all precondition checks should occur before - // instructions with effects. - assert(is_partitioned( - begin(program), end(program), [](const RoleInstruction &ri) { - // CHECK_LEFTFIX is the last precondition check. - return ri.code() <= ROSE_ROLE_INSTR_CHECK_LEFTFIX; - })); - - // Apply jump fixups. - auto final_program = flattenRoleProgram({program}); - - // Write into bytecode. - role.programOffset = writeRoleProgram(bc, final_program); - } -} - -// Construct an initial role table containing the basic role information. 
-static -void buildInitialRoleTable(RoseBuildImpl &tbi, build_context &bc, - const map &suffixes) { - DEBUG_PRINTF("building role table\n"); - - const RoseGraph &g = tbi.g; - vector &roleTable = bc.roleTable; - - // Create a list of vertices, ordered by depth. - vector verts; - insert(&verts, verts.end(), vertices(g)); - sort(begin(verts), end(verts), [&bc, &g](const RoseVertex &a, - const RoseVertex &b) { - return tie(bc.depths.at(a), g[a].idx) < tie(bc.depths.at(b), g[b].idx); - }); - - // LookEntry list cache, so that we don't have to go scanning through the - // full list to find cases we've used already. - ue2::unordered_map, size_t> lookaround_cache; - - // Write a role entry for every vertex that represents a real literal. - // Direct reports are skipped. - // We start the state indices from one after the last one used (on the - // anchored root, if it exists). - u32 stateIndex = verify_u32(roleTable.size()); - - for (RoseVertex v : verts) { - if (tbi.isVirtualVertex(v)) { - DEBUG_PRINTF("vertex idx=%zu is virtual\n", g[v].idx); - continue; - } - if (tbi.hasDirectFinalId(v)) { - DEBUG_PRINTF("vertex idx=%zu is direct report\n", g[v].idx); - continue; - } - - assert(!g[v].literals.empty()); - createRoleEntry(tbi, bc, v, roleTable, lookaround_cache, suffixes, - &stateIndex); - } - - bc.numStates = stateIndex; - DEBUG_PRINTF("wrote %zu roles with %u states\n", roleTable.size(), - stateIndex); -} - -static -void makeRoleCheckRootBounds(const RoseBuildImpl &build, RoseVertex v, - const RoseEdge &e, - vector &program) { +void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, + const RoseEdge &e, vector &program) { const RoseGraph &g = build.g; const RoseVertex u = source(e, g); - assert(u == build.root || u == build.anchored_root); - // Use the minimum literal length. u32 lit_length = g[v].eod_accept ? 
0 : verify_u32(build.minLiteralLen(v)); @@ -3491,97 +3159,143 @@ void makeRoleCheckRootBounds(const RoseBuildImpl &build, RoseVertex v, assert(max_bound <= ROSE_BOUND_INF); assert(min_bound <= max_bound); - auto ri = RoleInstruction(ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS); - ri.u.checkRootBounds.min_bound = min_bound; - ri.u.checkRootBounds.max_bound = max_bound; + auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS); + ri.u.checkBounds.min_bound = min_bound; + ri.u.checkBounds.max_bound = max_bound; // This precondition instruction should go near the start of // the program, after the ONLY_EOD check if it's present. auto it = - find_if(begin(program), end(program), [](const RoleInstruction &ri) { - return ri.code() > ROSE_ROLE_INSTR_CHECK_ONLY_EOD; + find_if(begin(program), end(program), [](const RoseInstruction &ri) { + return ri.code() > ROSE_INSTR_CHECK_ONLY_EOD; }); program.insert(it, ri); } -// Construct pred table and sparse iterators over preds. static -void buildPredTable(const RoseBuildImpl &tbi, build_context &bc, - vector &predTable) { - const RoseGraph &g = tbi.g; +vector makeRoleProgram(RoseBuildImpl &build, build_context &bc, + const RoseEdge &e) { + const RoseGraph &g = build.g; + auto v = target(e, g); - // We write our preds out in role index order just to give things some - // repeatability. - vector verts = get_ordered_verts(g); + vector program; - for (RoseVertex v : verts) { - if (tbi.isAnyStart(v) || g[v].role == MO_INVALID_IDX) { + // First, add program instructions that enforce preconditions without + // effects. + + makeRoleAnchoredDelay(build, bc, v, program); + + if (onlyAtEod(build, v)) { + DEBUG_PRINTF("only at eod\n"); + program.push_back(RoseInstruction(ROSE_INSTR_CHECK_ONLY_EOD)); + } + + if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { + makeRoleCheckBounds(build, v, e, program); + } + + makeRoleLookaround(build, bc, v, program); + makeRoleCheckLeftfix(build, bc, v, program); + + // Next, we can add program instructions that have effects. 
+ + makeRoleReports(build, bc, v, program); + makeRoleInfixTriggers(build, bc, v, program); + makeRoleSuffix(build, bc, v, program); + makeRoleSetState(bc, v, program); + makeRoleGroups(g[v].groups, program); + + return program; +} + +static +void findRootEdges(const RoseBuildImpl &build, RoseVertex src, + map> &root_edges_map) { + const auto &g = build.g; + for (const auto &e : out_edges_range(src, g)) { + const auto &v = target(e, g); + if (build.hasDirectFinalId(v)) { + continue; // Skip direct reports. + } + for (auto lit_id : g[v].literals) { + assert(lit_id < build.literal_info.size()); + u32 final_id = build.literal_info.at(lit_id).final_id; + if (final_id != MO_INVALID_IDX) { + root_edges_map[final_id].insert(e); + } + } + } +} + +static +void buildRootRolePrograms(RoseBuildImpl &build, build_context &bc, + vector &literalTable) { + const auto &g = build.g; + + map> root_edges_map; // lit id -> root edges + findRootEdges(build, build.root, root_edges_map); + findRootEdges(build, build.anchored_root, root_edges_map); + + for (u32 id = 0; id < literalTable.size(); id++) { + const auto &root_edges = root_edges_map[id]; + DEBUG_PRINTF("lit %u has %zu root edges\n", id, root_edges.size()); + + // Sort edges by (source, target) vertex indices to ensure + // deterministic program construction. 
+ vector ordered_edges(begin(root_edges), end(root_edges)); + sort(begin(ordered_edges), end(ordered_edges), + [&g](const RoseEdge &a, const RoseEdge &b) { + return tie(g[source(a, g)].idx, g[target(a, g)].idx) < + tie(g[source(b, g)].idx, g[target(b, g)].idx); + }); + + vector> root_prog; + for (const auto &e : ordered_edges) { + DEBUG_PRINTF("edge (%zu,%zu)\n", g[source(e, g)].idx, + g[target(e, g)].idx); + auto role_prog = makeRoleProgram(build, bc, e); + if (role_prog.empty()) { + continue; + } + root_prog.push_back(role_prog); + } + + RoseLiteral &tl = literalTable[id]; + if (root_prog.empty()) { + tl.rootProgramOffset = 0; continue; } - assert(g[v].role < bc.roleTable.size()); - RoseRole &tr = bc.roleTable.at(g[v].role); - - // Assumption: if a vertex is a root role, it must have only one - // predecessor. - assert(!tbi.isRootSuccessor(v) || in_degree(v, g) == 1); - - // Check if we can use a "simple" check, i.e. one pred, bounds [0, - // inf], no overlap and not anchor->float transition. - if (in_degree(v, g) == 1) { - const RoseEdge &e = *in_edges(v, g).first; - RoseVertex u = source(e, g); - DEBUG_PRINTF("single edge: (role=%u)->(role=%u) with bounds " - "[%u, %u]\n", g[u].role, g[v].role, g[e].minBound, - g[e].maxBound); - if (tbi.isAnyStart(u)) { - // Solely root roles can be handled with no check at all (for - // very simple cases), or a bounds check in the role program. 
- assert(u != tbi.root || g[e].maxBound == ROSE_BOUND_INF); - if (u == tbi.root && g[e].minBound == 0) { - DEBUG_PRINTF("root role with .* edge, no pred needed\n"); - continue; /* no pred required */ - } - - tr.flags &= ROSE_ROLE_PRED_CLEAR_MASK; - auto &program = bc.rolePrograms[g[v].role]; - makeRoleCheckRootBounds(tbi, v, e, program); - continue; - } - - assert(!g[u].literals.empty() && !g[v].literals.empty()); - bool pseudo_delay_history = true; - for (u32 ul : g[u].literals) { - pseudo_delay_history = !!tbi.literals.right.at(ul).delay; - } - if (!pseudo_delay_history) { - DEBUG_PRINTF("max_overlap = %zu\n", - tbi.maxLiteralOverlap(u, v)); - } - if (g[e].minBound == 0 && g[e].maxBound == ROSE_BOUND_INF - && (pseudo_delay_history || !tbi.maxLiteralOverlap(u, v))) { - tr.flags &= ROSE_ROLE_PRED_CLEAR_MASK; - tr.flags |= ROSE_ROLE_PRED_SIMPLE; - bc.rolePredecessors[g[v].role].push_back(g[u].role); - continue; - } - } - - assert(in_degree(v, g) >= 1); - tr.flags &= ROSE_ROLE_PRED_CLEAR_MASK; - tr.flags |= ROSE_ROLE_PRED_ANY; - - // Collect in-edges, ordered by the state index of the predecessor. - vector edges = make_vector_from(in_edges(v, g)); - sort(edges.begin(), edges.end(), - EdgeSourceStateCompare(g, bc.roleStateIndices)); - - for (const auto &e : edges) { - createPred(tbi, bc, e, predTable); - } + auto final_program = flattenRoleProgram(root_prog); + tl.rootProgramOffset = writeRoleProgram(bc, final_program); } } +static +void assignStateIndices(const RoseBuildImpl &build, build_context &bc) { + const auto &g = build.g; + + u32 state = 0; + + for (auto v : vertices_range(g)) { + // Virtual vertices (starts, EOD accept vertices) never need state + // indices. + if (build.isVirtualVertex(v)) { + continue; + } + // Leaf nodes don't need state indices, as they don't have successors. 
+ if (isLeafNode(v, g)) { + continue; + } + /* TODO: also don't need a state index if all edges are nfa based */ + bc.roleStateIndices.emplace(v, state++); + } + + DEBUG_PRINTF("assigned %u states (from %zu vertices)\n", state, + num_vertices(g)); + bc.numStates = state; +} + static bool hasUsefulStops(const left_build_info &rbi) { for (u32 i = 0; i < N_CHARS; i++) { @@ -3606,8 +3320,7 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, u32 lagIndex = 0; - vector verts = get_ordered_verts(g); - for (RoseVertex v : verts) { + for (RoseVertex v : vertices_range(g)) { if (!g[v].left) { continue; } @@ -3683,60 +3396,188 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, *laggedRoseCount = lagIndex; } +static +void makeRoleCheckNotHandled(build_context &bc, RoseVertex v, + vector &program) { + auto ri = RoseInstruction(ROSE_INSTR_CHECK_NOT_HANDLED); + + u32 handled_key; + if (contains(bc.handledKeys, v)) { + handled_key = bc.handledKeys.at(v); + } else { + handled_key = verify_u32(bc.handledKeys.size()); + bc.handledKeys.emplace(v, handled_key); + } + + ri.u.checkNotHandled.key = handled_key; + + // This program may be triggered by different predecessors, with different + // offset bounds. We must ensure we put this check/set operation after the + // bounds check to deal with this case. + auto it = + find_if(begin(program), end(program), [](const RoseInstruction &ri) { + return ri.code() > ROSE_INSTR_CHECK_BOUNDS; + }); + program.insert(it, ri); +} + +static +vector makeSparseIterProgram(RoseBuildImpl &build, + build_context &bc, + const RoseEdge &e) { + const RoseGraph &g = build.g; + const RoseVertex v = target(e, g); + + auto program = makeRoleProgram(build, bc, e); + + if (hasGreaterInDegree(1, v, g)) { + // Only necessary when there is more than one pred. 
+ makeRoleCheckNotHandled(bc, v, program); + } + + return program; +} + +static +void buildLitSparseIter(RoseBuildImpl &build, build_context &bc, + vector &verts, RoseLiteral &tl) { + const auto &g = build.g; + + if (verts.empty()) { + // This literal has no non-root roles => no sparse iter + tl.iterOffset = ROSE_OFFSET_INVALID; + tl.iterProgramOffset = 0; + return; + } + + // Deterministic ordering. + sort(begin(verts), end(verts), + [&g](RoseVertex a, RoseVertex b) { return g[a].idx < g[b].idx; }); + + // pred state id -> list of programs + map>> predProgramLists; + + for (const auto &v : verts) { + DEBUG_PRINTF("vertex %zu\n", g[v].idx); + for (const auto &e : in_edges_range(v, g)) { + const auto &u = source(e, g); + if (build.isAnyStart(u)) { + continue; // Root roles are not handled with sparse iterator. + } + + assert(contains(bc.roleStateIndices, u)); + u32 pred_state = bc.roleStateIndices.at(u); + + DEBUG_PRINTF("pred %zu (state %u)\n", g[u].idx, pred_state); + + auto program = makeSparseIterProgram(build, bc, e); + predProgramLists[pred_state].push_back(program); + } + } + + map predPrograms; + for (const auto &e : predProgramLists) { + auto program = flattenRoleProgram(e.second); + u32 offset = writeRoleProgram(bc, program); + predPrograms.emplace(e.first, offset); + } + + tie(tl.iterProgramOffset, tl.iterOffset) = + addPredSparseIter(bc, predPrograms); +} + // Build sparse iterators for literals. static void buildSparseIter(RoseBuildImpl &build, build_context &bc, - vector &literalTable, - const vector &predTable) { + vector &literalTable) { const RoseGraph &g = build.g; - // Construct a mapping from role ids to state indices. - ue2::unordered_map role_to_state; - for (const auto &m : bc.roleStateIndices) { - role_to_state.emplace(g[m.first].role, m.second); + // Find all our non-root roles. 
+ ue2::unordered_map> litNonRootVertices; + for (const auto &v : vertices_range(g)) { + if (build.isRootSuccessor(v)) { + continue; + } + for (const auto &lit_id : g[v].literals) { + u32 final_id = build.literal_info.at(lit_id).final_id; + litNonRootVertices[final_id].push_back(v); + } } for (u32 finalId = 0; finalId != literalTable.size(); ++finalId) { - RoseLiteral &tl = literalTable[finalId]; + buildLitSparseIter(build, bc, litNonRootVertices[finalId], + literalTable[finalId]); + } +} - if (!contains(bc.litNonRootRoles, finalId)) { - // This literal has no nonroot roles => no sparse iter - tl.iterOffset = ROSE_OFFSET_INVALID; - tl.iterMapOffset = ROSE_OFFSET_INVALID; +static +vector makeEodAnchorProgram(RoseBuildImpl &build, + build_context &bc, + const RoseEdge &e) { + const RoseGraph &g = build.g; + const RoseVertex v = target(e, g); + + vector program; + + if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { + makeRoleCheckBounds(build, v, e, program); + } + + if (hasGreaterInDegree(1, v, g)) { + // Only necessary when there is more than one pred. + makeRoleCheckNotHandled(bc, v, program); + } + + for (const auto &report : g[v].reports) { + auto ri = RoseInstruction(ROSE_INSTR_REPORT_EOD); + ri.u.report.report = report; + program.push_back(ri); + } + + return program; +} + +/* returns a pair containing the iter map offset and iter offset */ +static +pair buildEodAnchorRoles(RoseBuildImpl &build, build_context &bc) { + const RoseGraph &g = build.g; + + // pred state id -> list of programs + map>> predProgramLists; + + for (auto v : vertices_range(g)) { + if (!g[v].eod_accept) { continue; } - const auto &roles = bc.litNonRootRoles.at(finalId); - assert(!roles.empty()); + DEBUG_PRINTF("vertex %zu (with %zu preds) fires on EOD\n", g[v].idx, + in_degree(v, g)); - // Collect the state IDs of the predecessors of the roles of this - // literal. 
+ for (const auto &e : in_edges_range(v, g)) { + RoseVertex u = source(e, g); - // pred state id -> role/pred entries - map > predStates; + assert(contains(bc.roleStateIndices, u)); + u32 predStateIdx = bc.roleStateIndices.at(u); - for (u32 r : roles) { - const RoseRole &tr = bc.roleTable.at(r); - if (tr.flags & ROSE_ROLE_PRED_SIMPLE) { - u32 p = bc.rolePredecessors.at(r)[0]; - assert(p != ROSE_OFFSET_INVALID); - RoseIterRole ir = { r, ROSE_OFFSET_INVALID }; - assert(contains(role_to_state, p)); - predStates[role_to_state.at(p)].push_back(ir); - } else { - const vector &myPreds = bc.rolePredecessors.at(r); - for (u32 pred_entry : myPreds) { - u32 p = predTable.at(pred_entry).role; - RoseIterRole ir = { r, pred_entry }; - assert(p < bc.roleTable.size()); - assert(contains(role_to_state, p)); - predStates[role_to_state.at(p)].push_back(ir); - } - } + auto program = makeEodAnchorProgram(build, bc, e); + predProgramLists[predStateIdx].push_back(program); } - - tie(tl.iterMapOffset, tl.iterOffset) = addPredSparseIter(bc, predStates); } + + if (predProgramLists.empty()) { + DEBUG_PRINTF("no eod anchored roles\n"); + return {0, 0}; + } + + map predPrograms; + for (const auto &e : predProgramLists) { + DEBUG_PRINTF("pred %u has %zu programs\n", e.first, e.second.size()); + auto program = flattenRoleProgram(e.second); + u32 offset = writeRoleProgram(bc, program); + predPrograms.emplace(e.first, offset); + } + + return addPredSparseIter(bc, predPrograms); } static @@ -3885,7 +3726,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { bc.depths = findDepths(*this); // Build NFAs - map suffixes; set no_retrigger_queues; bool mpv_as_outfix; prepMpv(*this, bc, &historyRequired, &mpv_as_outfix); @@ -3896,7 +3736,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 outfixEndQueue = qif.allocated_count(); u32 leftfixBeginQueue = outfixEndQueue; - if (!buildNfas(*this, bc, qif, &suffixes, &no_retrigger_queues, + if (!buildNfas(*this, bc, qif, 
&no_retrigger_queues, &leftfixBeginQueue)) { return nullptr; } @@ -3913,14 +3753,9 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { assert(ISALIGNED_16(lit_benefits_size)); vector suffixEkeyLists; - buildSuffixEkeyLists(*this, bc, qif, suffixes, &suffixEkeyLists); + buildSuffixEkeyLists(*this, bc, qif, &suffixEkeyLists); - buildInitialRoleTable(*this, bc, suffixes); - - DEBUG_PRINTF("roletable %zu\n", bc.roleTable.size()); - - vector predTable; - buildPredTable(*this, bc, predTable); + assignStateIndices(*this, bc); u32 laggedRoseCount = 0; vector leftInfoTable; @@ -3930,13 +3765,11 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { vector literalTable; buildLiteralTable(*this, bc, literalTable); - buildSparseIter(*this, bc, literalTable, predTable); + buildSparseIter(*this, bc, literalTable); u32 eodIterOffset; - u32 eodIterMapOffset; - - tie(eodIterMapOffset, eodIterOffset) = buildEodAnchorRoles(*this, bc, - predTable); + u32 eodProgramTableOffset; + tie(eodProgramTableOffset, eodIterOffset) = buildEodAnchorRoles(*this, bc); vector activeLeftIter; buildActiveLeftIter(leftInfoTable, activeLeftIter); @@ -3944,13 +3777,10 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 lastByteOffset = buildLastByteIter(g, bc); // Enforce role table resource limit. - if (bc.roleTable.size() > cc.grey.limitRoseRoleCount) { + if (num_vertices(g) > cc.grey.limitRoseRoleCount) { throw ResourceLimitError(); } - // Write role programs into the engine blob. - writeRolePrograms(bc); - // Write root programs for literals into the engine blob. 
buildRootRolePrograms(*this, bc, literalTable); @@ -4010,10 +3840,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 literalLen = sizeof(RoseLiteral) * literalTable.size(); currOffset = literalOffset + literalLen; - u32 roleOffset = ROUNDUP_N(currOffset, alignof(RoseRole)); - u32 roleLen = sizeof(RoseRole) * bc.roleTable.size(); - currOffset = roleOffset + roleLen; - u32 leftOffset = ROUNDUP_N(currOffset, alignof(LeftNfaInfo)); u32 roseLen = sizeof(LeftNfaInfo) * leftInfoTable.size(); currOffset = leftOffset + roseLen; @@ -4026,10 +3852,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 lookaroundTableLen = sizeof(s8) * bc.lookaround.size(); currOffset = lookaroundTableOffset + lookaroundTableLen; - u32 predOffset = ROUNDUP_N(currOffset, alignof(RosePred)); - u32 predLen = sizeof(RosePred) * predTable.size(); - currOffset = predOffset + predLen; - u32 nfaInfoOffset = ROUNDUP_N(currOffset, sizeof(u32)); u32 nfaInfoLen = sizeof(NfaInfo) * queue_count; currOffset = nfaInfoOffset + nfaInfoLen; @@ -4147,13 +3969,12 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->activeArrayCount = activeArrayCount; engine->activeLeftCount = activeLeftCount; engine->queueCount = queue_count; + engine->handledKeyCount = bc.handledKeys.size(); engine->group_weak_end = group_weak_end; engine->rolesWithStateCount = bc.numStates; - engine->roleOffset = roleOffset; - engine->roleCount = verify_u32(bc.roleTable.size()); engine->leftOffset = leftOffset; engine->roseCount = verify_u32(leftInfoTable.size()); engine->lookaroundTableOffset = lookaroundTableOffset; @@ -4162,8 +3983,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->outfixEndQueue = outfixEndQueue; engine->leftfixBeginQueue = leftfixBeginQueue; engine->initMpvNfa = mpv_as_outfix ? 
0 : MO_INVALID_IDX; - engine->predOffset = predOffset; - engine->predCount = verify_u32(predTable.size()); engine->stateSize = mmbit_size(bc.numStates); engine->anchorStateSize = anchorStateSize; engine->nfaInfoOffset = nfaInfoOffset; @@ -4173,7 +3992,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->multidirectOffset = multidirectOffset; engine->eodIterOffset = eodIterOffset; - engine->eodIterMapOffset = eodIterMapOffset; + engine->eodProgramTableOffset = eodProgramTableOffset; engine->lastByteHistoryIterOffset = lastByteOffset; @@ -4263,7 +4082,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { } NfaInfo *nfa_infos = (NfaInfo *)(ptr + nfaInfoOffset); - populateNfaInfoBasics(*this, bc, outfixes, suffixes, suffixEkeyLists, + populateNfaInfoBasics(*this, bc, outfixes, suffixEkeyLists, no_retrigger_queues, nfa_infos); updateNfaState(bc, &engine->stateOffsets, nfa_infos, &engine->scratchStateSize, &engine->nfaStateSize, @@ -4275,14 +4094,12 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Copy in other tables copy_bytes(ptr + bc.engine_blob_base, bc.engine_blob); copy_bytes(ptr + engine->literalOffset, literalTable); - copy_bytes(ptr + engine->roleOffset, bc.roleTable); copy_bytes(ptr + engine->leftOffset, leftInfoTable); fillLookaroundTables(ptr + lookaroundTableOffset, ptr + lookaroundReachOffset, bc.lookaround); fillInSomRevNfas(engine.get(), ssm, rev_nfa_table_offset, rev_nfa_offsets); - copy_bytes(ptr + engine->predOffset, predTable); copy_bytes(ptr + engine->anchoredReportMapOffset, art); copy_bytes(ptr + engine->anchoredReportInverseMapOffset, arit); copy_bytes(ptr + engine->multidirectOffset, mdr_reports); diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index d69d28d6..d8048eee 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -78,77 +78,6 @@ string to_string(nfa_kind k) { return "?"; } -// Get the RoseRole associated with a given 
vertex in the build graph from the -// RoseEngine. -static -const RoseRole *getRoseRole(const RoseBuildImpl &build, - const RoseEngine *engine, RoseVertex v) { - if (!engine) { - return nullptr; - } - - u32 role_idx = build.g[v].role; - if (role_idx == MO_INVALID_IDX) { - return nullptr; - } - - const RoseRole *roles = getRoleTable(engine); - return &roles[role_idx]; -} - -#define SKIP_CASE(name) \ - case ROSE_ROLE_INSTR_##name: { \ - const auto *ri = (const struct ROSE_ROLE_STRUCT_##name *)pc; \ - pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ - break; \ - } - -template -const Struct * -findInstruction(const RoseEngine *t, const RoseRole *role) { - if (!role->programOffset) { - return nullptr; - } - - const char *pc = (const char *)t + role->programOffset; - for (;;) { - u8 code = *(const u8 *)pc; - assert(code <= ROSE_ROLE_INSTR_END); - if (code == Opcode) { - return (const Struct *)pc; - } - // Skip to the next instruction. - switch (code) { - SKIP_CASE(ANCHORED_DELAY) - SKIP_CASE(CHECK_ONLY_EOD) - SKIP_CASE(CHECK_ROOT_BOUNDS) - SKIP_CASE(CHECK_LEFTFIX) - SKIP_CASE(CHECK_LOOKAROUND) - SKIP_CASE(SOM_ADJUST) - SKIP_CASE(SOM_LEFTFIX) - SKIP_CASE(TRIGGER_INFIX) - SKIP_CASE(TRIGGER_SUFFIX) - SKIP_CASE(REPORT) - SKIP_CASE(REPORT_CHAIN) - SKIP_CASE(REPORT_EOD) - SKIP_CASE(REPORT_SOM_INT) - SKIP_CASE(REPORT_SOM) - SKIP_CASE(REPORT_SOM_KNOWN) - SKIP_CASE(SET_STATE) - SKIP_CASE(SET_GROUPS) - case ROSE_ROLE_INSTR_END: - return nullptr; - default: - assert(0); - return nullptr; - } - } - - return nullptr; -} - -#undef SKIP_CASE - namespace { class RoseGraphWriter { @@ -174,7 +103,7 @@ public: } os << "[label=\""; - os << "role=" << g[v].role << "[i" << g[v].idx <<"]\\n"; + os << "idx=" << g[v].idx <<"\\n"; for (u32 lit_id : g[v].literals) { writeLiteral(os, lit_id); @@ -198,34 +127,23 @@ public: os << " (rep=" << as_string_list(g[v].reports) << ")"; } - const RoseRole *r = getRoseRole(v); - if (g[v].suffix) { os << "\\nSUFFIX (TOP " << g[v].suffix.top; - if (r) { - 
const auto *ri = - findInstruction(t, r); - if (ri) { - os << ", Q" << ri->queue; - } - } else { - // Can't dump the queue number, but we can identify the suffix. - if (g[v].suffix.graph) { - os << ", graph=" << g[v].suffix.graph.get() - << " " << to_string(g[v].suffix.graph->kind); - } - if (g[v].suffix.castle) { - os << ", castle=" << g[v].suffix.castle.get(); - } - if (g[v].suffix.rdfa) { - os << ", dfa=" << g[v].suffix.rdfa.get(); - } - if (g[v].suffix.haig) { - os << ", haig=" << g[v].suffix.haig.get(); - } - + // Can't dump the queue number, but we can identify the suffix. + if (g[v].suffix.graph) { + os << ", graph=" << g[v].suffix.graph.get() << " " + << to_string(g[v].suffix.graph->kind); } + if (g[v].suffix.castle) { + os << ", castle=" << g[v].suffix.castle.get(); + } + if (g[v].suffix.rdfa) { + os << ", dfa=" << g[v].suffix.rdfa.get(); + } + if (g[v].suffix.haig) { + os << ", haig=" << g[v].suffix.haig.get(); + } + os << ")"; } @@ -247,15 +165,6 @@ public: build.isRootSuccessor(v) ? 
"PREFIX" : "INFIX"; os << "\\nROSE " << roseKind; os << " ("; - if (r) { - const auto *ri = - findInstruction(t, r); - if (ri) { - os << "Q" << ri->queue << ", "; - } - } - os << "report " << g[v].left.leftfix_report << ")"; if (g[v].left.graph) { @@ -348,10 +257,6 @@ private: } } - const RoseRole *getRoseRole(RoseVertex v) const { - return ue2::getRoseRole(build, t, v); - } - set ghost; const RoseBuildImpl &build; const RoseEngine *t; @@ -383,7 +288,7 @@ namespace { struct CompareVertexRole { explicit CompareVertexRole(const RoseGraph &g_in) : g(g_in) {} inline bool operator()(const RoseVertex &a, const RoseVertex &b) const { - return g[a].role < g[b].role; + return g[a].idx < g[b].idx; } private: const RoseGraph &g; @@ -483,7 +388,7 @@ void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) { for (RoseVertex v : verts) { // role info - os << " Role " << g[v].role << ": depth=" << depths.at(v) + os << " Index " << g[v].idx << ": depth=" << depths.at(v) << ", groups=0x" << hex << setw(16) << setfill('0') << g[v].groups << dec; @@ -497,14 +402,14 @@ void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) { os << ", max_offset=" << g[v].max_offset << endl; // pred info for (const auto &ie : in_edges_range(v, g)) { - os << " Predecessor role="; - u32 predRole = g[source(ie, g)].role; - if (predRole == MO_INVALID_IDX) { + const auto &u = source(ie, g); + os << " Predecessor idx="; + if (u == build.root) { os << "ROOT"; - } else if (predRole == g[build.anchored_root].role) { + } else if (u == build.anchored_root) { os << "ANCHORED_ROOT"; } else { - os << predRole; + os << g[u].idx; } os << ": bounds [" << g[ie].minBound << ", "; if (g[ie].maxBound == ROSE_BOUND_INF) { @@ -589,70 +494,6 @@ void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) { dumpTestLiterals(base + "rose_smallblock_test_literals.txt", lits); } -static -CharReach bitvectorToReach(const u8 *reach) { - CharReach cr; - - for (size_t i = 0; i < 256; i++) 
{ - if (reach[i / 8] & (1U << (i % 8))) { - cr.set(i); - - } - } - return cr; -} - -static -void dumpRoseLookaround(const RoseBuildImpl &build, const RoseEngine *t, - const Grey &grey, const string &filename) { - stringstream ss; - ss << grey.dumpPath << filename; - ofstream os(ss.str()); - - const RoseGraph &g = build.g; - - const u8 *base = (const u8 *)t; - const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset); - const u8 *reach_base = base + t->lookaroundReachOffset; - - for (RoseVertex v : vertices_range(g)) { - const RoseRole *role = getRoseRole(build, t, v); - if (!role) { - continue; - } - - const auto *ri = - findInstruction(t, role); - if (!ri) { - continue; - } - - const u32 look_idx = ri->index; - const u32 look_count = ri->count; - - os << "Role " << g[v].role << endl; - os << " literals: " << as_string_list(g[v].literals) << endl; - os << " lookaround: index=" << look_idx << ", count=" << look_count - << endl; - - const s8 *look = look_base + look_idx; - const s8 *look_end = look + look_count; - const u8 *reach = reach_base + look_idx * REACH_BITVECTOR_LEN; - - for (; look < look_end; look++, reach += REACH_BITVECTOR_LEN) { - os << " " << std::setw(4) << std::setfill(' ') << int{*look} - << ": "; - describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT); - os << endl; - } - - os << endl; - } - - os.close(); -} - void dumpRose(const RoseBuild &build_base, const RoseEngine *t, const Grey &grey) { if (!grey.dumpFlags) { @@ -692,9 +533,6 @@ void dumpRose(const RoseBuild &build_base, const RoseEngine *t, f = fopen((grey.dumpPath + "/rose_struct.txt").c_str(), "w"); roseDumpStructRaw(t, f); fclose(f); - - // Lookaround tables. 
- dumpRoseLookaround(build, t, grey, "rose_lookaround.txt"); } } // namespace ue2 diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 109c2d26..9ec26d4c 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -89,12 +89,10 @@ RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, SomSlotManager &ssm_in, next_nfa_report(0) { // add root vertices to graph g[root].idx = vertexIndex++; - g[root].role = MO_INVALID_IDX; g[root].min_offset = 0; g[root].max_offset = 0; g[anchored_root].idx = vertexIndex++; - g[anchored_root].role = MO_INVALID_IDX; g[anchored_root].min_offset = 0; g[anchored_root].max_offset = 0; } @@ -194,7 +192,7 @@ bool RoseBuildImpl::hasLiteralInTable(RoseVertex v, bool RoseBuildImpl::hasNoFloatingRoots() const { for (auto v : adjacent_vertices_range(root, g)) { if (isFloating(v)) { - DEBUG_PRINTF("direct floating root %u\n", g[v].role); + DEBUG_PRINTF("direct floating root %zu\n", g[v].idx); return false; } } @@ -202,7 +200,7 @@ bool RoseBuildImpl::hasNoFloatingRoots() const { /* need to check if the anchored_root has any literals which are too deep */ for (auto v : adjacent_vertices_range(anchored_root, g)) { if (isFloating(v)) { - DEBUG_PRINTF("indirect floating root %u\n", g[v].role); + DEBUG_PRINTF("indirect floating root %zu\n", g[v].idx); return false; } } diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 6ec89064..aa13a627 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -40,7 +40,9 @@ #include "nfa/nfa_build_util.h" #include "nfa/nfa_dump_api.h" #include "nfa/nfa_internal.h" +#include "util/dump_charclass.h" #include "util/multibit_internal.h" +#include "util/multibit.h" #include #include @@ -114,159 +116,78 @@ const HWLM *getSmallBlockMatcher(const RoseEngine *t) { return (const HWLM *)loadFromByteCodeOffset(t, t->sbmatcherOffset); } -static -const RosePred *getPredTable(const RoseEngine *t, u32 *count) { - *count = t->predCount; - return (const RosePred 
*)loadFromByteCodeOffset(t, t->predOffset); -} - -static -u32 literalsWithDepth(const RoseEngine *t, u8 depth) { - u32 n = 0; - const RoseLiteral *tl = getLiteralTable(t); - const RoseLiteral *tl_end = tl + t->literalCount; - - for (; tl != tl_end; ++tl) { - if (tl->minDepth == depth) { - n++; - } - } - return n; -} - static u32 literalsWithDirectReports(const RoseEngine *t) { return t->totalNumLiterals - t->literalCount; } -template +template static -u32 literalsWithProp(const RoseEngine *t, member_type_ptr prop) { - u32 n = 0; +size_t literalsWithPredicate(const RoseEngine *t, Predicate pred) { const RoseLiteral *tl = getLiteralTable(t); const RoseLiteral *tl_end = tl + t->literalCount; - for (; tl != tl_end; ++tl) { - if (tl->*prop) { - n++; - } - } - return n; -} - -template -static -u32 rolesWithPropValue(const RoseEngine *t, member_type RoseRole::*prop, - member_type value) { - u32 n = 0; - const RoseRole *tr = getRoleTable(t); - const RoseRole *tr_end = tr + t->roleCount; - - for (; tr != tr_end; ++tr) { - if (tr->*prop == value) { - n++; - } - } - return n; + return count_if(tl, tl_end, pred); } static -u32 literalsInGroups(const RoseEngine *t, u32 from, u32 to) { - u32 n = 0; - const RoseLiteral *tl = getLiteralTable(t); - const RoseLiteral *tl_end = tl + t->literalCount; +size_t literalsWithDepth(const RoseEngine *t, u8 depth) { + return literalsWithPredicate( + t, [&depth](const RoseLiteral &l) { return l.minDepth == depth; }); +} +static +size_t literalsInGroups(const RoseEngine *t, u32 from, u32 to) { rose_group mask = ~((1ULL << from) - 1); if (to < 64) { mask &= ((1ULL << to) - 1); } - for (; tl != tl_end; ++tl) { - if (tl->groups & mask) { - n++; - } - } - return n; + return literalsWithPredicate( + t, [&mask](const RoseLiteral &l) { return l.groups & mask; }); } static -u32 rolesWithFlag(const RoseEngine *t, u32 flag) { - u32 n = 0; - const RoseRole *tr = getRoleTable(t); - const RoseRole *tr_end = tr + t->roleCount; +CharReach 
bitvectorToReach(const u8 *reach) { + CharReach cr; + + for (size_t i = 0; i < 256; i++) { + if (reach[i / 8] & (1U << (i % 8))) { + cr.set(i); - for (; tr != tr_end; ++tr) { - if (tr->flags & flag) { - n++; } } - return n; + return cr; } -#define HANDLE_CASE(name) \ - case ROSE_ROLE_INSTR_##name: { \ - const auto *ri = (const struct ROSE_ROLE_STRUCT_##name *)pc; \ - pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ - break; \ - } - static -u32 rolesWithInstr(const RoseEngine *t, - enum RoseRoleInstructionCode find_code) { - u32 n = 0; - const RoseRole *tr = getRoleTable(t); - const RoseRole *tr_end = tr + t->roleCount; +void dumpLookaround(ofstream &os, const RoseEngine *t, + const ROSE_STRUCT_CHECK_LOOKAROUND *ri) { + assert(ri); - for (; tr != tr_end; ++tr) { - if (!tr->programOffset) { - continue; - } + const u8 *base = (const u8 *)t; + const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset); + const u8 *reach_base = base + t->lookaroundReachOffset; - const char *pc = (const char *)t + tr->programOffset; - for (;;) { - u8 code = *(const u8 *)pc; - assert(code <= ROSE_ROLE_INSTR_END); - if (code == find_code) { - n++; - goto next_role; - } - switch (code) { - HANDLE_CASE(CHECK_ONLY_EOD) - HANDLE_CASE(CHECK_ROOT_BOUNDS) - HANDLE_CASE(CHECK_LOOKAROUND) - HANDLE_CASE(CHECK_LEFTFIX) - HANDLE_CASE(ANCHORED_DELAY) - HANDLE_CASE(SOM_ADJUST) - HANDLE_CASE(SOM_LEFTFIX) - HANDLE_CASE(TRIGGER_INFIX) - HANDLE_CASE(TRIGGER_SUFFIX) - HANDLE_CASE(REPORT) - HANDLE_CASE(REPORT_CHAIN) - HANDLE_CASE(REPORT_EOD) - HANDLE_CASE(REPORT_SOM_INT) - HANDLE_CASE(REPORT_SOM) - HANDLE_CASE(REPORT_SOM_KNOWN) - HANDLE_CASE(SET_STATE) - HANDLE_CASE(SET_GROUPS) - case ROSE_ROLE_INSTR_END: - goto next_role; - default: - assert(0); - return 0; - } - } - next_role:; + const s8 *look = look_base + ri->index; + const s8 *look_end = look + ri->count; + const u8 *reach = reach_base + ri->index * REACH_BITVECTOR_LEN; + + os << " contents:" << endl; + + for (; look < look_end; look++, 
reach += REACH_BITVECTOR_LEN) { + os << " " << std::setw(4) << std::setfill(' ') << int{*look} + << ": "; + describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT); + os << endl; } - return n; } -#undef HANDLE_CASE - #define PROGRAM_CASE(name) \ - case ROSE_ROLE_INSTR_##name: { \ + case ROSE_INSTR_##name: { \ os << " " << std::setw(4) << std::setfill('0') << (pc - pc_base) \ - << ": " #name " (" << (int)ROSE_ROLE_INSTR_##name << ")" << endl; \ - const auto *ri = (const struct ROSE_ROLE_STRUCT_##name *)pc; + << ": " #name " (" << (int)ROSE_INSTR_##name << ")" << endl; \ + const auto *ri = (const struct ROSE_STRUCT_##name *)pc; #define PROGRAM_NEXT_INSTRUCTION \ pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ @@ -274,11 +195,11 @@ u32 rolesWithInstr(const RoseEngine *t, } static -void dumpRoleProgram(ofstream &os, const char *pc) { +void dumpRoleProgram(ofstream &os, const RoseEngine *t, const char *pc) { const char *pc_base = pc; for (;;) { u8 code = *(const u8 *)pc; - assert(code <= ROSE_ROLE_INSTR_END); + assert(code <= ROSE_INSTR_END); switch (code) { PROGRAM_CASE(ANCHORED_DELAY) { os << " depth " << u32{ri->depth} << endl; @@ -293,17 +214,24 @@ void dumpRoleProgram(ofstream &os, const char *pc) { } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(CHECK_ROOT_BOUNDS) { + PROGRAM_CASE(CHECK_BOUNDS) { os << " min_bound " << ri->min_bound << endl; os << " max_bound " << ri->max_bound << endl; os << " fail_jump +" << ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_NOT_HANDLED) { + os << " key " << ri->key << endl; + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_LOOKAROUND) { os << " index " << ri->index << endl; os << " count " << ri->count << endl; os << " fail_jump +" << ri->fail_jump << endl; + dumpLookaround(os, t, ri); } PROGRAM_NEXT_INSTRUCTION @@ -396,26 +324,27 @@ void dumpRoleProgram(ofstream &os, const char *pc) { #undef PROGRAM_NEXT_INSTRUCTION static -void 
dumpRoseRolePrograms(const RoseEngine *t, const string &filename) { - ofstream os(filename); +void dumpSparseIterPrograms(ofstream &os, const RoseEngine *t, u32 iterOffset, + u32 programTableOffset) { + const auto *it = + (const mmbit_sparse_iter *)loadFromByteCodeOffset(t, iterOffset); + const u32 *programTable = + (const u32 *)loadFromByteCodeOffset(t, programTableOffset); - const RoseRole *roles = getRoleTable(t); - const char *base = (const char *)t; + // Construct a full multibit. + const u32 total_bits = t->rolesWithStateCount; + const vector bits(mmbit_size(total_bits), u8{0xff}); - for (u32 i = 0; i < t->roleCount; i++) { - const RoseRole *role = &roles[i]; - os << "Role " << i << endl; - - if (!role->programOffset) { - os << " " << endl; - continue; - } - - dumpRoleProgram(os, base + role->programOffset); - os << endl; + struct mmbit_sparse_state s[MAX_SPARSE_ITER_STATES]; + u32 idx = 0; + for (u32 i = mmbit_sparse_iter_begin(bits.data(), total_bits, &idx, it, s); + i != MMB_INVALID; + i = mmbit_sparse_iter_next(bits.data(), total_bits, i, &idx, it, s)) { + u32 programOffset = programTable[idx]; + os << "Sparse Iter Program " << idx << " triggered by state " << i + << " @ " << programOffset << ":" << endl; + dumpRoleProgram(os, t, (const char *)t + programOffset); } - - os.close(); } static @@ -427,12 +356,23 @@ void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) { for (u32 i = 0; i < t->literalCount; i++) { const RoseLiteral *lit = &lits[i]; - if (!lit->rootProgramOffset) { - continue; + os << "Literal " << i << endl; + os << "---------------" << endl; + + if (lit->rootProgramOffset) { + os << "Root Program @ " << lit->rootProgramOffset << ":" << endl; + dumpRoleProgram(os, t, base + lit->rootProgramOffset); + } else { + os << "" << endl; + } + + if (lit->iterOffset != ROSE_OFFSET_INVALID) { + dumpSparseIterPrograms(os, t, lit->iterOffset, + lit->iterProgramOffset); + } else { + os << "" << endl; } - os << "Literal " << i << endl; - 
dumpRoleProgram(os, base + lit->rootProgramOffset); os << endl; } @@ -440,37 +380,17 @@ void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) { } static -const char *historyName(RoseRoleHistory h) { - switch (h) { - case ROSE_ROLE_HISTORY_NONE: - return "history none"; - case ROSE_ROLE_HISTORY_ANCH: - return "history anch"; - case ROSE_ROLE_HISTORY_LAST_BYTE: - return "history last_byte"; - default: - return "unknown"; - } -} +void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) { + ofstream os(filename); -static -void dumpPreds(FILE *f, const RoseEngine *t) { - map counts; - - u32 predCount = 0; - const RosePred *tp = getPredTable(t, &predCount); - const RosePred *tp_end = tp + predCount; - - for (; tp != tp_end; ++tp) { - assert(tp->historyCheck < ROSE_ROLE_HISTORY_INVALID); - counts[(RoseRoleHistory)tp->historyCheck] += 1; + if (t->eodIterOffset) { + dumpSparseIterPrograms(os, t, t->eodIterOffset, + t->eodProgramTableOffset); + } else { + os << "" << endl; } - for (map::const_iterator it = counts.begin(), - ite = counts.end(); - it != ite; ++it) { - fprintf(f, " - %-18s: %u\n", historyName(it->first), it->second); - } + os.close(); } static @@ -805,16 +725,12 @@ void roseDumpText(const RoseEngine *t, FILE *f) { sbtable ? 
hwlmSize(sbtable) : 0, t->smallBlockDistance); fprintf(f, " - literal table : %zu bytes\n", t->literalCount * sizeof(RoseLiteral)); - fprintf(f, " - role table : %zu bytes\n", - t->roleCount * sizeof(RoseRole)); - fprintf(f, " - pred table : %zu bytes\n", - t->predCount * sizeof(RosePred)); fprintf(f, " - role state table : %zu bytes\n", t->rolesWithStateCount * sizeof(u32)); fprintf(f, " - nfa info table : %u bytes\n", t->anchoredReportMapOffset - t->nfaInfoOffset); fprintf(f, " - lookaround table : %u bytes\n", - t->predOffset - t->lookaroundTableOffset); + t->nfaInfoOffset - t->lookaroundTableOffset); fprintf(f, " - lookaround reach : %u bytes\n", t->lookaroundTableOffset - t->lookaroundReachOffset); @@ -839,46 +755,30 @@ void roseDumpText(const RoseEngine *t, FILE *f) { fprintf(f, "\n"); fprintf(f, "initial groups : 0x%016llx\n", t->initialGroups); + fprintf(f, "handled key count : %u\n", t->handledKeyCount); fprintf(f, "\n"); fprintf(f, "number of literals : %u\n", t->totalNumLiterals); fprintf(f, " - delayed : %u\n", t->delay_count); fprintf(f, " - direct report : %u\n", literalsWithDirectReports(t)); - fprintf(f, " - that squash group : %u\n", - literalsWithProp(t, &RoseLiteral::squashesGroup)); + fprintf(f, " - that squash group : %zu\n", + literalsWithPredicate( + t, [](const RoseLiteral &l) { return l.squashesGroup != 0; })); fprintf(f, " - with benefits : %u\n", t->nonbenefits_base_id); - - u32 group_weak_end = t->group_weak_end; + fprintf(f, " - with root program : %zu\n", + literalsWithPredicate(t, [](const RoseLiteral &l) { + return l.rootProgramOffset != 0; + })); + fprintf(f, " - with sparse iter : %zu\n", + literalsWithPredicate(t, [](const RoseLiteral &l) { + return l.iterOffset != ROSE_OFFSET_INVALID; + })); fprintf(f, " - in groups ::\n"); - fprintf(f, " + weak : %u\n", - literalsInGroups(t, 0, group_weak_end)); - fprintf(f, " + general : %u\n", - literalsInGroups(t, group_weak_end, sizeof(u64a) * 8)); - fprintf(f, "number of roles : %u\n", 
t->roleCount); - fprintf(f, " - with state index : %u\n", t->rolesWithStateCount); - fprintf(f, " - with leftfix nfa : %u\n", - rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_LEFTFIX)); - fprintf(f, " - with suffix nfa : %u\n", - rolesWithInstr(t, ROSE_ROLE_INSTR_TRIGGER_SUFFIX)); - fprintf(f, " - with lookaround : %u\n", - rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_LOOKAROUND)); - fprintf(f, " - with reports : %u\n", - rolesWithInstr(t, ROSE_ROLE_INSTR_REPORT)); - fprintf(f, " - with som reports : %u\n", - rolesWithInstr(t, ROSE_ROLE_INSTR_REPORT_SOM_INT)); - fprintf(f, " - match only at end : %u\n", - rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_ONLY_EOD)); - fprintf(f, " + anchored : %u\n", t->anchoredMatches); - - fprintf(f, " - simple preds : %u\n", - rolesWithFlag(t, ROSE_ROLE_PRED_SIMPLE)); - fprintf(f, " - bound root preds : %u\n", - rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS)); - fprintf(f, " - 'any' preds : %u\n", - rolesWithFlag(t, ROSE_ROLE_PRED_ANY)); - fprintf(f, "number of preds : %u\n", t->predCount); - dumpPreds(f, t); + fprintf(f, " + weak : %zu\n", + literalsInGroups(t, 0, t->group_weak_end)); + fprintf(f, " + general : %zu\n", + literalsInGroups(t, t->group_weak_end, sizeof(u64a) * 8)); u32 depth1 = literalsWithDepth(t, 1); u32 depth2 = literalsWithDepth(t, 2); @@ -977,16 +877,13 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, activeArrayCount); DUMP_U32(t, activeLeftCount); DUMP_U32(t, queueCount); - DUMP_U32(t, roleOffset); - DUMP_U32(t, roleCount); - DUMP_U32(t, predOffset); - DUMP_U32(t, predCount); + DUMP_U32(t, handledKeyCount); DUMP_U32(t, leftOffset); DUMP_U32(t, roseCount); DUMP_U32(t, lookaroundTableOffset); DUMP_U32(t, lookaroundReachOffset); DUMP_U32(t, eodIterOffset); - DUMP_U32(t, eodIterMapOffset); + DUMP_U32(t, eodProgramTableOffset); DUMP_U32(t, lastByteHistoryIterOffset); DUMP_U32(t, minWidth); DUMP_U32(t, minWidthExcludingBoundaries); @@ -1048,52 +945,15 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { 
fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine)); } -static -void roseDumpPredStructRaw(const RoseEngine *t, FILE *f) { - u32 pred_count = 0; - const RosePred *pred_table = getPredTable(t, &pred_count); - fprintf(f, "pred_count = %u\n", pred_count); - if (!pred_table) { - return; - } - - for (const RosePred *p = pred_table; p < pred_table + pred_count; p++) { - fprintf(f, "pred[%zu] = {\n", p - pred_table); - DUMP_U32(p, role); - DUMP_U32(p, minBound); - DUMP_U32(p, maxBound); - DUMP_U8(p, historyCheck); - fprintf(f, "}\n"); - } -} - -static -void roseDumpRoleStructRaw(const RoseEngine *t, FILE *f) { - const RoseRole *tr = getRoleTable(t); - const RoseRole *tr_end = tr + t->roleCount; - fprintf(f, "role_count = %zd\n", tr_end - tr); - if (!tr) { - return; - } - - for (const RoseRole *p = tr; p < tr_end; p++) { - fprintf(f, "role[%zu] = {\n", p - tr); - DUMP_U32(p, flags); - DUMP_U32(p, programOffset); - fprintf(f, "}\n"); - } -} - -void roseDumpComponents(const RoseEngine *t, bool dump_raw, const string &base) { +void roseDumpComponents(const RoseEngine *t, bool dump_raw, + const string &base) { dumpComponentInfo(t, base); dumpNfas(t, dump_raw, base); dumpAnchored(t, base); dumpRevComponentInfo(t, base); dumpRevNfas(t, dump_raw, base); - - // Role programs. 
- dumpRoseRolePrograms(t, base + "/rose_role_programs.txt"); - dumpRoseLitPrograms(t, base + "/rose_lit_root_programs.txt"); + dumpRoseLitPrograms(t, base + "/rose_lit_programs.txt"); + dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt"); } void roseDumpInternals(const RoseEngine *t, const string &base) { @@ -1139,14 +999,6 @@ void roseDumpInternals(const RoseEngine *t, const string &base) { roseDumpStructRaw(t, f); fclose(f); - f = fopen((base + "/rose_preds.txt").c_str(), "w"); - roseDumpPredStructRaw(t, f); - fclose(f); - - f = fopen((base + "/rose_roles.txt").c_str(), "w"); - roseDumpRoleStructRaw(t, f); - fclose(f); - roseDumpComponents(t, true, base); } diff --git a/src/rose/rose_graph.h b/src/rose/rose_graph.h index e29fd2dd..b0ac8d11 100644 --- a/src/rose/rose_graph.h +++ b/src/rose/rose_graph.h @@ -39,7 +39,7 @@ #include "ue2common.h" #include "rose_build.h" -#include "rose_internal.h" /* role history, etc */ +#include "rose_internal.h" #include "nfa/nfa_internal.h" // for MO_INVALID_IDX #include "util/charreach.h" #include "util/depth.h" @@ -65,6 +65,14 @@ enum rose_literal_table { ROSE_EVENT //!< "literal-like" events, such as EOD }; +/** \brief Edge history types. */ +enum RoseRoleHistory { + ROSE_ROLE_HISTORY_NONE, //!< no special history + ROSE_ROLE_HISTORY_ANCH, //!< previous role is at a fixed offset + ROSE_ROLE_HISTORY_LAST_BYTE, //!< previous role can only match at EOD + ROSE_ROLE_HISTORY_INVALID //!< history not yet assigned +}; + #include "util/order_check.h" /** \brief Provides information about the (pre|in)fix engine to the left of a @@ -140,9 +148,6 @@ struct RoseVertexProps { /** \brief Report IDs to fire. */ flat_set reports; - /** \brief Role ID for this vertex. These are what end up in the bytecode. */ - u32 role = ~u32{0}; - /** \brief Bitmask of groups that this role sets. 
*/ rose_group groups = 0; diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 00e62eb9..7aae2f22 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -73,18 +73,55 @@ ReportID literalToReport(u32 id) { return id & ~LITERAL_DR_FLAG; } -// Structure representing a literal. Each literal may have many roles. +/** \brief Structure representing a literal. */ struct RoseLiteral { - u32 rootProgramOffset; // role program to run for root roles. - u32 iterOffset; // offset of sparse iterator, relative to rose - u32 iterMapOffset; // offset of the iter mapping table, relative to rose - rose_group groups; // bitset of groups that cause this literal to fire. - u8 minDepth; // the minimum of this literal's roles' depths (for depths > 1) - u8 squashesGroup; /**< literal switches off its group behind it if it sets a - * role */ - u32 delay_mask; /**< bit set indicates that the literal inserts a delayed - * match at the given offset */ - u32 delayIdsOffset; // offset to array of ids to poke in the delay structure + /** + * \brief Role program to run unconditionally when this literal is seen. + * + * Offset is relative to RoseEngine, or zero for no program. + */ + u32 rootProgramOffset; + + /** + * \brief Offset of sparse iterator (mmbit_sparse_iter pointer) over + * predecessor states. + * + * Offset is relative to RoseEngine, set to ROSE_OFFSET_INVALID for no + * iterator. + */ + u32 iterOffset; + + /** + * \brief Table of role programs to run when triggered by the sparse + * iterator, indexed by dense sparse iter index. + * + * Offset is relative to RoseEngine, zero for no programs. + */ + u32 iterProgramOffset; + + /** \brief Bitset of groups that cause this literal to fire. */ + rose_group groups; + + /** + * \brief The minimum depth of this literal in the Rose graph (for depths + * greater than 1). + */ + u8 minDepth; + + /** + * \brief True if this literal switches off its group behind it when it + * sets a role. 
+ */ + u8 squashesGroup; + + /** + * \brief Bitset which indicates that the literal inserts a delayed + * match at the given offset. + */ + u32 delay_mask; + + /** \brief Offset to array of ids to poke in the delay structure. */ + u32 delayIdsOffset; }; /* Allocation of Rose literal ids @@ -179,15 +216,6 @@ struct RoseLiteral { * terminals. */ -// We have different types of role history storage. -enum RoseRoleHistory { - ROSE_ROLE_HISTORY_NONE, // I'm sorry, I don't recall. - ROSE_ROLE_HISTORY_ANCH, // used when previous role is at a fixed offset - ROSE_ROLE_HISTORY_LAST_BYTE, /* used when previous role can only match at the - * last byte of a stream */ - ROSE_ROLE_HISTORY_INVALID // history not yet assigned -}; - struct RoseCountingMiracle { char shufti; /** 1: count shufti class; 0: count a single character */ u8 count; /** minimum number of occurrences for the counting @@ -225,15 +253,6 @@ struct NfaInfo { * matches */ }; -/* We allow different types of role-predecessor relationships. These are stored - * in with the flags */ -#define ROSE_ROLE_PRED_SIMPLE (1U << 21) /**< single [0,inf] pred, no - * offset tracking */ -#define ROSE_ROLE_PRED_ANY (1U << 23) /**< any of our preds can match */ - -#define ROSE_ROLE_PRED_CLEAR_MASK \ - (~(ROSE_ROLE_PRED_SIMPLE | ROSE_ROLE_PRED_ANY)) - #define MAX_STORED_LEFTFIX_LAG 127 /* max leftfix lag that we can store in one * whole byte (OWB) (streaming only). Other * values in OWB are reserved for zombie @@ -241,33 +260,6 @@ struct NfaInfo { #define OWB_ZOMBIE_ALWAYS_YES 128 /* nfa will always answer yes to any rose * prefix checks */ -// Structure representing a literal role. -struct RoseRole { - u32 flags; - u32 programOffset; /**< offset to program to run. 
*/ -}; - -// Structure representing a predecessor relationship -struct RosePred { - u32 role; // index of predecessor role - u32 minBound; // min bound on distance from pred (_ANCH ->absolute offset) - u32 maxBound; /* max bound on distance from pred, or ROSE_BOUND_INF - * (_ANCH -> absolute offset ) */ - u8 historyCheck; // from enum RoseRoleHistory -}; - -// Structure mapping between the dense index produced by the literal sparse -// iterator and a list of roles. -struct RoseIterMapping { - u32 offset; // offset into iter role table - u32 count; // number of roles -}; - -struct RoseIterRole { - u32 role; - u32 pred; -}; - /** * \brief Rose state offsets. * @@ -376,8 +368,6 @@ struct RoseBoundaryReports { // 1c. eod-anchored literal matcher table // 1d. small block table // 2. array of RoseLiteral (literalCount entries) -// 3. array of RoseRole (roleCount entries) -// 4. array of RosePred (predCount entries) // 8. array of NFA offsets, one per queue // 9. array of state offsets, one per queue (+) // 10. array of role ids for the set of all root roles @@ -447,10 +437,10 @@ struct RoseEngine { u32 activeArrayCount; //number of nfas tracked in the active array u32 activeLeftCount; //number of nfas tracked in the active rose array u32 queueCount; /**< number of nfa queues */ - u32 roleOffset; // offset of RoseRole array (bytes) - u32 roleCount; // number of RoseRole entries - u32 predOffset; // offset of RosePred array (bytes) - u32 predCount; // number of RosePred entries + + /** \brief Number of keys used by CHECK_SET_HANDLED instructions in role + * programs. Used to size the handled_roles fatbit in scratch. 
*/ + u32 handledKeyCount; u32 leftOffset; u32 roseCount; @@ -459,7 +449,7 @@ struct RoseEngine { * bytes each) */ u32 eodIterOffset; // or 0 if no eod iterator - u32 eodIterMapOffset; + u32 eodProgramTableOffset; u32 lastByteHistoryIterOffset; // if non-zero @@ -614,22 +604,6 @@ const struct RoseLiteral *getLiteralTable(const struct RoseEngine *t) { return tl; } -static really_inline -const struct RoseRole *getRoleTable(const struct RoseEngine *t) { - const struct RoseRole *r - = (const struct RoseRole *)((const char *)t + t->roleOffset); - assert(ISALIGNED_N(r, 4)); - return r; -} - -static really_inline -const struct RosePred *getPredTable(const struct RoseEngine *t) { - const struct RosePred *p - = (const struct RosePred *)((const char *)t + t->predOffset); - assert(ISALIGNED_N(p, 4)); - return p; -} - static really_inline const struct LeftNfaInfo *getLeftTable(const struct RoseEngine *t) { const struct LeftNfaInfo *r diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 40f013ca..ee747b9d 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -40,54 +40,61 @@ #define ROSE_INSTR_MIN_ALIGN 8U /** \brief Role program instruction opcodes. */ -enum RoseRoleInstructionCode { - ROSE_ROLE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher. - ROSE_ROLE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD. - ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS, //!< Bounds on distance from root. - ROSE_ROLE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check. - ROSE_ROLE_INSTR_CHECK_LEFTFIX, //!< Leftfix must be in accept state. - ROSE_ROLE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM. - ROSE_ROLE_INSTR_SOM_LEFTFIX, //!< Acquire SOM from a leftfix engine. - ROSE_ROLE_INSTR_TRIGGER_INFIX, //!< Trigger an infix engine. - ROSE_ROLE_INSTR_TRIGGER_SUFFIX, //!< Trigger a suffix engine. - ROSE_ROLE_INSTR_REPORT, //!< Fire an ordinary report. - ROSE_ROLE_INSTR_REPORT_CHAIN, //!< Fire a chained report (MPV). 
- ROSE_ROLE_INSTR_REPORT_EOD, //!< Fire a callback at EOD time. - ROSE_ROLE_INSTR_REPORT_SOM_INT, //!< Manipulate SOM only. - ROSE_ROLE_INSTR_REPORT_SOM, //!< Manipulate SOM and report. - ROSE_ROLE_INSTR_REPORT_SOM_KNOWN, //!< Rose role knows its SOM offset. - ROSE_ROLE_INSTR_SET_STATE, //!< Switch a state index on. - ROSE_ROLE_INSTR_SET_GROUPS, //!< Set some literal group bits. - ROSE_ROLE_INSTR_END //!< End of program. +enum RoseInstructionCode { + ROSE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher. + ROSE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD. + ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance from offset 0. + ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled". + ROSE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check. + ROSE_INSTR_CHECK_LEFTFIX, //!< Leftfix must be in accept state. + ROSE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM. + ROSE_INSTR_SOM_LEFTFIX, //!< Acquire SOM from a leftfix engine. + ROSE_INSTR_TRIGGER_INFIX, //!< Trigger an infix engine. + ROSE_INSTR_TRIGGER_SUFFIX, //!< Trigger a suffix engine. + ROSE_INSTR_REPORT, //!< Fire an ordinary report. + ROSE_INSTR_REPORT_CHAIN, //!< Fire a chained report (MPV). + ROSE_INSTR_REPORT_EOD, //!< Fire a callback at EOD time. + ROSE_INSTR_REPORT_SOM_INT, //!< Manipulate SOM only. + ROSE_INSTR_REPORT_SOM, //!< Manipulate SOM and report. + ROSE_INSTR_REPORT_SOM_KNOWN, //!< Rose role knows its SOM offset. + ROSE_INSTR_SET_STATE, //!< Switch a state index on. + ROSE_INSTR_SET_GROUPS, //!< Set some literal group bits. + ROSE_INSTR_END //!< End of program. }; -struct ROSE_ROLE_STRUCT_ANCHORED_DELAY { +struct ROSE_STRUCT_ANCHORED_DELAY { u8 code; //!< From enum RoseRoleInstructionCode. u8 depth; //!< Depth for this state. rose_group groups; //!< Bitmask. u32 done_jump; //!< Jump forward this many bytes if successful. }; -struct ROSE_ROLE_STRUCT_CHECK_ONLY_EOD { +struct ROSE_STRUCT_CHECK_ONLY_EOD { u8 code; //!< From enum RoseRoleInstructionCode. 
u32 fail_jump; //!< Jump forward this many bytes on failure. }; -struct ROSE_ROLE_STRUCT_CHECK_ROOT_BOUNDS { +struct ROSE_STRUCT_CHECK_BOUNDS { u8 code; //!< From enum RoseRoleInstructionCode. u32 min_bound; //!< Min distance from zero. u32 max_bound; //!< Max distance from zero (or ROSE_BOUND_INF). u32 fail_jump; //!< Jump forward this many bytes on failure. }; -struct ROSE_ROLE_STRUCT_CHECK_LOOKAROUND { +struct ROSE_STRUCT_CHECK_NOT_HANDLED { + u8 code; //!< From enum RoseInstructionCode. + u32 key; //!< Key in the "handled_roles" fatbit in scratch. + u32 fail_jump; //!< Jump forward this many bytes if we have seen key before. +}; + +struct ROSE_STRUCT_CHECK_LOOKAROUND { u8 code; //!< From enum RoseRoleInstructionCode. u32 index; u32 count; u32 fail_jump; //!< Jump forward this many bytes on failure. }; -struct ROSE_ROLE_STRUCT_CHECK_LEFTFIX { +struct ROSE_STRUCT_CHECK_LEFTFIX { u8 code; //!< From enum RoseRoleInstructionCode. u32 queue; //!< Queue of leftfix to check. u32 lag; //!< Lag of leftfix for this case. @@ -95,72 +102,72 @@ struct ROSE_ROLE_STRUCT_CHECK_LEFTFIX { u32 fail_jump; //!< Jump forward this many bytes on failure. }; -struct ROSE_ROLE_STRUCT_SOM_ADJUST { +struct ROSE_STRUCT_SOM_ADJUST { u8 code; //!< From enum RoseRoleInstructionCode. u32 distance; //!< Distance to EOM. }; -struct ROSE_ROLE_STRUCT_SOM_LEFTFIX { +struct ROSE_STRUCT_SOM_LEFTFIX { u8 code; //!< From enum RoseRoleInstructionCode. u32 queue; //!< Queue index of leftfix providing SOM. u32 lag; //!< Lag of leftfix for this case. }; -struct ROSE_ROLE_STRUCT_TRIGGER_INFIX { +struct ROSE_STRUCT_TRIGGER_INFIX { u8 code; //!< From enum RoseRoleInstructionCode. u8 cancel; //!< Cancels previous top event. u32 queue; //!< Queue index of infix. u32 event; //!< Queue event, from MQE_*. }; -struct ROSE_ROLE_STRUCT_TRIGGER_SUFFIX { +struct ROSE_STRUCT_TRIGGER_SUFFIX { u8 code; //!< From enum RoseRoleInstructionCode. u32 queue; //!< Queue index of suffix. 
u32 event; //!< Queue event, from MQE_*. }; -struct ROSE_ROLE_STRUCT_REPORT { +struct ROSE_STRUCT_REPORT { u8 code; //!< From enum RoseRoleInstructionCode. ReportID report; }; -struct ROSE_ROLE_STRUCT_REPORT_CHAIN { +struct ROSE_STRUCT_REPORT_CHAIN { u8 code; //!< From enum RoseRoleInstructionCode. ReportID report; }; -struct ROSE_ROLE_STRUCT_REPORT_EOD { +struct ROSE_STRUCT_REPORT_EOD { u8 code; //!< From enum RoseRoleInstructionCode. ReportID report; }; -struct ROSE_ROLE_STRUCT_REPORT_SOM_INT { +struct ROSE_STRUCT_REPORT_SOM_INT { u8 code; //!< From enum RoseRoleInstructionCode. ReportID report; }; -struct ROSE_ROLE_STRUCT_REPORT_SOM { +struct ROSE_STRUCT_REPORT_SOM { u8 code; //!< From enum RoseRoleInstructionCode. ReportID report; }; -struct ROSE_ROLE_STRUCT_REPORT_SOM_KNOWN { +struct ROSE_STRUCT_REPORT_SOM_KNOWN { u8 code; //!< From enum RoseRoleInstructionCode. ReportID report; }; -struct ROSE_ROLE_STRUCT_SET_STATE { +struct ROSE_STRUCT_SET_STATE { u8 code; //!< From enum RoseRoleInstructionCode. u8 depth; //!< Depth for this state. u32 index; //!< State index in multibit. }; -struct ROSE_ROLE_STRUCT_SET_GROUPS { +struct ROSE_STRUCT_SET_GROUPS { u8 code; //!< From enum RoseRoleInstructionCode. rose_group groups; //!< Bitmask. }; -struct ROSE_ROLE_STRUCT_END { +struct ROSE_STRUCT_END { u8 code; //!< From enum RoseRoleInstructionCode. 
}; diff --git a/src/rose/runtime.h b/src/rose/runtime.h index 2a87e3eb..d71c32d6 100644 --- a/src/rose/runtime.h +++ b/src/rose/runtime.h @@ -172,15 +172,6 @@ const struct internal_report *getInternalReport(const struct RoseEngine *t, return reports + intId; } -static really_inline -const struct RoseRole *getRoleByOffset(const struct RoseEngine *t, u32 offset) { - const struct RoseRole *tr = (const void *)((const char *)t + offset); - - assert((size_t)(tr - getRoleTable(t)) < t->roleCount); - DEBUG_PRINTF("get root role %zu\n", tr - getRoleTable(t)); - return tr; -} - #define ANCHORED_MATCH_SENTINEL (~0U) static really_inline diff --git a/src/scratch.c b/src/scratch.c index b0888fdb..30241ab4 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -90,7 +90,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { + bStateSize + tStateSize + fullStateSize + 63 /* cacheline padding */ + nfa_context_size - + fatbit_size(proto->roleCount) /* handled roles */ + + fatbit_size(proto->handledKeyCount) /* handled roles */ + fatbit_size(queueCount) /* active queue array */ + 2 * fatbit_size(deduperCount) /* need odd and even logs */ + 2 * fatbit_size(deduperCount) /* ditto som logs */ @@ -192,7 +192,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { current += fatbit_size(queueCount); s->handled_roles = (struct fatbit *)current; - current += fatbit_size(proto->roleCount); + current += fatbit_size(proto->handledKeyCount); s->deduper.log[0] = (struct fatbit *)current; current += fatbit_size(deduperCount); @@ -312,9 +312,9 @@ hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) { proto->delay_count = rose->delay_count; } - if (rose->roleCount > proto->roleCount) { + if (rose->handledKeyCount > proto->handledKeyCount) { resize = 1; - proto->roleCount = rose->roleCount; + proto->handledKeyCount = rose->handledKeyCount; } if (rose->tStateSize > proto->tStateSize) { diff --git a/src/scratch.h 
b/src/scratch.h index 1d329bda..07e72511 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -180,7 +180,7 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { u32 delay_count; u32 scratchSize; u8 ALIGN_DIRECTIVE fdr_temp_buf[FDR_TEMP_BUF_SIZE]; - u32 roleCount; + u32 handledKeyCount; struct fatbit *handled_roles; /**< mmbit of ROLES (not states) already * handled by this literal */ u64a *som_store; /**< array of som locations */