diff --git a/CMakeLists.txt b/CMakeLists.txt index 3ff47595..f10e5cb5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -470,6 +470,7 @@ set (hs_exec_SRCS src/rose/runtime.h src/rose/rose.h src/rose/rose_internal.h + src/rose/rose_program.h src/rose/rose_types.h src/rose/rose_common.h src/util/bitutils.h diff --git a/src/rose/eod.c b/src/rose/eod.c index 46605f93..60bf2ea2 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -169,11 +169,12 @@ int roseEodRunIterator(const struct RoseEngine *t, u8 *state, u64a offset, /* mark role as handled so we don't touch it again in this walk */ fatbit_set(handled_roles, t->roleCount, role); - DEBUG_PRINTF("fire report for role %u, report=%u\n", role, - tr->reportId); - int rv = scratch->tctxt.cb(offset, tr->reportId, - scratch->tctxt.userCtx); - if (rv == MO_HALT_MATCHING) { + u64a som = 0; + int work_done = 0; + hwlmcb_rv_t rv = + roseRunRoleProgram(t, tr->programOffset, offset, &som, + &(scratch->tctxt), &work_done); + if (rv == HWLM_TERMINATE_MATCHING) { return MO_HALT_MATCHING; } } diff --git a/src/rose/match.c b/src/rose/match.c index d71cbe43..ac995866 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -31,6 +31,7 @@ #include "infix.h" #include "match.h" #include "miracle.h" +#include "rose_program.h" #include "rose.h" #include "som/som_runtime.h" #include "util/bitutils.h" @@ -319,22 +320,18 @@ hwlmcb_rv_t ensureMpvQueueFlushed(const struct RoseEngine *t, static rose_inline hwlmcb_rv_t roseHandleSuffixTrigger(const struct RoseEngine *t, - const struct RoseRole *tr, u64a som, + u32 qi, u32 top, u64a som, u64a end, struct RoseContext *tctxt, char in_anchored) { - DEBUG_PRINTF("woot we have a mask/follower/suffix/... 
role\n"); + DEBUG_PRINTF("suffix qi=%u, top event=%u\n", qi, top); - assert(tr->suffixOffset); - - const struct NFA *nfa - = (const struct NFA *)((const char *)t + tr->suffixOffset); u8 *aa = getActiveLeafArray(t, tctxt->state); struct hs_scratch *scratch = tctxtToScratch(tctxt); - u32 aaCount = t->activeArrayCount; - u32 qCount = t->queueCount; - u32 qi = nfa->queueIndex; + const u32 aaCount = t->activeArrayCount; + const u32 qCount = t->queueCount; struct mq *q = &scratch->queues[qi]; const struct NfaInfo *info = getNfaInfoByQueue(t, qi); + const struct NFA *nfa = getNfaByInfo(t, info); struct core_info *ci = &scratch->core_info; s64a loc = (s64a)end - ci->buf_offset; @@ -368,7 +365,6 @@ hwlmcb_rv_t roseHandleSuffixTrigger(const struct RoseEngine *t, } } - u32 top = tr->suffixEvent; assert(top == MQE_TOP || (top >= MQE_TOP_FIRST && top < MQE_INVALID)); pushQueueSom(q, top, loc, som); @@ -748,14 +744,12 @@ found_miracle: return 1; } -static rose_inline -char roseTestLeftfix(const struct RoseEngine *t, const struct RoseRole *tr, - u64a end, struct RoseContext *tctxt) { +static really_inline +char roseTestLeftfix(const struct RoseEngine *t, u32 qi, u32 leftfixLag, + ReportID leftfixReport, u64a end, + struct RoseContext *tctxt) { struct hs_scratch *scratch = tctxtToScratch(tctxt); struct core_info *ci = &scratch->core_info; - assert(tr->flags & ROSE_ROLE_FLAG_ROSE); - - u32 qi = tr->leftfixQueue; u32 ri = queueToLeftIndex(t, qi); const struct LeftNfaInfo *left = getLeftTable(t) + ri; @@ -763,9 +757,9 @@ char roseTestLeftfix(const struct RoseEngine *t, const struct RoseRole *tr, DEBUG_PRINTF("testing %s %s %u/%u with lag %u (maxLag=%u)\n", (left->transient ? "transient" : "active"), (left->infix ? 
"infix" : "prefix"), - ri, qi, tr->leftfixLag, left->maxLag); + ri, qi, leftfixLag, left->maxLag); - assert(tr->leftfixLag <= left->maxLag); + assert(leftfixLag <= left->maxLag); struct mq *q = scratch->queues + qi; u32 qCount = t->queueCount; @@ -776,7 +770,7 @@ char roseTestLeftfix(const struct RoseEngine *t, const struct RoseRole *tr, return 0; } - if (unlikely(end < tr->leftfixLag)) { + if (unlikely(end < leftfixLag)) { assert(0); /* lag is the literal length */ return 0; } @@ -816,9 +810,9 @@ char roseTestLeftfix(const struct RoseEngine *t, const struct RoseRole *tr, } } - s64a loc = (s64a)end - ci->buf_offset - tr->leftfixLag; + s64a loc = (s64a)end - ci->buf_offset - leftfixLag; assert(loc >= q_cur_loc(q)); - assert(tr->leftfixReport != MO_INVALID_IDX); + assert(leftfixReport != MO_INVALID_IDX); if (left->transient) { s64a start_loc = loc - left->transient; @@ -855,7 +849,7 @@ char roseTestLeftfix(const struct RoseEngine *t, const struct RoseRole *tr, pushQueueNoMerge(q, MQE_END, loc); - char rv = nfaQueueExecRose(q->nfa, q, tr->leftfixReport); + char rv = nfaQueueExecRose(q->nfa, q, leftfixReport); if (!rv) { /* nfa is dead */ DEBUG_PRINTF("leftfix %u died while trying to catch up\n", ri); mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri); @@ -869,12 +863,12 @@ char roseTestLeftfix(const struct RoseEngine *t, const struct RoseRole *tr, q->cur = q->end = 0; pushQueueAt(q, 0, MQE_START, loc); - DEBUG_PRINTF("checking for report %u\n", tr->leftfixReport); + DEBUG_PRINTF("checking for report %u\n", leftfixReport); DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); return rv == MO_MATCHES_PENDING; } else { - DEBUG_PRINTF("checking for report %u\n", tr->leftfixReport); - char rv = nfaInAcceptState(q->nfa, tr->leftfixReport, q); + DEBUG_PRINTF("checking for report %u\n", leftfixReport); + char rv = nfaInAcceptState(q->nfa, leftfixReport, q); DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); return rv; } @@ -882,136 +876,84 @@ char 
roseTestLeftfix(const struct RoseEngine *t, const struct RoseRole *tr, static rose_inline void roseSetRole(const struct RoseEngine *t, u8 *state, - struct RoseContext *tctxt, const struct RoseRole *tr) { - DEBUG_PRINTF("set role %u on: idx=%u, depth=%u, groups=0x%016llx\n", - (u32)(tr - getRoleTable(t)), - tr->stateIndex, tr->depth, tr->groups); - void *role_state = getRoleState(state); - - assert(tr < getRoleTable(t) + t->roleCount); - - int leafNode = !!(tr->stateIndex == MMB_INVALID); - - // If this role is a leaf node, it doesn't have a state index to switch - // on and it doesn't need any history stored or other work done. So we can - // bail. - /* may be a ghost role; still need to set groups */ - if (leafNode) { - tctxt->groups |= tr->groups; - DEBUG_PRINTF("role %u is a leaf node, no work to do.\n", - (u32)(tr - getRoleTable(t))); - return; - } - - // Switch this role on in the state bitvector, checking whether it was set - // already. - char alreadySet = mmbit_set(role_state, t->rolesWithStateCount, - tr->stateIndex); - - // Roles that we've already seen have had most of their bookkeeping done: - // all we need to do is update the offset table if this is an - // offset-tracking role. 
- if (alreadySet) { - DEBUG_PRINTF("role already set\n"); - return; - } - - // If this role's depth is greater than the current depth, update it - update_depth(tctxt, tr); - - // Switch on this role's groups - tctxt->groups |= tr->groups; + struct RoseContext *tctxt, u32 stateIndex, u8 depth) { + DEBUG_PRINTF("state idx=%u, depth=%u\n", stateIndex, depth); + mmbit_set(getRoleState(state), t->rolesWithStateCount, stateIndex); + update_depth(tctxt, depth); } static rose_inline -void roseTriggerInfixes(const struct RoseEngine *t, const struct RoseRole *tr, - u64a start, u64a end, struct RoseContext *tctxt) { +void roseTriggerInfix(const struct RoseEngine *t, u64a start, u64a end, u32 qi, + u32 topEvent, u8 cancel, struct RoseContext *tctxt) { struct core_info *ci = &tctxtToScratch(tctxt)->core_info; - - DEBUG_PRINTF("infix time! @%llu\t(s%llu)\n", end, start); - - assert(tr->infixTriggerOffset); - - u32 qCount = t->queueCount; - u32 arCount = t->activeLeftCount; - struct fatbit *aqa = tctxtToScratch(tctxt)->aqa; - u8 *activeLeftArray = getActiveLeftArray(t, tctxt->state); s64a loc = (s64a)end - ci->buf_offset; - const struct RoseTrigger *curr_r = (const struct RoseTrigger *) - ((const char *)t + tr->infixTriggerOffset); - assert(ISALIGNED_N(curr_r, alignof(struct RoseTrigger))); - assert(curr_r->queue != MO_INVALID_IDX); /* shouldn't be here if no - * triggers */ - do { - u32 qi = curr_r->queue; - u32 ri = queueToLeftIndex(t, qi); - u32 topEvent = curr_r->event; - u8 cancel = curr_r->cancel_prev_top; - assert(topEvent < MQE_INVALID); + u32 ri = queueToLeftIndex(t, qi); + assert(topEvent < MQE_INVALID); - const struct LeftNfaInfo *left = getLeftInfoByQueue(t, qi); - assert(!left->transient); + const struct LeftNfaInfo *left = getLeftInfoByQueue(t, qi); + assert(!left->transient); - DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, topEvent); + DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, topEvent); - struct mq *q = tctxtToScratch(tctxt)->queues + qi; - const 
struct NfaInfo *info = getNfaInfoByQueue(t, qi); + struct mq *q = tctxtToScratch(tctxt)->queues + qi; + const struct NfaInfo *info = getNfaInfoByQueue(t, qi); - char alive = mmbit_set(activeLeftArray, arCount, ri); + u8 *activeLeftArray = getActiveLeftArray(t, tctxt->state); + const u32 arCount = t->activeLeftCount; + char alive = mmbit_set(activeLeftArray, arCount, ri); - if (alive && info->no_retrigger) { - DEBUG_PRINTF("yawn\n"); - goto next_infix; - } + if (alive && info->no_retrigger) { + DEBUG_PRINTF("yawn\n"); + return; + } - if (alive && nfaSupportsZombie(getNfaByInfo(t, info)) && ci->buf_offset - && !fatbit_isset(aqa, qCount, qi) - && isZombie(t, tctxt->state, left)) { - DEBUG_PRINTF("yawn - zombie\n"); - goto next_infix; - } + struct fatbit *aqa = tctxtToScratch(tctxt)->aqa; + const u32 qCount = t->queueCount; - if (cancel) { - DEBUG_PRINTF("dominating top: (re)init\n"); - fatbit_set(aqa, qCount, qi); - initRoseQueue(t, qi, left, tctxt); + if (alive && nfaSupportsZombie(getNfaByInfo(t, info)) && ci->buf_offset && + !fatbit_isset(aqa, qCount, qi) && isZombie(t, tctxt->state, left)) { + DEBUG_PRINTF("yawn - zombie\n"); + return; + } + + if (cancel) { + DEBUG_PRINTF("dominating top: (re)init\n"); + fatbit_set(aqa, qCount, qi); + initRoseQueue(t, qi, left, tctxt); + pushQueueAt(q, 0, MQE_START, loc); + nfaQueueInitState(q->nfa, q); + } else if (!fatbit_set(aqa, qCount, qi)) { + DEBUG_PRINTF("initing %u\n", qi); + initRoseQueue(t, qi, left, tctxt); + if (alive) { + s32 sp = -(s32)loadRoseDelay(t, tctxt->state, left); + pushQueueAt(q, 0, MQE_START, sp); + loadStreamState(q->nfa, q, sp); + } else { pushQueueAt(q, 0, MQE_START, loc); nfaQueueInitState(q->nfa, q); - } else if (!fatbit_set(aqa, qCount, qi)) { - DEBUG_PRINTF("initing %u\n", qi); - initRoseQueue(t, qi, left, tctxt); - if (alive) { - s32 sp = -(s32)loadRoseDelay(t, tctxt->state, left); - pushQueueAt(q, 0, MQE_START, sp); - loadStreamState(q->nfa, q, sp); - } else { - pushQueueAt(q, 0, MQE_START, loc); 
- nfaQueueInitState(q->nfa, q); - } - } else if (!alive) { + } + } else if (!alive) { + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + nfaQueueInitState(q->nfa, q); + } else if (isQueueFull(q)) { + reduceQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); + + if (isQueueFull(q)) { + /* still full - reduceQueue did nothing */ + DEBUG_PRINTF("queue %u full (%u items) -> catching up nfa\n", qi, + q->end - q->cur); + pushQueueNoMerge(q, MQE_END, loc); + nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); + q->cur = q->end = 0; pushQueueAt(q, 0, MQE_START, loc); - nfaQueueInitState(q->nfa, q); - } else if (isQueueFull(q)) { - reduceQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); - - if (isQueueFull(q)) { - /* still full - reduceQueue did nothing */ - DEBUG_PRINTF("queue %u full (%u items) -> catching up nfa\n", - qi, q->end - q->cur); - pushQueueNoMerge(q, MQE_END, loc); - nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); - - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - } } + } - pushQueueSom(q, topEvent, loc, start); - next_infix: - ++curr_r; - } while (curr_r->queue != MO_INVALID_IDX); + pushQueueSom(q, topEvent, loc, start); } static really_inline @@ -1024,10 +966,11 @@ int reachHasBit(const u8 *reach, u8 c) { * are satisfied. 
*/ static rose_inline -int roseCheckLookaround(const struct RoseEngine *t, const struct RoseRole *tr, - u64a end, struct RoseContext *tctxt) { - assert(tr->lookaroundIndex != MO_INVALID_IDX); - assert(tr->lookaroundCount > 0); +int roseCheckLookaround(const struct RoseEngine *t, u32 lookaroundIndex, + u32 lookaroundCount, u64a end, + struct RoseContext *tctxt) { + assert(lookaroundIndex != MO_INVALID_IDX); + assert(lookaroundCount > 0); const struct core_info *ci = &tctxtToScratch(tctxt)->core_info; DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, @@ -1035,12 +978,12 @@ int roseCheckLookaround(const struct RoseEngine *t, const struct RoseRole *tr, const u8 *base = (const u8 *)t; const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset); - const s8 *look = look_base + tr->lookaroundIndex; - const s8 *look_end = look + tr->lookaroundCount; + const s8 *look = look_base + lookaroundIndex; + const s8 *look_end = look + lookaroundCount; assert(look < look_end); const u8 *reach_base = base + t->lookaroundReachOffset; - const u8 *reach = reach_base + tr->lookaroundIndex * REACH_BITVECTOR_LEN; + const u8 *reach = reach_base + lookaroundIndex * REACH_BITVECTOR_LEN; // The following code assumes that the lookaround structures are ordered by // increasing offset. @@ -1113,38 +1056,6 @@ int roseCheckLookaround(const struct RoseEngine *t, const struct RoseRole *tr, return 1; } -static rose_inline -int roseCheckRolePreconditions(const struct RoseEngine *t, - const struct RoseRole *tr, u64a end, - struct RoseContext *tctxt) { - // If this role can only match at end-of-block, then check that it's so. 
- if (tr->flags & ROSE_ROLE_FLAG_ONLY_AT_END) { - struct core_info *ci = &tctxtToScratch(tctxt)->core_info; - if (end != ci->buf_offset + ci->len) { - DEBUG_PRINTF("role %u should only match at end of data, skipping\n", - (u32)(tr - getRoleTable(t))); - return 0; - } - } - - if (tr->lookaroundIndex != MO_INVALID_IDX) { - if (!roseCheckLookaround(t, tr, end, tctxt)) { - DEBUG_PRINTF("failed lookaround check\n"); - return 0; - } - } - - assert(!tr->leftfixQueue || (tr->flags & ROSE_ROLE_FLAG_ROSE)); - if (tr->flags & ROSE_ROLE_FLAG_ROSE) { - if (!roseTestLeftfix(t, tr, end, tctxt)) { - DEBUG_PRINTF("failed leftfix check\n"); - return 0; - } - } - - return 1; -} - static int roseNfaEarliestSom(u64a from_offset, UNUSED u64a offset, UNUSED ReportID id, void *context) { @@ -1154,20 +1065,18 @@ int roseNfaEarliestSom(u64a from_offset, UNUSED u64a offset, UNUSED ReportID id, } static rose_inline -u64a roseGetHaigSom(const struct RoseEngine *t, const struct RoseRole *tr, - UNUSED u64a end, struct RoseContext *tctxt) { - assert(tr->flags & ROSE_ROLE_FLAG_ROSE); - - u32 qi = tr->leftfixQueue; +u64a roseGetHaigSom(const struct RoseEngine *t, const u32 qi, + UNUSED const u32 leftfixLag, + struct RoseContext *tctxt) { u32 ri = queueToLeftIndex(t, qi); UNUSED const struct LeftNfaInfo *left = getLeftTable(t) + ri; DEBUG_PRINTF("testing %s prefix %u/%u with lag %u (maxLag=%u)\n", left->transient ? 
"transient" : "active", ri, qi, - tr->leftfixLag, left->maxLag); + leftfixLag, left->maxLag); - assert(tr->leftfixLag <= left->maxLag); + assert(leftfixLag <= left->maxLag); struct mq *q = tctxtToScratch(tctxt)->queues + qi; @@ -1186,98 +1095,217 @@ u64a roseGetHaigSom(const struct RoseEngine *t, const struct RoseRole *tr, return start; } +static rose_inline +char roseCheckRootBounds(u64a end, u32 min_bound, u32 max_bound) { + assert(max_bound <= ROSE_BOUND_INF); + assert(min_bound <= max_bound); + + if (end < min_bound) { + return 0; + } + return max_bound == ROSE_BOUND_INF || end <= max_bound; +} + +#define PROGRAM_CASE(name) \ + case ROSE_ROLE_INSTR_##name: { \ + DEBUG_PRINTF("instruction: " #name " (%u)\n", ROSE_ROLE_INSTR_##name); \ + const struct ROSE_ROLE_STRUCT_##name *ri = \ + (const struct ROSE_ROLE_STRUCT_##name *)pc; + +#define PROGRAM_NEXT_INSTRUCTION \ + pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ + break; \ + } + static really_inline -hwlmcb_rv_t roseHandleRoleEffects(const struct RoseEngine *t, - const struct RoseRole *tr, u64a end, - struct RoseContext *tctxt, char in_anchored, - int *work_done) { - u64a som = 0ULL; - if (tr->flags & ROSE_ROLE_FLAG_SOM_ADJUST) { - som = end - tr->somAdjust; - DEBUG_PRINTF("som requested som %llu = %llu - %u\n", som, end, - tr->somAdjust); - } else if (tr->flags & ROSE_ROLE_FLAG_SOM_ROSEFIX) { - som = roseGetHaigSom(t, tr, end, tctxt); - DEBUG_PRINTF("som from rosefix %llu\n", som); - } +hwlmcb_rv_t roseRunRoleProgram_i(const struct RoseEngine *t, u32 programOffset, + u64a end, u64a *som, struct RoseContext *tctxt, + char in_anchored, int *work_done) { + assert(programOffset); - if (tr->infixTriggerOffset) { - roseTriggerInfixes(t, tr, som, end, tctxt); - tctxt->groups |= tr->groups; /* groups may have been cleared by infix - * going quiet before */ - } + DEBUG_PRINTF("program begins at offset %u\n", programOffset); - if (tr->suffixOffset) { - hwlmcb_rv_t rv = roseHandleSuffixTrigger(t, tr, som, end, 
tctxt, - in_anchored); - if (rv != HWLM_CONTINUE_MATCHING) { - return rv; - } - } + const char *pc = getByOffset(t, programOffset); - if (tr->reportId != MO_INVALID_IDX) { - hwlmcb_rv_t rv; - if (tr->flags & ROSE_ROLE_FLAG_REPORT_START) { - /* rose role knows its start offset */ - assert(tr->flags & ROSE_ROLE_FLAG_SOM_ROSEFIX); - assert(!(tr->flags & ROSE_ROLE_FLAG_CHAIN_REPORT)); - if (tr->flags & ROSE_ROLE_FLAG_SOM_REPORT) { - rv = roseHandleSomSom(t, tctxt->state, tr->reportId, som, end, - tctxt, in_anchored); - } else { - rv = roseHandleSomMatch(t, tctxt->state, tr->reportId, som, end, - tctxt, in_anchored); + assert(*(const u8 *)pc != ROSE_ROLE_INSTR_END); + + for (;;) { + assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN)); + u8 code = *(const u8 *)pc; + assert(code <= ROSE_ROLE_INSTR_END); + + switch ((enum RoseRoleInstructionCode)code) { + PROGRAM_CASE(ANCHORED_DELAY) { + if (in_anchored && end > t->floatingMinLiteralMatchOffset) { + DEBUG_PRINTF("delay until playback\n"); + update_depth(tctxt, ri->depth); + tctxt->groups |= ri->groups; + *work_done = 1; + pc += ri->done_jump; + continue; + } } - } else { - if (tr->flags & ROSE_ROLE_FLAG_SOM_REPORT) { - /* do som management */ - rv = roseHandleSom(t, tctxt->state, tr->reportId, end, tctxt, - in_anchored); - } else if (tr->flags & ROSE_ROLE_FLAG_CHAIN_REPORT) { - rv = roseCatchUpAndHandleChainMatch(t, tctxt->state, - tr->reportId, end, tctxt, - in_anchored); - } else { - rv = roseHandleMatch(t, tctxt->state, tr->reportId, end, tctxt, - in_anchored); - } - } + PROGRAM_NEXT_INSTRUCTION - if (rv != HWLM_CONTINUE_MATCHING) { - return HWLM_TERMINATE_MATCHING; + PROGRAM_CASE(CHECK_ONLY_EOD) { + struct core_info *ci = &tctxtToScratch(tctxt)->core_info; + if (end != ci->buf_offset + ci->len) { + DEBUG_PRINTF("should only match at end of data\n"); + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_ROOT_BOUNDS) { + if (!in_anchored && + !roseCheckRootBounds(end, ri->min_bound, 
ri->max_bound)) { +                DEBUG_PRINTF("failed root bounds check\n"); +                pc += ri->fail_jump; +                continue; +            } +        } +        PROGRAM_NEXT_INSTRUCTION + +        PROGRAM_CASE(CHECK_LOOKAROUND) { +            if (!roseCheckLookaround(t, ri->index, ri->count, end, tctxt)) { +                DEBUG_PRINTF("failed lookaround check\n"); +                pc += ri->fail_jump; +                continue; +            } +        } +        PROGRAM_NEXT_INSTRUCTION + +        PROGRAM_CASE(CHECK_LEFTFIX) { +            if (!roseTestLeftfix(t, ri->queue, ri->lag, ri->report, end, +                                 tctxt)) { +                DEBUG_PRINTF("failed leftfix check\n"); +                pc += ri->fail_jump; +                continue; +            } +        } +        PROGRAM_NEXT_INSTRUCTION + +        PROGRAM_CASE(SOM_ADJUST) { +            assert(ri->distance <= end); +            *som = end - ri->distance; +            DEBUG_PRINTF("som is (end - %u) = %llu\n", ri->distance, *som); +        } +        PROGRAM_NEXT_INSTRUCTION + +        PROGRAM_CASE(SOM_LEFTFIX) { +            *som = roseGetHaigSom(t, ri->queue, ri->lag, tctxt); +            DEBUG_PRINTF("som from leftfix is %llu\n", *som); +        } +        PROGRAM_NEXT_INSTRUCTION + +        PROGRAM_CASE(TRIGGER_INFIX) { +            roseTriggerInfix(t, *som, end, ri->queue, ri->event, ri->cancel, +                             tctxt); +            *work_done = 1; +        } +        PROGRAM_NEXT_INSTRUCTION + +        PROGRAM_CASE(TRIGGER_SUFFIX) { +            if (roseHandleSuffixTrigger(t, ri->queue, ri->event, *som, end, +                                        tctxt, in_anchored) == +                HWLM_TERMINATE_MATCHING) { +                return HWLM_TERMINATE_MATCHING; +            } +            *work_done = 1; +        } +        PROGRAM_NEXT_INSTRUCTION + +        PROGRAM_CASE(REPORT) { +            if (roseHandleMatch(t, tctxt->state, ri->report, end, tctxt, +                                in_anchored) == HWLM_TERMINATE_MATCHING) { +                return HWLM_TERMINATE_MATCHING; +            } +            *work_done = 1; +        } +        PROGRAM_NEXT_INSTRUCTION + +        PROGRAM_CASE(REPORT_CHAIN) { +            if (roseCatchUpAndHandleChainMatch(t, tctxt->state, ri->report, +                                               end, tctxt, in_anchored) == +                HWLM_TERMINATE_MATCHING) { +                return HWLM_TERMINATE_MATCHING; +            } +            *work_done = 1; +        } +        PROGRAM_NEXT_INSTRUCTION + +        PROGRAM_CASE(REPORT_EOD) { +            if (tctxt->cb(end, ri->report, tctxt->userCtx) == +                MO_HALT_MATCHING) { +                return HWLM_TERMINATE_MATCHING; +            } +            *work_done = 1; +        } +        PROGRAM_NEXT_INSTRUCTION + + 
PROGRAM_CASE(REPORT_SOM_INT) { + if (roseHandleSom(t, tctxt->state, ri->report, end, tctxt, + in_anchored) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + *work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM) { + if (roseHandleSomSom(t, tctxt->state, ri->report, *som, end, + tctxt, + in_anchored) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + *work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_KNOWN) { + if (roseHandleSomMatch(t, tctxt->state, ri->report, *som, end, + tctxt, in_anchored) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + *work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_STATE) { + roseSetRole(t, tctxt->state, tctxt, ri->index, ri->depth); + *work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_GROUPS) { + tctxt->groups |= ri->groups; + DEBUG_PRINTF("set groups 0x%llx -> 0x%llx\n", ri->groups, + tctxt->groups); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(END) { + DEBUG_PRINTF("finished\n"); + return HWLM_CONTINUE_MATCHING; + } + PROGRAM_NEXT_INSTRUCTION } } - roseSetRole(t, tctxt->state, tctxt, tr); - - *work_done = 1; - + assert(0); // unreachable return HWLM_CONTINUE_MATCHING; } -static really_inline -hwlmcb_rv_t roseHandleRole(const struct RoseEngine *t, - const struct RoseRole *tr, u64a end, - struct RoseContext *tctxt, char in_anchored, - int *work_done) { - DEBUG_PRINTF("hi role %zd (flags %08x)\n", tr - getRoleTable(t), - tr->flags); - if (in_anchored && end > t->floatingMinLiteralMatchOffset) { - DEBUG_PRINTF("delay until playback, just do groups/depth now\n"); - update_depth(tctxt, tr); - tctxt->groups |= tr->groups; - *work_done = 1; - return HWLM_CONTINUE_MATCHING; - } +#undef PROGRAM_CASE +#undef PROGRAM_NEXT_INSTRUCTION - if (!roseCheckRolePreconditions(t, tr, end, tctxt)) { - return HWLM_CONTINUE_MATCHING; - } - - /* We now know the role has matched. 
We can now trigger things that need to - * be triggered and record things that need to be recorded.*/ - - return roseHandleRoleEffects(t, tr, end, tctxt, in_anchored, work_done); +hwlmcb_rv_t roseRunRoleProgram(const struct RoseEngine *t, u32 programOffset, + u64a end, u64a *som, struct RoseContext *tctxt, + int *work_done) { + return roseRunRoleProgram_i(t, programOffset, end, som, tctxt, 0, + work_done); } static really_inline @@ -1364,9 +1392,12 @@ hwlmcb_rv_t roseWalkSparseIterator(const struct RoseEngine *t, /* mark role as handled so we don't touch it again in this walk */ fatbit_set(handled_roles, t->roleCount, role); - hwlmcb_rv_t rv = roseHandleRole(t, tr, end, tctxt, - 0 /* in_anchored */, &work_done); - if (rv == HWLM_TERMINATE_MATCHING) { + if (!tr->programOffset) { + continue; + } + u64a som = 0ULL; + if (roseRunRoleProgram_i(t, tr->programOffset, end, &som, tctxt, 0, + &work_done) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } } @@ -1381,51 +1412,26 @@ hwlmcb_rv_t roseWalkSparseIterator(const struct RoseEngine *t, return HWLM_CONTINUE_MATCHING; } -// Check that the predecessor bounds are satisfied for a root role with special -// requirements (anchored, or unanchored but with preceding dots). -static rose_inline -char roseCheckRootBounds(const struct RoseEngine *t, const struct RoseRole *tr, - u64a end) { - assert(tr->predOffset != ROSE_OFFSET_INVALID); - const struct RosePred *tp = getPredTable(t) + tr->predOffset; - assert(tp->role == MO_INVALID_IDX); - - // Check history. We only use a subset of our history types for root or - // anchored root roles. - assert(tp->historyCheck == ROSE_ROLE_HISTORY_NONE || - tp->historyCheck == ROSE_ROLE_HISTORY_ANCH); - - return roseCheckPredHistory(tp, end); -} - // Walk the set of root roles (roles with depth 1) associated with this literal // and set them on. 
static really_inline char roseWalkRootRoles_i(const struct RoseEngine *t, const struct RoseLiteral *tl, u64a end, struct RoseContext *tctxt, char in_anchored) { - /* main entry point ensures that there is at least two root roles */ + if (!tl->rootProgramOffset) { + return 1; + } + + DEBUG_PRINTF("running literal root program at %u\n", tl->rootProgramOffset); + + u64a som = 0; int work_done = 0; - assert(tl->rootRoleOffset + tl->rootRoleCount <= t->rootRoleCount); - assert(tl->rootRoleCount > 1); - - const u32 *rootRole = getRootRoleTable(t) + tl->rootRoleOffset; - const u32 *rootRoleEnd = rootRole + tl->rootRoleCount; - for (; rootRole < rootRoleEnd; rootRole++) { - u32 role_offset = *rootRole; - const struct RoseRole *tr = getRoleByOffset(t, role_offset); - - if (!in_anchored && (tr->flags & ROSE_ROLE_PRED_ROOT) - && !roseCheckRootBounds(t, tr, end)) { - continue; - } - - if (roseHandleRole(t, tr, end, tctxt, in_anchored, &work_done) - == HWLM_TERMINATE_MATCHING) { - return 0; - } - }; + if (roseRunRoleProgram_i(t, tl->rootProgramOffset, end, &som, tctxt, + in_anchored, + &work_done) == HWLM_TERMINATE_MATCHING) { + return 0; + } // If we've actually handled any roles, we might need to apply this // literal's squash mask to our groups as well. 
@@ -1450,73 +1456,20 @@ char roseWalkRootRoles_N(const struct RoseEngine *t, return roseWalkRootRoles_i(t, tl, end, tctxt, 0); } -static really_inline -char roseWalkRootRoles_i1(const struct RoseEngine *t, - const struct RoseLiteral *tl, u64a end, - struct RoseContext *tctxt, char in_anchored) { - /* main entry point ensures that there is exactly one root role */ - int work_done = 0; - u32 role_offset = tl->rootRoleOffset; - const struct RoseRole *tr = getRoleByOffset(t, role_offset); - - if (!in_anchored && (tr->flags & ROSE_ROLE_PRED_ROOT) - && !roseCheckRootBounds(t, tr, end)) { - return 1; - } - - hwlmcb_rv_t rv = roseHandleRole(t, tr, end, tctxt, in_anchored, &work_done); - if (rv == HWLM_TERMINATE_MATCHING) { - return 0; - } - - // If we've actually handled any roles, we might need to apply this - // literal's squash mask to our groups as well. - if (work_done && tl->squashesGroup) { - roseSquashGroup(tctxt, tl); - } - - return 1; -} - -static never_inline -char roseWalkRootRoles_A1(const struct RoseEngine *t, - const struct RoseLiteral *tl, u64a end, - struct RoseContext *tctxt) { - return roseWalkRootRoles_i1(t, tl, end, tctxt, 1); -} - -static never_inline -char roseWalkRootRoles_N1(const struct RoseEngine *t, - const struct RoseLiteral *tl, u64a end, - struct RoseContext *tctxt) { - return roseWalkRootRoles_i1(t, tl, end, tctxt, 0); -} - - static really_inline char roseWalkRootRoles(const struct RoseEngine *t, const struct RoseLiteral *tl, u64a end, struct RoseContext *tctxt, char in_anchored, char in_anch_playback) { - DEBUG_PRINTF("literal has %u root roles\n", tl->rootRoleCount); - - assert(!in_anch_playback || tl->rootRoleCount); - if (!in_anch_playback && !tl->rootRoleCount) { + assert(!in_anch_playback || tl->rootProgramOffset); + if (!in_anch_playback && !tl->rootProgramOffset) { return 1; } if (in_anchored) { - if (tl->rootRoleCount == 1) { - return roseWalkRootRoles_A1(t, tl, end, tctxt); - } else { - return roseWalkRootRoles_A(t, tl, end, 
tctxt); - } + return roseWalkRootRoles_A(t, tl, end, tctxt); } else { - if (tl->rootRoleCount == 1) { - return roseWalkRootRoles_N1(t, tl, end, tctxt); - } else { - return roseWalkRootRoles_N(t, tl, end, tctxt); - } + return roseWalkRootRoles_N(t, tl, end, tctxt); } } @@ -1617,12 +1570,11 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) { assert(id < t->literalCount); const struct RoseLiteral *tl = &getLiteralTable(t)[id]; - assert(tl->rootRoleCount > 0); + assert(tl->rootProgramOffset); assert(!tl->delay_mask); - DEBUG_PRINTF("literal id=%u, minDepth=%u, groups=0x%016llx, " - "rootRoleCount=%u\n", - id, tl->minDepth, tl->groups, tl->rootRoleCount); + DEBUG_PRINTF("literal id=%u, minDepth=%u, groups=0x%016llx\n", id, + tl->minDepth, tl->groups); if (real_end <= t->floatingMinLiteralMatchOffset) { roseFlushLastByteHistory(t, state, real_end, tctxt); @@ -1688,8 +1640,8 @@ hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end, u32 id, assert(id < t->literalCount); const struct RoseLiteral *tl = &getLiteralTable(t)[id]; - DEBUG_PRINTF("lit id=%u, minDepth=%u, groups=0x%016llx, rootRoleCount=%u\n", - id, tl->minDepth, tl->groups, tl->rootRoleCount); + DEBUG_PRINTF("lit id=%u, minDepth=%u, groups=0x%016llx\n", id, tl->minDepth, + tl->groups); if (do_group_check && !(tl->groups & tctxt->groups)) { DEBUG_PRINTF("IGNORE: none of this literal's groups are set.\n"); diff --git a/src/rose/match.h b/src/rose/match.h index 19365f01..a39bebf3 100644 --- a/src/rose/match.h +++ b/src/rose/match.h @@ -262,8 +262,8 @@ hwlmcb_rv_t cleanUpDelayed(size_t length, u64a offset, struct RoseContext *tctxt } static really_inline -void update_depth(struct RoseContext *tctxt, const struct RoseRole *tr) { - u8 d = MAX(tctxt->depth, tr->depth + 1); +void update_depth(struct RoseContext *tctxt, u8 depth) { + u8 d = MAX(tctxt->depth, depth + 1); assert(d >= tctxt->depth); DEBUG_PRINTF("depth now %hhu was %hhu\n", d, tctxt->depth); tctxt->depth = d; @@ -323,4 +323,8 @@ void 
roseFlushLastByteHistory(const struct RoseEngine *t, u8 *state, scratch->sparse_iter_state); } +hwlmcb_rv_t roseRunRoleProgram(const struct RoseEngine *t, u32 programOffset, + u64a end, u64a *som, struct RoseContext *tctxt, + int *work_done); + #endif diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 3b8949e4..978d413d 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -38,6 +38,7 @@ #include "rose_build_scatter.h" #include "rose_build_util.h" #include "rose_build_width.h" +#include "rose_program.h" #include "hwlm/hwlm.h" /* engine types */ #include "hwlm/hwlm_build.h" #include "nfa/castlecompile.h" @@ -116,61 +117,41 @@ namespace /* anon */ { // Orders RoseEdge edges by the state index of the source node struct EdgeSourceStateCompare { - EdgeSourceStateCompare(const RoseGraph &g_, - const vector &roleTable_) : - g(g_), roleTable(roleTable_) {} - bool operator()(const RoseEdge &a, const RoseEdge &b) const { - u32 arole = g[source(a, g)].role; - u32 brole = g[source(b, g)].role; - if (arole >= roleTable.size()) { - DEBUG_PRINTF("bad arole %u (idx=%zu)\n", arole, g[source(a, g)].idx); - } - if (brole >= roleTable.size()) { - DEBUG_PRINTF("bad brole %u (idx=%zu)\n", brole, g[source(b, g)].idx); - } - assert(arole < roleTable.size()); - assert(brole < roleTable.size()); - return roleTable.at(arole).stateIndex < roleTable.at(brole).stateIndex; - } - const RoseGraph &g; - const vector &roleTable; -}; + EdgeSourceStateCompare( + const RoseGraph &g_, + const ue2::unordered_map &roleStateIndices_) + : g(g_), roleStateIndices(roleStateIndices_) {} -struct RoseTriggerOrdering { - RoseTriggerOrdering() {} - bool operator()(const RoseTrigger &a, const RoseTrigger &b) const { - ORDER_CHECK(queue); - ORDER_CHECK(event); - ORDER_CHECK(cancel_prev_top); - return false; + u32 state_index(RoseVertex v) const { + auto it = roleStateIndices.find(v); + if (it != roleStateIndices.end()) { + return it->second; + } + 
return MMB_INVALID; } -}; -struct RoseTriggerEquality { - RoseTriggerEquality() {} - bool operator()(const RoseTrigger &a, const RoseTrigger &b) const { - return a.queue == b.queue - && a.event == b.event - && a.cancel_prev_top == b.cancel_prev_top; + + bool operator()(const RoseEdge &a, const RoseEdge &b) const { + return state_index(source(a, g)) < state_index(source(b, g)); } + + const RoseGraph &g; + const ue2::unordered_map &roleStateIndices; }; struct left_build_info { // Constructor for an engine implementation. - left_build_info(NFA *n, u32 q, u32 l, u32 t, rose_group sm, + left_build_info(u32 q, u32 l, u32 t, rose_group sm, const std::vector &stops, u32 max_ql, u8 cm_count, const CharReach &cm_cr) - : nfa(n), queue(q), lag(l), transient(t), squash_mask(sm), - stopAlphabet(stops), max_queuelen(max_ql), - countingMiracleCount(cm_count), countingMiracleReach(cm_cr) { - assert(n); - } + : queue(q), lag(l), transient(t), squash_mask(sm), stopAlphabet(stops), + max_queuelen(max_ql), countingMiracleCount(cm_count), + countingMiracleReach(cm_cr) {} // Constructor for a lookaround implementation. explicit left_build_info(const vector &look) : has_lookaround(true), lookaround(look) {} - NFA *nfa = nullptr; /* uniquely idents the left_build_info */ - u32 queue = 0; /* also uniquely idents the left_build_info */ + u32 queue = 0; /* uniquely idents the left_build_info */ u32 lag = 0; u32 transient = 0; rose_group squash_mask = ~rose_group{0}; @@ -183,6 +164,104 @@ struct left_build_info { vector lookaround; // alternative implementation to the NFA }; +/** \brief Role instruction model used at compile time. 
*/ +class RoleInstruction { +public: + RoleInstruction() { + memset(&u, 0, sizeof(u)); + u.end.code = ROSE_ROLE_INSTR_END; + } + + explicit RoleInstruction(enum RoseRoleInstructionCode c) { + memset(&u, 0, sizeof(u)); + u.end.code = c; + } + + bool operator<(const RoleInstruction &a) const { + return memcmp(&u, &a.u, sizeof(u)) < 0; + } + + bool operator==(const RoleInstruction &a) const { + return memcmp(&u, &a.u, sizeof(u)) == 0; + } + + enum RoseRoleInstructionCode code() const { + // Note that this sort of type-punning (relying on identical initial + // layout) is explicitly allowed by the C++11 standard. + return (enum RoseRoleInstructionCode)u.end.code; + } + + const void *get() const { + switch (code()) { + case ROSE_ROLE_INSTR_CHECK_ONLY_EOD: return &u.checkOnlyEod; + case ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS: return &u.checkRootBounds; + case ROSE_ROLE_INSTR_CHECK_LOOKAROUND: return &u.checkLookaround; + case ROSE_ROLE_INSTR_CHECK_LEFTFIX: return &u.checkLeftfix; + case ROSE_ROLE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay; + case ROSE_ROLE_INSTR_SOM_ADJUST: return &u.somAdjust; + case ROSE_ROLE_INSTR_SOM_LEFTFIX: return &u.somLeftfix; + case ROSE_ROLE_INSTR_TRIGGER_INFIX: return &u.triggerInfix; + case ROSE_ROLE_INSTR_TRIGGER_SUFFIX: return &u.triggerSuffix; + case ROSE_ROLE_INSTR_REPORT: return &u.report; + case ROSE_ROLE_INSTR_REPORT_CHAIN: return &u.reportChain; + case ROSE_ROLE_INSTR_REPORT_EOD: return &u.reportEod; + case ROSE_ROLE_INSTR_REPORT_SOM_INT: return &u.reportSomInt; + case ROSE_ROLE_INSTR_REPORT_SOM: return &u.reportSom; + case ROSE_ROLE_INSTR_REPORT_SOM_KNOWN: return &u.reportSomKnown; + case ROSE_ROLE_INSTR_SET_STATE: return &u.setState; + case ROSE_ROLE_INSTR_SET_GROUPS: return &u.setGroups; + case ROSE_ROLE_INSTR_END: return &u.end; + } + assert(0); + return &u.end; + } + + size_t length() const { + switch (code()) { + case ROSE_ROLE_INSTR_CHECK_ONLY_EOD: return sizeof(u.checkOnlyEod); + case ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS: return 
sizeof(u.checkRootBounds); + case ROSE_ROLE_INSTR_CHECK_LOOKAROUND: return sizeof(u.checkLookaround); + case ROSE_ROLE_INSTR_CHECK_LEFTFIX: return sizeof(u.checkLeftfix); + case ROSE_ROLE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay); + case ROSE_ROLE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust); + case ROSE_ROLE_INSTR_SOM_LEFTFIX: return sizeof(u.somLeftfix); + case ROSE_ROLE_INSTR_TRIGGER_INFIX: return sizeof(u.triggerInfix); + case ROSE_ROLE_INSTR_TRIGGER_SUFFIX: return sizeof(u.triggerSuffix); + case ROSE_ROLE_INSTR_REPORT: return sizeof(u.report); + case ROSE_ROLE_INSTR_REPORT_CHAIN: return sizeof(u.reportChain); + case ROSE_ROLE_INSTR_REPORT_EOD: return sizeof(u.reportEod); + case ROSE_ROLE_INSTR_REPORT_SOM_INT: return sizeof(u.reportSomInt); + case ROSE_ROLE_INSTR_REPORT_SOM: return sizeof(u.reportSom); + case ROSE_ROLE_INSTR_REPORT_SOM_KNOWN: return sizeof(u.reportSomKnown); + case ROSE_ROLE_INSTR_SET_STATE: return sizeof(u.setState); + case ROSE_ROLE_INSTR_SET_GROUPS: return sizeof(u.setGroups); + case ROSE_ROLE_INSTR_END: return sizeof(u.end); + } + return 0; + } + + union { + ROSE_ROLE_STRUCT_CHECK_ONLY_EOD checkOnlyEod; + ROSE_ROLE_STRUCT_CHECK_ROOT_BOUNDS checkRootBounds; + ROSE_ROLE_STRUCT_CHECK_LOOKAROUND checkLookaround; + ROSE_ROLE_STRUCT_CHECK_LEFTFIX checkLeftfix; + ROSE_ROLE_STRUCT_ANCHORED_DELAY anchoredDelay; + ROSE_ROLE_STRUCT_SOM_ADJUST somAdjust; + ROSE_ROLE_STRUCT_SOM_LEFTFIX somLeftfix; + ROSE_ROLE_STRUCT_TRIGGER_INFIX triggerInfix; + ROSE_ROLE_STRUCT_TRIGGER_SUFFIX triggerSuffix; + ROSE_ROLE_STRUCT_REPORT report; + ROSE_ROLE_STRUCT_REPORT_CHAIN reportChain; + ROSE_ROLE_STRUCT_REPORT_EOD reportEod; + ROSE_ROLE_STRUCT_REPORT_SOM_INT reportSomInt; + ROSE_ROLE_STRUCT_REPORT_SOM reportSom; + ROSE_ROLE_STRUCT_REPORT_SOM_KNOWN reportSomKnown; + ROSE_ROLE_STRUCT_SET_STATE setState; + ROSE_ROLE_STRUCT_SET_GROUPS setGroups; + ROSE_ROLE_STRUCT_END end; + } u; +}; + struct build_context : boost::noncopyable { /** \brief Rose Role information. 
* These entries are filled in by a number of functions as other tables are @@ -190,6 +269,9 @@ struct build_context : boost::noncopyable { */ vector roleTable; + /** \brief Role program mapping, keyed by index in roleTable. */ + vector> rolePrograms; + /** \brief minimum depth in number of hops from root/anchored root. */ map depths; @@ -215,15 +297,110 @@ struct build_context : boost::noncopyable { /** \brief Map from literal final ID to a set of non-root role IDs. */ ue2::unordered_map> litNonRootRoles; - /* contents of rose immediately following the RoseEngine. */ - vector engine_blob; + /** \brief State indices, for those roles that have them. */ + ue2::unordered_map roleStateIndices; - /* base offset of engine_blob in the bytecode */ - const u32 engine_blob_base = ROUNDUP_16(sizeof(RoseEngine)); + /** \brief Mapping from queue index to bytecode offset for built engines + * that have already been pushed into the engine_blob. */ + ue2::unordered_map engineOffsets; + + /** \brief Contents of the Rose bytecode immediately following the + * RoseEngine. */ + vector> engine_blob; + + /** \brief Base offset of engine_blob in the Rose engine bytecode. 
*/ + static constexpr u32 engine_blob_base = ROUNDUP_CL(sizeof(RoseEngine)); }; } +static +void pad_engine_blob(build_context &bc, size_t align) { + assert(ISALIGNED_N(bc.engine_blob_base, align)); + size_t s = bc.engine_blob.size(); + + if (ISALIGNED_N(s, align)) { + return; + } + + bc.engine_blob.resize(s + align - s % align); +} + +static +u32 add_to_engine_blob(build_context &bc, const void *a, const size_t len, + const size_t align) { + pad_engine_blob(bc, align); + + size_t rv = bc.engine_blob_base + bc.engine_blob.size(); + assert(rv >= bc.engine_blob_base); + DEBUG_PRINTF("write %zu bytes at offset %zu\n", len, rv); + + assert(ISALIGNED_N(bc.engine_blob.size(), align)); + + bc.engine_blob.resize(bc.engine_blob.size() + len); + memcpy(&bc.engine_blob.back() - len + 1, a, len); + + return verify_u32(rv); +} + +template +static +u32 add_to_engine_blob(build_context &bc, const T &a) { + static_assert(is_pod::value, "should be pod"); + return add_to_engine_blob(bc, &a, sizeof(a), alignof(a)); +} + +template +static +u32 add_to_engine_blob(build_context &bc, const T &a, const size_t len) { + static_assert(is_pod::value, "should be pod"); + return add_to_engine_blob(bc, &a, len, alignof(a)); +} + +template +static +u32 add_to_engine_blob(build_context &bc, Iter b, const Iter &e) { + using value_type = typename Iter::value_type; + static_assert(is_pod::value, "should be pod"); + + if (b == e) { + return 0; + } + + u32 offset = add_to_engine_blob(bc, *b); + for (++b; b != e; ++b) { + add_to_engine_blob(bc, *b); + } + + return offset; +} + +static +const NFA *get_nfa_from_blob(const build_context &bc, u32 qi) { + assert(contains(bc.engineOffsets, qi)); + u32 nfa_offset = bc.engineOffsets.at(qi); + assert(nfa_offset >= bc.engine_blob_base); + const NFA *n = (const NFA *)(bc.engine_blob.data() + nfa_offset - + bc.engine_blob_base); + assert(n->queueIndex == qi); + return n; +} + +static +const NFA *add_nfa_to_blob(build_context &bc, NFA &nfa) { + u32 qi = 
nfa.queueIndex; + u32 nfa_offset = add_to_engine_blob(bc, nfa, nfa.length); + DEBUG_PRINTF("added nfa qi=%u, type=%u, length=%u at offset=%u\n", qi, + nfa.type, nfa.length, nfa_offset); + + assert(!contains(bc.engineOffsets, qi)); + bc.engineOffsets.emplace(qi, nfa_offset); + + const NFA *n = get_nfa_from_blob(bc, qi); + assert(memcmp(&nfa, n, nfa.length) == 0); + return n; +} + /* vertex ordered by their role index */ static vector get_ordered_verts(const RoseGraph &g) { @@ -767,15 +944,13 @@ void setLeftNfaProperties(NFA &n, const left_id &left) { } static -bool buildLeftfixes(const RoseBuildImpl &tbi, QueueIndexFactory &qif, - vector> *built_out, - set *no_retrigger_queues, - map *leftfix_info, +bool buildLeftfixes(const RoseBuildImpl &tbi, build_context &bc, + QueueIndexFactory &qif, set *no_retrigger_queues, bool do_prefix) { const RoseGraph &g = tbi.g; const CompileContext &cc = tbi.cc; - ue2::unordered_map seen; + ue2::unordered_map seen; // already built queue indices map > infixTriggers; findInfixTriggers(tbi, &infixTriggers); @@ -798,7 +973,6 @@ bool buildLeftfixes(const RoseBuildImpl &tbi, QueueIndexFactory &qif, // our in-edges. assert(roseHasTops(g, v)); - NFA *n; u32 qi; // queue index, set below. u32 lag = g[v].left.lag; bool is_transient = contains(tbi.transient, leftfix); @@ -807,16 +981,15 @@ bool buildLeftfixes(const RoseBuildImpl &tbi, QueueIndexFactory &qif, vector lookaround; if (makeLeftfixLookaround(tbi, v, lookaround)) { DEBUG_PRINTF("implementing as lookaround!\n"); - leftfix_info->emplace(v, left_build_info(lookaround)); + bc.leftfix_info.emplace(v, left_build_info(lookaround)); continue; } } if (contains(seen, leftfix)) { // NFA already built. - n = seen[leftfix]; - qi = n->queueIndex; - assert(qi < built_out->size()); + qi = seen[leftfix]; + assert(contains(bc.engineOffsets, qi)); DEBUG_PRINTF("sharing leftfix, qi=%u\n", qi); } else { DEBUG_PRINTF("making %sleftfix\n", is_transient ? 
"transient " : ""); @@ -841,7 +1014,6 @@ bool buildLeftfixes(const RoseBuildImpl &tbi, QueueIndexFactory &qif, setLeftNfaProperties(*nfa, leftfix); qi = qif.get_queue(); - assert(qi == built_out->size()); nfa->queueIndex = qi; if (!is_prefix && !leftfix.haig() && leftfix.graph() && @@ -850,10 +1022,9 @@ bool buildLeftfixes(const RoseBuildImpl &tbi, QueueIndexFactory &qif, no_retrigger_queues->insert(qi); } - n = nfa.get(); - seen.insert(make_pair(leftfix, n)); DEBUG_PRINTF("built leftfix, qi=%u\n", qi); - built_out->push_back(move(nfa)); + add_nfa_to_blob(bc, *nfa); + seen.emplace(leftfix, qi); } rose_group squash_mask = tbi.rose_squash_masks.at(leftfix); @@ -899,10 +1070,9 @@ bool buildLeftfixes(const RoseBuildImpl &tbi, QueueIndexFactory &qif, findCountingMiracleInfo(leftfix, stop, &cm_count, &cm_cr); } - leftfix_info->insert( - make_pair(v, left_build_info(n, qi, lag, max_width, - squash_mask, stop, max_queuelen, - cm_count, cm_cr))); + bc.leftfix_info.emplace( + v, left_build_info(qi, lag, max_width, squash_mask, stop, + max_queuelen, cm_count, cm_cr)); } return true; @@ -934,7 +1104,6 @@ bool hasNonSmallBlockOutfix(const vector &outfixes) { static aligned_unique_ptr buildOutfix(RoseBuildImpl &tbi, OutfixInfo &outfix) { assert(!outfix.is_dead()); // should not be marked dead. - assert(!outfix.nfa); // should not be already built. 
const CompileContext &cc = tbi.cc; const ReportManager &rm = tbi.rm; @@ -978,14 +1147,13 @@ aligned_unique_ptr buildOutfix(RoseBuildImpl &tbi, OutfixInfo &outfix) { buildReverseAcceleration(n.get(), outfix.rev_info, outfix.minWidth); } - outfix.nfa = n.get(); return n; } static -void prepMpv(RoseBuildImpl &tbi, vector> *built_nfas, - size_t *historyRequired, bool *mpv_as_outfix) { - assert(built_nfas->empty()); +void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired, + bool *mpv_as_outfix) { + assert(bc.engineOffsets.empty()); // MPV should be first *mpv_as_outfix = false; OutfixInfo *mpv = nullptr; @@ -1004,7 +1172,6 @@ void prepMpv(RoseBuildImpl &tbi, vector> *built_nfas, } assert(mpv->chained); - assert(!mpv->nfa); auto nfa = mpvCompile(mpv->puffettes, mpv->triggered_puffettes); assert(nfa); if (!nfa) { @@ -1016,7 +1183,6 @@ void prepMpv(RoseBuildImpl &tbi, vector> *built_nfas, } u32 qi = mpv->get_queue(tbi.qif); - assert(qi == built_nfas->size()); nfa->queueIndex = qi; DEBUG_PRINTF("built mpv\n"); @@ -1025,8 +1191,7 @@ void prepMpv(RoseBuildImpl &tbi, vector> *built_nfas, *historyRequired = 1; } - mpv->nfa = nfa.get(); - built_nfas->push_back(move(nfa)); + add_nfa_to_blob(bc, *nfa); *mpv_as_outfix = !mpv->puffettes.empty(); } @@ -1053,8 +1218,7 @@ void setOutfixProperties(NFA &n, const OutfixInfo &outfix) { } static -bool prepOutfixes(RoseBuildImpl &tbi, - vector> *built_nfas, +bool prepOutfixes(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired) { if (tbi.cc.grey.onlyOneOutfix && tbi.outfixes.size() > 1) { DEBUG_PRINTF("we have %zu outfixes, but Grey::onlyOneOutfix is set\n", @@ -1062,9 +1226,7 @@ bool prepOutfixes(RoseBuildImpl &tbi, throw ResourceLimitError(); } - assert(tbi.qif.allocated_count() == built_nfas->size()); - /* assume outfixes are just above chain tails in queue indices */ - built_nfas->reserve(tbi.outfixes.size()); + assert(tbi.qif.allocated_count() == bc.engineOffsets.size()); for (auto &out : tbi.outfixes) { 
if (out.chained) { @@ -1080,15 +1242,13 @@ bool prepOutfixes(RoseBuildImpl &tbi, setOutfixProperties(*n, out); - u32 qi = tbi.qif.get_queue(); - assert(qi == built_nfas->size()); - n->queueIndex = qi; + n->queueIndex = out.get_queue(tbi.qif); if (!*historyRequired && requires_decompress_key(*n)) { *historyRequired = 1; } - built_nfas->push_back(move(n)); + add_nfa_to_blob(bc, *n); } return true; @@ -1139,16 +1299,24 @@ void setSuffixProperties(NFA &n, const suffix_id &suff, } static -bool buildSuffixes(const RoseBuildImpl &tbi, - vector> *built_nfas, +bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc, map *suffixes, set *no_retrigger_queues) { map > suffixTriggers; findSuffixTriggers(tbi, &suffixTriggers); + // To ensure compile determinism, build suffix engines in order of their + // (unique) queue indices, so that we call add_nfa_to_blob in the same + // order. + vector> ordered; for (const auto &e : *suffixes) { - const suffix_id &s = e.first; - const u32 queue = e.second; + ordered.emplace_back(e.second, e.first); + } + sort(begin(ordered), end(ordered)); + + for (const auto &e : ordered) { + const u32 queue = e.first; + const suffix_id &s = e.second; const set &s_triggers = suffixTriggers.at(s); map fixed_depth_tops; @@ -1173,69 +1341,12 @@ bool buildSuffixes(const RoseBuildImpl &tbi, no_retrigger_queues->insert(queue); } - if (built_nfas->size() <= queue) { - built_nfas->resize(queue + 1); - } - - (*built_nfas)[queue] = move(n); + add_nfa_to_blob(bc, *n); } return true; } -static -void pad_engine_blob(build_context &bc, size_t align) { - assert(ISALIGNED_N(bc.engine_blob_base, align)); - size_t s = bc.engine_blob.size(); - - if (ISALIGNED_N(s, align)) { - return; - } - - bc.engine_blob.resize(s + align - s % align); -} - -template -static -u32 add_to_engine_blob(build_context &bc, const T &a) { - static_assert(is_pod::value, "should be pod"); - pad_engine_blob(bc, alignof(T)); - - size_t rv = bc.engine_blob_base + bc.engine_blob.size(); - 
assert(rv >= bc.engine_blob_base); - - assert(ISALIGNED_N(bc.engine_blob.size(), alignof(T))); - - bc.engine_blob.resize(bc.engine_blob.size() + sizeof(a)); - memcpy(&bc.engine_blob.back() - sizeof(a) + 1, &a, sizeof(a)); - - return verify_u32(rv); -} - -template -static -u32 add_to_engine_blob(build_context &bc, Iter b, const Iter &e) { - using value_type = typename Iter::value_type; - static_assert(is_pod::value, "should be pod"); - pad_engine_blob(bc, alignof(value_type)); - - size_t rv = bc.engine_blob_base + bc.engine_blob.size(); - assert(rv >= bc.engine_blob_base); - - assert(ISALIGNED_N(bc.engine_blob.size(), alignof(value_type))); - - size_t total_added_length = sizeof(*b) * distance(b, e); - bc.engine_blob.resize(bc.engine_blob.size() + total_added_length); - char *p = bc.engine_blob.data() + bc.engine_blob.size() - - total_added_length; - for (; b != e; ++b, p += sizeof(*b)) { - memcpy(p, &*b, sizeof(*b)); - } - assert(p - 1 == &bc.engine_blob.back()); - - return verify_u32(rv); -} - static void buildCountingMiracles(RoseBuildImpl &build, build_context &bc) { map, u32> pre_built; @@ -1292,26 +1403,22 @@ void buildCountingMiracles(RoseBuildImpl &build, build_context &bc) { } static -bool buildNfas(RoseBuildImpl &tbi, QueueIndexFactory &qif, - vector> *built_nfas, +bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif, map *suffixes, - map *leftfix_info, set *no_retrigger_queues, u32 *leftfixBeginQueue) { findSuffixes(tbi, qif, suffixes); - if (!buildSuffixes(tbi, built_nfas, suffixes, no_retrigger_queues)) { + if (!buildSuffixes(tbi, bc, suffixes, no_retrigger_queues)) { return false; } *leftfixBeginQueue = qif.allocated_count(); - if (!buildLeftfixes(tbi, qif, built_nfas, no_retrigger_queues, leftfix_info, - true)) { + if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, true)) { return false; } - if (!buildLeftfixes(tbi, qif, built_nfas, no_retrigger_queues, leftfix_info, - false)) { + if (!buildLeftfixes(tbi, bc, qif, 
no_retrigger_queues, false)) { return false; } @@ -1364,20 +1471,20 @@ void findTransientQueues(const map &leftfix_info, } static -void updateNfaState(const vector> &built_nfas, - const map &leftfix_info, - RoseStateOffsets *so, NfaInfo *nfa_infos, - u32 *fullStateSize, u32 *nfaStateSize, u32 *tStateSize) { +void updateNfaState(const build_context &bc, RoseStateOffsets *so, + NfaInfo *nfa_infos, u32 *fullStateSize, u32 *nfaStateSize, + u32 *tStateSize) { *nfaStateSize = 0; *tStateSize = 0; *fullStateSize = 0; set transient_queues; - findTransientQueues(leftfix_info, &transient_queues); + findTransientQueues(bc.leftfix_info, &transient_queues); - for (const auto &n : built_nfas) { - allocateStateSpace(n.get(), transient_queues, so, nfa_infos, - fullStateSize, nfaStateSize, tStateSize); + for (const auto &m : bc.engineOffsets) { + const NFA *n = get_nfa_from_blob(bc, m.first); + allocateStateSpace(n, transient_queues, so, nfa_infos, fullStateSize, + nfaStateSize, tStateSize); } } @@ -2121,9 +2228,8 @@ u32 buildLastByteIter(const RoseGraph &g, build_context &bc) { for (auto v : vertices_range(g)) { if (hasLastByteHistoryOutEdge(g, v)) { - u32 role = g[v].role; - assert(role < bc.roleTable.size()); - lb_roles.push_back(bc.roleTable[role].stateIndex); + assert(contains(bc.roleStateIndices, v)); + lb_roles.push_back(bc.roleStateIndices.at(v)); } } @@ -2154,16 +2260,6 @@ const char *describeHistory(RoseRoleHistory history) { } #endif -static -u32 calcNfaSize(const vector> &nfas) { - size_t nfas_size = 0; - - for (const auto &n : nfas) { - nfas_size += ROUNDUP_CL(n->length); - } - return verify_u32(nfas_size); -} - static void enforceEngineSizeLimit(const NFA *n, const size_t nfa_size, const Grey &grey) { // Global limit. 
@@ -2188,81 +2284,6 @@ void enforceEngineSizeLimit(const NFA *n, const size_t nfa_size, const Grey &gre } } -/* copies nfas into the final engine and updates role to reflect nfa offset */ -static -u32 copyInNFAs(const RoseBuildImpl &tbi, vector *roleTable, - const vector> &built_nfas, - const set &no_retrigger_queues, NfaInfo *infos, - u32 base_nfa_offset, - const map &suffixes, char *ptr) { - const RoseGraph &g = tbi.g; - const CompileContext &cc = tbi.cc; - - // Enforce engine count resource limit. - if (built_nfas.size() > cc.grey.limitRoseEngineCount) { - throw ResourceLimitError(); - } - - vector suffix_base(built_nfas.size()); - vector classic_top(built_nfas.size(), false); - - for (u32 i = 0; i < built_nfas.size(); i++) { - const NFA *n = built_nfas[i].get(); - - // Enforce individual engine size limit. - enforceEngineSizeLimit(n, n->length, cc.grey); - - DEBUG_PRINTF("copying in nfa %u: len=%u, offset=%u\n", i, n->length, - base_nfa_offset); - - memcpy(ptr + base_nfa_offset, n, n->length); - suffix_base[i] = base_nfa_offset; - - if (!isMultiTopType(n->type)) { - classic_top[i] = true; - } - - infos[i].nfaOffset = base_nfa_offset; - if (contains(no_retrigger_queues, i)) { - infos[i].no_retrigger = 1; - } - base_nfa_offset += ROUNDUP_CL(n->length); - } - - /* Write NFA indices into RoseRole structures for suffix NFAs */ - for (auto v : vertices_range(g)) { - if (!g[v].suffix) { - continue; - } - - u32 nfa_index = suffixes.at(g[v].suffix); - assert(nfa_index < suffix_base.size()); - - assert(g[v].role < roleTable->size()); - RoseRole &tr = (*roleTable)[g[v].role]; - tr.suffixOffset = suffix_base[nfa_index]; - - // DFAs/Puffs have no MQE_TOP_N support, so they get a classic TOP - // event. 
- if (classic_top[nfa_index]) { - assert(!g[v].suffix.graph || onlyOneTop(*g[v].suffix.graph)); - tr.suffixEvent = MQE_TOP; - } else { - assert(!g[v].suffix.haig); - u32 top = (u32)MQE_TOP_FIRST + g[v].suffix.top; - assert(top < MQE_INVALID); - tr.suffixEvent = top; - } - - /* mark suffixes triggered by etable literals */ - if (tbi.isInETable(v)) { - infos[nfa_index].eod = 1; - } - } - - return base_nfa_offset; -} - static u32 findMinFloatingLiteralMatch(const RoseBuildImpl &tbi) { const RoseGraph &g = tbi.g; @@ -2284,82 +2305,6 @@ u32 findMinFloatingLiteralMatch(const RoseBuildImpl &tbi) { return minWidth; } -static -vector buildRoseTriggerList(const RoseGraph &g, RoseVertex u, - const map &leftfix_info) { - // Terminator struct that marks the end of each role's trigger list. - RoseTrigger terminator; - memset(&terminator, 0, sizeof(RoseTrigger)); - terminator.queue = MO_INVALID_IDX; - terminator.event = MQE_INVALID; - terminator.cancel_prev_top = false; - - vector rv; - - for (const auto &e : out_edges_range(u, g)) { - RoseVertex v = target(e, g); - if (!g[v].left) { - continue; - } - - assert(contains(leftfix_info, v)); - const left_build_info &rbi = leftfix_info.at(v); - if (rbi.has_lookaround) { - continue; - } - assert(rbi.nfa); - - // DFAs have no TOP_N support, so they get a classic MQE_TOP event. 
- u32 top; - if (!isMultiTopType(rbi.nfa->type)) { - assert(num_tops(g[v].left) == 1); - top = MQE_TOP; - } else { - top = MQE_TOP_FIRST + g[e].rose_top; - assert(top < MQE_INVALID); - } - - rv.push_back(terminator); - RoseTrigger &trigger = rv.back(); - trigger.queue = rbi.nfa->queueIndex; - trigger.event = top; - trigger.cancel_prev_top = g[e].rose_cancel_prev_top; - } - - if (rv.empty()) { - return rv; - } - - sort(rv.begin(), rv.end(), RoseTriggerOrdering()); - rv.erase(unique(rv.begin(), rv.end(), RoseTriggerEquality()), rv.end()); - - rv.push_back(terminator); - - return rv; -} - -static -void buildRoseTriggerLists(const RoseBuildImpl &tbi, build_context &bc) { - const RoseGraph &g = tbi.g; - for (auto u : vertices_range(g)) { - if (tbi.isAnyStart(u) || g[u].literals.empty() - || tbi.hasDirectFinalId(u)) { - continue; - } - - assert(g[u].role < bc.roleTable.size()); - RoseRole &tr = bc.roleTable.at(g[u].role); - - vector trigs = buildRoseTriggerList(g, u, bc.leftfix_info); - - if (!trigs.empty()) { - assert(trigs.size() != 1); /* at min should be trig + term */ - tr.infixTriggerOffset = add_to_engine_blob(bc, trigs.begin(), - trigs.end()); - } - } -} - static void buildSuffixEkeyLists(const RoseBuildImpl &tbi, build_context &bc, const QueueIndexFactory &qif, @@ -2381,8 +2326,7 @@ void buildSuffixEkeyLists(const RoseBuildImpl &tbi, build_context &bc, /* for each outfix also build elists */ for (const auto &outfix : tbi.outfixes) { - assert(outfix.nfa); - u32 qi = outfix.nfa->queueIndex; + u32 qi = outfix.get_queue(); set ekeys = reportsToEkeys(all_reports(outfix), tbi.rm); if (!ekeys.empty()) { @@ -2431,7 +2375,6 @@ bool anyEndfixMpvTriggers(const RoseBuildImpl &tbi) { /* outfixes */ for (const auto &out : tbi.outfixes) { - assert(out.nfa); if (hasMpvTrigger(all_reports(out), tbi.rm)) { return true; } @@ -2451,31 +2394,53 @@ bool hasInternalReport(const set &reports, const ReportManager &rm) { } static -void populateNfaInfoBasics(NfaInfo *infos, const vector 
&outfixes, - const ReportManager &rm, +void populateNfaInfoBasics(const RoseBuildImpl &build, const build_context &bc, + const vector &outfixes, const map &suffixes, - const vector &ekeyListOffsets) { - for (const auto &out : outfixes) { - assert(out.nfa); - const u32 qi = out.nfa->queueIndex; + const vector &ekeyListOffsets, + const set &no_retrigger_queues, + NfaInfo *infos) { + const u32 num_queues = build.qif.allocated_count(); + for (u32 qi = 0; qi < num_queues; qi++) { + const NFA *n = get_nfa_from_blob(bc, qi); + enforceEngineSizeLimit(n, n->length, build.cc.grey); - infos[qi].in_sbmatcher = out.in_sbmatcher; - if (!hasInternalReport(all_reports(out), rm)) { - infos[qi].only_external = 1; - } - - infos[qi].ekeyListOffset = ekeyListOffsets[qi]; + NfaInfo &info = infos[qi]; + info.nfaOffset = bc.engineOffsets.at(qi); + info.ekeyListOffset = ekeyListOffsets[qi]; + info.no_retrigger = contains(no_retrigger_queues, qi) ? 1 : 0; } + // Mark outfixes that only trigger external reports. + for (const auto &out : outfixes) { + const u32 qi = out.get_queue(); + + infos[qi].in_sbmatcher = out.in_sbmatcher; + if (!hasInternalReport(all_reports(out), build.rm)) { + infos[qi].only_external = 1; + } + } + + // Mark suffixes that only trigger external reports. for (const auto &e : suffixes) { const suffix_id &s = e.first; u32 qi = e.second; - if (!hasInternalReport(all_reports(s), rm)) { + if (!hasInternalReport(all_reports(s), build.rm)) { infos[qi].only_external = 1; } + } - infos[qi].ekeyListOffset = ekeyListOffsets[qi]; + // Mark suffixes triggered by EOD table literals. 
+ const RoseGraph &g = build.g; + for (auto v : vertices_range(g)) { + if (!g[v].suffix) { + continue; + } + u32 qi = suffixes.at(g[v].suffix); + if (build.isInETable(v)) { + infos[qi].eod = 1; + } } } @@ -2652,51 +2617,117 @@ getLiteralInfoByFinalId(const RoseBuildImpl &build, u32 final_id) { return out; } +/** + * \brief Flattens a list of role programs into one finalised program with its + * fail_jump/done_jump targets set correctly. + */ static -void buildRootRoleTable(const RoseBuildImpl &tbi, u32 roleTableOffset, - vector &literalTable, - vector *rootRoleTable) { +vector +flattenRoleProgram(const vector> &program) { + vector out; + + vector offsets; // offset of each instruction (bytes) + vector targets; // jump target for each instruction + + size_t curr_offset = 0; + for (const auto &prog : program) { + for (const auto &ri : prog) { + out.push_back(ri); + offsets.push_back(curr_offset); + curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); + } + for (size_t i = 0; i < prog.size(); i++) { + targets.push_back(curr_offset); + } + } + + // Add an END instruction. 
+ out.emplace_back(ROSE_ROLE_INSTR_END); + offsets.push_back(curr_offset); + targets.push_back(curr_offset); + + for (size_t i = 0; i < out.size(); i++) { + auto &ri = out[i]; + switch (ri.code()) { + case ROSE_ROLE_INSTR_ANCHORED_DELAY: + ri.u.anchoredDelay.done_jump = targets[i] - offsets[i]; + break; + case ROSE_ROLE_INSTR_CHECK_ONLY_EOD: + ri.u.checkOnlyEod.fail_jump = targets[i] - offsets[i]; + break; + case ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS: + ri.u.checkRootBounds.fail_jump = targets[i] - offsets[i]; + break; + case ROSE_ROLE_INSTR_CHECK_LOOKAROUND: + ri.u.checkLookaround.fail_jump = targets[i] - offsets[i]; + break; + case ROSE_ROLE_INSTR_CHECK_LEFTFIX: + ri.u.checkLeftfix.fail_jump = targets[i] - offsets[i]; + break; + default: + break; + } + } + + return out; +} + +static +u32 writeRoleProgram(build_context &bc, vector &program) { + DEBUG_PRINTF("writing %zu instructions\n", program.size()); + u32 programOffset = 0; + for (const auto &ri : program) { + u32 offset = + add_to_engine_blob(bc, ri.get(), ri.length(), ROSE_INSTR_MIN_ALIGN); + DEBUG_PRINTF("code %u len %zu written at offset %u\n", ri.code(), + ri.length(), offset); + if (!programOffset) { + programOffset = offset; + } + } + return programOffset; +} + +static +void buildRootRolePrograms(const RoseBuildImpl &build, build_context &bc, + vector &literalTable) { for (u32 id = 0; id < literalTable.size(); id++) { - RoseLiteral &tl = literalTable[id]; - const rose_literal_info &lit_info = - **getLiteralInfoByFinalId(tbi, id).begin(); - const auto &vertices = lit_info.vertices; + DEBUG_PRINTF("lit %u\n", id); + const auto &lit_info = **getLiteralInfoByFinalId(build, id).begin(); - tl.rootRoleOffset = verify_u32(rootRoleTable->size()); - tl.rootRoleCount = 0; + flat_set root_roles; // with programs to run. 
- for (RoseVertex v : vertices) { - if (tbi.isRootSuccessor(v)) { - if (tbi.hasDirectFinalId(v)) { - DEBUG_PRINTF("[skip root role %u as direct]\n", - tbi.g[v].role); - continue; - } - assert(tbi.isRootSuccessor(v)); - u32 role_offset - = roleTableOffset + tbi.g[v].role * sizeof(RoseRole); - rootRoleTable->push_back(role_offset); - tl.rootRoleCount++; - DEBUG_PRINTF("root role %u\n", tbi.g[v].role); + for (RoseVertex v : lit_info.vertices) { + if (!build.isRootSuccessor(v)) { + continue; } + if (build.hasDirectFinalId(v)) { + DEBUG_PRINTF("[skip root role %u as direct]\n", + build.g[v].role); + continue; + } + DEBUG_PRINTF("root role %u\n", build.g[v].role); + root_roles.insert(build.g[v].role); } - if (!tl.rootRoleCount) { - tl.rootRoleOffset = 0; - } else if (tl.rootRoleCount > 1) { - // Sort the entries for this literal by role index - vector::iterator begin = rootRoleTable->begin() - + tl.rootRoleOffset; - vector::iterator end = begin + tl.rootRoleCount; - sort(begin, end); - } else if (tl.rootRoleCount == 1) { - /* if there is only one root role, the rose literal stores the - * offset directly */ - tl.rootRoleOffset = (*rootRoleTable)[tl.rootRoleOffset]; + vector> root_prog; + for (const auto &role : root_roles) { + assert(role < bc.rolePrograms.size()); + const auto &role_prog = bc.rolePrograms[role]; + if (role_prog.empty()) { + continue; + } + root_prog.push_back(role_prog); } - DEBUG_PRINTF("literal %u: %u root roles, starting from idx=%u\n", id, - tl.rootRoleCount, tl.rootRoleOffset); + RoseLiteral &tl = literalTable[id]; + if (root_prog.empty()) { + tl.rootProgramOffset = 0; + continue; + } + + auto final_program = flattenRoleProgram(root_prog); + tl.rootProgramOffset = writeRoleProgram(bc, final_program); } } @@ -2722,12 +2753,10 @@ void buildActiveLeftIter(const vector &leftTable, } static -bool hasEodAnchors(const RoseBuildImpl &tbi, - const vector> &built_nfas, +bool hasEodAnchors(const RoseBuildImpl &tbi, const build_context &bc, u32 outfixEndQueue) 
{ - assert(outfixEndQueue <= built_nfas.size()); for (u32 i = 0; i < outfixEndQueue; i++) { - if (nfaAcceptsEod(built_nfas[i].get())) { + if (nfaAcceptsEod(get_nfa_from_blob(bc, i))) { DEBUG_PRINTF("outfix has eod\n"); return true; } @@ -2880,42 +2909,27 @@ pair buildEodAnchorRoles(RoseBuildImpl &tbi, build_context &bc, bc.roleTable.push_back(RoseRole()); RoseRole &tr = bc.roleTable.back(); memset(&tr, 0, sizeof(tr)); - tr.stateIndex = MMB_INVALID; - tr.predOffset = ROSE_OFFSET_INVALID; - tr.reportId = er.first; - tr.flags = ROSE_ROLE_FLAG_ACCEPT_EOD; + + bc.rolePrograms.push_back({}); + auto &program = bc.rolePrograms.back(); + auto ri = RoleInstruction(ROSE_ROLE_INSTR_REPORT_EOD); + ri.u.report.report = er.first; + program.push_back(ri); // Collect the state IDs of this report's vertices to add to the EOD // sparse iterator, creating pred entries appropriately. for (const auto &e : er.second) { RoseVertex v = source(e, g); DEBUG_PRINTF("vertex %zu has role %u\n", g[v].idx, g[v].role); - assert(g[v].role < bc.roleTable.size()); - RoseRole &predRole = bc.roleTable[g[v].role]; + assert(contains(bc.roleStateIndices, v)); + u32 predStateIdx = bc.roleStateIndices.at(v); createPred(tbi, bc, e, predTable); - const RosePred &tp = predTable.back(); - RoseIterRole ir = { (u32)(bc.roleTable.size() - 1), (u32)(predTable.size() - 1) }; - predStates[predRole.stateIndex].push_back(ir); - - if (out_degree(v, g) == 1 && tp.minBound == 0 && tp.maxBound == 0) { - // Since it leads ONLY to an EOD accept with bounds (0, 0), we - // can tag this role with the "must match at end of block" - // flag. 
- DEBUG_PRINTF("flagging role %u as ONLY_AT_END\n", g[v].role); - - /* There is no pointing enforcing this check at runtime if - * the predRole is only fired by eod event literal */ - if (g[v].literals.size() != 1 - || *g[v].literals.begin() != tbi.eod_event_literal_id) { - predRole.flags |= ROSE_ROLE_FLAG_ONLY_AT_END; - } - } - predRole.flags |= ROSE_ROLE_FLAG_PRED_OF_EOD; + predStates[predStateIdx].push_back(ir); } } @@ -2976,10 +2990,6 @@ void createLiteralEntry(const RoseBuildImpl &tbi, build_context &bc, RoseLiteral &tl = literalTable.back(); memset(&tl, 0, sizeof(tl)); - // These two are set by buildRootRoleTable. - tl.rootRoleOffset = 0; - tl.rootRoleCount = 0; - tl.groups = 0; for (const auto &li : lit_infos) { tl.groups |= li->group_mask; @@ -3036,18 +3046,301 @@ void buildLiteralTable(const RoseBuildImpl &tbi, build_context &bc, } } +/** + * \brief True if the given vertex is a role that can only be switched on at + * EOD. + */ +static +bool onlyAtEod(const RoseBuildImpl &tbi, RoseVertex v) { + const RoseGraph &g = tbi.g; + + // All such roles have only (0,0) edges to vertices with the eod_accept + // property, and no other effects (suffixes, ordinary reports, etc, etc). 
+ + if (isLeafNode(v, g) || !g[v].reports.empty() || g[v].suffix) { + return false; + } + + for (const auto &e : out_edges_range(v, g)) { + RoseVertex w = target(e, g); + if (!g[w].eod_accept) { + return false; + } + assert(!g[w].reports.empty()); + assert(g[w].literals.empty()); + + if (g[e].minBound || g[e].maxBound) { + return false; + } + } + + /* There is no pointing enforcing this check at runtime if + * this role is only fired by the eod event literal */ + if (tbi.eod_event_literal_id != MO_INVALID_IDX && + g[v].literals.size() == 1 && + *g[v].literals.begin() == tbi.eod_event_literal_id) { + return false; + } + + return true; +} + +static +void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, + vector &program, + ue2::unordered_map, size_t> &lookaround_cache) { + if (!build.cc.grey.roseLookaroundMasks) { + return; + } + + vector look; + + // Lookaround from leftfix (mandatory). + if (contains(bc.leftfix_info, v) && bc.leftfix_info.at(v).has_lookaround) { + DEBUG_PRINTF("using leftfix lookaround\n"); + look = bc.leftfix_info.at(v).lookaround; + } + + // We may be able to find more lookaround info (advisory) and merge it + // in. 
+ vector look_more; + findLookaroundMasks(build, v, look_more); + mergeLookaround(look, look_more); + + if (look.empty()) { + return; + } + + DEBUG_PRINTF("role has lookaround\n"); + u32 look_idx; + auto it = lookaround_cache.find(look); + if (it != lookaround_cache.end()) { + DEBUG_PRINTF("reusing look at idx %zu\n", it->second); + look_idx = verify_u32(it->second); + } else { + size_t idx = bc.lookaround.size(); + lookaround_cache.emplace(look, idx); + insert(&bc.lookaround, bc.lookaround.end(), look); + DEBUG_PRINTF("adding look at idx %zu\n", idx); + look_idx = verify_u32(idx); + } + u32 look_count = verify_u32(look.size()); + + auto ri = RoleInstruction(ROSE_ROLE_INSTR_CHECK_LOOKAROUND); + ri.u.checkLookaround.index = look_idx; + ri.u.checkLookaround.count = look_count; + program.push_back(ri); +} + +static +void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v, + vector &program) { + auto it = bc.leftfix_info.find(v); + if (it == end(bc.leftfix_info)) { + return; + } + const left_build_info &lni = it->second; + if (lni.has_lookaround) { + return; // Leftfix completely implemented by lookaround. + } + + assert(!build.cc.streaming || + build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG); + + auto ri = RoleInstruction(ROSE_ROLE_INSTR_CHECK_LEFTFIX); + ri.u.checkLeftfix.queue = lni.queue; + ri.u.checkLeftfix.lag = build.g[v].left.lag; + ri.u.checkLeftfix.report = build.g[v].left.leftfix_report; + program.push_back(ri); +} + +static +void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc, + RoseVertex v, vector &program) { + // Only relevant for roles that can be triggered by the anchored table. + if (!build.isAnchored(v)) { + return; + } + + // TODO: also limit to matches that can occur after + // floatingMinLiteralMatchOffset. 
+ + auto ri = RoleInstruction(ROSE_ROLE_INSTR_ANCHORED_DELAY); + ri.u.anchoredDelay.depth = (u8)min(254U, bc.depths.at(v)); + ri.u.anchoredDelay.groups = build.g[v].groups; + program.push_back(ri); +} + +static +void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, + vector &program) { + const auto &g = build.g; + + /* we are a suffaig - need to update role to provide som to the + * suffix. */ + bool has_som = false; + if (g[v].left.tracksSom()) { + assert(contains(bc.leftfix_info, v)); + const left_build_info &lni = bc.leftfix_info.at(v); + auto ri = RoleInstruction(ROSE_ROLE_INSTR_SOM_LEFTFIX); + ri.u.somLeftfix.queue = lni.queue; + ri.u.somLeftfix.lag = g[v].left.lag; + program.push_back(ri); + has_som = true; + } else if (g[v].som_adjust) { + auto ri = RoleInstruction(ROSE_ROLE_INSTR_SOM_ADJUST); + ri.u.somAdjust.distance = g[v].som_adjust; + program.push_back(ri); + has_som = true; + } + + // Write program instructions for reports. + for (ReportID id : g[v].reports) { + assert(id < build.rm.numReports()); + const Report &ir = build.rm.getReport(id); + if (isInternalSomReport(ir)) { + auto ri = + RoleInstruction(has_som ? ROSE_ROLE_INSTR_REPORT_SOM + : ROSE_ROLE_INSTR_REPORT_SOM_INT); + ri.u.report.report = id; + program.push_back(ri); + } else if (ir.type == INTERNAL_ROSE_CHAIN) { + auto ri = RoleInstruction(ROSE_ROLE_INSTR_REPORT_CHAIN); + ri.u.report.report = id; + program.push_back(ri); + } else { + auto ri = + RoleInstruction(has_som ? 
ROSE_ROLE_INSTR_REPORT_SOM_KNOWN + : ROSE_ROLE_INSTR_REPORT); + ri.u.report.report = id; + program.push_back(ri); + } + } +} + +static +void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v, + const map &suffixes, + vector &program) { + const auto &g = build.g; + if (!g[v].suffix) { + return; + } + assert(contains(suffixes, g[v].suffix)); + u32 qi = suffixes.at(g[v].suffix); + assert(contains(bc.engineOffsets, qi)); + const NFA *nfa = get_nfa_from_blob(bc, qi); + u32 suffixEvent; + if (isMultiTopType(nfa->type)) { + assert(!g[v].suffix.haig); + u32 top = (u32)MQE_TOP_FIRST + g[v].suffix.top; + assert(top < MQE_INVALID); + suffixEvent = top; + } else { + // DFAs/Puffs have no MQE_TOP_N support, so they get a classic TOP + // event. + assert(!g[v].suffix.graph || onlyOneTop(*g[v].suffix.graph)); + suffixEvent = MQE_TOP; + } + auto ri = RoleInstruction(ROSE_ROLE_INSTR_TRIGGER_SUFFIX); + ri.u.triggerSuffix.queue = qi; + ri.u.triggerSuffix.event = suffixEvent; + program.push_back(ri); +} + +static +void makeRoleGroups(const rose_group &groups, + vector &program) { + if (!groups) { + return; + } + auto ri = RoleInstruction(ROSE_ROLE_INSTR_SET_GROUPS); + ri.u.setGroups.groups = groups; + program.push_back(ri); +} + +static +void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, + RoseVertex u, vector &program) { + const auto &g = build.g; + + vector infix_program; + + for (const auto &e : out_edges_range(u, g)) { + RoseVertex v = target(e, g); + if (!g[v].left) { + continue; + } + + assert(contains(bc.leftfix_info, v)); + const left_build_info &lbi = bc.leftfix_info.at(v); + if (lbi.has_lookaround) { + continue; + } + + const NFA *nfa = get_nfa_from_blob(bc, lbi.queue); + + // DFAs have no TOP_N support, so they get a classic MQE_TOP event. 
+ u32 top; + if (!isMultiTopType(nfa->type)) { + assert(num_tops(g[v].left) == 1); + top = MQE_TOP; + } else { + top = MQE_TOP_FIRST + g[e].rose_top; + assert(top < MQE_INVALID); + } + + auto ri = RoleInstruction(ROSE_ROLE_INSTR_TRIGGER_INFIX); + ri.u.triggerInfix.queue = lbi.queue; + ri.u.triggerInfix.event = top; + ri.u.triggerInfix.cancel = g[e].rose_cancel_prev_top; + infix_program.push_back(ri); + } + + if (infix_program.empty()) { + return; + } + + // Order, de-dupe and add instructions to the end of program. + sort(begin(infix_program), end(infix_program)); + unique_copy(begin(infix_program), end(infix_program), + back_inserter(program)); + + // Groups may be cleared by an infix going quiet. Set groups immediately + // after infixes are triggered. + makeRoleGroups(g[u].groups, program); +} + +static +void makeRoleSetState(RoseBuildImpl &build, build_context &bc, RoseVertex v, + vector &program, + u32 *nextStateIndex) { + const auto &g = build.g; + + // Leaf nodes don't need state indices, as they don't have successors. + if (isLeafNode(v, g)) { + return; + } + + /* TODO: also don't need a state index if all edges are nfa based */ + + u32 idx = (*nextStateIndex)++; + auto ri = RoleInstruction(ROSE_ROLE_INSTR_SET_STATE); + ri.u.setState.index = idx; + ri.u.setState.depth = (u8)min(254U, bc.depths.at(v)); + program.push_back(ri); + bc.roleStateIndices.emplace(v, idx); +} + static void createRoleEntry(RoseBuildImpl &tbi, build_context &bc, RoseVertex v, vector &roleTable, ue2::unordered_map, size_t> &lookaround_cache, - u32 *nextStateIndex) { + const map &suffixes, u32 *nextStateIndex) { RoseGraph &g = tbi.g; - // Vertices have been normalised by now to have <= 1 reports. 
- assert(g[v].reports.size() <= 1); - // set role ID in the graph where we can find it later - u32 roleId = (u32)roleTable.size(); + u32 roleId = verify_u32(roleTable.size()); g[v].role = roleId; // track id if it's a nonroot role for use in buildSparseIter if (!tbi.isRootSuccessor(v)) { @@ -3065,117 +3358,67 @@ void createRoleEntry(RoseBuildImpl &tbi, build_context &bc, g[v].idx, (u32)g[v].eod_accept, g[v].suffix.graph.get(), g[v].suffix.haig.get()); - // accept roles get their report ID. - if (!g[v].reports.empty()) { - DEBUG_PRINTF("%zu reports\n", g[v].reports.size()); - assert(g[v].reports.size() == 1); - tr.reportId = *g[v].reports.begin(); - assert(tr.reportId < tbi.rm.numReports()); - const Report &ir = tbi.rm.getReport(tr.reportId); - if (isInternalSomReport(ir)) { - tr.flags |= ROSE_ROLE_FLAG_SOM_REPORT; + // Build role program. + + assert(bc.rolePrograms.size() == roleId); + bc.rolePrograms.push_back({}); + vector &program = bc.rolePrograms.back(); + + // First, add program instructions that enforce preconditions without + // effects. + + makeRoleAnchoredDelay(tbi, bc, v, program); + + if (onlyAtEod(tbi, v)) { + DEBUG_PRINTF("only at eod\n"); + program.push_back(RoleInstruction(ROSE_ROLE_INSTR_CHECK_ONLY_EOD)); + } + + makeRoleLookaround(tbi, bc, v, program, lookaround_cache); + makeRoleCheckLeftfix(tbi, bc, v, program); + + // Next, we can add program instructions that have effects. 
+ + makeRoleReports(tbi, bc, v, program); + makeRoleInfixTriggers(tbi, bc, v, program); + makeRoleSuffix(tbi, bc, v, suffixes, program); + makeRoleSetState(tbi, bc, v, program, nextStateIndex); + makeRoleGroups(g[v].groups, program); +} + +static +void writeRolePrograms(build_context &bc) { + assert(bc.roleTable.size() == bc.rolePrograms.size()); + + for (size_t i = 0; i < bc.roleTable.size(); i++) { + auto &role = bc.roleTable[i]; + auto &program = bc.rolePrograms[i]; + + if (program.empty()) { + role.programOffset = 0; + continue; } - if (ir.type == INTERNAL_ROSE_CHAIN) { - tr.flags |= ROSE_ROLE_FLAG_CHAIN_REPORT; - } - } else { - tr.reportId = MO_INVALID_IDX; - } - tr.leftfixReport = g[v].left.leftfix_report; - assert(!tbi.cc.streaming || g[v].left.lag <= MAX_STORED_LEFTFIX_LAG); - tr.leftfixLag = g[v].left.lag; - tr.depth = (u8)min(254U, bc.depths.at(v)); - tr.groups = g[v].groups; - tr.flags |= ROSE_ROLE_PRED_NONE; + // Safety check: all precondition checks should occur before + // instructions with effects. + assert(is_partitioned( + begin(program), end(program), [](const RoleInstruction &ri) { + // CHECK_LEFTFIX is the last precondition check. + return ri.code() <= ROSE_ROLE_INSTR_CHECK_LEFTFIX; + })); - if (contains(bc.leftfix_info, v)) { - const left_build_info &lni = bc.leftfix_info.at(v); - if (!lni.has_lookaround) { - tr.flags |= ROSE_ROLE_FLAG_ROSE; - tr.leftfixQueue = lni.nfa->queueIndex; - } - } + // Apply jump fixups. + auto final_program = flattenRoleProgram({program}); - if (!g[v].literals.empty()) { - /* all literals for a role come from the same table -> inspect any */ - switch (tbi.literals.right.at(*g[v].literals.begin()).table) { - case ROSE_ANCHORED: - tr.flags |= ROSE_ROLE_FLAG_ANCHOR_TABLE; - break; - case ROSE_EOD_ANCHORED: - tr.flags |= ROSE_ROLE_FLAG_EOD_TABLE; - break; - default: - ; - } + // Write into bytecode. 
+ role.programOffset = writeRoleProgram(bc, final_program); } - - // Leaf nodes don't need state indices, as they don't have successors. - /* TODO: also don't need a state index if all edges are nfa based */ - if (isLeafNode(v, g)) { - tr.stateIndex = MMB_INVALID; - } else { - tr.stateIndex = (*nextStateIndex)++; - } - - /* we are a suffaig - need to update role to provide som to the - * suffix. */ - bool has_som = false; - if (g[v].left.tracksSom()) { - tr.flags |= ROSE_ROLE_FLAG_SOM_ROSEFIX; - has_som = true; - } else if (g[v].som_adjust) { - tr.somAdjust = g[v].som_adjust; - tr.flags |= ROSE_ROLE_FLAG_SOM_ADJUST; - has_som = true; - } - - if (has_som && !g[v].reports.empty()) { - tr.flags |= ROSE_ROLE_FLAG_REPORT_START; - } - - vector look; - if (tbi.cc.grey.roseLookaroundMasks) { - // Lookaround from leftfix (mandatory). - if (contains(bc.leftfix_info, v) && - bc.leftfix_info.at(v).has_lookaround) { - DEBUG_PRINTF("using leftfix lookaround\n"); - look = bc.leftfix_info.at(v).lookaround; - } - // We may be able to find more lookaround info (advisory) and merge it - // in. 
- vector look_more; - findLookaroundMasks(tbi, v, look_more); - mergeLookaround(look, look_more); - } - if (look.empty()) { - DEBUG_PRINTF("no lookaround\n"); - tr.lookaroundIndex = MO_INVALID_IDX; - tr.lookaroundCount = 0; - } else { - auto it = lookaround_cache.find(look); - if (it != lookaround_cache.end()) { - DEBUG_PRINTF("reusing look at idx %zu\n", it->second); - tr.lookaroundIndex = verify_u32(it->second); - } else { - size_t idx = bc.lookaround.size(); - lookaround_cache.insert(make_pair(look, idx)); - insert(&bc.lookaround, bc.lookaround.end(), look); - DEBUG_PRINTF("adding look at idx %zu\n", idx); - tr.lookaroundIndex = verify_u32(idx); - } - tr.lookaroundCount = verify_u32(look.size()); - } - - DEBUG_PRINTF("role id=%u, stateidx=%u, reportId=%u, " - "depth=%u, groups=0x%016llx\n", roleId, tr.stateIndex, - tr.reportId, tr.depth, tr.groups); } // Construct an initial role table containing the basic role information. static -void buildInitialRoleTable(RoseBuildImpl &tbi, build_context &bc) { +void buildInitialRoleTable(RoseBuildImpl &tbi, build_context &bc, + const map &suffixes) { DEBUG_PRINTF("building role table\n"); const RoseGraph &g = tbi.g; @@ -3210,7 +3453,8 @@ void buildInitialRoleTable(RoseBuildImpl &tbi, build_context &bc) { } assert(!g[v].literals.empty()); - createRoleEntry(tbi, bc, v, roleTable, lookaround_cache, &stateIndex); + createRoleEntry(tbi, bc, v, roleTable, lookaround_cache, suffixes, + &stateIndex); } bc.numStates = stateIndex; @@ -3218,6 +3462,48 @@ void buildInitialRoleTable(RoseBuildImpl &tbi, build_context &bc) { stateIndex); } +static +void makeRoleCheckRootBounds(const RoseBuildImpl &build, RoseVertex v, + const RoseEdge &e, + vector &program) { + const RoseGraph &g = build.g; + const RoseVertex u = source(e, g); + + assert(u == build.root || u == build.anchored_root); + + // Use the minimum literal length. + u32 lit_length = g[v].eod_accept ? 
0 : verify_u32(build.minLiteralLen(v)); + + u32 min_bound = g[e].minBound + lit_length; + u32 max_bound = g[e].maxBound == ROSE_BOUND_INF + ? ROSE_BOUND_INF + : g[e].maxBound + lit_length; + + if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { + assert(g[u].max_offset != ROSE_BOUND_INF); + // Make offsets absolute. + min_bound += g[u].max_offset; + if (max_bound != ROSE_BOUND_INF) { + max_bound += g[u].max_offset; + } + } + + assert(max_bound <= ROSE_BOUND_INF); + assert(min_bound <= max_bound); + + auto ri = RoleInstruction(ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS); + ri.u.checkRootBounds.min_bound = min_bound; + ri.u.checkRootBounds.max_bound = max_bound; + + // This precondition instruction should go near the start of + // the program, after the ONLY_EOD check if it's present. + auto it = + find_if(begin(program), end(program), [](const RoleInstruction &ri) { + return ri.code() > ROSE_ROLE_INSTR_CHECK_ONLY_EOD; + }); + program.insert(it, ri); +} + // Construct pred table and sparse iterators over preds. static void buildPredTable(const RoseBuildImpl &tbi, build_context &bc, @@ -3249,16 +3535,17 @@ void buildPredTable(const RoseBuildImpl &tbi, build_context &bc, "[%u, %u]\n", g[u].role, g[v].role, g[e].minBound, g[e].maxBound); if (tbi.isAnyStart(u)) { - /* we have ourselves a root role */ + // Solely root roles can be handled with no check at all (for + // very simple cases), or a bounds check in the role program. 
assert(u != tbi.root || g[e].maxBound == ROSE_BOUND_INF); if (u == tbi.root && g[e].minBound == 0) { DEBUG_PRINTF("root role with .* edge, no pred needed\n"); continue; /* no pred required */ } - tr.predOffset = verify_u32(predTable.size()); + tr.flags &= ROSE_ROLE_PRED_CLEAR_MASK; - tr.flags |= ROSE_ROLE_PRED_ROOT; - createPred(tbi, bc, e, predTable); + auto &program = bc.rolePrograms[g[v].role]; + makeRoleCheckRootBounds(tbi, v, e, program); continue; } @@ -3287,26 +3574,11 @@ void buildPredTable(const RoseBuildImpl &tbi, build_context &bc, // Collect in-edges, ordered by the state index of the predecessor. vector edges = make_vector_from(in_edges(v, g)); sort(edges.begin(), edges.end(), - EdgeSourceStateCompare(g, bc.roleTable)); + EdgeSourceStateCompare(g, bc.roleStateIndices)); - vector keys; - - // Create preds and collect state indices for our sparse iterator. for (const auto &e : edges) { createPred(tbi, bc, e, predTable); - RoseVertex u = source(e, g); - assert(g[u].role < bc.roleTable.size()); - u32 stateIdx = bc.roleTable.at(g[u].role).stateIndex; - if (stateIdx != MMB_INVALID) { - keys.push_back(stateIdx); - } } - - vector iter; - mmbBuildSparseIterator(iter, keys, bc.numStates); - assert(!iter.empty()); - - tr.predOffset = addIteratorToTable(bc, iter); } } @@ -3345,9 +3617,8 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, continue; } - assert(lbi.nfa); - assert(lbi.nfa->queueIndex >= leftfixBeginQueue); - u32 left_index = lbi.nfa->queueIndex - leftfixBeginQueue; + assert(lbi.queue >= leftfixBeginQueue); + u32 left_index = lbi.queue - leftfixBeginQueue; assert(left_index < leftfixCount); /* seedy hack to make miracles more effective. @@ -3414,8 +3685,17 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, // Build sparse iterators for literals. 
static -void buildSparseIter(build_context &bc, vector &literalTable, +void buildSparseIter(RoseBuildImpl &build, build_context &bc, + vector &literalTable, const vector &predTable) { + const RoseGraph &g = build.g; + + // Construct a mapping from role ids to state indices. + ue2::unordered_map role_to_state; + for (const auto &m : bc.roleStateIndices) { + role_to_state.emplace(g[m.first].role, m.second); + } + for (u32 finalId = 0; finalId != literalTable.size(); ++finalId) { RoseLiteral &tl = literalTable[finalId]; @@ -3441,14 +3721,16 @@ void buildSparseIter(build_context &bc, vector &literalTable, u32 p = bc.rolePredecessors.at(r)[0]; assert(p != ROSE_OFFSET_INVALID); RoseIterRole ir = { r, ROSE_OFFSET_INVALID }; - predStates[bc.roleTable[p].stateIndex].push_back(ir); + assert(contains(role_to_state, p)); + predStates[role_to_state.at(p)].push_back(ir); } else { const vector &myPreds = bc.rolePredecessors.at(r); for (u32 pred_entry : myPreds) { u32 p = predTable.at(pred_entry).role; RoseIterRole ir = { r, pred_entry }; assert(p < bc.roleTable.size()); - predStates[bc.roleTable[p].stateIndex].push_back(ir); + assert(contains(role_to_state, p)); + predStates[role_to_state.at(p)].push_back(ir); } } } @@ -3603,20 +3885,19 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { bc.depths = findDepths(*this); // Build NFAs - vector> built_nfas; map suffixes; set no_retrigger_queues; bool mpv_as_outfix; - prepMpv(*this, &built_nfas, &historyRequired, &mpv_as_outfix); + prepMpv(*this, bc, &historyRequired, &mpv_as_outfix); u32 outfixBeginQueue = qif.allocated_count(); - if (!prepOutfixes(*this, &built_nfas, &historyRequired)) { + if (!prepOutfixes(*this, bc, &historyRequired)) { return nullptr; } u32 outfixEndQueue = qif.allocated_count(); u32 leftfixBeginQueue = outfixEndQueue; - if (!buildNfas(*this, qif, &built_nfas, &suffixes, &bc.leftfix_info, - &no_retrigger_queues, &leftfixBeginQueue)) { + if (!buildNfas(*this, bc, qif, &suffixes, 
&no_retrigger_queues, + &leftfixBeginQueue)) { return nullptr; } buildCountingMiracles(*this, bc); @@ -3630,15 +3911,11 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 lit_benefits_size = verify_u32(sizeof(lit_benefits) * nonbenefits_base_id); assert(ISALIGNED_16(lit_benefits_size)); - u32 nfas_size = calcNfaSize(built_nfas); - - // Build our other tables - DEBUG_PRINTF("nfas_size %u\n", nfas_size); vector suffixEkeyLists; buildSuffixEkeyLists(*this, bc, qif, suffixes, &suffixEkeyLists); - buildInitialRoleTable(*this, bc); + buildInitialRoleTable(*this, bc, suffixes); DEBUG_PRINTF("roletable %zu\n", bc.roleTable.size()); @@ -3651,11 +3928,9 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { queue_count - leftfixBeginQueue, leftInfoTable, &laggedRoseCount, &historyRequired); - buildRoseTriggerLists(*this, bc); - vector literalTable; buildLiteralTable(*this, bc, literalTable); - buildSparseIter(bc, literalTable, predTable); + buildSparseIter(*this, bc, literalTable, predTable); u32 eodIterOffset; u32 eodIterMapOffset; @@ -3673,6 +3948,12 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { throw ResourceLimitError(); } + // Write role programs into the engine blob. + writeRolePrograms(bc); + + // Write root programs for literals into the engine blob. 
+ buildRootRolePrograms(*this, bc, literalTable); + u32 amatcherOffset = 0; u32 fmatcherOffset = 0; u32 ematcherOffset = 0; @@ -3685,13 +3966,12 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { currOffset = sizeof(RoseEngine); } + UNUSED const size_t engineBlobSize = + byte_length(bc.engine_blob); // test later + currOffset = ROUNDUP_CL(currOffset); DEBUG_PRINTF("currOffset %u\n", currOffset); - /* leave space for the nfas */ - u32 base_nfa_offset = currOffset; - currOffset += nfas_size; - /* leave space for the benefits listing */ u32 base_lits_benefits_offset = currOffset; currOffset += lit_benefits_size; @@ -3754,13 +4034,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 nfaInfoLen = sizeof(NfaInfo) * queue_count; currOffset = nfaInfoOffset + nfaInfoLen; - vector rootRoleTable; - buildRootRoleTable(*this, roleOffset, literalTable, &rootRoleTable); - - u32 rootRoleOffset = ROUNDUP_N(currOffset, sizeof(u32)); - u32 rootRoleLen = sizeof(u32) * rootRoleTable.size(); - currOffset = rootRoleOffset + rootRoleLen; - vector art; // Reports raised by anchored roles vector arit; // inverse reportID -> position in art calcAnchoredMatches(*this, art, arit); @@ -3834,23 +4107,19 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { assert(ISALIGNED_CL(ptr)); if (atable) { - assert(amatcherOffset >= base_nfa_offset); assert(amatcherOffset); memcpy(ptr + amatcherOffset, atable.get(), asize); } if (ftable) { assert(fmatcherOffset); - assert(fmatcherOffset >= base_nfa_offset); memcpy(ptr + fmatcherOffset, ftable.get(), fsize); } if (etable) { assert(ematcherOffset); - assert(ematcherOffset >= base_nfa_offset); memcpy(ptr + ematcherOffset, etable.get(), esize); } if (sbtable) { assert(sbmatcherOffset); - assert(sbmatcherOffset >= base_nfa_offset); memcpy(ptr + sbmatcherOffset, sbtable.get(), sbsize); } @@ -3902,8 +4171,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { 
engine->anchoredReportInverseMapOffset = anchoredReportInverseMapOffset; engine->multidirectOffset = multidirectOffset; - engine->rootRoleCount = verify_u32(rootRoleTable.size()); - engine->rootRoleOffset = rootRoleOffset; engine->eodIterOffset = eodIterOffset; engine->eodIterMapOffset = eodIterMapOffset; @@ -3956,8 +4223,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->maxBiAnchoredWidth = findMaxBAWidth(*this); engine->noFloatingRoots = hasNoFloatingRoots(); engine->hasFloatingDirectReports = floating_direct_report; - engine->requiresEodCheck = hasEodAnchors(*this, built_nfas, - outfixEndQueue); + engine->requiresEodCheck = hasEodAnchors(*this, bc, outfixEndQueue); engine->hasOutfixesInSmallBlock = hasNonSmallBlockOutfix(outfixes); engine->canExhaust = rm.patternSetCanExhaust(); engine->hasSom = hasSom; @@ -3997,19 +4263,12 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { } NfaInfo *nfa_infos = (NfaInfo *)(ptr + nfaInfoOffset); - populateNfaInfoBasics(nfa_infos, outfixes, rm, suffixes, suffixEkeyLists); - updateNfaState(built_nfas, bc.leftfix_info, &engine->stateOffsets, nfa_infos, + populateNfaInfoBasics(*this, bc, outfixes, suffixes, suffixEkeyLists, + no_retrigger_queues, nfa_infos); + updateNfaState(bc, &engine->stateOffsets, nfa_infos, &engine->scratchStateSize, &engine->nfaStateSize, &engine->tStateSize); - // Copy in the NFAs and update roles - engine->nfaRegionBegin = base_nfa_offset; - engine->nfaRegionEnd = copyInNFAs(*this, &bc.roleTable, built_nfas, - no_retrigger_queues, nfa_infos, - base_nfa_offset, suffixes, ptr); - // We're done with the NFAs. 
- built_nfas.clear(); - /* do after update mask */ buildLitBenefits(*this, engine.get(), base_lits_benefits_offset); @@ -4024,12 +4283,15 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { fillInSomRevNfas(engine.get(), ssm, rev_nfa_table_offset, rev_nfa_offsets); copy_bytes(ptr + engine->predOffset, predTable); - copy_bytes(ptr + engine->rootRoleOffset, rootRoleTable); copy_bytes(ptr + engine->anchoredReportMapOffset, art); copy_bytes(ptr + engine->anchoredReportInverseMapOffset, arit); copy_bytes(ptr + engine->multidirectOffset, mdr_reports); copy_bytes(ptr + engine->activeLeftIterOffset, activeLeftIter); + // Safety check: we shouldn't have written anything to the engine blob + // after we copied it into the engine bytecode. + assert(byte_length(bc.engine_blob) == engineBlobSize); + DEBUG_PRINTF("rose done %p\n", engine.get()); return engine; } diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index fd507a11..2a31a65a 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -2154,53 +2154,6 @@ bool hasOrphanedTops(const RoseBuildImpl &tbi) { #endif // NDEBUG -/** - * \brief Normalise vertices so that every one has <= 1 report. - */ -static -void normaliseRoles(RoseBuildImpl &build) { - DEBUG_PRINTF("normalising\n"); - RoseGraph &g = build.g; - - vector work; // Vertices with > 1 report. - - for (const auto &v : vertices_range(g)) { - if (g[v].reports.size() > 1) { - work.push_back(v); - } - } - - DEBUG_PRINTF("%zu vertices to normalise\n", work.size()); - - for (const auto &v : work) { - DEBUG_PRINTF("exploding vertex %zu with %zu reports\n", g[v].idx, - g[v].reports.size()); - - // Make a copy of v for the trailing N-1 reports. Each of those gets - // one report and a copy of the in-edges. The first vertex retains the - // out-edges and suffix, if any are present. All the others don't need - // them. 
- - const auto &reports = g[v].reports; - - for (auto it = next(begin(reports)); it != end(reports); ++it) { - const ReportID &r = *it; - RoseVertex v2 = build.cloneVertex(v); - g[v2].reports = {r}; - - for (const auto &e : in_edges_range(v, g)) { - add_edge(source(e, g), v2, g[e], g); - } - - // No out-edges or suffix. - g[v2].suffix.reset(); - } - - // Vertex v retains the first report. - g[v].reports = {*begin(reports)}; - } -} - aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { dumpRoseGraph(*this, nullptr, "rose_early.dot"); @@ -2315,10 +2268,6 @@ aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { dumpRoseGraph(*this, nullptr, "rose_pre_norm.dot"); - // Ensure that every vertex has <= 1 report, since the Rose runtime - // requires this at present. - normaliseRoles(*this); - return buildFinalEngine(minWidth); } diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index d4918e4f..d69d28d6 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -34,6 +34,7 @@ #include "rose_build_impl.h" #include "rose/rose_dump.h" #include "rose_internal.h" +#include "rose_program.h" #include "ue2common.h" #include "nfa/nfa_internal.h" #include "nfagraph/ng_dump.h" @@ -95,6 +96,59 @@ const RoseRole *getRoseRole(const RoseBuildImpl &build, return &roles[role_idx]; } +#define SKIP_CASE(name) \ + case ROSE_ROLE_INSTR_##name: { \ + const auto *ri = (const struct ROSE_ROLE_STRUCT_##name *)pc; \ + pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ + break; \ + } + +template +const Struct * +findInstruction(const RoseEngine *t, const RoseRole *role) { + if (!role->programOffset) { + return nullptr; + } + + const char *pc = (const char *)t + role->programOffset; + for (;;) { + u8 code = *(const u8 *)pc; + assert(code <= ROSE_ROLE_INSTR_END); + if (code == Opcode) { + return (const Struct *)pc; + } + // Skip to the next instruction. 
+ switch (code) { + SKIP_CASE(ANCHORED_DELAY) + SKIP_CASE(CHECK_ONLY_EOD) + SKIP_CASE(CHECK_ROOT_BOUNDS) + SKIP_CASE(CHECK_LEFTFIX) + SKIP_CASE(CHECK_LOOKAROUND) + SKIP_CASE(SOM_ADJUST) + SKIP_CASE(SOM_LEFTFIX) + SKIP_CASE(TRIGGER_INFIX) + SKIP_CASE(TRIGGER_SUFFIX) + SKIP_CASE(REPORT) + SKIP_CASE(REPORT_CHAIN) + SKIP_CASE(REPORT_EOD) + SKIP_CASE(REPORT_SOM_INT) + SKIP_CASE(REPORT_SOM) + SKIP_CASE(REPORT_SOM_KNOWN) + SKIP_CASE(SET_STATE) + SKIP_CASE(SET_GROUPS) + case ROSE_ROLE_INSTR_END: + return nullptr; + default: + assert(0); + return nullptr; + } + } + + return nullptr; +} + +#undef SKIP_CASE + namespace { class RoseGraphWriter { @@ -149,9 +203,12 @@ public: if (g[v].suffix) { os << "\\nSUFFIX (TOP " << g[v].suffix.top; if (r) { - assert(t); - const NFA *n = (const NFA *)((const char *)t + r->suffixOffset); - os << ", Q" << n->queueIndex; + const auto *ri = + findInstruction(t, r); + if (ri) { + os << ", Q" << ri->queue; + } } else { // Can't dump the queue number, but we can identify the suffix. 
if (g[v].suffix.graph) { @@ -191,7 +248,12 @@ public: os << "\\nROSE " << roseKind; os << " ("; if (r) { - os << "Q" << r->leftfixQueue << ", "; + const auto *ri = + findInstruction(t, r); + if (ri) { + os << "Q" << ri->queue << ", "; + } } os << "report " << g[v].left.leftfix_report << ")"; @@ -555,19 +617,28 @@ void dumpRoseLookaround(const RoseBuildImpl &build, const RoseEngine *t, for (RoseVertex v : vertices_range(g)) { const RoseRole *role = getRoseRole(build, t, v); - if (!role || role->lookaroundIndex == MO_INVALID_IDX) { + if (!role) { continue; } + const auto *ri = + findInstruction(t, role); + if (!ri) { + continue; + } + + const u32 look_idx = ri->index; + const u32 look_count = ri->count; + os << "Role " << g[v].role << endl; os << " literals: " << as_string_list(g[v].literals) << endl; - os << " lookaround: index=" << role->lookaroundIndex - << ", count=" << role->lookaroundCount << endl; + os << " lookaround: index=" << look_idx << ", count=" << look_count + << endl; - const s8 *look = look_base + role->lookaroundIndex; - const s8 *look_end = look + role->lookaroundCount; - const u8 *reach = - reach_base + role->lookaroundIndex * REACH_BITVECTOR_LEN; + const s8 *look = look_base + look_idx; + const s8 *look_end = look + look_count; + const u8 *reach = reach_base + look_idx * REACH_BITVECTOR_LEN; for (; look < look_end; look++, reach += REACH_BITVECTOR_LEN) { os << " " << std::setw(4) << std::setfill(' ') << int{*look} diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 39596d8f..b2604ff0 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -305,6 +305,11 @@ struct OutfixInfo { /* TODO: poly */ u32 get_queue(QueueIndexFactory &qif); + u32 get_queue() const { + assert(queue != ~0U); + return queue; + } + bool is_nonempty_mpv() const { return !puffettes.empty() || !triggered_puffettes.empty(); } @@ -329,9 +334,6 @@ struct OutfixInfo { /* TODO: poly */ std::vector puffettes; std::vector triggered_puffettes; - 
/** Once the outfix has been built into an engine, this will point to it. */ - NFA *nfa = nullptr; - RevAccInfo rev_info; u32 maxBAWidth = 0; //!< max bi-anchored width depth minWidth = depth::infinity(); diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index e42e0aca..e89a1772 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -2572,10 +2572,6 @@ void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm, for (auto it = tbi.outfixes.begin(); it != tbi.outfixes.end(); ++it) { assert(!it->is_dead()); - if (it->nfa) { - assert(!it->rdfa && !it->holder && !it->haig); - continue; - } assert(!it->chained); if (it->rdfa) { dfas.push_back(it->rdfa.get()); @@ -2650,10 +2646,6 @@ void mergeOutfixes(RoseBuildImpl &tbi) { vector som_dfas; for (const auto &outfix : tbi.outfixes) { - if (outfix.nfa) { - assert(!outfix.rdfa && !outfix.holder && !outfix.haig); - continue; - } assert(!outfix.chained); if (outfix.rdfa) { dfas.push_back(outfix.rdfa.get()); diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 8fbef889..109c2d26 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -629,8 +629,6 @@ RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in) } for (const auto &outfix : tbi.outfixes) { - assert(!outfix.nfa); /* should not be built yet */ - for (const auto &report_id : all_reports(outfix)) { outfix_map[report_id].insert(&outfix); } @@ -738,7 +736,6 @@ bool RoseDedupeAuxImpl::requiresDedupeSupport( for (const auto &outfix_ptr : outfixes) { assert(outfix_ptr); const OutfixInfo &out = *outfix_ptr; - assert(!out.nfa); /* should not be built yet */ if (has_outfix || has_role || has_suffix) { return true; diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index b9c0c05b..6ec89064 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -34,6 +34,7 @@ #include "rose_dump.h" #include "rose_common.h" #include "rose_internal.h" +#include 
"rose_program.h" #include "hs_compile.h" #include "ue2common.h" #include "nfa/nfa_build_util.h" @@ -202,47 +203,240 @@ u32 rolesWithFlag(const RoseEngine *t, u32 flag) { return n; } +#define HANDLE_CASE(name) \ + case ROSE_ROLE_INSTR_##name: { \ + const auto *ri = (const struct ROSE_ROLE_STRUCT_##name *)pc; \ + pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ + break; \ + } + static -u32 rolesWithSuffixes(const RoseEngine *t) { +u32 rolesWithInstr(const RoseEngine *t, + enum RoseRoleInstructionCode find_code) { u32 n = 0; const RoseRole *tr = getRoleTable(t); const RoseRole *tr_end = tr + t->roleCount; for (; tr != tr_end; ++tr) { - if (tr->suffixOffset) { - n++; + if (!tr->programOffset) { + continue; } + + const char *pc = (const char *)t + tr->programOffset; + for (;;) { + u8 code = *(const u8 *)pc; + assert(code <= ROSE_ROLE_INSTR_END); + if (code == find_code) { + n++; + goto next_role; + } + switch (code) { + HANDLE_CASE(CHECK_ONLY_EOD) + HANDLE_CASE(CHECK_ROOT_BOUNDS) + HANDLE_CASE(CHECK_LOOKAROUND) + HANDLE_CASE(CHECK_LEFTFIX) + HANDLE_CASE(ANCHORED_DELAY) + HANDLE_CASE(SOM_ADJUST) + HANDLE_CASE(SOM_LEFTFIX) + HANDLE_CASE(TRIGGER_INFIX) + HANDLE_CASE(TRIGGER_SUFFIX) + HANDLE_CASE(REPORT) + HANDLE_CASE(REPORT_CHAIN) + HANDLE_CASE(REPORT_EOD) + HANDLE_CASE(REPORT_SOM_INT) + HANDLE_CASE(REPORT_SOM) + HANDLE_CASE(REPORT_SOM_KNOWN) + HANDLE_CASE(SET_STATE) + HANDLE_CASE(SET_GROUPS) + case ROSE_ROLE_INSTR_END: + goto next_role; + default: + assert(0); + return 0; + } + } + next_role:; } return n; } -static -u32 rolesWithLookaround(const RoseEngine *t) { - u32 n = 0; - const RoseRole *tr = getRoleTable(t); - const RoseRole *tr_end = tr + t->roleCount; +#undef HANDLE_CASE - for (; tr != tr_end; ++tr) { - if (tr->lookaroundIndex != MO_INVALID_IDX) { - n++; +#define PROGRAM_CASE(name) \ + case ROSE_ROLE_INSTR_##name: { \ + os << " " << std::setw(4) << std::setfill('0') << (pc - pc_base) \ + << ": " #name " (" << (int)ROSE_ROLE_INSTR_##name << ")" << endl; \ + 
const auto *ri = (const struct ROSE_ROLE_STRUCT_##name *)pc; + +#define PROGRAM_NEXT_INSTRUCTION \ + pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ + break; \ + } + +static +void dumpRoleProgram(ofstream &os, const char *pc) { + const char *pc_base = pc; + for (;;) { + u8 code = *(const u8 *)pc; + assert(code <= ROSE_ROLE_INSTR_END); + switch (code) { + PROGRAM_CASE(ANCHORED_DELAY) { + os << " depth " << u32{ri->depth} << endl; + os << " groups 0x" << std::hex << ri->groups << std::dec + << endl; + os << " done_jump +" << ri->done_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_ONLY_EOD) { + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_ROOT_BOUNDS) { + os << " min_bound " << ri->min_bound << endl; + os << " max_bound " << ri->max_bound << endl; + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LOOKAROUND) { + os << " index " << ri->index << endl; + os << " count " << ri->count << endl; + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LEFTFIX) { + os << " queue " << ri->queue << endl; + os << " lag " << ri->lag << endl; + os << " report " << ri->report << endl; + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_ADJUST) { + os << " distance " << ri->distance << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_LEFTFIX) { + os << " queue " << ri->queue << endl; + os << " lag " << ri->lag << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(TRIGGER_INFIX) { + os << " queue " << ri->queue << endl; + os << " event " << ri->event << endl; + os << " cancel " << u32{ri->cancel} << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(TRIGGER_SUFFIX) { + os << " queue " << ri->queue << endl; + os << " event " << ri->event << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT) { + os << " report " << ri->report << 
endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_CHAIN) { + os << " report " << ri->report << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_EOD) { + os << " report " << ri->report << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_INT) { + os << " report " << ri->report << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM) { + os << " report " << ri->report << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_KNOWN) { + os << " report " << ri->report << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_STATE) { + os << " depth " << u32{ri->depth} << endl; + os << " index " << ri->index << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_GROUPS) { + os << " groups 0x" << std::hex << ri->groups << std::dec + << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(END) { return; } + PROGRAM_NEXT_INSTRUCTION + + default: + os << " UNKNOWN (code " << int{code} << ")" << endl; + os << " " << endl; + return; } } - return n; } -// Count roles that fire reports -static -u32 rolesWithReports(const RoseEngine *t) { - u32 n = 0; - const RoseRole *tr = getRoleTable(t); - const RoseRole *tr_end = tr + t->roleCount; +#undef PROGRAM_CASE +#undef PROGRAM_NEXT_INSTRUCTION - for (; tr != tr_end; ++tr) { - if (tr->reportId != MO_INVALID_IDX) { - n++; +static +void dumpRoseRolePrograms(const RoseEngine *t, const string &filename) { + ofstream os(filename); + + const RoseRole *roles = getRoleTable(t); + const char *base = (const char *)t; + + for (u32 i = 0; i < t->roleCount; i++) { + const RoseRole *role = &roles[i]; + os << "Role " << i << endl; + + if (!role->programOffset) { + os << " " << endl; + continue; } + + dumpRoleProgram(os, base + role->programOffset); + os << endl; } - return n; + + os.close(); +} + +static +void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) { + ofstream os(filename); + + const RoseLiteral *lits = getLiteralTable(t); + const char *base = (const 
char *)t; + + for (u32 i = 0; i < t->literalCount; i++) { + const RoseLiteral *lit = &lits[i]; + if (!lit->rootProgramOffset) { + continue; + } + + os << "Literal " << i << endl; + dumpRoleProgram(os, base + lit->rootProgramOffset); + os << endl; + } + + os.close(); } static @@ -279,16 +473,6 @@ void dumpPreds(FILE *f, const RoseEngine *t) { } } -static -const char *startNfaRegion(const RoseEngine *t) { - return (const char *)t + t->nfaRegionBegin; -} - -static -const char *endNfaRegion(const RoseEngine *t) { - return (const char *)t + t->nfaRegionEnd; -} - static void dumpNfaNotes(ofstream &fout, const RoseEngine *t, const NFA *n) { const u32 qindex = n->queueIndex; @@ -353,18 +537,15 @@ void dumpComponentInfo(const RoseEngine *t, const string &base) { ss << base << "rose_components.txt"; ofstream fout(ss.str().c_str()); - const char *p = startNfaRegion(t); - const char *pe = endNfaRegion(t); - fout << "Index Offset\tEngine \tStates S.State Bytes Notes\n"; - while (p < pe) { - const NFA *n = (const NFA *)p; - u32 i = n->queueIndex; + for (u32 i = 0; i < t->queueCount; i++) { + const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); + const NFA *n = getNfaByInfo(t, nfa_info); fout << left << setw(6) << i << " "; - fout << left << (p - (const char *)t) << "\t"; /* offset */ + fout << left << ((const char *)n - (const char *)t) << "\t"; /* offset */ fout << left << setw(16) << describe(*n) << "\t"; @@ -375,8 +556,6 @@ void dumpComponentInfo(const RoseEngine *t, const string &base) { dumpNfaNotes(fout, t, n); fout << endl; - - p += ROUNDUP_CL(n->length); } } @@ -416,20 +595,17 @@ void dumpExhaust(const RoseEngine *t, const string &base) { static void dumpNfas(const RoseEngine *t, bool dump_raw, const string &base) { - const char *p = startNfaRegion(t); - const char *pe = endNfaRegion(t); - dumpExhaust(t, base); - while (p < pe) { - const NFA *n = (const NFA *)p; - u32 q = n->queueIndex; + for (u32 i = 0; i < t->queueCount; i++) { + const NfaInfo *nfa_info = 
getNfaInfoByQueue(t, i); + const NFA *n = getNfaByInfo(t, nfa_info); stringstream sstxt, ssdot, ssraw; - sstxt << base << "rose_nfa_" << q << ".txt"; - ssdot << base << "rose_nfa_" << q << ".dot"; - ssraw << base << "rose_nfa_" << q << ".raw"; + sstxt << base << "rose_nfa_" << i << ".txt"; + ssdot << base << "rose_nfa_" << i << ".dot"; + ssraw << base << "rose_nfa_" << i << ".raw"; FILE *f; @@ -446,8 +622,6 @@ void dumpNfas(const RoseEngine *t, bool dump_raw, const string &base) { fwrite(n, 1, n->length, f); fclose(f); } - - p += ROUNDUP_CL(n->length); } } @@ -638,9 +812,7 @@ void roseDumpText(const RoseEngine *t, FILE *f) { fprintf(f, " - role state table : %zu bytes\n", t->rolesWithStateCount * sizeof(u32)); fprintf(f, " - nfa info table : %u bytes\n", - t->rootRoleOffset - t->nfaInfoOffset); - fprintf(f, " - root role table : %zu bytes\n", - t->rootRoleCount * sizeof(u32)); + t->anchoredReportMapOffset - t->nfaInfoOffset); fprintf(f, " - lookaround table : %u bytes\n", t->predOffset - t->lookaroundTableOffset); fprintf(f, " - lookaround reach : %u bytes\n", @@ -686,24 +858,23 @@ void roseDumpText(const RoseEngine *t, FILE *f) { fprintf(f, "number of roles : %u\n", t->roleCount); fprintf(f, " - with state index : %u\n", t->rolesWithStateCount); fprintf(f, " - with leftfix nfa : %u\n", - rolesWithFlag(t, ROSE_ROLE_FLAG_ROSE)); - fprintf(f, " - with suffix nfa : %u\n", rolesWithSuffixes(t)); - fprintf(f, " - with lookaround : %u\n", rolesWithLookaround(t)); - fprintf(f, " - with reports : %u\n", rolesWithReports(t)); + rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_LEFTFIX)); + fprintf(f, " - with suffix nfa : %u\n", + rolesWithInstr(t, ROSE_ROLE_INSTR_TRIGGER_SUFFIX)); + fprintf(f, " - with lookaround : %u\n", + rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_LOOKAROUND)); + fprintf(f, " - with reports : %u\n", + rolesWithInstr(t, ROSE_ROLE_INSTR_REPORT)); fprintf(f, " - with som reports : %u\n", - rolesWithFlag(t, ROSE_ROLE_FLAG_SOM_REPORT)); - fprintf(f, " - with eod accepts : 
%u\n", - rolesWithFlag(t, ROSE_ROLE_FLAG_ACCEPT_EOD)); + rolesWithInstr(t, ROSE_ROLE_INSTR_REPORT_SOM_INT)); fprintf(f, " - match only at end : %u\n", - rolesWithFlag(t, ROSE_ROLE_FLAG_ONLY_AT_END)); + rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_ONLY_EOD)); fprintf(f, " + anchored : %u\n", t->anchoredMatches); - fprintf(f, " - no preds (root) : %u\n", - rolesWithFlag(t, ROSE_ROLE_PRED_NONE)); fprintf(f, " - simple preds : %u\n", rolesWithFlag(t, ROSE_ROLE_PRED_SIMPLE)); - fprintf(f, " - root preds : %u\n", - rolesWithFlag(t, ROSE_ROLE_PRED_ROOT)); + fprintf(f, " - bound root preds : %u\n", + rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS)); fprintf(f, " - 'any' preds : %u\n", rolesWithFlag(t, ROSE_ROLE_PRED_ANY)); fprintf(f, "number of preds : %u\n", t->predCount); @@ -810,8 +981,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, roleCount); DUMP_U32(t, predOffset); DUMP_U32(t, predCount); - DUMP_U32(t, rootRoleOffset); - DUMP_U32(t, rootRoleCount); DUMP_U32(t, leftOffset); DUMP_U32(t, roseCount); DUMP_U32(t, lookaroundTableOffset); @@ -872,8 +1041,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, literalBenefitsOffsets); DUMP_U32(t, somRevCount); DUMP_U32(t, somRevOffsetOffset); - DUMP_U32(t, nfaRegionBegin); - DUMP_U32(t, nfaRegionEnd); DUMP_U32(t, group_weak_end); DUMP_U32(t, floatingStreamState); DUMP_U32(t, eodLiteralId); @@ -912,19 +1079,7 @@ void roseDumpRoleStructRaw(const RoseEngine *t, FILE *f) { for (const RoseRole *p = tr; p < tr_end; p++) { fprintf(f, "role[%zu] = {\n", p - tr); DUMP_U32(p, flags); - DUMP_U32(p, predOffset); - DUMP_U64(p, groups); - DUMP_U32(p, reportId); - DUMP_U32(p, stateIndex); - DUMP_U32(p, suffixEvent); - DUMP_U8(p, depth); - DUMP_U32(p, suffixOffset); - DUMP_U32(p, leftfixReport); - DUMP_U32(p, leftfixLag); - DUMP_U32(p, leftfixQueue); - DUMP_U32(p, somAdjust); - DUMP_U32(p, lookaroundIndex); - DUMP_U32(p, lookaroundCount); + DUMP_U32(p, programOffset); fprintf(f, "}\n"); } } @@ -935,6 
+1090,10 @@ void roseDumpComponents(const RoseEngine *t, bool dump_raw, const string &base) dumpAnchored(t, base); dumpRevComponentInfo(t, base); dumpRevNfas(t, dump_raw, base); + + // Role programs. + dumpRoseRolePrograms(t, base + "/rose_role_programs.txt"); + dumpRoseLitPrograms(t, base + "/rose_lit_root_programs.txt"); } void roseDumpInternals(const RoseEngine *t, const string &base) { diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 335d2b2c..00e62eb9 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -75,13 +75,7 @@ ReportID literalToReport(u32 id) { // Structure representing a literal. Each literal may have many roles. struct RoseLiteral { - u32 rootRoleOffset; /**< If rootRoleCount == 1, this is an offset relative - * to the rose engine to the root role associated with - * the literal. - * If rootRoleCount > 1, this is the first index into - * the rootRoleTable indicating the root roles. - */ - u32 rootRoleCount; // number of root roles + u32 rootProgramOffset; // role program to run for root roles. u32 iterOffset; // offset of sparse iterator, relative to rose u32 iterMapOffset; // offset of the iter mapping table, relative to rose rose_group groups; // bitset of groups that cause this literal to fire. @@ -216,13 +210,6 @@ struct LeftNfaInfo { rose_group squash_mask; /* & mask applied when rose nfa dies */ }; -// A list of these is used to trigger prefix/infix roses. 
-struct RoseTrigger { - u32 queue; // queue index of leftfix - u32 event; // queue event, from MQE_* - u8 cancel_prev_top; -}; - struct NfaInfo { u32 nfaOffset; u32 stateOffset; @@ -238,42 +225,14 @@ struct NfaInfo { * matches */ }; -#define ROSE_ROLE_FLAG_ANCHOR_TABLE (1U << 0) /**< role is triggered from - * anchored table */ -#define ROSE_ROLE_FLAG_ACCEPT_EOD (1U << 2) /**< "fake" role, fires callback - * at EOD */ -#define ROSE_ROLE_FLAG_ONLY_AT_END (1U << 3) /**< role can only be switched on - * at end of block */ -#define ROSE_ROLE_FLAG_PRED_OF_EOD (1U << 4) /**< eod is a successor literal - * of the role */ -#define ROSE_ROLE_FLAG_EOD_TABLE (1U << 5) /**< role is triggered from eod - * table */ -#define ROSE_ROLE_FLAG_ROSE (1U << 6) /**< rose style prefix nfa for - * role */ -#define ROSE_ROLE_FLAG_SOM_REPORT (1U << 7) /**< report id is only used to - * manipulate som */ -#define ROSE_ROLE_FLAG_REPORT_START (1U << 8) /**< som som som som */ -#define ROSE_ROLE_FLAG_CHAIN_REPORT (1U << 9) /**< report id is only used to - * start an outfix engine */ -#define ROSE_ROLE_FLAG_SOM_ADJUST (1U << 10) /**< som value to use is offset - * from match end location */ -#define ROSE_ROLE_FLAG_SOM_ROSEFIX (1U << 11) /**< som value to use is provided - * by prefix/infix */ - /* We allow different types of role-predecessor relationships. 
These are stored * in with the flags */ -#define ROSE_ROLE_PRED_NONE (1U << 20) /**< the only pred is the root, - * [0, inf] bounds */ #define ROSE_ROLE_PRED_SIMPLE (1U << 21) /**< single [0,inf] pred, no * offset tracking */ -#define ROSE_ROLE_PRED_ROOT (1U << 22) /**< pred is root or anchored - * root, and we have bounds */ #define ROSE_ROLE_PRED_ANY (1U << 23) /**< any of our preds can match */ -#define ROSE_ROLE_PRED_CLEAR_MASK (~(ROSE_ROLE_PRED_NONE \ - | ROSE_ROLE_PRED_SIMPLE \ - | ROSE_ROLE_PRED_ROOT \ - | ROSE_ROLE_PRED_ANY)) +#define ROSE_ROLE_PRED_CLEAR_MASK \ + (~(ROSE_ROLE_PRED_SIMPLE | ROSE_ROLE_PRED_ANY)) #define MAX_STORED_LEFTFIX_LAG 127 /* max leftfix lag that we can store in one * whole byte (OWB) (streaming only). Other @@ -285,28 +244,7 @@ struct NfaInfo { // Structure representing a literal role. struct RoseRole { u32 flags; - u32 predOffset; // either offset of pred sparse iterator, or - // (for ROSE_ROLE_PRED_ROOT) index of single RosePred. - rose_group groups; /**< groups to enable when role is set (groups of succ - * literals) */ - ReportID reportId; // report ID, or MO_INVALID_IDX - u32 stateIndex; /**< index into state multibit, or MMB_INVALID. Roles do not - * require a state bit if they are terminal */ - u32 suffixEvent; // queue event, from MQE_ - u8 depth; /**< depth of this vertex from root in the tree, or 255 if greater. - */ - u32 suffixOffset; /**< suffix nfa: 0 if no suffix associated with the role, - * relative to base of the rose. */ - ReportID leftfixReport; // (pre|in)fix report to check, or MO_INVALID_IDX. - u32 leftfixLag; /**< distance behind match where we need to check the - * leftfix engine status */ - u32 leftfixQueue; /**< queue index of the prefix/infix before role */ - u32 infixTriggerOffset; /* offset to list of infix roses to trigger */ - u32 somAdjust; /**< som for the role is offset from end match offset */ - - u32 lookaroundIndex; /**< index of lookaround offset/reach in table, or - * MO_INVALID_IDX. 
*/ - u32 lookaroundCount; /**< number of lookaround entries. */ + u32 programOffset; /**< offset to program to run. */ }; // Structure representing a predecessor relationship @@ -513,8 +451,6 @@ struct RoseEngine { u32 roleCount; // number of RoseRole entries u32 predOffset; // offset of RosePred array (bytes) u32 predCount; // number of RosePred entries - u32 rootRoleOffset; - u32 rootRoleCount; u32 leftOffset; u32 roseCount; @@ -584,8 +520,6 @@ struct RoseEngine { id */ u32 somRevCount; /**< number of som reverse nfas */ u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */ - u32 nfaRegionBegin; /* start of the nfa region, debugging only */ - u32 nfaRegionEnd; /* end of the nfa region, debugging only */ u32 group_weak_end; /* end of weak groups, debugging only */ u32 floatingStreamState; // size in bytes u32 eodLiteralId; // literal ID for eod ROSE_EVENT if used, otherwise 0. @@ -715,13 +649,6 @@ const struct mmbit_sparse_iter *getActiveLeftIter(const struct RoseEngine *t) { return it; } -static really_inline -const u32 *getRootRoleTable(const struct RoseEngine *t) { - const u32 *r = (const u32 *)((const char *)t + t->rootRoleOffset); - assert(ISALIGNED_N(r, 4)); - return r; -} - static really_inline const struct lit_benefits *getLiteralBenefitsTable( const struct RoseEngine *t) { diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h new file mode 100644 index 00000000..40f013ca --- /dev/null +++ b/src/rose/rose_program.h @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Rose data structures to do with role programs. + */ + +#ifndef ROSE_ROSE_PROGRAM_H +#define ROSE_ROSE_PROGRAM_H + +#include "rose_internal.h" +#include "ue2common.h" + +/** \brief Minimum alignment for each instruction in memory. */ +#define ROSE_INSTR_MIN_ALIGN 8U + +/** \brief Role program instruction opcodes. */ +enum RoseRoleInstructionCode { + ROSE_ROLE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher. + ROSE_ROLE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD. + ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS, //!< Bounds on distance from root. + ROSE_ROLE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check. + ROSE_ROLE_INSTR_CHECK_LEFTFIX, //!< Leftfix must be in accept state. 
+ ROSE_ROLE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM. + ROSE_ROLE_INSTR_SOM_LEFTFIX, //!< Acquire SOM from a leftfix engine. + ROSE_ROLE_INSTR_TRIGGER_INFIX, //!< Trigger an infix engine. + ROSE_ROLE_INSTR_TRIGGER_SUFFIX, //!< Trigger a suffix engine. + ROSE_ROLE_INSTR_REPORT, //!< Fire an ordinary report. + ROSE_ROLE_INSTR_REPORT_CHAIN, //!< Fire a chained report (MPV). + ROSE_ROLE_INSTR_REPORT_EOD, //!< Fire a callback at EOD time. + ROSE_ROLE_INSTR_REPORT_SOM_INT, //!< Manipulate SOM only. + ROSE_ROLE_INSTR_REPORT_SOM, //!< Manipulate SOM and report. + ROSE_ROLE_INSTR_REPORT_SOM_KNOWN, //!< Rose role knows its SOM offset. + ROSE_ROLE_INSTR_SET_STATE, //!< Switch a state index on. + ROSE_ROLE_INSTR_SET_GROUPS, //!< Set some literal group bits. + ROSE_ROLE_INSTR_END //!< End of program. +}; + +struct ROSE_ROLE_STRUCT_ANCHORED_DELAY { + u8 code; //!< From enum RoseRoleInstructionCode. + u8 depth; //!< Depth for this state. + rose_group groups; //!< Bitmask. + u32 done_jump; //!< Jump forward this many bytes if successful. +}; + +struct ROSE_ROLE_STRUCT_CHECK_ONLY_EOD { + u8 code; //!< From enum RoseRoleInstructionCode. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_ROLE_STRUCT_CHECK_ROOT_BOUNDS { + u8 code; //!< From enum RoseRoleInstructionCode. + u32 min_bound; //!< Min distance from zero. + u32 max_bound; //!< Max distance from zero (or ROSE_BOUND_INF). + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_ROLE_STRUCT_CHECK_LOOKAROUND { + u8 code; //!< From enum RoseRoleInstructionCode. + u32 index; + u32 count; + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_ROLE_STRUCT_CHECK_LEFTFIX { + u8 code; //!< From enum RoseRoleInstructionCode. + u32 queue; //!< Queue of leftfix to check. + u32 lag; //!< Lag of leftfix for this case. + ReportID report; //!< ReportID of leftfix to check. + u32 fail_jump; //!< Jump forward this many bytes on failure. 
+}; + +struct ROSE_ROLE_STRUCT_SOM_ADJUST { + u8 code; //!< From enum RoseRoleInstructionCode. + u32 distance; //!< Distance to EOM. +}; + +struct ROSE_ROLE_STRUCT_SOM_LEFTFIX { + u8 code; //!< From enum RoseRoleInstructionCode. + u32 queue; //!< Queue index of leftfix providing SOM. + u32 lag; //!< Lag of leftfix for this case. +}; + +struct ROSE_ROLE_STRUCT_TRIGGER_INFIX { + u8 code; //!< From enum RoseRoleInstructionCode. + u8 cancel; //!< Cancels previous top event. + u32 queue; //!< Queue index of infix. + u32 event; //!< Queue event, from MQE_*. +}; + +struct ROSE_ROLE_STRUCT_TRIGGER_SUFFIX { + u8 code; //!< From enum RoseRoleInstructionCode. + u32 queue; //!< Queue index of suffix. + u32 event; //!< Queue event, from MQE_*. +}; + +struct ROSE_ROLE_STRUCT_REPORT { + u8 code; //!< From enum RoseRoleInstructionCode. + ReportID report; +}; + +struct ROSE_ROLE_STRUCT_REPORT_CHAIN { + u8 code; //!< From enum RoseRoleInstructionCode. + ReportID report; +}; + +struct ROSE_ROLE_STRUCT_REPORT_EOD { + u8 code; //!< From enum RoseRoleInstructionCode. + ReportID report; +}; + +struct ROSE_ROLE_STRUCT_REPORT_SOM_INT { + u8 code; //!< From enum RoseRoleInstructionCode. + ReportID report; +}; + +struct ROSE_ROLE_STRUCT_REPORT_SOM { + u8 code; //!< From enum RoseRoleInstructionCode. + ReportID report; +}; + +struct ROSE_ROLE_STRUCT_REPORT_SOM_KNOWN { + u8 code; //!< From enum RoseRoleInstructionCode. + ReportID report; +}; + +struct ROSE_ROLE_STRUCT_SET_STATE { + u8 code; //!< From enum RoseRoleInstructionCode. + u8 depth; //!< Depth for this state. + u32 index; //!< State index in multibit. +}; + +struct ROSE_ROLE_STRUCT_SET_GROUPS { + u8 code; //!< From enum RoseRoleInstructionCode. + rose_group groups; //!< Bitmask. +}; + +struct ROSE_ROLE_STRUCT_END { + u8 code; //!< From enum RoseRoleInstructionCode. 
+}; + +#endif // ROSE_ROSE_PROGRAM_H diff --git a/src/util/container.h b/src/util/container.h index 62e841c1..63e27743 100644 --- a/src/util/container.h +++ b/src/util/container.h @@ -100,8 +100,9 @@ std::set assoc_keys(const C &container) { /** * \brief Return the length in bytes of the given vector of (POD) objects. */ -template -typename std::vector::size_type byte_length(const std::vector &vec) { +template +typename std::vector::size_type +byte_length(const std::vector &vec) { static_assert(std::is_pod::value, "should be pod"); return vec.size() * sizeof(T); } @@ -110,8 +111,8 @@ typename std::vector::size_type byte_length(const std::vector &vec) { * \brief Copy the given vector of POD objects to the given location in memory. * It is safe to give this function an empty vector. */ -template -void *copy_bytes(void *dest, const std::vector &vec) { +template +void *copy_bytes(void *dest, const std::vector &vec) { static_assert(std::is_pod::value, "should be pod"); assert(dest);