diff --git a/src/rose/eod.c b/src/rose/eod.c index ef987388..b95a952e 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -114,9 +114,9 @@ int roseEodRunIterator(const struct RoseEngine *t, u64a offset, DEBUG_PRINTF("running eod program at offset %u\n", t->eodIterProgramOffset); - int work_done = 0; - if (roseRunProgram(t, t->eodIterProgramOffset, offset, &(scratch->tctxt), 0, - &work_done) == HWLM_TERMINATE_MATCHING) { + const size_t match_len = 0; + if (roseRunProgram(t, t->eodIterProgramOffset, offset, match_len, + &(scratch->tctxt), 0) == HWLM_TERMINATE_MATCHING) { return MO_HALT_MATCHING; } @@ -233,9 +233,9 @@ int roseRunEodProgram(const struct RoseEngine *t, u64a offset, // There should be no pending delayed literals. assert(!scratch->tctxt.filledDelayedSlots); - int work_done = 0; - if (roseRunProgram(t, t->eodProgramOffset, offset, &scratch->tctxt, 0, - &work_done) == HWLM_TERMINATE_MATCHING) { + const size_t match_len = 0; + if (roseRunProgram(t, t->eodProgramOffset, offset, match_len, + &scratch->tctxt, 0) == HWLM_TERMINATE_MATCHING) { return MO_HALT_MATCHING; } diff --git a/src/rose/match.c b/src/rose/match.c index 89f0674e..72f2a167 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -71,123 +71,6 @@ void printMatch(const struct core_info *ci, u64a start, u64a end) { } #endif -static rose_inline -int roseCheckBenefits(struct RoseContext *tctxt, u64a end, u32 mask_rewind, - const u8 *and_mask, const u8 *exp_mask) { - DEBUG_PRINTF("am offset = 
%zu, em offset = %zu\n", - and_mask - (const u8 *)tctxt->t, - exp_mask - (const u8 *)tctxt->t); - const u8 *data; - - // If the check works over part of the history and part of the buffer, we - // create a temporary copy of the data in here so it's contiguous. - u8 temp[MAX_MASK2_WIDTH]; - - struct core_info *ci = &tctxtToScratch(tctxt)->core_info; - s64a buffer_offset = (s64a)end - ci->buf_offset; - DEBUG_PRINTF("rel offset %lld\n", buffer_offset); - if (buffer_offset >= mask_rewind) { - data = ci->buf + buffer_offset - mask_rewind; - DEBUG_PRINTF("all in one case data=%p buf=%p rewind=%u\n", data, - ci->buf, mask_rewind); - } else if (buffer_offset <= 0) { - data = ci->hbuf + ci->hlen + buffer_offset - mask_rewind; - DEBUG_PRINTF("all in one case data=%p buf=%p rewind=%u\n", data, - ci->buf, mask_rewind); - } else { - u32 shortfall = mask_rewind - buffer_offset; - DEBUG_PRINTF("shortfall of %u, rewind %u hlen %zu\n", shortfall, - mask_rewind, ci->hlen); - data = temp; - memcpy(temp, ci->hbuf + ci->hlen - shortfall, shortfall); - memcpy(temp + shortfall, ci->buf, mask_rewind - shortfall); - } - -#ifdef DEBUG - DEBUG_PRINTF("DATA: "); - for (u32 i = 0; i < mask_rewind; i++) { - printf("%c", ourisprint(data[i]) ? 
data[i] : '?'); - } - printf(" (len=%u)\n", mask_rewind); -#endif - - u32 len = mask_rewind; - while (len >= sizeof(u64a)) { - u64a a = unaligned_load_u64a(data); - a &= *(const u64a *)and_mask; - if (a != *(const u64a *)exp_mask) { - DEBUG_PRINTF("argh %016llx %016llx\n", a, *(const u64a *)exp_mask); - return 0; - } - data += sizeof(u64a); - and_mask += sizeof(u64a); - exp_mask += sizeof(u64a); - len -= sizeof(u64a); - } - - while (len) { - u8 a = *data; - a &= *and_mask; - if (a != *exp_mask) { - DEBUG_PRINTF("argh d%02hhx =%02hhx am%02hhx em%02hhx\n", a, - *data, *and_mask, *exp_mask); - return 0; - } - data++; - and_mask++; - exp_mask++; - len--; - } - - return 1; -} - -static -int roseCheckLiteralBenefits(u64a end, size_t mask_rewind, u32 id, - struct RoseContext *tctxt) { - const struct RoseEngine *t = tctxt->t; - const struct lit_benefits *lbi = getLiteralBenefitsTable(t) + id; - return roseCheckBenefits(tctxt, end, mask_rewind, lbi->and_mask.a8, - lbi->expected.e8); -} - -static rose_inline -void pushDelayedMatches(const struct RoseLiteral *tl, u64a offset, - struct RoseContext *tctxt) { - u32 delay_mask = tl->delay_mask; - if (!delay_mask) { - return; - } - - u32 delay_count = tctxt->t->delay_count; - u8 *delaySlotBase = getDelaySlots(tctxtToScratch(tctxt)); - size_t delaySlotSize = tctxt->t->delay_slot_size; - assert(tl->delayIdsOffset != ROSE_OFFSET_INVALID); - const u32 *delayIds = getByOffset(tctxt->t, tl->delayIdsOffset); - assert(ISALIGNED(delayIds)); - - while (delay_mask) { - u32 src_slot_index = findAndClearLSB_32(&delay_mask); - u32 slot_index = (src_slot_index + offset) & DELAY_MASK; - u8 *slot = delaySlotBase + delaySlotSize * slot_index; - - if (offset + src_slot_index <= tctxt->delayLastEndOffset) { - DEBUG_PRINTF("skip too late\n"); - goto next; - } - - DEBUG_PRINTF("pushing tab %u into slot %u\n", *delayIds, slot_index); - if (!(tctxt->filledDelayedSlots & (1U << slot_index))) { - tctxt->filledDelayedSlots |= 1U << slot_index; - 
mmbit_clear(slot, delay_count); - } - - mmbit_set(slot, delay_count, *delayIds); - next: - delayIds++; - } -} - hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, void *ctx) { struct hs_scratch *scratch = ctx; @@ -211,17 +94,17 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, return tctx->groups; } - if (id < t->nonbenefits_base_id - && !roseCheckLiteralBenefits(real_end, end - start + 1, id, tctx)) { - return tctx->groups; - } - assert(id < t->literalCount); - const struct RoseLiteral *tl = &getLiteralTable(t)[id]; + const u32 *delayRebuildPrograms = + getByOffset(t, t->litDelayRebuildProgramOffset); + const u32 programOffset = delayRebuildPrograms[id]; - DEBUG_PRINTF("literal id=%u, groups=0x%016llx\n", id, tl->groups); - - pushDelayedMatches(tl, real_end, tctx); + if (programOffset) { + const size_t match_len = end - start + 1; + UNUSED hwlmcb_rv_t rv = + roseRunProgram(t, programOffset, real_end, match_len, tctx, 0); + assert(rv != HWLM_TERMINATE_MATCHING); + } /* we are just repopulating the delay queue, groups should be * already set from the original scan. */ @@ -465,31 +348,28 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) { } assert(id < t->literalCount); - const struct RoseLiteral *tl = &getLiteralTable(t)[id]; - assert(tl->programOffset); - assert(!tl->delay_mask); + const u32 *programs = getByOffset(t, t->litProgramOffset); + const u32 programOffset = programs[id]; + assert(programOffset); - DEBUG_PRINTF("literal id=%u, groups=0x%016llx\n", id, tl->groups); + // Anchored literals are never delayed. 
+ assert(!((const u32 *)getByOffset(t, t->litDelayRebuildProgramOffset))[id]); + + DEBUG_PRINTF("literal id=%u\n", id); if (real_end <= t->floatingMinLiteralMatchOffset) { roseFlushLastByteHistory(t, state, real_end, tctxt); tctxt->lastEndOffset = real_end; } - int work_done = 0; - if (roseRunProgram(t, tl->programOffset, real_end, tctxt, 1, &work_done) == + const size_t match_len = 0; + if (roseRunProgram(t, programOffset, real_end, match_len, tctxt, 1) == HWLM_TERMINATE_MATCHING) { assert(can_stop_matching(tctxtToScratch(tctxt))); DEBUG_PRINTF("caller requested termination\n"); return MO_HALT_MATCHING; } - // If we've actually handled any roles, we might need to apply this - // literal's squash mask to our groups as well. - if (work_done && tl->squashesGroup) { - roseSquashGroup(tctxt, tl); - } - DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); if (real_end > t->floatingMinLiteralMatchOffset) { @@ -502,9 +382,10 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) { // Rose match-processing workhorse /* assumes not in_anchored */ static really_inline -hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end, u32 id, - struct RoseContext *tctxt, char do_group_check, - char in_delay_play, char in_anch_playback) { +hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end, + size_t match_len, u32 id, + struct RoseContext *tctxt, char in_delay_play, + char in_anch_playback) { /* assert(!tctxt->in_anchored); */ u8 *state = tctxt->state; @@ -536,63 +417,30 @@ hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end, u32 id, } assert(id < t->literalCount); - const struct RoseLiteral *tl = &getLiteralTable(t)[id]; - DEBUG_PRINTF("lit id=%u, groups=0x%016llx\n", id, tl->groups); - - if (do_group_check && !(tl->groups & tctxt->groups)) { - DEBUG_PRINTF("IGNORE: none of this literal's groups are set.\n"); - return HWLM_CONTINUE_MATCHING; - } - - assert(!in_delay_play || !tl->delay_mask); - if (!in_delay_play) { - 
pushDelayedMatches(tl, end, tctxt); - } - - if (end < t->floatingMinLiteralMatchOffset) { - DEBUG_PRINTF("too soon\n"); - assert(!in_delay_play); /* should not have been enqueued */ - /* continuing on may result in pushing global time back */ - return HWLM_CONTINUE_MATCHING; - } - - int work_done = 0; - - if (tl->programOffset) { - DEBUG_PRINTF("running program at %u\n", tl->programOffset); - if (roseRunProgram(t, tl->programOffset, end, tctxt, 0, &work_done) == - HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - - } - - // If we've actually handled any roles, we might need to apply this - // literal's squash mask to our groups as well. - if (work_done && tl->squashesGroup) { - roseSquashGroup(tctxt, tl); - } - - return HWLM_CONTINUE_MATCHING; -} - - -static never_inline -hwlmcb_rv_t roseProcessDelayedMatch(const struct RoseEngine *t, u64a end, u32 id, - struct RoseContext *tctxt) { - return roseProcessMatch_i(t, end, id, tctxt, 1, 1, 0); + const u32 *programs = getByOffset(t, t->litProgramOffset); + return roseRunProgram(t, programs[id], end, match_len, tctxt, 0); } static never_inline -hwlmcb_rv_t roseProcessDelayedAnchoredMatch(const struct RoseEngine *t, u64a end, - u32 id, struct RoseContext *tctxt) { - return roseProcessMatch_i(t, end, id, tctxt, 0, 0, 1); +hwlmcb_rv_t roseProcessDelayedMatch(const struct RoseEngine *t, u64a end, + u32 id, struct RoseContext *tctxt) { + size_t match_len = 0; + return roseProcessMatch_i(t, end, match_len, id, tctxt, 1, 0); +} + +static never_inline +hwlmcb_rv_t roseProcessDelayedAnchoredMatch(const struct RoseEngine *t, + u64a end, u32 id, + struct RoseContext *tctxt) { + size_t match_len = 0; + return roseProcessMatch_i(t, end, match_len, id, tctxt, 0, 1); } static really_inline -hwlmcb_rv_t roseProcessMainMatch(const struct RoseEngine *t, u64a end, u32 id, +hwlmcb_rv_t roseProcessMainMatch(const struct RoseEngine *t, u64a end, + size_t match_len, u32 id, struct RoseContext *tctxt) { - return 
roseProcessMatch_i(t, end, id, tctxt, 1, 0, 0); + return roseProcessMatch_i(t, end, match_len, id, tctxt, 0, 0); } static rose_inline @@ -839,11 +687,6 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) { return HWLM_TERMINATE_MATCHING; } - if (id < tctx->t->nonbenefits_base_id - && !roseCheckLiteralBenefits(real_end, end - start + 1, id, tctx)) { - return tctx->groups; - } - hwlmcb_rv_t rv = flushQueuedLiterals(tctx, real_end); /* flushDelayed may have advanced tctx->lastEndOffset */ @@ -856,7 +699,8 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) { return HWLM_TERMINATE_MATCHING; } - rv = roseProcessMainMatch(tctx->t, real_end, id, tctx); + size_t match_len = end - start + 1; + rv = roseProcessMainMatch(tctx->t, real_end, match_len, id, tctx); DEBUG_PRINTF("DONE groups=0x%016llx\n", tctx->groups); diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 6ba86ca6..b4d4aeee 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -41,9 +41,108 @@ #include "runtime.h" #include "scratch.h" #include "ue2common.h" +#include "util/compare.h" #include "util/fatbit.h" #include "util/multibit.h" +static rose_inline +int roseCheckBenefits(struct RoseContext *tctxt, u64a end, u32 mask_rewind, + const u8 *and_mask, const u8 *exp_mask) { + DEBUG_PRINTF("am offset = %zu, em offset = %zu\n", + and_mask - (const u8 *)tctxt->t, + exp_mask - (const u8 *)tctxt->t); + const u8 *data; + + // If the check works over part of the history and part of the buffer, we + // create a temporary copy of the data in here so it's contiguous. 
+ u8 temp[MAX_MASK2_WIDTH]; + + struct core_info *ci = &tctxtToScratch(tctxt)->core_info; + s64a buffer_offset = (s64a)end - ci->buf_offset; + DEBUG_PRINTF("rel offset %lld\n", buffer_offset); + if (buffer_offset >= mask_rewind) { + data = ci->buf + buffer_offset - mask_rewind; + DEBUG_PRINTF("all in one case data=%p buf=%p rewind=%u\n", data, + ci->buf, mask_rewind); + } else if (buffer_offset <= 0) { + data = ci->hbuf + ci->hlen + buffer_offset - mask_rewind; + DEBUG_PRINTF("all in one case data=%p buf=%p rewind=%u\n", data, + ci->buf, mask_rewind); + } else { + u32 shortfall = mask_rewind - buffer_offset; + DEBUG_PRINTF("shortfall of %u, rewind %u hlen %zu\n", shortfall, + mask_rewind, ci->hlen); + data = temp; + memcpy(temp, ci->hbuf + ci->hlen - shortfall, shortfall); + memcpy(temp + shortfall, ci->buf, mask_rewind - shortfall); + } + +#ifdef DEBUG + DEBUG_PRINTF("DATA: "); + for (u32 i = 0; i < mask_rewind; i++) { + printf("%c", ourisprint(data[i]) ? data[i] : '?'); + } + printf(" (len=%u)\n", mask_rewind); +#endif + + u32 len = mask_rewind; + while (len >= sizeof(u64a)) { + u64a a = unaligned_load_u64a(data); + a &= *(const u64a *)and_mask; + if (a != *(const u64a *)exp_mask) { + DEBUG_PRINTF("argh %016llx %016llx\n", a, *(const u64a *)exp_mask); + return 0; + } + data += sizeof(u64a); + and_mask += sizeof(u64a); + exp_mask += sizeof(u64a); + len -= sizeof(u64a); + } + + while (len) { + u8 a = *data; + a &= *and_mask; + if (a != *exp_mask) { + DEBUG_PRINTF("argh d%02hhx =%02hhx am%02hhx em%02hhx\n", a, + *data, *and_mask, *exp_mask); + return 0; + } + data++; + and_mask++; + exp_mask++; + len--; + } + + return 1; +} + +static rose_inline +void rosePushDelayedMatch(const struct RoseEngine *t, u32 delay, + u32 delay_index, u64a offset, + struct RoseContext *tctxt) { + assert(delay); + + const u32 src_slot_index = delay; + u32 slot_index = (src_slot_index + offset) & DELAY_MASK; + + if (offset + src_slot_index <= tctxt->delayLastEndOffset) { + 
DEBUG_PRINTF("skip too late\n"); + return; + } + + const u32 delay_count = t->delay_count; + u8 *slot = getDelaySlots(tctxtToScratch(tctxt)) + + (t->delay_slot_size * slot_index); + + DEBUG_PRINTF("pushing tab %u into slot %u\n", delay_index, slot_index); + if (!(tctxt->filledDelayedSlots & (1U << slot_index))) { + tctxt->filledDelayedSlots |= 1U << slot_index; + mmbit_clear(slot, delay_count); + } + + mmbit_set(slot, delay_count, delay_index); +} + static rose_inline char rosePrefixCheckMiracles(const struct RoseEngine *t, const struct LeftNfaInfo *left, @@ -782,10 +881,10 @@ char roseCheckRootBounds(u64a end, u32 min_bound, u32 max_bound) { break; \ } -static really_inline +static rose_inline hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, - u64a end, struct RoseContext *tctxt, - char in_anchored, int *work_done) { + u64a end, size_t match_len, + struct RoseContext *tctxt, char in_anchored) { DEBUG_PRINTF("program begins at offset %u\n", programOffset); assert(programOffset); @@ -800,6 +899,10 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, // and SPARSE_ITER_NEXT instructions. struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; + // If this program has an effect, work_done will be set to one (which may + // allow the program to squash groups). 
+ int work_done = 0; + assert(*(const u8 *)pc != ROSE_INSTR_END); for (;;) { @@ -812,7 +915,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, if (in_anchored && end > t->floatingMinLiteralMatchOffset) { DEBUG_PRINTF("delay until playback\n"); tctxt->groups |= ri->groups; - *work_done = 1; + work_done = 1; assert(ri->done_jump); // must progress pc += ri->done_jump; continue; @@ -820,6 +923,35 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_LIT_MASK) { + assert(match_len); + if (!roseCheckBenefits(tctxt, end, match_len, ri->and_mask.a8, + ri->cmp_mask.a8)) { + DEBUG_PRINTF("halt: failed mask check\n"); + return HWLM_CONTINUE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LIT_EARLY) { + if (end < t->floatingMinLiteralMatchOffset) { + DEBUG_PRINTF("halt: too soon, min offset=%u\n", + t->floatingMinLiteralMatchOffset); + return HWLM_CONTINUE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_GROUPS) { + DEBUG_PRINTF("groups=0x%llx, checking instr groups=0x%llx\n", + tctxt->groups, ri->groups); + if (!(ri->groups & tctxt->groups)) { + DEBUG_PRINTF("halt: no groups are set\n"); + return HWLM_CONTINUE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_ONLY_EOD) { struct core_info *ci = &tctxtToScratch(tctxt)->core_info; if (end != ci->buf_offset + ci->len) { @@ -874,6 +1006,11 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(PUSH_DELAYED) { + rosePushDelayedMatch(t, ri->delay, ri->index, end, tctxt); + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SOM_ADJUST) { assert(ri->distance <= end); som = end - ri->distance; @@ -890,7 +1027,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_CASE(TRIGGER_INFIX) { roseTriggerInfix(t, som, end, ri->queue, ri->event, ri->cancel, tctxt); - *work_done = 1; + work_done = 1; } 
PROGRAM_NEXT_INSTRUCTION @@ -900,7 +1037,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } - *work_done = 1; + work_done = 1; } PROGRAM_NEXT_INSTRUCTION @@ -909,7 +1046,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, in_anchored) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } - *work_done = 1; + work_done = 1; } PROGRAM_NEXT_INSTRUCTION @@ -919,7 +1056,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } - *work_done = 1; + work_done = 1; } PROGRAM_NEXT_INSTRUCTION @@ -928,7 +1065,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, MO_HALT_MATCHING) { return HWLM_TERMINATE_MATCHING; } - *work_done = 1; + work_done = 1; } PROGRAM_NEXT_INSTRUCTION @@ -937,7 +1074,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, in_anchored) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } - *work_done = 1; + work_done = 1; } PROGRAM_NEXT_INSTRUCTION @@ -947,7 +1084,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, in_anchored) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } - *work_done = 1; + work_done = 1; } PROGRAM_NEXT_INSTRUCTION @@ -957,7 +1094,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } - *work_done = 1; + work_done = 1; } PROGRAM_NEXT_INSTRUCTION @@ -965,7 +1102,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, DEBUG_PRINTF("set state index %u\n", ri->index); mmbit_set(getRoleState(tctxt->state), t->rolesWithStateCount, ri->index); - *work_done = 1; + work_done = 1; } PROGRAM_NEXT_INSTRUCTION @@ -976,6 +1113,28 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, } PROGRAM_NEXT_INSTRUCTION + 
PROGRAM_CASE(SQUASH_GROUPS) { + assert(popcount64(ri->groups) == 63); // Squash only one group. + if (work_done) { + tctxt->groups &= ri->groups; + DEBUG_PRINTF("squash groups 0x%llx -> 0x%llx\n", ri->groups, + tctxt->groups); + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_STATE) { + DEBUG_PRINTF("check state %u\n", ri->index); + if (!mmbit_isset(getRoleState(tctxt->state), + t->rolesWithStateCount, ri->index)) { + DEBUG_PRINTF("state not on\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SPARSE_ITER_BEGIN) { DEBUG_PRINTF("iter_offset=%u\n", ri->iter_offset); const struct mmbit_sparse_iter *it = @@ -1045,17 +1204,4 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, #undef PROGRAM_CASE #undef PROGRAM_NEXT_INSTRUCTION -static rose_inline -void roseSquashGroup(struct RoseContext *tctxt, const struct RoseLiteral *tl) { - assert(tl->squashesGroup); - - // we should be squashing a single group - assert(popcount64(tl->groups) == 1); - - DEBUG_PRINTF("apply squash mask 0x%016llx, groups 0x%016llx -> 0x%016llx\n", - ~tl->groups, tctxt->groups, tctxt->groups & ~tl->groups); - - tctxt->groups &= ~tl->groups; -} - #endif // PROGRAM_RUNTIME_H diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 5f654191..9444005d 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -170,12 +170,16 @@ public: const void *get() const { switch (code()) { + case ROSE_INSTR_CHECK_LIT_MASK: return &u.checkLitMask; + case ROSE_INSTR_CHECK_LIT_EARLY: return &u.checkLitEarly; + case ROSE_INSTR_CHECK_GROUPS: return &u.checkGroups; case ROSE_INSTR_CHECK_ONLY_EOD: 
return &u.checkOnlyEod; case ROSE_INSTR_CHECK_BOUNDS: return &u.checkBounds; case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled; case ROSE_INSTR_CHECK_LOOKAROUND: return &u.checkLookaround; case ROSE_INSTR_CHECK_LEFTFIX: return &u.checkLeftfix; case ROSE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay; + case ROSE_INSTR_PUSH_DELAYED: return &u.pushDelayed; case ROSE_INSTR_SOM_ADJUST: return &u.somAdjust; case ROSE_INSTR_SOM_LEFTFIX: return &u.somLeftfix; case ROSE_INSTR_TRIGGER_INFIX: return &u.triggerInfix; @@ -188,6 +192,8 @@ public: case ROSE_INSTR_REPORT_SOM_KNOWN: return &u.reportSomKnown; case ROSE_INSTR_SET_STATE: return &u.setState; case ROSE_INSTR_SET_GROUPS: return &u.setGroups; + case ROSE_INSTR_SQUASH_GROUPS: return &u.squashGroups; + case ROSE_INSTR_CHECK_STATE: return &u.checkState; case ROSE_INSTR_SPARSE_ITER_BEGIN: return &u.sparseIterBegin; case ROSE_INSTR_SPARSE_ITER_NEXT: return &u.sparseIterNext; case ROSE_INSTR_END: return &u.end; @@ -198,12 +204,16 @@ public: size_t length() const { switch (code()) { + case ROSE_INSTR_CHECK_LIT_MASK: return sizeof(u.checkLitMask); + case ROSE_INSTR_CHECK_LIT_EARLY: return sizeof(u.checkLitEarly); + case ROSE_INSTR_CHECK_GROUPS: return sizeof(u.checkGroups); case ROSE_INSTR_CHECK_ONLY_EOD: return sizeof(u.checkOnlyEod); case ROSE_INSTR_CHECK_BOUNDS: return sizeof(u.checkBounds); case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled); case ROSE_INSTR_CHECK_LOOKAROUND: return sizeof(u.checkLookaround); case ROSE_INSTR_CHECK_LEFTFIX: return sizeof(u.checkLeftfix); case ROSE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay); + case ROSE_INSTR_PUSH_DELAYED: return sizeof(u.pushDelayed); case ROSE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust); case ROSE_INSTR_SOM_LEFTFIX: return sizeof(u.somLeftfix); case ROSE_INSTR_TRIGGER_INFIX: return sizeof(u.triggerInfix); @@ -216,6 +226,8 @@ public: case ROSE_INSTR_REPORT_SOM_KNOWN: return sizeof(u.reportSomKnown); case ROSE_INSTR_SET_STATE: return 
sizeof(u.setState); case ROSE_INSTR_SET_GROUPS: return sizeof(u.setGroups); + case ROSE_INSTR_SQUASH_GROUPS: return sizeof(u.squashGroups); + case ROSE_INSTR_CHECK_STATE: return sizeof(u.checkState); case ROSE_INSTR_SPARSE_ITER_BEGIN: return sizeof(u.sparseIterBegin); case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext); case ROSE_INSTR_END: return sizeof(u.end); @@ -224,12 +236,16 @@ public: } union { + ROSE_STRUCT_CHECK_LIT_MASK checkLitMask; + ROSE_STRUCT_CHECK_LIT_EARLY checkLitEarly; + ROSE_STRUCT_CHECK_GROUPS checkGroups; ROSE_STRUCT_CHECK_ONLY_EOD checkOnlyEod; ROSE_STRUCT_CHECK_BOUNDS checkBounds; ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled; ROSE_STRUCT_CHECK_LOOKAROUND checkLookaround; ROSE_STRUCT_CHECK_LEFTFIX checkLeftfix; ROSE_STRUCT_ANCHORED_DELAY anchoredDelay; + ROSE_STRUCT_PUSH_DELAYED pushDelayed; ROSE_STRUCT_SOM_ADJUST somAdjust; ROSE_STRUCT_SOM_LEFTFIX somLeftfix; ROSE_STRUCT_TRIGGER_INFIX triggerInfix; @@ -242,12 +258,25 @@ public: ROSE_STRUCT_REPORT_SOM_KNOWN reportSomKnown; ROSE_STRUCT_SET_STATE setState; ROSE_STRUCT_SET_GROUPS setGroups; + ROSE_STRUCT_SQUASH_GROUPS squashGroups; + ROSE_STRUCT_CHECK_STATE checkState; ROSE_STRUCT_SPARSE_ITER_BEGIN sparseIterBegin; ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext; ROSE_STRUCT_END end; } u; }; +static +size_t hash_value(const RoseInstruction &ri) { + size_t val = 0; + const char *bytes = (const char *)ri.get(); + const size_t len = ri.length(); + for (size_t i = 0; i < len; i++) { + boost::hash_combine(val, bytes[i]); + } + return val; +} + struct build_context : boost::noncopyable { /** \brief information about engines to the left of a vertex */ map leftfix_info; @@ -270,6 +299,10 @@ struct build_context : boost::noncopyable { * up iterators in early misc. */ map, u32> iterCache; + /** \brief Simple cache of programs written to engine blob, used for + * deduplication. 
*/ + ue2::unordered_map, u32> program_cache; + /** \brief LookEntry list cache, so that we don't have to go scanning * through the full list to find cases we've used already. */ ue2::unordered_map, size_t> lookaround_cache; @@ -284,6 +317,9 @@ struct build_context : boost::noncopyable { * that have already been pushed into the engine_blob. */ ue2::unordered_map engineOffsets; + /** \brief Minimum offset of a match from the floating table. */ + u32 floatingMinLiteralMatchOffset = 0; + /** \brief Contents of the Rose bytecode immediately following the * RoseEngine. */ vector> engine_blob; @@ -1453,31 +1489,6 @@ void updateNfaState(const build_context &bc, RoseStateOffsets *so, } } -static -void buildLitBenefits(const RoseBuildImpl &tbi, RoseEngine *engine, - u32 base_lits_benefits_offset) { - lit_benefits *lba = (lit_benefits *)((char *)engine - + base_lits_benefits_offset); - DEBUG_PRINTF("base offset %u\n", base_lits_benefits_offset); - for (u32 i = 0; i < tbi.nonbenefits_base_id; i++) { - assert(contains(tbi.final_id_to_literal, i)); - assert(tbi.final_id_to_literal.at(i).size() == 1); - u32 lit_id = *tbi.final_id_to_literal.at(i).begin(); - const ue2_literal &s = tbi.literals.right.at(lit_id).s; - DEBUG_PRINTF("building mask for lit %u (fid %u) %s\n", lit_id, i, - dumpString(s).c_str()); - assert(s.length() <= MAX_MASK2_WIDTH); - u32 j = 0; - for (const auto &e : s) { - lba[i].and_mask.a8[j] = e.nocase ? 0 : CASE_BIT; - lba[i].expected.e8[j] = e.nocase ? 
0 : (CASE_BIT & e.c); - DEBUG_PRINTF("a%02hhx e%02hhx\n", lba[i].and_mask.a8[j], - lba[i].expected.e8[j]); - j++; - } - } -} - /* does not include history requirements for outfixes or literal matchers */ u32 RoseBuildImpl::calcHistoryRequired() const { u32 m = cc.grey.minHistoryAvailable; @@ -2232,11 +2243,11 @@ void enforceEngineSizeLimit(const NFA *n, const size_t nfa_size, const Grey &gre } static -u32 findMinFloatingLiteralMatch(const RoseBuildImpl &tbi) { - const RoseGraph &g = tbi.g; +u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build) { + const RoseGraph &g = build.g; u32 minWidth = ROSE_BOUND_INF; for (auto v : vertices_range(g)) { - if (tbi.isAnchored(v) || tbi.isVirtualVertex(v)) { + if (build.isAnchored(v) || build.isVirtualVertex(v)) { DEBUG_PRINTF("skipping %zu anchored or root\n", g[v].idx); continue; } @@ -2656,12 +2667,21 @@ flattenProgram(const vector> &programs) { } static -u32 writeProgram(build_context &bc, vector &program) { +u32 writeProgram(build_context &bc, const vector &program) { if (program.empty()) { DEBUG_PRINTF("no program\n"); return 0; } + assert(program.back().code() == ROSE_INSTR_END); + assert(program.size() >= 1); + + auto it = bc.program_cache.find(program); + if (it != end(bc.program_cache)) { + DEBUG_PRINTF("reusing cached program at %u\n", it->second); + return it->second; + } + DEBUG_PRINTF("writing %zu instructions\n", program.size()); u32 programOffset = 0; for (const auto &ri : program) { @@ -2674,6 +2694,7 @@ u32 writeProgram(build_context &bc, vector &program) { } } DEBUG_PRINTF("program begins at offset %u\n", programOffset); + bc.program_cache.emplace(program, programOffset); return programOffset; } @@ -2764,72 +2785,6 @@ bool hasBoundaryReports(const BoundaryReports &boundary) { return false; } -static -void createLiteralEntry(const RoseBuildImpl &tbi, build_context &bc, - vector &literalTable) { - const u32 final_id = verify_u32(literalTable.size()); - assert(contains(tbi.final_id_to_literal, final_id)); - 
const UNUSED u32 literalId = *tbi.final_id_to_literal.at(final_id).begin(); - /* all literal ids associated with this final id should result in identical - * literal entry */ - const auto &lit_infos = getLiteralInfoByFinalId(tbi, final_id); - const rose_literal_info &arb_lit_info = **lit_infos.begin(); - - literalTable.push_back(RoseLiteral()); - RoseLiteral &tl = literalTable.back(); - memset(&tl, 0, sizeof(tl)); - - tl.groups = 0; - for (const auto &li : lit_infos) { - tl.groups |= li->group_mask; - } - - assert(tl.groups || tbi.literals.right.at(literalId).table == ROSE_ANCHORED - || tbi.literals.right.at(literalId).table == ROSE_EVENT); - - // If this literal squashes its group behind it, store that data too - tl.squashesGroup = arb_lit_info.squash_group; - - // Setup the delay stuff - const auto &children = arb_lit_info.delayed_ids; - if (children.empty()) { - tl.delay_mask = 0; - tl.delayIdsOffset = ROSE_OFFSET_INVALID; - } else { - map local_delay_map; // delay -> relative child id - for (const auto &int_id : children) { - const rose_literal_id &child_literal = tbi.literals.right.at(int_id); - u32 child_id = tbi.literal_info[int_id].final_id; - u32 delay_index = child_id - tbi.delay_base_id; - tl.delay_mask |= 1U << child_literal.delay; - local_delay_map[child_literal.delay] = delay_index; - } - - vector delayIds; - for (const auto &did : local_delay_map | map_values) { - delayIds.push_back(did); - } - - tl.delayIdsOffset = add_to_engine_blob(bc, delayIds.begin(), - delayIds.end()); - - } - - assert(!tbi.literals.right.at(literalId).delay || !tl.delay_mask); -} - -// Construct the literal table. 
-static -void buildLiteralTable(const RoseBuildImpl &tbi, build_context &bc, - vector &literalTable) { - size_t numLiterals = tbi.final_id_to_literal.size(); - literalTable.reserve(numLiterals); - - for (size_t i = 0; i < numLiterals; ++i) { - createLiteralEntry(tbi, bc, literalTable); - } -} - /** * \brief True if the given vertex is a role that can only be switched on at * EOD. @@ -2945,8 +2900,11 @@ void makeRoleAnchoredDelay(RoseBuildImpl &build, UNUSED build_context &bc, return; } - // TODO: also limit to matches that can occur after - // floatingMinLiteralMatchOffset. + // If this match cannot occur after floatingMinLiteralMatchOffset, we do + // not need this check. + if (build.g[v].max_offset <= bc.floatingMinLiteralMatchOffset) { + return; + } auto ri = RoseInstruction(ROSE_INSTR_ANCHORED_DELAY); ri.u.anchoredDelay.groups = build.g[v].groups; @@ -3112,6 +3070,13 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, const RoseGraph &g = build.g; const RoseVertex u = source(e, g); + // We know that we can trust the anchored table (DFA) to always deliver us + // literals at the correct offset. + if (build.isAnchored(v)) { + DEBUG_PRINTF("literal in anchored table, skipping bounds check\n"); + return; + } + // Use the minimum literal length. u32 lit_length = g[v].eod_accept ? 0 : verify_u32(build.minLiteralLen(v)); @@ -3347,97 +3312,171 @@ vector makePredProgram(RoseBuildImpl &build, build_context &bc, return program; } +static +u32 addPredBlocksSingle( + map>> &predProgramLists, + u32 curr_offset, vector &program) { + assert(predProgramLists.size() == 1); + + u32 pred_state = predProgramLists.begin()->first; + auto subprog = flattenProgram(predProgramLists.begin()->second); + + // Check our pred state. + auto ri = RoseInstruction(ROSE_INSTR_CHECK_STATE); + ri.u.checkState.index = pred_state; + program.push_back(ri); + curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN); + + // Add subprogram. 
+ for (const auto &ri : subprog) { + program.push_back(ri); + curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); + } + + const u32 end_offset = + curr_offset - ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN); + + // Fix up the instruction operands. + curr_offset = 0; + for (size_t i = 0; i < program.size(); i++) { + auto &ri = program[i]; + switch (ri.code()) { + case ROSE_INSTR_CHECK_STATE: + ri.u.checkState.fail_jump = end_offset - curr_offset; + break; + default: + break; + } + curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); + } + + return 0; // No iterator. +} + +static +u32 addPredBlocksMulti(build_context &bc, + map>> &predProgramLists, + u32 curr_offset, vector &program) { + assert(!predProgramLists.empty()); + + // First, add the iterator itself. + vector keys; + for (const auto &elem : predProgramLists) { + keys.push_back(elem.first); + } + DEBUG_PRINTF("%zu keys: %s\n", keys.size(), as_string_list(keys).c_str()); + + vector iter; + mmbBuildSparseIterator(iter, keys, bc.numStates); + assert(!iter.empty()); + u32 iter_offset = addIteratorToTable(bc, iter); + + // Construct our program, starting with the SPARSE_ITER_BEGIN + // instruction, keeping track of the jump offset for each sub-program. + vector jump_table; + + program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN)); + curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN); + + for (const auto &e : predProgramLists) { + DEBUG_PRINTF("subprogram %zu has offset %u\n", jump_table.size(), + curr_offset); + jump_table.push_back(curr_offset); + auto subprog = flattenProgram(e.second); + + if (e.first != keys.back()) { + // For all but the last subprogram, replace the END instruction + // with a SPARSE_ITER_NEXT. 
+ assert(!subprog.empty()); + assert(subprog.back().code() == ROSE_INSTR_END); + subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT); + } + + for (const auto &ri : subprog) { + program.push_back(ri); + curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); + } + } + + const u32 end_offset = + curr_offset - ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN); + + // Write the jump table into the bytecode. + const u32 jump_table_offset = + add_to_engine_blob(bc, begin(jump_table), end(jump_table)); + + // Fix up the instruction operands. + auto keys_it = begin(keys); + curr_offset = 0; + for (size_t i = 0; i < program.size(); i++) { + auto &ri = program[i]; + switch (ri.code()) { + case ROSE_INSTR_SPARSE_ITER_BEGIN: + ri.u.sparseIterBegin.iter_offset = iter_offset; + ri.u.sparseIterBegin.jump_table = jump_table_offset; + ri.u.sparseIterBegin.fail_jump = end_offset - curr_offset; + break; + case ROSE_INSTR_SPARSE_ITER_NEXT: + ri.u.sparseIterNext.iter_offset = iter_offset; + ri.u.sparseIterNext.jump_table = jump_table_offset; + assert(keys_it != end(keys)); + ri.u.sparseIterNext.state = *keys_it++; + ri.u.sparseIterNext.fail_jump = end_offset - curr_offset; + break; + default: + break; + } + curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); + } + + return iter_offset; +} + +static +u32 addPredBlocks(build_context &bc, + map>> &predProgramLists, + u32 curr_offset, vector &program, + bool force_sparse_iter) { + const size_t num_preds = predProgramLists.size(); + if (num_preds == 0) { + program = flattenProgram({program}); + return 0; // No iterator. + } else if (!force_sparse_iter && num_preds == 1) { + return addPredBlocksSingle(predProgramLists, curr_offset, program); + } else { + return addPredBlocksMulti(bc, predProgramLists, curr_offset, program); + } +} + /** * Returns the pair (program offset, sparse iter offset). 
*/ static pair makeSparseIterProgram(build_context &bc, map>> &predProgramLists, - const vector &root_program) { + const vector &root_program, + const vector &pre_program) { vector program; - u32 iter_offset = 0; + u32 curr_offset = 0; - if (!predProgramLists.empty()) { - // First, add the iterator itself. - vector keys; - for (const auto &elem : predProgramLists) { - keys.push_back(elem.first); - } - DEBUG_PRINTF("%zu keys: %s\n", keys.size(), - as_string_list(keys).c_str()); - - vector iter; - mmbBuildSparseIterator(iter, keys, bc.numStates); - assert(!iter.empty()); - iter_offset = addIteratorToTable(bc, iter); - - // Construct our program, starting with the SPARSE_ITER_BEGIN - // instruction, keeping track of the jump offset for each sub-program. - vector jump_table; - u32 curr_offset = 0; - - program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN)); - curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN); - - for (const auto &e : predProgramLists) { - DEBUG_PRINTF("subprogram %zu has offset %u\n", jump_table.size(), - curr_offset); - jump_table.push_back(curr_offset); - auto subprog = flattenProgram(e.second); - - if (e.first != keys.back()) { - // For all but the last subprogram, replace the END instruction - // with a SPARSE_ITER_NEXT. - assert(!subprog.empty()); - assert(subprog.back().code() == ROSE_INSTR_END); - subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT); - } - - for (const auto &ri : subprog) { - program.push_back(ri); - curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); - } - } - - const u32 end_offset = curr_offset - ROUNDUP_N(program.back().length(), - ROSE_INSTR_MIN_ALIGN); - - // Write the jump table into the bytecode. - const u32 jump_table_offset = - add_to_engine_blob(bc, begin(jump_table), end(jump_table)); - - // Fix up the instruction operands. 
- auto keys_it = begin(keys); - curr_offset = 0; - for (size_t i = 0; i < program.size(); i++) { - auto &ri = program[i]; - switch (ri.code()) { - case ROSE_INSTR_SPARSE_ITER_BEGIN: - ri.u.sparseIterBegin.iter_offset = iter_offset; - ri.u.sparseIterBegin.jump_table = jump_table_offset; - ri.u.sparseIterBegin.fail_jump = end_offset - curr_offset; - break; - case ROSE_INSTR_SPARSE_ITER_NEXT: - ri.u.sparseIterNext.iter_offset = iter_offset; - ri.u.sparseIterNext.jump_table = jump_table_offset; - assert(keys_it != end(keys)); - ri.u.sparseIterNext.state = *keys_it++; - ri.u.sparseIterNext.fail_jump = end_offset - curr_offset; - break; - default: - break; - } - curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); - } + // Add pre-program first. + for (const auto &ri : pre_program) { + program.push_back(ri); + curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); } + // Add blocks to deal with non-root edges (triggered by sparse iterator or + // mmbit_isset checks). This operation will flatten the program up to this + // point. + u32 iter_offset = + addPredBlocks(bc, predProgramLists, curr_offset, program, false); + // If we have a root program, replace the END instruction with it. Note // that the root program has already been flattened. 
+ assert(!program.empty()); + assert(program.back().code() == ROSE_INSTR_END); if (!root_program.empty()) { - if (!program.empty()) { - assert(program.back().code() == ROSE_INSTR_END); - program.pop_back(); - } + program.pop_back(); program.insert(end(program), begin(root_program), end(root_program)); } @@ -3445,15 +3484,182 @@ pair makeSparseIterProgram(build_context &bc, } static -u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, +void makePushDelayedInstructions(const RoseBuildImpl &build, u32 final_id, + vector &program) { + const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); + const auto &arb_lit_info = **lit_infos.begin(); + if (arb_lit_info.delayed_ids.empty()) { + return; + } + + for (const auto &int_id : arb_lit_info.delayed_ids) { + const auto &child_literal = build.literals.right.at(int_id); + u32 child_id = build.literal_info[int_id].final_id; + u32 delay_index = child_id - build.delay_base_id; + + DEBUG_PRINTF("final_id=%u delay=%u child_id=%u\n", final_id, + child_literal.delay, child_id); + + auto ri = RoseInstruction(ROSE_INSTR_PUSH_DELAYED); + ri.u.pushDelayed.delay = verify_u8(child_literal.delay); + ri.u.pushDelayed.index = delay_index; + program.push_back(move(ri)); + } +} + +static +void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 final_id, + vector &program) { + assert(contains(build.final_id_to_literal, final_id)); + const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); + + rose_group groups = 0; + for (const auto &li : lit_infos) { + groups |= li->group_mask; + } + + if (!groups) { + return; + } + + auto ri = RoseInstruction(ROSE_INSTR_CHECK_GROUPS); + ri.u.checkGroups.groups = groups; + program.push_back(move(ri)); +} + +static +void makeCheckLitMaskInstruction(const RoseBuildImpl &build, u32 final_id, + vector &program) { + assert(contains(build.final_id_to_literal, final_id)); + const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); + assert(!lit_infos.empty()); + + if 
(!lit_infos.front()->requires_benefits) { + return; + } + + auto ri = RoseInstruction(ROSE_INSTR_CHECK_LIT_MASK); + + assert(build.final_id_to_literal.at(final_id).size() == 1); + u32 lit_id = *build.final_id_to_literal.at(final_id).begin(); + const ue2_literal &s = build.literals.right.at(lit_id).s; + DEBUG_PRINTF("building mask for lit %u (final id %u) %s\n", lit_id, + final_id, dumpString(s).c_str()); + assert(s.length() <= MAX_MASK2_WIDTH); + u32 i = 0; + for (const auto &e : s) { + ri.u.checkLitMask.and_mask.a8[i] = e.nocase ? 0 : CASE_BIT; + ri.u.checkLitMask.cmp_mask.a8[i] = e.nocase ? 0 : (CASE_BIT & e.c); + i++; + } + + program.push_back(move(ri)); +} + +static +void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 final_id, + vector &program) { + assert(contains(build.final_id_to_literal, final_id)); + const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); + + if (!lit_infos.front()->squash_group) { + return; + } + + rose_group groups = 0; + for (const auto &li : lit_infos) { + groups |= li->group_mask; + } + + if (!groups) { + return; + } + + DEBUG_PRINTF("final_id %u squashes 0x%llx\n", final_id, groups); + + auto ri = RoseInstruction(ROSE_INSTR_SQUASH_GROUPS); + ri.u.squashGroups.groups = ~groups; // Negated, so we can just AND it in. 
+ program.push_back(move(ri)); +} + +static +void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc, + u32 final_id, + const vector &lit_edges, + vector &program) { + if (lit_edges.empty()) { + return; + } + + if (bc.floatingMinLiteralMatchOffset == 0) { + return; + } + + RoseVertex v = target(lit_edges.front(), build.g); + if (!build.isFloating(v)) { + return; + } + + const auto &lit_ids = build.final_id_to_literal.at(final_id); + if (lit_ids.empty()) { + return; + } + + size_t min_offset = SIZE_MAX; + for (u32 lit_id : lit_ids) { + const auto &lit = build.literals.right.at(lit_id); + min_offset = min(min_offset, lit.elength()); + } + + DEBUG_PRINTF("%zu lits, min_offset=%zu\n", lit_ids.size(), min_offset); + + // If we can't match before the min offset, we don't need the check. + if (min_offset >= bc.floatingMinLiteralMatchOffset) { + DEBUG_PRINTF("no need for check, min is %u\n", + bc.floatingMinLiteralMatchOffset); + return; + } + + program.push_back(RoseInstruction(ROSE_INSTR_CHECK_LIT_EARLY)); +} + +static +vector buildLitInitialProgram(RoseBuildImpl &build, + build_context &bc, u32 final_id, + const vector &lit_edges) { + vector pre_program; + + // No initial program for EOD. + if (final_id == MO_INVALID_IDX) { + return pre_program; + } + + DEBUG_PRINTF("final_id %u\n", final_id); + + // Check lit mask. + makeCheckLitMaskInstruction(build, final_id, pre_program); + + // Check literal groups. + makeGroupCheckInstruction(build, final_id, pre_program); + + // Add instructions for pushing delayed matches, if there are any. + makePushDelayedInstructions(build, final_id, pre_program); + + // Add pre-check for early literals in the floating table. 
+ makeCheckLitEarlyInstruction(build, bc, final_id, lit_edges, pre_program); + + return pre_program; +} + +static +u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id, const vector &lit_edges) { const auto &g = build.g; - DEBUG_PRINTF("%zu lit edges\n", lit_edges.size()); + DEBUG_PRINTF("final id %u, %zu lit edges\n", final_id, lit_edges.size()); // pred state id -> list of programs map>> predProgramLists; - vector nonroot_verts; // Construct sparse iter sub-programs. for (const auto &e : lit_edges) { @@ -3467,7 +3673,6 @@ u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 pred_state = bc.roleStateIndices.at(u); auto program = makePredProgram(build, bc, e); predProgramLists[pred_state].push_back(program); - nonroot_verts.push_back(target(e, g)); } // Construct sub-program for handling root roles. @@ -3485,13 +3690,39 @@ u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, root_programs.push_back(role_prog); } + // Literal may squash groups. + if (final_id != MO_INVALID_IDX) { + root_programs.push_back({}); + makeGroupSquashInstruction(build, final_id, root_programs.back()); + } + vector root_program; if (!root_programs.empty()) { root_program = flattenProgram(root_programs); } + auto pre_program = buildLitInitialProgram(build, bc, final_id, lit_edges); + // Put it all together. - return makeSparseIterProgram(bc, predProgramLists, root_program).first; + return makeSparseIterProgram(bc, predProgramLists, root_program, + pre_program).first; +} + +static +u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc, + u32 final_id) { + const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); + const auto &arb_lit_info = **lit_infos.begin(); + if (arb_lit_info.delayed_ids.empty()) { + return 0; // No delayed IDs, no work to do. 
+ } + + vector program; + makeCheckLitMaskInstruction(build, final_id, program); + makePushDelayedInstructions(build, final_id, program); + assert(!program.empty()); + program = flattenProgram({program}); + return writeProgram(bc, program); } static @@ -3530,17 +3761,35 @@ map> findEdgesByLiteral(const RoseBuildImpl &build) { return lit_edge_map; } -/** \brief Build the interpreter program for each literal. */ +/** + * \brief Build the interpreter programs for each literal. + * + * Returns the base of the literal program list and the base of the delay + * rebuild program list. + */ static -void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc, - vector &literalTable) { +pair buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { + const u32 num_literals = build.final_id_to_literal.size(); auto lit_edge_map = findEdgesByLiteral(build); - for (u32 finalId = 0; finalId != literalTable.size(); ++finalId) { + vector litPrograms(num_literals); + vector delayRebuildPrograms(num_literals); + + for (u32 finalId = 0; finalId != num_literals; ++finalId) { const auto &lit_edges = lit_edge_map[finalId]; - u32 offset = buildLiteralProgram(build, bc, lit_edges); - literalTable[finalId].programOffset = offset; + + litPrograms[finalId] = + buildLiteralProgram(build, bc, finalId, lit_edges); + delayRebuildPrograms[finalId] = + buildDelayRebuildProgram(build, bc, finalId); } + + u32 litProgramsOffset = + add_to_engine_blob(bc, begin(litPrograms), end(litPrograms)); + u32 delayRebuildProgramsOffset = add_to_engine_blob( + bc, begin(delayRebuildPrograms), end(delayRebuildPrograms)); + + return {litProgramsOffset, delayRebuildProgramsOffset}; } static @@ -3604,7 +3853,14 @@ pair buildEodAnchorProgram(RoseBuildImpl &build, build_context &bc) { return {0, 0}; } - return makeSparseIterProgram(bc, predProgramLists, {}); + vector program; + + // Note: we force the use of a sparse iterator for the EOD program so we + // can easily guard EOD execution at runtime. 
+ u32 iter_offset = addPredBlocks(bc, predProgramLists, 0, program, true); + + assert(program.size() > 1); + return {writeProgram(bc, program), iter_offset}; } static @@ -3634,7 +3890,7 @@ u32 writeEodProgram(RoseBuildImpl &build, build_context &bc) { tie(g[source(b, g)].idx, g[target(b, g)].idx); }); - return buildLiteralProgram(build, bc, edge_list); + return buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list); } static @@ -3780,6 +4036,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { aligned_unique_ptr sbtable = buildSmallBlockMatcher(*this, &sbsize); build_context bc; + bc.floatingMinLiteralMatchOffset = findMinFloatingLiteralMatch(*this); // Build NFAs set no_retrigger_queues; @@ -3805,10 +4062,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { throw ResourceLimitError(); } - u32 lit_benefits_size = - verify_u32(sizeof(lit_benefits) * nonbenefits_base_id); - assert(ISALIGNED_16(lit_benefits_size)); - vector suffixEkeyLists; buildSuffixEkeyLists(*this, bc, qif, &suffixEkeyLists); @@ -3820,9 +4073,10 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { queue_count - leftfixBeginQueue, leftInfoTable, &laggedRoseCount, &historyRequired); - vector literalTable; - buildLiteralTable(*this, bc, literalTable); - buildLiteralPrograms(*this, bc, literalTable); + u32 litProgramOffset; + u32 litDelayRebuildProgramOffset; + tie(litProgramOffset, litDelayRebuildProgramOffset) = + buildLiteralPrograms(*this, bc); u32 eodProgramOffset = writeEodProgram(*this, bc); u32 eodIterProgramOffset; @@ -3857,10 +4111,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { currOffset = ROUNDUP_CL(currOffset); DEBUG_PRINTF("currOffset %u\n", currOffset); - /* leave space for the benefits listing */ - u32 base_lits_benefits_offset = currOffset; - currOffset += lit_benefits_size; - if (atable) { currOffset = ROUNDUP_CL(currOffset); amatcherOffset = currOffset; @@ -3891,10 +4141,6 @@ aligned_unique_ptr 
RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 intReportOffset = currOffset; currOffset += sizeof(internal_report) * int_reports.size(); - u32 literalOffset = ROUNDUP_N(currOffset, alignof(RoseLiteral)); - u32 literalLen = sizeof(RoseLiteral) * literalTable.size(); - currOffset = literalOffset + literalLen; - u32 leftOffset = ROUNDUP_N(currOffset, alignof(LeftNfaInfo)); u32 roseLen = sizeof(LeftNfaInfo) * leftInfoTable.size(); currOffset = leftOffset + roseLen; @@ -4016,8 +4262,9 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { fillInReportInfo(engine.get(), intReportOffset, rm, int_reports); - engine->literalOffset = literalOffset; - engine->literalCount = verify_u32(literalTable.size()); + engine->literalCount = verify_u32(final_id_to_literal.size()); + engine->litProgramOffset = litProgramOffset; + engine->litDelayRebuildProgramOffset = litDelayRebuildProgramOffset; engine->runtimeImpl = pickRuntimeImpl(*this, outfixEndQueue); engine->mpvTriggeredByLeaf = anyEndfixMpvTriggers(*this); @@ -4053,14 +4300,12 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->lastByteHistoryIterOffset = lastByteOffset; - u32 delay_count = verify_u32(literalTable.size() - delay_base_id); + u32 delay_count = verify_u32(final_id_to_literal.size() - delay_base_id); engine->delay_count = delay_count; engine->delay_slot_size = mmbit_size(delay_count); engine->delay_base_id = delay_base_id; engine->anchored_base_id = anchored_base_id; engine->anchored_count = delay_base_id - anchored_base_id; - engine->nonbenefits_base_id = nonbenefits_base_id; - engine->literalBenefitsOffsets = base_lits_benefits_offset; engine->rosePrefixCount = rosePrefixCount; @@ -4094,7 +4339,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->minWidth = hasBoundaryReports(boundary) ? 
0 : minWidth; engine->minWidthExcludingBoundaries = minWidth; engine->maxSafeAnchoredDROffset = findMinWidth(*this, ROSE_FLOATING); - engine->floatingMinLiteralMatchOffset = findMinFloatingLiteralMatch(*this); + engine->floatingMinLiteralMatchOffset = bc.floatingMinLiteralMatchOffset; engine->maxBiAnchoredWidth = findMaxBAWidth(*this); engine->noFloatingRoots = hasNoFloatingRoots(); @@ -4109,7 +4354,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { fillMatcherDistances(*this, engine.get()); engine->initialGroups = getInitialGroups(); - engine->totalNumLiterals = verify_u32(literalTable.size()); + engine->totalNumLiterals = verify_u32(literal_info.size()); engine->asize = verify_u32(asize); engine->ematcherRegionSize = ematcher_region_size; engine->floatingStreamState = verify_u32(floatingStreamStateRequired); @@ -4138,12 +4383,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { &engine->scratchStateSize, &engine->nfaStateSize, &engine->tStateSize); - /* do after update mask */ - buildLitBenefits(*this, engine.get(), base_lits_benefits_offset); - // Copy in other tables copy_bytes(ptr + bc.engine_blob_base, bc.engine_blob); - copy_bytes(ptr + engine->literalOffset, literalTable); copy_bytes(ptr + engine->leftOffset, leftInfoTable); fillLookaroundTables(ptr + lookaroundTableOffset, diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 2a3fe540..6202299b 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -258,7 +258,6 @@ void allocateFinalLiteralId(RoseBuildImpl &tbi) { set anch; set norm; - set norm_benefits; set delay; /* undelayed ids come first */ @@ -281,12 +280,8 @@ void 
allocateFinalLiteralId(RoseBuildImpl &tbi) { continue; } - const rose_literal_info &info = tbi.literal_info[i]; - if (info.requires_benefits) { - assert(!tbi.isDelayed(i)); - norm_benefits.insert(i); - DEBUG_PRINTF("%u has benefits\n", i); - } else if (tbi.isDelayed(i)) { + if (tbi.isDelayed(i)) { + assert(!tbi.literal_info[i].requires_benefits); delay.insert(i); } else if (tbi.literals.right.at(i).table == ROSE_ANCHORED) { anch.insert(i); @@ -295,12 +290,7 @@ void allocateFinalLiteralId(RoseBuildImpl &tbi) { } } - /* normal lits first (with benefits confirm)*/ - allocateFinalIdToSet(g, norm_benefits, &tbi.literal_info, - &tbi.final_id_to_literal, &next_final_id); - - /* other normal lits (without benefits)*/ - tbi.nonbenefits_base_id = next_final_id; + /* normal lits */ allocateFinalIdToSet(g, norm, &tbi.literal_info, &tbi.final_id_to_literal, &next_final_id); diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index a7f2e2f7..c6d10063 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -496,7 +496,6 @@ public: u32 anchored_base_id; - u32 nonbenefits_base_id; u32 ematcher_region_size; /**< number of bytes the eod table runs over */ /** \brief Mapping from anchored literal ID to the original literal suffix diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 044a4208..66b0bdd4 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -78,7 +78,6 @@ 
RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, SomSlotManager &ssm_in, group_weak_end(0), group_end(0), anchored_base_id(MO_INVALID_IDX), - nonbenefits_base_id(MO_INVALID_IDX), ematcher_region_size(0), floating_direct_report(false), eod_event_literal_id(MO_INVALID_IDX), diff --git a/src/rose/rose_build_util.h b/src/rose/rose_build_util.h index fe2124a0..536b031a 100644 --- a/src/rose/rose_build_util.h +++ b/src/rose/rose_build_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,21 +36,6 @@ namespace ue2 { -// Calculate the minimum depth for the given set of vertices, ignoring those -// with depth 1. -template -static -u8 calcMinDepth(const std::map &depths, const Cont &verts) { - u8 d = 255; - for (RoseVertex v : verts) { - u8 vdepth = (u8)std::min((u32)255, depths.at(v)); - if (vdepth > 1) { - d = std::min(d, vdepth); - } - } - return d; -} - // Comparator for vertices using their index property. 
struct VertexIndexComp { VertexIndexComp(const RoseGraph &gg) : g(gg) {} diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 6210d102..cd70c734 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -116,31 +116,6 @@ const HWLM *getSmallBlockMatcher(const RoseEngine *t) { return (const HWLM *)loadFromByteCodeOffset(t, t->sbmatcherOffset); } -static -u32 literalsWithDirectReports(const RoseEngine *t) { - return t->totalNumLiterals - t->literalCount; -} - -template -static -size_t literalsWithPredicate(const RoseEngine *t, Predicate pred) { - const RoseLiteral *tl = getLiteralTable(t); - const RoseLiteral *tl_end = tl + t->literalCount; - - return count_if(tl, tl_end, pred); -} - -static -size_t literalsInGroups(const RoseEngine *t, u32 from, u32 to) { - rose_group mask = ~((1ULL << from) - 1); - if (to < 64) { - mask &= ((1ULL << to) - 1); - } - - return literalsWithPredicate( - t, [&mask](const RoseLiteral &l) { return l.groups & mask; }); -} - static CharReach bitvectorToReach(const u8 *reach) { CharReach cr; @@ -177,6 +152,16 @@ void dumpLookaround(ofstream &os, const RoseEngine *t, } } +static +string dumpStrMask(const u8 *mask, size_t len) { + ostringstream oss; + for (size_t i = 0; i < len; i++) { + oss << std::hex << std::setw(2) << std::setfill('0') << u32{mask[i]} + << " "; + } + return oss.str(); +} + #define PROGRAM_CASE(name) \ case ROSE_INSTR_##name: { \ os << " " << std::setw(4) << std::setfill('0') << (pc - pc_base) \ @@ -202,14 +187,26 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(CHECK_ONLY_EOD) { - os << " fail_jump +" << ri->fail_jump << endl; + PROGRAM_CASE(CHECK_LIT_MASK) { + os << " and_mask " + 
<< dumpStrMask(ri->and_mask.a8, sizeof(ri->and_mask.a8)) + << endl; + os << " cmp_mask " + << dumpStrMask(ri->cmp_mask.a8, sizeof(ri->cmp_mask.a8)) + << endl; } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(CHECK_BOUNDS) { - os << " min_bound " << ri->min_bound << endl; - os << " max_bound " << ri->max_bound << endl; + PROGRAM_CASE(CHECK_LIT_EARLY) {} + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_GROUPS) { + os << " groups 0x" << std::hex << ri->groups << std::dec + << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_ONLY_EOD) { os << " fail_jump +" << ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION @@ -236,6 +233,12 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(PUSH_DELAYED) { + os << " delay " << u32{ri->delay} << endl; + os << " index " << ri->index << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SOM_ADJUST) { os << " distance " << ri->distance << endl; } @@ -301,6 +304,18 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SQUASH_GROUPS) { + os << " groups 0x" << std::hex << ri->groups << std::dec + << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_STATE) { + os << " index " << ri->index << endl; + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SPARSE_ITER_BEGIN) { os << " iter_offset " << ri->iter_offset << endl; os << " jump_table " << ri->jump_table << endl; @@ -334,21 +349,32 @@ static void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) { ofstream os(filename); - const RoseLiteral *lits = getLiteralTable(t); - const char *base = (const char *)t; + const u32 *litPrograms = + (const u32 *)loadFromByteCodeOffset(t, t->litProgramOffset); + const u32 *delayRebuildPrograms = + (const u32 *)loadFromByteCodeOffset(t, t->litDelayRebuildProgramOffset); for (u32 i = 0; i < t->literalCount; i++) { - const RoseLiteral *lit = &lits[i]; os << 
"Literal " << i << endl; os << "---------------" << endl; - if (lit->programOffset) { - os << "Program @ " << lit->programOffset << ":" << endl; - dumpProgram(os, t, base + lit->programOffset); + if (litPrograms[i]) { + os << "Program @ " << litPrograms[i] << ":" << endl; + const char *prog = + (const char *)loadFromByteCodeOffset(t, litPrograms[i]); + dumpProgram(os, t, prog); } else { os << "" << endl; } + if (delayRebuildPrograms[i]) { + os << "Delay Rebuild Program @ " << delayRebuildPrograms[i] << ":" + << endl; + const char *prog = (const char *)loadFromByteCodeOffset( + t, delayRebuildPrograms[i]); + dumpProgram(os, t, prog); + } + os << endl; } @@ -710,8 +736,6 @@ void roseDumpText(const RoseEngine *t, FILE *f) { etable ? hwlmSize(etable) : 0, t->ematcherRegionSize); fprintf(f, " - small-blk matcher : %zu bytes over %u bytes\n", sbtable ? hwlmSize(sbtable) : 0, t->smallBlockDistance); - fprintf(f, " - literal table : %zu bytes\n", - t->literalCount * sizeof(RoseLiteral)); fprintf(f, " - role state table : %zu bytes\n", t->rolesWithStateCount * sizeof(u32)); fprintf(f, " - nfa info table : %u bytes\n", @@ -745,22 +769,9 @@ void roseDumpText(const RoseEngine *t, FILE *f) { fprintf(f, "handled key count : %u\n", t->handledKeyCount); fprintf(f, "\n"); - fprintf(f, "number of literals : %u\n", t->totalNumLiterals); - fprintf(f, " - delayed : %u\n", t->delay_count); - fprintf(f, " - direct report : %u\n", - literalsWithDirectReports(t)); - fprintf(f, " - that squash group : %zu\n", - literalsWithPredicate( - t, [](const RoseLiteral &l) { return l.squashesGroup != 0; })); - fprintf(f, " - with benefits : %u\n", t->nonbenefits_base_id); - fprintf(f, " - with program : %zu\n", - literalsWithPredicate( - t, [](const RoseLiteral &l) { return l.programOffset != 0; })); - fprintf(f, " - in groups ::\n"); - fprintf(f, " + weak : %zu\n", - literalsInGroups(t, 0, t->group_weak_end)); - fprintf(f, " + general : %zu\n", - literalsInGroups(t, t->group_weak_end, sizeof(u64a) * 
8)); + fprintf(f, "total literal count : %u\n", t->totalNumLiterals); + fprintf(f, " prog table size : %u\n", t->literalCount); + fprintf(f, " delayed literals : %u\n", t->delay_count); fprintf(f, "\n"); fprintf(f, " minWidth : %u\n", t->minWidth); @@ -839,7 +850,8 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, fmatcherMaxBiAnchoredWidth); DUMP_U32(t, intReportOffset); DUMP_U32(t, intReportCount); - DUMP_U32(t, literalOffset); + DUMP_U32(t, litProgramOffset); + DUMP_U32(t, litDelayRebuildProgramOffset); DUMP_U32(t, literalCount); DUMP_U32(t, multidirectOffset); DUMP_U32(t, activeArrayCount); @@ -876,7 +888,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, delay_base_id); DUMP_U32(t, anchored_count); DUMP_U32(t, anchored_base_id); - DUMP_U32(t, nonbenefits_base_id); DUMP_U32(t, maxFloatingDelayedMatch); DUMP_U32(t, delayRebuildLength); DUMP_U32(t, stateOffsets.history); @@ -905,7 +916,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, rosePrefixCount); DUMP_U32(t, activeLeftIterOffset); DUMP_U32(t, ematcherRegionSize); - DUMP_U32(t, literalBenefitsOffsets); DUMP_U32(t, somRevCount); DUMP_U32(t, somRevOffsetOffset); DUMP_U32(t, group_weak_end); diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 92a67ae1..c9025600 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -73,43 +73,11 @@ ReportID literalToReport(u32 id) { return id & ~LITERAL_DR_FLAG; } -/** \brief Structure representing a literal. */ -struct RoseLiteral { - /** - * \brief Program to run when this literal is seen. - * - * Offset is relative to RoseEngine, or zero for no program. 
- */ - u32 programOffset; - - /** \brief Bitset of groups that cause this literal to fire. */ - rose_group groups; - - /** - * \brief True if this literal switches off its group behind it when it - * sets a role. - */ - u8 squashesGroup; - - /** - * \brief Bitset which indicates that the literal inserts a delayed - * match at the given offset. - */ - u32 delay_mask; - - /** \brief Offset to array of ids to poke in the delay structure. */ - u32 delayIdsOffset; -}; - /* Allocation of Rose literal ids * * The rose literal id space is segmented: * * ---- 0 - * | | Normal undelayed literals in the e, or f tables which require a - * | | manual benefits confirm on match [a table never requires benefits] - * | | - * ---- nonbenefits_base_id * | | 'Normal' undelayed literals in either e or f tables * | | * | | @@ -127,7 +95,7 @@ struct RoseLiteral { * ---- LITERAL_DR_FLAG * | | Direct Report literals: immediately raise an internal report with id * | | given by (lit_id & ~LITERAL_DR_FLAG). Raised by a or f tables (or e??). - * | | No RoseLiteral structure + * | | No literal programs. * | | * | | * ---- @@ -135,14 +103,15 @@ struct RoseLiteral { /* Rose Literal Sources * - * Rose currently gets events (mainly roseProcessMatch calls) from 8 sources: + * Rose currently gets events (mainly roseProcessMatch calls) from a number of + * sources: * 1) The floating table * 2) The anchored table * 3) Delayed literals - * 4) suffixes NFAs - * 5) masksv2 (literals with benefits) - * 6) End anchored table - * 7) prefix / infix nfas + * 4) Suffix NFAs + * 5) Literal masks + * 6) End anchored table + * 7) Prefix / Infix nfas * * Care is required to ensure that events appear to come into Rose in order * (or sufficiently ordered for Rose to cope). Generally the progress of the @@ -165,7 +134,7 @@ struct RoseLiteral { * NFA queues are run to the current point (floating or delayed literal) as * appropriate. 
* - * Maskv2: + * Literal Masks: * These are triggered from either floating literals or delayed literals and * inspect the data behind them. Matches are raised at the same location as the * trigger literal so there are no ordering issues. Masks are always pure @@ -301,12 +270,12 @@ struct RoseStateOffsets { }; struct RoseBoundaryReports { - u32 reportEodOffset; /**< 0 if no reports lits, otherwise offset of + u32 reportEodOffset; /**< 0 if no reports list, otherwise offset of * MO_INVALID_IDX terminated list to report at EOD */ - u32 reportZeroOffset; /**< 0 if no reports lits, otherwise offset of + u32 reportZeroOffset; /**< 0 if no reports list, otherwise offset of * MO_INVALID_IDX terminated list to report at offset * 0 */ - u32 reportZeroEodOffset; /**< 0 if no reports lits, otherwise offset of + u32 reportZeroEodOffset; /**< 0 if no reports list, otherwise offset of * MO_INVALID_IDX terminated list to report if eod * is at offset 0. Superset of other lists. */ }; @@ -338,18 +307,20 @@ struct RoseBoundaryReports { #define ROSE_RUNTIME_PURE_LITERAL 1 #define ROSE_RUNTIME_SINGLE_OUTFIX 2 -// Runtime structure header for Rose. -// In memory, we follow this with: -// 1a. anchored 'literal' matcher table -// 1b. floating literal matcher table -// 1c. eod-anchored literal matcher table -// 1d. small block table -// 2. array of RoseLiteral (literalCount entries) -// 8. array of NFA offsets, one per queue -// 9. array of state offsets, one per queue (+) -// 10. array of role ids for the set of all root roles -// 12. multi-direct report array -/* +/** + * \brief Runtime structure header for Rose. + * + * Runtime structure header for Rose. 
+ * In memory, we follow this with: + * -# the "engine blob" + * -# anchored 'literal' matcher table + * -# floating literal matcher table + * -# eod-anchored literal matcher table + * -# small block table + * -# array of NFA offsets, one per queue + * -# array of state offsets, one per queue (+) + * -# multi-direct report array + * * (+) stateOffset array note: Offsets in the array are either into the stream * state (normal case) or into the tstate region of scratch (for transient rose * nfas). Rose nfa info table can distinguish the cases. @@ -407,8 +378,22 @@ struct RoseEngine { * with the anchored table. */ u32 intReportOffset; /**< offset of array of internal_report structures */ u32 intReportCount; /**< number of internal_report structures */ - u32 literalOffset; // offset of RoseLiteral array (bytes) - u32 literalCount; // number of RoseLiteral entries [NOT number of literals] + + /** \brief Offset of u32 array of program offsets for literals. */ + u32 litProgramOffset; + + /** \brief Offset of u32 array of delay rebuild program offsets for + * literals. */ + u32 litDelayRebuildProgramOffset; + + /** + * \brief Number of entries in the arrays pointed to by litProgramOffset, + * litDelayRebuildProgramOffset. + * + * Note: NOT the total number of literals. + */ + u32 literalCount; + u32 multidirectOffset; /**< offset of multi-direct report list. */ u32 activeArrayCount; //number of nfas tracked in the active array u32 activeLeftCount; //number of nfas tracked in the active rose array @@ -468,8 +453,6 @@ struct RoseEngine { u32 anchored_count; /* number of anchored literal ids */ u32 anchored_base_id; /* literal id of the first literal in the A table. * anchored literal ids are contiguous */ - u32 nonbenefits_base_id; /* first literal id without benefit conf. 
- * contiguous, blah, blah */ u32 maxFloatingDelayedMatch; /* max offset that a delayed literal can * usefully be reported */ u32 delayRebuildLength; /* length of the history region which needs to be @@ -486,8 +469,6 @@ struct RoseEngine { u32 rosePrefixCount; /* number of rose prefixes */ u32 activeLeftIterOffset; /* mmbit_sparse_iter over non-transient roses */ u32 ematcherRegionSize; /* max region size to pass to ematcher */ - u32 literalBenefitsOffsets; /* offset to array of benefits indexed by lit - id */ u32 somRevCount; /**< number of som reverse nfas */ u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */ u32 group_weak_end; /* end of weak groups, debugging only */ @@ -496,17 +477,6 @@ struct RoseEngine { struct scatter_full_plan state_init; }; -struct lit_benefits { - union { - u64a a64[MAX_MASK2_WIDTH/sizeof(u64a)]; - u8 a8[MAX_MASK2_WIDTH]; - } and_mask; - union { - u64a e64[MAX_MASK2_WIDTH/sizeof(u64a)]; - u8 e8[MAX_MASK2_WIDTH]; - } expected; -}; - #if defined(_WIN32) #pragma pack(push, 1) #endif @@ -574,14 +544,6 @@ const void *getSBLiteralMatcher(const struct RoseEngine *t) { return matcher; } -static really_inline -const struct RoseLiteral *getLiteralTable(const struct RoseEngine *t) { - const struct RoseLiteral *tl - = (const struct RoseLiteral *)((const char *)t + t->literalOffset); - assert(ISALIGNED_N(tl, 4)); - return tl; -} - static really_inline const struct LeftNfaInfo *getLeftTable(const struct RoseEngine *t) { const struct LeftNfaInfo *r @@ -601,13 +563,6 @@ const struct mmbit_sparse_iter *getActiveLeftIter(const struct RoseEngine *t) { return it; } -static really_inline -const struct lit_benefits *getLiteralBenefitsTable( - const struct RoseEngine *t) { - return (const struct lit_benefits *) - ((const char *)t + t->literalBenefitsOffsets); -} - static really_inline const struct NfaInfo *getNfaInfoByQueue(const struct RoseEngine *t, u32 qi) { const struct NfaInfo *infos diff --git a/src/rose/rose_program.h 
b/src/rose/rose_program.h index 3f59ba15..37017ca0 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,11 +42,15 @@ /** \brief Role program instruction opcodes. */ enum RoseInstructionCode { ROSE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher. + ROSE_INSTR_CHECK_LIT_MASK, //!< Check and/cmp mask. + ROSE_INSTR_CHECK_LIT_EARLY, //!< Skip matches before floating min offset. + ROSE_INSTR_CHECK_GROUPS, //!< Check that literal groups are on. ROSE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD. ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance from offset 0. ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled". ROSE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check. ROSE_INSTR_CHECK_LEFTFIX, //!< Leftfix must be in accept state. + ROSE_INSTR_PUSH_DELAYED, //!< Push delayed literal matches. ROSE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM. ROSE_INSTR_SOM_LEFTFIX, //!< Acquire SOM from a leftfix engine. ROSE_INSTR_TRIGGER_INFIX, //!< Trigger an infix engine. @@ -59,6 +63,8 @@ enum RoseInstructionCode { ROSE_INSTR_REPORT_SOM_KNOWN, //!< Rose role knows its SOM offset. ROSE_INSTR_SET_STATE, //!< Switch a state index on. ROSE_INSTR_SET_GROUPS, //!< Set some literal group bits. + ROSE_INSTR_SQUASH_GROUPS, //!< Conditionally turn off some groups. + ROSE_INSTR_CHECK_STATE, //!< Test a single bit in the state multibit. ROSE_INSTR_SPARSE_ITER_BEGIN, //!< Begin running a sparse iter over states. ROSE_INSTR_SPARSE_ITER_NEXT, //!< Continue running sparse iter over states. ROSE_INSTR_END //!< End of program. @@ -70,6 +76,29 @@ struct ROSE_STRUCT_ANCHORED_DELAY { u32 done_jump; //!< Jump forward this many bytes if successful. 
}; +union RoseLiteralMask { + u64a a64[MAX_MASK2_WIDTH / sizeof(u64a)]; + u8 a8[MAX_MASK2_WIDTH]; +}; + +/** Note: check failure will halt program. */ +struct ROSE_STRUCT_CHECK_LIT_MASK { + u8 code; //!< From enum RoseInstructionCode. + union RoseLiteralMask and_mask; + union RoseLiteralMask cmp_mask; +}; + +/** Note: check failure will halt program. */ +struct ROSE_STRUCT_CHECK_LIT_EARLY { + u8 code; //!< From enum RoseInstructionCode. +}; + +/** Note: check failure will halt program. */ +struct ROSE_STRUCT_CHECK_GROUPS { + u8 code; //!< From enum RoseInstructionCode. + rose_group groups; //!< Bitmask. +}; + struct ROSE_STRUCT_CHECK_ONLY_EOD { u8 code; //!< From enum RoseInstructionCode. u32 fail_jump; //!< Jump forward this many bytes on failure. @@ -103,6 +132,12 @@ struct ROSE_STRUCT_CHECK_LEFTFIX { u32 fail_jump; //!< Jump forward this many bytes on failure. }; +struct ROSE_STRUCT_PUSH_DELAYED { + u8 code; //!< From enum RoseInstructionCode. + u8 delay; //!< Number of bytes to delay. + u32 index; //!< Delay literal index (relative to first delay lit). +}; + struct ROSE_STRUCT_SOM_ADJUST { u8 code; //!< From enum RoseInstructionCode. u32 distance; //!< Distance to EOM. @@ -164,7 +199,18 @@ struct ROSE_STRUCT_SET_STATE { struct ROSE_STRUCT_SET_GROUPS { u8 code; //!< From enum RoseInstructionCode. - rose_group groups; //!< Bitmask. + rose_group groups; //!< Bitmask to OR into groups. +}; + +struct ROSE_STRUCT_SQUASH_GROUPS { + u8 code; //!< From enum RoseInstructionCode. + rose_group groups; //!< Bitmask to AND into groups. +}; + +struct ROSE_STRUCT_CHECK_STATE { + u8 code; //!< From enum RoseInstructionCode. + u32 index; //!< State index in the role multibit. + u32 fail_jump; //!< Jump forward this many bytes on failure. }; /**