From 4dbbc4eaa57987d1f962653348220e2a64829963 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 14 Jul 2016 10:05:47 +1000 Subject: [PATCH] rose: add RECORD_ANCHORED instruction to program Moves recordAnchoredLiteralMatch from an unconditional call in the anchored callback to being driven by a program instruction. --- /* NOTE(review): reading guide, added as free-form notes after the "---" separator; no patch content is changed. (1) match.c: drops recordAnchoredLiteralMatch() and its call in roseAnchoredCallback(), which ran whenever real_end > t->floatingMinLiteralMatchOffset. (2) program_runtime.h: re-adds recordAnchoredLiteralMatch() with that offset test folded in as an early return (end <= t->floatingMinLiteralMatchOffset), and adds a RECORD_ANCHORED case to roseRunProgram_i() that calls it with ri->id and end. (3) rose_build_bytecode.cpp: adds a recordAnchored member alongside the other per-instruction structs plus the matching cases in the pointer and size switches; adds findMaxOffset() (largest max_offset over a literal's vertices) and makeRecordAnchoredInstruction(), which emits the instruction only when the first literal for final_id is in the ROSE_ANCHORED table and the largest max_offset exceeds bc.floatingMinLiteralMatchOffset; buildLiteralProgram() now appends the squash/record program only when it is non-empty. (4) rose_dump.cpp: prints the instruction's id. (5) rose_program.h: adds the ROSE_INSTR_RECORD_ANCHORED enumerator (between PUSH_DELAYED and CATCH_UP) and struct ROSE_STRUCT_RECORD_ANCHORED with fields code and id. Caveats: this copy has its line breaks collapsed, and several declarations read "vector &program" or "vector prog" with no template argument - presumably angle-bracketed text was lost in extraction; TODO confirm against the upstream commit before applying. Inserting an enumerator mid-enum presumably renumbers the later instruction codes - confirm that no previously built bytecode is expected to stay loadable. */ src/rose/match.c | 26 -------------- src/rose/program_runtime.h | 32 +++++++++++++++++ src/rose/rose_build_bytecode.cpp | 60 ++++++++++++++++++++++++++++++-- src/rose/rose_dump.cpp | 5 +++ src/rose/rose_program.h | 6 ++++ 5 files changed, 100 insertions(+), 29 deletions(-) diff --git a/src/rose/match.c b/src/rose/match.c index 15d3534c..95cb141e 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -112,28 +112,6 @@ hwlmcb_rv_t ensureMpvQueueFlushed(const struct RoseEngine *t, return ensureQueueFlushed_i(t, scratch, qi, loc, 1, in_chained); } -static rose_inline -void recordAnchoredLiteralMatch(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 literal_id, - u64a end) { - assert(end); - struct fatbit **anchoredLiteralRows = getAnchoredLiteralLog(scratch); - - DEBUG_PRINTF("record %u @ %llu\n", literal_id, end); - - if (!bf64_set(&scratch->al_log_sum, end - 1)) { - // first time, clear row - DEBUG_PRINTF("clearing %llu/%u\n", end - 1, t->anchored_count); - fatbit_clear(anchoredLiteralRows[end - 1]); - } - - u32 rel_idx = literal_id - t->anchored_base_id; - DEBUG_PRINTF("record %u @ %llu index %u/%u\n", literal_id, end, rel_idx, - t->anchored_count); - assert(rel_idx < t->anchored_count); - fatbit_set(anchoredLiteralRows[end - 1], t->anchored_count, rel_idx); -} - hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, struct hs_scratch *scratch, u32 event, u64a top_squash_distance, u64a end, @@ -254,10 +232,6 @@ int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) { DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); - if (real_end > t->floatingMinLiteralMatchOffset) { - recordAnchoredLiteralMatch(t, scratch, 
id, real_end); - } - return MO_CONTINUE_MATCHING; } diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index fdaa2e07..fef41269 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -165,6 +165,33 @@ void rosePushDelayedMatch(const struct RoseEngine *t, fatbit_set(slot, delay_count, delay_index); } +static rose_inline +void recordAnchoredLiteralMatch(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 literal_id, + u64a end) { + assert(end); + + if (end <= t->floatingMinLiteralMatchOffset) { + return; + } + + struct fatbit **anchoredLiteralRows = getAnchoredLiteralLog(scratch); + + DEBUG_PRINTF("record %u @ %llu\n", literal_id, end); + + if (!bf64_set(&scratch->al_log_sum, end - 1)) { + // first time, clear row + DEBUG_PRINTF("clearing %llu/%u\n", end - 1, t->anchored_count); + fatbit_clear(anchoredLiteralRows[end - 1]); + } + + u32 rel_idx = literal_id - t->anchored_base_id; + DEBUG_PRINTF("record %u @ %llu index %u/%u\n", literal_id, end, rel_idx, + t->anchored_count); + assert(rel_idx < t->anchored_count); + fatbit_set(anchoredLiteralRows[end - 1], t->anchored_count, rel_idx); +} + static rose_inline char roseLeftfixCheckMiracles(const struct RoseEngine *t, const struct LeftNfaInfo *left, @@ -1226,6 +1253,11 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(RECORD_ANCHORED) { + recordAnchoredLiteralMatch(t, scratch, ri->id, end); + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CATCH_UP) { if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 37e6ae13..5cd8161b 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -204,6 +204,7 @@ public: case ROSE_INSTR_CHECK_PREFIX: return &u.checkPrefix; case ROSE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay; case ROSE_INSTR_PUSH_DELAYED: return &u.pushDelayed; + 
case ROSE_INSTR_RECORD_ANCHORED: return &u.recordAnchored; case ROSE_INSTR_CATCH_UP: return &u.catchUp; case ROSE_INSTR_CATCH_UP_MPV: return &u.catchUpMpv; case ROSE_INSTR_SOM_ADJUST: return &u.somAdjust; @@ -255,6 +256,7 @@ public: case ROSE_INSTR_CHECK_PREFIX: return sizeof(u.checkPrefix); case ROSE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay); case ROSE_INSTR_PUSH_DELAYED: return sizeof(u.pushDelayed); + case ROSE_INSTR_RECORD_ANCHORED: return sizeof(u.recordAnchored); case ROSE_INSTR_CATCH_UP: return sizeof(u.catchUp); case ROSE_INSTR_CATCH_UP_MPV: return sizeof(u.catchUpMpv); case ROSE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust); @@ -305,6 +307,7 @@ public: ROSE_STRUCT_CHECK_PREFIX checkPrefix; ROSE_STRUCT_ANCHORED_DELAY anchoredDelay; ROSE_STRUCT_PUSH_DELAYED pushDelayed; + ROSE_STRUCT_RECORD_ANCHORED recordAnchored; ROSE_STRUCT_CATCH_UP catchUp; ROSE_STRUCT_CATCH_UP_MPV catchUpMpv; ROSE_STRUCT_SOM_ADJUST somAdjust; @@ -4432,6 +4435,49 @@ void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 final_id, program.push_back(move(ri)); } +static +u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) { + const auto &lit_vertices = build.literal_info.at(lit_id).vertices; + assert(!lit_vertices.empty()); + + u32 max_offset = 0; + for (const auto &v : lit_vertices) { + max_offset = max(max_offset, build.g[v].max_offset); + } + + return max_offset; +} + +static +void makeRecordAnchoredInstruction(const RoseBuildImpl &build, + build_context &bc, u32 final_id, + vector &program) { + assert(contains(build.final_id_to_literal, final_id)); + const auto &lit_ids = build.final_id_to_literal.at(final_id); + + // Must be anchored. + assert(!lit_ids.empty()); + if (build.literals.right.at(*begin(lit_ids)).table != ROSE_ANCHORED) { + return; + } + + // If this anchored literal can never match past + // floatingMinLiteralMatchOffset, we will never have to record it. 
+ u32 max_offset = 0; + for (u32 lit_id : lit_ids) { + assert(build.literals.right.at(lit_id).table == ROSE_ANCHORED); + max_offset = max(max_offset, findMaxOffset(build, lit_id)); + } + + if (max_offset <= bc.floatingMinLiteralMatchOffset) { + return; + } + + auto ri = RoseInstruction(ROSE_INSTR_RECORD_ANCHORED); + ri.u.recordAnchored.id = final_id; + program.push_back(move(ri)); +} + static u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) { const auto &lit_vertices = build.literal_info.at(lit_id).vertices; @@ -4589,10 +4635,18 @@ vector buildLiteralProgram(RoseBuildImpl &build, root_programs.push_back(role_prog); } - // Literal may squash groups. if (final_id != MO_INVALID_IDX) { - root_programs.push_back({}); - makeGroupSquashInstruction(build, final_id, root_programs.back()); + vector prog; + + // Literal may squash groups. + makeGroupSquashInstruction(build, final_id, prog); + + // Literal may be anchored and need to be recorded. + makeRecordAnchoredInstruction(build, bc, final_id, prog); + + if (!prog.empty()) { + root_programs.push_back(move(prog)); + } } vector root_program; diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index c483443c..dedd8fcf 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -337,6 +337,11 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(RECORD_ANCHORED) { + os << " id " << ri->id << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CATCH_UP) {} PROGRAM_NEXT_INSTRUCTION diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 6ca117ea..545e190f 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -55,6 +55,7 @@ enum RoseInstructionCode { ROSE_INSTR_CHECK_INFIX, //!< Infix engine must be in accept state. ROSE_INSTR_CHECK_PREFIX, //!< Prefix engine must be in accept state. ROSE_INSTR_PUSH_DELAYED, //!< Push delayed literal matches. + ROSE_INSTR_RECORD_ANCHORED, //!< Record an anchored literal match. 
ROSE_INSTR_CATCH_UP, //!< Catch up engines, anchored matches. ROSE_INSTR_CATCH_UP_MPV, //!< Catch up the MPV. ROSE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM. @@ -207,6 +208,11 @@ struct ROSE_STRUCT_PUSH_DELAYED { u32 index; // Delay literal index (relative to first delay lit). }; +struct ROSE_STRUCT_RECORD_ANCHORED { + u8 code; //!< From enum RoseInstructionCode. + u32 id; //!< Literal ID. +}; + struct ROSE_STRUCT_CATCH_UP { u8 code; //!< From enum RoseInstructionCode. };