rose: add RECORD_ANCHORED instruction to program

Moves recordAnchoredLiteralMatch from an unconditional call in the
anchored callback to being driven by a program instruction.
This commit is contained in:
Justin Viiret 2016-07-14 10:05:47 +10:00 committed by Matthew Barr
parent 981b59fd05
commit 4dbbc4eaa5
5 changed files with 100 additions and 29 deletions

View File

@ -112,28 +112,6 @@ hwlmcb_rv_t ensureMpvQueueFlushed(const struct RoseEngine *t,
return ensureQueueFlushed_i(t, scratch, qi, loc, 1, in_chained);
}
/**
 * Note in scratch that anchored literal literal_id matched, ending at
 * offset end.
 *
 * The anchored literal log is an array of fatbit rows indexed by (end - 1);
 * inside a row, each anchored literal owns bit
 * (literal_id - t->anchored_base_id). scratch->al_log_sum remembers which
 * rows have already been written, so a row is cleared lazily on first use.
 *
 * end must be non-zero and literal_id must be one of the engine's
 * t->anchored_count anchored literals (both are asserted).
 */
static rose_inline
void recordAnchoredLiteralMatch(const struct RoseEngine *t,
                                struct hs_scratch *scratch, u32 literal_id,
                                u64a end) {
    assert(end);

    const u64a row = end - 1;
    const u32 num_anchored = t->anchored_count;
    struct fatbit **log_rows = getAnchoredLiteralLog(scratch);

    DEBUG_PRINTF("record %u @ %llu\n", literal_id, end);

    // A clear summary bit means nothing has been logged at this offset yet,
    // so the row may still hold stale contents and is wiped before use.
    if (!bf64_set(&scratch->al_log_sum, row)) {
        DEBUG_PRINTF("clearing %llu/%u\n", row, num_anchored);
        fatbit_clear(log_rows[row]);
    }

    // Bits within a row are relative to the first anchored literal ID.
    const u32 rel_idx = literal_id - t->anchored_base_id;
    DEBUG_PRINTF("record %u @ %llu index %u/%u\n", literal_id, end, rel_idx,
                 num_anchored);
    assert(rel_idx < num_anchored);

    fatbit_set(log_rows[row], num_anchored, rel_idx);
}
hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t,
struct hs_scratch *scratch, u32 event,
u64a top_squash_distance, u64a end,
@ -254,10 +232,6 @@ int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) {
DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups);
if (real_end > t->floatingMinLiteralMatchOffset) {
recordAnchoredLiteralMatch(t, scratch, id, real_end);
}
return MO_CONTINUE_MATCHING;
}

View File

@ -165,6 +165,33 @@ void rosePushDelayedMatch(const struct RoseEngine *t,
fatbit_set(slot, delay_count, delay_index);
}
/**
 * Log a match of anchored literal literal_id ending at offset end into the
 * anchored literal log held in scratch.
 *
 * Matches ending at or before t->floatingMinLiteralMatchOffset are ignored.
 * Otherwise bit (literal_id - t->anchored_base_id) is set in log row
 * (end - 1); the row is cleared first if scratch->al_log_sum shows that it
 * has not been written before.
 *
 * end must be non-zero and literal_id must fall inside the engine's range
 * of t->anchored_count anchored literals (both are asserted).
 */
static rose_inline
void recordAnchoredLiteralMatch(const struct RoseEngine *t,
                                struct hs_scratch *scratch, u32 literal_id,
                                u64a end) {
    assert(end);

    // Nothing to do for matches that end no later than this threshold.
    if (end <= t->floatingMinLiteralMatchOffset) {
        return;
    }

    const u64a slot = end - 1;
    const u32 total = t->anchored_count;
    struct fatbit **rows = getAnchoredLiteralLog(scratch);
    DEBUG_PRINTF("record %u @ %llu\n", literal_id, end);

    // First write to this row: wipe whatever it held before.
    if (!bf64_set(&scratch->al_log_sum, slot)) {
        DEBUG_PRINTF("clearing %llu/%u\n", slot, total);
        fatbit_clear(rows[slot]);
    }

    // Row bits are indexed relative to the first anchored literal ID.
    const u32 rel_idx = literal_id - t->anchored_base_id;
    DEBUG_PRINTF("record %u @ %llu index %u/%u\n", literal_id, end, rel_idx,
                 total);
    assert(rel_idx < total);
    fatbit_set(rows[slot], total, rel_idx);
}
static rose_inline
char roseLeftfixCheckMiracles(const struct RoseEngine *t,
const struct LeftNfaInfo *left,
@ -1226,6 +1253,11 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(RECORD_ANCHORED) {
recordAnchoredLiteralMatch(t, scratch, ri->id, end);
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CATCH_UP) {
if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;

View File

@ -204,6 +204,7 @@ public:
case ROSE_INSTR_CHECK_PREFIX: return &u.checkPrefix;
case ROSE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay;
case ROSE_INSTR_PUSH_DELAYED: return &u.pushDelayed;
case ROSE_INSTR_RECORD_ANCHORED: return &u.recordAnchored;
case ROSE_INSTR_CATCH_UP: return &u.catchUp;
case ROSE_INSTR_CATCH_UP_MPV: return &u.catchUpMpv;
case ROSE_INSTR_SOM_ADJUST: return &u.somAdjust;
@ -255,6 +256,7 @@ public:
case ROSE_INSTR_CHECK_PREFIX: return sizeof(u.checkPrefix);
case ROSE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay);
case ROSE_INSTR_PUSH_DELAYED: return sizeof(u.pushDelayed);
case ROSE_INSTR_RECORD_ANCHORED: return sizeof(u.recordAnchored);
case ROSE_INSTR_CATCH_UP: return sizeof(u.catchUp);
case ROSE_INSTR_CATCH_UP_MPV: return sizeof(u.catchUpMpv);
case ROSE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust);
@ -305,6 +307,7 @@ public:
ROSE_STRUCT_CHECK_PREFIX checkPrefix;
ROSE_STRUCT_ANCHORED_DELAY anchoredDelay;
ROSE_STRUCT_PUSH_DELAYED pushDelayed;
ROSE_STRUCT_RECORD_ANCHORED recordAnchored;
ROSE_STRUCT_CATCH_UP catchUp;
ROSE_STRUCT_CATCH_UP_MPV catchUpMpv;
ROSE_STRUCT_SOM_ADJUST somAdjust;
@ -4432,6 +4435,49 @@ void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 final_id,
program.push_back(move(ri));
}
/**
 * Returns the largest max_offset over all graph vertices attached to the
 * literal lit_id. The literal is expected to have at least one vertex
 * (asserted); with none, the result is 0.
 */
static
u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) {
    const auto &verts = build.literal_info.at(lit_id).vertices;
    assert(!verts.empty());

    u32 furthest = 0;
    for (const auto &v : verts) {
        const u32 v_max = build.g[v].max_offset;
        if (v_max > furthest) {
            furthest = v_max;
        }
    }
    return furthest;
}
static
void makeRecordAnchoredInstruction(const RoseBuildImpl &build,
build_context &bc, u32 final_id,
vector<RoseInstruction> &program) {
assert(contains(build.final_id_to_literal, final_id));
const auto &lit_ids = build.final_id_to_literal.at(final_id);
// Must be anchored.
assert(!lit_ids.empty());
if (build.literals.right.at(*begin(lit_ids)).table != ROSE_ANCHORED) {
return;
}
// If this anchored literal can never match past
// floatingMinLiteralMatchOffset, we will never have to record it.
u32 max_offset = 0;
for (u32 lit_id : lit_ids) {
assert(build.literals.right.at(lit_id).table == ROSE_ANCHORED);
max_offset = max(max_offset, findMaxOffset(build, lit_id));
}
if (max_offset <= bc.floatingMinLiteralMatchOffset) {
return;
}
auto ri = RoseInstruction(ROSE_INSTR_RECORD_ANCHORED);
ri.u.recordAnchored.id = final_id;
program.push_back(move(ri));
}
static
u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) {
const auto &lit_vertices = build.literal_info.at(lit_id).vertices;
@ -4589,10 +4635,18 @@ vector<RoseInstruction> buildLiteralProgram(RoseBuildImpl &build,
root_programs.push_back(role_prog);
}
// Literal may squash groups.
if (final_id != MO_INVALID_IDX) {
root_programs.push_back({});
makeGroupSquashInstruction(build, final_id, root_programs.back());
vector<RoseInstruction> prog;
// Literal may squash groups.
makeGroupSquashInstruction(build, final_id, prog);
// Literal may be anchored and need to be recorded.
makeRecordAnchoredInstruction(build, bc, final_id, prog);
if (!prog.empty()) {
root_programs.push_back(move(prog));
}
}
vector<RoseInstruction> root_program;

View File

@ -337,6 +337,11 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(RECORD_ANCHORED) {
os << " id " << ri->id << endl;
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CATCH_UP) {}
PROGRAM_NEXT_INSTRUCTION

View File

@ -55,6 +55,7 @@ enum RoseInstructionCode {
ROSE_INSTR_CHECK_INFIX, //!< Infix engine must be in accept state.
ROSE_INSTR_CHECK_PREFIX, //!< Prefix engine must be in accept state.
ROSE_INSTR_PUSH_DELAYED, //!< Push delayed literal matches.
ROSE_INSTR_RECORD_ANCHORED, //!< Record an anchored literal match.
ROSE_INSTR_CATCH_UP, //!< Catch up engines, anchored matches.
ROSE_INSTR_CATCH_UP_MPV, //!< Catch up the MPV.
ROSE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM.
@ -207,6 +208,11 @@ struct ROSE_STRUCT_PUSH_DELAYED {
u32 index; // Delay literal index (relative to first delay lit).
};
// Operands for the ROSE_INSTR_RECORD_ANCHORED program instruction, which
// records an anchored literal match.
struct ROSE_STRUCT_RECORD_ANCHORED {
u8 code; //!< From enum RoseInstructionCode.
u32 id; //!< Final ID of the anchored literal to record.
};
// Operands for the ROSE_INSTR_CATCH_UP program instruction. It carries
// nothing beyond the instruction code itself.
struct ROSE_STRUCT_CATCH_UP {
u8 code; //!< From enum RoseInstructionCode.
};