mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
rose: add RECORD_ANCHORED instruction to program
Moves recordAnchoredLiteralMatch from an unconditional call in the anchored callback to being driven by a program instruction.
This commit is contained in:
parent
981b59fd05
commit
4dbbc4eaa5
@ -112,28 +112,6 @@ hwlmcb_rv_t ensureMpvQueueFlushed(const struct RoseEngine *t,
|
|||||||
return ensureQueueFlushed_i(t, scratch, qi, loc, 1, in_chained);
|
return ensureQueueFlushed_i(t, scratch, qi, loc, 1, in_chained);
|
||||||
}
|
}
|
||||||
|
|
||||||
static rose_inline
|
|
||||||
void recordAnchoredLiteralMatch(const struct RoseEngine *t,
|
|
||||||
struct hs_scratch *scratch, u32 literal_id,
|
|
||||||
u64a end) {
|
|
||||||
assert(end);
|
|
||||||
struct fatbit **anchoredLiteralRows = getAnchoredLiteralLog(scratch);
|
|
||||||
|
|
||||||
DEBUG_PRINTF("record %u @ %llu\n", literal_id, end);
|
|
||||||
|
|
||||||
if (!bf64_set(&scratch->al_log_sum, end - 1)) {
|
|
||||||
// first time, clear row
|
|
||||||
DEBUG_PRINTF("clearing %llu/%u\n", end - 1, t->anchored_count);
|
|
||||||
fatbit_clear(anchoredLiteralRows[end - 1]);
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 rel_idx = literal_id - t->anchored_base_id;
|
|
||||||
DEBUG_PRINTF("record %u @ %llu index %u/%u\n", literal_id, end, rel_idx,
|
|
||||||
t->anchored_count);
|
|
||||||
assert(rel_idx < t->anchored_count);
|
|
||||||
fatbit_set(anchoredLiteralRows[end - 1], t->anchored_count, rel_idx);
|
|
||||||
}
|
|
||||||
|
|
||||||
hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t,
|
hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t,
|
||||||
struct hs_scratch *scratch, u32 event,
|
struct hs_scratch *scratch, u32 event,
|
||||||
u64a top_squash_distance, u64a end,
|
u64a top_squash_distance, u64a end,
|
||||||
@ -254,10 +232,6 @@ int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) {
|
|||||||
|
|
||||||
DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups);
|
DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups);
|
||||||
|
|
||||||
if (real_end > t->floatingMinLiteralMatchOffset) {
|
|
||||||
recordAnchoredLiteralMatch(t, scratch, id, real_end);
|
|
||||||
}
|
|
||||||
|
|
||||||
return MO_CONTINUE_MATCHING;
|
return MO_CONTINUE_MATCHING;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -165,6 +165,33 @@ void rosePushDelayedMatch(const struct RoseEngine *t,
|
|||||||
fatbit_set(slot, delay_count, delay_index);
|
fatbit_set(slot, delay_count, delay_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static rose_inline
|
||||||
|
void recordAnchoredLiteralMatch(const struct RoseEngine *t,
|
||||||
|
struct hs_scratch *scratch, u32 literal_id,
|
||||||
|
u64a end) {
|
||||||
|
assert(end);
|
||||||
|
|
||||||
|
if (end <= t->floatingMinLiteralMatchOffset) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct fatbit **anchoredLiteralRows = getAnchoredLiteralLog(scratch);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("record %u @ %llu\n", literal_id, end);
|
||||||
|
|
||||||
|
if (!bf64_set(&scratch->al_log_sum, end - 1)) {
|
||||||
|
// first time, clear row
|
||||||
|
DEBUG_PRINTF("clearing %llu/%u\n", end - 1, t->anchored_count);
|
||||||
|
fatbit_clear(anchoredLiteralRows[end - 1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 rel_idx = literal_id - t->anchored_base_id;
|
||||||
|
DEBUG_PRINTF("record %u @ %llu index %u/%u\n", literal_id, end, rel_idx,
|
||||||
|
t->anchored_count);
|
||||||
|
assert(rel_idx < t->anchored_count);
|
||||||
|
fatbit_set(anchoredLiteralRows[end - 1], t->anchored_count, rel_idx);
|
||||||
|
}
|
||||||
|
|
||||||
static rose_inline
|
static rose_inline
|
||||||
char roseLeftfixCheckMiracles(const struct RoseEngine *t,
|
char roseLeftfixCheckMiracles(const struct RoseEngine *t,
|
||||||
const struct LeftNfaInfo *left,
|
const struct LeftNfaInfo *left,
|
||||||
@ -1226,6 +1253,11 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
|
|||||||
}
|
}
|
||||||
PROGRAM_NEXT_INSTRUCTION
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
|
PROGRAM_CASE(RECORD_ANCHORED) {
|
||||||
|
recordAnchoredLiteralMatch(t, scratch, ri->id, end);
|
||||||
|
}
|
||||||
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
PROGRAM_CASE(CATCH_UP) {
|
PROGRAM_CASE(CATCH_UP) {
|
||||||
if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) {
|
if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) {
|
||||||
return HWLM_TERMINATE_MATCHING;
|
return HWLM_TERMINATE_MATCHING;
|
||||||
|
@ -204,6 +204,7 @@ public:
|
|||||||
case ROSE_INSTR_CHECK_PREFIX: return &u.checkPrefix;
|
case ROSE_INSTR_CHECK_PREFIX: return &u.checkPrefix;
|
||||||
case ROSE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay;
|
case ROSE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay;
|
||||||
case ROSE_INSTR_PUSH_DELAYED: return &u.pushDelayed;
|
case ROSE_INSTR_PUSH_DELAYED: return &u.pushDelayed;
|
||||||
|
case ROSE_INSTR_RECORD_ANCHORED: return &u.recordAnchored;
|
||||||
case ROSE_INSTR_CATCH_UP: return &u.catchUp;
|
case ROSE_INSTR_CATCH_UP: return &u.catchUp;
|
||||||
case ROSE_INSTR_CATCH_UP_MPV: return &u.catchUpMpv;
|
case ROSE_INSTR_CATCH_UP_MPV: return &u.catchUpMpv;
|
||||||
case ROSE_INSTR_SOM_ADJUST: return &u.somAdjust;
|
case ROSE_INSTR_SOM_ADJUST: return &u.somAdjust;
|
||||||
@ -255,6 +256,7 @@ public:
|
|||||||
case ROSE_INSTR_CHECK_PREFIX: return sizeof(u.checkPrefix);
|
case ROSE_INSTR_CHECK_PREFIX: return sizeof(u.checkPrefix);
|
||||||
case ROSE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay);
|
case ROSE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay);
|
||||||
case ROSE_INSTR_PUSH_DELAYED: return sizeof(u.pushDelayed);
|
case ROSE_INSTR_PUSH_DELAYED: return sizeof(u.pushDelayed);
|
||||||
|
case ROSE_INSTR_RECORD_ANCHORED: return sizeof(u.recordAnchored);
|
||||||
case ROSE_INSTR_CATCH_UP: return sizeof(u.catchUp);
|
case ROSE_INSTR_CATCH_UP: return sizeof(u.catchUp);
|
||||||
case ROSE_INSTR_CATCH_UP_MPV: return sizeof(u.catchUpMpv);
|
case ROSE_INSTR_CATCH_UP_MPV: return sizeof(u.catchUpMpv);
|
||||||
case ROSE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust);
|
case ROSE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust);
|
||||||
@ -305,6 +307,7 @@ public:
|
|||||||
ROSE_STRUCT_CHECK_PREFIX checkPrefix;
|
ROSE_STRUCT_CHECK_PREFIX checkPrefix;
|
||||||
ROSE_STRUCT_ANCHORED_DELAY anchoredDelay;
|
ROSE_STRUCT_ANCHORED_DELAY anchoredDelay;
|
||||||
ROSE_STRUCT_PUSH_DELAYED pushDelayed;
|
ROSE_STRUCT_PUSH_DELAYED pushDelayed;
|
||||||
|
ROSE_STRUCT_RECORD_ANCHORED recordAnchored;
|
||||||
ROSE_STRUCT_CATCH_UP catchUp;
|
ROSE_STRUCT_CATCH_UP catchUp;
|
||||||
ROSE_STRUCT_CATCH_UP_MPV catchUpMpv;
|
ROSE_STRUCT_CATCH_UP_MPV catchUpMpv;
|
||||||
ROSE_STRUCT_SOM_ADJUST somAdjust;
|
ROSE_STRUCT_SOM_ADJUST somAdjust;
|
||||||
@ -4432,6 +4435,49 @@ void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 final_id,
|
|||||||
program.push_back(move(ri));
|
program.push_back(move(ri));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) {
|
||||||
|
const auto &lit_vertices = build.literal_info.at(lit_id).vertices;
|
||||||
|
assert(!lit_vertices.empty());
|
||||||
|
|
||||||
|
u32 max_offset = 0;
|
||||||
|
for (const auto &v : lit_vertices) {
|
||||||
|
max_offset = max(max_offset, build.g[v].max_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
return max_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void makeRecordAnchoredInstruction(const RoseBuildImpl &build,
|
||||||
|
build_context &bc, u32 final_id,
|
||||||
|
vector<RoseInstruction> &program) {
|
||||||
|
assert(contains(build.final_id_to_literal, final_id));
|
||||||
|
const auto &lit_ids = build.final_id_to_literal.at(final_id);
|
||||||
|
|
||||||
|
// Must be anchored.
|
||||||
|
assert(!lit_ids.empty());
|
||||||
|
if (build.literals.right.at(*begin(lit_ids)).table != ROSE_ANCHORED) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If this anchored literal can never match past
|
||||||
|
// floatingMinLiteralMatchOffset, we will never have to record it.
|
||||||
|
u32 max_offset = 0;
|
||||||
|
for (u32 lit_id : lit_ids) {
|
||||||
|
assert(build.literals.right.at(lit_id).table == ROSE_ANCHORED);
|
||||||
|
max_offset = max(max_offset, findMaxOffset(build, lit_id));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (max_offset <= bc.floatingMinLiteralMatchOffset) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto ri = RoseInstruction(ROSE_INSTR_RECORD_ANCHORED);
|
||||||
|
ri.u.recordAnchored.id = final_id;
|
||||||
|
program.push_back(move(ri));
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) {
|
u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) {
|
||||||
const auto &lit_vertices = build.literal_info.at(lit_id).vertices;
|
const auto &lit_vertices = build.literal_info.at(lit_id).vertices;
|
||||||
@ -4589,10 +4635,18 @@ vector<RoseInstruction> buildLiteralProgram(RoseBuildImpl &build,
|
|||||||
root_programs.push_back(role_prog);
|
root_programs.push_back(role_prog);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Literal may squash groups.
|
|
||||||
if (final_id != MO_INVALID_IDX) {
|
if (final_id != MO_INVALID_IDX) {
|
||||||
root_programs.push_back({});
|
vector<RoseInstruction> prog;
|
||||||
makeGroupSquashInstruction(build, final_id, root_programs.back());
|
|
||||||
|
// Literal may squash groups.
|
||||||
|
makeGroupSquashInstruction(build, final_id, prog);
|
||||||
|
|
||||||
|
// Literal may be anchored and need to be recorded.
|
||||||
|
makeRecordAnchoredInstruction(build, bc, final_id, prog);
|
||||||
|
|
||||||
|
if (!prog.empty()) {
|
||||||
|
root_programs.push_back(move(prog));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<RoseInstruction> root_program;
|
vector<RoseInstruction> root_program;
|
||||||
|
@ -337,6 +337,11 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
|||||||
}
|
}
|
||||||
PROGRAM_NEXT_INSTRUCTION
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
|
PROGRAM_CASE(RECORD_ANCHORED) {
|
||||||
|
os << " id " << ri->id << endl;
|
||||||
|
}
|
||||||
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
PROGRAM_CASE(CATCH_UP) {}
|
PROGRAM_CASE(CATCH_UP) {}
|
||||||
PROGRAM_NEXT_INSTRUCTION
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
|
@ -55,6 +55,7 @@ enum RoseInstructionCode {
|
|||||||
ROSE_INSTR_CHECK_INFIX, //!< Infix engine must be in accept state.
|
ROSE_INSTR_CHECK_INFIX, //!< Infix engine must be in accept state.
|
||||||
ROSE_INSTR_CHECK_PREFIX, //!< Prefix engine must be in accept state.
|
ROSE_INSTR_CHECK_PREFIX, //!< Prefix engine must be in accept state.
|
||||||
ROSE_INSTR_PUSH_DELAYED, //!< Push delayed literal matches.
|
ROSE_INSTR_PUSH_DELAYED, //!< Push delayed literal matches.
|
||||||
|
ROSE_INSTR_RECORD_ANCHORED, //!< Record an anchored literal match.
|
||||||
ROSE_INSTR_CATCH_UP, //!< Catch up engines, anchored matches.
|
ROSE_INSTR_CATCH_UP, //!< Catch up engines, anchored matches.
|
||||||
ROSE_INSTR_CATCH_UP_MPV, //!< Catch up the MPV.
|
ROSE_INSTR_CATCH_UP_MPV, //!< Catch up the MPV.
|
||||||
ROSE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM.
|
ROSE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM.
|
||||||
@ -207,6 +208,11 @@ struct ROSE_STRUCT_PUSH_DELAYED {
|
|||||||
u32 index; // Delay literal index (relative to first delay lit).
|
u32 index; // Delay literal index (relative to first delay lit).
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ROSE_STRUCT_RECORD_ANCHORED {
|
||||||
|
u8 code; //!< From enum RoseInstructionCode.
|
||||||
|
u32 id; //!< Literal ID.
|
||||||
|
};
|
||||||
|
|
||||||
struct ROSE_STRUCT_CATCH_UP {
|
struct ROSE_STRUCT_CATCH_UP {
|
||||||
u8 code; //!< From enum RoseInstructionCode.
|
u8 code; //!< From enum RoseInstructionCode.
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user