mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
rose: Extend program to handle literals, iterators
- cleanups
- add sparse iter instructions
- merge "root" and "sparse iter" programs together
- move program execution to new file program_runtime.h
- simplify EOD execution
This commit is contained in:
parent
8069e99bee
commit
b2ebdac642
@ -467,6 +467,7 @@ set (hs_exec_SRCS
|
||||
src/rose/match.h
|
||||
src/rose/match.c
|
||||
src/rose/miracle.h
|
||||
src/rose/program_runtime.h
|
||||
src/rose/runtime.h
|
||||
src/rose/rose.h
|
||||
src/rose/rose_internal.h
|
||||
|
@ -28,6 +28,7 @@
|
||||
|
||||
#include "catchup.h"
|
||||
#include "match.h"
|
||||
#include "program_runtime.h"
|
||||
#include "rose.h"
|
||||
#include "util/fatbit.h"
|
||||
|
||||
@ -107,43 +108,18 @@ hwlmcb_rv_t roseEodRunMatcher(const struct RoseEngine *t, u64a offset,
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
int roseEodRunIterator(const struct RoseEngine *t, u8 *state, u64a offset,
|
||||
int roseEodRunIterator(const struct RoseEngine *t, u64a offset,
|
||||
struct hs_scratch *scratch) {
|
||||
if (!t->eodIterOffset) {
|
||||
if (!t->eodIterProgramOffset) {
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("running eod iterator at offset %u\n", t->eodIterOffset);
|
||||
DEBUG_PRINTF("running eod program at offset %u\n", t->eodIterProgramOffset);
|
||||
|
||||
const u32 *programTable = getByOffset(t, t->eodProgramTableOffset);
|
||||
const struct mmbit_sparse_iter *it = getByOffset(t, t->eodIterOffset);
|
||||
assert(ISALIGNED(programTable));
|
||||
assert(ISALIGNED(it));
|
||||
|
||||
// Sparse iterator state was allocated earlier
|
||||
struct mmbit_sparse_state *s = scratch->sparse_iter_state;
|
||||
struct fatbit *handled_roles = scratch->handled_roles;
|
||||
|
||||
const u32 numStates = t->rolesWithStateCount;
|
||||
|
||||
void *role_state = getRoleState(state);
|
||||
u32 idx = 0;
|
||||
u32 i = mmbit_sparse_iter_begin(role_state, numStates, &idx, it, s);
|
||||
|
||||
fatbit_clear(handled_roles);
|
||||
|
||||
int work_done = 0; // not read from in this path.
|
||||
|
||||
for (; i != MMB_INVALID;
|
||||
i = mmbit_sparse_iter_next(role_state, numStates, i, &idx, it, s)) {
|
||||
DEBUG_PRINTF("pred state %u (iter idx=%u) is on\n", i, idx);
|
||||
u32 programOffset = programTable[idx];
|
||||
u64a som = 0;
|
||||
if (roseRunRoleProgram(t, programOffset, offset, &som,
|
||||
&(scratch->tctxt),
|
||||
&work_done) == HWLM_TERMINATE_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
int work_done = 0;
|
||||
if (roseRunProgram(t, t->eodIterProgramOffset, offset, &(scratch->tctxt), 0,
|
||||
&work_done) == HWLM_TERMINATE_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
|
||||
return MO_CONTINUE_MATCHING;
|
||||
@ -236,6 +212,27 @@ void roseCheckEodSuffixes(const struct RoseEngine *t, u8 *state, u64a offset,
|
||||
}
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
int roseRunEodProgram(const struct RoseEngine *t, u64a offset,
|
||||
struct hs_scratch *scratch) {
|
||||
if (!t->eodProgramOffset) {
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("running eod program at %u\n", t->eodProgramOffset);
|
||||
|
||||
// There should be no pending delayed literals.
|
||||
assert(!scratch->tctxt.filledDelayedSlots);
|
||||
|
||||
int work_done = 0;
|
||||
if (roseRunProgram(t, t->eodProgramOffset, offset, &scratch->tctxt, 0,
|
||||
&work_done) == HWLM_TERMINATE_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void roseEodExec_i(const struct RoseEngine *t, u8 *state, u64a offset,
|
||||
struct hs_scratch *scratch, const char is_streaming) {
|
||||
@ -244,31 +241,20 @@ void roseEodExec_i(const struct RoseEngine *t, u8 *state, u64a offset,
|
||||
assert(!scratch->core_info.buf || !scratch->core_info.hbuf);
|
||||
assert(!can_stop_matching(scratch));
|
||||
|
||||
// Fire the special EOD event literal.
|
||||
if (t->hasEodEventLiteral) {
|
||||
DEBUG_PRINTF("firing eod event id %u at offset %llu\n",
|
||||
t->eodLiteralId, offset);
|
||||
const struct core_info *ci = &scratch->core_info;
|
||||
size_t len = ci->buf ? ci->len : ci->hlen;
|
||||
assert(len || !ci->buf); /* len may be 0 if no history is required
|
||||
* (bounds checks only can lead to this) */
|
||||
|
||||
roseRunEvent(len, t->eodLiteralId, &scratch->tctxt);
|
||||
if (can_stop_matching(scratch)) {
|
||||
DEBUG_PRINTF("user told us to stop\n");
|
||||
return;
|
||||
}
|
||||
// Run the unconditional EOD program.
|
||||
if (roseRunEodProgram(t, offset, scratch) == MO_HALT_MATCHING) {
|
||||
return;
|
||||
}
|
||||
|
||||
roseCheckNfaEod(t, state, scratch, offset, is_streaming);
|
||||
|
||||
if (!t->eodIterOffset && !t->ematcherOffset) {
|
||||
if (!t->eodIterProgramOffset && !t->ematcherOffset) {
|
||||
DEBUG_PRINTF("no eod accepts\n");
|
||||
return;
|
||||
}
|
||||
|
||||
// Handle pending EOD reports.
|
||||
int itrv = roseEodRunIterator(t, state, offset, scratch);
|
||||
int itrv = roseEodRunIterator(t, offset, scratch);
|
||||
if (itrv == MO_HALT_MATCHING) {
|
||||
return;
|
||||
}
|
||||
@ -288,7 +274,7 @@ void roseEodExec_i(const struct RoseEngine *t, u8 *state, u64a offset,
|
||||
cleanupAfterEodMatcher(t, state, offset, scratch);
|
||||
|
||||
// Fire any new EOD reports.
|
||||
roseEodRunIterator(t, state, offset, scratch);
|
||||
roseEodRunIterator(t, offset, scratch);
|
||||
|
||||
roseCheckEodSuffixes(t, state, offset, scratch);
|
||||
}
|
||||
|
1173
src/rose/match.c
1173
src/rose/match.c
File diff suppressed because it is too large
Load Diff
@ -55,7 +55,6 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctx);
|
||||
hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id,
|
||||
void *ctx);
|
||||
int roseAnchoredCallback(u64a end, u32 id, void *ctx);
|
||||
void roseRunEvent(size_t end, u32 id, struct RoseContext *tctxt);
|
||||
|
||||
/* Common code, used all over Rose runtime */
|
||||
|
||||
@ -299,8 +298,4 @@ void roseFlushLastByteHistory(const struct RoseEngine *t, u8 *state,
|
||||
scratch->sparse_iter_state);
|
||||
}
|
||||
|
||||
hwlmcb_rv_t roseRunRoleProgram(const struct RoseEngine *t, u32 programOffset,
|
||||
u64a end, u64a *som, struct RoseContext *tctxt,
|
||||
int *work_done);
|
||||
|
||||
#endif
|
||||
|
1081
src/rose/program_runtime.h
Normal file
1081
src/rose/program_runtime.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -45,6 +45,39 @@ void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch,
|
||||
RoseCallback callback, RoseCallbackSom som_callback,
|
||||
void *context);
|
||||
|
||||
static really_inline
|
||||
int roseBlockHasEodWork(const struct RoseEngine *t,
|
||||
struct hs_scratch *scratch) {
|
||||
if (t->ematcherOffset) {
|
||||
DEBUG_PRINTF("eod matcher to run\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (t->eodProgramOffset) {
|
||||
DEBUG_PRINTF("has eod program\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
void *state = scratch->core_info.state;
|
||||
if (mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) {
|
||||
DEBUG_PRINTF("active outfix/suffix engines\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (t->eodIterOffset) {
|
||||
u32 idx;
|
||||
const struct mmbit_sparse_iter *it = getByOffset(t, t->eodIterOffset);
|
||||
struct mmbit_sparse_state *s = scratch->sparse_iter_state;
|
||||
if (mmbit_sparse_iter_begin(getRoleState(state), t->rolesWithStateCount,
|
||||
&idx, it, s) != MMB_INVALID) {
|
||||
DEBUG_PRINTF("eod iter has states on\n");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* assumes core_info in scratch has been init to point to data */
|
||||
static really_inline
|
||||
void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch,
|
||||
@ -77,19 +110,8 @@ void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch,
|
||||
return;
|
||||
}
|
||||
|
||||
struct mmbit_sparse_state *s = scratch->sparse_iter_state;
|
||||
const u32 numStates = t->rolesWithStateCount;
|
||||
u8 *state = (u8 *)scratch->core_info.state;
|
||||
void *role_state = getRoleState(state);
|
||||
u32 idx = 0;
|
||||
const struct mmbit_sparse_iter *it
|
||||
= (const void *)((const u8 *)t + t->eodIterOffset);
|
||||
|
||||
if (!t->ematcherOffset && !t->hasEodEventLiteral
|
||||
&& !mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)
|
||||
&& (!t->eodIterOffset
|
||||
|| mmbit_sparse_iter_begin(role_state, numStates, &idx, it, s)
|
||||
== MMB_INVALID)) {
|
||||
if (!roseBlockHasEodWork(t, scratch)) {
|
||||
DEBUG_PRINTF("no eod work\n");
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -170,6 +170,7 @@ public:
|
||||
|
||||
const void *get() const {
|
||||
switch (code()) {
|
||||
case ROSE_INSTR_CHECK_DEPTH: return &u.checkDepth;
|
||||
case ROSE_INSTR_CHECK_ONLY_EOD: return &u.checkOnlyEod;
|
||||
case ROSE_INSTR_CHECK_BOUNDS: return &u.checkBounds;
|
||||
case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled;
|
||||
@ -188,6 +189,8 @@ public:
|
||||
case ROSE_INSTR_REPORT_SOM_KNOWN: return &u.reportSomKnown;
|
||||
case ROSE_INSTR_SET_STATE: return &u.setState;
|
||||
case ROSE_INSTR_SET_GROUPS: return &u.setGroups;
|
||||
case ROSE_INSTR_SPARSE_ITER_BEGIN: return &u.sparseIterBegin;
|
||||
case ROSE_INSTR_SPARSE_ITER_NEXT: return &u.sparseIterNext;
|
||||
case ROSE_INSTR_END: return &u.end;
|
||||
}
|
||||
assert(0);
|
||||
@ -196,6 +199,7 @@ public:
|
||||
|
||||
size_t length() const {
|
||||
switch (code()) {
|
||||
case ROSE_INSTR_CHECK_DEPTH: return sizeof(u.checkDepth);
|
||||
case ROSE_INSTR_CHECK_ONLY_EOD: return sizeof(u.checkOnlyEod);
|
||||
case ROSE_INSTR_CHECK_BOUNDS: return sizeof(u.checkBounds);
|
||||
case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled);
|
||||
@ -214,12 +218,15 @@ public:
|
||||
case ROSE_INSTR_REPORT_SOM_KNOWN: return sizeof(u.reportSomKnown);
|
||||
case ROSE_INSTR_SET_STATE: return sizeof(u.setState);
|
||||
case ROSE_INSTR_SET_GROUPS: return sizeof(u.setGroups);
|
||||
case ROSE_INSTR_SPARSE_ITER_BEGIN: return sizeof(u.sparseIterBegin);
|
||||
case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext);
|
||||
case ROSE_INSTR_END: return sizeof(u.end);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
union {
|
||||
ROSE_STRUCT_CHECK_DEPTH checkDepth;
|
||||
ROSE_STRUCT_CHECK_ONLY_EOD checkOnlyEod;
|
||||
ROSE_STRUCT_CHECK_BOUNDS checkBounds;
|
||||
ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled;
|
||||
@ -238,6 +245,8 @@ public:
|
||||
ROSE_STRUCT_REPORT_SOM_KNOWN reportSomKnown;
|
||||
ROSE_STRUCT_SET_STATE setState;
|
||||
ROSE_STRUCT_SET_GROUPS setGroups;
|
||||
ROSE_STRUCT_SPARSE_ITER_BEGIN sparseIterBegin;
|
||||
ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext;
|
||||
ROSE_STRUCT_END end;
|
||||
} u;
|
||||
};
|
||||
@ -2565,7 +2574,7 @@ getLiteralInfoByFinalId(const RoseBuildImpl &build, u32 final_id) {
|
||||
*/
|
||||
static
|
||||
vector<RoseInstruction>
|
||||
flattenRoleProgram(const vector<vector<RoseInstruction>> &programs) {
|
||||
flattenProgram(const vector<vector<RoseInstruction>> &programs) {
|
||||
vector<RoseInstruction> out;
|
||||
|
||||
vector<u32> offsets; // offset of each instruction (bytes)
|
||||
@ -2601,6 +2610,10 @@ flattenRoleProgram(const vector<vector<RoseInstruction>> &programs) {
|
||||
assert(targets[i] > offsets[i]); // jumps always progress
|
||||
ri.u.anchoredDelay.done_jump = targets[i] - offsets[i];
|
||||
break;
|
||||
case ROSE_INSTR_CHECK_DEPTH:
|
||||
assert(targets[i] > offsets[i]);
|
||||
ri.u.checkDepth.fail_jump = targets[i] - offsets[i];
|
||||
break;
|
||||
case ROSE_INSTR_CHECK_ONLY_EOD:
|
||||
assert(targets[i] > offsets[i]);
|
||||
ri.u.checkOnlyEod.fail_jump = targets[i] - offsets[i];
|
||||
@ -2630,9 +2643,13 @@ flattenRoleProgram(const vector<vector<RoseInstruction>> &programs) {
|
||||
}
|
||||
|
||||
static
|
||||
u32 writeRoleProgram(build_context &bc, vector<RoseInstruction> &program) {
|
||||
DEBUG_PRINTF("writing %zu instructions\n", program.size());
|
||||
u32 writeProgram(build_context &bc, vector<RoseInstruction> &program) {
|
||||
if (program.empty()) {
|
||||
DEBUG_PRINTF("no program\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("writing %zu instructions\n", program.size());
|
||||
u32 programOffset = 0;
|
||||
for (const auto &ri : program) {
|
||||
u32 offset =
|
||||
@ -2696,32 +2713,6 @@ bool hasEodAnchors(const RoseBuildImpl &tbi, const build_context &bc,
|
||||
return false;
|
||||
}
|
||||
|
||||
/* creates (and adds to rose) a sparse iterator visiting pred states/roles,
|
||||
* returns a pair:
|
||||
* - the offset of the itermap
|
||||
* - the offset for the sparse iterator.
|
||||
*/
|
||||
static
|
||||
pair<u32, u32> addPredSparseIter(build_context &bc,
|
||||
const map<u32, u32> &predPrograms) {
|
||||
vector<u32> keys;
|
||||
vector<u32> programTable;
|
||||
for (const auto &elem : predPrograms) {
|
||||
keys.push_back(elem.first);
|
||||
programTable.push_back(elem.second);
|
||||
}
|
||||
|
||||
vector<mmbit_sparse_iter> iter;
|
||||
mmbBuildSparseIterator(iter, keys, bc.numStates);
|
||||
assert(!iter.empty());
|
||||
DEBUG_PRINTF("iter size = %zu\n", iter.size());
|
||||
|
||||
u32 iterOffset = addIteratorToTable(bc, iter);
|
||||
u32 programTableOffset =
|
||||
add_to_engine_blob(bc, begin(programTable), end(programTable));
|
||||
return make_pair(programTableOffset, iterOffset);
|
||||
}
|
||||
|
||||
static
|
||||
void fillLookaroundTables(char *look_base, char *reach_base,
|
||||
const vector<LookEntry> &look_vec) {
|
||||
@ -2770,7 +2761,6 @@ void createLiteralEntry(const RoseBuildImpl &tbi, build_context &bc,
|
||||
* literal entry */
|
||||
const auto &lit_infos = getLiteralInfoByFinalId(tbi, final_id);
|
||||
const rose_literal_info &arb_lit_info = **lit_infos.begin();
|
||||
const auto &vertices = arb_lit_info.vertices;
|
||||
|
||||
literalTable.push_back(RoseLiteral());
|
||||
RoseLiteral &tl = literalTable.back();
|
||||
@ -2784,11 +2774,6 @@ void createLiteralEntry(const RoseBuildImpl &tbi, build_context &bc,
|
||||
assert(tl.groups || tbi.literals.right.at(literalId).table == ROSE_ANCHORED
|
||||
|| tbi.literals.right.at(literalId).table == ROSE_EVENT);
|
||||
|
||||
// Minimum depth based on this literal's roles.
|
||||
tl.minDepth = calcMinDepth(bc.depths, vertices);
|
||||
|
||||
DEBUG_PRINTF("lit %u: role minDepth=%u\n", final_id, tl.minDepth);
|
||||
|
||||
// If this literal squashes its group behind it, store that data too
|
||||
tl.squashesGroup = arb_lit_info.squash_group;
|
||||
|
||||
@ -3150,8 +3135,8 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v,
|
||||
}
|
||||
|
||||
static
|
||||
vector<RoseInstruction> makeRoleProgram(RoseBuildImpl &build, build_context &bc,
|
||||
const RoseEdge &e) {
|
||||
vector<RoseInstruction> makeProgram(RoseBuildImpl &build, build_context &bc,
|
||||
const RoseEdge &e) {
|
||||
const RoseGraph &g = build.g;
|
||||
auto v = target(e, g);
|
||||
|
||||
@ -3185,69 +3170,6 @@ vector<RoseInstruction> makeRoleProgram(RoseBuildImpl &build, build_context &bc,
|
||||
return program;
|
||||
}
|
||||
|
||||
static
|
||||
void findRootEdges(const RoseBuildImpl &build, RoseVertex src,
|
||||
map<u32, flat_set<RoseEdge>> &root_edges_map) {
|
||||
const auto &g = build.g;
|
||||
for (const auto &e : out_edges_range(src, g)) {
|
||||
const auto &v = target(e, g);
|
||||
if (build.hasDirectFinalId(v)) {
|
||||
continue; // Skip direct reports.
|
||||
}
|
||||
for (auto lit_id : g[v].literals) {
|
||||
assert(lit_id < build.literal_info.size());
|
||||
u32 final_id = build.literal_info.at(lit_id).final_id;
|
||||
if (final_id != MO_INVALID_IDX) {
|
||||
root_edges_map[final_id].insert(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void buildRootRolePrograms(RoseBuildImpl &build, build_context &bc,
|
||||
vector<RoseLiteral> &literalTable) {
|
||||
const auto &g = build.g;
|
||||
|
||||
map<u32, flat_set<RoseEdge>> root_edges_map; // lit id -> root edges
|
||||
findRootEdges(build, build.root, root_edges_map);
|
||||
findRootEdges(build, build.anchored_root, root_edges_map);
|
||||
|
||||
for (u32 id = 0; id < literalTable.size(); id++) {
|
||||
const auto &root_edges = root_edges_map[id];
|
||||
DEBUG_PRINTF("lit %u has %zu root edges\n", id, root_edges.size());
|
||||
|
||||
// Sort edges by (source, target) vertex indices to ensure
|
||||
// deterministic program construction.
|
||||
vector<RoseEdge> ordered_edges(begin(root_edges), end(root_edges));
|
||||
sort(begin(ordered_edges), end(ordered_edges),
|
||||
[&g](const RoseEdge &a, const RoseEdge &b) {
|
||||
return tie(g[source(a, g)].idx, g[target(a, g)].idx) <
|
||||
tie(g[source(b, g)].idx, g[target(b, g)].idx);
|
||||
});
|
||||
|
||||
vector<vector<RoseInstruction>> root_prog;
|
||||
for (const auto &e : ordered_edges) {
|
||||
DEBUG_PRINTF("edge (%zu,%zu)\n", g[source(e, g)].idx,
|
||||
g[target(e, g)].idx);
|
||||
auto role_prog = makeRoleProgram(build, bc, e);
|
||||
if (role_prog.empty()) {
|
||||
continue;
|
||||
}
|
||||
root_prog.push_back(role_prog);
|
||||
}
|
||||
|
||||
RoseLiteral &tl = literalTable[id];
|
||||
if (root_prog.empty()) {
|
||||
tl.rootProgramOffset = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto final_program = flattenRoleProgram(root_prog);
|
||||
tl.rootProgramOffset = writeRoleProgram(bc, final_program);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void assignStateIndices(const RoseBuildImpl &build, build_context &bc) {
|
||||
const auto &g = build.g;
|
||||
@ -3399,13 +3321,12 @@ void makeRoleCheckNotHandled(build_context &bc, RoseVertex v,
|
||||
}
|
||||
|
||||
static
|
||||
vector<RoseInstruction> makeSparseIterProgram(RoseBuildImpl &build,
|
||||
build_context &bc,
|
||||
const RoseEdge &e) {
|
||||
vector<RoseInstruction> makePredProgram(RoseBuildImpl &build, build_context &bc,
|
||||
const RoseEdge &e) {
|
||||
const RoseGraph &g = build.g;
|
||||
const RoseVertex v = target(e, g);
|
||||
|
||||
auto program = makeRoleProgram(build, bc, e);
|
||||
auto program = makeProgram(build, bc, e);
|
||||
|
||||
if (hasGreaterInDegree(1, v, g)) {
|
||||
// Only necessary when there is more than one pred.
|
||||
@ -3415,75 +3336,215 @@ vector<RoseInstruction> makeSparseIterProgram(RoseBuildImpl &build,
|
||||
return program;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the pair (program offset, sparse iter offset).
|
||||
*/
|
||||
static
|
||||
void buildLitSparseIter(RoseBuildImpl &build, build_context &bc,
|
||||
vector<RoseVertex> &verts, RoseLiteral &tl) {
|
||||
const auto &g = build.g;
|
||||
pair<u32, u32> makeSparseIterProgram(build_context &bc,
|
||||
map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
|
||||
const vector<RoseVertex> &verts,
|
||||
const vector<RoseInstruction> &root_program) {
|
||||
vector<RoseInstruction> program;
|
||||
u32 iter_offset = 0;
|
||||
|
||||
if (verts.empty()) {
|
||||
// This literal has no non-root roles => no sparse iter
|
||||
tl.iterOffset = ROSE_OFFSET_INVALID;
|
||||
tl.iterProgramOffset = 0;
|
||||
return;
|
||||
if (!predProgramLists.empty()) {
|
||||
// First, add the iterator itself.
|
||||
vector<u32> keys;
|
||||
for (const auto &elem : predProgramLists) {
|
||||
keys.push_back(elem.first);
|
||||
}
|
||||
DEBUG_PRINTF("%zu keys: %s\n", keys.size(),
|
||||
as_string_list(keys).c_str());
|
||||
|
||||
vector<mmbit_sparse_iter> iter;
|
||||
mmbBuildSparseIterator(iter, keys, bc.numStates);
|
||||
assert(!iter.empty());
|
||||
iter_offset = addIteratorToTable(bc, iter);
|
||||
|
||||
// Construct our program, starting with the SPARSE_ITER_BEGIN
|
||||
// instruction, keeping track of the jump offset for each sub-program.
|
||||
vector<u32> jump_table;
|
||||
u32 curr_offset = 0;
|
||||
|
||||
// Add a pre-check for min depth, if it's useful.
|
||||
if (!verts.empty()) {
|
||||
u32 min_depth = calcMinDepth(bc.depths, verts);
|
||||
if (min_depth > 1) {
|
||||
auto ri = RoseInstruction(ROSE_INSTR_CHECK_DEPTH);
|
||||
ri.u.checkDepth.min_depth = min_depth;
|
||||
program.push_back(ri);
|
||||
curr_offset = ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
|
||||
}
|
||||
}
|
||||
|
||||
program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN));
|
||||
curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN);
|
||||
|
||||
for (const auto &e : predProgramLists) {
|
||||
DEBUG_PRINTF("subprogram %zu has offset %u\n", jump_table.size(),
|
||||
curr_offset);
|
||||
jump_table.push_back(curr_offset);
|
||||
auto subprog = flattenProgram(e.second);
|
||||
|
||||
if (e.first != keys.back()) {
|
||||
// For all but the last subprogram, replace the END instruction
|
||||
// with a SPARSE_ITER_NEXT.
|
||||
assert(!subprog.empty());
|
||||
assert(subprog.back().code() == ROSE_INSTR_END);
|
||||
subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT);
|
||||
}
|
||||
|
||||
for (const auto &ri : subprog) {
|
||||
program.push_back(ri);
|
||||
curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
|
||||
}
|
||||
}
|
||||
|
||||
const u32 end_offset = curr_offset - ROUNDUP_N(program.back().length(),
|
||||
ROSE_INSTR_MIN_ALIGN);
|
||||
|
||||
// Write the jump table into the bytecode.
|
||||
const u32 jump_table_offset =
|
||||
add_to_engine_blob(bc, begin(jump_table), end(jump_table));
|
||||
|
||||
// Fix up the instruction operands.
|
||||
auto keys_it = begin(keys);
|
||||
curr_offset = 0;
|
||||
for (size_t i = 0; i < program.size(); i++) {
|
||||
auto &ri = program[i];
|
||||
switch (ri.code()) {
|
||||
case ROSE_INSTR_CHECK_DEPTH:
|
||||
ri.u.checkDepth.fail_jump = end_offset - curr_offset;
|
||||
break;
|
||||
case ROSE_INSTR_SPARSE_ITER_BEGIN:
|
||||
ri.u.sparseIterBegin.iter_offset = iter_offset;
|
||||
ri.u.sparseIterBegin.jump_table = jump_table_offset;
|
||||
ri.u.sparseIterBegin.fail_jump = end_offset - curr_offset;
|
||||
break;
|
||||
case ROSE_INSTR_SPARSE_ITER_NEXT:
|
||||
ri.u.sparseIterNext.iter_offset = iter_offset;
|
||||
ri.u.sparseIterNext.jump_table = jump_table_offset;
|
||||
assert(keys_it != end(keys));
|
||||
ri.u.sparseIterNext.state = *keys_it++;
|
||||
ri.u.sparseIterNext.fail_jump = end_offset - curr_offset;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
|
||||
}
|
||||
}
|
||||
|
||||
// Deterministic ordering.
|
||||
sort(begin(verts), end(verts),
|
||||
[&g](RoseVertex a, RoseVertex b) { return g[a].idx < g[b].idx; });
|
||||
// If we have a root program, replace the END instruction with it. Note
|
||||
// that the root program has already been flattened.
|
||||
if (!root_program.empty()) {
|
||||
if (!program.empty()) {
|
||||
assert(program.back().code() == ROSE_INSTR_END);
|
||||
program.pop_back();
|
||||
}
|
||||
program.insert(end(program), begin(root_program), end(root_program));
|
||||
}
|
||||
|
||||
return {writeProgram(bc, program), iter_offset};
|
||||
}
|
||||
|
||||
static
|
||||
u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc,
|
||||
const vector<RoseEdge> &lit_edges) {
|
||||
const auto &g = build.g;
|
||||
|
||||
DEBUG_PRINTF("%zu lit edges\n", lit_edges.size());
|
||||
|
||||
// pred state id -> list of programs
|
||||
map<u32, vector<vector<RoseInstruction>>> predProgramLists;
|
||||
vector<RoseVertex> nonroot_verts;
|
||||
|
||||
for (const auto &v : verts) {
|
||||
DEBUG_PRINTF("vertex %zu\n", g[v].idx);
|
||||
for (const auto &e : in_edges_range(v, g)) {
|
||||
const auto &u = source(e, g);
|
||||
if (build.isAnyStart(u)) {
|
||||
continue; // Root roles are not handled with sparse iterator.
|
||||
}
|
||||
|
||||
assert(contains(bc.roleStateIndices, u));
|
||||
u32 pred_state = bc.roleStateIndices.at(u);
|
||||
|
||||
DEBUG_PRINTF("pred %zu (state %u)\n", g[u].idx, pred_state);
|
||||
|
||||
auto program = makeSparseIterProgram(build, bc, e);
|
||||
predProgramLists[pred_state].push_back(program);
|
||||
// Construct sparse iter sub-programs.
|
||||
for (const auto &e : lit_edges) {
|
||||
const auto &u = source(e, g);
|
||||
if (build.isAnyStart(u)) {
|
||||
continue; // Root roles are not handled with sparse iterator.
|
||||
}
|
||||
DEBUG_PRINTF("sparse iter edge (%zu,%zu)\n", g[u].idx,
|
||||
g[target(e, g)].idx);
|
||||
assert(contains(bc.roleStateIndices, u));
|
||||
u32 pred_state = bc.roleStateIndices.at(u);
|
||||
auto program = makePredProgram(build, bc, e);
|
||||
predProgramLists[pred_state].push_back(program);
|
||||
nonroot_verts.push_back(target(e, g));
|
||||
}
|
||||
|
||||
map<u32, u32> predPrograms;
|
||||
for (const auto &e : predProgramLists) {
|
||||
auto program = flattenRoleProgram(e.second);
|
||||
u32 offset = writeRoleProgram(bc, program);
|
||||
predPrograms.emplace(e.first, offset);
|
||||
// Construct sub-program for handling root roles.
|
||||
vector<vector<RoseInstruction>> root_programs;
|
||||
for (const auto &e : lit_edges) {
|
||||
const auto &u = source(e, g);
|
||||
if (!build.isAnyStart(u)) {
|
||||
continue;
|
||||
}
|
||||
DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].idx, g[target(e, g)].idx);
|
||||
auto role_prog = makeProgram(build, bc, e);
|
||||
if (role_prog.empty()) {
|
||||
continue;
|
||||
}
|
||||
root_programs.push_back(role_prog);
|
||||
}
|
||||
|
||||
tie(tl.iterProgramOffset, tl.iterOffset) =
|
||||
addPredSparseIter(bc, predPrograms);
|
||||
vector<RoseInstruction> root_program;
|
||||
if (!root_programs.empty()) {
|
||||
root_program = flattenProgram(root_programs);
|
||||
}
|
||||
|
||||
// Put it all together.
|
||||
return makeSparseIterProgram(bc, predProgramLists, nonroot_verts,
|
||||
root_program).first;
|
||||
}
|
||||
|
||||
// Build sparse iterators for literals.
|
||||
static
|
||||
void buildSparseIter(RoseBuildImpl &build, build_context &bc,
|
||||
vector<RoseLiteral> &literalTable) {
|
||||
const RoseGraph &g = build.g;
|
||||
map<u32, vector<RoseEdge>> findEdgesByLiteral(const RoseBuildImpl &build) {
|
||||
// Use a set of edges while building the map to cull duplicates.
|
||||
map<u32, flat_set<RoseEdge>> unique_lit_edge_map;
|
||||
|
||||
// Find all our non-root roles.
|
||||
ue2::unordered_map<u32, vector<RoseVertex>> litNonRootVertices;
|
||||
for (const auto &v : vertices_range(g)) {
|
||||
if (build.isRootSuccessor(v)) {
|
||||
const auto &g = build.g;
|
||||
for (const auto &e : edges_range(g)) {
|
||||
const auto &v = target(e, g);
|
||||
if (build.hasDirectFinalId(v)) {
|
||||
// Skip direct reports, which do not have RoseLiteral entries.
|
||||
continue;
|
||||
}
|
||||
for (const auto &lit_id : g[v].literals) {
|
||||
assert(lit_id < build.literal_info.size());
|
||||
u32 final_id = build.literal_info.at(lit_id).final_id;
|
||||
litNonRootVertices[final_id].push_back(v);
|
||||
if (final_id != MO_INVALID_IDX) {
|
||||
unique_lit_edge_map[final_id].insert(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build output map, sorting edges by (source, target) vertex index.
|
||||
map<u32, vector<RoseEdge>> lit_edge_map;
|
||||
for (const auto &m : unique_lit_edge_map) {
|
||||
auto edge_list = vector<RoseEdge>(begin(m.second), end(m.second));
|
||||
sort(begin(edge_list), end(edge_list),
|
||||
[&g](const RoseEdge &a, const RoseEdge &b) {
|
||||
return tie(g[source(a, g)].idx, g[target(a, g)].idx) <
|
||||
tie(g[source(b, g)].idx, g[target(b, g)].idx);
|
||||
});
|
||||
lit_edge_map.emplace(m.first, edge_list);
|
||||
}
|
||||
|
||||
return lit_edge_map;
|
||||
}
|
||||
|
||||
/** \brief Build the interpreter program for each literal. */
|
||||
static
|
||||
void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc,
|
||||
vector<RoseLiteral> &literalTable) {
|
||||
auto lit_edge_map = findEdgesByLiteral(build);
|
||||
|
||||
for (u32 finalId = 0; finalId != literalTable.size(); ++finalId) {
|
||||
buildLitSparseIter(build, bc, litNonRootVertices[finalId],
|
||||
literalTable[finalId]);
|
||||
const auto &lit_edges = lit_edge_map[finalId];
|
||||
u32 offset = buildLiteralProgram(build, bc, lit_edges);
|
||||
literalTable[finalId].programOffset = offset;
|
||||
}
|
||||
}
|
||||
|
||||
@ -3514,9 +3575,11 @@ vector<RoseInstruction> makeEodAnchorProgram(RoseBuildImpl &build,
|
||||
return program;
|
||||
}
|
||||
|
||||
/* returns a pair containing the iter map offset and iter offset */
|
||||
/**
|
||||
* Returns the pair (program offset, sparse iter offset).
|
||||
*/
|
||||
static
|
||||
pair<u32, u32> buildEodAnchorRoles(RoseBuildImpl &build, build_context &bc) {
|
||||
pair<u32, u32> buildEodAnchorProgram(RoseBuildImpl &build, build_context &bc) {
|
||||
const RoseGraph &g = build.g;
|
||||
|
||||
// pred state id -> list of programs
|
||||
@ -3546,15 +3609,35 @@ pair<u32, u32> buildEodAnchorRoles(RoseBuildImpl &build, build_context &bc) {
|
||||
return {0, 0};
|
||||
}
|
||||
|
||||
map<u32, u32> predPrograms;
|
||||
for (const auto &e : predProgramLists) {
|
||||
DEBUG_PRINTF("pred %u has %zu programs\n", e.first, e.second.size());
|
||||
auto program = flattenRoleProgram(e.second);
|
||||
u32 offset = writeRoleProgram(bc, program);
|
||||
predPrograms.emplace(e.first, offset);
|
||||
return makeSparseIterProgram(bc, predProgramLists, {}, {});
|
||||
}
|
||||
|
||||
static
|
||||
u32 writeEodProgram(RoseBuildImpl &build, build_context &bc) {
|
||||
if (build.eod_event_literal_id == MO_INVALID_IDX) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return addPredSparseIter(bc, predPrograms);
|
||||
const RoseGraph &g = build.g;
|
||||
const auto &lit_info = build.literal_info.at(build.eod_event_literal_id);
|
||||
assert(lit_info.delayed_ids.empty());
|
||||
assert(!lit_info.squash_group);
|
||||
assert(!lit_info.requires_benefits);
|
||||
|
||||
// Collect all edges leading into EOD event literal vertices.
|
||||
vector<RoseEdge> edge_list;
|
||||
for (const auto &v : lit_info.vertices) {
|
||||
insert(&edge_list, edge_list.end(), in_edges(v, g));
|
||||
}
|
||||
|
||||
// Sort edge list for determinism, prettiness.
|
||||
sort(begin(edge_list), end(edge_list),
|
||||
[&g](const RoseEdge &a, const RoseEdge &b) {
|
||||
return tie(g[source(a, g)].idx, g[target(a, g)].idx) <
|
||||
tie(g[source(b, g)].idx, g[target(b, g)].idx);
|
||||
});
|
||||
|
||||
return buildLiteralProgram(build, bc, edge_list);
|
||||
}
|
||||
|
||||
static
|
||||
@ -3742,11 +3825,12 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
||||
|
||||
vector<RoseLiteral> literalTable;
|
||||
buildLiteralTable(*this, bc, literalTable);
|
||||
buildSparseIter(*this, bc, literalTable);
|
||||
buildLiteralPrograms(*this, bc, literalTable);
|
||||
|
||||
u32 eodProgramOffset = writeEodProgram(*this, bc);
|
||||
u32 eodIterProgramOffset;
|
||||
u32 eodIterOffset;
|
||||
u32 eodProgramTableOffset;
|
||||
tie(eodProgramTableOffset, eodIterOffset) = buildEodAnchorRoles(*this, bc);
|
||||
tie(eodIterProgramOffset, eodIterOffset) = buildEodAnchorProgram(*this, bc);
|
||||
|
||||
vector<mmbit_sparse_iter> activeLeftIter;
|
||||
buildActiveLeftIter(leftInfoTable, activeLeftIter);
|
||||
@ -3758,9 +3842,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
||||
throw ResourceLimitError();
|
||||
}
|
||||
|
||||
// Write root programs for literals into the engine blob.
|
||||
buildRootRolePrograms(*this, bc, literalTable);
|
||||
|
||||
u32 amatcherOffset = 0;
|
||||
u32 fmatcherOffset = 0;
|
||||
u32 ematcherOffset = 0;
|
||||
@ -3968,8 +4049,9 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
||||
= anchoredReportInverseMapOffset;
|
||||
engine->multidirectOffset = multidirectOffset;
|
||||
|
||||
engine->eodProgramOffset = eodProgramOffset;
|
||||
engine->eodIterProgramOffset = eodIterProgramOffset;
|
||||
engine->eodIterOffset = eodIterOffset;
|
||||
engine->eodProgramTableOffset = eodProgramTableOffset;
|
||||
|
||||
engine->lastByteHistoryIterOffset = lastByteOffset;
|
||||
|
||||
@ -4038,13 +4120,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
||||
write_out(&engine->state_init, (char *)engine.get(), state_scatter,
|
||||
state_scatter_aux_offset);
|
||||
|
||||
if (eod_event_literal_id != MO_INVALID_IDX) {
|
||||
engine->hasEodEventLiteral = 1;
|
||||
DEBUG_PRINTF("eod literal id=%u, final_id=%u\n", eod_event_literal_id,
|
||||
literal_info.at(eod_event_literal_id).final_id);
|
||||
engine->eodLiteralId = literal_info.at(eod_event_literal_id).final_id;
|
||||
}
|
||||
|
||||
if (anchoredIsMulti(*engine)) {
|
||||
DEBUG_PRINTF("multiple anchored dfas\n");
|
||||
engine->maxSafeAnchoredDROffset = 1;
|
||||
|
@ -274,6 +274,13 @@ void allocateFinalLiteralId(RoseBuildImpl &tbi) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// The special EOD event literal has its own program and does not need
|
||||
// a real literal ID.
|
||||
if (i == tbi.eod_event_literal_id) {
|
||||
assert(tbi.eod_event_literal_id != MO_INVALID_IDX);
|
||||
continue;
|
||||
}
|
||||
|
||||
const rose_literal_info &info = tbi.literal_info[i];
|
||||
if (info.requires_benefits) {
|
||||
assert(!tbi.isDelayed(i));
|
||||
|
@ -34,7 +34,6 @@
|
||||
#include "rose_build_impl.h"
|
||||
#include "rose/rose_dump.h"
|
||||
#include "rose_internal.h"
|
||||
#include "rose_program.h"
|
||||
#include "ue2common.h"
|
||||
#include "nfa/nfa_internal.h"
|
||||
#include "nfagraph/ng_dump.h"
|
||||
|
@ -130,12 +130,6 @@ size_t literalsWithPredicate(const RoseEngine *t, Predicate pred) {
|
||||
return count_if(tl, tl_end, pred);
|
||||
}
|
||||
|
||||
static
|
||||
size_t literalsWithDepth(const RoseEngine *t, u8 depth) {
|
||||
return literalsWithPredicate(
|
||||
t, [&depth](const RoseLiteral &l) { return l.minDepth == depth; });
|
||||
}
|
||||
|
||||
static
|
||||
size_t literalsInGroups(const RoseEngine *t, u32 from, u32 to) {
|
||||
rose_group mask = ~((1ULL << from) - 1);
|
||||
@ -195,7 +189,7 @@ void dumpLookaround(ofstream &os, const RoseEngine *t,
|
||||
}
|
||||
|
||||
static
|
||||
void dumpRoleProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
const char *pc_base = pc;
|
||||
for (;;) {
|
||||
u8 code = *(const u8 *)pc;
|
||||
@ -209,6 +203,12 @@ void dumpRoleProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_DEPTH) {
|
||||
os << " min_depth " << u32{ri->min_depth} << endl;
|
||||
os << " fail_jump +" << ri->fail_jump << endl;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_ONLY_EOD) {
|
||||
os << " fail_jump +" << ri->fail_jump << endl;
|
||||
}
|
||||
@ -309,6 +309,21 @@ void dumpRoleProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(SPARSE_ITER_BEGIN) {
|
||||
os << " iter_offset " << ri->iter_offset << endl;
|
||||
os << " jump_table " << ri->jump_table << endl;
|
||||
os << " fail_jump +" << ri->fail_jump << endl;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(SPARSE_ITER_NEXT) {
|
||||
os << " iter_offset " << ri->iter_offset << endl;
|
||||
os << " jump_table " << ri->jump_table << endl;
|
||||
os << " state " << ri->state << endl;
|
||||
os << " fail_jump +" << ri->fail_jump << endl;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(END) { return; }
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
@ -323,30 +338,6 @@ void dumpRoleProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
#undef PROGRAM_CASE
|
||||
#undef PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
static
|
||||
void dumpSparseIterPrograms(ofstream &os, const RoseEngine *t, u32 iterOffset,
|
||||
u32 programTableOffset) {
|
||||
const auto *it =
|
||||
(const mmbit_sparse_iter *)loadFromByteCodeOffset(t, iterOffset);
|
||||
const u32 *programTable =
|
||||
(const u32 *)loadFromByteCodeOffset(t, programTableOffset);
|
||||
|
||||
// Construct a full multibit.
|
||||
const u32 total_bits = t->rolesWithStateCount;
|
||||
const vector<u8> bits(mmbit_size(total_bits), u8{0xff});
|
||||
|
||||
struct mmbit_sparse_state s[MAX_SPARSE_ITER_STATES];
|
||||
u32 idx = 0;
|
||||
for (u32 i = mmbit_sparse_iter_begin(bits.data(), total_bits, &idx, it, s);
|
||||
i != MMB_INVALID;
|
||||
i = mmbit_sparse_iter_next(bits.data(), total_bits, i, &idx, it, s)) {
|
||||
u32 programOffset = programTable[idx];
|
||||
os << "Sparse Iter Program " << idx << " triggered by state " << i
|
||||
<< " @ " << programOffset << ":" << endl;
|
||||
dumpRoleProgram(os, t, (const char *)t + programOffset);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) {
|
||||
ofstream os(filename);
|
||||
@ -359,18 +350,11 @@ void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) {
|
||||
os << "Literal " << i << endl;
|
||||
os << "---------------" << endl;
|
||||
|
||||
if (lit->rootProgramOffset) {
|
||||
os << "Root Program @ " << lit->rootProgramOffset << ":" << endl;
|
||||
dumpRoleProgram(os, t, base + lit->rootProgramOffset);
|
||||
if (lit->programOffset) {
|
||||
os << "Program @ " << lit->programOffset << ":" << endl;
|
||||
dumpProgram(os, t, base + lit->programOffset);
|
||||
} else {
|
||||
os << "<No Root Program>" << endl;
|
||||
}
|
||||
|
||||
if (lit->iterOffset != ROSE_OFFSET_INVALID) {
|
||||
dumpSparseIterPrograms(os, t, lit->iterOffset,
|
||||
lit->iterProgramOffset);
|
||||
} else {
|
||||
os << "<No Sparse Iter Programs>" << endl;
|
||||
os << "<No Program>" << endl;
|
||||
}
|
||||
|
||||
os << endl;
|
||||
@ -382,12 +366,23 @@ void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) {
|
||||
static
|
||||
void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) {
|
||||
ofstream os(filename);
|
||||
const char *base = (const char *)t;
|
||||
|
||||
if (t->eodIterOffset) {
|
||||
dumpSparseIterPrograms(os, t, t->eodIterOffset,
|
||||
t->eodProgramTableOffset);
|
||||
os << "Unconditional EOD Program:" << endl;
|
||||
|
||||
if (t->eodProgramOffset) {
|
||||
dumpProgram(os, t, base + t->eodProgramOffset);
|
||||
os << endl;
|
||||
} else {
|
||||
os << "<No EOD Iter Programs>" << endl;
|
||||
os << "<No EOD Program>" << endl;
|
||||
}
|
||||
|
||||
os << "Sparse Iter EOD Program:" << endl;
|
||||
|
||||
if (t->eodIterProgramOffset) {
|
||||
dumpProgram(os, t, base + t->eodIterProgramOffset);
|
||||
} else {
|
||||
os << "<No EOD Iter Program>" << endl;
|
||||
}
|
||||
|
||||
os.close();
|
||||
@ -766,33 +761,15 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
|
||||
literalsWithPredicate(
|
||||
t, [](const RoseLiteral &l) { return l.squashesGroup != 0; }));
|
||||
fprintf(f, " - with benefits : %u\n", t->nonbenefits_base_id);
|
||||
fprintf(f, " - with root program : %zu\n",
|
||||
literalsWithPredicate(t, [](const RoseLiteral &l) {
|
||||
return l.rootProgramOffset != 0;
|
||||
}));
|
||||
fprintf(f, " - with sparse iter : %zu\n",
|
||||
literalsWithPredicate(t, [](const RoseLiteral &l) {
|
||||
return l.iterOffset != ROSE_OFFSET_INVALID;
|
||||
}));
|
||||
fprintf(f, " - with program : %zu\n",
|
||||
literalsWithPredicate(
|
||||
t, [](const RoseLiteral &l) { return l.programOffset != 0; }));
|
||||
fprintf(f, " - in groups ::\n");
|
||||
fprintf(f, " + weak : %zu\n",
|
||||
literalsInGroups(t, 0, t->group_weak_end));
|
||||
fprintf(f, " + general : %zu\n",
|
||||
literalsInGroups(t, t->group_weak_end, sizeof(u64a) * 8));
|
||||
|
||||
u32 depth1 = literalsWithDepth(t, 1);
|
||||
u32 depth2 = literalsWithDepth(t, 2);
|
||||
u32 depth3 = literalsWithDepth(t, 3);
|
||||
u32 depth4 = literalsWithDepth(t, 4);
|
||||
u32 depthN = t->literalCount - (depth1 + depth2 + depth3 + depth4);
|
||||
|
||||
fprintf(f, "\nLiteral depths:\n");
|
||||
fprintf(f, " minimum depth 1 : %u\n", depth1);
|
||||
fprintf(f, " minimum depth 2 : %u\n", depth2);
|
||||
fprintf(f, " minimum depth 3 : %u\n", depth3);
|
||||
fprintf(f, " minimum depth 4 : %u\n", depth4);
|
||||
fprintf(f, " minimum depth >4 : %u\n", depthN);
|
||||
|
||||
fprintf(f, "\n");
|
||||
fprintf(f, " minWidth : %u\n", t->minWidth);
|
||||
fprintf(f, " minWidthExcludingBoundaries : %u\n",
|
||||
@ -840,7 +817,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
|
||||
DUMP_U8(t, hasFloatingDirectReports);
|
||||
DUMP_U8(t, noFloatingRoots);
|
||||
DUMP_U8(t, requiresEodCheck);
|
||||
DUMP_U8(t, hasEodEventLiteral);
|
||||
DUMP_U8(t, hasOutfixesInSmallBlock);
|
||||
DUMP_U8(t, runtimeImpl);
|
||||
DUMP_U8(t, mpvTriggeredByLeaf);
|
||||
@ -882,8 +858,9 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
|
||||
DUMP_U32(t, roseCount);
|
||||
DUMP_U32(t, lookaroundTableOffset);
|
||||
DUMP_U32(t, lookaroundReachOffset);
|
||||
DUMP_U32(t, eodProgramOffset);
|
||||
DUMP_U32(t, eodIterProgramOffset);
|
||||
DUMP_U32(t, eodIterOffset);
|
||||
DUMP_U32(t, eodProgramTableOffset);
|
||||
DUMP_U32(t, lastByteHistoryIterOffset);
|
||||
DUMP_U32(t, minWidth);
|
||||
DUMP_U32(t, minWidthExcludingBoundaries);
|
||||
@ -940,7 +917,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
|
||||
DUMP_U32(t, somRevOffsetOffset);
|
||||
DUMP_U32(t, group_weak_end);
|
||||
DUMP_U32(t, floatingStreamState);
|
||||
DUMP_U32(t, eodLiteralId);
|
||||
fprintf(f, "}\n");
|
||||
fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine));
|
||||
}
|
||||
|
@ -76,38 +76,15 @@ ReportID literalToReport(u32 id) {
|
||||
/** \brief Structure representing a literal. */
|
||||
struct RoseLiteral {
|
||||
/**
|
||||
* \brief Role program to run unconditionally when this literal is seen.
|
||||
* \brief Program to run when this literal is seen.
|
||||
*
|
||||
* Offset is relative to RoseEngine, or zero for no program.
|
||||
*/
|
||||
u32 rootProgramOffset;
|
||||
|
||||
/**
|
||||
* \brief Offset of sparse iterator (mmbit_sparse_iter pointer) over
|
||||
* predecessor states.
|
||||
*
|
||||
* Offset is relative to RoseEngine, set to ROSE_OFFSET_INVALID for no
|
||||
* iterator.
|
||||
*/
|
||||
u32 iterOffset;
|
||||
|
||||
/**
|
||||
* \brief Table of role programs to run when triggered by the sparse
|
||||
* iterator, indexed by dense sparse iter index.
|
||||
*
|
||||
* Offset is relative to RoseEngine, zero for no programs.
|
||||
*/
|
||||
u32 iterProgramOffset;
|
||||
u32 programOffset;
|
||||
|
||||
/** \brief Bitset of groups that cause this literal to fire. */
|
||||
rose_group groups;
|
||||
|
||||
/**
|
||||
* \brief The minimum depth of this literal in the Rose graph (for depths
|
||||
* greater than 1).
|
||||
*/
|
||||
u8 minDepth;
|
||||
|
||||
/**
|
||||
* \brief True if this literal switches off its group behind it when it
|
||||
* sets a role.
|
||||
@ -382,7 +359,6 @@ struct RoseEngine {
|
||||
u8 noFloatingRoots; /* only need to run the anchored table if something
|
||||
* matched in the anchored table */
|
||||
u8 requiresEodCheck; /* stuff happens at eod time */
|
||||
u8 hasEodEventLiteral; // fires a ROSE_EVENT literal at eod time.
|
||||
u8 hasOutfixesInSmallBlock; /**< has at least one outfix that must run even
|
||||
in small block scans. */
|
||||
u8 runtimeImpl; /**< can we just run the floating table or a single outfix?
|
||||
@ -448,8 +424,9 @@ struct RoseEngine {
|
||||
u32 lookaroundReachOffset; /**< base of lookaround reach bitvectors (32
|
||||
* bytes each) */
|
||||
|
||||
u32 eodIterOffset; // or 0 if no eod iterator
|
||||
u32 eodProgramTableOffset;
|
||||
u32 eodProgramOffset; //!< Unconditional EOD program, otherwise 0.
|
||||
u32 eodIterProgramOffset; // or 0 if no eod iterator program
|
||||
u32 eodIterOffset; // offset to EOD sparse iter or 0 if none
|
||||
|
||||
u32 lastByteHistoryIterOffset; // if non-zero
|
||||
|
||||
@ -512,7 +489,6 @@ struct RoseEngine {
|
||||
u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */
|
||||
u32 group_weak_end; /* end of weak groups, debugging only */
|
||||
u32 floatingStreamState; // size in bytes
|
||||
u32 eodLiteralId; // literal ID for eod ROSE_EVENT if used, otherwise 0.
|
||||
|
||||
struct scatter_full_plan state_init;
|
||||
};
|
||||
|
@ -42,6 +42,7 @@
|
||||
/** \brief Role program instruction opcodes. */
|
||||
enum RoseInstructionCode {
|
||||
ROSE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher.
|
||||
ROSE_INSTR_CHECK_DEPTH, //!< Check minimum graph depth.
|
||||
ROSE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD.
|
||||
ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance from offset 0.
|
||||
ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled".
|
||||
@ -59,43 +60,51 @@ enum RoseInstructionCode {
|
||||
ROSE_INSTR_REPORT_SOM_KNOWN, //!< Rose role knows its SOM offset.
|
||||
ROSE_INSTR_SET_STATE, //!< Switch a state index on.
|
||||
ROSE_INSTR_SET_GROUPS, //!< Set some literal group bits.
|
||||
ROSE_INSTR_SPARSE_ITER_BEGIN, //!< Begin running a sparse iter over states.
|
||||
ROSE_INSTR_SPARSE_ITER_NEXT, //!< Continue running sparse iter over states.
|
||||
ROSE_INSTR_END //!< End of program.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_ANCHORED_DELAY {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u8 depth; //!< Depth for this state.
|
||||
rose_group groups; //!< Bitmask.
|
||||
u32 done_jump; //!< Jump forward this many bytes if successful.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_CHECK_DEPTH {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u8 min_depth; //!< Minimum depth of this literal in the Rose graph.
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_CHECK_ONLY_EOD {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_CHECK_BOUNDS {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 min_bound; //!< Min distance from zero.
|
||||
u32 max_bound; //!< Max distance from zero (or ROSE_BOUND_INF).
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_CHECK_NOT_HANDLED {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 key; //!< Key in the "handled_roles" fatbit in scratch.
|
||||
u32 fail_jump; //!< Jump forward this many bytes if we have seen key before.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_CHECK_LOOKAROUND {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 index;
|
||||
u32 count;
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_CHECK_LEFTFIX {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 queue; //!< Queue of leftfix to check.
|
||||
u32 lag; //!< Lag of leftfix for this case.
|
||||
ReportID report; //!< ReportID of leftfix to check.
|
||||
@ -103,72 +112,95 @@ struct ROSE_STRUCT_CHECK_LEFTFIX {
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_SOM_ADJUST {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 distance; //!< Distance to EOM.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_SOM_LEFTFIX {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 queue; //!< Queue index of leftfix providing SOM.
|
||||
u32 lag; //!< Lag of leftfix for this case.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_TRIGGER_INFIX {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u8 cancel; //!< Cancels previous top event.
|
||||
u32 queue; //!< Queue index of infix.
|
||||
u32 event; //!< Queue event, from MQE_*.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_TRIGGER_SUFFIX {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 queue; //!< Queue index of suffix.
|
||||
u32 event; //!< Queue event, from MQE_*.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_REPORT {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_REPORT_CHAIN {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_REPORT_EOD {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_REPORT_SOM_INT {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_REPORT_SOM {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_REPORT_SOM_KNOWN {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_SET_STATE {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u8 depth; //!< Depth for this state.
|
||||
u32 index; //!< State index in multibit.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_SET_GROUPS {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
rose_group groups; //!< Bitmask.
|
||||
};
|
||||
|
||||
/**
|
||||
* Note that the offsets in the jump table are always relative to the start of
|
||||
* the program, not the current instruction.
|
||||
*/
|
||||
struct ROSE_STRUCT_SPARSE_ITER_BEGIN {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 iter_offset; //!< Offset of mmbit_sparse_iter structure.
|
||||
u32 jump_table; //!< Offset of jump table indexed by sparse iterator.
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
/**
|
||||
* Note that the offsets in the jump table are always relative to the start of
|
||||
* the program, not the current instruction.
|
||||
*/
|
||||
struct ROSE_STRUCT_SPARSE_ITER_NEXT {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 iter_offset; //!< Offset of mmbit_sparse_iter structure.
|
||||
u32 jump_table; //!< Offset of jump table indexed by sparse iterator.
|
||||
u32 state; // Current state index.
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_END {
|
||||
u8 code; //!< From enum RoseRoleInstructionCode.
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
};
|
||||
|
||||
#endif // ROSE_ROSE_PROGRAM_H
|
||||
|
Loading…
x
Reference in New Issue
Block a user