rose: Extend program to handle literals, iterators

- cleanups
- add sparse iter instructions
- merge "root" and "sparse iter" programs together
- move program execution to new file program_runtime.h
- simplify EOD execution
This commit is contained in:
Justin Viiret 2015-12-10 11:41:47 +11:00 committed by Matthew Barr
parent 8069e99bee
commit b2ebdac642
12 changed files with 1534 additions and 1503 deletions

View File

@ -467,6 +467,7 @@ set (hs_exec_SRCS
src/rose/match.h src/rose/match.h
src/rose/match.c src/rose/match.c
src/rose/miracle.h src/rose/miracle.h
src/rose/program_runtime.h
src/rose/runtime.h src/rose/runtime.h
src/rose/rose.h src/rose/rose.h
src/rose/rose_internal.h src/rose/rose_internal.h

View File

@ -28,6 +28,7 @@
#include "catchup.h" #include "catchup.h"
#include "match.h" #include "match.h"
#include "program_runtime.h"
#include "rose.h" #include "rose.h"
#include "util/fatbit.h" #include "util/fatbit.h"
@ -107,44 +108,19 @@ hwlmcb_rv_t roseEodRunMatcher(const struct RoseEngine *t, u64a offset,
} }
static rose_inline static rose_inline
int roseEodRunIterator(const struct RoseEngine *t, u8 *state, u64a offset, int roseEodRunIterator(const struct RoseEngine *t, u64a offset,
struct hs_scratch *scratch) { struct hs_scratch *scratch) {
if (!t->eodIterOffset) { if (!t->eodIterProgramOffset) {
return MO_CONTINUE_MATCHING; return MO_CONTINUE_MATCHING;
} }
DEBUG_PRINTF("running eod iterator at offset %u\n", t->eodIterOffset); DEBUG_PRINTF("running eod program at offset %u\n", t->eodIterProgramOffset);
const u32 *programTable = getByOffset(t, t->eodProgramTableOffset); int work_done = 0;
const struct mmbit_sparse_iter *it = getByOffset(t, t->eodIterOffset); if (roseRunProgram(t, t->eodIterProgramOffset, offset, &(scratch->tctxt), 0,
assert(ISALIGNED(programTable));
assert(ISALIGNED(it));
// Sparse iterator state was allocated earlier
struct mmbit_sparse_state *s = scratch->sparse_iter_state;
struct fatbit *handled_roles = scratch->handled_roles;
const u32 numStates = t->rolesWithStateCount;
void *role_state = getRoleState(state);
u32 idx = 0;
u32 i = mmbit_sparse_iter_begin(role_state, numStates, &idx, it, s);
fatbit_clear(handled_roles);
int work_done = 0; // not read from in this path.
for (; i != MMB_INVALID;
i = mmbit_sparse_iter_next(role_state, numStates, i, &idx, it, s)) {
DEBUG_PRINTF("pred state %u (iter idx=%u) is on\n", i, idx);
u32 programOffset = programTable[idx];
u64a som = 0;
if (roseRunRoleProgram(t, programOffset, offset, &som,
&(scratch->tctxt),
&work_done) == HWLM_TERMINATE_MATCHING) { &work_done) == HWLM_TERMINATE_MATCHING) {
return MO_HALT_MATCHING; return MO_HALT_MATCHING;
} }
}
return MO_CONTINUE_MATCHING; return MO_CONTINUE_MATCHING;
} }
@ -236,6 +212,27 @@ void roseCheckEodSuffixes(const struct RoseEngine *t, u8 *state, u64a offset,
} }
} }
/**
 * Run the unconditional EOD program, if the engine has one.
 *
 * Nothing is run when t->eodProgramOffset is zero. Returns MO_HALT_MATCHING
 * when the program run yields HWLM_TERMINATE_MATCHING, and
 * MO_CONTINUE_MATCHING in every other case.
 */
static rose_inline
int roseRunEodProgram(const struct RoseEngine *t, u64a offset,
                      struct hs_scratch *scratch) {
    if (t->eodProgramOffset) {
        DEBUG_PRINTF("running eod program at %u\n", t->eodProgramOffset);

        // No delayed literals may still be pending at this point.
        assert(!scratch->tctxt.filledDelayedSlots);

        int work_done = 0;
        if (roseRunProgram(t, t->eodProgramOffset, offset, &scratch->tctxt, 0,
                           &work_done) == HWLM_TERMINATE_MATCHING) {
            return MO_HALT_MATCHING;
        }
    }

    return MO_CONTINUE_MATCHING;
}
static really_inline static really_inline
void roseEodExec_i(const struct RoseEngine *t, u8 *state, u64a offset, void roseEodExec_i(const struct RoseEngine *t, u8 *state, u64a offset,
struct hs_scratch *scratch, const char is_streaming) { struct hs_scratch *scratch, const char is_streaming) {
@ -244,31 +241,20 @@ void roseEodExec_i(const struct RoseEngine *t, u8 *state, u64a offset,
assert(!scratch->core_info.buf || !scratch->core_info.hbuf); assert(!scratch->core_info.buf || !scratch->core_info.hbuf);
assert(!can_stop_matching(scratch)); assert(!can_stop_matching(scratch));
// Fire the special EOD event literal. // Run the unconditional EOD program.
if (t->hasEodEventLiteral) { if (roseRunEodProgram(t, offset, scratch) == MO_HALT_MATCHING) {
DEBUG_PRINTF("firing eod event id %u at offset %llu\n",
t->eodLiteralId, offset);
const struct core_info *ci = &scratch->core_info;
size_t len = ci->buf ? ci->len : ci->hlen;
assert(len || !ci->buf); /* len may be 0 if no history is required
* (bounds checks only can lead to this) */
roseRunEvent(len, t->eodLiteralId, &scratch->tctxt);
if (can_stop_matching(scratch)) {
DEBUG_PRINTF("user told us to stop\n");
return; return;
} }
}
roseCheckNfaEod(t, state, scratch, offset, is_streaming); roseCheckNfaEod(t, state, scratch, offset, is_streaming);
if (!t->eodIterOffset && !t->ematcherOffset) { if (!t->eodIterProgramOffset && !t->ematcherOffset) {
DEBUG_PRINTF("no eod accepts\n"); DEBUG_PRINTF("no eod accepts\n");
return; return;
} }
// Handle pending EOD reports. // Handle pending EOD reports.
int itrv = roseEodRunIterator(t, state, offset, scratch); int itrv = roseEodRunIterator(t, offset, scratch);
if (itrv == MO_HALT_MATCHING) { if (itrv == MO_HALT_MATCHING) {
return; return;
} }
@ -288,7 +274,7 @@ void roseEodExec_i(const struct RoseEngine *t, u8 *state, u64a offset,
cleanupAfterEodMatcher(t, state, offset, scratch); cleanupAfterEodMatcher(t, state, offset, scratch);
// Fire any new EOD reports. // Fire any new EOD reports.
roseEodRunIterator(t, state, offset, scratch); roseEodRunIterator(t, offset, scratch);
roseCheckEodSuffixes(t, state, offset, scratch); roseCheckEodSuffixes(t, state, offset, scratch);
} }

File diff suppressed because it is too large Load Diff

View File

@ -55,7 +55,6 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctx);
hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id,
void *ctx); void *ctx);
int roseAnchoredCallback(u64a end, u32 id, void *ctx); int roseAnchoredCallback(u64a end, u32 id, void *ctx);
void roseRunEvent(size_t end, u32 id, struct RoseContext *tctxt);
/* Common code, used all over Rose runtime */ /* Common code, used all over Rose runtime */
@ -299,8 +298,4 @@ void roseFlushLastByteHistory(const struct RoseEngine *t, u8 *state,
scratch->sparse_iter_state); scratch->sparse_iter_state);
} }
hwlmcb_rv_t roseRunRoleProgram(const struct RoseEngine *t, u32 programOffset,
u64a end, u64a *som, struct RoseContext *tctxt,
int *work_done);
#endif #endif

1081
src/rose/program_runtime.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -45,6 +45,39 @@ void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch,
RoseCallback callback, RoseCallbackSom som_callback, RoseCallback callback, RoseCallbackSom som_callback,
void *context); void *context);
/**
 * Decide whether a block-mode scan has any work to do at end of data.
 *
 * Returns 1 as soon as one of the following holds, checked in this order:
 *  - the engine has an EOD matcher (t->ematcherOffset is non-zero);
 *  - the engine has an EOD program (t->eodProgramOffset is non-zero);
 *  - mmbit_any() reports something on in the active leaf array;
 *  - the EOD sparse iterator (t->eodIterOffset) finds a role state on.
 * Returns 0 when none of them hold.
 */
static really_inline
int roseBlockHasEodWork(const struct RoseEngine *t,
                        struct hs_scratch *scratch) {
    if (t->ematcherOffset) {
        DEBUG_PRINTF("eod matcher to run\n");
        return 1;
    }

    if (t->eodProgramOffset) {
        DEBUG_PRINTF("has eod program\n");
        return 1;
    }

    void *stream_state = scratch->core_info.state;
    if (mmbit_any(getActiveLeafArray(t, stream_state), t->activeArrayCount)) {
        DEBUG_PRINTF("active outfix/suffix engines\n");
        return 1;
    }

    if (!t->eodIterOffset) {
        return 0; // no EOD iterator to consult
    }

    // Only the first hit matters here: any state on means there is work.
    u32 first_idx;
    const struct mmbit_sparse_iter *eod_iter =
        getByOffset(t, t->eodIterOffset);
    struct mmbit_sparse_state *iter_state = scratch->sparse_iter_state;
    if (mmbit_sparse_iter_begin(getRoleState(stream_state),
                                t->rolesWithStateCount, &first_idx, eod_iter,
                                iter_state) == MMB_INVALID) {
        return 0;
    }

    DEBUG_PRINTF("eod iter has states on\n");
    return 1;
}
/* assumes core_info in scratch has been init to point to data */ /* assumes core_info in scratch has been init to point to data */
static really_inline static really_inline
void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch, void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch,
@ -77,19 +110,8 @@ void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch,
return; return;
} }
struct mmbit_sparse_state *s = scratch->sparse_iter_state; if (!roseBlockHasEodWork(t, scratch)) {
const u32 numStates = t->rolesWithStateCount; DEBUG_PRINTF("no eod work\n");
u8 *state = (u8 *)scratch->core_info.state;
void *role_state = getRoleState(state);
u32 idx = 0;
const struct mmbit_sparse_iter *it
= (const void *)((const u8 *)t + t->eodIterOffset);
if (!t->ematcherOffset && !t->hasEodEventLiteral
&& !mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)
&& (!t->eodIterOffset
|| mmbit_sparse_iter_begin(role_state, numStates, &idx, it, s)
== MMB_INVALID)) {
return; return;
} }

View File

@ -170,6 +170,7 @@ public:
const void *get() const { const void *get() const {
switch (code()) { switch (code()) {
case ROSE_INSTR_CHECK_DEPTH: return &u.checkDepth;
case ROSE_INSTR_CHECK_ONLY_EOD: return &u.checkOnlyEod; case ROSE_INSTR_CHECK_ONLY_EOD: return &u.checkOnlyEod;
case ROSE_INSTR_CHECK_BOUNDS: return &u.checkBounds; case ROSE_INSTR_CHECK_BOUNDS: return &u.checkBounds;
case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled; case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled;
@ -188,6 +189,8 @@ public:
case ROSE_INSTR_REPORT_SOM_KNOWN: return &u.reportSomKnown; case ROSE_INSTR_REPORT_SOM_KNOWN: return &u.reportSomKnown;
case ROSE_INSTR_SET_STATE: return &u.setState; case ROSE_INSTR_SET_STATE: return &u.setState;
case ROSE_INSTR_SET_GROUPS: return &u.setGroups; case ROSE_INSTR_SET_GROUPS: return &u.setGroups;
case ROSE_INSTR_SPARSE_ITER_BEGIN: return &u.sparseIterBegin;
case ROSE_INSTR_SPARSE_ITER_NEXT: return &u.sparseIterNext;
case ROSE_INSTR_END: return &u.end; case ROSE_INSTR_END: return &u.end;
} }
assert(0); assert(0);
@ -196,6 +199,7 @@ public:
size_t length() const { size_t length() const {
switch (code()) { switch (code()) {
case ROSE_INSTR_CHECK_DEPTH: return sizeof(u.checkDepth);
case ROSE_INSTR_CHECK_ONLY_EOD: return sizeof(u.checkOnlyEod); case ROSE_INSTR_CHECK_ONLY_EOD: return sizeof(u.checkOnlyEod);
case ROSE_INSTR_CHECK_BOUNDS: return sizeof(u.checkBounds); case ROSE_INSTR_CHECK_BOUNDS: return sizeof(u.checkBounds);
case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled); case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled);
@ -214,12 +218,15 @@ public:
case ROSE_INSTR_REPORT_SOM_KNOWN: return sizeof(u.reportSomKnown); case ROSE_INSTR_REPORT_SOM_KNOWN: return sizeof(u.reportSomKnown);
case ROSE_INSTR_SET_STATE: return sizeof(u.setState); case ROSE_INSTR_SET_STATE: return sizeof(u.setState);
case ROSE_INSTR_SET_GROUPS: return sizeof(u.setGroups); case ROSE_INSTR_SET_GROUPS: return sizeof(u.setGroups);
case ROSE_INSTR_SPARSE_ITER_BEGIN: return sizeof(u.sparseIterBegin);
case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext);
case ROSE_INSTR_END: return sizeof(u.end); case ROSE_INSTR_END: return sizeof(u.end);
} }
return 0; return 0;
} }
union { union {
ROSE_STRUCT_CHECK_DEPTH checkDepth;
ROSE_STRUCT_CHECK_ONLY_EOD checkOnlyEod; ROSE_STRUCT_CHECK_ONLY_EOD checkOnlyEod;
ROSE_STRUCT_CHECK_BOUNDS checkBounds; ROSE_STRUCT_CHECK_BOUNDS checkBounds;
ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled; ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled;
@ -238,6 +245,8 @@ public:
ROSE_STRUCT_REPORT_SOM_KNOWN reportSomKnown; ROSE_STRUCT_REPORT_SOM_KNOWN reportSomKnown;
ROSE_STRUCT_SET_STATE setState; ROSE_STRUCT_SET_STATE setState;
ROSE_STRUCT_SET_GROUPS setGroups; ROSE_STRUCT_SET_GROUPS setGroups;
ROSE_STRUCT_SPARSE_ITER_BEGIN sparseIterBegin;
ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext;
ROSE_STRUCT_END end; ROSE_STRUCT_END end;
} u; } u;
}; };
@ -2565,7 +2574,7 @@ getLiteralInfoByFinalId(const RoseBuildImpl &build, u32 final_id) {
*/ */
static static
vector<RoseInstruction> vector<RoseInstruction>
flattenRoleProgram(const vector<vector<RoseInstruction>> &programs) { flattenProgram(const vector<vector<RoseInstruction>> &programs) {
vector<RoseInstruction> out; vector<RoseInstruction> out;
vector<u32> offsets; // offset of each instruction (bytes) vector<u32> offsets; // offset of each instruction (bytes)
@ -2601,6 +2610,10 @@ flattenRoleProgram(const vector<vector<RoseInstruction>> &programs) {
assert(targets[i] > offsets[i]); // jumps always progress assert(targets[i] > offsets[i]); // jumps always progress
ri.u.anchoredDelay.done_jump = targets[i] - offsets[i]; ri.u.anchoredDelay.done_jump = targets[i] - offsets[i];
break; break;
case ROSE_INSTR_CHECK_DEPTH:
assert(targets[i] > offsets[i]);
ri.u.checkDepth.fail_jump = targets[i] - offsets[i];
break;
case ROSE_INSTR_CHECK_ONLY_EOD: case ROSE_INSTR_CHECK_ONLY_EOD:
assert(targets[i] > offsets[i]); assert(targets[i] > offsets[i]);
ri.u.checkOnlyEod.fail_jump = targets[i] - offsets[i]; ri.u.checkOnlyEod.fail_jump = targets[i] - offsets[i];
@ -2630,9 +2643,13 @@ flattenRoleProgram(const vector<vector<RoseInstruction>> &programs) {
} }
static static
u32 writeRoleProgram(build_context &bc, vector<RoseInstruction> &program) { u32 writeProgram(build_context &bc, vector<RoseInstruction> &program) {
DEBUG_PRINTF("writing %zu instructions\n", program.size()); if (program.empty()) {
DEBUG_PRINTF("no program\n");
return 0;
}
DEBUG_PRINTF("writing %zu instructions\n", program.size());
u32 programOffset = 0; u32 programOffset = 0;
for (const auto &ri : program) { for (const auto &ri : program) {
u32 offset = u32 offset =
@ -2696,32 +2713,6 @@ bool hasEodAnchors(const RoseBuildImpl &tbi, const build_context &bc,
return false; return false;
} }
/* Builds a sparse iterator over the pred states that are the keys of
 * predPrograms and adds it to the iterator table, then writes the matching
 * table of program offsets (the map's values, in key order) to the engine
 * blob.
 *
 * Returns the pair:
 *  - offset of the program table
 *  - offset of the sparse iterator.
 */
static
pair<u32, u32> addPredSparseIter(build_context &bc,
                                 const map<u32, u32> &predPrograms) {
    vector<u32> pred_states;
    vector<u32> program_offsets;
    for (const auto &entry : predPrograms) {
        pred_states.push_back(entry.first);
        program_offsets.push_back(entry.second);
    }

    vector<mmbit_sparse_iter> sparse_iter;
    mmbBuildSparseIterator(sparse_iter, pred_states, bc.numStates);
    assert(!sparse_iter.empty());
    DEBUG_PRINTF("iter size = %zu\n", sparse_iter.size());

    // The iterator is added before the program table, as in the original
    // ordering of these two writes.
    const u32 iter_off = addIteratorToTable(bc, sparse_iter);
    const u32 table_off = add_to_engine_blob(bc, begin(program_offsets),
                                             end(program_offsets));

    return make_pair(table_off, iter_off);
}
static static
void fillLookaroundTables(char *look_base, char *reach_base, void fillLookaroundTables(char *look_base, char *reach_base,
const vector<LookEntry> &look_vec) { const vector<LookEntry> &look_vec) {
@ -2770,7 +2761,6 @@ void createLiteralEntry(const RoseBuildImpl &tbi, build_context &bc,
* literal entry */ * literal entry */
const auto &lit_infos = getLiteralInfoByFinalId(tbi, final_id); const auto &lit_infos = getLiteralInfoByFinalId(tbi, final_id);
const rose_literal_info &arb_lit_info = **lit_infos.begin(); const rose_literal_info &arb_lit_info = **lit_infos.begin();
const auto &vertices = arb_lit_info.vertices;
literalTable.push_back(RoseLiteral()); literalTable.push_back(RoseLiteral());
RoseLiteral &tl = literalTable.back(); RoseLiteral &tl = literalTable.back();
@ -2784,11 +2774,6 @@ void createLiteralEntry(const RoseBuildImpl &tbi, build_context &bc,
assert(tl.groups || tbi.literals.right.at(literalId).table == ROSE_ANCHORED assert(tl.groups || tbi.literals.right.at(literalId).table == ROSE_ANCHORED
|| tbi.literals.right.at(literalId).table == ROSE_EVENT); || tbi.literals.right.at(literalId).table == ROSE_EVENT);
// Minimum depth based on this literal's roles.
tl.minDepth = calcMinDepth(bc.depths, vertices);
DEBUG_PRINTF("lit %u: role minDepth=%u\n", final_id, tl.minDepth);
// If this literal squashes its group behind it, store that data too // If this literal squashes its group behind it, store that data too
tl.squashesGroup = arb_lit_info.squash_group; tl.squashesGroup = arb_lit_info.squash_group;
@ -3150,7 +3135,7 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v,
} }
static static
vector<RoseInstruction> makeRoleProgram(RoseBuildImpl &build, build_context &bc, vector<RoseInstruction> makeProgram(RoseBuildImpl &build, build_context &bc,
const RoseEdge &e) { const RoseEdge &e) {
const RoseGraph &g = build.g; const RoseGraph &g = build.g;
auto v = target(e, g); auto v = target(e, g);
@ -3185,69 +3170,6 @@ vector<RoseInstruction> makeRoleProgram(RoseBuildImpl &build, build_context &bc,
return program; return program;
} }
static
void findRootEdges(const RoseBuildImpl &build, RoseVertex src,
map<u32, flat_set<RoseEdge>> &root_edges_map) {
const auto &g = build.g;
for (const auto &e : out_edges_range(src, g)) {
const auto &v = target(e, g);
if (build.hasDirectFinalId(v)) {
continue; // Skip direct reports.
}
for (auto lit_id : g[v].literals) {
assert(lit_id < build.literal_info.size());
u32 final_id = build.literal_info.at(lit_id).final_id;
if (final_id != MO_INVALID_IDX) {
root_edges_map[final_id].insert(e);
}
}
}
}
/**
 * Builds the root program for every entry of literalTable and stores its
 * offset in that entry's rootProgramOffset. The offset is zero when the
 * literal has no non-empty root role programs.
 */
static
void buildRootRolePrograms(RoseBuildImpl &build, build_context &bc,
                           vector<RoseLiteral> &literalTable) {
    const auto &g = build.g;

    // lit id -> edges leaving build.root and build.anchored_root
    map<u32, flat_set<RoseEdge>> edges_by_lit;
    findRootEdges(build, build.root, edges_by_lit);
    findRootEdges(build, build.anchored_root, edges_by_lit);

    // Orders edges by (source, target) vertex indices so that program
    // construction is deterministic.
    const auto by_vertex_idx = [&g](const RoseEdge &a, const RoseEdge &b) {
        return tie(g[source(a, g)].idx, g[target(a, g)].idx) <
               tie(g[source(b, g)].idx, g[target(b, g)].idx);
    };

    for (u32 id = 0; id < literalTable.size(); id++) {
        const auto &root_edges = edges_by_lit[id];
        DEBUG_PRINTF("lit %u has %zu root edges\n", id, root_edges.size());

        vector<RoseEdge> ordered_edges(begin(root_edges), end(root_edges));
        sort(begin(ordered_edges), end(ordered_edges), by_vertex_idx);

        // One role program per edge; empty programs are dropped.
        vector<vector<RoseInstruction>> root_prog;
        for (const auto &e : ordered_edges) {
            DEBUG_PRINTF("edge (%zu,%zu)\n", g[source(e, g)].idx,
                         g[target(e, g)].idx);
            auto role_prog = makeRoleProgram(build, bc, e);
            if (!role_prog.empty()) {
                root_prog.push_back(role_prog);
            }
        }

        RoseLiteral &tl = literalTable[id];
        if (root_prog.empty()) {
            tl.rootProgramOffset = 0;
        } else {
            auto final_program = flattenRoleProgram(root_prog);
            tl.rootProgramOffset = writeRoleProgram(bc, final_program);
        }
    }
}
static static
void assignStateIndices(const RoseBuildImpl &build, build_context &bc) { void assignStateIndices(const RoseBuildImpl &build, build_context &bc) {
const auto &g = build.g; const auto &g = build.g;
@ -3399,13 +3321,12 @@ void makeRoleCheckNotHandled(build_context &bc, RoseVertex v,
} }
static static
vector<RoseInstruction> makeSparseIterProgram(RoseBuildImpl &build, vector<RoseInstruction> makePredProgram(RoseBuildImpl &build, build_context &bc,
build_context &bc,
const RoseEdge &e) { const RoseEdge &e) {
const RoseGraph &g = build.g; const RoseGraph &g = build.g;
const RoseVertex v = target(e, g); const RoseVertex v = target(e, g);
auto program = makeRoleProgram(build, bc, e); auto program = makeProgram(build, bc, e);
if (hasGreaterInDegree(1, v, g)) { if (hasGreaterInDegree(1, v, g)) {
// Only necessary when there is more than one pred. // Only necessary when there is more than one pred.
@ -3415,75 +3336,215 @@ vector<RoseInstruction> makeSparseIterProgram(RoseBuildImpl &build,
return program; return program;
} }
/**
* Returns the pair (program offset, sparse iter offset).
*/
static static
void buildLitSparseIter(RoseBuildImpl &build, build_context &bc, pair<u32, u32> makeSparseIterProgram(build_context &bc,
vector<RoseVertex> &verts, RoseLiteral &tl) { map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
const auto &g = build.g; const vector<RoseVertex> &verts,
const vector<RoseInstruction> &root_program) {
vector<RoseInstruction> program;
u32 iter_offset = 0;
if (verts.empty()) { if (!predProgramLists.empty()) {
// This literal has no non-root roles => no sparse iter // First, add the iterator itself.
tl.iterOffset = ROSE_OFFSET_INVALID; vector<u32> keys;
tl.iterProgramOffset = 0; for (const auto &elem : predProgramLists) {
return; keys.push_back(elem.first);
}
DEBUG_PRINTF("%zu keys: %s\n", keys.size(),
as_string_list(keys).c_str());
vector<mmbit_sparse_iter> iter;
mmbBuildSparseIterator(iter, keys, bc.numStates);
assert(!iter.empty());
iter_offset = addIteratorToTable(bc, iter);
// Construct our program, starting with the SPARSE_ITER_BEGIN
// instruction, keeping track of the jump offset for each sub-program.
vector<u32> jump_table;
u32 curr_offset = 0;
// Add a pre-check for min depth, if it's useful.
if (!verts.empty()) {
u32 min_depth = calcMinDepth(bc.depths, verts);
if (min_depth > 1) {
auto ri = RoseInstruction(ROSE_INSTR_CHECK_DEPTH);
ri.u.checkDepth.min_depth = min_depth;
program.push_back(ri);
curr_offset = ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
}
} }
// Deterministic ordering. program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN));
sort(begin(verts), end(verts), curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN);
[&g](RoseVertex a, RoseVertex b) { return g[a].idx < g[b].idx; });
for (const auto &e : predProgramLists) {
DEBUG_PRINTF("subprogram %zu has offset %u\n", jump_table.size(),
curr_offset);
jump_table.push_back(curr_offset);
auto subprog = flattenProgram(e.second);
if (e.first != keys.back()) {
// For all but the last subprogram, replace the END instruction
// with a SPARSE_ITER_NEXT.
assert(!subprog.empty());
assert(subprog.back().code() == ROSE_INSTR_END);
subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT);
}
for (const auto &ri : subprog) {
program.push_back(ri);
curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
}
}
const u32 end_offset = curr_offset - ROUNDUP_N(program.back().length(),
ROSE_INSTR_MIN_ALIGN);
// Write the jump table into the bytecode.
const u32 jump_table_offset =
add_to_engine_blob(bc, begin(jump_table), end(jump_table));
// Fix up the instruction operands.
auto keys_it = begin(keys);
curr_offset = 0;
for (size_t i = 0; i < program.size(); i++) {
auto &ri = program[i];
switch (ri.code()) {
case ROSE_INSTR_CHECK_DEPTH:
ri.u.checkDepth.fail_jump = end_offset - curr_offset;
break;
case ROSE_INSTR_SPARSE_ITER_BEGIN:
ri.u.sparseIterBegin.iter_offset = iter_offset;
ri.u.sparseIterBegin.jump_table = jump_table_offset;
ri.u.sparseIterBegin.fail_jump = end_offset - curr_offset;
break;
case ROSE_INSTR_SPARSE_ITER_NEXT:
ri.u.sparseIterNext.iter_offset = iter_offset;
ri.u.sparseIterNext.jump_table = jump_table_offset;
assert(keys_it != end(keys));
ri.u.sparseIterNext.state = *keys_it++;
ri.u.sparseIterNext.fail_jump = end_offset - curr_offset;
break;
default:
break;
}
curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
}
}
// If we have a root program, replace the END instruction with it. Note
// that the root program has already been flattened.
if (!root_program.empty()) {
if (!program.empty()) {
assert(program.back().code() == ROSE_INSTR_END);
program.pop_back();
}
program.insert(end(program), begin(root_program), end(root_program));
}
return {writeProgram(bc, program), iter_offset};
}
static
u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc,
const vector<RoseEdge> &lit_edges) {
const auto &g = build.g;
DEBUG_PRINTF("%zu lit edges\n", lit_edges.size());
// pred state id -> list of programs // pred state id -> list of programs
map<u32, vector<vector<RoseInstruction>>> predProgramLists; map<u32, vector<vector<RoseInstruction>>> predProgramLists;
vector<RoseVertex> nonroot_verts;
for (const auto &v : verts) { // Construct sparse iter sub-programs.
DEBUG_PRINTF("vertex %zu\n", g[v].idx); for (const auto &e : lit_edges) {
for (const auto &e : in_edges_range(v, g)) {
const auto &u = source(e, g); const auto &u = source(e, g);
if (build.isAnyStart(u)) { if (build.isAnyStart(u)) {
continue; // Root roles are not handled with sparse iterator. continue; // Root roles are not handled with sparse iterator.
} }
DEBUG_PRINTF("sparse iter edge (%zu,%zu)\n", g[u].idx,
g[target(e, g)].idx);
assert(contains(bc.roleStateIndices, u)); assert(contains(bc.roleStateIndices, u));
u32 pred_state = bc.roleStateIndices.at(u); u32 pred_state = bc.roleStateIndices.at(u);
auto program = makePredProgram(build, bc, e);
DEBUG_PRINTF("pred %zu (state %u)\n", g[u].idx, pred_state);
auto program = makeSparseIterProgram(build, bc, e);
predProgramLists[pred_state].push_back(program); predProgramLists[pred_state].push_back(program);
} nonroot_verts.push_back(target(e, g));
} }
map<u32, u32> predPrograms; // Construct sub-program for handling root roles.
for (const auto &e : predProgramLists) { vector<vector<RoseInstruction>> root_programs;
auto program = flattenRoleProgram(e.second); for (const auto &e : lit_edges) {
u32 offset = writeRoleProgram(bc, program); const auto &u = source(e, g);
predPrograms.emplace(e.first, offset); if (!build.isAnyStart(u)) {
continue;
}
DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].idx, g[target(e, g)].idx);
auto role_prog = makeProgram(build, bc, e);
if (role_prog.empty()) {
continue;
}
root_programs.push_back(role_prog);
} }
tie(tl.iterProgramOffset, tl.iterOffset) = vector<RoseInstruction> root_program;
addPredSparseIter(bc, predPrograms); if (!root_programs.empty()) {
root_program = flattenProgram(root_programs);
}
// Put it all together.
return makeSparseIterProgram(bc, predProgramLists, nonroot_verts,
root_program).first;
} }
// Build sparse iterators for literals.
static static
void buildSparseIter(RoseBuildImpl &build, build_context &bc, map<u32, vector<RoseEdge>> findEdgesByLiteral(const RoseBuildImpl &build) {
vector<RoseLiteral> &literalTable) { // Use a set of edges while building the map to cull duplicates.
const RoseGraph &g = build.g; map<u32, flat_set<RoseEdge>> unique_lit_edge_map;
// Find all our non-root roles. const auto &g = build.g;
ue2::unordered_map<u32, vector<RoseVertex>> litNonRootVertices; for (const auto &e : edges_range(g)) {
for (const auto &v : vertices_range(g)) { const auto &v = target(e, g);
if (build.isRootSuccessor(v)) { if (build.hasDirectFinalId(v)) {
// Skip direct reports, which do not have RoseLiteral entries.
continue; continue;
} }
for (const auto &lit_id : g[v].literals) { for (const auto &lit_id : g[v].literals) {
assert(lit_id < build.literal_info.size());
u32 final_id = build.literal_info.at(lit_id).final_id; u32 final_id = build.literal_info.at(lit_id).final_id;
litNonRootVertices[final_id].push_back(v); if (final_id != MO_INVALID_IDX) {
unique_lit_edge_map[final_id].insert(e);
}
} }
} }
// Build output map, sorting edges by (source, target) vertex index.
map<u32, vector<RoseEdge>> lit_edge_map;
for (const auto &m : unique_lit_edge_map) {
auto edge_list = vector<RoseEdge>(begin(m.second), end(m.second));
sort(begin(edge_list), end(edge_list),
[&g](const RoseEdge &a, const RoseEdge &b) {
return tie(g[source(a, g)].idx, g[target(a, g)].idx) <
tie(g[source(b, g)].idx, g[target(b, g)].idx);
});
lit_edge_map.emplace(m.first, edge_list);
}
return lit_edge_map;
}
/** \brief Build the interpreter program for each literal. */
static
void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc,
vector<RoseLiteral> &literalTable) {
auto lit_edge_map = findEdgesByLiteral(build);
for (u32 finalId = 0; finalId != literalTable.size(); ++finalId) { for (u32 finalId = 0; finalId != literalTable.size(); ++finalId) {
buildLitSparseIter(build, bc, litNonRootVertices[finalId], const auto &lit_edges = lit_edge_map[finalId];
literalTable[finalId]); u32 offset = buildLiteralProgram(build, bc, lit_edges);
literalTable[finalId].programOffset = offset;
} }
} }
@ -3514,9 +3575,11 @@ vector<RoseInstruction> makeEodAnchorProgram(RoseBuildImpl &build,
return program; return program;
} }
/* returns a pair containing the iter map offset and iter offset */ /**
* Returns the pair (program offset, sparse iter offset).
*/
static static
pair<u32, u32> buildEodAnchorRoles(RoseBuildImpl &build, build_context &bc) { pair<u32, u32> buildEodAnchorProgram(RoseBuildImpl &build, build_context &bc) {
const RoseGraph &g = build.g; const RoseGraph &g = build.g;
// pred state id -> list of programs // pred state id -> list of programs
@ -3546,15 +3609,35 @@ pair<u32, u32> buildEodAnchorRoles(RoseBuildImpl &build, build_context &bc) {
return {0, 0}; return {0, 0};
} }
map<u32, u32> predPrograms; return makeSparseIterProgram(bc, predProgramLists, {}, {});
for (const auto &e : predProgramLists) { }
DEBUG_PRINTF("pred %u has %zu programs\n", e.first, e.second.size());
auto program = flattenRoleProgram(e.second); static
u32 offset = writeRoleProgram(bc, program); u32 writeEodProgram(RoseBuildImpl &build, build_context &bc) {
predPrograms.emplace(e.first, offset); if (build.eod_event_literal_id == MO_INVALID_IDX) {
return 0;
} }
return addPredSparseIter(bc, predPrograms); const RoseGraph &g = build.g;
const auto &lit_info = build.literal_info.at(build.eod_event_literal_id);
assert(lit_info.delayed_ids.empty());
assert(!lit_info.squash_group);
assert(!lit_info.requires_benefits);
// Collect all edges leading into EOD event literal vertices.
vector<RoseEdge> edge_list;
for (const auto &v : lit_info.vertices) {
insert(&edge_list, edge_list.end(), in_edges(v, g));
}
// Sort edge list for determinism, prettiness.
sort(begin(edge_list), end(edge_list),
[&g](const RoseEdge &a, const RoseEdge &b) {
return tie(g[source(a, g)].idx, g[target(a, g)].idx) <
tie(g[source(b, g)].idx, g[target(b, g)].idx);
});
return buildLiteralProgram(build, bc, edge_list);
} }
static static
@ -3742,11 +3825,12 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
vector<RoseLiteral> literalTable; vector<RoseLiteral> literalTable;
buildLiteralTable(*this, bc, literalTable); buildLiteralTable(*this, bc, literalTable);
buildSparseIter(*this, bc, literalTable); buildLiteralPrograms(*this, bc, literalTable);
u32 eodProgramOffset = writeEodProgram(*this, bc);
u32 eodIterProgramOffset;
u32 eodIterOffset; u32 eodIterOffset;
u32 eodProgramTableOffset; tie(eodIterProgramOffset, eodIterOffset) = buildEodAnchorProgram(*this, bc);
tie(eodProgramTableOffset, eodIterOffset) = buildEodAnchorRoles(*this, bc);
vector<mmbit_sparse_iter> activeLeftIter; vector<mmbit_sparse_iter> activeLeftIter;
buildActiveLeftIter(leftInfoTable, activeLeftIter); buildActiveLeftIter(leftInfoTable, activeLeftIter);
@ -3758,9 +3842,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
throw ResourceLimitError(); throw ResourceLimitError();
} }
// Write root programs for literals into the engine blob.
buildRootRolePrograms(*this, bc, literalTable);
u32 amatcherOffset = 0; u32 amatcherOffset = 0;
u32 fmatcherOffset = 0; u32 fmatcherOffset = 0;
u32 ematcherOffset = 0; u32 ematcherOffset = 0;
@ -3968,8 +4049,9 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
= anchoredReportInverseMapOffset; = anchoredReportInverseMapOffset;
engine->multidirectOffset = multidirectOffset; engine->multidirectOffset = multidirectOffset;
engine->eodProgramOffset = eodProgramOffset;
engine->eodIterProgramOffset = eodIterProgramOffset;
engine->eodIterOffset = eodIterOffset; engine->eodIterOffset = eodIterOffset;
engine->eodProgramTableOffset = eodProgramTableOffset;
engine->lastByteHistoryIterOffset = lastByteOffset; engine->lastByteHistoryIterOffset = lastByteOffset;
@ -4038,13 +4120,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
write_out(&engine->state_init, (char *)engine.get(), state_scatter, write_out(&engine->state_init, (char *)engine.get(), state_scatter,
state_scatter_aux_offset); state_scatter_aux_offset);
if (eod_event_literal_id != MO_INVALID_IDX) {
engine->hasEodEventLiteral = 1;
DEBUG_PRINTF("eod literal id=%u, final_id=%u\n", eod_event_literal_id,
literal_info.at(eod_event_literal_id).final_id);
engine->eodLiteralId = literal_info.at(eod_event_literal_id).final_id;
}
if (anchoredIsMulti(*engine)) { if (anchoredIsMulti(*engine)) {
DEBUG_PRINTF("multiple anchored dfas\n"); DEBUG_PRINTF("multiple anchored dfas\n");
engine->maxSafeAnchoredDROffset = 1; engine->maxSafeAnchoredDROffset = 1;

View File

@ -274,6 +274,13 @@ void allocateFinalLiteralId(RoseBuildImpl &tbi) {
continue; continue;
} }
// The special EOD event literal has its own program and does not need
// a real literal ID.
if (i == tbi.eod_event_literal_id) {
assert(tbi.eod_event_literal_id != MO_INVALID_IDX);
continue;
}
const rose_literal_info &info = tbi.literal_info[i]; const rose_literal_info &info = tbi.literal_info[i];
if (info.requires_benefits) { if (info.requires_benefits) {
assert(!tbi.isDelayed(i)); assert(!tbi.isDelayed(i));

View File

@ -34,7 +34,6 @@
#include "rose_build_impl.h" #include "rose_build_impl.h"
#include "rose/rose_dump.h" #include "rose/rose_dump.h"
#include "rose_internal.h" #include "rose_internal.h"
#include "rose_program.h"
#include "ue2common.h" #include "ue2common.h"
#include "nfa/nfa_internal.h" #include "nfa/nfa_internal.h"
#include "nfagraph/ng_dump.h" #include "nfagraph/ng_dump.h"

View File

@ -130,12 +130,6 @@ size_t literalsWithPredicate(const RoseEngine *t, Predicate pred) {
return count_if(tl, tl_end, pred); return count_if(tl, tl_end, pred);
} }
/**
 * \brief Count the literals in engine \p t whose minDepth field is exactly
 * \p depth.
 *
 * Thin wrapper around literalsWithPredicate(), which does the counting over
 * the engine's literal table.
 */
static
size_t literalsWithDepth(const RoseEngine *t, u8 depth) {
return literalsWithPredicate(
t, [&depth](const RoseLiteral &l) { return l.minDepth == depth; });
}
static static
size_t literalsInGroups(const RoseEngine *t, u32 from, u32 to) { size_t literalsInGroups(const RoseEngine *t, u32 from, u32 to) {
rose_group mask = ~((1ULL << from) - 1); rose_group mask = ~((1ULL << from) - 1);
@ -195,7 +189,7 @@ void dumpLookaround(ofstream &os, const RoseEngine *t,
} }
static static
void dumpRoleProgram(ofstream &os, const RoseEngine *t, const char *pc) { void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
const char *pc_base = pc; const char *pc_base = pc;
for (;;) { for (;;) {
u8 code = *(const u8 *)pc; u8 code = *(const u8 *)pc;
@ -209,6 +203,12 @@ void dumpRoleProgram(ofstream &os, const RoseEngine *t, const char *pc) {
} }
PROGRAM_NEXT_INSTRUCTION PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CHECK_DEPTH) {
os << " min_depth " << u32{ri->min_depth} << endl;
os << " fail_jump +" << ri->fail_jump << endl;
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CHECK_ONLY_EOD) { PROGRAM_CASE(CHECK_ONLY_EOD) {
os << " fail_jump +" << ri->fail_jump << endl; os << " fail_jump +" << ri->fail_jump << endl;
} }
@ -309,6 +309,21 @@ void dumpRoleProgram(ofstream &os, const RoseEngine *t, const char *pc) {
} }
PROGRAM_NEXT_INSTRUCTION PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(SPARSE_ITER_BEGIN) {
os << " iter_offset " << ri->iter_offset << endl;
os << " jump_table " << ri->jump_table << endl;
os << " fail_jump +" << ri->fail_jump << endl;
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(SPARSE_ITER_NEXT) {
os << " iter_offset " << ri->iter_offset << endl;
os << " jump_table " << ri->jump_table << endl;
os << " state " << ri->state << endl;
os << " fail_jump +" << ri->fail_jump << endl;
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(END) { return; } PROGRAM_CASE(END) { return; }
PROGRAM_NEXT_INSTRUCTION PROGRAM_NEXT_INSTRUCTION
@ -323,30 +338,6 @@ void dumpRoleProgram(ofstream &os, const RoseEngine *t, const char *pc) {
#undef PROGRAM_CASE #undef PROGRAM_CASE
#undef PROGRAM_NEXT_INSTRUCTION #undef PROGRAM_NEXT_INSTRUCTION
/**
 * \brief Dump every program in a sparse-iterator program table to \p os.
 *
 * \param os                 Output stream receiving the text dump.
 * \param t                  Engine whose bytecode holds the iterator and table.
 * \param iterOffset         Offset of the mmbit_sparse_iter, resolved with
 *                           loadFromByteCodeOffset().
 * \param programTableOffset Offset of the u32 program offset table, resolved
 *                           the same way.
 *
 * The iterator is run over a multibit whose bytes are all 0xff, so that every
 * state it covers is visited; each visited entry's program is printed with
 * dumpRoleProgram().
 */
static
void dumpSparseIterPrograms(ofstream &os, const RoseEngine *t, u32 iterOffset,
u32 programTableOffset) {
const auto *it =
(const mmbit_sparse_iter *)loadFromByteCodeOffset(t, iterOffset);
const u32 *programTable =
(const u32 *)loadFromByteCodeOffset(t, programTableOffset);
// Construct a full multibit (every byte 0xff), sized for all stateful roles.
const u32 total_bits = t->rolesWithStateCount;
const vector<u8> bits(mmbit_size(total_bits), u8{0xff});
struct mmbit_sparse_state s[MAX_SPARSE_ITER_STATES];
// idx is filled in by the iterator calls and used to index programTable;
// i is the state index reported by the iterator.
u32 idx = 0;
for (u32 i = mmbit_sparse_iter_begin(bits.data(), total_bits, &idx, it, s);
i != MMB_INVALID;
i = mmbit_sparse_iter_next(bits.data(), total_bits, i, &idx, it, s)) {
u32 programOffset = programTable[idx];
os << "Sparse Iter Program " << idx << " triggered by state " << i
<< " @ " << programOffset << ":" << endl;
// Program offsets in the table are applied relative to the engine base.
dumpRoleProgram(os, t, (const char *)t + programOffset);
}
}
static static
void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) { void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) {
ofstream os(filename); ofstream os(filename);
@ -359,18 +350,11 @@ void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) {
os << "Literal " << i << endl; os << "Literal " << i << endl;
os << "---------------" << endl; os << "---------------" << endl;
if (lit->rootProgramOffset) { if (lit->programOffset) {
os << "Root Program @ " << lit->rootProgramOffset << ":" << endl; os << "Program @ " << lit->programOffset << ":" << endl;
dumpRoleProgram(os, t, base + lit->rootProgramOffset); dumpProgram(os, t, base + lit->programOffset);
} else { } else {
os << "<No Root Program>" << endl; os << "<No Program>" << endl;
}
if (lit->iterOffset != ROSE_OFFSET_INVALID) {
dumpSparseIterPrograms(os, t, lit->iterOffset,
lit->iterProgramOffset);
} else {
os << "<No Sparse Iter Programs>" << endl;
} }
os << endl; os << endl;
@ -382,12 +366,23 @@ void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) {
static static
void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) { void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) {
ofstream os(filename); ofstream os(filename);
const char *base = (const char *)t;
if (t->eodIterOffset) { os << "Unconditional EOD Program:" << endl;
dumpSparseIterPrograms(os, t, t->eodIterOffset,
t->eodProgramTableOffset); if (t->eodProgramOffset) {
dumpProgram(os, t, base + t->eodProgramOffset);
os << endl;
} else { } else {
os << "<No EOD Iter Programs>" << endl; os << "<No EOD Program>" << endl;
}
os << "Sparse Iter EOD Program:" << endl;
if (t->eodIterProgramOffset) {
dumpProgram(os, t, base + t->eodIterProgramOffset);
} else {
os << "<No EOD Iter Program>" << endl;
} }
os.close(); os.close();
@ -766,33 +761,15 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
literalsWithPredicate( literalsWithPredicate(
t, [](const RoseLiteral &l) { return l.squashesGroup != 0; })); t, [](const RoseLiteral &l) { return l.squashesGroup != 0; }));
fprintf(f, " - with benefits : %u\n", t->nonbenefits_base_id); fprintf(f, " - with benefits : %u\n", t->nonbenefits_base_id);
fprintf(f, " - with root program : %zu\n", fprintf(f, " - with program : %zu\n",
literalsWithPredicate(t, [](const RoseLiteral &l) { literalsWithPredicate(
return l.rootProgramOffset != 0; t, [](const RoseLiteral &l) { return l.programOffset != 0; }));
}));
fprintf(f, " - with sparse iter : %zu\n",
literalsWithPredicate(t, [](const RoseLiteral &l) {
return l.iterOffset != ROSE_OFFSET_INVALID;
}));
fprintf(f, " - in groups ::\n"); fprintf(f, " - in groups ::\n");
fprintf(f, " + weak : %zu\n", fprintf(f, " + weak : %zu\n",
literalsInGroups(t, 0, t->group_weak_end)); literalsInGroups(t, 0, t->group_weak_end));
fprintf(f, " + general : %zu\n", fprintf(f, " + general : %zu\n",
literalsInGroups(t, t->group_weak_end, sizeof(u64a) * 8)); literalsInGroups(t, t->group_weak_end, sizeof(u64a) * 8));
u32 depth1 = literalsWithDepth(t, 1);
u32 depth2 = literalsWithDepth(t, 2);
u32 depth3 = literalsWithDepth(t, 3);
u32 depth4 = literalsWithDepth(t, 4);
u32 depthN = t->literalCount - (depth1 + depth2 + depth3 + depth4);
fprintf(f, "\nLiteral depths:\n");
fprintf(f, " minimum depth 1 : %u\n", depth1);
fprintf(f, " minimum depth 2 : %u\n", depth2);
fprintf(f, " minimum depth 3 : %u\n", depth3);
fprintf(f, " minimum depth 4 : %u\n", depth4);
fprintf(f, " minimum depth >4 : %u\n", depthN);
fprintf(f, "\n"); fprintf(f, "\n");
fprintf(f, " minWidth : %u\n", t->minWidth); fprintf(f, " minWidth : %u\n", t->minWidth);
fprintf(f, " minWidthExcludingBoundaries : %u\n", fprintf(f, " minWidthExcludingBoundaries : %u\n",
@ -840,7 +817,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
DUMP_U8(t, hasFloatingDirectReports); DUMP_U8(t, hasFloatingDirectReports);
DUMP_U8(t, noFloatingRoots); DUMP_U8(t, noFloatingRoots);
DUMP_U8(t, requiresEodCheck); DUMP_U8(t, requiresEodCheck);
DUMP_U8(t, hasEodEventLiteral);
DUMP_U8(t, hasOutfixesInSmallBlock); DUMP_U8(t, hasOutfixesInSmallBlock);
DUMP_U8(t, runtimeImpl); DUMP_U8(t, runtimeImpl);
DUMP_U8(t, mpvTriggeredByLeaf); DUMP_U8(t, mpvTriggeredByLeaf);
@ -882,8 +858,9 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
DUMP_U32(t, roseCount); DUMP_U32(t, roseCount);
DUMP_U32(t, lookaroundTableOffset); DUMP_U32(t, lookaroundTableOffset);
DUMP_U32(t, lookaroundReachOffset); DUMP_U32(t, lookaroundReachOffset);
DUMP_U32(t, eodProgramOffset);
DUMP_U32(t, eodIterProgramOffset);
DUMP_U32(t, eodIterOffset); DUMP_U32(t, eodIterOffset);
DUMP_U32(t, eodProgramTableOffset);
DUMP_U32(t, lastByteHistoryIterOffset); DUMP_U32(t, lastByteHistoryIterOffset);
DUMP_U32(t, minWidth); DUMP_U32(t, minWidth);
DUMP_U32(t, minWidthExcludingBoundaries); DUMP_U32(t, minWidthExcludingBoundaries);
@ -940,7 +917,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
DUMP_U32(t, somRevOffsetOffset); DUMP_U32(t, somRevOffsetOffset);
DUMP_U32(t, group_weak_end); DUMP_U32(t, group_weak_end);
DUMP_U32(t, floatingStreamState); DUMP_U32(t, floatingStreamState);
DUMP_U32(t, eodLiteralId);
fprintf(f, "}\n"); fprintf(f, "}\n");
fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine)); fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine));
} }

View File

@ -76,38 +76,15 @@ ReportID literalToReport(u32 id) {
/** \brief Structure representing a literal. */ /** \brief Structure representing a literal. */
struct RoseLiteral { struct RoseLiteral {
/** /**
* \brief Role program to run unconditionally when this literal is seen. * \brief Program to run when this literal is seen.
* *
* Offset is relative to RoseEngine, or zero for no program. * Offset is relative to RoseEngine, or zero for no program.
*/ */
u32 rootProgramOffset; u32 programOffset;
/**
* \brief Offset of sparse iterator (mmbit_sparse_iter pointer) over
* predecessor states.
*
* Offset is relative to RoseEngine, set to ROSE_OFFSET_INVALID for no
* iterator.
*/
u32 iterOffset;
/**
* \brief Table of role programs to run when triggered by the sparse
* iterator, indexed by dense sparse iter index.
*
* Offset is relative to RoseEngine, zero for no programs.
*/
u32 iterProgramOffset;
/** \brief Bitset of groups that cause this literal to fire. */ /** \brief Bitset of groups that cause this literal to fire. */
rose_group groups; rose_group groups;
/**
* \brief The minimum depth of this literal in the Rose graph (for depths
* greater than 1).
*/
u8 minDepth;
/** /**
* \brief True if this literal switches off its group behind it when it * \brief True if this literal switches off its group behind it when it
* sets a role. * sets a role.
@ -382,7 +359,6 @@ struct RoseEngine {
u8 noFloatingRoots; /* only need to run the anchored table if something u8 noFloatingRoots; /* only need to run the anchored table if something
* matched in the anchored table */ * matched in the anchored table */
u8 requiresEodCheck; /* stuff happens at eod time */ u8 requiresEodCheck; /* stuff happens at eod time */
u8 hasEodEventLiteral; // fires a ROSE_EVENT literal at eod time.
u8 hasOutfixesInSmallBlock; /**< has at least one outfix that must run even u8 hasOutfixesInSmallBlock; /**< has at least one outfix that must run even
in small block scans. */ in small block scans. */
u8 runtimeImpl; /**< can we just run the floating table or a single outfix? u8 runtimeImpl; /**< can we just run the floating table or a single outfix?
@ -448,8 +424,9 @@ struct RoseEngine {
u32 lookaroundReachOffset; /**< base of lookaround reach bitvectors (32 u32 lookaroundReachOffset; /**< base of lookaround reach bitvectors (32
* bytes each) */ * bytes each) */
u32 eodIterOffset; // or 0 if no eod iterator u32 eodProgramOffset; //!< Unconditional EOD program, otherwise 0.
u32 eodProgramTableOffset; u32 eodIterProgramOffset; // or 0 if no eod iterator program
u32 eodIterOffset; // offset to EOD sparse iter or 0 if none
u32 lastByteHistoryIterOffset; // if non-zero u32 lastByteHistoryIterOffset; // if non-zero
@ -512,7 +489,6 @@ struct RoseEngine {
u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */ u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */
u32 group_weak_end; /* end of weak groups, debugging only */ u32 group_weak_end; /* end of weak groups, debugging only */
u32 floatingStreamState; // size in bytes u32 floatingStreamState; // size in bytes
u32 eodLiteralId; // literal ID for eod ROSE_EVENT if used, otherwise 0.
struct scatter_full_plan state_init; struct scatter_full_plan state_init;
}; };

View File

@ -42,6 +42,7 @@
/** \brief Role program instruction opcodes. */ /** \brief Role program instruction opcodes. */
enum RoseInstructionCode { enum RoseInstructionCode {
ROSE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher. ROSE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher.
ROSE_INSTR_CHECK_DEPTH, //!< Check minimum graph depth.
ROSE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD. ROSE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD.
ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance from offset 0. ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance from offset 0.
ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled". ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled".
@ -59,43 +60,51 @@ enum RoseInstructionCode {
ROSE_INSTR_REPORT_SOM_KNOWN, //!< Rose role knows its SOM offset. ROSE_INSTR_REPORT_SOM_KNOWN, //!< Rose role knows its SOM offset.
ROSE_INSTR_SET_STATE, //!< Switch a state index on. ROSE_INSTR_SET_STATE, //!< Switch a state index on.
ROSE_INSTR_SET_GROUPS, //!< Set some literal group bits. ROSE_INSTR_SET_GROUPS, //!< Set some literal group bits.
ROSE_INSTR_SPARSE_ITER_BEGIN, //!< Begin running a sparse iter over states.
ROSE_INSTR_SPARSE_ITER_NEXT, //!< Continue running sparse iter over states.
ROSE_INSTR_END //!< End of program. ROSE_INSTR_END //!< End of program.
}; };
struct ROSE_STRUCT_ANCHORED_DELAY { struct ROSE_STRUCT_ANCHORED_DELAY {
u8 code; //!< From enum RoseRoleInstructionCode. u8 code; //!< From enum RoseInstructionCode.
u8 depth; //!< Depth for this state. u8 depth; //!< Depth for this state.
rose_group groups; //!< Bitmask. rose_group groups; //!< Bitmask.
u32 done_jump; //!< Jump forward this many bytes if successful. u32 done_jump; //!< Jump forward this many bytes if successful.
}; };
/**
 * Operands of the CHECK_DEPTH instruction ("Check minimum graph depth" in
 * enum RoseInstructionCode).
 *
 * NOTE(review): the runtime that evaluates the check is not in this header;
 * presumably fail_jump is taken when the depth test fails -- confirm against
 * the program interpreter.
 */
struct ROSE_STRUCT_CHECK_DEPTH {
u8 code; //!< From enum RoseInstructionCode.
u8 min_depth; //!< Minimum depth of this literal in the Rose graph.
u32 fail_jump; //!< Jump forward this many bytes on failure.
};
struct ROSE_STRUCT_CHECK_ONLY_EOD { struct ROSE_STRUCT_CHECK_ONLY_EOD {
u8 code; //!< From enum RoseRoleInstructionCode. u8 code; //!< From enum RoseInstructionCode.
u32 fail_jump; //!< Jump forward this many bytes on failure. u32 fail_jump; //!< Jump forward this many bytes on failure.
}; };
struct ROSE_STRUCT_CHECK_BOUNDS { struct ROSE_STRUCT_CHECK_BOUNDS {
u8 code; //!< From enum RoseRoleInstructionCode. u8 code; //!< From enum RoseInstructionCode.
u32 min_bound; //!< Min distance from zero. u32 min_bound; //!< Min distance from zero.
u32 max_bound; //!< Max distance from zero (or ROSE_BOUND_INF). u32 max_bound; //!< Max distance from zero (or ROSE_BOUND_INF).
u32 fail_jump; //!< Jump forward this many bytes on failure. u32 fail_jump; //!< Jump forward this many bytes on failure.
}; };
struct ROSE_STRUCT_CHECK_NOT_HANDLED { struct ROSE_STRUCT_CHECK_NOT_HANDLED {
u8 code; //!< From enum RoseRoleInstructionCode. u8 code; //!< From enum RoseInstructionCode.
u32 key; //!< Key in the "handled_roles" fatbit in scratch. u32 key; //!< Key in the "handled_roles" fatbit in scratch.
u32 fail_jump; //!< Jump forward this many bytes if we have seen key before. u32 fail_jump; //!< Jump forward this many bytes if we have seen key before.
}; };
struct ROSE_STRUCT_CHECK_LOOKAROUND { struct ROSE_STRUCT_CHECK_LOOKAROUND {
u8 code; //!< From enum RoseRoleInstructionCode. u8 code; //!< From enum RoseInstructionCode.
u32 index; u32 index;
u32 count; u32 count;
u32 fail_jump; //!< Jump forward this many bytes on failure. u32 fail_jump; //!< Jump forward this many bytes on failure.
}; };
struct ROSE_STRUCT_CHECK_LEFTFIX { struct ROSE_STRUCT_CHECK_LEFTFIX {
u8 code; //!< From enum RoseRoleInstructionCode. u8 code; //!< From enum RoseInstructionCode.
u32 queue; //!< Queue of leftfix to check. u32 queue; //!< Queue of leftfix to check.
u32 lag; //!< Lag of leftfix for this case. u32 lag; //!< Lag of leftfix for this case.
ReportID report; //!< ReportID of leftfix to check. ReportID report; //!< ReportID of leftfix to check.
@ -103,72 +112,95 @@ struct ROSE_STRUCT_CHECK_LEFTFIX {
}; };
struct ROSE_STRUCT_SOM_ADJUST { struct ROSE_STRUCT_SOM_ADJUST {
u8 code; //!< From enum RoseRoleInstructionCode. u8 code; //!< From enum RoseInstructionCode.
u32 distance; //!< Distance to EOM. u32 distance; //!< Distance to EOM.
}; };
struct ROSE_STRUCT_SOM_LEFTFIX { struct ROSE_STRUCT_SOM_LEFTFIX {
u8 code; //!< From enum RoseRoleInstructionCode. u8 code; //!< From enum RoseInstructionCode.
u32 queue; //!< Queue index of leftfix providing SOM. u32 queue; //!< Queue index of leftfix providing SOM.
u32 lag; //!< Lag of leftfix for this case. u32 lag; //!< Lag of leftfix for this case.
}; };
struct ROSE_STRUCT_TRIGGER_INFIX { struct ROSE_STRUCT_TRIGGER_INFIX {
u8 code; //!< From enum RoseRoleInstructionCode. u8 code; //!< From enum RoseInstructionCode.
u8 cancel; //!< Cancels previous top event. u8 cancel; //!< Cancels previous top event.
u32 queue; //!< Queue index of infix. u32 queue; //!< Queue index of infix.
u32 event; //!< Queue event, from MQE_*. u32 event; //!< Queue event, from MQE_*.
}; };
struct ROSE_STRUCT_TRIGGER_SUFFIX { struct ROSE_STRUCT_TRIGGER_SUFFIX {
u8 code; //!< From enum RoseRoleInstructionCode. u8 code; //!< From enum RoseInstructionCode.
u32 queue; //!< Queue index of suffix. u32 queue; //!< Queue index of suffix.
u32 event; //!< Queue event, from MQE_*. u32 event; //!< Queue event, from MQE_*.
}; };
struct ROSE_STRUCT_REPORT { struct ROSE_STRUCT_REPORT {
u8 code; //!< From enum RoseRoleInstructionCode. u8 code; //!< From enum RoseInstructionCode.
ReportID report; ReportID report;
}; };
struct ROSE_STRUCT_REPORT_CHAIN { struct ROSE_STRUCT_REPORT_CHAIN {
u8 code; //!< From enum RoseRoleInstructionCode. u8 code; //!< From enum RoseInstructionCode.
ReportID report; ReportID report;
}; };
struct ROSE_STRUCT_REPORT_EOD { struct ROSE_STRUCT_REPORT_EOD {
u8 code; //!< From enum RoseRoleInstructionCode. u8 code; //!< From enum RoseInstructionCode.
ReportID report; ReportID report;
}; };
struct ROSE_STRUCT_REPORT_SOM_INT { struct ROSE_STRUCT_REPORT_SOM_INT {
u8 code; //!< From enum RoseRoleInstructionCode. u8 code; //!< From enum RoseInstructionCode.
ReportID report; ReportID report;
}; };
struct ROSE_STRUCT_REPORT_SOM { struct ROSE_STRUCT_REPORT_SOM {
u8 code; //!< From enum RoseRoleInstructionCode. u8 code; //!< From enum RoseInstructionCode.
ReportID report; ReportID report;
}; };
struct ROSE_STRUCT_REPORT_SOM_KNOWN { struct ROSE_STRUCT_REPORT_SOM_KNOWN {
u8 code; //!< From enum RoseRoleInstructionCode. u8 code; //!< From enum RoseInstructionCode.
ReportID report; ReportID report;
}; };
struct ROSE_STRUCT_SET_STATE { struct ROSE_STRUCT_SET_STATE {
u8 code; //!< From enum RoseRoleInstructionCode. u8 code; //!< From enum RoseInstructionCode.
u8 depth; //!< Depth for this state. u8 depth; //!< Depth for this state.
u32 index; //!< State index in multibit. u32 index; //!< State index in multibit.
}; };
struct ROSE_STRUCT_SET_GROUPS { struct ROSE_STRUCT_SET_GROUPS {
u8 code; //!< From enum RoseRoleInstructionCode. u8 code; //!< From enum RoseInstructionCode.
rose_group groups; //!< Bitmask. rose_group groups; //!< Bitmask.
}; };
/**
 * Operands of the SPARSE_ITER_BEGIN instruction ("Begin running a sparse iter
 * over states" in enum RoseInstructionCode).
 *
 * Note that the offsets in the jump table are always relative to the start of
 * the program, not the current instruction. This differs from fail_jump,
 * which is a forward distance in bytes.
 */
struct ROSE_STRUCT_SPARSE_ITER_BEGIN {
u8 code; //!< From enum RoseInstructionCode.
u32 iter_offset; //!< Offset of mmbit_sparse_iter structure.
u32 jump_table; //!< Offset of jump table indexed by sparse iterator.
u32 fail_jump; //!< Jump forward this many bytes on failure.
};
/**
 * Operands of the SPARSE_ITER_NEXT instruction ("Continue running sparse iter
 * over states" in enum RoseInstructionCode).
 *
 * Note that the offsets in the jump table are always relative to the start of
 * the program, not the current instruction. This differs from fail_jump,
 * which is a forward distance in bytes.
 */
struct ROSE_STRUCT_SPARSE_ITER_NEXT {
u8 code; //!< From enum RoseInstructionCode.
u32 iter_offset; //!< Offset of mmbit_sparse_iter structure.
u32 jump_table; //!< Offset of jump table indexed by sparse iterator.
u32 state; //!< Current state index.
u32 fail_jump; //!< Jump forward this many bytes on failure.
};
struct ROSE_STRUCT_END { struct ROSE_STRUCT_END {
u8 code; //!< From enum RoseRoleInstructionCode. u8 code; //!< From enum RoseInstructionCode.
}; };
#endif // ROSE_ROSE_PROGRAM_H #endif // ROSE_ROSE_PROGRAM_H