rose: Extend the interpreter to handle more work

- Use program for EOD sparse iterator
- Use program for literal sparse iterator
- Eliminate RoseRole, RosePred, RoseVertexProps::role
- Small performance optimizations
This commit is contained in:
Justin Viiret 2015-12-04 16:17:28 +11:00 committed by Matthew Barr
parent 9cb2233589
commit d67c7583ea
15 changed files with 766 additions and 1359 deletions

View File

@@ -82,8 +82,8 @@ void init_state_for_block(const struct RoseEngine *t, u8 *state) {
assert(t);
assert(state);
DEBUG_PRINTF("init for Rose %p with %u roles (%u with state indices)\n",
t, t->roleCount, t->rolesWithStateCount);
DEBUG_PRINTF("init for Rose %p with %u state indices\n", t,
t->rolesWithStateCount);
// Rose is guaranteed 8-aligned state
assert(ISALIGNED_N(state, 8));

View File

@@ -113,12 +113,11 @@ int roseEodRunIterator(const struct RoseEngine *t, u8 *state, u64a offset,
return MO_CONTINUE_MATCHING;
}
const struct RoseRole *roleTable = getRoleTable(t);
const struct RosePred *predTable = getPredTable(t);
const struct RoseIterMapping *iterMapBase
= getByOffset(t, t->eodIterMapOffset);
DEBUG_PRINTF("running eod iterator at offset %u\n", t->eodIterOffset);
const u32 *programTable = getByOffset(t, t->eodProgramTableOffset);
const struct mmbit_sparse_iter *it = getByOffset(t, t->eodIterOffset);
assert(ISALIGNED(iterMapBase));
assert(ISALIGNED(programTable));
assert(ISALIGNED(it));
// Sparse iterator state was allocated earlier
@@ -133,52 +132,19 @@ int roseEodRunIterator(const struct RoseEngine *t, u8 *state, u64a offset,
fatbit_clear(handled_roles);
int work_done = 0; // not read from in this path.
for (; i != MMB_INVALID;
i = mmbit_sparse_iter_next(role_state, numStates, i, &idx, it, s)) {
DEBUG_PRINTF("pred state %u (iter idx=%u) is on\n", i, idx);
const struct RoseIterMapping *iterMap = iterMapBase + idx;
const struct RoseIterRole *roles = getByOffset(t, iterMap->offset);
assert(ISALIGNED(roles));
DEBUG_PRINTF("%u roles to consider\n", iterMap->count);
for (u32 j = 0; j != iterMap->count; j++) {
u32 role = roles[j].role;
assert(role < t->roleCount);
DEBUG_PRINTF("checking role %u, pred %u:\n", role, roles[j].pred);
const struct RoseRole *tr = roleTable + role;
if (fatbit_isset(handled_roles, t->roleCount, role)) {
DEBUG_PRINTF("role %u already handled by the walk, skip\n",
role);
continue;
}
// Special case: if this role is a trivial case (pred type simple)
// we don't need to check any history and we already know the pred
// role is on.
if (tr->flags & ROSE_ROLE_PRED_SIMPLE) {
DEBUG_PRINTF("pred type is simple, no need for checks\n");
} else {
assert(roles[j].pred < t->predCount);
const struct RosePred *tp = predTable + roles[j].pred;
if (!roseCheckPredHistory(tp, offset)) {
continue;
}
}
/* mark role as handled so we don't touch it again in this walk */
fatbit_set(handled_roles, t->roleCount, role);
u32 programOffset = programTable[idx];
u64a som = 0;
int work_done = 0;
hwlmcb_rv_t rv =
roseRunRoleProgram(t, tr->programOffset, offset, &som,
&(scratch->tctxt), &work_done);
if (rv == HWLM_TERMINATE_MATCHING) {
if (roseRunRoleProgram(t, programOffset, offset, &som,
&(scratch->tctxt),
&work_done) == HWLM_TERMINATE_MATCHING) {
return MO_HALT_MATCHING;
}
}
}
return MO_CONTINUE_MATCHING;
}

View File

@@ -80,8 +80,8 @@ void roseInitState(const struct RoseEngine *t, u8 *state) {
assert(t);
assert(state);
DEBUG_PRINTF("init for Rose %p with %u roles (%u with state indices)\n",
t, t->roleCount, t->rolesWithStateCount);
DEBUG_PRINTF("init for Rose %p with %u state indices)\n", t,
t->rolesWithStateCount);
// Rose is guaranteed 8-aligned state
assert(ISALIGNED_N(state, 8));

View File

@@ -1107,10 +1107,10 @@ char roseCheckRootBounds(u64a end, u32 min_bound, u32 max_bound) {
}
#define PROGRAM_CASE(name) \
case ROSE_ROLE_INSTR_##name: { \
DEBUG_PRINTF("instruction: " #name " (%u)\n", ROSE_ROLE_INSTR_##name); \
const struct ROSE_ROLE_STRUCT_##name *ri = \
(const struct ROSE_ROLE_STRUCT_##name *)pc;
case ROSE_INSTR_##name: { \
DEBUG_PRINTF("instruction: " #name " (%u)\n", ROSE_INSTR_##name); \
const struct ROSE_STRUCT_##name *ri = \
(const struct ROSE_STRUCT_##name *)pc;
#define PROGRAM_NEXT_INSTRUCTION \
pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \
@@ -1121,26 +1121,28 @@ static really_inline
hwlmcb_rv_t roseRunRoleProgram_i(const struct RoseEngine *t, u32 programOffset,
u64a end, u64a *som, struct RoseContext *tctxt,
char in_anchored, int *work_done) {
assert(programOffset);
DEBUG_PRINTF("program begins at offset %u\n", programOffset);
assert(programOffset);
assert(programOffset < t->size);
const char *pc = getByOffset(t, programOffset);
assert(*(const u8 *)pc != ROSE_ROLE_INSTR_END);
assert(*(const u8 *)pc != ROSE_INSTR_END);
for (;;) {
assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN));
u8 code = *(const u8 *)pc;
assert(code <= ROSE_ROLE_INSTR_END);
assert(code <= ROSE_INSTR_END);
switch ((enum RoseRoleInstructionCode)code) {
switch ((enum RoseInstructionCode)code) {
PROGRAM_CASE(ANCHORED_DELAY) {
if (in_anchored && end > t->floatingMinLiteralMatchOffset) {
DEBUG_PRINTF("delay until playback\n");
update_depth(tctxt, ri->depth);
tctxt->groups |= ri->groups;
*work_done = 1;
assert(ri->done_jump); // must progress
pc += ri->done_jump;
continue;
}
@@ -1151,16 +1153,29 @@ hwlmcb_rv_t roseRunRoleProgram_i(const struct RoseEngine *t, u32 programOffset,
struct core_info *ci = &tctxtToScratch(tctxt)->core_info;
if (end != ci->buf_offset + ci->len) {
DEBUG_PRINTF("should only match at end of data\n");
assert(ri->fail_jump); // must progress
pc += ri->fail_jump;
continue;
}
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CHECK_ROOT_BOUNDS) {
PROGRAM_CASE(CHECK_BOUNDS) {
if (!in_anchored &&
!roseCheckRootBounds(end, ri->min_bound, ri->max_bound)) {
DEBUG_PRINTF("failed root bounds check\n");
assert(ri->fail_jump); // must progress
pc += ri->fail_jump;
continue;
}
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CHECK_NOT_HANDLED) {
struct fatbit *handled = tctxtToScratch(tctxt)->handled_roles;
if (fatbit_set(handled, t->handledKeyCount, ri->key)) {
DEBUG_PRINTF("key %u already set\n", ri->key);
assert(ri->fail_jump); // must progress
pc += ri->fail_jump;
continue;
}
@@ -1170,6 +1185,7 @@ hwlmcb_rv_t roseRunRoleProgram_i(const struct RoseEngine *t, u32 programOffset,
PROGRAM_CASE(CHECK_LOOKAROUND) {
if (!roseCheckLookaround(t, ri->index, ri->count, end, tctxt)) {
DEBUG_PRINTF("failed lookaround check\n");
assert(ri->fail_jump); // must progress
pc += ri->fail_jump;
continue;
}
@@ -1180,6 +1196,7 @@ hwlmcb_rv_t roseRunRoleProgram_i(const struct RoseEngine *t, u32 programOffset,
if (!roseTestLeftfix(t, ri->queue, ri->lag, ri->report, end,
tctxt)) {
DEBUG_PRINTF("failed lookaround check\n");
assert(ri->fail_jump); // must progress
pc += ri->fail_jump;
continue;
}
@@ -1334,12 +1351,9 @@ hwlmcb_rv_t roseWalkSparseIterator(const struct RoseEngine *t,
struct RoseContext *tctxt) {
/* assert(!tctxt->in_anchored); */
/* assert(!tctxt->in_anch_playback); */
const struct RoseRole *roleTable = getRoleTable(t);
const struct RosePred *predTable = getPredTable(t);
const struct RoseIterMapping *iterMapBase
= getByOffset(t, tl->iterMapOffset);
const u32 *iterProgram = getByOffset(t, tl->iterProgramOffset);
const struct mmbit_sparse_iter *it = getByOffset(t, tl->iterOffset);
assert(ISALIGNED(iterMapBase));
assert(ISALIGNED(iterProgram));
assert(ISALIGNED(it));
// Sparse iterator state was allocated earlier
@@ -1357,51 +1371,20 @@ hwlmcb_rv_t roseWalkSparseIterator(const struct RoseEngine *t,
for (; i != MMB_INVALID;
i = mmbit_sparse_iter_next(role_state, numStates, i, &idx, it, s)) {
DEBUG_PRINTF("pred state %u (iter idx=%u) is on\n", i, idx);
const struct RoseIterMapping *iterMap = iterMapBase + idx;
const struct RoseIterRole *roles = getByOffset(t, iterMap->offset);
assert(ISALIGNED(roles));
u32 programOffset = iterProgram[idx];
DEBUG_PRINTF("pred state %u (iter idx=%u) is on -> program %u\n", i,
idx, programOffset);
DEBUG_PRINTF("%u roles to consider\n", iterMap->count);
for (u32 j = 0; j != iterMap->count; j++) {
u32 role = roles[j].role;
assert(role < t->roleCount);
DEBUG_PRINTF("checking role %u, pred %u:\n", role, roles[j].pred);
const struct RoseRole *tr = roleTable + role;
// If this bit is switched on in the sparse iterator, it must be
// driving a program.
assert(programOffset);
if (fatbit_isset(handled_roles, t->roleCount, role)) {
DEBUG_PRINTF("role %u already handled by the walk, skip\n",
role);
continue;
}
// Special case: if this role is a trivial case (pred type simple)
// we don't need to check any history and we already know the pred
// role is on.
if (tr->flags & ROSE_ROLE_PRED_SIMPLE) {
DEBUG_PRINTF("pred type is simple, no need for further"
" checks\n");
} else {
assert(roles[j].pred < t->predCount);
const struct RosePred *tp = predTable + roles[j].pred;
if (!roseCheckPredHistory(tp, end)) {
continue;
}
}
/* mark role as handled so we don't touch it again in this walk */
fatbit_set(handled_roles, t->roleCount, role);
if (!tr->programOffset) {
continue;
}
u64a som = 0ULL;
if (roseRunRoleProgram_i(t, tr->programOffset, end, &som, tctxt, 0,
if (roseRunRoleProgram_i(t, programOffset, end, &som, tctxt, 0,
&work_done) == HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
}
}
// If we've actually handled any roles, we might need to apply this
// literal's squash mask to our groups as well.

View File

@@ -269,30 +269,6 @@ void update_depth(struct RoseContext *tctxt, u8 depth) {
tctxt->depth = d;
}
static really_inline
int roseCheckHistoryAnch(const struct RosePred *tp, u64a end) {
DEBUG_PRINTF("end %llu min %u max %u\n", end, tp->minBound, tp->maxBound);
if (tp->maxBound == ROSE_BOUND_INF) {
return end >= tp->minBound;
} else {
return end >= tp->minBound && end <= tp->maxBound;
}
}
// Check that a predecessor's history requirements are satisfied.
static really_inline
int roseCheckPredHistory(const struct RosePred *tp, u64a end) {
DEBUG_PRINTF("pred type %u\n", tp->historyCheck);
if (tp->historyCheck == ROSE_ROLE_HISTORY_ANCH) {
return roseCheckHistoryAnch(tp, end);
}
assert(tp->historyCheck == ROSE_ROLE_HISTORY_NONE ||
tp->historyCheck == ROSE_ROLE_HISTORY_LAST_BYTE);
return 1;
}
/* Note: uses the stashed sparse iter state; cannot be called from
* anybody else who is using it */
static rose_inline

File diff suppressed because it is too large Load Diff

View File

@@ -78,77 +78,6 @@ string to_string(nfa_kind k) {
return "?";
}
// Get the RoseRole associated with a given vertex in the build graph from the
// RoseEngine.
static
const RoseRole *getRoseRole(const RoseBuildImpl &build,
const RoseEngine *engine, RoseVertex v) {
if (!engine) {
return nullptr;
}
u32 role_idx = build.g[v].role;
if (role_idx == MO_INVALID_IDX) {
return nullptr;
}
const RoseRole *roles = getRoleTable(engine);
return &roles[role_idx];
}
#define SKIP_CASE(name) \
case ROSE_ROLE_INSTR_##name: { \
const auto *ri = (const struct ROSE_ROLE_STRUCT_##name *)pc; \
pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \
break; \
}
template<int Opcode, class Struct>
const Struct *
findInstruction(const RoseEngine *t, const RoseRole *role) {
if (!role->programOffset) {
return nullptr;
}
const char *pc = (const char *)t + role->programOffset;
for (;;) {
u8 code = *(const u8 *)pc;
assert(code <= ROSE_ROLE_INSTR_END);
if (code == Opcode) {
return (const Struct *)pc;
}
// Skip to the next instruction.
switch (code) {
SKIP_CASE(ANCHORED_DELAY)
SKIP_CASE(CHECK_ONLY_EOD)
SKIP_CASE(CHECK_ROOT_BOUNDS)
SKIP_CASE(CHECK_LEFTFIX)
SKIP_CASE(CHECK_LOOKAROUND)
SKIP_CASE(SOM_ADJUST)
SKIP_CASE(SOM_LEFTFIX)
SKIP_CASE(TRIGGER_INFIX)
SKIP_CASE(TRIGGER_SUFFIX)
SKIP_CASE(REPORT)
SKIP_CASE(REPORT_CHAIN)
SKIP_CASE(REPORT_EOD)
SKIP_CASE(REPORT_SOM_INT)
SKIP_CASE(REPORT_SOM)
SKIP_CASE(REPORT_SOM_KNOWN)
SKIP_CASE(SET_STATE)
SKIP_CASE(SET_GROUPS)
case ROSE_ROLE_INSTR_END:
return nullptr;
default:
assert(0);
return nullptr;
}
}
return nullptr;
}
#undef SKIP_CASE
namespace {
class RoseGraphWriter {
@@ -174,7 +103,7 @@ public:
}
os << "[label=\"";
os << "role=" << g[v].role << "[i" << g[v].idx <<"]\\n";
os << "idx=" << g[v].idx <<"\\n";
for (u32 lit_id : g[v].literals) {
writeLiteral(os, lit_id);
@@ -198,22 +127,12 @@ public:
os << " (rep=" << as_string_list(g[v].reports) << ")";
}
const RoseRole *r = getRoseRole(v);
if (g[v].suffix) {
os << "\\nSUFFIX (TOP " << g[v].suffix.top;
if (r) {
const auto *ri =
findInstruction<ROSE_ROLE_INSTR_TRIGGER_SUFFIX,
ROSE_ROLE_STRUCT_TRIGGER_SUFFIX>(t, r);
if (ri) {
os << ", Q" << ri->queue;
}
} else {
// Can't dump the queue number, but we can identify the suffix.
if (g[v].suffix.graph) {
os << ", graph=" << g[v].suffix.graph.get()
<< " " << to_string(g[v].suffix.graph->kind);
os << ", graph=" << g[v].suffix.graph.get() << " "
<< to_string(g[v].suffix.graph->kind);
}
if (g[v].suffix.castle) {
os << ", castle=" << g[v].suffix.castle.get();
@@ -225,7 +144,6 @@ public:
os << ", haig=" << g[v].suffix.haig.get();
}
}
os << ")";
}
@@ -247,15 +165,6 @@ public:
build.isRootSuccessor(v) ? "PREFIX" : "INFIX";
os << "\\nROSE " << roseKind;
os << " (";
if (r) {
const auto *ri =
findInstruction<ROSE_ROLE_INSTR_CHECK_LEFTFIX,
ROSE_ROLE_STRUCT_CHECK_LEFTFIX>(t, r);
if (ri) {
os << "Q" << ri->queue << ", ";
}
}
os << "report " << g[v].left.leftfix_report << ")";
if (g[v].left.graph) {
@@ -348,10 +257,6 @@ private:
}
}
const RoseRole *getRoseRole(RoseVertex v) const {
return ue2::getRoseRole(build, t, v);
}
set<RoseVertex> ghost;
const RoseBuildImpl &build;
const RoseEngine *t;
@@ -383,7 +288,7 @@ namespace {
struct CompareVertexRole {
explicit CompareVertexRole(const RoseGraph &g_in) : g(g_in) {}
inline bool operator()(const RoseVertex &a, const RoseVertex &b) const {
return g[a].role < g[b].role;
return g[a].idx < g[b].idx;
}
private:
const RoseGraph &g;
@@ -483,7 +388,7 @@ void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) {
for (RoseVertex v : verts) {
// role info
os << " Role " << g[v].role << ": depth=" << depths.at(v)
os << " Index " << g[v].idx << ": depth=" << depths.at(v)
<< ", groups=0x" << hex << setw(16) << setfill('0')
<< g[v].groups << dec;
@@ -497,14 +402,14 @@ void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) {
os << ", max_offset=" << g[v].max_offset << endl;
// pred info
for (const auto &ie : in_edges_range(v, g)) {
os << " Predecessor role=";
u32 predRole = g[source(ie, g)].role;
if (predRole == MO_INVALID_IDX) {
const auto &u = source(ie, g);
os << " Predecessor idx=";
if (u == build.root) {
os << "ROOT";
} else if (predRole == g[build.anchored_root].role) {
} else if (u == build.anchored_root) {
os << "ANCHORED_ROOT";
} else {
os << predRole;
os << g[u].idx;
}
os << ": bounds [" << g[ie].minBound << ", ";
if (g[ie].maxBound == ROSE_BOUND_INF) {
@@ -589,70 +494,6 @@ void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) {
dumpTestLiterals(base + "rose_smallblock_test_literals.txt", lits);
}
static
CharReach bitvectorToReach(const u8 *reach) {
CharReach cr;
for (size_t i = 0; i < 256; i++) {
if (reach[i / 8] & (1U << (i % 8))) {
cr.set(i);
}
}
return cr;
}
static
void dumpRoseLookaround(const RoseBuildImpl &build, const RoseEngine *t,
const Grey &grey, const string &filename) {
stringstream ss;
ss << grey.dumpPath << filename;
ofstream os(ss.str());
const RoseGraph &g = build.g;
const u8 *base = (const u8 *)t;
const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset);
const u8 *reach_base = base + t->lookaroundReachOffset;
for (RoseVertex v : vertices_range(g)) {
const RoseRole *role = getRoseRole(build, t, v);
if (!role) {
continue;
}
const auto *ri =
findInstruction<ROSE_ROLE_INSTR_CHECK_LOOKAROUND,
ROSE_ROLE_STRUCT_CHECK_LOOKAROUND>(t, role);
if (!ri) {
continue;
}
const u32 look_idx = ri->index;
const u32 look_count = ri->count;
os << "Role " << g[v].role << endl;
os << " literals: " << as_string_list(g[v].literals) << endl;
os << " lookaround: index=" << look_idx << ", count=" << look_count
<< endl;
const s8 *look = look_base + look_idx;
const s8 *look_end = look + look_count;
const u8 *reach = reach_base + look_idx * REACH_BITVECTOR_LEN;
for (; look < look_end; look++, reach += REACH_BITVECTOR_LEN) {
os << " " << std::setw(4) << std::setfill(' ') << int{*look}
<< ": ";
describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT);
os << endl;
}
os << endl;
}
os.close();
}
void dumpRose(const RoseBuild &build_base, const RoseEngine *t,
const Grey &grey) {
if (!grey.dumpFlags) {
@@ -692,9 +533,6 @@ void dumpRose(const RoseBuild &build_base, const RoseEngine *t,
f = fopen((grey.dumpPath + "/rose_struct.txt").c_str(), "w");
roseDumpStructRaw(t, f);
fclose(f);
// Lookaround tables.
dumpRoseLookaround(build, t, grey, "rose_lookaround.txt");
}
} // namespace ue2

View File

@@ -89,12 +89,10 @@ RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, SomSlotManager &ssm_in,
next_nfa_report(0) {
// add root vertices to graph
g[root].idx = vertexIndex++;
g[root].role = MO_INVALID_IDX;
g[root].min_offset = 0;
g[root].max_offset = 0;
g[anchored_root].idx = vertexIndex++;
g[anchored_root].role = MO_INVALID_IDX;
g[anchored_root].min_offset = 0;
g[anchored_root].max_offset = 0;
}
@@ -194,7 +192,7 @@ bool RoseBuildImpl::hasLiteralInTable(RoseVertex v,
bool RoseBuildImpl::hasNoFloatingRoots() const {
for (auto v : adjacent_vertices_range(root, g)) {
if (isFloating(v)) {
DEBUG_PRINTF("direct floating root %u\n", g[v].role);
DEBUG_PRINTF("direct floating root %zu\n", g[v].idx);
return false;
}
}
@@ -202,7 +200,7 @@ bool RoseBuildImpl::hasNoFloatingRoots() const {
/* need to check if the anchored_root has any literals which are too deep */
for (auto v : adjacent_vertices_range(anchored_root, g)) {
if (isFloating(v)) {
DEBUG_PRINTF("indirect floating root %u\n", g[v].role);
DEBUG_PRINTF("indirect floating root %zu\n", g[v].idx);
return false;
}
}

View File

@@ -40,7 +40,9 @@
#include "nfa/nfa_build_util.h"
#include "nfa/nfa_dump_api.h"
#include "nfa/nfa_internal.h"
#include "util/dump_charclass.h"
#include "util/multibit_internal.h"
#include "util/multibit.h"
#include <algorithm>
#include <fstream>
@@ -114,159 +116,78 @@ const HWLM *getSmallBlockMatcher(const RoseEngine *t) {
return (const HWLM *)loadFromByteCodeOffset(t, t->sbmatcherOffset);
}
static
const RosePred *getPredTable(const RoseEngine *t, u32 *count) {
*count = t->predCount;
return (const RosePred *)loadFromByteCodeOffset(t, t->predOffset);
}
static
u32 literalsWithDepth(const RoseEngine *t, u8 depth) {
u32 n = 0;
const RoseLiteral *tl = getLiteralTable(t);
const RoseLiteral *tl_end = tl + t->literalCount;
for (; tl != tl_end; ++tl) {
if (tl->minDepth == depth) {
n++;
}
}
return n;
}
static
u32 literalsWithDirectReports(const RoseEngine *t) {
return t->totalNumLiterals - t->literalCount;
}
template<typename member_type_ptr>
template<typename Predicate>
static
u32 literalsWithProp(const RoseEngine *t, member_type_ptr prop) {
u32 n = 0;
size_t literalsWithPredicate(const RoseEngine *t, Predicate pred) {
const RoseLiteral *tl = getLiteralTable(t);
const RoseLiteral *tl_end = tl + t->literalCount;
for (; tl != tl_end; ++tl) {
if (tl->*prop) {
n++;
}
}
return n;
}
template<typename member_type>
static
u32 rolesWithPropValue(const RoseEngine *t, member_type RoseRole::*prop,
member_type value) {
u32 n = 0;
const RoseRole *tr = getRoleTable(t);
const RoseRole *tr_end = tr + t->roleCount;
for (; tr != tr_end; ++tr) {
if (tr->*prop == value) {
n++;
}
}
return n;
return count_if(tl, tl_end, pred);
}
static
u32 literalsInGroups(const RoseEngine *t, u32 from, u32 to) {
u32 n = 0;
const RoseLiteral *tl = getLiteralTable(t);
const RoseLiteral *tl_end = tl + t->literalCount;
size_t literalsWithDepth(const RoseEngine *t, u8 depth) {
return literalsWithPredicate(
t, [&depth](const RoseLiteral &l) { return l.minDepth == depth; });
}
static
size_t literalsInGroups(const RoseEngine *t, u32 from, u32 to) {
rose_group mask = ~((1ULL << from) - 1);
if (to < 64) {
mask &= ((1ULL << to) - 1);
}
for (; tl != tl_end; ++tl) {
if (tl->groups & mask) {
n++;
}
}
return n;
return literalsWithPredicate(
t, [&mask](const RoseLiteral &l) { return l.groups & mask; });
}
static
u32 rolesWithFlag(const RoseEngine *t, u32 flag) {
u32 n = 0;
const RoseRole *tr = getRoleTable(t);
const RoseRole *tr_end = tr + t->roleCount;
CharReach bitvectorToReach(const u8 *reach) {
CharReach cr;
for (size_t i = 0; i < 256; i++) {
if (reach[i / 8] & (1U << (i % 8))) {
cr.set(i);
for (; tr != tr_end; ++tr) {
if (tr->flags & flag) {
n++;
}
}
return n;
return cr;
}
#define HANDLE_CASE(name) \
case ROSE_ROLE_INSTR_##name: { \
const auto *ri = (const struct ROSE_ROLE_STRUCT_##name *)pc; \
pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \
break; \
}
static
u32 rolesWithInstr(const RoseEngine *t,
enum RoseRoleInstructionCode find_code) {
u32 n = 0;
const RoseRole *tr = getRoleTable(t);
const RoseRole *tr_end = tr + t->roleCount;
void dumpLookaround(ofstream &os, const RoseEngine *t,
const ROSE_STRUCT_CHECK_LOOKAROUND *ri) {
assert(ri);
for (; tr != tr_end; ++tr) {
if (!tr->programOffset) {
continue;
}
const u8 *base = (const u8 *)t;
const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset);
const u8 *reach_base = base + t->lookaroundReachOffset;
const char *pc = (const char *)t + tr->programOffset;
for (;;) {
u8 code = *(const u8 *)pc;
assert(code <= ROSE_ROLE_INSTR_END);
if (code == find_code) {
n++;
goto next_role;
const s8 *look = look_base + ri->index;
const s8 *look_end = look + ri->count;
const u8 *reach = reach_base + ri->index * REACH_BITVECTOR_LEN;
os << " contents:" << endl;
for (; look < look_end; look++, reach += REACH_BITVECTOR_LEN) {
os << " " << std::setw(4) << std::setfill(' ') << int{*look}
<< ": ";
describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT);
os << endl;
}
switch (code) {
HANDLE_CASE(CHECK_ONLY_EOD)
HANDLE_CASE(CHECK_ROOT_BOUNDS)
HANDLE_CASE(CHECK_LOOKAROUND)
HANDLE_CASE(CHECK_LEFTFIX)
HANDLE_CASE(ANCHORED_DELAY)
HANDLE_CASE(SOM_ADJUST)
HANDLE_CASE(SOM_LEFTFIX)
HANDLE_CASE(TRIGGER_INFIX)
HANDLE_CASE(TRIGGER_SUFFIX)
HANDLE_CASE(REPORT)
HANDLE_CASE(REPORT_CHAIN)
HANDLE_CASE(REPORT_EOD)
HANDLE_CASE(REPORT_SOM_INT)
HANDLE_CASE(REPORT_SOM)
HANDLE_CASE(REPORT_SOM_KNOWN)
HANDLE_CASE(SET_STATE)
HANDLE_CASE(SET_GROUPS)
case ROSE_ROLE_INSTR_END:
goto next_role;
default:
assert(0);
return 0;
}
}
next_role:;
}
return n;
}
#undef HANDLE_CASE
#define PROGRAM_CASE(name) \
case ROSE_ROLE_INSTR_##name: { \
case ROSE_INSTR_##name: { \
os << " " << std::setw(4) << std::setfill('0') << (pc - pc_base) \
<< ": " #name " (" << (int)ROSE_ROLE_INSTR_##name << ")" << endl; \
const auto *ri = (const struct ROSE_ROLE_STRUCT_##name *)pc;
<< ": " #name " (" << (int)ROSE_INSTR_##name << ")" << endl; \
const auto *ri = (const struct ROSE_STRUCT_##name *)pc;
#define PROGRAM_NEXT_INSTRUCTION \
pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \
@@ -274,11 +195,11 @@ u32 rolesWithInstr(const RoseEngine *t,
}
static
void dumpRoleProgram(ofstream &os, const char *pc) {
void dumpRoleProgram(ofstream &os, const RoseEngine *t, const char *pc) {
const char *pc_base = pc;
for (;;) {
u8 code = *(const u8 *)pc;
assert(code <= ROSE_ROLE_INSTR_END);
assert(code <= ROSE_INSTR_END);
switch (code) {
PROGRAM_CASE(ANCHORED_DELAY) {
os << " depth " << u32{ri->depth} << endl;
@ -293,17 +214,24 @@ void dumpRoleProgram(ofstream &os, const char *pc) {
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CHECK_ROOT_BOUNDS) {
PROGRAM_CASE(CHECK_BOUNDS) {
os << " min_bound " << ri->min_bound << endl;
os << " max_bound " << ri->max_bound << endl;
os << " fail_jump +" << ri->fail_jump << endl;
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CHECK_NOT_HANDLED) {
os << " key " << ri->key << endl;
os << " fail_jump +" << ri->fail_jump << endl;
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CHECK_LOOKAROUND) {
os << " index " << ri->index << endl;
os << " count " << ri->count << endl;
os << " fail_jump +" << ri->fail_jump << endl;
dumpLookaround(os, t, ri);
}
PROGRAM_NEXT_INSTRUCTION
@@ -396,26 +324,27 @@ void dumpRoleProgram(ofstream &os, const char *pc) {
#undef PROGRAM_NEXT_INSTRUCTION
static
void dumpRoseRolePrograms(const RoseEngine *t, const string &filename) {
ofstream os(filename);
void dumpSparseIterPrograms(ofstream &os, const RoseEngine *t, u32 iterOffset,
u32 programTableOffset) {
const auto *it =
(const mmbit_sparse_iter *)loadFromByteCodeOffset(t, iterOffset);
const u32 *programTable =
(const u32 *)loadFromByteCodeOffset(t, programTableOffset);
const RoseRole *roles = getRoleTable(t);
const char *base = (const char *)t;
// Construct a full multibit.
const u32 total_bits = t->rolesWithStateCount;
const vector<u8> bits(mmbit_size(total_bits), u8{0xff});
for (u32 i = 0; i < t->roleCount; i++) {
const RoseRole *role = &roles[i];
os << "Role " << i << endl;
if (!role->programOffset) {
os << " <no program>" << endl;
continue;
struct mmbit_sparse_state s[MAX_SPARSE_ITER_STATES];
u32 idx = 0;
for (u32 i = mmbit_sparse_iter_begin(bits.data(), total_bits, &idx, it, s);
i != MMB_INVALID;
i = mmbit_sparse_iter_next(bits.data(), total_bits, i, &idx, it, s)) {
u32 programOffset = programTable[idx];
os << "Sparse Iter Program " << idx << " triggered by state " << i
<< " @ " << programOffset << ":" << endl;
dumpRoleProgram(os, t, (const char *)t + programOffset);
}
dumpRoleProgram(os, base + role->programOffset);
os << endl;
}
os.close();
}
static
@@ -427,12 +356,23 @@ void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) {
for (u32 i = 0; i < t->literalCount; i++) {
const RoseLiteral *lit = &lits[i];
if (!lit->rootProgramOffset) {
continue;
os << "Literal " << i << endl;
os << "---------------" << endl;
if (lit->rootProgramOffset) {
os << "Root Program @ " << lit->rootProgramOffset << ":" << endl;
dumpRoleProgram(os, t, base + lit->rootProgramOffset);
} else {
os << "<No Root Program>" << endl;
}
if (lit->iterOffset != ROSE_OFFSET_INVALID) {
dumpSparseIterPrograms(os, t, lit->iterOffset,
lit->iterProgramOffset);
} else {
os << "<No Sparse Iter Programs>" << endl;
}
os << "Literal " << i << endl;
dumpRoleProgram(os, base + lit->rootProgramOffset);
os << endl;
}
@@ -440,37 +380,17 @@ void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) {
}
static
const char *historyName(RoseRoleHistory h) {
switch (h) {
case ROSE_ROLE_HISTORY_NONE:
return "history none";
case ROSE_ROLE_HISTORY_ANCH:
return "history anch";
case ROSE_ROLE_HISTORY_LAST_BYTE:
return "history last_byte";
default:
return "unknown";
}
}
void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) {
ofstream os(filename);
static
void dumpPreds(FILE *f, const RoseEngine *t) {
map<RoseRoleHistory, u32> counts;
u32 predCount = 0;
const RosePred *tp = getPredTable(t, &predCount);
const RosePred *tp_end = tp + predCount;
for (; tp != tp_end; ++tp) {
assert(tp->historyCheck < ROSE_ROLE_HISTORY_INVALID);
counts[(RoseRoleHistory)tp->historyCheck] += 1;
if (t->eodIterOffset) {
dumpSparseIterPrograms(os, t, t->eodIterOffset,
t->eodProgramTableOffset);
} else {
os << "<No EOD Iter Programs>" << endl;
}
for (map<RoseRoleHistory, u32>::const_iterator it = counts.begin(),
ite = counts.end();
it != ite; ++it) {
fprintf(f, " - %-18s: %u\n", historyName(it->first), it->second);
}
os.close();
}
static
@@ -805,16 +725,12 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
sbtable ? hwlmSize(sbtable) : 0, t->smallBlockDistance);
fprintf(f, " - literal table : %zu bytes\n",
t->literalCount * sizeof(RoseLiteral));
fprintf(f, " - role table : %zu bytes\n",
t->roleCount * sizeof(RoseRole));
fprintf(f, " - pred table : %zu bytes\n",
t->predCount * sizeof(RosePred));
fprintf(f, " - role state table : %zu bytes\n",
t->rolesWithStateCount * sizeof(u32));
fprintf(f, " - nfa info table : %u bytes\n",
t->anchoredReportMapOffset - t->nfaInfoOffset);
fprintf(f, " - lookaround table : %u bytes\n",
t->predOffset - t->lookaroundTableOffset);
t->nfaInfoOffset - t->lookaroundTableOffset);
fprintf(f, " - lookaround reach : %u bytes\n",
t->lookaroundTableOffset - t->lookaroundReachOffset);
@@ -839,46 +755,30 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
fprintf(f, "\n");
fprintf(f, "initial groups : 0x%016llx\n", t->initialGroups);
fprintf(f, "handled key count : %u\n", t->handledKeyCount);
fprintf(f, "\n");
fprintf(f, "number of literals : %u\n", t->totalNumLiterals);
fprintf(f, " - delayed : %u\n", t->delay_count);
fprintf(f, " - direct report : %u\n",
literalsWithDirectReports(t));
fprintf(f, " - that squash group : %u\n",
literalsWithProp(t, &RoseLiteral::squashesGroup));
fprintf(f, " - that squash group : %zu\n",
literalsWithPredicate(
t, [](const RoseLiteral &l) { return l.squashesGroup != 0; }));
fprintf(f, " - with benefits : %u\n", t->nonbenefits_base_id);
u32 group_weak_end = t->group_weak_end;
fprintf(f, " - with root program : %zu\n",
literalsWithPredicate(t, [](const RoseLiteral &l) {
return l.rootProgramOffset != 0;
}));
fprintf(f, " - with sparse iter : %zu\n",
literalsWithPredicate(t, [](const RoseLiteral &l) {
return l.iterOffset != ROSE_OFFSET_INVALID;
}));
fprintf(f, " - in groups ::\n");
fprintf(f, " + weak : %u\n",
literalsInGroups(t, 0, group_weak_end));
fprintf(f, " + general : %u\n",
literalsInGroups(t, group_weak_end, sizeof(u64a) * 8));
fprintf(f, "number of roles : %u\n", t->roleCount);
fprintf(f, " - with state index : %u\n", t->rolesWithStateCount);
fprintf(f, " - with leftfix nfa : %u\n",
rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_LEFTFIX));
fprintf(f, " - with suffix nfa : %u\n",
rolesWithInstr(t, ROSE_ROLE_INSTR_TRIGGER_SUFFIX));
fprintf(f, " - with lookaround : %u\n",
rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_LOOKAROUND));
fprintf(f, " - with reports : %u\n",
rolesWithInstr(t, ROSE_ROLE_INSTR_REPORT));
fprintf(f, " - with som reports : %u\n",
rolesWithInstr(t, ROSE_ROLE_INSTR_REPORT_SOM_INT));
fprintf(f, " - match only at end : %u\n",
rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_ONLY_EOD));
fprintf(f, " + anchored : %u\n", t->anchoredMatches);
fprintf(f, " - simple preds : %u\n",
rolesWithFlag(t, ROSE_ROLE_PRED_SIMPLE));
fprintf(f, " - bound root preds : %u\n",
rolesWithInstr(t, ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS));
fprintf(f, " - 'any' preds : %u\n",
rolesWithFlag(t, ROSE_ROLE_PRED_ANY));
fprintf(f, "number of preds : %u\n", t->predCount);
dumpPreds(f, t);
fprintf(f, " + weak : %zu\n",
literalsInGroups(t, 0, t->group_weak_end));
fprintf(f, " + general : %zu\n",
literalsInGroups(t, t->group_weak_end, sizeof(u64a) * 8));
u32 depth1 = literalsWithDepth(t, 1);
u32 depth2 = literalsWithDepth(t, 2);
@@ -977,16 +877,13 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
DUMP_U32(t, activeArrayCount);
DUMP_U32(t, activeLeftCount);
DUMP_U32(t, queueCount);
DUMP_U32(t, roleOffset);
DUMP_U32(t, roleCount);
DUMP_U32(t, predOffset);
DUMP_U32(t, predCount);
DUMP_U32(t, handledKeyCount);
DUMP_U32(t, leftOffset);
DUMP_U32(t, roseCount);
DUMP_U32(t, lookaroundTableOffset);
DUMP_U32(t, lookaroundReachOffset);
DUMP_U32(t, eodIterOffset);
DUMP_U32(t, eodIterMapOffset);
DUMP_U32(t, eodProgramTableOffset);
DUMP_U32(t, lastByteHistoryIterOffset);
DUMP_U32(t, minWidth);
DUMP_U32(t, minWidthExcludingBoundaries);
@@ -1048,52 +945,15 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine));
}
static
void roseDumpPredStructRaw(const RoseEngine *t, FILE *f) {
u32 pred_count = 0;
const RosePred *pred_table = getPredTable(t, &pred_count);
fprintf(f, "pred_count = %u\n", pred_count);
if (!pred_table) {
return;
}
for (const RosePred *p = pred_table; p < pred_table + pred_count; p++) {
fprintf(f, "pred[%zu] = {\n", p - pred_table);
DUMP_U32(p, role);
DUMP_U32(p, minBound);
DUMP_U32(p, maxBound);
DUMP_U8(p, historyCheck);
fprintf(f, "}\n");
}
}
static
void roseDumpRoleStructRaw(const RoseEngine *t, FILE *f) {
const RoseRole *tr = getRoleTable(t);
const RoseRole *tr_end = tr + t->roleCount;
fprintf(f, "role_count = %zd\n", tr_end - tr);
if (!tr) {
return;
}
for (const RoseRole *p = tr; p < tr_end; p++) {
fprintf(f, "role[%zu] = {\n", p - tr);
DUMP_U32(p, flags);
DUMP_U32(p, programOffset);
fprintf(f, "}\n");
}
}
void roseDumpComponents(const RoseEngine *t, bool dump_raw, const string &base) {
void roseDumpComponents(const RoseEngine *t, bool dump_raw,
const string &base) {
dumpComponentInfo(t, base);
dumpNfas(t, dump_raw, base);
dumpAnchored(t, base);
dumpRevComponentInfo(t, base);
dumpRevNfas(t, dump_raw, base);
// Role programs.
dumpRoseRolePrograms(t, base + "/rose_role_programs.txt");
dumpRoseLitPrograms(t, base + "/rose_lit_root_programs.txt");
dumpRoseLitPrograms(t, base + "/rose_lit_programs.txt");
dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt");
}
void roseDumpInternals(const RoseEngine *t, const string &base) {
@ -1139,14 +999,6 @@ void roseDumpInternals(const RoseEngine *t, const string &base) {
roseDumpStructRaw(t, f);
fclose(f);
f = fopen((base + "/rose_preds.txt").c_str(), "w");
roseDumpPredStructRaw(t, f);
fclose(f);
f = fopen((base + "/rose_roles.txt").c_str(), "w");
roseDumpRoleStructRaw(t, f);
fclose(f);
roseDumpComponents(t, true, base);
}

View File

@ -39,7 +39,7 @@
#include "ue2common.h"
#include "rose_build.h"
#include "rose_internal.h" /* role history, etc */
#include "rose_internal.h"
#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
#include "util/charreach.h"
#include "util/depth.h"
@ -65,6 +65,14 @@ enum rose_literal_table {
ROSE_EVENT //!< "literal-like" events, such as EOD
};
/** \brief Edge history types: how the bound between a role and its
 * predecessor is tracked/checked at runtime. */
enum RoseRoleHistory {
    ROSE_ROLE_HISTORY_NONE, //!< no special history
    ROSE_ROLE_HISTORY_ANCH, //!< previous role is at a fixed offset
    ROSE_ROLE_HISTORY_LAST_BYTE, //!< previous role can only match at EOD
    ROSE_ROLE_HISTORY_INVALID //!< history not yet assigned
};
#include "util/order_check.h"
/** \brief Provides information about the (pre|in)fix engine to the left of a
@ -140,9 +148,6 @@ struct RoseVertexProps {
/** \brief Report IDs to fire. */
flat_set<ReportID> reports;
/** \brief Role ID for this vertex. These are what end up in the bytecode. */
u32 role = ~u32{0};
/** \brief Bitmask of groups that this role sets. */
rose_group groups = 0;

View File

@ -73,18 +73,55 @@ ReportID literalToReport(u32 id) {
return id & ~LITERAL_DR_FLAG;
}
// Structure representing a literal. Each literal may have many roles.
/** \brief Structure representing a literal. */
struct RoseLiteral {
u32 rootProgramOffset; // role program to run for root roles.
u32 iterOffset; // offset of sparse iterator, relative to rose
u32 iterMapOffset; // offset of the iter mapping table, relative to rose
rose_group groups; // bitset of groups that cause this literal to fire.
u8 minDepth; // the minimum of this literal's roles' depths (for depths > 1)
u8 squashesGroup; /**< literal switches off its group behind it if it sets a
* role */
u32 delay_mask; /**< bit set indicates that the literal inserts a delayed
* match at the given offset */
u32 delayIdsOffset; // offset to array of ids to poke in the delay structure
/**
* \brief Role program to run unconditionally when this literal is seen.
*
* Offset is relative to RoseEngine, or zero for no program.
*/
u32 rootProgramOffset;
/**
* \brief Offset of sparse iterator (mmbit_sparse_iter pointer) over
* predecessor states.
*
* Offset is relative to RoseEngine, set to ROSE_OFFSET_INVALID for no
* iterator.
*/
u32 iterOffset;
/**
* \brief Table of role programs to run when triggered by the sparse
* iterator, indexed by dense sparse iter index.
*
* Offset is relative to RoseEngine, zero for no programs.
*/
u32 iterProgramOffset;
/** \brief Bitset of groups that cause this literal to fire. */
rose_group groups;
/**
* \brief The minimum depth of this literal in the Rose graph (for depths
* greater than 1).
*/
u8 minDepth;
/**
* \brief True if this literal switches off its group behind it when it
* sets a role.
*/
u8 squashesGroup;
/**
* \brief Bitset which indicates that the literal inserts a delayed
* match at the given offset.
*/
u32 delay_mask;
/** \brief Offset to array of ids to poke in the delay structure. */
u32 delayIdsOffset;
};
/* Allocation of Rose literal ids
@ -179,15 +216,6 @@ struct RoseLiteral {
* terminals.
*/
/* We have different types of role history storage, describing how a role's
 * bound relative to its predecessor is checked at runtime. */
enum RoseRoleHistory {
    ROSE_ROLE_HISTORY_NONE, /* no special history required */
    ROSE_ROLE_HISTORY_ANCH, /* used when previous role is at a fixed offset */
    ROSE_ROLE_HISTORY_LAST_BYTE, /* used when previous role can only match at the
                                  * last byte of a stream */
    ROSE_ROLE_HISTORY_INVALID /* history not yet assigned */
};
struct RoseCountingMiracle {
char shufti; /** 1: count shufti class; 0: count a single character */
u8 count; /** minimum number of occurrences for the counting
@ -225,15 +253,6 @@ struct NfaInfo {
* matches */
};
/* We allow different types of role-predecessor relationships. These are stored
* in with the flags */
#define ROSE_ROLE_PRED_SIMPLE (1U << 21) /**< single [0,inf] pred, no
* offset tracking */
#define ROSE_ROLE_PRED_ANY (1U << 23) /**< any of our preds can match */
#define ROSE_ROLE_PRED_CLEAR_MASK \
(~(ROSE_ROLE_PRED_SIMPLE | ROSE_ROLE_PRED_ANY))
#define MAX_STORED_LEFTFIX_LAG 127 /* max leftfix lag that we can store in one
* whole byte (OWB) (streaming only). Other
* values in OWB are reserved for zombie
@ -241,33 +260,6 @@ struct NfaInfo {
#define OWB_ZOMBIE_ALWAYS_YES 128 /* nfa will always answer yes to any rose
* prefix checks */
// Structure representing a literal role.
struct RoseRole {
    u32 flags; // role flags (includes ROSE_ROLE_PRED_* predecessor bits)
    u32 programOffset; /**< offset to program to run; presumably relative to
                        * the start of the RoseEngine — TODO confirm */
};
// Structure representing a predecessor relationship between two roles.
struct RosePred {
    u32 role; // index of predecessor role
    u32 minBound; // min bound on distance from pred (_ANCH -> absolute offset)
    u32 maxBound; /* max bound on distance from pred, or ROSE_BOUND_INF
                   * (_ANCH -> absolute offset) */
    u8 historyCheck; // bound-check mechanism, from enum RoseRoleHistory
};
// Structure mapping between the dense index produced by the literal sparse
// iterator and a list of roles (a contiguous run of RoseIterRole entries).
struct RoseIterMapping {
    u32 offset; // offset into iter role table
    u32 count; // number of roles at that offset
};
// Entry in the iter role table: a role paired with one of its predecessors.
struct RoseIterRole {
    u32 role; // index into the RoseRole table — presumably; TODO confirm
    u32 pred; // index into the RosePred table — presumably; TODO confirm
};
/**
* \brief Rose state offsets.
*
@ -376,8 +368,6 @@ struct RoseBoundaryReports {
// 1c. eod-anchored literal matcher table
// 1d. small block table
// 2. array of RoseLiteral (literalCount entries)
// 3. array of RoseRole (roleCount entries)
// 4. array of RosePred (predCount entries)
// 8. array of NFA offsets, one per queue
// 9. array of state offsets, one per queue (+)
// 10. array of role ids for the set of all root roles
@ -447,10 +437,10 @@ struct RoseEngine {
u32 activeArrayCount; //number of nfas tracked in the active array
u32 activeLeftCount; //number of nfas tracked in the active rose array
u32 queueCount; /**< number of nfa queues */
u32 roleOffset; // offset of RoseRole array (bytes)
u32 roleCount; // number of RoseRole entries
u32 predOffset; // offset of RosePred array (bytes)
u32 predCount; // number of RosePred entries
/** \brief Number of keys used by CHECK_SET_HANDLED instructions in role
* programs. Used to size the handled_roles fatbit in scratch. */
u32 handledKeyCount;
u32 leftOffset;
u32 roseCount;
@ -459,7 +449,7 @@ struct RoseEngine {
* bytes each) */
u32 eodIterOffset; // or 0 if no eod iterator
u32 eodIterMapOffset;
u32 eodProgramTableOffset;
u32 lastByteHistoryIterOffset; // if non-zero
@ -614,22 +604,6 @@ const struct RoseLiteral *getLiteralTable(const struct RoseEngine *t) {
return tl;
}
/** \brief Returns a pointer to the RoseRole table inside the bytecode. */
static really_inline
const struct RoseRole *getRoleTable(const struct RoseEngine *t) {
    const char *base = (const char *)t;
    const struct RoseRole *roles =
        (const struct RoseRole *)(base + t->roleOffset);
    assert(ISALIGNED_N(roles, 4)); // table is laid out 4-byte aligned
    return roles;
}
/** \brief Returns a pointer to the RosePred table inside the bytecode. */
static really_inline
const struct RosePred *getPredTable(const struct RoseEngine *t) {
    const char *base = (const char *)t;
    const struct RosePred *preds =
        (const struct RosePred *)(base + t->predOffset);
    assert(ISALIGNED_N(preds, 4)); // table is laid out 4-byte aligned
    return preds;
}
static really_inline
const struct LeftNfaInfo *getLeftTable(const struct RoseEngine *t) {
const struct LeftNfaInfo *r

View File

@ -40,54 +40,61 @@
#define ROSE_INSTR_MIN_ALIGN 8U
/** \brief Role program instruction opcodes. */
enum RoseRoleInstructionCode {
ROSE_ROLE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher.
ROSE_ROLE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD.
ROSE_ROLE_INSTR_CHECK_ROOT_BOUNDS, //!< Bounds on distance from root.
ROSE_ROLE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check.
ROSE_ROLE_INSTR_CHECK_LEFTFIX, //!< Leftfix must be in accept state.
ROSE_ROLE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM.
ROSE_ROLE_INSTR_SOM_LEFTFIX, //!< Acquire SOM from a leftfix engine.
ROSE_ROLE_INSTR_TRIGGER_INFIX, //!< Trigger an infix engine.
ROSE_ROLE_INSTR_TRIGGER_SUFFIX, //!< Trigger a suffix engine.
ROSE_ROLE_INSTR_REPORT, //!< Fire an ordinary report.
ROSE_ROLE_INSTR_REPORT_CHAIN, //!< Fire a chained report (MPV).
ROSE_ROLE_INSTR_REPORT_EOD, //!< Fire a callback at EOD time.
ROSE_ROLE_INSTR_REPORT_SOM_INT, //!< Manipulate SOM only.
ROSE_ROLE_INSTR_REPORT_SOM, //!< Manipulate SOM and report.
ROSE_ROLE_INSTR_REPORT_SOM_KNOWN, //!< Rose role knows its SOM offset.
ROSE_ROLE_INSTR_SET_STATE, //!< Switch a state index on.
ROSE_ROLE_INSTR_SET_GROUPS, //!< Set some literal group bits.
ROSE_ROLE_INSTR_END //!< End of program.
enum RoseInstructionCode {
ROSE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher.
ROSE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD.
ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance from offset 0.
ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled".
ROSE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check.
ROSE_INSTR_CHECK_LEFTFIX, //!< Leftfix must be in accept state.
ROSE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM.
ROSE_INSTR_SOM_LEFTFIX, //!< Acquire SOM from a leftfix engine.
ROSE_INSTR_TRIGGER_INFIX, //!< Trigger an infix engine.
ROSE_INSTR_TRIGGER_SUFFIX, //!< Trigger a suffix engine.
ROSE_INSTR_REPORT, //!< Fire an ordinary report.
ROSE_INSTR_REPORT_CHAIN, //!< Fire a chained report (MPV).
ROSE_INSTR_REPORT_EOD, //!< Fire a callback at EOD time.
ROSE_INSTR_REPORT_SOM_INT, //!< Manipulate SOM only.
ROSE_INSTR_REPORT_SOM, //!< Manipulate SOM and report.
ROSE_INSTR_REPORT_SOM_KNOWN, //!< Rose role knows its SOM offset.
ROSE_INSTR_SET_STATE, //!< Switch a state index on.
ROSE_INSTR_SET_GROUPS, //!< Set some literal group bits.
ROSE_INSTR_END //!< End of program.
};
struct ROSE_ROLE_STRUCT_ANCHORED_DELAY {
struct ROSE_STRUCT_ANCHORED_DELAY {
u8 code; //!< From enum RoseRoleInstructionCode.
u8 depth; //!< Depth for this state.
rose_group groups; //!< Bitmask.
u32 done_jump; //!< Jump forward this many bytes if successful.
};
struct ROSE_ROLE_STRUCT_CHECK_ONLY_EOD {
struct ROSE_STRUCT_CHECK_ONLY_EOD {
u8 code; //!< From enum RoseRoleInstructionCode.
u32 fail_jump; //!< Jump forward this many bytes on failure.
};
struct ROSE_ROLE_STRUCT_CHECK_ROOT_BOUNDS {
struct ROSE_STRUCT_CHECK_BOUNDS {
u8 code; //!< From enum RoseRoleInstructionCode.
u32 min_bound; //!< Min distance from zero.
u32 max_bound; //!< Max distance from zero (or ROSE_BOUND_INF).
u32 fail_jump; //!< Jump forward this many bytes on failure.
};
struct ROSE_ROLE_STRUCT_CHECK_LOOKAROUND {
struct ROSE_STRUCT_CHECK_NOT_HANDLED {
u8 code; //!< From enum RoseRoleInstructionCode.
u32 key; //!< Key in the "handled_roles" fatbit in scratch.
u32 fail_jump; //!< Jump forward this many bytes if we have seen key before.
};
struct ROSE_STRUCT_CHECK_LOOKAROUND {
u8 code; //!< From enum RoseRoleInstructionCode.
u32 index;
u32 count;
u32 fail_jump; //!< Jump forward this many bytes on failure.
};
struct ROSE_ROLE_STRUCT_CHECK_LEFTFIX {
struct ROSE_STRUCT_CHECK_LEFTFIX {
u8 code; //!< From enum RoseRoleInstructionCode.
u32 queue; //!< Queue of leftfix to check.
u32 lag; //!< Lag of leftfix for this case.
@ -95,72 +102,72 @@ struct ROSE_ROLE_STRUCT_CHECK_LEFTFIX {
u32 fail_jump; //!< Jump forward this many bytes on failure.
};
struct ROSE_ROLE_STRUCT_SOM_ADJUST {
struct ROSE_STRUCT_SOM_ADJUST {
u8 code; //!< From enum RoseRoleInstructionCode.
u32 distance; //!< Distance to EOM.
};
struct ROSE_ROLE_STRUCT_SOM_LEFTFIX {
struct ROSE_STRUCT_SOM_LEFTFIX {
u8 code; //!< From enum RoseRoleInstructionCode.
u32 queue; //!< Queue index of leftfix providing SOM.
u32 lag; //!< Lag of leftfix for this case.
};
struct ROSE_ROLE_STRUCT_TRIGGER_INFIX {
struct ROSE_STRUCT_TRIGGER_INFIX {
u8 code; //!< From enum RoseRoleInstructionCode.
u8 cancel; //!< Cancels previous top event.
u32 queue; //!< Queue index of infix.
u32 event; //!< Queue event, from MQE_*.
};
struct ROSE_ROLE_STRUCT_TRIGGER_SUFFIX {
struct ROSE_STRUCT_TRIGGER_SUFFIX {
u8 code; //!< From enum RoseRoleInstructionCode.
u32 queue; //!< Queue index of suffix.
u32 event; //!< Queue event, from MQE_*.
};
struct ROSE_ROLE_STRUCT_REPORT {
struct ROSE_STRUCT_REPORT {
u8 code; //!< From enum RoseRoleInstructionCode.
ReportID report;
};
struct ROSE_ROLE_STRUCT_REPORT_CHAIN {
struct ROSE_STRUCT_REPORT_CHAIN {
u8 code; //!< From enum RoseRoleInstructionCode.
ReportID report;
};
struct ROSE_ROLE_STRUCT_REPORT_EOD {
struct ROSE_STRUCT_REPORT_EOD {
u8 code; //!< From enum RoseRoleInstructionCode.
ReportID report;
};
struct ROSE_ROLE_STRUCT_REPORT_SOM_INT {
struct ROSE_STRUCT_REPORT_SOM_INT {
u8 code; //!< From enum RoseRoleInstructionCode.
ReportID report;
};
struct ROSE_ROLE_STRUCT_REPORT_SOM {
struct ROSE_STRUCT_REPORT_SOM {
u8 code; //!< From enum RoseRoleInstructionCode.
ReportID report;
};
struct ROSE_ROLE_STRUCT_REPORT_SOM_KNOWN {
struct ROSE_STRUCT_REPORT_SOM_KNOWN {
u8 code; //!< From enum RoseRoleInstructionCode.
ReportID report;
};
struct ROSE_ROLE_STRUCT_SET_STATE {
struct ROSE_STRUCT_SET_STATE {
u8 code; //!< From enum RoseRoleInstructionCode.
u8 depth; //!< Depth for this state.
u32 index; //!< State index in multibit.
};
struct ROSE_ROLE_STRUCT_SET_GROUPS {
struct ROSE_STRUCT_SET_GROUPS {
u8 code; //!< From enum RoseRoleInstructionCode.
rose_group groups; //!< Bitmask.
};
struct ROSE_ROLE_STRUCT_END {
struct ROSE_STRUCT_END {
u8 code; //!< From enum RoseRoleInstructionCode.
};

View File

@ -172,15 +172,6 @@ const struct internal_report *getInternalReport(const struct RoseEngine *t,
return reports + intId;
}
/** \brief Looks up a RoseRole by its byte offset from the engine base. */
static really_inline
const struct RoseRole *getRoleByOffset(const struct RoseEngine *t, u32 offset) {
    const struct RoseRole *role =
        (const void *)((const char *)t + offset);
    // The offset must land inside the role table.
    assert((size_t)(role - getRoleTable(t)) < t->roleCount);
    DEBUG_PRINTF("get root role %zu\n", role - getRoleTable(t));
    return role;
}
#define ANCHORED_MATCH_SENTINEL (~0U)
static really_inline

View File

@ -90,7 +90,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
+ bStateSize + tStateSize
+ fullStateSize + 63 /* cacheline padding */
+ nfa_context_size
+ fatbit_size(proto->roleCount) /* handled roles */
+ fatbit_size(proto->handledKeyCount) /* handled roles */
+ fatbit_size(queueCount) /* active queue array */
+ 2 * fatbit_size(deduperCount) /* need odd and even logs */
+ 2 * fatbit_size(deduperCount) /* ditto som logs */
@ -192,7 +192,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
current += fatbit_size(queueCount);
s->handled_roles = (struct fatbit *)current;
current += fatbit_size(proto->roleCount);
current += fatbit_size(proto->handledKeyCount);
s->deduper.log[0] = (struct fatbit *)current;
current += fatbit_size(deduperCount);
@ -312,9 +312,9 @@ hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) {
proto->delay_count = rose->delay_count;
}
if (rose->roleCount > proto->roleCount) {
if (rose->handledKeyCount > proto->handledKeyCount) {
resize = 1;
proto->roleCount = rose->roleCount;
proto->handledKeyCount = rose->handledKeyCount;
}
if (rose->tStateSize > proto->tStateSize) {

View File

@ -180,7 +180,7 @@ struct ALIGN_CL_DIRECTIVE hs_scratch {
u32 delay_count;
u32 scratchSize;
u8 ALIGN_DIRECTIVE fdr_temp_buf[FDR_TEMP_BUF_SIZE];
u32 roleCount;
u32 handledKeyCount;
struct fatbit *handled_roles; /**< mmbit of ROLES (not states) already
* handled by this literal */
u64a *som_store; /**< array of som locations */