mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-11-17 01:41:51 +03:00
limex: rework accept handling
Rather that iterating over NFAAccept structures and testing individual bits in the state structure, iterate over the state vector and index into accept structures. Adds report list support to this path, unified with the report lists used for exception handling.
This commit is contained in:
committed by
Matthew Barr
parent
3dcfea19e0
commit
77fe1ef6e5
@@ -992,14 +992,105 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask,
|
||||
}
|
||||
|
||||
static
|
||||
void buildAccepts(const build_info &args, NFAStateSet &acceptMask,
|
||||
NFAStateSet &acceptEodMask, vector<NFAAccept> &accepts,
|
||||
vector<NFAAccept> &acceptsEod, vector<NFAStateSet> &squash) {
|
||||
u32 addSquashMask(const build_info &args, const NFAVertex &v,
|
||||
vector<NFAStateSet> &squash) {
|
||||
auto sit = args.reportSquashMap.find(v);
|
||||
if (sit == args.reportSquashMap.end()) {
|
||||
return MO_INVALID_IDX;
|
||||
}
|
||||
|
||||
// This state has a squash mask. Paw through the existing vector to
|
||||
// see if we've already seen it, otherwise add a new one.
|
||||
auto it = find(squash.begin(), squash.end(), sit->second);
|
||||
if (it != squash.end()) {
|
||||
return verify_u32(distance(squash.begin(), it));
|
||||
}
|
||||
u32 idx = verify_u32(squash.size());
|
||||
squash.push_back(sit->second);
|
||||
return idx;
|
||||
}
|
||||
|
||||
static
|
||||
u32 addReports(const flat_set<ReportID> &r, vector<ReportID> &reports,
|
||||
unordered_map<vector<ReportID>, u32> &reportListCache) {
|
||||
assert(!r.empty());
|
||||
|
||||
vector<ReportID> my_reports(begin(r), end(r));
|
||||
my_reports.push_back(MO_INVALID_IDX); // sentinel
|
||||
|
||||
auto cache_it = reportListCache.find(my_reports);
|
||||
if (cache_it != end(reportListCache)) {
|
||||
u32 offset = cache_it->second;
|
||||
DEBUG_PRINTF("reusing cached report list at %u\n", offset);
|
||||
return offset;
|
||||
}
|
||||
|
||||
auto it = search(begin(reports), end(reports), begin(my_reports),
|
||||
end(my_reports));
|
||||
if (it != end(reports)) {
|
||||
u32 offset = verify_u32(distance(begin(reports), it));
|
||||
DEBUG_PRINTF("reusing found report list at %u\n", offset);
|
||||
return offset;
|
||||
}
|
||||
|
||||
u32 offset = verify_u32(reports.size());
|
||||
insert(&reports, reports.end(), my_reports);
|
||||
reportListCache.emplace(move(my_reports), offset);
|
||||
return offset;
|
||||
}
|
||||
|
||||
static
|
||||
void buildAcceptsList(const build_info &args,
|
||||
unordered_map<vector<ReportID>, u32> &reports_cache,
|
||||
vector<NFAVertex> &verts, vector<NFAAccept> &accepts,
|
||||
vector<ReportID> &reports, vector<NFAStateSet> &squash) {
|
||||
if (verts.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("building accept lists for %zu states\n", verts.size());
|
||||
|
||||
auto cmp_state_id = [&args](NFAVertex a, NFAVertex b) {
|
||||
u32 a_state = args.state_ids.at(a);
|
||||
u32 b_state = args.state_ids.at(b);
|
||||
assert(a_state != b_state || a == b);
|
||||
return a_state < b_state;
|
||||
};
|
||||
|
||||
sort(begin(verts), end(verts), cmp_state_id);
|
||||
|
||||
const NGHolder &h = args.h;
|
||||
for (const auto &v : verts) {
|
||||
DEBUG_PRINTF("state=%u, reports: [%s]\n", args.state_ids.at(v),
|
||||
as_string_list(h[v].reports).c_str());
|
||||
NFAAccept a;
|
||||
memset(&a, 0, sizeof(a));
|
||||
assert(!h[v].reports.empty());
|
||||
if (h[v].reports.size() == 1) {
|
||||
a.single_report = 1;
|
||||
a.reports = *h[v].reports.begin();
|
||||
} else {
|
||||
a.single_report = 0;
|
||||
a.reports = addReports(h[v].reports, reports, reports_cache);
|
||||
}
|
||||
a.squash = addSquashMask(args, v, squash);
|
||||
accepts.push_back(move(a));
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void buildAccepts(const build_info &args,
|
||||
unordered_map<vector<ReportID>, u32> &reports_cache,
|
||||
NFAStateSet &acceptMask, NFAStateSet &acceptEodMask,
|
||||
vector<NFAAccept> &accepts, vector<NFAAccept> &acceptsEod,
|
||||
vector<ReportID> &reports, vector<NFAStateSet> &squash) {
|
||||
const NGHolder &h = args.h;
|
||||
|
||||
acceptMask.resize(args.num_states);
|
||||
acceptEodMask.resize(args.num_states);
|
||||
|
||||
vector<NFAVertex> verts_accept, verts_accept_eod;
|
||||
|
||||
for (auto v : vertices_range(h)) {
|
||||
u32 state_id = args.state_ids.at(v);
|
||||
|
||||
@@ -1007,41 +1098,20 @@ void buildAccepts(const build_info &args, NFAStateSet &acceptMask,
|
||||
continue;
|
||||
}
|
||||
|
||||
u32 squashMaskOffset = MO_INVALID_IDX;
|
||||
auto sit = args.reportSquashMap.find(v);
|
||||
if (sit != args.reportSquashMap.end()) {
|
||||
// This state has a squash mask. Paw through the existing vector to
|
||||
// see if we've already seen it, otherwise add a new one.
|
||||
auto it = find(squash.begin(), squash.end(), sit->second);
|
||||
if (it != squash.end()) {
|
||||
squashMaskOffset = verify_u32(distance(squash.begin(), it));
|
||||
} else {
|
||||
squashMaskOffset = verify_u32(squash.size());
|
||||
squash.push_back(sit->second);
|
||||
}
|
||||
}
|
||||
|
||||
// Add an accept (or acceptEod) per report ID.
|
||||
|
||||
vector<NFAAccept> *accepts_out;
|
||||
if (edge(v, h.accept, h).second) {
|
||||
acceptMask.set(state_id);
|
||||
accepts_out = &accepts;
|
||||
verts_accept.push_back(v);
|
||||
} else {
|
||||
assert(edge(v, h.acceptEod, h).second);
|
||||
acceptEodMask.set(state_id);
|
||||
accepts_out = &acceptsEod;
|
||||
}
|
||||
|
||||
for (auto report : h[v].reports) {
|
||||
accepts_out->push_back(NFAAccept());
|
||||
NFAAccept &a = accepts_out->back();
|
||||
a.state = state_id;
|
||||
a.externalId = report;
|
||||
a.squash = squashMaskOffset;
|
||||
DEBUG_PRINTF("Accept: state=%u, externalId=%u\n", state_id, report);
|
||||
verts_accept_eod.push_back(v);
|
||||
}
|
||||
}
|
||||
|
||||
buildAcceptsList(args, reports_cache, verts_accept, accepts, reports,
|
||||
squash);
|
||||
buildAcceptsList(args, reports_cache, verts_accept_eod, acceptsEod, reports,
|
||||
squash);
|
||||
}
|
||||
|
||||
static
|
||||
@@ -1314,36 +1384,12 @@ struct ExceptionProto {
|
||||
}
|
||||
};
|
||||
|
||||
static
|
||||
u32 getReportListIndex(const flat_set<ReportID> &reports,
|
||||
vector<ReportID> &exceptionReports,
|
||||
map<vector<ReportID>, u32> &reportListCache) {
|
||||
if (reports.empty()) {
|
||||
return MO_INVALID_IDX;
|
||||
}
|
||||
|
||||
const vector<ReportID> r(reports.begin(), reports.end());
|
||||
|
||||
auto it = reportListCache.find(r);
|
||||
if (it != reportListCache.end()) {
|
||||
u32 idx = it->second;
|
||||
assert(idx < exceptionReports.size());
|
||||
assert(equal(r.begin(), r.end(), exceptionReports.begin() + idx));
|
||||
return idx;
|
||||
}
|
||||
|
||||
u32 idx = verify_u32(exceptionReports.size());
|
||||
reportListCache[r] = idx;
|
||||
exceptionReports.insert(exceptionReports.end(), r.begin(), r.end());
|
||||
exceptionReports.push_back(MO_INVALID_IDX); // terminator
|
||||
return idx;
|
||||
}
|
||||
|
||||
static
|
||||
u32 buildExceptionMap(const build_info &args,
|
||||
unordered_map<vector<ReportID>, u32> &reports_cache,
|
||||
const ue2::unordered_set<NFAEdge> &exceptional,
|
||||
map<ExceptionProto, vector<u32> > &exceptionMap,
|
||||
vector<ReportID> &exceptionReports) {
|
||||
map<ExceptionProto, vector<u32>> &exceptionMap,
|
||||
vector<ReportID> &reportList) {
|
||||
const NGHolder &h = args.h;
|
||||
const u32 num_states = args.num_states;
|
||||
u32 exceptionCount = 0;
|
||||
@@ -1361,10 +1407,6 @@ u32 buildExceptionMap(const build_info &args,
|
||||
}
|
||||
}
|
||||
|
||||
// We track report lists that have already been written into the global
|
||||
// list in case we can reuse them.
|
||||
map<vector<ReportID>, u32> reportListCache;
|
||||
|
||||
for (auto v : vertices_range(h)) {
|
||||
const u32 i = args.state_ids.at(v);
|
||||
|
||||
@@ -1383,8 +1425,12 @@ u32 buildExceptionMap(const build_info &args,
|
||||
DEBUG_PRINTF("state %u is exceptional due to accept "
|
||||
"(%zu reports)\n", i, reports.size());
|
||||
|
||||
e.reports_index =
|
||||
getReportListIndex(reports, exceptionReports, reportListCache);
|
||||
if (reports.empty()) {
|
||||
e.reports_index = MO_INVALID_IDX;
|
||||
} else {
|
||||
e.reports_index =
|
||||
addReports(reports, reportList, reports_cache);
|
||||
}
|
||||
|
||||
// We may be applying a report squash too.
|
||||
auto mi = args.reportSquashMap.find(v);
|
||||
@@ -1810,9 +1856,10 @@ struct Factory {
|
||||
}
|
||||
|
||||
static
|
||||
void writeExceptions(const map<ExceptionProto, vector<u32> > &exceptionMap,
|
||||
const vector<u32> &repeatOffsets,
|
||||
implNFA_t *limex, const u32 exceptionsOffset) {
|
||||
void writeExceptions(const map<ExceptionProto, vector<u32>> &exceptionMap,
|
||||
const vector<u32> &repeatOffsets, implNFA_t *limex,
|
||||
const u32 exceptionsOffset,
|
||||
const u32 reportListOffset) {
|
||||
DEBUG_PRINTF("exceptionsOffset=%u\n", exceptionsOffset);
|
||||
|
||||
exception_t *etable = (exception_t *)((char *)limex + exceptionsOffset);
|
||||
@@ -1839,7 +1886,12 @@ struct Factory {
|
||||
exception_t &e = etable[ecount];
|
||||
maskSetBits(e.squash, proto.squash_states);
|
||||
maskSetBits(e.successors, proto.succ_states);
|
||||
e.reports = proto.reports_index;
|
||||
if (proto.reports_index == MO_INVALID_IDX) {
|
||||
e.reports = MO_INVALID_IDX;
|
||||
} else {
|
||||
e.reports = reportListOffset +
|
||||
proto.reports_index * sizeof(ReportID);
|
||||
}
|
||||
e.hasSquash = verify_u8(proto.squash);
|
||||
e.trigger = verify_u8(proto.trigger);
|
||||
u32 repeat_offset = proto.repeat_index == MO_INVALID_IDX
|
||||
@@ -1958,7 +2010,9 @@ struct Factory {
|
||||
const vector<NFAAccept> &acceptsEod,
|
||||
const vector<NFAStateSet> &squash, implNFA_t *limex,
|
||||
const u32 acceptsOffset, const u32 acceptsEodOffset,
|
||||
const u32 squashOffset) {
|
||||
const u32 squashOffset, const u32 reportListOffset) {
|
||||
char *limex_base = (char *)limex;
|
||||
|
||||
DEBUG_PRINTF("acceptsOffset=%u, acceptsEodOffset=%u, squashOffset=%u\n",
|
||||
acceptsOffset, acceptsEodOffset, squashOffset);
|
||||
|
||||
@@ -1966,27 +2020,38 @@ struct Factory {
|
||||
maskSetBits(limex->accept, acceptMask);
|
||||
maskSetBits(limex->acceptAtEOD, acceptEodMask);
|
||||
|
||||
// Transforms the index into the report list into an offset relative to
|
||||
// the base of the limex.
|
||||
auto report_offset_fn = [&](NFAAccept a) {
|
||||
if (!a.single_report) {
|
||||
a.reports = reportListOffset + a.reports * sizeof(ReportID);
|
||||
}
|
||||
return a;
|
||||
};
|
||||
|
||||
// Write accept table.
|
||||
limex->acceptOffset = acceptsOffset;
|
||||
limex->acceptCount = verify_u32(accepts.size());
|
||||
DEBUG_PRINTF("NFA has %zu accepts\n", accepts.size());
|
||||
NFAAccept *acceptsTable = (NFAAccept *)((char *)limex + acceptsOffset);
|
||||
NFAAccept *acceptsTable = (NFAAccept *)(limex_base + acceptsOffset);
|
||||
assert(ISALIGNED(acceptsTable));
|
||||
copy(accepts.begin(), accepts.end(), acceptsTable);
|
||||
transform(accepts.begin(), accepts.end(), acceptsTable,
|
||||
report_offset_fn);
|
||||
|
||||
// Write eod accept table.
|
||||
limex->acceptEodOffset = acceptsEodOffset;
|
||||
limex->acceptEodCount = verify_u32(acceptsEod.size());
|
||||
DEBUG_PRINTF("NFA has %zu EOD accepts\n", acceptsEod.size());
|
||||
NFAAccept *acceptsEodTable = (NFAAccept *)((char *)limex + acceptsEodOffset);
|
||||
NFAAccept *acceptsEodTable = (NFAAccept *)(limex_base + acceptsEodOffset);
|
||||
assert(ISALIGNED(acceptsEodTable));
|
||||
copy(acceptsEod.begin(), acceptsEod.end(), acceptsEodTable);
|
||||
transform(acceptsEod.begin(), acceptsEod.end(), acceptsEodTable,
|
||||
report_offset_fn);
|
||||
|
||||
// Write squash mask table.
|
||||
limex->squashCount = verify_u32(squash.size());
|
||||
limex->squashOffset = squashOffset;
|
||||
DEBUG_PRINTF("NFA has %zu report squash masks\n", squash.size());
|
||||
tableRow_t *mask = (tableRow_t *)((char *)limex + squashOffset);
|
||||
tableRow_t *mask = (tableRow_t *)(limex_base + squashOffset);
|
||||
assert(ISALIGNED(mask));
|
||||
for (size_t i = 0, end = squash.size(); i < end; i++) {
|
||||
maskSetBits(mask[i], squash[i]);
|
||||
@@ -2023,15 +2088,12 @@ struct Factory {
|
||||
}
|
||||
|
||||
static
|
||||
void writeExceptionReports(const vector<ReportID> &reports,
|
||||
implNFA_t *limex,
|
||||
const u32 exceptionReportsOffset) {
|
||||
DEBUG_PRINTF("exceptionReportsOffset=%u\n", exceptionReportsOffset);
|
||||
|
||||
limex->exReportOffset = exceptionReportsOffset;
|
||||
assert(ISALIGNED_N((char *)limex + exceptionReportsOffset,
|
||||
void writeReportList(const vector<ReportID> &reports, implNFA_t *limex,
|
||||
const u32 reportListOffset) {
|
||||
DEBUG_PRINTF("reportListOffset=%u\n", reportListOffset);
|
||||
assert(ISALIGNED_N((char *)limex + reportListOffset,
|
||||
alignof(ReportID)));
|
||||
copy_bytes((char *)limex + exceptionReportsOffset, reports);
|
||||
copy_bytes((char *)limex + reportListOffset, reports);
|
||||
}
|
||||
|
||||
static
|
||||
@@ -2050,6 +2112,10 @@ struct Factory {
|
||||
repeatSize += repeats[i].second;
|
||||
}
|
||||
|
||||
// We track report lists that have already been written into the global
|
||||
// list in case we can reuse them.
|
||||
unordered_map<vector<ReportID>, u32> reports_cache;
|
||||
|
||||
ue2::unordered_set<NFAEdge> exceptional;
|
||||
u32 shiftCount = findBestNumOfVarShifts(args);
|
||||
assert(shiftCount);
|
||||
@@ -2057,9 +2123,10 @@ struct Factory {
|
||||
findExceptionalTransitions(args, exceptional, maxShift);
|
||||
|
||||
map<ExceptionProto, vector<u32> > exceptionMap;
|
||||
vector<ReportID> exceptionReports;
|
||||
u32 exceptionCount = buildExceptionMap(args, exceptional, exceptionMap,
|
||||
exceptionReports);
|
||||
vector<ReportID> reportList;
|
||||
|
||||
u32 exceptionCount = buildExceptionMap(args, reports_cache, exceptional,
|
||||
exceptionMap, reportList);
|
||||
|
||||
assert(exceptionCount <= args.num_states);
|
||||
|
||||
@@ -2076,8 +2143,8 @@ struct Factory {
|
||||
NFAStateSet acceptMask, acceptEodMask;
|
||||
vector<NFAAccept> accepts, acceptsEod;
|
||||
vector<NFAStateSet> squash;
|
||||
buildAccepts(args, acceptMask, acceptEodMask, accepts, acceptsEod,
|
||||
squash);
|
||||
buildAccepts(args, reports_cache, acceptMask, acceptEodMask, accepts,
|
||||
acceptsEod, reportList, squash);
|
||||
|
||||
// Build all our accel info.
|
||||
NFAStateSet accelMask, accelFriendsMask;
|
||||
@@ -2118,8 +2185,8 @@ struct Factory {
|
||||
const u32 exceptionsOffset = offset;
|
||||
offset += sizeof(exception_t) * exceptionCount;
|
||||
|
||||
const u32 exceptionReportsOffset = offset;
|
||||
offset += sizeof(ReportID) * exceptionReports.size();
|
||||
const u32 reportListOffset = offset;
|
||||
offset += sizeof(ReportID) * reportList.size();
|
||||
|
||||
const u32 repeatOffsetsOffset = offset;
|
||||
offset += sizeof(u32) * args.repeats.size();
|
||||
@@ -2146,7 +2213,8 @@ struct Factory {
|
||||
limex, accelTableOffset, accelAuxOffset);
|
||||
|
||||
writeAccepts(acceptMask, acceptEodMask, accepts, acceptsEod, squash,
|
||||
limex, acceptsOffset, acceptsEodOffset, squashOffset);
|
||||
limex, acceptsOffset, acceptsEodOffset, squashOffset,
|
||||
reportListOffset);
|
||||
|
||||
limex->shiftCount = shiftCount;
|
||||
writeShiftMasks(args, limex);
|
||||
@@ -2154,14 +2222,15 @@ struct Factory {
|
||||
// Determine the state required for our state vector.
|
||||
findStateSize(args, limex);
|
||||
|
||||
writeExceptionReports(exceptionReports, limex, exceptionReportsOffset);
|
||||
writeReportList(reportList, limex, reportListOffset);
|
||||
|
||||
// Repeat structures and offset table.
|
||||
vector<u32> repeatOffsets;
|
||||
writeRepeats(repeats, repeatOffsets, limex, repeatOffsetsOffset,
|
||||
repeatsOffset);
|
||||
|
||||
writeExceptions(exceptionMap, repeatOffsets, limex, exceptionsOffset);
|
||||
writeExceptions(exceptionMap, repeatOffsets, limex, exceptionsOffset,
|
||||
reportListOffset);
|
||||
|
||||
writeLimexMasks(args, limex);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user