Mirror of https://github.com/VectorCamp/vectorscan.git (synced 2025-06-28 16:41:01 +03:00)
Rose build: move HWLM build code to own file
To reduce the size of rose_build_bytecode.cpp a little, move the code that deals with HWLM literal tables into its own new file.
This commit is contained in:
parent 12921b7c97
commit b093616aff
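The key interface change behind the move, taken from the declarations further down in this diff, is that fillHamsterLiteralList no longer fills an out-parameter but returns the literal list directly:

    // Old declaration (removed from the shared Rose build header):
    void fillHamsterLiteralList(const RoseBuildImpl &tbi, rose_literal_table table,
                                std::vector<hwlmLiteral> *hl);

    // New declaration (src/rose/rose_build_matchers.h, added by this commit):
    std::vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
                                                    rose_literal_table table);

The matcher builders (buildFloatingMatcher, buildSmallBlockMatcher, buildEodAnchoredMatcher), previously static helpers in rose_build_bytecode.cpp, move along with it and are now exported from the new header.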
@@ -810,6 +810,8 @@ SET (hs_SRCS
    src/rose/rose_build_infix.h
    src/rose/rose_build_lookaround.cpp
    src/rose/rose_build_lookaround.h
    src/rose/rose_build_matchers.cpp
    src/rose/rose_build_matchers.h
    src/rose/rose_build_merge.cpp
    src/rose/rose_build_merge.h
    src/rose/rose_build_misc.cpp
@@ -35,12 +35,12 @@
#include "rose_build_anchored.h"
#include "rose_build_infix.h"
#include "rose_build_lookaround.h"
#include "rose_build_matchers.h"
#include "rose_build_scatter.h"
#include "rose_build_util.h"
#include "rose_build_width.h"
#include "rose_program.h"
#include "hwlm/hwlm.h" /* engine types */
#include "hwlm/hwlm_build.h"
#include "nfa/castlecompile.h"
#include "nfa/goughcompile.h"
#include "nfa/mcclellancompile.h"
@@ -68,7 +68,6 @@
#include "util/compile_error.h"
#include "util/container.h"
#include "util/graph_range.h"
#include "util/dump_charclass.h"
#include "util/internal_report.h"
#include "util/multibit_build.h"
#include "util/order_check.h"
@@ -78,11 +77,9 @@
#include "util/verify_types.h"

#include <algorithm>
#include <iomanip>
#include <map>
#include <queue>
#include <set>
#include <sstream>
#include <string>
#include <vector>
#include <utility>
@@ -1674,619 +1671,6 @@ u32 RoseBuildImpl::calcHistoryRequired() const {
    return m ? m - 1 : 0;
}

[The block removed here comprised the HWLM literal table code: dumpMask, maskFromLeftGraph, maskFromLeftCastle, maskFromLeft, maskFromPreds, findHamsterMask, hamsterMaskCombine, isDirectHighlander, isNoRunsVertex, isNoRunsLiteral, fillHamsterLiteralList, buildFloatingMatcher, the LongerThanLimit helper, buildSmallBlockMatcher and buildEodAnchoredMatcher. The same code reappears, lightly adapted, in the new src/rose/rose_build_matchers.cpp below.]

// Adds a sparse iterator to the end of the iterator table, returning its
// offset.
static
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,7 @@

#include "hwlm/hwlm_build.h"
#include "rose_build_impl.h"
#include "rose_build_matchers.h"
#include "rose/rose_dump.h"
#include "rose_internal.h"
#include "ue2common.h"
@@ -458,23 +459,18 @@ struct LongerThanLimit {

static
void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) {
    vector<hwlmLiteral> lits;

    fillHamsterLiteralList(build, ROSE_ANCHORED, &lits);
    auto lits = fillHamsterLiteralList(build, ROSE_ANCHORED);
    dumpTestLiterals(base + "rose_anchored_test_literals.txt", lits);

    lits.clear();
    fillHamsterLiteralList(build, ROSE_FLOATING, &lits);
    lits = fillHamsterLiteralList(build, ROSE_FLOATING);
    dumpTestLiterals(base + "rose_float_test_literals.txt", lits);

    lits.clear();
    fillHamsterLiteralList(build, ROSE_EOD_ANCHORED, &lits);
    lits = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED);
    dumpTestLiterals(base + "rose_eod_test_literals.txt", lits);

    lits.clear();
    fillHamsterLiteralList(build, ROSE_FLOATING, &lits);
    fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK, &lits);
    lits = fillHamsterLiteralList(build, ROSE_FLOATING);
    auto lits2 = fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK);
    lits.insert(end(lits), begin(lits2), end(lits2));
    lits.erase(remove_if(lits.begin(), lits.end(),
                         LongerThanLimit(ROSE_SMALL_BLOCK_LEN)),
               lits.end());
@@ -58,7 +58,6 @@ namespace ue2 {
struct BoundaryReports;
struct CastleProto;
struct CompileContext;
struct hwlmLiteral;
class ReportManager;
class SomSlotManager;

@@ -550,9 +549,6 @@ u64a findMaxOffset(const std::set<ReportID> &reports, const ReportManager &rm);
void normaliseLiteralMask(const ue2_literal &s, std::vector<u8> &msk,
                          std::vector<u8> &cmp);

void fillHamsterLiteralList(const RoseBuildImpl &tbi, rose_literal_table table,
                            std::vector<hwlmLiteral> *hl);

#ifndef NDEBUG
bool canImplementGraphs(const RoseBuildImpl &tbi);
#endif
src/rose/rose_build_matchers.cpp (new file, 662 lines)
@@ -0,0 +1,662 @@
/*
 * Copyright (c) 2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * \file
 * \brief Rose build: code for constructing literal tables.
 */

#include "rose_build_matchers.h"

#include "rose_build_impl.h"
#include "rose_build_width.h"
#include "hwlm/hwlm_build.h"
#include "hwlm/hwlm_literal.h"
#include "nfa/castlecompile.h"
#include "util/charreach_util.h"
#include "util/compile_context.h"
#include "util/compile_error.h"
#include "util/dump_charclass.h"
#include "util/report.h"
#include "util/report_manager.h"
#include "ue2common.h"

#include <iomanip>
#include <sstream>

#include <boost/range/adaptor/map.hpp>

using namespace std;
using boost::adaptors::map_values;

namespace ue2 {

#ifdef DEBUG
static UNUSED
string dumpMask(const vector<u8> &v) {
    ostringstream oss;
    for (u8 e : v) {
        oss << setfill('0') << setw(2) << hex << (unsigned int)e;
    }
    return oss.str();
}
#endif

static
bool maskFromLeftGraph(const LeftEngInfo &left, vector<u8> &msk,
                       vector<u8> &cmp) {
    const u32 lag = left.lag;
    const ReportID report = left.leftfix_report;

    DEBUG_PRINTF("leftfix with lag %u, report %u\n", lag, report);

    assert(left.graph);
    const NGHolder &h = *left.graph;
    assert(in_degree(h.acceptEod, h) == 1); // no eod reports

    // Start with the set of reporter vertices for this leftfix.
    set<NFAVertex> curr;
    for (auto u : inv_adjacent_vertices_range(h.accept, h)) {
        if (contains(h[u].reports, report)) {
            curr.insert(u);
        }
    }
    assert(!curr.empty());

    size_t i = HWLM_MASKLEN - lag - 1;
    do {
        if (curr.empty() || contains(curr, h.start)
            || contains(curr, h.startDs)) {
            DEBUG_PRINTF("end of the road\n");
            break;
        }

        set<NFAVertex> next;
        CharReach cr;
        for (NFAVertex v : curr) {
            const auto &v_cr = h[v].char_reach;
            DEBUG_PRINTF("vertex %u, reach %s\n", h[v].index,
                         describeClass(v_cr).c_str());
            cr |= v_cr;
            insert(&next, inv_adjacent_vertices(v, h));
        }
        make_and_cmp_mask(cr, &msk.at(i), &cmp.at(i));
        DEBUG_PRINTF("%zu: reach=%s, msk=%u, cmp=%u\n", i,
                     describeClass(cr).c_str(), msk[i], cmp[i]);
        curr.swap(next);
    } while (i-- > 0);

    return true;
}

static
bool maskFromLeftCastle(const LeftEngInfo &left, vector<u8> &msk,
                        vector<u8> &cmp) {
    const u32 lag = left.lag;
    const ReportID report = left.leftfix_report;

    DEBUG_PRINTF("leftfix with lag %u, report %u\n", lag, report);

    assert(left.castle);
    const CastleProto &c = *left.castle;

    depth min_width(depth::infinity());
    for (const PureRepeat &repeat : c.repeats | map_values) {
        if (contains(repeat.reports, report)) {
            min_width = min(min_width, repeat.bounds.min);
        }
    }

    DEBUG_PRINTF("castle min width for this report is %s\n",
                 min_width.str().c_str());

    if (!min_width.is_finite() || min_width == depth(0)) {
        DEBUG_PRINTF("bad min width\n");
        return false;
    }

    u32 len = min_width;
    u32 end = HWLM_MASKLEN - lag;
    for (u32 i = end; i > end - min(end, len); i--) {
        make_and_cmp_mask(c.reach(), &msk.at(i - 1), &cmp.at(i - 1));
    }

    return true;
}

static
bool maskFromLeft(const LeftEngInfo &left, vector<u8> &msk, vector<u8> &cmp) {
    if (left.lag >= HWLM_MASKLEN) {
        DEBUG_PRINTF("too much lag\n");
        return false;
    }

    if (left.graph) {
        return maskFromLeftGraph(left, msk, cmp);
    } else if (left.castle) {
        return maskFromLeftCastle(left, msk, cmp);
    }

    return false;
}
static
bool maskFromPreds(const RoseBuildImpl &build, const rose_literal_id &id,
                   const RoseVertex v, vector<u8> &msk, vector<u8> &cmp) {
    const RoseGraph &g = build.g;

    // For right now, wuss out and only handle cases with one pred.
    if (in_degree(v, g) != 1) {
        return false;
    }

    // Root successors have no literal before them.
    if (build.isRootSuccessor(v)) {
        return false;
    }

    // If we have a single predecessor with a short bound, we may be able to
    // fill out a mask with the trailing bytes of the previous literal. This
    // allows us to improve literals like the 'bar' in 'fo.bar'.

    RoseEdge e = *(in_edges(v, g).first);
    u32 bound = g[e].maxBound;
    if (bound != g[e].minBound || bound >= HWLM_MASKLEN) {
        return false;
    }

    bound += id.s.length();
    if (bound >= HWLM_MASKLEN) {
        return false;
    }

    DEBUG_PRINTF("bound %u\n", bound);

    RoseVertex u = source(e, g);
    if (g[u].literals.size() != 1) {
        DEBUG_PRINTF("u has %zu literals\n", g[u].literals.size());
        return false;
    }

    u32 u_lit_id = *(g[u].literals.begin());
    const rose_literal_id &u_id = build.literals.right.at(u_lit_id);
    DEBUG_PRINTF("u has lit: %s\n", escapeString(u_id.s).c_str());

    // Number of characters to take from the back of u's literal.
    size_t u_len = u_id.s.length();
    size_t u_sublen = min(u_len, (size_t)HWLM_MASKLEN - bound);

    size_t i = HWLM_MASKLEN - (bound + u_sublen);

    ue2_literal::const_iterator it, ite;
    for (it = u_id.s.begin() + (u_len - u_sublen), ite = u_id.s.end();
         it != ite; ++it) {
        make_and_cmp_mask(*it, &msk.at(i), &cmp.at(i));
        ++i;
    }

    return true;
}

static
bool findHamsterMask(const RoseBuildImpl &build, const rose_literal_id &id,
                     const rose_literal_info &info, const RoseVertex v,
                     vector<u8> &msk, vector<u8> &cmp) {
    // Start with zero masks.
    msk.assign(HWLM_MASKLEN, 0);
    cmp.assign(HWLM_MASKLEN, 0);

    // Masks can come from literal benefits (for mixed-case literals).
    if (info.requires_benefits) {
        assert(mixed_sensitivity(id.s));

        size_t j = 0;
        for (ue2_literal::const_reverse_iterator it = id.s.rbegin(),
                                                 ite = id.s.rend();
             it != ite && j < HWLM_MASKLEN; ++it, ++j) {
            size_t offset = HWLM_MASKLEN - j - 1;
            const CharReach &cr = *it;
            make_and_cmp_mask(cr, &msk[offset], &cmp[offset]);
        }
        return true;
    }

    const LeftEngInfo &left = build.g[v].left;
    if (left && left.lag < HWLM_MASKLEN) {
        if (maskFromLeft(left, msk, cmp)) {
            DEBUG_PRINTF("mask from a leftfix!\n");
            return true;
        }
    }

    if (id.s.length() < HWLM_MASKLEN) {
        if (maskFromPreds(build, id, v, msk, cmp)) {
            DEBUG_PRINTF("mask from preds!\n");
            return true;
        }
    }

    return false;
}

static
bool hamsterMaskCombine(vector<u8> &msk, vector<u8> &cmp,
                        const vector<u8> &v_msk, const vector<u8> &v_cmp) {
    assert(msk.size() == HWLM_MASKLEN && cmp.size() == HWLM_MASKLEN);
    assert(v_msk.size() == HWLM_MASKLEN && v_cmp.size() == HWLM_MASKLEN);

    u8 all_masks = 0;

    for (size_t i = 0; i < HWLM_MASKLEN; i++) {
        u8 filter = ~(cmp[i] ^ v_cmp[i]);
        msk[i] &= v_msk[i];
        msk[i] &= filter;
        cmp[i] &= filter;

        all_masks |= msk[i];
    }

    // Return false if we have no bits on in any mask elements.
    return all_masks != 0;
}
static
bool findHamsterMask(const RoseBuildImpl &build, const rose_literal_id &id,
                     const rose_literal_info &info,
                     vector<u8> &msk, vector<u8> &cmp) {
    if (!build.cc.grey.roseHamsterMasks) {
        return false;
    }

    if (!info.delayed_ids.empty()) {
        // Not safe to add masks to delayed literals at this late stage.
        return false;
    }

    size_t num = 0;
    vector<u8> v_msk, v_cmp;

    for (RoseVertex v : info.vertices) {
        if (!findHamsterMask(build, id, info, v, v_msk, v_cmp)) {
            DEBUG_PRINTF("no mask\n");
            return false;
        }

        if (!num++) {
            // First (or only) vertex, this becomes the mask/cmp pair.
            msk = v_msk;
            cmp = v_cmp;
        } else {
            // Multiple vertices with potentially different masks. We combine
            // them into an 'advisory' mask.
            if (!hamsterMaskCombine(msk, cmp, v_msk, v_cmp)) {
                DEBUG_PRINTF("mask went to zero\n");
                return false;
            }
        }
    }

    normaliseLiteralMask(id.s, msk, cmp);

    if (msk.empty()) {
        DEBUG_PRINTF("no mask\n");
        return false;
    }

    DEBUG_PRINTF("msk=%s, cmp=%s\n", dumpMask(msk).c_str(),
                 dumpMask(cmp).c_str());
    return true;
}

static
bool isDirectHighlander(const RoseBuildImpl &build, const u32 id,
                        const rose_literal_info &info) {
    if (!build.isDirectReport(id)) {
        return false;
    }

    auto is_simple_exhaustible = [&build](ReportID id) {
        const Report &report = build.rm.getReport(id);
        return isSimpleExhaustible(report);
    };

    assert(!info.vertices.empty());
    for (const auto &v : info.vertices) {
        const auto &reports = build.g[v].reports;
        assert(!reports.empty());
        if (!all_of(begin(reports), end(reports),
                    is_simple_exhaustible)) {
            return false;
        }
    }
    return true;
}
// Called by isNoRunsLiteral below.
static
bool isNoRunsVertex(const RoseBuildImpl &build, NFAVertex u) {
    const RoseGraph &g = build.g;
    if (!g[u].isBoring()) {
        DEBUG_PRINTF("u=%zu is not boring\n", g[u].idx);
        return false;
    }

    if (!g[u].reports.empty()) {
        DEBUG_PRINTF("u=%zu has accept\n", g[u].idx);
        return false;
    }

    /* TODO: handle non-root roles as well. It can't be that difficult... */

    if (!in_degree_equal_to(u, g, 1)) {
        DEBUG_PRINTF("u=%zu is not a root role\n", g[u].idx);
        return false;
    }

    RoseEdge e;
    bool exists;
    tie(e, exists) = edge_by_target(build.root, u, g);

    if (!exists) {
        DEBUG_PRINTF("u=%zu is not a root role\n", g[u].idx);
        return false;
    }

    if (g[e].minBound != 0 || g[e].maxBound != ROSE_BOUND_INF) {
        DEBUG_PRINTF("u=%zu has bounds from root\n", g[u].idx);
        return false;
    }

    for (const auto &oe : out_edges_range(u, g)) {
        RoseVertex v = target(oe, g);
        if (g[oe].maxBound != ROSE_BOUND_INF) {
            DEBUG_PRINTF("edge (%zu,%zu) has max bound\n", g[u].idx,
                         g[target(oe, g)].idx);
            return false;
        }
        if (g[v].left) {
            DEBUG_PRINTF("v=%zu has rose prefix\n", g[v].idx);
            return false;
        }
    }
    return true;
}

static
bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id,
                     const rose_literal_info &info) {
    DEBUG_PRINTF("lit id %u\n", id);

    if (info.requires_benefits) {
        DEBUG_PRINTF("requires benefits\n"); // which would need confirm
        return false;
    }

    if (isDirectHighlander(build, id, info)) {
        DEBUG_PRINTF("highlander direct report\n");
        return true;
    }

    // Undelayed vertices.
    for (RoseVertex v : info.vertices) {
        if (!isNoRunsVertex(build, v)) {
            return false;
        }
    }

    // Delayed vertices.
    for (u32 d : info.delayed_ids) {
        assert(d < build.literal_info.size());
        const rose_literal_info &delayed_info = build.literal_info.at(d);
        assert(delayed_info.undelayed_id == id);
        for (RoseVertex v : delayed_info.vertices) {
            if (!isNoRunsVertex(build, v)) {
                return false;
            }
        }
    }

    DEBUG_PRINTF("is no-runs literal\n");
    return true;
}
vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
                                           rose_literal_table table) {
    vector<hwlmLiteral> lits;

    for (const auto &e : build.literals.right) {
        const u32 id = e.first;
        if (!build.hasFinalId(id)) {
            continue;
        }

        if (e.second.delay) {
            continue; /* delay id's are virtual-ish */
        }

        if (e.second.table != table) {
            continue; /* wrong table */
        }

        assert(id < build.literal_info.size());
        const rose_literal_info &info = build.literal_info[id];
        u32 final_id = info.final_id;
        rose_group groups = info.group_mask;
        /* Note: requires_benefits are handled in the literal entries */
        const ue2_literal &lit = e.second.s;

        DEBUG_PRINTF("lit='%s'\n", escapeString(lit).c_str());

        vector<u8> msk = e.second.msk; // copy
        vector<u8> cmp = e.second.cmp; // copy

        if (msk.empty()) {
            // Try and pick up an advisory mask.
            if (!findHamsterMask(build, e.second, info, msk, cmp)) {
                msk.clear(); cmp.clear();
            } else {
                DEBUG_PRINTF("picked up late mask %zu\n", msk.size());
            }
        }

        bool noruns = isNoRunsLiteral(build, id, info);

        if (info.requires_explode) {
            DEBUG_PRINTF("exploding lit\n");
            const vector<u8> empty_msk; // msk/cmp will be empty
            case_iter cit = caseIterateBegin(lit);
            case_iter cite = caseIterateEnd();
            for (; cit != cite; ++cit) {
                DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d msk=%s, "
                             "cmp=%s (exploded)\n",
                             final_id, escapeString(lit.get_string()).c_str(),
                             0, noruns, dumpMask(msk).c_str(),
                             dumpMask(cmp).c_str());
                lits.emplace_back(*cit, false, noruns, final_id, groups,
                                  empty_msk, empty_msk);
            }
        } else {
            const std::string &s = lit.get_string();
            const bool nocase = lit.any_nocase();

            DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, "
                         "cmp=%s\n",
                         final_id, escapeString(s).c_str(), (int)nocase, noruns,
                         dumpMask(msk).c_str(), dumpMask(cmp).c_str());

            if (!maskIsConsistent(s, nocase, msk, cmp)) {
                DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n");
                continue;
            }

            lits.emplace_back(lit.get_string(), lit.any_nocase(), noruns,
                              final_id, groups, msk, cmp);
        }
    }

    return lits;
}
aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
                                              size_t *fsize,
                                              size_t *historyRequired,
                                              size_t *streamStateRequired) {
    *fsize = 0;

    auto fl = fillHamsterLiteralList(build, ROSE_FLOATING);
    if (fl.empty()) {
        DEBUG_PRINTF("empty floating matcher\n");
        return nullptr;
    }

    hwlmStreamingControl ctl;
    hwlmStreamingControl *ctlp;
    if (build.cc.streaming) {
        ctl.history_max = build.cc.grey.maxHistoryAvailable;
        ctl.history_min = MAX(*historyRequired,
                              build.cc.grey.minHistoryAvailable);
        DEBUG_PRINTF("streaming control, history max=%zu, min=%zu\n",
                     ctl.history_max, ctl.history_min);
        ctlp = &ctl;
    } else {
        ctlp = nullptr; // Null for non-streaming.
    }

    aligned_unique_ptr<HWLM> ftable =
        hwlmBuild(fl, ctlp, false, build.cc, build.getInitialGroups());
    if (!ftable) {
        throw CompileError("Unable to generate bytecode.");
    }

    if (build.cc.streaming) {
        DEBUG_PRINTF("literal_history_required=%zu\n",
                     ctl.literal_history_required);
        DEBUG_PRINTF("literal_stream_state_required=%zu\n",
                     ctl.literal_stream_state_required);
        assert(ctl.literal_history_required <=
               build.cc.grey.maxHistoryAvailable);
        *historyRequired = max(*historyRequired,
                               ctl.literal_history_required);
        *streamStateRequired = ctl.literal_stream_state_required;
    }

    *fsize = hwlmSize(ftable.get());
    assert(*fsize);
    DEBUG_PRINTF("built floating literal table size %zu bytes\n", *fsize);
    return ftable;
}

aligned_unique_ptr<HWLM> buildSmallBlockMatcher(const RoseBuildImpl &build,
                                                size_t *sbsize) {
    *sbsize = 0;

    if (build.cc.streaming) {
        DEBUG_PRINTF("streaming mode\n");
        return nullptr;
    }

    u32 float_min = findMinWidth(build, ROSE_FLOATING);
    if (float_min > ROSE_SMALL_BLOCK_LEN) {
        DEBUG_PRINTF("floating table has large min width %u, fail\n",
                     float_min);
        return nullptr;
    }

    auto lits = fillHamsterLiteralList(build, ROSE_FLOATING);
    if (lits.empty()) {
        DEBUG_PRINTF("no floating table\n");
        return nullptr;
    } else if (lits.size() == 1) {
        DEBUG_PRINTF("single floating literal, noodle will be fast enough\n");
        return nullptr;
    }

    auto anchored_lits =
        fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK);
    if (anchored_lits.empty()) {
        DEBUG_PRINTF("no small-block anchored literals\n");
        return nullptr;
    }

    lits.insert(lits.end(), anchored_lits.begin(), anchored_lits.end());

    // Remove literals that are longer than our small block length, as they can
    // never match. TODO: improve by removing literals that have a min match
    // offset greater than ROSE_SMALL_BLOCK_LEN, which will catch anchored cases
    // with preceding dots that put them over the limit.
    auto longer_than_limit = [](const hwlmLiteral &lit) {
        return lit.s.length() > ROSE_SMALL_BLOCK_LEN;
    };
    lits.erase(remove_if(lits.begin(), lits.end(), longer_than_limit),
               lits.end());

    if (lits.empty()) {
        DEBUG_PRINTF("no literals shorter than small block len\n");
        return nullptr;
    }

    aligned_unique_ptr<HWLM> hwlm =
        hwlmBuild(lits, nullptr, true, build.cc, build.getInitialGroups());
    if (!hwlm) {
        throw CompileError("Unable to generate bytecode.");
    }

    *sbsize = hwlmSize(hwlm.get());
    assert(*sbsize);
    DEBUG_PRINTF("built small block literal table size %zu bytes\n", *sbsize);
    return hwlm;
}

aligned_unique_ptr<HWLM> buildEodAnchoredMatcher(const RoseBuildImpl &build,
                                                 size_t *esize) {
    *esize = 0;

    auto el = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED);

    if (el.empty()) {
        DEBUG_PRINTF("no eod anchored literals\n");
        assert(!build.ematcher_region_size);
        return nullptr;
    }

    assert(build.ematcher_region_size);

    hwlmStreamingControl *ctlp = nullptr; // not a streaming case
    aligned_unique_ptr<HWLM> etable =
        hwlmBuild(el, ctlp, true, build.cc, build.getInitialGroups());
    if (!etable) {
        throw CompileError("Unable to generate bytecode.");
    }

    *esize = hwlmSize(etable.get());
    assert(*esize);
    DEBUG_PRINTF("built eod-anchored literal table size %zu bytes\n", *esize);
    return etable;
}

} // namespace ue2
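For readers new to HWLM literals: the msk/cmp vectors built above describe constraints on the bytes leading up to the end of a literal match, and hwlmLiteral carries them alongside the literal itself. The following stand-alone sketch is not part of this commit and is simplified relative to the real HWLM confirm path; it assumes the semantics that a byte b at mask position i must satisfy (b & msk[i]) == cmp[i]:

    // Hypothetical sketch of how an msk/cmp pair constrains the bytes that end
    // at a literal match; 'end' is the offset of the last byte of the match.
    // Callers must ensure at least msk.size() bytes of history are available.
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    static bool maskMatches(const std::vector<uint8_t> &data, size_t end,
                            const std::vector<uint8_t> &msk,
                            const std::vector<uint8_t> &cmp) {
        for (size_t i = 0; i < msk.size(); i++) {
            size_t idx = end + 1 - msk.size() + i; // walk the trailing bytes
            if ((data[idx] & msk[i]) != cmp[i]) {
                return false; // a required bit pattern is missing
            }
        }
        return true; // every masked byte agrees with cmp
    }

Under that reading, hamsterMaskCombine() above makes sense: where two vertices disagree on cmp bits, it clears those bits from both msk and cmp so the combined "advisory" mask only requires bits that every context can satisfy.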
src/rose/rose_build_matchers.h (new file, 63 lines)
@@ -0,0 +1,63 @@
/*
 * Copyright (c) 2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * \file
 * \brief Rose build: code for constructing literal tables.
 */

#ifndef ROSE_BUILD_MATCHERS_H
#define ROSE_BUILD_MATCHERS_H

#include "rose_build_impl.h"

#include <vector>

struct HWLM;

namespace ue2 {

struct hwlmLiteral;

std::vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
                                                rose_literal_table table);

aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
                                              size_t *fsize,
                                              size_t *historyRequired,
                                              size_t *streamStateRequired);

aligned_unique_ptr<HWLM> buildSmallBlockMatcher(const RoseBuildImpl &build,
                                                size_t *sbsize);

aligned_unique_ptr<HWLM> buildEodAnchoredMatcher(const RoseBuildImpl &build,
                                                 size_t *esize);

} // namespace ue2

#endif // ROSE_BUILD_MATCHERS_H
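The updated call sites in rose_build_bytecode.cpp are not shown in this diff, but based on the declarations above, a caller of the new interface would look roughly like the sketch below. The variable names and the seeding of historyRequired are illustrative assumptions, not code from this commit:

    // Hypothetical driver code using the new matcher-building API.
    size_t fsize = 0, sbsize = 0, esize = 0;
    size_t historyRequired = build.calcHistoryRequired(); // assumed seed value
    size_t streamStateRequired = 0;

    auto ftable = buildFloatingMatcher(build, &fsize, &historyRequired,
                                       &streamStateRequired);
    auto sbtable = buildSmallBlockMatcher(build, &sbsize);
    auto etable = buildEodAnchoredMatcher(build, &esize);

    // Each builder returns nullptr when it has no literals to work with, so
    // the caller embeds only the tables that exist and records their sizes
    // (fsize/sbsize/esize) in the Rose bytecode layout.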