mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
castle, lbr: native report remap
This commit is contained in:
parent
ec985a62f8
commit
c101beb541
@ -96,7 +96,8 @@ char subCastleReportCurrent(const struct Castle *c, struct mq *q,
|
||||
repeatHasMatch(info, rctrl, rstate, offset);
|
||||
DEBUG_PRINTF("repeatHasMatch returned %d\n", match);
|
||||
if (match == REPEAT_MATCH) {
|
||||
DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, subIdx);
|
||||
DEBUG_PRINTF("firing match at %llu for sub %u, report %u\n", offset,
|
||||
subIdx, sub->report);
|
||||
if (q->cb(offset, sub->report, q->context) == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
|
@ -344,11 +344,14 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
|
||||
u32 &scratchStateSize, u32 &streamStateSize,
|
||||
u32 &tableSize, vector<u64a> &tables, u32 &sparseRepeats,
|
||||
const ExclusiveInfo &exclusiveInfo,
|
||||
vector<u32> &may_stale) {
|
||||
vector<u32> &may_stale, const ReportManager &rm) {
|
||||
const bool remap_reports = has_managed_reports(proto.kind);
|
||||
|
||||
u32 i = 0;
|
||||
const auto &groupId = exclusiveInfo.groupId;
|
||||
const auto &numGroups = exclusiveInfo.numGroups;
|
||||
vector<u32> maxStreamSize(numGroups, 0);
|
||||
|
||||
for (auto it = proto.repeats.begin(), ite = proto.repeats.end();
|
||||
it != ite; ++it, ++i) {
|
||||
const PureRepeat &pr = it->second;
|
||||
@ -400,7 +403,9 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
|
||||
info.encodingSize = rsi.encodingSize;
|
||||
info.patchesOffset = rsi.patchesOffset;
|
||||
|
||||
sub.report = *pr.reports.begin();
|
||||
assert(pr.reports.size() == 1);
|
||||
ReportID id = *pr.reports.begin();
|
||||
sub.report = remap_reports ? rm.getProgramOffset(id) : id;
|
||||
|
||||
if (rtype == REPEAT_SPARSE_OPTIMAL_P) {
|
||||
for (u32 j = 0; j < rsi.patchSize; j++) {
|
||||
@ -435,7 +440,7 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
|
||||
aligned_unique_ptr<NFA>
|
||||
buildCastle(const CastleProto &proto,
|
||||
const map<u32, vector<vector<CharReach>>> &triggers,
|
||||
const CompileContext &cc) {
|
||||
const CompileContext &cc, const ReportManager &rm) {
|
||||
assert(cc.grey.allowCastle);
|
||||
|
||||
const size_t numRepeats = proto.repeats.size();
|
||||
@ -548,7 +553,7 @@ buildCastle(const CastleProto &proto,
|
||||
|
||||
buildSubcastles(proto, subs, infos, patchSize, repeatInfoPair,
|
||||
scratchStateSize, streamStateSize, tableSize,
|
||||
tables, sparseRepeats, exclusiveInfo, may_stale);
|
||||
tables, sparseRepeats, exclusiveInfo, may_stale, rm);
|
||||
|
||||
DEBUG_PRINTF("%zu subcastles may go stale\n", may_stale.size());
|
||||
vector<mmbit_sparse_iter> stale_iter;
|
||||
@ -816,6 +821,7 @@ bool is_equal(const CastleProto &c1, ReportID report1, const CastleProto &c2,
|
||||
ReportID report2) {
|
||||
assert(!c1.repeats.empty());
|
||||
assert(!c2.repeats.empty());
|
||||
assert(c1.kind == c2.kind);
|
||||
|
||||
if (c1.reach() != c2.reach()) {
|
||||
DEBUG_PRINTF("different reach\n");
|
||||
@ -862,6 +868,7 @@ bool is_equal(const CastleProto &c1, ReportID report1, const CastleProto &c2,
|
||||
bool is_equal(const CastleProto &c1, const CastleProto &c2) {
|
||||
assert(!c1.repeats.empty());
|
||||
assert(!c2.repeats.empty());
|
||||
assert(c1.kind == c2.kind);
|
||||
|
||||
if (c1.reach() != c2.reach()) {
|
||||
DEBUG_PRINTF("different reach\n");
|
||||
@ -992,29 +999,4 @@ unique_ptr<NGHolder> makeHolder(const CastleProto &proto,
|
||||
return g;
|
||||
}
|
||||
|
||||
/**
 * \brief Rewrite the report set of a single repeat in place.
 *
 * Every entry \c r of \p pr.reports is replaced by the value returned from
 * \c rm.getProgramOffset(r). An empty report set is left untouched.
 *
 * A copy of the original set is taken first because \p pr.reports is cleared
 * and then refilled while the old values are being iterated.
 *
 * \param pr repeat whose \c reports container is modified.
 * \param rm report manager queried via \c getProgramOffset().
 */
static
|
||||
void remapReportsToPrograms(PureRepeat &pr, const ReportManager &rm) {
|
||||
if (pr.reports.empty()) {
|
||||
return;
|
||||
}
|
||||
auto old_reports = pr.reports;
|
||||
pr.reports.clear();
|
||||
for (const auto &r : old_reports) {
|
||||
pr.reports.insert(rm.getProgramOffset(r));
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Rewrite all reports held by a castle prototype in place.
 *
 * First applies the PureRepeat overload of remapReportsToPrograms() to the
 * mapped value of every entry in \p castle.repeats. Then rebuilds
 * \p castle.report_map: each old key is passed through
 * \c rm.getProgramOffset() and the old mapped values are inserted under the
 * resulting key, so entries whose keys remap to the same value are merged.
 *
 * \param castle prototype whose \c repeats and \c report_map are modified.
 * \param rm report manager queried via \c getProgramOffset().
 */
void remapReportsToPrograms(CastleProto &castle, const ReportManager &rm) {
|
||||
for (auto &m : castle.repeats) {
|
||||
remapReportsToPrograms(m.second, rm);
|
||||
}
|
||||
|
|
||||
auto old_report_map = castle.report_map;
|
||||
castle.report_map.clear();
|
||||
for (auto &m : old_report_map) {
|
||||
u32 program = rm.getProgramOffset(m.first);
|
||||
castle.report_map[program].insert(begin(m.second), end(m.second));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
@ -123,7 +123,7 @@ void remapCastleTops(CastleProto &proto, std::map<u32, u32> &top_map);
|
||||
ue2::aligned_unique_ptr<NFA>
|
||||
buildCastle(const CastleProto &proto,
|
||||
const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
|
||||
const CompileContext &cc);
|
||||
const CompileContext &cc, const ReportManager &rm);
|
||||
|
||||
/**
|
||||
* \brief Merge two CastleProto prototypes together, if possible.
|
||||
@ -162,8 +162,6 @@ bool requiresDedupe(const CastleProto &proto,
|
||||
std::unique_ptr<NGHolder> makeHolder(const CastleProto &castle,
|
||||
const CompileContext &cc);
|
||||
|
||||
void remapReportsToPrograms(CastleProto &castle, const ReportManager &rm);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NFA_CASTLECOMPILE_H
|
||||
|
@ -36,17 +36,19 @@
|
||||
#include "ng_holder.h"
|
||||
#include "ng_repeat.h"
|
||||
#include "ng_reports.h"
|
||||
#include "nfa/shufticompile.h"
|
||||
#include "nfa/trufflecompile.h"
|
||||
#include "nfa/castlecompile.h"
|
||||
#include "nfa/lbr_internal.h"
|
||||
#include "nfa/nfa_internal.h"
|
||||
#include "nfa/repeatcompile.h"
|
||||
#include "nfa/shufticompile.h"
|
||||
#include "nfa/trufflecompile.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/bitutils.h" // for lg2
|
||||
#include "util/compile_context.h"
|
||||
#include "util/container.h"
|
||||
#include "util/depth.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/report_manager.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
using namespace std;
|
||||
@ -294,13 +296,19 @@ aligned_unique_ptr<NFA> constructLBR(const CharReach &cr,
|
||||
return nfa;
|
||||
}
|
||||
|
||||
aligned_unique_ptr<NFA> constructLBR(const PureRepeat &repeat,
|
||||
aligned_unique_ptr<NFA> constructLBR(const CastleProto &proto,
|
||||
const vector<vector<CharReach>> &triggers,
|
||||
const CompileContext &cc) {
|
||||
const CompileContext &cc,
|
||||
const ReportManager &rm) {
|
||||
if (!cc.grey.allowLbr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (proto.repeats.size() != 1) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const PureRepeat &repeat = proto.repeats.begin()->second;
|
||||
assert(!repeat.reach.none());
|
||||
|
||||
if (repeat.reports.size() != 1) {
|
||||
@ -317,6 +325,9 @@ aligned_unique_ptr<NFA> constructLBR(const PureRepeat &repeat,
|
||||
}
|
||||
|
||||
ReportID report = *repeat.reports.begin();
|
||||
if (has_managed_reports(proto.kind)) {
|
||||
report = rm.getProgramOffset(report);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("building LBR %s\n", repeat.bounds.str().c_str());
|
||||
return constructLBR(repeat.reach, repeat.bounds.min, repeat.bounds.max,
|
||||
@ -326,7 +337,8 @@ aligned_unique_ptr<NFA> constructLBR(const PureRepeat &repeat,
|
||||
/** \brief Construct an LBR engine from the given graph \p g. */
|
||||
aligned_unique_ptr<NFA> constructLBR(const NGHolder &g,
|
||||
const vector<vector<CharReach>> &triggers,
|
||||
const CompileContext &cc) {
|
||||
const CompileContext &cc,
|
||||
const ReportManager &rm) {
|
||||
if (!cc.grey.allowLbr) {
|
||||
return nullptr;
|
||||
}
|
||||
@ -335,8 +347,13 @@ aligned_unique_ptr<NFA> constructLBR(const NGHolder &g,
|
||||
if (!isPureRepeat(g, repeat)) {
|
||||
return nullptr;
|
||||
}
|
||||
if (repeat.reports.size() != 1) {
|
||||
DEBUG_PRINTF("too many reports\n");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return constructLBR(repeat, triggers, cc);
|
||||
CastleProto proto(g.kind, repeat);
|
||||
return constructLBR(proto, triggers, cc, rm);
|
||||
}
|
||||
|
||||
/** \brief True if graph \p g could be turned into an LBR engine. */
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -46,22 +46,22 @@ namespace ue2 {
|
||||
class CharReach;
|
||||
class NGHolder;
|
||||
class ReportManager;
|
||||
struct CastleProto;
|
||||
struct CompileContext;
|
||||
struct DepthMinMax;
|
||||
struct Grey;
|
||||
struct PureRepeat;
|
||||
|
||||
/** \brief Construct an LBR engine from the given graph \p g. */
|
||||
aligned_unique_ptr<NFA>
|
||||
constructLBR(const NGHolder &g,
|
||||
const std::vector<std::vector<CharReach>> &triggers,
|
||||
const CompileContext &cc);
|
||||
const CompileContext &cc, const ReportManager &rm);
|
||||
|
||||
/** \brief Construct an LBR engine from the given PureRepeat. */
|
||||
/** \brief Construct an LBR engine from the given CastleProto, which should
|
||||
* contain only one repeat. */
|
||||
aligned_unique_ptr<NFA>
|
||||
constructLBR(const PureRepeat &repeat,
|
||||
constructLBR(const CastleProto &proto,
|
||||
const std::vector<std::vector<CharReach>> &triggers,
|
||||
const CompileContext &cc);
|
||||
const CompileContext &cc, const ReportManager &rm);
|
||||
|
||||
/** \brief True if graph \p g could be turned into an LBR engine. */
|
||||
bool isLBR(const NGHolder &g, const Grey &grey);
|
||||
|
@ -866,14 +866,14 @@ static
|
||||
aligned_unique_ptr<NFA>
|
||||
buildRepeatEngine(const CastleProto &proto,
|
||||
const map<u32, vector<vector<CharReach>>> &triggers,
|
||||
const CompileContext &cc) {
|
||||
const CompileContext &cc, const ReportManager &rm) {
|
||||
// If we only have one repeat, the LBR should always be the best possible
|
||||
// implementation.
|
||||
if (proto.repeats.size() == 1 && cc.grey.allowLbr) {
|
||||
return constructLBR(proto.repeats.begin()->second, triggers.at(0), cc);
|
||||
return constructLBR(proto, triggers.at(0), cc, rm);
|
||||
}
|
||||
|
||||
aligned_unique_ptr<NFA> castle_nfa = buildCastle(proto, triggers, cc);
|
||||
auto castle_nfa = buildCastle(proto, triggers, cc, rm);
|
||||
assert(castle_nfa); // Should always be constructible.
|
||||
return castle_nfa;
|
||||
}
|
||||
@ -886,9 +886,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm,
|
||||
const map<u32, vector<vector<CharReach>>> &triggers,
|
||||
suffix_id suff, const CompileContext &cc) {
|
||||
if (suff.castle()) {
|
||||
auto remapped_castle = *suff.castle();
|
||||
remapReportsToPrograms(remapped_castle, rm);
|
||||
auto n = buildRepeatEngine(remapped_castle, triggers, cc);
|
||||
auto n = buildRepeatEngine(*suff.castle(), triggers, cc, rm);
|
||||
assert(n);
|
||||
return n;
|
||||
}
|
||||
@ -913,9 +911,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm,
|
||||
|
||||
// Take a shot at the LBR engine.
|
||||
if (oneTop) {
|
||||
auto remapped_holder = cloneHolder(holder);
|
||||
remapReportsToPrograms(*remapped_holder, rm);
|
||||
auto lbr = constructLBR(*remapped_holder, triggers.at(0), cc);
|
||||
auto lbr = constructLBR(holder, triggers.at(0), cc, rm);
|
||||
if (lbr) {
|
||||
return lbr;
|
||||
}
|
||||
@ -1044,7 +1040,7 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left,
|
||||
assert(!is_prefix);
|
||||
map<u32, vector<vector<CharReach> > > triggers;
|
||||
findTriggerSequences(tbi, infixTriggers.at(left), &triggers);
|
||||
n = buildRepeatEngine(*left.castle(), triggers, cc);
|
||||
n = buildRepeatEngine(*left.castle(), triggers, cc, rm);
|
||||
assert(n);
|
||||
return n; // Castles/LBRs are always best!
|
||||
}
|
||||
@ -1064,7 +1060,7 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left,
|
||||
map<u32, vector<vector<CharReach> > > triggers;
|
||||
findTriggerSequences(tbi, infixTriggers.at(left), &triggers);
|
||||
assert(contains(triggers, 0)); // single top
|
||||
n = constructLBR(*left.graph(), triggers[0], cc);
|
||||
n = constructLBR(*left.graph(), triggers[0], cc, rm);
|
||||
}
|
||||
|
||||
if (!n && left.graph()) {
|
||||
|
@ -49,6 +49,8 @@ using namespace std;
|
||||
using namespace testing;
|
||||
using namespace ue2;
|
||||
|
||||
static constexpr u32 MATCH_REPORT = 1024;
|
||||
|
||||
struct LbrTestParams {
|
||||
CharReach reach;
|
||||
u32 min;
|
||||
@ -98,10 +100,11 @@ protected:
|
||||
|
||||
ASSERT_TRUE(isLBR(*g, grey));
|
||||
|
||||
vector<vector<CharReach> > triggers;
|
||||
triggers.push_back(vector<CharReach>());
|
||||
triggers.back().push_back(CharReach::dot()); /* lbr triggered by . */
|
||||
nfa = constructLBR(*g, triggers, cc);
|
||||
rm.setProgramOffset(0, MATCH_REPORT);
|
||||
|
||||
/* LBR triggered by dot */
|
||||
vector<vector<CharReach>> triggers = {{CharReach::dot()}};
|
||||
nfa = constructLBR(*g, triggers, cc, rm);
|
||||
ASSERT_TRUE(nfa != nullptr);
|
||||
|
||||
full_state = aligned_zmalloc_unique<char>(nfa->scratchStateSize);
|
||||
@ -247,7 +250,7 @@ TEST_P(LbrTest, QueueExecToMatch) {
|
||||
char rv = nfaQueueExecToMatch(nfa.get(), &q, end);
|
||||
ASSERT_EQ(MO_MATCHES_PENDING, rv);
|
||||
ASSERT_EQ(0, matches);
|
||||
ASSERT_NE(0, nfaInAcceptState(nfa.get(), 0, &q));
|
||||
ASSERT_NE(0, nfaInAcceptState(nfa.get(), MATCH_REPORT, &q));
|
||||
nfaReportCurrentMatches(nfa.get(), &q);
|
||||
ASSERT_EQ(1, matches);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user