Mirror of https://github.com/VectorCamp/vectorscan.git (synced 2025-06-28 16:41:01 +03:00)
rose: use program offsets directly in lit tables
commit 76f72b6ab4
parent ac858cd47c
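The change removes a level of indirection at match time: instead of the literal matcher reporting a final_id that the runtime must translate into a program offset via a table stored in the bytecode, the literal tables now carry the program offsets themselves. A minimal, self-contained sketch of the idea (simplified types and names, not the actual engine structures):

#include <cstdint>
#include <vector>

using u32 = std::uint32_t;

// Before: each match reports a final_id; the runtime loads a per-literal
// table of program offsets from the bytecode and indexes it.
struct OldStyle {
    std::vector<u32> litProgramOffsets; // indexed by final_id
    u32 programFor(u32 final_id) const {
        return litProgramOffsets.at(final_id); // extra load per match
    }
};

// After: the matcher's literal entry stores the program offset directly, so
// the id delivered to the match callback is already the byte offset of the
// program to run.
struct NewStyle {
    u32 programFor(u32 id) const { return id; } // no table lookup
};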
src/rose/match.c

@@ -85,19 +85,13 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id,
     DEBUG_PRINTF("STATE groups=0x%016llx\n", tctx->groups);

-    const u32 *delayRebuildPrograms =
-        getByOffset(t, t->litDelayRebuildProgramOffset);
-    assert(id < t->literalCount);
-    const u32 program = delayRebuildPrograms[id];
-
-    if (program) {
+    assert(id < t->size); // id is a program offset
     const u64a som = 0;
     const size_t match_len = end - start + 1;
     const u8 flags = 0;
-    UNUSED hwlmcb_rv_t rv = roseRunProgram(t, scratch, program, som,
-                                           real_end, match_len, flags);
+    UNUSED hwlmcb_rv_t rv =
+        roseRunProgram(t, scratch, id, som, real_end, match_len, flags);
     assert(rv != HWLM_TERMINATE_MATCHING);
-    }

     /* we are just repopulating the delay queue, groups should be
      * already set from the original scan. */
@@ -245,12 +239,10 @@ hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t,
                                    struct hs_scratch *scratch, u64a end,
                                    size_t match_len, u32 id) {
     DEBUG_PRINTF("id=%u\n", id);
-    const u32 *programs = getByOffset(t, t->litProgramOffset);
-    assert(id < t->literalCount);
+    assert(id < t->size); // id is an offset into bytecode
     const u64a som = 0;
     const u8 flags = 0;
-    return roseRunProgram_i(t, scratch, programs[id], som, end, match_len,
-                            flags);
+    return roseRunProgram_i(t, scratch, id, som, end, match_len, flags);
 }

 static rose_inline
src/rose/rose_build_anchored.cpp

@@ -208,8 +208,8 @@ void remapAnchoredReports(RoseBuildImpl &build) {
  * raw_dfa with program offsets.
  */
 static
-void remapIdsToPrograms(raw_dfa &rdfa, const vector<u32> &litPrograms,
-                        const map<u32, u32> &final_to_frag_map) {
+void remapIdsToPrograms(raw_dfa &rdfa,
+                        const map<u32, LitFragment> &final_to_frag_map) {
     for (dstate &ds : rdfa.states) {
         assert(ds.reports_eod.empty()); // Not used in anchored matcher.
         if (ds.reports.empty()) {
@@ -219,9 +219,8 @@ void remapIdsToPrograms(raw_dfa &rdfa, const vector<u32> &litPrograms,
         flat_set<ReportID> new_reports;
         for (auto final_id : ds.reports) {
             assert(contains(final_to_frag_map, final_id));
-            auto frag_id = final_to_frag_map.at(final_id);
-            assert(frag_id < litPrograms.size());
-            new_reports.insert(litPrograms.at(frag_id));
+            auto &frag = final_to_frag_map.at(final_id);
+            new_reports.insert(frag.lit_program_offset);
         }
         ds.reports = move(new_reports);
     }
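The anchored matcher's DFAs report literal IDs; after this change their report sets are rewritten to hold the literal programs' byte offsets instead, so an anchored match can dispatch straight into the bytecode. A rough, self-contained sketch of that remapping (simplified stand-in types, not the real raw_dfa or LitFragment definitions):

#include <cstdint>
#include <map>
#include <set>
#include <utility>
#include <vector>

using u32 = std::uint32_t;

struct FragInfo {
    u32 lit_program_offset = 0; // where the literal program lives in bytecode
};

struct FakeDfaState {
    std::set<u32> reports; // initially final_ids, remapped to program offsets
};

// Replace each reported final_id with its fragment's program offset.
void remapReports(std::vector<FakeDfaState> &states,
                  const std::map<u32, FragInfo> &final_to_frag) {
    for (auto &ds : states) {
        std::set<u32> remapped;
        for (u32 final_id : ds.reports) {
            remapped.insert(final_to_frag.at(final_id).lit_program_offset);
        }
        ds.reports = std::move(remapped);
    }
}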
@@ -849,8 +848,8 @@ vector<raw_dfa> buildAnchoredDfas(RoseBuildImpl &build) {

 aligned_unique_ptr<anchored_matcher_info>
 buildAnchoredMatcher(RoseBuildImpl &build, vector<raw_dfa> &dfas,
-                     const vector<u32> &litPrograms,
-                     const map<u32, u32> &final_to_frag_map, size_t *asize) {
+                     const map<u32, LitFragment> &final_to_frag_map,
+                     size_t *asize) {
     const CompileContext &cc = build.cc;

     if (dfas.empty()) {
@@ -860,7 +859,7 @@ buildAnchoredMatcher(RoseBuildImpl &build, vector<raw_dfa> &dfas,
     }

     for (auto &rdfa : dfas) {
-        remapIdsToPrograms(rdfa, litPrograms, final_to_frag_map);
+        remapIdsToPrograms(rdfa, final_to_frag_map);
     }

     vector<aligned_unique_ptr<NFA>> nfas;
src/rose/rose_build_anchored.h

@@ -30,7 +30,7 @@
 #define ROSE_BUILD_ANCHORED

 #include "ue2common.h"
-#include "rose_build.h"
+#include "rose_build_impl.h"
 #include "nfagraph/ng_holder.h"
 #include "util/alloc.h"

@@ -59,8 +59,7 @@ std::vector<raw_dfa> buildAnchoredDfas(RoseBuildImpl &build);
 */
 aligned_unique_ptr<anchored_matcher_info>
 buildAnchoredMatcher(RoseBuildImpl &build, std::vector<raw_dfa> &dfas,
-                     const std::vector<u32> &litPrograms,
-                     const std::map<u32, u32> &final_to_frag_map,
+                     const std::map<u32, LitFragment> &final_to_frag_map,
                      size_t *asize);

 u32 anchoredStateSize(const anchored_matcher_info &atable);
src/rose/rose_build_bytecode.cpp

@@ -213,10 +213,6 @@ struct build_context : boost::noncopyable {
      * that have already been pushed into the engine_blob. */
     ue2::unordered_map<u32, u32> engineOffsets;

-    /** \brief Literal programs, indexed by final_id, after they have been
-     * written to the engine_blob. */
-    vector<u32> litPrograms;
-
     /** \brief List of long literals (ones with CHECK_LONG_LIT instructions)
      * that need hash table support. */
     vector<ue2_case_string> longLiterals;
@@ -4578,6 +4574,10 @@ u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc,
 static
 u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc,
                              const flat_set<u32> &final_ids) {
+    if (!build.cc.streaming) {
+        return 0; // We only do delayed rebuild in streaming mode.
+    }
+
     RoseProgram program;

     for (const auto &final_id : final_ids) {
@@ -4649,9 +4649,9 @@ rose_literal_id getFragment(const rose_literal_id &lit) {
     return frag;
 }

-map<u32, u32> groupByFragment(const RoseBuildImpl &build) {
+map<u32, LitFragment> groupByFragment(const RoseBuildImpl &build) {
     u32 frag_id = 0;
-    map<u32, u32> final_to_frag;
+    map<u32, LitFragment> final_to_frag;

     map<rose_literal_id, vector<u32>> frag_lits;
     for (const auto &m : build.final_id_to_literal) {
@@ -4660,21 +4660,21 @@ map<u32, u32> groupByFragment(const RoseBuildImpl &build) {
         assert(!lit_ids.empty());

         if (lit_ids.size() > 1) {
-            final_to_frag.emplace(final_id, frag_id++);
+            final_to_frag.emplace(final_id, LitFragment(frag_id++));
             continue;
         }

         const auto lit_id = *lit_ids.begin();
         const auto &lit = build.literals.right.at(lit_id);
         if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) {
-            final_to_frag.emplace(final_id, frag_id++);
+            final_to_frag.emplace(final_id, LitFragment(frag_id++));
             continue;
         }

         // Combining fragments that squash their groups is unsafe.
         const auto &info = build.literal_info[lit_id];
         if (info.squash_group) {
-            final_to_frag.emplace(final_id, frag_id++);
+            final_to_frag.emplace(final_id, LitFragment(frag_id++));
             continue;
         }

@@ -4689,7 +4689,7 @@ map<u32, u32> groupByFragment(const RoseBuildImpl &build) {
                      as_string_list(m.second).c_str());
         for (const auto final_id : m.second) {
             assert(!contains(final_to_frag, final_id));
-            final_to_frag.emplace(final_id, frag_id);
+            final_to_frag.emplace(final_id, LitFragment(frag_id));
         }
         frag_id++;
     }
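groupByFragment collapses literals that can share a single program into one fragment, so several final_ids may map to the same frag_id and, later, to the same program offset. A loose, self-contained sketch of that grouping idea (hypothetical helper names; the real builder groups by a literal suffix and keeps multi-literal, short, and group-squashing ids separate):

#include <cstdint>
#include <map>
#include <string>

using u32 = std::uint32_t;

struct Frag {
    explicit Frag(u32 id) : fragment_id(id) {}
    u32 fragment_id;
    u32 lit_program_offset = 0;   // filled in once programs are written
    u32 delay_program_offset = 0;
};

// Group final_ids by a key derived from their literal text (here just the
// text itself); identical keys share a fragment and hence a program.
std::map<u32, Frag> groupByText(const std::map<u32, std::string> &finals) {
    std::map<std::string, u32> frag_for_text;
    std::map<u32, Frag> final_to_frag;
    u32 next_frag = 0;
    for (const auto &m : finals) {
        auto it = frag_for_text.find(m.second);
        u32 frag_id = (it == frag_for_text.end())
                          ? (frag_for_text[m.second] = next_frag++)
                          : it->second;
        final_to_frag.emplace(m.first, Frag(frag_id));
    }
    return final_to_frag;
}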
@@ -4709,11 +4709,11 @@ map<u32, u32> groupByFragment(const RoseBuildImpl &build) {
 static
 tuple<u32, u32, u32>
 buildLiteralPrograms(RoseBuildImpl &build, build_context &bc,
-                     const map<u32, u32> &final_to_frag_map) {
+                     map<u32, LitFragment> &final_to_frag_map) {
     // Build a reverse mapping from fragment -> final_id.
     map<u32, flat_set<u32>> frag_to_final_map;
     for (const auto &m : final_to_frag_map) {
-        frag_to_final_map[m.second].insert(m.first);
+        frag_to_final_map[m.second.fragment_id].insert(m.first);
     }

     const u32 num_fragments = verify_u32(frag_to_final_map.size());
@@ -4721,7 +4721,7 @@ buildLiteralPrograms(RoseBuildImpl &build, build_context &bc,

     auto lit_edge_map = findEdgesByLiteral(build);

-    bc.litPrograms.resize(num_fragments);
+    vector<u32> litPrograms(num_fragments);
     vector<u32> delayRebuildPrograms(num_fragments);

     for (u32 frag_id = 0; frag_id != num_fragments; ++frag_id) {
@@ -4729,14 +4729,20 @@ buildLiteralPrograms(RoseBuildImpl &build, build_context &bc,
         DEBUG_PRINTF("frag_id=%u, final_ids=[%s]\n", frag_id,
                      as_string_list(final_ids).c_str());

-        bc.litPrograms[frag_id] =
+        litPrograms[frag_id] =
             writeLiteralProgram(build, bc, final_ids, lit_edge_map);
         delayRebuildPrograms[frag_id] =
             buildDelayRebuildProgram(build, bc, final_ids);
     }

+    // Update LitFragment entries.
+    for (auto &frag : final_to_frag_map | map_values) {
+        frag.lit_program_offset = litPrograms[frag.fragment_id];
+        frag.delay_program_offset = delayRebuildPrograms[frag.fragment_id];
+    }
+
     u32 litProgramsOffset =
-        bc.engine_blob.add(begin(bc.litPrograms), end(bc.litPrograms));
+        bc.engine_blob.add(begin(litPrograms), end(litPrograms));
     u32 delayRebuildProgramsOffset = bc.engine_blob.add(
         begin(delayRebuildPrograms), end(delayRebuildPrograms));

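Once each fragment's literal program and delay-rebuild program have been serialized, their byte offsets are copied back into the LitFragment records; the matcher tables built later read the offsets from there rather than from a separate final_id-indexed array in the engine. A small sketch of that back-fill step (plain std::map iteration standing in for the map_values range adaptor used above):

#include <cstdint>
#include <map>
#include <vector>

using u32 = std::uint32_t;

struct Frag {
    u32 fragment_id = 0;
    u32 lit_program_offset = 0;
    u32 delay_program_offset = 0;
};

// After writing one program per fragment, record where each one landed.
void backfillOffsets(std::map<u32, Frag> &final_to_frag,
                     const std::vector<u32> &litPrograms,
                     const std::vector<u32> &delayRebuildPrograms) {
    for (auto &m : final_to_frag) {
        Frag &frag = m.second;
        frag.lit_program_offset = litPrograms.at(frag.fragment_id);
        frag.delay_program_offset = delayRebuildPrograms.at(frag.fragment_id);
    }
}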
@@ -5513,8 +5519,8 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     // Build anchored matcher.
     size_t asize = 0;
     u32 amatcherOffset = 0;
-    auto atable = buildAnchoredMatcher(*this, anchored_dfas, bc.litPrograms,
-                                       final_to_frag_map, &asize);
+    auto atable =
+        buildAnchoredMatcher(*this, anchored_dfas, final_to_frag_map, &asize);
     if (atable) {
         currOffset = ROUNDUP_CL(currOffset);
         amatcherOffset = currOffset;
src/rose/rose_build_impl.h

@@ -642,7 +642,14 @@ void normaliseLiteralMask(const ue2_literal &s, std::vector<u8> &msk,
 bool canImplementGraphs(const RoseBuildImpl &tbi);
 #endif

-std::map<u32, u32> groupByFragment(const RoseBuildImpl &build);
+struct LitFragment {
+    explicit LitFragment(u32 fragment_id_in) : fragment_id(fragment_id_in) {}
+    u32 fragment_id;
+    u32 lit_program_offset = 0;
+    u32 delay_program_offset = 0;
+};
+
+std::map<u32, LitFragment> groupByFragment(const RoseBuildImpl &build);

 } // namespace ue2
src/rose/rose_build_matchers.cpp

@@ -637,12 +637,12 @@ u64a literalMinReportOffset(const RoseBuildImpl &build,

 static
 map<u32, hwlm_group_t> makeFragGroupMap(const RoseBuildImpl &build,
-                                const map<u32, u32> &final_to_frag_map) {
+                              const map<u32, LitFragment> &final_to_frag_map) {
     map<u32, hwlm_group_t> frag_to_group;

     for (const auto &m : final_to_frag_map) {
         u32 final_id = m.first;
-        u32 frag_id = m.second;
+        u32 frag_id = m.second.fragment_id;
         hwlm_group_t groups = 0;
         const auto &lits = build.final_id_to_literal.at(final_id);
         for (auto lit_id : lits) {
@@ -665,7 +665,7 @@ void trim_to_suffix(Container &c, size_t len) {
 }

 MatcherProto makeMatcherProto(const RoseBuildImpl &build,
-                              const map<u32, u32> &final_to_frag_map,
+                              const map<u32, LitFragment> &final_to_frag_map,
                               rose_literal_table table, bool delay_rebuild,
                               size_t max_len, u32 max_offset) {
     MatcherProto mp;
@@ -758,9 +758,11 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build,
     for (auto &lit : mp.lits) {
         u32 final_id = lit.id;
         assert(contains(final_to_frag_map, final_id));
-        lit.id = final_to_frag_map.at(final_id);
-        assert(contains(frag_group_map, lit.id));
-        lit.groups = frag_group_map.at(lit.id);
+        const auto &frag = final_to_frag_map.at(final_id);
+        lit.id = delay_rebuild ? frag.delay_program_offset
+                               : frag.lit_program_offset;
+        assert(contains(frag_group_map, frag.fragment_id));
+        lit.groups = frag_group_map.at(frag.fragment_id);
     }

     sort_and_unique(mp.lits);
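Here each HWLM literal's id is rewritten to the byte offset of the program it should trigger: the delay-rebuild matcher gets the fragment's delay program offset, while every other table gets the literal program offset. A compact, self-contained illustration of that selection (invented types; the real code also carries group masks and other per-literal state):

#include <cstdint>
#include <map>
#include <vector>

using u32 = std::uint32_t;

struct Frag {
    u32 lit_program_offset = 0;
    u32 delay_program_offset = 0;
};

struct HwlmLit {
    u32 id = 0; // on input a final_id, on output a program offset
};

void assignProgramOffsets(std::vector<HwlmLit> &lits,
                          const std::map<u32, Frag> &final_to_frag,
                          bool delay_rebuild) {
    for (auto &lit : lits) {
        const Frag &frag = final_to_frag.at(lit.id);
        lit.id = delay_rebuild ? frag.delay_program_offset
                               : frag.lit_program_offset;
    }
}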
@@ -803,7 +805,7 @@ void buildAccel(const RoseBuildImpl &build, const MatcherProto &mp,

 aligned_unique_ptr<HWLM>
 buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold,
-                     const map<u32, u32> &final_to_frag_map,
+                     const map<u32, LitFragment> &final_to_frag_map,
                      rose_group *fgroups, size_t *fsize,
                      size_t *historyRequired) {
     *fsize = 0;
@@ -841,7 +843,7 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold,

 aligned_unique_ptr<HWLM> buildDelayRebuildMatcher(
     const RoseBuildImpl &build, size_t longLitLengthThreshold,
-    const map<u32, u32> &final_to_frag_map, size_t *drsize) {
+    const map<u32, LitFragment> &final_to_frag_map, size_t *drsize) {
     *drsize = 0;

     if (!build.cc.streaming) {
@@ -871,7 +873,8 @@ aligned_unique_ptr<HWLM> buildDelayRebuildMatcher(

 aligned_unique_ptr<HWLM>
 buildSmallBlockMatcher(const RoseBuildImpl &build,
-                       const map<u32, u32> &final_to_frag_map, size_t *sbsize) {
+                       const map<u32, LitFragment> &final_to_frag_map,
+                       size_t *sbsize) {
     *sbsize = 0;

     if (build.cc.streaming) {
@@ -931,7 +934,8 @@ buildSmallBlockMatcher(const RoseBuildImpl &build,

 aligned_unique_ptr<HWLM>
 buildEodAnchoredMatcher(const RoseBuildImpl &build,
-                        const map<u32, u32> &final_to_frag_map, size_t *esize) {
+                        const map<u32, LitFragment> &final_to_frag_map,
+                        size_t *esize) {
     *esize = 0;

     auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_EOD_ANCHORED,
src/rose/rose_build_matchers.h

@@ -67,30 +67,30 @@ struct MatcherProto {
  * If max_offset is specified (and not ROSE_BOUND_INF), then literals that can
  * only lead to a pattern match after max_offset may be excluded.
  */
-MatcherProto makeMatcherProto(const RoseBuildImpl &build,
-                              const std::map<u32, u32> &final_to_frag_map,
-                              rose_literal_table table, bool delay_rebuild,
-                              size_t max_len, u32 max_offset = ROSE_BOUND_INF);
+MatcherProto
+makeMatcherProto(const RoseBuildImpl &build,
+                 const std::map<u32, LitFragment> &final_to_frag_map,
+                 rose_literal_table table, bool delay_rebuild, size_t max_len,
+                 u32 max_offset = ROSE_BOUND_INF);

-aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
-                                        size_t longLitLengthThreshold,
-                                        const std::map<u32, u32> &final_to_frag_map,
-                                        rose_group *fgroups,
-                                        size_t *fsize,
+aligned_unique_ptr<HWLM>
+buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold,
+                     const std::map<u32, LitFragment> &final_to_frag_map,
+                     rose_group *fgroups, size_t *fsize,
                      size_t *historyRequired);

 aligned_unique_ptr<HWLM> buildDelayRebuildMatcher(
     const RoseBuildImpl &build, size_t longLitLengthThreshold,
-    const std::map<u32, u32> &final_to_frag_map, size_t *drsize);
+    const std::map<u32, LitFragment> &final_to_frag_map, size_t *drsize);

 aligned_unique_ptr<HWLM>
 buildSmallBlockMatcher(const RoseBuildImpl &build,
-                       const std::map<u32, u32> &final_to_frag_map,
+                       const std::map<u32, LitFragment> &final_to_frag_map,
                        size_t *sbsize);

 aligned_unique_ptr<HWLM>
 buildEodAnchoredMatcher(const RoseBuildImpl &build,
-                        const std::map<u32, u32> &final_to_frag_map,
+                        const std::map<u32, LitFragment> &final_to_frag_map,
                         size_t *esize);

 void findMoreLiteralMasks(RoseBuildImpl &build);