rose: use program offsets directly in lit tables

Justin Viiret 2017-01-30 09:14:03 +11:00 committed by Matthew Barr
parent ac858cd47c
commit 76f72b6ab4
7 changed files with 76 additions and 69 deletions
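
In short: the HWLM literal matchers used to report a final literal id, which the runtime translated into a Rose program offset via lookup tables stored in the bytecode (litProgramOffset, litDelayRebuildProgramOffset). After this change, the id stored with each literal in the matcher tables is the program's byte offset itself, so the match callbacks run the program directly. A condensed sketch of the runtime side, reusing the names from the hunks below (simplified; the surrounding callback parameters and declarations are elided):

    /* Before: id indexes a table of program offsets held in the bytecode. */
    const u32 *programs = getByOffset(t, t->litProgramOffset);
    assert(id < t->literalCount);
    return roseRunProgram_i(t, scratch, programs[id], som, end, match_len, flags);

    /* After: id is already the program's byte offset into the bytecode. */
    assert(id < t->size); // id is an offset into bytecode
    return roseRunProgram_i(t, scratch, id, som, end, match_len, flags);

On the compile side, each literal fragment (struct LitFragment) now carries lit_program_offset and delay_program_offset, and makeMatcherProto() writes the appropriate offset into each literal's id instead of a fragment id.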

@@ -85,19 +85,13 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id,
DEBUG_PRINTF("STATE groups=0x%016llx\n", tctx->groups);
const u32 *delayRebuildPrograms =
getByOffset(t, t->litDelayRebuildProgramOffset);
assert(id < t->literalCount);
const u32 program = delayRebuildPrograms[id];
if (program) {
assert(id < t->size); // id is a program offset
const u64a som = 0;
const size_t match_len = end - start + 1;
const u8 flags = 0;
UNUSED hwlmcb_rv_t rv = roseRunProgram(t, scratch, program, som,
real_end, match_len, flags);
UNUSED hwlmcb_rv_t rv =
roseRunProgram(t, scratch, id, som, real_end, match_len, flags);
assert(rv != HWLM_TERMINATE_MATCHING);
}
/* we are just repopulating the delay queue, groups should be
* already set from the original scan. */
@@ -245,12 +239,10 @@ hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t,
struct hs_scratch *scratch, u64a end,
size_t match_len, u32 id) {
DEBUG_PRINTF("id=%u\n", id);
const u32 *programs = getByOffset(t, t->litProgramOffset);
assert(id < t->literalCount);
assert(id < t->size); // id is an offset into bytecode
const u64a som = 0;
const u8 flags = 0;
return roseRunProgram_i(t, scratch, programs[id], som, end, match_len,
flags);
return roseRunProgram_i(t, scratch, id, som, end, match_len, flags);
}
static rose_inline

@@ -208,8 +208,8 @@ void remapAnchoredReports(RoseBuildImpl &build) {
* raw_dfa with program offsets.
*/
static
void remapIdsToPrograms(raw_dfa &rdfa, const vector<u32> &litPrograms,
const map<u32, u32> &final_to_frag_map) {
void remapIdsToPrograms(raw_dfa &rdfa,
const map<u32, LitFragment> &final_to_frag_map) {
for (dstate &ds : rdfa.states) {
assert(ds.reports_eod.empty()); // Not used in anchored matcher.
if (ds.reports.empty()) {
@@ -219,9 +219,8 @@ void remapIdsToPrograms(raw_dfa &rdfa, const vector<u32> &litPrograms,
flat_set<ReportID> new_reports;
for (auto final_id : ds.reports) {
assert(contains(final_to_frag_map, final_id));
auto frag_id = final_to_frag_map.at(final_id);
assert(frag_id < litPrograms.size());
new_reports.insert(litPrograms.at(frag_id));
auto &frag = final_to_frag_map.at(final_id);
new_reports.insert(frag.lit_program_offset);
}
ds.reports = move(new_reports);
}
@@ -849,8 +848,8 @@ vector<raw_dfa> buildAnchoredDfas(RoseBuildImpl &build) {
aligned_unique_ptr<anchored_matcher_info>
buildAnchoredMatcher(RoseBuildImpl &build, vector<raw_dfa> &dfas,
const vector<u32> &litPrograms,
const map<u32, u32> &final_to_frag_map, size_t *asize) {
const map<u32, LitFragment> &final_to_frag_map,
size_t *asize) {
const CompileContext &cc = build.cc;
if (dfas.empty()) {
@@ -860,7 +859,7 @@ buildAnchoredMatcher(RoseBuildImpl &build, vector<raw_dfa> &dfas,
}
for (auto &rdfa : dfas) {
remapIdsToPrograms(rdfa, litPrograms, final_to_frag_map);
remapIdsToPrograms(rdfa, final_to_frag_map);
}
vector<aligned_unique_ptr<NFA>> nfas;

@@ -30,7 +30,7 @@
#define ROSE_BUILD_ANCHORED
#include "ue2common.h"
#include "rose_build.h"
#include "rose_build_impl.h"
#include "nfagraph/ng_holder.h"
#include "util/alloc.h"
@@ -59,8 +59,7 @@ std::vector<raw_dfa> buildAnchoredDfas(RoseBuildImpl &build);
*/
aligned_unique_ptr<anchored_matcher_info>
buildAnchoredMatcher(RoseBuildImpl &build, std::vector<raw_dfa> &dfas,
const std::vector<u32> &litPrograms,
const std::map<u32, u32> &final_to_frag_map,
const std::map<u32, LitFragment> &final_to_frag_map,
size_t *asize);
u32 anchoredStateSize(const anchored_matcher_info &atable);

@@ -213,10 +213,6 @@ struct build_context : boost::noncopyable {
* that have already been pushed into the engine_blob. */
ue2::unordered_map<u32, u32> engineOffsets;
/** \brief Literal programs, indexed by final_id, after they have been
* written to the engine_blob. */
vector<u32> litPrograms;
/** \brief List of long literals (ones with CHECK_LONG_LIT instructions)
* that need hash table support. */
vector<ue2_case_string> longLiterals;
@@ -4578,6 +4574,10 @@ u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc,
static
u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc,
const flat_set<u32> &final_ids) {
if (!build.cc.streaming) {
return 0; // We only do delayed rebuild in streaming mode.
}
RoseProgram program;
for (const auto &final_id : final_ids) {
@@ -4649,9 +4649,9 @@ rose_literal_id getFragment(const rose_literal_id &lit) {
return frag;
}
map<u32, u32> groupByFragment(const RoseBuildImpl &build) {
map<u32, LitFragment> groupByFragment(const RoseBuildImpl &build) {
u32 frag_id = 0;
map<u32, u32> final_to_frag;
map<u32, LitFragment> final_to_frag;
map<rose_literal_id, vector<u32>> frag_lits;
for (const auto &m : build.final_id_to_literal) {
@@ -4660,21 +4660,21 @@ map<u32, u32> groupByFragment(const RoseBuildImpl &build) {
assert(!lit_ids.empty());
if (lit_ids.size() > 1) {
final_to_frag.emplace(final_id, frag_id++);
final_to_frag.emplace(final_id, LitFragment(frag_id++));
continue;
}
const auto lit_id = *lit_ids.begin();
const auto &lit = build.literals.right.at(lit_id);
if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) {
final_to_frag.emplace(final_id, frag_id++);
final_to_frag.emplace(final_id, LitFragment(frag_id++));
continue;
}
// Combining fragments that squash their groups is unsafe.
const auto &info = build.literal_info[lit_id];
if (info.squash_group) {
final_to_frag.emplace(final_id, frag_id++);
final_to_frag.emplace(final_id, LitFragment(frag_id++));
continue;
}
@@ -4689,7 +4689,7 @@ map<u32, u32> groupByFragment(const RoseBuildImpl &build) {
as_string_list(m.second).c_str());
for (const auto final_id : m.second) {
assert(!contains(final_to_frag, final_id));
final_to_frag.emplace(final_id, frag_id);
final_to_frag.emplace(final_id, LitFragment(frag_id));
}
frag_id++;
}
@@ -4709,11 +4709,11 @@ map<u32, u32> groupByFragment(const RoseBuildImpl &build) {
static
tuple<u32, u32, u32>
buildLiteralPrograms(RoseBuildImpl &build, build_context &bc,
const map<u32, u32> &final_to_frag_map) {
map<u32, LitFragment> &final_to_frag_map) {
// Build a reverse mapping from fragment -> final_id.
map<u32, flat_set<u32>> frag_to_final_map;
for (const auto &m : final_to_frag_map) {
frag_to_final_map[m.second].insert(m.first);
frag_to_final_map[m.second.fragment_id].insert(m.first);
}
const u32 num_fragments = verify_u32(frag_to_final_map.size());
@@ -4721,7 +4721,7 @@ buildLiteralPrograms(RoseBuildImpl &build, build_context &bc,
auto lit_edge_map = findEdgesByLiteral(build);
bc.litPrograms.resize(num_fragments);
vector<u32> litPrograms(num_fragments);
vector<u32> delayRebuildPrograms(num_fragments);
for (u32 frag_id = 0; frag_id != num_fragments; ++frag_id) {
@@ -4729,14 +4729,20 @@ buildLiteralPrograms(RoseBuildImpl &build, build_context &bc,
DEBUG_PRINTF("frag_id=%u, final_ids=[%s]\n", frag_id,
as_string_list(final_ids).c_str());
bc.litPrograms[frag_id] =
litPrograms[frag_id] =
writeLiteralProgram(build, bc, final_ids, lit_edge_map);
delayRebuildPrograms[frag_id] =
buildDelayRebuildProgram(build, bc, final_ids);
}
// Update LitFragment entries.
for (auto &frag : final_to_frag_map | map_values) {
frag.lit_program_offset = litPrograms[frag.fragment_id];
frag.delay_program_offset = delayRebuildPrograms[frag.fragment_id];
}
u32 litProgramsOffset =
bc.engine_blob.add(begin(bc.litPrograms), end(bc.litPrograms));
bc.engine_blob.add(begin(litPrograms), end(litPrograms));
u32 delayRebuildProgramsOffset = bc.engine_blob.add(
begin(delayRebuildPrograms), end(delayRebuildPrograms));
@@ -5513,8 +5519,8 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
// Build anchored matcher.
size_t asize = 0;
u32 amatcherOffset = 0;
auto atable = buildAnchoredMatcher(*this, anchored_dfas, bc.litPrograms,
final_to_frag_map, &asize);
auto atable =
buildAnchoredMatcher(*this, anchored_dfas, final_to_frag_map, &asize);
if (atable) {
currOffset = ROUNDUP_CL(currOffset);
amatcherOffset = currOffset;

@@ -642,7 +642,14 @@ void normaliseLiteralMask(const ue2_literal &s, std::vector<u8> &msk,
bool canImplementGraphs(const RoseBuildImpl &tbi);
#endif
std::map<u32, u32> groupByFragment(const RoseBuildImpl &build);
struct LitFragment {
explicit LitFragment(u32 fragment_id_in) : fragment_id(fragment_id_in) {}
u32 fragment_id;
u32 lit_program_offset = 0;
u32 delay_program_offset = 0;
};
std::map<u32, LitFragment> groupByFragment(const RoseBuildImpl &build);
} // namespace ue2

@@ -637,12 +637,12 @@ u64a literalMinReportOffset(const RoseBuildImpl &build,
static
map<u32, hwlm_group_t> makeFragGroupMap(const RoseBuildImpl &build,
const map<u32, u32> &final_to_frag_map) {
const map<u32, LitFragment> &final_to_frag_map) {
map<u32, hwlm_group_t> frag_to_group;
for (const auto &m : final_to_frag_map) {
u32 final_id = m.first;
u32 frag_id = m.second;
u32 frag_id = m.second.fragment_id;
hwlm_group_t groups = 0;
const auto &lits = build.final_id_to_literal.at(final_id);
for (auto lit_id : lits) {
@@ -665,7 +665,7 @@ void trim_to_suffix(Container &c, size_t len) {
}
MatcherProto makeMatcherProto(const RoseBuildImpl &build,
const map<u32, u32> &final_to_frag_map,
const map<u32, LitFragment> &final_to_frag_map,
rose_literal_table table, bool delay_rebuild,
size_t max_len, u32 max_offset) {
MatcherProto mp;
@@ -758,9 +758,11 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build,
for (auto &lit : mp.lits) {
u32 final_id = lit.id;
assert(contains(final_to_frag_map, final_id));
lit.id = final_to_frag_map.at(final_id);
assert(contains(frag_group_map, lit.id));
lit.groups = frag_group_map.at(lit.id);
const auto &frag = final_to_frag_map.at(final_id);
lit.id = delay_rebuild ? frag.delay_program_offset
: frag.lit_program_offset;
assert(contains(frag_group_map, frag.fragment_id));
lit.groups = frag_group_map.at(frag.fragment_id);
}
sort_and_unique(mp.lits);
@@ -803,7 +805,7 @@ void buildAccel(const RoseBuildImpl &build, const MatcherProto &mp,
aligned_unique_ptr<HWLM>
buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold,
const map<u32, u32> &final_to_frag_map,
const map<u32, LitFragment> &final_to_frag_map,
rose_group *fgroups, size_t *fsize,
size_t *historyRequired) {
*fsize = 0;
@@ -841,7 +843,7 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold,
aligned_unique_ptr<HWLM> buildDelayRebuildMatcher(
const RoseBuildImpl &build, size_t longLitLengthThreshold,
const map<u32, u32> &final_to_frag_map, size_t *drsize) {
const map<u32, LitFragment> &final_to_frag_map, size_t *drsize) {
*drsize = 0;
if (!build.cc.streaming) {
@@ -871,7 +873,8 @@ aligned_unique_ptr<HWLM> buildDelayRebuildMatcher(
aligned_unique_ptr<HWLM>
buildSmallBlockMatcher(const RoseBuildImpl &build,
const map<u32, u32> &final_to_frag_map, size_t *sbsize) {
const map<u32, LitFragment> &final_to_frag_map,
size_t *sbsize) {
*sbsize = 0;
if (build.cc.streaming) {
@@ -931,7 +934,8 @@ buildSmallBlockMatcher(const RoseBuildImpl &build,
aligned_unique_ptr<HWLM>
buildEodAnchoredMatcher(const RoseBuildImpl &build,
const map<u32, u32> &final_to_frag_map, size_t *esize) {
const map<u32, LitFragment> &final_to_frag_map,
size_t *esize) {
*esize = 0;
auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_EOD_ANCHORED,

@@ -67,30 +67,30 @@ struct MatcherProto {
* If max_offset is specified (and not ROSE_BOUND_INF), then literals that can
* only lead to a pattern match after max_offset may be excluded.
*/
MatcherProto makeMatcherProto(const RoseBuildImpl &build,
const std::map<u32, u32> &final_to_frag_map,
rose_literal_table table, bool delay_rebuild,
size_t max_len, u32 max_offset = ROSE_BOUND_INF);
MatcherProto
makeMatcherProto(const RoseBuildImpl &build,
const std::map<u32, LitFragment> &final_to_frag_map,
rose_literal_table table, bool delay_rebuild, size_t max_len,
u32 max_offset = ROSE_BOUND_INF);
aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
size_t longLitLengthThreshold,
const std::map<u32, u32> &final_to_frag_map,
rose_group *fgroups,
size_t *fsize,
aligned_unique_ptr<HWLM>
buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold,
const std::map<u32, LitFragment> &final_to_frag_map,
rose_group *fgroups, size_t *fsize,
size_t *historyRequired);
aligned_unique_ptr<HWLM> buildDelayRebuildMatcher(
const RoseBuildImpl &build, size_t longLitLengthThreshold,
const std::map<u32, u32> &final_to_frag_map, size_t *drsize);
const std::map<u32, LitFragment> &final_to_frag_map, size_t *drsize);
aligned_unique_ptr<HWLM>
buildSmallBlockMatcher(const RoseBuildImpl &build,
const std::map<u32, u32> &final_to_frag_map,
const std::map<u32, LitFragment> &final_to_frag_map,
size_t *sbsize);
aligned_unique_ptr<HWLM>
buildEodAnchoredMatcher(const RoseBuildImpl &build,
const std::map<u32, u32> &final_to_frag_map,
const std::map<u32, LitFragment> &final_to_frag_map,
size_t *esize);
void findMoreLiteralMasks(RoseBuildImpl &build);