mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
rose: use final_ids less in program construction
This commit is contained in:
parent
a83b7cb348
commit
7bdb327203
@ -2501,27 +2501,6 @@ void fillInSomRevNfas(RoseEngine *engine, const SomSlotManager &ssm,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
|
||||||
vector<const rose_literal_info *>
|
|
||||||
getLiteralInfoByFinalId(const RoseBuildImpl &build, const build_context &bc,
|
|
||||||
u32 final_id) {
|
|
||||||
vector<const rose_literal_info *> out;
|
|
||||||
|
|
||||||
const auto &final_id_to_literal = bc.final_id_to_literal;
|
|
||||||
assert(contains(final_id_to_literal, final_id));
|
|
||||||
|
|
||||||
const auto &lits = final_id_to_literal.find(final_id)->second;
|
|
||||||
assert(!lits.empty());
|
|
||||||
|
|
||||||
for (const auto &lit_id : lits) {
|
|
||||||
const rose_literal_info &li = build.literal_info[lit_id];
|
|
||||||
assert(li.final_id == final_id);
|
|
||||||
out.push_back(&li);
|
|
||||||
}
|
|
||||||
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
static
|
||||||
void applyFinalSpecialisation(RoseProgram &program) {
|
void applyFinalSpecialisation(RoseProgram &program) {
|
||||||
assert(!program.empty());
|
assert(!program.empty());
|
||||||
@ -4159,10 +4138,10 @@ void addPredBlocks(build_context &bc, map<u32, RoseProgram> &pred_blocks,
|
|||||||
|
|
||||||
static
|
static
|
||||||
void makePushDelayedInstructions(const RoseBuildImpl &build,
|
void makePushDelayedInstructions(const RoseBuildImpl &build,
|
||||||
const build_context &bc, u32 final_id,
|
const flat_set<u32> &lit_ids,
|
||||||
RoseProgram &program) {
|
RoseProgram &program) {
|
||||||
const auto &lit_infos = getLiteralInfoByFinalId(build, bc, final_id);
|
assert(!lit_ids.empty());
|
||||||
const auto &arb_lit_info = **lit_infos.begin();
|
const auto &arb_lit_info = build.literal_info.at(*lit_ids.begin());
|
||||||
if (arb_lit_info.delayed_ids.empty()) {
|
if (arb_lit_info.delayed_ids.empty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -4172,8 +4151,7 @@ void makePushDelayedInstructions(const RoseBuildImpl &build,
|
|||||||
u32 child_id = build.literal_info[int_id].final_id;
|
u32 child_id = build.literal_info[int_id].final_id;
|
||||||
u32 delay_index = child_id - build.delay_base_id;
|
u32 delay_index = child_id - build.delay_base_id;
|
||||||
|
|
||||||
DEBUG_PRINTF("final_id=%u delay=%u child_id=%u\n", final_id,
|
DEBUG_PRINTF("delay=%u child_id=%u\n", child_literal.delay, child_id);
|
||||||
child_literal.delay, child_id);
|
|
||||||
|
|
||||||
auto ri = make_unique<RoseInstrPushDelayed>(
|
auto ri = make_unique<RoseInstrPushDelayed>(
|
||||||
verify_u8(child_literal.delay), delay_index);
|
verify_u8(child_literal.delay), delay_index);
|
||||||
@ -4182,23 +4160,21 @@ void makePushDelayedInstructions(const RoseBuildImpl &build,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
rose_group getFinalIdGroupsUnion(const RoseBuildImpl &build,
|
rose_group getLitGroupsUnion(const RoseBuildImpl &build,
|
||||||
const build_context &bc, u32 final_id) {
|
const flat_set<u32> &lit_ids) {
|
||||||
assert(contains(bc.final_id_to_literal, final_id));
|
|
||||||
const auto &lit_infos = getLiteralInfoByFinalId(build, bc, final_id);
|
|
||||||
|
|
||||||
rose_group groups = 0;
|
rose_group groups = 0;
|
||||||
for (const auto &li : lit_infos) {
|
for (auto lit_id : lit_ids) {
|
||||||
groups |= li->group_mask;
|
const auto &info = build.literal_info.at(lit_id);
|
||||||
|
groups |= info.group_mask;
|
||||||
}
|
}
|
||||||
return groups;
|
return groups;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void makeGroupCheckInstruction(const RoseBuildImpl &build,
|
void makeGroupCheckInstruction(const RoseBuildImpl &build,
|
||||||
const build_context &bc, u32 final_id,
|
const flat_set<u32> &lit_ids,
|
||||||
RoseProgram &program) {
|
RoseProgram &program) {
|
||||||
rose_group groups = getFinalIdGroupsUnion(build, bc, final_id);
|
rose_group groups = getLitGroupsUnion(build, lit_ids);
|
||||||
if (!groups) {
|
if (!groups) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -4207,21 +4183,20 @@ void makeGroupCheckInstruction(const RoseBuildImpl &build,
|
|||||||
|
|
||||||
static
|
static
|
||||||
void makeCheckLitMaskInstruction(const RoseBuildImpl &build, build_context &bc,
|
void makeCheckLitMaskInstruction(const RoseBuildImpl &build, build_context &bc,
|
||||||
u32 final_id, RoseProgram &program) {
|
const flat_set<u32> &lit_ids,
|
||||||
const auto &lit_infos = getLiteralInfoByFinalId(build, bc, final_id);
|
RoseProgram &program) {
|
||||||
assert(!lit_infos.empty());
|
const auto &lit_info = build.literal_info.at(*lit_ids.begin());
|
||||||
|
if (!lit_info.requires_benefits) {
|
||||||
if (!lit_infos.front()->requires_benefits) {
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<LookEntry> look;
|
vector<LookEntry> look;
|
||||||
|
|
||||||
assert(bc.final_id_to_literal.at(final_id).size() == 1);
|
assert(lit_ids.size() == 1);
|
||||||
u32 lit_id = *bc.final_id_to_literal.at(final_id).begin();
|
u32 lit_id = *lit_ids.begin();
|
||||||
const ue2_literal &s = build.literals.right.at(lit_id).s;
|
const ue2_literal &s = build.literals.right.at(lit_id).s;
|
||||||
DEBUG_PRINTF("building mask for lit %u (final id %u) %s\n", lit_id,
|
DEBUG_PRINTF("building mask for lit %u: %s\n", lit_id,
|
||||||
final_id, dumpString(s).c_str());
|
dumpString(s).c_str());
|
||||||
assert(s.length() <= MAX_MASK2_WIDTH);
|
assert(s.length() <= MAX_MASK2_WIDTH);
|
||||||
s32 i = 0 - s.length();
|
s32 i = 0 - s.length();
|
||||||
for (const auto &e : s) {
|
for (const auto &e : s) {
|
||||||
@ -4237,21 +4212,21 @@ void makeCheckLitMaskInstruction(const RoseBuildImpl &build, build_context &bc,
|
|||||||
|
|
||||||
static
|
static
|
||||||
void makeGroupSquashInstruction(const RoseBuildImpl &build,
|
void makeGroupSquashInstruction(const RoseBuildImpl &build,
|
||||||
const build_context &bc, u32 final_id,
|
const flat_set<u32> &lit_ids,
|
||||||
RoseProgram &program) {
|
RoseProgram &program) {
|
||||||
assert(contains(bc.final_id_to_literal, final_id));
|
assert(!lit_ids.empty());
|
||||||
const auto &lit_infos = getLiteralInfoByFinalId(build, bc, final_id);
|
const u32 lit_id = *lit_ids.begin();
|
||||||
|
const auto &info = build.literal_info[lit_id];
|
||||||
if (!lit_infos.front()->squash_group) {
|
if (!info.squash_group) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
rose_group groups = getFinalIdGroupsUnion(build, bc, final_id);
|
rose_group groups = getLitGroupsUnion(build, lit_ids);
|
||||||
if (!groups) {
|
if (!groups) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("final_id %u squashes 0x%llx\n", final_id, groups);
|
DEBUG_PRINTF("squashes 0x%llx\n", groups);
|
||||||
program.add_before_end(
|
program.add_before_end(
|
||||||
make_unique<RoseInstrSquashGroups>(~groups)); // Note negated.
|
make_unique<RoseInstrSquashGroups>(~groups)); // Note negated.
|
||||||
}
|
}
|
||||||
@ -4312,7 +4287,7 @@ u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) {
|
|||||||
|
|
||||||
static
|
static
|
||||||
void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc,
|
void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc,
|
||||||
u32 final_id,
|
const flat_set<u32> &lit_ids,
|
||||||
const vector<RoseEdge> &lit_edges,
|
const vector<RoseEdge> &lit_edges,
|
||||||
RoseProgram &program) {
|
RoseProgram &program) {
|
||||||
if (lit_edges.empty()) {
|
if (lit_edges.empty()) {
|
||||||
@ -4328,7 +4303,6 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto &lit_ids = bc.final_id_to_literal.at(final_id);
|
|
||||||
if (lit_ids.empty()) {
|
if (lit_ids.empty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -4345,8 +4319,8 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc,
|
|||||||
min_offset = min(min_offset, lit_min_offset);
|
min_offset = min(min_offset, lit_min_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("final_id=%u has min_len=%zu, min_offset=%u, "
|
DEBUG_PRINTF("has min_len=%zu, min_offset=%u, "
|
||||||
"global min is %u\n", final_id, min_len, min_offset,
|
"global min is %u\n", min_len, min_offset,
|
||||||
bc.floatingMinLiteralMatchOffset);
|
bc.floatingMinLiteralMatchOffset);
|
||||||
|
|
||||||
// If we can't match before the min offset, we don't need the check.
|
// If we can't match before the min offset, we don't need the check.
|
||||||
@ -4367,14 +4341,14 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc,
|
|||||||
|
|
||||||
static
|
static
|
||||||
void makeCheckLiteralInstruction(const RoseBuildImpl &build,
|
void makeCheckLiteralInstruction(const RoseBuildImpl &build,
|
||||||
const build_context &bc, u32 final_id,
|
const build_context &bc,
|
||||||
|
const flat_set<u32> &lits,
|
||||||
RoseProgram &program) {
|
RoseProgram &program) {
|
||||||
assert(bc.longLitLengthThreshold > 0);
|
assert(bc.longLitLengthThreshold > 0);
|
||||||
|
|
||||||
DEBUG_PRINTF("final_id %u, long lit threshold %zu\n", final_id,
|
DEBUG_PRINTF("lits [%s], long lit threshold %zu\n",
|
||||||
bc.longLitLengthThreshold);
|
as_string_list(lits).c_str(), bc.longLitLengthThreshold);
|
||||||
|
|
||||||
const auto &lits = bc.final_id_to_literal.at(final_id);
|
|
||||||
if (lits.size() != 1) {
|
if (lits.size() != 1) {
|
||||||
// final_id sharing is only allowed for literals that are short enough
|
// final_id sharing is only allowed for literals that are short enough
|
||||||
// to not require any additional confirm work.
|
// to not require any additional confirm work.
|
||||||
@ -4461,24 +4435,26 @@ RoseProgram buildLitInitialProgram(RoseBuildImpl &build, build_context &bc,
|
|||||||
|
|
||||||
DEBUG_PRINTF("final_id %u\n", final_id);
|
DEBUG_PRINTF("final_id %u\n", final_id);
|
||||||
|
|
||||||
|
const auto &lit_ids = bc.final_id_to_literal.at(final_id);
|
||||||
|
|
||||||
// Check long literal info.
|
// Check long literal info.
|
||||||
makeCheckLiteralInstruction(build, bc, final_id, program);
|
makeCheckLiteralInstruction(build, bc, lit_ids, program);
|
||||||
|
|
||||||
// Check lit mask.
|
// Check lit mask.
|
||||||
makeCheckLitMaskInstruction(build, bc, final_id, program);
|
makeCheckLitMaskInstruction(build, bc, lit_ids, program);
|
||||||
|
|
||||||
// Check literal groups. This is an optimisation that we only perform for
|
// Check literal groups. This is an optimisation that we only perform for
|
||||||
// delayed literals, as their groups may be switched off; ordinarily, we
|
// delayed literals, as their groups may be switched off; ordinarily, we
|
||||||
// can trust the HWLM matcher.
|
// can trust the HWLM matcher.
|
||||||
if (hasDelayedLiteral(build, lit_edges)) {
|
if (hasDelayedLiteral(build, lit_edges)) {
|
||||||
makeGroupCheckInstruction(build, bc, final_id, program);
|
makeGroupCheckInstruction(build, lit_ids, program);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add instructions for pushing delayed matches, if there are any.
|
// Add instructions for pushing delayed matches, if there are any.
|
||||||
makePushDelayedInstructions(build, bc, final_id, program);
|
makePushDelayedInstructions(build, lit_ids, program);
|
||||||
|
|
||||||
// Add pre-check for early literals in the floating table.
|
// Add pre-check for early literals in the floating table.
|
||||||
makeCheckLitEarlyInstruction(build, bc, final_id, lit_edges, program);
|
makeCheckLitEarlyInstruction(build, bc, lit_ids, lit_edges, program);
|
||||||
|
|
||||||
return program;
|
return program;
|
||||||
}
|
}
|
||||||
@ -4525,10 +4501,11 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (final_id != MO_INVALID_IDX) {
|
if (final_id != MO_INVALID_IDX) {
|
||||||
|
const auto &lit_ids = bc.final_id_to_literal.at(final_id);
|
||||||
RoseProgram root_block;
|
RoseProgram root_block;
|
||||||
|
|
||||||
// Literal may squash groups.
|
// Literal may squash groups.
|
||||||
makeGroupSquashInstruction(build, bc, final_id, root_block);
|
makeGroupSquashInstruction(build, lit_ids, root_block);
|
||||||
|
|
||||||
// Literal may be anchored and need to be recorded.
|
// Literal may be anchored and need to be recorded.
|
||||||
makeRecordAnchoredInstruction(build, bc, final_id, root_block);
|
makeRecordAnchoredInstruction(build, bc, final_id, root_block);
|
||||||
@ -4588,16 +4565,18 @@ u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc,
|
|||||||
RoseProgram program;
|
RoseProgram program;
|
||||||
|
|
||||||
for (const auto &final_id : final_ids) {
|
for (const auto &final_id : final_ids) {
|
||||||
const auto &lit_infos = getLiteralInfoByFinalId(build, bc, final_id);
|
const auto &lit_ids = bc.final_id_to_literal.at(final_id);
|
||||||
const auto &arb_lit_info = **lit_infos.begin();
|
assert(!lit_ids.empty());
|
||||||
|
|
||||||
|
const auto &arb_lit_info = build.literal_info.at(*lit_ids.begin());
|
||||||
if (arb_lit_info.delayed_ids.empty()) {
|
if (arb_lit_info.delayed_ids.empty()) {
|
||||||
continue; // No delayed IDs, no work to do.
|
continue; // No delayed IDs, no work to do.
|
||||||
}
|
}
|
||||||
|
|
||||||
RoseProgram prog;
|
RoseProgram prog;
|
||||||
makeCheckLiteralInstruction(build, bc, final_id, prog);
|
makeCheckLiteralInstruction(build, bc, lit_ids, prog);
|
||||||
makeCheckLitMaskInstruction(build, bc, final_id, prog);
|
makeCheckLitMaskInstruction(build, bc, lit_ids, prog);
|
||||||
makePushDelayedInstructions(build, bc, final_id, prog);
|
makePushDelayedInstructions(build, lit_ids, prog);
|
||||||
program.add_block(move(prog));
|
program.add_block(move(prog));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user