mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
rose: rearrange anchored program generation
This commit is contained in:
parent
6a945e27fb
commit
f5dd20e461
@ -247,7 +247,7 @@ struct build_context : boost::noncopyable {
|
|||||||
/** \brief Mapping from final ID to the set of literals it is used for. */
|
/** \brief Mapping from final ID to the set of literals it is used for. */
|
||||||
map<u32, flat_set<u32>> final_id_to_literal;
|
map<u32, flat_set<u32>> final_id_to_literal;
|
||||||
|
|
||||||
/** \brief Mapping from final ID to anchored program index. */
|
/** \brief Mapping from Rose literal ID to anchored program index. */
|
||||||
map<u32, u32> anchored_programs;
|
map<u32, u32> anchored_programs;
|
||||||
|
|
||||||
/** \brief Mapping from final ID to delayed program index. */
|
/** \brief Mapping from final ID to delayed program index. */
|
||||||
@ -4260,37 +4260,26 @@ u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) {
|
|||||||
|
|
||||||
static
|
static
|
||||||
void makeRecordAnchoredInstruction(const RoseBuildImpl &build,
|
void makeRecordAnchoredInstruction(const RoseBuildImpl &build,
|
||||||
build_context &bc, u32 final_id,
|
build_context &bc,
|
||||||
|
const flat_set<u32> &lit_ids,
|
||||||
RoseProgram &program) {
|
RoseProgram &program) {
|
||||||
assert(contains(bc.final_id_to_literal, final_id));
|
assert(!lit_ids.empty());
|
||||||
const auto &lit_ids = bc.final_id_to_literal.at(final_id);
|
u32 first_lit_id = *begin(lit_ids);
|
||||||
|
|
||||||
// Must be anchored.
|
// Must be anchored.
|
||||||
assert(!lit_ids.empty());
|
if (build.literals.right.at(first_lit_id).table != ROSE_ANCHORED) {
|
||||||
if (build.literals.right.at(*begin(lit_ids)).table != ROSE_ANCHORED) {
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If this anchored literal can never match past
|
for (const auto &lit_id : lit_ids) {
|
||||||
// floatingMinLiteralMatchOffset, we will never have to record it.
|
|
||||||
u32 max_offset = 0;
|
|
||||||
for (u32 lit_id : lit_ids) {
|
|
||||||
assert(build.literals.right.at(lit_id).table == ROSE_ANCHORED);
|
assert(build.literals.right.at(lit_id).table == ROSE_ANCHORED);
|
||||||
max_offset = max(max_offset, findMaxOffset(build, lit_id));
|
if (!contains(bc.anchored_programs, lit_id)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
u32 anch_id = bc.anchored_programs.at(lit_id);
|
||||||
|
DEBUG_PRINTF("adding RECORD_ANCHORED for anch_id=%u\n", anch_id);
|
||||||
|
program.add_before_end(make_unique<RoseInstrRecordAnchored>(anch_id));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (max_offset <= bc.floatingMinLiteralMatchOffset) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto it = bc.anchored_programs.find(final_id);
|
|
||||||
if (it == bc.anchored_programs.end()) {
|
|
||||||
u32 anch_id = verify_u32(bc.anchored_programs.size());
|
|
||||||
it = bc.anchored_programs.emplace(final_id, anch_id).first;
|
|
||||||
DEBUG_PRINTF("added anch_id=%u for final_id %u\n", anch_id, final_id);
|
|
||||||
}
|
|
||||||
u32 anch_id = it->second;
|
|
||||||
program.add_before_end(make_unique<RoseInstrRecordAnchored>(anch_id));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -4529,7 +4518,7 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc,
|
|||||||
makeGroupSquashInstruction(build, lit_ids, root_block);
|
makeGroupSquashInstruction(build, lit_ids, root_block);
|
||||||
|
|
||||||
// Literal may be anchored and need to be recorded.
|
// Literal may be anchored and need to be recorded.
|
||||||
makeRecordAnchoredInstruction(build, bc, final_id, root_block);
|
makeRecordAnchoredInstruction(build, bc, lit_ids, root_block);
|
||||||
|
|
||||||
program.add_block(move(root_block));
|
program.add_block(move(root_block));
|
||||||
}
|
}
|
||||||
@ -4776,20 +4765,42 @@ u32 buildDelayPrograms(RoseBuildImpl &build, build_context &bc) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
u32 buildAnchoredPrograms(RoseBuildImpl &build, build_context &bc) {
|
u32 writeAnchoredPrograms(RoseBuildImpl &build, build_context &bc) {
|
||||||
auto lit_edge_map = findEdgesByLiteral(build);
|
auto lit_edge_map = findEdgesByLiteral(build);
|
||||||
|
|
||||||
vector<u32> programs(bc.anchored_programs.size(), ROSE_INVALID_PROG_OFFSET);
|
vector<u32> programs;
|
||||||
DEBUG_PRINTF("%zu anchored programs\n", programs.size());
|
|
||||||
|
for (const auto &m : build.literals.right) {
|
||||||
|
u32 lit_id = m.first;
|
||||||
|
const auto &lit = m.second;
|
||||||
|
|
||||||
|
if (lit.table != ROSE_ANCHORED) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 final_id = build.literal_info.at(lit_id).final_id;
|
||||||
|
if (final_id == MO_INVALID_IDX) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If this anchored literal can never match past
|
||||||
|
// floatingMinLiteralMatchOffset, we will never have to record it.
|
||||||
|
if (findMaxOffset(build, lit_id) <= bc.floatingMinLiteralMatchOffset) {
|
||||||
|
DEBUG_PRINTF("can never match after "
|
||||||
|
"floatingMinLiteralMatchOffset=%u\n",
|
||||||
|
bc.floatingMinLiteralMatchOffset);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
for (const auto &m : bc.anchored_programs) {
|
|
||||||
u32 final_id = m.first;
|
|
||||||
u32 anch_id = m.second;
|
|
||||||
u32 offset = writeLiteralProgram(build, bc, {final_id}, lit_edge_map);
|
u32 offset = writeLiteralProgram(build, bc, {final_id}, lit_edge_map);
|
||||||
DEBUG_PRINTF("final_id %u -> anch prog at %u\n", final_id, offset);
|
DEBUG_PRINTF("lit_id=%u, final_id %u -> anch prog at %u\n", lit_id,
|
||||||
programs[anch_id] = offset;
|
final_id, offset);
|
||||||
|
u32 anch_id = verify_u32(programs.size());
|
||||||
|
programs.push_back(offset);
|
||||||
|
bc.anchored_programs.emplace(lit_id, anch_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DEBUG_PRINTF("%zu anchored programs\n", programs.size());
|
||||||
return bc.engine_blob.add(begin(programs), end(programs));
|
return bc.engine_blob.add(begin(programs), end(programs));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5494,9 +5505,10 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
|||||||
queue_count - leftfixBeginQueue, leftInfoTable,
|
queue_count - leftfixBeginQueue, leftInfoTable,
|
||||||
&laggedRoseCount, &historyRequired);
|
&laggedRoseCount, &historyRequired);
|
||||||
|
|
||||||
|
u32 anchoredProgramOffset = writeAnchoredPrograms(*this, bc);
|
||||||
|
|
||||||
buildLiteralPrograms(*this, bc);
|
buildLiteralPrograms(*this, bc);
|
||||||
u32 delayProgramOffset = buildDelayPrograms(*this, bc);
|
u32 delayProgramOffset = buildDelayPrograms(*this, bc);
|
||||||
u32 anchoredProgramOffset = buildAnchoredPrograms(*this, bc);
|
|
||||||
|
|
||||||
u32 eodProgramOffset = writeEodProgram(*this, bc, eodNfaIterOffset);
|
u32 eodProgramOffset = writeEodProgram(*this, bc, eodNfaIterOffset);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user