mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
rose: new program construction code
This commit is contained in:
parent
176c61aeaa
commit
b68694b729
@ -4531,51 +4531,60 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc,
|
|||||||
return lit_program;
|
return lit_program;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Consumes list of program blocks, checks them for duplicates and then
|
||||||
|
* concatenates them into one program.
|
||||||
|
*/
|
||||||
static
|
static
|
||||||
RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc,
|
RoseProgram assembleProgramBlocks(vector<RoseProgram> &&blocks) {
|
||||||
const flat_set<u32> &final_ids,
|
|
||||||
const map<u32, vector<RoseEdge>> &lit_edge_map,
|
|
||||||
bool is_anchored_program) {
|
|
||||||
assert(!final_ids.empty());
|
|
||||||
|
|
||||||
DEBUG_PRINTF("entry, %zu final ids: {%s}\n", final_ids.size(),
|
|
||||||
as_string_list(final_ids).c_str());
|
|
||||||
|
|
||||||
const auto &g = build.g;
|
|
||||||
vector<RoseEdge> lit_edges;
|
|
||||||
|
|
||||||
RoseProgram program;
|
RoseProgram program;
|
||||||
for (const auto &final_id : final_ids) {
|
|
||||||
assert(contains(bc.final_id_to_literal, final_id));
|
|
||||||
const auto &lit_ids = bc.final_id_to_literal.at(final_id);
|
|
||||||
|
|
||||||
lit_edges.clear();
|
DEBUG_PRINTF("%zu blocks before dedupe\n", blocks.size());
|
||||||
for (const auto &lit_id : lit_ids) {
|
|
||||||
if (contains(lit_edge_map, lit_id)) {
|
|
||||||
insert(&lit_edges, lit_edges.end(), lit_edge_map.at(lit_id));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
sort_and_unique(lit_edges, [&g](const RoseEdge &a, const RoseEdge &b) {
|
|
||||||
return tie(g[source(a, g)].index, g[target(a, g)].index) <
|
|
||||||
tie(g[source(b, g)].index, g[target(b, g)].index);
|
|
||||||
});
|
|
||||||
|
|
||||||
auto prog = buildLiteralProgram(build, bc, lit_ids, lit_edges,
|
sort(blocks.begin(), blocks.end(),
|
||||||
is_anchored_program);
|
[](const RoseProgram &a, const RoseProgram &b) {
|
||||||
DEBUG_PRINTF("final_id=%u, prog has %zu entries\n", final_id,
|
RoseProgramHash hasher;
|
||||||
prog.size());
|
return hasher(a) < hasher(b);
|
||||||
|
});
|
||||||
|
|
||||||
|
blocks.erase(unique(blocks.begin(), blocks.end(), RoseProgramEquivalence()),
|
||||||
|
blocks.end());
|
||||||
|
|
||||||
|
DEBUG_PRINTF("%zu blocks after dedupe\n", blocks.size());
|
||||||
|
|
||||||
|
for (auto &prog : blocks) {
|
||||||
program.add_block(move(prog));
|
program.add_block(move(prog));
|
||||||
}
|
}
|
||||||
|
|
||||||
return program;
|
return program;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc,
|
u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc,
|
||||||
const flat_set<u32> &final_ids,
|
const flat_set<u32> &lit_ids,
|
||||||
const map<u32, vector<RoseEdge>> &lit_edge_map,
|
const map<u32, vector<RoseEdge>> &lit_edge_map,
|
||||||
bool is_anchored_program) {
|
bool is_anchored_program) {
|
||||||
auto program = buildLiteralProgram(build, bc, final_ids, lit_edge_map,
|
assert(!lit_ids.empty());
|
||||||
is_anchored_program);
|
|
||||||
|
vector<RoseProgram> blocks;
|
||||||
|
|
||||||
|
const vector<RoseEdge> no_edges;
|
||||||
|
|
||||||
|
for (const auto &lit_id : lit_ids) {
|
||||||
|
DEBUG_PRINTF("lit_id=%u\n", lit_id);
|
||||||
|
const vector<RoseEdge> *edges_ptr;
|
||||||
|
if (contains(lit_edge_map, lit_id)) {
|
||||||
|
edges_ptr = &lit_edge_map.at(lit_id);
|
||||||
|
} else {
|
||||||
|
edges_ptr = &no_edges;
|
||||||
|
}
|
||||||
|
auto prog = buildLiteralProgram(build, bc, {lit_id}, *edges_ptr,
|
||||||
|
is_anchored_program);
|
||||||
|
blocks.push_back(move(prog));
|
||||||
|
}
|
||||||
|
|
||||||
|
auto program = assembleProgramBlocks(move(blocks));
|
||||||
|
|
||||||
if (program.empty()) {
|
if (program.empty()) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -4584,30 +4593,32 @@ u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc,
|
u32 writeDelayRebuildProgram(RoseBuildImpl &build, build_context &bc,
|
||||||
const flat_set<u32> &final_ids) {
|
const flat_set<u32> &lit_ids) {
|
||||||
|
assert(!lit_ids.empty());
|
||||||
|
|
||||||
if (!build.cc.streaming) {
|
if (!build.cc.streaming) {
|
||||||
return 0; // We only do delayed rebuild in streaming mode.
|
return 0; // We only do delayed rebuild in streaming mode.
|
||||||
}
|
}
|
||||||
|
|
||||||
RoseProgram program;
|
vector<RoseProgram> blocks;
|
||||||
|
|
||||||
for (const auto &final_id : final_ids) {
|
for (const auto &lit_id : lit_ids) {
|
||||||
const auto &lit_ids = bc.final_id_to_literal.at(final_id);
|
DEBUG_PRINTF("lit_id=%u\n", lit_id);
|
||||||
assert(!lit_ids.empty());
|
const auto &info = build.literal_info.at(lit_id);
|
||||||
|
if (info.delayed_ids.empty()) {
|
||||||
const auto &arb_lit_info = build.literal_info.at(*lit_ids.begin());
|
|
||||||
if (arb_lit_info.delayed_ids.empty()) {
|
|
||||||
continue; // No delayed IDs, no work to do.
|
continue; // No delayed IDs, no work to do.
|
||||||
}
|
}
|
||||||
|
|
||||||
RoseProgram prog;
|
RoseProgram prog;
|
||||||
makeCheckLiteralInstruction(build, bc, lit_ids, prog);
|
makeCheckLiteralInstruction(build, bc, {lit_id}, prog);
|
||||||
makeCheckLitMaskInstruction(build, bc, lit_ids, prog);
|
makeCheckLitMaskInstruction(build, bc, {lit_id}, prog);
|
||||||
makePushDelayedInstructions(build, bc, lit_ids, prog);
|
makePushDelayedInstructions(build, bc, {lit_id}, prog);
|
||||||
program.add_block(move(prog));
|
blocks.push_back(move(prog));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto program = assembleProgramBlocks(move(blocks));
|
||||||
|
|
||||||
if (program.empty()) {
|
if (program.empty()) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -4751,26 +4762,29 @@ void groupByFragment(RoseBuildImpl &build, const build_context &bc) {
|
|||||||
*/
|
*/
|
||||||
static
|
static
|
||||||
void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) {
|
void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) {
|
||||||
// Build a reverse mapping from fragment -> final_id.
|
// Build a reverse mapping from fragment -> {lit_id, lit_id,...}
|
||||||
map<u32, flat_set<u32>> frag_to_final_map;
|
map<u32, flat_set<u32>> frag_to_lit_map;
|
||||||
for (const auto &info : build.literal_info) {
|
for (u32 lit_id = 0; lit_id < verify_u32(build.literal_info.size());
|
||||||
|
lit_id++) {
|
||||||
|
const auto &info = build.literal_info[lit_id];
|
||||||
if (info.fragment_id == MO_INVALID_IDX) {
|
if (info.fragment_id == MO_INVALID_IDX) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
frag_to_final_map[info.fragment_id].insert(info.final_id);
|
frag_to_lit_map[info.fragment_id].insert(lit_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("%zu fragments\n", build.fragments.size());
|
DEBUG_PRINTF("%zu fragments\n", build.fragments.size());
|
||||||
auto lit_edge_map = findEdgesByLiteral(build);
|
auto lit_edge_map = findEdgesByLiteral(build);
|
||||||
|
|
||||||
for (auto &frag : build.fragments) {
|
for (auto &frag : build.fragments) {
|
||||||
const auto &final_ids = frag_to_final_map[frag.fragment_id];
|
const auto &lit_ids = frag_to_lit_map[frag.fragment_id];
|
||||||
DEBUG_PRINTF("frag_id=%u, final_ids=[%s]\n", frag.fragment_id,
|
DEBUG_PRINTF("frag_id=%u, lit_ids=[%s]\n", frag.fragment_id,
|
||||||
as_string_list(final_ids).c_str());
|
as_string_list(lit_ids).c_str());
|
||||||
|
|
||||||
frag.lit_program_offset =
|
frag.lit_program_offset =
|
||||||
writeLiteralProgram(build, bc, final_ids, lit_edge_map, false);
|
writeLiteralProgram(build, bc, lit_ids, lit_edge_map, false);
|
||||||
frag.delay_program_offset =
|
frag.delay_program_offset =
|
||||||
buildDelayRebuildProgram(build, bc, final_ids);
|
writeDelayRebuildProgram(build, bc, lit_ids);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4789,14 +4803,15 @@ pair<u32, u32> writeDelayPrograms(RoseBuildImpl &build, build_context &bc) {
|
|||||||
|
|
||||||
for (const auto &lit_id : build.literals.right | map_keys) {
|
for (const auto &lit_id : build.literals.right | map_keys) {
|
||||||
const auto &info = build.literal_info.at(lit_id);
|
const auto &info = build.literal_info.at(lit_id);
|
||||||
|
|
||||||
|
if (info.fragment_id == MO_INVALID_IDX) {
|
||||||
|
continue; // Unused literal.
|
||||||
|
}
|
||||||
|
|
||||||
for (const auto &delayed_lit_id : info.delayed_ids) {
|
for (const auto &delayed_lit_id : info.delayed_ids) {
|
||||||
DEBUG_PRINTF("lit id %u delay id %u\n", lit_id, delayed_lit_id);
|
DEBUG_PRINTF("lit id %u delay id %u\n", lit_id, delayed_lit_id);
|
||||||
u32 final_id = build.literal_info.at(delayed_lit_id).final_id;
|
u32 offset = writeLiteralProgram(build, bc, {delayed_lit_id},
|
||||||
if (final_id == MO_INVALID_IDX) {
|
lit_edge_map, false);
|
||||||
continue;
|
|
||||||
}
|
|
||||||
u32 offset =
|
|
||||||
writeLiteralProgram(build, bc, {final_id}, lit_edge_map, false);
|
|
||||||
|
|
||||||
u32 delay_id;
|
u32 delay_id;
|
||||||
auto it = cache.find(offset);
|
auto it = cache.find(offset);
|
||||||
@ -4841,9 +4856,8 @@ pair<u32, u32> writeAnchoredPrograms(RoseBuildImpl &build, build_context &bc) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 final_id = build.literal_info.at(lit_id).final_id;
|
if (build.literal_info.at(lit_id).fragment_id == MO_INVALID_IDX) {
|
||||||
if (final_id == MO_INVALID_IDX) {
|
continue; // Unused literal.
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If this anchored literal can never match past
|
// If this anchored literal can never match past
|
||||||
@ -4856,7 +4870,7 @@ pair<u32, u32> writeAnchoredPrograms(RoseBuildImpl &build, build_context &bc) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
u32 offset =
|
u32 offset =
|
||||||
writeLiteralProgram(build, bc, {final_id}, lit_edge_map, true);
|
writeLiteralProgram(build, bc, {lit_id}, lit_edge_map, true);
|
||||||
DEBUG_PRINTF("lit_id=%u, final_id %u -> anch prog at %u\n", lit_id,
|
DEBUG_PRINTF("lit_id=%u, final_id %u -> anch prog at %u\n", lit_id,
|
||||||
final_id, offset);
|
final_id, offset);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user