tamarama: add container engine for exclusive nfas

Add the new Tamarama engine that acts as a container for infix/suffix
engines that can be proven to run exclusively of one another.

This reduces stream state for pattern sets with many exclusive engines.
Author: Xiang Wang
Date: 2015-08-11 05:23:12 -04:00
Committed by: Matthew Barr
Parent: 013dbd3b3c
Commit: 9087d59be5
38 changed files with 2418 additions and 56 deletions
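
The stream-state saving can be pictured with a small standalone sketch (illustrative only, assuming that "exclusive" means at most one subengine can be alive at any point in the stream; the actual state layout lives in nfa/tamarama_internal.h, added elsewhere in this commit): instead of reserving stream state for every engine, the container reserves space for its largest member plus an index of the currently active one.

#include <algorithm>
#include <cstdint>
#include <vector>

// Hypothetical per-subengine record: only the stream state size matters here.
struct SubengineSketch {
    uint32_t streamStateSize;
};

// Stream state needed if every engine keeps its own copy.
static uint32_t separateStateSize(const std::vector<SubengineSketch> &subs) {
    uint32_t total = 0;
    for (const auto &s : subs) {
        total += s.streamStateSize;
    }
    return total;
}

// Stream state needed if the engines are packed into one exclusive container:
// space for the largest member plus a slot recording which one is active.
static uint32_t containerStateSize(const std::vector<SubengineSketch> &subs) {
    uint32_t largest = 0;
    for (const auto &s : subs) {
        largest = std::max(largest, s.streamStateSize);
    }
    return largest + static_cast<uint32_t>(sizeof(uint32_t)); // active index
}

For ten exclusive subengines of 40 bytes each, the separate layout needs 400 bytes of stream state while the container in this sketch needs 44, which is the effect the commit message describes.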


@@ -33,6 +33,7 @@
#include "hs_compile.h" // for HS_MODE_*
#include "rose_build_add_internal.h"
#include "rose_build_anchored.h"
#include "rose_build_exclusive.h"
#include "rose_build_groups.h"
#include "rose_build_infix.h"
#include "rose_build_lookaround.h"
@@ -50,6 +51,8 @@
#include "nfa/nfa_build_util.h"
#include "nfa/nfa_internal.h"
#include "nfa/shufticompile.h"
#include "nfa/tamaramacompile.h"
#include "nfa/tamarama_internal.h"
#include "nfagraph/ng_execute.h"
#include "nfagraph/ng_holder.h"
#include "nfagraph/ng_lbr.h"
@@ -71,6 +74,7 @@
#include "util/compile_error.h"
#include "util/container.h"
#include "util/graph_range.h"
#include "util/make_unique.h"
#include "util/multibit_build.h"
#include "util/order_check.h"
#include "util/queue_index_factory.h"
@@ -1422,6 +1426,296 @@ bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi,
return true;
}
static
unique_ptr<TamaInfo> constructTamaInfo(const RoseGraph &g,
const vector<ExclusiveSubengine> &subengines,
const bool is_suffix) {
unique_ptr<TamaInfo> tamaInfo = ue2::make_unique<TamaInfo>();
for (const auto &sub : subengines) {
const auto &rose_vertices = sub.vertices;
NFA *nfa = sub.nfa.get();
set<u32> tops;
for (const auto &v : rose_vertices) {
if (is_suffix) {
tops.insert(g[v].suffix.top);
} else {
for (const auto &e : in_edges_range(v, g)) {
tops.insert(g[e].rose_top);
}
}
}
tamaInfo->add(nfa, tops);
}
return tamaInfo;
}
static
void updateTops(const RoseGraph &g, const TamaInfo &tamaInfo,
TamaProto &tamaProto,
const vector<ExclusiveSubengine> &subengines,
const map<pair<const NFA *, u32>, u32> &out_top_remap,
const bool is_suffix) {
u32 i = 0;
for (const auto &n : tamaInfo.subengines) {
for (const auto &v : subengines[i].vertices) {
if (is_suffix) {
tamaProto.add(n, g[v].idx, g[v].suffix.top,
out_top_remap);
} else {
for (const auto &e : in_edges_range(v, g)) {
tamaProto.add(n, g[v].idx, g[e].rose_top,
out_top_remap);
}
}
}
i++;
}
}
static
shared_ptr<TamaProto> constructContainerEngine(const RoseGraph &g,
build_context &bc,
const ExclusiveInfo &info,
const u32 queue,
const bool is_suffix) {
const auto &subengines = info.subengines;
auto tamaInfo =
constructTamaInfo(g, subengines, is_suffix);
map<pair<const NFA *, u32>, u32> out_top_remap;
auto n = buildTamarama(*tamaInfo, queue, out_top_remap);
add_nfa_to_blob(bc, *n);
DEBUG_PRINTF("queue id:%u\n", queue);
shared_ptr<TamaProto> tamaProto = make_shared<TamaProto>();
tamaProto->reports = info.reports;
updateTops(g, *tamaInfo, *tamaProto, subengines,
out_top_remap, is_suffix);
return tamaProto;
}
static
void buildInfixContainer(RoseGraph &g, build_context &bc,
const vector<ExclusiveInfo> &exclusive_info) {
// Build tamarama engine
for (const auto &info : exclusive_info) {
const u32 queue = info.queue;
const auto &subengines = info.subengines;
auto tamaProto =
constructContainerEngine(g, bc, info, queue, false);
for (const auto &sub : subengines) {
const auto &verts = sub.vertices;
for (const auto &v : verts) {
DEBUG_PRINTF("vert id:%lu\n", g[v].idx);
g[v].left.tamarama = tamaProto;
}
}
}
}
static
void buildSuffixContainer(RoseGraph &g, build_context &bc,
const vector<ExclusiveInfo> &exclusive_info) {
// Build tamarama engine
for (const auto &info : exclusive_info) {
const u32 queue = info.queue;
const auto &subengines = info.subengines;
auto tamaProto =
constructContainerEngine(g, bc, info, queue, true);
for (const auto &sub : subengines) {
const auto &verts = sub.vertices;
for (const auto &v : verts) {
DEBUG_PRINTF("vert id:%lu\n", g[v].idx);
g[v].suffix.tamarama = tamaProto;
}
const auto &v = verts[0];
suffix_id newSuffix(g[v].suffix);
bc.suffixes.emplace(newSuffix, queue);
}
}
}
static
void updateExclusiveInfixProperties(const RoseBuildImpl &build,
build_context &bc,
const vector<ExclusiveInfo> &exclusive_info,
set<u32> *no_retrigger_queues) {
const RoseGraph &g = build.g;
for (const auto &info : exclusive_info) {
// Set leftfix optimisations, disabled for tamarama subengines
rose_group squash_mask = ~rose_group{0};
// Leftfixes can have stop alphabets.
vector<u8> stop(N_CHARS, 0);
// Infix NFAs can have bounds on their queue lengths.
u32 max_queuelen = 0;
u32 max_width = 0;
u8 cm_count = 0;
CharReach cm_cr;
const auto &qi = info.queue;
const auto &subengines = info.subengines;
bool no_retrigger = true;
for (const auto &sub : subengines) {
const auto &verts = sub.vertices;
const auto &v_first = verts[0];
left_id leftfix(g[v_first].left);
if (leftfix.haig() || !leftfix.graph() ||
!nfaStuckOn(*leftfix.graph())) {
no_retrigger = false;
}
for (const auto &v : verts) {
set<ue2_literal> lits;
for (auto u : inv_adjacent_vertices_range(v, build.g)) {
for (u32 lit_id : build.g[u].literals) {
lits.insert(build.literals.right.at(lit_id).s);
}
}
DEBUG_PRINTF("%zu literals\n", lits.size());
u32 queuelen = findMaxInfixMatches(leftfix, lits);
if (queuelen < UINT32_MAX) {
queuelen++;
}
max_queuelen = max(max_queuelen, queuelen);
}
}
if (no_retrigger) {
no_retrigger_queues->insert(qi);
}
for (const auto &sub : subengines) {
const auto &verts = sub.vertices;
for (const auto &v : verts) {
u32 lag = g[v].left.lag;
bc.leftfix_info.emplace(
v, left_build_info(qi, lag, max_width, squash_mask, stop,
max_queuelen, cm_count, cm_cr));
}
}
}
}
static
void updateExclusiveSuffixProperties(const RoseBuildImpl &build,
const vector<ExclusiveInfo> &exclusive_info,
set<u32> *no_retrigger_queues) {
const RoseGraph &g = build.g;
for (auto &info : exclusive_info) {
const auto &qi = info.queue;
const auto &subengines = info.subengines;
bool no_retrigger = true;
for (const auto &sub : subengines) {
const auto &v_first = sub.vertices[0];
suffix_id suffix(g[v_first].suffix);
if (!suffix.graph() || !nfaStuckOn(*suffix.graph())) {
no_retrigger = false;
break;
}
}
if (no_retrigger) {
no_retrigger_queues->insert(qi);
}
}
}
static
void buildExclusiveInfixes(RoseBuildImpl &build, build_context &bc,
QueueIndexFactory &qif,
const map<left_id, set<PredTopPair>> &infixTriggers,
const map<u32, vector<RoseVertex>> &vertex_map,
const vector<vector<u32>> &groups,
set<u32> *no_retrigger_queues) {
RoseGraph &g = build.g;
const CompileContext &cc = build.cc;
vector<ExclusiveInfo> exclusive_info;
for (const auto &gp : groups) {
ExclusiveInfo info;
for (const auto &id : gp) {
const auto &verts = vertex_map.at(id);
left_id leftfix(g[verts[0]].left);
bool is_transient = false;
auto n = makeLeftNfa(build, leftfix, false, is_transient,
infixTriggers, cc);
assert(n);
setLeftNfaProperties(*n, leftfix);
ExclusiveSubengine engine;
engine.nfa = move(n);
engine.vertices = verts;
info.subengines.push_back(move(engine));
}
info.queue = qif.get_queue();
exclusive_info.push_back(move(info));
}
updateExclusiveInfixProperties(build, bc, exclusive_info,
no_retrigger_queues);
buildInfixContainer(g, bc, exclusive_info);
}
static
void findExclusiveInfixes(RoseBuildImpl &build, build_context &bc,
QueueIndexFactory &qif,
const map<left_id, set<PredTopPair>> &infixTriggers,
set<u32> *no_retrigger_queues) {
const RoseGraph &g = build.g;
set<RoleInfo<left_id>> roleInfoSet;
map<u32, vector<RoseVertex>> vertex_map;
u32 role_id = 0;
map<left_id, u32> leftfixes;
for (auto v : vertices_range(g)) {
if (!g[v].left || build.isRootSuccessor(v)) {
continue;
}
left_id leftfix(g[v].left);
// Sanity check: our NFA should contain each of the tops mentioned on
// our in-edges.
assert(roseHasTops(g, v));
if (contains(leftfixes, leftfix)) {
// NFA already built.
u32 id = leftfixes[leftfix];
if (contains(vertex_map, id)) {
vertex_map[id].push_back(v);
}
DEBUG_PRINTF("sharing leftfix, id=%u\n", id);
continue;
}
if (leftfix.graph() || leftfix.castle()) {
leftfixes.emplace(leftfix, role_id);
vertex_map[role_id].push_back(v);
map<u32, vector<vector<CharReach>>> triggers;
findTriggerSequences(build, infixTriggers.at(leftfix), &triggers);
RoleInfo<left_id> info(leftfix, role_id);
if (setTriggerLiteralsInfix(info, triggers)) {
roleInfoSet.insert(info);
}
role_id++;
}
}
if (leftfixes.size() > 1) {
DEBUG_PRINTF("leftfix size:%lu\n", leftfixes.size());
vector<vector<u32>> groups;
exclusiveAnalysisInfix(build, vertex_map, roleInfoSet, groups);
buildExclusiveInfixes(build, bc, qif, infixTriggers, vertex_map,
groups, no_retrigger_queues);
}
}
static
bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc,
QueueIndexFactory &qif, set<u32> *no_retrigger_queues,
@@ -1434,8 +1728,13 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc,
unordered_map<left_id, vector<RoseVertex> > succs;
findInfixTriggers(tbi, &infixTriggers);
if (cc.grey.allowTamarama && cc.streaming && !do_prefix) {
findExclusiveInfixes(tbi, bc, qif, infixTriggers,
no_retrigger_queues);
}
for (auto v : vertices_range(g)) {
- if (!g[v].left) {
+ if (!g[v].left || g[v].left.tamarama) {
continue;
}
@@ -1753,11 +2052,111 @@ void setSuffixProperties(NFA &n, const suffix_id &suff,
}
static
- bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc,
- set<u32> *no_retrigger_queues) {
- map<suffix_id, set<PredTopPair> > suffixTriggers;
- findSuffixTriggers(tbi, &suffixTriggers);
void buildExclusiveSuffixes(RoseBuildImpl &build, build_context &bc,
QueueIndexFactory &qif,
map<suffix_id, set<PredTopPair>> &suffixTriggers,
const map<u32, vector<RoseVertex>> &vertex_map,
const vector<vector<u32>> &groups,
set<u32> *no_retrigger_queues) {
RoseGraph &g = build.g;
vector<ExclusiveInfo> exclusive_info;
for (const auto &gp : groups) {
ExclusiveInfo info;
for (const auto &id : gp) {
const auto &verts = vertex_map.at(id);
suffix_id s(g[verts[0]].suffix);
const set<PredTopPair> &s_triggers = suffixTriggers.at(s);
map<u32, u32> fixed_depth_tops;
findFixedDepthTops(g, s_triggers, &fixed_depth_tops);
map<u32, vector<vector<CharReach>>> triggers;
findTriggerSequences(build, s_triggers, &triggers);
auto n = buildSuffix(build.rm, build.ssm, fixed_depth_tops,
triggers, s, build.cc);
assert(n);
setSuffixProperties(*n, s, build.rm);
ExclusiveSubengine engine;
engine.nfa = move(n);
engine.vertices = verts;
info.subengines.push_back(move(engine));
const auto &reports = all_reports(s);
info.reports.insert(reports.begin(), reports.end());
}
info.queue = qif.get_queue();
exclusive_info.push_back(move(info));
}
updateExclusiveSuffixProperties(build, exclusive_info,
no_retrigger_queues);
buildSuffixContainer(g, bc, exclusive_info);
}
static
void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc,
QueueIndexFactory &qif,
map<suffix_id, set<PredTopPair>> &suffixTriggers,
set<u32> *no_retrigger_queues) {
const RoseGraph &g = tbi.g;
map<suffix_id, u32> suffixes;
set<RoleInfo<suffix_id>> roleInfoSet;
map<u32, vector<RoseVertex>> vertex_map;
u32 role_id = 0;
for (auto v : vertices_range(g)) {
if (!g[v].suffix) {
continue;
}
const suffix_id s(g[v].suffix);
DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].idx, s.graph());
// We may have already built this NFA.
if (contains(suffixes, s)) {
u32 id = suffixes[s];
if (!tbi.isInETable(v)) {
vertex_map[id].push_back(v);
}
continue;
}
// EOD suffixes are currently excluded from exclusive analysis
if (!tbi.isInETable(v) && (s.graph() || s.castle())) {
DEBUG_PRINTF("assigning %p to id %u\n", s.graph(), role_id);
suffixes.emplace(s, role_id);
vertex_map[role_id].push_back(v);
const set<PredTopPair> &s_triggers = suffixTriggers.at(s);
map<u32, vector<vector<CharReach>>> triggers;
findTriggerSequences(tbi, s_triggers, &triggers);
RoleInfo<suffix_id> info(s, role_id);
if (setTriggerLiteralsSuffix(info, triggers)) {
roleInfoSet.insert(info);
}
role_id++;
}
}
if (suffixes.size() > 1) {
DEBUG_PRINTF("suffix size:%lu\n", suffixes.size());
vector<vector<u32>> groups;
exclusiveAnalysisSuffix(tbi, vertex_map, roleInfoSet, groups);
buildExclusiveSuffixes(tbi, bc, qif, suffixTriggers, vertex_map,
groups, no_retrigger_queues);
}
}
static
+ bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc,
+ set<u32> *no_retrigger_queues,
+ const map<suffix_id, set<PredTopPair>> &suffixTriggers) {
// To ensure compile determinism, build suffix engines in order of their
// (unique) queue indices, so that we call add_nfa_to_blob in the same
// order.
@@ -1770,6 +2169,11 @@ bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc,
for (const auto &e : ordered) {
const u32 queue = e.first;
const suffix_id &s = e.second;
if (s.tamarama()) {
continue;
}
const set<PredTopPair> &s_triggers = suffixTriggers.at(s);
map<u32, u32> fixed_depth_tops;
@@ -1860,11 +2264,20 @@ static
bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif,
set<u32> *no_retrigger_queues, set<u32> *eager_queues,
u32 *leftfixBeginQueue) {
map<suffix_id, set<PredTopPair>> suffixTriggers;
findSuffixTriggers(tbi, &suffixTriggers);
if (tbi.cc.grey.allowTamarama && tbi.cc.streaming) {
findExclusiveSuffixes(tbi, bc, qif, suffixTriggers,
no_retrigger_queues);
}
assignSuffixQueues(tbi, bc);
- if (!buildSuffixes(tbi, bc, no_retrigger_queues)) {
+ if (!buildSuffixes(tbi, bc, no_retrigger_queues, suffixTriggers)) {
return false;
}
suffixTriggers.clear();
*leftfixBeginQueue = qif.allocated_count();
@@ -3205,7 +3618,15 @@ void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v,
assert(contains(bc.engineOffsets, qi));
const NFA *nfa = get_nfa_from_blob(bc, qi);
u32 suffixEvent;
- if (isMultiTopType(nfa->type)) {
+ if (isContainerType(nfa->type)) {
+ auto tamaProto = g[v].suffix.tamarama.get();
+ assert(tamaProto);
+ u32 top = (u32)MQE_TOP_FIRST +
+ tamaProto->top_remap.at(make_pair(g[v].idx,
+ g[v].suffix.top));
+ assert(top < MQE_INVALID);
+ suffixEvent = top;
+ } else if (isMultiTopType(nfa->type)) {
assert(!g[v].suffix.haig);
u32 top = (u32)MQE_TOP_FIRST + g[v].suffix.top;
assert(top < MQE_INVALID);
@@ -3283,7 +3704,13 @@ void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc,
// DFAs have no TOP_N support, so they get a classic MQE_TOP event.
u32 top;
- if (!isMultiTopType(nfa->type)) {
+ if (isContainerType(nfa->type)) {
+ auto tamaProto = g[v].left.tamarama.get();
+ assert(tamaProto);
+ top = MQE_TOP_FIRST + tamaProto->top_remap.at(
+ make_pair(g[v].idx, g[e].rose_top));
+ assert(top < MQE_INVALID);
+ } else if (!isMultiTopType(nfa->type)) {
assert(num_tops(g[v].left) == 1);
top = MQE_TOP;
} else {


@@ -0,0 +1,446 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "ue2common.h"
#include "rose_build_exclusive.h"
#include "rose_build_merge.h"
#include "nfa/castlecompile.h"
#include "nfagraph/ng_execute.h"
#include "nfagraph/ng_holder.h"
#include "nfagraph/ng_util.h"
#include "util/clique.h"
#include "util/compile_context.h"
#include "util/container.h"
#include "util/graph.h"
#include "util/make_unique.h"
using namespace std;
namespace ue2 {
template<typename role_id>
struct RoleChunk {
vector<RoleInfo<role_id>> roles;
};
static
CharReach getReachability(const NGHolder &h) {
CharReach cr;
for (const auto &v : vertices_range(h)) {
if (!is_special(v, h)) {
cr |= h[v].char_reach;
}
}
return cr;
}
template<typename role_id>
static
vector<RoleChunk<role_id>> divideIntoChunks(const RoseBuildImpl &build,
set<RoleInfo<role_id>> &roleInfoSet) {
u32 chunkSize = build.cc.grey.tamaChunkSize;
u32 cnt = 1;
vector<RoleChunk<role_id>> chunks;
RoleChunk<role_id> roleChunk;
for (const auto &roleInfo : roleInfoSet) {
if (cnt == chunkSize) {
cnt -= chunkSize;
chunks.push_back(roleChunk);
roleChunk.roles.clear();
}
roleChunk.roles.push_back(roleInfo);
cnt++;
}
if (cnt > 1) {
chunks.push_back(roleChunk);
}
return chunks;
}
/* add prefix literals to engine graph */
static
bool addPrefixLiterals(NGHolder &h, ue2::unordered_set<u32> &tailId,
const vector<vector<CharReach>> &triggers) {
DEBUG_PRINTF("add literals to graph\n");
NFAVertex start = h.start;
vector<NFAVertex> heads;
vector<NFAVertex> tails;
for (const auto &lit : triggers) {
NFAVertex last = start;
if (lit.empty()) {
return false;
}
u32 i = 0;
for (const auto &c : lit) {
DEBUG_PRINTF("lit:%s \n", c.to_string().c_str());
NFAVertex u = add_vertex(h);
h[u].char_reach = c;
if (!i++) {
heads.push_back(u);
last = u;
continue;
}
add_edge(last, u, h);
last = u;
}
tails.push_back(last);
tailId.insert(h[last].index);
}
for (auto v : adjacent_vertices_range(start, h)) {
if (v != h.startDs) {
for (auto &t : tails) {
add_edge(t, v, h);
}
}
}
clear_out_edges(start, h);
add_edge(h.start, h.start, h);
for (auto &t : heads) {
add_edge(start, t, h);
}
DEBUG_PRINTF("literals addition done\n");
return true;
}
/* check if one literal is a suffix of another */
static
bool isSuffix(const vector<vector<CharReach>> &triggers1,
const vector<vector<CharReach>> &triggers2) {
// literal suffix test
for (const auto &lit1 : triggers1) {
for (const auto &lit2 : triggers2) {
const size_t len = min(lit1.size(), lit2.size());
if (equal(lit1.rbegin(), lit1.rbegin() + len,
lit2.rbegin(), overlaps)) {
return true;
}
}
}
return false;
}
/* prepare initial infix or suffix graph used for exclusive analysis */
template<typename role_id>
static
u32 prepareRoleGraph(NGHolder &h, const role_id &s1) {
u32 num = 0;
if (s1.castle()) {
num = num_vertices(h);
NFAVertex u = add_vertex(h);
h[u].char_reach = s1.castle()->reach();
add_edge(h.startDs, u, h);
// add self loop to repeat characters
add_edge(u, u, h);
} else if (s1.graph()) {
const NGHolder &g = *s1.graph();
cloneHolder(h, g);
num = num_vertices(h);
} else {
// only infixes and suffixes with graph properties are possible
// candidates, already filtered out other cases before
// exclusive analysis
assert(0);
}
return num;
}
/* get the tail of the literal that follows the last "reset" character,
 * i.e. the last character with no overlap with the given reach */
static
vector<CharReach> findStartPos(const CharReach &cr1,
const vector<CharReach> &lit) {
auto it = lit.rbegin(), ite = lit.rend();
u32 pos = lit.size();
for (; it != ite; it++) {
if (!overlaps(cr1, *it)) {
break;
}
pos--;
}
return vector<CharReach> (lit.begin() + pos, lit.end());
}
template<typename role_id>
static
bool isExclusive(const NGHolder &h,
const u32 num, ue2::unordered_set<u32> &tailId,
map<u32, ue2::unordered_set<u32>> &skipList,
const RoleInfo<role_id> &role1,
const RoleInfo<role_id> &role2) {
const u32 id1 = role1.id;
const u32 id2 = role2.id;
if (contains(skipList, id1) && contains(skipList[id1], id2)) {
return false;
}
const auto &triggers1 = role1.literals;
const auto &triggers2 = role2.literals;
if (isSuffix(triggers1, triggers2)) {
skipList[id2].insert(id1);
return false;
}
DEBUG_PRINTF("role id2:%u\n", id2);
const auto &cr1 = role1.cr;
if (overlaps(cr1, role2.last_cr)) {
CharReach cr = cr1 | role1.prefix_cr;
for (const auto &lit : triggers2) {
auto lit1 = findStartPos(cr, lit);
if (lit1.empty()) {
continue;
}
u32 lower_bound = 0;
if (lit1.size() < lit.size()) {
lower_bound = ~0U;
}
ue2::flat_set<NFAVertex> states;
for (const auto &v : vertices_range(h)) {
if (h[v].index >= lower_bound || h[v].index < 2) {
states.insert(v);
}
}
auto activeStates = execute_graph(h, lit1, states);
// Check that no original graph state or literal tail state is still on
for (const auto &s : activeStates) {
u32 stateId = h[s].index;
if ((stateId > 1 && stateId <= num) ||
contains(tailId, stateId)) {
skipList[id2].insert(id1);
return false;
}
}
}
}
return true;
}
template<typename role_id>
static
ue2::unordered_set<u32> checkExclusivity(const NGHolder &h,
const u32 num, ue2::unordered_set<u32> &tailId,
map<u32, ue2::unordered_set<u32>> &skipList,
const RoleInfo<role_id> &role1,
const RoleChunk<role_id> &roleChunk) {
ue2::unordered_set<u32> info;
const u32 id1 = role1.id;
for (const auto &role2 : roleChunk.roles) {
const u32 id2 = role2.id;
if (id1 != id2 && isExclusive(h, num, tailId, skipList,
role1, role2)) {
info.insert(id2);
}
}
return info;
}
static
void findCliques(const map<u32, set<u32>> &exclusiveGroups,
vector<vector<u32>> &exclusive_roles) {
if (exclusiveGroups.empty()) {
return;
}
// Construct the exclusivity graph
map<u32, CliqueVertex> vertex_map;
unique_ptr<CliqueGraph> cg = make_unique<CliqueGraph>();
// Add vertices representing infixes/suffixes
for (const auto &e : exclusiveGroups) {
const u32 id = e.first;
CliqueVertex v1 = add_vertex(CliqueVertexProps(id), *cg);
vertex_map[id] = v1;
}
// Wire exclusive pairs
for (const auto &e1 : exclusiveGroups) {
const u32 literalId1 = e1.first;
CliqueVertex lv = vertex_map[literalId1];
const set<u32> &exclusiveSet = e1.second;
for (const auto &e2 : exclusiveGroups) {
const u32 literalId2 = e2.first;
if (literalId1 < literalId2 &&
contains(exclusiveSet, literalId2)) {
add_edge(lv, vertex_map[literalId2], *cg);
DEBUG_PRINTF("Wire %u:%u\n", literalId1, literalId2);
}
}
}
// Find clique groups
const auto &clique = removeClique(*cg);
for (const auto &i : clique) {
DEBUG_PRINTF("cliq:%lu\n", i.size());
if (i.size() > 1) {
exclusive_roles.push_back(i);
}
}
DEBUG_PRINTF("Clique graph size:%lu\n", exclusive_roles.size());
}
static
map<u32, set<u32>> findExclusiveGroups(const RoseBuildImpl &build,
const map<u32, ue2::unordered_set<u32>> &exclusiveInfo,
const map<u32, vector<RoseVertex>> &vertex_map,
const bool is_infix) {
map<u32, set<u32>> exclusiveGroups;
for (const auto &e : exclusiveInfo) {
u32 i = e.first;
const auto &s = e.second;
set<u32> group;
set<RoseVertex> q1(vertex_map.at(i).begin(),
vertex_map.at(i).end());
DEBUG_PRINTF("vertex set:%lu\n", q1.size());
for (const auto &val : s) {
set<RoseVertex> q2(vertex_map.at(val).begin(),
vertex_map.at(val).end());
if (contains(exclusiveInfo.at(val), i) &&
(!is_infix || mergeableRoseVertices(build, q1, q2))) {
group.insert(val);
}
}
if (!group.empty()) {
exclusiveGroups[i] = group;
}
}
return exclusiveGroups;
}
template<typename role_id>
static
bool setTriggerLiterals(RoleInfo<role_id> &roleInfo,
const map<u32, vector<vector<CharReach>>> &triggers) {
u32 minLiteralLen = ~0U;
for (const auto &tr : triggers) {
for (const auto &lit : tr.second) {
if (lit.empty()) {
return false;
}
minLiteralLen = min(minLiteralLen, (u32)lit.size());
roleInfo.last_cr |= lit.back();
for (const auto &c : lit) {
roleInfo.prefix_cr |= c;
}
roleInfo.literals.push_back(lit);
}
}
if (roleInfo.role.graph()) {
const NGHolder &g = *roleInfo.role.graph();
roleInfo.cr = getReachability(g);
} else if (roleInfo.role.castle()) {
roleInfo.cr = roleInfo.role.castle()->reach();
}
// test the score of this engine
roleInfo.score = 256 - roleInfo.cr.count() + minLiteralLen;
if (roleInfo.score < 20) {
return false;
}
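// Illustrative arithmetic (not part of the original code): a subengine
// whose graph reaches only [a-z0-9] (36 characters) and whose shortest
// trigger literal is 4 bytes scores 256 - 36 + 4 = 224 and is kept,
// while one reaching 250 characters with a 1-byte trigger scores 7 and
// is rejected by the threshold above.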
return true;
}
bool setTriggerLiteralsInfix(RoleInfo<left_id> &roleInfo,
const map<u32, vector<vector<CharReach>>> &triggers) {
return setTriggerLiterals(roleInfo, triggers);
}
bool setTriggerLiteralsSuffix(RoleInfo<suffix_id> &roleInfo,
const map<u32, vector<vector<CharReach>>> &triggers) {
return setTriggerLiterals(roleInfo, triggers);
}
template<typename role_id>
static
void exclusiveAnalysis(const RoseBuildImpl &build,
const map<u32, vector<RoseVertex>> &vertex_map,
set<RoleInfo<role_id>> &roleInfoSet,
vector<vector<u32>> &exclusive_roles, const bool is_infix) {
const auto &chunks = divideIntoChunks(build, roleInfoSet);
DEBUG_PRINTF("Exclusivity analysis entry\n");
map<u32, ue2::unordered_set<u32>> exclusiveInfo;
for (const auto &roleChunk : chunks) {
map<u32, ue2::unordered_set<u32>> skipList;
for (const auto &role1 : roleChunk.roles) {
const u32 id1 = role1.id;
const role_id &s1 = role1.role;
const auto &triggers1 = role1.literals;
NGHolder h;
u32 num = prepareRoleGraph(h, s1);
DEBUG_PRINTF("role id1:%u\n", id1);
unordered_set<u32> tailId;
if (!addPrefixLiterals(h, tailId, triggers1)) {
continue;
}
exclusiveInfo[id1] = checkExclusivity(h, num, tailId,
skipList, role1, roleChunk);
}
}
// Create final candidate exclusive groups
const auto exclusiveGroups =
findExclusiveGroups(build, exclusiveInfo, vertex_map, is_infix);
exclusiveInfo.clear();
// Find cliques within the exclusive groups
findCliques(exclusiveGroups, exclusive_roles);
}
void exclusiveAnalysisInfix(const RoseBuildImpl &build,
const map<u32, vector<RoseVertex>> &vertex_map,
set<RoleInfo<left_id>> &roleInfoSet,
vector<vector<u32>> &exclusive_roles) {
exclusiveAnalysis(build, vertex_map, roleInfoSet, exclusive_roles,
true);
}
void exclusiveAnalysisSuffix(const RoseBuildImpl &build,
const map<u32, vector<RoseVertex>> &vertex_map,
set<RoleInfo<suffix_id>> &roleInfoSet,
vector<vector<u32>> &exclusive_roles) {
exclusiveAnalysis(build, vertex_map, roleInfoSet, exclusive_roles,
false);
}
} // namespace ue2


@@ -0,0 +1,144 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
 * \brief Exclusive analysis for infix and suffix engines.
 *
 * Two engines are considered exclusive if they can never be alive at the
 * same time. The analysis exploits the combination of triggering literals
 * and engine graphs: if the triggering literals of two engines kill all the
 * states in each other's graph, then the engines are exclusive.
 */
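/*
 * Illustrative example (not taken from this commit): suppose infix A is
 * [a-z]+ and is only triggered by the literal "foo", while infix B is
 * [0-9]+ and is only triggered by the literal "123". Scanning "123" kills
 * every live state of A's letter-only graph, and scanning "foo" kills every
 * live state of B's digit-only graph, so whenever one infix is (re)triggered
 * the other must already be dead: the two engines are exclusive and can
 * share a single Tamarama container slot.
 */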
#ifndef ROSE_BUILD_EXCLUSIVE_H
#define ROSE_BUILD_EXCLUSIVE_H
#include "ue2common.h"
#include "rose_build_impl.h"
#include "util/alloc.h"
#include "util/charreach.h"
#include <map>
#include <set>
#include <vector>
namespace ue2 {
/** \brief Subengine info, including the built engine and the
 * corresponding triggering rose vertices. */
struct ExclusiveSubengine {
aligned_unique_ptr<NFA> nfa;
std::vector<RoseVertex> vertices;
};
/** \brief exclusive info to build tamarama */
struct ExclusiveInfo {
// subengine info
std::vector<ExclusiveSubengine> subengines;
// all the reports in the tamarama
std::set<ReportID> reports;
// assigned queue id
u32 queue;
};
/** \brief role info structure for exclusive analysis */
template<typename role_id>
struct RoleInfo {
RoleInfo(role_id role_in, u32 id_in) : role(role_in), id(id_in) {}
bool operator==(const RoleInfo &b) const {
return id == b.id;
}
bool operator!=(const RoleInfo &b) const { return !(*this == b); }
bool operator<(const RoleInfo &b) const {
const RoleInfo &a = *this;
if (a.score != b.score) {
return a.score > b.score;
}
ORDER_CHECK(id);
return false;
}
std::vector<std::vector<CharReach>> literals; // prefix literals
CharReach prefix_cr; // reach of prefix literals
CharReach last_cr; // reach of the last character of literals
CharReach cr; // reach of engine graph
const role_id role; // infix or suffix info
const u32 id; // infix or suffix id
u32 score; // score for exclusive analysis
};
/**
* \brief add triggering literals to infix info.
*/
bool setTriggerLiteralsInfix(RoleInfo<left_id> &roleInfo,
const std::map<u32, std::vector<std::vector<CharReach>>> &triggers);
/**
* \brief add triggering literals to suffix info.
*/
bool setTriggerLiteralsSuffix(RoleInfo<suffix_id> &roleInfo,
const std::map<u32, std::vector<std::vector<CharReach>>> &triggers);
/**
 * Exclusive analysis for infix engines.
 *
 * @param build rose build info, used here mainly for the exclusive chunk size
 * @param vertex_map mapping from engine id to the rose vertices related to
 *        that engine
 * @param roleInfoSet role properties (infix info, triggering literals and
 *        literal reachability) used for exclusive analysis
 * @param exclusive_roles output: groups of mutually exclusive engine ids
 */
void exclusiveAnalysisInfix(const RoseBuildImpl &build,
const std::map<u32, std::vector<RoseVertex>> &vertex_map,
std::set<RoleInfo<left_id>> &roleInfoSet,
std::vector<std::vector<u32>> &exclusive_roles);
/**
 * Exclusive analysis for suffix engines.
 *
 * @param build rose build info, used here mainly for the exclusive chunk size
 * @param vertex_map mapping from engine id to the rose vertices related to
 *        that engine
 * @param roleInfoSet role properties (suffix info, triggering literals and
 *        literal reachability) used for exclusive analysis
 * @param exclusive_roles output: groups of mutually exclusive engine ids
 */
void exclusiveAnalysisSuffix(const RoseBuildImpl &build,
const std::map<u32, std::vector<RoseVertex>> &vertex_map,
std::set<RoleInfo<suffix_id>> &roleInfoSet,
std::vector<std::vector<u32>> &exclusive_roles);
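/*
 * Typical use (a sketch mirroring findExclusiveInfixes/findExclusiveSuffixes
 * in this commit; the local names are illustrative):
 *
 *   std::map<u32, std::vector<RoseVertex>> vertex_map; // role id -> vertices
 *   std::set<RoleInfo<left_id>> roleInfoSet;
 *   // for each candidate infix: build a RoleInfo, collect its trigger
 *   // sequences, and keep it only if setTriggerLiteralsInfix() accepts it
 *
 *   std::vector<std::vector<u32>> groups;
 *   exclusiveAnalysisInfix(build, vertex_map, roleInfoSet, groups);
 *   // each entry of 'groups' lists role ids that can share one tamarama
 */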
} // namespace ue2
#endif //ROSE_BUILD_EXCLUSIVE_H


@@ -65,12 +65,13 @@ class SomSlotManager;
struct suffix_id {
suffix_id(const RoseSuffixInfo &in)
: g(in.graph.get()), c(in.castle.get()), d(in.rdfa.get()),
- h(in.haig.get()), dfa_min_width(in.dfa_min_width),
+ h(in.haig.get()), t(in.tamarama.get()),
+ dfa_min_width(in.dfa_min_width),
dfa_max_width(in.dfa_max_width) {
assert(!g || g->kind == NFA_SUFFIX);
}
bool operator==(const suffix_id &b) const {
- bool rv = g == b.g && c == b.c && h == b.h && d == b.d;
+ bool rv = g == b.g && c == b.c && h == b.h && d == b.d && t == b.t;
assert(!rv || dfa_min_width == b.dfa_min_width);
assert(!rv || dfa_max_width == b.dfa_max_width);
return rv;
@@ -82,6 +83,7 @@ struct suffix_id {
ORDER_CHECK(c);
ORDER_CHECK(d);
ORDER_CHECK(h);
ORDER_CHECK(t);
return false;
}
@@ -113,6 +115,22 @@ struct suffix_id {
}
return c;
}
TamaProto *tamarama() {
if (!d && !h) {
assert(dfa_min_width == depth(0));
assert(dfa_max_width == depth::infinity());
}
return t;
}
const TamaProto *tamarama() const {
if (!d && !h) {
assert(dfa_min_width == depth(0));
assert(dfa_max_width == depth::infinity());
}
return t;
}
raw_som_dfa *haig() { return h; }
const raw_som_dfa *haig() const { return h; }
raw_dfa *dfa() { return d; }
@@ -125,6 +143,7 @@ private:
CastleProto *c;
raw_dfa *d;
raw_som_dfa *h;
TamaProto *t;
depth dfa_min_width;
depth dfa_max_width;


@@ -34,6 +34,7 @@
#include "nfa/mcclellancompile_util.h"
#include "nfa/nfa_api.h"
#include "nfa/rdfa.h"
#include "nfa/tamaramacompile.h"
#include "nfagraph/ng_holder.h"
#include "nfagraph/ng_limex.h"
#include "nfagraph/ng_reports.h"
@@ -909,7 +910,7 @@ set<ReportID> all_reports(const OutfixInfo &outfix) {
bool RoseSuffixInfo::operator==(const RoseSuffixInfo &b) const {
return top == b.top && graph == b.graph && castle == b.castle &&
- rdfa == b.rdfa && haig == b.haig;
+ rdfa == b.rdfa && haig == b.haig && tamarama == b.tamarama;
}
bool RoseSuffixInfo::operator<(const RoseSuffixInfo &b) const {
@@ -919,6 +920,7 @@ bool RoseSuffixInfo::operator<(const RoseSuffixInfo &b) const {
ORDER_CHECK(castle);
ORDER_CHECK(haig);
ORDER_CHECK(rdfa);
ORDER_CHECK(tamarama);
assert(a.dfa_min_width == b.dfa_min_width);
assert(a.dfa_max_width == b.dfa_max_width);
return false;
@@ -931,13 +933,16 @@ void RoseSuffixInfo::reset(void) {
castle.reset();
rdfa.reset();
haig.reset();
tamarama.reset();
dfa_min_width = 0;
dfa_max_width = depth::infinity();
}
std::set<ReportID> all_reports(const suffix_id &s) {
assert(s.graph() || s.castle() || s.haig() || s.dfa());
- if (s.graph()) {
+ if (s.tamarama()) {
+ return all_reports(*s.tamarama());
+ } else if (s.graph()) {
return all_reports(*s.graph());
} else if (s.castle()) {
return all_reports(*s.castle());
@@ -1149,6 +1154,7 @@ void LeftEngInfo::reset(void) {
castle.reset();
dfa.reset();
haig.reset();
tamarama.reset();
lag = 0;
leftfix_report = MO_INVALID_IDX;
dfa_min_width = 0;


@@ -718,7 +718,7 @@ void dumpNfas(const RoseEngine *t, bool dump_raw, const string &base) {
FILE *f;
f = fopen(ssdot.str().c_str(), "w");
- nfaDumpDot(n, f);
+ nfaDumpDot(n, f, base);
fclose(f);
f = fopen(sstxt.str().c_str(), "w");
@@ -778,7 +778,7 @@ void dumpRevNfas(const RoseEngine *t, bool dump_raw, const string &base) {
FILE *f;
f = fopen(ssdot.str().c_str(), "w");
- nfaDumpDot(n, f);
+ nfaDumpDot(n, f, base);
fclose(f);
f = fopen(sstxt.str().c_str(), "w");
@@ -809,7 +809,7 @@ void dumpAnchored(const RoseEngine *t, const string &base) {
FILE *f;
f = fopen(ssdot.str().c_str(), "w");
- nfaDumpDot(n, f);
+ nfaDumpDot(n, f, base);
fclose(f);
f = fopen(sstxt.str().c_str(), "w");


@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -55,6 +55,7 @@ namespace ue2 {
struct CastleProto;
struct raw_dfa;
struct raw_som_dfa;
struct TamaProto;
/** \brief Table type for a literal. */
enum rose_literal_table {
@@ -82,6 +83,7 @@ struct LeftEngInfo {
std::shared_ptr<CastleProto> castle;
std::shared_ptr<raw_dfa> dfa;
std::shared_ptr<raw_som_dfa> haig;
std::shared_ptr<TamaProto> tamarama;
u32 lag = 0U;
ReportID leftfix_report = MO_INVALID_IDX;
depth dfa_min_width = 0;
@@ -92,6 +94,7 @@ struct LeftEngInfo {
&& other.castle == castle
&& other.dfa == dfa
&& other.haig == haig
&& other.tamarama == tamarama
&& other.lag == lag
&& other.leftfix_report == leftfix_report;
}
@@ -104,6 +107,7 @@ struct LeftEngInfo {
ORDER_CHECK(castle);
ORDER_CHECK(dfa);
ORDER_CHECK(haig);
ORDER_CHECK(tamarama);
ORDER_CHECK(lag);
ORDER_CHECK(leftfix_report);
return false;
@@ -121,6 +125,7 @@ struct RoseSuffixInfo {
std::shared_ptr<CastleProto> castle;
std::shared_ptr<raw_som_dfa> haig;
std::shared_ptr<raw_dfa> rdfa;
std::shared_ptr<TamaProto> tamarama;
depth dfa_min_width = 0;
depth dfa_max_width = depth::infinity();
@@ -128,7 +133,7 @@ struct RoseSuffixInfo {
bool operator!=(const RoseSuffixInfo &b) const { return !(*this == b); }
bool operator<(const RoseSuffixInfo &b) const;
void reset(void);
- operator bool() const { return graph || castle || haig || rdfa; }
+ operator bool() const { return graph || castle || haig || rdfa || tamarama; }
};
/** \brief Properties attached to each Rose graph vertex. */