mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-10-10 08:12:21 +03:00
tamarama: add container engine for exclusive nfas
Add the new Tamarama engine that acts as a container for infix/suffix engines that can be proven to run exclusively of one another. This reduces stream state for pattern sets with many exclusive engines.
This commit is contained in:
446
src/rose/rose_build_exclusive.cpp
Normal file
446
src/rose/rose_build_exclusive.cpp
Normal file
@@ -0,0 +1,446 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
#include "rose_build_exclusive.h"
|
||||
#include "rose_build_merge.h"
|
||||
#include "nfa/castlecompile.h"
|
||||
#include "nfagraph/ng_execute.h"
|
||||
#include "nfagraph/ng_holder.h"
|
||||
#include "nfagraph/ng_util.h"
|
||||
#include "util/clique.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph.h"
|
||||
#include "util/make_unique.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
template<typename role_id>
|
||||
struct RoleChunk {
|
||||
vector<RoleInfo<role_id>> roles;
|
||||
};
|
||||
|
||||
static
|
||||
CharReach getReachability(const NGHolder &h) {
|
||||
CharReach cr;
|
||||
for (const auto &v : vertices_range(h)) {
|
||||
if (!is_special(v, h)) {
|
||||
cr |= h[v].char_reach;
|
||||
}
|
||||
}
|
||||
return cr;
|
||||
}
|
||||
|
||||
template<typename role_id>
|
||||
static
|
||||
vector<RoleChunk<role_id>> divideIntoChunks(const RoseBuildImpl &build,
|
||||
set<RoleInfo<role_id>> &roleInfoSet) {
|
||||
u32 chunkSize = build.cc.grey.tamaChunkSize;
|
||||
u32 cnt = 1;
|
||||
vector<RoleChunk<role_id>> chunks;
|
||||
RoleChunk<role_id> roleChunk;
|
||||
for (const auto &roleInfo : roleInfoSet) {
|
||||
if (cnt == chunkSize) {
|
||||
cnt -= chunkSize;
|
||||
chunks.push_back(roleChunk);
|
||||
roleChunk.roles.clear();
|
||||
}
|
||||
roleChunk.roles.push_back(roleInfo);
|
||||
cnt++;
|
||||
}
|
||||
|
||||
if (cnt > 1) {
|
||||
chunks.push_back(roleChunk);
|
||||
}
|
||||
|
||||
return chunks;
|
||||
}
|
||||
|
||||
/* add prefix literals to engine graph */
|
||||
static
|
||||
bool addPrefixLiterals(NGHolder &h, ue2::unordered_set<u32> &tailId,
|
||||
const vector<vector<CharReach>> &triggers) {
|
||||
DEBUG_PRINTF("add literals to graph\n");
|
||||
|
||||
NFAVertex start = h.start;
|
||||
vector<NFAVertex> heads;
|
||||
vector<NFAVertex> tails;
|
||||
for (const auto &lit : triggers) {
|
||||
NFAVertex last = start;
|
||||
if (lit.empty()) {
|
||||
return false;
|
||||
}
|
||||
u32 i = 0;
|
||||
for (const auto &c : lit) {
|
||||
DEBUG_PRINTF("lit:%s \n", c.to_string().c_str());
|
||||
NFAVertex u = add_vertex(h);
|
||||
h[u].char_reach = c;
|
||||
if (!i++) {
|
||||
heads.push_back(u);
|
||||
last = u;
|
||||
continue;
|
||||
}
|
||||
add_edge(last, u, h);
|
||||
last = u;
|
||||
}
|
||||
tails.push_back(last);
|
||||
tailId.insert(h[last].index);
|
||||
}
|
||||
|
||||
for (auto v : adjacent_vertices_range(start, h)) {
|
||||
if (v != h.startDs) {
|
||||
for (auto &t : tails) {
|
||||
add_edge(t, v, h);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
clear_out_edges(start, h);
|
||||
add_edge(h.start, h.start, h);
|
||||
for (auto &t : heads) {
|
||||
add_edge(start, t, h);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("literals addition done\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
/* check if one literal is suffix of another */
|
||||
static
|
||||
bool isSuffix(const vector<vector<CharReach>> &triggers1,
|
||||
const vector<vector<CharReach>> &triggers2) {
|
||||
// literal suffix test
|
||||
for (const auto &lit1 : triggers1) {
|
||||
for (const auto &lit2 : triggers2) {
|
||||
const size_t len = min(lit1.size(), lit2.size());
|
||||
if (equal(lit1.rbegin(), lit1.rbegin() + len,
|
||||
lit2.rbegin(), overlaps)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* prepare initial infix or suffix graph used for exclusive analysis */
|
||||
template<typename role_id>
|
||||
static
|
||||
u32 prepareRoleGraph(NGHolder &h, const role_id &s1) {
|
||||
u32 num = 0;
|
||||
if (s1.castle()) {
|
||||
num = num_vertices(h);
|
||||
NFAVertex u = add_vertex(h);
|
||||
h[u].char_reach = s1.castle()->reach();
|
||||
add_edge(h.startDs, u, h);
|
||||
// add self loop to repeat characters
|
||||
add_edge(u, u, h);
|
||||
} else if (s1.graph()) {
|
||||
const NGHolder &g = *s1.graph();
|
||||
cloneHolder(h, g);
|
||||
num = num_vertices(h);
|
||||
} else {
|
||||
// only infixes and suffixes with graph properties are possible
|
||||
// candidates, already filtered out other cases before
|
||||
// exclusive analysis
|
||||
assert(0);
|
||||
}
|
||||
|
||||
return num;
|
||||
}
|
||||
|
||||
/* get a subset of literal if reset character is found */
|
||||
static
|
||||
vector<CharReach> findStartPos(const CharReach &cr1,
|
||||
const vector<CharReach> &lit) {
|
||||
auto it = lit.rbegin(), ite = lit.rend();
|
||||
u32 pos = lit.size();
|
||||
for (; it != ite; it++) {
|
||||
if (!overlaps(cr1, *it)) {
|
||||
break;
|
||||
}
|
||||
pos--;
|
||||
}
|
||||
|
||||
return vector<CharReach> (lit.begin() + pos, lit.end());
|
||||
}
|
||||
|
||||
template<typename role_id>
|
||||
static
|
||||
bool isExclusive(const NGHolder &h,
|
||||
const u32 num, ue2::unordered_set<u32> &tailId,
|
||||
map<u32, ue2::unordered_set<u32>> &skipList,
|
||||
const RoleInfo<role_id> &role1,
|
||||
const RoleInfo<role_id> &role2) {
|
||||
const u32 id1 = role1.id;
|
||||
const u32 id2 = role2.id;
|
||||
|
||||
if (contains(skipList, id1) && contains(skipList[id1], id2)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto &triggers1 = role1.literals;
|
||||
const auto &triggers2 = role2.literals;
|
||||
if (isSuffix(triggers1, triggers2)) {
|
||||
skipList[id2].insert(id1);
|
||||
return false;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("role id2:%u\n", id2);
|
||||
const auto &cr1 = role1.cr;
|
||||
if (overlaps(cr1, role2.last_cr)) {
|
||||
CharReach cr = cr1 | role1.prefix_cr;
|
||||
for (const auto &lit : triggers2) {
|
||||
auto lit1 = findStartPos(cr, lit);
|
||||
if (lit1.empty()) {
|
||||
continue;
|
||||
}
|
||||
u32 lower_bound = 0;
|
||||
if (lit1.size() < lit.size()) {
|
||||
lower_bound = ~0U;
|
||||
}
|
||||
|
||||
ue2::flat_set<NFAVertex> states;
|
||||
for (const auto &v : vertices_range(h)) {
|
||||
if (h[v].index >= lower_bound || h[v].index < 2) {
|
||||
states.insert(v);
|
||||
}
|
||||
}
|
||||
|
||||
auto activeStates = execute_graph(h, lit1, states);
|
||||
// Check if has only literal states are on
|
||||
for (const auto &s : activeStates) {
|
||||
u32 stateId = h[s].index;
|
||||
if ((stateId > 1 && stateId <= num) ||
|
||||
contains(tailId, stateId)) {
|
||||
skipList[id2].insert(id1);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename role_id>
|
||||
static
|
||||
ue2::unordered_set<u32> checkExclusivity(const NGHolder &h,
|
||||
const u32 num, ue2::unordered_set<u32> &tailId,
|
||||
map<u32, ue2::unordered_set<u32>> &skipList,
|
||||
const RoleInfo<role_id> &role1,
|
||||
const RoleChunk<role_id> &roleChunk) {
|
||||
ue2::unordered_set<u32> info;
|
||||
const u32 id1 = role1.id;
|
||||
for (const auto &role2 : roleChunk.roles) {
|
||||
const u32 id2 = role2.id;
|
||||
if (id1 != id2 && isExclusive(h, num, tailId, skipList,
|
||||
role1, role2)) {
|
||||
info.insert(id2);
|
||||
}
|
||||
}
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
static
|
||||
void findCliques(const map<u32, set<u32>> &exclusiveGroups,
|
||||
vector<vector<u32>> &exclusive_roles) {
|
||||
if (exclusiveGroups.empty()) {
|
||||
return;
|
||||
}
|
||||
// Construct the exclusivity graph
|
||||
map<u32, CliqueVertex> vertex_map;
|
||||
unique_ptr<CliqueGraph> cg = make_unique<CliqueGraph>();
|
||||
|
||||
// Add vertices representing infixes/suffixes
|
||||
for (const auto &e : exclusiveGroups) {
|
||||
const u32 id = e.first;
|
||||
CliqueVertex v1 = add_vertex(CliqueVertexProps(id), *cg);
|
||||
vertex_map[id] = v1;
|
||||
}
|
||||
|
||||
// Wire exclusive pairs
|
||||
for (const auto &e1 : exclusiveGroups) {
|
||||
const u32 literalId1 = e1.first;
|
||||
CliqueVertex lv = vertex_map[literalId1];
|
||||
const set<u32> &exclusiveSet = e1.second;
|
||||
for (const auto &e2 : exclusiveGroups) {
|
||||
const u32 literalId2 = e2.first;
|
||||
if (literalId1 < literalId2 &&
|
||||
contains(exclusiveSet, literalId2)) {
|
||||
add_edge(lv, vertex_map[literalId2], *cg);
|
||||
DEBUG_PRINTF("Wire %u:%u\n", literalId1, literalId2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find clique groups
|
||||
const auto &clique = removeClique(*cg);
|
||||
for (const auto &i : clique) {
|
||||
DEBUG_PRINTF("cliq:%lu\n", i.size());
|
||||
if (i.size() > 1) {
|
||||
exclusive_roles.push_back(i);
|
||||
}
|
||||
}
|
||||
DEBUG_PRINTF("Clique graph size:%lu\n", exclusive_roles.size());
|
||||
}
|
||||
|
||||
static
|
||||
map<u32, set<u32>> findExclusiveGroups(const RoseBuildImpl &build,
|
||||
const map<u32, ue2::unordered_set<u32>> &exclusiveInfo,
|
||||
const map<u32, vector<RoseVertex>> &vertex_map,
|
||||
const bool is_infix) {
|
||||
map<u32, set<u32>> exclusiveGroups;
|
||||
for (const auto &e : exclusiveInfo) {
|
||||
u32 i = e.first;
|
||||
const auto &s = e.second;
|
||||
set<u32> group;
|
||||
set<RoseVertex> q1(vertex_map.at(i).begin(),
|
||||
vertex_map.at(i).end());
|
||||
DEBUG_PRINTF("vertex set:%lu\n", q1.size());
|
||||
for (const auto &val : s) {
|
||||
set<RoseVertex> q2(vertex_map.at(val).begin(),
|
||||
vertex_map.at(val).end());
|
||||
if (contains(exclusiveInfo.at(val), i) &&
|
||||
(!is_infix || mergeableRoseVertices(build, q1, q2))) {
|
||||
group.insert(val);
|
||||
}
|
||||
}
|
||||
if (!group.empty()) {
|
||||
exclusiveGroups[i] = group;
|
||||
}
|
||||
}
|
||||
|
||||
return exclusiveGroups;
|
||||
}
|
||||
|
||||
template<typename role_id>
|
||||
static
|
||||
bool setTriggerLiterals(RoleInfo<role_id> &roleInfo,
|
||||
const map<u32, vector<vector<CharReach>>> &triggers) {
|
||||
u32 minLiteralLen = ~0U;
|
||||
for (const auto &tr : triggers) {
|
||||
for (const auto &lit : tr.second) {
|
||||
if (lit.empty()) {
|
||||
return false;
|
||||
}
|
||||
minLiteralLen = min(minLiteralLen, (u32)lit.size());
|
||||
roleInfo.last_cr |= lit.back();
|
||||
for (const auto &c : lit) {
|
||||
roleInfo.prefix_cr |= c;
|
||||
}
|
||||
roleInfo.literals.push_back(lit);
|
||||
}
|
||||
}
|
||||
|
||||
if (roleInfo.role.graph()) {
|
||||
const NGHolder &g = *roleInfo.role.graph();
|
||||
roleInfo.cr = getReachability(g);
|
||||
} else if (roleInfo.role.castle()) {
|
||||
roleInfo.cr = roleInfo.role.castle()->reach();
|
||||
}
|
||||
|
||||
// test the score of this engine
|
||||
roleInfo.score = 256 - roleInfo.cr.count() + minLiteralLen;
|
||||
if (roleInfo.score < 20) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool setTriggerLiteralsInfix(RoleInfo<left_id> &roleInfo,
|
||||
const map<u32, vector<vector<CharReach>>> &triggers) {
|
||||
return setTriggerLiterals(roleInfo, triggers);
|
||||
}
|
||||
|
||||
bool setTriggerLiteralsSuffix(RoleInfo<suffix_id> &roleInfo,
|
||||
const map<u32, vector<vector<CharReach>>> &triggers) {
|
||||
return setTriggerLiterals(roleInfo, triggers);
|
||||
}
|
||||
|
||||
template<typename role_id>
|
||||
static
|
||||
void exclusiveAnalysis(const RoseBuildImpl &build,
|
||||
const map<u32, vector<RoseVertex>> &vertex_map,
|
||||
set<RoleInfo<role_id>> &roleInfoSet,
|
||||
vector<vector<u32>> &exclusive_roles, const bool is_infix) {
|
||||
const auto &chunks = divideIntoChunks(build, roleInfoSet);
|
||||
DEBUG_PRINTF("Exclusivity analysis entry\n");
|
||||
map<u32, ue2::unordered_set<u32>> exclusiveInfo;
|
||||
|
||||
for (const auto &roleChunk : chunks) {
|
||||
map<u32, ue2::unordered_set<u32>> skipList;
|
||||
for (const auto &role1 : roleChunk.roles) {
|
||||
const u32 id1 = role1.id;
|
||||
const role_id &s1 = role1.role;
|
||||
const auto &triggers1 = role1.literals;
|
||||
|
||||
NGHolder h;
|
||||
u32 num = prepareRoleGraph(h, s1);
|
||||
DEBUG_PRINTF("role id1:%u\n", id1);
|
||||
unordered_set<u32> tailId;
|
||||
if (!addPrefixLiterals(h, tailId, triggers1)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
exclusiveInfo[id1] = checkExclusivity(h, num, tailId,
|
||||
skipList, role1, roleChunk);
|
||||
}
|
||||
}
|
||||
|
||||
// Create final candidate exclusive groups
|
||||
const auto exclusiveGroups =
|
||||
findExclusiveGroups(build, exclusiveInfo, vertex_map, is_infix);
|
||||
exclusiveInfo.clear();
|
||||
|
||||
// Find cliques for each exclusive groups
|
||||
findCliques(exclusiveGroups, exclusive_roles);
|
||||
}
|
||||
|
||||
void exclusiveAnalysisInfix(const RoseBuildImpl &build,
|
||||
const map<u32, vector<RoseVertex>> &vertex_map,
|
||||
set<RoleInfo<left_id>> &roleInfoSet,
|
||||
vector<vector<u32>> &exclusive_roles) {
|
||||
exclusiveAnalysis(build, vertex_map, roleInfoSet, exclusive_roles,
|
||||
true);
|
||||
}
|
||||
|
||||
void exclusiveAnalysisSuffix(const RoseBuildImpl &build,
|
||||
const map<u32, vector<RoseVertex>> &vertex_map,
|
||||
set<RoleInfo<suffix_id>> &roleInfoSet,
|
||||
vector<vector<u32>> &exclusive_roles) {
|
||||
exclusiveAnalysis(build, vertex_map, roleInfoSet, exclusive_roles,
|
||||
false);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
Reference in New Issue
Block a user