limex: add fast NFA check

This commit is contained in:
Wang Xiang W
2020-09-10 09:55:12 +00:00
committed by Konstantinos Margaritis
parent 5ad3d64b4b
commit 9ea1e4be3d
8 changed files with 114 additions and 69 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
* Copyright (c) 2015-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -85,6 +85,18 @@ namespace ue2 {
*/
static constexpr u32 NO_STATE = ~0;
/* Largest state count for which an NFA still counts as "small"; isFast()
 * rejects anything with more states than this. */
static constexpr u32 MAX_SMALL_NFA_STATES = 64;
/* Largest bounded repeat upper bound (repeatMax) that isFast() accepts
 * without also inspecting the trigger's character reach. */
static constexpr u64a MAX_REPEAT_SIZE = 200;
/* Largest character reach count tolerated on the trigger of a bounded repeat
 * whose upper bound exceeds MAX_REPEAT_SIZE; a wider reach makes isFast()
 * return false. */
static constexpr u32 MAX_REPEAT_CHAR_REACH = 26;
/* Number of breadth-first levels, starting from the top vertices, that
 * isFast() searches for bounded repeat triggers; triggers first reached
 * beyond this distance are not examined. */
static constexpr u8 MIN_REPEAT_TRIGGER_DISTANCE = 6;
namespace {
struct precalcAccel {
@@ -2422,6 +2434,68 @@ bool isSane(const NGHolder &h, const map<u32, set<NFAVertex>> &tops,
}
#endif // NDEBUG
static
bool isFast(const build_info &args) {
const NGHolder &h = args.h;
const u32 num_states = args.num_states;
if (num_states > MAX_SMALL_NFA_STATES) {
return false;
}
unordered_map<NFAVertex, bool> pos_trigger;
for (u32 i = 0; i < args.repeats.size(); i++) {
const BoundedRepeatData &br = args.repeats[i];
assert(!contains(pos_trigger, br.pos_trigger));
pos_trigger[br.pos_trigger] = br.repeatMax <= MAX_REPEAT_SIZE;
}
// Small NFA without bounded repeat should be fast.
if (pos_trigger.empty()) {
return true;
}
vector<NFAVertex> cur;
unordered_set<NFAVertex> visited;
for (const auto &m : args.tops) {
for (NFAVertex v : m.second) {
cur.push_back(v);
visited.insert(v);
}
}
u8 pos_dist = 0;
while (!cur.empty()) {
vector<NFAVertex> next;
for (const auto &v : cur) {
if (contains(pos_trigger, v)) {
const CharReach &cr = h[v].char_reach;
if (!pos_trigger[v] && cr.count() > MAX_REPEAT_CHAR_REACH) {
return false;
}
}
for (const auto &w : adjacent_vertices_range(v, h)) {
if (w == v) {
continue;
}
u32 j = args.state_ids.at(w);
if (j == NO_STATE) {
continue;
}
if (!contains(visited, w)) {
next.push_back(w);
visited.insert(w);
}
}
}
if (++pos_dist >= MIN_REPEAT_TRIGGER_DISTANCE) {
break;
}
swap(cur, next);
}
return true;
}
static
u32 max_state(const unordered_map<NFAVertex, u32> &state_ids) {
u32 rv = 0;
@@ -2442,7 +2516,7 @@ bytecode_ptr<NFA> generate(NGHolder &h,
const unordered_map<NFAVertex, NFAStateSet> &squashMap,
const map<u32, set<NFAVertex>> &tops,
const set<NFAVertex> &zombies, bool do_accel,
bool stateCompression, u32 hint,
bool stateCompression, bool &fast, u32 hint,
const CompileContext &cc) {
const u32 num_states = max_state(states) + 1;
DEBUG_PRINTF("total states: %u\n", num_states);
@@ -2497,6 +2571,7 @@ bytecode_ptr<NFA> generate(NGHolder &h,
if (nfa) {
DEBUG_PRINTF("successful build with NFA engine: %s\n",
nfa_type_name(limex_model));
fast = isFast(arg);
return nfa;
}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
* Copyright (c) 2015-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -78,6 +78,7 @@ bytecode_ptr<NFA> generate(NGHolder &g,
const std::set<NFAVertex> &zombies,
bool do_accel,
bool stateCompression,
bool &fast,
u32 hint,
const CompileContext &cc);

View File

@@ -181,7 +181,6 @@ enum NFACategory {NFA_LIMEX, NFA_OTHER};
static const nfa_dispatch_fn has_repeats_other_than_firsts; \
static const u32 stateAlign = \
MAX(mlt_align, alignof(RepeatControl)); \
static const bool fast = mlt_size <= 64; \
}; \
const nfa_dispatch_fn NFATraits<LIMEX_NFA_##mlt_size>::has_accel \
= has_accel_limex<LimExNFA##mlt_size>; \
@@ -210,7 +209,6 @@ template<> struct NFATraits<MCCLELLAN_NFA_8> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 1;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
@@ -226,7 +224,6 @@ template<> struct NFATraits<MCCLELLAN_NFA_16> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 2;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
@@ -242,7 +239,6 @@ template<> struct NFATraits<GOUGH_NFA_8> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
@@ -258,7 +254,6 @@ template<> struct NFATraits<GOUGH_NFA_16> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
@@ -274,7 +269,6 @@ template<> struct NFATraits<MPV_NFA> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
@@ -290,7 +284,6 @@ template<> struct NFATraits<CASTLE_NFA> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
@@ -306,7 +299,6 @@ template<> struct NFATraits<LBR_NFA_DOT> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
@@ -322,7 +314,6 @@ template<> struct NFATraits<LBR_NFA_VERM> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
@@ -338,7 +329,6 @@ template<> struct NFATraits<LBR_NFA_NVERM> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
@@ -354,7 +344,6 @@ template<> struct NFATraits<LBR_NFA_SHUF> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
@@ -370,7 +359,6 @@ template<> struct NFATraits<LBR_NFA_TRUF> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
@@ -386,7 +374,6 @@ template<> struct NFATraits<SHENG_NFA> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 1;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
@@ -402,7 +389,6 @@ template<> struct NFATraits<TAMARAMA_NFA> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 64;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
@@ -418,7 +404,6 @@ template<> struct NFATraits<MCSHENG_NFA_8> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 1;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
@@ -434,7 +419,6 @@ template<> struct NFATraits<MCSHENG_NFA_16> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 2;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
@@ -450,7 +434,6 @@ template<> struct NFATraits<SHENG_NFA_32> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 1;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
@@ -466,7 +449,6 @@ template<> struct NFATraits<SHENG_NFA_64> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 1;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
@@ -482,7 +464,6 @@ template<> struct NFATraits<MCSHENG_64_NFA_8> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 1;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
@@ -498,7 +479,6 @@ template<> struct NFATraits<MCSHENG_64_NFA_16> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 2;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
@@ -536,20 +516,6 @@ u32 state_alignment(const NFA &nfa) {
return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, getStateAlign, nullptr);
}
namespace {
/* Dispatch helper, parameterised on the engine type: call() returns the
 * 'fast' flag from NFATraits<t>. The pointer argument is ignored. It is
 * passed by name to DISPATCH_BY_NFA_TYPE in is_fast(). */
template<NFAEngineType t>
struct getFastness {
static u32 call(void *) {
return NFATraits<t>::fast;
}
};
}
/* Returns the 'fast' trait of the given NFA's engine type, obtained by
 * dispatching on nfa.type to getFastness. */
bool is_fast(const NFA &nfa) {
    return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, getFastness, nullptr);
}
namespace {
template<NFAEngineType t>
struct is_limex {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -47,10 +47,6 @@ std::string describe(const NFA &nfa);
// For a given NFA, retrieve the alignment required by its uncompressed state.
u32 state_alignment(const NFA &nfa);
/* returns true if the nfa is considered 'fast'. TODO: work out what we mean by
* fast. */
bool is_fast(const NFA &n);
bool has_bounded_repeats_other_than_firsts(const NFA &n);
bool has_bounded_repeats(const NFA &n);