mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-29 19:24:25 +03:00
De-multiaccel
This commit is contained in:
@@ -37,7 +37,6 @@
|
||||
#include "ue2common.h"
|
||||
|
||||
#include "nfa/accel.h"
|
||||
#include "nfa/multiaccel_compilehelper.h"
|
||||
|
||||
#include "util/bitutils.h" // for CASE_CLEAR
|
||||
#include "util/charreach.h"
|
||||
@@ -677,134 +676,6 @@ NFAVertex get_sds_or_proxy(const NGHolder &g) {
|
||||
return g.startDs;
|
||||
}
|
||||
|
||||
static
|
||||
NFAVertex find_next(const NFAVertex v, const NGHolder &g) {
|
||||
NFAVertex res = NGHolder::null_vertex();
|
||||
for (NFAVertex u : adjacent_vertices_range(v, g)) {
|
||||
if (u != v) {
|
||||
res = u;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/** \brief Check if vertex \a v is a multi accelerable state (for a limex NFA). */
|
||||
MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g,
|
||||
const vector<NFAVertex> &states,
|
||||
const CompileContext &cc) {
|
||||
// For a set of states to be accelerable, we basically have to have only
|
||||
// one state to accelerate.
|
||||
if (states.size() != 1) {
|
||||
DEBUG_PRINTF("can't accelerate multiple states\n");
|
||||
return MultibyteAccelInfo();
|
||||
}
|
||||
|
||||
// Get our base vertex
|
||||
NFAVertex v = states[0];
|
||||
|
||||
// We need the base vertex to be a self-looping dotall leading to exactly
|
||||
// one vertex.
|
||||
if (!hasSelfLoop(v, g)) {
|
||||
DEBUG_PRINTF("base vertex has self-loop\n");
|
||||
return MultibyteAccelInfo();
|
||||
}
|
||||
|
||||
if (!g[v].char_reach.all()) {
|
||||
DEBUG_PRINTF("can't accelerate anything but dot\n");
|
||||
return MultibyteAccelInfo();
|
||||
}
|
||||
|
||||
if (proper_out_degree(v, g) != 1) {
|
||||
DEBUG_PRINTF("can't accelerate states with multiple successors\n");
|
||||
return MultibyteAccelInfo();
|
||||
}
|
||||
|
||||
// find our start vertex
|
||||
NFAVertex cur = find_next(v, g);
|
||||
if (cur == NGHolder::null_vertex()) {
|
||||
DEBUG_PRINTF("invalid start vertex\n");
|
||||
return MultibyteAccelInfo();
|
||||
}
|
||||
|
||||
bool has_offset = false;
|
||||
u32 offset = 0;
|
||||
CharReach cr = g[cur].char_reach;
|
||||
|
||||
// if we start with a dot, we have an offset, so defer figuring out the
|
||||
// real CharReach for this accel scheme
|
||||
if (cr == CharReach::dot()) {
|
||||
has_offset = true;
|
||||
offset = 1;
|
||||
}
|
||||
|
||||
// figure out our offset
|
||||
while (has_offset) {
|
||||
// vertices have to have no self loops
|
||||
if (hasSelfLoop(cur, g)) {
|
||||
DEBUG_PRINTF("can't have self-loops\n");
|
||||
return MultibyteAccelInfo();
|
||||
}
|
||||
|
||||
// we have to have exactly 1 successor to have this acceleration scheme
|
||||
if (out_degree(cur, g) != 1) {
|
||||
DEBUG_PRINTF("can't have multiple successors\n");
|
||||
return MultibyteAccelInfo();
|
||||
}
|
||||
|
||||
cur = *adjacent_vertices(cur, g).first;
|
||||
|
||||
// if we met a special vertex, bail out
|
||||
if (is_special(cur, g)) {
|
||||
DEBUG_PRINTF("can't have special vertices\n");
|
||||
return MultibyteAccelInfo();
|
||||
}
|
||||
|
||||
// now, get the real char reach
|
||||
if (g[cur].char_reach != CharReach::dot()) {
|
||||
cr = g[cur].char_reach;
|
||||
has_offset = false;
|
||||
} else {
|
||||
offset++;
|
||||
}
|
||||
}
|
||||
|
||||
// now, fire up the compilation machinery
|
||||
target_t ti = cc.target_info;
|
||||
unsigned max_len = ti.has_avx2() ? MULTIACCEL_MAX_LEN_AVX2 : MULTIACCEL_MAX_LEN_SSE;
|
||||
MultiaccelCompileHelper mac(cr, offset, max_len);
|
||||
|
||||
while (mac.canAdvance()) {
|
||||
// vertices have to have no self loops
|
||||
if (hasSelfLoop(cur, g)) {
|
||||
break;
|
||||
}
|
||||
|
||||
// we have to have exactly 1 successor to have this acceleration scheme
|
||||
if (out_degree(cur, g) != 1) {
|
||||
break;
|
||||
}
|
||||
|
||||
cur = *adjacent_vertices(cur, g).first;
|
||||
|
||||
// if we met a special vertex, bail out
|
||||
if (is_special(cur, g)) {
|
||||
break;
|
||||
}
|
||||
|
||||
mac.advance(g[cur].char_reach);
|
||||
}
|
||||
MultibyteAccelInfo mai = mac.getBestScheme();
|
||||
#ifdef DEBUG
|
||||
DEBUG_PRINTF("Multibyte acceleration scheme: type: %u offset: %u lengths: %u,%u\n",
|
||||
mai.type, mai.offset, mai.len1, mai.len2);
|
||||
for (size_t c = mai.cr.find_first(); c != CharReach::npos; c = mai.cr.find_next(c)) {
|
||||
DEBUG_PRINTF("multibyte accel char: %zu\n", c);
|
||||
}
|
||||
#endif
|
||||
return mai;
|
||||
}
|
||||
|
||||
/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). */
|
||||
bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
|
||||
const vector<CharReach> &refined_cr,
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -51,9 +51,6 @@ namespace ue2 {
|
||||
#define MAX_MERGED_ACCEL_STOPS 200
|
||||
#define ACCEL_MAX_STOP_CHAR 24
|
||||
#define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */
|
||||
/* NOTE(review): not referenced by the visible code; presumably the shortest
 * length worth a multibyte accel scheme -- confirm against the users. */
#define MULTIACCEL_MIN_LEN 3
|
||||
/* Maximum length handed to MultiaccelCompileHelper by nfaCheckMultiAccel when
 * the target does not report AVX2. */
#define MULTIACCEL_MAX_LEN_SSE 15
|
||||
/* Maximum length handed to MultiaccelCompileHelper by nfaCheckMultiAccel when
 * the target reports AVX2. */
#define MULTIACCEL_MAX_LEN_AVX2 31
|
||||
|
||||
// forward-declaration of CompileContext
|
||||
struct CompileContext;
|
||||
@@ -84,11 +81,6 @@ bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
|
||||
const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
|
||||
AccelScheme *as, bool allow_wide);
|
||||
|
||||
/** \brief Check if the state set \a verts is multi accelerable (for a limex
 * NFA).
 *
 * \param g     Graph holding the vertices.
 * \param verts Candidate states; the definition rejects anything other than
 *              exactly one state.
 * \param cc    Compile context; its target_info is consulted for AVX2 support.
 * \return The chosen scheme, or a default-constructed MultibyteAccelInfo when
 *         the states cannot be accelerated this way.
 */
|
||||
MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g,
|
||||
const std::vector<NFAVertex> &verts,
|
||||
const CompileContext &cc);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
|
Reference in New Issue
Block a user