Multibyte acceleration compile side

This commit is contained in:
Anatoly Burakov 2015-12-09 13:38:58 +00:00 committed by Matthew Barr
parent 081b3ef369
commit 87424713a7
8 changed files with 1002 additions and 13 deletions

View File

@ -599,6 +599,8 @@ SET (hs_SRCS
src/nfa/mpv_internal.h
src/nfa/mpvcompile.cpp
src/nfa/mpvcompile.h
src/nfa/multiaccel_compilehelper.cpp
src/nfa/multiaccel_compilehelper.h
src/nfa/nfa_api.h
src/nfa/nfa_api_queue.h
src/nfa/nfa_api_util.h

View File

@ -169,13 +169,285 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
aux->accel_type = ACCEL_NONE;
}
static
void buildAccelMulti(const AccelInfo &info, AccelAux *aux) {
if (info.ma_type == MultibyteAccelInfo::MAT_NONE) {
DEBUG_PRINTF("no multimatch for us :(");
return;
}
u32 offset = info.multiaccel_offset;
const CharReach &stops = info.multiaccel_stops;
assert(aux->accel_type == ACCEL_NONE);
if (stops.all()) {
return;
}
size_t outs = stops.count();
DEBUG_PRINTF("%zu outs\n", outs);
assert(outs && outs < 256);
switch (info.ma_type) {
case MultibyteAccelInfo::MAT_LONG:
if (outs == 1) {
aux->accel_type = ACCEL_MLVERM;
aux->mverm.offset = offset;
aux->mverm.c = stops.find_first();
aux->mverm.len = info.ma_len1;
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
return;
}
if (outs == 2 && stops.isCaselessChar()) {
aux->accel_type = ACCEL_MLVERM_NOCASE;
aux->mverm.offset = offset;
aux->mverm.c = stops.find_first() & CASE_CLEAR;
aux->mverm.len = info.ma_len1;
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
aux->verm.c);
return;
}
break;
case MultibyteAccelInfo::MAT_LONGGRAB:
if (outs == 1) {
aux->accel_type = ACCEL_MLGVERM;
aux->mverm.offset = offset;
aux->mverm.c = stops.find_first();
aux->mverm.len = info.ma_len1;
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
return;
}
if (outs == 2 && stops.isCaselessChar()) {
aux->accel_type = ACCEL_MLGVERM_NOCASE;
aux->mverm.offset = offset;
aux->mverm.c = stops.find_first() & CASE_CLEAR;
aux->mverm.len = info.ma_len1;
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
aux->verm.c);
return;
}
break;
case MultibyteAccelInfo::MAT_SHIFT:
if (outs == 1) {
aux->accel_type = ACCEL_MSVERM;
aux->mverm.offset = offset;
aux->mverm.c = stops.find_first();
aux->mverm.len = info.ma_len1;
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
return;
}
if (outs == 2 && stops.isCaselessChar()) {
aux->accel_type = ACCEL_MSVERM_NOCASE;
aux->mverm.offset = offset;
aux->mverm.c = stops.find_first() & CASE_CLEAR;
aux->mverm.len = info.ma_len1;
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
aux->verm.c);
return;
}
break;
case MultibyteAccelInfo::MAT_SHIFTGRAB:
if (outs == 1) {
aux->accel_type = ACCEL_MSGVERM;
aux->mverm.offset = offset;
aux->mverm.c = stops.find_first();
aux->mverm.len = info.ma_len1;
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
return;
}
if (outs == 2 && stops.isCaselessChar()) {
aux->accel_type = ACCEL_MSGVERM_NOCASE;
aux->mverm.offset = offset;
aux->mverm.c = stops.find_first() & CASE_CLEAR;
aux->mverm.len = info.ma_len1;
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
aux->verm.c);
return;
}
break;
case MultibyteAccelInfo::MAT_DSHIFT:
if (outs == 1) {
aux->accel_type = ACCEL_MDSVERM;
aux->mdverm.offset = offset;
aux->mdverm.c = stops.find_first();
aux->mdverm.len1 = info.ma_len1;
aux->mdverm.len2 = info.ma_len2;
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
return;
}
if (outs == 2 && stops.isCaselessChar()) {
aux->accel_type = ACCEL_MDSVERM_NOCASE;
aux->mverm.offset = offset;
aux->mverm.c = stops.find_first() & CASE_CLEAR;
aux->mdverm.len1 = info.ma_len1;
aux->mdverm.len2 = info.ma_len2;
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
aux->verm.c);
return;
}
break;
case MultibyteAccelInfo::MAT_DSHIFTGRAB:
if (outs == 1) {
aux->accel_type = ACCEL_MDSGVERM;
aux->mdverm.offset = offset;
aux->mdverm.c = stops.find_first();
aux->mdverm.len1 = info.ma_len1;
aux->mdverm.len2 = info.ma_len2;
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
return;
}
if (outs == 2 && stops.isCaselessChar()) {
aux->accel_type = ACCEL_MDSGVERM_NOCASE;
aux->mverm.offset = offset;
aux->mverm.c = stops.find_first() & CASE_CLEAR;
aux->mdverm.len1 = info.ma_len1;
aux->mdverm.len2 = info.ma_len2;
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
aux->verm.c);
return;
}
break;
default:
// shouldn't happen
assert(0);
return;
}
DEBUG_PRINTF("attempting shufti for %zu chars\n", outs);
switch (info.ma_type) {
case MultibyteAccelInfo::MAT_LONG:
if (shuftiBuildMasks(stops, &aux->mshufti.lo,
&aux->mshufti.hi) == -1) {
break;
}
aux->accel_type = ACCEL_MLSHUFTI;
aux->mshufti.offset = offset;
aux->mshufti.len = info.ma_len1;
return;
case MultibyteAccelInfo::MAT_LONGGRAB:
if (shuftiBuildMasks(stops, &aux->mshufti.lo,
&aux->mshufti.hi) == -1) {
break;
}
aux->accel_type = ACCEL_MLGSHUFTI;
aux->mshufti.offset = offset;
aux->mshufti.len = info.ma_len1;
return;
case MultibyteAccelInfo::MAT_SHIFT:
if (shuftiBuildMasks(stops, &aux->mshufti.lo,
&aux->mshufti.hi) == -1) {
break;
}
aux->accel_type = ACCEL_MSSHUFTI;
aux->mshufti.offset = offset;
aux->mshufti.len = info.ma_len1;
return;
case MultibyteAccelInfo::MAT_SHIFTGRAB:
if (shuftiBuildMasks(stops, &aux->mshufti.lo,
&aux->mshufti.hi) == -1) {
break;
}
aux->accel_type = ACCEL_MSGSHUFTI;
aux->mshufti.offset = offset;
aux->mshufti.len = info.ma_len1;
return;
case MultibyteAccelInfo::MAT_DSHIFT:
if (shuftiBuildMasks(stops, &aux->mdshufti.lo,
&aux->mdshufti.hi) == -1) {
break;
}
aux->accel_type = ACCEL_MDSSHUFTI;
aux->mdshufti.offset = offset;
aux->mdshufti.len1 = info.ma_len1;
aux->mdshufti.len2 = info.ma_len2;
return;
case MultibyteAccelInfo::MAT_DSHIFTGRAB:
if (shuftiBuildMasks(stops, &aux->mdshufti.lo,
&aux->mdshufti.hi) == -1) {
break;
}
aux->accel_type = ACCEL_MDSGSHUFTI;
aux->mdshufti.offset = offset;
aux->mdshufti.len1 = info.ma_len1;
aux->mdshufti.len2 = info.ma_len2;
return;
default:
// shouldn't happen
assert(0);
return;
}
DEBUG_PRINTF("shufti build failed, falling through\n");
if (outs <= ACCEL_MAX_STOP_CHAR) {
DEBUG_PRINTF("building Truffle for %zu chars\n", outs);
switch (info.ma_type) {
case MultibyteAccelInfo::MAT_LONG:
aux->accel_type = ACCEL_MLTRUFFLE;
aux->mtruffle.offset = offset;
aux->mtruffle.len = info.ma_len1;
truffleBuildMasks(stops, &aux->mtruffle.mask1,
&aux->mtruffle.mask2);
break;
case MultibyteAccelInfo::MAT_LONGGRAB:
aux->accel_type = ACCEL_MLGTRUFFLE;
aux->mtruffle.offset = offset;
aux->mtruffle.len = info.ma_len1;
truffleBuildMasks(stops, &aux->mtruffle.mask1,
&aux->mtruffle.mask2);
break;
case MultibyteAccelInfo::MAT_SHIFT:
aux->accel_type = ACCEL_MSTRUFFLE;
aux->mtruffle.offset = offset;
aux->mtruffle.len = info.ma_len1;
truffleBuildMasks(stops, &aux->mtruffle.mask1,
&aux->mtruffle.mask2);
break;
case MultibyteAccelInfo::MAT_SHIFTGRAB:
aux->accel_type = ACCEL_MSGTRUFFLE;
aux->mtruffle.offset = offset;
aux->mtruffle.len = info.ma_len1;
truffleBuildMasks(stops, &aux->mtruffle.mask1,
&aux->mtruffle.mask2);
break;
case MultibyteAccelInfo::MAT_DSHIFT:
aux->accel_type = ACCEL_MDSTRUFFLE;
aux->mdtruffle.offset = offset;
aux->mdtruffle.len1 = info.ma_len1;
aux->mdtruffle.len2 = info.ma_len2;
truffleBuildMasks(stops, &aux->mtruffle.mask1,
&aux->mdtruffle.mask2);
break;
case MultibyteAccelInfo::MAT_DSHIFTGRAB:
aux->accel_type = ACCEL_MDSGTRUFFLE;
aux->mdtruffle.offset = offset;
aux->mdtruffle.len1 = info.ma_len1;
aux->mdtruffle.len2 = info.ma_len2;
truffleBuildMasks(stops, &aux->mtruffle.mask1,
&aux->mdtruffle.mask2);
break;
default:
// shouldn't happen
assert(0);
return;
}
return;
}
DEBUG_PRINTF("unable to accelerate multibyte case with %zu outs\n", outs);
}
bool buildAccelAux(const AccelInfo &info, AccelAux *aux) {
assert(aux->accel_type == ACCEL_NONE);
if (info.single_stops.none()) {
DEBUG_PRINTF("picked red tape\n");
aux->accel_type = ACCEL_RED_TAPE;
aux->generic.offset = info.single_offset;
} else {
}
if (aux->accel_type == ACCEL_NONE) {
buildAccelMulti(info, aux);
}
if (aux->accel_type == ACCEL_NONE) {
buildAccelDouble(info, aux);
}
if (aux->accel_type == ACCEL_NONE) {

View File

@ -32,6 +32,7 @@
#include "ue2common.h"
#include "util/charreach.h"
#include "util/ue2_containers.h"
#include "nfagraph/ng_limex_accel.h"
union AccelAux;
@ -39,7 +40,9 @@ namespace ue2 {
struct AccelInfo {
AccelInfo() : single_offset(0U), double_offset(0U),
single_stops(CharReach::dot()) {}
single_stops(CharReach::dot()),
multiaccel_offset(0), ma_len1(0), ma_len2(0),
ma_type(MultibyteAccelInfo::MAT_NONE) {}
u32 single_offset; /**< offset correction to apply to single schemes */
u32 double_offset; /**< offset correction to apply to double schemes */
CharReach double_stop1; /**< single-byte accel stop literals for double
@ -47,6 +50,11 @@ struct AccelInfo {
flat_set<std::pair<u8, u8>> double_stop2; /**< double-byte accel stop
* literals */
CharReach single_stops; /**< escapes for single byte acceleration */
u32 multiaccel_offset; /**< offset correction to apply to multibyte schemes */
CharReach multiaccel_stops; /**< escapes for multibyte acceleration */
u32 ma_len1; /**< multiaccel len1 */
u32 ma_len2; /**< multiaccel len2 */
MultibyteAccelInfo::multiaccel_type ma_type; /**< multiaccel type */
};
bool buildAccelAux(const AccelInfo &info, AccelAux *aux);

View File

@ -80,9 +80,11 @@ struct precalcAccel {
CharReach double_cr;
flat_set<pair<u8, u8>> double_lits; /* double-byte accel stop literals */
u32 double_offset;
MultibyteAccelInfo ma_info;
};
struct meteor_accel_info {
struct limex_accel_info {
ue2::unordered_set<NFAVertex> accelerable;
map<NFAStateSet, precalcAccel> precalc;
ue2::unordered_map<NFAVertex, flat_set<NFAVertex> > friends;
@ -162,7 +164,7 @@ struct build_info {
bool stateCompression;
const CompileContext &cc;
u32 num_states;
meteor_accel_info accel;
limex_accel_info accel;
};
// Constants for scoring mechanism
@ -334,12 +336,16 @@ void buildReachMapping(const build_info &args, vector<NFAStateSet> &reach,
}
struct AccelBuild {
AccelBuild() : v(NFAGraph::null_vertex()), state(0), offset(0) {}
AccelBuild() : v(NFAGraph::null_vertex()), state(0), offset(0), ma_len1(0),
ma_len2(0), ma_type(MultibyteAccelInfo::MAT_NONE) {}
NFAVertex v;
u32 state;
u32 offset; // offset correction to apply
CharReach stop1; // single-byte accel stop literals
flat_set<pair<u8, u8>> stop2; // double-byte accel stop literals
u32 ma_len1; // multiaccel len1
u32 ma_len2; // multiaccel len2
MultibyteAccelInfo::multiaccel_type ma_type; // multiaccel type
};
static
@ -354,7 +360,12 @@ void findStopLiterals(const build_info &bi, NFAVertex v, AccelBuild &build) {
build.stop1 = CharReach::dot();
} else {
const precalcAccel &precalc = bi.accel.precalc.at(ss);
if (precalc.double_lits.empty()) {
unsigned ma_len = precalc.ma_info.len1 + precalc.ma_info.len2;
if (ma_len >= MULTIACCEL_MIN_LEN) {
build.ma_len1 = precalc.ma_info.len1;
build.stop1 = precalc.ma_info.cr;
build.offset = precalc.ma_info.offset;
} else if (precalc.double_lits.empty()) {
build.stop1 = precalc.single_cr;
build.offset = precalc.single_offset;
} else {
@ -534,7 +545,7 @@ void filterAccelStates(NGHolder &g, const map<u32, NFAVertex> &tops,
}
static
bool containsBadSubset(const meteor_accel_info &accel,
bool containsBadSubset(const limex_accel_info &accel,
const NFAStateSet &state_set, const u32 effective_sds) {
NFAStateSet subset(state_set.size());
for (size_t j = state_set.find_first(); j != state_set.npos;
@ -559,7 +570,8 @@ void doAccelCommon(NGHolder &g,
ue2::unordered_map<NFAVertex, AccelScheme> &accel_map,
const ue2::unordered_map<NFAVertex, u32> &state_ids,
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
const u32 num_states, meteor_accel_info *accel) {
const u32 num_states, limex_accel_info *accel,
const CompileContext &cc) {
vector<CharReach> refined_cr = reduced_cr(g, br_cyclic);
vector<NFAVertex> astates;
@ -607,10 +619,22 @@ void doAccelCommon(NGHolder &g,
DEBUG_PRINTF("accel %u ok with offset %u\n", i, as.offset);
// try multibyte acceleration first
MultibyteAccelInfo mai = nfaCheckMultiAccel(g, states, cc);
precalcAccel &pa = accel->precalc[state_set];
useful |= state_set;
// if we successfully built a multibyte accel scheme, use that
if (mai.type != MultibyteAccelInfo::MAT_NONE) {
pa.ma_info = mai;
DEBUG_PRINTF("multibyte acceleration!\n");
continue;
}
pa.single_offset = as.offset;
pa.single_cr = as.cr;
useful |= state_set;
if (states.size() == 1) {
DoubleAccelInfo b = findBestDoubleAccelInfo(g, states.front());
@ -660,7 +684,7 @@ void fillAccelInfo(build_info &bi) {
filterAccelStates(bi.h, bi.tops, &bi.accel.accel_map);
assert(bi.accel.accel_map.size() <= NFA_MAX_ACCEL_STATES);
doAccelCommon(bi.h, bi.accel.accel_map, bi.state_ids, bi.br_cyclic,
bi.num_states, &bi.accel);
bi.num_states, &bi.accel, bi.cc);
}
/** The AccelAux structure has large alignment specified, and this makes some
@ -672,7 +696,7 @@ static
void buildAccel(const build_info &args, NFAStateSet &accelMask,
NFAStateSet &accelFriendsMask, AccelAuxVector &auxvec,
vector<u8> &accelTable) {
const meteor_accel_info &accel = args.accel;
const limex_accel_info &accel = args.accel;
// Init, all zeroes.
accelMask.resize(args.num_states);
@ -737,8 +761,16 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask,
if (contains(accel.precalc, states)) {
const precalcAccel &precalc = accel.precalc.at(states);
ainfo.single_offset = precalc.single_offset;
ainfo.single_stops = precalc.single_cr;
if (precalc.ma_info.type != MultibyteAccelInfo::MAT_NONE) {
ainfo.ma_len1 = precalc.ma_info.len1;
ainfo.ma_len2 = precalc.ma_info.len2;
ainfo.multiaccel_offset = precalc.ma_info.offset;
ainfo.multiaccel_stops = precalc.ma_info.cr;
ainfo.ma_type = precalc.ma_info.type;
} else {
ainfo.single_offset = precalc.single_offset;
ainfo.single_stops = precalc.single_cr;
}
}
buildAccelAux(ainfo, &aux);

View File

@ -0,0 +1,439 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "multiaccel_compilehelper.h"
using namespace std;
using namespace ue2;
#ifdef DEBUG
/* Printable names of accel_scheme_state values, indexed by enum value. */
static const char *const state_names[] = {
    "FIRST_RUN",
    "SECOND_RUN",
    "WAITING_FOR_GRAB",
    "FIRST_TAIL",
    "SECOND_TAIL",
    "STOPPED",
    "INVALID"
};

/* Printable names of MultibyteAccelInfo::multiaccel_type values, indexed by
 * enum value. */
static const char *const type_names[] = {
    "SHIFT",
    "SHIFTGRAB",
    "DOUBLESHIFT",
    "DOUBLESHIFTGRAB",
    "LONG",
    "LONGGRAB",
    "NONE"
};

/* Debug-only: print the type, state and run/tail lengths of one scheme. */
static
void dumpMultiaccelState(const accel_data &d) {
    const char *type_name = type_names[static_cast<unsigned>(d.type)];
    const char *state_name = state_names[static_cast<unsigned>(d.state)];
    DEBUG_PRINTF("type: %s state: %s len1: %u tlen1: %u len2: %u tlen2: %u\n",
                 type_name, state_name, d.len1, d.tlen1, d.len2, d.tlen2);
}
#endif
/*
 * Halt matching for one scheme. This may render the scheme invalid.
 *
 * Background on the FIRST_TAIL / SECOND_RUN handling: shift matchers are a
 * special case because they have "tails". When a shift matcher reaches a
 * mid/endpoint, tail mode is activated, which looks for more matches to extend
 * the match.
 *
 * For example, consider pattern /a{5}ba{3}/. Under normal circumstances, the
 * long-grab matcher will be picked for this pattern (matching a run of a's,
 * followed by a not-a), because the doubleshift matcher would be confused by
 * consecutive a's and would parse the pattern as a.{0}a.{0}a (two shifts by 1)
 * and throw out the rest of the pattern.
 *
 * With tails, ending the run is deferred until we actually run out of matching
 * characters, so the above pattern will now be parsed by the doubleshift
 * matcher as /a.{3}a.{3}a/ (two shifts by 4).
 *
 * So when stopping a shift matcher that is in the middle of its first tail or
 * second run, the second run cannot be completed; instead the first tail may
 * later be split to obtain a valid second run. That is only possible if the
 * first tail is non-empty.
 */
static
void stop(accel_data &d) {
    const bool double_shift = d.type == MultibyteAccelInfo::MAT_DSHIFT ||
                              d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB;

    switch (d.state) {
    case STATE_STOPPED:
    case STATE_INVALID:
        // nothing left to do
        return;
    case STATE_SECOND_TAIL:
        d.state = STATE_STOPPED;
        return;
    case STATE_FIRST_RUN:
    case STATE_WAITING_FOR_GRAB:
        // only the long matcher may stop part-way through its first run
        d.state = (d.type == MultibyteAccelInfo::MAT_LONG) ? STATE_STOPPED
                                                            : STATE_INVALID;
        return;
    case STATE_FIRST_TAIL:
    case STATE_SECOND_RUN:
        if (double_shift && d.tlen1 == 0) {
            // can't split an empty void...
            d.state = STATE_INVALID;
            return;
        }
        d.len2 = 0;
        d.state = STATE_STOPPED;
        return;
    }
}
/*
 * Fit the runs and tails of a stopped scheme under max_len, folding as much
 * tail as possible into the runs, then mark the scheme invalid if the result
 * is unusable (no second run for a doubleshift scheme, total length not below
 * max_len, or total length below MULTIACCEL_MIN_LEN).
 */
static
void validate(accel_data &d, unsigned max_len) {
    const bool double_shift = d.type == MultibyteAccelInfo::MAT_DSHIFT ||
                              d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB;
    const bool has_second_run = d.len2 > 0;

    // lengths as they stand on entry; exactly one branch below consumes them
    const unsigned first_with_tail = d.len1 + d.tlen1;
    const unsigned both_runs = first_with_tail + d.len2;
    const unsigned everything = both_runs + d.tlen2;

    if (has_second_run && everything < max_len) {
        // case 1: everything fits in
        d.len1 = first_with_tail;
        d.len2 += d.tlen2;
        d.tlen1 = 0;
        d.tlen2 = 0;
    } else if (has_second_run && both_runs < max_len) {
        // case 2: everything but the second tail fits in
        d.len1 = first_with_tail;
        d.tlen1 = 0;
        if (d.tlen2 != 0) {
            // try going for a partial tail
            const int spare = max_len - 1 - d.len1 - d.len2;
            if (spare > 0) {
                d.len2 += spare;
            }
            d.tlen2 = 0;
        }
    } else if (first_with_tail < max_len) {
        // case 3: first run and its tail fits in
        if (double_shift) {
            // split the tail into a second run
            d.len2 = d.tlen1;
        } else {
            d.len1 = first_with_tail;
            d.len2 = 0;
        }
        d.tlen1 = 0;
        d.tlen2 = 0;
    } else if (d.len1 < max_len) {
        // case 4: nothing but the first run fits in
        if (d.tlen1 != 0) {
            // try going for a partial tail
            const int spare = max_len - 1 - d.len1;
            if (spare > 0) {
                d.len1 += spare;
            }
            d.tlen1 = 0;
        }
        d.len2 = 0;
        d.tlen2 = 0;
    }

    if (double_shift && d.len2 == 0) {
        // if we removed our second run, doubleshift matchers are no longer
        // valid
        d.state = STATE_INVALID;
    } else if (d.type == MultibyteAccelInfo::MAT_LONG && d.len1 >= max_len) {
        // long matchers can just stop whenever they want to
        d.len1 = max_len - 1;
    }

    // now, general sanity checks on the final total
    const unsigned total = d.len1 + d.tlen1 + d.len2 + d.tlen2;
    if (total >= max_len || total < MULTIACCEL_MIN_LEN) {
        d.state = STATE_INVALID;
    }
}
/*
 * Advance the state machine of one scheme by one vertex.
 *
 * d is the scheme being advanced, ref_cr is the reference reach the scheme was
 * set up with, and cur_cr is the reach of the vertex now being consumed.
 * Depending on d.type and d.state this extends a run (len1/len2), extends a
 * tail (tlen1/tlen2), moves to another state, or marks the scheme STOPPED or
 * INVALID.
 *
 * The only caller visible here, MultiaccelCompileHelper::advance(), skips
 * schemes that are already STOPPED or INVALID, so those states hit the
 * "shouldn't happen" asserts in the shift matchers.
 *
 * Note the two deliberate fall-throughs: the *GRAB shift variants handle their
 * initial WAITING_FOR_GRAB state themselves (and return), and otherwise share
 * the code of the corresponding non-grab variant.
 */
static
void match(accel_data &d, const CharReach &ref_cr, const CharReach &cur_cr) {
switch (d.type) {
case MultibyteAccelInfo::MAT_LONG:
{
/*
* For long matcher, we want lots of consecutive same-or-subset
* char-reaches
*/
if ((ref_cr & cur_cr) == cur_cr) {
d.len1++;
} else {
d.state = STATE_STOPPED;
}
}
break;
case MultibyteAccelInfo::MAT_LONGGRAB:
{
/*
* For long-grab matcher, we want lots of consecutive same-or-subset
* char-reaches with a negative match in the end.
*/
if ((ref_cr & cur_cr) == cur_cr) {
d.len1++;
} else if (!(ref_cr & cur_cr).any()) {
/* we grabbed, stop immediately */
d.state = STATE_STOPPED;
} else {
/* our run-n-grab was interrupted; mark as invalid */
d.state = STATE_INVALID;
}
}
break;
case MultibyteAccelInfo::MAT_SHIFTGRAB:
{
/*
* For shift-grab matcher, we want two matches separated by anything;
* however the second vertex *must* be a negative (non-overlapping) match.
*
* Shiftgrab matcher is identical to shift except for presence of grab.
*/
if (d.state == STATE_WAITING_FOR_GRAB) {
if ((ref_cr & cur_cr).any()) {
d.state = STATE_INVALID;
} else {
d.state = STATE_FIRST_RUN;
d.len1++;
}
return;
}
}
/* no break, falling through */
case MultibyteAccelInfo::MAT_SHIFT:
{
/*
* For shift-matcher, we want two matches separated by anything.
*/
if (ref_cr == cur_cr) {
// keep matching tail
switch (d.state) {
case STATE_FIRST_RUN:
d.state = STATE_FIRST_TAIL;
break;
case STATE_FIRST_TAIL:
d.tlen1++;
break;
default:
// shouldn't happen
assert(0);
}
} else {
switch (d.state) {
case STATE_FIRST_RUN:
// simply advance
d.len1++;
break;
case STATE_FIRST_TAIL:
// we found a non-matching char after tail, so stop
d.state = STATE_STOPPED;
break;
default:
// shouldn't happen
assert(0);
}
}
}
break;
case MultibyteAccelInfo::MAT_DSHIFTGRAB:
{
/*
* For double shift-grab matcher, we want two matches separated by
* either negative matches or dots; however the second vertex *must*
* be a negative match.
*
* Doubleshiftgrab matcher is identical to doubleshift except for
* presence of grab.
*/
if (d.state == STATE_WAITING_FOR_GRAB) {
if ((ref_cr & cur_cr).any()) {
d.state = STATE_INVALID;
} else {
d.state = STATE_FIRST_RUN;
d.len1++;
}
return;
}
}
/* no break, falling through */
case MultibyteAccelInfo::MAT_DSHIFT:
{
/*
* For double shift matcher, we want three matches, each separated
* by a lot of anything.
*
* Doubleshift matcher is complicated by presence of tails.
*/
if (ref_cr == cur_cr) {
// decide if we are activating second shift or matching tails
switch (d.state) {
case STATE_FIRST_RUN:
d.state = STATE_FIRST_TAIL;
d.len2 = 1; // we're now ready for our second run
break;
case STATE_FIRST_TAIL:
d.tlen1++;
break;
case STATE_SECOND_RUN:
d.state = STATE_SECOND_TAIL;
break;
case STATE_SECOND_TAIL:
d.tlen2++;
break;
default:
// shouldn't happen
assert(0);
}
} else {
switch (d.state) {
case STATE_FIRST_RUN:
d.len1++;
break;
case STATE_FIRST_TAIL:
// start second run
d.state = STATE_SECOND_RUN;
d.len2++;
break;
case STATE_SECOND_RUN:
d.len2++;
break;
case STATE_SECOND_TAIL:
// stop
d.state = STATE_STOPPED;
break;
default:
// shouldn't happen
assert(0);
}
}
}
break;
default:
// shouldn't happen
assert(0);
break;
}
}
/*
 * Set up one state machine per multibyte accel scheme type. Every scheme
 * starts with a first run of length 1; the shift-grab variants start out
 * waiting for their grab, all others start in their first run.
 */
MultiaccelCompileHelper::MultiaccelCompileHelper(const CharReach &ref_cr,
                                                 u32 off, unsigned max_len)
    : cr(ref_cr), offset(off), max_len(max_len) {
    const int num_schemes = static_cast<int>(MultibyteAccelInfo::MAT_MAX);
    accels.resize(num_schemes);

    int type_idx = 0;
    for (accel_data &scheme : accels) {
        scheme.len1 = 1;
        scheme.type =
            static_cast<MultibyteAccelInfo::multiaccel_type>(type_idx);
        ++type_idx;

        const bool starts_with_grab =
            scheme.type == MultibyteAccelInfo::MAT_SHIFTGRAB ||
            scheme.type == MultibyteAccelInfo::MAT_DSHIFTGRAB;
        scheme.state =
            starts_with_grab ? STATE_WAITING_FOR_GRAB : STATE_FIRST_RUN;
    }
}
/* Returns true while at least one scheme is neither stopped nor invalid. */
bool MultiaccelCompileHelper::canAdvance() {
    for (const accel_data &scheme : accels) {
        const bool finished = scheme.state == STATE_STOPPED ||
                              scheme.state == STATE_INVALID;
        if (!finished) {
            return true;
        }
    }
    return false;
}
void MultiaccelCompileHelper::advance(const CharReach &cur_cr) {
for (accel_data &ad : accels) {
if (ad.state == STATE_STOPPED || ad.state == STATE_INVALID) {
continue;
}
match(ad, cr, cur_cr);
#ifdef DEBUG
dumpMultiaccelState(ad);
#endif
}
}
/*
 * Stop and validate every scheme, then return the valid scheme with the
 * greatest len1 + len2. On a tie the scheme that comes later in the list wins.
 * Returns a default-constructed MultibyteAccelInfo if no scheme is valid.
 */
MultibyteAccelInfo MultiaccelCompileHelper::getBestScheme() {
    const accel_data *winner = nullptr;
    int winner_len = 0;

    DEBUG_PRINTF("Stopping multiaccel compile\n");

    for (accel_data &scheme : accels) {
        // stop our matching
        stop(scheme);
        validate(scheme, max_len);
#ifdef DEBUG
        dumpMultiaccelState(scheme);
#endif
        if (scheme.state != STATE_INVALID) {
            DEBUG_PRINTF("Marking as viable\n");

            // TODO: relative strengths of accel schemes? maybe e.g. a shorter
            // long match would in some cases be preferable to a longer
            // double shift match (for example, depending on length)?
            const int scheme_len = scheme.len1 + scheme.len2;
            if (scheme_len >= winner_len) {
                DEBUG_PRINTF("Marking as best\n");
                winner_len = scheme_len;
                winner = &scheme;
            }
        }
    }

    // nothing viable: hand back an empty (MAT_NONE) description
    if (!winner) {
        return MultibyteAccelInfo();
    }

#ifdef DEBUG
    DEBUG_PRINTF("Picked best multiaccel state:\n");
    dumpMultiaccelState(*winner);
#endif
    MultibyteAccelInfo result;
    result.cr = cr;
    result.offset = offset;
    result.len1 = winner->len1;
    result.len2 = winner->len2;
    result.type = winner->type;
    return result;
}

View File

@ -0,0 +1,75 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MULTIACCELCOMPILE_H_
#define MULTIACCELCOMPILE_H_
#include "ue2common.h"
#include "nfagraph/ng_limex_accel.h"
#include <vector>
namespace ue2 {
/*
 * accel scheme state machine
 *
 * NOTE: the debug name table in multiaccel_compilehelper.cpp is indexed by
 * these values, so it has to be kept in the same order.
 */
enum accel_scheme_state {
STATE_FIRST_RUN, /* extending the first run (len1); initial state of non-grab schemes */
STATE_SECOND_RUN, /* extending the second run (len2); doubleshift schemes only */
STATE_WAITING_FOR_GRAB, /* initial state of shift-grab schemes: next reach must not overlap the reference reach */
STATE_FIRST_TAIL, /* counting further matches after the first run (tlen1) */
STATE_SECOND_TAIL, /* counting further matches after the second run (tlen2) */
STATE_STOPPED, /* finished; consumes no further input */
STATE_INVALID /* scheme cannot be used */
};
/* Working state of a single multibyte accel scheme during compilation. */
struct accel_data {
MultibyteAccelInfo::multiaccel_type type = MultibyteAccelInfo::MAT_NONE; /* which scheme this is */
accel_scheme_state state = STATE_INVALID; /* current state machine state */
unsigned len1 = 0; /* length of first run */
unsigned len2 = 0; /* length of second run, if present */
unsigned tlen1 = 0; /* first tail length */
unsigned tlen2 = 0; /* second tail length */
};
/*
 * Runs the state machines of all multibyte accel schemes in parallel over a
 * sequence of vertex reaches and picks the best resulting scheme.
 *
 * Usage: construct, call advance() with the reach of each successive vertex
 * while canAdvance() returns true, then call getBestScheme().
 */
class MultiaccelCompileHelper {
private:
    /* Reference reach all schemes are matched against. Held by value so that
     * the helper stays valid even if it was constructed from a temporary or
     * outlives the CharReach it was constructed from. */
    CharReach cr;
    u32 offset; /* offset reported in the resulting MultibyteAccelInfo */
    std::vector<accel_data> accels; /* one entry per scheme type */
    unsigned max_len; /* schemes must end up shorter than this */

public:
    MultiaccelCompileHelper(const CharReach &cr, u32 off, unsigned max_len);

    /* true while at least one scheme is neither stopped nor invalid */
    bool canAdvance();

    /* stops and validates all schemes; returns the best valid one, or a
     * default-constructed (MAT_NONE) info if there is none */
    MultibyteAccelInfo getBestScheme();

    /* feeds the reach of the next vertex to every running scheme */
    void advance(const ue2::CharReach &cr);
};
}; // namespace
#endif /* MULTIACCELCOMPILE_H_ */

View File

@ -37,12 +37,15 @@
#include "ue2common.h"
#include "nfa/accel.h"
#include "nfa/multiaccel_compilehelper.h"
#include "util/bitutils.h" // for CASE_CLEAR
#include "util/charreach.h"
#include "util/compile_context.h"
#include "util/container.h"
#include "util/dump_charclass.h"
#include "util/graph_range.h"
#include "util/target_info.h"
#include <algorithm>
#include <map>
@ -647,6 +650,134 @@ NFAVertex get_sds_or_proxy(const NGHolder &g) {
return g.startDs;
}
/** \brief Returns the first successor of \a v other than \a v itself, or the
 * null vertex if there is none. */
static
NFAVertex find_next(const NFAVertex v, const NGHolder &g) {
    for (NFAVertex succ : adjacent_vertices_range(v, g)) {
        if (succ == v) {
            continue; // skip the self-loop
        }
        return succ;
    }
    return NFAGraph::null_vertex();
}
/**
 * \brief Check if the state set \a states is multibyte accelerable (for a
 * limex NFA).
 *
 * Only a single state is supported: a self-looping dot vertex with exactly one
 * proper successor. Leading dot vertices after it are counted as the scheme
 * offset; the first non-dot reach becomes the reference reach. The chain of
 * vertices after that is fed to a MultiaccelCompileHelper until it leaves a
 * simple path (self-loop, more than one successor, special vertex) or no
 * scheme can advance any more.
 *
 * \param g      graph containing the states.
 * \param states candidate state set.
 * \param cc     compile context; its target info selects the maximum scheme
 *               length (AVX2 vs SSE limit).
 * \return the best scheme found, or a default-constructed (MAT_NONE) info.
 */
MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g,
                                      const vector<NFAVertex> &states,
                                      const CompileContext &cc) {
    // For a set of states to be accelerable, we basically have to have only
    // one state to accelerate.
    if (states.size() != 1) {
        DEBUG_PRINTF("can't accelerate multiple states\n");
        return MultibyteAccelInfo();
    }

    // Get our base vertex
    NFAVertex v = states[0];

    // We need the base vertex to be a self-looping dotall leading to exactly
    // one vertex.
    if (!hasSelfLoop(v, g)) {
        DEBUG_PRINTF("base vertex has no self-loop\n");
        return MultibyteAccelInfo();
    }

    if (!g[v].char_reach.all()) {
        DEBUG_PRINTF("can't accelerate anything but dot\n");
        return MultibyteAccelInfo();
    }

    if (proper_out_degree(v, g) != 1) {
        DEBUG_PRINTF("can't accelerate states with multiple successors\n");
        return MultibyteAccelInfo();
    }

    // find our start vertex
    NFAVertex cur = find_next(v, g);
    if (cur == NFAGraph::null_vertex()) {
        DEBUG_PRINTF("invalid start vertex\n");
        return MultibyteAccelInfo();
    }

    bool has_offset = false;
    u32 offset = 0;
    CharReach cr = g[cur].char_reach;

    // if we start with a dot, we have an offset, so defer figuring out the
    // real CharReach for this accel scheme
    if (cr == CharReach::dot()) {
        has_offset = true;
        offset = 1;
    }

    // figure out our offset
    while (has_offset) {
        // vertices have to have no self loops
        if (hasSelfLoop(cur, g)) {
            DEBUG_PRINTF("can't have self-loops\n");
            return MultibyteAccelInfo();
        }

        // we have to have exactly 1 successor to have this acceleration scheme
        if (out_degree(cur, g) != 1) {
            DEBUG_PRINTF("can't have multiple successors\n");
            return MultibyteAccelInfo();
        }

        cur = *adjacent_vertices(cur, g).first;

        // if we met a special vertex, bail out
        if (is_special(cur, g)) {
            DEBUG_PRINTF("can't have special vertices\n");
            return MultibyteAccelInfo();
        }

        // now, get the real char reach
        if (g[cur].char_reach != CharReach::dot()) {
            cr = g[cur].char_reach;
            has_offset = false;
        } else {
            offset++;
        }
    }

    // now, fire up the compilation machinery
    target_t ti = cc.target_info;
    unsigned max_len = ti.has_avx2() ? MULTIACCEL_MAX_LEN_AVX2
                                     : MULTIACCEL_MAX_LEN_SSE;
    MultiaccelCompileHelper mac(cr, offset, max_len);

    while (mac.canAdvance()) {
        // vertices have to have no self loops
        if (hasSelfLoop(cur, g)) {
            break;
        }

        // we have to have exactly 1 successor to have this acceleration scheme
        if (out_degree(cur, g) != 1) {
            break;
        }

        cur = *adjacent_vertices(cur, g).first;

        // if we met a special vertex, bail out
        if (is_special(cur, g)) {
            break;
        }

        mac.advance(g[cur].char_reach);
    }

    MultibyteAccelInfo mai = mac.getBestScheme();
#ifdef DEBUG
    DEBUG_PRINTF("Multibyte acceleration scheme: type: %u offset: %u lengths: %u,%u\n",
                 mai.type, mai.offset, mai.len1, mai.len2);
    for (size_t c = mai.cr.find_first(); c != CharReach::npos;
         c = mai.cr.find_next(c)) {
        DEBUG_PRINTF("multibyte accel char: %zu\n", c);
    }
#endif
    return mai;
}
/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). */
bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
const vector<CharReach> &refined_cr,

View File

@ -50,6 +50,12 @@ namespace ue2 {
#define MAX_MERGED_ACCEL_STOPS 200
#define ACCEL_MAX_STOP_CHAR 24
#define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */
/* multibyte accel schemes with a total length below this are rejected */
#define MULTIACCEL_MIN_LEN 3
/* length limit for multibyte accel schemes when the target has no AVX2;
 * a scheme's total length must stay below it */
#define MULTIACCEL_MAX_LEN_SSE 15
/* length limit for multibyte accel schemes when the target has AVX2;
 * a scheme's total length must stay below it */
#define MULTIACCEL_MAX_LEN_AVX2 31
// forward-declaration of CompileContext
struct CompileContext;
void findAccelFriends(const NGHolder &g, NFAVertex v,
const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
@ -65,6 +71,25 @@ struct DoubleAccelInfo {
DoubleAccelInfo findBestDoubleAccelInfo(const NGHolder &g, NFAVertex v);
/** \brief Description of a multibyte acceleration scheme chosen at compile
 * time. A default-constructed object has type MAT_NONE, i.e. no scheme. */
struct MultibyteAccelInfo {
/* multibyte accel schemes, ordered by strength */
/* NOTE: the debug name table in multiaccel_compilehelper.cpp is indexed by
 * these values and has to be kept in the same order. */
enum multiaccel_type {
MAT_SHIFT,
MAT_SHIFTGRAB,
MAT_DSHIFT,
MAT_DSHIFTGRAB,
MAT_LONG,
MAT_LONGGRAB,
MAT_MAX, /* number of real scheme types */
MAT_NONE = MAT_MAX /* no scheme */
};
CharReach cr; /* reference reach of the scheme; used as its stop characters */
u32 offset = 0; /* offset correction for the scheme */
u32 len1 = 0; /* length of the first run */
u32 len2 = 0; /* length of the second run (doubleshift schemes) */
multiaccel_type type = MAT_NONE; /* selected scheme type */
};
struct AccelScheme {
AccelScheme(const CharReach &cr_in, u32 offset_in)
: cr(cr_in), offset(offset_in) {
@ -109,6 +134,11 @@ bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
AccelScheme *as, bool allow_wide);
/** \brief Check if the state set \a verts is multibyte accelerable (for a
 * limex NFA). Returns a MultibyteAccelInfo of type MAT_NONE if it is not. */
MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g,
const std::vector<NFAVertex> &verts,
const CompileContext &cc);
} // namespace ue2
#endif