diff --git a/CMakeLists.txt b/CMakeLists.txt
index db123c1b..1abab0fe 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -599,6 +599,8 @@ SET (hs_SRCS
     src/nfa/mpv_internal.h
     src/nfa/mpvcompile.cpp
     src/nfa/mpvcompile.h
+    src/nfa/multiaccel_compilehelper.cpp
+    src/nfa/multiaccel_compilehelper.h
     src/nfa/nfa_api.h
     src/nfa/nfa_api_queue.h
     src/nfa/nfa_api_util.h
diff --git a/src/nfa/accelcompile.cpp b/src/nfa/accelcompile.cpp
index 2a22716a..5739618a 100644
--- a/src/nfa/accelcompile.cpp
+++ b/src/nfa/accelcompile.cpp
@@ -169,13 +169,294 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
     aux->accel_type = ACCEL_NONE;
 }
 
+/**
+ * \brief Try to fill \a aux with a multibyte acceleration scheme.
+ *
+ * Schemes are attempted in order: vermicelli (a single stop character, or a
+ * caseless pair), then shufti, then truffle (only when there are at most
+ * ACCEL_MAX_STOP_CHAR stop characters). If \a info carries no multibyte
+ * scheme, or none of the above can be built, aux->accel_type is left as
+ * ACCEL_NONE so that the caller can fall back to other schemes.
+ */
+static
+void buildAccelMulti(const AccelInfo &info, AccelAux *aux) {
+    if (info.ma_type == MultibyteAccelInfo::MAT_NONE) {
+        DEBUG_PRINTF("no multimatch for us :(\n");
+        return;
+    }
+
+    u32 offset = info.multiaccel_offset;
+    const CharReach &stops = info.multiaccel_stops;
+
+    assert(aux->accel_type == ACCEL_NONE);
+    if (stops.all()) {
+        return;
+    }
+
+    size_t outs = stops.count();
+    DEBUG_PRINTF("%zu outs\n", outs);
+    assert(outs && outs < 256);
+
+    switch (info.ma_type) {
+    case MultibyteAccelInfo::MAT_LONG:
+        if (outs == 1) {
+            aux->accel_type = ACCEL_MLVERM;
+            aux->mverm.offset = offset;
+            aux->mverm.c = stops.find_first();
+            aux->mverm.len = info.ma_len1;
+            DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->mverm.c);
+            return;
+        }
+        if (outs == 2 && stops.isCaselessChar()) {
+            aux->accel_type = ACCEL_MLVERM_NOCASE;
+            aux->mverm.offset = offset;
+            aux->mverm.c = stops.find_first() & CASE_CLEAR;
+            aux->mverm.len = info.ma_len1;
+            DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
+                         aux->mverm.c);
+            return;
+        }
+        break;
+    case MultibyteAccelInfo::MAT_LONGGRAB:
+        if (outs == 1) {
+            aux->accel_type = ACCEL_MLGVERM;
+            aux->mverm.offset = offset;
+            aux->mverm.c = stops.find_first();
+            aux->mverm.len = info.ma_len1;
+            DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->mverm.c);
+            return;
+        }
+        if (outs == 2 && stops.isCaselessChar()) {
+            aux->accel_type = ACCEL_MLGVERM_NOCASE;
+            aux->mverm.offset = offset;
+            aux->mverm.c = stops.find_first() & CASE_CLEAR;
+            aux->mverm.len = info.ma_len1;
+            DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
+                         aux->mverm.c);
+            return;
+        }
+        break;
+    case MultibyteAccelInfo::MAT_SHIFT:
+        if (outs == 1) {
+            aux->accel_type = ACCEL_MSVERM;
+            aux->mverm.offset = offset;
+            aux->mverm.c = stops.find_first();
+            aux->mverm.len = info.ma_len1;
+            DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->mverm.c);
+            return;
+        }
+        if (outs == 2 && stops.isCaselessChar()) {
+            aux->accel_type = ACCEL_MSVERM_NOCASE;
+            aux->mverm.offset = offset;
+            aux->mverm.c = stops.find_first() & CASE_CLEAR;
+            aux->mverm.len = info.ma_len1;
+            DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
+                         aux->mverm.c);
+            return;
+        }
+        break;
+    case MultibyteAccelInfo::MAT_SHIFTGRAB:
+        if (outs == 1) {
+            aux->accel_type = ACCEL_MSGVERM;
+            aux->mverm.offset = offset;
+            aux->mverm.c = stops.find_first();
+            aux->mverm.len = info.ma_len1;
+            DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->mverm.c);
+            return;
+        }
+        if (outs == 2 && stops.isCaselessChar()) {
+            aux->accel_type = ACCEL_MSGVERM_NOCASE;
+            aux->mverm.offset = offset;
+            aux->mverm.c = stops.find_first() & CASE_CLEAR;
+            aux->mverm.len = info.ma_len1;
+            DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
+                         aux->mverm.c);
+            return;
+        }
+        break;
+    case MultibyteAccelInfo::MAT_DSHIFT:
+        if (outs == 1) {
+            aux->accel_type = ACCEL_MDSVERM;
+            aux->mdverm.offset = offset;
+            aux->mdverm.c = stops.find_first();
+            aux->mdverm.len1 = info.ma_len1;
+            aux->mdverm.len2 = info.ma_len2;
+            DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->mdverm.c);
+            return;
+        }
+        if (outs == 2 && stops.isCaselessChar()) {
+            aux->accel_type = ACCEL_MDSVERM_NOCASE;
+            aux->mdverm.offset = offset;
+            aux->mdverm.c = stops.find_first() & CASE_CLEAR;
+            aux->mdverm.len1 = info.ma_len1;
+            aux->mdverm.len2 = info.ma_len2;
+            DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
+                         aux->mdverm.c);
+            return;
+        }
+        break;
+    case MultibyteAccelInfo::MAT_DSHIFTGRAB:
+        if (outs == 1) {
+            aux->accel_type = ACCEL_MDSGVERM;
+            aux->mdverm.offset = offset;
+            aux->mdverm.c = stops.find_first();
+            aux->mdverm.len1 = info.ma_len1;
+            aux->mdverm.len2 = info.ma_len2;
+            DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->mdverm.c);
+            return;
+        }
+        if (outs == 2 && stops.isCaselessChar()) {
+            aux->accel_type = ACCEL_MDSGVERM_NOCASE;
+            aux->mdverm.offset = offset;
+            aux->mdverm.c = stops.find_first() & CASE_CLEAR;
+            aux->mdverm.len1 = info.ma_len1;
+            aux->mdverm.len2 = info.ma_len2;
+            DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
+                         aux->mdverm.c);
+            return;
+        }
+        break;
+    default:
+        // shouldn't happen
+        assert(0);
+        return;
+    }
+
+    DEBUG_PRINTF("attempting shufti for %zu chars\n", outs);
+
+    switch (info.ma_type) {
+    case MultibyteAccelInfo::MAT_LONG:
+        if (shuftiBuildMasks(stops, &aux->mshufti.lo,
+                             &aux->mshufti.hi) == -1) {
+            break;
+        }
+        aux->accel_type = ACCEL_MLSHUFTI;
+        aux->mshufti.offset = offset;
+        aux->mshufti.len = info.ma_len1;
+        return;
+    case MultibyteAccelInfo::MAT_LONGGRAB:
+        if (shuftiBuildMasks(stops, &aux->mshufti.lo,
+                             &aux->mshufti.hi) == -1) {
+            break;
+        }
+        aux->accel_type = ACCEL_MLGSHUFTI;
+        aux->mshufti.offset = offset;
+        aux->mshufti.len = info.ma_len1;
+        return;
+    case MultibyteAccelInfo::MAT_SHIFT:
+        if (shuftiBuildMasks(stops, &aux->mshufti.lo,
+                             &aux->mshufti.hi) == -1) {
+            break;
+        }
+        aux->accel_type = ACCEL_MSSHUFTI;
+        aux->mshufti.offset = offset;
+        aux->mshufti.len = info.ma_len1;
+        return;
+    case MultibyteAccelInfo::MAT_SHIFTGRAB:
+        if (shuftiBuildMasks(stops, &aux->mshufti.lo,
+                             &aux->mshufti.hi) == -1) {
+            break;
+        }
+        aux->accel_type = ACCEL_MSGSHUFTI;
+        aux->mshufti.offset = offset;
+        aux->mshufti.len = info.ma_len1;
+        return;
+    case MultibyteAccelInfo::MAT_DSHIFT:
+        if (shuftiBuildMasks(stops, &aux->mdshufti.lo,
+                             &aux->mdshufti.hi) == -1) {
+            break;
+        }
+        aux->accel_type = ACCEL_MDSSHUFTI;
+        aux->mdshufti.offset = offset;
+        aux->mdshufti.len1 = info.ma_len1;
+        aux->mdshufti.len2 = info.ma_len2;
+        return;
+    case MultibyteAccelInfo::MAT_DSHIFTGRAB:
+        if (shuftiBuildMasks(stops, &aux->mdshufti.lo,
+                             &aux->mdshufti.hi) == -1) {
+            break;
+        }
+        aux->accel_type = ACCEL_MDSGSHUFTI;
+        aux->mdshufti.offset = offset;
+        aux->mdshufti.len1 = info.ma_len1;
+        aux->mdshufti.len2 = info.ma_len2;
+        return;
+    default:
+        // shouldn't happen
+        assert(0);
+        return;
+    }
+    DEBUG_PRINTF("shufti build failed, falling through\n");
+
+    if (outs <= ACCEL_MAX_STOP_CHAR) {
+        DEBUG_PRINTF("building Truffle for %zu chars\n", outs);
+        switch (info.ma_type) {
+        case MultibyteAccelInfo::MAT_LONG:
+            aux->accel_type = ACCEL_MLTRUFFLE;
+            aux->mtruffle.offset = offset;
+            aux->mtruffle.len = info.ma_len1;
+            truffleBuildMasks(stops, &aux->mtruffle.mask1,
+                              &aux->mtruffle.mask2);
+            break;
+        case MultibyteAccelInfo::MAT_LONGGRAB:
+            aux->accel_type = ACCEL_MLGTRUFFLE;
+            aux->mtruffle.offset = offset;
+            aux->mtruffle.len = info.ma_len1;
+            truffleBuildMasks(stops, &aux->mtruffle.mask1,
+                              &aux->mtruffle.mask2);
+            break;
+        case MultibyteAccelInfo::MAT_SHIFT:
+            aux->accel_type = ACCEL_MSTRUFFLE;
+            aux->mtruffle.offset = offset;
+            aux->mtruffle.len = info.ma_len1;
+            truffleBuildMasks(stops, &aux->mtruffle.mask1,
+                              &aux->mtruffle.mask2);
+            break;
+        case MultibyteAccelInfo::MAT_SHIFTGRAB:
+            aux->accel_type = ACCEL_MSGTRUFFLE;
+            aux->mtruffle.offset = offset;
+            aux->mtruffle.len = info.ma_len1;
+            truffleBuildMasks(stops, &aux->mtruffle.mask1,
+                              &aux->mtruffle.mask2);
+            break;
+        case MultibyteAccelInfo::MAT_DSHIFT:
+            aux->accel_type = ACCEL_MDSTRUFFLE;
+            aux->mdtruffle.offset = offset;
+            aux->mdtruffle.len1 = info.ma_len1;
+            aux->mdtruffle.len2 = info.ma_len2;
+            truffleBuildMasks(stops, &aux->mdtruffle.mask1,
+                              &aux->mdtruffle.mask2);
+            break;
+        case MultibyteAccelInfo::MAT_DSHIFTGRAB:
+            aux->accel_type = ACCEL_MDSGTRUFFLE;
+            aux->mdtruffle.offset = offset;
+            aux->mdtruffle.len1 = info.ma_len1;
+            aux->mdtruffle.len2 = info.ma_len2;
+            truffleBuildMasks(stops, &aux->mdtruffle.mask1,
+                              &aux->mdtruffle.mask2);
+            break;
+        default:
+            // shouldn't happen
+            assert(0);
+            return;
+        }
+        return;
+    }
+
+    DEBUG_PRINTF("unable to accelerate multibyte case with %zu outs\n", outs);
+}
+
 bool buildAccelAux(const AccelInfo &info, AccelAux *aux) {
     assert(aux->accel_type == ACCEL_NONE);
     if (info.single_stops.none()) {
         DEBUG_PRINTF("picked red tape\n");
         aux->accel_type = ACCEL_RED_TAPE;
         aux->generic.offset = info.single_offset;
-    } else {
+    }
+    if (aux->accel_type == ACCEL_NONE) {
+        buildAccelMulti(info, aux);
+    }
+    if (aux->accel_type == ACCEL_NONE) {
         buildAccelDouble(info, aux);
     }
     if (aux->accel_type == ACCEL_NONE) {
diff --git a/src/nfa/accelcompile.h b/src/nfa/accelcompile.h
index 12af559c..e9467531 100644
--- a/src/nfa/accelcompile.h
+++ b/src/nfa/accelcompile.h
@@ -32,6 +32,7 @@
 #include "ue2common.h"
 #include "util/charreach.h"
 #include "util/ue2_containers.h"
+#include "nfagraph/ng_limex_accel.h"
 
 union AccelAux;
 
@@ -39,7 +40,9 @@ namespace ue2 {
 
 struct AccelInfo {
     AccelInfo() : single_offset(0U), double_offset(0U),
-                  single_stops(CharReach::dot()) {}
+                  single_stops(CharReach::dot()),
+                  multiaccel_offset(0), ma_len1(0), ma_len2(0),
+                  ma_type(MultibyteAccelInfo::MAT_NONE) {}
     u32 single_offset; /**< offset correction to apply to single schemes */
     u32 double_offset; /**< offset correction to apply to double schemes */
     CharReach double_stop1; /**< single-byte accel stop literals for double
@@ -47,6 +50,11 @@ struct AccelInfo {
     flat_set> double_stop2; /**< double-byte accel stop
                              * literals */
     CharReach single_stops; /**< escapes for single byte acceleration */
+    u32 multiaccel_offset; /**< offset correction to apply to multibyte schemes */
+    CharReach multiaccel_stops; /**< escapes for multibyte acceleration */
+    u32 ma_len1; /**< multiaccel len1 */
+    u32 ma_len2; /**< multiaccel len2 */
+    MultibyteAccelInfo::multiaccel_type ma_type; /**< multiaccel type */
 };
 
 bool buildAccelAux(const AccelInfo &info, AccelAux *aux);
diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp
index a6c34cb6..d3e1a8ee 100644
--- a/src/nfa/limex_compile.cpp
+++ b/src/nfa/limex_compile.cpp
@@ -80,9 +80,11 @@ struct precalcAccel {
     CharReach double_cr;
     flat_set> double_lits; /* double-byte accel stop literals */
     u32 double_offset;
+
+    MultibyteAccelInfo ma_info;
 };
 
-struct meteor_accel_info {
+struct limex_accel_info {
     ue2::unordered_set accelerable;
     map precalc;
     ue2::unordered_map > friends;
@@ -162,7 +164,7 @@ struct build_info {
     bool stateCompression;
     const CompileContext &cc;
     u32 num_states;
-    meteor_accel_info accel;
+    limex_accel_info accel;
 };
 
 // Constants for scoring mechanism
@@ -334,12 +336,16 @@ void buildReachMapping(const build_info &args, vector &reach,
 }
 
 struct AccelBuild {
-    AccelBuild() : v(NFAGraph::null_vertex()), state(0), offset(0) {}
+    AccelBuild() : v(NFAGraph::null_vertex()), state(0), offset(0), ma_len1(0),
+                   ma_len2(0), ma_type(MultibyteAccelInfo::MAT_NONE) {}
     NFAVertex v;
     u32 state;
     u32 offset; // offset correction to apply
     CharReach stop1; // single-byte accel stop literals
     flat_set> stop2; // double-byte accel stop literals
+    u32 ma_len1; // multiaccel len1
+    u32 ma_len2; // multiaccel len2
+    MultibyteAccelInfo::multiaccel_type ma_type; // multiaccel type
 };
 
 static
@@ -354,7 +360,12 @@ void findStopLiterals(const build_info &bi, NFAVertex v, AccelBuild &build) {
         build.stop1 = CharReach::dot();
     } else {
         const precalcAccel &precalc = bi.accel.precalc.at(ss);
-        if (precalc.double_lits.empty()) {
+        unsigned ma_len = precalc.ma_info.len1 + precalc.ma_info.len2;
+        if (ma_len >= MULTIACCEL_MIN_LEN) {
+            build.ma_len1 = precalc.ma_info.len1;
+            build.stop1 = precalc.ma_info.cr;
+            build.offset = precalc.ma_info.offset;
+        } else if (precalc.double_lits.empty()) {
             build.stop1 = precalc.single_cr;
             build.offset = precalc.single_offset;
         } else {
@@ -534,7 +545,7 @@ void filterAccelStates(NGHolder &g, const map &tops,
 }
 
 static
-bool containsBadSubset(const meteor_accel_info &accel,
+bool containsBadSubset(const limex_accel_info &accel,
                        const NFAStateSet &state_set, const u32 effective_sds) {
     NFAStateSet subset(state_set.size());
     for (size_t j = state_set.find_first(); j != state_set.npos;
@@ -559,7 +570,8 @@ void doAccelCommon(NGHolder &g,
                    ue2::unordered_map &accel_map,
                    const ue2::unordered_map &state_ids,
                    const map &br_cyclic,
-                   const u32 num_states, meteor_accel_info *accel) {
+                   const u32 num_states, limex_accel_info *accel,
+                   const CompileContext &cc) {
     vector refined_cr = reduced_cr(g, br_cyclic);
 
     vector astates;
@@ -607,10 +619,22 @@ void doAccelCommon(NGHolder &g,
 
         DEBUG_PRINTF("accel %u ok with offset %u\n", i, as.offset);
 
+        // try multibyte acceleration first
+        MultibyteAccelInfo mai = nfaCheckMultiAccel(g, states, cc);
+
         precalcAccel &pa = accel->precalc[state_set];
+        useful |= state_set;
+
+        // if we successfully built a multibyte accel scheme, use that
+        if (mai.type != MultibyteAccelInfo::MAT_NONE) {
+            pa.ma_info = mai;
+
+            DEBUG_PRINTF("multibyte acceleration!\n");
+            continue;
+        }
+
         pa.single_offset = as.offset;
         pa.single_cr = as.cr;
-        useful |= state_set;
 
         if (states.size() == 1) {
             DoubleAccelInfo b = findBestDoubleAccelInfo(g, states.front());
@@ -660,7 +684,7 @@ void fillAccelInfo(build_info &bi) {
     filterAccelStates(bi.h, bi.tops, &bi.accel.accel_map);
     assert(bi.accel.accel_map.size() <= NFA_MAX_ACCEL_STATES);
     doAccelCommon(bi.h, bi.accel.accel_map, bi.state_ids, bi.br_cyclic,
-                  bi.num_states, &bi.accel);
+                  bi.num_states, &bi.accel, bi.cc);
 }
 
 /** The AccelAux structure has large alignment specified, and this makes some
@@ -672,7 +696,7 @@ static
 void buildAccel(const build_info &args, NFAStateSet &accelMask,
                 NFAStateSet &accelFriendsMask, AccelAuxVector &auxvec,
                 vector &accelTable) {
-    const meteor_accel_info &accel = args.accel;
+    const limex_accel_info &accel = args.accel;
 
     // Init, all zeroes.
     accelMask.resize(args.num_states);
@@ -737,8 +761,16 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask,
 
         if (contains(accel.precalc, states)) {
             const precalcAccel &precalc = accel.precalc.at(states);
-            ainfo.single_offset = precalc.single_offset;
-            ainfo.single_stops = precalc.single_cr;
+            if (precalc.ma_info.type != MultibyteAccelInfo::MAT_NONE) {
+                ainfo.ma_len1 = precalc.ma_info.len1;
+                ainfo.ma_len2 = precalc.ma_info.len2;
+                ainfo.multiaccel_offset = precalc.ma_info.offset;
+                ainfo.multiaccel_stops = precalc.ma_info.cr;
+                ainfo.ma_type = precalc.ma_info.type;
+            } else {
+                ainfo.single_offset = precalc.single_offset;
+                ainfo.single_stops = precalc.single_cr;
+            }
         }
 
         buildAccelAux(ainfo, &aux);
diff --git a/src/nfa/multiaccel_compilehelper.cpp b/src/nfa/multiaccel_compilehelper.cpp
new file mode 100644
index 00000000..f1cf2a4c
--- /dev/null
+++ b/src/nfa/multiaccel_compilehelper.cpp
@@ -0,0 +1,456 @@
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "multiaccel_compilehelper.h"
+
+using namespace std;
+using namespace ue2;
+
+#ifdef DEBUG
+static const char* state_to_str[] = {
+    "FIRST_RUN",
+    "SECOND_RUN",
+    "WAITING_FOR_GRAB",
+    "FIRST_TAIL",
+    "SECOND_TAIL",
+    "STOPPED",
+    "INVALID"
+};
+static const char* type_to_str[] = {
+    "SHIFT",
+    "SHIFTGRAB",
+    "DOUBLESHIFT",
+    "DOUBLESHIFTGRAB",
+    "LONG",
+    "LONGGRAB",
+    "NONE"
+};
+
+static
+void dumpMultiaccelState(const accel_data &d) {
+    DEBUG_PRINTF("type: %s state: %s len1: %u tlen1: %u len2: %u tlen2: %u\n",
+                 type_to_str[(unsigned) d.type],
+                 state_to_str[(unsigned) d.state],
+                 d.len1, d.tlen1, d.len2, d.tlen2);
+}
+#endif
+
+/* stop all the matching. this may render most schemes invalid. */
+static
+void stop(accel_data &d) {
+    switch (d.state) {
+    case STATE_STOPPED:
+    case STATE_INVALID:
+        break;
+    case STATE_FIRST_TAIL:
+    case STATE_SECOND_RUN:
+        /*
+         * Shift matchers are special case, because they have "tails".
+         * When shift matcher reaches a mid/endpoint, tail mode is
+         * activated, which looks for more matches to extend the match.
+         *
+         * For example, consider pattern /a{5}ba{3}/. Under normal circumstances,
+         * long-grab matcher will be picked for this pattern (matching a run of a's,
+         * followed by a not-a), because doubleshift matcher would be confused by
+         * consecutive a's and would parse the pattern as a.{0}a.{0}a (two shifts
+         * by 1) and throw out the rest of the pattern.
+         *
+         * With tails, we defer ending the run until we actually run out of
+         * matching characters, so the above pattern will now be parsed by
+         * doubleshift matcher as /a.{3}a.{3}a/ (two shifts by 4).
+         *
+         * So if we are stopping shift matchers, we should check if we aren't in
+         * the process of matching first tail or second run. If we are, we can't
+         * finish the second run as we are stopping, but we can try and split
+         * the first tail instead to obtain a valid second run.
+         */
+        if ((d.type == MultibyteAccelInfo::MAT_DSHIFT ||
+                d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) && d.tlen1 == 0) {
+            // can't split an empty void...
+            d.state = STATE_INVALID;
+            break;
+        }
+        d.len2 = 0;
+        d.state = STATE_STOPPED;
+        break;
+    case STATE_SECOND_TAIL:
+        d.state = STATE_STOPPED;
+        break;
+    case STATE_WAITING_FOR_GRAB:
+    case STATE_FIRST_RUN:
+        if (d.type == MultibyteAccelInfo::MAT_LONG) {
+            d.state = STATE_STOPPED;
+        } else {
+            d.state = STATE_INVALID;
+        }
+        break;
+    }
+}
+
+/*
+ * Fit the run and tail lengths of a scheme under max_len: tails are folded
+ * into their runs where they fit (for doubleshift schemes the first tail may
+ * instead be split off as the second run), and the scheme is marked invalid
+ * if the result is too long or shorter than MULTIACCEL_MIN_LEN.
+ */
+static
+void validate(accel_data &d, unsigned max_len) {
+    // try and fit in all our tails
+    if (d.len1 + d.tlen1 + d.len2 + d.tlen2 < max_len && d.len2 > 0) {
+        // case 1: everything fits in
+        d.len1 += d.tlen1;
+        d.len2 += d.tlen2;
+        d.tlen1 = 0;
+        d.tlen2 = 0;
+    } else if (d.len1 + d.tlen1 + d.len2 < max_len && d.len2 > 0) {
+        // case 2: everything but the second tail fits in
+        d.len1 += d.tlen1;
+        d.tlen1 = 0;
+        // try going for a partial tail
+        if (d.tlen2 != 0) {
+            int new_tlen2 = max_len - 1 - d.len1 - d.len2;
+            if (new_tlen2 > 0) {
+                d.len2 += new_tlen2;
+            }
+            d.tlen2 = 0;
+        }
+    } else if (d.len1 + d.tlen1 < max_len) {
+        // case 3: first run and its tail fits in
+        if (d.type == MultibyteAccelInfo::MAT_DSHIFT ||
+                d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) {
+            // split the tail into a second run
+            d.len2 = d.tlen1;
+        } else {
+            d.len1 += d.tlen1;
+            d.len2 = 0;
+        }
+        d.tlen1 = 0;
+        d.tlen2 = 0;
+    } else if (d.len1 < max_len) {
+        // case 4: nothing but the first run fits in
+        // try going for a partial tail
+        if (d.tlen1 != 0) {
+            int new_tlen1 = max_len - 1 - d.len1;
+            if (new_tlen1 > 0) {
+                d.len1 += new_tlen1;
+            }
+            d.tlen1 = 0;
+        }
+        d.len2 = 0;
+        d.tlen2 = 0;
+    }
+    // if we removed our second run, doubleshift matchers are no longer valid
+    if ((d.type == MultibyteAccelInfo::MAT_DSHIFT ||
+            d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) && d.len2 == 0) {
+        d.state = STATE_INVALID;
+    } else if ((d.type == MultibyteAccelInfo::MAT_LONG) && d.len1 >= max_len) {
+        // long matchers can just stop whenever they want to
+        d.len1 = max_len - 1;
+    }
+
+    // now, general sanity checks
+    if ((d.len1 + d.tlen1 + d.len2 + d.tlen2) >= max_len) {
+        d.state = STATE_INVALID;
+    }
+    if ((d.len1 + d.tlen1 + d.len2 + d.tlen2) < MULTIACCEL_MIN_LEN) {
+        d.state = STATE_INVALID;
+    }
+}
+
+/*
+ * Feed the next char reach (cur_cr) to the state machine of a single scheme,
+ * comparing it against the reference char reach (ref_cr).
+ */
+static
+void match(accel_data &d, const CharReach &ref_cr, const CharReach &cur_cr) {
+    switch (d.type) {
+    case MultibyteAccelInfo::MAT_LONG:
+        {
+            /*
+             * For long matcher, we want lots of consecutive same-or-subset
+             * char-reaches
+             */
+            if ((ref_cr & cur_cr) == cur_cr) {
+                d.len1++;
+            } else {
+                d.state = STATE_STOPPED;
+            }
+        }
+        break;
+
+    case MultibyteAccelInfo::MAT_LONGGRAB:
+        {
+            /*
+             * For long-grab matcher, we want lots of consecutive same-or-subset
+             * char-reaches with a negative match in the end.
+             */
+            if ((ref_cr & cur_cr) == cur_cr) {
+                d.len1++;
+            } else if (!(ref_cr & cur_cr).any()) {
+                /* we grabbed, stop immediately */
+                d.state = STATE_STOPPED;
+            } else {
+                /* our run-n-grab was interrupted; mark as invalid */
+                d.state = STATE_INVALID;
+            }
+        }
+        break;
+
+    case MultibyteAccelInfo::MAT_SHIFTGRAB:
+        {
+            /*
+             * For shift-grab matcher, we want two matches separated by anything;
+             * however the second vertex *must* be a negative (non-overlapping) match.
+             *
+             * Shiftgrab matcher is identical to shift except for presence of grab.
+             */
+            if (d.state == STATE_WAITING_FOR_GRAB) {
+                if ((ref_cr & cur_cr).any()) {
+                    d.state = STATE_INVALID;
+                } else {
+                    d.state = STATE_FIRST_RUN;
+                    d.len1++;
+                }
+                return;
+            }
+        }
+        /* no break, falling through */
+    case MultibyteAccelInfo::MAT_SHIFT:
+        {
+            /*
+             * For shift-matcher, we want two matches separated by anything.
+             */
+            if (ref_cr == cur_cr) {
+                // keep matching tail
+                switch (d.state) {
+                case STATE_FIRST_RUN:
+                    d.state = STATE_FIRST_TAIL;
+                    break;
+                case STATE_FIRST_TAIL:
+                    d.tlen1++;
+                    break;
+                default:
+                    // shouldn't happen
+                    assert(0);
+                }
+            } else {
+                switch (d.state) {
+                case STATE_FIRST_RUN:
+                    // simply advance
+                    d.len1++;
+                    break;
+                case STATE_FIRST_TAIL:
+                    // we found a non-matching char after tail, so stop
+                    d.state = STATE_STOPPED;
+                    break;
+                default:
+                    // shouldn't happen
+                    assert(0);
+                }
+            }
+        }
+        break;
+
+    case MultibyteAccelInfo::MAT_DSHIFTGRAB:
+        {
+            /*
+             * For double shift-grab matcher, we want two matches separated by
+             * either negative matches or dots; however the second vertex *must*
+             * be a negative match.
+             *
+             * Doubleshiftgrab matcher is identical to doubleshift except for
+             * presence of grab.
+             */
+            if (d.state == STATE_WAITING_FOR_GRAB) {
+                if ((ref_cr & cur_cr).any()) {
+                    d.state = STATE_INVALID;
+                } else {
+                    d.state = STATE_FIRST_RUN;
+                    d.len1++;
+                }
+                return;
+            }
+        }
+        /* no break, falling through */
+    case MultibyteAccelInfo::MAT_DSHIFT:
+        {
+            /*
+             * For double shift matcher, we want three matches, each separated
+             * by a lot of anything.
+             *
+             * Doubleshift matcher is complicated by presence of tails.
+             */
+            if (ref_cr == cur_cr) {
+                // decide if we are activating second shift or matching tails
+                switch (d.state) {
+                case STATE_FIRST_RUN:
+                    d.state = STATE_FIRST_TAIL;
+                    d.len2 = 1; // we're now ready for our second run
+                    break;
+                case STATE_FIRST_TAIL:
+                    d.tlen1++;
+                    break;
+                case STATE_SECOND_RUN:
+                    d.state = STATE_SECOND_TAIL;
+                    break;
+                case STATE_SECOND_TAIL:
+                    d.tlen2++;
+                    break;
+                default:
+                    // shouldn't happen
+                    assert(0);
+                }
+            } else {
+                switch (d.state) {
+                case STATE_FIRST_RUN:
+                    d.len1++;
+                    break;
+                case STATE_FIRST_TAIL:
+                    // start second run
+                    d.state = STATE_SECOND_RUN;
+                    d.len2++;
+                    break;
+                case STATE_SECOND_RUN:
+                    d.len2++;
+                    break;
+                case STATE_SECOND_TAIL:
+                    // stop
+                    d.state = STATE_STOPPED;
+                    break;
+                default:
+                    // shouldn't happen
+                    assert(0);
+                }
+            }
+        }
+        break;
+
+    default:
+        // shouldn't happen
+        assert(0);
+        break;
+    }
+}
+
+MultiaccelCompileHelper::MultiaccelCompileHelper(const CharReach &ref_cr, u32 off,
+                                                 unsigned max_len) :
+        cr(ref_cr), offset(off), max_len(max_len) {
+    int accel_num = (int) MultibyteAccelInfo::MAT_MAX;
+    accels.resize(accel_num);
+
+    // mark everything as valid
+    for (int i = 0; i < accel_num; i++) {
+        accel_data &ad = accels[i];
+        ad.len1 = 1;
+        ad.type = (MultibyteAccelInfo::multiaccel_type) i;
+
+        /* for shift-grab matchers, we are waiting for the grab right at the start */
+        if (ad.type == MultibyteAccelInfo::MAT_SHIFTGRAB
+                || ad.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) {
+            ad.state = STATE_WAITING_FOR_GRAB;
+        } else {
+            ad.state = STATE_FIRST_RUN;
+        }
+    }
+}
+
+/* true while at least one scheme is neither stopped nor invalid */
+bool MultiaccelCompileHelper::canAdvance() {
+    for (const accel_data &ad : accels) {
+        if (ad.state != STATE_STOPPED && ad.state != STATE_INVALID) {
+            return true;
+        }
+    }
+    return false;
+}
+
+/* feed the next char reach to every scheme that is still running */
+void MultiaccelCompileHelper::advance(const CharReach &cur_cr) {
+    for (accel_data &ad : accels) {
+        if (ad.state == STATE_STOPPED || ad.state == STATE_INVALID) {
+            continue;
+        }
+        match(ad, cr, cur_cr);
+#ifdef DEBUG
+        dumpMultiaccelState(ad);
+#endif
+    }
+}
+
+/*
+ * Stop and validate every scheme, then return the valid one with the largest
+ * len1 + len2 (on a tie, the later scheme type wins). Returns a
+ * default-constructed MultibyteAccelInfo (type MAT_NONE) if none is valid.
+ */
+MultibyteAccelInfo MultiaccelCompileHelper::getBestScheme() {
+    int best_len = 0;
+    accel_data best;
+
+    DEBUG_PRINTF("Stopping multiaccel compile\n");
+
+    for (accel_data &ad : accels) {
+        // stop our matching
+        stop(ad);
+        validate(ad, max_len);
+
+#ifdef DEBUG
+        dumpMultiaccelState(ad);
+#endif
+
+        // skip invalid schemes
+        if (ad.state == STATE_INVALID) {
+            continue;
+        }
+        DEBUG_PRINTF("Marking as viable\n");
+
+        // TODO: relative strengths of accel schemes? maybe e.g. a shorter
+        // long match would in some cases be preferable to a longer
+        // double shift match (for example, depending on length)?
+        int as_len = ad.len1 + ad.len2;
+        if (as_len >= best_len) {
+            DEBUG_PRINTF("Marking as best\n");
+            best_len = as_len;
+            best = ad;
+        }
+    }
+    // if we found at least one accel scheme, return it
+    if (best.state != STATE_INVALID) {
+#ifdef DEBUG
+        DEBUG_PRINTF("Picked best multiaccel state:\n");
+        dumpMultiaccelState(best);
+#endif
+        MultibyteAccelInfo info;
+        info.cr = cr;
+        info.offset = offset;
+        info.len1 = best.len1;
+        info.len2 = best.len2;
+        info.type = best.type;
+        return info;
+    }
+    return MultibyteAccelInfo();
+}
diff --git a/src/nfa/multiaccel_compilehelper.h b/src/nfa/multiaccel_compilehelper.h
new file mode 100644
index 00000000..27dbe634
--- /dev/null
+++ b/src/nfa/multiaccel_compilehelper.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MULTIACCELCOMPILE_H_
+#define MULTIACCELCOMPILE_H_
+
+#include "ue2common.h"
+
+#include "nfagraph/ng_limex_accel.h"
+
+#include <vector>
+
+namespace ue2 {
+
+/* accel scheme state machine */
+enum accel_scheme_state {
+    STATE_FIRST_RUN,
+    STATE_SECOND_RUN,
+    STATE_WAITING_FOR_GRAB,
+    STATE_FIRST_TAIL,
+    STATE_SECOND_TAIL,
+    STATE_STOPPED,
+    STATE_INVALID
+};
+
+/* state of the state machine for a single scheme type */
+struct accel_data {
+    MultibyteAccelInfo::multiaccel_type type = MultibyteAccelInfo::MAT_NONE;
+    accel_scheme_state state = STATE_INVALID;
+    unsigned len1 = 0; /* length of first run */
+    unsigned len2 = 0; /* length of second run, if present */
+    unsigned tlen1 = 0; /* first tail length */
+    unsigned tlen2 = 0; /* second tail length */
+};
+
+/*
+ * Runs one state machine per multibyte scheme type over a sequence of char
+ * reaches and reports the best scheme that remains valid.
+ *
+ * Note: only a reference to the reference char reach is stored, so the
+ * CharReach passed to the constructor must outlive the helper.
+ */
+class MultiaccelCompileHelper {
+private:
+    const CharReach &cr;
+    u32 offset;
+    std::vector<accel_data> accels;
+    unsigned max_len;
+public:
+    MultiaccelCompileHelper(const CharReach &cr, u32 off, unsigned max_len);
+    bool canAdvance();
+    MultibyteAccelInfo getBestScheme();
+    void advance(const ue2::CharReach &cr);
+};
+
+} // namespace ue2
+
+#endif /* MULTIACCELCOMPILE_H_ */
diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp
index da103f8d..ed9f5bfe 100644
--- a/src/nfagraph/ng_limex_accel.cpp
+++ b/src/nfagraph/ng_limex_accel.cpp
@@ -37,12 +37,15 @@
 #include "ue2common.h"
 
 #include "nfa/accel.h"
+#include "nfa/multiaccel_compilehelper.h"
 
 #include "util/bitutils.h" // for CASE_CLEAR
 #include "util/charreach.h"
+#include "util/compile_context.h"
 #include "util/container.h"
 #include "util/dump_charclass.h"
 #include "util/graph_range.h"
+#include "util/target_info.h"
 
 #include
 #include
@@ -647,6 +650,144 @@ NFAVertex get_sds_or_proxy(const NGHolder &g) {
     return g.startDs;
 }
 
+/** \brief Returns the first successor of \a v other than \a v itself, or the
+ * null vertex if \a v has no such successor. */
+static
+NFAVertex find_next(const NFAVertex v, const NGHolder &g) {
+    NFAVertex res = NFAGraph::null_vertex();
+    for (NFAVertex u : adjacent_vertices_range(v, g)) {
+        if (u != v) {
+            res = u;
+            break;
+        }
+    }
+    return res;
+}
+
+/**
+ * \brief Check if the state set \a states is multibyte-accelerable (for a
+ * limex NFA).
+ *
+ * Only a single self-looping dot state with a proper_out_degree of exactly
+ * one qualifies; the chain of vertices following it is fed to
+ * MultiaccelCompileHelper. Returns a default-constructed MultibyteAccelInfo
+ * (type MAT_NONE) if the state set does not qualify or no scheme fits.
+ */
+MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g,
+                                      const vector<NFAVertex> &states,
+                                      const CompileContext &cc) {
+    // For a set of states to be accelerable, we basically have to have only
+    // one state to accelerate.
+    if (states.size() != 1) {
+        DEBUG_PRINTF("can't accelerate multiple states\n");
+        return MultibyteAccelInfo();
+    }
+
+    // Get our base vertex
+    NFAVertex v = states[0];
+
+    // We need the base vertex to be a self-looping dotall leading to exactly
+    // one vertex.
+    if (!hasSelfLoop(v, g)) {
+        DEBUG_PRINTF("base vertex has no self-loop\n");
+        return MultibyteAccelInfo();
+    }
+
+    if (!g[v].char_reach.all()) {
+        DEBUG_PRINTF("can't accelerate anything but dot\n");
+        return MultibyteAccelInfo();
+    }
+
+    if (proper_out_degree(v, g) != 1) {
+        DEBUG_PRINTF("can't accelerate states with multiple successors\n");
+        return MultibyteAccelInfo();
+    }
+
+    // find our start vertex
+    NFAVertex cur = find_next(v, g);
+    if (cur == NFAGraph::null_vertex()) {
+        DEBUG_PRINTF("invalid start vertex\n");
+        return MultibyteAccelInfo();
+    }
+
+    bool has_offset = false;
+    u32 offset = 0;
+    CharReach cr = g[cur].char_reach;
+
+    // if we start with a dot, we have an offset, so defer figuring out the
+    // real CharReach for this accel scheme
+    if (cr == CharReach::dot()) {
+        has_offset = true;
+        offset = 1;
+    }
+
+    // figure out our offset
+    while (has_offset) {
+        // vertices have to have no self loops
+        if (hasSelfLoop(cur, g)) {
+            DEBUG_PRINTF("can't have self-loops\n");
+            return MultibyteAccelInfo();
+        }
+
+        // we have to have exactly 1 successor to have this acceleration scheme
+        if (out_degree(cur, g) != 1) {
+            DEBUG_PRINTF("can't have multiple successors\n");
+            return MultibyteAccelInfo();
+        }
+
+        cur = *adjacent_vertices(cur, g).first;
+
+        // if we met a special vertex, bail out
+        if (is_special(cur, g)) {
+            DEBUG_PRINTF("can't have special vertices\n");
+            return MultibyteAccelInfo();
+        }
+
+        // now, get the real char reach
+        if (g[cur].char_reach != CharReach::dot()) {
+            cr = g[cur].char_reach;
+            has_offset = false;
+        } else {
+            offset++;
+        }
+    }
+
+    // now, fire up the compilation machinery
+    target_t ti = cc.target_info;
+    unsigned max_len = ti.has_avx2() ? MULTIACCEL_MAX_LEN_AVX2 : MULTIACCEL_MAX_LEN_SSE;
+    MultiaccelCompileHelper mac(cr, offset, max_len);
+
+    while (mac.canAdvance()) {
+        // vertices have to have no self loops
+        if (hasSelfLoop(cur, g)) {
+            break;
+        }
+
+        // we have to have exactly 1 successor to have this acceleration scheme
+        if (out_degree(cur, g) != 1) {
+            break;
+        }
+
+        cur = *adjacent_vertices(cur, g).first;
+
+        // if we met a special vertex, bail out
+        if (is_special(cur, g)) {
+            break;
+        }
+
+        mac.advance(g[cur].char_reach);
+    }
+    MultibyteAccelInfo mai = mac.getBestScheme();
+#ifdef DEBUG
+    DEBUG_PRINTF("Multibyte acceleration scheme: type: %u offset: %u lengths: %u,%u\n",
+                 mai.type, mai.offset, mai.len1, mai.len2);
+    for (size_t c = mai.cr.find_first(); c != CharReach::npos; c = mai.cr.find_next(c)) {
+        DEBUG_PRINTF("multibyte accel char: %zu\n", c);
+    }
+#endif
+    return mai;
+}
+
 /** \brief Check if vertex \a v is an accelerable state (for a limex NFA). */
 bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
                    const vector &refined_cr,
diff --git a/src/nfagraph/ng_limex_accel.h b/src/nfagraph/ng_limex_accel.h
index 005eddd2..61dfaed9 100644
--- a/src/nfagraph/ng_limex_accel.h
+++ b/src/nfagraph/ng_limex_accel.h
@@ -50,6 +50,12 @@ namespace ue2 {
 #define MAX_MERGED_ACCEL_STOPS 200
 #define ACCEL_MAX_STOP_CHAR 24
 #define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */
+#define MULTIACCEL_MIN_LEN 3
+#define MULTIACCEL_MAX_LEN_SSE 15
+#define MULTIACCEL_MAX_LEN_AVX2 31
+
+// forward-declaration of CompileContext
+struct CompileContext;
 
 void findAccelFriends(const NGHolder &g, NFAVertex v,
                       const std::map &br_cyclic,
@@ -65,6 +71,25 @@ struct DoubleAccelInfo {
 
 DoubleAccelInfo findBestDoubleAccelInfo(const NGHolder &g, NFAVertex v);
 
+struct MultibyteAccelInfo {
+    /* multibyte accel schemes, ordered by strength */
+    enum multiaccel_type {
+        MAT_SHIFT,
+        MAT_SHIFTGRAB,
+        MAT_DSHIFT,
+        MAT_DSHIFTGRAB,
+        MAT_LONG,
+        MAT_LONGGRAB,
+        MAT_MAX,
+        MAT_NONE = MAT_MAX
+    };
+    CharReach cr;
+    u32 offset = 0;
+    u32 len1 = 0;
+    u32 len2 = 0;
+    multiaccel_type type = MAT_NONE;
+};
+
 struct AccelScheme {
     AccelScheme(const CharReach &cr_in, u32 offset_in)
         : cr(cr_in), offset(offset_in) {
@@ -109,6 +134,11 @@ bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
                    const std::map &br_cyclic,
                    AccelScheme *as, bool allow_wide);
 
+/** \brief Check if state set \a verts is multibyte accelerable (limex NFA). */
+MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g,
+                                      const std::vector<NFAVertex> &verts,
+                                      const CompileContext &cc);
+
 } // namespace ue2
 
 #endif