diff --git a/src/nfa/accel.c b/src/nfa/accel.c index a8fc4e36..8a8694a8 100644 --- a/src/nfa/accel.c +++ b/src/nfa/accel.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -84,6 +84,18 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) { c_end - 1); break; + case ACCEL_DVERM_MASKED: + DEBUG_PRINTF("accel dverm masked %p %p\n", c, c_end); + if (c + 16 + 1 >= c_end) { + return c; + } + + /* need to stop one early to get an accurate end state */ + rv = vermicelliDoubleMaskedExec(accel->dverm.c1, accel->dverm.c2, + accel->dverm.m1, accel->dverm.m2, + c, c_end - 1); + break; + case ACCEL_SHUFTI: DEBUG_PRINTF("accel shufti %p %p\n", c, c_end); if (c + 15 >= c_end) { diff --git a/src/nfa/accel.h b/src/nfa/accel.h index af029566..a13563b6 100644 --- a/src/nfa/accel.h +++ b/src/nfa/accel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -87,7 +87,10 @@ enum AccelType { ACCEL_MSTRUFFLE, ACCEL_MSGTRUFFLE, ACCEL_MDSTRUFFLE, - ACCEL_MDSGTRUFFLE + ACCEL_MDSGTRUFFLE, + /* masked dverm */ + ACCEL_DVERM_MASKED, + }; /** \brief Structure for accel framework. 
*/ @@ -107,6 +110,8 @@ union AccelAux { u8 offset; u8 c1; // uppercase if nocase u8 c2; // uppercase if nocase + u8 m1; // masked variant + u8 m2; // masked variant } dverm; struct { u8 accel_type; diff --git a/src/nfa/accel_dump.cpp b/src/nfa/accel_dump.cpp index 9e4fb7e9..40abd12c 100644 --- a/src/nfa/accel_dump.cpp +++ b/src/nfa/accel_dump.cpp @@ -66,6 +66,8 @@ const char *accelName(u8 accel_type) { return "double-vermicelli"; case ACCEL_DVERM_NOCASE: return "double-vermicelli nocase"; + case ACCEL_DVERM_MASKED: + return "double-vermicelli masked"; case ACCEL_RVERM: return "reverse vermicelli"; case ACCEL_RVERM_NOCASE: @@ -247,6 +249,10 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) { case ACCEL_RDVERM_NOCASE: fprintf(f, " [\\x%02hhx\\x%02hhx]\n", accel.dverm.c1, accel.dverm.c2); break; + case ACCEL_DVERM_MASKED: + fprintf(f, " [\\x%02hhx\\x%02hhx] & [\\x%02hhx\\x%02hhx]\n", + accel.dverm.c1, accel.dverm.c2, accel.dverm.m1, accel.dverm.m2); + break; case ACCEL_SHUFTI: { fprintf(f, "\n"); dumpShuftiMasks(f, accel.shufti.lo, accel.shufti.hi); diff --git a/src/nfa/accelcompile.cpp b/src/nfa/accelcompile.cpp index 6f3b6e8a..a9281c13 100644 --- a/src/nfa/accelcompile.cpp +++ b/src/nfa/accelcompile.cpp @@ -94,6 +94,48 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) { DEBUG_PRINTF("unable to accelerate case with %zu outs\n", outs); } +bool buildDvermMask(const flat_set<pair<u8, u8>> &escape_set, u8 *m1_out, + u8 *m2_out) { + u8 a1 = 0xff; + u8 a2 = 0xff; + u8 b1 = 0xff; + u8 b2 = 0xff; + + for (const auto &e : escape_set) { + DEBUG_PRINTF("%0hhx %0hhx\n", e.first, e.second); + a1 &= e.first; + b1 &= ~e.first; + a2 &= e.second; + b2 &= ~e.second; + } + + u8 m1 = a1 | b1; + u8 m2 = a2 | b2; + + u32 holes1 = 8 - popcount32(m1); + u32 holes2 = 8 - popcount32(m2); + + DEBUG_PRINTF("aaaa %0hhx %0hhx\n", a1, a2); + DEBUG_PRINTF("bbbb %0hhx %0hhx\n", b1, b2); + DEBUG_PRINTF("mask %0hhx %0hhx\n", m1, m2); + + assert(holes1 <= 8 && holes2 <= 8); +
assert(escape_set.size() <= 1U << (holes1 + holes2)); + if (escape_set.size() != 1U << (holes1 + holes2)) { + return false; + } + + if (m1_out) { + *m1_out = m1; + } + if (m2_out) { + *m2_out = m2; + } + + return true; +} + +static bool isCaselessDouble(const flat_set<pair<u8, u8>> &stop) { // test for vector containing if (stop.size() != 4) { @@ -148,6 +190,23 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) { return; } + if (outs1 == 0) { + u8 m1; + u8 m2; + + if (buildDvermMask(info.double_stop2, &m1, &m2)) { + aux->accel_type = ACCEL_DVERM_MASKED; + aux->dverm.offset = offset; + aux->dverm.c1 = info.double_stop2.begin()->first & m1; + aux->dverm.c2 = info.double_stop2.begin()->second & m2; + aux->dverm.m1 = m1; + aux->dverm.m2 = m2; + DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n", + aux->dverm.c1, aux->dverm.c2); + return; + } + } + if (outs1 + outs2 <= 8) { if (outs1 < outs2 && outs1 <= 2) { // Heuristic from UE-438. DEBUG_PRINTF("building double-shufti for %zu one-byte and %zu" diff --git a/src/nfa/accelcompile.h b/src/nfa/accelcompile.h index d479a545..9b30146c 100644 --- a/src/nfa/accelcompile.h +++ b/src/nfa/accelcompile.h @@ -56,8 +56,6 @@ struct MultibyteAccelInfo { multiaccel_type type = MAT_NONE; }; -bool isCaselessDouble(const flat_set<std::pair<u8, u8>> &stop); - struct AccelInfo { AccelInfo() : single_offset(0U), double_offset(0U), single_stops(CharReach::dot()), @@ -79,6 +77,10 @@ struct AccelInfo { bool buildAccelAux(const AccelInfo &info, AccelAux *aux); +/* returns true if the escape set can be handled with a masked double_verm */ +bool buildDvermMask(const flat_set<std::pair<u8, u8>> &escape_set, + u8 *m1_out = nullptr, u8 *m2_out = nullptr); + } // namespace ue2 #endif diff --git a/src/nfa/limex_accel.c b/src/nfa/limex_accel.c index 77ed5ac0..0bfc9d85 100644 --- a/src/nfa/limex_accel.c +++ b/src/nfa/limex_accel.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source
and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -81,6 +81,15 @@ const u8 *accelScan(const union AccelAux *aux, const u8 *ptr, const u8 *end) { ptr = vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 1, ptr, end); break; + case ACCEL_DVERM_MASKED: + DEBUG_PRINTF("double vermicelli masked for " + "0x%02hhx%02hhx/0x%02hhx%02hhx\n", + aux->dverm.c1, aux->dverm.c2, + aux->dverm.m1, aux->dverm.m2); + offset = aux->dverm.offset; + ptr = vermicelliDoubleMaskedExec(aux->dverm.c1, aux->dverm.c2, + aux->dverm.m1, aux->dverm.m2, ptr, end); + break; case ACCEL_MLVERM: DEBUG_PRINTF("long vermicelli for 0x%02hhx\n", aux->mverm.c); offset = aux->mverm.offset; diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 279f454e..87eed250 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -29,6 +29,7 @@ #include "mcclellancompile.h" #include "accel.h" +#include "accelcompile.h" #include "grey.h" #include "mcclellan_internal.h" #include "mcclellancompile_accel.h" @@ -239,6 +240,20 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx); return; } + + u8 m1; + u8 m2; + if (buildDvermMask(info.outs2, &m1, &m2)) { + accel->accel_type = ACCEL_DVERM_MASKED; + accel->dverm.offset = verify_u8(info.outs2_offset); + accel->dverm.c1 = info.outs2.begin()->first & m1; + accel->dverm.c2 = info.outs2.begin()->second & m2; + accel->dverm.m1 = m1; + accel->dverm.m2 = m2; + DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n", + accel->dverm.c1, accel->dverm.c2); + return; + } } if (double_byte_ok(info)) { diff --git a/src/nfa/vermicelli.h b/src/nfa/vermicelli.h index e6957f9f..36d7fb5f 100644 --- a/src/nfa/vermicelli.h +++ b/src/nfa/vermicelli.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and 
binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -185,6 +185,41 @@ const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf, } } +static really_inline +const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2, + const u8 *buf, const u8 *buf_end) { + DEBUG_PRINTF("double verm scan (\\x%02hhx&\\x%02hhx)(\\x%02hhx&\\x%02hhx) " + "over %zu bytes\n", c1, m1, c2, m2, (size_t)(buf_end - buf)); + assert(buf < buf_end); + assert((buf_end - buf) >= VERM_BOUNDARY); + + uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY; + VERM_TYPE chars1 = VERM_SET_FN(c1); + VERM_TYPE chars2 = VERM_SET_FN(c2); + VERM_TYPE mask1 = VERM_SET_FN(m1); + VERM_TYPE mask2 = VERM_SET_FN(m2); + + if (min) { + // Input isn't aligned, so we need to run one iteration with an + // unaligned load, then skip buf forward to the next aligned address. + // There's some small overlap here, but we don't mind scanning it twice + // if we can do it quickly, do we? + const u8 *p = dvermPreconditionMasked(chars1, chars2, mask1, mask2, buf); + if (p) { + return p; + } + + buf += VERM_BOUNDARY - min; + if (buf >= buf_end) { + return buf_end - 1; + } + } + + // Aligned loops from here on in + return dvermSearchAlignedMasked(chars1, chars2, mask1, mask2, c1, c2, m1, m2, + buf, buf_end); +} + // Reverse vermicelli scan. Provides exact semantics and returns (buf - 1) if // character not found. 
static really_inline diff --git a/src/nfa/vermicelli_sse.h b/src/nfa/vermicelli_sse.h index 1a041505..0a30306f 100644 --- a/src/nfa/vermicelli_sse.h +++ b/src/nfa/vermicelli_sse.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -172,6 +172,27 @@ const u8 *dvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2, return buf; } +static really_inline +const u8 *dvermSearchAlignedMasked(m128 chars1, m128 chars2, + m128 mask1, m128 mask2, u8 c1, u8 c2, u8 m1, + u8 m2, const u8 *buf, const u8 *buf_end) { + assert((size_t)buf % 16 == 0); + + for (; buf + 16 < buf_end; buf += 16) { + m128 data = load128(buf); + u32 z = movemask128(and128(eq128(chars1, and128(data, mask1)), + shiftRight8Bits(eq128(chars2, and128(data, mask2))))); + if ((buf[15] & m1) == c1 && (buf[16] & m2) == c2) { + z |= (1 << 15); + } + if (unlikely(z)) { + u32 pos = ctz32(z); + return buf + pos; + } + } + return buf; +} + // returns NULL if not found static really_inline const u8 *dvermPrecondition(m128 chars1, m128 chars2, const u8 *buf) { @@ -205,6 +226,22 @@ const u8 *dvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) { return NULL; } +// returns NULL if not found +static really_inline +const u8 *dvermPreconditionMasked(m128 chars1, m128 chars2, + m128 mask1, m128 mask2, const u8 *buf) { + m128 data = loadu128(buf); // unaligned + u32 z = movemask128(and128(eq128(chars1, and128(data, mask1)), + shiftRight8Bits(eq128(chars2, and128(data, mask2))))); + + /* no fixup of the boundary required - the aligned run will pick it up */ + if (unlikely(z)) { + u32 pos = ctz32(z); + return buf + pos; + } + return NULL; +} + static really_inline const u8 *lastMatchOffset(const u8 *buf_end, u32 z) { assert(z); diff --git a/src/nfagraph/ng_limex_accel.h b/src/nfagraph/ng_limex_accel.h 
index 80b3f0ec..9c77dc67 100644 --- a/src/nfagraph/ng_limex_accel.h +++ b/src/nfagraph/ng_limex_accel.h @@ -96,10 +96,12 @@ struct AccelScheme { return a.double_byte.size() < b.double_byte.size(); } - bool cd_a = isCaselessDouble(a.double_byte); - bool cd_b = isCaselessDouble(b.double_byte); - if (cd_a != cd_b) { - return cd_a > cd_b; + if (!a_dcount) { + bool cd_a = buildDvermMask(a.double_byte); + bool cd_b = buildDvermMask(b.double_byte); + if (cd_a != cd_b) { + return cd_a > cd_b; + } } ORDER_CHECK(double_byte.size()); ORDER_CHECK(double_offset); diff --git a/unit/internal/vermicelli.cpp b/unit/internal/vermicelli.cpp index 4442754e..6866b7c8 100644 --- a/unit/internal/vermicelli.cpp +++ b/unit/internal/vermicelli.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -345,3 +345,179 @@ TEST(NVermicelli, Exec4) { } } +TEST(DoubleVermicelliMasked, ExecNoMatch1) { + std::string t1("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); + const u8 *t1_raw = (const u8 *)t1.c_str(); + + for (size_t i = 0; i < 16; i++) { + for (size_t j = 0; j < 16; j++) { + const u8 *rv = vermicelliDoubleMaskedExec('a', 'b', 0xff, 0xff, + t1_raw + i, + t1_raw + t1.length() - i - j); + + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('B', 'b', 0xff, CASE_CLEAR, + t1_raw + i, + t1_raw + t1.length() - i - j); + + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('A', 'B', CASE_CLEAR, CASE_CLEAR, + t1_raw + i, + t1_raw + t1.length() -i - j); + + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('b', 'B', CASE_CLEAR, 0xff, + t1_raw + i, + t1_raw + t1.length() - i - j); + + 
ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('B', 'A', 0xff, 0xff, + t1_raw + i, + t1_raw + t1.length() - i - j); + + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv); + } + } +} + +TEST(DoubleVermicelliMasked, Exec1) { + std::string t1("bbbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbb"); + const u8 *t1_raw = (const u8 *)t1.c_str(); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = vermicelliDoubleMaskedExec('a', 'b', 0xff, 0xff, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 18, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('A', 'B', CASE_CLEAR, CASE_CLEAR, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 18, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('a', 'B', 0xff, CASE_CLEAR, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 18, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('A', 'b', CASE_CLEAR, 0xff, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 18, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('b', 'a', 0xff, 0xff, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('B', 'A', CASE_CLEAR, CASE_CLEAR, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv); + } +} + +TEST(DoubleVermicelliMasked, Exec2) { + std::string t1("bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbaaaaabbbbbbbb"); + const u8 *t1_raw = (const u8 *)t1.c_str(); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = vermicelliDoubleMaskedExec('a', 'a', 0xff, 0xff, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('A', 'A', CASE_CLEAR, CASE_CLEAR, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('a', 'A', 0xff, 
CASE_CLEAR, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('A', 'a', CASE_CLEAR, 0xff, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv); +} +} + +TEST(DoubleVermicelliMasked, Exec3) { + /* 012345678901234567890123 */ + std::string t1("bbbbbbbbbbbbbbbbbaAaaAAaaaaaaaaaaaaaaaaaabbbbbbbaaaaabbbbbbbb"); + const u8 *t1_raw = (const u8 *)t1.c_str(); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = vermicelliDoubleMaskedExec('A', 'a', 0xff, 0xff, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 18, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('A', 'A', CASE_CLEAR, CASE_CLEAR, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('A', 'A', 0xff, 0xff, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 21, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('a', 'A', 0xff, 0xff, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('a', 'A', 0xff, CASE_CLEAR, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv); + + rv = vermicelliDoubleMaskedExec('A', 'a', CASE_CLEAR, 0xff, + t1_raw + i, + t1_raw + t1.length() - i); + + ASSERT_EQ((size_t)t1_raw + 18, (size_t)rv); +} +} + +TEST(DoubleVermicelliMasked, Exec4) { + std::string t1("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); + const u8 *t1_raw = (const u8 *)t1.c_str(); + + for (size_t i = 0; i < 31; i++) { + t1[48 - i] = 'a'; + t1[48 - i + 1] = 'a'; + const u8 *rv = vermicelliDoubleMaskedExec('a', 'a', 0xff, 0xff, t1_raw, + t1_raw + t1.length()); + + ASSERT_EQ((size_t)&t1_raw[48 - i], (size_t)rv); + + rv = vermicelliDoubleMaskedExec('A', 'A', CASE_CLEAR, CASE_CLEAR, t1_raw, + t1_raw + t1.length()); + + ASSERT_EQ((size_t)&t1_raw[48 - i], 
(size_t)rv); + } +} +