// Patch summary: double-byte shufti mask builder -- share buckets, report failure.
//
// Files touched by this diff:
//   src/nfa/shufticompile.cpp  - implementation
//   src/nfa/shufticompile.h    - declaration and doc comment
//   unit/internal/shufti.cpp   - unit tests (and copyright year 2015 -> 2015-2016)
//
// src/nfa/shufticompile.cpp
//   * Adds #include "util/container.h"; the new code calls contains().
//   * Adds static helper or_array(a, b): ORs elements [0]..[3] of b into a and
//     returns a.
//   * Adds #define MAX_BUCKETS 8.
//   * Adds static helper set_buckets_from_mask(nibble_mask, bucket, byte_mask):
//     asserts bucket < MAX_BUCKETS, then for each set bit n of nibble_mask
//     (extracted with findAndClearLSB_32) clears bit `bucket` in byte_mask[n].
//   * shuftiBuildDoubleMasks() now returns bool instead of void, and the
//     assert(onechar.count() + twochar.size() <= 8) is removed.
//     New flow:
//       1. Each two-byte literal becomes four one-hot u16 nibble masks:
//          {first & 0xf, first >> 4, second & 0xf, second >> 4}.
//          Each one-byte literal gets the same first two masks and 0xffff for
//          both second-byte masks (the existing comment calls the second byte
//          a wildcard).
//       2. For i = 0..3, masks are grouped in a map keyed by the mask with
//          element i zeroed; masks with the same key are combined with
//          or_array(), so literals differing only in nibble position i share
//          one entry.
//       3. If more than MAX_BUCKETS entries remain, a debug message is printed
//          and the function returns false.
//       4. Otherwise entry number i is written as bucket i into lo1_a, hi1_a,
//          lo2_a and hi2_a via set_buckets_from_mask(); the arrays are copied
//          to lo1/lo2/hi1/hi2 with memcpy and the function returns true.
//
// src/nfa/shufticompile.h
//   * Declaration changed to return bool, with a doc comment stating that false
//     means the masks could not be built (too many buckets required).
//
// unit/internal/shufti.cpp
//   * Existing DoubleShufti tests store the result in `bool rv` and
//     ASSERT_TRUE(rv).
//   * New BuildMask6: 12 literals ({a, B, A, b} x {z, y, x}); expects success
//     and, for every literal, that lo1 | hi1 | lo2 | hi2 at its nibbles != 0xff.
//   * New BuildMask7: 12 literals ('a','b'), ('c','d'), ... ('w','x'); expects
//     the builder to return false.
//
// NOTE(review): in this copy the original line breaks are collapsed and the
// angle-bracket contents (template arguments such as those on `array`,
// `flat_set`, `vector`, `map`, and one #include target) appear to be missing.
// Presumably an extraction artefact -- confirm against the upstream commit
// before trying to apply this patch.
diff --git a/src/nfa/shufticompile.cpp b/src/nfa/shufticompile.cpp index 05072a44..f909a0b8 100644 --- a/src/nfa/shufticompile.cpp +++ b/src/nfa/shufticompile.cpp @@ -32,6 +32,7 @@ #include "shufticompile.h" #include "ue2common.h" #include "util/charreach.h" +#include "util/container.h" #include "util/ue2_containers.h" #include @@ -107,13 +108,35 @@ int shuftiBuildMasks(const CharReach &c, m128 *lo, m128 *hi) { return bit_index; } -void shuftiBuildDoubleMasks(const CharReach &onechar, +static +array or_array(array a, const array &b) { + a[0] |= b[0]; + a[1] |= b[1]; + a[2] |= b[2]; + a[3] |= b[3]; + + return a; +} + + +#define MAX_BUCKETS 8 +static +void set_buckets_from_mask(u16 nibble_mask, u32 bucket, + array &byte_mask) { + assert(bucket < MAX_BUCKETS); + + u32 mask = nibble_mask; + while (mask) { + u32 n = findAndClearLSB_32(&mask); + byte_mask[n] &= ~(1 << bucket); + } +} + +bool shuftiBuildDoubleMasks(const CharReach &onechar, const flat_set> &twochar, m128 *lo1, m128 *hi1, m128 *lo2, m128 *hi2) { DEBUG_PRINTF("unibytes %zu dibytes %zu\n", onechar.size(), twochar.size()); - assert(onechar.count() + twochar.size() <= 8); - array lo1_a; array lo2_a; array hi1_a; @@ -124,43 +147,63 @@ void shuftiBuildDoubleMasks(const CharReach &onechar, hi1_a.fill(0xff); hi2_a.fill(0xff); - u32 i = 0; - // two-byte literals - for (flat_set>::const_iterator it = twochar.begin(); - it != twochar.end(); ++it, i++) { - DEBUG_PRINTF("%u: %02hhx %02hhx\n", i, it->first, it->second); - u8 b1 = it->first & 0xf; - u8 t1 = it->first >> 4; - u8 b2 = it->second & 0xf; - u8 t2 = it->second >> 4; - - lo1_a[b1] &= ~(1 << i); - hi1_a[t1] &= ~(1 << i); - lo2_a[b2] &= ~(1 << i); - hi2_a[t2] &= ~(1 << i); + vector> nibble_masks; + for (const auto &p : twochar) { + DEBUG_PRINTF("%02hhx %02hhx\n", p.first, p.second); + u16 a_lo = 1U << (p.first & 0xf); + u16 a_hi = 1U << (p.first >> 4); + u16 b_lo = 1U << (p.second & 0xf); + u16 b_hi = 1U << (p.second >> 4); + nibble_masks.push_back({a_lo, a_hi, 
b_lo, b_hi}); } // one-byte literals (second byte is a wildcard) for (size_t it = onechar.find_first(); it != CharReach::npos; - it = onechar.find_next(it), i++) { - DEBUG_PRINTF("%u: %02hhx\n", i, (u8)it); - u8 b1 = it & 0xf; - u8 t1 = it >> 4; + it = onechar.find_next(it)) { + DEBUG_PRINTF("%02hhx\n", (u8)it); + nibble_masks.push_back({(u16)(1U << (it & 0xf)), (u16)(1U << (it >> 4)), + 0xffff, 0xffff}); + } - lo1_a[b1] &= ~(1 << i); - hi1_a[t1] &= ~(1 << i); - - for (int j = 0; j < 16; j++) { - lo2_a[j] &= ~(1 << i); - hi2_a[j] &= ~(1 << i); + // try to merge strings into shared buckets + for (u32 i = 0; i < 4; i++) { + map, array> new_masks; + for (const auto &a : nibble_masks) { + auto key = a; + key[i] = 0; + if (!contains(new_masks, key)) { + new_masks[key] = a; + } else { + new_masks[key] = or_array(new_masks[key], a); + } } + nibble_masks.clear(); + for (const auto &e : new_masks) { + nibble_masks.push_back(e.second); + } + } + + if (nibble_masks.size() > MAX_BUCKETS) { + DEBUG_PRINTF("too many buckets needed (%zu)\n", nibble_masks.size()); + return false; + } + + u32 i = 0; + for (const auto &a : nibble_masks) { + set_buckets_from_mask(a[0], i, lo1_a); + set_buckets_from_mask(a[1], i, hi1_a); + set_buckets_from_mask(a[2], i, lo2_a); + set_buckets_from_mask(a[3], i, hi2_a); + i++; } memcpy(lo1, lo1_a.data(), sizeof(m128)); memcpy(lo2, lo2_a.data(), sizeof(m128)); memcpy(hi1, hi1_a.data(), sizeof(m128)); memcpy(hi2, hi2_a.data(), sizeof(m128)); + + return true; } #ifdef DUMP_SUPPORT diff --git a/src/nfa/shufticompile.h b/src/nfa/shufticompile.h index 2795b73a..59126b0b 100644 --- a/src/nfa/shufticompile.h +++ b/src/nfa/shufticompile.h @@ -50,7 +50,11 @@ namespace ue2 { */ int shuftiBuildMasks(const CharReach &chars, m128 *lo, m128 *hi); -void shuftiBuildDoubleMasks(const CharReach &onechar, +/** \brief Double-byte variant + * + * Returns false if we are unable to build the masks (too many buckets required) + */ +bool shuftiBuildDoubleMasks(const CharReach 
&onechar, const flat_set> &twochar, m128 *lo1, m128 *hi1, m128 *lo2, m128 *hi2); diff --git a/unit/internal/shufti.cpp b/unit/internal/shufti.cpp index ed48ad5c..b8d77d37 100644 --- a/unit/internal/shufti.cpp +++ b/unit/internal/shufti.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -283,7 +283,9 @@ TEST(DoubleShufti, BuildMask1) { lits.insert(make_pair('a', 'B')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, &lo2m, &hi2m); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, + &lo2m, &hi2m); + ASSERT_TRUE(rv); u8 *lo1 = (u8 *)&lo1m; u8 *lo2 = (u8 *)&lo2m; @@ -324,7 +326,9 @@ TEST(DoubleShufti, BuildMask2) { lits.insert(make_pair('a','z')); lits.insert(make_pair('B','z')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, &lo2m, &hi2m); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, + &lo2m, &hi2m); + ASSERT_TRUE(rv); u8 *lo1 = (u8 *)&lo1m; u8 *lo2 = (u8 *)&lo2m; @@ -350,7 +354,9 @@ TEST(DoubleShufti, BuildMask4) { lits.insert(make_pair('A','z')); lits.insert(make_pair('b','z')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, &lo2m, &hi2m); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, + &lo2m, &hi2m); + ASSERT_TRUE(rv); u8 *lo1 = (u8 *)&lo1m; u8 *lo2 = (u8 *)&lo2m; @@ -377,7 +383,9 @@ TEST(DoubleShufti, BuildMask5) { CharReach bytes; bytes.set('X'); - shuftiBuildDoubleMasks(bytes, lits, &lo1m, &hi1m, &lo2m, &hi2m); + bool rv = shuftiBuildDoubleMasks(bytes, lits, &lo1m, &hi1m, + &lo2m, &hi2m); + ASSERT_TRUE(rv); u8 *lo1 = (u8 *)&lo1m; u8 *lo2 = (u8 *)&lo2m; @@ -395,6 +403,81 @@ TEST(DoubleShufti, BuildMask5) { lo1['B' % 16] | hi1['B' >> 4] | lo2['X' % 16] | hi2['X' >> 4]); } +TEST(DoubleShufti, BuildMask6) { + m128 lo1m, hi1m, lo2m, hi2m; + + flat_set> lits; + + 
lits.insert(make_pair('a','z')); + lits.insert(make_pair('B','z')); + lits.insert(make_pair('A','z')); + lits.insert(make_pair('b','z')); + lits.insert(make_pair('a','y')); + lits.insert(make_pair('B','y')); + lits.insert(make_pair('A','y')); + lits.insert(make_pair('b','y')); + lits.insert(make_pair('a','x')); + lits.insert(make_pair('B','x')); + lits.insert(make_pair('A','x')); + lits.insert(make_pair('b','x')); + + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, + &lo2m, &hi2m); + ASSERT_TRUE(rv); + + u8 *lo1 = (u8 *)&lo1m; + u8 *lo2 = (u8 *)&lo2m; + u8 *hi1 = (u8 *)&hi1m; + u8 *hi2 = (u8 *)&hi2m; + ASSERT_NE(0xff, + lo1['a' % 16] | hi1['a' >> 4] | lo2['z' % 16] | hi2['z' >> 4]); + ASSERT_NE(0xff, + lo1['A' % 16] | hi1['A' >> 4] | lo2['z' % 16] | hi2['z' >> 4]); + ASSERT_NE(0xff, + lo1['b' % 16] | hi1['b' >> 4] | lo2['z' % 16] | hi2['z' >> 4]); + ASSERT_NE(0xff, + lo1['B' % 16] | hi1['B' >> 4] | lo2['z' % 16] | hi2['z' >> 4]); + ASSERT_NE(0xff, + lo1['a' % 16] | hi1['a' >> 4] | lo2['y' % 16] | hi2['y' >> 4]); + ASSERT_NE(0xff, + lo1['A' % 16] | hi1['A' >> 4] | lo2['y' % 16] | hi2['y' >> 4]); + ASSERT_NE(0xff, + lo1['b' % 16] | hi1['b' >> 4] | lo2['y' % 16] | hi2['y' >> 4]); + ASSERT_NE(0xff, + lo1['B' % 16] | hi1['B' >> 4] | lo2['y' % 16] | hi2['y' >> 4]); + ASSERT_NE(0xff, + lo1['a' % 16] | hi1['a' >> 4] | lo2['x' % 16] | hi2['x' >> 4]); + ASSERT_NE(0xff, + lo1['A' % 16] | hi1['A' >> 4] | lo2['x' % 16] | hi2['x' >> 4]); + ASSERT_NE(0xff, + lo1['b' % 16] | hi1['b' >> 4] | lo2['x' % 16] | hi2['x' >> 4]); + ASSERT_NE(0xff, + lo1['B' % 16] | hi1['B' >> 4] | lo2['x' % 16] | hi2['x' >> 4]); +} + +TEST(DoubleShufti, BuildMask7) { + m128 lo1m, hi1m, lo2m, hi2m; + + flat_set> lits; + + lits.insert(make_pair('a','b')); + lits.insert(make_pair('c','d')); + lits.insert(make_pair('e','f')); + lits.insert(make_pair('g','h')); + lits.insert(make_pair('i','j')); + lits.insert(make_pair('k','l')); + lits.insert(make_pair('m','n')); + 
lits.insert(make_pair('o','p')); + lits.insert(make_pair('q','r')); + lits.insert(make_pair('s','t')); + lits.insert(make_pair('u','v')); + lits.insert(make_pair('w','x')); + + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, + &lo2m, &hi2m); + ASSERT_FALSE(rv); +} + TEST(DoubleShufti, ExecNoMatch1) { m128 lo1, hi1, lo2, hi2; @@ -402,7 +485,9 @@ TEST(DoubleShufti, ExecNoMatch1) { lits.insert(make_pair('a','b')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, + &lo2, &hi2); + ASSERT_TRUE(rv); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -421,7 +506,8 @@ TEST(DoubleShufti, ExecNoMatch1b) { lits.insert(make_pair('b','a')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -441,7 +527,8 @@ TEST(DoubleShufti, ExecNoMatch2) { lits.insert(make_pair('a','b')); lits.insert(make_pair('B','b')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -461,7 +548,8 @@ TEST(DoubleShufti, ExecNoMatch2b) { lits.insert(make_pair('b','a')); lits.insert(make_pair('b','B')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -480,7 +568,8 @@ TEST(DoubleShufti, ExecNoMatch3) { lits.insert(make_pair('V','e')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); 
char t1[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"; @@ -499,7 +588,8 @@ TEST(DoubleShufti, ExecNoMatch3b) { lits.insert(make_pair('e','V')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); char t1[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"; @@ -518,7 +608,8 @@ TEST(DoubleShufti, ExecMatch1) { lits.insert(make_pair('a','b')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -538,7 +629,8 @@ TEST(DoubleShufti, ExecMatch2) { lits.insert(make_pair('a','a')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -559,7 +651,8 @@ TEST(DoubleShufti, ExecMatch3) { lits.insert(make_pair('B','a')); lits.insert(make_pair('a','a')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbBaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -582,8 +675,8 @@ TEST(DoubleShufti, ExecMatch4) { lits.insert(make_pair('C','a')); lits.insert(make_pair('c','a')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); - + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbAaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -624,8 +717,8 @@ TEST(DoubleShufti, 
ExecMatch4b) { lits.insert(make_pair('a','C')); lits.insert(make_pair('a','c')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); - + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbaAaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -663,7 +756,8 @@ TEST(DoubleShufti, ExecMatch5) { lits.insert(make_pair('a','A')); - shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -686,7 +780,8 @@ TEST(DoubleShufti, ExecMatchMixed1) { // just one one-byte literal onebyte.set('a'); - shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -709,7 +804,8 @@ TEST(DoubleShufti, ExecMatchMixed2) { onebyte.set('a'); twobyte.insert(make_pair('x', 'y')); - shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; char t2[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -742,7 +838,8 @@ TEST(DoubleShufti, ExecMatchMixed3) { onebyte.set('a'); twobyte.insert(make_pair('x', 'y')); - shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); + bool rv = shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(rv); const int len = 420; char t1[len + 1];