diff --git a/src/nfa/limex_shuffle.hpp b/src/nfa/limex_shuffle.hpp
new file mode 100644
index 00000000..fe303311
--- /dev/null
+++ b/src/nfa/limex_shuffle.hpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2020-2021, VectorCamp PC
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Naive dynamic shuffles.
+ *
+ * These are written with the assumption that the provided masks are sparsely
+ * populated and never contain more than 32 on bits. Other implementations
+ * will be faster and actually correct if these assumptions don't hold true.
+ */
+
+#ifndef LIMEX_SHUFFLE_HPP
+#define LIMEX_SHUFFLE_HPP
+
+#include "ue2common.h"
+#include "util/arch.h"
+#include "util/bitutils.h"
+#include "util/unaligned.h"
+#include "util/supervector/supervector.hpp"
+
+template <uint16_t S>
+u32 packedExtract(SuperVector<S> s, const SuperVector<S> permute, const SuperVector<S> compare);
+
+
+template <>
+really_really_inline
+u32 packedExtract<16>(SuperVector<16> s, const SuperVector<16> permute, const SuperVector<16> compare) {
+    SuperVector<16> shuffled = s.pshufb(permute);
+    SuperVector<16> compared = shuffled & compare;
+    u16 rv = ~compared.eqmask(shuffled);
+    return (u32)rv;
+}
+
+template <>
+really_really_inline
+u32 packedExtract<32>(SuperVector<32> s, const SuperVector<32> permute, const SuperVector<32> compare) {
+    SuperVector<32> shuffled = s.pshufb(permute);
+    SuperVector<32> compared = shuffled & compare;
+    u32 rv = ~compared.eqmask(shuffled);
+    return (u32)((rv >> 16) | (rv & 0xffffU));
+}
+
+template <>
+really_really_inline
+u32 packedExtract<64>(SuperVector<64> s, const SuperVector<64> permute, const SuperVector<64> compare) {
+    SuperVector<64> shuffled = s.pshufb(permute);
+    SuperVector<64> compared = shuffled & compare;
+    u64a rv = ~compared.eqmask(shuffled);
+    rv = rv >> 32 | rv;
+    return (u32)(((rv >> 16) | rv) & 0xffffU);
+}
+
+
+#endif // LIMEX_SHUFFLE_HPP
\ No newline at end of file
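Reviewer note (not part of the patch): all three specializations perform the same step. They pshufb the state bytes selected by `permute` into the low lanes, AND the result with `compare`, then use the byte-equality mask to flag lanes where masking cleared a bit. A rough scalar model of the 16-byte case, assuming SSSE3 pshufb index semantics and an eqmask() that yields one bit per equal byte lane:

    #include <cstdint>

    // Hypothetical scalar sketch of packedExtract<16>, for review only.
    // Bit `lane` of the result is set when the shuffled byte in that lane
    // has at least one bit outside compare[lane].
    uint32_t packedExtract16_model(const uint8_t s[16], const uint8_t permute[16],
                                   const uint8_t compare[16]) {
        uint16_t rv = 0;
        for (int lane = 0; lane < 16; lane++) {
            // pshufb: an index byte with its high bit set produces 0;
            // otherwise the low four bits select a source byte.
            uint8_t shuffled = (permute[lane] & 0x80) ? 0 : s[permute[lane] & 0x0f];
            uint8_t compared = shuffled & compare[lane];
            if (compared != shuffled) {   // ~eqmask(): masking changed the lane
                rv |= (uint16_t)(1U << lane);
            }
        }
        return rv;
    }

The 32- and 64-byte specializations compute the same per-lane mask and then fold the halves together with the trailing shift/OR; that fold relies on the sparse-mask assumption documented in the file header.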
diff --git a/unit/internal/shuffle.cpp b/unit/internal/shuffle.cpp
index b2316bab..d74509d6 100644
--- a/unit/internal/shuffle.cpp
+++ b/unit/internal/shuffle.cpp
@@ -33,6 +33,9 @@
 #include "util/arch.h"
 #include "util/simd_utils.h"
 #include "nfa/limex_shuffle.h"
+#include "util/supervector/supervector.hpp"
+#include "nfa/limex_shuffle.hpp"
+
 
 namespace {
 
@@ -196,6 +199,26 @@ TEST(Shuffle, PackedExtract128_1) {
     }
 }
 
+TEST(Shuffle, PackedExtract_templatized_128_1) {
+    // Try all possible one-bit masks
+    for (unsigned int i = 0; i < 128; i++) {
+        // shuffle a single 1 bit to the front
+        SuperVector<16> permute = SuperVector<16>::Zeroes();
+        SuperVector<16> compare = SuperVector<16>::Zeroes();
+        build_pshufb_masks_onebit(i, &permute.u.v128[0], &compare.u.v128[0]);
+        EXPECT_EQ(1U, packedExtract<16>(setbit<m128>(i), permute, compare));
+        EXPECT_EQ(1U, packedExtract<16>(SuperVector<16>::Ones(), permute, compare));
+        // we should get zero out of these cases
+        EXPECT_EQ(0U, packedExtract<16>(SuperVector<16>::Zeroes(), permute, compare));
+        EXPECT_EQ(0U, packedExtract<16>(not128(setbit<m128>(i)), permute, compare));
+        // we should get zero out of all the other bit positions
+        for (unsigned int j = 0; (j != i && j < 128); j++) {
+            EXPECT_EQ(0U, packedExtract<16>(setbit<m128>(j), permute, compare));
+        }
+    }
+}
+
+
 #if defined(HAVE_AVX2)
 TEST(Shuffle, PackedExtract256_1) {
     // Try all possible one-bit masks
@@ -214,6 +237,27 @@ TEST(Shuffle, PackedExtract256_1) {
         }
     }
 }
+
+
+TEST(Shuffle, PackedExtract_templatized_256_1) {
+    // Try all possible one-bit masks
+    for (unsigned int i = 0; i < 256; i++) {
+        // shuffle a single 1 bit to the front
+        SuperVector<32> permute = SuperVector<32>::Zeroes();
+        SuperVector<32> compare = SuperVector<32>::Zeroes();
+        build_pshufb_masks_onebit(i, &permute.u.v256[0], &compare.u.v256[0]);
+        EXPECT_EQ(1U, packedExtract<32>(setbit<m256>(i), permute, compare));
+        EXPECT_EQ(1U, packedExtract<32>(SuperVector<32>::Ones(), permute, compare));
+        // we should get zero out of these cases
+        EXPECT_EQ(0U, packedExtract<32>(SuperVector<32>::Zeroes(), permute, compare));
+        EXPECT_EQ(0U, packedExtract<32>(not256(setbit<m256>(i)), permute, compare));
+        // we should get zero out of all the other bit positions
+        for (unsigned int j = 0; (j != i && j < 256); j++) {
+            EXPECT_EQ(0U, packedExtract<32>(setbit<m256>(j), permute, compare));
+        }
+    }
+}
+
 #endif
 
 #if defined(HAVE_AVX512)
@@ -234,5 +278,25 @@ TEST(Shuffle, PackedExtract512_1) {
         }
     }
 }
+
+TEST(Shuffle, PackedExtract_templatized_512_1) {
+    // Try all possible one-bit masks
+    for (unsigned int i = 0; i < 512; i++) {
+        // shuffle a single 1 bit to the front
+        SuperVector<64> permute = SuperVector<64>::Zeroes();
+        SuperVector<64> compare = SuperVector<64>::Zeroes();
+        build_pshufb_masks_onebit(i, &permute.u.v512[0], &compare.u.v512[0]);
+        EXPECT_EQ(1U, packedExtract<64>(setbit<m512>(i), permute, compare));
+        EXPECT_EQ(1U, packedExtract<64>(SuperVector<64>::Ones(), permute, compare));
+        // we should get zero out of these cases
+        EXPECT_EQ(0U, packedExtract<64>(SuperVector<64>::Zeroes(), permute, compare));
+        EXPECT_EQ(0U, packedExtract<64>(not512(setbit<m512>(i)), permute, compare));
+        // we should get zero out of all the other bit positions
+        for (unsigned int j = 0; (j != i && j < 512); j++) {
+            EXPECT_EQ(0U, packedExtract<64>(setbit<m512>(j), permute, compare));
+        }
+    }
+}
+
 #endif
 
 } // namespace
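The new tests reuse the existing build_pshufb_masks_onebit() helper from this file. For reviewers unfamiliar with it, the idea, sketched below from how the non-templatized tests use it, is to build a permute mask that pulls the byte holding the target bit into lane 0 while zeroing every other lane, and a compare mask whose lane 0 is the complement of the target bit; packedExtract then returns 1 exactly when the input had that bit set. This is a hypothetical sketch of the 128-bit contract only (the real helper may differ, and the wider versions must also respect vpshufb's per-128-bit-lane indexing); m128 and u8 are the repo's SIMD/integer types:

    #include <cstring>

    // Hedged sketch of the mask-building helper the tests rely on.
    static void build_pshufb_masks_onebit_sketch(unsigned int bit, m128 *permute,
                                                 m128 *compare) {
        u8 pmsk[16];
        u8 cmsk[16];
        memset(pmsk, 0x80, sizeof(pmsk)); // 0x80 makes pshufb zero the lane
        memset(cmsk, 0, sizeof(cmsk));
        pmsk[0] = bit / 8;                // pull the byte holding `bit` into lane 0
        cmsk[0] = ~(1U << (bit % 8));     // that byte, minus the target bit
        memcpy(permute, pmsk, sizeof(pmsk));
        memcpy(compare, cmsk, sizeof(cmsk));
    }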
diff --git a/unit/internal/supervector.cpp b/unit/internal/supervector.cpp
index 261eeac0..e85d815e 100644
--- a/unit/internal/supervector.cpp
+++ b/unit/internal/supervector.cpp
@@ -290,6 +290,55 @@ TEST(SuperVectorUtilsTest,pshufb128c) {
     }
 }
 
+
+/*Define LSHIFT128_128 macro*/
+#define TEST_LSHIFT128_128(buf, vec, v, l) {                   \
+        auto v_shifted = SP.lshift128(l);                      \
+        for (int i=15; i>=l; --i) {                            \
+            buf[i] = vec[i-l];                                 \
+        }                                                      \
+        for (int i=0; i<l; i++) {                              \
+            buf[i] = 0;                                        \
+        }                                                      \
+        for(int i=0; i<16; i++) {                              \
+            ASSERT_EQ(v_shifted.u.u8[i], buf[i]);              \
+        }                                                      \
+    }
+
+TEST(SuperVectorUtilsTest,LShift128_128c){
+    u8 vec[16];
+    for (int i = 0; i<16; i++ ){ vec[i] = i+1; }
+    auto SP = SuperVector<16>::loadu(vec);
+    u8 buf[16];
+    for (int j = 0; j<16; j++) {
+        TEST_LSHIFT128_128(buf, vec, SP, j);
+    }
+}
+
+/*Define RSHIFT128_128 macro*/
+#define TEST_RSHIFT128_128(buf, vec, v, l) {                   \
+        auto v_shifted = SP.rshift128(l);                      \
+        for (int i=0; i<16-l; i++) {                           \
+            buf[i] = vec[i+l];                                 \
+        }                                                      \
+        for (int i=16-l; i<16; i++) {                          \
+            buf[i] = 0;                                        \
+        }                                                      \
+        for(int i=0; i<16; i++) {                              \
+            ASSERT_EQ(v_shifted.u.u8[i], buf[i]);              \
+        }                                                      \
+    }
+
+TEST(SuperVectorUtilsTest,RShift128_128c){
+    u8 vec[16];
+    for (int i = 0; i<16; i++ ){ vec[i] = i+1; }
+    auto SP = SuperVector<16>::loadu(vec);
+    u8 buf[16];
+    for (int j = 0; j<16; j++) {
+        TEST_RSHIFT128_128(buf, vec, SP, j);
+    }
+}
+
 /*Define ALIGNR128 macro*/
 #define TEST_ALIGNR128(v1, v2, buf, l) {                       \
     auto v_aligned = v2.alignr(v1, l);                         \
@@ -538,7 +587,7 @@ TEST(SuperVectorUtilsTest,LShift256c){
     }
 }
 
-/*
+
 TEST(SuperVectorUtilsTest,LShift64_256c){
     u64a vec[4] = {128, 512, 256, 1024};
     auto SP = SuperVector<32>::loadu(vec);
@@ -560,7 +609,7 @@ TEST(SuperVectorUtilsTest,RShift64_256c){
         }
     }
 }
-*/
+
 
 /*Define RSHIFT256 macro*/
 #define TEST_RSHIFT256(buf, vec, v, l) {                       \
@@ -587,6 +636,62 @@ TEST(SuperVectorUtilsTest,RShift256c){
     }
 }
 
+
+
+
+/*Define LSHIFT128_256 macro*/
+#define TEST_LSHIFT128_256(buf, vec, v, l) {                   \
+        auto v_shifted = SP.lshift128(l);                      \
+        for (int i=15; i>=l; --i) {                            \
+            buf[i] = vec[i-l];                                 \
+            buf[i+16] = vec[(16+i)-l];                         \
+        }                                                      \
+        for (int i=0; i<l; i++) {                              \
+            buf[i] = 0;                                        \
+            buf[i+16] = 0;                                     \
+        }                                                      \
+        for(int i=0; i<32; i++) {                              \
+            ASSERT_EQ(v_shifted.u.u8[i], buf[i]);              \
+        }                                                      \
+    }
+
+TEST(SuperVectorUtilsTest,LShift128_256c){
+    u8 vec[32];
+    for (int i = 0; i<32; i++ ){ vec[i] = i+1; }
+    auto SP = SuperVector<32>::loadu(vec);
+    u8 buf[32];
+    for (int j=0; j<16; j++) {
+        TEST_LSHIFT128_256(buf, vec, SP, j);
+    }
+}
+
+/*Define RSHIFT128_256 macro*/
+#define TEST_RSHIFT128_256(buf, vec, v, l) {                   \
+        auto v_shifted = SP.rshift128(l);                      \
+        for (int i=0; i<16-l; i++) {                           \
+            buf[i] = vec[i+l];                                 \
+            buf[i+16] = vec[(i+16)+l];                         \
+        }                                                      \
+        for (int i=16-l; i<16; i++) {                          \
+            buf[i] = 0;                                        \
+            buf[i+16] = 0;                                     \
+        }                                                      \
+        for(int i=0; i<32; i++) {                              \
+            ASSERT_EQ(v_shifted.u.u8[i], buf[i]);              \
+        }                                                      \
+    }
+
+TEST(SuperVectorUtilsTest,RShift128_256c){
+    u8 vec[32];
+    for (int i = 0; i<32; i++ ){ vec[i] = i+1; }
+    auto SP = SuperVector<32>::loadu(vec);
+    u8 buf[32];
+    for(int j=0; j<16; j++) {
+        TEST_RSHIFT128_256(buf, vec, SP, j);
+    }
+}
+
+
 /*Define ALIGNR256 macro*/
 /*
 #define TEST_ALIGNR256(v1, v2, buf, l) {                       \
@@ -772,13 +877,13 @@ TEST(SuperVectorUtilsTest,OPANDNOT512c){
     }
 }
 
-/*
+
 TEST(SuperVectorUtilsTest,Movemask512c){
     srand (time(NULL));
     u8 vec[64] = {0};
     u64a r = rand() % 100 + 1;
     for(int i=0; i<64; i++) {
-        if (r & (1 << i)) {
+        if (r & (1ULL << i)) {
             vec[i] = 0xff;
         }
     }
@@ -786,16 +891,16 @@ TEST(SuperVectorUtilsTest,Movemask512c){
     u8 vec2[64] = {0};
     u64a mask = SP.movemask();
     for(int i=0; i<64; i++) {
-        if (mask & (1 << i)) {
+        if (mask & (1ULL << i)) {
             vec2[i] = 0xff;
         }
     }
     for (int i=0; i<64; i++){
-        printf("%d) vec =%i , vec2 = %i \n",i,vec[i],vec2[i]);
-        //ASSERT_EQ(vec[i],vec2[i]);
+        //printf("%d) vec =%i , vec2 = %i \n",i,vec[i],vec2[i]);
+        ASSERT_EQ(vec[i],vec2[i]);
     }
 }
-*/
+
 
 TEST(SuperVectorUtilsTest,Eqmask512c){
     srand (time(NULL));
@@ -858,7 +963,7 @@ TEST(SuperVectorUtilsTest,LShift512c){
     }
 }
 
-/*
+
 TEST(SuperVectorUtilsTest,LShift64_512c){
     u64a vec[8] = {32, 64, 128, 256, 512, 512, 256, 1024};
    auto SP = SuperVector<64>::loadu(vec);
@@ -880,7 +985,7 @@ TEST(SuperVectorUtilsTest,RShift64_512c){
         }
     }
 }
-*/
+
 
 /*Define RSHIFT512 macro*/
 #define TEST_RSHIFT512(buf, vec, v, l) {                       \
@@ -906,6 +1011,67 @@ TEST(SuperVectorUtilsTest,RShift512c){
     }
 }
 
+
+/*Define RSHIFT128_512 macro*/
+#define TEST_RSHIFT128_512(buf, vec, v, l) {                   \
+        auto v_shifted = SP.rshift128(l);                      \
+        for (int i=0; i<16-l; i++) {                           \
+            buf[i] = vec[i+l];                                 \
+            buf[i+16] = vec[(i+16)+l];                         \
+            buf[i+32] = vec[(i+32)+l];                         \
+            buf[i+48] = vec[(i+48)+l];                         \
+        }                                                      \
+        for (int i=16-l; i<16; i++) {                          \
+            buf[i] = 0;                                        \
+            buf[i+16] = 0;                                     \
+            buf[i+32] = 0;                                     \
+            buf[i+48] = 0;                                     \
+        }                                                      \
+        for(int i=0; i<64; i++) {                              \
+            ASSERT_EQ(v_shifted.u.u8[i], buf[i]);              \
+        }                                                      \
+    }
+
+TEST(SuperVectorUtilsTest,RShift128_512c){
+    u8 vec[64];
+    for (int i = 0; i<64; i++ ){ vec[i] = i+1; }
+    auto SP = SuperVector<64>::loadu(vec);
+    u8 buf[64] = {1};
+    for(int j=0; j<16; j++){
+        TEST_RSHIFT128_512(buf, vec, SP, j);
+    }
+}
+
+/*Define LSHIFT128_512 macro*/
+#define TEST_LSHIFT128_512(buf, vec, v, l) {                   \
+        auto v_shifted = SP.lshift128(l);                      \
+        for (int i=15; i>=l; --i) {                            \
+            buf[i] = vec[i-l];                                 \
+            buf[i+16] = vec[(i+16)-l];                         \
+            buf[i+32] = vec[(i+32)-l];                         \
+            buf[i+48] = vec[(i+48)-l];                         \
+        }                                                      \
+        for (int i=0; i<l; i++) {                              \
+            buf[i] = 0;                                        \
+            buf[i+16] = 0;                                     \
+            buf[i+32] = 0;                                     \
+            buf[i+48] = 0;                                     \
+        }                                                      \
+        for(int i=0; i<64; i++) {                              \
+            ASSERT_EQ(v_shifted.u.u8[i], buf[i]);              \
+        }                                                      \
+    }
+
+TEST(SuperVectorUtilsTest,LShift128_512c){
+    u8 vec[64];
+    for (int i = 0; i<64; i++ ){ vec[i] = i+1; }
+    auto SP = SuperVector<64>::loadu(vec);
+    u8 buf[64] = {1};
+    for(int j=0; j<16; j++){
+        TEST_LSHIFT128_512(buf, vec, SP, j);
+    }
+}
+
+
 /*Define ALIGNR512 macro*/
 /*
 #define TEST_ALIGNR512(v1, v2, buf, l) {                       \
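Two notes on the supervector.cpp changes. First, lshift128()/rshift128() shift each 128-bit lane independently, which is why the expected-value macros above rebuild the reference buffer in 16-byte strides (buf[i], buf[i+16], buf[i+32], buf[i+48]). Second, the re-enabled Movemask512c test needed the `1ULL` fix to be meaningful: `1 << i` shifts a 32-bit int, which is undefined behaviour for i >= 32, so the upper half of the 64-bit mask was never actually exercised. A minimal illustration of that fix:

    #include <cstdint>

    // `1 << i` promotes to a 32-bit int: for i >= 32 the shift is undefined
    // behaviour and bits 32..63 of a 64-bit mask can never be produced.
    uint64_t nth_bit(unsigned int i) {
        // return 1 << i;   // UB once i >= 32
        return 1ULL << i;   // well-defined for i in [0, 63]
    }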