limex_shuffle added and its unit tests
parent de30471edd
commit b3a20afbbc

src/nfa/limex_shuffle.hpp | 80 (new file)
@@ -0,0 +1,80 @@
/*
 * Copyright (c) 2015-2017, Intel Corporation
 * Copyright (c) 2020-2021, VectorCamp PC
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief Naive dynamic shuffles.
 *
 * These are written with the assumption that the provided masks are sparsely
 * populated and never contain more than 32 on bits. Other implementations will
 * be faster and actually correct if these assumptions don't hold true.
 */

#ifndef LIMEX_SHUFFLE_HPP
#define LIMEX_SHUFFLE_HPP

#include "ue2common.h"
#include "util/arch.h"
#include "util/bitutils.h"
#include "util/unaligned.h"
#include "util/supervector/supervector.hpp"

template <u16 S>
u32 packedExtract(SuperVector<S> s, const SuperVector<S> permute, const SuperVector<S> compare);

template <>
really_really_inline
u32 packedExtract<16>(SuperVector<16> s, const SuperVector<16> permute, const SuperVector<16> compare) {
    SuperVector<16> shuffled = s.pshufb(permute);
    SuperVector<16> compared = shuffled & compare;
    u16 rv = ~compared.eqmask(shuffled);
    return (u32)rv;
}

template <>
really_really_inline
u32 packedExtract<32>(SuperVector<32> s, const SuperVector<32> permute, const SuperVector<32> compare) {
    SuperVector<32> shuffled = s.pshufb(permute);
    SuperVector<32> compared = shuffled & compare;
    u32 rv = ~compared.eqmask(shuffled);
    // fold the two 16-bit lane masks together
    return (u32)((rv >> 16) | (rv & 0xffffU));
}

template <>
really_really_inline
u32 packedExtract<64>(SuperVector<64> s, const SuperVector<64> permute, const SuperVector<64> compare) {
    SuperVector<64> shuffled = s.pshufb(permute);
    SuperVector<64> compared = shuffled & compare;
    u64a rv = ~compared.eqmask(shuffled);
    // fold the four 16-bit lane masks together
    rv = rv >> 32 | rv;
    return (u32)(((rv >> 16) | rv) & 0xffffU);
}

#endif // LIMEX_SHUFFLE_HPP
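For reference, here is a hedged scalar model of what the 16-byte specialisation above computes; it may help when reading the tests that follow. It assumes pshufb's usual semantics (a control byte with its high bit set zeroes that lane, otherwise the low four bits index the source byte) and that eqmask() returns one bit per byte, as movemask(a == b) would.

// Sketch only, not part of the commit: bit k of the result is set iff
// masking byte k of the shuffled vector with `compare` changes it, i.e.
// iff the shuffled byte had a bit set that `compare` clears.
static u32 packedExtract16_model(const u8 s[16], const u8 permute[16],
                                 const u8 compare[16]) {
    u8 shuffled[16];
    for (int k = 0; k < 16; k++) {
        shuffled[k] = (permute[k] & 0x80) ? 0 : s[permute[k] & 0x0f];
    }
    u32 rv = 0;
    for (int k = 0; k < 16; k++) {
        if ((shuffled[k] & compare[k]) != shuffled[k]) {
            rv |= 1u << k;
        }
    }
    return rv;
}

The 32- and 64-byte specialisations apply the same per-byte test in each 128-bit lane and then OR the 16-bit lane masks together, which is why the header comment above warns that the masks are assumed to be sparse.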
@@ -33,6 +33,9 @@
#include "util/arch.h"
#include "util/simd_utils.h"
#include "nfa/limex_shuffle.h"
#include "util/supervector/supervector.hpp"
#include "nfa/limex_shuffle.hpp"

namespace {

@@ -196,6 +199,26 @@ TEST(Shuffle, PackedExtract128_1) {
    }
}

TEST(Shuffle, PackedExtract_templatized_128_1) {
    // Try all possible one-bit masks
    for (unsigned int i = 0; i < 128; i++) {
        // shuffle a single 1 bit to the front
        SuperVector<16> permute = SuperVector<16>::Zeroes();
        SuperVector<16> compare = SuperVector<16>::Zeroes();
        build_pshufb_masks_onebit(i, &permute.u.v128[0], &compare.u.v128[0]);
        EXPECT_EQ(1U, packedExtract<16>(setbit<m128>(i), permute, compare));
        EXPECT_EQ(1U, packedExtract<16>(SuperVector<16>::Ones(), permute, compare));
        // we should get zero out of these cases
        EXPECT_EQ(0U, packedExtract<16>(SuperVector<16>::Zeroes(), permute, compare));
        EXPECT_EQ(0U, packedExtract<16>(not128(setbit<m128>(i)), permute, compare));
        // we should get zero out of all the other bit positions
        for (unsigned int j = 0; (j != i && j < 128); j++) {
            EXPECT_EQ(0U, packedExtract<16>(setbit<m128>(j), permute, compare));
        }
    }
}

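For reference, a hedged sketch of the kind of masks this test needs. The project's actual build_pshufb_masks_onebit may differ in detail; the only properties the test relies on are that `permute` pulls the byte containing bit i into lane 0 and zeroes every other lane, and that `compare` clears exactly that bit so the equality check fails only when the bit was set.

// Hypothetical builder, for illustration only (assumes <cstring> for memset).
static void onebit_masks_sketch(unsigned int bit, u8 permute[16], u8 compare[16]) {
    memset(permute, 0x80, 16);            // 0x80 control byte: pshufb zeroes the lane
    memset(compare, 0x00, 16);            // zeroed lanes always compare equal
    permute[0] = bit / 8;                 // move the byte holding `bit` into lane 0
    compare[0] = (u8)~(1u << (bit % 8));  // drop exactly `bit` from lane 0
}

With masks of this shape, packedExtract<16> returns 1 iff bit i of the input is set, which is exactly what the EXPECT_EQ lines assert.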
#if defined(HAVE_AVX2)
TEST(Shuffle, PackedExtract256_1) {
    // Try all possible one-bit masks
@@ -214,6 +237,27 @@ TEST(Shuffle, PackedExtract256_1) {
        }
    }
}

TEST(Shuffle, PackedExtract_templatized_256_1) {
    // Try all possible one-bit masks
    for (unsigned int i = 0; i < 256; i++) {
        // shuffle a single 1 bit to the front
        SuperVector<32> permute = SuperVector<32>::Zeroes();
        SuperVector<32> compare = SuperVector<32>::Zeroes();
        build_pshufb_masks_onebit(i, &permute.u.v256[0], &compare.u.v256[0]);
        EXPECT_EQ(1U, packedExtract<32>(setbit<m256>(i), permute, compare));
        EXPECT_EQ(1U, packedExtract<32>(SuperVector<32>::Ones(), permute, compare));
        // we should get zero out of these cases
        EXPECT_EQ(0U, packedExtract<32>(SuperVector<32>::Zeroes(), permute, compare));
        EXPECT_EQ(0U, packedExtract<32>(not256(setbit<m256>(i)), permute, compare));
        // we should get zero out of all the other bit positions
        for (unsigned int j = 0; (j != i && j < 256); j++) {
            EXPECT_EQ(0U, packedExtract<32>(setbit<m256>(j), permute, compare));
        }
    }
}

#endif

#if defined(HAVE_AVX512)
@@ -234,5 +278,25 @@ TEST(Shuffle, PackedExtract512_1) {
        }
    }
}

TEST(Shuffle, PackedExtract_templatized_512_1) {
    // Try all possible one-bit masks
    for (unsigned int i = 0; i < 512; i++) {
        // shuffle a single 1 bit to the front
        SuperVector<64> permute = SuperVector<64>::Zeroes();
        SuperVector<64> compare = SuperVector<64>::Zeroes();
        build_pshufb_masks_onebit(i, &permute.u.v512[0], &compare.u.v512[0]);
        EXPECT_EQ(1U, packedExtract<64>(setbit<m512>(i), permute, compare));
        EXPECT_EQ(1U, packedExtract<64>(SuperVector<64>::Ones(), permute, compare));
        // we should get zero out of these cases
        EXPECT_EQ(0U, packedExtract<64>(SuperVector<64>::Zeroes(), permute, compare));
        EXPECT_EQ(0U, packedExtract<64>(not512(setbit<m512>(i)), permute, compare));
        // we should get zero out of all the other bit positions
        for (unsigned int j = 0; (j != i && j < 512); j++) {
            EXPECT_EQ(0U, packedExtract<64>(setbit<m512>(j), permute, compare));
        }
    }
}

#endif
} // namespace
@@ -290,6 +290,55 @@ TEST(SuperVectorUtilsTest,pshufb128c) {
    }
}

/*Define LSHIFT128_128 macro*/
#define TEST_LSHIFT128_128(buf, vec, v, l) {          \
    auto v_shifted = (v).lshift128(l);                \
    for (int i=15; i>=l; --i) {                       \
        buf[i] = vec[i-l];                            \
    }                                                 \
    for (int i=0; i<l; i++) {                         \
        buf[i] = 0;                                   \
    }                                                 \
    for (int i=0; i<16; i++) {                        \
        ASSERT_EQ(v_shifted.u.u8[i], buf[i]);         \
    }                                                 \
}

TEST(SuperVectorUtilsTest,LShift128_128c){
    u8 vec[16];
    for (int i=0; i<16; i++) { vec[i] = i+1; }
    auto SP = SuperVector<16>::loadu(vec);
    u8 buf[16];
    for (int j=0; j<16; j++) {
        TEST_LSHIFT128_128(buf, vec, SP, j);
    }
}

/*Define RSHIFT128_128 macro*/
#define TEST_RSHIFT128_128(buf, vec, v, l) {          \
    auto v_shifted = (v).rshift128(l);                \
    for (int i=0; i<16-l; i++) {                      \
        buf[i] = vec[i+l];                            \
    }                                                 \
    for (int i=16-l; i<16; i++) {                     \
        buf[i] = 0;                                   \
    }                                                 \
    for (int i=0; i<16; i++) {                        \
        ASSERT_EQ(v_shifted.u.u8[i], buf[i]);         \
    }                                                 \
}

TEST(SuperVectorUtilsTest,RShift128_128c){
    u8 vec[16];
    for (int i=0; i<16; i++) { vec[i] = i+1; }
    auto SP = SuperVector<16>::loadu(vec);
    u8 buf[16];
    for (int j=0; j<16; j++) {
        TEST_RSHIFT128_128(buf, vec, SP, j);
    }
}
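For reference, lshift128 and rshift128 shift bytes within each 128-bit lane, zero-filling at the vacated end; the expected-value loops in these macros model exactly that. A hedged scalar sketch of the left shift for a single 16-byte lane (the 256- and 512-bit variants below repeat the same pattern once per lane):

// Sketch only, not part of the commit: byte-wise left shift of one
// 16-byte lane by l byte positions, zero-filling from the low end.
static void lshift128_lane_model(const u8 in[16], u8 out[16], int l) {
    for (int i = 0; i < 16; i++) {
        out[i] = (i >= l) ? in[i - l] : 0;
    }
}

The right shift mirrors this: out[i] = in[i + l] for i < 16 - l, with zeroes above.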

/*Define ALIGNR128 macro*/
#define TEST_ALIGNR128(v1, v2, buf, l) {              \
    auto v_aligned = v2.alignr(v1, l);                \
@@ -538,7 +587,7 @@ TEST(SuperVectorUtilsTest,LShift256c){
    }
}

/*
TEST(SuperVectorUtilsTest,LShift64_256c){
    u64a vec[4] = {128, 512, 256, 1024};
    auto SP = SuperVector<32>::loadu(vec);
@@ -560,7 +609,7 @@ TEST(SuperVectorUtilsTest,RShift64_256c){
        }
    }
}
*/

/*Define RSHIFT256 macro*/
#define TEST_RSHIFT256(buf, vec, v, l) {              \
@@ -587,6 +636,62 @@ TEST(SuperVectorUtilsTest,RShift256c){
}

/*Define LSHIFT128_256 macro*/
#define TEST_LSHIFT128_256(buf, vec, v, l) {          \
    auto v_shifted = (v).lshift128(l);                \
    for (int i=15; i>=l; --i) {                       \
        buf[i] = vec[i-l];                            \
        buf[i+16] = vec[(i+16)-l];                    \
    }                                                 \
    for (int i=0; i<l; i++) {                         \
        buf[i] = 0;                                   \
        buf[i+16] = 0;                                \
    }                                                 \
    for (int i=0; i<32; i++) {                        \
        ASSERT_EQ(v_shifted.u.u8[i], buf[i]);         \
    }                                                 \
}

TEST(SuperVectorUtilsTest,LShift128_256c){
    u8 vec[32];
    for (int i=0; i<32; i++) { vec[i] = i+1; }
    auto SP = SuperVector<32>::loadu(vec);
    u8 buf[32];
    for (int j=0; j<16; j++) {
        TEST_LSHIFT128_256(buf, vec, SP, j);
    }
}

/*Define RSHIFT128_256 macro*/
#define TEST_RSHIFT128_256(buf, vec, v, l) {          \
    auto v_shifted = (v).rshift128(l);                \
    for (int i=0; i<16-l; i++) {                      \
        buf[i] = vec[i+l];                            \
        buf[i+16] = vec[(i+16)+l];                    \
    }                                                 \
    for (int i=16-l; i<16; i++) {                     \
        buf[i] = 0;                                   \
        buf[i+16] = 0;                                \
    }                                                 \
    for (int i=0; i<32; i++) {                        \
        ASSERT_EQ(v_shifted.u.u8[i], buf[i]);         \
    }                                                 \
}

TEST(SuperVectorUtilsTest,RShift128_256c){
    u8 vec[32];
    for (int i=0; i<32; i++) { vec[i] = i+1; }
    auto SP = SuperVector<32>::loadu(vec);
    u8 buf[32];
    for (int j=0; j<16; j++) {
        TEST_RSHIFT128_256(buf, vec, SP, j);
    }
}

/*Define ALIGNR256 macro*/
/*
#define TEST_ALIGNR256(v1, v2, buf, l) {              \
@@ -772,13 +877,13 @@ TEST(SuperVectorUtilsTest,OPANDNOT512c){
    }
}

/*
TEST(SuperVectorUtilsTest,Movemask512c){
    srand (time(NULL));
    u8 vec[64] = {0};
    u64a r = rand() % 100 + 1;
    for(int i=0; i<64; i++) {
-       if (r & (1 << i)) {
+       if (r & (1ULL << i)) {
            vec[i] = 0xff;
        }
    }
@@ -786,16 +891,16 @@ TEST(SuperVectorUtilsTest,Movemask512c){
    u8 vec2[64] = {0};
    u64a mask = SP.movemask();
    for(int i=0; i<64; i++) {
-       if (mask & (1 << i)) {
+       if (mask & (1ULL << i)) {
            vec2[i] = 0xff;
        }
    }
    for (int i=0; i<64; i++){
-       printf("%d) vec =%i , vec2 = %i \n",i,vec[i],vec2[i]);
-       //ASSERT_EQ(vec[i],vec2[i]);
+       //printf("%d) vec =%i , vec2 = %i \n",i,vec[i],vec2[i]);
+       ASSERT_EQ(vec[i],vec2[i]);
    }
}
*/
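One detail worth noting in the hunks above: the literal 1 is a 32-bit int, so `1 << i` is undefined behaviour for i >= 32 and can never reach the upper half of a 64-bit mask; that is what the change to `1ULL << i` fixes, even though this test is currently commented out. A minimal illustration:

// Sketch only: why the 64-bit literal matters for a 64-entry loop.
u64a wrong = (u64a)(1 << 40); // undefined behaviour: shift count >= width of int
u64a right = 1ULL << 40;      // well-defined: bit 40 of a 64-bit value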
TEST(SuperVectorUtilsTest,Eqmask512c){
    srand (time(NULL));
@@ -858,7 +963,7 @@ TEST(SuperVectorUtilsTest,LShift512c){
    }
}

/*
TEST(SuperVectorUtilsTest,LShift64_512c){
    u64a vec[8] = {32, 64, 128, 256, 512, 512, 256, 1024};
    auto SP = SuperVector<64>::loadu(vec);
@@ -880,7 +985,7 @@ TEST(SuperVectorUtilsTest,RShift64_512c){
        }
    }
}
*/

/*Define RSHIFT512 macro*/
#define TEST_RSHIFT512(buf, vec, v, l) {              \
@@ -906,6 +1011,67 @@ TEST(SuperVectorUtilsTest,RShift512c){
    }
}

/*Define RSHIFT128_512 macro*/
#define TEST_RSHIFT128_512(buf, vec, v, l) {          \
    auto v_shifted = (v).rshift128(l);                \
    for (int i=0; i<16-l; i++) {                      \
        buf[i] = vec[i+l];                            \
        buf[i+16] = vec[(i+16)+l];                    \
        buf[i+32] = vec[(i+32)+l];                    \
        buf[i+48] = vec[(i+48)+l];                    \
    }                                                 \
    for (int i=16-l; i<16; i++) {                     \
        buf[i] = 0;                                   \
        buf[i+16] = 0;                                \
        buf[i+32] = 0;                                \
        buf[i+48] = 0;                                \
    }                                                 \
    for (int i=0; i<64; i++) {                        \
        ASSERT_EQ(v_shifted.u.u8[i], buf[i]);         \
    }                                                 \
}

TEST(SuperVectorUtilsTest,RShift128_512c){
    u8 vec[64];
    for (int i=0; i<64; i++) { vec[i] = i+1; }
    auto SP = SuperVector<64>::loadu(vec);
    u8 buf[64] = {1};
    for (int j=0; j<16; j++) {
        TEST_RSHIFT128_512(buf, vec, SP, j);
    }
}

/*Define LSHIFT128_512 macro*/
#define TEST_LSHIFT128_512(buf, vec, v, l) {          \
    auto v_shifted = (v).lshift128(l);                \
    for (int i=15; i>=l; --i) {                       \
        buf[i] = vec[i-l];                            \
        buf[i+16] = vec[(i+16)-l];                    \
        buf[i+32] = vec[(i+32)-l];                    \
        buf[i+48] = vec[(i+48)-l];                    \
    }                                                 \
    for (int i=0; i<l; i++) {                         \
        buf[i] = 0;                                   \
        buf[i+16] = 0;                                \
        buf[i+32] = 0;                                \
        buf[i+48] = 0;                                \
    }                                                 \
    for (int i=0; i<64; i++) {                        \
        ASSERT_EQ(v_shifted.u.u8[i], buf[i]);         \
    }                                                 \
}

TEST(SuperVectorUtilsTest,LShift128_512c){
    u8 vec[64];
    for (int i=0; i<64; i++) { vec[i] = i+1; }
    auto SP = SuperVector<64>::loadu(vec);
    u8 buf[64] = {1};
    for (int j=0; j<16; j++) {
        TEST_LSHIFT128_512(buf, vec, SP, j);
    }
}

/*Define ALIGNR512 macro*/
/*
#define TEST_ALIGNR512(v1, v2, buf, l) {              \