limex_shuffle added and its unit tests

apostolos 2021-07-27 11:44:35 +03:00 committed by Konstantinos Margaritis
parent de30471edd
commit b3a20afbbc
3 changed files with 320 additions and 10 deletions

src/nfa/limex_shuffle.hpp (new file, +80 lines)

@@ -0,0 +1,80 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
* Copyright (c) 2020-2021, VectorCamp PC
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Naive dynamic shuffles.
*
* These are written with the assumption that the provided masks are sparsely
* populated and never contain more than 32 on bits. Other implementations will
* be faster and actually correct if these assumptions don't hold true.
*/
#ifndef LIMEX_SHUFFLE_HPP
#define LIMEX_SHUFFLE_HPP
#include "ue2common.h"
#include "util/arch.h"
#include "util/bitutils.h"
#include "util/unaligned.h"
#include "util/supervector/supervector.hpp"
template <u16 S>
u32 packedExtract(SuperVector<S> s, const SuperVector<S> permute, const SuperVector<S> compare);
template <>
really_really_inline
u32 packedExtract<16>(SuperVector<16> s, const SuperVector<16> permute, const SuperVector<16> compare) {
SuperVector<16> shuffled = s.pshufb(permute);
SuperVector<16> compared = shuffled & compare;
// eqmask() flags the byte lanes that masking left unchanged; the inverse
// flags the lanes whose shuffled byte had bits outside its compare byte
u16 rv = ~compared.eqmask(shuffled);
return (u32)rv;
}
template <>
really_really_inline
u32 packedExtract<32>(SuperVector<32> s, const SuperVector<32> permute, const SuperVector<32> compare) {
SuperVector<32> shuffled = s.pshufb(permute);
SuperVector<32> compared = shuffled & compare;
u32 rv = ~compared.eqmask(shuffled);
// OR the two 128-bit lanes' results into the low 16 bits; the sparse masks
// assumed above are expected to keep the per-lane results from colliding
return (u32)((rv >> 16) | (rv & 0xffffU));
}
template <>
really_really_inline
u32 packedExtract<64>(SuperVector<64> s, const SuperVector<64> permute, const SuperVector<64> compare) {
SuperVector<64> shuffled = s.pshufb(permute);
SuperVector<64> compared = shuffled & compare;
u64a rv = ~compared.eqmask(shuffled);
// OR all four 128-bit lanes' results into the low 16 bits
rv = rv >> 32 | rv;
return (u32)(((rv >> 16) | rv) & 0xffffU);
}
#endif // LIMEX_SHUFFLE_HPP
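A minimal usage sketch (illustrative only, not part of the commit; the `state`
variable and the build_pshufb_masks_onebit() helper defined in the unit tests
below are assumed):

    // extract bit 42 of a 128-bit state vector into bit 0 of the result
    SuperVector<16> permute = SuperVector<16>::Zeroes();
    SuperVector<16> compare = SuperVector<16>::Zeroes();
    build_pshufb_masks_onebit(42, &permute.u.v128[0], &compare.u.v128[0]);
    u32 bit = packedExtract<16>(state, permute, compare); // 1 iff bit 42 is set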

(shuffle unit tests)

@@ -33,6 +33,9 @@
#include "util/arch.h"
#include "util/simd_utils.h"
#include "nfa/limex_shuffle.h"
#include"util/supervector/supervector.hpp"
#include "nfa/limex_shuffle.hpp"
namespace {
@@ -196,6 +199,26 @@ TEST(Shuffle, PackedExtract128_1) {
}
}
TEST(Shuffle, PackedExtract_templatized_128_1) {
// Try all possible one-bit masks
for (unsigned int i = 0; i < 128; i++) {
// shuffle a single 1 bit to the front
SuperVector<16> permute = SuperVector<16>::Zeroes();
SuperVector<16> compare = SuperVector<16>::Zeroes();
build_pshufb_masks_onebit(i, &permute.u.v128[0], &compare.u.v128[0]);
EXPECT_EQ(1U, packedExtract<16>(setbit<m128>(i), permute, compare));
EXPECT_EQ(1U, packedExtract<16>(SuperVector<16>::Ones(), permute, compare));
// we should get zero out of these cases
EXPECT_EQ(0U, packedExtract<16>(SuperVector<16>::Zeroes(), permute, compare));
EXPECT_EQ(0U, packedExtract<16>(not128(setbit<m128>(i)), permute, compare));
// we should get zero out of all the other bit positions
for (unsigned int j = 0; j < 128; j++) {
if (j == i) continue;
EXPECT_EQ(0U, packedExtract<16>(setbit<m128>(j), permute, compare));
}
}
}
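// For reference, one way such a permute/compare pair can be built for a single
// bit of an m128 (an illustrative sketch; the actual build_pshufb_masks_onebit()
// helper is defined earlier in this file and may differ in detail):
//
//   memset(permute, 0x80, sizeof(*permute));  // 0x80 makes pshufb write zero
//   memset(compare, 0, sizeof(*compare));
//   ((u8 *)permute)[0] = bit / 8;             // pull the byte holding `bit`
//   ((u8 *)compare)[0] = ~(1U << (bit % 8));  // so eqmask flags exactly `bit`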
#if defined(HAVE_AVX2)
TEST(Shuffle, PackedExtract256_1) {
// Try all possible one-bit masks
@@ -214,6 +237,27 @@ TEST(Shuffle, PackedExtract256_1) {
}
}
}
TEST(Shuffle, PackedExtract_templatized_256_1) {
// Try all possible one-bit masks
for (unsigned int i = 0; i < 256; i++) {
// shuffle a single 1 bit to the front
SuperVector<32> permute = SuperVector<32>::Zeroes();
SuperVector<32> compare = SuperVector<32>::Zeroes();
build_pshufb_masks_onebit(i, &permute.u.v256[0], &compare.u.v256[0]);
EXPECT_EQ(1U, packedExtract<32>(setbit<m256>(i), permute, compare));
EXPECT_EQ(1U, packedExtract<32>(SuperVector<32>::Ones(), permute, compare));
// we should get zero out of these cases
EXPECT_EQ(0U, packedExtract<32>(SuperVector<32>::Zeroes(), permute, compare));
EXPECT_EQ(0U, packedExtract<32>(not256(setbit<m256>(i)), permute, compare));
// we should get zero out of all the other bit positions
for (unsigned int j = 0; j < 256; j++) {
if (j == i) continue;
EXPECT_EQ(0U, packedExtract<32>(setbit<m256>(j), permute, compare));
}
}
}
#endif
#if defined(HAVE_AVX512)
@@ -234,5 +278,25 @@ TEST(Shuffle, PackedExtract512_1) {
}
}
}
TEST(Shuffle, PackedExtract_templatized_512_1) {
// Try all possible one-bit masks
for (unsigned int i = 0; i < 512; i++) {
// shuffle a single 1 bit to the front
SuperVector<64> permute = SuperVector<64>::Zeroes();
SuperVector<64> compare = SuperVector<64>::Zeroes();
build_pshufb_masks_onebit(i, &permute.u.v512[0], &compare.u.v512[0]);
EXPECT_EQ(1U, packedExtract<64>(setbit<m512>(i), permute, compare));
EXPECT_EQ(1U, packedExtract<64>(SuperVector<64>::Ones(), permute, compare));
// we should get zero out of these cases
EXPECT_EQ(0U, packedExtract<64>(SuperVector<64>::Zeroes(), permute, compare));
EXPECT_EQ(0U, packedExtract<64>(not512(setbit<m512>(i)), permute, compare));
// we should get zero out of all the other bit positions
for (unsigned int j = 0; j < 512; j++) {
if (j == i) continue;
EXPECT_EQ(0U, packedExtract<64>(setbit<m512>(j), permute, compare));
}
}
}
#endif
} // namespace

(SuperVector unit tests)

@@ -290,6 +290,55 @@ TEST(SuperVectorUtilsTest,pshufb128c) {
}
}
/*Define LSHIFT128_128 macro*/
#define TEST_LSHIFT128_128(buf, vec, v, l) { \
auto v_shifted = v.lshift128(l); \
for (int i=15; i>=l; --i) { \
buf[i] = vec[i-l]; \
} \
for (int i=0; i<l; i++) { \
buf[i] = 0; \
} \
for(int i=0; i<16; i++) { \
ASSERT_EQ(v_shifted.u.u8[i], buf[i]); \
} \
}
TEST(SuperVectorUtilsTest,LShift128_128c){
u8 vec[16];
for (int i = 0; i<16; i++ ){ vec[i] = i+1; }
auto SP = SuperVector<16>::loadu(vec);
u8 buf[16];
for (int j = 0; j<16; j++) {
TEST_LSHIFT128_128(buf, vec, SP, j);
}
}
/*Define RSHIFT128_128 macro*/
#define TEST_RSHIFT128_128(buf, vec, v, l) { \
auto v_shifted = v.rshift128(l); \
for (int i=0; i<16-l; i++) { \
buf[i] = vec[i+l]; \
} \
for (int i=16-l; i<16; i++) { \
buf[i] = 0; \
} \
for(int i=0; i<16; i++) { \
ASSERT_EQ(v_shifted.u.u8[i], buf[i]); \
} \
}
TEST(SuperVectorUtilsTest,RShift128_128c){
u8 vec[16];
for (int i = 0; i<16; i++ ){ vec[i] = i+1; }
auto SP = SuperVector<16>::loadu(vec);
u8 buf[16];
for (int j = 0; j<16; j++) {
TEST_RSHIFT128_128(buf, vec, SP, j);
}
}
/*Define ALIGNR128 macro*/
#define TEST_ALIGNR128(v1, v2, buf, l) { \
auto v_aligned = v2.alignr(v1, l); \
@@ -538,7 +587,7 @@ TEST(SuperVectorUtilsTest,LShift256c){
}
}
/*
TEST(SuperVectorUtilsTest,LShift64_256c){
u64a vec[4] = {128, 512, 256, 1024};
auto SP = SuperVector<32>::loadu(vec);
@@ -560,7 +609,7 @@ TEST(SuperVectorUtilsTest,RShift64_256c){
}
}
}
*/
/*Define RSHIFT256 macro*/
#define TEST_RSHIFT256(buf, vec, v, l) { \
@@ -587,6 +636,62 @@ TEST(SuperVectorUtilsTest,RShift256c){
}
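/* Note: lshift128()/rshift128() shift each 128-bit lane of the vector
independently, so the reference models below shift every 16-byte slice of the
buffer separately with zero fill rather than shifting the buffer as a whole. */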
/*Define LSHIFT128_256 macro*/
#define TEST_LSHIFT128_256(buf, vec, v, l) { \
auto v_shifted = v.lshift128(l); \
for (int i=15; i>=l; --i) { \
buf[i] = vec[i-l]; \
buf[i+16] = vec[(i+16)-l]; \
} \
for (int i=0; i<l; i++) { \
buf[i] = 0; \
buf[i+16] = 0; \
} \
for(int i=0; i<32; i++) { \
ASSERT_EQ(v_shifted.u.u8[i], buf[i]); \
} \
}
TEST(SuperVectorUtilsTest,LShift128_256c){
u8 vec[32];
for (int i = 0; i<32; i++) { vec[i]= i+1;}
auto SP = SuperVector<32>::loadu(vec);
u8 buf[32];
for (int j=0; j<16; j++) {
TEST_LSHIFT128_256(buf, vec, SP, j);
}
}
/*Define RSHIFT128_256 macro*/
#define TEST_RSHIFT128_256(buf, vec, v, l) { \
auto v_shifted = v.rshift128(l); \
for (int i=0; i<16-l; i++) { \
buf[i] = vec[i+l]; \
buf[i+16] = vec[(i+16)+l]; \
} \
for (int i=16-l; i<16; i++) { \
buf[i] = 0; \
buf[i+16] = 0; \
} \
for(int i=0; i<32; i++) { \
ASSERT_EQ(v_shifted.u.u8[i], buf[i]); \
} \
}
TEST(SuperVectorUtilsTest,RShift128_256c){
u8 vec[32];
for (int i = 0; i<32; i++ ){ vec[i] = i+1; }
auto SP = SuperVector<32>::loadu(vec);
u8 buf[32];
for(int j=0; j<16; j++) {
TEST_RSHIFT128_256(buf, vec, SP, j);
}
}
/*Define ALIGNR256 macro*/
/*
#define TEST_ALIGNR256(v1, v2, buf, l) { \
@@ -772,13 +877,13 @@ TEST(SuperVectorUtilsTest,OPANDNOT512c){
}
}
/*
TEST(SuperVectorUtilsTest,Movemask512c){
srand (time(NULL));
u8 vec[64] = {0};
u64a r = rand() % 100 + 1;
for(int i=0; i<64; i++) {
-if (r & (1 << i)) {
+if (r & (1ULL << i)) {
vec[i] = 0xff;
}
}
@@ -786,16 +891,16 @@ TEST(SuperVectorUtilsTest,Movemask512c){
u8 vec2[64] = {0};
u64a mask = SP.movemask();
for(int i=0; i<64; i++) {
-if (mask & (1 << i)) {
+if (mask & (1ULL << i)) {
vec2[i] = 0xff;
}
}
for (int i=0; i<64; i++){
-printf("%d) vec =%i , vec2 = %i \n",i,vec[i],vec2[i]);
-//ASSERT_EQ(vec[i],vec2[i]);
+//printf("%d) vec =%i , vec2 = %i \n",i,vec[i],vec2[i]);
+ASSERT_EQ(vec[i],vec2[i]);
}
}
*/
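// Note on the 1ULL change above: plain `1` is a 32-bit int, so `1 << i` is
// undefined for i >= 32; the 64-bit masks here need a 64-bit shift operand.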
TEST(SuperVectorUtilsTest,Eqmask512c){
srand (time(NULL));
@@ -858,7 +963,7 @@ TEST(SuperVectorUtilsTest,LShift512c){
}
}
/*
TEST(SuperVectorUtilsTest,LShift64_512c){
u64a vec[8] = {32, 64, 128, 256, 512, 512, 256, 1024};
auto SP = SuperVector<64>::loadu(vec);
@@ -880,7 +985,7 @@ TEST(SuperVectorUtilsTest,RShift64_512c){
}
}
}
*/
/*Define RSHIFT512 macro*/
#define TEST_RSHIFT512(buf, vec, v, l) { \
@@ -906,6 +1011,67 @@ TEST(SuperVectorUtilsTest,RShift512c){
}
}
/*Define RSHIFT128_512 macro*/
#define TEST_RSHIFT128_512(buf, vec, v, l) { \
auto v_shifted = v.rshift128(l); \
for (int i=0; i<16-l; i++) { \
buf[i] = vec[i+l]; \
buf[i+16] = vec[(i+16)+l]; \
buf[i+32] = vec[(i+32)+l]; \
buf[i+48] = vec[(i+48)+l]; \
} \
for (int i=16-l; i<16; i++) { \
buf[i] = 0; \
buf[i+16] = 0; \
buf[i+32] = 0; \
buf[i+48] = 0; \
} \
for(int i=0; i<64; i++) { \
ASSERT_EQ(v_shifted.u.u8[i], buf[i]); \
} \
}
TEST(SuperVectorUtilsTest,RShift128_512c){
u8 vec[64];
for (int i = 0; i<64; i++ ){ vec[i] = i+1; }
auto SP = SuperVector<64>::loadu(vec);
u8 buf[64] = {1};
for(int j=0; j<16; j++){
TEST_RSHIFT128_512(buf, vec, SP, j);
}
}
/*Define LSHIFT128_512 macro*/
#define TEST_LSHIFT128_512(buf, vec, v, l) { \
auto v_shifted = v.lshift128(l); \
for (int i=15; i>=l; --i) { \
buf[i] = vec[i-l]; \
buf[i+16] = vec[(i+16)-l]; \
buf[i+32] = vec[(i+32)-l]; \
buf[i+48] = vec[(i+48)-l]; \
} \
for (int i=0; i<l; i++) { \
buf[i] = 0; \
buf[i+16] = 0; \
buf[i+32] = 0; \
buf[i+48] = 0; \
} \
for(int i=0; i<64; i++) { \
ASSERT_EQ(v_shifted.u.u8[i], buf[i]); \
} \
}
TEST(SuperVectorUtilsTest,LShift128_512c){
u8 vec[64];
for (int i = 0; i<64; i++) { vec[i]= i+1;}
auto SP = SuperVector<64>::loadu(vec);
u8 buf[64] = {1};
for(int j=0; j<16;j++){
TEST_LSHIFT128_512(buf, vec, SP, j);
}
}
/*Define ALIGNR512 macro*/
/*
#define TEST_ALIGNR512(v1, v2, buf, l) { \