introduce Sheng-McClellan hybrid

This commit is contained in:
Alex Coyte
2016-12-01 14:32:47 +11:00
committed by Matthew Barr
parent f626276271
commit e51b6d23b9
35 changed files with 3804 additions and 206 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -436,3 +436,16 @@ TEST(BitUtils, rank_in_mask64) {
ASSERT_EQ(15, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 31));
ASSERT_EQ(31, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 63));
}
#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
// Checks pdep64(x, m) against hand-computed values: the low-order bits of x
// are expected to land, in order, on the set bit positions of m (lowest set
// position first), with every other result bit zero.
TEST(BitUtils, pdep64) {
    const u64a data = 0xF123456789ABCDEFULL;

    // Degenerate masks: an empty mask has nowhere to deposit to, and an
    // all-ones mask maps every source bit onto its own position.
    ASSERT_EQ(0ULL, pdep64(data, 0ULL));
    ASSERT_EQ(data, pdep64(data, ~0ULL));

    // Contiguous masks at various offsets: the low nibble (0xf) or low byte
    // (0xef) of data is shifted up to where the mask starts.
    ASSERT_EQ(0xfULL, pdep64(data, 0xf));
    ASSERT_EQ(0xefULL, pdep64(data, 0xff));
    ASSERT_EQ(0xf0ULL, pdep64(data, 0xf0));
    ASSERT_EQ(0xef0ULL, pdep64(data, 0xff0));
    ASSERT_EQ(0xef00ULL, pdep64(data, 0xff00));

    // Split mask: the three low nibbles f, e, d of data are spread across
    // the three nibble-wide gaps of the mask.
    ASSERT_EQ(0xd0e0f00ULL, pdep64(data, 0xf0f0f00));
}
#endif

View File

@@ -320,9 +320,9 @@ TEST(NFAGraph, cyclicVerts1) {
add_edge(a, b, g);
add_edge(b, a, g);
auto cyclics = findVerticesInCycles(g);
auto cyclics = find_vertices_in_cycles(g);
ASSERT_EQ(set<NFAVertex>({g.startDs, a, b}), cyclics);
ASSERT_EQ(flat_set<NFAVertex>({g.startDs, a, b}), cyclics);
}
TEST(NFAGraph, cyclicVerts2) {
@@ -341,9 +341,9 @@ TEST(NFAGraph, cyclicVerts2) {
add_edge(c, d, g);
add_edge(a, e, g);
auto cyclics = findVerticesInCycles(g);
auto cyclics = find_vertices_in_cycles(g);
ASSERT_EQ(set<NFAVertex>({g.startDs, a, b, c}), cyclics);
ASSERT_EQ(flat_set<NFAVertex>({g.startDs, a, b, c}), cyclics);
}
TEST(NFAGraph, cyclicVerts3) {
@@ -369,9 +369,9 @@ TEST(NFAGraph, cyclicVerts3) {
add_edge(f, h, g);
add_edge(h, h, g);
auto cyclics = findVerticesInCycles(g);
auto cyclics = find_vertices_in_cycles(g);
ASSERT_EQ(set<NFAVertex>({g.startDs, a, b, c, d, e, h}), cyclics);
ASSERT_EQ(flat_set<NFAVertex>({g.startDs, a, b, c, d, e, h}), cyclics);
}
TEST(NFAGraph, cyclicVerts4) {
@@ -396,9 +396,9 @@ TEST(NFAGraph, cyclicVerts4) {
add_edge(e, f, g);
add_edge(f, h, g);
auto cyclics = findVerticesInCycles(g);
auto cyclics = find_vertices_in_cycles(g);
ASSERT_EQ(set<NFAVertex>({g.startDs, a, b, c, d, e}), cyclics);
ASSERT_EQ(flat_set<NFAVertex>({g.startDs, a, b, c, d, e}), cyclics);
}
TEST(NFAGraph, cyclicVerts5) {
@@ -418,7 +418,7 @@ TEST(NFAGraph, cyclicVerts5) {
add_edge(c, d, g);
add_edge(e, c, g);
auto cyclics = findVerticesInCycles(g);
auto cyclics = find_vertices_in_cycles(g);
ASSERT_EQ(set<NFAVertex>({g.startDs, b, c}), cyclics);
ASSERT_EQ(flat_set<NFAVertex>({g.startDs, b, c}), cyclics);
}

View File

@@ -54,14 +54,14 @@ TEST(Shuffle, PackedExtract32_1) {
for (unsigned int i = 0; i < 32; i++) {
// shuffle a single 1 bit to the front
u32 mask = 1U << i;
EXPECT_EQ(1U, packedExtract32(mask, mask));
EXPECT_EQ(1U, packedExtract32(~0U, mask));
EXPECT_EQ(1U, pext32(mask, mask));
EXPECT_EQ(1U, pext32(~0U, mask));
// we should get zero out of these cases
EXPECT_EQ(0U, packedExtract32(0, mask));
EXPECT_EQ(0U, packedExtract32(~mask, mask));
EXPECT_EQ(0U, pext32(0, mask));
EXPECT_EQ(0U, pext32(~mask, mask));
// we should get zero out of all the other bit positions
// Probe every bit position other than i. The exclusion of i has to be a
// skip inside the body: putting `j != i` in the loop condition would end the
// loop as soon as j reaches i and leave every position above i unchecked.
for (unsigned int j = 0; j < 32; j++) {
    if (j == i) {
        continue;
    }
    EXPECT_EQ(0U, packedExtract32((1U << j), mask));
    EXPECT_EQ(0U, pext32((1U << j), mask));
}
}
}
@@ -69,10 +69,10 @@ TEST(Shuffle, PackedExtract32_1) {
// With every bit of the 32-bit mask set, the extraction is expected to hand
// back its input unchanged: zero stays zero, all-ones stays all-ones, and a
// single set bit stays at its original position.
// NOTE(review): each check below appears twice, once through packedExtract32
// and once through pext32 -- presumably the old and new name of the same
// helper; confirm whether both spellings are meant to be kept.
TEST(Shuffle, PackedExtract32_2) {
// All 32 bits in mask are on
u32 mask = ~0U;
EXPECT_EQ(0U, packedExtract32(0, mask));
EXPECT_EQ(mask, packedExtract32(mask, mask));
EXPECT_EQ(0U, pext32(0, mask));
EXPECT_EQ(mask, pext32(mask, mask));
// Walk a single set bit through all 32 positions; it must not move.
for (unsigned int i = 0; i < 32; i++) {
EXPECT_EQ(1U << i, packedExtract32(1U << i, mask));
EXPECT_EQ(1U << i, pext32(1U << i, mask));
}
}
@@ -84,16 +84,16 @@ TEST(Shuffle, PackedExtract32_3) {
}
// Test both cases (all even bits, all odd bits)
EXPECT_EQ((1U << 16) - 1, packedExtract32(mask, mask));
EXPECT_EQ((1U << 16) - 1, packedExtract32(~mask, ~mask));
EXPECT_EQ(0U, packedExtract32(~mask, mask));
EXPECT_EQ(0U, packedExtract32(mask, ~mask));
EXPECT_EQ((1U << 16) - 1, pext32(mask, mask));
EXPECT_EQ((1U << 16) - 1, pext32(~mask, ~mask));
EXPECT_EQ(0U, pext32(~mask, mask));
EXPECT_EQ(0U, pext32(mask, ~mask));
for (unsigned int i = 0; i < 32; i += 2) {
EXPECT_EQ(1U << (i/2), packedExtract32(1U << i, mask));
EXPECT_EQ(0U, packedExtract32(1U << i, ~mask));
EXPECT_EQ(1U << (i/2), packedExtract32(1U << (i+1), ~mask));
EXPECT_EQ(0U, packedExtract32(1U << (i+1), mask));
EXPECT_EQ(1U << (i/2), pext32(1U << i, mask));
EXPECT_EQ(0U, pext32(1U << i, ~mask));
EXPECT_EQ(1U << (i/2), pext32(1U << (i+1), ~mask));
EXPECT_EQ(0U, pext32(1U << (i+1), mask));
}
}
@@ -102,14 +102,14 @@ TEST(Shuffle, PackedExtract64_1) {
for (unsigned int i = 0; i < 64; i++) {
// shuffle a single 1 bit to the front
u64a mask = 1ULL << i;
EXPECT_EQ(1U, packedExtract64(mask, mask));
EXPECT_EQ(1U, packedExtract64(~0ULL, mask));
EXPECT_EQ(1U, pext64(mask, mask));
EXPECT_EQ(1U, pext64(~0ULL, mask));
// we should get zero out of these cases
EXPECT_EQ(0U, packedExtract64(0, mask));
EXPECT_EQ(0U, packedExtract64(~mask, mask));
EXPECT_EQ(0U, pext64(0, mask));
EXPECT_EQ(0U, pext64(~mask, mask));
// we should get zero out of all the other bit positions
// Probe every bit position other than i. The exclusion of i has to be a
// skip inside the body: putting `j != i` in the loop condition would end the
// loop as soon as j reaches i and leave every position above i unchecked.
for (unsigned int j = 0; j < 64; j++) {
    if (j == i) {
        continue;
    }
    EXPECT_EQ(0U, packedExtract64((1ULL << j), mask));
    EXPECT_EQ(0U, pext64((1ULL << j), mask));
}
}
}
@@ -117,26 +117,26 @@ TEST(Shuffle, PackedExtract64_1) {
// Exercises the 64-bit extraction with three contiguous 32-bit-wide masks
// (low half, high half, middle). In each case a zero input must give zero,
// the mask applied to itself must give 0xffffffff, and a single bit at
// position i inside the mask must come out at (i - lowest mask bit).
// NOTE(review): each check below appears twice, once through packedExtract64
// and once through pext64 -- presumably the old and new name of the same
// helper; confirm whether both spellings are meant to be kept.
TEST(Shuffle, PackedExtract64_2) {
// Fill first half of mask
u64a mask = 0x00000000ffffffffULL;
EXPECT_EQ(0U, packedExtract64(0, mask));
EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask));
EXPECT_EQ(0U, pext64(0, mask));
EXPECT_EQ(0xffffffffU, pext64(mask, mask));
// Mask starts at bit 0, so bit i is expected at output bit i.
for (unsigned int i = 0; i < 32; i++) {
EXPECT_EQ(1U << i, packedExtract64(1ULL << i, mask));
EXPECT_EQ(1U << i, pext64(1ULL << i, mask));
}
// Fill second half of mask
mask = 0xffffffff00000000ULL;
EXPECT_EQ(0U, packedExtract64(0, mask));
EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask));
EXPECT_EQ(0U, pext64(0, mask));
EXPECT_EQ(0xffffffffU, pext64(mask, mask));
// Mask starts at bit 32, so bit i is expected at output bit (i - 32).
for (unsigned int i = 32; i < 64; i++) {
EXPECT_EQ(1U << (i - 32), packedExtract64(1ULL << i, mask));
EXPECT_EQ(1U << (i - 32), pext64(1ULL << i, mask));
}
// Try one in the middle
mask = 0x0000ffffffff0000ULL;
EXPECT_EQ(0U, packedExtract64(0, mask));
EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask));
EXPECT_EQ(0U, pext64(0, mask));
EXPECT_EQ(0xffffffffU, pext64(mask, mask));
// Mask starts at bit 16, so bit i is expected at output bit (i - 16).
for (unsigned int i = 16; i < 48; i++) {
EXPECT_EQ(1U << (i - 16), packedExtract64(1ULL << i, mask));
EXPECT_EQ(1U << (i - 16), pext64(1ULL << i, mask));
}
}
@@ -148,16 +148,16 @@ TEST(Shuffle, PackedExtract64_3) {
}
// Test both cases (all even bits, all odd bits)
EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask));
EXPECT_EQ(0xffffffffU, packedExtract64(~mask, ~mask));
EXPECT_EQ(0U, packedExtract64(~mask, mask));
EXPECT_EQ(0U, packedExtract64(mask, ~mask));
EXPECT_EQ(0xffffffffU, pext64(mask, mask));
EXPECT_EQ(0xffffffffU, pext64(~mask, ~mask));
EXPECT_EQ(0U, pext64(~mask, mask));
EXPECT_EQ(0U, pext64(mask, ~mask));
for (unsigned int i = 0; i < 64; i += 2) {
EXPECT_EQ(1U << (i/2), packedExtract64(1ULL << i, mask));
EXPECT_EQ(0U, packedExtract64(1ULL << i, ~mask));
EXPECT_EQ(1U << (i/2), packedExtract64(1ULL << (i+1), ~mask));
EXPECT_EQ(0U, packedExtract64(1ULL << (i+1), mask));
EXPECT_EQ(1U << (i/2), pext64(1ULL << i, mask));
EXPECT_EQ(0U, pext64(1ULL << i, ~mask));
EXPECT_EQ(1U << (i/2), pext64(1ULL << (i+1), ~mask));
EXPECT_EQ(0U, pext64(1ULL << (i+1), mask));
}
}

View File

@@ -614,6 +614,12 @@ TEST(SimdUtilsTest, set16x8) {
}
}
// Checks that set4x32(v) produces a vector whose bytes equal four
// back-to-back copies of the 32-bit value v.
TEST(SimdUtilsTest, set4x32) {
    u32 expected[4] = { 0x12345678, 0x12345678, 0x12345678, 0x12345678 };

    m128 broadcast = set4x32(expected[0]);

    // Byte-wise comparison over the full width of the vector.
    ASSERT_EQ(0, memcmp(expected, &broadcast, sizeof(broadcast)));
}
#if defined(__AVX2__)
TEST(SimdUtilsTest, set32x8) {
char cmp[sizeof(m256)];
@@ -693,4 +699,50 @@ TEST(SimdUtilsTest, variableByteShift128) {
EXPECT_TRUE(!diff128(zeroes128(), variable_byte_shift_m128(in, -16)));
}
// Checks max_u8_m128 lane by lane against a hand-computed vector. The last
// two lanes pit 'E' and 0xfe against 0xff and expect 0xff, which only holds
// when the bytes are compared as unsigned values.
TEST(SimdUtilsTest, max_u8_m128) {
    char lhs[] = "0123456789ABCDE\xfe";
    char rhs[] = "!!23455889aBCd\xff\xff";
    char want[] = "0123456889aBCd\xff\xff";

    m128 expected = loadu128(want);
    m128 actual = max_u8_m128(loadu128(lhs), loadu128(rhs));

    EXPECT_TRUE(!diff128(actual, expected));
}
// Checks min_u8_m128 lane by lane against a hand-computed vector. The last
// two lanes pit 'E' and 0xfe against 0xff and expect 'E' and 0xfe, which only
// holds when the bytes are compared as unsigned values.
TEST(SimdUtilsTest, min_u8_m128) {
    char lhs[] = "0123456789ABCDE\xfe";
    char rhs[] = "!!23455889aBCd\xff\xff";
    char want[] = "!!23455789ABCDE\xfe";

    m128 expected = loadu128(want);
    m128 actual = min_u8_m128(loadu128(lhs), loadu128(rhs));

    EXPECT_TRUE(!diff128(actual, expected));
}
// Checks sadd_u8_m128 lane by lane against a hand-computed vector. Lanes 1
// and 2 (0x80 + 0x80 and 0xff + 'b') exceed a byte and are expected to clamp
// to 0xff; the remaining lanes are plain sums that fit in a byte.
TEST(SimdUtilsTest, sadd_u8_m128) {
    unsigned char lhs[] = {0, 0x80, 0xff, 'A',
                           '1', '2', '3', '4',
                           '1', '2', '3', '4',
                           '1', '2', '3', '4'};
    unsigned char rhs[] = {'a', 0x80, 'b', 'A',
                           0x10, 0x10, 0x10, 0x10,
                           0x30, 0x30, 0x30, 0x30,
                           0, 0, 0, 0};
    unsigned char want[] = {'a', 0xff, 0xff, 0x82,
                            'A', 'B', 'C', 'D',
                            'a', 'b', 'c', 'd',
                            '1', '2', '3', '4'};

    m128 expected = loadu128(want);
    m128 sum = sadd_u8_m128(loadu128(lhs), loadu128(rhs));

    EXPECT_TRUE(!diff128(sum, expected));
}
// Checks sub_u8_m128 lane by lane against a hand-computed vector of
// byte-wise differences (first operand minus second). No lane in this data
// set has a subtrahend larger than its minuend.
TEST(SimdUtilsTest, sub_u8_m128) {
    unsigned char minuend[] = {'a', 0xff, 0xff, 0x82,
                               'A', 'B', 'C', 'D',
                               'a', 'b', 'c', 'd',
                               '1', '2', '3', '4'};
    unsigned char subtrahend[] = {0, 0x80, 0xff, 'A',
                                  '1', '2', '3', '4',
                                  '1', '2', '3', '4',
                                  '1', '2', '3', '4'};
    unsigned char want[] = {'a', 0x7f, 0, 'A',
                            0x10, 0x10, 0x10, 0x10,
                            0x30, 0x30, 0x30, 0x30,
                            0, 0, 0, 0};

    m128 expected = loadu128(want);
    m128 difference = sub_u8_m128(loadu128(minuend), loadu128(subtrahend));

    EXPECT_TRUE(!diff128(difference, expected));
}
} // namespace