From b96d5c23d1f788c43b770d7a241e33bd49da8fac Mon Sep 17 00:00:00 2001
From: "Xu, Chi"
Date: Fri, 22 Jul 2016 03:35:53 +0800
Subject: [PATCH] rose: add new instruction CHECK_MASK_32

This is a specialisation of the "lookaround" code.
---
 CMakeLists.txt                   |   1 +
 src/rose/program_runtime.h       |  88 +++++++++++++
 src/rose/rose_build_bytecode.cpp |  64 ++++++++++
 src/rose/rose_dump.cpp           |  14 ++
 src/rose/rose_program.h          |  16 ++-
 src/rose/validate_mask.h         |  41 ++++++
 src/util/copybytes.h             |  86 +++++++++++++
 src/util/simd_utils.h            |  26 ++++
 unit/CMakeLists.txt              |   1 +
 unit/internal/rose_mask_32.cpp   | 211 +++++++++++++++++++++++++++++++
 10 files changed, 545 insertions(+), 3 deletions(-)
 create mode 100644 src/util/copybytes.h
 create mode 100644 unit/internal/rose_mask_32.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b0094d94..8f7e9bf0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -555,6 +555,7 @@ set (hs_exec_SRCS
     src/rose/rose_common.h
     src/rose/validate_mask.h
     src/util/bitutils.h
+    src/util/copybytes.h
     src/util/exhaust.h
     src/util/fatbit.h
     src/util/fatbit.c
diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h
index 8bf41715..f54b1347 100644
--- a/src/rose/program_runtime.h
+++ b/src/rose/program_runtime.h
@@ -50,6 +50,7 @@
 #include "ue2common.h"
 #include "hwlm/hwlm.h" // for hwlmcb_rv_t
 #include "util/compare.h"
+#include "util/copybytes.h"
 #include "util/fatbit.h"
 #include "util/multibit.h"
 
@@ -783,6 +784,82 @@ int roseCheckMask(const struct core_info *ci, u64a and_mask, u64a cmp_mask,
         return 0;
     }
 }
+
+static rose_inline
+int roseCheckMask32(const struct core_info *ci, const u8 *and_mask,
+                    const u8 *cmp_mask, const u32 neg_mask,
+                    s32 checkOffset, u64a end) {
+    const s64a base_offset = (s64a)end - ci->buf_offset;
+    s64a offset = base_offset + checkOffset;
+    DEBUG_PRINTF("end %llu base_offset %lld\n", end, base_offset);
+    DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
+
+    if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
+        DEBUG_PRINTF("too early, fail\n");
+        return 0;
+    }
+
+    m256 data = zeroes256(); // consists of the following four parts.
+    s32 c_shift = 0; // blank bytes after current.
+    s32 h_shift = 0; // blank bytes before history.
+    s32 h_len = 32; // number of bytes from history buffer.
+    s32 c_len = 0; // number of bytes from current buffer.
+    /* The invariant h_shift + h_len + c_len + c_shift == 32 must hold. */
+
+    if (offset < 0) {
+        s32 h_offset = 0; // the start offset in the history buffer.
+        if (offset < -(s64a)ci->hlen) {
+            if (offset + 32 <= -(s64a)ci->hlen) {
+                DEBUG_PRINTF("all before history\n");
+                return 1;
+            }
+            h_shift = -(offset + (s64a)ci->hlen);
+            h_len = 32 - h_shift;
+        } else {
+            h_offset = ci->hlen + offset;
+        }
+        if (offset + 32 > 0) {
+            // part in current buffer.
+            c_len = offset + 32;
+            h_len = -(offset + h_shift);
+            if (c_len > (s64a)ci->len) {
+                // out of current buffer.
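+                // Hypothetical example: offset == -4 with ci->len == 20
+                // gives c_len == 28, so c_len is clamped to 20 and the
+                // trailing 8 bytes of data stay zeroed; valid_data_mask
+                // later masks them off via c_shift.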
+                c_shift = c_len - ci->len;
+                c_len = ci->len;
+            }
+            copy_upto_32_bytes((u8 *)&data - offset, ci->buf, c_len);
+        }
+        assert(h_shift + h_len + c_len + c_shift == 32);
+        copy_upto_32_bytes((u8 *)&data + h_shift, ci->hbuf + h_offset, h_len);
+    } else {
+        if (offset + 32 > (s64a)ci->len) {
+            if (offset >= (s64a)ci->len) {
+                DEBUG_PRINTF("all in the future.\n");
+                return 1;
+            }
+            c_len = ci->len - offset;
+            c_shift = 32 - c_len;
+            copy_upto_32_bytes((u8 *)&data, ci->buf + offset, c_len);
+        } else {
+            data = loadu256(ci->buf + offset);
+        }
+    }
+    DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift);
+    DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len);
+    // Use valid_data_mask to mask off bytes before history and in the future.
+    u32 valid_data_mask;
+    valid_data_mask = (~0u) << (h_shift + c_shift) >> (c_shift);
+
+    m256 and_mask_m256 = loadu256(and_mask);
+    m256 cmp_mask_m256 = loadu256(cmp_mask);
+    if (validateMask32(data, valid_data_mask, and_mask_m256,
+                       cmp_mask_m256, neg_mask)) {
+        DEBUG_PRINTF("Mask32 passed\n");
+        return 1;
+    }
+    return 0;
+}
+
 /**
  * \brief Scan around a literal, checking that the "lookaround" reach masks
  * are satisfied.
@@ -1213,6 +1290,17 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(CHECK_MASK_32) {
+                struct core_info *ci = &scratch->core_info;
+                if (!roseCheckMask32(ci, ri->and_mask, ri->cmp_mask,
+                                     ri->neg_mask, ri->offset, end)) {
+                    assert(ri->fail_jump);
+                    pc += ri->fail_jump;
+                    continue;
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(CHECK_BYTE) {
                 const struct core_info *ci = &scratch->core_info;
                 if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask,
diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index 56591de8..add3670b 100644
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@@ -201,6 +201,7 @@ public:
     case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled;
     case ROSE_INSTR_CHECK_LOOKAROUND: return &u.checkLookaround;
     case ROSE_INSTR_CHECK_MASK: return &u.checkMask;
+    case ROSE_INSTR_CHECK_MASK_32: return &u.checkMask32;
     case ROSE_INSTR_CHECK_BYTE: return &u.checkByte;
     case ROSE_INSTR_CHECK_INFIX: return &u.checkInfix;
     case ROSE_INSTR_CHECK_PREFIX: return &u.checkPrefix;
@@ -253,6 +254,7 @@ public:
     case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled);
     case ROSE_INSTR_CHECK_LOOKAROUND: return sizeof(u.checkLookaround);
     case ROSE_INSTR_CHECK_MASK: return sizeof(u.checkMask);
+    case ROSE_INSTR_CHECK_MASK_32: return sizeof(u.checkMask32);
     case ROSE_INSTR_CHECK_BYTE: return sizeof(u.checkByte);
     case ROSE_INSTR_CHECK_INFIX: return sizeof(u.checkInfix);
     case ROSE_INSTR_CHECK_PREFIX: return sizeof(u.checkPrefix);
@@ -304,6 +306,7 @@ public:
         ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled;
         ROSE_STRUCT_CHECK_LOOKAROUND checkLookaround;
         ROSE_STRUCT_CHECK_MASK checkMask;
+        ROSE_STRUCT_CHECK_MASK_32 checkMask32;
         ROSE_STRUCT_CHECK_BYTE checkByte;
         ROSE_STRUCT_CHECK_INFIX checkInfix;
         ROSE_STRUCT_CHECK_PREFIX checkPrefix;
@@ -2847,6 +2850,9 @@ flattenProgram(const vector<vector<RoseInstruction>> &programs) {
             case ROSE_INSTR_CHECK_MASK:
                 ri.u.checkMask.fail_jump = jump_val;
                 break;
+            case ROSE_INSTR_CHECK_MASK_32:
+                ri.u.checkMask32.fail_jump = jump_val;
+                break;
             case ROSE_INSTR_CHECK_BYTE:
                 ri.u.checkByte.fail_jump = jump_val;
                 break;
@@ -3292,6 +3298,60 @@ bool makeRoleMask(const vector<LookEntry> &look,
     return false;
 }
 
+static UNUSED
+string convertMaskstoString(u8 *p, int byte_len) {
+    string s;
+    for (int i = 0; i < byte_len; i++) {
+        u8 hi = *p >> 4;
+        u8 lo = *p & 0xf;
+        // 48 == '0' and 87 == 'a' - 10: emit the mask as lowercase hex.
+        s += (char)(hi + (hi < 10 ? 48 : 87));
+        s += (char)(lo + (lo < 10 ? 48 : 87));
+        p++;
+    }
+    return s;
+}
+
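+/*
+ * Try to convert a lookaround chain spanning at most 32 bytes into a single
+ * CHECK_MASK_32 instruction. Byte i of and_mask/cmp_mask encodes the reach
+ * check for offset base_offset + i; bit i of neg_mask is set when that
+ * byte's check is negated. Returns false if the chain does not fit or a
+ * reach cannot be encoded as an and/cmp pair.
+ */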
+static
+bool makeRoleMask32(const vector<LookEntry> &look,
+                    vector<RoseInstruction> &program) {
+    if (look.back().offset >= look.front().offset + 32) {
+        return false;
+    }
+    s32 base_offset = verify_s32(look.front().offset);
+    u8 and_mask[32], cmp_mask[32];
+    memset(and_mask, 0, sizeof(and_mask));
+    memset(cmp_mask, 0, sizeof(cmp_mask));
+    u32 neg_mask = 0;
+    for (const auto &entry : look) {
+        u8 andmask_u8, cmpmask_u8, flip;
+        if (!checkReachWithFlip(entry.reach, andmask_u8,
+                                cmpmask_u8, flip)) {
+            return false;
+        }
+        u32 shift = entry.offset - base_offset;
+        assert(shift < 32);
+        and_mask[shift] = andmask_u8;
+        cmp_mask[shift] = cmpmask_u8;
+        if (flip) {
+            neg_mask |= 1 << shift;
+        }
+    }
+
+    DEBUG_PRINTF("and_mask %s\n", convertMaskstoString(and_mask, 32).c_str());
+    DEBUG_PRINTF("cmp_mask %s\n", convertMaskstoString(cmp_mask, 32).c_str());
+    DEBUG_PRINTF("neg_mask %08x\n", neg_mask);
+    DEBUG_PRINTF("base_offset %d\n", base_offset);
+
+    auto ri = RoseInstruction(ROSE_INSTR_CHECK_MASK_32,
+                              JumpTarget::NEXT_BLOCK);
+    memcpy(ri.u.checkMask32.and_mask, and_mask, sizeof(and_mask));
+    memcpy(ri.u.checkMask32.cmp_mask, cmp_mask, sizeof(cmp_mask));
+    ri.u.checkMask32.neg_mask = neg_mask;
+    ri.u.checkMask32.offset = base_offset;
+    program.push_back(ri);
+    return true;
+}
+
 static
 void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v,
                         vector<RoseInstruction> &program) {
@@ -3325,6 +3385,10 @@ void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v,
         return;
     }
 
+    if (makeRoleMask32(look, program)) {
+        return;
+    }
+
     DEBUG_PRINTF("role has lookaround\n");
     u32 look_idx = addLookaround(bc, look);
     u32 look_count = verify_u32(look.size());
diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp
index a3d00943..d9af8d87 100644
--- a/src/rose/rose_dump.cpp
+++ b/src/rose/rose_dump.cpp
@@ -303,6 +303,20 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(CHECK_MASK_32) {
+                os << "    and_mask "
+                   << dumpStrMask(ri->and_mask, sizeof(ri->and_mask))
+                   << endl;
+                os << "    cmp_mask "
+                   << dumpStrMask(ri->cmp_mask, sizeof(ri->cmp_mask))
+                   << endl;
+                os << "    neg_mask 0x" << std::hex << std::setw(8)
+                   << std::setfill('0') << ri->neg_mask << std::dec << endl;
+                os << "    offset " << ri->offset << endl;
+                os << "    fail_jump " << offset + ri->fail_jump << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(CHECK_BYTE) {
                 os << "    and_mask 0x" << std::hex << std::setw(2)
                    << std::setfill('0') << u32{ri->and_mask} << std::dec
diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h
index 545e190f..8dfa47ec 100644
--- a/src/rose/rose_program.h
+++ b/src/rose/rose_program.h
@@ -51,6 +51,7 @@ enum RoseInstructionCode {
     ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled".
     ROSE_INSTR_CHECK_LOOKAROUND,  //!< Lookaround check.
     ROSE_INSTR_CHECK_MASK,        //!< 8-byte mask check.
+    ROSE_INSTR_CHECK_MASK_32,     //!< 32-byte and/cmp/neg mask check.
     ROSE_INSTR_CHECK_BYTE,        //!< Single Byte check.
     ROSE_INSTR_CHECK_INFIX,       //!< Infix engine must be in accept state.
     ROSE_INSTR_CHECK_PREFIX,      //!< Prefix engine must be in accept state.
@@ -170,9 +171,18 @@ struct ROSE_STRUCT_CHECK_LOOKAROUND {
 
 struct ROSE_STRUCT_CHECK_MASK {
     u8 code; //!< From enum roseInstructionCode.
-    u64a and_mask; //!< 64-bits and mask.
-    u64a cmp_mask; //!< 64-bits cmp mask.
-    u64a neg_mask; //!< 64-bits negation mask.
+    u64a and_mask; //!< 8-byte and mask.
+    u64a cmp_mask; //!< 8-byte cmp mask.
+    u64a neg_mask; //!< 8-byte negation mask.
+    s32 offset; //!< Relative offset of the first byte.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
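+// CHECK_MASK_32 layout: byte i of and_mask/cmp_mask applies to the input
+// byte at (offset + i); bit i of neg_mask negates that byte's check.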
+struct ROSE_STRUCT_CHECK_MASK_32 {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 and_mask[32]; //!< 32-byte and mask.
+    u8 cmp_mask[32]; //!< 32-byte cmp mask.
+    u32 neg_mask; //!< 32-bit negation mask.
     s32 offset; //!< Relative offset of the first byte.
     u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
diff --git a/src/rose/validate_mask.h b/src/rose/validate_mask.h
index b2c2f5d6..ac8cc312 100644
--- a/src/rose/validate_mask.h
+++ b/src/rose/validate_mask.h
@@ -26,7 +26,22 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef VALIDATE_MASK_H
+#define VALIDATE_MASK_H
+
 #include "ue2common.h"
+#include "util/simd_utils.h"
+
+#if defined(DEBUG)
+static
+void validateMask32Print(const u8 *mask) {
+    int i;
+    for (i = 0; i < 32; i++) {
+        printf("%02x", mask[i]);
+    }
+    printf("\n");
+}
+#endif
 
 // check positive bytes in cmp_result.
 // return one if the check passed, zero otherwise.
@@ -75,3 +90,29 @@ int validateMask(u64a data, u64a valid_data_mask, u64a and_mask,
         return 0;
     }
 }
+
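+/*
+ * Byte i of data passes its check when (data[i] & and_mask[i]) ==
+ * cmp_mask[i]; if bit i of neg_mask is set, the sense of that check is
+ * inverted. Bytes whose bit in valid_data_mask is clear are ignored. For
+ * example, and_mask 0xdf with cmp_mask 0x41 accepts exactly 'A' or 'a'.
+ */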
+static really_inline
+int validateMask32(const m256 data, const u32 valid_data_mask,
+                   const m256 and_mask, const m256 cmp_mask,
+                   const u32 neg_mask) {
+    m256 cmp_result_256 = eq256(and256(data, and_mask), cmp_mask);
+    u32 cmp_result = ~movemask256(cmp_result_256);
+#ifdef DEBUG
+    DEBUG_PRINTF("data\n");
+    validateMask32Print((const u8 *)&data);
+    DEBUG_PRINTF("cmp_result\n");
+    validateMask32Print((const u8 *)&cmp_result_256);
+#endif
+    DEBUG_PRINTF("cmp_result %08x neg_mask %08x\n", cmp_result, neg_mask);
+    DEBUG_PRINTF("valid_data_mask %08x\n", valid_data_mask);
+
+    if ((cmp_result & valid_data_mask) == (neg_mask & valid_data_mask)) {
+        DEBUG_PRINTF("validateMask32 passed\n");
+        return 1;
+    } else {
+        DEBUG_PRINTF("validateMask32 failed\n");
+        return 0;
+    }
+}
+
+#endif
diff --git a/src/util/copybytes.h b/src/util/copybytes.h
new file mode 100644
index 00000000..872b8d28
--- /dev/null
+++ b/src/util/copybytes.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef COPY_BYTES_H
+#define COPY_BYTES_H
+
+#include "unaligned.h"
+#include "simd_utils.h"
+
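+/*
+ * Copy len (0..32) bytes from src to dst using the widest unaligned
+ * loads/stores available. Lengths without a dedicated case (5..7, 9..15,
+ * 17..31) are covered by two possibly overlapping stores; src and dst must
+ * not overlap.
+ */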
+static really_inline
+void copy_upto_32_bytes(u8 *dst, const u8 *src, unsigned int len) {
+    switch (len) {
+    case 0:
+        break;
+    case 1:
+        *dst = *src;
+        break;
+    case 2:
+        unaligned_store_u16(dst, unaligned_load_u16(src));
+        break;
+    case 3:
+        unaligned_store_u16(dst, unaligned_load_u16(src));
+        dst[2] = src[2];
+        break;
+    case 4:
+        unaligned_store_u32(dst, unaligned_load_u32(src));
+        break;
+    case 5:
+    case 6:
+    case 7:
+        unaligned_store_u32(dst + len - 4, unaligned_load_u32(src + len - 4));
+        unaligned_store_u32(dst, unaligned_load_u32(src));
+        break;
+    case 8:
+        unaligned_store_u64a(dst, unaligned_load_u64a(src));
+        break;
+    case 9:
+    case 10:
+    case 11:
+    case 12:
+    case 13:
+    case 14:
+    case 15:
+        unaligned_store_u64a(dst + len - 8, unaligned_load_u64a(src + len - 8));
+        unaligned_store_u64a(dst, unaligned_load_u64a(src));
+        break;
+    case 16:
+        storeu128(dst, loadu128(src));
+        break;
+    case 32:
+        storeu256(dst, loadu256(src));
+        break;
+    default:
+        assert(len < 32);
+        storeu128(dst + len - 16, loadu128(src + len - 16));
+        storeu128(dst, loadu128(src));
+        break;
+    }
+}
+
+#endif
diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h
index 3544629f..87de0940 100644
--- a/src/util/simd_utils.h
+++ b/src/util/simd_utils.h
@@ -71,6 +71,7 @@
 
 #include "ue2common.h"
 #include "simd_types.h"
+#include "unaligned.h"
 
 // Define a common assume_aligned using an appropriate compiler built-in, if
 // it's available. Note that we need to handle C or C++ compilation.
@@ -354,6 +355,21 @@ m256 set32x8(u32 in) {
     return rv;
 }
 
+static really_inline
+m256 eq256(m256 a, m256 b) {
+    m256 rv;
+    rv.lo = eq128(a.lo, b.lo);
+    rv.hi = eq128(a.hi, b.hi);
+    return rv;
+}
+
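+// Combine the two 128-bit lane movemasks into one 32-bit byte mask: lane 0
+// lands in the low 16 bits, lane 1 in the high 16 bits.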
+ */ + +#include "config.h" + +#include "rose/validate_mask.h" +#include "gtest/gtest.h" + +#define ONES32 0xffffffffu + +union RoseLookaroundMask32 { + m256 a256; + u8 a8[32]; +}; + +struct ValidateMask32TestInfo { + RoseLookaroundMask32 data; + u32 valid_mask; + RoseLookaroundMask32 and_mask; + RoseLookaroundMask32 cmp_mask; + u32 neg_mask; +}; + +struct ValidateMask32InitInfo { + int idx; + u8 data; + u8 and_mask; + u8 cmp_mask; + u8 neg_mask; +}; + + +static const ValidateMask32InitInfo testBasicIdx[][33] = { + { + {1, 0x34, 0xf8, 0x30, 0}, + {2, 0x34, 0xf8, 0x30, 0}, + {8, 0x23, 0xff, 0x23, 0}, + {9, 0x34, 0xf8, 0x30, 0}, + {10, 0x41, 0xdf, 0x41, 0}, + {11, 0x63, 0xdd, 0x41, 0}, + {12, 0x61, 0xdd, 0x41, 0}, + {13, 0x41, 0xdf, 0x41, 0}, + {14, 0x61, 0xdf, 0x41, 0}, + {15, 0x41, 0xdf, 0x41, 0}, + {16, 0x43, 0xdd, 0x41, 0}, + {17, 0x61, 0xdd, 0x41, 0}, + {23, 0x63, 0xdd, 0x41, 0}, + {24, 0x4f, 0xfc, 0x4c, 0}, + {25, 0x4d, 0xfc, 0x4c, 0}, + {26, 0x4d, 0xfc, 0x4c, 0}, + {-1, 0, 0, 0, 0}, + }, + { + {11, 0, 0xff, 0x55, 1}, + {12, 0, 0xff, 0x36, 1}, + {13, 0, 0xfe, 0x34, 1}, + {14, 0x4d, 0xfe, 0x4c, 0}, + {15, 0x41, 0xbf, 0x01, 0}, + {16, 0x53, 0xdf, 0x73, 1}, + {17, 0x4b, 0, 0, 0}, + {18, 0, 0x2c, 0x2c, 1}, + {-1, 0, 0, 0, 0}, + }, + { + {15, 0x46, 0xdf, 0x46, 0}, + {16, 0x4f, 0xdf, 0x46, 1}, + {17, 0x6f, 0xff, 0x6f, 0}, + {18, 0x31, 0xfe, 0x30, 0}, + {19, 0x34, 0xf8, 0x30, 0}, + {20, 0x66, 0xc0, 0x40, 0}, + {21, 0x6f, 0xf0, 0x60, 0}, + {22, 0x6f, 0, 0, 0}, + {23, 0x46, 0xdf, 0x44, 1}, + {24, 0x4f, 0xdf, 0x46, 1}, + {25, 0x6f, 0xff, 0x4f, 1}, + {26, 0x31, 0xfe, 0x30, 0}, + {27, 0x34, 0xf8, 0x34, 1}, + {28, 0x66, 0xc0, 0x60, 1}, + {29, 0x6f, 0xf0, 0x6f, 1}, + {30, 0x6f, 0, 0x60, 1}, + {-1, 0, 0, 0, 0}, + }, + { + {31, 0x4a, 0x80, 0, 0}, + {-1, 0, 0, 0, 1}, + }, + { + {12, 0x2b, 0x3d, 0x2d, 1}, + {13, 0x2b, 0x3d, 0x4c, 1}, + {23, 0x4a, 0x88, 0x0a, 1}, + {-1, 0, 0, 0, 0}, + }, +}; + +static void initTestInfo(ValidateMask32TestInfo &t) { + t.data.a256 = zeroes256(); + t.valid_mask = 0xffffffff; + t.and_mask.a256 = zeroes256(); + t.cmp_mask.a256 = zeroes256(); + t.neg_mask = 0; +}; + + +static +int testBasicInit(ValidateMask32TestInfo *testB) { + int len = 0; + ValidateMask32TestInfo t; + for (size_t i = 0; i < ARRAY_LENGTH(testBasicIdx); i++) { + initTestInfo(t); + for (const auto &line: testBasicIdx[i]) { + if (line.idx < 0) { + break; + } + int index = line.idx; + t.data.a8[index] = line.data; + t.and_mask.a8[index] = line.and_mask; + t.cmp_mask.a8[index] = line.cmp_mask; + t.neg_mask |= line.neg_mask << index; + } + testB[i] = t; + len++; + } + return len; +} + +TEST(ValidateMask32, testMask32_1) { + ValidateMask32TestInfo testBasic[20]; + int test_len = testBasicInit(testBasic); + for (int i = 0; i < test_len; i++) { + const auto t = testBasic[i]; + EXPECT_EQ(1, validateMask32(t.data.a256, t.valid_mask, + t.and_mask.a256, t.cmp_mask.a256, + t.neg_mask)); + } +} + +TEST(ValidateMask32, testMask32_2) { + ValidateMask32TestInfo testBasic[20]; + int test_len = testBasicInit(testBasic); + for (int left = 0; left <= 32; left++) { + for (int right = 0; right + left < 32; right++) { + u32 valid_mask = ONES32 << (left + right) >> left; + for (int i = 0; i < test_len; i++) { + const auto &t = testBasic[i]; + int bool_result; + bool_result = !(valid_mask & t.neg_mask); + EXPECT_EQ(bool_result, validateMask32(t.data.a256, + valid_mask, + t.and_mask.a256, + t.cmp_mask.a256, + 0)); + bool_result = (valid_mask & t.neg_mask) == valid_mask; + EXPECT_EQ(bool_result, validateMask32(t.data.a256, + valid_mask, 
+TEST(ValidateMask32, testMask32_3) {
+    ValidateMask32TestInfo testBasic[20];
+    testing::internal::Random neg_mask_rand(451);
+    int test_len = testBasicInit(testBasic);
+    for (int left = 0; left <= 32; left++) {
+        for (int right = 0; right + left < 32; right++) {
+            u32 valid_mask = ONES32 << (left + right) >> left;
+            for (int i = 0; i < test_len; i++) {
+                const auto &t = testBasic[i];
+                int bool_result;
+                for (int j = 0; j < 5000; j++) {
+                    // Generate(range) returns values in [0, range), so bit 31
+                    // of neg_mask is never set here.
+                    u32 neg_mask = neg_mask_rand.Generate(1u << 31);
+                    bool_result = (neg_mask & valid_mask) ==
+                                  (t.neg_mask & valid_mask);
+                    EXPECT_EQ(bool_result, validateMask32(t.data.a256,
+                                                          valid_mask,
+                                                          t.and_mask.a256,
+                                                          t.cmp_mask.a256,
+                                                          neg_mask));
+                }
+            }
+        }
+    }
+}