From b96d5c23d1f788c43b770d7a241e33bd49da8fac Mon Sep 17 00:00:00 2001
From: "Xu, Chi"
Date: Fri, 22 Jul 2016 03:35:53 +0800
Subject: [PATCH] rose: add new instruction CHECK_MASK_32

This is a specialisation of the "lookaround" code.
---
 CMakeLists.txt                   |   1 +
 src/rose/program_runtime.h       |  88 +++++++++++++
 src/rose/rose_build_bytecode.cpp |  64 ++++++++++
 src/rose/rose_dump.cpp           |  14 ++
 src/rose/rose_program.h          |  16 ++-
 src/rose/validate_mask.h         |  41 ++++++
 src/util/copybytes.h             |  86 +++++++++++++
 src/util/simd_utils.h            |  26 ++++
 unit/CMakeLists.txt              |   1 +
 unit/internal/rose_mask_32.cpp   | 211 +++++++++++++++++++++++++++++++
 10 files changed, 545 insertions(+), 3 deletions(-)
 create mode 100644 src/util/copybytes.h
 create mode 100644 unit/internal/rose_mask_32.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b0094d94..8f7e9bf0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -555,6 +555,7 @@ set (hs_exec_SRCS
     src/rose/rose_common.h
     src/rose/validate_mask.h
     src/util/bitutils.h
+    src/util/copybytes.h
     src/util/exhaust.h
     src/util/fatbit.h
     src/util/fatbit.c
diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h
index 8bf41715..f54b1347 100644
--- a/src/rose/program_runtime.h
+++ b/src/rose/program_runtime.h
@@ -50,6 +50,7 @@
 #include "ue2common.h"
 #include "hwlm/hwlm.h" // for hwlmcb_rv_t
 #include "util/compare.h"
+#include "util/copybytes.h"
 #include "util/fatbit.h"
 #include "util/multibit.h"
 
@@ -783,6 +784,82 @@ int roseCheckMask(const struct core_info *ci, u64a and_mask, u64a cmp_mask,
         return 0;
     }
 }
+
+static rose_inline
+int roseCheckMask32(const struct core_info *ci, const u8 *and_mask,
+                    const u8 *cmp_mask, const u32 neg_mask,
+                    s32 checkOffset, u64a end) {
+    const s64a base_offset = (s64a)end - ci->buf_offset;
+    s64a offset = base_offset + checkOffset;
+    DEBUG_PRINTF("end %llu base_offset %lld\n", end, base_offset);
+    DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
+
+    if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
+        DEBUG_PRINTF("too early, fail\n");
+        return 0;
+    }
+
+    m256 data = zeroes256(); // consists of the following four parts.
+    s32 c_shift = 0; // blank bytes after current.
+    s32 h_shift = 0; // blank bytes before history.
+    s32 h_len = 32; // number of bytes from history buffer.
+    s32 c_len = 0; // number of bytes from current buffer.
+    /* The invariant h_shift + h_len + c_len + c_shift == 32 must hold. */
+
+    if (offset < 0) {
+        s32 h_offset = 0; // the start offset in the history buffer.
+        if (offset < -(s64a)ci->hlen) {
+            if (offset + 32 <= -(s64a)ci->hlen) {
+                DEBUG_PRINTF("all before history\n");
+                return 1;
+            }
+            h_shift = -(offset + (s64a)ci->hlen);
+            h_len = 32 - h_shift;
+        } else {
+            h_offset = ci->hlen + offset;
+        }
+        if (offset + 32 > 0) {
+            // part in current buffer.
+            c_len = offset + 32;
+            h_len = -(offset + h_shift);
+            if (c_len > (s64a)ci->len) {
+                // out of current buffer.
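+                // Hypothetical example: offset == -4 with ci->len == 20
+                // gives c_len == 28, so c_len is clamped to 20 and the
+                // trailing 8 bytes of data stay zeroed; valid_data_mask
+                // later masks them off via c_shift.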
+                c_shift = c_len - ci->len;
+                c_len = ci->len;
+            }
+            copy_upto_32_bytes((u8 *)&data - offset, ci->buf, c_len);
+        }
+        assert(h_shift + h_len + c_len + c_shift == 32);
+        copy_upto_32_bytes((u8 *)&data + h_shift, ci->hbuf + h_offset, h_len);
+    } else {
+        if (offset + 32 > (s64a)ci->len) {
+            if (offset >= (s64a)ci->len) {
+                DEBUG_PRINTF("all in the future.\n");
+                return 1;
+            }
+            c_len = ci->len - offset;
+            c_shift = 32 - c_len;
+            copy_upto_32_bytes((u8 *)&data, ci->buf + offset, c_len);
+        } else {
+            data = loadu256(ci->buf + offset);
+        }
+    }
+    DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift);
+    DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len);
+    // Use valid_data_mask to mask off bytes before history and in the future.
+    u32 valid_data_mask;
+    valid_data_mask = (~0u) << (h_shift + c_shift) >> (c_shift);
+
+    m256 and_mask_m256 = loadu256(and_mask);
+    m256 cmp_mask_m256 = loadu256(cmp_mask);
+    if (validateMask32(data, valid_data_mask, and_mask_m256,
+                       cmp_mask_m256, neg_mask)) {
+        DEBUG_PRINTF("Mask32 passed\n");
+        return 1;
+    }
+    return 0;
+}
+
 /**
  * \brief Scan around a literal, checking that the "lookaround" reach masks
  * are satisfied.
@@ -1213,6 +1290,17 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(CHECK_MASK_32) {
+                struct core_info *ci = &scratch->core_info;
+                if (!roseCheckMask32(ci, ri->and_mask, ri->cmp_mask,
+                                     ri->neg_mask, ri->offset, end)) {
+                    assert(ri->fail_jump);
+                    pc += ri->fail_jump;
+                    continue;
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(CHECK_BYTE) {
                 const struct core_info *ci = &scratch->core_info;
                 if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask,
diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index 56591de8..add3670b 100644
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@@ -201,6 +201,7 @@ public:
     case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled;
     case ROSE_INSTR_CHECK_LOOKAROUND: return &u.checkLookaround;
     case ROSE_INSTR_CHECK_MASK: return &u.checkMask;
+    case ROSE_INSTR_CHECK_MASK_32: return &u.checkMask32;
     case ROSE_INSTR_CHECK_BYTE: return &u.checkByte;
     case ROSE_INSTR_CHECK_INFIX: return &u.checkInfix;
     case ROSE_INSTR_CHECK_PREFIX: return &u.checkPrefix;
@@ -253,6 +254,7 @@ public:
     case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled);
     case ROSE_INSTR_CHECK_LOOKAROUND: return sizeof(u.checkLookaround);
     case ROSE_INSTR_CHECK_MASK: return sizeof(u.checkMask);
+    case ROSE_INSTR_CHECK_MASK_32: return sizeof(u.checkMask32);
     case ROSE_INSTR_CHECK_BYTE: return sizeof(u.checkByte);
     case ROSE_INSTR_CHECK_INFIX: return sizeof(u.checkInfix);
     case ROSE_INSTR_CHECK_PREFIX: return sizeof(u.checkPrefix);
@@ -304,6 +306,7 @@ public:
         ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled;
         ROSE_STRUCT_CHECK_LOOKAROUND checkLookaround;
         ROSE_STRUCT_CHECK_MASK checkMask;
+        ROSE_STRUCT_CHECK_MASK_32 checkMask32;
         ROSE_STRUCT_CHECK_BYTE checkByte;
         ROSE_STRUCT_CHECK_INFIX checkInfix;
         ROSE_STRUCT_CHECK_PREFIX checkPrefix;
@@ -2847,6 +2850,9 @@ flattenProgram(const vector<vector<RoseInstruction>> &programs) {
             case ROSE_INSTR_CHECK_MASK:
                 ri.u.checkMask.fail_jump = jump_val;
                 break;
+            case ROSE_INSTR_CHECK_MASK_32:
+                ri.u.checkMask32.fail_jump = jump_val;
+                break;
             case ROSE_INSTR_CHECK_BYTE:
                 ri.u.checkByte.fail_jump = jump_val;
                 break;
@@ -3292,6 +3298,60 @@ bool makeRoleMask(const vector<LookEntry> &look,
     return false;
 }
 
+static UNUSED
+string convertMaskstoString(u8 *p, int byte_len) {
+    string s;
+    for (int i = 0; i < byte_len; i++) {
+        u8 hi = *p >> 4;
+        u8 lo = *p & 0xf;
+        // 48 == '0' and 87 == 'a' - 10: emit the mask as lowercase hex.
+        s += (char)(hi + (hi < 10 ? 48 : 87));
+        s += (char)(lo + (lo < 10 ? 48 : 87));
+        p++;
+    }
+    return s;
+}
+
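+/*
+ * Try to convert a lookaround chain spanning at most 32 bytes into a single
+ * CHECK_MASK_32 instruction. Byte i of and_mask/cmp_mask encodes the reach
+ * check for offset base_offset + i; bit i of neg_mask is set when that
+ * byte's check is negated. Returns false if the chain does not fit or a
+ * reach cannot be encoded as an and/cmp pair.
+ */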
+static
+bool makeRoleMask32(const vector<LookEntry> &look,
+                    vector<RoseInstruction> &program) {
+    if (look.back().offset >= look.front().offset + 32) {
+        return false;
+    }
+    s32 base_offset = verify_s32(look.front().offset);
+    u8 and_mask[32], cmp_mask[32];
+    memset(and_mask, 0, sizeof(and_mask));
+    memset(cmp_mask, 0, sizeof(cmp_mask));
+    u32 neg_mask = 0;
+    for (const auto &entry : look) {
+        u8 andmask_u8, cmpmask_u8, flip;
+        if (!checkReachWithFlip(entry.reach, andmask_u8,
+                                cmpmask_u8, flip)) {
+            return false;
+        }
+        u32 shift = entry.offset - base_offset;
+        assert(shift < 32);
+        and_mask[shift] = andmask_u8;
+        cmp_mask[shift] = cmpmask_u8;
+        if (flip) {
+            neg_mask |= 1 << shift;
+        }
+    }
+
+    DEBUG_PRINTF("and_mask %s\n", convertMaskstoString(and_mask, 32).c_str());
+    DEBUG_PRINTF("cmp_mask %s\n", convertMaskstoString(cmp_mask, 32).c_str());
+    DEBUG_PRINTF("neg_mask %08x\n", neg_mask);
+    DEBUG_PRINTF("base_offset %d\n", base_offset);
+
+    auto ri = RoseInstruction(ROSE_INSTR_CHECK_MASK_32,
+                              JumpTarget::NEXT_BLOCK);
+    memcpy(ri.u.checkMask32.and_mask, and_mask, sizeof(and_mask));
+    memcpy(ri.u.checkMask32.cmp_mask, cmp_mask, sizeof(cmp_mask));
+    ri.u.checkMask32.neg_mask = neg_mask;
+    ri.u.checkMask32.offset = base_offset;
+    program.push_back(ri);
+    return true;
+}
+
 static
 void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v,
                         vector<RoseInstruction> &program) {
@@ -3325,6 +3385,10 @@ void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v,
         return;
     }
 
+    if (makeRoleMask32(look, program)) {
+        return;
+    }
+
     DEBUG_PRINTF("role has lookaround\n");
     u32 look_idx = addLookaround(bc, look);
     u32 look_count = verify_u32(look.size());
diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp
index a3d00943..d9af8d87 100644
--- a/src/rose/rose_dump.cpp
+++ b/src/rose/rose_dump.cpp
@@ -303,6 +303,20 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(CHECK_MASK_32) {
+                os << "    and_mask "
+                   << dumpStrMask(ri->and_mask, sizeof(ri->and_mask))
+                   << endl;
+                os << "    cmp_mask "
+                   << dumpStrMask(ri->cmp_mask, sizeof(ri->cmp_mask))
+                   << endl;
+                os << "    neg_mask 0x" << std::hex << std::setw(8)
+                   << std::setfill('0') << ri->neg_mask << std::dec << endl;
+                os << "    offset " << ri->offset << endl;
+                os << "    fail_jump " << offset + ri->fail_jump << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(CHECK_BYTE) {
                 os << "    and_mask 0x" << std::hex << std::setw(2)
                    << std::setfill('0') << u32{ri->and_mask} << std::dec
diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h
index 545e190f..8dfa47ec 100644
--- a/src/rose/rose_program.h
+++ b/src/rose/rose_program.h
@@ -51,6 +51,7 @@ enum RoseInstructionCode {
     ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled".
     ROSE_INSTR_CHECK_LOOKAROUND,  //!< Lookaround check.
     ROSE_INSTR_CHECK_MASK,        //!< 8-byte mask check.
+    ROSE_INSTR_CHECK_MASK_32,     //!< 32-byte and/cmp/neg mask check.
     ROSE_INSTR_CHECK_BYTE,        //!< Single Byte check.
     ROSE_INSTR_CHECK_INFIX,       //!< Infix engine must be in accept state.
     ROSE_INSTR_CHECK_PREFIX,      //!< Prefix engine must be in accept state.
@@ -170,9 +171,18 @@ struct ROSE_STRUCT_CHECK_LOOKAROUND {
 
 struct ROSE_STRUCT_CHECK_MASK {
     u8 code; //!< From enum roseInstructionCode.
-    u64a and_mask; //!< 64-bits and mask.
-    u64a cmp_mask; //!< 64-bits cmp mask.
-    u64a neg_mask; //!< 64-bits negation mask.
+    u64a and_mask; //!< 8-byte and mask.
+    u64a cmp_mask; //!< 8-byte cmp mask.
+    u64a neg_mask; //!< 8-byte negation mask.
+    s32 offset; //!< Relative offset of the first byte.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
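+// CHECK_MASK_32 layout: byte i of and_mask/cmp_mask applies to the input
+// byte at (offset + i); bit i of neg_mask negates that byte's check.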
+struct ROSE_STRUCT_CHECK_MASK_32 {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 and_mask[32]; //!< 32-byte and mask.
+    u8 cmp_mask[32]; //!< 32-byte cmp mask.
+    u32 neg_mask; //!< 32-bit negation mask.
     s32 offset; //!< Relative offset of the first byte.
     u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
diff --git a/src/rose/validate_mask.h b/src/rose/validate_mask.h
index b2c2f5d6..ac8cc312 100644
--- a/src/rose/validate_mask.h
+++ b/src/rose/validate_mask.h
@@ -26,7 +26,22 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef VALIDATE_MASK_H
+#define VALIDATE_MASK_H
+
 #include "ue2common.h"
+#include "util/simd_utils.h"
+
+#if defined(DEBUG)
+static
+void validateMask32Print(const u8 *mask) {
+    int i;
+    for (i = 0; i < 32; i++) {
+        printf("%02x", mask[i]);
+    }
+    printf("\n");
+}
+#endif
 
 // check positive bytes in cmp_result.
 // return one if the check passed, zero otherwise.
@@ -75,3 +90,29 @@ int validateMask(u64a data, u64a valid_data_mask, u64a and_mask,
         return 0;
     }
 }
+
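+/*
+ * Byte i of data passes its check when (data[i] & and_mask[i]) ==
+ * cmp_mask[i]; if bit i of neg_mask is set, the sense of that check is
+ * inverted. Bytes whose bit in valid_data_mask is clear are ignored. For
+ * example, and_mask 0xdf with cmp_mask 0x41 accepts exactly 'A' or 'a'.
+ */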
+static really_inline
+int validateMask32(const m256 data, const u32 valid_data_mask,
+                   const m256 and_mask, const m256 cmp_mask,
+                   const u32 neg_mask) {
+    m256 cmp_result_256 = eq256(and256(data, and_mask), cmp_mask);
+    u32 cmp_result = ~movemask256(cmp_result_256);
+#ifdef DEBUG
+    DEBUG_PRINTF("data\n");
+    validateMask32Print((const u8 *)&data);
+    DEBUG_PRINTF("cmp_result\n");
+    validateMask32Print((const u8 *)&cmp_result_256);
+#endif
+    DEBUG_PRINTF("cmp_result %08x neg_mask %08x\n", cmp_result, neg_mask);
+    DEBUG_PRINTF("valid_data_mask %08x\n", valid_data_mask);
+
+    if ((cmp_result & valid_data_mask) == (neg_mask & valid_data_mask)) {
+        DEBUG_PRINTF("validateMask32 passed\n");
+        return 1;
+    } else {
+        DEBUG_PRINTF("validateMask32 failed\n");
+        return 0;
+    }
+}
+
+#endif
diff --git a/src/util/copybytes.h b/src/util/copybytes.h
new file mode 100644
index 00000000..872b8d28
--- /dev/null
+++ b/src/util/copybytes.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef COPY_BYTES_H
+#define COPY_BYTES_H
+
+#include "unaligned.h"
+#include "simd_utils.h"
+
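+/*
+ * Copy len (0..32) bytes from src to dst using the widest unaligned
+ * loads/stores available. Lengths without a dedicated case (5..7, 9..15,
+ * 17..31) are covered by two possibly overlapping stores; src and dst must
+ * not overlap.
+ */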
+static really_inline
+void copy_upto_32_bytes(u8 *dst, const u8 *src, unsigned int len) {
+    switch (len) {
+    case 0:
+        break;
+    case 1:
+        *dst = *src;
+        break;
+    case 2:
+        unaligned_store_u16(dst, unaligned_load_u16(src));
+        break;
+    case 3:
+        unaligned_store_u16(dst, unaligned_load_u16(src));
+        dst[2] = src[2];
+        break;
+    case 4:
+        unaligned_store_u32(dst, unaligned_load_u32(src));
+        break;
+    case 5:
+    case 6:
+    case 7:
+        unaligned_store_u32(dst + len - 4, unaligned_load_u32(src + len - 4));
+        unaligned_store_u32(dst, unaligned_load_u32(src));
+        break;
+    case 8:
+        unaligned_store_u64a(dst, unaligned_load_u64a(src));
+        break;
+    case 9:
+    case 10:
+    case 11:
+    case 12:
+    case 13:
+    case 14:
+    case 15:
+        unaligned_store_u64a(dst + len - 8, unaligned_load_u64a(src + len - 8));
+        unaligned_store_u64a(dst, unaligned_load_u64a(src));
+        break;
+    case 16:
+        storeu128(dst, loadu128(src));
+        break;
+    case 32:
+        storeu256(dst, loadu256(src));
+        break;
+    default:
+        assert(len < 32);
+        storeu128(dst + len - 16, loadu128(src + len - 16));
+        storeu128(dst, loadu128(src));
+        break;
+    }
+}
+
+#endif
diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h
index 3544629f..87de0940 100644
--- a/src/util/simd_utils.h
+++ b/src/util/simd_utils.h
@@ -71,6 +71,7 @@
 
 #include "ue2common.h"
 #include "simd_types.h"
+#include "unaligned.h"
 
 // Define a common assume_aligned using an appropriate compiler built-in, if
 // it's available. Note that we need to handle C or C++ compilation.
@@ -354,6 +355,21 @@ m256 set32x8(u32 in) {
     return rv;
 }
 
+static really_inline
+m256 eq256(m256 a, m256 b) {
+    m256 rv;
+    rv.lo = eq128(a.lo, b.lo);
+    rv.hi = eq128(a.hi, b.hi);
+    return rv;
+}
+
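+// Combine the two 128-bit lane movemasks into one 32-bit byte mask: lane 0
+// lands in the low 16 bits, lane 1 in the high 16 bits.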
+ */ + +#include "config.h" + +#include "rose/validate_mask.h" +#include "gtest/gtest.h" + +#define ONES32 0xffffffffu + +union RoseLookaroundMask32 { + m256 a256; + u8 a8[32]; +}; + +struct ValidateMask32TestInfo { + RoseLookaroundMask32 data; + u32 valid_mask; + RoseLookaroundMask32 and_mask; + RoseLookaroundMask32 cmp_mask; + u32 neg_mask; +}; + +struct ValidateMask32InitInfo { + int idx; + u8 data; + u8 and_mask; + u8 cmp_mask; + u8 neg_mask; +}; + + +static const ValidateMask32InitInfo testBasicIdx[][33] = { + { + {1, 0x34, 0xf8, 0x30, 0}, + {2, 0x34, 0xf8, 0x30, 0}, + {8, 0x23, 0xff, 0x23, 0}, + {9, 0x34, 0xf8, 0x30, 0}, + {10, 0x41, 0xdf, 0x41, 0}, + {11, 0x63, 0xdd, 0x41, 0}, + {12, 0x61, 0xdd, 0x41, 0}, + {13, 0x41, 0xdf, 0x41, 0}, + {14, 0x61, 0xdf, 0x41, 0}, + {15, 0x41, 0xdf, 0x41, 0}, + {16, 0x43, 0xdd, 0x41, 0}, + {17, 0x61, 0xdd, 0x41, 0}, + {23, 0x63, 0xdd, 0x41, 0}, + {24, 0x4f, 0xfc, 0x4c, 0}, + {25, 0x4d, 0xfc, 0x4c, 0}, + {26, 0x4d, 0xfc, 0x4c, 0}, + {-1, 0, 0, 0, 0}, + }, + { + {11, 0, 0xff, 0x55, 1}, + {12, 0, 0xff, 0x36, 1}, + {13, 0, 0xfe, 0x34, 1}, + {14, 0x4d, 0xfe, 0x4c, 0}, + {15, 0x41, 0xbf, 0x01, 0}, + {16, 0x53, 0xdf, 0x73, 1}, + {17, 0x4b, 0, 0, 0}, + {18, 0, 0x2c, 0x2c, 1}, + {-1, 0, 0, 0, 0}, + }, + { + {15, 0x46, 0xdf, 0x46, 0}, + {16, 0x4f, 0xdf, 0x46, 1}, + {17, 0x6f, 0xff, 0x6f, 0}, + {18, 0x31, 0xfe, 0x30, 0}, + {19, 0x34, 0xf8, 0x30, 0}, + {20, 0x66, 0xc0, 0x40, 0}, + {21, 0x6f, 0xf0, 0x60, 0}, + {22, 0x6f, 0, 0, 0}, + {23, 0x46, 0xdf, 0x44, 1}, + {24, 0x4f, 0xdf, 0x46, 1}, + {25, 0x6f, 0xff, 0x4f, 1}, + {26, 0x31, 0xfe, 0x30, 0}, + {27, 0x34, 0xf8, 0x34, 1}, + {28, 0x66, 0xc0, 0x60, 1}, + {29, 0x6f, 0xf0, 0x6f, 1}, + {30, 0x6f, 0, 0x60, 1}, + {-1, 0, 0, 0, 0}, + }, + { + {31, 0x4a, 0x80, 0, 0}, + {-1, 0, 0, 0, 1}, + }, + { + {12, 0x2b, 0x3d, 0x2d, 1}, + {13, 0x2b, 0x3d, 0x4c, 1}, + {23, 0x4a, 0x88, 0x0a, 1}, + {-1, 0, 0, 0, 0}, + }, +}; + +static void initTestInfo(ValidateMask32TestInfo &t) { + t.data.a256 = zeroes256(); + t.valid_mask = 0xffffffff; + t.and_mask.a256 = zeroes256(); + t.cmp_mask.a256 = zeroes256(); + t.neg_mask = 0; +}; + + +static +int testBasicInit(ValidateMask32TestInfo *testB) { + int len = 0; + ValidateMask32TestInfo t; + for (size_t i = 0; i < ARRAY_LENGTH(testBasicIdx); i++) { + initTestInfo(t); + for (const auto &line: testBasicIdx[i]) { + if (line.idx < 0) { + break; + } + int index = line.idx; + t.data.a8[index] = line.data; + t.and_mask.a8[index] = line.and_mask; + t.cmp_mask.a8[index] = line.cmp_mask; + t.neg_mask |= line.neg_mask << index; + } + testB[i] = t; + len++; + } + return len; +} + +TEST(ValidateMask32, testMask32_1) { + ValidateMask32TestInfo testBasic[20]; + int test_len = testBasicInit(testBasic); + for (int i = 0; i < test_len; i++) { + const auto t = testBasic[i]; + EXPECT_EQ(1, validateMask32(t.data.a256, t.valid_mask, + t.and_mask.a256, t.cmp_mask.a256, + t.neg_mask)); + } +} + +TEST(ValidateMask32, testMask32_2) { + ValidateMask32TestInfo testBasic[20]; + int test_len = testBasicInit(testBasic); + for (int left = 0; left <= 32; left++) { + for (int right = 0; right + left < 32; right++) { + u32 valid_mask = ONES32 << (left + right) >> left; + for (int i = 0; i < test_len; i++) { + const auto &t = testBasic[i]; + int bool_result; + bool_result = !(valid_mask & t.neg_mask); + EXPECT_EQ(bool_result, validateMask32(t.data.a256, + valid_mask, + t.and_mask.a256, + t.cmp_mask.a256, + 0)); + bool_result = (valid_mask & t.neg_mask) == valid_mask; + EXPECT_EQ(bool_result, validateMask32(t.data.a256, + valid_mask, 
+TEST(ValidateMask32, testMask32_3) {
+    ValidateMask32TestInfo testBasic[20];
+    testing::internal::Random neg_mask_rand(451);
+    int test_len = testBasicInit(testBasic);
+    for (int left = 0; left <= 32; left++) {
+        for (int right = 0; right + left < 32; right++) {
+            u32 valid_mask = ONES32 << (left + right) >> left;
+            for (int i = 0; i < test_len; i++) {
+                const auto &t = testBasic[i];
+                int bool_result;
+                for (int j = 0; j < 5000; j++) {
+                    // Generate(range) returns values in [0, range), so bit 31
+                    // of neg_mask is never set here.
+                    u32 neg_mask = neg_mask_rand.Generate(1u << 31);
+                    bool_result = (neg_mask & valid_mask) ==
+                                  (t.neg_mask & valid_mask);
+                    EXPECT_EQ(bool_result, validateMask32(t.data.a256,
+                                                          valid_mask,
+                                                          t.and_mask.a256,
+                                                          t.cmp_mask.a256,
+                                                          neg_mask));
+                }
+            }
+        }
+    }
+}