mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
rose: add new instruction CHECK_MASK_32
This is a specialisation of the "lookaround" code.
This commit is contained in:
parent
8be6c8b2ca
commit
b96d5c23d1
@ -555,6 +555,7 @@ set (hs_exec_SRCS
|
|||||||
src/rose/rose_common.h
|
src/rose/rose_common.h
|
||||||
src/rose/validate_mask.h
|
src/rose/validate_mask.h
|
||||||
src/util/bitutils.h
|
src/util/bitutils.h
|
||||||
|
src/util/copybytes.h
|
||||||
src/util/exhaust.h
|
src/util/exhaust.h
|
||||||
src/util/fatbit.h
|
src/util/fatbit.h
|
||||||
src/util/fatbit.c
|
src/util/fatbit.c
|
||||||
|
@ -50,6 +50,7 @@
|
|||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "hwlm/hwlm.h" // for hwlmcb_rv_t
|
#include "hwlm/hwlm.h" // for hwlmcb_rv_t
|
||||||
#include "util/compare.h"
|
#include "util/compare.h"
|
||||||
|
#include "util/copybytes.h"
|
||||||
#include "util/fatbit.h"
|
#include "util/fatbit.h"
|
||||||
#include "util/multibit.h"
|
#include "util/multibit.h"
|
||||||
|
|
||||||
@ -783,6 +784,82 @@ int roseCheckMask(const struct core_info *ci, u64a and_mask, u64a cmp_mask,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static rose_inline
|
||||||
|
int roseCheckMask32(const struct core_info *ci, const u8 *and_mask,
|
||||||
|
const u8 *cmp_mask, const u32 neg_mask,
|
||||||
|
s32 checkOffset, u64a end) {
|
||||||
|
const s64a base_offset = (s64a)end - ci->buf_offset;
|
||||||
|
s64a offset = base_offset + checkOffset;
|
||||||
|
DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
|
||||||
|
DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
|
||||||
|
|
||||||
|
if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
|
||||||
|
DEBUG_PRINTF("too early, fail\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
m256 data = zeroes256(); // consists of the following four parts.
|
||||||
|
s32 c_shift = 0; // blank bytes after current.
|
||||||
|
s32 h_shift = 0; // blank bytes before history.
|
||||||
|
s32 h_len = 32; // number of bytes from history buffer.
|
||||||
|
s32 c_len = 0; // number of bytes from current buffer.
|
||||||
|
/* h_shift + h_len + c_len + c_shift = 32 need to be hold.*/
|
||||||
|
|
||||||
|
if (offset < 0) {
|
||||||
|
s32 h_offset = 0; // the start offset in history buffer.
|
||||||
|
if (offset < -(s64a)ci->hlen) {
|
||||||
|
if (offset + 32 <= -(s64a)ci->hlen) {
|
||||||
|
DEBUG_PRINTF("all before history\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
h_shift = -(offset + (s64a)ci->hlen);
|
||||||
|
h_len = 32 - h_shift;
|
||||||
|
} else {
|
||||||
|
h_offset = ci->hlen + offset;
|
||||||
|
}
|
||||||
|
if (offset + 32 > 0) {
|
||||||
|
// part in current buffer.
|
||||||
|
c_len = offset + 32;
|
||||||
|
h_len = -(offset + h_shift);
|
||||||
|
if (c_len > (s64a)ci->len) {
|
||||||
|
// out of current buffer.
|
||||||
|
c_shift = c_len - ci->len;
|
||||||
|
c_len = ci->len;
|
||||||
|
}
|
||||||
|
copy_upto_32_bytes((u8 *)&data - offset, ci->buf, c_len);
|
||||||
|
}
|
||||||
|
assert(h_shift + h_len + c_len + c_shift == 32);
|
||||||
|
copy_upto_32_bytes((u8 *)&data + h_shift, ci->hbuf + h_offset, h_len);
|
||||||
|
} else {
|
||||||
|
if (offset + 32 > (s64a)ci->len) {
|
||||||
|
if (offset >= (s64a)ci->len) {
|
||||||
|
DEBUG_PRINTF("all in the future.\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
c_len = ci->len - offset;
|
||||||
|
c_shift = 32 - c_len;
|
||||||
|
copy_upto_32_bytes((u8 *)&data, ci->buf + offset, c_len);
|
||||||
|
} else {
|
||||||
|
data = loadu256(ci->buf + offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift);
|
||||||
|
DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len);
|
||||||
|
// we use valid_data_mask to blind bytes before history/in the future.
|
||||||
|
u32 valid_data_mask;
|
||||||
|
valid_data_mask = (~0u) << (h_shift + c_shift) >> (c_shift);
|
||||||
|
|
||||||
|
m256 and_mask_m256 = loadu256(and_mask);
|
||||||
|
m256 cmp_mask_m256 = loadu256(cmp_mask);
|
||||||
|
if (validateMask32(data, valid_data_mask, and_mask_m256,
|
||||||
|
cmp_mask_m256, neg_mask)) {
|
||||||
|
DEBUG_PRINTF("Mask32 passed\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Scan around a literal, checking that that "lookaround" reach masks
|
* \brief Scan around a literal, checking that that "lookaround" reach masks
|
||||||
* are satisfied.
|
* are satisfied.
|
||||||
@ -1213,6 +1290,17 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
|
|||||||
}
|
}
|
||||||
PROGRAM_NEXT_INSTRUCTION
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
|
PROGRAM_CASE(CHECK_MASK_32) {
|
||||||
|
struct core_info *ci = &scratch->core_info;
|
||||||
|
if (!roseCheckMask32(ci, ri->and_mask, ri->cmp_mask,
|
||||||
|
ri->neg_mask, ri->offset, end)) {
|
||||||
|
assert(ri->fail_jump);
|
||||||
|
pc += ri->fail_jump;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
PROGRAM_CASE(CHECK_BYTE) {
|
PROGRAM_CASE(CHECK_BYTE) {
|
||||||
const struct core_info *ci = &scratch->core_info;
|
const struct core_info *ci = &scratch->core_info;
|
||||||
if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask,
|
if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask,
|
||||||
|
@ -201,6 +201,7 @@ public:
|
|||||||
case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled;
|
case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled;
|
||||||
case ROSE_INSTR_CHECK_LOOKAROUND: return &u.checkLookaround;
|
case ROSE_INSTR_CHECK_LOOKAROUND: return &u.checkLookaround;
|
||||||
case ROSE_INSTR_CHECK_MASK: return &u.checkMask;
|
case ROSE_INSTR_CHECK_MASK: return &u.checkMask;
|
||||||
|
case ROSE_INSTR_CHECK_MASK_32: return &u.checkMask32;
|
||||||
case ROSE_INSTR_CHECK_BYTE: return &u.checkByte;
|
case ROSE_INSTR_CHECK_BYTE: return &u.checkByte;
|
||||||
case ROSE_INSTR_CHECK_INFIX: return &u.checkInfix;
|
case ROSE_INSTR_CHECK_INFIX: return &u.checkInfix;
|
||||||
case ROSE_INSTR_CHECK_PREFIX: return &u.checkPrefix;
|
case ROSE_INSTR_CHECK_PREFIX: return &u.checkPrefix;
|
||||||
@ -253,6 +254,7 @@ public:
|
|||||||
case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled);
|
case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled);
|
||||||
case ROSE_INSTR_CHECK_LOOKAROUND: return sizeof(u.checkLookaround);
|
case ROSE_INSTR_CHECK_LOOKAROUND: return sizeof(u.checkLookaround);
|
||||||
case ROSE_INSTR_CHECK_MASK: return sizeof(u.checkMask);
|
case ROSE_INSTR_CHECK_MASK: return sizeof(u.checkMask);
|
||||||
|
case ROSE_INSTR_CHECK_MASK_32: return sizeof(u.checkMask32);
|
||||||
case ROSE_INSTR_CHECK_BYTE: return sizeof(u.checkByte);
|
case ROSE_INSTR_CHECK_BYTE: return sizeof(u.checkByte);
|
||||||
case ROSE_INSTR_CHECK_INFIX: return sizeof(u.checkInfix);
|
case ROSE_INSTR_CHECK_INFIX: return sizeof(u.checkInfix);
|
||||||
case ROSE_INSTR_CHECK_PREFIX: return sizeof(u.checkPrefix);
|
case ROSE_INSTR_CHECK_PREFIX: return sizeof(u.checkPrefix);
|
||||||
@ -304,6 +306,7 @@ public:
|
|||||||
ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled;
|
ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled;
|
||||||
ROSE_STRUCT_CHECK_LOOKAROUND checkLookaround;
|
ROSE_STRUCT_CHECK_LOOKAROUND checkLookaround;
|
||||||
ROSE_STRUCT_CHECK_MASK checkMask;
|
ROSE_STRUCT_CHECK_MASK checkMask;
|
||||||
|
ROSE_STRUCT_CHECK_MASK_32 checkMask32;
|
||||||
ROSE_STRUCT_CHECK_BYTE checkByte;
|
ROSE_STRUCT_CHECK_BYTE checkByte;
|
||||||
ROSE_STRUCT_CHECK_INFIX checkInfix;
|
ROSE_STRUCT_CHECK_INFIX checkInfix;
|
||||||
ROSE_STRUCT_CHECK_PREFIX checkPrefix;
|
ROSE_STRUCT_CHECK_PREFIX checkPrefix;
|
||||||
@ -2847,6 +2850,9 @@ flattenProgram(const vector<vector<RoseInstruction>> &programs) {
|
|||||||
case ROSE_INSTR_CHECK_MASK:
|
case ROSE_INSTR_CHECK_MASK:
|
||||||
ri.u.checkMask.fail_jump = jump_val;
|
ri.u.checkMask.fail_jump = jump_val;
|
||||||
break;
|
break;
|
||||||
|
case ROSE_INSTR_CHECK_MASK_32:
|
||||||
|
ri.u.checkMask32.fail_jump = jump_val;
|
||||||
|
break;
|
||||||
case ROSE_INSTR_CHECK_BYTE:
|
case ROSE_INSTR_CHECK_BYTE:
|
||||||
ri.u.checkByte.fail_jump = jump_val;
|
ri.u.checkByte.fail_jump = jump_val;
|
||||||
break;
|
break;
|
||||||
@ -3292,6 +3298,60 @@ bool makeRoleMask(const vector<LookEntry> &look,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static UNUSED
|
||||||
|
string convertMaskstoString(u8 *p, int byte_len) {
|
||||||
|
string s;
|
||||||
|
for (int i = 0; i < byte_len; i++) {
|
||||||
|
u8 hi = *p >> 4;
|
||||||
|
u8 lo = *p & 0xf;
|
||||||
|
s += (char)(hi + (hi < 10 ? 48 : 87));
|
||||||
|
s += (char)(lo + (lo < 10 ? 48 : 87));
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
bool makeRoleMask32(const vector<LookEntry> &look,
|
||||||
|
vector<RoseInstruction> &program) {
|
||||||
|
if (look.back().offset >= look.front().offset + 32) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
s32 base_offset = verify_s32(look.front().offset);
|
||||||
|
u8 and_mask[32], cmp_mask[32];
|
||||||
|
memset(and_mask, 0, sizeof(and_mask));
|
||||||
|
memset(cmp_mask, 0, sizeof(cmp_mask));
|
||||||
|
u32 neg_mask = 0;
|
||||||
|
for (const auto &entry : look) {
|
||||||
|
u8 andmask_u8, cmpmask_u8, flip;
|
||||||
|
if (!checkReachWithFlip(entry.reach, andmask_u8,
|
||||||
|
cmpmask_u8, flip)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
u32 shift = entry.offset - base_offset;
|
||||||
|
assert(shift < 32);
|
||||||
|
and_mask[shift] = andmask_u8;
|
||||||
|
cmp_mask[shift] = cmpmask_u8;
|
||||||
|
if (flip) {
|
||||||
|
neg_mask |= 1 << shift;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUG_PRINTF("and_mask %s\n", convertMaskstoString(and_mask, 32).c_str());
|
||||||
|
DEBUG_PRINTF("cmp_mask %s\n", convertMaskstoString(cmp_mask, 32).c_str());
|
||||||
|
DEBUG_PRINTF("neg_mask %08x\n", neg_mask);
|
||||||
|
DEBUG_PRINTF("base_offset %d\n", base_offset);
|
||||||
|
|
||||||
|
auto ri = RoseInstruction(ROSE_INSTR_CHECK_MASK_32,
|
||||||
|
JumpTarget::NEXT_BLOCK);
|
||||||
|
memcpy(ri.u.checkMask32.and_mask, and_mask, sizeof(and_mask));
|
||||||
|
memcpy(ri.u.checkMask32.cmp_mask, cmp_mask, sizeof(cmp_mask));
|
||||||
|
ri.u.checkMask32.neg_mask = neg_mask;
|
||||||
|
ri.u.checkMask32.offset = base_offset;
|
||||||
|
program.push_back(ri);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v,
|
void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v,
|
||||||
vector<RoseInstruction> &program) {
|
vector<RoseInstruction> &program) {
|
||||||
@ -3325,6 +3385,10 @@ void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (makeRoleMask32(look, program)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("role has lookaround\n");
|
DEBUG_PRINTF("role has lookaround\n");
|
||||||
u32 look_idx = addLookaround(bc, look);
|
u32 look_idx = addLookaround(bc, look);
|
||||||
u32 look_count = verify_u32(look.size());
|
u32 look_count = verify_u32(look.size());
|
||||||
|
@ -303,6 +303,20 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
|||||||
}
|
}
|
||||||
PROGRAM_NEXT_INSTRUCTION
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
|
PROGRAM_CASE(CHECK_MASK_32) {
|
||||||
|
os << " and_mask "
|
||||||
|
<< dumpStrMask(ri->and_mask, sizeof(ri->and_mask))
|
||||||
|
<< endl;
|
||||||
|
os << " cmp_mask "
|
||||||
|
<< dumpStrMask(ri->cmp_mask, sizeof(ri->cmp_mask))
|
||||||
|
<< endl;
|
||||||
|
os << " neg_mask 0x" << std::hex << std::setw(8)
|
||||||
|
<< std::setfill('0') << ri->neg_mask << std::dec << endl;
|
||||||
|
os << " offset " << ri->offset << endl;
|
||||||
|
os << " fail_jump " << offset + ri->fail_jump << endl;
|
||||||
|
}
|
||||||
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
PROGRAM_CASE(CHECK_BYTE) {
|
PROGRAM_CASE(CHECK_BYTE) {
|
||||||
os << " and_mask 0x" << std::hex << std::setw(2)
|
os << " and_mask 0x" << std::hex << std::setw(2)
|
||||||
<< std::setfill('0') << u32{ri->and_mask} << std::dec
|
<< std::setfill('0') << u32{ri->and_mask} << std::dec
|
||||||
|
@ -51,6 +51,7 @@ enum RoseInstructionCode {
|
|||||||
ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled".
|
ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled".
|
||||||
ROSE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check.
|
ROSE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check.
|
||||||
ROSE_INSTR_CHECK_MASK, //!< 8-bytes mask check.
|
ROSE_INSTR_CHECK_MASK, //!< 8-bytes mask check.
|
||||||
|
ROSE_INSTR_CHECK_MASK_32, //!< 32-bytes and/cmp/neg mask check.
|
||||||
ROSE_INSTR_CHECK_BYTE, //!< Single Byte check.
|
ROSE_INSTR_CHECK_BYTE, //!< Single Byte check.
|
||||||
ROSE_INSTR_CHECK_INFIX, //!< Infix engine must be in accept state.
|
ROSE_INSTR_CHECK_INFIX, //!< Infix engine must be in accept state.
|
||||||
ROSE_INSTR_CHECK_PREFIX, //!< Prefix engine must be in accept state.
|
ROSE_INSTR_CHECK_PREFIX, //!< Prefix engine must be in accept state.
|
||||||
@ -170,9 +171,18 @@ struct ROSE_STRUCT_CHECK_LOOKAROUND {
|
|||||||
|
|
||||||
struct ROSE_STRUCT_CHECK_MASK {
|
struct ROSE_STRUCT_CHECK_MASK {
|
||||||
u8 code; //!< From enum roseInstructionCode.
|
u8 code; //!< From enum roseInstructionCode.
|
||||||
u64a and_mask; //!< 64-bits and mask.
|
u64a and_mask; //!< 8-byte and mask.
|
||||||
u64a cmp_mask; //!< 64-bits cmp mask.
|
u64a cmp_mask; //!< 8-byte cmp mask.
|
||||||
u64a neg_mask; //!< 64-bits negation mask.
|
u64a neg_mask; //!< 8-byte negation mask.
|
||||||
|
s32 offset; //!< Relative offset of the first byte.
|
||||||
|
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ROSE_STRUCT_CHECK_MASK_32 {
|
||||||
|
u8 code; //!< From enum RoseInstructionCode.
|
||||||
|
u8 and_mask[32]; //!< 32-byte and mask.
|
||||||
|
u8 cmp_mask[32]; //!< 32-byte cmp mask.
|
||||||
|
u32 neg_mask; //!< negation mask with 32 bits.
|
||||||
s32 offset; //!< Relative offset of the first byte.
|
s32 offset; //!< Relative offset of the first byte.
|
||||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||||
};
|
};
|
||||||
|
@ -26,7 +26,22 @@
|
|||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#ifndef VALIDATE_MASK_H
|
||||||
|
#define VALIDATE_MASK_H
|
||||||
|
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
|
#include "util/simd_utils.h"
|
||||||
|
|
||||||
|
#if defined(DEBUG)
|
||||||
|
static
|
||||||
|
void validateMask32Print(const u8 *mask) {
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < 32; i++) {
|
||||||
|
printf("%02x", mask[i]);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// check positive bytes in cmp_result.
|
// check positive bytes in cmp_result.
|
||||||
// return one if the check passed, zero otherwise.
|
// return one if the check passed, zero otherwise.
|
||||||
@ -75,3 +90,29 @@ int validateMask(u64a data, u64a valid_data_mask, u64a and_mask,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
int validateMask32(const m256 data, const u32 valid_data_mask,
|
||||||
|
const m256 and_mask, const m256 cmp_mask,
|
||||||
|
const u32 neg_mask) {
|
||||||
|
m256 cmp_result_256 = eq256(and256(data, and_mask), cmp_mask);
|
||||||
|
u32 cmp_result = ~movemask256(cmp_result_256);
|
||||||
|
#ifdef DEBUG
|
||||||
|
DEBUG_PRINTF("data\n");
|
||||||
|
validateMask32Print((const u8 *)&data);
|
||||||
|
DEBUG_PRINTF("cmp_result\n");
|
||||||
|
validateMask32Print((const u8 *)&cmp_result_256);
|
||||||
|
#endif
|
||||||
|
DEBUG_PRINTF("cmp_result %08x neg_mask %08x\n", cmp_result, neg_mask);
|
||||||
|
DEBUG_PRINTF("valid_data_mask %08x\n", valid_data_mask);
|
||||||
|
|
||||||
|
if ((cmp_result & valid_data_mask) == (neg_mask & valid_data_mask)) {
|
||||||
|
DEBUG_PRINTF("checkCompareResult32 passed\n");
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
DEBUG_PRINTF("checkCompareResult32 failed\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
86
src/util/copybytes.h
Normal file
86
src/util/copybytes.h
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef COPY_BYTES_H
|
||||||
|
#define COPY_BYTES_H
|
||||||
|
|
||||||
|
#include "unaligned.h"
|
||||||
|
#include "simd_utils.h"
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
void copy_upto_32_bytes(u8 *dst, const u8 *src, unsigned int len) {
|
||||||
|
switch (len) {
|
||||||
|
case 0:
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
*dst = *src;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
unaligned_store_u16(dst, unaligned_load_u16(src));
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
unaligned_store_u16(dst, unaligned_load_u16(src));
|
||||||
|
dst[2] = src[2];
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
unaligned_store_u32(dst, unaligned_load_u32(src));
|
||||||
|
break;
|
||||||
|
case 5:
|
||||||
|
case 6:
|
||||||
|
case 7:
|
||||||
|
unaligned_store_u32(dst + len - 4, unaligned_load_u32(src + len - 4));
|
||||||
|
unaligned_store_u32(dst, unaligned_load_u32(src));
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
unaligned_store_u64a(dst, unaligned_load_u64a(src));
|
||||||
|
break;
|
||||||
|
case 9:
|
||||||
|
case 10:
|
||||||
|
case 11:
|
||||||
|
case 12:
|
||||||
|
case 13:
|
||||||
|
case 14:
|
||||||
|
case 15:
|
||||||
|
unaligned_store_u64a(dst + len - 8, unaligned_load_u64a(src + len - 8));
|
||||||
|
unaligned_store_u64a(dst, unaligned_load_u64a(src));
|
||||||
|
break;
|
||||||
|
case 16:
|
||||||
|
storeu128(dst, loadu128(src));
|
||||||
|
break;
|
||||||
|
case 32:
|
||||||
|
storeu256(dst, loadu256(src));
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(len < 32);
|
||||||
|
storeu128(dst + len - 16, loadu128(src + len - 16));
|
||||||
|
storeu128(dst, loadu128(src));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
@ -71,6 +71,7 @@
|
|||||||
|
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "simd_types.h"
|
#include "simd_types.h"
|
||||||
|
#include "unaligned.h"
|
||||||
|
|
||||||
// Define a common assume_aligned using an appropriate compiler built-in, if
|
// Define a common assume_aligned using an appropriate compiler built-in, if
|
||||||
// it's available. Note that we need to handle C or C++ compilation.
|
// it's available. Note that we need to handle C or C++ compilation.
|
||||||
@ -354,6 +355,21 @@ m256 set32x8(u32 in) {
|
|||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m256 eq256(m256 a, m256 b) {
|
||||||
|
m256 rv;
|
||||||
|
rv.lo = eq128(a.lo, b.lo);
|
||||||
|
rv.hi = eq128(a.hi, b.hi);
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
u32 movemask256(m256 a) {
|
||||||
|
u32 lo_mask = movemask128(a.lo);
|
||||||
|
u32 hi_mask = movemask128(a.hi);
|
||||||
|
return lo_mask | (hi_mask << 16);
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static really_inline m256 zeroes256(void) {
|
static really_inline m256 zeroes256(void) {
|
||||||
@ -525,6 +541,16 @@ static really_inline m256 loadu256(const void *ptr) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// unaligned store
|
||||||
|
static really_inline void storeu256(void *ptr, m256 a) {
|
||||||
|
#if defined(__AVX2__)
|
||||||
|
_mm256_storeu_si256((m256 *)ptr, a);
|
||||||
|
#else
|
||||||
|
storeu128(ptr, a.lo);
|
||||||
|
storeu128((char *)ptr + 16, a.hi);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
// packed unaligned store of first N bytes
|
// packed unaligned store of first N bytes
|
||||||
static really_inline
|
static really_inline
|
||||||
void storebytes256(void *ptr, m256 a, unsigned int n) {
|
void storebytes256(void *ptr, m256 a, unsigned int n) {
|
||||||
|
@ -71,6 +71,7 @@ set(unit_internal_SOURCES
|
|||||||
internal/repeat.cpp
|
internal/repeat.cpp
|
||||||
internal/rose_build_merge.cpp
|
internal/rose_build_merge.cpp
|
||||||
internal/rose_mask.cpp
|
internal/rose_mask.cpp
|
||||||
|
internal/rose_mask_32.cpp
|
||||||
internal/rvermicelli.cpp
|
internal/rvermicelli.cpp
|
||||||
internal/simd_utils.cpp
|
internal/simd_utils.cpp
|
||||||
internal/shuffle.cpp
|
internal/shuffle.cpp
|
||||||
|
211
unit/internal/rose_mask_32.cpp
Normal file
211
unit/internal/rose_mask_32.cpp
Normal file
@ -0,0 +1,211 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
#include "rose/validate_mask.h"
|
||||||
|
#include "gtest/gtest.h"
|
||||||
|
|
||||||
|
#define ONES32 0xffffffffu
|
||||||
|
|
||||||
|
union RoseLookaroundMask32 {
|
||||||
|
m256 a256;
|
||||||
|
u8 a8[32];
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ValidateMask32TestInfo {
|
||||||
|
RoseLookaroundMask32 data;
|
||||||
|
u32 valid_mask;
|
||||||
|
RoseLookaroundMask32 and_mask;
|
||||||
|
RoseLookaroundMask32 cmp_mask;
|
||||||
|
u32 neg_mask;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ValidateMask32InitInfo {
|
||||||
|
int idx;
|
||||||
|
u8 data;
|
||||||
|
u8 and_mask;
|
||||||
|
u8 cmp_mask;
|
||||||
|
u8 neg_mask;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
static const ValidateMask32InitInfo testBasicIdx[][33] = {
|
||||||
|
{
|
||||||
|
{1, 0x34, 0xf8, 0x30, 0},
|
||||||
|
{2, 0x34, 0xf8, 0x30, 0},
|
||||||
|
{8, 0x23, 0xff, 0x23, 0},
|
||||||
|
{9, 0x34, 0xf8, 0x30, 0},
|
||||||
|
{10, 0x41, 0xdf, 0x41, 0},
|
||||||
|
{11, 0x63, 0xdd, 0x41, 0},
|
||||||
|
{12, 0x61, 0xdd, 0x41, 0},
|
||||||
|
{13, 0x41, 0xdf, 0x41, 0},
|
||||||
|
{14, 0x61, 0xdf, 0x41, 0},
|
||||||
|
{15, 0x41, 0xdf, 0x41, 0},
|
||||||
|
{16, 0x43, 0xdd, 0x41, 0},
|
||||||
|
{17, 0x61, 0xdd, 0x41, 0},
|
||||||
|
{23, 0x63, 0xdd, 0x41, 0},
|
||||||
|
{24, 0x4f, 0xfc, 0x4c, 0},
|
||||||
|
{25, 0x4d, 0xfc, 0x4c, 0},
|
||||||
|
{26, 0x4d, 0xfc, 0x4c, 0},
|
||||||
|
{-1, 0, 0, 0, 0},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{11, 0, 0xff, 0x55, 1},
|
||||||
|
{12, 0, 0xff, 0x36, 1},
|
||||||
|
{13, 0, 0xfe, 0x34, 1},
|
||||||
|
{14, 0x4d, 0xfe, 0x4c, 0},
|
||||||
|
{15, 0x41, 0xbf, 0x01, 0},
|
||||||
|
{16, 0x53, 0xdf, 0x73, 1},
|
||||||
|
{17, 0x4b, 0, 0, 0},
|
||||||
|
{18, 0, 0x2c, 0x2c, 1},
|
||||||
|
{-1, 0, 0, 0, 0},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{15, 0x46, 0xdf, 0x46, 0},
|
||||||
|
{16, 0x4f, 0xdf, 0x46, 1},
|
||||||
|
{17, 0x6f, 0xff, 0x6f, 0},
|
||||||
|
{18, 0x31, 0xfe, 0x30, 0},
|
||||||
|
{19, 0x34, 0xf8, 0x30, 0},
|
||||||
|
{20, 0x66, 0xc0, 0x40, 0},
|
||||||
|
{21, 0x6f, 0xf0, 0x60, 0},
|
||||||
|
{22, 0x6f, 0, 0, 0},
|
||||||
|
{23, 0x46, 0xdf, 0x44, 1},
|
||||||
|
{24, 0x4f, 0xdf, 0x46, 1},
|
||||||
|
{25, 0x6f, 0xff, 0x4f, 1},
|
||||||
|
{26, 0x31, 0xfe, 0x30, 0},
|
||||||
|
{27, 0x34, 0xf8, 0x34, 1},
|
||||||
|
{28, 0x66, 0xc0, 0x60, 1},
|
||||||
|
{29, 0x6f, 0xf0, 0x6f, 1},
|
||||||
|
{30, 0x6f, 0, 0x60, 1},
|
||||||
|
{-1, 0, 0, 0, 0},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{31, 0x4a, 0x80, 0, 0},
|
||||||
|
{-1, 0, 0, 0, 1},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{12, 0x2b, 0x3d, 0x2d, 1},
|
||||||
|
{13, 0x2b, 0x3d, 0x4c, 1},
|
||||||
|
{23, 0x4a, 0x88, 0x0a, 1},
|
||||||
|
{-1, 0, 0, 0, 0},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
static void initTestInfo(ValidateMask32TestInfo &t) {
|
||||||
|
t.data.a256 = zeroes256();
|
||||||
|
t.valid_mask = 0xffffffff;
|
||||||
|
t.and_mask.a256 = zeroes256();
|
||||||
|
t.cmp_mask.a256 = zeroes256();
|
||||||
|
t.neg_mask = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
static
|
||||||
|
int testBasicInit(ValidateMask32TestInfo *testB) {
|
||||||
|
int len = 0;
|
||||||
|
ValidateMask32TestInfo t;
|
||||||
|
for (size_t i = 0; i < ARRAY_LENGTH(testBasicIdx); i++) {
|
||||||
|
initTestInfo(t);
|
||||||
|
for (const auto &line: testBasicIdx[i]) {
|
||||||
|
if (line.idx < 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
int index = line.idx;
|
||||||
|
t.data.a8[index] = line.data;
|
||||||
|
t.and_mask.a8[index] = line.and_mask;
|
||||||
|
t.cmp_mask.a8[index] = line.cmp_mask;
|
||||||
|
t.neg_mask |= line.neg_mask << index;
|
||||||
|
}
|
||||||
|
testB[i] = t;
|
||||||
|
len++;
|
||||||
|
}
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(ValidateMask32, testMask32_1) {
|
||||||
|
ValidateMask32TestInfo testBasic[20];
|
||||||
|
int test_len = testBasicInit(testBasic);
|
||||||
|
for (int i = 0; i < test_len; i++) {
|
||||||
|
const auto t = testBasic[i];
|
||||||
|
EXPECT_EQ(1, validateMask32(t.data.a256, t.valid_mask,
|
||||||
|
t.and_mask.a256, t.cmp_mask.a256,
|
||||||
|
t.neg_mask));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(ValidateMask32, testMask32_2) {
|
||||||
|
ValidateMask32TestInfo testBasic[20];
|
||||||
|
int test_len = testBasicInit(testBasic);
|
||||||
|
for (int left = 0; left <= 32; left++) {
|
||||||
|
for (int right = 0; right + left < 32; right++) {
|
||||||
|
u32 valid_mask = ONES32 << (left + right) >> left;
|
||||||
|
for (int i = 0; i < test_len; i++) {
|
||||||
|
const auto &t = testBasic[i];
|
||||||
|
int bool_result;
|
||||||
|
bool_result = !(valid_mask & t.neg_mask);
|
||||||
|
EXPECT_EQ(bool_result, validateMask32(t.data.a256,
|
||||||
|
valid_mask,
|
||||||
|
t.and_mask.a256,
|
||||||
|
t.cmp_mask.a256,
|
||||||
|
0));
|
||||||
|
bool_result = (valid_mask & t.neg_mask) == valid_mask;
|
||||||
|
EXPECT_EQ(bool_result, validateMask32(t.data.a256,
|
||||||
|
valid_mask,
|
||||||
|
t.and_mask.a256,
|
||||||
|
t.cmp_mask.a256,
|
||||||
|
ONES32));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(ValidateMask32, testMask32_3) {
|
||||||
|
ValidateMask32TestInfo testBasic[20];
|
||||||
|
testing::internal::Random neg_mask_rand(451);
|
||||||
|
int test_len = testBasicInit(testBasic);
|
||||||
|
for (int left = 0; left <= 32; left++) {
|
||||||
|
for (int right = 0; right + left < 32; right++) {
|
||||||
|
u32 valid_mask = ONES32 << (left + right) >> left;
|
||||||
|
for (int i = 0; i < test_len; i++) {
|
||||||
|
const auto &t = testBasic[i];
|
||||||
|
int bool_result;
|
||||||
|
for (int j = 0; j < 5000; j++) {
|
||||||
|
u32 neg_mask = neg_mask_rand.Generate(1u << 31);
|
||||||
|
bool_result = (neg_mask & valid_mask) ==
|
||||||
|
(t.neg_mask & valid_mask);
|
||||||
|
EXPECT_EQ(bool_result, validateMask32(t.data.a256,
|
||||||
|
valid_mask,
|
||||||
|
t.and_mask.a256,
|
||||||
|
t.cmp_mask.a256,
|
||||||
|
neg_mask));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user