lookaround:

add 64x8 and 64x16 shufti models
add mask64 model
expand entry quantity
Author: Hong, Yang A
Date: 2020-10-20 20:34:50 +00:00
Committed by: Konstantinos Margaritis
Parent: e1706c435c
Commit: 8436f95f24
12 changed files with 892 additions and 46 deletions
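The mask64 model extends the existing 16- and 32-byte lookaround mask checks to a full 64-byte window on AVX-512 targets. The core per-byte test is the same as in the narrower models: a byte passes when (data[i] & and_mask[i]) == cmp_mask[i]. A minimal scalar sketch of that test follows; the function name and the bitmap return value are illustrative, not taken from the sources.

    #include <stdint.h>

    /* Illustrative scalar model of the per-byte test behind the 64-byte
     * mask check: bit i of the result is set iff byte i satisfies its
     * AND/CMP mask pair. */
    static uint64_t mask64_match_bits(const uint8_t *data,
                                      const uint8_t *and_mask,
                                      const uint8_t *cmp_mask) {
        uint64_t bits = 0;
        for (unsigned i = 0; i < 64; i++) {
            if ((data[i] & and_mask[i]) == cmp_mask[i]) {
                bits |= 1ULL << i; /* byte i matches its mask */
            }
        }
        return bits;
    }

The vector path computes the same 64 match bits with a single 512-bit compare inside validateMask64(), then reconciles them with neg_mask (positions whose test is inverted) and valid_data_mask (positions actually backed by buffer data), following the convention of the narrower validateMask helpers.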


@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015-2019, Intel Corporation
+ * Copyright (c) 2015-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -767,10 +767,10 @@ int roseCheckMask32(const struct core_info *ci, const u8 *and_mask,
c_shift = c_len - ci->len;
c_len = ci->len;
}
- copy_upto_32_bytes((u8 *)&data - offset, ci->buf, c_len);
+ copy_upto_64_bytes((u8 *)&data - offset, ci->buf, c_len);
}
assert(h_shift + h_len + c_len + c_shift == 32);
- copy_upto_32_bytes((u8 *)&data + h_shift, ci->hbuf + h_offset, h_len);
+ copy_upto_64_bytes((u8 *)&data + h_shift, ci->hbuf + h_offset, h_len);
} else {
if (offset + 32 > (s64a)ci->len) {
if (offset >= (s64a)ci->len) {
@@ -779,7 +779,7 @@ int roseCheckMask32(const struct core_info *ci, const u8 *and_mask,
}
c_len = ci->len - offset;
c_shift = 32 - c_len;
- copy_upto_32_bytes((u8 *)&data, ci->buf + offset, c_len);
+ copy_upto_64_bytes((u8 *)&data, ci->buf + offset, c_len);
} else {
data = loadu256(ci->buf + offset);
}
@@ -800,12 +800,90 @@ int roseCheckMask32(const struct core_info *ci, const u8 *and_mask,
return 0;
}
// get 128/256 bits data from history and current buffer.
#ifdef HAVE_AVX512
static rose_inline
int roseCheckMask64(const struct core_info *ci, const u8 *and_mask,
const u8 *cmp_mask, const u64a neg_mask,
s32 checkOffset, u64a end) {
const s64a base_offset = (s64a)end - ci->buf_offset;
s64a offset = base_offset + checkOffset;
DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
DEBUG_PRINTF("too early, fail\n");
return 0;
}
m512 data = zeroes512(); // consists of the following four parts.
s32 c_shift = 0; // blank bytes after current.
s32 h_shift = 0; // blank bytes before history.
s32 h_len = 64; // number of bytes from history buffer.
s32 c_len = 0; // number of bytes from current buffer.
/* h_shift + h_len + c_len + c_shift == 64 must hold. */
if (offset < 0) {
s32 h_offset = 0; // the start offset in history buffer.
if (offset < -(s64a)ci->hlen) {
if (offset + 64 <= -(s64a)ci->hlen) {
DEBUG_PRINTF("all before history\n");
return 1;
}
h_shift = -(offset + (s64a)ci->hlen);
h_len = 64 - h_shift;
} else {
h_offset = ci->hlen + offset;
}
if (offset + 64 > 0) {
// part in current buffer.
c_len = offset + 64;
h_len = -(offset + h_shift);
if (c_len > (s64a)ci->len) {
// out of current buffer.
c_shift = c_len - ci->len;
c_len = ci->len;
}
copy_upto_64_bytes((u8 *)&data - offset, ci->buf, c_len);
}
assert(h_shift + h_len + c_len + c_shift == 64);
copy_upto_64_bytes((u8 *)&data + h_shift, ci->hbuf + h_offset, h_len);
} else {
if (offset + 64 > (s64a)ci->len) {
if (offset >= (s64a)ci->len) {
DEBUG_PRINTF("all in the future.\n");
return 1;
}
c_len = ci->len - offset;
c_shift = 64 - c_len;
copy_upto_64_bytes((u8 *)&data, ci->buf + offset, c_len);
} else {
data = loadu512(ci->buf + offset);
}
}
DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift);
DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len);
// use valid_data_mask to mask off bytes before history / beyond the current buffer.
u64a valid_data_mask;
valid_data_mask = (~0ULL) << (h_shift + c_shift) >> (c_shift);
m512 and_mask_m512 = loadu512(and_mask);
m512 cmp_mask_m512 = loadu512(cmp_mask);
if (validateMask64(data, valid_data_mask, and_mask_m512,
cmp_mask_m512, neg_mask)) {
DEBUG_PRINTF("Mask64 passed\n");
return 1;
}
return 0;
}
#endif
// get 128/256/512 bits data from history and current buffer.
// return data and valid_data_mask.
static rose_inline
- u32 getBufferDataComplex(const struct core_info *ci, const s64a loc,
+ u64a getBufferDataComplex(const struct core_info *ci, const s64a loc,
u8 *data, const u32 data_len) {
- assert(data_len == 16 || data_len == 32);
+ assert(data_len == 16 || data_len == 32 || data_len == 64);
s32 c_shift = 0; // blank bytes after current.
s32 h_shift = 0; // blank bytes before history.
s32 h_len = data_len; // number of bytes from history buffer.
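roseCheckMask64() above assembles its 64-byte window from up to four regions (leading blank bytes, history bytes, current-buffer bytes, trailing blank bytes) and records which lanes hold real data in a 64-bit valid_data_mask; this is also why getBufferDataComplex() now returns u64a rather than u32. A worked example of the mask expression used above, (~0ULL) << (h_shift + c_shift) >> c_shift, with made-up shift values:

    #include <assert.h>
    #include <stdint.h>

    /* With h_shift leading blank bytes and c_shift trailing blank bytes,
     * only bits h_shift .. 63-c_shift of valid_data_mask survive. */
    int main(void) {
        int h_shift = 3;   /* 3 blank bytes before the history data */
        int c_shift = 5;   /* 5 blank bytes after the current data  */
        uint64_t valid = (~0ULL) << (h_shift + c_shift) >> c_shift;
        assert(valid == 0x07fffffffffffff8ULL); /* bits 3..58 set: 64 - 3 - 5 = 56 valid bytes */
        (void)valid;
        return 0;
    }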
@@ -831,10 +909,10 @@ u32 getBufferDataComplex(const struct core_info *ci, const s64a loc,
c_shift = c_len - ci->len;
c_len = ci->len;
}
- copy_upto_32_bytes(data - loc, ci->buf, c_len);
+ copy_upto_64_bytes(data - loc, ci->buf, c_len);
}
assert(h_shift + h_len + c_len + c_shift == (s32)data_len);
- copy_upto_32_bytes(data + h_shift, ci->hbuf + h_offset, h_len);
+ copy_upto_64_bytes(data + h_shift, ci->hbuf + h_offset, h_len);
} else {
if (loc + data_len > (s64a)ci->len) {
if (loc >= (s64a)ci->len) {
@@ -843,8 +921,14 @@ u32 getBufferDataComplex(const struct core_info *ci, const s64a loc,
}
c_len = ci->len - loc;
c_shift = data_len - c_len;
- copy_upto_32_bytes(data, ci->buf + loc, c_len);
+ copy_upto_64_bytes(data, ci->buf + loc, c_len);
} else {
#ifdef HAVE_AVX512
if (data_len == 64) {
storeu512(data, loadu512(ci->buf + loc));
return ~0ULL;
}
#endif
if (data_len == 16) {
storeu128(data, loadu128(ci->buf + loc));
return 0xffff;
@@ -857,6 +941,11 @@ u32 getBufferDataComplex(const struct core_info *ci, const s64a loc,
DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift);
DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len);
#ifdef HAVE_AVX512
if (data_len == 64) {
return (~0ULL) << (h_shift + c_shift) >> c_shift;
}
#endif
if (data_len == 16) {
return (u16)(0xffff << (h_shift + c_shift)) >> c_shift;
} else {
@@ -886,6 +975,19 @@ m256 getData256(const struct core_info *ci, s64a offset, u32 *valid_data_mask) {
return *(m256 *)data;
}
#ifdef HAVE_AVX512
static rose_inline
m512 getData512(const struct core_info *ci, s64a offset, u64a *valid_data_mask) {
if (offset > 0 && offset + sizeof(m512) <= ci->len) {
*valid_data_mask = ~0ULL;
return loadu512(ci->buf + offset);
}
ALIGN_CL_DIRECTIVE u8 data[sizeof(m512)];
*valid_data_mask = getBufferDataComplex(ci, offset, data, 64);
return *(m512 *)data;
}
#endif
static rose_inline
int roseCheckShufti16x8(const struct core_info *ci, const u8 *nib_mask,
const u8 *bucket_select_mask, u32 neg_mask,
@@ -1025,6 +1127,83 @@ int roseCheckShufti32x16(const struct core_info *ci, const u8 *hi_mask,
}
}
#ifdef HAVE_AVX512
static rose_inline
int roseCheckShufti64x8(const struct core_info *ci, const u8 *hi_mask,
const u8 *lo_mask, const u8 *bucket_select_mask,
u64a neg_mask, s32 checkOffset, u64a end) {
const s64a base_offset = (s64a)end - ci->buf_offset;
s64a offset = base_offset + checkOffset;
DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
DEBUG_PRINTF("too early, fail\n");
return 0;
}
u64a valid_data_mask = 0;
m512 data = getData512(ci, offset, &valid_data_mask);
if (unlikely(!valid_data_mask)) {
return 1;
}
m512 hi_mask_m512 = loadu512(hi_mask);
m512 lo_mask_m512 = loadu512(lo_mask);
m512 bucket_select_mask_m512 = loadu512(bucket_select_mask);
if (validateShuftiMask64x8(data, hi_mask_m512, lo_mask_m512,
bucket_select_mask_m512,
neg_mask, valid_data_mask)) {
DEBUG_PRINTF("check shufti 64x8 successfully\n");
return 1;
} else {
return 0;
}
}
static rose_inline
int roseCheckShufti64x16(const struct core_info *ci, const u8 *hi_mask_1,
const u8 *hi_mask_2, const u8 *lo_mask_1,
const u8 *lo_mask_2, const u8 *bucket_select_mask_hi,
const u8 *bucket_select_mask_lo, u64a neg_mask,
s32 checkOffset, u64a end) {
const s64a base_offset = (s64a)end - ci->buf_offset;
s64a offset = base_offset + checkOffset;
DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
DEBUG_PRINTF("too early, fail\n");
return 0;
}
u64a valid_data_mask = 0;
m512 data = getData512(ci, offset, &valid_data_mask);
if (unlikely(!valid_data_mask)) {
return 1;
}
m512 hi_mask_1_m512 = loadu512(hi_mask_1);
m512 hi_mask_2_m512 = loadu512(hi_mask_2);
m512 lo_mask_1_m512 = loadu512(lo_mask_1);
m512 lo_mask_2_m512 = loadu512(lo_mask_2);
m512 bucket_select_mask_hi_m512 = loadu512(bucket_select_mask_hi);
m512 bucket_select_mask_lo_m512 = loadu512(bucket_select_mask_lo);
if (validateShuftiMask64x16(data, hi_mask_1_m512, hi_mask_2_m512,
lo_mask_1_m512, lo_mask_2_m512,
bucket_select_mask_hi_m512,
bucket_select_mask_lo_m512,
neg_mask, valid_data_mask)) {
DEBUG_PRINTF("check shufti 64x16 successfully\n");
return 1;
} else {
return 0;
}
}
#endif
static rose_inline
int roseCheckSingleLookaround(const struct RoseEngine *t,
const struct hs_scratch *scratch,
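The two shufti checks above classify all 64 bytes in one pass by splitting each byte into its high and low nibbles and looking both up in small per-nibble tables; a byte counts as a hit when both lookups share a set bit with that position's bucket_select_mask byte. Below is a scalar sketch of the 8-bucket (64x8) classification; the table layout is assumed to mirror the narrower shufti models, where a 16-entry nibble table is broadcast across the 128-bit lanes of the vector register.

    #include <stdint.h>

    /* Scalar sketch of 8-bucket shufti classification. Bit b of each
     * nibble-table entry means "this nibble is compatible with bucket b";
     * bucket_select_mask[i] names the bucket(s) byte i must hit. */
    static uint64_t shufti64x8_match_bits(const uint8_t *data,        /* 64 bytes        */
                                          const uint8_t *hi_mask,     /* 16-entry table  */
                                          const uint8_t *lo_mask,     /* 16-entry table  */
                                          const uint8_t *bucket_select_mask) {
        uint64_t bits = 0;
        for (unsigned i = 0; i < 64; i++) {
            uint8_t hi = hi_mask[data[i] >> 4];
            uint8_t lo = lo_mask[data[i] & 0xf];
            /* byte i hits iff some selected bucket is allowed by both nibbles */
            if (hi & lo & bucket_select_mask[i]) {
                bits |= 1ULL << i;
            }
        }
        return bits;
    }

validateShuftiMask64x8/64x16 then compare the resulting hit bits against neg_mask under valid_data_mask, just as the mask-64 check does; the 16-bucket variant splits its buckets across two table pairs (hi_mask_1/2, lo_mask_1/2) and the hi/lo bucket-select masks.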
@@ -2068,6 +2247,12 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
&&LABEL_ROSE_INSTR_FLUSH_COMBINATION,
&&LABEL_ROSE_INSTR_SET_EXHAUST,
&&LABEL_ROSE_INSTR_LAST_FLUSH_COMBINATION
#ifdef HAVE_AVX512
,
&&LABEL_ROSE_INSTR_CHECK_SHUFTI_64x8, //!< Check 64-byte data by 8-bucket shufti.
&&LABEL_ROSE_INSTR_CHECK_SHUFTI_64x16, //!< Check 64-byte data by 16-bucket shufti.
&&LABEL_ROSE_INSTR_CHECK_MASK_64 //!< 64-bytes and/cmp/neg mask check.
#endif
};
#endif
@@ -2258,6 +2443,45 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
}
PROGRAM_NEXT_INSTRUCTION
#ifdef HAVE_AVX512
PROGRAM_CASE(CHECK_MASK_64) {
struct core_info *ci = &scratch->core_info;
if (!roseCheckMask64(ci, ri->and_mask, ri->cmp_mask,
ri->neg_mask, ri->offset, end)) {
assert(ri->fail_jump);
pc += ri->fail_jump;
PROGRAM_NEXT_INSTRUCTION_JUMP
}
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CHECK_SHUFTI_64x8) {
const struct core_info *ci = &scratch->core_info;
if (!roseCheckShufti64x8(ci, ri->hi_mask, ri->lo_mask,
ri->bucket_select_mask,
ri->neg_mask, ri->offset, end)) {
assert(ri->fail_jump);
pc += ri->fail_jump;
PROGRAM_NEXT_INSTRUCTION_JUMP;
}
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(CHECK_SHUFTI_64x16) {
const struct core_info *ci = &scratch->core_info;
if (!roseCheckShufti64x16(ci, ri->hi_mask_1, ri->hi_mask_2,
ri->lo_mask_1, ri->lo_mask_2,
ri->bucket_select_mask_hi,
ri->bucket_select_mask_lo,
ri->neg_mask, ri->offset, end)) {
assert(ri->fail_jump);
pc += ri->fail_jump;
PROGRAM_NEXT_INSTRUCTION_JUMP;
}
}
PROGRAM_NEXT_INSTRUCTION
#endif
PROGRAM_CASE(CHECK_INFIX) {
if (!roseTestInfix(t, scratch, ri->queue, ri->lag, ri->report,
end)) {
@@ -2945,6 +3169,19 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
}
L_PROGRAM_NEXT_INSTRUCTION
#ifdef HAVE_AVX512
L_PROGRAM_CASE(CHECK_MASK_64) {
struct core_info *ci = &scratch->core_info;
if (!roseCheckMask64(ci, ri->and_mask, ri->cmp_mask,
ri->neg_mask, ri->offset, end)) {
assert(ri->fail_jump);
pc += ri->fail_jump;
L_PROGRAM_NEXT_INSTRUCTION_JUMP
}
}
L_PROGRAM_NEXT_INSTRUCTION
#endif
L_PROGRAM_CASE(CHECK_BYTE) {
const struct core_info *ci = &scratch->core_info;
if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask,