mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
lookaround:
add 64x8 and 64x16 shufti models add mask64 model expand entry quantity
This commit is contained in:
parent
56cb107005
commit
dea7c4dc2e
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2019, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -767,10 +767,10 @@ int roseCheckMask32(const struct core_info *ci, const u8 *and_mask,
|
|||||||
c_shift = c_len - ci->len;
|
c_shift = c_len - ci->len;
|
||||||
c_len = ci->len;
|
c_len = ci->len;
|
||||||
}
|
}
|
||||||
copy_upto_32_bytes((u8 *)&data - offset, ci->buf, c_len);
|
copy_upto_64_bytes((u8 *)&data - offset, ci->buf, c_len);
|
||||||
}
|
}
|
||||||
assert(h_shift + h_len + c_len + c_shift == 32);
|
assert(h_shift + h_len + c_len + c_shift == 32);
|
||||||
copy_upto_32_bytes((u8 *)&data + h_shift, ci->hbuf + h_offset, h_len);
|
copy_upto_64_bytes((u8 *)&data + h_shift, ci->hbuf + h_offset, h_len);
|
||||||
} else {
|
} else {
|
||||||
if (offset + 32 > (s64a)ci->len) {
|
if (offset + 32 > (s64a)ci->len) {
|
||||||
if (offset >= (s64a)ci->len) {
|
if (offset >= (s64a)ci->len) {
|
||||||
@ -779,7 +779,7 @@ int roseCheckMask32(const struct core_info *ci, const u8 *and_mask,
|
|||||||
}
|
}
|
||||||
c_len = ci->len - offset;
|
c_len = ci->len - offset;
|
||||||
c_shift = 32 - c_len;
|
c_shift = 32 - c_len;
|
||||||
copy_upto_32_bytes((u8 *)&data, ci->buf + offset, c_len);
|
copy_upto_64_bytes((u8 *)&data, ci->buf + offset, c_len);
|
||||||
} else {
|
} else {
|
||||||
data = loadu256(ci->buf + offset);
|
data = loadu256(ci->buf + offset);
|
||||||
}
|
}
|
||||||
@ -800,12 +800,90 @@ int roseCheckMask32(const struct core_info *ci, const u8 *and_mask,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// get 128/256 bits data from history and current buffer.
|
#ifdef HAVE_AVX512
|
||||||
|
static rose_inline
|
||||||
|
int roseCheckMask64(const struct core_info *ci, const u8 *and_mask,
|
||||||
|
const u8 *cmp_mask, const u64a neg_mask,
|
||||||
|
s32 checkOffset, u64a end) {
|
||||||
|
const s64a base_offset = (s64a)end - ci->buf_offset;
|
||||||
|
s64a offset = base_offset + checkOffset;
|
||||||
|
DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
|
||||||
|
DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
|
||||||
|
|
||||||
|
if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
|
||||||
|
DEBUG_PRINTF("too early, fail\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
m512 data = zeroes512(); // consists of the following four parts.
|
||||||
|
s32 c_shift = 0; // blank bytes after current.
|
||||||
|
s32 h_shift = 0; // blank bytes before history.
|
||||||
|
s32 h_len = 64; // number of bytes from history buffer.
|
||||||
|
s32 c_len = 0; // number of bytes from current buffer.
|
||||||
|
/* h_shift + h_len + c_len + c_shift = 64 need to be hold.*/
|
||||||
|
|
||||||
|
if (offset < 0) {
|
||||||
|
s32 h_offset = 0; // the start offset in history buffer.
|
||||||
|
if (offset < -(s64a)ci->hlen) {
|
||||||
|
if (offset + 64 <= -(s64a)ci->hlen) {
|
||||||
|
DEBUG_PRINTF("all before history\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
h_shift = -(offset + (s64a)ci->hlen);
|
||||||
|
h_len = 64 - h_shift;
|
||||||
|
} else {
|
||||||
|
h_offset = ci->hlen + offset;
|
||||||
|
}
|
||||||
|
if (offset + 64 > 0) {
|
||||||
|
// part in current buffer.
|
||||||
|
c_len = offset + 64;
|
||||||
|
h_len = -(offset + h_shift);
|
||||||
|
if (c_len > (s64a)ci->len) {
|
||||||
|
// out of current buffer.
|
||||||
|
c_shift = c_len - ci->len;
|
||||||
|
c_len = ci->len;
|
||||||
|
}
|
||||||
|
copy_upto_64_bytes((u8 *)&data - offset, ci->buf, c_len);
|
||||||
|
}
|
||||||
|
assert(h_shift + h_len + c_len + c_shift == 64);
|
||||||
|
copy_upto_64_bytes((u8 *)&data + h_shift, ci->hbuf + h_offset, h_len);
|
||||||
|
} else {
|
||||||
|
if (offset + 64 > (s64a)ci->len) {
|
||||||
|
if (offset >= (s64a)ci->len) {
|
||||||
|
DEBUG_PRINTF("all in the future.\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
c_len = ci->len - offset;
|
||||||
|
c_shift = 64 - c_len;
|
||||||
|
copy_upto_64_bytes((u8 *)&data, ci->buf + offset, c_len);
|
||||||
|
} else {
|
||||||
|
data = loadu512(ci->buf + offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift);
|
||||||
|
DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len);
|
||||||
|
// we use valid_data_mask to blind bytes before history/in the future.
|
||||||
|
u64a valid_data_mask;
|
||||||
|
valid_data_mask = (~0ULL) << (h_shift + c_shift) >> (c_shift);
|
||||||
|
|
||||||
|
m512 and_mask_m512 = loadu512(and_mask);
|
||||||
|
m512 cmp_mask_m512 = loadu512(cmp_mask);
|
||||||
|
|
||||||
|
if (validateMask64(data, valid_data_mask, and_mask_m512,
|
||||||
|
cmp_mask_m512, neg_mask)) {
|
||||||
|
DEBUG_PRINTF("Mask64 passed\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// get 128/256/512 bits data from history and current buffer.
|
||||||
// return data and valid_data_mask.
|
// return data and valid_data_mask.
|
||||||
static rose_inline
|
static rose_inline
|
||||||
u32 getBufferDataComplex(const struct core_info *ci, const s64a loc,
|
u64a getBufferDataComplex(const struct core_info *ci, const s64a loc,
|
||||||
u8 *data, const u32 data_len) {
|
u8 *data, const u32 data_len) {
|
||||||
assert(data_len == 16 || data_len == 32);
|
assert(data_len == 16 || data_len == 32 || data_len == 64);
|
||||||
s32 c_shift = 0; // blank bytes after current.
|
s32 c_shift = 0; // blank bytes after current.
|
||||||
s32 h_shift = 0; // blank bytes before history.
|
s32 h_shift = 0; // blank bytes before history.
|
||||||
s32 h_len = data_len; // number of bytes from history buffer.
|
s32 h_len = data_len; // number of bytes from history buffer.
|
||||||
@ -831,10 +909,10 @@ u32 getBufferDataComplex(const struct core_info *ci, const s64a loc,
|
|||||||
c_shift = c_len - ci->len;
|
c_shift = c_len - ci->len;
|
||||||
c_len = ci->len;
|
c_len = ci->len;
|
||||||
}
|
}
|
||||||
copy_upto_32_bytes(data - loc, ci->buf, c_len);
|
copy_upto_64_bytes(data - loc, ci->buf, c_len);
|
||||||
}
|
}
|
||||||
assert(h_shift + h_len + c_len + c_shift == (s32)data_len);
|
assert(h_shift + h_len + c_len + c_shift == (s32)data_len);
|
||||||
copy_upto_32_bytes(data + h_shift, ci->hbuf + h_offset, h_len);
|
copy_upto_64_bytes(data + h_shift, ci->hbuf + h_offset, h_len);
|
||||||
} else {
|
} else {
|
||||||
if (loc + data_len > (s64a)ci->len) {
|
if (loc + data_len > (s64a)ci->len) {
|
||||||
if (loc >= (s64a)ci->len) {
|
if (loc >= (s64a)ci->len) {
|
||||||
@ -843,8 +921,14 @@ u32 getBufferDataComplex(const struct core_info *ci, const s64a loc,
|
|||||||
}
|
}
|
||||||
c_len = ci->len - loc;
|
c_len = ci->len - loc;
|
||||||
c_shift = data_len - c_len;
|
c_shift = data_len - c_len;
|
||||||
copy_upto_32_bytes(data, ci->buf + loc, c_len);
|
copy_upto_64_bytes(data, ci->buf + loc, c_len);
|
||||||
} else {
|
} else {
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
if (data_len == 64) {
|
||||||
|
storeu512(data, loadu512(ci->buf + loc));
|
||||||
|
return ~0ULL;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
if (data_len == 16) {
|
if (data_len == 16) {
|
||||||
storeu128(data, loadu128(ci->buf + loc));
|
storeu128(data, loadu128(ci->buf + loc));
|
||||||
return 0xffff;
|
return 0xffff;
|
||||||
@ -857,6 +941,11 @@ u32 getBufferDataComplex(const struct core_info *ci, const s64a loc,
|
|||||||
DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift);
|
DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift);
|
||||||
DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len);
|
DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len);
|
||||||
|
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
if (data_len == 64) {
|
||||||
|
return (~0ULL) << (h_shift + c_shift) >> c_shift;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
if (data_len == 16) {
|
if (data_len == 16) {
|
||||||
return (u16)(0xffff << (h_shift + c_shift)) >> c_shift;
|
return (u16)(0xffff << (h_shift + c_shift)) >> c_shift;
|
||||||
} else {
|
} else {
|
||||||
@ -886,6 +975,19 @@ m256 getData256(const struct core_info *ci, s64a offset, u32 *valid_data_mask) {
|
|||||||
return *(m256 *)data;
|
return *(m256 *)data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
static rose_inline
|
||||||
|
m512 getData512(const struct core_info *ci, s64a offset, u64a *valid_data_mask) {
|
||||||
|
if (offset > 0 && offset + sizeof(m512) <= ci->len) {
|
||||||
|
*valid_data_mask = ~0ULL;
|
||||||
|
return loadu512(ci->buf + offset);
|
||||||
|
}
|
||||||
|
ALIGN_CL_DIRECTIVE u8 data[sizeof(m512)];
|
||||||
|
*valid_data_mask = getBufferDataComplex(ci, offset, data, 64);
|
||||||
|
return *(m512 *)data;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static rose_inline
|
static rose_inline
|
||||||
int roseCheckShufti16x8(const struct core_info *ci, const u8 *nib_mask,
|
int roseCheckShufti16x8(const struct core_info *ci, const u8 *nib_mask,
|
||||||
const u8 *bucket_select_mask, u32 neg_mask,
|
const u8 *bucket_select_mask, u32 neg_mask,
|
||||||
@ -1025,6 +1127,83 @@ int roseCheckShufti32x16(const struct core_info *ci, const u8 *hi_mask,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
static rose_inline
|
||||||
|
int roseCheckShufti64x8(const struct core_info *ci, const u8 *hi_mask,
|
||||||
|
const u8 *lo_mask, const u8 *bucket_select_mask,
|
||||||
|
u64a neg_mask, s32 checkOffset, u64a end) {
|
||||||
|
const s64a base_offset = (s64a)end - ci->buf_offset;
|
||||||
|
s64a offset = base_offset + checkOffset;
|
||||||
|
DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
|
||||||
|
DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
|
||||||
|
|
||||||
|
if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
|
||||||
|
DEBUG_PRINTF("too early, fail\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64a valid_data_mask = 0;
|
||||||
|
m512 data = getData512(ci, offset, &valid_data_mask);
|
||||||
|
|
||||||
|
if (unlikely(!valid_data_mask)) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
m512 hi_mask_m512 = loadu512(hi_mask);
|
||||||
|
m512 lo_mask_m512 = loadu512(lo_mask);
|
||||||
|
m512 bucket_select_mask_m512 = loadu512(bucket_select_mask);
|
||||||
|
if (validateShuftiMask64x8(data, hi_mask_m512, lo_mask_m512,
|
||||||
|
bucket_select_mask_m512,
|
||||||
|
neg_mask, valid_data_mask)) {
|
||||||
|
DEBUG_PRINTF("check shufti 64x8 successfully\n");
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static rose_inline
|
||||||
|
int roseCheckShufti64x16(const struct core_info *ci, const u8 *hi_mask_1,
|
||||||
|
const u8 *hi_mask_2, const u8 *lo_mask_1,
|
||||||
|
const u8 *lo_mask_2, const u8 *bucket_select_mask_hi,
|
||||||
|
const u8 *bucket_select_mask_lo, u64a neg_mask,
|
||||||
|
s32 checkOffset, u64a end) {
|
||||||
|
const s64a base_offset = (s64a)end - ci->buf_offset;
|
||||||
|
s64a offset = base_offset + checkOffset;
|
||||||
|
DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
|
||||||
|
DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
|
||||||
|
|
||||||
|
if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
|
||||||
|
DEBUG_PRINTF("too early, fail\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64a valid_data_mask = 0;
|
||||||
|
m512 data = getData512(ci, offset, &valid_data_mask);
|
||||||
|
if (unlikely(!valid_data_mask)) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
m512 hi_mask_1_m512 = loadu512(hi_mask_1);
|
||||||
|
m512 hi_mask_2_m512 = loadu512(hi_mask_2);
|
||||||
|
m512 lo_mask_1_m512 = loadu512(lo_mask_1);
|
||||||
|
m512 lo_mask_2_m512 = loadu512(lo_mask_2);
|
||||||
|
|
||||||
|
m512 bucket_select_mask_hi_m512 = loadu512(bucket_select_mask_hi);
|
||||||
|
m512 bucket_select_mask_lo_m512 = loadu512(bucket_select_mask_lo);
|
||||||
|
if (validateShuftiMask64x16(data, hi_mask_1_m512, hi_mask_2_m512,
|
||||||
|
lo_mask_1_m512, lo_mask_2_m512,
|
||||||
|
bucket_select_mask_hi_m512,
|
||||||
|
bucket_select_mask_lo_m512,
|
||||||
|
neg_mask, valid_data_mask)) {
|
||||||
|
DEBUG_PRINTF("check shufti 64x16 successfully\n");
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static rose_inline
|
static rose_inline
|
||||||
int roseCheckSingleLookaround(const struct RoseEngine *t,
|
int roseCheckSingleLookaround(const struct RoseEngine *t,
|
||||||
const struct hs_scratch *scratch,
|
const struct hs_scratch *scratch,
|
||||||
@ -2068,6 +2247,12 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
|
|||||||
&&LABEL_ROSE_INSTR_FLUSH_COMBINATION,
|
&&LABEL_ROSE_INSTR_FLUSH_COMBINATION,
|
||||||
&&LABEL_ROSE_INSTR_SET_EXHAUST,
|
&&LABEL_ROSE_INSTR_SET_EXHAUST,
|
||||||
&&LABEL_ROSE_INSTR_LAST_FLUSH_COMBINATION
|
&&LABEL_ROSE_INSTR_LAST_FLUSH_COMBINATION
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
,
|
||||||
|
&&LABEL_ROSE_INSTR_CHECK_SHUFTI_64x8, //!< Check 64-byte data by 8-bucket shufti.
|
||||||
|
&&LABEL_ROSE_INSTR_CHECK_SHUFTI_64x16, //!< Check 64-byte data by 16-bucket shufti.
|
||||||
|
&&LABEL_ROSE_INSTR_CHECK_MASK_64 //!< 64-bytes and/cmp/neg mask check.
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -2258,6 +2443,45 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
|
|||||||
}
|
}
|
||||||
PROGRAM_NEXT_INSTRUCTION
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
PROGRAM_CASE(CHECK_MASK_64) {
|
||||||
|
struct core_info *ci = &scratch->core_info;
|
||||||
|
if (!roseCheckMask64(ci, ri->and_mask, ri->cmp_mask,
|
||||||
|
ri->neg_mask, ri->offset, end)) {
|
||||||
|
assert(ri->fail_jump);
|
||||||
|
pc += ri->fail_jump;
|
||||||
|
PROGRAM_NEXT_INSTRUCTION_JUMP
|
||||||
|
}
|
||||||
|
}
|
||||||
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
|
PROGRAM_CASE(CHECK_SHUFTI_64x8) {
|
||||||
|
const struct core_info *ci = &scratch->core_info;
|
||||||
|
if (!roseCheckShufti64x8(ci, ri->hi_mask, ri->lo_mask,
|
||||||
|
ri->bucket_select_mask,
|
||||||
|
ri->neg_mask, ri->offset, end)) {
|
||||||
|
assert(ri->fail_jump);
|
||||||
|
pc += ri->fail_jump;
|
||||||
|
PROGRAM_NEXT_INSTRUCTION_JUMP;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
|
PROGRAM_CASE(CHECK_SHUFTI_64x16) {
|
||||||
|
const struct core_info *ci = &scratch->core_info;
|
||||||
|
if (!roseCheckShufti64x16(ci, ri->hi_mask_1, ri->hi_mask_2,
|
||||||
|
ri->lo_mask_1, ri->lo_mask_2,
|
||||||
|
ri->bucket_select_mask_hi,
|
||||||
|
ri->bucket_select_mask_lo,
|
||||||
|
ri->neg_mask, ri->offset, end)) {
|
||||||
|
assert(ri->fail_jump);
|
||||||
|
pc += ri->fail_jump;
|
||||||
|
PROGRAM_NEXT_INSTRUCTION_JUMP;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
#endif
|
||||||
|
|
||||||
PROGRAM_CASE(CHECK_INFIX) {
|
PROGRAM_CASE(CHECK_INFIX) {
|
||||||
if (!roseTestInfix(t, scratch, ri->queue, ri->lag, ri->report,
|
if (!roseTestInfix(t, scratch, ri->queue, ri->lag, ri->report,
|
||||||
end)) {
|
end)) {
|
||||||
@ -2945,6 +3169,19 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
|
|||||||
}
|
}
|
||||||
L_PROGRAM_NEXT_INSTRUCTION
|
L_PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
L_PROGRAM_CASE(CHECK_MASK_64) {
|
||||||
|
struct core_info *ci = &scratch->core_info;
|
||||||
|
if (!roseCheckMask64(ci, ri->and_mask, ri->cmp_mask,
|
||||||
|
ri->neg_mask, ri->offset, end)) {
|
||||||
|
assert(ri->fail_jump);
|
||||||
|
pc += ri->fail_jump;
|
||||||
|
L_PROGRAM_NEXT_INSTRUCTION_JUMP
|
||||||
|
}
|
||||||
|
}
|
||||||
|
L_PROGRAM_NEXT_INSTRUCTION
|
||||||
|
#endif
|
||||||
|
|
||||||
L_PROGRAM_CASE(CHECK_BYTE) {
|
L_PROGRAM_CASE(CHECK_BYTE) {
|
||||||
const struct core_info *ci = &scratch->core_info;
|
const struct core_info *ci = &scratch->core_info;
|
||||||
if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask,
|
if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask,
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2019, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -757,13 +757,12 @@ CharReach shufti2cr(const u8 *lo, const u8 *hi, u8 bucket_mask) {
|
|||||||
|
|
||||||
static
|
static
|
||||||
void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi,
|
void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi,
|
||||||
const u8 *bucket_mask, u32 neg_mask, s32 offset) {
|
const u8 *bucket_mask, u64a neg_mask, s32 offset) {
|
||||||
assert(len == 16 || len == 32);
|
assert(len == 16 || len == 32 || len == 64);
|
||||||
os << " contents:" << endl;
|
os << " contents:" << endl;
|
||||||
for (u32 idx = 0; idx < len; idx++) {
|
for (u32 idx = 0; idx < len; idx++) {
|
||||||
CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]);
|
CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]);
|
||||||
|
if (neg_mask & (1ULL << idx)) {
|
||||||
if (neg_mask & (1U << idx)) {
|
|
||||||
cr.flip();
|
cr.flip();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -779,14 +778,13 @@ void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi,
|
|||||||
static
|
static
|
||||||
void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi,
|
void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi,
|
||||||
const u8 *lo_2, const u8 *hi_2, const u8 *bucket_mask,
|
const u8 *lo_2, const u8 *hi_2, const u8 *bucket_mask,
|
||||||
const u8 *bucket_mask_2, u32 neg_mask, s32 offset) {
|
const u8 *bucket_mask_2, u64a neg_mask, s32 offset) {
|
||||||
assert(len == 16 || len == 32);
|
assert(len == 16 || len == 32 || len == 64);
|
||||||
os << " contents:" << endl;
|
os << " contents:" << endl;
|
||||||
for (u32 idx = 0; idx < len; idx++) {
|
for (u32 idx = 0; idx < len; idx++) {
|
||||||
CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]);
|
CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]);
|
||||||
cr |= shufti2cr(lo_2, hi_2, bucket_mask_2[idx]);
|
cr |= shufti2cr(lo_2, hi_2, bucket_mask_2[idx]);
|
||||||
|
if (neg_mask & (1ULL << idx)) {
|
||||||
if (neg_mask & (1U << idx)) {
|
|
||||||
cr.flip();
|
cr.flip();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -970,6 +968,20 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
|||||||
}
|
}
|
||||||
PROGRAM_NEXT_INSTRUCTION
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
|
PROGRAM_CASE(CHECK_MASK_64) {
|
||||||
|
os << " and_mask "
|
||||||
|
<< dumpStrMask(ri->and_mask, sizeof(ri->and_mask))
|
||||||
|
<< endl;
|
||||||
|
os << " cmp_mask "
|
||||||
|
<< dumpStrMask(ri->cmp_mask, sizeof(ri->cmp_mask))
|
||||||
|
<< endl;
|
||||||
|
os << " neg_mask 0x" << std::hex << std::setw(8)
|
||||||
|
<< std::setfill('0') << ri->neg_mask << std::dec << endl;
|
||||||
|
os << " offset " << ri->offset << endl;
|
||||||
|
os << " fail_jump " << offset + ri->fail_jump << endl;
|
||||||
|
}
|
||||||
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
PROGRAM_CASE(CHECK_BYTE) {
|
PROGRAM_CASE(CHECK_BYTE) {
|
||||||
os << " and_mask 0x" << std::hex << std::setw(2)
|
os << " and_mask 0x" << std::hex << std::setw(2)
|
||||||
<< std::setfill('0') << u32{ri->and_mask} << std::dec
|
<< std::setfill('0') << u32{ri->and_mask} << std::dec
|
||||||
@ -1072,6 +1084,60 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
|||||||
}
|
}
|
||||||
PROGRAM_NEXT_INSTRUCTION
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
|
PROGRAM_CASE(CHECK_SHUFTI_64x8) {
|
||||||
|
os << " hi_mask "
|
||||||
|
<< dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
|
||||||
|
<< endl;
|
||||||
|
os << " lo_mask "
|
||||||
|
<< dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
|
||||||
|
<< endl;
|
||||||
|
os << " bucket_select_mask "
|
||||||
|
<< dumpStrMask(ri->bucket_select_mask,
|
||||||
|
sizeof(ri->bucket_select_mask))
|
||||||
|
<< endl;
|
||||||
|
os << " neg_mask 0x" << std::hex << std::setw(8)
|
||||||
|
<< std::setfill('0') << ri->neg_mask << std::dec << endl;
|
||||||
|
os << " offset " << ri->offset << endl;
|
||||||
|
os << " fail_jump " << offset + ri->fail_jump << endl;
|
||||||
|
dumpLookaroundShufti(os, 64, ri->lo_mask, ri->hi_mask,
|
||||||
|
ri->bucket_select_mask, ri->neg_mask,
|
||||||
|
ri->offset);
|
||||||
|
}
|
||||||
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
|
PROGRAM_CASE(CHECK_SHUFTI_64x16) {
|
||||||
|
os << " hi_mask_1 "
|
||||||
|
<< dumpStrMask(ri->hi_mask_1, sizeof(ri->hi_mask_1))
|
||||||
|
<< endl;
|
||||||
|
os << " hi_mask_2 "
|
||||||
|
<< dumpStrMask(ri->hi_mask_2, sizeof(ri->hi_mask_2))
|
||||||
|
<< endl;
|
||||||
|
os << " lo_mask_1 "
|
||||||
|
<< dumpStrMask(ri->lo_mask_1, sizeof(ri->lo_mask_1))
|
||||||
|
<< endl;
|
||||||
|
os << " lo_mask_2 "
|
||||||
|
<< dumpStrMask(ri->lo_mask_2, sizeof(ri->lo_mask_2))
|
||||||
|
<< endl;
|
||||||
|
os << " bucket_select_mask_hi "
|
||||||
|
<< dumpStrMask(ri->bucket_select_mask_hi,
|
||||||
|
sizeof(ri->bucket_select_mask_hi))
|
||||||
|
<< endl;
|
||||||
|
os << " bucket_select_mask_lo "
|
||||||
|
<< dumpStrMask(ri->bucket_select_mask_lo,
|
||||||
|
sizeof(ri->bucket_select_mask_lo))
|
||||||
|
<< endl;
|
||||||
|
os << " neg_mask 0x" << std::hex << std::setw(8)
|
||||||
|
<< std::setfill('0') << ri->neg_mask << std::dec << endl;
|
||||||
|
os << " offset " << ri->offset << endl;
|
||||||
|
os << " fail_jump " << offset + ri->fail_jump << endl;
|
||||||
|
dumpLookaroundShufti(os, 64, ri->lo_mask_1, ri->hi_mask_1,
|
||||||
|
ri->lo_mask_2, ri->hi_mask_2,
|
||||||
|
ri->bucket_select_mask_lo,
|
||||||
|
ri->bucket_select_mask_hi,
|
||||||
|
ri->neg_mask, ri->offset);
|
||||||
|
}
|
||||||
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
PROGRAM_CASE(CHECK_INFIX) {
|
PROGRAM_CASE(CHECK_INFIX) {
|
||||||
os << " queue " << ri->queue << endl;
|
os << " queue " << ri->queue << endl;
|
||||||
os << " lag " << ri->lag << endl;
|
os << " lag " << ri->lag << endl;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2017-2019, Intel Corporation
|
* Copyright (c) 2017-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -162,6 +162,17 @@ void RoseInstrCheckMask32::write(void *dest, RoseEngineBlob &blob,
|
|||||||
inst->fail_jump = calc_jump(offset_map, this, target);
|
inst->fail_jump = calc_jump(offset_map, this, target);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RoseInstrCheckMask64::write(void *dest, RoseEngineBlob &blob,
|
||||||
|
const OffsetMap &offset_map) const {
|
||||||
|
RoseInstrBase::write(dest, blob, offset_map);
|
||||||
|
auto *inst = static_cast<impl_type *>(dest);
|
||||||
|
copy(begin(and_mask), end(and_mask), inst->and_mask);
|
||||||
|
copy(begin(cmp_mask), end(cmp_mask), inst->cmp_mask);
|
||||||
|
inst->neg_mask = neg_mask;
|
||||||
|
inst->offset = offset;
|
||||||
|
inst->fail_jump = calc_jump(offset_map, this, target);
|
||||||
|
}
|
||||||
|
|
||||||
void RoseInstrCheckByte::write(void *dest, RoseEngineBlob &blob,
|
void RoseInstrCheckByte::write(void *dest, RoseEngineBlob &blob,
|
||||||
const OffsetMap &offset_map) const {
|
const OffsetMap &offset_map) const {
|
||||||
RoseInstrBase::write(dest, blob, offset_map);
|
RoseInstrBase::write(dest, blob, offset_map);
|
||||||
@ -227,6 +238,36 @@ void RoseInstrCheckShufti32x16::write(void *dest, RoseEngineBlob &blob,
|
|||||||
inst->fail_jump = calc_jump(offset_map, this, target);
|
inst->fail_jump = calc_jump(offset_map, this, target);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RoseInstrCheckShufti64x8::write(void *dest, RoseEngineBlob &blob,
|
||||||
|
const OffsetMap &offset_map) const {
|
||||||
|
RoseInstrBase::write(dest, blob, offset_map);
|
||||||
|
auto *inst = static_cast<impl_type *>(dest);
|
||||||
|
copy(begin(hi_mask), end(hi_mask), inst->hi_mask);
|
||||||
|
copy(begin(lo_mask), end(lo_mask), inst->lo_mask);
|
||||||
|
copy(begin(bucket_select_mask), end(bucket_select_mask),
|
||||||
|
inst->bucket_select_mask);
|
||||||
|
inst->neg_mask = neg_mask;
|
||||||
|
inst->offset = offset;
|
||||||
|
inst->fail_jump = calc_jump(offset_map, this, target);
|
||||||
|
}
|
||||||
|
|
||||||
|
void RoseInstrCheckShufti64x16::write(void *dest, RoseEngineBlob &blob,
|
||||||
|
const OffsetMap &offset_map) const {
|
||||||
|
RoseInstrBase::write(dest, blob, offset_map);
|
||||||
|
auto *inst = static_cast<impl_type *>(dest);
|
||||||
|
copy(begin(hi_mask_1), end(hi_mask_1), inst->hi_mask_1);
|
||||||
|
copy(begin(hi_mask_2), end(hi_mask_2), inst->hi_mask_2);
|
||||||
|
copy(begin(lo_mask_1), end(lo_mask_1), inst->lo_mask_1);
|
||||||
|
copy(begin(lo_mask_2), end(lo_mask_2), inst->lo_mask_2);
|
||||||
|
copy(begin(bucket_select_mask_hi), end(bucket_select_mask_hi),
|
||||||
|
inst->bucket_select_mask_hi);
|
||||||
|
copy(begin(bucket_select_mask_lo), end(bucket_select_mask_lo),
|
||||||
|
inst->bucket_select_mask_lo);
|
||||||
|
inst->neg_mask = neg_mask;
|
||||||
|
inst->offset = offset;
|
||||||
|
inst->fail_jump = calc_jump(offset_map, this, target);
|
||||||
|
}
|
||||||
|
|
||||||
void RoseInstrCheckInfix::write(void *dest, RoseEngineBlob &blob,
|
void RoseInstrCheckInfix::write(void *dest, RoseEngineBlob &blob,
|
||||||
const OffsetMap &offset_map) const {
|
const OffsetMap &offset_map) const {
|
||||||
RoseInstrBase::write(dest, blob, offset_map);
|
RoseInstrBase::write(dest, blob, offset_map);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2017-2019, Intel Corporation
|
* Copyright (c) 2017-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -519,6 +519,43 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class RoseInstrCheckMask64
|
||||||
|
: public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MASK_64,
|
||||||
|
ROSE_STRUCT_CHECK_MASK_64,
|
||||||
|
RoseInstrCheckMask64> {
|
||||||
|
public:
|
||||||
|
std::array<u8, 64> and_mask;
|
||||||
|
std::array<u8, 64> cmp_mask;
|
||||||
|
u64a neg_mask;
|
||||||
|
s32 offset;
|
||||||
|
const RoseInstruction *target;
|
||||||
|
|
||||||
|
RoseInstrCheckMask64(std::array<u8, 64> and_mask_in,
|
||||||
|
std::array<u8, 64> cmp_mask_in, u64a neg_mask_in,
|
||||||
|
s32 offset_in, const RoseInstruction *target_in)
|
||||||
|
: and_mask(std::move(and_mask_in)), cmp_mask(std::move(cmp_mask_in)),
|
||||||
|
neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
|
||||||
|
bool operator==(const RoseInstrCheckMask64 &ri) const {
|
||||||
|
return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask &&
|
||||||
|
neg_mask == ri.neg_mask && offset == ri.offset &&
|
||||||
|
target == ri.target;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t hash() const override {
|
||||||
|
return hash_all(opcode, and_mask, cmp_mask, neg_mask, offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void write(void *dest, RoseEngineBlob &blob,
|
||||||
|
const OffsetMap &offset_map) const override;
|
||||||
|
|
||||||
|
bool equiv_to(const RoseInstrCheckMask64 &ri, const OffsetMap &offsets,
|
||||||
|
const OffsetMap &other_offsets) const {
|
||||||
|
return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask &&
|
||||||
|
neg_mask == ri.neg_mask && offset == ri.offset &&
|
||||||
|
offsets.at(target) == other_offsets.at(ri.target);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
class RoseInstrCheckByte
|
class RoseInstrCheckByte
|
||||||
: public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_BYTE,
|
: public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_BYTE,
|
||||||
ROSE_STRUCT_CHECK_BYTE,
|
ROSE_STRUCT_CHECK_BYTE,
|
||||||
@ -738,6 +775,109 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class RoseInstrCheckShufti64x8
|
||||||
|
: public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SHUFTI_64x8,
|
||||||
|
ROSE_STRUCT_CHECK_SHUFTI_64x8,
|
||||||
|
RoseInstrCheckShufti64x8> {
|
||||||
|
public:
|
||||||
|
std::array<u8, 64> hi_mask;
|
||||||
|
std::array<u8, 64> lo_mask;
|
||||||
|
std::array<u8, 64> bucket_select_mask;
|
||||||
|
u64a neg_mask;
|
||||||
|
s32 offset;
|
||||||
|
const RoseInstruction *target;
|
||||||
|
|
||||||
|
RoseInstrCheckShufti64x8(std::array<u8, 64> hi_mask_in,
|
||||||
|
std::array<u8, 64> lo_mask_in,
|
||||||
|
std::array<u8, 64> bucket_select_mask_in,
|
||||||
|
u64a neg_mask_in, s32 offset_in,
|
||||||
|
const RoseInstruction *target_in)
|
||||||
|
: hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)),
|
||||||
|
bucket_select_mask(std::move(bucket_select_mask_in)),
|
||||||
|
neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
|
||||||
|
|
||||||
|
bool operator==(const RoseInstrCheckShufti64x8 &ri) const {
|
||||||
|
return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
|
||||||
|
bucket_select_mask == ri.bucket_select_mask &&
|
||||||
|
neg_mask == ri.neg_mask && offset == ri.offset &&
|
||||||
|
target == ri.target;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t hash() const override {
|
||||||
|
return hash_all(opcode, hi_mask, lo_mask, bucket_select_mask, neg_mask,
|
||||||
|
offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void write(void *dest, RoseEngineBlob &blob,
|
||||||
|
const OffsetMap &offset_map) const override;
|
||||||
|
|
||||||
|
bool equiv_to(const RoseInstrCheckShufti64x8 &ri, const OffsetMap &offsets,
|
||||||
|
const OffsetMap &other_offsets) const {
|
||||||
|
return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
|
||||||
|
bucket_select_mask == ri.bucket_select_mask &&
|
||||||
|
neg_mask == ri.neg_mask && offset == ri.offset &&
|
||||||
|
offsets.at(target) == other_offsets.at(ri.target);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class RoseInstrCheckShufti64x16
|
||||||
|
: public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SHUFTI_64x16,
|
||||||
|
ROSE_STRUCT_CHECK_SHUFTI_64x16,
|
||||||
|
RoseInstrCheckShufti64x16> {
|
||||||
|
public:
|
||||||
|
std::array<u8, 64> hi_mask_1;
|
||||||
|
std::array<u8, 64> hi_mask_2;
|
||||||
|
std::array<u8, 64> lo_mask_1;
|
||||||
|
std::array<u8, 64> lo_mask_2;
|
||||||
|
std::array<u8, 64> bucket_select_mask_hi;
|
||||||
|
std::array<u8, 64> bucket_select_mask_lo;
|
||||||
|
u64a neg_mask;
|
||||||
|
s32 offset;
|
||||||
|
const RoseInstruction *target;
|
||||||
|
|
||||||
|
RoseInstrCheckShufti64x16(std::array<u8, 64> hi_mask_1_in,
|
||||||
|
std::array<u8, 64> hi_mask_2_in,
|
||||||
|
std::array<u8, 64> lo_mask_1_in,
|
||||||
|
std::array<u8, 64> lo_mask_2_in,
|
||||||
|
std::array<u8, 64> bucket_select_mask_hi_in,
|
||||||
|
std::array<u8, 64> bucket_select_mask_lo_in,
|
||||||
|
u64a neg_mask_in, s32 offset_in,
|
||||||
|
const RoseInstruction *target_in)
|
||||||
|
: hi_mask_1(std::move(hi_mask_1_in)), hi_mask_2(std::move(hi_mask_2_in)),
|
||||||
|
lo_mask_1(std::move(lo_mask_1_in)), lo_mask_2(std::move(lo_mask_2_in)),
|
||||||
|
bucket_select_mask_hi(std::move(bucket_select_mask_hi_in)),
|
||||||
|
bucket_select_mask_lo(std::move(bucket_select_mask_lo_in)),
|
||||||
|
neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
|
||||||
|
|
||||||
|
bool operator==(const RoseInstrCheckShufti64x16 &ri) const {
|
||||||
|
return hi_mask_1 == ri.hi_mask_1 && hi_mask_2 == ri.hi_mask_2 &&
|
||||||
|
lo_mask_1 == ri.lo_mask_1 && lo_mask_2 == ri.lo_mask_2 &&
|
||||||
|
bucket_select_mask_hi == ri.bucket_select_mask_hi &&
|
||||||
|
bucket_select_mask_lo == ri.bucket_select_mask_lo &&
|
||||||
|
neg_mask == ri.neg_mask && offset == ri.offset &&
|
||||||
|
target == ri.target;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t hash() const override {
|
||||||
|
return hash_all(opcode, hi_mask_1, hi_mask_2, lo_mask_1, lo_mask_2,
|
||||||
|
bucket_select_mask_hi, bucket_select_mask_lo, neg_mask,
|
||||||
|
offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void write(void *dest, RoseEngineBlob &blob,
|
||||||
|
const OffsetMap &offset_map) const override;
|
||||||
|
|
||||||
|
bool equiv_to(const RoseInstrCheckShufti64x16 &ri, const OffsetMap &offsets,
|
||||||
|
const OffsetMap &other_offsets) const {
|
||||||
|
return hi_mask_1 == ri.hi_mask_1 && hi_mask_2 == ri.hi_mask_2 &&
|
||||||
|
lo_mask_1 == ri.lo_mask_1 && lo_mask_2 == ri.lo_mask_2 &&
|
||||||
|
bucket_select_mask_hi == ri.bucket_select_mask_hi &&
|
||||||
|
bucket_select_mask_lo == ri.bucket_select_mask_lo &&
|
||||||
|
neg_mask == ri.neg_mask && offset == ri.offset &&
|
||||||
|
offsets.at(target) == other_offsets.at(ri.target);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
class RoseInstrCheckInfix
|
class RoseInstrCheckInfix
|
||||||
: public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_INFIX,
|
: public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_INFIX,
|
||||||
ROSE_STRUCT_CHECK_INFIX,
|
ROSE_STRUCT_CHECK_INFIX,
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -58,7 +58,7 @@ static const u32 MAX_FWD_LEN = 64;
|
|||||||
static const u32 MAX_BACK_LEN = 64;
|
static const u32 MAX_BACK_LEN = 64;
|
||||||
|
|
||||||
/** \brief Max lookaround entries for a role. */
|
/** \brief Max lookaround entries for a role. */
|
||||||
static const u32 MAX_LOOKAROUND_ENTRIES = 16;
|
static const u32 MAX_LOOKAROUND_ENTRIES = 32;
|
||||||
|
|
||||||
/** \brief We would rather have lookarounds with smaller reach than this. */
|
/** \brief We would rather have lookarounds with smaller reach than this. */
|
||||||
static const u32 LOOKAROUND_WIDE_REACH = 200;
|
static const u32 LOOKAROUND_WIDE_REACH = 200;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016-2019, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -1061,6 +1061,49 @@ bool makeRoleMask32(const vector<LookEntry> &look,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
bool makeRoleMask64(const vector<LookEntry> &look,
|
||||||
|
RoseProgram &program, const target_t &target) {
|
||||||
|
if (!target.has_avx512()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (look.back().offset >= look.front().offset + 64) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
s32 base_offset = verify_s32(look.front().offset);
|
||||||
|
array<u8, 64> and_mask, cmp_mask;
|
||||||
|
and_mask.fill(0);
|
||||||
|
cmp_mask.fill(0);
|
||||||
|
u64a neg_mask = 0;
|
||||||
|
for (const auto &entry : look) {
|
||||||
|
u8 andmask_u8, cmpmask_u8, flip;
|
||||||
|
if (!checkReachWithFlip(entry.reach, andmask_u8, cmpmask_u8, flip)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
u32 shift = entry.offset - base_offset;
|
||||||
|
assert(shift < 64);
|
||||||
|
and_mask[shift] = andmask_u8;
|
||||||
|
cmp_mask[shift] = cmpmask_u8;
|
||||||
|
if (flip) {
|
||||||
|
neg_mask |= 1ULL << shift;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUG_PRINTF("and_mask %s\n",
|
||||||
|
convertMaskstoString(and_mask.data(), 64).c_str());
|
||||||
|
DEBUG_PRINTF("cmp_mask %s\n",
|
||||||
|
convertMaskstoString(cmp_mask.data(), 64).c_str());
|
||||||
|
DEBUG_PRINTF("neg_mask %llx\n", neg_mask);
|
||||||
|
DEBUG_PRINTF("base_offset %d\n", base_offset);
|
||||||
|
|
||||||
|
const auto *end_inst = program.end_instruction();
|
||||||
|
auto ri = make_unique<RoseInstrCheckMask64>(and_mask, cmp_mask, neg_mask,
|
||||||
|
base_offset, end_inst);
|
||||||
|
program.add_before_end(move(ri));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// Sorting by the size of every bucket.
|
// Sorting by the size of every bucket.
|
||||||
// Used in map<u32, vector<s8>, cmpNibble>.
|
// Used in map<u32, vector<s8>, cmpNibble>.
|
||||||
struct cmpNibble {
|
struct cmpNibble {
|
||||||
@ -1084,6 +1127,7 @@ void getAllBuckets(const vector<LookEntry> &look,
|
|||||||
} else {
|
} else {
|
||||||
neg_mask ^= 1ULL << (entry.offset - base_offset);
|
neg_mask ^= 1ULL << (entry.offset - base_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
map <u16, u16> lo2hi;
|
map <u16, u16> lo2hi;
|
||||||
// We treat Ascii Table as a 16x16 grid.
|
// We treat Ascii Table as a 16x16 grid.
|
||||||
// Push every row in cr into lo2hi and mark the row number.
|
// Push every row in cr into lo2hi and mark the row number.
|
||||||
@ -1237,6 +1281,7 @@ makeCheckShufti16x16(u32 offset_range, u8 bucket_idx,
|
|||||||
(hi_mask, lo_mask, bucket_select_mask_32,
|
(hi_mask, lo_mask, bucket_select_mask_32,
|
||||||
neg_mask & 0xffff, base_offset, end_inst);
|
neg_mask & 0xffff, base_offset, end_inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
unique_ptr<RoseInstruction>
|
unique_ptr<RoseInstruction>
|
||||||
makeCheckShufti32x16(u32 offset_range, u8 bucket_idx,
|
makeCheckShufti32x16(u32 offset_range, u8 bucket_idx,
|
||||||
@ -1255,10 +1300,83 @@ makeCheckShufti32x16(u32 offset_range, u8 bucket_idx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program) {
|
unique_ptr<RoseInstruction>
|
||||||
|
makeCheckShufti64x8(u32 offset_range, u8 bucket_idx,
|
||||||
|
const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
|
||||||
|
const array<u8, 64> &bucket_select_mask,
|
||||||
|
u64a neg_mask, s32 base_offset,
|
||||||
|
const RoseInstruction *end_inst) {
|
||||||
|
if (offset_range > 64 || bucket_idx > 8) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
array<u8, 64> hi_mask_64;
|
||||||
|
array<u8, 64> lo_mask_64;
|
||||||
|
copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_64.begin());
|
||||||
|
copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_64.begin() + 16);
|
||||||
|
copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_64.begin() + 32);
|
||||||
|
copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_64.begin() + 48);
|
||||||
|
copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_64.begin());
|
||||||
|
copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_64.begin() + 16);
|
||||||
|
copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_64.begin() + 32);
|
||||||
|
copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_64.begin() + 48);
|
||||||
|
|
||||||
|
return make_unique<RoseInstrCheckShufti64x8>
|
||||||
|
(hi_mask_64, lo_mask_64, bucket_select_mask,
|
||||||
|
neg_mask, base_offset, end_inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
unique_ptr<RoseInstruction>
|
||||||
|
makeCheckShufti64x16(u32 offset_range, u8 bucket_idx,
|
||||||
|
const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
|
||||||
|
const array<u8, 64> &bucket_select_mask_lo,
|
||||||
|
const array<u8, 64> &bucket_select_mask_hi,
|
||||||
|
u64a neg_mask, s32 base_offset,
|
||||||
|
const RoseInstruction *end_inst) {
|
||||||
|
if (offset_range > 64 || bucket_idx > 16) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
array<u8, 64> hi_mask_1;
|
||||||
|
array<u8, 64> hi_mask_2;
|
||||||
|
array<u8, 64> lo_mask_1;
|
||||||
|
array<u8, 64> lo_mask_2;
|
||||||
|
|
||||||
|
copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin());
|
||||||
|
copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin() + 16);
|
||||||
|
copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin() + 32);
|
||||||
|
copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin() + 48);
|
||||||
|
copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin());
|
||||||
|
copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin() + 16);
|
||||||
|
copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin() + 32);
|
||||||
|
copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin() + 48);
|
||||||
|
|
||||||
|
copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin());
|
||||||
|
copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin() + 16);
|
||||||
|
copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin() + 32);
|
||||||
|
copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin() + 48);
|
||||||
|
copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin());
|
||||||
|
copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin() + 16);
|
||||||
|
copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin() + 32);
|
||||||
|
copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin() + 48);
|
||||||
|
|
||||||
|
return make_unique<RoseInstrCheckShufti64x16>
|
||||||
|
(hi_mask_1, hi_mask_2, lo_mask_1, lo_mask_2, bucket_select_mask_hi,
|
||||||
|
bucket_select_mask_lo, neg_mask, base_offset, end_inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program,
|
||||||
|
const target_t &target) {
|
||||||
|
s32 offset_limit;
|
||||||
|
if (target.has_avx512()) {
|
||||||
|
offset_limit = 64;
|
||||||
|
} else {
|
||||||
|
offset_limit = 32;
|
||||||
|
}
|
||||||
s32 base_offset = verify_s32(look.front().offset);
|
s32 base_offset = verify_s32(look.front().offset);
|
||||||
if (look.back().offset >= base_offset + 32) {
|
if (look.back().offset >= base_offset + offset_limit) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1266,17 +1384,40 @@ bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program) {
|
|||||||
u64a neg_mask_64;
|
u64a neg_mask_64;
|
||||||
array<u8, 32> hi_mask;
|
array<u8, 32> hi_mask;
|
||||||
array<u8, 32> lo_mask;
|
array<u8, 32> lo_mask;
|
||||||
|
array<u8, 64> bucket_select_hi_64; // for AVX512
|
||||||
|
array<u8, 64> bucket_select_lo_64; // for AVX512
|
||||||
array<u8, 32> bucket_select_hi;
|
array<u8, 32> bucket_select_hi;
|
||||||
array<u8, 32> bucket_select_lo;
|
array<u8, 32> bucket_select_lo;
|
||||||
hi_mask.fill(0);
|
hi_mask.fill(0);
|
||||||
lo_mask.fill(0);
|
lo_mask.fill(0);
|
||||||
|
bucket_select_hi_64.fill(0);
|
||||||
|
bucket_select_lo_64.fill(0);
|
||||||
bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8.
|
bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8.
|
||||||
bucket_select_lo.fill(0);
|
bucket_select_lo.fill(0);
|
||||||
|
|
||||||
if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi.data(),
|
if (target.has_avx512()) {
|
||||||
bucket_select_lo.data(), neg_mask_64, bucket_idx, 32)) {
|
if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi_64.data(),
|
||||||
return false;
|
bucket_select_lo_64.data(), neg_mask_64, bucket_idx,
|
||||||
|
32)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
copy(bucket_select_hi_64.begin(), bucket_select_hi_64.begin() + 32,
|
||||||
|
bucket_select_hi.begin());
|
||||||
|
copy(bucket_select_lo_64.begin(), bucket_select_lo_64.begin() + 32,
|
||||||
|
bucket_select_lo.begin());
|
||||||
|
|
||||||
|
DEBUG_PRINTF("bucket_select_hi_64 %s\n",
|
||||||
|
convertMaskstoString(bucket_select_hi_64.data(), 64).c_str());
|
||||||
|
DEBUG_PRINTF("bucket_select_lo_64 %s\n",
|
||||||
|
convertMaskstoString(bucket_select_lo_64.data(), 64).c_str());
|
||||||
|
} else {
|
||||||
|
if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi.data(),
|
||||||
|
bucket_select_lo.data(), neg_mask_64, bucket_idx,
|
||||||
|
32)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 neg_mask = (u32)neg_mask_64;
|
u32 neg_mask = (u32)neg_mask_64;
|
||||||
|
|
||||||
DEBUG_PRINTF("hi_mask %s\n",
|
DEBUG_PRINTF("hi_mask %s\n",
|
||||||
@ -1299,6 +1440,13 @@ bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program) {
|
|||||||
bucket_select_lo, neg_mask, base_offset,
|
bucket_select_lo, neg_mask, base_offset,
|
||||||
end_inst);
|
end_inst);
|
||||||
}
|
}
|
||||||
|
if (target.has_avx512()) {
|
||||||
|
if (!ri) {
|
||||||
|
ri = makeCheckShufti64x8(offset_range, bucket_idx, hi_mask, lo_mask,
|
||||||
|
bucket_select_lo_64, neg_mask_64,
|
||||||
|
base_offset, end_inst);
|
||||||
|
}
|
||||||
|
}
|
||||||
if (!ri) {
|
if (!ri) {
|
||||||
ri = makeCheckShufti16x16(offset_range, bucket_idx, hi_mask, lo_mask,
|
ri = makeCheckShufti16x16(offset_range, bucket_idx, hi_mask, lo_mask,
|
||||||
bucket_select_lo, bucket_select_hi,
|
bucket_select_lo, bucket_select_hi,
|
||||||
@ -1309,6 +1457,13 @@ bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program) {
|
|||||||
bucket_select_lo, bucket_select_hi,
|
bucket_select_lo, bucket_select_hi,
|
||||||
neg_mask, base_offset, end_inst);
|
neg_mask, base_offset, end_inst);
|
||||||
}
|
}
|
||||||
|
if (target.has_avx512()) {
|
||||||
|
if (!ri) {
|
||||||
|
ri = makeCheckShufti64x16(offset_range, bucket_idx, hi_mask, lo_mask,
|
||||||
|
bucket_select_lo_64, bucket_select_hi_64,
|
||||||
|
neg_mask_64, base_offset, end_inst);
|
||||||
|
}
|
||||||
|
}
|
||||||
assert(ri);
|
assert(ri);
|
||||||
program.add_before_end(move(ri));
|
program.add_before_end(move(ri));
|
||||||
|
|
||||||
@ -1321,7 +1476,7 @@ bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program) {
|
|||||||
*/
|
*/
|
||||||
static
|
static
|
||||||
void makeLookaroundInstruction(const vector<LookEntry> &look,
|
void makeLookaroundInstruction(const vector<LookEntry> &look,
|
||||||
RoseProgram &program) {
|
RoseProgram &program, const target_t &target) {
|
||||||
assert(!look.empty());
|
assert(!look.empty());
|
||||||
|
|
||||||
if (makeRoleByte(look, program)) {
|
if (makeRoleByte(look, program)) {
|
||||||
@ -1345,7 +1500,11 @@ void makeLookaroundInstruction(const vector<LookEntry> &look,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (makeRoleShufti(look, program)) {
|
if (makeRoleMask64(look, program, target)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (makeRoleShufti(look, program, target)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1386,7 +1545,7 @@ void makeCheckLitMaskInstruction(const RoseBuildImpl &build, u32 lit_id,
|
|||||||
return; // all caseful chars handled by HWLM mask.
|
return; // all caseful chars handled by HWLM mask.
|
||||||
}
|
}
|
||||||
|
|
||||||
makeLookaroundInstruction(look, program);
|
makeLookaroundInstruction(look, program, build.cc.target_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -1730,7 +1889,7 @@ void makeRoleLookaround(const RoseBuildImpl &build,
|
|||||||
findLookaroundMasks(build, v, look_more);
|
findLookaroundMasks(build, v, look_more);
|
||||||
mergeLookaround(look, look_more);
|
mergeLookaround(look, look_more);
|
||||||
if (!look.empty()) {
|
if (!look.empty()) {
|
||||||
makeLookaroundInstruction(look, program);
|
makeLookaroundInstruction(look, program, build.cc.target_info);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2019, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -208,7 +208,11 @@ enum RoseInstructionCode {
|
|||||||
*/
|
*/
|
||||||
ROSE_INSTR_LAST_FLUSH_COMBINATION,
|
ROSE_INSTR_LAST_FLUSH_COMBINATION,
|
||||||
|
|
||||||
LAST_ROSE_INSTRUCTION = ROSE_INSTR_LAST_FLUSH_COMBINATION //!< Sentinel.
|
ROSE_INSTR_CHECK_SHUFTI_64x8, //!< Check 64-byte data by 8-bucket shufti.
|
||||||
|
ROSE_INSTR_CHECK_SHUFTI_64x16, //!< Check 64-byte data by 16-bucket shufti.
|
||||||
|
ROSE_INSTR_CHECK_MASK_64, //!< 64-bytes and/cmp/neg mask check.
|
||||||
|
|
||||||
|
LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_MASK_64 //!< Sentinel.
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ROSE_STRUCT_END {
|
struct ROSE_STRUCT_END {
|
||||||
@ -285,6 +289,15 @@ struct ROSE_STRUCT_CHECK_MASK_32 {
|
|||||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ROSE_STRUCT_CHECK_MASK_64 {
|
||||||
|
u8 code; //!< From enum RoseInstructionCode.
|
||||||
|
u8 and_mask[64]; //!< 64-byte and mask.
|
||||||
|
u8 cmp_mask[64]; //!< 64-byte cmp mask.
|
||||||
|
u64a neg_mask; //!< negation mask with 32 bits.
|
||||||
|
s32 offset; //!< Relative offset of the first byte.
|
||||||
|
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||||
|
};
|
||||||
|
|
||||||
struct ROSE_STRUCT_CHECK_BYTE {
|
struct ROSE_STRUCT_CHECK_BYTE {
|
||||||
u8 code; //!< From enum RoseInstructionCode.
|
u8 code; //!< From enum RoseInstructionCode.
|
||||||
u8 and_mask; //!< 8-bits and mask.
|
u8 and_mask; //!< 8-bits and mask.
|
||||||
@ -336,6 +349,29 @@ struct ROSE_STRUCT_CHECK_SHUFTI_32x16 {
|
|||||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ROSE_STRUCT_CHECK_SHUFTI_64x8 {
|
||||||
|
u8 code; //!< From enum RoseInstructionCode.
|
||||||
|
u8 hi_mask[64]; //!< High nibble mask in shufti.
|
||||||
|
u8 lo_mask[64]; //!< Low nibble mask in shufti.
|
||||||
|
u8 bucket_select_mask[64]; //!< Mask for bucket assigning.
|
||||||
|
u64a neg_mask; //!< 64 bits negation mask.
|
||||||
|
s32 offset; //!< Relative offset of the first byte.
|
||||||
|
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ROSE_STRUCT_CHECK_SHUFTI_64x16 {
|
||||||
|
u8 code; //!< From enum RoseInstructionCode.
|
||||||
|
u8 hi_mask_1[64]; //!< 4 copies of 0-15 High nibble mask.
|
||||||
|
u8 hi_mask_2[64]; //!< 4 copies of 16-32 High nibble mask.
|
||||||
|
u8 lo_mask_1[64]; //!< 4 copies of 0-15 Low nibble mask.
|
||||||
|
u8 lo_mask_2[64]; //!< 4 copies of 16-32 Low nibble mask.
|
||||||
|
u8 bucket_select_mask_hi[64]; //!< Bucket mask for high 8 buckets.
|
||||||
|
u8 bucket_select_mask_lo[64]; //!< Bucket mask for low 8 buckets.
|
||||||
|
u64a neg_mask; //!< 64 bits negation mask.
|
||||||
|
s32 offset; //!< Relative offset of the first byte.
|
||||||
|
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||||
|
};
|
||||||
|
|
||||||
struct ROSE_STRUCT_CHECK_INFIX {
|
struct ROSE_STRUCT_CHECK_INFIX {
|
||||||
u8 code; //!< From enum RoseInstructionCode.
|
u8 code; //!< From enum RoseInstructionCode.
|
||||||
u32 queue; //!< Queue of leftfix to check.
|
u32 queue; //!< Queue of leftfix to check.
|
||||||
|
@ -201,12 +201,12 @@ const u8 *prepScanBuffer(const struct core_info *ci,
|
|||||||
} else {
|
} else {
|
||||||
// Copy: first chunk from history buffer.
|
// Copy: first chunk from history buffer.
|
||||||
assert(overhang <= ci->hlen);
|
assert(overhang <= ci->hlen);
|
||||||
copy_upto_32_bytes(tempbuf, ci->hbuf + ci->hlen - overhang,
|
copy_upto_64_bytes(tempbuf, ci->hbuf + ci->hlen - overhang,
|
||||||
overhang);
|
overhang);
|
||||||
// Copy: second chunk from current buffer.
|
// Copy: second chunk from current buffer.
|
||||||
size_t copy_buf_len = LONG_LIT_HASH_LEN - overhang;
|
size_t copy_buf_len = LONG_LIT_HASH_LEN - overhang;
|
||||||
assert(copy_buf_len <= ci->len);
|
assert(copy_buf_len <= ci->len);
|
||||||
copy_upto_32_bytes(tempbuf + overhang, ci->buf, copy_buf_len);
|
copy_upto_64_bytes(tempbuf + overhang, ci->buf, copy_buf_len);
|
||||||
// Read from our temporary buffer for the hash.
|
// Read from our temporary buffer for the hash.
|
||||||
base = tempbuf;
|
base = tempbuf;
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -41,6 +41,17 @@ void validateMask32Print(const u8 *mask) {
|
|||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
static
|
||||||
|
void validateMask64Print(const u8 *mask) {
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < 64; i++) {
|
||||||
|
printf("%02x ", mask[i]);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// check positive bytes in cmp_result.
|
// check positive bytes in cmp_result.
|
||||||
@ -115,4 +126,29 @@ int validateMask32(const m256 data, const u32 valid_data_mask,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
static really_inline
|
||||||
|
int validateMask64(const m512 data, const u64a valid_data_mask,
|
||||||
|
const m512 and_mask, const m512 cmp_mask,
|
||||||
|
const u64a neg_mask) {
|
||||||
|
u64a cmp_result = ~eq512mask(and512(data, and_mask), cmp_mask);
|
||||||
|
#ifdef DEBUG
|
||||||
|
DEBUG_PRINTF("data\n");
|
||||||
|
validateMask64Print((const u8 *)&data);
|
||||||
|
DEBUG_PRINTF("cmp_result\n");
|
||||||
|
validateMask64Print((const u8 *)&cmp_result);
|
||||||
|
#endif
|
||||||
|
DEBUG_PRINTF("cmp_result %016llx neg_mask %016llx\n", cmp_result, neg_mask);
|
||||||
|
DEBUG_PRINTF("valid_data_mask %016llx\n", valid_data_mask);
|
||||||
|
|
||||||
|
if ((cmp_result & valid_data_mask) == (neg_mask & valid_data_mask)) {
|
||||||
|
DEBUG_PRINTF("checkCompareResult64 passed\n");
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
DEBUG_PRINTF("checkCompareResult64 failed\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016-2017, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -175,6 +175,84 @@ int validateShuftiMask32x16(const m256 data,
|
|||||||
return !cmp_result;
|
return !cmp_result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
static really_inline
|
||||||
|
int validateShuftiMask64x8(const m512 data, const m512 hi_mask,
|
||||||
|
const m512 lo_mask, const m512 and_mask,
|
||||||
|
const u64a neg_mask, const u64a valid_data_mask) {
|
||||||
|
m512 low4bits = set64x8(0xf);
|
||||||
|
m512 c_lo = pshufb_m512(lo_mask, and512(data, low4bits));
|
||||||
|
m512 c_hi = pshufb_m512(hi_mask,
|
||||||
|
rshift64_m512(andnot512(low4bits, data), 4));
|
||||||
|
m512 t = and512(c_lo, c_hi);
|
||||||
|
u64a nresult = eq512mask(and512(t, and_mask), zeroes512());
|
||||||
|
#ifdef DEBUG
|
||||||
|
DEBUG_PRINTF("data\n");
|
||||||
|
dumpMask(&data, 64);
|
||||||
|
DEBUG_PRINTF("hi_mask\n");
|
||||||
|
dumpMask(&hi_mask, 64);
|
||||||
|
DEBUG_PRINTF("lo_mask\n");
|
||||||
|
dumpMask(&lo_mask, 64);
|
||||||
|
DEBUG_PRINTF("c_lo\n");
|
||||||
|
dumpMask(&c_lo, 64);
|
||||||
|
DEBUG_PRINTF("c_hi\n");
|
||||||
|
dumpMask(&c_hi, 64);
|
||||||
|
DEBUG_PRINTF("nresult %llx\n", nresult);
|
||||||
|
DEBUG_PRINTF("valid_data_mask %llx\n", valid_data_mask);
|
||||||
|
#endif
|
||||||
|
u64a cmp_result = (nresult ^ neg_mask) & valid_data_mask;
|
||||||
|
return !cmp_result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
int validateShuftiMask64x16(const m512 data,
|
||||||
|
const m512 hi_mask_1, const m512 hi_mask_2,
|
||||||
|
const m512 lo_mask_1, const m512 lo_mask_2,
|
||||||
|
const m512 and_mask_hi, const m512 and_mask_lo,
|
||||||
|
const u64a neg_mask, const u64a valid_data_mask) {
|
||||||
|
m512 low4bits = set64x8(0xf);
|
||||||
|
m512 data_lo = and512(data, low4bits);
|
||||||
|
m512 data_hi = and512(rshift64_m512(data, 4), low4bits);
|
||||||
|
m512 c_lo_1 = pshufb_m512(lo_mask_1, data_lo);
|
||||||
|
m512 c_lo_2 = pshufb_m512(lo_mask_2, data_lo);
|
||||||
|
m512 c_hi_1 = pshufb_m512(hi_mask_1, data_hi);
|
||||||
|
m512 c_hi_2 = pshufb_m512(hi_mask_2, data_hi);
|
||||||
|
m512 t1 = and512(c_lo_1, c_hi_1);
|
||||||
|
m512 t2 = and512(c_lo_2, c_hi_2);
|
||||||
|
m512 result = or512(and512(t1, and_mask_lo), and512(t2, and_mask_hi));
|
||||||
|
u64a nresult = eq512mask(result, zeroes512());
|
||||||
|
#ifdef DEBUG
|
||||||
|
DEBUG_PRINTF("data\n");
|
||||||
|
dumpMask(&data, 64);
|
||||||
|
DEBUG_PRINTF("data_lo\n");
|
||||||
|
dumpMask(&data_lo, 64);
|
||||||
|
DEBUG_PRINTF("data_hi\n");
|
||||||
|
dumpMask(&data_hi, 64);
|
||||||
|
DEBUG_PRINTF("hi_mask_1\n");
|
||||||
|
dumpMask(&hi_mask_1, 64);
|
||||||
|
DEBUG_PRINTF("hi_mask_2\n");
|
||||||
|
dumpMask(&hi_mask_2, 64);
|
||||||
|
DEBUG_PRINTF("lo_mask_1\n");
|
||||||
|
dumpMask(&lo_mask_1, 64);
|
||||||
|
DEBUG_PRINTF("lo_mask_2\n");
|
||||||
|
dumpMask(&lo_mask_2, 64);
|
||||||
|
DEBUG_PRINTF("c_lo_1\n");
|
||||||
|
dumpMask(&c_lo_1, 64);
|
||||||
|
DEBUG_PRINTF("c_lo_2\n");
|
||||||
|
dumpMask(&c_lo_2, 64);
|
||||||
|
DEBUG_PRINTF("c_hi_1\n");
|
||||||
|
dumpMask(&c_hi_1, 64);
|
||||||
|
DEBUG_PRINTF("c_hi_2\n");
|
||||||
|
dumpMask(&c_hi_2, 64);
|
||||||
|
DEBUG_PRINTF("result\n");
|
||||||
|
dumpMask(&result, 64);
|
||||||
|
DEBUG_PRINTF("valid_data_mask %llx\n", valid_data_mask);
|
||||||
|
#endif
|
||||||
|
u64a cmp_result = (nresult ^ neg_mask) & valid_data_mask;
|
||||||
|
return !cmp_result;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
int checkMultipath32(u32 data, u32 hi_bits, u32 lo_bits) {
|
int checkMultipath32(u32 data, u32 hi_bits, u32 lo_bits) {
|
||||||
u32 t = ~(data | hi_bits);
|
u32 t = ~(data | hi_bits);
|
||||||
|
@ -424,6 +424,11 @@ static really_inline m256 loadu256(const void *ptr) {
|
|||||||
return _mm256_loadu_si256((const m256 *)ptr);
|
return _mm256_loadu_si256((const m256 *)ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m256 loadu_maskz_m256(__mmask32 k, const void *ptr) {
|
||||||
|
return _mm256_maskz_loadu_epi8(k, ptr);
|
||||||
|
}
|
||||||
|
|
||||||
// unaligned store
|
// unaligned store
|
||||||
static really_inline void storeu256(void *ptr, m256 a) {
|
static really_inline void storeu256(void *ptr, m256 a) {
|
||||||
_mm256_storeu_si256((m256 *)ptr, a);
|
_mm256_storeu_si256((m256 *)ptr, a);
|
||||||
@ -712,6 +717,22 @@ m512 loadu512(const void *ptr) {
|
|||||||
return _mm512_loadu_si512(ptr);
|
return _mm512_loadu_si512(ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// unaligned store
|
||||||
|
static really_inline
|
||||||
|
void storeu512(void *ptr, m512 a) {
|
||||||
|
#if defined(HAVE_AVX512)
|
||||||
|
_mm512_storeu_si512((m512 *)ptr, a);
|
||||||
|
#elif defined(HAVE_AVX2)
|
||||||
|
storeu256(ptr, a.lo);
|
||||||
|
storeu256((char *)ptr + 32, a.hi);
|
||||||
|
#else
|
||||||
|
storeu128(ptr, a.lo.lo);
|
||||||
|
storeu128((char *)ptr + 16, a.lo.hi);
|
||||||
|
storeu128((char *)ptr + 32, a.hi.lo);
|
||||||
|
storeu128((char *)ptr + 48, a.hi.hi);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
m512 loadu_maskz_m512(__mmask64 k, const void *ptr) {
|
m512 loadu_maskz_m512(__mmask64 k, const void *ptr) {
|
||||||
return _mm512_maskz_loadu_epi8(k, ptr);
|
return _mm512_maskz_loadu_epi8(k, ptr);
|
||||||
@ -722,6 +743,11 @@ m512 loadu_mask_m512(m512 src, __mmask64 k, const void *ptr) {
|
|||||||
return _mm512_mask_loadu_epi8(src, k, ptr);
|
return _mm512_mask_loadu_epi8(src, k, ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
void storeu_mask_m512(void *ptr, __mmask64 k, m512 a) {
|
||||||
|
_mm512_mask_storeu_epi8(ptr, k, a);
|
||||||
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
m512 set_mask_m512(__mmask64 k) {
|
m512 set_mask_m512(__mmask64 k) {
|
||||||
return _mm512_movm_epi8(k);
|
return _mm512_movm_epi8(k);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -33,7 +33,7 @@
|
|||||||
#include "simd_utils.h"
|
#include "simd_utils.h"
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
void copy_upto_32_bytes(u8 *dst, const u8 *src, unsigned int len) {
|
void copy_upto_64_bytes(u8 *dst, const u8 *src, unsigned int len) {
|
||||||
switch (len) {
|
switch (len) {
|
||||||
case 0:
|
case 0:
|
||||||
break;
|
break;
|
||||||
@ -72,14 +72,41 @@ void copy_upto_32_bytes(u8 *dst, const u8 *src, unsigned int len) {
|
|||||||
case 16:
|
case 16:
|
||||||
storeu128(dst, loadu128(src));
|
storeu128(dst, loadu128(src));
|
||||||
break;
|
break;
|
||||||
case 32:
|
case 17:
|
||||||
storeu256(dst, loadu256(src));
|
case 18:
|
||||||
break;
|
case 19:
|
||||||
default:
|
case 20:
|
||||||
assert(len < 32);
|
case 21:
|
||||||
|
case 22:
|
||||||
|
case 23:
|
||||||
|
case 24:
|
||||||
|
case 25:
|
||||||
|
case 26:
|
||||||
|
case 27:
|
||||||
|
case 28:
|
||||||
|
case 29:
|
||||||
|
case 30:
|
||||||
|
case 31:
|
||||||
storeu128(dst + len - 16, loadu128(src + len - 16));
|
storeu128(dst + len - 16, loadu128(src + len - 16));
|
||||||
storeu128(dst, loadu128(src));
|
storeu128(dst, loadu128(src));
|
||||||
break;
|
break;
|
||||||
|
case 32:
|
||||||
|
storeu256(dst, loadu256(src));
|
||||||
|
break;
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
case 64:
|
||||||
|
storebytes512(dst, loadu512(src), 64);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(len < 64);
|
||||||
|
u64a k = (1ULL << len) - 1;
|
||||||
|
storeu_mask_m512(dst, k, loadu_maskz_m512(k, src));
|
||||||
|
break;
|
||||||
|
#else
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user