mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
rose: add multi-path shufti 16x8, 32x8, 32x16, 64x8 and multi-path lookaround instructions.
This commit is contained in:
parent
7533e3341e
commit
ae3cb7de6f
@ -857,13 +857,13 @@ u32 getBufferDataComplex(const struct core_info *ci, const s64a loc,
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
m128 getData128(const struct core_info *ci, s64a offset, u16 *valid_data_mask) {
|
||||
m128 getData128(const struct core_info *ci, s64a offset, u32 *valid_data_mask) {
|
||||
if (offset > 0 && offset + sizeof(m128) <= ci->len) {
|
||||
*valid_data_mask = 0xffff;
|
||||
return loadu128(ci->buf + offset);
|
||||
}
|
||||
ALIGN_DIRECTIVE u8 data[sizeof(m128)];
|
||||
*valid_data_mask = (u16)getBufferDataComplex(ci, offset, data, 16);
|
||||
*valid_data_mask = getBufferDataComplex(ci, offset, data, 16);
|
||||
return *(m128 *)data;
|
||||
}
|
||||
|
||||
@ -892,7 +892,7 @@ int roseCheckShufti16x8(const struct core_info *ci, const u8 *nib_mask,
|
||||
return 0;
|
||||
}
|
||||
|
||||
u16 valid_data_mask = 0;
|
||||
u32 valid_data_mask = 0;
|
||||
m128 data = getData128(ci, offset, &valid_data_mask);
|
||||
if (unlikely(!valid_data_mask)) {
|
||||
return 1;
|
||||
@ -924,7 +924,7 @@ int roseCheckShufti16x16(const struct core_info *ci, const u8 *hi_mask,
|
||||
return 0;
|
||||
}
|
||||
|
||||
u16 valid_data_mask = 0;
|
||||
u32 valid_data_mask = 0;
|
||||
m128 data = getData128(ci, offset, &valid_data_mask);
|
||||
if (unlikely(!valid_data_mask)) {
|
||||
return 1;
|
||||
@ -1020,8 +1020,9 @@ int roseCheckShufti32x16(const struct core_info *ci, const u8 *hi_mask,
|
||||
static rose_inline
|
||||
int roseCheckSingleLookaround(const struct RoseEngine *t,
|
||||
const struct hs_scratch *scratch,
|
||||
s8 checkOffset, u32 lookaroundIndex, u64a end) {
|
||||
assert(lookaroundIndex != MO_INVALID_IDX);
|
||||
s8 checkOffset, u32 lookaroundReachIndex,
|
||||
u64a end) {
|
||||
assert(lookaroundReachIndex != MO_INVALID_IDX);
|
||||
const struct core_info *ci = &scratch->core_info;
|
||||
DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
|
||||
ci->buf_offset, ci->buf_offset + ci->len);
|
||||
@ -1037,7 +1038,7 @@ int roseCheckSingleLookaround(const struct RoseEngine *t,
|
||||
}
|
||||
|
||||
const u8 *reach_base = (const u8 *)t + t->lookaroundReachOffset;
|
||||
const u8 *reach = reach_base + lookaroundIndex * REACH_BITVECTOR_LEN;
|
||||
const u8 *reach = reach_base + lookaroundReachIndex;
|
||||
|
||||
u8 c;
|
||||
if (offset >= 0 && offset < (s64a)ci->len) {
|
||||
@ -1063,9 +1064,11 @@ int roseCheckSingleLookaround(const struct RoseEngine *t,
|
||||
*/
|
||||
static rose_inline
|
||||
int roseCheckLookaround(const struct RoseEngine *t,
|
||||
const struct hs_scratch *scratch, u32 lookaroundIndex,
|
||||
const struct hs_scratch *scratch,
|
||||
u32 lookaroundLookIndex, u32 lookaroundReachIndex,
|
||||
u32 lookaroundCount, u64a end) {
|
||||
assert(lookaroundIndex != MO_INVALID_IDX);
|
||||
assert(lookaroundLookIndex != MO_INVALID_IDX);
|
||||
assert(lookaroundReachIndex != MO_INVALID_IDX);
|
||||
assert(lookaroundCount > 0);
|
||||
|
||||
const struct core_info *ci = &scratch->core_info;
|
||||
@ -1074,12 +1077,12 @@ int roseCheckLookaround(const struct RoseEngine *t,
|
||||
|
||||
const u8 *base = (const u8 *)t;
|
||||
const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset);
|
||||
const s8 *look = look_base + lookaroundIndex;
|
||||
const s8 *look = look_base + lookaroundLookIndex;
|
||||
const s8 *look_end = look + lookaroundCount;
|
||||
assert(look < look_end);
|
||||
|
||||
const u8 *reach_base = base + t->lookaroundReachOffset;
|
||||
const u8 *reach = reach_base + lookaroundIndex * REACH_BITVECTOR_LEN;
|
||||
const u8 *reach = reach_base + lookaroundReachIndex;
|
||||
|
||||
// The following code assumes that the lookaround structures are ordered by
|
||||
// increasing offset.
|
||||
@ -1151,6 +1154,359 @@ int roseCheckLookaround(const struct RoseEngine *t,
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Trying to find a matching path by the corresponding path mask of
|
||||
* every lookaround location.
|
||||
*/
|
||||
static rose_inline
|
||||
int roseMultipathLookaround(const struct RoseEngine *t,
|
||||
const struct hs_scratch *scratch,
|
||||
u32 multipathLookaroundLookIndex,
|
||||
u32 multipathLookaroundReachIndex,
|
||||
u32 multipathLookaroundCount,
|
||||
s32 last_start, const u8 *start_mask,
|
||||
u64a end) {
|
||||
assert(multipathLookaroundCount > 0);
|
||||
|
||||
const struct core_info *ci = &scratch->core_info;
|
||||
DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
|
||||
ci->buf_offset, ci->buf_offset + ci->len);
|
||||
|
||||
const s8 *look_base = getByOffset(t, t->lookaroundTableOffset);
|
||||
const s8 *look = look_base + multipathLookaroundLookIndex;
|
||||
const s8 *look_end = look + multipathLookaroundCount;
|
||||
assert(look < look_end);
|
||||
|
||||
const u8 *reach_base = getByOffset(t, t->lookaroundReachOffset);
|
||||
const u8 *reach = reach_base + multipathLookaroundReachIndex;
|
||||
|
||||
const s64a base_offset = (s64a)end - ci->buf_offset;
|
||||
DEBUG_PRINTF("base_offset=%lld\n", base_offset);
|
||||
|
||||
u8 path = 0xff;
|
||||
|
||||
assert(last_start < 0);
|
||||
|
||||
if (unlikely((u64a)(0 - last_start) > end)) {
|
||||
DEBUG_PRINTF("too early, fail\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
u32 start_offset = 0;
|
||||
do {
|
||||
s64a offset = base_offset + *look;
|
||||
DEBUG_PRINTF("start_mask[%u] = %x\n", start_offset,
|
||||
start_mask[start_offset]);
|
||||
path = start_mask[start_offset];
|
||||
if (offset >= -(s64a)ci->hlen) {
|
||||
break;
|
||||
}
|
||||
DEBUG_PRINTF("look=%d before history\n", *look);
|
||||
start_offset++;
|
||||
look++;
|
||||
reach += MULTI_REACH_BITVECTOR_LEN;
|
||||
} while (look < look_end);
|
||||
|
||||
DEBUG_PRINTF("scan history (%zu looks left)\n", look_end - look);
|
||||
for (; look < look_end; ++look, reach += MULTI_REACH_BITVECTOR_LEN) {
|
||||
s64a offset = base_offset + *look;
|
||||
DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset);
|
||||
|
||||
if (offset >= 0) {
|
||||
DEBUG_PRINTF("in buffer\n");
|
||||
break;
|
||||
}
|
||||
|
||||
assert(offset >= -(s64a)ci->hlen && offset < 0);
|
||||
u8 c = ci->hbuf[ci->hlen + offset];
|
||||
path &= reach[c];
|
||||
DEBUG_PRINTF("reach[%x] = %02x path = %0xx\n", c, reach[c], path);
|
||||
if (!path) {
|
||||
DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("scan buffer (%zu looks left)\n", look_end - look);
|
||||
for(; look < look_end; ++look, reach += MULTI_REACH_BITVECTOR_LEN) {
|
||||
s64a offset = base_offset + *look;
|
||||
DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset);
|
||||
|
||||
if (offset >= (s64a)ci->len) {
|
||||
DEBUG_PRINTF("in the future\n");
|
||||
break;
|
||||
}
|
||||
|
||||
assert(offset >= 0 && offset < (s64a)ci->len);
|
||||
u8 c = ci->buf[offset];
|
||||
path &= reach[c];
|
||||
DEBUG_PRINTF("reach[%x] = %02x path = %0xx\n", c, reach[c], path);
|
||||
if (!path) {
|
||||
DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("OK :)\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static never_inline
|
||||
int roseCheckMultipathShufti16x8(const struct hs_scratch *scratch,
|
||||
const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_16x8 *ri,
|
||||
u64a end) {
|
||||
const struct core_info *ci = &scratch->core_info;
|
||||
s32 checkOffset = ri->base_offset;
|
||||
const s64a base_offset = (s64a)end - ci->buf_offset;
|
||||
s64a offset = base_offset + checkOffset;
|
||||
DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
|
||||
DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
|
||||
|
||||
assert(ri->last_start <= 0);
|
||||
if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
|
||||
if ((u64a)(0 - ri->last_start) > end) {
|
||||
DEBUG_PRINTF("too early, fail\n");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
u32 valid_data_mask;
|
||||
m128 data_init = getData128(ci, offset, &valid_data_mask);
|
||||
m128 data_select_mask = loadu128(ri->data_select_mask);
|
||||
|
||||
u32 valid_path_mask = 0;
|
||||
if (unlikely(!(valid_data_mask & 1))) {
|
||||
DEBUG_PRINTF("lose part of backward data\n");
|
||||
DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask);
|
||||
|
||||
m128 expand_valid;
|
||||
u64a expand_mask = 0x8080808080808080ULL;
|
||||
u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask);
|
||||
u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask);
|
||||
DEBUG_PRINTF("expand_hi %llx\n", valid_hi);
|
||||
DEBUG_PRINTF("expand_lo %llx\n", valid_lo);
|
||||
expand_valid = set64x2(valid_hi, valid_lo);
|
||||
valid_path_mask = ~movemask128(pshufb(expand_valid,
|
||||
data_select_mask));
|
||||
}
|
||||
|
||||
m128 data = pshufb(data_init, data_select_mask);
|
||||
m256 nib_mask = loadu256(ri->nib_mask);
|
||||
m128 bucket_select_mask = loadu128(ri->bucket_select_mask);
|
||||
|
||||
u32 hi_bits_mask = ri->hi_bits_mask;
|
||||
u32 lo_bits_mask = ri->lo_bits_mask;
|
||||
u32 neg_mask = ri->neg_mask;
|
||||
|
||||
if (validateMultipathShuftiMask16x8(data, nib_mask,
|
||||
bucket_select_mask,
|
||||
hi_bits_mask, lo_bits_mask,
|
||||
neg_mask, valid_path_mask)) {
|
||||
DEBUG_PRINTF("check multi-path shufti-16x8 successfully\n");
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static never_inline
|
||||
int roseCheckMultipathShufti32x8(const struct hs_scratch *scratch,
|
||||
const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x8 *ri,
|
||||
u64a end) {
|
||||
const struct core_info *ci = &scratch->core_info;
|
||||
s32 checkOffset = ri->base_offset;
|
||||
const s64a base_offset = (s64a)end - ci->buf_offset;
|
||||
s64a offset = base_offset + checkOffset;
|
||||
DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
|
||||
DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
|
||||
|
||||
assert(ri->last_start <= 0);
|
||||
if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
|
||||
if ((u64a)(0 - ri->last_start) > end) {
|
||||
DEBUG_PRINTF("too early, fail\n");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
u32 valid_data_mask;
|
||||
m128 data_m128 = getData128(ci, offset, &valid_data_mask);
|
||||
m256 data_double = set2x128(data_m128);
|
||||
m256 data_select_mask = loadu256(ri->data_select_mask);
|
||||
|
||||
u32 valid_path_mask = 0;
|
||||
m256 expand_valid;
|
||||
if (unlikely(!(valid_data_mask & 1))) {
|
||||
DEBUG_PRINTF("lose part of backward data\n");
|
||||
DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask);
|
||||
|
||||
u64a expand_mask = 0x8080808080808080ULL;
|
||||
u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask);
|
||||
u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask);
|
||||
DEBUG_PRINTF("expand_hi %llx\n", valid_hi);
|
||||
DEBUG_PRINTF("expand_lo %llx\n", valid_lo);
|
||||
expand_valid = set64x4(valid_hi, valid_lo, valid_hi,
|
||||
valid_lo);
|
||||
valid_path_mask = ~movemask256(vpshufb(expand_valid,
|
||||
data_select_mask));
|
||||
}
|
||||
|
||||
m256 data = vpshufb(data_double, data_select_mask);
|
||||
m256 hi_mask = loadu2x128(ri->hi_mask);
|
||||
m256 lo_mask = loadu2x128(ri->lo_mask);
|
||||
m256 bucket_select_mask = loadu256(ri->bucket_select_mask);
|
||||
|
||||
u32 hi_bits_mask = ri->hi_bits_mask;
|
||||
u32 lo_bits_mask = ri->lo_bits_mask;
|
||||
u32 neg_mask = ri->neg_mask;
|
||||
|
||||
if (validateMultipathShuftiMask32x8(data, hi_mask, lo_mask,
|
||||
bucket_select_mask,
|
||||
hi_bits_mask, lo_bits_mask,
|
||||
neg_mask, valid_path_mask)) {
|
||||
DEBUG_PRINTF("check multi-path shufti-32x8 successfully\n");
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static never_inline
|
||||
int roseCheckMultipathShufti32x16(const struct hs_scratch *scratch,
|
||||
const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x16 *ri,
|
||||
u64a end) {
|
||||
const struct core_info *ci = &scratch->core_info;
|
||||
const s64a base_offset = (s64a)end - ci->buf_offset;
|
||||
s32 checkOffset = ri->base_offset;
|
||||
s64a offset = base_offset + checkOffset;
|
||||
DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
|
||||
DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
|
||||
|
||||
assert(ri->last_start <= 0);
|
||||
if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
|
||||
if ((u64a)(0 - ri->last_start) > end) {
|
||||
DEBUG_PRINTF("too early, fail\n");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
u32 valid_data_mask;
|
||||
m128 data_m128 = getData128(ci, offset, &valid_data_mask);
|
||||
m256 data_double = set2x128(data_m128);
|
||||
m256 data_select_mask = loadu256(ri->data_select_mask);
|
||||
|
||||
u32 valid_path_mask = 0;
|
||||
m256 expand_valid;
|
||||
if (unlikely(!(valid_data_mask & 1))) {
|
||||
DEBUG_PRINTF("lose part of backward data\n");
|
||||
DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask);
|
||||
|
||||
u64a expand_mask = 0x8080808080808080ULL;
|
||||
u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask);
|
||||
u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask);
|
||||
DEBUG_PRINTF("expand_hi %llx\n", valid_hi);
|
||||
DEBUG_PRINTF("expand_lo %llx\n", valid_lo);
|
||||
expand_valid = set64x4(valid_hi, valid_lo, valid_hi,
|
||||
valid_lo);
|
||||
valid_path_mask = ~movemask256(vpshufb(expand_valid,
|
||||
data_select_mask));
|
||||
}
|
||||
|
||||
m256 data = vpshufb(data_double, data_select_mask);
|
||||
|
||||
m256 hi_mask_1 = loadu2x128(ri->hi_mask);
|
||||
m256 hi_mask_2 = loadu2x128(ri->hi_mask + 16);
|
||||
m256 lo_mask_1 = loadu2x128(ri->lo_mask);
|
||||
m256 lo_mask_2 = loadu2x128(ri->lo_mask + 16);
|
||||
|
||||
m256 bucket_select_mask_hi = loadu256(ri->bucket_select_mask_hi);
|
||||
m256 bucket_select_mask_lo = loadu256(ri->bucket_select_mask_lo);
|
||||
|
||||
u32 hi_bits_mask = ri->hi_bits_mask;
|
||||
u32 lo_bits_mask = ri->lo_bits_mask;
|
||||
u32 neg_mask = ri->neg_mask;
|
||||
|
||||
if (validateMultipathShuftiMask32x16(data, hi_mask_1, hi_mask_2,
|
||||
lo_mask_1, lo_mask_2,
|
||||
bucket_select_mask_hi,
|
||||
bucket_select_mask_lo,
|
||||
hi_bits_mask, lo_bits_mask,
|
||||
neg_mask, valid_path_mask)) {
|
||||
DEBUG_PRINTF("check multi-path shufti-32x16 successfully\n");
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static never_inline
|
||||
int roseCheckMultipathShufti64(const struct hs_scratch *scratch,
|
||||
const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64 *ri,
|
||||
u64a end) {
|
||||
const struct core_info *ci = &scratch->core_info;
|
||||
const s64a base_offset = (s64a)end - ci->buf_offset;
|
||||
s32 checkOffset = ri->base_offset;
|
||||
s64a offset = base_offset + checkOffset;
|
||||
DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
|
||||
DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
|
||||
|
||||
if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
|
||||
if ((u64a)(0 - ri->last_start) > end) {
|
||||
DEBUG_PRINTF("too early, fail\n");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
u32 valid_data_mask;
|
||||
m128 data_m128 = getData128(ci, offset, &valid_data_mask);
|
||||
m256 data_m256 = set2x128(data_m128);
|
||||
m256 data_select_mask_1 = loadu256(ri->data_select_mask);
|
||||
m256 data_select_mask_2 = loadu256(ri->data_select_mask + 32);
|
||||
|
||||
u64a valid_path_mask = 0;
|
||||
m256 expand_valid;
|
||||
if (unlikely(!(valid_data_mask & 1))) {
|
||||
DEBUG_PRINTF("lose part of backward data\n");
|
||||
DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask);
|
||||
|
||||
u64a expand_mask = 0x8080808080808080ULL;
|
||||
u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask);
|
||||
u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask);
|
||||
DEBUG_PRINTF("expand_hi %llx\n", valid_hi);
|
||||
DEBUG_PRINTF("expand_lo %llx\n", valid_lo);
|
||||
expand_valid = set64x4(valid_hi, valid_lo, valid_hi,
|
||||
valid_lo);
|
||||
u32 valid_path_1 = movemask256(vpshufb(expand_valid,
|
||||
data_select_mask_1));
|
||||
u32 valid_path_2 = movemask256(vpshufb(expand_valid,
|
||||
data_select_mask_2));
|
||||
valid_path_mask = ~((u64a)valid_path_1 | (u64a)valid_path_2 << 32);
|
||||
}
|
||||
|
||||
m256 data_1 = vpshufb(data_m256, data_select_mask_1);
|
||||
m256 data_2 = vpshufb(data_m256, data_select_mask_2);
|
||||
|
||||
m256 hi_mask = loadu2x128(ri->hi_mask);
|
||||
m256 lo_mask = loadu2x128(ri->lo_mask);
|
||||
|
||||
m256 bucket_select_mask_1 = loadu256(ri->bucket_select_mask);
|
||||
m256 bucket_select_mask_2 = loadu256(ri->bucket_select_mask + 32);
|
||||
|
||||
u64a hi_bits_mask = ri->hi_bits_mask;
|
||||
u64a lo_bits_mask = ri->lo_bits_mask;
|
||||
u64a neg_mask = ri->neg_mask;
|
||||
|
||||
if (validateMultipathShuftiMask64(data_1, data_2, hi_mask, lo_mask,
|
||||
bucket_select_mask_1,
|
||||
bucket_select_mask_2, hi_bits_mask,
|
||||
lo_bits_mask, neg_mask,
|
||||
valid_path_mask)) {
|
||||
DEBUG_PRINTF("check multi-path shufti-64 successfully\n");
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int roseNfaEarliestSom(u64a start, u64a end, ReportID id, void *context);
|
||||
|
||||
static rose_inline
|
||||
@ -1614,8 +1970,8 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_LOOKAROUND) {
|
||||
if (!roseCheckLookaround(t, scratch, ri->index, ri->count,
|
||||
end)) {
|
||||
if (!roseCheckLookaround(t, scratch, ri->look_index,
|
||||
ri->reach_index, ri->count, end)) {
|
||||
DEBUG_PRINTF("failed lookaround check\n");
|
||||
assert(ri->fail_jump); // must progress
|
||||
pc += ri->fail_jump;
|
||||
@ -2172,6 +2528,59 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
|
||||
work_done = 0;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(MULTIPATH_LOOKAROUND) {
|
||||
if (!roseMultipathLookaround(t, scratch, ri->look_index,
|
||||
ri->reach_index, ri->count,
|
||||
ri->last_start, ri->start_mask,
|
||||
end)) {
|
||||
DEBUG_PRINTF("failed multi-path lookaround check\n");
|
||||
assert(ri->fail_jump); // must progress
|
||||
pc += ri->fail_jump;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_16x8) {
|
||||
if (!roseCheckMultipathShufti16x8(scratch, ri, end)) {
|
||||
DEBUG_PRINTF("failed multi-path shufti 16x8 check\n");
|
||||
assert(ri->fail_jump); // must progress
|
||||
pc += ri->fail_jump;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x8) {
|
||||
if (!roseCheckMultipathShufti32x8(scratch, ri, end)) {
|
||||
DEBUG_PRINTF("failed multi-path shufti 32x8 check\n");
|
||||
assert(ri->fail_jump); // must progress
|
||||
pc += ri->fail_jump;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x16) {
|
||||
if (!roseCheckMultipathShufti32x16(scratch, ri, end)) {
|
||||
DEBUG_PRINTF("failed multi-path shufti 32x16 check\n");
|
||||
assert(ri->fail_jump); // must progress
|
||||
pc += ri->fail_jump;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_64) {
|
||||
if (!roseCheckMultipathShufti64(scratch, ri, end)) {
|
||||
DEBUG_PRINTF("failed multi-path shufti 64 check\n");
|
||||
assert(ri->fail_jump); // must progress
|
||||
pc += ri->fail_jump;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -82,6 +82,7 @@
|
||||
#include "util/compile_context.h"
|
||||
#include "util/compile_error.h"
|
||||
#include "util/container.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/fatbit_build.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/make_unique.h"
|
||||
@ -99,6 +100,7 @@
|
||||
#include <map>
|
||||
#include <queue>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
@ -141,8 +143,8 @@ struct left_build_info {
|
||||
countingMiracleReach(cm_cr) {}
|
||||
|
||||
// Constructor for a lookaround implementation.
|
||||
explicit left_build_info(const vector<LookEntry> &look)
|
||||
: has_lookaround(true), lookaround(look) {}
|
||||
explicit left_build_info(const vector<vector<LookEntry>> &looks)
|
||||
: has_lookaround(true), lookaround(looks) {}
|
||||
|
||||
u32 queue = 0; /* uniquely idents the left_build_info */
|
||||
u32 lag = 0;
|
||||
@ -154,7 +156,7 @@ struct left_build_info {
|
||||
CharReach countingMiracleReach;
|
||||
u32 countingMiracleOffset = 0; /* populated later when laying out bytecode */
|
||||
bool has_lookaround = false;
|
||||
vector<LookEntry> lookaround; // alternative implementation to the NFA
|
||||
vector<vector<LookEntry>> lookaround; // alternative implementation to the NFA
|
||||
};
|
||||
|
||||
/**
|
||||
@ -197,12 +199,22 @@ struct build_context : noncopyable {
|
||||
ue2::unordered_map<RoseProgram, u32, RoseProgramHash,
|
||||
RoseProgramEquivalence> program_cache;
|
||||
|
||||
/** \brief LookEntry list cache, so that we don't have to go scanning
|
||||
* through the full list to find cases we've used already. */
|
||||
ue2::unordered_map<vector<LookEntry>, size_t> lookaround_cache;
|
||||
/** \brief LookEntry list cache, so that we can reuse the look index and
|
||||
* reach index for the same lookaround. */
|
||||
ue2::unordered_map<vector<vector<LookEntry>>,
|
||||
pair<size_t, size_t>> lookaround_cache;
|
||||
|
||||
/** \brief Lookaround table for Rose roles. */
|
||||
vector<LookEntry> lookaround;
|
||||
vector<vector<vector<LookEntry>>> lookaround;
|
||||
|
||||
/** \brief Lookaround look table size. */
|
||||
size_t lookTableSize = 0;
|
||||
|
||||
/** \brief Lookaround reach table size.
|
||||
* since single path lookaround and multi-path lookaround have different
|
||||
* bitvectors range (32 and 256), we need to maintain both look table size
|
||||
* and reach table size. */
|
||||
size_t reachTableSize = 0;
|
||||
|
||||
/** \brief State indices, for those roles that have them. */
|
||||
ue2::unordered_map<RoseVertex, u32> roleStateIndices;
|
||||
@ -1582,7 +1594,7 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc,
|
||||
// TODO: Handle SOM-tracking cases as well.
|
||||
if (cc.grey.roseLookaroundMasks && is_transient &&
|
||||
!g[v].left.tracksSom()) {
|
||||
vector<LookEntry> lookaround;
|
||||
vector<vector<LookEntry>> lookaround;
|
||||
if (makeLeftfixLookaround(tbi, v, lookaround)) {
|
||||
DEBUG_PRINTF("implementing as lookaround!\n");
|
||||
bc.leftfix_info.emplace(v, left_build_info(lookaround));
|
||||
@ -2651,15 +2663,7 @@ bool hasEodAnchors(const RoseBuildImpl &build, const build_context &bc,
|
||||
}
|
||||
|
||||
static
|
||||
void writeLookaroundTables(build_context &bc, RoseEngine &proto) {
|
||||
const auto &look_vec = bc.lookaround;
|
||||
DEBUG_PRINTF("%zu lookaround table entries\n", look_vec.size());
|
||||
|
||||
vector<s8> look_table(look_vec.size(), 0);
|
||||
vector<u8> reach_table(REACH_BITVECTOR_LEN * look_vec.size(), 0);
|
||||
|
||||
s8 *look = look_table.data();
|
||||
u8 *reach = reach_table.data();
|
||||
void writeLookaround(const vector<LookEntry> &look_vec, s8 *&look, u8 *&reach) {
|
||||
for (const auto &le : look_vec) {
|
||||
*look = verify_s8(le.offset);
|
||||
const CharReach &cr = le.reach;
|
||||
@ -2670,6 +2674,52 @@ void writeLookaroundTables(build_context &bc, RoseEngine &proto) {
|
||||
++look;
|
||||
reach += REACH_BITVECTOR_LEN;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void writeMultipathLookaround(const vector<vector<LookEntry>> &multi_look,
|
||||
s8 *&look, u8 *&reach) {
|
||||
for (const auto &m : multi_look) {
|
||||
u8 u = 0;
|
||||
assert(m.size() == MAX_LOOKAROUND_PATHS);
|
||||
for (size_t i = 0; i < m.size(); i++) {
|
||||
if (m[i].reach.none()) {
|
||||
u |= (u8)1U << i;
|
||||
}
|
||||
}
|
||||
std::fill_n(reach, MULTI_REACH_BITVECTOR_LEN, u);
|
||||
|
||||
for (size_t i = 0; i < m.size(); i++) {
|
||||
const CharReach &cr = m[i].reach;
|
||||
if (cr.none()) {
|
||||
continue;
|
||||
}
|
||||
*look = m[i].offset;
|
||||
|
||||
for (size_t c = cr.find_first(); c != cr.npos;
|
||||
c = cr.find_next(c)) {
|
||||
reach[c] |= (u8)1U << i;
|
||||
}
|
||||
}
|
||||
|
||||
++look;
|
||||
reach += MULTI_REACH_BITVECTOR_LEN;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void writeLookaroundTables(build_context &bc, RoseEngine &proto) {
|
||||
vector<s8> look_table(bc.lookTableSize, 0);
|
||||
vector<u8> reach_table(bc.reachTableSize, 0);
|
||||
s8 *look = look_table.data();
|
||||
u8 *reach = reach_table.data();
|
||||
for (const auto &l : bc.lookaround) {
|
||||
if (l.size() == 1) {
|
||||
writeLookaround(l.front(), look, reach);
|
||||
} else {
|
||||
writeMultipathLookaround(l, look, reach);
|
||||
}
|
||||
}
|
||||
|
||||
proto.lookaroundTableOffset = bc.engine_blob.add_range(look_table);
|
||||
proto.lookaroundReachOffset = bc.engine_blob.add_range(reach_table);
|
||||
@ -2804,30 +2854,37 @@ bool onlyAtEod(const RoseBuildImpl &tbi, RoseVertex v) {
|
||||
}
|
||||
|
||||
static
|
||||
u32 addLookaround(build_context &bc, const vector<LookEntry> &look) {
|
||||
void addLookaround(build_context &bc,
|
||||
const vector<vector<LookEntry>> &look,
|
||||
u32 &look_index, u32 &reach_index) {
|
||||
// Check the cache.
|
||||
auto it = bc.lookaround_cache.find(look);
|
||||
if (it != bc.lookaround_cache.end()) {
|
||||
DEBUG_PRINTF("reusing look at idx %zu\n", it->second);
|
||||
return verify_u32(it->second);
|
||||
look_index = verify_u32(it->second.first);
|
||||
reach_index = verify_u32(it->second.second);
|
||||
DEBUG_PRINTF("reusing look at idx %u\n", look_index);
|
||||
DEBUG_PRINTF("reusing reach at idx %u\n", reach_index);
|
||||
return;
|
||||
}
|
||||
|
||||
// Linear scan for sequence.
|
||||
auto seq_it = search(begin(bc.lookaround), end(bc.lookaround), begin(look),
|
||||
end(look));
|
||||
if (seq_it != end(bc.lookaround)) {
|
||||
size_t idx = distance(begin(bc.lookaround), seq_it);
|
||||
DEBUG_PRINTF("linear scan found look at idx %zu\n", idx);
|
||||
bc.lookaround_cache.emplace(look, idx);
|
||||
return verify_u32(idx);
|
||||
size_t look_idx = bc.lookTableSize;
|
||||
size_t reach_idx = bc.reachTableSize;
|
||||
|
||||
if (look.size() == 1) {
|
||||
bc.lookTableSize += look.front().size();
|
||||
bc.reachTableSize += look.front().size() * REACH_BITVECTOR_LEN;
|
||||
} else {
|
||||
bc.lookTableSize += look.size();
|
||||
bc.reachTableSize += look.size() * MULTI_REACH_BITVECTOR_LEN;
|
||||
}
|
||||
|
||||
// New sequence.
|
||||
size_t idx = bc.lookaround.size();
|
||||
bc.lookaround_cache.emplace(look, idx);
|
||||
insert(&bc.lookaround, bc.lookaround.end(), look);
|
||||
DEBUG_PRINTF("adding look at idx %zu\n", idx);
|
||||
return verify_u32(idx);
|
||||
bc.lookaround_cache.emplace(look, make_pair(look_idx, reach_idx));
|
||||
bc.lookaround.emplace_back(look);
|
||||
|
||||
DEBUG_PRINTF("adding look at idx %zu\n", look_idx);
|
||||
DEBUG_PRINTF("adding reach at idx %zu\n", reach_idx);
|
||||
look_index = verify_u32(look_idx);
|
||||
reach_index = verify_u32(reach_idx);
|
||||
}
|
||||
|
||||
static
|
||||
@ -2977,7 +3034,7 @@ struct cmpNibble {
|
||||
// Insert all pairs of bucket and offset into buckets.
|
||||
static really_inline
|
||||
void getAllBuckets(const vector<LookEntry> &look,
|
||||
map<u32, vector<s8>, cmpNibble> &buckets, u32 &neg_mask) {
|
||||
map<u32, vector<s8>, cmpNibble> &buckets, u64a &neg_mask) {
|
||||
s32 base_offset = verify_s32(look.front().offset);
|
||||
for (const auto &entry : look) {
|
||||
CharReach cr = entry.reach;
|
||||
@ -2985,7 +3042,7 @@ void getAllBuckets(const vector<LookEntry> &look,
|
||||
if (cr.count() > 128 ) {
|
||||
cr.flip();
|
||||
} else {
|
||||
neg_mask ^= 1 << (entry.offset - base_offset);
|
||||
neg_mask ^= 1ULL << (entry.offset - base_offset);
|
||||
}
|
||||
map <u16, u16> lo2hi;
|
||||
// We treat Ascii Table as a 16x16 grid.
|
||||
@ -3037,23 +3094,16 @@ void nibMaskUpdate(array<u8, 32> &mask, u32 data, u8 bit_index) {
|
||||
}
|
||||
|
||||
static
|
||||
bool makeRoleShufti(const vector<LookEntry> &look,
|
||||
RoseProgram &program) {
|
||||
|
||||
s32 base_offset = verify_s32(look.front().offset);
|
||||
if (look.back().offset >= base_offset + 32) {
|
||||
return false;
|
||||
}
|
||||
array<u8, 32> hi_mask, lo_mask;
|
||||
hi_mask.fill(0);
|
||||
lo_mask.fill(0);
|
||||
array<u8, 32> bucket_select_hi, bucket_select_lo;
|
||||
bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8.
|
||||
bucket_select_lo.fill(0);
|
||||
u8 bit_index = 0; // number of buckets
|
||||
bool getShuftiMasks(const vector<LookEntry> &look, array<u8, 32> &hi_mask,
|
||||
array<u8, 32> &lo_mask, u8 *bucket_select_hi,
|
||||
u8 *bucket_select_lo, u64a &neg_mask,
|
||||
u8 &bit_idx, size_t len) {
|
||||
map<u32, u16> nib; // map every bucket to its bucket number.
|
||||
map<u32, vector<s8>, cmpNibble> bucket2offsets;
|
||||
u32 neg_mask = ~0u;
|
||||
s32 base_offset = look.front().offset;
|
||||
|
||||
bit_idx = 0;
|
||||
neg_mask = ~0ULL;
|
||||
|
||||
getAllBuckets(look, bucket2offsets, neg_mask);
|
||||
|
||||
@ -3061,15 +3111,15 @@ bool makeRoleShufti(const vector<LookEntry> &look,
|
||||
u32 hi_lo = it.first;
|
||||
// New bucket.
|
||||
if (!nib[hi_lo]) {
|
||||
if (bit_index >= 16) {
|
||||
if ((bit_idx >= 8 && len == 64) || bit_idx >= 16) {
|
||||
return false;
|
||||
}
|
||||
nib[hi_lo] = 1 << bit_index;
|
||||
nib[hi_lo] = 1 << bit_idx;
|
||||
|
||||
nibUpdate(nib, hi_lo);
|
||||
nibMaskUpdate(hi_mask, hi_lo >> 16, bit_index);
|
||||
nibMaskUpdate(lo_mask, hi_lo & 0xffff, bit_index);
|
||||
bit_index++;
|
||||
nibMaskUpdate(hi_mask, hi_lo >> 16, bit_idx);
|
||||
nibMaskUpdate(lo_mask, hi_lo & 0xffff, bit_idx);
|
||||
bit_idx++;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("hi_lo %x bucket %x\n", hi_lo, nib[hi_lo]);
|
||||
@ -3082,6 +3132,113 @@ bool makeRoleShufti(const vector<LookEntry> &look,
|
||||
bucket_select_lo[offset - base_offset] |= nib_lo;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
unique_ptr<RoseInstruction>
|
||||
makeCheckShufti16x8(u32 offset_range, u8 bucket_idx,
|
||||
const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
|
||||
const array<u8, 32> &bucket_select_mask,
|
||||
u32 neg_mask, s32 base_offset,
|
||||
const RoseInstruction *end_inst) {
|
||||
if (offset_range > 16 || bucket_idx > 8) {
|
||||
return nullptr;
|
||||
}
|
||||
array<u8, 32> nib_mask;
|
||||
array<u8, 16> bucket_select_mask_16;
|
||||
copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin());
|
||||
copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16);
|
||||
copy(bucket_select_mask.begin(), bucket_select_mask.begin() + 16,
|
||||
bucket_select_mask_16.begin());
|
||||
return make_unique<RoseInstrCheckShufti16x8>
|
||||
(nib_mask, bucket_select_mask_16,
|
||||
neg_mask & 0xffff, base_offset, end_inst);
|
||||
}
|
||||
|
||||
static
|
||||
unique_ptr<RoseInstruction>
|
||||
makeCheckShufti32x8(u32 offset_range, u8 bucket_idx,
|
||||
const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
|
||||
const array<u8, 32> &bucket_select_mask,
|
||||
u32 neg_mask, s32 base_offset,
|
||||
const RoseInstruction *end_inst) {
|
||||
if (offset_range > 32 || bucket_idx > 8) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
array<u8, 16> hi_mask_16;
|
||||
array<u8, 16> lo_mask_16;
|
||||
copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_16.begin());
|
||||
copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_16.begin());
|
||||
return make_unique<RoseInstrCheckShufti32x8>
|
||||
(hi_mask_16, lo_mask_16, bucket_select_mask,
|
||||
neg_mask, base_offset, end_inst);
|
||||
}
|
||||
|
||||
static
|
||||
unique_ptr<RoseInstruction>
|
||||
makeCheckShufti16x16(u32 offset_range, u8 bucket_idx,
|
||||
const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
|
||||
const array<u8, 32> &bucket_select_mask_lo,
|
||||
const array<u8, 32> &bucket_select_mask_hi,
|
||||
u32 neg_mask, s32 base_offset,
|
||||
const RoseInstruction *end_inst) {
|
||||
if (offset_range > 16 || bucket_idx > 16) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
array<u8, 32> bucket_select_mask_32;
|
||||
copy(bucket_select_mask_lo.begin(), bucket_select_mask_lo.begin() + 16,
|
||||
bucket_select_mask_32.begin());
|
||||
copy(bucket_select_mask_hi.begin(), bucket_select_mask_hi.begin() + 16,
|
||||
bucket_select_mask_32.begin() + 16);
|
||||
return make_unique<RoseInstrCheckShufti16x16>
|
||||
(hi_mask, lo_mask, bucket_select_mask_32,
|
||||
neg_mask & 0xffff, base_offset, end_inst);
|
||||
}
|
||||
static
|
||||
unique_ptr<RoseInstruction>
|
||||
makeCheckShufti32x16(u32 offset_range, u8 bucket_idx,
|
||||
const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
|
||||
const array<u8, 32> &bucket_select_mask_lo,
|
||||
const array<u8, 32> &bucket_select_mask_hi,
|
||||
u32 neg_mask, s32 base_offset,
|
||||
const RoseInstruction *end_inst) {
|
||||
if (offset_range > 32 || bucket_idx > 16) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return make_unique<RoseInstrCheckShufti32x16>
|
||||
(hi_mask, lo_mask, bucket_select_mask_hi,
|
||||
bucket_select_mask_lo, neg_mask, base_offset, end_inst);
|
||||
}
|
||||
|
||||
static
|
||||
bool makeRoleShufti(const vector<LookEntry> &look,
|
||||
RoseProgram &program) {
|
||||
|
||||
s32 base_offset = verify_s32(look.front().offset);
|
||||
if (look.back().offset >= base_offset + 32) {
|
||||
return false;
|
||||
}
|
||||
|
||||
u8 bucket_idx = 0; // number of buckets
|
||||
u64a neg_mask_64;
|
||||
array<u8, 32> hi_mask;
|
||||
array<u8, 32> lo_mask;
|
||||
array<u8, 32> bucket_select_hi;
|
||||
array<u8, 32> bucket_select_lo;
|
||||
hi_mask.fill(0);
|
||||
lo_mask.fill(0);
|
||||
bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8.
|
||||
bucket_select_lo.fill(0);
|
||||
|
||||
if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi.data(),
|
||||
bucket_select_lo.data(), neg_mask_64, bucket_idx, 32)) {
|
||||
return false;
|
||||
}
|
||||
u32 neg_mask = (u32)neg_mask_64;
|
||||
|
||||
DEBUG_PRINTF("hi_mask %s\n",
|
||||
convertMaskstoString(hi_mask.data(), 32).c_str());
|
||||
@ -3093,48 +3250,29 @@ bool makeRoleShufti(const vector<LookEntry> &look,
|
||||
convertMaskstoString(bucket_select_lo.data(), 32).c_str());
|
||||
|
||||
const auto *end_inst = program.end_instruction();
|
||||
if (bit_index < 8) {
|
||||
if (look.back().offset < base_offset + 16) {
|
||||
neg_mask &= 0xffff;
|
||||
array<u8, 32> nib_mask;
|
||||
array<u8, 16> bucket_select_mask_16;
|
||||
copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin());
|
||||
copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16);
|
||||
copy(bucket_select_lo.begin(), bucket_select_lo.begin() + 16,
|
||||
bucket_select_mask_16.begin());
|
||||
auto ri = make_unique<RoseInstrCheckShufti16x8>
|
||||
(nib_mask, bucket_select_mask_16,
|
||||
neg_mask, base_offset, end_inst);
|
||||
program.add_before_end(move(ri));
|
||||
} else {
|
||||
array<u8, 16> hi_mask_16;
|
||||
array<u8, 16> lo_mask_16;
|
||||
copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_16.begin());
|
||||
copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_16.begin());
|
||||
auto ri = make_unique<RoseInstrCheckShufti32x8>
|
||||
(hi_mask_16, lo_mask_16, bucket_select_lo,
|
||||
neg_mask, base_offset, end_inst);
|
||||
program.add_before_end(move(ri));
|
||||
}
|
||||
} else {
|
||||
if (look.back().offset < base_offset + 16) {
|
||||
neg_mask &= 0xffff;
|
||||
array<u8, 32> bucket_select_mask_32;
|
||||
copy(bucket_select_lo.begin(), bucket_select_lo.begin() + 16,
|
||||
bucket_select_mask_32.begin());
|
||||
copy(bucket_select_hi.begin(), bucket_select_hi.begin() + 16,
|
||||
bucket_select_mask_32.begin() + 16);
|
||||
auto ri = make_unique<RoseInstrCheckShufti16x16>
|
||||
(hi_mask, lo_mask, bucket_select_mask_32,
|
||||
neg_mask, base_offset, end_inst);
|
||||
program.add_before_end(move(ri));
|
||||
} else {
|
||||
auto ri = make_unique<RoseInstrCheckShufti32x16>
|
||||
(hi_mask, lo_mask, bucket_select_hi, bucket_select_lo,
|
||||
neg_mask, base_offset, end_inst);
|
||||
program.add_before_end(move(ri));
|
||||
}
|
||||
s32 offset_range = look.back().offset - base_offset + 1;
|
||||
|
||||
auto ri = makeCheckShufti16x8(offset_range, bucket_idx, hi_mask, lo_mask,
|
||||
bucket_select_lo, neg_mask, base_offset,
|
||||
end_inst);
|
||||
if (!ri) {
|
||||
ri = makeCheckShufti32x8(offset_range, bucket_idx, hi_mask, lo_mask,
|
||||
bucket_select_lo, neg_mask, base_offset,
|
||||
end_inst);
|
||||
}
|
||||
if (!ri) {
|
||||
ri = makeCheckShufti16x16(offset_range, bucket_idx, hi_mask, lo_mask,
|
||||
bucket_select_lo, bucket_select_hi,
|
||||
neg_mask, base_offset, end_inst);
|
||||
}
|
||||
if (!ri) {
|
||||
ri = makeCheckShufti32x16(offset_range, bucket_idx, hi_mask, lo_mask,
|
||||
bucket_select_lo, bucket_select_hi,
|
||||
neg_mask, base_offset, end_inst);
|
||||
}
|
||||
assert(ri);
|
||||
program.add_before_end(move(ri));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -3153,9 +3291,13 @@ void makeLookaroundInstruction(build_context &bc, const vector<LookEntry> &look,
|
||||
|
||||
if (look.size() == 1) {
|
||||
s8 offset = look.begin()->offset;
|
||||
u32 look_idx = addLookaround(bc, look);
|
||||
auto ri = make_unique<RoseInstrCheckSingleLookaround>(offset, look_idx,
|
||||
program.end_instruction());
|
||||
u32 look_idx, reach_idx;
|
||||
vector<vector<LookEntry>> lookaround;
|
||||
lookaround.emplace_back(look);
|
||||
addLookaround(bc, lookaround, look_idx, reach_idx);
|
||||
// We don't need look_idx here.
|
||||
auto ri = make_unique<RoseInstrCheckSingleLookaround>(offset, reach_idx,
|
||||
program.end_instruction());
|
||||
program.add_before_end(move(ri));
|
||||
return;
|
||||
}
|
||||
@ -3172,10 +3314,242 @@ void makeLookaroundInstruction(build_context &bc, const vector<LookEntry> &look,
|
||||
return;
|
||||
}
|
||||
|
||||
u32 look_idx = addLookaround(bc, look);
|
||||
u32 look_idx, reach_idx;
|
||||
vector<vector<LookEntry>> lookaround;
|
||||
lookaround.emplace_back(look);
|
||||
addLookaround(bc, lookaround, look_idx, reach_idx);
|
||||
u32 look_count = verify_u32(look.size());
|
||||
|
||||
auto ri = make_unique<RoseInstrCheckLookaround>(look_idx, look_count,
|
||||
auto ri = make_unique<RoseInstrCheckLookaround>(look_idx, reach_idx,
|
||||
look_count,
|
||||
program.end_instruction());
|
||||
program.add_before_end(move(ri));
|
||||
}
|
||||
|
||||
#if defined(DEBUG) || defined(DUMP_SUPPORT)
|
||||
static UNUSED
|
||||
string dumpMultiLook(const vector<LookEntry> &looks) {
|
||||
ostringstream oss;
|
||||
for (auto it = looks.begin(); it != looks.end(); ++it) {
|
||||
if (it != looks.begin()) {
|
||||
oss << ", ";
|
||||
}
|
||||
oss << "{" << int(it->offset) << ": " << describeClass(it->reach) << "}";
|
||||
}
|
||||
return oss.str();
|
||||
}
|
||||
#endif
|
||||
|
||||
static
|
||||
bool makeRoleMultipathShufti(const vector<vector<LookEntry>> &multi_look,
|
||||
RoseProgram &program) {
|
||||
if (multi_look.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// find the base offset
|
||||
assert(!multi_look[0].empty());
|
||||
s32 base_offset = multi_look[0].front().offset;
|
||||
s32 last_start = base_offset;
|
||||
s32 end_offset = multi_look[0].back().offset;
|
||||
size_t multi_len = 0;
|
||||
|
||||
for (const auto &look : multi_look) {
|
||||
assert(look.size() > 0);
|
||||
multi_len += look.size();
|
||||
|
||||
LIMIT_TO_AT_MOST(&base_offset, look.front().offset);
|
||||
ENSURE_AT_LEAST(&last_start, look.front().offset);
|
||||
ENSURE_AT_LEAST(&end_offset, look.back().offset);
|
||||
}
|
||||
|
||||
assert(last_start < 0);
|
||||
|
||||
if (end_offset - base_offset >= MULTIPATH_MAX_LEN) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (multi_len <= 16) {
|
||||
multi_len = 16;
|
||||
} else if (multi_len <= 32) {
|
||||
multi_len = 32;
|
||||
} else if (multi_len <= 64) {
|
||||
multi_len = 64;
|
||||
} else {
|
||||
DEBUG_PRINTF("too long for multi-path\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
vector<LookEntry> linear_look;
|
||||
array<u8, 64> data_select_mask;
|
||||
data_select_mask.fill(0);
|
||||
u64a hi_bits_mask = 0;
|
||||
u64a lo_bits_mask = 0;
|
||||
|
||||
for (const auto &look : multi_look) {
|
||||
assert(linear_look.size() < 64);
|
||||
lo_bits_mask |= 1LLU << linear_look.size();
|
||||
for (const auto &entry : look) {
|
||||
assert(entry.offset - base_offset < MULTIPATH_MAX_LEN);
|
||||
data_select_mask[linear_look.size()] =
|
||||
verify_u8(entry.offset - base_offset);
|
||||
linear_look.emplace_back(verify_s8(linear_look.size()), entry.reach);
|
||||
}
|
||||
hi_bits_mask |= 1LLU << (linear_look.size() - 1);
|
||||
}
|
||||
|
||||
u8 bit_index = 0; // number of buckets
|
||||
u64a neg_mask;
|
||||
array<u8, 32> hi_mask;
|
||||
array<u8, 32> lo_mask;
|
||||
array<u8, 64> bucket_select_hi;
|
||||
array<u8, 64> bucket_select_lo;
|
||||
hi_mask.fill(0);
|
||||
lo_mask.fill(0);
|
||||
bucket_select_hi.fill(0);
|
||||
bucket_select_lo.fill(0);
|
||||
|
||||
if (!getShuftiMasks(linear_look, hi_mask, lo_mask, bucket_select_hi.data(),
|
||||
bucket_select_lo.data(), neg_mask, bit_index,
|
||||
multi_len)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("hi_mask %s\n",
|
||||
convertMaskstoString(hi_mask.data(), 16).c_str());
|
||||
DEBUG_PRINTF("lo_mask %s\n",
|
||||
convertMaskstoString(lo_mask.data(), 16).c_str());
|
||||
DEBUG_PRINTF("bucket_select_hi %s\n",
|
||||
convertMaskstoString(bucket_select_hi.data(), 64).c_str());
|
||||
DEBUG_PRINTF("bucket_select_lo %s\n",
|
||||
convertMaskstoString(bucket_select_lo.data(), 64).c_str());
|
||||
DEBUG_PRINTF("data_select_mask %s\n",
|
||||
convertMaskstoString(data_select_mask.data(), 64).c_str());
|
||||
DEBUG_PRINTF("hi_bits_mask %llx\n", hi_bits_mask);
|
||||
DEBUG_PRINTF("lo_bits_mask %llx\n", lo_bits_mask);
|
||||
DEBUG_PRINTF("neg_mask %llx\n", neg_mask);
|
||||
DEBUG_PRINTF("base_offset %d\n", base_offset);
|
||||
DEBUG_PRINTF("last_start %d\n", last_start);
|
||||
|
||||
// Since we don't have 16x16 now, just call 32x16 instead.
|
||||
if (bit_index > 8) {
|
||||
assert(multi_len <= 32);
|
||||
multi_len = 32;
|
||||
}
|
||||
|
||||
const auto *end_inst = program.end_instruction();
|
||||
assert(multi_len == 16 || multi_len == 32 || multi_len == 64);
|
||||
if (multi_len == 16) {
|
||||
neg_mask &= 0xffff;
|
||||
assert(!(hi_bits_mask & ~0xffffULL));
|
||||
assert(!(lo_bits_mask & ~0xffffULL));
|
||||
assert(bit_index <=8);
|
||||
array<u8, 32> nib_mask;
|
||||
copy(begin(lo_mask), begin(lo_mask) + 16, nib_mask.begin());
|
||||
copy(begin(hi_mask), begin(hi_mask) + 16, nib_mask.begin() + 16);
|
||||
|
||||
auto ri = make_unique<RoseInstrCheckMultipathShufti16x8>
|
||||
(nib_mask, bucket_select_lo, data_select_mask, hi_bits_mask,
|
||||
lo_bits_mask, neg_mask, base_offset, last_start, end_inst);
|
||||
program.add_before_end(move(ri));
|
||||
} else if (multi_len == 32) {
|
||||
neg_mask &= 0xffffffff;
|
||||
assert(!(hi_bits_mask & ~0xffffffffULL));
|
||||
assert(!(lo_bits_mask & ~0xffffffffULL));
|
||||
if (bit_index <= 8) {
|
||||
auto ri = make_unique<RoseInstrCheckMultipathShufti32x8>
|
||||
(hi_mask, lo_mask, bucket_select_lo, data_select_mask,
|
||||
hi_bits_mask, lo_bits_mask, neg_mask, base_offset,
|
||||
last_start, end_inst);
|
||||
program.add_before_end(move(ri));
|
||||
} else {
|
||||
auto ri = make_unique<RoseInstrCheckMultipathShufti32x16>
|
||||
(hi_mask, lo_mask, bucket_select_hi, bucket_select_lo,
|
||||
data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask,
|
||||
base_offset, last_start, end_inst);
|
||||
program.add_before_end(move(ri));
|
||||
}
|
||||
} else {
|
||||
auto ri = make_unique<RoseInstrCheckMultipathShufti64>
|
||||
(hi_mask, lo_mask, bucket_select_lo, data_select_mask,
|
||||
hi_bits_mask, lo_bits_mask, neg_mask, base_offset,
|
||||
last_start, end_inst);
|
||||
program.add_before_end(move(ri));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
void makeRoleMultipathLookaround(build_context &bc,
|
||||
const vector<vector<LookEntry>> &multi_look,
|
||||
RoseProgram &program) {
|
||||
assert(!multi_look.empty());
|
||||
assert(multi_look.size() <= MAX_LOOKAROUND_PATHS);
|
||||
vector<vector<LookEntry>> ordered_look;
|
||||
set<s32> look_offset;
|
||||
|
||||
assert(!multi_look[0].empty());
|
||||
s32 last_start = multi_look[0][0].offset;
|
||||
|
||||
// build offset table.
|
||||
for (const auto &look : multi_look) {
|
||||
assert(look.size() > 0);
|
||||
last_start = max(last_start, (s32)look.begin()->offset);
|
||||
|
||||
for (const auto &t : look) {
|
||||
look_offset.insert(t.offset);
|
||||
}
|
||||
}
|
||||
|
||||
array<u8, MULTIPATH_MAX_LEN> start_mask;
|
||||
if (multi_look.size() < MAX_LOOKAROUND_PATHS) {
|
||||
start_mask.fill((1 << multi_look.size()) - 1);
|
||||
} else {
|
||||
start_mask.fill(0xff);
|
||||
}
|
||||
|
||||
u32 path_idx = 0;
|
||||
for (const auto &look : multi_look) {
|
||||
for (const auto &t : look) {
|
||||
assert(t.offset >= (int)*look_offset.begin());
|
||||
size_t update_offset = t.offset - *look_offset.begin() + 1;
|
||||
if (update_offset < start_mask.size()) {
|
||||
start_mask[update_offset] &= ~(1 << path_idx);
|
||||
}
|
||||
}
|
||||
path_idx++;
|
||||
}
|
||||
|
||||
for (u32 i = 1; i < MULTIPATH_MAX_LEN; i++) {
|
||||
start_mask[i] &= start_mask[i - 1];
|
||||
DEBUG_PRINTF("start_mask[%u] = %x\n", i, start_mask[i]);
|
||||
}
|
||||
|
||||
assert(look_offset.size() <= MULTIPATH_MAX_LEN);
|
||||
|
||||
assert(last_start < 0);
|
||||
|
||||
for (const auto &offset : look_offset) {
|
||||
vector<LookEntry> multi_entry;
|
||||
multi_entry.resize(MAX_LOOKAROUND_PATHS);
|
||||
|
||||
for (size_t i = 0; i < multi_look.size(); i++) {
|
||||
for (const auto &t : multi_look[i]) {
|
||||
if (t.offset == offset) {
|
||||
multi_entry[i] = t;
|
||||
}
|
||||
}
|
||||
}
|
||||
ordered_look.emplace_back(multi_entry);
|
||||
}
|
||||
|
||||
u32 look_idx, reach_idx;
|
||||
addLookaround(bc, ordered_look, look_idx, reach_idx);
|
||||
u32 look_count = verify_u32(ordered_look.size());
|
||||
|
||||
auto ri = make_unique<RoseInstrMultipathLookaround>(look_idx, reach_idx,
|
||||
look_count, last_start,
|
||||
start_mask,
|
||||
program.end_instruction());
|
||||
program.add_before_end(move(ri));
|
||||
}
|
||||
@ -3187,25 +3561,34 @@ void makeRoleLookaround(const RoseBuildImpl &build, build_context &bc,
|
||||
return;
|
||||
}
|
||||
|
||||
vector<LookEntry> look;
|
||||
vector<vector<LookEntry>> looks;
|
||||
|
||||
// Lookaround from leftfix (mandatory).
|
||||
if (contains(bc.leftfix_info, v) && bc.leftfix_info.at(v).has_lookaround) {
|
||||
DEBUG_PRINTF("using leftfix lookaround\n");
|
||||
look = bc.leftfix_info.at(v).lookaround;
|
||||
looks = bc.leftfix_info.at(v).lookaround;
|
||||
}
|
||||
|
||||
// We may be able to find more lookaround info (advisory) and merge it
|
||||
// in.
|
||||
vector<LookEntry> look_more;
|
||||
findLookaroundMasks(build, v, look_more);
|
||||
mergeLookaround(look, look_more);
|
||||
|
||||
if (look.empty()) {
|
||||
if (looks.size() <= 1) {
|
||||
vector<LookEntry> look;
|
||||
vector<LookEntry> look_more;
|
||||
if (!looks.empty()) {
|
||||
look = move(looks.front());
|
||||
}
|
||||
findLookaroundMasks(build, v, look_more);
|
||||
mergeLookaround(look, look_more);
|
||||
if (!look.empty()) {
|
||||
makeLookaroundInstruction(bc, look, program);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
makeLookaroundInstruction(bc, look, program);
|
||||
if (!makeRoleMultipathShufti(looks, program)) {
|
||||
assert(looks.size() <= 8);
|
||||
makeRoleMultipathLookaround(bc, looks, program);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
|
@ -569,10 +569,20 @@ static
|
||||
CharReach bitvectorToReach(const u8 *reach) {
|
||||
CharReach cr;
|
||||
|
||||
for (size_t i = 0; i < 256; i++) {
|
||||
for (size_t i = 0; i < N_CHARS; i++) {
|
||||
if (reach[i / 8] & (1U << (i % 8))) {
|
||||
cr.set(i);
|
||||
}
|
||||
}
|
||||
return cr;
|
||||
}
|
||||
|
||||
static
|
||||
CharReach multiBitvectorToReach(const u8 *reach, u8 path_mask) {
|
||||
CharReach cr;
|
||||
for (size_t i = 0; i < N_CHARS; i++) {
|
||||
if (reach[i] & path_mask) {
|
||||
cr.set(i);
|
||||
}
|
||||
}
|
||||
return cr;
|
||||
@ -587,9 +597,9 @@ void dumpLookaround(ofstream &os, const RoseEngine *t,
|
||||
const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset);
|
||||
const u8 *reach_base = base + t->lookaroundReachOffset;
|
||||
|
||||
const s8 *look = look_base + ri->index;
|
||||
const s8 *look = look_base + ri->look_index;
|
||||
const s8 *look_end = look + ri->count;
|
||||
const u8 *reach = reach_base + ri->index * REACH_BITVECTOR_LEN;
|
||||
const u8 *reach = reach_base + ri->reach_index;
|
||||
|
||||
os << " contents:" << endl;
|
||||
|
||||
@ -601,6 +611,41 @@ void dumpLookaround(ofstream &os, const RoseEngine *t,
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void dumpMultipathLookaround(ofstream &os, const RoseEngine *t,
|
||||
const ROSE_STRUCT_MULTIPATH_LOOKAROUND *ri) {
|
||||
assert(ri);
|
||||
|
||||
const u8 *base = (const u8 *)t;
|
||||
const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset);
|
||||
const u8 *reach_base = base + t->lookaroundReachOffset;
|
||||
|
||||
const s8 *look_begin = look_base + ri->look_index;
|
||||
const s8 *look_end = look_begin + ri->count;
|
||||
const u8 *reach_begin = reach_base + ri->reach_index;
|
||||
|
||||
os << " contents:" << endl;
|
||||
|
||||
u32 path_mask = ri->start_mask[0];
|
||||
while (path_mask) {
|
||||
u32 path = findAndClearLSB_32(&path_mask);
|
||||
os << " Path #" << path << ":" << endl;
|
||||
os << " ";
|
||||
|
||||
const s8 *look = look_begin;
|
||||
const u8 *reach = reach_begin;
|
||||
for (; look < look_end; look++, reach += MULTI_REACH_BITVECTOR_LEN) {
|
||||
CharReach cr = multiBitvectorToReach(reach, 1U << path);
|
||||
if (cr.any() && !cr.all()) {
|
||||
os << "<" << int(*look) << ": ";
|
||||
describeClass(os, cr, 1000, CC_OUT_TEXT);
|
||||
os << "> ";
|
||||
}
|
||||
}
|
||||
os << endl;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
vector<u32> sparseIterValues(const mmbit_sparse_iter *it, u32 num_bits) {
|
||||
vector<u32> keys;
|
||||
@ -666,7 +711,126 @@ string dumpStrMask(const u8 *mask, size_t len) {
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
#define PROGRAM_CASE(name) \
|
||||
static
|
||||
CharReach shufti2cr(const u8 *lo, const u8 *hi, u8 bucket_mask) {
|
||||
CharReach cr;
|
||||
for (u32 i = 0; i < N_CHARS; i++) {
|
||||
if(lo[i & 0xf] & hi[i >> 4] & bucket_mask) {
|
||||
cr.set(i);
|
||||
}
|
||||
}
|
||||
return cr;
|
||||
}
|
||||
|
||||
static
|
||||
void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi,
|
||||
const u8 *bucket_mask, u32 neg_mask, s32 offset) {
|
||||
assert(len == 16 || len == 32);
|
||||
os << " contents:" << endl;
|
||||
for (u32 idx = 0; idx < len; idx++) {
|
||||
CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]);
|
||||
|
||||
if (neg_mask & (1U << idx)) {
|
||||
cr.flip();
|
||||
}
|
||||
|
||||
if (cr.any() && !cr.all()) {
|
||||
os << " " << std::setw(4) << std::setfill(' ')
|
||||
<< int(offset + idx) << ": ";
|
||||
describeClass(os, cr, 1000, CC_OUT_TEXT);
|
||||
os << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi,
|
||||
const u8 *lo_2, const u8 *hi_2, const u8 *bucket_mask,
|
||||
const u8 *bucket_mask_2, u32 neg_mask, s32 offset) {
|
||||
assert(len == 16 || len == 32);
|
||||
os << " contents:" << endl;
|
||||
for (u32 idx = 0; idx < len; idx++) {
|
||||
CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]);
|
||||
cr |= shufti2cr(lo_2, hi_2, bucket_mask_2[idx]);
|
||||
|
||||
if (neg_mask & (1U << idx)) {
|
||||
cr.flip();
|
||||
}
|
||||
|
||||
if (cr.any() && !cr.all()) {
|
||||
os << " " << std::setw(4) << std::setfill(' ')
|
||||
<< int(offset + idx) << ": ";
|
||||
describeClass(os, cr, 1000, CC_OUT_TEXT);
|
||||
os << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void dumpMultipathShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi,
|
||||
const u8 *bucket_mask, const u8 *data_offset,
|
||||
u64a neg_mask, s32 base_offset) {
|
||||
assert(len == 16 || len == 32 || len == 64);
|
||||
os << " contents:" << endl;
|
||||
u32 path = 0;
|
||||
for (u32 idx = 0; idx < len; idx++) {
|
||||
CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]);
|
||||
|
||||
if (neg_mask & (1ULL << idx)) {
|
||||
cr.flip();
|
||||
}
|
||||
|
||||
if (cr.any() && !cr.all()) {
|
||||
if (idx == 0 || data_offset[idx - 1] > data_offset[idx]) {
|
||||
path++;
|
||||
if (idx) {
|
||||
os << endl;
|
||||
}
|
||||
os << " Path #" << path << ":" << endl;
|
||||
os << " ";
|
||||
}
|
||||
|
||||
os << "<" << int(base_offset + data_offset[idx]) << ": ";
|
||||
describeClass(os, cr, 1000, CC_OUT_TEXT);
|
||||
os << "> ";
|
||||
}
|
||||
}
|
||||
os << endl;
|
||||
}
|
||||
|
||||
static
|
||||
void dumpMultipathShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi,
|
||||
const u8 *lo_2, const u8 *hi_2, const u8 *bucket_mask,
|
||||
const u8 *bucket_mask_2, const u8 *data_offset,
|
||||
u32 neg_mask, s32 base_offset) {
|
||||
assert(len == 16 || len == 32 || len == 64);
|
||||
os << " contents:";
|
||||
u32 path = 0;
|
||||
for (u32 idx = 0; idx < len; idx++) {
|
||||
CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]);
|
||||
cr |= shufti2cr(lo_2, hi_2, bucket_mask_2[idx]);
|
||||
|
||||
if (neg_mask & (1ULL << idx)) {
|
||||
cr.flip();
|
||||
}
|
||||
|
||||
if (cr.any() && !cr.all()) {
|
||||
if (idx == 0 || data_offset[idx - 1] > data_offset[idx]) {
|
||||
path++;
|
||||
os << endl;
|
||||
os << " Path #" << path << ":" << endl;
|
||||
os << " ";
|
||||
}
|
||||
|
||||
os << "<" << int(base_offset + data_offset[idx]) << ": ";
|
||||
describeClass(os, cr, 1000, CC_OUT_TEXT);
|
||||
os << "> ";
|
||||
}
|
||||
}
|
||||
os << endl;
|
||||
}
|
||||
|
||||
#define PROGRAM_CASE(name) \
|
||||
case ROSE_INSTR_##name: { \
|
||||
os << " " << std::setw(4) << std::setfill('0') << (pc - pc_base) \
|
||||
<< ": " #name " (" << (int)ROSE_INSTR_##name << ")" << endl; \
|
||||
@ -741,7 +905,8 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_LOOKAROUND) {
|
||||
os << " index " << ri->index << endl;
|
||||
os << " look_index " << ri->look_index << endl;
|
||||
os << " reach_index " << ri->reach_index << endl;
|
||||
os << " count " << ri->count << endl;
|
||||
os << " fail_jump " << offset + ri->fail_jump << endl;
|
||||
dumpLookaround(os, t, ri);
|
||||
@ -795,8 +960,13 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
<< dumpStrMask(ri->bucket_select_mask,
|
||||
sizeof(ri->bucket_select_mask))
|
||||
<< endl;
|
||||
os << " neg_mask 0x" << std::hex << std::setw(8)
|
||||
<< std::setfill('0') << ri->neg_mask << std::dec << endl;
|
||||
os << " offset " << ri->offset << endl;
|
||||
os << " fail_jump " << offset + ri->fail_jump << endl;
|
||||
dumpLookaroundShufti(os, 16, ri->nib_mask, ri->nib_mask + 16,
|
||||
ri->bucket_select_mask, ri->neg_mask,
|
||||
ri->offset);
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
@ -811,8 +981,13 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
<< dumpStrMask(ri->bucket_select_mask,
|
||||
sizeof(ri->bucket_select_mask))
|
||||
<< endl;
|
||||
os << " neg_mask 0x" << std::hex << std::setw(8)
|
||||
<< std::setfill('0') << ri->neg_mask << std::dec << endl;
|
||||
os << " offset " << ri->offset << endl;
|
||||
os << " fail_jump " << offset + ri->fail_jump << endl;
|
||||
dumpLookaroundShufti(os, 32, ri->lo_mask, ri->hi_mask,
|
||||
ri->bucket_select_mask, ri->neg_mask,
|
||||
ri->offset);
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
@ -827,8 +1002,15 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
<< dumpStrMask(ri->bucket_select_mask,
|
||||
sizeof(ri->bucket_select_mask))
|
||||
<< endl;
|
||||
os << " neg_mask 0x" << std::hex << std::setw(8)
|
||||
<< std::setfill('0') << ri->neg_mask << std::dec << endl;
|
||||
os << " offset " << ri->offset << endl;
|
||||
os << " fail_jump " << offset + ri->fail_jump << endl;
|
||||
dumpLookaroundShufti(os, 16, ri->lo_mask, ri->hi_mask,
|
||||
ri->lo_mask + 16, ri->hi_mask + 16,
|
||||
ri->bucket_select_mask,
|
||||
ri->bucket_select_mask + 16,
|
||||
ri->neg_mask, ri->offset);
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
@ -847,8 +1029,15 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
<< dumpStrMask(ri->bucket_select_mask_lo,
|
||||
sizeof(ri->bucket_select_mask_lo))
|
||||
<< endl;
|
||||
os << " neg_mask 0x" << std::hex << std::setw(8)
|
||||
<< std::setfill('0') << ri->neg_mask << std::dec << endl;
|
||||
os << " offset " << ri->offset << endl;
|
||||
os << " fail_jump " << offset + ri->fail_jump << endl;
|
||||
dumpLookaroundShufti(os, 32, ri->lo_mask, ri->hi_mask,
|
||||
ri->lo_mask + 16, ri->hi_mask + 16,
|
||||
ri->bucket_select_mask_lo,
|
||||
ri->bucket_select_mask_hi,
|
||||
ri->neg_mask, ri->offset);
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
@ -1103,6 +1292,146 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
PROGRAM_CASE(CLEAR_WORK_DONE) {}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(MULTIPATH_LOOKAROUND) {
|
||||
os << " look_index " << ri->look_index << endl;
|
||||
os << " reach_index " << ri->reach_index << endl;
|
||||
os << " count " << ri->count << endl;
|
||||
os << " last_start " << ri->last_start << endl;
|
||||
os << " start_mask "
|
||||
<< dumpStrMask(ri->start_mask, sizeof(ri->start_mask))
|
||||
<< endl;
|
||||
os << " fail_jump " << offset + ri->fail_jump << endl;
|
||||
dumpMultipathLookaround(os, t, ri);
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_16x8) {
|
||||
os << " nib_mask "
|
||||
<< dumpStrMask(ri->nib_mask, sizeof(ri->nib_mask))
|
||||
<< endl;
|
||||
os << " bucket_select_mask "
|
||||
<< dumpStrMask(ri->bucket_select_mask,
|
||||
sizeof(ri->bucket_select_mask))
|
||||
<< endl;
|
||||
os << " data_select_mask "
|
||||
<< dumpStrMask(ri->data_select_mask,
|
||||
sizeof(ri->data_select_mask))
|
||||
<< endl;
|
||||
os << " hi_bits_mask 0x" << std::hex << std::setw(4)
|
||||
<< std::setfill('0') << ri->hi_bits_mask << std::dec << endl;
|
||||
os << " lo_bits_mask 0x" << std::hex << std::setw(4)
|
||||
<< std::setfill('0') << ri->lo_bits_mask << std::dec << endl;
|
||||
os << " neg_mask 0x" << std::hex << std::setw(4)
|
||||
<< std::setfill('0') << ri->neg_mask << std::dec << endl;
|
||||
os << " base_offset " << ri->base_offset << endl;
|
||||
os << " last_start " << ri->last_start << endl;
|
||||
os << " fail_jump " << offset + ri->fail_jump << endl;
|
||||
dumpMultipathShufti(os, 16, ri->nib_mask, ri->nib_mask + 16,
|
||||
ri->bucket_select_mask,
|
||||
ri->data_select_mask,
|
||||
ri->neg_mask, ri->base_offset);
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x8) {
|
||||
os << " hi_mask "
|
||||
<< dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
|
||||
<< endl;
|
||||
os << " lo_mask "
|
||||
<< dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask))
|
||||
<< endl;
|
||||
os << " bucket_select_mask "
|
||||
<< dumpStrMask(ri->bucket_select_mask,
|
||||
sizeof(ri->bucket_select_mask))
|
||||
<< endl;
|
||||
os << " data_select_mask "
|
||||
<< dumpStrMask(ri->data_select_mask,
|
||||
sizeof(ri->data_select_mask))
|
||||
<< endl;
|
||||
os << " hi_bits_mask 0x" << std::hex << std::setw(8)
|
||||
<< std::setfill('0') << ri->hi_bits_mask << std::dec << endl;
|
||||
os << " lo_bits_mask 0x" << std::hex << std::setw(8)
|
||||
<< std::setfill('0') << ri->lo_bits_mask << std::dec << endl;
|
||||
os << " neg_mask 0x" << std::hex << std::setw(8)
|
||||
<< std::setfill('0') << ri->neg_mask << std::dec << endl;
|
||||
os << " base_offset " << ri->base_offset << endl;
|
||||
os << " last_start " << ri->last_start << endl;
|
||||
os << " fail_jump " << offset + ri->fail_jump << endl;
|
||||
dumpMultipathShufti(os, 32, ri->lo_mask, ri->hi_mask,
|
||||
ri->bucket_select_mask,
|
||||
ri->data_select_mask,
|
||||
ri->neg_mask, ri->base_offset);
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x16) {
|
||||
os << " hi_mask "
|
||||
<< dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
|
||||
<< endl;
|
||||
os << " lo_mask "
|
||||
<< dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask))
|
||||
<< endl;
|
||||
os << " bucket_select_mask_hi "
|
||||
<< dumpStrMask(ri->bucket_select_mask_hi,
|
||||
sizeof(ri->bucket_select_mask_hi))
|
||||
<< endl;
|
||||
os << " bucket_select_mask_lo "
|
||||
<< dumpStrMask(ri->bucket_select_mask_lo,
|
||||
sizeof(ri->bucket_select_mask_lo))
|
||||
<< endl;
|
||||
os << " data_select_mask "
|
||||
<< dumpStrMask(ri->data_select_mask,
|
||||
sizeof(ri->data_select_mask))
|
||||
<< endl;
|
||||
os << " hi_bits_mask 0x" << std::hex << std::setw(8)
|
||||
<< std::setfill('0') << ri->hi_bits_mask << std::dec << endl;
|
||||
os << " lo_bits_mask 0x" << std::hex << std::setw(8)
|
||||
<< std::setfill('0') << ri->lo_bits_mask << std::dec << endl;
|
||||
os << " neg_mask 0x" << std::hex << std::setw(8)
|
||||
<< std::setfill('0') << ri->neg_mask << std::dec << endl;
|
||||
os << " base_offset " << ri->base_offset << endl;
|
||||
os << " last_start " << ri->last_start << endl;
|
||||
os << " fail_jump " << offset + ri->fail_jump << endl;
|
||||
dumpMultipathShufti(os, 32, ri->lo_mask, ri->hi_mask,
|
||||
ri->lo_mask + 16, ri->hi_mask + 16,
|
||||
ri->bucket_select_mask_lo,
|
||||
ri->bucket_select_mask_hi,
|
||||
ri->data_select_mask,
|
||||
ri->neg_mask, ri->base_offset);
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_64) {
|
||||
os << " hi_mask "
|
||||
<< dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
|
||||
<< endl;
|
||||
os << " lo_mask "
|
||||
<< dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask))
|
||||
<< endl;
|
||||
os << " bucket_select_mask "
|
||||
<< dumpStrMask(ri->bucket_select_mask,
|
||||
sizeof(ri->bucket_select_mask))
|
||||
<< endl;
|
||||
os << " data_select_mask "
|
||||
<< dumpStrMask(ri->data_select_mask,
|
||||
sizeof(ri->data_select_mask))
|
||||
<< endl;
|
||||
os << " hi_bits_mask 0x" << std::hex << std::setw(16)
|
||||
<< std::setfill('0') << ri->hi_bits_mask << std::dec << endl;
|
||||
os << " lo_bits_mask 0x" << std::hex << std::setw(16)
|
||||
<< std::setfill('0') << ri->lo_bits_mask << std::dec << endl;
|
||||
os << " neg_mask 0x" << std::hex << std::setw(16)
|
||||
<< std::setfill('0') << ri->neg_mask << std::dec << endl;
|
||||
os << " base_offset " << ri->base_offset << endl;
|
||||
os << " last_start " << ri->last_start << endl;
|
||||
os << " fail_jump " << offset + ri->fail_jump << endl;
|
||||
dumpMultipathShufti(os, 64, ri->lo_mask, ri->hi_mask,
|
||||
ri->bucket_select_mask,
|
||||
ri->data_select_mask,
|
||||
ri->neg_mask, ri->base_offset);
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
default:
|
||||
os << " UNKNOWN (code " << int{code} << ")" << endl;
|
||||
os << " <stopping>" << endl;
|
||||
|
@ -45,6 +45,7 @@
|
||||
|
||||
#include <cstdlib>
|
||||
#include <queue>
|
||||
#include <sstream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -62,6 +63,20 @@ static const u32 MAX_LOOKAROUND_ENTRIES = 16;
|
||||
/** \brief We would rather have lookarounds with smaller reach than this. */
|
||||
static const u32 LOOKAROUND_WIDE_REACH = 200;
|
||||
|
||||
#if defined(DEBUG) || defined(DUMP_SUPPORT)
|
||||
static UNUSED
|
||||
string dump(const map<s32, CharReach> &look) {
|
||||
ostringstream oss;
|
||||
for (auto it = look.begin(), ite = look.end(); it != ite; ++it) {
|
||||
if (it != look.begin()) {
|
||||
oss << ", ";
|
||||
}
|
||||
oss << "{" << it->first << ": " << describeClass(it->second) << "}";
|
||||
}
|
||||
return oss.str();
|
||||
}
|
||||
#endif
|
||||
|
||||
static
|
||||
void getForwardReach(const NGHolder &g, u32 top, map<s32, CharReach> &look) {
|
||||
ue2::flat_set<NFAVertex> curr, next;
|
||||
@ -298,21 +313,6 @@ void findBackwardReach(const RoseGraph &g, const RoseVertex v,
|
||||
// TODO: implement DFA variants if necessary.
|
||||
}
|
||||
|
||||
#if defined(DEBUG) || defined(DUMP_SUPPORT)
|
||||
#include <sstream>
|
||||
static UNUSED
|
||||
string dump(const map<s32, CharReach> &look) {
|
||||
ostringstream oss;
|
||||
for (auto it = look.begin(), ite = look.end(); it != ite; ++it) {
|
||||
if (it != look.begin()) {
|
||||
oss << ", ";
|
||||
}
|
||||
oss << "{" << it->first << ": " << describeClass(it->second) << "}";
|
||||
}
|
||||
return oss.str();
|
||||
}
|
||||
#endif
|
||||
|
||||
static
|
||||
void normalise(map<s32, CharReach> &look) {
|
||||
// We can erase entries where the reach is "all characters".
|
||||
@ -554,6 +554,76 @@ void trimLiterals(const RoseBuildImpl &build, const RoseVertex v,
|
||||
DEBUG_PRINTF("post-trim lookaround: %s\n", dump(look).c_str());
|
||||
}
|
||||
|
||||
static
|
||||
void normaliseLeftfix(map<s32, CharReach> &look) {
|
||||
// We can erase entries where the reach is "all characters", except for the
|
||||
// very first one -- this might be required to establish a minimum bound on
|
||||
// the literal's match offset.
|
||||
|
||||
// TODO: It would be cleaner to use a literal program instruction to check
|
||||
// the minimum bound explicitly.
|
||||
|
||||
if (look.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto earliest = begin(look)->first;
|
||||
|
||||
vector<s32> dead;
|
||||
for (const auto &m : look) {
|
||||
if (m.second.all() && m.first != earliest) {
|
||||
dead.push_back(m.first);
|
||||
}
|
||||
}
|
||||
erase_all(&look, dead);
|
||||
}
|
||||
|
||||
static
|
||||
bool trimMultipathLeftfix(const RoseBuildImpl &build, const RoseVertex v,
|
||||
vector<map<s32, CharReach>> &looks) {
|
||||
size_t path_count = 0;
|
||||
for (auto &look : looks) {
|
||||
++path_count;
|
||||
DEBUG_PRINTF("Path #%ld\n", path_count);
|
||||
|
||||
assert(!look.empty());
|
||||
trimLiterals(build, v, look);
|
||||
|
||||
if (look.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Could be optimized here, just keep the empty byte of the longest path
|
||||
normaliseLeftfix(look);
|
||||
|
||||
if (look.size() > MAX_LOOKAROUND_ENTRIES) {
|
||||
DEBUG_PRINTF("lookaround too big (%zu entries)\n", look.size());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
void transToLookaround(const vector<map<s32, CharReach>> &looks,
|
||||
vector<vector<LookEntry>> &lookarounds) {
|
||||
for (const auto &look : looks) {
|
||||
vector<LookEntry> lookaround;
|
||||
DEBUG_PRINTF("lookaround: %s\n", dump(look).c_str());
|
||||
lookaround.reserve(look.size());
|
||||
for (const auto &m : look) {
|
||||
if (m.first < -128 || m.first > 127) {
|
||||
DEBUG_PRINTF("range too big\n");
|
||||
lookarounds.clear();
|
||||
return;
|
||||
}
|
||||
s8 offset = verify_s8(m.first);
|
||||
lookaround.emplace_back(offset, m.second);
|
||||
}
|
||||
lookarounds.push_back(lookaround);
|
||||
}
|
||||
}
|
||||
|
||||
void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
|
||||
vector<LookEntry> &lookaround) {
|
||||
lookaround.clear();
|
||||
@ -592,115 +662,155 @@ void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
|
||||
}
|
||||
|
||||
static
|
||||
bool hasSingleFloatingStart(const NGHolder &g) {
|
||||
NFAVertex initial = NGHolder::null_vertex();
|
||||
for (auto v : adjacent_vertices_range(g.startDs, g)) {
|
||||
if (v == g.startDs) {
|
||||
continue;
|
||||
}
|
||||
if (initial != NGHolder::null_vertex()) {
|
||||
DEBUG_PRINTF("more than one start\n");
|
||||
return false;
|
||||
}
|
||||
initial = v;
|
||||
}
|
||||
bool checkShuftiBuckets(const vector<map<s32, CharReach>> &looks,
|
||||
u32 bucket_size) {
|
||||
set<u32> bucket;
|
||||
for (const auto &look : looks) {
|
||||
for (const auto &l : look) {
|
||||
CharReach cr = l.second;
|
||||
if (cr.count() > 128) {
|
||||
cr.flip();
|
||||
}
|
||||
map <u16, u16> lo2hi;
|
||||
|
||||
if (initial == NGHolder::null_vertex()) {
|
||||
DEBUG_PRINTF("no floating starts\n");
|
||||
return false;
|
||||
}
|
||||
for (size_t i = cr.find_first(); i != CharReach::npos;) {
|
||||
u8 it_hi = i >> 4;
|
||||
u16 low_encode = 0;
|
||||
while (i != CharReach::npos && (i >> 4) == it_hi) {
|
||||
low_encode |= 1 << (i &0xf);
|
||||
i = cr.find_next(i);
|
||||
}
|
||||
lo2hi[low_encode] |= 1 << it_hi;
|
||||
}
|
||||
|
||||
// Anchored start must have no successors other than startDs and initial.
|
||||
for (auto v : adjacent_vertices_range(g.start, g)) {
|
||||
if (v != initial && v != g.startDs) {
|
||||
DEBUG_PRINTF("anchored start\n");
|
||||
return false;
|
||||
for (const auto &it : lo2hi) {
|
||||
u32 hi_lo = (it.second << 16) | it.first;
|
||||
bucket.insert(hi_lo);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
DEBUG_PRINTF("shufti has %lu bucket(s)\n", bucket.size());
|
||||
return bucket.size() <= bucket_size;
|
||||
}
|
||||
|
||||
static
|
||||
bool getTransientPrefixReach(const NGHolder &g, u32 lag,
|
||||
map<s32, CharReach> &look) {
|
||||
if (in_degree(g.accept, g) != 1) {
|
||||
DEBUG_PRINTF("more than one accept\n");
|
||||
bool getTransientPrefixReach(const NGHolder &g, ReportID report, u32 lag,
|
||||
vector<map<s32, CharReach>> &looks) {
|
||||
if (!isAcyclic(g)) {
|
||||
DEBUG_PRINTF("contains back-edge\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Must be a floating chain wired to startDs.
|
||||
if (!hasSingleFloatingStart(g)) {
|
||||
DEBUG_PRINTF("not a single floating start\n");
|
||||
// Must be floating chains wired to startDs.
|
||||
if (!isFloating(g)) {
|
||||
DEBUG_PRINTF("not a floating start\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
NFAVertex v = *(inv_adjacent_vertices(g.accept, g).first);
|
||||
u32 i = lag + 1;
|
||||
while (v != g.startDs) {
|
||||
DEBUG_PRINTF("i=%u, v=%zu\n", i, g[v].index);
|
||||
if (is_special(v, g)) {
|
||||
DEBUG_PRINTF("special\n");
|
||||
vector<NFAVertex> curr;
|
||||
for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
|
||||
if (v == g.start || v == g.startDs) {
|
||||
DEBUG_PRINTF("empty graph\n");
|
||||
return true;
|
||||
}
|
||||
if (contains(g[v].reports, report)) {
|
||||
curr.push_back(v);
|
||||
}
|
||||
}
|
||||
|
||||
assert(!curr.empty());
|
||||
|
||||
u32 total_len = curr.size();
|
||||
|
||||
for (const auto &v : curr) {
|
||||
looks.emplace_back(map<s32, CharReach>());
|
||||
looks.back()[0 - (lag + 1)] = g[v].char_reach;
|
||||
}
|
||||
|
||||
bool curr_active = false;
|
||||
|
||||
/* For each offset -i, we backwardly trace the path by vertices in curr.
|
||||
* Once there are more than 8 paths and more than 64 bits total_len,
|
||||
* which means that neither MULTIPATH_LOOKAROUND nor MULTIPATH_SHUFTI
|
||||
* could be successfully built, we will give up the path finding.
|
||||
* Otherwise, the loop will halt when all vertices in curr are startDs.
|
||||
*/
|
||||
for (u32 i = lag + 2; i < (lag + 2) + MAX_BACK_LEN; i++) {
|
||||
curr_active = false;
|
||||
size_t curr_size = curr.size();
|
||||
if (curr.size() > 1 && i > lag + MULTIPATH_MAX_LEN) {
|
||||
DEBUG_PRINTF("range is larger than 16 in multi-path\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
look[0 - i] = g[v].char_reach;
|
||||
|
||||
NFAVertex next = NGHolder::null_vertex();
|
||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
if (u == g.start) {
|
||||
continue; // Benign, checked by hasSingleFloatingStart
|
||||
}
|
||||
if (next == NGHolder::null_vertex()) {
|
||||
next = u;
|
||||
for (size_t idx = 0; idx < curr_size; idx++) {
|
||||
NFAVertex v = curr[idx];
|
||||
if (v == g.startDs) {
|
||||
continue;
|
||||
}
|
||||
DEBUG_PRINTF("branch\n");
|
||||
return false;
|
||||
}
|
||||
assert(!is_special(v, g));
|
||||
|
||||
if (next == NGHolder::null_vertex() || next == v) {
|
||||
DEBUG_PRINTF("no predecessor or only self-loop\n");
|
||||
// This graph is malformed -- all vertices in a graph that makes it
|
||||
// to this analysis should have predecessors.
|
||||
assert(0);
|
||||
return false;
|
||||
}
|
||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
if (u == g.start || u == g.startDs) {
|
||||
curr[idx] = g.startDs;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
v = next;
|
||||
i++;
|
||||
if (is_special(curr[idx], g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
curr_active = true;
|
||||
if (curr[idx] == v) {
|
||||
curr[idx] = u;
|
||||
looks[idx][0 - i] = g[u].char_reach;
|
||||
total_len++;
|
||||
} else {
|
||||
curr.push_back(u);
|
||||
looks.push_back(looks[idx]);
|
||||
(looks.back())[0 - i] = g[u].char_reach;
|
||||
total_len += looks.back().size();
|
||||
}
|
||||
|
||||
if (curr.size() > MAX_LOOKAROUND_PATHS && total_len > 64) {
|
||||
DEBUG_PRINTF("too many branches\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!curr_active) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (curr_active) {
|
||||
DEBUG_PRINTF("single path too long\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// More than 8 paths, check multi-path shufti.
|
||||
if (curr.size() > MAX_LOOKAROUND_PATHS) {
|
||||
u32 bucket_size = total_len > 32 ? 8 : 16;
|
||||
if (!checkShuftiBuckets(looks, bucket_size)) {
|
||||
DEBUG_PRINTF("shufti has too many buckets\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
assert(!looks.empty());
|
||||
if (looks.size() == 1) {
|
||||
DEBUG_PRINTF("single lookaround\n");
|
||||
} else {
|
||||
DEBUG_PRINTF("multi-path lookaround\n");
|
||||
}
|
||||
DEBUG_PRINTF("done\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
void normaliseLeftfix(map<s32, CharReach> &look) {
|
||||
// We can erase entries where the reach is "all characters", except for the
|
||||
// very first one -- this might be required to establish a minimum bound on
|
||||
// the literal's match offset.
|
||||
|
||||
// TODO: It would be cleaner to use a literal program instruction to check
|
||||
// the minimum bound explicitly.
|
||||
|
||||
if (look.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto earliest = begin(look)->first;
|
||||
|
||||
vector<s32> dead;
|
||||
for (const auto &m : look) {
|
||||
if (m.second.all() && m.first != earliest) {
|
||||
dead.push_back(m.first);
|
||||
}
|
||||
}
|
||||
erase_all(&look, dead);
|
||||
}
|
||||
|
||||
bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
|
||||
vector<LookEntry> &lookaround) {
|
||||
vector<vector<LookEntry>> &lookaround) {
|
||||
lookaround.clear();
|
||||
|
||||
const RoseGraph &g = build.g;
|
||||
@ -716,36 +826,19 @@ bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
|
||||
return false;
|
||||
}
|
||||
|
||||
map<s32, CharReach> look;
|
||||
if (!getTransientPrefixReach(*leftfix.graph(), g[v].left.lag, look)) {
|
||||
DEBUG_PRINTF("not a chain\n");
|
||||
vector<map<s32, CharReach>> looks;
|
||||
if (!getTransientPrefixReach(*leftfix.graph(), g[v].left.leftfix_report,
|
||||
g[v].left.lag, looks)) {
|
||||
DEBUG_PRINTF("graph has loop or too large\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
trimLiterals(build, v, look);
|
||||
normaliseLeftfix(look);
|
||||
|
||||
if (look.size() > MAX_LOOKAROUND_ENTRIES) {
|
||||
DEBUG_PRINTF("lookaround too big (%zu entries)\n", look.size());
|
||||
if (!trimMultipathLeftfix(build, v, looks)) {
|
||||
return false;
|
||||
}
|
||||
transToLookaround(looks, lookaround);
|
||||
|
||||
if (look.empty()) {
|
||||
DEBUG_PRINTF("lookaround empty; this is weird\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
lookaround.reserve(look.size());
|
||||
for (const auto &m : look) {
|
||||
if (m.first < -128 || m.first > 127) {
|
||||
DEBUG_PRINTF("range too big\n");
|
||||
return false;
|
||||
}
|
||||
s8 offset = verify_s8(m.first);
|
||||
lookaround.emplace_back(offset, m.second);
|
||||
}
|
||||
|
||||
return true;
|
||||
return !lookaround.empty();
|
||||
}
|
||||
|
||||
void mergeLookaround(vector<LookEntry> &lookaround,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -36,6 +36,9 @@
|
||||
|
||||
#include <vector>
|
||||
|
||||
/** \brief Max path number for multi-path lookaround. */
|
||||
#define MAX_LOOKAROUND_PATHS 8
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class CharReach;
|
||||
@ -44,6 +47,7 @@ class RoseBuildImpl;
|
||||
/** \brief Lookaround entry prototype, describing the reachability at a given
|
||||
* distance from the end of a role match. */
|
||||
struct LookEntry {
|
||||
LookEntry() : offset(0) {}
|
||||
LookEntry(s8 offset_in, const CharReach &reach_in)
|
||||
: offset(offset_in), reach(reach_in) {}
|
||||
s8 offset; //!< offset from role match location.
|
||||
@ -63,7 +67,7 @@ size_t hash_value(const LookEntry &l) {
|
||||
}
|
||||
|
||||
void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
|
||||
std::vector<LookEntry> &lookaround);
|
||||
std::vector<LookEntry> &look_more);
|
||||
|
||||
/**
|
||||
* \brief If possible, render the prefix of the given vertex as a lookaround.
|
||||
@ -72,7 +76,7 @@ void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
|
||||
* it can be satisfied with a lookaround alone.
|
||||
*/
|
||||
bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
|
||||
std::vector<LookEntry> &lookaround);
|
||||
std::vector<std::vector<LookEntry>> &lookaround);
|
||||
|
||||
void mergeLookaround(std::vector<LookEntry> &lookaround,
|
||||
const std::vector<LookEntry> &more_lookaround);
|
||||
|
@ -127,7 +127,8 @@ void RoseInstrCheckLookaround::write(void *dest, RoseEngineBlob &blob,
|
||||
const OffsetMap &offset_map) const {
|
||||
RoseInstrBase::write(dest, blob, offset_map);
|
||||
auto *inst = static_cast<impl_type *>(dest);
|
||||
inst->index = index;
|
||||
inst->look_index = look_index;
|
||||
inst->reach_index = reach_index;
|
||||
inst->count = count;
|
||||
inst->fail_jump = calc_jump(offset_map, this, target);
|
||||
}
|
||||
@ -537,6 +538,93 @@ void RoseInstrCheckMedLitNocase::write(void *dest, RoseEngineBlob &blob,
|
||||
inst->fail_jump = calc_jump(offset_map, this, target);
|
||||
}
|
||||
|
||||
void RoseInstrMultipathLookaround::write(void *dest, RoseEngineBlob &blob,
|
||||
const OffsetMap &offset_map) const {
|
||||
RoseInstrBase::write(dest, blob, offset_map);
|
||||
auto *inst = static_cast<impl_type *>(dest);
|
||||
inst->look_index = look_index;
|
||||
inst->reach_index = reach_index;
|
||||
inst->count = count;
|
||||
inst->last_start = last_start;
|
||||
copy(begin(start_mask), end(start_mask), inst->start_mask);
|
||||
inst->fail_jump = calc_jump(offset_map, this, target);
|
||||
}
|
||||
|
||||
void RoseInstrCheckMultipathShufti16x8::write(void *dest, RoseEngineBlob &blob,
|
||||
const OffsetMap &offset_map)
|
||||
const {
|
||||
RoseInstrBase::write(dest, blob, offset_map);
|
||||
auto *inst = static_cast<impl_type *>(dest);
|
||||
copy(begin(nib_mask), end(nib_mask), inst->nib_mask);
|
||||
copy(begin(bucket_select_mask), begin(bucket_select_mask) + 16,
|
||||
inst->bucket_select_mask);
|
||||
copy(begin(data_select_mask), begin(data_select_mask) + 16,
|
||||
inst->data_select_mask);
|
||||
inst->hi_bits_mask = hi_bits_mask;
|
||||
inst->lo_bits_mask = lo_bits_mask;
|
||||
inst->neg_mask = neg_mask;
|
||||
inst->base_offset = base_offset;
|
||||
inst->last_start = last_start;
|
||||
inst->fail_jump = calc_jump(offset_map, this, target);
|
||||
}
|
||||
|
||||
void RoseInstrCheckMultipathShufti32x8::write(void *dest, RoseEngineBlob &blob,
|
||||
const OffsetMap &offset_map)
|
||||
const {
|
||||
RoseInstrBase::write(dest, blob, offset_map);
|
||||
auto *inst = static_cast<impl_type *>(dest);
|
||||
copy(begin(hi_mask), begin(hi_mask) + 16, inst->hi_mask);
|
||||
copy(begin(lo_mask), begin(lo_mask) + 16, inst->lo_mask);
|
||||
copy(begin(bucket_select_mask), begin(bucket_select_mask) + 32,
|
||||
inst->bucket_select_mask);
|
||||
copy(begin(data_select_mask), begin(data_select_mask) + 32,
|
||||
inst->data_select_mask);
|
||||
inst->hi_bits_mask = hi_bits_mask;
|
||||
inst->lo_bits_mask = lo_bits_mask;
|
||||
inst->neg_mask = neg_mask;
|
||||
inst->base_offset = base_offset;
|
||||
inst->last_start = last_start;
|
||||
inst->fail_jump = calc_jump(offset_map, this, target);
|
||||
}
|
||||
|
||||
void RoseInstrCheckMultipathShufti32x16::write(void *dest, RoseEngineBlob &blob,
|
||||
const OffsetMap &offset_map) const {
|
||||
RoseInstrBase::write(dest, blob, offset_map);
|
||||
auto *inst = static_cast<impl_type *>(dest);
|
||||
copy(begin(hi_mask), end(hi_mask), inst->hi_mask);
|
||||
copy(begin(lo_mask), end(lo_mask), inst->lo_mask);
|
||||
copy(begin(bucket_select_mask_hi), begin(bucket_select_mask_hi) + 32,
|
||||
inst->bucket_select_mask_hi);
|
||||
copy(begin(bucket_select_mask_lo), begin(bucket_select_mask_lo) + 32,
|
||||
inst->bucket_select_mask_lo);
|
||||
copy(begin(data_select_mask), begin(data_select_mask) + 32,
|
||||
inst->data_select_mask);
|
||||
inst->hi_bits_mask = hi_bits_mask;
|
||||
inst->lo_bits_mask = lo_bits_mask;
|
||||
inst->neg_mask = neg_mask;
|
||||
inst->base_offset = base_offset;
|
||||
inst->last_start = last_start;
|
||||
inst->fail_jump = calc_jump(offset_map, this, target);
|
||||
}
|
||||
|
||||
void RoseInstrCheckMultipathShufti64::write(void *dest, RoseEngineBlob &blob,
|
||||
const OffsetMap &offset_map) const {
|
||||
RoseInstrBase::write(dest, blob, offset_map);
|
||||
auto *inst = static_cast<impl_type *>(dest);
|
||||
copy(begin(hi_mask), begin(hi_mask) + 16, inst->hi_mask);
|
||||
copy(begin(lo_mask), begin(lo_mask) + 16, inst->lo_mask);
|
||||
copy(begin(bucket_select_mask), end(bucket_select_mask),
|
||||
inst->bucket_select_mask);
|
||||
copy(begin(data_select_mask), end(data_select_mask),
|
||||
inst->data_select_mask);
|
||||
inst->hi_bits_mask = hi_bits_mask;
|
||||
inst->lo_bits_mask = lo_bits_mask;
|
||||
inst->neg_mask = neg_mask;
|
||||
inst->base_offset = base_offset;
|
||||
inst->last_start = last_start;
|
||||
inst->fail_jump = calc_jump(offset_map, this, target);
|
||||
}
|
||||
|
||||
static
|
||||
OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) {
|
||||
OffsetMap offset_map;
|
||||
|
@ -420,20 +420,24 @@ class RoseInstrCheckLookaround
|
||||
ROSE_STRUCT_CHECK_LOOKAROUND,
|
||||
RoseInstrCheckLookaround> {
|
||||
public:
|
||||
u32 index;
|
||||
u32 look_index;
|
||||
u32 reach_index;
|
||||
u32 count;
|
||||
const RoseInstruction *target;
|
||||
|
||||
RoseInstrCheckLookaround(u32 index_in, u32 count_in,
|
||||
const RoseInstruction *target_in)
|
||||
: index(index_in), count(count_in), target(target_in) {}
|
||||
RoseInstrCheckLookaround(u32 look_index_in, u32 reach_index_in,
|
||||
u32 count_in, const RoseInstruction *target_in)
|
||||
: look_index(look_index_in), reach_index(reach_index_in),
|
||||
count(count_in), target(target_in) {}
|
||||
|
||||
bool operator==(const RoseInstrCheckLookaround &ri) const {
|
||||
return index == ri.index && count == ri.count && target == ri.target;
|
||||
return look_index == ri.look_index && reach_index == ri.reach_index &&
|
||||
count == ri.count && target == ri.target;
|
||||
}
|
||||
|
||||
size_t hash() const override {
|
||||
return hash_all(static_cast<int>(opcode), index, count);
|
||||
return hash_all(static_cast<int>(opcode), look_index, reach_index,
|
||||
count);
|
||||
}
|
||||
|
||||
void write(void *dest, RoseEngineBlob &blob,
|
||||
@ -441,7 +445,8 @@ public:
|
||||
|
||||
bool equiv_to(const RoseInstrCheckLookaround &ri, const OffsetMap &offsets,
|
||||
const OffsetMap &other_offsets) const {
|
||||
return index == ri.index && count == ri.count &&
|
||||
return look_index == ri.look_index && reach_index == ri.reach_index &&
|
||||
count == ri.count &&
|
||||
offsets.at(target) == other_offsets.at(ri.target);
|
||||
}
|
||||
};
|
||||
@ -498,7 +503,7 @@ public:
|
||||
RoseInstrCheckMask32(std::array<u8, 32> and_mask_in,
|
||||
std::array<u8, 32> cmp_mask_in, u32 neg_mask_in,
|
||||
s32 offset_in, const RoseInstruction *target_in)
|
||||
: and_mask(move(and_mask_in)), cmp_mask(move(cmp_mask_in)),
|
||||
: and_mask(std::move(and_mask_in)), cmp_mask(std::move(cmp_mask_in)),
|
||||
neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
|
||||
|
||||
bool operator==(const RoseInstrCheckMask32 &ri) const {
|
||||
@ -576,8 +581,8 @@ public:
|
||||
std::array<u8, 16> bucket_select_mask_in,
|
||||
u32 neg_mask_in, s32 offset_in,
|
||||
const RoseInstruction *target_in)
|
||||
: nib_mask(move(nib_mask_in)),
|
||||
bucket_select_mask(move(bucket_select_mask_in)),
|
||||
: nib_mask(std::move(nib_mask_in)),
|
||||
bucket_select_mask(std::move(bucket_select_mask_in)),
|
||||
neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
|
||||
|
||||
bool operator==(const RoseInstrCheckShufti16x8 &ri) const {
|
||||
@ -621,8 +626,8 @@ public:
|
||||
std::array<u8, 32> bucket_select_mask_in,
|
||||
u32 neg_mask_in, s32 offset_in,
|
||||
const RoseInstruction *target_in)
|
||||
: hi_mask(move(hi_mask_in)), lo_mask(move(lo_mask_in)),
|
||||
bucket_select_mask(move(bucket_select_mask_in)),
|
||||
: hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)),
|
||||
bucket_select_mask(std::move(bucket_select_mask_in)),
|
||||
neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
|
||||
|
||||
bool operator==(const RoseInstrCheckShufti32x8 &ri) const {
|
||||
@ -666,8 +671,8 @@ public:
|
||||
std::array<u8, 32> bucket_select_mask_in,
|
||||
u32 neg_mask_in, s32 offset_in,
|
||||
const RoseInstruction *target_in)
|
||||
: hi_mask(move(hi_mask_in)), lo_mask(move(lo_mask_in)),
|
||||
bucket_select_mask(move(bucket_select_mask_in)),
|
||||
: hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)),
|
||||
bucket_select_mask(std::move(bucket_select_mask_in)),
|
||||
neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
|
||||
|
||||
bool operator==(const RoseInstrCheckShufti16x16 &ri) const {
|
||||
@ -713,9 +718,9 @@ public:
|
||||
std::array<u8, 32> bucket_select_mask_lo_in,
|
||||
u32 neg_mask_in, s32 offset_in,
|
||||
const RoseInstruction *target_in)
|
||||
: hi_mask(move(hi_mask_in)), lo_mask(move(lo_mask_in)),
|
||||
bucket_select_mask_hi(move(bucket_select_mask_hi_in)),
|
||||
bucket_select_mask_lo(move(bucket_select_mask_lo_in)),
|
||||
: hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)),
|
||||
bucket_select_mask_hi(std::move(bucket_select_mask_hi_in)),
|
||||
bucket_select_mask_lo(std::move(bucket_select_mask_lo_in)),
|
||||
neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
|
||||
|
||||
bool operator==(const RoseInstrCheckShufti32x16 &ri) const {
|
||||
@ -1859,6 +1864,306 @@ public:
|
||||
~RoseInstrClearWorkDone() override;
|
||||
};
|
||||
|
||||
class RoseInstrMultipathLookaround
|
||||
: public RoseInstrBaseOneTarget<ROSE_INSTR_MULTIPATH_LOOKAROUND,
|
||||
ROSE_STRUCT_MULTIPATH_LOOKAROUND,
|
||||
RoseInstrMultipathLookaround> {
|
||||
public:
|
||||
u32 look_index;
|
||||
u32 reach_index;
|
||||
u32 count;
|
||||
s32 last_start;
|
||||
std::array<u8, 16> start_mask;
|
||||
const RoseInstruction *target;
|
||||
|
||||
RoseInstrMultipathLookaround(u32 look_index_in, u32 reach_index_in,
|
||||
u32 count_in, s32 last_start_in,
|
||||
std::array<u8, 16> start_mask_in,
|
||||
const RoseInstruction *target_in)
|
||||
: look_index(look_index_in), reach_index(reach_index_in),
|
||||
count(count_in), last_start(last_start_in),
|
||||
start_mask(std::move(start_mask_in)), target(target_in) {}
|
||||
|
||||
bool operator==(const RoseInstrMultipathLookaround &ri) const {
|
||||
return look_index == ri.look_index && reach_index == ri.reach_index &&
|
||||
count == ri.count && last_start == ri.last_start &&
|
||||
start_mask == ri.start_mask && target == ri.target;
|
||||
}
|
||||
|
||||
size_t hash() const override {
|
||||
return hash_all(static_cast<int>(opcode), look_index, reach_index,
|
||||
count, last_start, start_mask);
|
||||
}
|
||||
|
||||
void write(void *dest, RoseEngineBlob &blob,
|
||||
const OffsetMap &offset_map) const override;
|
||||
|
||||
bool equiv_to(const RoseInstrMultipathLookaround &ri,
|
||||
const OffsetMap &offsets,
|
||||
const OffsetMap &other_offsets) const {
|
||||
return look_index == ri.look_index && reach_index == ri.reach_index &&
|
||||
count == ri.count && last_start == ri.last_start &&
|
||||
start_mask == ri.start_mask &&
|
||||
offsets.at(target) == other_offsets.at(ri.target);
|
||||
}
|
||||
};
|
||||
|
||||
class RoseInstrCheckMultipathShufti16x8
|
||||
: public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_16x8,
|
||||
ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_16x8,
|
||||
RoseInstrCheckMultipathShufti16x8> {
|
||||
public:
|
||||
std::array<u8, 32> nib_mask;
|
||||
std::array<u8, 64> bucket_select_mask;
|
||||
std::array<u8, 64> data_select_mask;
|
||||
u16 hi_bits_mask;
|
||||
u16 lo_bits_mask;
|
||||
u16 neg_mask;
|
||||
s32 base_offset;
|
||||
s32 last_start;
|
||||
const RoseInstruction *target;
|
||||
|
||||
RoseInstrCheckMultipathShufti16x8(std::array<u8, 32> nib_mask_in,
|
||||
std::array<u8, 64> bucket_select_mask_in,
|
||||
std::array<u8, 64> data_select_mask_in,
|
||||
u16 hi_bits_mask_in, u16 lo_bits_mask_in,
|
||||
u16 neg_mask_in, s32 base_offset_in,
|
||||
s32 last_start_in,
|
||||
const RoseInstruction *target_in)
|
||||
: nib_mask(std::move(nib_mask_in)),
|
||||
bucket_select_mask(std::move(bucket_select_mask_in)),
|
||||
data_select_mask(std::move(data_select_mask_in)),
|
||||
hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in),
|
||||
neg_mask(neg_mask_in), base_offset(base_offset_in),
|
||||
last_start(last_start_in), target(target_in) {}
|
||||
|
||||
bool operator==(const RoseInstrCheckMultipathShufti16x8 &ri) const {
|
||||
return nib_mask == ri.nib_mask &&
|
||||
bucket_select_mask == ri.bucket_select_mask &&
|
||||
data_select_mask == ri.data_select_mask &&
|
||||
hi_bits_mask == ri.hi_bits_mask &&
|
||||
lo_bits_mask == ri.lo_bits_mask &&
|
||||
neg_mask == ri.neg_mask && base_offset == ri.base_offset &&
|
||||
last_start == ri.last_start && target == ri.target;
|
||||
}
|
||||
|
||||
size_t hash() const override {
|
||||
return hash_all(static_cast<int>(opcode), nib_mask,
|
||||
bucket_select_mask, data_select_mask, hi_bits_mask,
|
||||
lo_bits_mask, neg_mask, base_offset, last_start);
|
||||
}
|
||||
|
||||
void write(void *dest, RoseEngineBlob &blob,
|
||||
const OffsetMap &offset_map) const override;
|
||||
|
||||
bool equiv_to(const RoseInstrCheckMultipathShufti16x8 &ri,
|
||||
const OffsetMap &offsets,
|
||||
const OffsetMap &other_offsets) const {
|
||||
return nib_mask == ri.nib_mask &&
|
||||
bucket_select_mask == ri.bucket_select_mask &&
|
||||
data_select_mask == ri.data_select_mask &&
|
||||
hi_bits_mask == ri.hi_bits_mask &&
|
||||
lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask &&
|
||||
base_offset == ri.base_offset && last_start == ri.last_start &&
|
||||
offsets.at(target) == other_offsets.at(ri.target);
|
||||
}
|
||||
};
|
||||
|
||||
class RoseInstrCheckMultipathShufti32x8
|
||||
: public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x8,
|
||||
ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x8,
|
||||
RoseInstrCheckMultipathShufti32x8> {
|
||||
public:
|
||||
std::array<u8, 32> hi_mask;
|
||||
std::array<u8, 32> lo_mask;
|
||||
std::array<u8, 64> bucket_select_mask;
|
||||
std::array<u8, 64> data_select_mask;
|
||||
u32 hi_bits_mask;
|
||||
u32 lo_bits_mask;
|
||||
u32 neg_mask;
|
||||
s32 base_offset;
|
||||
s32 last_start;
|
||||
const RoseInstruction *target;
|
||||
|
||||
RoseInstrCheckMultipathShufti32x8(std::array<u8, 32> hi_mask_in,
|
||||
std::array<u8, 32> lo_mask_in,
|
||||
std::array<u8, 64> bucket_select_mask_in,
|
||||
std::array<u8, 64> data_select_mask_in,
|
||||
u32 hi_bits_mask_in, u32 lo_bits_mask_in,
|
||||
u32 neg_mask_in, s32 base_offset_in,
|
||||
s32 last_start_in,
|
||||
const RoseInstruction *target_in)
|
||||
: hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)),
|
||||
bucket_select_mask(std::move(bucket_select_mask_in)),
|
||||
data_select_mask(std::move(data_select_mask_in)),
|
||||
hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in),
|
||||
neg_mask(neg_mask_in), base_offset(base_offset_in),
|
||||
last_start(last_start_in), target(target_in) {}
|
||||
|
||||
bool operator==(const RoseInstrCheckMultipathShufti32x8 &ri) const {
|
||||
return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
|
||||
bucket_select_mask == ri.bucket_select_mask &&
|
||||
data_select_mask == ri.data_select_mask &&
|
||||
hi_bits_mask == ri.hi_bits_mask &&
|
||||
lo_bits_mask == ri.lo_bits_mask &&
|
||||
neg_mask == ri.neg_mask && base_offset == ri.base_offset &&
|
||||
last_start == ri.last_start && target == ri.target;
|
||||
}
|
||||
|
||||
size_t hash() const override {
|
||||
return hash_all(static_cast<int>(opcode), hi_mask, lo_mask,
|
||||
bucket_select_mask, data_select_mask, hi_bits_mask,
|
||||
lo_bits_mask, neg_mask, base_offset, last_start);
|
||||
}
|
||||
|
||||
void write(void *dest, RoseEngineBlob &blob,
|
||||
const OffsetMap &offset_map) const override;
|
||||
|
||||
bool equiv_to(const RoseInstrCheckMultipathShufti32x8 &ri,
|
||||
const OffsetMap &offsets,
|
||||
const OffsetMap &other_offsets) const {
|
||||
return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
|
||||
bucket_select_mask == ri.bucket_select_mask &&
|
||||
data_select_mask == ri.data_select_mask &&
|
||||
hi_bits_mask == ri.hi_bits_mask &&
|
||||
lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask &&
|
||||
base_offset == ri.base_offset && last_start == ri.last_start &&
|
||||
offsets.at(target) == other_offsets.at(ri.target);
|
||||
}
|
||||
};
|
||||
|
||||
class RoseInstrCheckMultipathShufti32x16
|
||||
: public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x16,
|
||||
ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x16,
|
||||
RoseInstrCheckMultipathShufti32x16> {
|
||||
public:
|
||||
std::array<u8, 32> hi_mask;
|
||||
std::array<u8, 32> lo_mask;
|
||||
std::array<u8, 64> bucket_select_mask_hi;
|
||||
std::array<u8, 64> bucket_select_mask_lo;
|
||||
std::array<u8, 64> data_select_mask;
|
||||
u32 hi_bits_mask;
|
||||
u32 lo_bits_mask;
|
||||
u32 neg_mask;
|
||||
s32 base_offset;
|
||||
s32 last_start;
|
||||
const RoseInstruction *target;
|
||||
|
||||
RoseInstrCheckMultipathShufti32x16(std::array<u8, 32> hi_mask_in,
|
||||
std::array<u8, 32> lo_mask_in,
|
||||
std::array<u8, 64> bucket_select_mask_hi_in,
|
||||
std::array<u8, 64> bucket_select_mask_lo_in,
|
||||
std::array<u8, 64> data_select_mask_in,
|
||||
u32 hi_bits_mask_in, u32 lo_bits_mask_in,
|
||||
u32 neg_mask_in, s32 base_offset_in,
|
||||
s32 last_start_in,
|
||||
const RoseInstruction *target_in)
|
||||
: hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)),
|
||||
bucket_select_mask_hi(std::move(bucket_select_mask_hi_in)),
|
||||
bucket_select_mask_lo(std::move(bucket_select_mask_lo_in)),
|
||||
data_select_mask(std::move(data_select_mask_in)),
|
||||
hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in),
|
||||
neg_mask(neg_mask_in), base_offset(base_offset_in),
|
||||
last_start(last_start_in), target(target_in) {}
|
||||
|
||||
bool operator==(const RoseInstrCheckMultipathShufti32x16 &ri) const {
|
||||
return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
|
||||
bucket_select_mask_hi == ri.bucket_select_mask_hi &&
|
||||
bucket_select_mask_lo == ri.bucket_select_mask_lo &&
|
||||
data_select_mask == ri.data_select_mask &&
|
||||
hi_bits_mask == ri.hi_bits_mask &&
|
||||
lo_bits_mask == ri.lo_bits_mask &&
|
||||
neg_mask == ri.neg_mask && base_offset == ri.base_offset &&
|
||||
last_start == ri.last_start && target == ri.target;
|
||||
}
|
||||
|
||||
size_t hash() const override {
|
||||
return hash_all(static_cast<int>(opcode), hi_mask, lo_mask,
|
||||
bucket_select_mask_hi, bucket_select_mask_lo,
|
||||
data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask,
|
||||
base_offset, last_start);
|
||||
}
|
||||
|
||||
void write(void *dest, RoseEngineBlob &blob,
|
||||
const OffsetMap &offset_map) const override;
|
||||
|
||||
bool equiv_to(const RoseInstrCheckMultipathShufti32x16 &ri,
|
||||
const OffsetMap &offsets,
|
||||
const OffsetMap &other_offsets) const {
|
||||
return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
|
||||
bucket_select_mask_hi == ri.bucket_select_mask_hi &&
|
||||
bucket_select_mask_lo == ri.bucket_select_mask_lo &&
|
||||
data_select_mask == ri.data_select_mask &&
|
||||
hi_bits_mask == ri.hi_bits_mask &&
|
||||
lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask &&
|
||||
base_offset == ri.base_offset && last_start == ri.last_start &&
|
||||
offsets.at(target) == other_offsets.at(ri.target);
|
||||
}
|
||||
};
|
||||
|
||||
class RoseInstrCheckMultipathShufti64
|
||||
: public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_64,
|
||||
ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64,
|
||||
RoseInstrCheckMultipathShufti64> {
|
||||
public:
|
||||
std::array<u8, 32> hi_mask;
|
||||
std::array<u8, 32> lo_mask;
|
||||
std::array<u8, 64> bucket_select_mask;
|
||||
std::array<u8, 64> data_select_mask;
|
||||
u64a hi_bits_mask;
|
||||
u64a lo_bits_mask;
|
||||
u64a neg_mask;
|
||||
s32 base_offset;
|
||||
s32 last_start;
|
||||
const RoseInstruction *target;
|
||||
|
||||
RoseInstrCheckMultipathShufti64(std::array<u8, 32> hi_mask_in,
|
||||
std::array<u8, 32> lo_mask_in,
|
||||
std::array<u8, 64> bucket_select_mask_in,
|
||||
std::array<u8, 64> data_select_mask_in,
|
||||
u64a hi_bits_mask_in, u64a lo_bits_mask_in,
|
||||
u64a neg_mask_in, s32 base_offset_in,
|
||||
s32 last_start_in,
|
||||
const RoseInstruction *target_in)
|
||||
: hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)),
|
||||
bucket_select_mask(std::move(bucket_select_mask_in)),
|
||||
data_select_mask(std::move(data_select_mask_in)),
|
||||
hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in),
|
||||
neg_mask(neg_mask_in), base_offset(base_offset_in),
|
||||
last_start(last_start_in), target(target_in) {}
|
||||
|
||||
bool operator==(const RoseInstrCheckMultipathShufti64 &ri) const {
|
||||
return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
|
||||
bucket_select_mask == ri.bucket_select_mask &&
|
||||
data_select_mask == ri.data_select_mask &&
|
||||
hi_bits_mask == ri.hi_bits_mask &&
|
||||
lo_bits_mask == ri.lo_bits_mask &&
|
||||
neg_mask == ri.neg_mask && base_offset == ri.base_offset &&
|
||||
last_start == ri.last_start && target == ri.target;
|
||||
}
|
||||
|
||||
size_t hash() const override {
|
||||
return hash_all(static_cast<int>(opcode), hi_mask, lo_mask,
|
||||
bucket_select_mask, data_select_mask, hi_bits_mask,
|
||||
lo_bits_mask, neg_mask, base_offset, last_start);
|
||||
}
|
||||
|
||||
void write(void *dest, RoseEngineBlob &blob,
|
||||
const OffsetMap &offset_map) const override;
|
||||
|
||||
bool equiv_to(const RoseInstrCheckMultipathShufti64 &ri,
|
||||
const OffsetMap &offsets,
|
||||
const OffsetMap &other_offsets) const {
|
||||
return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
|
||||
bucket_select_mask == ri.bucket_select_mask &&
|
||||
data_select_mask == ri.data_select_mask &&
|
||||
hi_bits_mask == ri.hi_bits_mask &&
|
||||
lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask &&
|
||||
base_offset == ri.base_offset && last_start == ri.last_start &&
|
||||
offsets.at(target) == other_offsets.at(ri.target);
|
||||
}
|
||||
};
|
||||
|
||||
class RoseInstrEnd
|
||||
: public RoseInstrBaseTrivial<ROSE_INSTR_END, ROSE_STRUCT_END,
|
||||
RoseInstrEnd> {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -41,6 +41,15 @@
|
||||
/** \brief Length in bytes of a reach bitvector, used by the lookaround code. */
|
||||
#define REACH_BITVECTOR_LEN 32
|
||||
|
||||
/** \brief Length in bytes of a reach bitvector for multi-path lookaround. */
|
||||
#define MULTI_REACH_BITVECTOR_LEN 256
|
||||
|
||||
/**
|
||||
* \brief The max offset from the leftmost byte to the rightmost byte in
|
||||
* multi-path lookaround.
|
||||
*/
|
||||
#define MULTIPATH_MAX_LEN 16
|
||||
|
||||
/** \brief Value used to represent an invalid Rose program offset. */
|
||||
#define ROSE_INVALID_PROG_OFFSET 0
|
||||
|
||||
|
@ -386,7 +386,8 @@ struct RoseEngine {
|
||||
u32 roseCount;
|
||||
u32 lookaroundTableOffset; //!< base of lookaround offset list (of s8 values)
|
||||
u32 lookaroundReachOffset; /**< base of lookaround reach bitvectors (32
|
||||
* bytes each) */
|
||||
* bytes for single-path lookaround and 256 bytes
|
||||
* for multi-path lookaround) */
|
||||
|
||||
u32 eodProgramOffset; //!< EOD program, otherwise 0.
|
||||
|
||||
|
@ -36,6 +36,7 @@
|
||||
#include "som/som_operation.h"
|
||||
#include "rose_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/simd_types.h"
|
||||
|
||||
/** \brief Minimum alignment for each instruction in memory. */
|
||||
#define ROSE_INSTR_MIN_ALIGN 8U
|
||||
@ -146,7 +147,38 @@ enum RoseInstructionCode {
|
||||
*/
|
||||
ROSE_INSTR_CLEAR_WORK_DONE,
|
||||
|
||||
LAST_ROSE_INSTRUCTION = ROSE_INSTR_CLEAR_WORK_DONE //!< Sentinel.
|
||||
/** \brief Check lookaround if it has multiple paths. */
|
||||
ROSE_INSTR_MULTIPATH_LOOKAROUND,
|
||||
|
||||
/**
|
||||
* \brief Use shufti to check lookaround with multiple paths. The total
|
||||
* length of the paths is 16 bytes at most and shufti has 8 buckets.
|
||||
* All paths can be at most 16 bytes long.
|
||||
*/
|
||||
ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_16x8,
|
||||
|
||||
/**
|
||||
* \brief Use shufti to check lookaround with multiple paths. The total
|
||||
* length of the paths is 32 bytes at most and shufti has 8 buckets.
|
||||
* All paths can be at most 16 bytes long.
|
||||
*/
|
||||
ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x8,
|
||||
|
||||
/**
|
||||
* \brief Use shufti to check lookaround with multiple paths. The total
|
||||
* length of the paths is 32 bytes at most and shufti has 16 buckets.
|
||||
* All paths can be at most 16 bytes long.
|
||||
*/
|
||||
ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x16,
|
||||
|
||||
/**
|
||||
* \brief Use shufti to check multiple paths lookaround. The total
|
||||
* length of the paths is 64 bytes at most and shufti has 8 buckets.
|
||||
* All paths can be at most 16 bytes long.
|
||||
*/
|
||||
ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_64,
|
||||
|
||||
LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_64 //!< Sentinel.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_END {
|
||||
@ -192,14 +224,15 @@ struct ROSE_STRUCT_CHECK_NOT_HANDLED {
|
||||
struct ROSE_STRUCT_CHECK_SINGLE_LOOKAROUND {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
s8 offset; //!< The offset of the byte to examine.
|
||||
u32 reach_index; //!< The index of the reach table entry to use.
|
||||
u32 reach_index; //!< Index for lookaround reach bitvectors.
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_CHECK_LOOKAROUND {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 index;
|
||||
u32 count;
|
||||
u32 look_index; //!< Index for lookaround offset list.
|
||||
u32 reach_index; //!< Index for lookaround reach bitvectors.
|
||||
u32 count; //!< The count of lookaround entries in one instruction.
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
@ -526,4 +559,70 @@ struct ROSE_STRUCT_CLEAR_WORK_DONE {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_MULTIPATH_LOOKAROUND {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 look_index; //!< Index for lookaround offset list.
|
||||
u32 reach_index; //!< Index for lookaround reach bitvectors.
|
||||
u32 count; //!< The lookaround byte numbers for each path.
|
||||
s32 last_start; //!< The latest start offset among 8 paths.
|
||||
u8 start_mask[MULTIPATH_MAX_LEN]; /*!< Used to initialize path if left-most
|
||||
* data is missed. */
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_16x8 {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u8 nib_mask[2 * sizeof(m128)]; //!< High and low nibble mask in shufti.
|
||||
u8 bucket_select_mask[sizeof(m128)]; //!< Mask for bucket assigning.
|
||||
u8 data_select_mask[sizeof(m128)]; //!< Shuffle mask for data ordering.
|
||||
u32 hi_bits_mask; //!< High-bits used in multi-path validation.
|
||||
u32 lo_bits_mask; //!< Low-bits used in multi-path validation.
|
||||
u32 neg_mask; //!< 64 bits negation mask.
|
||||
s32 base_offset; //!< Relative offset of the first byte.
|
||||
s32 last_start; //!< The latest start offset among 8 paths.
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x8 {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u8 hi_mask[sizeof(m128)]; //!< High nibble mask in shufti.
|
||||
u8 lo_mask[sizeof(m128)]; //!< Low nibble mask in shufti.
|
||||
u8 bucket_select_mask[sizeof(m256)]; //!< Mask for bucket assigning.
|
||||
u8 data_select_mask[sizeof(m256)]; //!< Shuffle mask for data ordering.
|
||||
u32 hi_bits_mask; //!< High-bits used in multi-path validation.
|
||||
u32 lo_bits_mask; //!< Low-bits used in multi-path validation.
|
||||
u32 neg_mask; //!< 64 bits negation mask.
|
||||
s32 base_offset; //!< Relative offset of the first byte.
|
||||
s32 last_start; //!< The latest start offset among 8 paths.
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x16 {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u8 hi_mask[sizeof(m256)]; //!< High nibble mask in shufti.
|
||||
u8 lo_mask[sizeof(m256)]; //!< Low nibble mask in shufti.
|
||||
u8 bucket_select_mask_hi[sizeof(m256)]; //!< Mask for bucket assigning.
|
||||
u8 bucket_select_mask_lo[sizeof(m256)]; //!< Mask for bucket assigning.
|
||||
u8 data_select_mask[sizeof(m256)]; //!< Shuffle mask for data ordering.
|
||||
u32 hi_bits_mask; //!< High-bits used in multi-path validation.
|
||||
u32 lo_bits_mask; //!< Low-bits used in multi-path validation.
|
||||
u32 neg_mask; //!< 64 bits negation mask.
|
||||
s32 base_offset; //!< Relative offset of the first byte.
|
||||
s32 last_start; //!< The latest start offset among 8 paths.
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64 {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u8 hi_mask[sizeof(m128)]; //!< High nibble mask in shufti.
|
||||
u8 lo_mask[sizeof(m128)]; //!< Low nibble mask in shufti.
|
||||
u8 bucket_select_mask[2 * sizeof(m256)]; //!< Mask for bucket assigning.
|
||||
u8 data_select_mask[2 * sizeof(m256)]; //!< Shuffle mask for data ordering.
|
||||
u64a hi_bits_mask; //!< High-bits used in multi-path validation.
|
||||
u64a lo_bits_mask; //!< Low-bits used in multi-path validation.
|
||||
u64a neg_mask; //!< 64 bits negation mask.
|
||||
s32 base_offset; //!< Relative offset of the first byte.
|
||||
s32 last_start; //!< The latest start offset among 8 paths.
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
#endif // ROSE_ROSE_PROGRAM_H
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -46,7 +46,7 @@ void dumpMask(const void *mask, int len) {
|
||||
static really_inline
|
||||
int validateShuftiMask16x16(const m256 data, const m256 hi_mask,
|
||||
const m256 lo_mask, const m256 and_mask,
|
||||
const u32 neg_mask, const u16 valid_data_mask) {
|
||||
const u32 neg_mask, const u32 valid_data_mask) {
|
||||
m256 low4bits = set32x8(0xf);
|
||||
m256 c_lo = vpshufb(lo_mask, and256(data, low4bits));
|
||||
m256 c_hi = vpshufb(hi_mask, rshift64_m256(andnot256(low4bits, data), 4));
|
||||
@ -75,7 +75,7 @@ int validateShuftiMask16x16(const m256 data, const m256 hi_mask,
|
||||
static really_inline
|
||||
int validateShuftiMask16x8(const m128 data, const m256 nib_mask,
|
||||
const m128 and_mask, const u32 neg_mask,
|
||||
const u16 valid_data_mask) {
|
||||
const u32 valid_data_mask) {
|
||||
m256 data_m256 = combine2x128(rshift64_m128(data, 4), data);
|
||||
m256 low4bits = set32x8(0xf);
|
||||
m256 c_nib = vpshufb(nib_mask, and256(data_m256, low4bits));
|
||||
@ -172,4 +172,121 @@ int validateShuftiMask32x16(const m256 data,
|
||||
u32 cmp_result = (nresult ^ neg_mask) & valid_data_mask;
|
||||
return !cmp_result;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
int checkMultipath32(u32 data, u32 hi_bits, u32 lo_bits) {
|
||||
u32 t = ~(data | hi_bits);
|
||||
t += lo_bits;
|
||||
t &= (~data) & hi_bits;
|
||||
DEBUG_PRINTF("t %x\n", t);
|
||||
return !!t;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
int checkMultipath64(u64a data, u64a hi_bits, u64a lo_bits) {
|
||||
u64a t = ~(data | hi_bits);
|
||||
t += lo_bits;
|
||||
t &= (~data) & hi_bits;
|
||||
DEBUG_PRINTF("t %llx\n", t);
|
||||
return !!t;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
int validateMultipathShuftiMask16x8(const m128 data,
|
||||
const m256 nib_mask,
|
||||
const m128 bucket_select_mask,
|
||||
const u32 hi_bits, const u32 lo_bits,
|
||||
const u32 neg_mask,
|
||||
const u32 valid_path_mask) {
|
||||
m256 data_256 = combine2x128(rshift64_m128(data, 4), data);
|
||||
m256 low4bits = set32x8(0xf);
|
||||
m256 c_nib = vpshufb(nib_mask, and256(data_256, low4bits));
|
||||
m128 t = and128(movdq_hi(c_nib), movdq_lo(c_nib));
|
||||
m128 result = and128(t, bucket_select_mask);
|
||||
u32 nresult = movemask128(eq128(result, zeroes128()));
|
||||
u32 cmp_result = (nresult ^ neg_mask) | valid_path_mask;
|
||||
|
||||
DEBUG_PRINTF("cmp_result %x\n", cmp_result);
|
||||
|
||||
return checkMultipath32(cmp_result, hi_bits, lo_bits);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
int validateMultipathShuftiMask32x8(const m256 data,
|
||||
const m256 hi_mask, const m256 lo_mask,
|
||||
const m256 bucket_select_mask,
|
||||
const u32 hi_bits, const u32 lo_bits,
|
||||
const u32 neg_mask,
|
||||
const u32 valid_path_mask) {
|
||||
m256 low4bits = set32x8(0xf);
|
||||
m256 data_lo = and256(data, low4bits);
|
||||
m256 data_hi = and256(rshift64_m256(data, 4), low4bits);
|
||||
m256 c_lo = vpshufb(lo_mask, data_lo);
|
||||
m256 c_hi = vpshufb(hi_mask, data_hi);
|
||||
m256 c = and256(c_lo, c_hi);
|
||||
m256 result = and256(c, bucket_select_mask);
|
||||
u32 nresult = movemask256(eq256(result, zeroes256()));
|
||||
u32 cmp_result = (nresult ^ neg_mask) | valid_path_mask;
|
||||
|
||||
DEBUG_PRINTF("cmp_result %x\n", cmp_result);
|
||||
|
||||
return checkMultipath32(cmp_result, hi_bits, lo_bits);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
int validateMultipathShuftiMask32x16(const m256 data,
|
||||
const m256 hi_mask_1, const m256 hi_mask_2,
|
||||
const m256 lo_mask_1, const m256 lo_mask_2,
|
||||
const m256 bucket_select_mask_hi,
|
||||
const m256 bucket_select_mask_lo,
|
||||
const u32 hi_bits, const u32 lo_bits,
|
||||
const u32 neg_mask,
|
||||
const u32 valid_path_mask) {
|
||||
m256 low4bits = set32x8(0xf);
|
||||
m256 data_lo = and256(data, low4bits);
|
||||
m256 data_hi = and256(rshift64_m256(data, 4), low4bits);
|
||||
m256 c_lo_1 = vpshufb(lo_mask_1, data_lo);
|
||||
m256 c_lo_2 = vpshufb(lo_mask_2, data_lo);
|
||||
m256 c_hi_1 = vpshufb(hi_mask_1, data_hi);
|
||||
m256 c_hi_2 = vpshufb(hi_mask_2, data_hi);
|
||||
m256 t1 = and256(c_lo_1, c_hi_1);
|
||||
m256 t2 = and256(c_lo_2, c_hi_2);
|
||||
m256 result = or256(and256(t1, bucket_select_mask_lo),
|
||||
and256(t2, bucket_select_mask_hi));
|
||||
u32 nresult = movemask256(eq256(result, zeroes256()));
|
||||
u32 cmp_result = (nresult ^ neg_mask) | valid_path_mask;
|
||||
|
||||
DEBUG_PRINTF("cmp_result %x\n", cmp_result);
|
||||
|
||||
return checkMultipath32(cmp_result, hi_bits, lo_bits);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
int validateMultipathShuftiMask64(const m256 data_1, const m256 data_2,
|
||||
const m256 hi_mask, const m256 lo_mask,
|
||||
const m256 bucket_select_mask_1,
|
||||
const m256 bucket_select_mask_2,
|
||||
const u64a hi_bits, const u64a lo_bits,
|
||||
const u64a neg_mask,
|
||||
const u64a valid_path_mask) {
|
||||
m256 low4bits = set32x8(0xf);
|
||||
m256 c_lo_1 = vpshufb(lo_mask, and256(data_1, low4bits));
|
||||
m256 c_lo_2 = vpshufb(lo_mask, and256(data_2, low4bits));
|
||||
m256 c_hi_1 = vpshufb(hi_mask,
|
||||
rshift64_m256(andnot256(low4bits, data_1), 4));
|
||||
m256 c_hi_2 = vpshufb(hi_mask,
|
||||
rshift64_m256(andnot256(low4bits, data_2), 4));
|
||||
m256 t1 = and256(c_lo_1, c_hi_1);
|
||||
m256 t2 = and256(c_lo_2, c_hi_2);
|
||||
m256 nresult_1 = eq256(and256(t1, bucket_select_mask_1), zeroes256());
|
||||
m256 nresult_2 = eq256(and256(t2, bucket_select_mask_2), zeroes256());
|
||||
u64a nresult = (u64a)movemask256(nresult_1) |
|
||||
(u64a)movemask256(nresult_2) << 32;
|
||||
u64a cmp_result = (nresult ^ neg_mask) | valid_path_mask;
|
||||
|
||||
DEBUG_PRINTF("cmp_result %llx\n", cmp_result);
|
||||
|
||||
return checkMultipath64(cmp_result, hi_bits, lo_bits);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -317,6 +317,11 @@ m128 sub_u8_m128(m128 a, m128 b) {
|
||||
return _mm_sub_epi8(a, b);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m128 set64x2(u64a hi, u64a lo) {
|
||||
return _mm_set_epi64x(hi, lo);
|
||||
}
|
||||
|
||||
/****
|
||||
**** 256-bit Primitives
|
||||
****/
|
||||
@ -592,6 +597,18 @@ m256 mask1bit256(unsigned int n) {
|
||||
return loadu256(&simd_onebit_masks[mask_idx]);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m256 set64x4(u64a hi_1, u64a hi_0, u64a lo_1, u64a lo_0) {
|
||||
#if defined(HAVE_AVX2)
|
||||
return _mm256_set_epi64x(hi_1, hi_0, lo_1, lo_0);
|
||||
#else
|
||||
m256 rv;
|
||||
rv.hi = set64x2(hi_1, hi_0);
|
||||
rv.lo = set64x2(lo_1, lo_0);
|
||||
return rv;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if !defined(HAVE_AVX2)
|
||||
// switches on bit N in the given vector.
|
||||
static really_inline
|
||||
|
Loading…
x
Reference in New Issue
Block a user