From ae3cb7de6fd48c20decda1e71e9aadcb28d20b84 Mon Sep 17 00:00:00 2001 From: "Xu, Chi" Date: Fri, 31 Mar 2017 04:37:33 +0800 Subject: [PATCH] rose: add multi-path shufti 16x8, 32x8, 32x16, 64x8 and multi-path lookaround instructions. --- src/rose/program_runtime.h | 435 ++++++++++++++++++++- src/rose/rose_build_bytecode.cpp | 603 +++++++++++++++++++++++------ src/rose/rose_build_dump.cpp | 339 +++++++++++++++- src/rose/rose_build_lookaround.cpp | 335 ++++++++++------ src/rose/rose_build_lookaround.h | 10 +- src/rose/rose_build_program.cpp | 90 ++++- src/rose/rose_build_program.h | 339 +++++++++++++++- src/rose/rose_common.h | 11 +- src/rose/rose_internal.h | 3 +- src/rose/rose_program.h | 107 ++++- src/rose/validate_shufti.h | 123 +++++- src/util/simd_utils.h | 17 + 12 files changed, 2133 insertions(+), 279 deletions(-) diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 30ff8527..88c312d2 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -857,13 +857,13 @@ u32 getBufferDataComplex(const struct core_info *ci, const s64a loc, } static rose_inline -m128 getData128(const struct core_info *ci, s64a offset, u16 *valid_data_mask) { +m128 getData128(const struct core_info *ci, s64a offset, u32 *valid_data_mask) { if (offset > 0 && offset + sizeof(m128) <= ci->len) { *valid_data_mask = 0xffff; return loadu128(ci->buf + offset); } ALIGN_DIRECTIVE u8 data[sizeof(m128)]; - *valid_data_mask = (u16)getBufferDataComplex(ci, offset, data, 16); + *valid_data_mask = getBufferDataComplex(ci, offset, data, 16); return *(m128 *)data; } @@ -892,7 +892,7 @@ int roseCheckShufti16x8(const struct core_info *ci, const u8 *nib_mask, return 0; } - u16 valid_data_mask = 0; + u32 valid_data_mask = 0; m128 data = getData128(ci, offset, &valid_data_mask); if (unlikely(!valid_data_mask)) { return 1; @@ -924,7 +924,7 @@ int roseCheckShufti16x16(const struct core_info *ci, const u8 *hi_mask, return 0; } - u16 valid_data_mask = 0; + u32 valid_data_mask = 0; m128 data = getData128(ci, offset, &valid_data_mask); if (unlikely(!valid_data_mask)) { return 1; @@ -1020,8 +1020,9 @@ int roseCheckShufti32x16(const struct core_info *ci, const u8 *hi_mask, static rose_inline int roseCheckSingleLookaround(const struct RoseEngine *t, const struct hs_scratch *scratch, - s8 checkOffset, u32 lookaroundIndex, u64a end) { - assert(lookaroundIndex != MO_INVALID_IDX); + s8 checkOffset, u32 lookaroundReachIndex, + u64a end) { + assert(lookaroundReachIndex != MO_INVALID_IDX); const struct core_info *ci = &scratch->core_info; DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, ci->buf_offset, ci->buf_offset + ci->len); @@ -1037,7 +1038,7 @@ int roseCheckSingleLookaround(const struct RoseEngine *t, } const u8 *reach_base = (const u8 *)t + t->lookaroundReachOffset; - const u8 *reach = reach_base + lookaroundIndex * REACH_BITVECTOR_LEN; + const u8 *reach = reach_base + lookaroundReachIndex; u8 c; if (offset >= 0 && offset < (s64a)ci->len) { @@ -1063,9 +1064,11 @@ int roseCheckSingleLookaround(const struct RoseEngine *t, */ static rose_inline int roseCheckLookaround(const struct RoseEngine *t, - const struct hs_scratch *scratch, u32 lookaroundIndex, + const struct hs_scratch *scratch, + u32 lookaroundLookIndex, u32 lookaroundReachIndex, u32 lookaroundCount, u64a end) { - assert(lookaroundIndex != MO_INVALID_IDX); + assert(lookaroundLookIndex != MO_INVALID_IDX); + assert(lookaroundReachIndex != MO_INVALID_IDX); assert(lookaroundCount > 0); const struct core_info *ci = &scratch->core_info; @@ 
-1074,12 +1077,12 @@ int roseCheckLookaround(const struct RoseEngine *t, const u8 *base = (const u8 *)t; const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset); - const s8 *look = look_base + lookaroundIndex; + const s8 *look = look_base + lookaroundLookIndex; const s8 *look_end = look + lookaroundCount; assert(look < look_end); const u8 *reach_base = base + t->lookaroundReachOffset; - const u8 *reach = reach_base + lookaroundIndex * REACH_BITVECTOR_LEN; + const u8 *reach = reach_base + lookaroundReachIndex; // The following code assumes that the lookaround structures are ordered by // increasing offset. @@ -1151,6 +1154,359 @@ int roseCheckLookaround(const struct RoseEngine *t, return 1; } +/** + * \brief Trying to find a matching path by the corresponding path mask of + * every lookaround location. + */ +static rose_inline +int roseMultipathLookaround(const struct RoseEngine *t, + const struct hs_scratch *scratch, + u32 multipathLookaroundLookIndex, + u32 multipathLookaroundReachIndex, + u32 multipathLookaroundCount, + s32 last_start, const u8 *start_mask, + u64a end) { + assert(multipathLookaroundCount > 0); + + const struct core_info *ci = &scratch->core_info; + DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, + ci->buf_offset, ci->buf_offset + ci->len); + + const s8 *look_base = getByOffset(t, t->lookaroundTableOffset); + const s8 *look = look_base + multipathLookaroundLookIndex; + const s8 *look_end = look + multipathLookaroundCount; + assert(look < look_end); + + const u8 *reach_base = getByOffset(t, t->lookaroundReachOffset); + const u8 *reach = reach_base + multipathLookaroundReachIndex; + + const s64a base_offset = (s64a)end - ci->buf_offset; + DEBUG_PRINTF("base_offset=%lld\n", base_offset); + + u8 path = 0xff; + + assert(last_start < 0); + + if (unlikely((u64a)(0 - last_start) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + u32 start_offset = 0; + do { + s64a offset = base_offset + *look; + DEBUG_PRINTF("start_mask[%u] = %x\n", start_offset, + start_mask[start_offset]); + path = start_mask[start_offset]; + if (offset >= -(s64a)ci->hlen) { + break; + } + DEBUG_PRINTF("look=%d before history\n", *look); + start_offset++; + look++; + reach += MULTI_REACH_BITVECTOR_LEN; + } while (look < look_end); + + DEBUG_PRINTF("scan history (%zu looks left)\n", look_end - look); + for (; look < look_end; ++look, reach += MULTI_REACH_BITVECTOR_LEN) { + s64a offset = base_offset + *look; + DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset); + + if (offset >= 0) { + DEBUG_PRINTF("in buffer\n"); + break; + } + + assert(offset >= -(s64a)ci->hlen && offset < 0); + u8 c = ci->hbuf[ci->hlen + offset]; + path &= reach[c]; + DEBUG_PRINTF("reach[%x] = %02x path = %0xx\n", c, reach[c], path); + if (!path) { + DEBUG_PRINTF("char 0x%02x failed reach check\n", c); + return 0; + } + } + + DEBUG_PRINTF("scan buffer (%zu looks left)\n", look_end - look); + for(; look < look_end; ++look, reach += MULTI_REACH_BITVECTOR_LEN) { + s64a offset = base_offset + *look; + DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset); + + if (offset >= (s64a)ci->len) { + DEBUG_PRINTF("in the future\n"); + break; + } + + assert(offset >= 0 && offset < (s64a)ci->len); + u8 c = ci->buf[offset]; + path &= reach[c]; + DEBUG_PRINTF("reach[%x] = %02x path = %0xx\n", c, reach[c], path); + if (!path) { + DEBUG_PRINTF("char 0x%02x failed reach check\n", c); + return 0; + } + } + + DEBUG_PRINTF("OK :)\n"); + return 1; +} + +static never_inline +int roseCheckMultipathShufti16x8(const 
struct hs_scratch *scratch, + const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_16x8 *ri, + u64a end) { + const struct core_info *ci = &scratch->core_info; + s32 checkOffset = ri->base_offset; + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + assert(ri->last_start <= 0); + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + if ((u64a)(0 - ri->last_start) > end) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + } + + u32 valid_data_mask; + m128 data_init = getData128(ci, offset, &valid_data_mask); + m128 data_select_mask = loadu128(ri->data_select_mask); + + u32 valid_path_mask = 0; + if (unlikely(!(valid_data_mask & 1))) { + DEBUG_PRINTF("lose part of backward data\n"); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); + + m128 expand_valid; + u64a expand_mask = 0x8080808080808080ULL; + u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); + u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); + DEBUG_PRINTF("expand_hi %llx\n", valid_hi); + DEBUG_PRINTF("expand_lo %llx\n", valid_lo); + expand_valid = set64x2(valid_hi, valid_lo); + valid_path_mask = ~movemask128(pshufb(expand_valid, + data_select_mask)); + } + + m128 data = pshufb(data_init, data_select_mask); + m256 nib_mask = loadu256(ri->nib_mask); + m128 bucket_select_mask = loadu128(ri->bucket_select_mask); + + u32 hi_bits_mask = ri->hi_bits_mask; + u32 lo_bits_mask = ri->lo_bits_mask; + u32 neg_mask = ri->neg_mask; + + if (validateMultipathShuftiMask16x8(data, nib_mask, + bucket_select_mask, + hi_bits_mask, lo_bits_mask, + neg_mask, valid_path_mask)) { + DEBUG_PRINTF("check multi-path shufti-16x8 successfully\n"); + return 1; + } else { + return 0; + } +} + +static never_inline +int roseCheckMultipathShufti32x8(const struct hs_scratch *scratch, + const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x8 *ri, + u64a end) { + const struct core_info *ci = &scratch->core_info; + s32 checkOffset = ri->base_offset; + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + assert(ri->last_start <= 0); + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + if ((u64a)(0 - ri->last_start) > end) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + } + + u32 valid_data_mask; + m128 data_m128 = getData128(ci, offset, &valid_data_mask); + m256 data_double = set2x128(data_m128); + m256 data_select_mask = loadu256(ri->data_select_mask); + + u32 valid_path_mask = 0; + m256 expand_valid; + if (unlikely(!(valid_data_mask & 1))) { + DEBUG_PRINTF("lose part of backward data\n"); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); + + u64a expand_mask = 0x8080808080808080ULL; + u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); + u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); + DEBUG_PRINTF("expand_hi %llx\n", valid_hi); + DEBUG_PRINTF("expand_lo %llx\n", valid_lo); + expand_valid = set64x4(valid_hi, valid_lo, valid_hi, + valid_lo); + valid_path_mask = ~movemask256(vpshufb(expand_valid, + data_select_mask)); + } + + m256 data = vpshufb(data_double, data_select_mask); + m256 hi_mask = loadu2x128(ri->hi_mask); + m256 lo_mask = loadu2x128(ri->lo_mask); + m256 bucket_select_mask = loadu256(ri->bucket_select_mask); 
+ + u32 hi_bits_mask = ri->hi_bits_mask; + u32 lo_bits_mask = ri->lo_bits_mask; + u32 neg_mask = ri->neg_mask; + + if (validateMultipathShuftiMask32x8(data, hi_mask, lo_mask, + bucket_select_mask, + hi_bits_mask, lo_bits_mask, + neg_mask, valid_path_mask)) { + DEBUG_PRINTF("check multi-path shufti-32x8 successfully\n"); + return 1; + } else { + return 0; + } +} + +static never_inline +int roseCheckMultipathShufti32x16(const struct hs_scratch *scratch, + const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x16 *ri, + u64a end) { + const struct core_info *ci = &scratch->core_info; + const s64a base_offset = (s64a)end - ci->buf_offset; + s32 checkOffset = ri->base_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + assert(ri->last_start <= 0); + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + if ((u64a)(0 - ri->last_start) > end) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + } + + u32 valid_data_mask; + m128 data_m128 = getData128(ci, offset, &valid_data_mask); + m256 data_double = set2x128(data_m128); + m256 data_select_mask = loadu256(ri->data_select_mask); + + u32 valid_path_mask = 0; + m256 expand_valid; + if (unlikely(!(valid_data_mask & 1))) { + DEBUG_PRINTF("lose part of backward data\n"); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); + + u64a expand_mask = 0x8080808080808080ULL; + u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); + u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); + DEBUG_PRINTF("expand_hi %llx\n", valid_hi); + DEBUG_PRINTF("expand_lo %llx\n", valid_lo); + expand_valid = set64x4(valid_hi, valid_lo, valid_hi, + valid_lo); + valid_path_mask = ~movemask256(vpshufb(expand_valid, + data_select_mask)); + } + + m256 data = vpshufb(data_double, data_select_mask); + + m256 hi_mask_1 = loadu2x128(ri->hi_mask); + m256 hi_mask_2 = loadu2x128(ri->hi_mask + 16); + m256 lo_mask_1 = loadu2x128(ri->lo_mask); + m256 lo_mask_2 = loadu2x128(ri->lo_mask + 16); + + m256 bucket_select_mask_hi = loadu256(ri->bucket_select_mask_hi); + m256 bucket_select_mask_lo = loadu256(ri->bucket_select_mask_lo); + + u32 hi_bits_mask = ri->hi_bits_mask; + u32 lo_bits_mask = ri->lo_bits_mask; + u32 neg_mask = ri->neg_mask; + + if (validateMultipathShuftiMask32x16(data, hi_mask_1, hi_mask_2, + lo_mask_1, lo_mask_2, + bucket_select_mask_hi, + bucket_select_mask_lo, + hi_bits_mask, lo_bits_mask, + neg_mask, valid_path_mask)) { + DEBUG_PRINTF("check multi-path shufti-32x16 successfully\n"); + return 1; + } else { + return 0; + } +} + +static never_inline +int roseCheckMultipathShufti64(const struct hs_scratch *scratch, + const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64 *ri, + u64a end) { + const struct core_info *ci = &scratch->core_info; + const s64a base_offset = (s64a)end - ci->buf_offset; + s32 checkOffset = ri->base_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + if ((u64a)(0 - ri->last_start) > end) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + } + + u32 valid_data_mask; + m128 data_m128 = getData128(ci, offset, &valid_data_mask); + m256 data_m256 = set2x128(data_m128); + m256 data_select_mask_1 = loadu256(ri->data_select_mask); + m256 data_select_mask_2 = loadu256(ri->data_select_mask + 
32); + + u64a valid_path_mask = 0; + m256 expand_valid; + if (unlikely(!(valid_data_mask & 1))) { + DEBUG_PRINTF("lose part of backward data\n"); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); + + u64a expand_mask = 0x8080808080808080ULL; + u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); + u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); + DEBUG_PRINTF("expand_hi %llx\n", valid_hi); + DEBUG_PRINTF("expand_lo %llx\n", valid_lo); + expand_valid = set64x4(valid_hi, valid_lo, valid_hi, + valid_lo); + u32 valid_path_1 = movemask256(vpshufb(expand_valid, + data_select_mask_1)); + u32 valid_path_2 = movemask256(vpshufb(expand_valid, + data_select_mask_2)); + valid_path_mask = ~((u64a)valid_path_1 | (u64a)valid_path_2 << 32); + } + + m256 data_1 = vpshufb(data_m256, data_select_mask_1); + m256 data_2 = vpshufb(data_m256, data_select_mask_2); + + m256 hi_mask = loadu2x128(ri->hi_mask); + m256 lo_mask = loadu2x128(ri->lo_mask); + + m256 bucket_select_mask_1 = loadu256(ri->bucket_select_mask); + m256 bucket_select_mask_2 = loadu256(ri->bucket_select_mask + 32); + + u64a hi_bits_mask = ri->hi_bits_mask; + u64a lo_bits_mask = ri->lo_bits_mask; + u64a neg_mask = ri->neg_mask; + + if (validateMultipathShuftiMask64(data_1, data_2, hi_mask, lo_mask, + bucket_select_mask_1, + bucket_select_mask_2, hi_bits_mask, + lo_bits_mask, neg_mask, + valid_path_mask)) { + DEBUG_PRINTF("check multi-path shufti-64 successfully\n"); + return 1; + } else { + return 0; + } +} + int roseNfaEarliestSom(u64a start, u64a end, ReportID id, void *context); static rose_inline @@ -1614,8 +1970,8 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_LOOKAROUND) { - if (!roseCheckLookaround(t, scratch, ri->index, ri->count, - end)) { + if (!roseCheckLookaround(t, scratch, ri->look_index, + ri->reach_index, ri->count, end)) { DEBUG_PRINTF("failed lookaround check\n"); assert(ri->fail_jump); // must progress pc += ri->fail_jump; @@ -2172,6 +2528,59 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, work_done = 0; } PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(MULTIPATH_LOOKAROUND) { + if (!roseMultipathLookaround(t, scratch, ri->look_index, + ri->reach_index, ri->count, + ri->last_start, ri->start_mask, + end)) { + DEBUG_PRINTF("failed multi-path lookaround check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_16x8) { + if (!roseCheckMultipathShufti16x8(scratch, ri, end)) { + DEBUG_PRINTF("failed multi-path shufti 16x8 check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x8) { + if (!roseCheckMultipathShufti32x8(scratch, ri, end)) { + DEBUG_PRINTF("failed multi-path shufti 32x8 check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x16) { + if (!roseCheckMultipathShufti32x16(scratch, ri, end)) { + DEBUG_PRINTF("failed multi-path shufti 32x16 check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_64) { + if (!roseCheckMultipathShufti64(scratch, ri, end)) { + DEBUG_PRINTF("failed multi-path shufti 64 check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + 
PROGRAM_NEXT_INSTRUCTION } } diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 6ee08952..a0edc711 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -82,6 +82,7 @@ #include "util/compile_context.h" #include "util/compile_error.h" #include "util/container.h" +#include "util/dump_charclass.h" #include "util/fatbit_build.h" #include "util/graph_range.h" #include "util/make_unique.h" @@ -99,6 +100,7 @@ #include #include #include +#include #include #include #include @@ -141,8 +143,8 @@ struct left_build_info { countingMiracleReach(cm_cr) {} // Constructor for a lookaround implementation. - explicit left_build_info(const vector &look) - : has_lookaround(true), lookaround(look) {} + explicit left_build_info(const vector> &looks) + : has_lookaround(true), lookaround(looks) {} u32 queue = 0; /* uniquely idents the left_build_info */ u32 lag = 0; @@ -154,7 +156,7 @@ struct left_build_info { CharReach countingMiracleReach; u32 countingMiracleOffset = 0; /* populated later when laying out bytecode */ bool has_lookaround = false; - vector lookaround; // alternative implementation to the NFA + vector> lookaround; // alternative implementation to the NFA }; /** @@ -197,12 +199,22 @@ struct build_context : noncopyable { ue2::unordered_map program_cache; - /** \brief LookEntry list cache, so that we don't have to go scanning - * through the full list to find cases we've used already. */ - ue2::unordered_map, size_t> lookaround_cache; + /** \brief LookEntry list cache, so that we can reuse the look index and + * reach index for the same lookaround. */ + ue2::unordered_map>, + pair> lookaround_cache; /** \brief Lookaround table for Rose roles. */ - vector lookaround; + vector>> lookaround; + + /** \brief Lookaround look table size. */ + size_t lookTableSize = 0; + + /** \brief Lookaround reach table size. + * since single path lookaround and multi-path lookaround have different + * bitvectors range (32 and 256), we need to maintain both look table size + * and reach table size. */ + size_t reachTableSize = 0; /** \brief State indices, for those roles that have them. */ ue2::unordered_map roleStateIndices; @@ -1582,7 +1594,7 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc, // TODO: Handle SOM-tracking cases as well. 
if (cc.grey.roseLookaroundMasks && is_transient && !g[v].left.tracksSom()) { - vector lookaround; + vector> lookaround; if (makeLeftfixLookaround(tbi, v, lookaround)) { DEBUG_PRINTF("implementing as lookaround!\n"); bc.leftfix_info.emplace(v, left_build_info(lookaround)); @@ -2651,15 +2663,7 @@ bool hasEodAnchors(const RoseBuildImpl &build, const build_context &bc, } static -void writeLookaroundTables(build_context &bc, RoseEngine &proto) { - const auto &look_vec = bc.lookaround; - DEBUG_PRINTF("%zu lookaround table entries\n", look_vec.size()); - - vector look_table(look_vec.size(), 0); - vector reach_table(REACH_BITVECTOR_LEN * look_vec.size(), 0); - - s8 *look = look_table.data(); - u8 *reach = reach_table.data(); +void writeLookaround(const vector &look_vec, s8 *&look, u8 *&reach) { for (const auto &le : look_vec) { *look = verify_s8(le.offset); const CharReach &cr = le.reach; @@ -2670,6 +2674,52 @@ void writeLookaroundTables(build_context &bc, RoseEngine &proto) { ++look; reach += REACH_BITVECTOR_LEN; } +} + +static +void writeMultipathLookaround(const vector> &multi_look, + s8 *&look, u8 *&reach) { + for (const auto &m : multi_look) { + u8 u = 0; + assert(m.size() == MAX_LOOKAROUND_PATHS); + for (size_t i = 0; i < m.size(); i++) { + if (m[i].reach.none()) { + u |= (u8)1U << i; + } + } + std::fill_n(reach, MULTI_REACH_BITVECTOR_LEN, u); + + for (size_t i = 0; i < m.size(); i++) { + const CharReach &cr = m[i].reach; + if (cr.none()) { + continue; + } + *look = m[i].offset; + + for (size_t c = cr.find_first(); c != cr.npos; + c = cr.find_next(c)) { + reach[c] |= (u8)1U << i; + } + } + + ++look; + reach += MULTI_REACH_BITVECTOR_LEN; + } +} + +static +void writeLookaroundTables(build_context &bc, RoseEngine &proto) { + vector look_table(bc.lookTableSize, 0); + vector reach_table(bc.reachTableSize, 0); + s8 *look = look_table.data(); + u8 *reach = reach_table.data(); + for (const auto &l : bc.lookaround) { + if (l.size() == 1) { + writeLookaround(l.front(), look, reach); + } else { + writeMultipathLookaround(l, look, reach); + } + } proto.lookaroundTableOffset = bc.engine_blob.add_range(look_table); proto.lookaroundReachOffset = bc.engine_blob.add_range(reach_table); @@ -2804,30 +2854,37 @@ bool onlyAtEod(const RoseBuildImpl &tbi, RoseVertex v) { } static -u32 addLookaround(build_context &bc, const vector &look) { +void addLookaround(build_context &bc, + const vector> &look, + u32 &look_index, u32 &reach_index) { // Check the cache. auto it = bc.lookaround_cache.find(look); if (it != bc.lookaround_cache.end()) { - DEBUG_PRINTF("reusing look at idx %zu\n", it->second); - return verify_u32(it->second); + look_index = verify_u32(it->second.first); + reach_index = verify_u32(it->second.second); + DEBUG_PRINTF("reusing look at idx %u\n", look_index); + DEBUG_PRINTF("reusing reach at idx %u\n", reach_index); + return; } - // Linear scan for sequence. 
- auto seq_it = search(begin(bc.lookaround), end(bc.lookaround), begin(look), - end(look)); - if (seq_it != end(bc.lookaround)) { - size_t idx = distance(begin(bc.lookaround), seq_it); - DEBUG_PRINTF("linear scan found look at idx %zu\n", idx); - bc.lookaround_cache.emplace(look, idx); - return verify_u32(idx); + size_t look_idx = bc.lookTableSize; + size_t reach_idx = bc.reachTableSize; + + if (look.size() == 1) { + bc.lookTableSize += look.front().size(); + bc.reachTableSize += look.front().size() * REACH_BITVECTOR_LEN; + } else { + bc.lookTableSize += look.size(); + bc.reachTableSize += look.size() * MULTI_REACH_BITVECTOR_LEN; } - // New sequence. - size_t idx = bc.lookaround.size(); - bc.lookaround_cache.emplace(look, idx); - insert(&bc.lookaround, bc.lookaround.end(), look); - DEBUG_PRINTF("adding look at idx %zu\n", idx); - return verify_u32(idx); + bc.lookaround_cache.emplace(look, make_pair(look_idx, reach_idx)); + bc.lookaround.emplace_back(look); + + DEBUG_PRINTF("adding look at idx %zu\n", look_idx); + DEBUG_PRINTF("adding reach at idx %zu\n", reach_idx); + look_index = verify_u32(look_idx); + reach_index = verify_u32(reach_idx); } static @@ -2977,7 +3034,7 @@ struct cmpNibble { // Insert all pairs of bucket and offset into buckets. static really_inline void getAllBuckets(const vector &look, - map, cmpNibble> &buckets, u32 &neg_mask) { + map, cmpNibble> &buckets, u64a &neg_mask) { s32 base_offset = verify_s32(look.front().offset); for (const auto &entry : look) { CharReach cr = entry.reach; @@ -2985,7 +3042,7 @@ void getAllBuckets(const vector &look, if (cr.count() > 128 ) { cr.flip(); } else { - neg_mask ^= 1 << (entry.offset - base_offset); + neg_mask ^= 1ULL << (entry.offset - base_offset); } map lo2hi; // We treat Ascii Table as a 16x16 grid. @@ -3037,23 +3094,16 @@ void nibMaskUpdate(array &mask, u32 data, u8 bit_index) { } static -bool makeRoleShufti(const vector &look, - RoseProgram &program) { - - s32 base_offset = verify_s32(look.front().offset); - if (look.back().offset >= base_offset + 32) { - return false; - } - array hi_mask, lo_mask; - hi_mask.fill(0); - lo_mask.fill(0); - array bucket_select_hi, bucket_select_lo; - bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8. - bucket_select_lo.fill(0); - u8 bit_index = 0; // number of buckets +bool getShuftiMasks(const vector &look, array &hi_mask, + array &lo_mask, u8 *bucket_select_hi, + u8 *bucket_select_lo, u64a &neg_mask, + u8 &bit_idx, size_t len) { map nib; // map every bucket to its bucket number. map, cmpNibble> bucket2offsets; - u32 neg_mask = ~0u; + s32 base_offset = look.front().offset; + + bit_idx = 0; + neg_mask = ~0ULL; getAllBuckets(look, bucket2offsets, neg_mask); @@ -3061,15 +3111,15 @@ bool makeRoleShufti(const vector &look, u32 hi_lo = it.first; // New bucket. 
if (!nib[hi_lo]) { - if (bit_index >= 16) { + if ((bit_idx >= 8 && len == 64) || bit_idx >= 16) { return false; } - nib[hi_lo] = 1 << bit_index; + nib[hi_lo] = 1 << bit_idx; nibUpdate(nib, hi_lo); - nibMaskUpdate(hi_mask, hi_lo >> 16, bit_index); - nibMaskUpdate(lo_mask, hi_lo & 0xffff, bit_index); - bit_index++; + nibMaskUpdate(hi_mask, hi_lo >> 16, bit_idx); + nibMaskUpdate(lo_mask, hi_lo & 0xffff, bit_idx); + bit_idx++; } DEBUG_PRINTF("hi_lo %x bucket %x\n", hi_lo, nib[hi_lo]); @@ -3082,6 +3132,113 @@ bool makeRoleShufti(const vector &look, bucket_select_lo[offset - base_offset] |= nib_lo; } } + return true; +} + +static +unique_ptr +makeCheckShufti16x8(u32 offset_range, u8 bucket_idx, + const array &hi_mask, const array &lo_mask, + const array &bucket_select_mask, + u32 neg_mask, s32 base_offset, + const RoseInstruction *end_inst) { + if (offset_range > 16 || bucket_idx > 8) { + return nullptr; + } + array nib_mask; + array bucket_select_mask_16; + copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin()); + copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16); + copy(bucket_select_mask.begin(), bucket_select_mask.begin() + 16, + bucket_select_mask_16.begin()); + return make_unique + (nib_mask, bucket_select_mask_16, + neg_mask & 0xffff, base_offset, end_inst); +} + +static +unique_ptr +makeCheckShufti32x8(u32 offset_range, u8 bucket_idx, + const array &hi_mask, const array &lo_mask, + const array &bucket_select_mask, + u32 neg_mask, s32 base_offset, + const RoseInstruction *end_inst) { + if (offset_range > 32 || bucket_idx > 8) { + return nullptr; + } + + array hi_mask_16; + array lo_mask_16; + copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_16.begin()); + copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_16.begin()); + return make_unique + (hi_mask_16, lo_mask_16, bucket_select_mask, + neg_mask, base_offset, end_inst); +} + +static +unique_ptr +makeCheckShufti16x16(u32 offset_range, u8 bucket_idx, + const array &hi_mask, const array &lo_mask, + const array &bucket_select_mask_lo, + const array &bucket_select_mask_hi, + u32 neg_mask, s32 base_offset, + const RoseInstruction *end_inst) { + if (offset_range > 16 || bucket_idx > 16) { + return nullptr; + } + + array bucket_select_mask_32; + copy(bucket_select_mask_lo.begin(), bucket_select_mask_lo.begin() + 16, + bucket_select_mask_32.begin()); + copy(bucket_select_mask_hi.begin(), bucket_select_mask_hi.begin() + 16, + bucket_select_mask_32.begin() + 16); + return make_unique + (hi_mask, lo_mask, bucket_select_mask_32, + neg_mask & 0xffff, base_offset, end_inst); +} +static +unique_ptr +makeCheckShufti32x16(u32 offset_range, u8 bucket_idx, + const array &hi_mask, const array &lo_mask, + const array &bucket_select_mask_lo, + const array &bucket_select_mask_hi, + u32 neg_mask, s32 base_offset, + const RoseInstruction *end_inst) { + if (offset_range > 32 || bucket_idx > 16) { + return nullptr; + } + + return make_unique + (hi_mask, lo_mask, bucket_select_mask_hi, + bucket_select_mask_lo, neg_mask, base_offset, end_inst); +} + +static +bool makeRoleShufti(const vector &look, + RoseProgram &program) { + + s32 base_offset = verify_s32(look.front().offset); + if (look.back().offset >= base_offset + 32) { + return false; + } + + u8 bucket_idx = 0; // number of buckets + u64a neg_mask_64; + array hi_mask; + array lo_mask; + array bucket_select_hi; + array bucket_select_lo; + hi_mask.fill(0); + lo_mask.fill(0); + bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8. 
+ bucket_select_lo.fill(0); + + if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi.data(), + bucket_select_lo.data(), neg_mask_64, bucket_idx, 32)) { + return false; + } + u32 neg_mask = (u32)neg_mask_64; DEBUG_PRINTF("hi_mask %s\n", convertMaskstoString(hi_mask.data(), 32).c_str()); @@ -3093,48 +3250,29 @@ bool makeRoleShufti(const vector &look, convertMaskstoString(bucket_select_lo.data(), 32).c_str()); const auto *end_inst = program.end_instruction(); - if (bit_index < 8) { - if (look.back().offset < base_offset + 16) { - neg_mask &= 0xffff; - array nib_mask; - array bucket_select_mask_16; - copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin()); - copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16); - copy(bucket_select_lo.begin(), bucket_select_lo.begin() + 16, - bucket_select_mask_16.begin()); - auto ri = make_unique - (nib_mask, bucket_select_mask_16, - neg_mask, base_offset, end_inst); - program.add_before_end(move(ri)); - } else { - array hi_mask_16; - array lo_mask_16; - copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_16.begin()); - copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_16.begin()); - auto ri = make_unique - (hi_mask_16, lo_mask_16, bucket_select_lo, - neg_mask, base_offset, end_inst); - program.add_before_end(move(ri)); - } - } else { - if (look.back().offset < base_offset + 16) { - neg_mask &= 0xffff; - array bucket_select_mask_32; - copy(bucket_select_lo.begin(), bucket_select_lo.begin() + 16, - bucket_select_mask_32.begin()); - copy(bucket_select_hi.begin(), bucket_select_hi.begin() + 16, - bucket_select_mask_32.begin() + 16); - auto ri = make_unique - (hi_mask, lo_mask, bucket_select_mask_32, - neg_mask, base_offset, end_inst); - program.add_before_end(move(ri)); - } else { - auto ri = make_unique - (hi_mask, lo_mask, bucket_select_hi, bucket_select_lo, - neg_mask, base_offset, end_inst); - program.add_before_end(move(ri)); - } + s32 offset_range = look.back().offset - base_offset + 1; + + auto ri = makeCheckShufti16x8(offset_range, bucket_idx, hi_mask, lo_mask, + bucket_select_lo, neg_mask, base_offset, + end_inst); + if (!ri) { + ri = makeCheckShufti32x8(offset_range, bucket_idx, hi_mask, lo_mask, + bucket_select_lo, neg_mask, base_offset, + end_inst); } + if (!ri) { + ri = makeCheckShufti16x16(offset_range, bucket_idx, hi_mask, lo_mask, + bucket_select_lo, bucket_select_hi, + neg_mask, base_offset, end_inst); + } + if (!ri) { + ri = makeCheckShufti32x16(offset_range, bucket_idx, hi_mask, lo_mask, + bucket_select_lo, bucket_select_hi, + neg_mask, base_offset, end_inst); + } + assert(ri); + program.add_before_end(move(ri)); + return true; } @@ -3153,9 +3291,13 @@ void makeLookaroundInstruction(build_context &bc, const vector &look, if (look.size() == 1) { s8 offset = look.begin()->offset; - u32 look_idx = addLookaround(bc, look); - auto ri = make_unique(offset, look_idx, - program.end_instruction()); + u32 look_idx, reach_idx; + vector> lookaround; + lookaround.emplace_back(look); + addLookaround(bc, lookaround, look_idx, reach_idx); + // We don't need look_idx here. 
+ auto ri = make_unique(offset, reach_idx, + program.end_instruction()); program.add_before_end(move(ri)); return; } @@ -3172,10 +3314,242 @@ void makeLookaroundInstruction(build_context &bc, const vector &look, return; } - u32 look_idx = addLookaround(bc, look); + u32 look_idx, reach_idx; + vector> lookaround; + lookaround.emplace_back(look); + addLookaround(bc, lookaround, look_idx, reach_idx); u32 look_count = verify_u32(look.size()); - auto ri = make_unique(look_idx, look_count, + auto ri = make_unique(look_idx, reach_idx, + look_count, + program.end_instruction()); + program.add_before_end(move(ri)); +} + +#if defined(DEBUG) || defined(DUMP_SUPPORT) +static UNUSED +string dumpMultiLook(const vector &looks) { + ostringstream oss; + for (auto it = looks.begin(); it != looks.end(); ++it) { + if (it != looks.begin()) { + oss << ", "; + } + oss << "{" << int(it->offset) << ": " << describeClass(it->reach) << "}"; + } + return oss.str(); +} +#endif + +static +bool makeRoleMultipathShufti(const vector> &multi_look, + RoseProgram &program) { + if (multi_look.empty()) { + return false; + } + + // find the base offset + assert(!multi_look[0].empty()); + s32 base_offset = multi_look[0].front().offset; + s32 last_start = base_offset; + s32 end_offset = multi_look[0].back().offset; + size_t multi_len = 0; + + for (const auto &look : multi_look) { + assert(look.size() > 0); + multi_len += look.size(); + + LIMIT_TO_AT_MOST(&base_offset, look.front().offset); + ENSURE_AT_LEAST(&last_start, look.front().offset); + ENSURE_AT_LEAST(&end_offset, look.back().offset); + } + + assert(last_start < 0); + + if (end_offset - base_offset >= MULTIPATH_MAX_LEN) { + return false; + } + + if (multi_len <= 16) { + multi_len = 16; + } else if (multi_len <= 32) { + multi_len = 32; + } else if (multi_len <= 64) { + multi_len = 64; + } else { + DEBUG_PRINTF("too long for multi-path\n"); + return false; + } + + vector linear_look; + array data_select_mask; + data_select_mask.fill(0); + u64a hi_bits_mask = 0; + u64a lo_bits_mask = 0; + + for (const auto &look : multi_look) { + assert(linear_look.size() < 64); + lo_bits_mask |= 1LLU << linear_look.size(); + for (const auto &entry : look) { + assert(entry.offset - base_offset < MULTIPATH_MAX_LEN); + data_select_mask[linear_look.size()] = + verify_u8(entry.offset - base_offset); + linear_look.emplace_back(verify_s8(linear_look.size()), entry.reach); + } + hi_bits_mask |= 1LLU << (linear_look.size() - 1); + } + + u8 bit_index = 0; // number of buckets + u64a neg_mask; + array hi_mask; + array lo_mask; + array bucket_select_hi; + array bucket_select_lo; + hi_mask.fill(0); + lo_mask.fill(0); + bucket_select_hi.fill(0); + bucket_select_lo.fill(0); + + if (!getShuftiMasks(linear_look, hi_mask, lo_mask, bucket_select_hi.data(), + bucket_select_lo.data(), neg_mask, bit_index, + multi_len)) { + return false; + } + + DEBUG_PRINTF("hi_mask %s\n", + convertMaskstoString(hi_mask.data(), 16).c_str()); + DEBUG_PRINTF("lo_mask %s\n", + convertMaskstoString(lo_mask.data(), 16).c_str()); + DEBUG_PRINTF("bucket_select_hi %s\n", + convertMaskstoString(bucket_select_hi.data(), 64).c_str()); + DEBUG_PRINTF("bucket_select_lo %s\n", + convertMaskstoString(bucket_select_lo.data(), 64).c_str()); + DEBUG_PRINTF("data_select_mask %s\n", + convertMaskstoString(data_select_mask.data(), 64).c_str()); + DEBUG_PRINTF("hi_bits_mask %llx\n", hi_bits_mask); + DEBUG_PRINTF("lo_bits_mask %llx\n", lo_bits_mask); + DEBUG_PRINTF("neg_mask %llx\n", neg_mask); + DEBUG_PRINTF("base_offset %d\n", base_offset); + 
DEBUG_PRINTF("last_start %d\n", last_start); + + // Since we don't have 16x16 now, just call 32x16 instead. + if (bit_index > 8) { + assert(multi_len <= 32); + multi_len = 32; + } + + const auto *end_inst = program.end_instruction(); + assert(multi_len == 16 || multi_len == 32 || multi_len == 64); + if (multi_len == 16) { + neg_mask &= 0xffff; + assert(!(hi_bits_mask & ~0xffffULL)); + assert(!(lo_bits_mask & ~0xffffULL)); + assert(bit_index <=8); + array nib_mask; + copy(begin(lo_mask), begin(lo_mask) + 16, nib_mask.begin()); + copy(begin(hi_mask), begin(hi_mask) + 16, nib_mask.begin() + 16); + + auto ri = make_unique + (nib_mask, bucket_select_lo, data_select_mask, hi_bits_mask, + lo_bits_mask, neg_mask, base_offset, last_start, end_inst); + program.add_before_end(move(ri)); + } else if (multi_len == 32) { + neg_mask &= 0xffffffff; + assert(!(hi_bits_mask & ~0xffffffffULL)); + assert(!(lo_bits_mask & ~0xffffffffULL)); + if (bit_index <= 8) { + auto ri = make_unique + (hi_mask, lo_mask, bucket_select_lo, data_select_mask, + hi_bits_mask, lo_bits_mask, neg_mask, base_offset, + last_start, end_inst); + program.add_before_end(move(ri)); + } else { + auto ri = make_unique + (hi_mask, lo_mask, bucket_select_hi, bucket_select_lo, + data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask, + base_offset, last_start, end_inst); + program.add_before_end(move(ri)); + } + } else { + auto ri = make_unique + (hi_mask, lo_mask, bucket_select_lo, data_select_mask, + hi_bits_mask, lo_bits_mask, neg_mask, base_offset, + last_start, end_inst); + program.add_before_end(move(ri)); + } + return true; +} + +static +void makeRoleMultipathLookaround(build_context &bc, + const vector> &multi_look, + RoseProgram &program) { + assert(!multi_look.empty()); + assert(multi_look.size() <= MAX_LOOKAROUND_PATHS); + vector> ordered_look; + set look_offset; + + assert(!multi_look[0].empty()); + s32 last_start = multi_look[0][0].offset; + + // build offset table. 
+ for (const auto &look : multi_look) { + assert(look.size() > 0); + last_start = max(last_start, (s32)look.begin()->offset); + + for (const auto &t : look) { + look_offset.insert(t.offset); + } + } + + array start_mask; + if (multi_look.size() < MAX_LOOKAROUND_PATHS) { + start_mask.fill((1 << multi_look.size()) - 1); + } else { + start_mask.fill(0xff); + } + + u32 path_idx = 0; + for (const auto &look : multi_look) { + for (const auto &t : look) { + assert(t.offset >= (int)*look_offset.begin()); + size_t update_offset = t.offset - *look_offset.begin() + 1; + if (update_offset < start_mask.size()) { + start_mask[update_offset] &= ~(1 << path_idx); + } + } + path_idx++; + } + + for (u32 i = 1; i < MULTIPATH_MAX_LEN; i++) { + start_mask[i] &= start_mask[i - 1]; + DEBUG_PRINTF("start_mask[%u] = %x\n", i, start_mask[i]); + } + + assert(look_offset.size() <= MULTIPATH_MAX_LEN); + + assert(last_start < 0); + + for (const auto &offset : look_offset) { + vector multi_entry; + multi_entry.resize(MAX_LOOKAROUND_PATHS); + + for (size_t i = 0; i < multi_look.size(); i++) { + for (const auto &t : multi_look[i]) { + if (t.offset == offset) { + multi_entry[i] = t; + } + } + } + ordered_look.emplace_back(multi_entry); + } + + u32 look_idx, reach_idx; + addLookaround(bc, ordered_look, look_idx, reach_idx); + u32 look_count = verify_u32(ordered_look.size()); + + auto ri = make_unique(look_idx, reach_idx, + look_count, last_start, + start_mask, program.end_instruction()); program.add_before_end(move(ri)); } @@ -3187,25 +3561,34 @@ void makeRoleLookaround(const RoseBuildImpl &build, build_context &bc, return; } - vector look; + vector> looks; // Lookaround from leftfix (mandatory). if (contains(bc.leftfix_info, v) && bc.leftfix_info.at(v).has_lookaround) { DEBUG_PRINTF("using leftfix lookaround\n"); - look = bc.leftfix_info.at(v).lookaround; + looks = bc.leftfix_info.at(v).lookaround; } // We may be able to find more lookaround info (advisory) and merge it // in. 
- vector look_more; - findLookaroundMasks(build, v, look_more); - mergeLookaround(look, look_more); - - if (look.empty()) { + if (looks.size() <= 1) { + vector look; + vector look_more; + if (!looks.empty()) { + look = move(looks.front()); + } + findLookaroundMasks(build, v, look_more); + mergeLookaround(look, look_more); + if (!look.empty()) { + makeLookaroundInstruction(bc, look, program); + } return; } - makeLookaroundInstruction(bc, look, program); + if (!makeRoleMultipathShufti(looks, program)) { + assert(looks.size() <= 8); + makeRoleMultipathLookaround(bc, looks, program); + } } static diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 0e53d59d..30dccb1a 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -569,10 +569,20 @@ static CharReach bitvectorToReach(const u8 *reach) { CharReach cr; - for (size_t i = 0; i < 256; i++) { + for (size_t i = 0; i < N_CHARS; i++) { if (reach[i / 8] & (1U << (i % 8))) { cr.set(i); + } + } + return cr; +} +static +CharReach multiBitvectorToReach(const u8 *reach, u8 path_mask) { + CharReach cr; + for (size_t i = 0; i < N_CHARS; i++) { + if (reach[i] & path_mask) { + cr.set(i); } } return cr; @@ -587,9 +597,9 @@ void dumpLookaround(ofstream &os, const RoseEngine *t, const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset); const u8 *reach_base = base + t->lookaroundReachOffset; - const s8 *look = look_base + ri->index; + const s8 *look = look_base + ri->look_index; const s8 *look_end = look + ri->count; - const u8 *reach = reach_base + ri->index * REACH_BITVECTOR_LEN; + const u8 *reach = reach_base + ri->reach_index; os << " contents:" << endl; @@ -601,6 +611,41 @@ void dumpLookaround(ofstream &os, const RoseEngine *t, } } +static +void dumpMultipathLookaround(ofstream &os, const RoseEngine *t, + const ROSE_STRUCT_MULTIPATH_LOOKAROUND *ri) { + assert(ri); + + const u8 *base = (const u8 *)t; + const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset); + const u8 *reach_base = base + t->lookaroundReachOffset; + + const s8 *look_begin = look_base + ri->look_index; + const s8 *look_end = look_begin + ri->count; + const u8 *reach_begin = reach_base + ri->reach_index; + + os << " contents:" << endl; + + u32 path_mask = ri->start_mask[0]; + while (path_mask) { + u32 path = findAndClearLSB_32(&path_mask); + os << " Path #" << path << ":" << endl; + os << " "; + + const s8 *look = look_begin; + const u8 *reach = reach_begin; + for (; look < look_end; look++, reach += MULTI_REACH_BITVECTOR_LEN) { + CharReach cr = multiBitvectorToReach(reach, 1U << path); + if (cr.any() && !cr.all()) { + os << "<" << int(*look) << ": "; + describeClass(os, cr, 1000, CC_OUT_TEXT); + os << "> "; + } + } + os << endl; + } +} + static vector sparseIterValues(const mmbit_sparse_iter *it, u32 num_bits) { vector keys; @@ -666,7 +711,126 @@ string dumpStrMask(const u8 *mask, size_t len) { return oss.str(); } -#define PROGRAM_CASE(name) \ +static +CharReach shufti2cr(const u8 *lo, const u8 *hi, u8 bucket_mask) { + CharReach cr; + for (u32 i = 0; i < N_CHARS; i++) { + if(lo[i & 0xf] & hi[i >> 4] & bucket_mask) { + cr.set(i); + } + } + return cr; +} + +static +void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi, + const u8 *bucket_mask, u32 neg_mask, s32 offset) { + assert(len == 16 || len == 32); + os << " contents:" << endl; + for (u32 idx = 0; idx < len; idx++) { + CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]); + + if (neg_mask & (1U << idx)) { + cr.flip(); + } + + if (cr.any() && 
!cr.all()) { + os << " " << std::setw(4) << std::setfill(' ') + << int(offset + idx) << ": "; + describeClass(os, cr, 1000, CC_OUT_TEXT); + os << endl; + } + } +} + +static +void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi, + const u8 *lo_2, const u8 *hi_2, const u8 *bucket_mask, + const u8 *bucket_mask_2, u32 neg_mask, s32 offset) { + assert(len == 16 || len == 32); + os << " contents:" << endl; + for (u32 idx = 0; idx < len; idx++) { + CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]); + cr |= shufti2cr(lo_2, hi_2, bucket_mask_2[idx]); + + if (neg_mask & (1U << idx)) { + cr.flip(); + } + + if (cr.any() && !cr.all()) { + os << " " << std::setw(4) << std::setfill(' ') + << int(offset + idx) << ": "; + describeClass(os, cr, 1000, CC_OUT_TEXT); + os << endl; + } + } +} + +static +void dumpMultipathShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi, + const u8 *bucket_mask, const u8 *data_offset, + u64a neg_mask, s32 base_offset) { + assert(len == 16 || len == 32 || len == 64); + os << " contents:" << endl; + u32 path = 0; + for (u32 idx = 0; idx < len; idx++) { + CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]); + + if (neg_mask & (1ULL << idx)) { + cr.flip(); + } + + if (cr.any() && !cr.all()) { + if (idx == 0 || data_offset[idx - 1] > data_offset[idx]) { + path++; + if (idx) { + os << endl; + } + os << " Path #" << path << ":" << endl; + os << " "; + } + + os << "<" << int(base_offset + data_offset[idx]) << ": "; + describeClass(os, cr, 1000, CC_OUT_TEXT); + os << "> "; + } + } + os << endl; +} + +static +void dumpMultipathShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi, + const u8 *lo_2, const u8 *hi_2, const u8 *bucket_mask, + const u8 *bucket_mask_2, const u8 *data_offset, + u32 neg_mask, s32 base_offset) { + assert(len == 16 || len == 32 || len == 64); + os << " contents:"; + u32 path = 0; + for (u32 idx = 0; idx < len; idx++) { + CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]); + cr |= shufti2cr(lo_2, hi_2, bucket_mask_2[idx]); + + if (neg_mask & (1ULL << idx)) { + cr.flip(); + } + + if (cr.any() && !cr.all()) { + if (idx == 0 || data_offset[idx - 1] > data_offset[idx]) { + path++; + os << endl; + os << " Path #" << path << ":" << endl; + os << " "; + } + + os << "<" << int(base_offset + data_offset[idx]) << ": "; + describeClass(os, cr, 1000, CC_OUT_TEXT); + os << "> "; + } + } + os << endl; +} + + #define PROGRAM_CASE(name) \ case ROSE_INSTR_##name: { \ os << " " << std::setw(4) << std::setfill('0') << (pc - pc_base) \ << ": " #name " (" << (int)ROSE_INSTR_##name << ")" << endl; \ @@ -741,7 +905,8 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_LOOKAROUND) { - os << " index " << ri->index << endl; + os << " look_index " << ri->look_index << endl; + os << " reach_index " << ri->reach_index << endl; os << " count " << ri->count << endl; os << " fail_jump " << offset + ri->fail_jump << endl; dumpLookaround(os, t, ri); @@ -795,8 +960,13 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { << dumpStrMask(ri->bucket_select_mask, sizeof(ri->bucket_select_mask)) << endl; + os << " neg_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->neg_mask << std::dec << endl; os << " offset " << ri->offset << endl; os << " fail_jump " << offset + ri->fail_jump << endl; + dumpLookaroundShufti(os, 16, ri->nib_mask, ri->nib_mask + 16, + ri->bucket_select_mask, ri->neg_mask, + ri->offset); } PROGRAM_NEXT_INSTRUCTION @@ -811,8 +981,13 @@ void 
dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { << dumpStrMask(ri->bucket_select_mask, sizeof(ri->bucket_select_mask)) << endl; + os << " neg_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->neg_mask << std::dec << endl; os << " offset " << ri->offset << endl; os << " fail_jump " << offset + ri->fail_jump << endl; + dumpLookaroundShufti(os, 32, ri->lo_mask, ri->hi_mask, + ri->bucket_select_mask, ri->neg_mask, + ri->offset); } PROGRAM_NEXT_INSTRUCTION @@ -827,8 +1002,15 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { << dumpStrMask(ri->bucket_select_mask, sizeof(ri->bucket_select_mask)) << endl; + os << " neg_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->neg_mask << std::dec << endl; os << " offset " << ri->offset << endl; os << " fail_jump " << offset + ri->fail_jump << endl; + dumpLookaroundShufti(os, 16, ri->lo_mask, ri->hi_mask, + ri->lo_mask + 16, ri->hi_mask + 16, + ri->bucket_select_mask, + ri->bucket_select_mask + 16, + ri->neg_mask, ri->offset); } PROGRAM_NEXT_INSTRUCTION @@ -847,8 +1029,15 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { << dumpStrMask(ri->bucket_select_mask_lo, sizeof(ri->bucket_select_mask_lo)) << endl; + os << " neg_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->neg_mask << std::dec << endl; os << " offset " << ri->offset << endl; os << " fail_jump " << offset + ri->fail_jump << endl; + dumpLookaroundShufti(os, 32, ri->lo_mask, ri->hi_mask, + ri->lo_mask + 16, ri->hi_mask + 16, + ri->bucket_select_mask_lo, + ri->bucket_select_mask_hi, + ri->neg_mask, ri->offset); } PROGRAM_NEXT_INSTRUCTION @@ -1103,6 +1292,146 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { PROGRAM_CASE(CLEAR_WORK_DONE) {} PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(MULTIPATH_LOOKAROUND) { + os << " look_index " << ri->look_index << endl; + os << " reach_index " << ri->reach_index << endl; + os << " count " << ri->count << endl; + os << " last_start " << ri->last_start << endl; + os << " start_mask " + << dumpStrMask(ri->start_mask, sizeof(ri->start_mask)) + << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + dumpMultipathLookaround(os, t, ri); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_16x8) { + os << " nib_mask " + << dumpStrMask(ri->nib_mask, sizeof(ri->nib_mask)) + << endl; + os << " bucket_select_mask " + << dumpStrMask(ri->bucket_select_mask, + sizeof(ri->bucket_select_mask)) + << endl; + os << " data_select_mask " + << dumpStrMask(ri->data_select_mask, + sizeof(ri->data_select_mask)) + << endl; + os << " hi_bits_mask 0x" << std::hex << std::setw(4) + << std::setfill('0') << ri->hi_bits_mask << std::dec << endl; + os << " lo_bits_mask 0x" << std::hex << std::setw(4) + << std::setfill('0') << ri->lo_bits_mask << std::dec << endl; + os << " neg_mask 0x" << std::hex << std::setw(4) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " base_offset " << ri->base_offset << endl; + os << " last_start " << ri->last_start << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + dumpMultipathShufti(os, 16, ri->nib_mask, ri->nib_mask + 16, + ri->bucket_select_mask, + ri->data_select_mask, + ri->neg_mask, ri->base_offset); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x8) { + os << " hi_mask " + << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) + << endl; + os << " lo_mask " + << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) + << endl; + os << " 
bucket_select_mask " + << dumpStrMask(ri->bucket_select_mask, + sizeof(ri->bucket_select_mask)) + << endl; + os << " data_select_mask " + << dumpStrMask(ri->data_select_mask, + sizeof(ri->data_select_mask)) + << endl; + os << " hi_bits_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->hi_bits_mask << std::dec << endl; + os << " lo_bits_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->lo_bits_mask << std::dec << endl; + os << " neg_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " base_offset " << ri->base_offset << endl; + os << " last_start " << ri->last_start << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + dumpMultipathShufti(os, 32, ri->lo_mask, ri->hi_mask, + ri->bucket_select_mask, + ri->data_select_mask, + ri->neg_mask, ri->base_offset); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x16) { + os << " hi_mask " + << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) + << endl; + os << " lo_mask " + << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) + << endl; + os << " bucket_select_mask_hi " + << dumpStrMask(ri->bucket_select_mask_hi, + sizeof(ri->bucket_select_mask_hi)) + << endl; + os << " bucket_select_mask_lo " + << dumpStrMask(ri->bucket_select_mask_lo, + sizeof(ri->bucket_select_mask_lo)) + << endl; + os << " data_select_mask " + << dumpStrMask(ri->data_select_mask, + sizeof(ri->data_select_mask)) + << endl; + os << " hi_bits_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->hi_bits_mask << std::dec << endl; + os << " lo_bits_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->lo_bits_mask << std::dec << endl; + os << " neg_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " base_offset " << ri->base_offset << endl; + os << " last_start " << ri->last_start << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + dumpMultipathShufti(os, 32, ri->lo_mask, ri->hi_mask, + ri->lo_mask + 16, ri->hi_mask + 16, + ri->bucket_select_mask_lo, + ri->bucket_select_mask_hi, + ri->data_select_mask, + ri->neg_mask, ri->base_offset); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_64) { + os << " hi_mask " + << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) + << endl; + os << " lo_mask " + << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) + << endl; + os << " bucket_select_mask " + << dumpStrMask(ri->bucket_select_mask, + sizeof(ri->bucket_select_mask)) + << endl; + os << " data_select_mask " + << dumpStrMask(ri->data_select_mask, + sizeof(ri->data_select_mask)) + << endl; + os << " hi_bits_mask 0x" << std::hex << std::setw(16) + << std::setfill('0') << ri->hi_bits_mask << std::dec << endl; + os << " lo_bits_mask 0x" << std::hex << std::setw(16) + << std::setfill('0') << ri->lo_bits_mask << std::dec << endl; + os << " neg_mask 0x" << std::hex << std::setw(16) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " base_offset " << ri->base_offset << endl; + os << " last_start " << ri->last_start << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + dumpMultipathShufti(os, 64, ri->lo_mask, ri->hi_mask, + ri->bucket_select_mask, + ri->data_select_mask, + ri->neg_mask, ri->base_offset); + } + PROGRAM_NEXT_INSTRUCTION + default: os << " UNKNOWN (code " << int{code} << ")" << endl; os << " " << endl; diff --git a/src/rose/rose_build_lookaround.cpp b/src/rose/rose_build_lookaround.cpp index ae990f7f..07ab7c59 100644 --- 
a/src/rose/rose_build_lookaround.cpp +++ b/src/rose/rose_build_lookaround.cpp @@ -45,6 +45,7 @@ #include #include +#include using namespace std; @@ -62,6 +63,20 @@ static const u32 MAX_LOOKAROUND_ENTRIES = 16; /** \brief We would rather have lookarounds with smaller reach than this. */ static const u32 LOOKAROUND_WIDE_REACH = 200; +#if defined(DEBUG) || defined(DUMP_SUPPORT) +static UNUSED +string dump(const map &look) { + ostringstream oss; + for (auto it = look.begin(), ite = look.end(); it != ite; ++it) { + if (it != look.begin()) { + oss << ", "; + } + oss << "{" << it->first << ": " << describeClass(it->second) << "}"; + } + return oss.str(); +} +#endif + static void getForwardReach(const NGHolder &g, u32 top, map &look) { ue2::flat_set curr, next; @@ -298,21 +313,6 @@ void findBackwardReach(const RoseGraph &g, const RoseVertex v, // TODO: implement DFA variants if necessary. } -#if defined(DEBUG) || defined(DUMP_SUPPORT) -#include -static UNUSED -string dump(const map &look) { - ostringstream oss; - for (auto it = look.begin(), ite = look.end(); it != ite; ++it) { - if (it != look.begin()) { - oss << ", "; - } - oss << "{" << it->first << ": " << describeClass(it->second) << "}"; - } - return oss.str(); -} -#endif - static void normalise(map &look) { // We can erase entries where the reach is "all characters". @@ -554,6 +554,76 @@ void trimLiterals(const RoseBuildImpl &build, const RoseVertex v, DEBUG_PRINTF("post-trim lookaround: %s\n", dump(look).c_str()); } +static +void normaliseLeftfix(map &look) { + // We can erase entries where the reach is "all characters", except for the + // very first one -- this might be required to establish a minimum bound on + // the literal's match offset. + + // TODO: It would be cleaner to use a literal program instruction to check + // the minimum bound explicitly. 
+ + if (look.empty()) { + return; + } + + const auto earliest = begin(look)->first; + + vector dead; + for (const auto &m : look) { + if (m.second.all() && m.first != earliest) { + dead.push_back(m.first); + } + } + erase_all(&look, dead); +} + +static +bool trimMultipathLeftfix(const RoseBuildImpl &build, const RoseVertex v, + vector> &looks) { + size_t path_count = 0; + for (auto &look : looks) { + ++path_count; + DEBUG_PRINTF("Path #%ld\n", path_count); + + assert(!look.empty()); + trimLiterals(build, v, look); + + if (look.empty()) { + return false; + } + + // Could be optimized here, just keep the empty byte of the longest path + normaliseLeftfix(look); + + if (look.size() > MAX_LOOKAROUND_ENTRIES) { + DEBUG_PRINTF("lookaround too big (%zu entries)\n", look.size()); + return false; + } + } + return true; +} + +static +void transToLookaround(const vector> &looks, + vector> &lookarounds) { + for (const auto &look : looks) { + vector lookaround; + DEBUG_PRINTF("lookaround: %s\n", dump(look).c_str()); + lookaround.reserve(look.size()); + for (const auto &m : look) { + if (m.first < -128 || m.first > 127) { + DEBUG_PRINTF("range too big\n"); + lookarounds.clear(); + return; + } + s8 offset = verify_s8(m.first); + lookaround.emplace_back(offset, m.second); + } + lookarounds.push_back(lookaround); + } +} + void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v, vector &lookaround) { lookaround.clear(); @@ -592,115 +662,155 @@ void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v, } static -bool hasSingleFloatingStart(const NGHolder &g) { - NFAVertex initial = NGHolder::null_vertex(); - for (auto v : adjacent_vertices_range(g.startDs, g)) { - if (v == g.startDs) { - continue; - } - if (initial != NGHolder::null_vertex()) { - DEBUG_PRINTF("more than one start\n"); - return false; - } - initial = v; - } +bool checkShuftiBuckets(const vector> &looks, + u32 bucket_size) { + set bucket; + for (const auto &look : looks) { + for (const auto &l : look) { + CharReach cr = l.second; + if (cr.count() > 128) { + cr.flip(); + } + map lo2hi; - if (initial == NGHolder::null_vertex()) { - DEBUG_PRINTF("no floating starts\n"); - return false; - } + for (size_t i = cr.find_first(); i != CharReach::npos;) { + u8 it_hi = i >> 4; + u16 low_encode = 0; + while (i != CharReach::npos && (i >> 4) == it_hi) { + low_encode |= 1 << (i &0xf); + i = cr.find_next(i); + } + lo2hi[low_encode] |= 1 << it_hi; + } - // Anchored start must have no successors other than startDs and initial. - for (auto v : adjacent_vertices_range(g.start, g)) { - if (v != initial && v != g.startDs) { - DEBUG_PRINTF("anchored start\n"); - return false; + for (const auto &it : lo2hi) { + u32 hi_lo = (it.second << 16) | it.first; + bucket.insert(hi_lo); + } } } - - return true; + DEBUG_PRINTF("shufti has %lu bucket(s)\n", bucket.size()); + return bucket.size() <= bucket_size; } static -bool getTransientPrefixReach(const NGHolder &g, u32 lag, - map &look) { - if (in_degree(g.accept, g) != 1) { - DEBUG_PRINTF("more than one accept\n"); +bool getTransientPrefixReach(const NGHolder &g, ReportID report, u32 lag, + vector> &looks) { + if (!isAcyclic(g)) { + DEBUG_PRINTF("contains back-edge\n"); return false; } - // Must be a floating chain wired to startDs. - if (!hasSingleFloatingStart(g)) { - DEBUG_PRINTF("not a single floating start\n"); + // Must be floating chains wired to startDs. 
 static
-bool getTransientPrefixReach(const NGHolder &g, u32 lag,
-                             map<s32, CharReach> &look) {
-    if (in_degree(g.accept, g) != 1) {
-        DEBUG_PRINTF("more than one accept\n");
+bool getTransientPrefixReach(const NGHolder &g, ReportID report, u32 lag,
+                             vector<map<s32, CharReach>> &looks) {
+    if (!isAcyclic(g)) {
+        DEBUG_PRINTF("contains back-edge\n");
         return false;
     }
 
-    // Must be a floating chain wired to startDs.
-    if (!hasSingleFloatingStart(g)) {
-        DEBUG_PRINTF("not a single floating start\n");
+    // Must be floating chains wired to startDs.
+    if (!isFloating(g)) {
+        DEBUG_PRINTF("not a floating start\n");
         return false;
     }
 
-    NFAVertex v = *(inv_adjacent_vertices(g.accept, g).first);
-    u32 i = lag + 1;
-    while (v != g.startDs) {
-        DEBUG_PRINTF("i=%u, v=%zu\n", i, g[v].index);
-        if (is_special(v, g)) {
-            DEBUG_PRINTF("special\n");
+    vector<NFAVertex> curr;
+    for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+        if (v == g.start || v == g.startDs) {
+            DEBUG_PRINTF("empty graph\n");
+            return true;
+        }
+        if (contains(g[v].reports, report)) {
+            curr.push_back(v);
+        }
+    }
+
+    assert(!curr.empty());
+
+    u32 total_len = curr.size();
+
+    for (const auto &v : curr) {
+        looks.emplace_back(map<s32, CharReach>());
+        looks.back()[0 - (lag + 1)] = g[v].char_reach;
+    }
+
+    bool curr_active = false;
+
+    /* For each offset -i, we trace the paths backward through the vertices
+     * in curr. Once there are more than 8 paths and total_len exceeds 64
+     * entries, neither MULTIPATH_LOOKAROUND nor MULTIPATH_SHUFTI can be
+     * built, so we give up the path finding. Otherwise, the loop halts when
+     * every vertex in curr is startDs.
+     */
+    for (u32 i = lag + 2; i < (lag + 2) + MAX_BACK_LEN; i++) {
+        curr_active = false;
+        size_t curr_size = curr.size();
+        if (curr.size() > 1 && i > lag + MULTIPATH_MAX_LEN) {
+            DEBUG_PRINTF("range is larger than 16 in multi-path\n");
             return false;
         }
-        look[0 - i] = g[v].char_reach;
-
-        NFAVertex next = NGHolder::null_vertex();
-        for (auto u : inv_adjacent_vertices_range(v, g)) {
-            if (u == g.start) {
-                continue; // Benign, checked by hasSingleFloatingStart
-            }
-            if (next == NGHolder::null_vertex()) {
-                next = u;
+        for (size_t idx = 0; idx < curr_size; idx++) {
+            NFAVertex v = curr[idx];
+            if (v == g.startDs) {
                 continue;
            }
-            DEBUG_PRINTF("branch\n");
-            return false;
-        }
+            assert(!is_special(v, g));
 
-        if (next == NGHolder::null_vertex() || next == v) {
-            DEBUG_PRINTF("no predecessor or only self-loop\n");
-            // This graph is malformed -- all vertices in a graph that makes
-            // it to this analysis should have predecessors.
-            assert(0);
-            return false;
-        }
+            for (auto u : inv_adjacent_vertices_range(v, g)) {
+                if (u == g.start || u == g.startDs) {
+                    curr[idx] = g.startDs;
+                    break;
+                }
+            }
 
-        v = next;
-        i++;
+            if (is_special(curr[idx], g)) {
+                continue;
+            }
+
+            for (auto u : inv_adjacent_vertices_range(v, g)) {
+                curr_active = true;
+                if (curr[idx] == v) {
+                    curr[idx] = u;
+                    looks[idx][0 - i] = g[u].char_reach;
+                    total_len++;
+                } else {
+                    curr.push_back(u);
+                    looks.push_back(looks[idx]);
+                    (looks.back())[0 - i] = g[u].char_reach;
+                    total_len += looks.back().size();
+                }
+
+                if (curr.size() > MAX_LOOKAROUND_PATHS && total_len > 64) {
+                    DEBUG_PRINTF("too many branches\n");
+                    return false;
+                }
+            }
+        }
+        if (!curr_active) {
+            break;
+        }
     }
 
+    if (curr_active) {
+        DEBUG_PRINTF("single path too long\n");
+        return false;
+    }
+
+    // More than 8 paths, check multi-path shufti.
+    if (curr.size() > MAX_LOOKAROUND_PATHS) {
+        u32 bucket_size = total_len > 32 ? 8 : 16;
+        if (!checkShuftiBuckets(looks, bucket_size)) {
+            DEBUG_PRINTF("shufti has too many buckets\n");
+            return false;
+        }
+    }
+
+    assert(!looks.empty());
+    if (looks.size() == 1) {
+        DEBUG_PRINTF("single lookaround\n");
+    } else {
+        DEBUG_PRINTF("multi-path lookaround\n");
+    }
     DEBUG_PRINTF("done\n");
     return true;
 }
 
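[A concrete, hypothetical example of the path tracing above: for a transient prefix equivalent to /(ab|cd)x/ with lag 0, the backward walk from the vertex reporting at 'x' splits at its two predecessors and produces:

    // looks[0] = {-3: 'a', -2: 'b', -1: 'x'}
    // looks[1] = {-3: 'c', -2: 'd', -1: 'x'}
    //
    // curr.size() == 2 paths, total_len == 6 lookaround entries; both are
    // comfortably inside the MAX_LOOKAROUND_PATHS / 64-entry give-up limits.
]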
-static
-void normaliseLeftfix(map<s32, CharReach> &look) {
-    // We can erase entries where the reach is "all characters", except for the
-    // very first one -- this might be required to establish a minimum bound on
-    // the literal's match offset.
-
-    // TODO: It would be cleaner to use a literal program instruction to check
-    // the minimum bound explicitly.
-
-    if (look.empty()) {
-        return;
-    }
-
-    const auto earliest = begin(look)->first;
-
-    vector<s32> dead;
-    for (const auto &m : look) {
-        if (m.second.all() && m.first != earliest) {
-            dead.push_back(m.first);
-        }
-    }
-    erase_all(&look, dead);
-}
-
 bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
-                           vector<LookEntry> &lookaround) {
+                           vector<vector<LookEntry>> &lookaround) {
     lookaround.clear();
 
     const RoseGraph &g = build.g;
@@ -716,36 +826,19 @@ bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
         return false;
     }
 
-    map<s32, CharReach> look;
-    if (!getTransientPrefixReach(*leftfix.graph(), g[v].left.lag, look)) {
-        DEBUG_PRINTF("not a chain\n");
+    vector<map<s32, CharReach>> looks;
+    if (!getTransientPrefixReach(*leftfix.graph(), g[v].left.leftfix_report,
+                                 g[v].left.lag, looks)) {
+        DEBUG_PRINTF("graph has a loop or is too large\n");
         return false;
     }
 
-    trimLiterals(build, v, look);
-    normaliseLeftfix(look);
-
-    if (look.size() > MAX_LOOKAROUND_ENTRIES) {
-        DEBUG_PRINTF("lookaround too big (%zu entries)\n", look.size());
+    if (!trimMultipathLeftfix(build, v, looks)) {
         return false;
     }
+    transToLookaround(looks, lookaround);
 
-    if (look.empty()) {
-        DEBUG_PRINTF("lookaround empty; this is weird\n");
-        return false;
-    }
-
-    lookaround.reserve(look.size());
-    for (const auto &m : look) {
-        if (m.first < -128 || m.first > 127) {
-            DEBUG_PRINTF("range too big\n");
-            return false;
-        }
-        s8 offset = verify_s8(m.first);
-        lookaround.emplace_back(offset, m.second);
-    }
-
-    return true;
+    return !lookaround.empty();
 }
 
 void mergeLookaround(vector<LookEntry> &lookaround,
diff --git a/src/rose/rose_build_lookaround.h b/src/rose/rose_build_lookaround.h
index 993bd229..aea87ccf 100644
--- a/src/rose/rose_build_lookaround.h
+++ b/src/rose/rose_build_lookaround.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -36,6 +36,9 @@
 
 #include <vector>
 
+/** \brief Max path number for multi-path lookaround. */
+#define MAX_LOOKAROUND_PATHS 8
+
 namespace ue2 {
 
 class CharReach;
@@ -44,6 +47,7 @@ class RoseBuildImpl;
 /** \brief Lookaround entry prototype, describing the reachability at a given
  * distance from the end of a role match. */
 struct LookEntry {
+    LookEntry() : offset(0) {}
     LookEntry(s8 offset_in, const CharReach &reach_in)
         : offset(offset_in), reach(reach_in) {}
     s8 offset; //!< offset from role match location.
@@ -63,7 +67,7 @@ size_t hash_value(const LookEntry &l) {
 }
 
 void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
-                         std::vector<LookEntry> &lookaround);
+                         std::vector<LookEntry> &look_more);
 
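[The declaration change just below is the shape change in one line: one inner vector per path, at most MAX_LOOKAROUND_PATHS of them. For the /(ab|cd)x/ sketch above, the result would look like this (assuming CharReach's single-character constructor; illustrative only):

    std::vector<std::vector<LookEntry>> lookaround = {
        {{-3, CharReach('a')}, {-2, CharReach('b')}, {-1, CharReach('x')}},
        {{-3, CharReach('c')}, {-2, CharReach('d')}, {-1, CharReach('x')}},
    };
]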
 /**
 * \brief If possible, render the prefix of the given vertex as a lookaround.
 *
@@ -72,7 +76,7 @@
 * it can be satisfied with a lookaround alone.
 */
 bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
-                           std::vector<LookEntry> &lookaround);
+                           std::vector<std::vector<LookEntry>> &lookaround);
 
 void mergeLookaround(std::vector<LookEntry> &lookaround,
                      const std::vector<LookEntry> &more_lookaround);
diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp
index 1c0fd2ab..9e030e8e 100644
--- a/src/rose/rose_build_program.cpp
+++ b/src/rose/rose_build_program.cpp
@@ -127,7 +127,8 @@ void RoseInstrCheckLookaround::write(void *dest, RoseEngineBlob &blob,
                                      const OffsetMap &offset_map) const {
     RoseInstrBase::write(dest, blob, offset_map);
     auto *inst = static_cast<ROSE_STRUCT_CHECK_LOOKAROUND *>(dest);
-    inst->index = index;
+    inst->look_index = look_index;
+    inst->reach_index = reach_index;
     inst->count = count;
     inst->fail_jump = calc_jump(offset_map, this, target);
 }
@@ -537,6 +538,93 @@ void RoseInstrCheckMedLitNocase::write(void *dest, RoseEngineBlob &blob,
     inst->fail_jump = calc_jump(offset_map, this, target);
 }
 
+void RoseInstrMultipathLookaround::write(void *dest, RoseEngineBlob &blob,
+                                         const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<ROSE_STRUCT_MULTIPATH_LOOKAROUND *>(dest);
+    inst->look_index = look_index;
+    inst->reach_index = reach_index;
+    inst->count = count;
+    inst->last_start = last_start;
+    copy(begin(start_mask), end(start_mask), inst->start_mask);
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
+void RoseInstrCheckMultipathShufti16x8::write(void *dest, RoseEngineBlob &blob,
+                                              const OffsetMap &offset_map)
+                                              const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_16x8 *>(dest);
+    copy(begin(nib_mask), end(nib_mask), inst->nib_mask);
+    copy(begin(bucket_select_mask), begin(bucket_select_mask) + 16,
+         inst->bucket_select_mask);
+    copy(begin(data_select_mask), begin(data_select_mask) + 16,
+         inst->data_select_mask);
+    inst->hi_bits_mask = hi_bits_mask;
+    inst->lo_bits_mask = lo_bits_mask;
+    inst->neg_mask = neg_mask;
+    inst->base_offset = base_offset;
+    inst->last_start = last_start;
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
+void RoseInstrCheckMultipathShufti32x8::write(void *dest, RoseEngineBlob &blob,
+                                              const OffsetMap &offset_map)
+                                              const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x8 *>(dest);
+    copy(begin(hi_mask), begin(hi_mask) + 16, inst->hi_mask);
+    copy(begin(lo_mask), begin(lo_mask) + 16, inst->lo_mask);
+    copy(begin(bucket_select_mask), begin(bucket_select_mask) + 32,
+         inst->bucket_select_mask);
+    copy(begin(data_select_mask), begin(data_select_mask) + 32,
+         inst->data_select_mask);
+    inst->hi_bits_mask = hi_bits_mask;
+    inst->lo_bits_mask = lo_bits_mask;
+    inst->neg_mask = neg_mask;
+    inst->base_offset = base_offset;
+    inst->last_start = last_start;
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
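[Note the asymmetric copies in these write() implementations: the builder-side arrays (declared in rose_build_program.h further below) are kept at a common, wider size, and only the prefix that the engine struct actually stores is serialized, hence copies like begin(bucket_select_mask) + 16. Schematically (names illustrative, not part of the real API):

    u8 padded[32];       /* builder-side mask, built at maximum width      */
    u8 engine_field[16]; /* field in the bytecode struct read at runtime   */
    memcpy(engine_field, padded, sizeof(engine_field)); /* first 16 bytes  */
]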
+void RoseInstrCheckMultipathShufti32x16::write(void *dest, RoseEngineBlob &blob,
+                                               const OffsetMap &offset_map)
+                                               const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x16 *>(dest);
+    copy(begin(hi_mask), end(hi_mask), inst->hi_mask);
+    copy(begin(lo_mask), end(lo_mask), inst->lo_mask);
+    copy(begin(bucket_select_mask_hi), begin(bucket_select_mask_hi) + 32,
+         inst->bucket_select_mask_hi);
+    copy(begin(bucket_select_mask_lo), begin(bucket_select_mask_lo) + 32,
+         inst->bucket_select_mask_lo);
+    copy(begin(data_select_mask), begin(data_select_mask) + 32,
+         inst->data_select_mask);
+    inst->hi_bits_mask = hi_bits_mask;
+    inst->lo_bits_mask = lo_bits_mask;
+    inst->neg_mask = neg_mask;
+    inst->base_offset = base_offset;
+    inst->last_start = last_start;
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
+void RoseInstrCheckMultipathShufti64::write(void *dest, RoseEngineBlob &blob,
+                                            const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64 *>(dest);
+    copy(begin(hi_mask), begin(hi_mask) + 16, inst->hi_mask);
+    copy(begin(lo_mask), begin(lo_mask) + 16, inst->lo_mask);
+    copy(begin(bucket_select_mask), end(bucket_select_mask),
+         inst->bucket_select_mask);
+    copy(begin(data_select_mask), end(data_select_mask),
+         inst->data_select_mask);
+    inst->hi_bits_mask = hi_bits_mask;
+    inst->lo_bits_mask = lo_bits_mask;
+    inst->neg_mask = neg_mask;
+    inst->base_offset = base_offset;
+    inst->last_start = last_start;
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
 static
 OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) {
     OffsetMap offset_map;
diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h
index a63f03c8..3931f095 100644
--- a/src/rose/rose_build_program.h
+++ b/src/rose/rose_build_program.h
@@ -420,20 +420,24 @@ class RoseInstrCheckLookaround
     : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_LOOKAROUND,
                                     ROSE_STRUCT_CHECK_LOOKAROUND,
                                     RoseInstrCheckLookaround> {
 public:
-    u32 index;
+    u32 look_index;
+    u32 reach_index;
     u32 count;
     const RoseInstruction *target;
 
-    RoseInstrCheckLookaround(u32 index_in, u32 count_in,
-                             const RoseInstruction *target_in)
-        : index(index_in), count(count_in), target(target_in) {}
+    RoseInstrCheckLookaround(u32 look_index_in, u32 reach_index_in,
+                             u32 count_in, const RoseInstruction *target_in)
+        : look_index(look_index_in), reach_index(reach_index_in),
+          count(count_in), target(target_in) {}
 
     bool operator==(const RoseInstrCheckLookaround &ri) const {
-        return index == ri.index && count == ri.count && target == ri.target;
+        return look_index == ri.look_index && reach_index == ri.reach_index &&
+               count == ri.count && target == ri.target;
     }
 
     size_t hash() const override {
-        return hash_all(static_cast<u32>(opcode), index, count);
+        return hash_all(static_cast<u32>(opcode), look_index, reach_index,
+                        count);
     }
 
     void write(void *dest, RoseEngineBlob &blob,
@@ -441,7 +445,8 @@ public:
     bool equiv_to(const RoseInstrCheckLookaround &ri, const OffsetMap &offsets,
                   const OffsetMap &other_offsets) const {
-        return index == ri.index && count == ri.count &&
+        return look_index == ri.look_index && reach_index == ri.reach_index &&
+               count == ri.count &&
                offsets.at(target) == other_offsets.at(ri.target);
     }
 };
@@ -498,7 +503,7 @@ public:
     RoseInstrCheckMask32(std::array<u8, 32> and_mask_in,
                          std::array<u8, 32> cmp_mask_in, u32 neg_mask_in,
                          s32 offset_in, const RoseInstruction *target_in)
-        : and_mask(move(and_mask_in)), cmp_mask(move(cmp_mask_in)),
+        : and_mask(std::move(and_mask_in)), cmp_mask(std::move(cmp_mask_in)),
           neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
 
     bool operator==(const RoseInstrCheckMask32 &ri) const {
@@ -576,8 +581,8 @@ public:
                             std::array<u8, 16> bucket_select_mask_in,
                             u32 neg_mask_in, s32 offset_in,
                             const RoseInstruction *target_in)
-        : nib_mask(move(nib_mask_in)),
-          bucket_select_mask(move(bucket_select_mask_in)),
+        : nib_mask(std::move(nib_mask_in)),
+          bucket_select_mask(std::move(bucket_select_mask_in)),
           neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
 
     bool operator==(const RoseInstrCheckShufti16x8 &ri) const {
@@ -621,8 +626,8 @@ public:
                             std::array<u8, 32> bucket_select_mask_in,
                             u32 neg_mask_in, s32 offset_in,
                             const RoseInstruction *target_in)
-        : hi_mask(move(hi_mask_in)), lo_mask(move(lo_mask_in)),
-          bucket_select_mask(move(bucket_select_mask_in)),
+        : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)),
+          bucket_select_mask(std::move(bucket_select_mask_in)),
           neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
 
     bool operator==(const RoseInstrCheckShufti32x8 &ri) const {
@@ -666,8 +671,8 @@ public:
                             std::array<u8, 32> bucket_select_mask_in,
                             u32 neg_mask_in, s32 offset_in,
                             const RoseInstruction *target_in)
-        : hi_mask(move(hi_mask_in)), lo_mask(move(lo_mask_in)),
-          bucket_select_mask(move(bucket_select_mask_in)),
+        : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)),
+          bucket_select_mask(std::move(bucket_select_mask_in)),
           neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
 
     bool operator==(const RoseInstrCheckShufti16x16 &ri) const {
@@ -713,9 +718,9 @@ public:
                             std::array<u8, 32> bucket_select_mask_lo_in,
                             u32 neg_mask_in, s32 offset_in,
                             const RoseInstruction *target_in)
-        : hi_mask(move(hi_mask_in)), lo_mask(move(lo_mask_in)),
-          bucket_select_mask_hi(move(bucket_select_mask_hi_in)),
-          bucket_select_mask_lo(move(bucket_select_mask_lo_in)),
+        : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)),
+          bucket_select_mask_hi(std::move(bucket_select_mask_hi_in)),
+          bucket_select_mask_lo(std::move(bucket_select_mask_lo_in)),
           neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
 
     bool operator==(const RoseInstrCheckShufti32x16 &ri) const {
@@ -1859,6 +1864,306 @@ public:
     ~RoseInstrClearWorkDone() override;
 };
 
+class RoseInstrMultipathLookaround
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_MULTIPATH_LOOKAROUND,
+                                    ROSE_STRUCT_MULTIPATH_LOOKAROUND,
+                                    RoseInstrMultipathLookaround> {
+public:
+    u32 look_index;
+    u32 reach_index;
+    u32 count;
+    s32 last_start;
+    std::array<u8, MULTIPATH_MAX_LEN> start_mask;
+    const RoseInstruction *target;
+
+    RoseInstrMultipathLookaround(u32 look_index_in, u32 reach_index_in,
+                                 u32 count_in, s32 last_start_in,
+                                 std::array<u8, MULTIPATH_MAX_LEN> start_mask_in,
+                                 const RoseInstruction *target_in)
+        : look_index(look_index_in), reach_index(reach_index_in),
+          count(count_in), last_start(last_start_in),
+          start_mask(std::move(start_mask_in)), target(target_in) {}
+
+    bool operator==(const RoseInstrMultipathLookaround &ri) const {
+        return look_index == ri.look_index && reach_index == ri.reach_index &&
+               count == ri.count && last_start == ri.last_start &&
+               start_mask == ri.start_mask && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), look_index, reach_index,
+                        count, last_start, start_mask);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrMultipathLookaround &ri,
+                  const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return look_index == ri.look_index && reach_index == ri.reach_index &&
+               count == ri.count && last_start == ri.last_start &&
+               start_mask == ri.start_mask &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckMultipathShufti16x8
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_16x8,
+                                    ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_16x8,
+                                    RoseInstrCheckMultipathShufti16x8> {
+public:
+    std::array<u8, 32> nib_mask;
+    std::array<u8, 64> bucket_select_mask;
+    std::array<u8, 64> data_select_mask;
+    u16 hi_bits_mask;
+    u16 lo_bits_mask;
+    u16 neg_mask;
+    s32 base_offset;
+    s32 last_start;
+    const RoseInstruction *target;
+
+    RoseInstrCheckMultipathShufti16x8(std::array<u8, 32> nib_mask_in,
+                                      std::array<u8, 64> bucket_select_mask_in,
+                                      std::array<u8, 64> data_select_mask_in,
+                                      u16 hi_bits_mask_in, u16 lo_bits_mask_in,
+                                      u16 neg_mask_in, s32 base_offset_in,
+                                      s32 last_start_in,
+                                      const RoseInstruction *target_in)
+        : nib_mask(std::move(nib_mask_in)),
+          bucket_select_mask(std::move(bucket_select_mask_in)),
+          data_select_mask(std::move(data_select_mask_in)),
+          hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in),
+          neg_mask(neg_mask_in), base_offset(base_offset_in),
+          last_start(last_start_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckMultipathShufti16x8 &ri) const {
+        return nib_mask == ri.nib_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               data_select_mask == ri.data_select_mask &&
+               hi_bits_mask == ri.hi_bits_mask &&
+               lo_bits_mask == ri.lo_bits_mask &&
+               neg_mask == ri.neg_mask && base_offset == ri.base_offset &&
+               last_start == ri.last_start && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), nib_mask,
+                        bucket_select_mask, data_select_mask, hi_bits_mask,
+                        lo_bits_mask, neg_mask, base_offset, last_start);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckMultipathShufti16x8 &ri,
+                  const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return nib_mask == ri.nib_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               data_select_mask == ri.data_select_mask &&
+               hi_bits_mask == ri.hi_bits_mask &&
+               lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask &&
+               base_offset == ri.base_offset && last_start == ri.last_start &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckMultipathShufti32x8
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x8,
+                                    ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x8,
+                                    RoseInstrCheckMultipathShufti32x8> {
+public:
+    std::array<u8, 32> hi_mask;
+    std::array<u8, 32> lo_mask;
+    std::array<u8, 64> bucket_select_mask;
+    std::array<u8, 64> data_select_mask;
+    u32 hi_bits_mask;
+    u32 lo_bits_mask;
+    u32 neg_mask;
+    s32 base_offset;
+    s32 last_start;
+    const RoseInstruction *target;
+
+    RoseInstrCheckMultipathShufti32x8(std::array<u8, 32> hi_mask_in,
+                                      std::array<u8, 32> lo_mask_in,
+                                      std::array<u8, 64> bucket_select_mask_in,
+                                      std::array<u8, 64> data_select_mask_in,
+                                      u32 hi_bits_mask_in, u32 lo_bits_mask_in,
+                                      u32 neg_mask_in, s32 base_offset_in,
+                                      s32 last_start_in,
+                                      const RoseInstruction *target_in)
+        : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)),
+          bucket_select_mask(std::move(bucket_select_mask_in)),
+          data_select_mask(std::move(data_select_mask_in)),
+          hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in),
+          neg_mask(neg_mask_in), base_offset(base_offset_in),
+          last_start(last_start_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckMultipathShufti32x8 &ri) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               data_select_mask == ri.data_select_mask &&
+               hi_bits_mask == ri.hi_bits_mask &&
+               lo_bits_mask == ri.lo_bits_mask &&
+               neg_mask == ri.neg_mask && base_offset == ri.base_offset &&
+               last_start == ri.last_start && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), hi_mask, lo_mask,
+                        bucket_select_mask, data_select_mask, hi_bits_mask,
+                        lo_bits_mask, neg_mask, base_offset, last_start);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckMultipathShufti32x8 &ri,
+                  const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               data_select_mask == ri.data_select_mask &&
+               hi_bits_mask == ri.hi_bits_mask &&
+               lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask &&
+               base_offset == ri.base_offset && last_start == ri.last_start &&
+               offsets.at(target) ==
+                                      other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckMultipathShufti32x16
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x16,
+                                    ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x16,
+                                    RoseInstrCheckMultipathShufti32x16> {
+public:
+    std::array<u8, 32> hi_mask;
+    std::array<u8, 32> lo_mask;
+    std::array<u8, 64> bucket_select_mask_hi;
+    std::array<u8, 64> bucket_select_mask_lo;
+    std::array<u8, 64> data_select_mask;
+    u32 hi_bits_mask;
+    u32 lo_bits_mask;
+    u32 neg_mask;
+    s32 base_offset;
+    s32 last_start;
+    const RoseInstruction *target;
+
+    RoseInstrCheckMultipathShufti32x16(std::array<u8, 32> hi_mask_in,
+                                       std::array<u8, 32> lo_mask_in,
+                                       std::array<u8, 64> bucket_select_mask_hi_in,
+                                       std::array<u8, 64> bucket_select_mask_lo_in,
+                                       std::array<u8, 64> data_select_mask_in,
+                                       u32 hi_bits_mask_in, u32 lo_bits_mask_in,
+                                       u32 neg_mask_in, s32 base_offset_in,
+                                       s32 last_start_in,
+                                       const RoseInstruction *target_in)
+        : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)),
+          bucket_select_mask_hi(std::move(bucket_select_mask_hi_in)),
+          bucket_select_mask_lo(std::move(bucket_select_mask_lo_in)),
+          data_select_mask(std::move(data_select_mask_in)),
+          hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in),
+          neg_mask(neg_mask_in), base_offset(base_offset_in),
+          last_start(last_start_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckMultipathShufti32x16 &ri) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask_hi == ri.bucket_select_mask_hi &&
+               bucket_select_mask_lo == ri.bucket_select_mask_lo &&
+               data_select_mask == ri.data_select_mask &&
+               hi_bits_mask == ri.hi_bits_mask &&
+               lo_bits_mask == ri.lo_bits_mask &&
+               neg_mask == ri.neg_mask && base_offset == ri.base_offset &&
+               last_start == ri.last_start && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), hi_mask, lo_mask,
+                        bucket_select_mask_hi, bucket_select_mask_lo,
+                        data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask,
+                        base_offset, last_start);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckMultipathShufti32x16 &ri,
+                  const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask_hi == ri.bucket_select_mask_hi &&
+               bucket_select_mask_lo == ri.bucket_select_mask_lo &&
+               data_select_mask == ri.data_select_mask &&
+               hi_bits_mask == ri.hi_bits_mask &&
+               lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask &&
+               base_offset == ri.base_offset && last_start == ri.last_start &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
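[As with the existing instructions, every new class carries operator==, hash() and equiv_to(); these appear to exist so the program builder can deduplicate structurally identical instructions, with equiv_to() additionally resolving jump targets through the offset maps. Schematic usage only, not taken from the patch:

    // Two candidate instructions a and b, with their programs' offset maps.
    if (a.hash() == b.hash() && a.equiv_to(b, offsets_a, offsets_b)) {
        // identical behaviour, including resolved fail_jump targets
    }
]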
+class RoseInstrCheckMultipathShufti64
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_64,
+                                    ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64,
+                                    RoseInstrCheckMultipathShufti64> {
+public:
+    std::array<u8, 32> hi_mask;
+    std::array<u8, 32> lo_mask;
+    std::array<u8, 64> bucket_select_mask;
+    std::array<u8, 64> data_select_mask;
+    u64a hi_bits_mask;
+    u64a lo_bits_mask;
+    u64a neg_mask;
+    s32 base_offset;
+    s32 last_start;
+    const RoseInstruction *target;
+
+    RoseInstrCheckMultipathShufti64(std::array<u8, 32> hi_mask_in,
+                                    std::array<u8, 32> lo_mask_in,
+                                    std::array<u8, 64> bucket_select_mask_in,
+                                    std::array<u8, 64> data_select_mask_in,
+                                    u64a hi_bits_mask_in, u64a lo_bits_mask_in,
+                                    u64a neg_mask_in, s32 base_offset_in,
+                                    s32 last_start_in,
+                                    const RoseInstruction *target_in)
+        : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)),
+          bucket_select_mask(std::move(bucket_select_mask_in)),
+          data_select_mask(std::move(data_select_mask_in)),
+          hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in),
+          neg_mask(neg_mask_in), base_offset(base_offset_in),
+          last_start(last_start_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckMultipathShufti64 &ri) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               data_select_mask == ri.data_select_mask &&
+               hi_bits_mask == ri.hi_bits_mask &&
+               lo_bits_mask == ri.lo_bits_mask &&
+               neg_mask == ri.neg_mask && base_offset == ri.base_offset &&
+               last_start == ri.last_start && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<u32>(opcode), hi_mask, lo_mask,
+                        bucket_select_mask, data_select_mask, hi_bits_mask,
+                        lo_bits_mask, neg_mask, base_offset, last_start);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckMultipathShufti64 &ri,
+                  const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               data_select_mask == ri.data_select_mask &&
+               hi_bits_mask == ri.hi_bits_mask &&
+               lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask &&
+               base_offset == ri.base_offset && last_start == ri.last_start &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
 class RoseInstrEnd
     : public RoseInstrBaseTrivial<ROSE_INSTR_END, ROSE_STRUCT_END,
                                   RoseInstrEnd> {
diff --git a/src/rose/rose_common.h b/src/rose/rose_common.h
index c0250aa5..34678b8f 100644
--- a/src/rose/rose_common.h
+++ b/src/rose/rose_common.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -41,6 +41,15 @@
 /** \brief Length in bytes of a reach bitvector, used by the lookaround code. */
 #define REACH_BITVECTOR_LEN 32
 
+/** \brief Length in bytes of a reach bitvector for multi-path lookaround. */
+#define MULTI_REACH_BITVECTOR_LEN 256
+
+/**
+ * \brief The max offset from the leftmost byte to the rightmost byte in
+ * multi-path lookaround.
+ */
+#define MULTIPATH_MAX_LEN 16
+
 /** \brief Value used to represent an invalid Rose program offset. */
 #define ROSE_INVALID_PROG_OFFSET 0
diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h
index 48f15ff0..06a9b069 100644
--- a/src/rose/rose_internal.h
+++ b/src/rose/rose_internal.h
@@ -386,7 +386,8 @@ struct RoseEngine {
     u32 roseCount;
     u32 lookaroundTableOffset; //!< base of lookaround offset list (of s8 values)
     u32 lookaroundReachOffset; /**< base of lookaround reach bitvectors (32
-                                * bytes each) */
+                                * bytes for single-path lookaround and 256 bytes
+                                * for multi-path lookaround) */
     u32 eodProgramOffset; //!< EOD program, otherwise 0.
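[The two reach lengths encode different layouts. Single-path reach is one bit per byte value (256 bits = 32 bytes); multi-path reach spends a whole byte per byte value, with each bit selecting one of up to MAX_LOOKAROUND_PATHS paths. Schematic reads, assuming that layout:

    const u8 *reach;  /* REACH_BITVECTOR_LEN bytes: 1 bit per byte value */
    int ok = (reach[c / 8] >> (c % 8)) & 0x1;  /* single-path test */

    const u8 *mreach; /* MULTI_REACH_BITVECTOR_LEN bytes: 1 byte per value */
    u8 path_mask = mreach[c];  /* bit i set => path i still viable at c */
]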
diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h
index cf1a9eb6..ebda679a 100644
--- a/src/rose/rose_program.h
+++ b/src/rose/rose_program.h
@@ -36,6 +36,7 @@
 #include "som/som_operation.h"
 #include "rose_internal.h"
 #include "ue2common.h"
+#include "util/simd_types.h"
 
 /** \brief Minimum alignment for each instruction in memory. */
 #define ROSE_INSTR_MIN_ALIGN 8U
@@ -146,7 +147,38 @@ enum RoseInstructionCode {
      */
     ROSE_INSTR_CLEAR_WORK_DONE,
 
-    LAST_ROSE_INSTRUCTION = ROSE_INSTR_CLEAR_WORK_DONE //!< Sentinel.
+    /** \brief Check a lookaround with multiple paths. */
+    ROSE_INSTR_MULTIPATH_LOOKAROUND,
+
+    /**
+     * \brief Use shufti to check a lookaround with multiple paths. The total
+     * length of the paths is at most 16 bytes and shufti has 8 buckets.
+     * Each path can be at most 16 bytes long.
+     */
+    ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_16x8,
+
+    /**
+     * \brief Use shufti to check a lookaround with multiple paths. The total
+     * length of the paths is at most 32 bytes and shufti has 8 buckets.
+     * Each path can be at most 16 bytes long.
+     */
+    ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x8,
+
+    /**
+     * \brief Use shufti to check a lookaround with multiple paths. The total
+     * length of the paths is at most 32 bytes and shufti has 16 buckets.
+     * Each path can be at most 16 bytes long.
+     */
+    ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x16,
+
+    /**
+     * \brief Use shufti to check a lookaround with multiple paths. The total
+     * length of the paths is at most 64 bytes and shufti has 8 buckets.
+     * Each path can be at most 16 bytes long.
+     */
+    ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_64,
+
+    LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_64 //!< Sentinel.
 };
 
 struct ROSE_STRUCT_END {
@@ -192,14 +224,15 @@ struct ROSE_STRUCT_CHECK_NOT_HANDLED {
 struct ROSE_STRUCT_CHECK_SINGLE_LOOKAROUND {
     u8 code; //!< From enum RoseInstructionCode.
     s8 offset; //!< The offset of the byte to examine.
-    u32 reach_index; //!< The index of the reach table entry to use.
+    u32 reach_index; //!< Index for lookaround reach bitvectors.
     u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
 
 struct ROSE_STRUCT_CHECK_LOOKAROUND {
     u8 code; //!< From enum RoseInstructionCode.
-    u32 index;
-    u32 count;
+    u32 look_index; //!< Index for lookaround offset list.
+    u32 reach_index; //!< Index for lookaround reach bitvectors.
+    u32 count; //!< The count of lookaround entries in one instruction.
     u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
@@ -526,4 +559,70 @@ struct ROSE_STRUCT_CLEAR_WORK_DONE {
     u8 code; //!< From enum RoseInstructionCode.
 };
 
+struct ROSE_STRUCT_MULTIPATH_LOOKAROUND {
+    u8 code; //!< From enum RoseInstructionCode.
+    u32 look_index; //!< Index for lookaround offset list.
+    u32 reach_index; //!< Index for lookaround reach bitvectors.
+    u32 count; //!< Number of lookaround positions shared by the paths.
+    s32 last_start; //!< The latest start offset among 8 paths.
+    u8 start_mask[MULTIPATH_MAX_LEN]; /*!< Used to initialize the path mask
+                                       * when the leftmost data is missing. */
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_16x8 {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 nib_mask[2 * sizeof(m128)]; //!< High and low nibble mask in shufti.
+    u8 bucket_select_mask[sizeof(m128)]; //!< Mask for bucket assigning.
+    u8 data_select_mask[sizeof(m128)]; //!< Shuffle mask for data ordering.
+    u32 hi_bits_mask; //!< High-bits used in multi-path validation.
+    u32 lo_bits_mask; //!< Low-bits used in multi-path validation.
+    u32 neg_mask; //!< Negation mask for the match results.
+    s32 base_offset; //!< Relative offset of the first byte.
+    s32 last_start; //!< The latest start offset among 8 paths.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
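[Reading the four instruction docs above together gives the selection space; a sketch of the implied decision, not the real compile-time selection code:

    /* total = sum of all paths' lookaround bytes; buckets = shufti buckets */
    if (total <= 16 && buckets <= 8)       { /* CHECK_MULTIPATH_SHUFTI_16x8  */ }
    else if (total <= 32 && buckets <= 8)  { /* CHECK_MULTIPATH_SHUFTI_32x8  */ }
    else if (total <= 32 && buckets <= 16) { /* CHECK_MULTIPATH_SHUFTI_32x16 */ }
    else if (total <= 64 && buckets <= 8)  { /* CHECK_MULTIPATH_SHUFTI_64    */ }
    else { /* fall back to MULTIPATH_LOOKAROUND */ }
]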
+struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x8 {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 hi_mask[sizeof(m128)]; //!< High nibble mask in shufti.
+    u8 lo_mask[sizeof(m128)]; //!< Low nibble mask in shufti.
+    u8 bucket_select_mask[sizeof(m256)]; //!< Mask for bucket assigning.
+    u8 data_select_mask[sizeof(m256)]; //!< Shuffle mask for data ordering.
+    u32 hi_bits_mask; //!< High-bits used in multi-path validation.
+    u32 lo_bits_mask; //!< Low-bits used in multi-path validation.
+    u32 neg_mask; //!< Negation mask for the match results.
+    s32 base_offset; //!< Relative offset of the first byte.
+    s32 last_start; //!< The latest start offset among 8 paths.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x16 {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 hi_mask[sizeof(m256)]; //!< High nibble mask in shufti.
+    u8 lo_mask[sizeof(m256)]; //!< Low nibble mask in shufti.
+    u8 bucket_select_mask_hi[sizeof(m256)]; //!< Mask for bucket assigning.
+    u8 bucket_select_mask_lo[sizeof(m256)]; //!< Mask for bucket assigning.
+    u8 data_select_mask[sizeof(m256)]; //!< Shuffle mask for data ordering.
+    u32 hi_bits_mask; //!< High-bits used in multi-path validation.
+    u32 lo_bits_mask; //!< Low-bits used in multi-path validation.
+    u32 neg_mask; //!< Negation mask for the match results.
+    s32 base_offset; //!< Relative offset of the first byte.
+    s32 last_start; //!< The latest start offset among 8 paths.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64 {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 hi_mask[sizeof(m128)]; //!< High nibble mask in shufti.
+    u8 lo_mask[sizeof(m128)]; //!< Low nibble mask in shufti.
+    u8 bucket_select_mask[2 * sizeof(m256)]; //!< Mask for bucket assigning.
+    u8 data_select_mask[2 * sizeof(m256)]; //!< Shuffle mask for data ordering.
+    u64a hi_bits_mask; //!< High-bits used in multi-path validation.
+    u64a lo_bits_mask; //!< Low-bits used in multi-path validation.
+    u64a neg_mask; //!< 64-bit negation mask for the match results.
+    s32 base_offset; //!< Relative offset of the first byte.
+    s32 last_start; //!< The latest start offset among 8 paths.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
 #endif // ROSE_ROSE_PROGRAM_H
diff --git a/src/rose/validate_shufti.h b/src/rose/validate_shufti.h
index 49d2c2fe..e26d6c2b 100644
--- a/src/rose/validate_shufti.h
+++ b/src/rose/validate_shufti.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Intel Corporation
+ * Copyright (c) 2016-2017, Intel Corporation
  *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -46,7 +46,7 @@ void dumpMask(const void *mask, int len) {
 static really_inline
 int validateShuftiMask16x16(const m256 data, const m256 hi_mask,
                             const m256 lo_mask, const m256 and_mask,
-                            const u32 neg_mask, const u16 valid_data_mask) {
+                            const u32 neg_mask, const u32 valid_data_mask) {
     m256 low4bits = set32x8(0xf);
     m256 c_lo = vpshufb(lo_mask, and256(data, low4bits));
     m256 c_hi = vpshufb(hi_mask,
                         rshift64_m256(andnot256(low4bits, data), 4));
@@ -75,7 +75,7 @@ int validateShuftiMask16x16(const m256 data, const m256 hi_mask,
 static really_inline
 int validateShuftiMask16x8(const m128 data, const m256 nib_mask,
                            const m128 and_mask, const u32 neg_mask,
-                           const u16 valid_data_mask) {
+                           const u32 valid_data_mask) {
     m256 data_m256 = combine2x128(rshift64_m128(data, 4), data);
     m256 low4bits = set32x8(0xf);
     m256 c_nib = vpshufb(nib_mask, and256(data_m256, low4bits));
@@ -172,4 +172,121 @@ int validateShuftiMask32x16(const m256 data,
     u32 cmp_result = (nresult ^ neg_mask) & valid_data_mask;
     return !cmp_result;
 }
+
+static really_inline
+int checkMultipath32(u32 data, u32 hi_bits, u32 lo_bits) {
+    u32 t = ~(data | hi_bits);
+    t += lo_bits;
+    t &= (~data) & hi_bits;
+    DEBUG_PRINTF("t %x\n", t);
+    return !!t;
+}
+
+static really_inline
+int checkMultipath64(u64a data, u64a hi_bits, u64a lo_bits) {
+    u64a t = ~(data | hi_bits);
+    t += lo_bits;
+    t &= (~data) & hi_bits;
+    DEBUG_PRINTF("t %llx\n", t);
+    return !!t;
+}
+
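[The checkMultipath* helpers use a classic carry-propagation trick; a worked example on one 3-byte path occupying bits 0..2 of the mask word:

    /* lo_bits = 0x1 (first byte of the path), hi_bits = 0x4 (last byte);
     * a set bit in data means "this byte position failed its reach check".
     *
     *   all bytes matched:  data = 0x0
     *     t = ~(0x0 | 0x4) + 0x1 = ...11111100
     *     t & ~data & hi_bits    = 0x4  -> match
     *   middle byte failed: data = 0x2
     *     t = ~(0x2 | 0x4) + 0x1 = ...11111010
     *     t & ~data & hi_bits    = 0x0  -> no match
     *
     * The += lo_bits carry can only reach a path's hi bit if every bit in
     * between is zero, i.e. every byte on that path matched. */
]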
+static really_inline
+int validateMultipathShuftiMask16x8(const m128 data,
+                                    const m256 nib_mask,
+                                    const m128 bucket_select_mask,
+                                    const u32 hi_bits, const u32 lo_bits,
+                                    const u32 neg_mask,
+                                    const u32 valid_path_mask) {
+    m256 data_256 = combine2x128(rshift64_m128(data, 4), data);
+    m256 low4bits = set32x8(0xf);
+    m256 c_nib = vpshufb(nib_mask, and256(data_256, low4bits));
+    m128 t = and128(movdq_hi(c_nib), movdq_lo(c_nib));
+    m128 result = and128(t, bucket_select_mask);
+    u32 nresult = movemask128(eq128(result, zeroes128()));
+    u32 cmp_result = (nresult ^ neg_mask) | valid_path_mask;
+
+    DEBUG_PRINTF("cmp_result %x\n", cmp_result);
+
+    return checkMultipath32(cmp_result, hi_bits, lo_bits);
+}
+
+static really_inline
+int validateMultipathShuftiMask32x8(const m256 data,
+                                    const m256 hi_mask, const m256 lo_mask,
+                                    const m256 bucket_select_mask,
+                                    const u32 hi_bits, const u32 lo_bits,
+                                    const u32 neg_mask,
+                                    const u32 valid_path_mask) {
+    m256 low4bits = set32x8(0xf);
+    m256 data_lo = and256(data, low4bits);
+    m256 data_hi = and256(rshift64_m256(data, 4), low4bits);
+    m256 c_lo = vpshufb(lo_mask, data_lo);
+    m256 c_hi = vpshufb(hi_mask, data_hi);
+    m256 c = and256(c_lo, c_hi);
+    m256 result = and256(c, bucket_select_mask);
+    u32 nresult = movemask256(eq256(result, zeroes256()));
+    u32 cmp_result = (nresult ^ neg_mask) | valid_path_mask;
+
+    DEBUG_PRINTF("cmp_result %x\n", cmp_result);
+
+    return checkMultipath32(cmp_result, hi_bits, lo_bits);
+}
+
+static really_inline
+int validateMultipathShuftiMask32x16(const m256 data,
+                                     const m256 hi_mask_1, const m256 hi_mask_2,
+                                     const m256 lo_mask_1, const m256 lo_mask_2,
+                                     const m256 bucket_select_mask_hi,
+                                     const m256 bucket_select_mask_lo,
+                                     const u32 hi_bits, const u32 lo_bits,
+                                     const u32 neg_mask,
+                                     const u32 valid_path_mask) {
+    m256 low4bits = set32x8(0xf);
+    m256 data_lo = and256(data, low4bits);
+    m256 data_hi = and256(rshift64_m256(data, 4), low4bits);
+    m256 c_lo_1 = vpshufb(lo_mask_1, data_lo);
+    m256 c_lo_2 = vpshufb(lo_mask_2, data_lo);
+    m256 c_hi_1 = vpshufb(hi_mask_1, data_hi);
+    m256 c_hi_2 = vpshufb(hi_mask_2, data_hi);
+    m256 t1 = and256(c_lo_1, c_hi_1);
+    m256 t2 = and256(c_lo_2, c_hi_2);
+    m256 result = or256(and256(t1, bucket_select_mask_lo),
+                        and256(t2, bucket_select_mask_hi));
+    u32 nresult = movemask256(eq256(result, zeroes256()));
+    u32 cmp_result = (nresult ^ neg_mask) | valid_path_mask;
+
+    DEBUG_PRINTF("cmp_result %x\n", cmp_result);
+
+    return checkMultipath32(cmp_result, hi_bits, lo_bits);
+}
+
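[All four validators assume the caller has already gathered each path's scattered bytes into consecutive lanes; that is what the data_select_mask ("shuffle mask for data ordering") in the instruction structs appears to be for. Schematically, with hypothetical offsets:

    /* buffer bytes at relative offsets -5..-1; two 3-byte paths:
     *   path 0 reads -5 -4 -1  -> lanes 0..2
     *   path 1 reads -3 -2 -1  -> lanes 3..5
     * a pshufb-style select with source indices {0, 1, 4, 2, 3, 4, ...}
     * packs the lanes, duplicating the shared byte at -1 into both paths. */
]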
+static really_inline
+int validateMultipathShuftiMask64(const m256 data_1, const m256 data_2,
+                                  const m256 hi_mask, const m256 lo_mask,
+                                  const m256 bucket_select_mask_1,
+                                  const m256 bucket_select_mask_2,
+                                  const u64a hi_bits, const u64a lo_bits,
+                                  const u64a neg_mask,
+                                  const u64a valid_path_mask) {
+    m256 low4bits = set32x8(0xf);
+    m256 c_lo_1 = vpshufb(lo_mask, and256(data_1, low4bits));
+    m256 c_lo_2 = vpshufb(lo_mask, and256(data_2, low4bits));
+    m256 c_hi_1 = vpshufb(hi_mask,
+                          rshift64_m256(andnot256(low4bits, data_1), 4));
+    m256 c_hi_2 = vpshufb(hi_mask,
+                          rshift64_m256(andnot256(low4bits, data_2), 4));
+    m256 t1 = and256(c_lo_1, c_hi_1);
+    m256 t2 = and256(c_lo_2, c_hi_2);
+    m256 nresult_1 = eq256(and256(t1, bucket_select_mask_1), zeroes256());
+    m256 nresult_2 = eq256(and256(t2, bucket_select_mask_2), zeroes256());
+    u64a nresult = (u64a)movemask256(nresult_1) |
+                   (u64a)movemask256(nresult_2) << 32;
+    u64a cmp_result = (nresult ^ neg_mask) | valid_path_mask;
+
+    DEBUG_PRINTF("cmp_result %llx\n", cmp_result);
+
+    return checkMultipath64(cmp_result, hi_bits, lo_bits);
+}
+
 #endif
diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h
index bc49a046..1f884843 100644
--- a/src/util/simd_utils.h
+++ b/src/util/simd_utils.h
@@ -317,6 +317,11 @@ m128 sub_u8_m128(m128 a, m128 b) {
     return _mm_sub_epi8(a, b);
 }
 
+static really_inline
+m128 set64x2(u64a hi, u64a lo) {
+    return _mm_set_epi64x(hi, lo);
+}
+
 /****
 **** 256-bit Primitives
 ****/
@@ -592,6 +597,18 @@ m256 mask1bit256(unsigned int n) {
     return loadu256(&simd_onebit_masks[mask_idx]);
 }
 
+static really_inline
+m256 set64x4(u64a hi_1, u64a hi_0, u64a lo_1, u64a lo_0) {
+#if defined(HAVE_AVX2)
+    return _mm256_set_epi64x(hi_1, hi_0, lo_1, lo_0);
+#else
+    m256 rv;
+    rv.hi = set64x2(hi_1, hi_0);
+    rv.lo = set64x2(lo_1, lo_0);
+    return rv;
+#endif
+}
+
 #if !defined(HAVE_AVX2)
 // switches on bit N in the given vector.
 static really_inline