rose: add multi-path shufti 16x8, 32x8, 32x16, 64x8 and multi-path lookaround instructions.

This commit is contained in:
Xu, Chi
2017-03-31 04:37:33 +08:00
committed by Matthew Barr
parent 7533e3341e
commit ae3cb7de6f
12 changed files with 2133 additions and 279 deletions

View File

@@ -82,6 +82,7 @@
#include "util/compile_context.h"
#include "util/compile_error.h"
#include "util/container.h"
#include "util/dump_charclass.h"
#include "util/fatbit_build.h"
#include "util/graph_range.h"
#include "util/make_unique.h"
@@ -99,6 +100,7 @@
#include <map>
#include <queue>
#include <set>
#include <sstream>
#include <string>
#include <vector>
#include <utility>
@@ -141,8 +143,8 @@ struct left_build_info {
countingMiracleReach(cm_cr) {}
// Constructor for a lookaround implementation.
explicit left_build_info(const vector<LookEntry> &look)
: has_lookaround(true), lookaround(look) {}
explicit left_build_info(const vector<vector<LookEntry>> &looks)
: has_lookaround(true), lookaround(looks) {}
u32 queue = 0; /* uniquely idents the left_build_info */
u32 lag = 0;
@@ -154,7 +156,7 @@ struct left_build_info {
CharReach countingMiracleReach;
u32 countingMiracleOffset = 0; /* populated later when laying out bytecode */
bool has_lookaround = false;
vector<LookEntry> lookaround; // alternative implementation to the NFA
vector<vector<LookEntry>> lookaround; // alternative implementation to the NFA
};
/**
@@ -197,12 +199,22 @@ struct build_context : noncopyable {
ue2::unordered_map<RoseProgram, u32, RoseProgramHash,
RoseProgramEquivalence> program_cache;
/** \brief LookEntry list cache, so that we don't have to go scanning
* through the full list to find cases we've used already. */
ue2::unordered_map<vector<LookEntry>, size_t> lookaround_cache;
/** \brief LookEntry list cache, so that we can reuse the look index and
* reach index for the same lookaround. */
ue2::unordered_map<vector<vector<LookEntry>>,
pair<size_t, size_t>> lookaround_cache;
/** \brief Lookaround table for Rose roles. */
vector<LookEntry> lookaround;
vector<vector<vector<LookEntry>>> lookaround;
/** \brief Lookaround look table size. */
size_t lookTableSize = 0;
/** \brief Lookaround reach table size.
* Single-path and multi-path lookarounds use reach bitvectors of different
* lengths (32 and 256), so the look table size and the reach table size
* must be tracked separately. */
size_t reachTableSize = 0;
/** \brief State indices, for those roles that have them. */
ue2::unordered_map<RoseVertex, u32> roleStateIndices;
@@ -1582,7 +1594,7 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc,
// TODO: Handle SOM-tracking cases as well.
if (cc.grey.roseLookaroundMasks && is_transient &&
!g[v].left.tracksSom()) {
vector<LookEntry> lookaround;
vector<vector<LookEntry>> lookaround;
if (makeLeftfixLookaround(tbi, v, lookaround)) {
DEBUG_PRINTF("implementing as lookaround!\n");
bc.leftfix_info.emplace(v, left_build_info(lookaround));
@@ -2651,15 +2663,7 @@ bool hasEodAnchors(const RoseBuildImpl &build, const build_context &bc,
}
static
void writeLookaroundTables(build_context &bc, RoseEngine &proto) {
const auto &look_vec = bc.lookaround;
DEBUG_PRINTF("%zu lookaround table entries\n", look_vec.size());
vector<s8> look_table(look_vec.size(), 0);
vector<u8> reach_table(REACH_BITVECTOR_LEN * look_vec.size(), 0);
s8 *look = look_table.data();
u8 *reach = reach_table.data();
void writeLookaround(const vector<LookEntry> &look_vec, s8 *&look, u8 *&reach) {
for (const auto &le : look_vec) {
*look = verify_s8(le.offset);
const CharReach &cr = le.reach;
@@ -2670,6 +2674,52 @@ void writeLookaroundTables(build_context &bc, RoseEngine &proto) {
++look;
reach += REACH_BITVECTOR_LEN;
}
}
static
void writeMultipathLookaround(const vector<vector<LookEntry>> &multi_look,
s8 *&look, u8 *&reach) {
for (const auto &m : multi_look) {
u8 u = 0;
assert(m.size() == MAX_LOOKAROUND_PATHS);
for (size_t i = 0; i < m.size(); i++) {
if (m[i].reach.none()) {
u |= (u8)1U << i;
}
}
std::fill_n(reach, MULTI_REACH_BITVECTOR_LEN, u);
for (size_t i = 0; i < m.size(); i++) {
const CharReach &cr = m[i].reach;
if (cr.none()) {
continue;
}
*look = m[i].offset;
for (size_t c = cr.find_first(); c != cr.npos;
c = cr.find_next(c)) {
reach[c] |= (u8)1U << i;
}
}
++look;
reach += MULTI_REACH_BITVECTOR_LEN;
}
}
static
void writeLookaroundTables(build_context &bc, RoseEngine &proto) {
vector<s8> look_table(bc.lookTableSize, 0);
vector<u8> reach_table(bc.reachTableSize, 0);
s8 *look = look_table.data();
u8 *reach = reach_table.data();
for (const auto &l : bc.lookaround) {
if (l.size() == 1) {
writeLookaround(l.front(), look, reach);
} else {
writeMultipathLookaround(l, look, reach);
}
}
proto.lookaroundTableOffset = bc.engine_blob.add_range(look_table);
proto.lookaroundReachOffset = bc.engine_blob.add_range(reach_table);
@@ -2804,30 +2854,37 @@ bool onlyAtEod(const RoseBuildImpl &tbi, RoseVertex v) {
}
static
u32 addLookaround(build_context &bc, const vector<LookEntry> &look) {
void addLookaround(build_context &bc,
const vector<vector<LookEntry>> &look,
u32 &look_index, u32 &reach_index) {
// Check the cache.
auto it = bc.lookaround_cache.find(look);
if (it != bc.lookaround_cache.end()) {
DEBUG_PRINTF("reusing look at idx %zu\n", it->second);
return verify_u32(it->second);
look_index = verify_u32(it->second.first);
reach_index = verify_u32(it->second.second);
DEBUG_PRINTF("reusing look at idx %u\n", look_index);
DEBUG_PRINTF("reusing reach at idx %u\n", reach_index);
return;
}
// Linear scan for sequence.
auto seq_it = search(begin(bc.lookaround), end(bc.lookaround), begin(look),
end(look));
if (seq_it != end(bc.lookaround)) {
size_t idx = distance(begin(bc.lookaround), seq_it);
DEBUG_PRINTF("linear scan found look at idx %zu\n", idx);
bc.lookaround_cache.emplace(look, idx);
return verify_u32(idx);
size_t look_idx = bc.lookTableSize;
size_t reach_idx = bc.reachTableSize;
if (look.size() == 1) {
bc.lookTableSize += look.front().size();
bc.reachTableSize += look.front().size() * REACH_BITVECTOR_LEN;
} else {
bc.lookTableSize += look.size();
bc.reachTableSize += look.size() * MULTI_REACH_BITVECTOR_LEN;
}
// New sequence.
size_t idx = bc.lookaround.size();
bc.lookaround_cache.emplace(look, idx);
insert(&bc.lookaround, bc.lookaround.end(), look);
DEBUG_PRINTF("adding look at idx %zu\n", idx);
return verify_u32(idx);
bc.lookaround_cache.emplace(look, make_pair(look_idx, reach_idx));
bc.lookaround.emplace_back(look);
DEBUG_PRINTF("adding look at idx %zu\n", look_idx);
DEBUG_PRINTF("adding reach at idx %zu\n", reach_idx);
look_index = verify_u32(look_idx);
reach_index = verify_u32(reach_idx);
}
static
@@ -2977,7 +3034,7 @@ struct cmpNibble {
// Insert all pairs of bucket and offset into buckets.
static really_inline
void getAllBuckets(const vector<LookEntry> &look,
map<u32, vector<s8>, cmpNibble> &buckets, u32 &neg_mask) {
map<u32, vector<s8>, cmpNibble> &buckets, u64a &neg_mask) {
s32 base_offset = verify_s32(look.front().offset);
for (const auto &entry : look) {
CharReach cr = entry.reach;
@@ -2985,7 +3042,7 @@ void getAllBuckets(const vector<LookEntry> &look,
if (cr.count() > 128 ) {
cr.flip();
} else {
neg_mask ^= 1 << (entry.offset - base_offset);
neg_mask ^= 1ULL << (entry.offset - base_offset);
}
map <u16, u16> lo2hi;
// We treat Ascii Table as a 16x16 grid.
@@ -3037,23 +3094,16 @@ void nibMaskUpdate(array<u8, 32> &mask, u32 data, u8 bit_index) {
}
static
bool makeRoleShufti(const vector<LookEntry> &look,
RoseProgram &program) {
s32 base_offset = verify_s32(look.front().offset);
if (look.back().offset >= base_offset + 32) {
return false;
}
array<u8, 32> hi_mask, lo_mask;
hi_mask.fill(0);
lo_mask.fill(0);
array<u8, 32> bucket_select_hi, bucket_select_lo;
bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8.
bucket_select_lo.fill(0);
u8 bit_index = 0; // number of buckets
bool getShuftiMasks(const vector<LookEntry> &look, array<u8, 32> &hi_mask,
array<u8, 32> &lo_mask, u8 *bucket_select_hi,
u8 *bucket_select_lo, u64a &neg_mask,
u8 &bit_idx, size_t len) {
map<u32, u16> nib; // map every bucket to its bucket number.
map<u32, vector<s8>, cmpNibble> bucket2offsets;
u32 neg_mask = ~0u;
s32 base_offset = look.front().offset;
bit_idx = 0;
neg_mask = ~0ULL;
getAllBuckets(look, bucket2offsets, neg_mask);
@@ -3061,15 +3111,15 @@ bool makeRoleShufti(const vector<LookEntry> &look,
u32 hi_lo = it.first;
// New bucket.
if (!nib[hi_lo]) {
if (bit_index >= 16) {
if ((bit_idx >= 8 && len == 64) || bit_idx >= 16) {
return false;
}
nib[hi_lo] = 1 << bit_index;
nib[hi_lo] = 1 << bit_idx;
nibUpdate(nib, hi_lo);
nibMaskUpdate(hi_mask, hi_lo >> 16, bit_index);
nibMaskUpdate(lo_mask, hi_lo & 0xffff, bit_index);
bit_index++;
nibMaskUpdate(hi_mask, hi_lo >> 16, bit_idx);
nibMaskUpdate(lo_mask, hi_lo & 0xffff, bit_idx);
bit_idx++;
}
DEBUG_PRINTF("hi_lo %x bucket %x\n", hi_lo, nib[hi_lo]);
@@ -3082,6 +3132,113 @@ bool makeRoleShufti(const vector<LookEntry> &look,
bucket_select_lo[offset - base_offset] |= nib_lo;
}
}
return true;
}
/**
 * \brief Build a RoseInstrCheckShufti16x8 if the lookaround fits.
 *
 * Returns nullptr when the lookaround spans more than 16 offsets or needs
 * more than 8 buckets. Otherwise the first 16 bytes of \p lo_mask and of
 * \p hi_mask are packed (in that order) into one 32-byte nibble mask, the
 * first 16 bytes of \p bucket_select_mask are kept, and \p neg_mask is
 * truncated to its low 16 bits.
 */
static
unique_ptr<RoseInstruction>
makeCheckShufti16x8(u32 offset_range, u8 bucket_idx,
                    const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
                    const array<u8, 32> &bucket_select_mask,
                    u32 neg_mask, s32 base_offset,
                    const RoseInstruction *end_inst) {
    const bool fits = offset_range <= 16 && bucket_idx <= 8;
    if (!fits) {
        return nullptr;
    }

    // lo nibbles occupy bytes [0, 16), hi nibbles occupy bytes [16, 32).
    array<u8, 32> nib_mask;
    auto hi_half = copy(lo_mask.begin(), lo_mask.begin() + 16,
                        nib_mask.begin());
    copy(hi_mask.begin(), hi_mask.begin() + 16, hi_half);

    array<u8, 16> bucket_select_mask_16;
    copy(bucket_select_mask.begin(), bucket_select_mask.begin() + 16,
         bucket_select_mask_16.begin());

    return make_unique<RoseInstrCheckShufti16x8>(nib_mask,
                                                 bucket_select_mask_16,
                                                 neg_mask & 0xffff,
                                                 base_offset, end_inst);
}
/**
 * \brief Build a RoseInstrCheckShufti32x8 if the lookaround fits.
 *
 * Returns nullptr when the lookaround spans more than 32 offsets or needs
 * more than 8 buckets. Only the first 16 bytes of \p hi_mask and \p lo_mask
 * are passed on; \p bucket_select_mask and \p neg_mask are used in full.
 */
static
unique_ptr<RoseInstruction>
makeCheckShufti32x8(u32 offset_range, u8 bucket_idx,
                    const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
                    const array<u8, 32> &bucket_select_mask,
                    u32 neg_mask, s32 base_offset,
                    const RoseInstruction *end_inst) {
    const bool fits = offset_range <= 32 && bucket_idx <= 8;
    if (!fits) {
        return nullptr;
    }

    array<u8, 16> hi_nibbles;
    copy(hi_mask.begin(), hi_mask.begin() + 16, hi_nibbles.begin());

    array<u8, 16> lo_nibbles;
    copy(lo_mask.begin(), lo_mask.begin() + 16, lo_nibbles.begin());

    return make_unique<RoseInstrCheckShufti32x8>(hi_nibbles, lo_nibbles,
                                                 bucket_select_mask, neg_mask,
                                                 base_offset, end_inst);
}
/**
 * \brief Build a RoseInstrCheckShufti16x16 if the lookaround fits.
 *
 * Returns nullptr when the lookaround spans more than 16 offsets or needs
 * more than 16 buckets. The first 16 bytes of \p bucket_select_mask_lo and
 * of \p bucket_select_mask_hi are packed (in that order) into one 32-byte
 * bucket select mask, and \p neg_mask is truncated to its low 16 bits.
 */
static
unique_ptr<RoseInstruction>
makeCheckShufti16x16(u32 offset_range, u8 bucket_idx,
                     const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
                     const array<u8, 32> &bucket_select_mask_lo,
                     const array<u8, 32> &bucket_select_mask_hi,
                     u32 neg_mask, s32 base_offset,
                     const RoseInstruction *end_inst) {
    const bool fits = offset_range <= 16 && bucket_idx <= 16;
    if (!fits) {
        return nullptr;
    }

    // lo selectors occupy bytes [0, 16), hi selectors occupy bytes [16, 32).
    array<u8, 32> bucket_select_mask_32;
    auto hi_half = copy(bucket_select_mask_lo.begin(),
                        bucket_select_mask_lo.begin() + 16,
                        bucket_select_mask_32.begin());
    copy(bucket_select_mask_hi.begin(), bucket_select_mask_hi.begin() + 16,
         hi_half);

    return make_unique<RoseInstrCheckShufti16x16>(hi_mask, lo_mask,
                                                  bucket_select_mask_32,
                                                  neg_mask & 0xffff,
                                                  base_offset, end_inst);
}
/**
 * \brief Build a RoseInstrCheckShufti32x16 if the lookaround fits.
 *
 * Returns nullptr when the lookaround spans more than 32 offsets or needs
 * more than 16 buckets. All masks are passed through unmodified; note that
 * the instruction takes the hi bucket select mask before the lo one.
 */
static
unique_ptr<RoseInstruction>
makeCheckShufti32x16(u32 offset_range, u8 bucket_idx,
                     const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
                     const array<u8, 32> &bucket_select_mask_lo,
                     const array<u8, 32> &bucket_select_mask_hi,
                     u32 neg_mask, s32 base_offset,
                     const RoseInstruction *end_inst) {
    if (offset_range <= 32 && bucket_idx <= 16) {
        return make_unique<RoseInstrCheckShufti32x16>(hi_mask, lo_mask,
                                                      bucket_select_mask_hi,
                                                      bucket_select_mask_lo,
                                                      neg_mask, base_offset,
                                                      end_inst);
    }
    return nullptr;
}
static
bool makeRoleShufti(const vector<LookEntry> &look,
RoseProgram &program) {
s32 base_offset = verify_s32(look.front().offset);
if (look.back().offset >= base_offset + 32) {
return false;
}
u8 bucket_idx = 0; // number of buckets
u64a neg_mask_64;
array<u8, 32> hi_mask;
array<u8, 32> lo_mask;
array<u8, 32> bucket_select_hi;
array<u8, 32> bucket_select_lo;
hi_mask.fill(0);
lo_mask.fill(0);
bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8.
bucket_select_lo.fill(0);
if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi.data(),
bucket_select_lo.data(), neg_mask_64, bucket_idx, 32)) {
return false;
}
u32 neg_mask = (u32)neg_mask_64;
DEBUG_PRINTF("hi_mask %s\n",
convertMaskstoString(hi_mask.data(), 32).c_str());
@@ -3093,48 +3250,29 @@ bool makeRoleShufti(const vector<LookEntry> &look,
convertMaskstoString(bucket_select_lo.data(), 32).c_str());
const auto *end_inst = program.end_instruction();
if (bit_index < 8) {
if (look.back().offset < base_offset + 16) {
neg_mask &= 0xffff;
array<u8, 32> nib_mask;
array<u8, 16> bucket_select_mask_16;
copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin());
copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16);
copy(bucket_select_lo.begin(), bucket_select_lo.begin() + 16,
bucket_select_mask_16.begin());
auto ri = make_unique<RoseInstrCheckShufti16x8>
(nib_mask, bucket_select_mask_16,
neg_mask, base_offset, end_inst);
program.add_before_end(move(ri));
} else {
array<u8, 16> hi_mask_16;
array<u8, 16> lo_mask_16;
copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_16.begin());
copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_16.begin());
auto ri = make_unique<RoseInstrCheckShufti32x8>
(hi_mask_16, lo_mask_16, bucket_select_lo,
neg_mask, base_offset, end_inst);
program.add_before_end(move(ri));
}
} else {
if (look.back().offset < base_offset + 16) {
neg_mask &= 0xffff;
array<u8, 32> bucket_select_mask_32;
copy(bucket_select_lo.begin(), bucket_select_lo.begin() + 16,
bucket_select_mask_32.begin());
copy(bucket_select_hi.begin(), bucket_select_hi.begin() + 16,
bucket_select_mask_32.begin() + 16);
auto ri = make_unique<RoseInstrCheckShufti16x16>
(hi_mask, lo_mask, bucket_select_mask_32,
neg_mask, base_offset, end_inst);
program.add_before_end(move(ri));
} else {
auto ri = make_unique<RoseInstrCheckShufti32x16>
(hi_mask, lo_mask, bucket_select_hi, bucket_select_lo,
neg_mask, base_offset, end_inst);
program.add_before_end(move(ri));
}
s32 offset_range = look.back().offset - base_offset + 1;
auto ri = makeCheckShufti16x8(offset_range, bucket_idx, hi_mask, lo_mask,
bucket_select_lo, neg_mask, base_offset,
end_inst);
if (!ri) {
ri = makeCheckShufti32x8(offset_range, bucket_idx, hi_mask, lo_mask,
bucket_select_lo, neg_mask, base_offset,
end_inst);
}
if (!ri) {
ri = makeCheckShufti16x16(offset_range, bucket_idx, hi_mask, lo_mask,
bucket_select_lo, bucket_select_hi,
neg_mask, base_offset, end_inst);
}
if (!ri) {
ri = makeCheckShufti32x16(offset_range, bucket_idx, hi_mask, lo_mask,
bucket_select_lo, bucket_select_hi,
neg_mask, base_offset, end_inst);
}
assert(ri);
program.add_before_end(move(ri));
return true;
}
@@ -3153,9 +3291,13 @@ void makeLookaroundInstruction(build_context &bc, const vector<LookEntry> &look,
if (look.size() == 1) {
s8 offset = look.begin()->offset;
u32 look_idx = addLookaround(bc, look);
auto ri = make_unique<RoseInstrCheckSingleLookaround>(offset, look_idx,
program.end_instruction());
u32 look_idx, reach_idx;
vector<vector<LookEntry>> lookaround;
lookaround.emplace_back(look);
addLookaround(bc, lookaround, look_idx, reach_idx);
// We don't need look_idx here.
auto ri = make_unique<RoseInstrCheckSingleLookaround>(offset, reach_idx,
program.end_instruction());
program.add_before_end(move(ri));
return;
}
@@ -3172,10 +3314,242 @@ void makeLookaroundInstruction(build_context &bc, const vector<LookEntry> &look,
return;
}
u32 look_idx = addLookaround(bc, look);
u32 look_idx, reach_idx;
vector<vector<LookEntry>> lookaround;
lookaround.emplace_back(look);
addLookaround(bc, lookaround, look_idx, reach_idx);
u32 look_count = verify_u32(look.size());
auto ri = make_unique<RoseInstrCheckLookaround>(look_idx, look_count,
auto ri = make_unique<RoseInstrCheckLookaround>(look_idx, reach_idx,
look_count,
program.end_instruction());
program.add_before_end(move(ri));
}
#if defined(DEBUG) || defined(DUMP_SUPPORT)
/**
 * \brief Render a list of look entries as "{offset: class}, {offset: class}"
 * for debug output.
 */
static UNUSED
string dumpMultiLook(const vector<LookEntry> &looks) {
    ostringstream oss;
    const char *separator = ""; // nothing before the first entry
    for (const auto &entry : looks) {
        oss << separator;
        oss << "{" << int(entry.offset) << ": " << describeClass(entry.reach)
            << "}";
        separator = ", ";
    }
    return oss.str();
}
#endif
/**
 * \brief Try to implement a multi-path lookaround as a single multipath
 * shufti instruction.
 *
 * Each element of \p multi_look is one path (a non-empty list of look
 * entries). On success, one of the RoseInstrCheckMultipathShufti16x8, 32x8,
 * 32x16 or 64 instructions is added before the end of \p program and true is
 * returned.
 *
 * Returns false, leaving \p program untouched, if \p multi_look is empty, if
 * the paths span MULTIPATH_MAX_LEN or more offsets, if the paths hold more
 * than 64 entries in total, or if getShuftiMasks() cannot build the masks.
 */
static
bool makeRoleMultipathShufti(const vector<vector<LookEntry>> &multi_look,
RoseProgram &program) {
if (multi_look.empty()) {
return false;
}
// find the base offset
// base_offset: presumably the smallest first offset over all paths;
// last_start: presumably the largest first offset over all paths;
// end_offset: presumably the largest last offset over all paths.
// (Assumes LIMIT_TO_AT_MOST / ENSURE_AT_LEAST clamp their first argument
// in place -- their definitions are not visible here.)
assert(!multi_look[0].empty());
s32 base_offset = multi_look[0].front().offset;
s32 last_start = base_offset;
s32 end_offset = multi_look[0].back().offset;
// Total number of look entries over all paths.
size_t multi_len = 0;
for (const auto &look : multi_look) {
assert(look.size() > 0);
multi_len += look.size();
LIMIT_TO_AT_MOST(&base_offset, look.front().offset);
ENSURE_AT_LEAST(&last_start, look.front().offset);
ENSURE_AT_LEAST(&end_offset, look.back().offset);
}
assert(last_start < 0);
// The offsets covered by all paths must span fewer than MULTIPATH_MAX_LEN
// positions.
if (end_offset - base_offset >= MULTIPATH_MAX_LEN) {
return false;
}
// Round the total entry count up to one of the supported widths
// (16, 32 or 64); anything longer cannot be handled here.
if (multi_len <= 16) {
multi_len = 16;
} else if (multi_len <= 32) {
multi_len = 32;
} else if (multi_len <= 64) {
multi_len = 64;
} else {
DEBUG_PRINTF("too long for multi-path\n");
return false;
}
// Flatten all paths into one sequence. The k-th flattened entry is given
// offset k, while data_select_mask[k] records its real offset relative to
// base_offset. lo_bits_mask has a bit set at the index of the first entry
// of each path, hi_bits_mask at the index of the last entry of each path.
vector<LookEntry> linear_look;
array<u8, 64> data_select_mask;
data_select_mask.fill(0);
u64a hi_bits_mask = 0;
u64a lo_bits_mask = 0;
for (const auto &look : multi_look) {
assert(linear_look.size() < 64);
lo_bits_mask |= 1LLU << linear_look.size();
for (const auto &entry : look) {
assert(entry.offset - base_offset < MULTIPATH_MAX_LEN);
data_select_mask[linear_look.size()] =
verify_u8(entry.offset - base_offset);
linear_look.emplace_back(verify_s8(linear_look.size()), entry.reach);
}
hi_bits_mask |= 1LLU << (linear_look.size() - 1);
}
u8 bit_index = 0; // number of buckets
u64a neg_mask;
array<u8, 32> hi_mask;
array<u8, 32> lo_mask;
array<u8, 64> bucket_select_hi;
array<u8, 64> bucket_select_lo;
hi_mask.fill(0);
lo_mask.fill(0);
bucket_select_hi.fill(0);
bucket_select_lo.fill(0);
// Build the shufti masks over the flattened sequence; neg_mask and
// bit_index are outputs of this call.
if (!getShuftiMasks(linear_look, hi_mask, lo_mask, bucket_select_hi.data(),
bucket_select_lo.data(), neg_mask, bit_index,
multi_len)) {
return false;
}
DEBUG_PRINTF("hi_mask %s\n",
convertMaskstoString(hi_mask.data(), 16).c_str());
DEBUG_PRINTF("lo_mask %s\n",
convertMaskstoString(lo_mask.data(), 16).c_str());
DEBUG_PRINTF("bucket_select_hi %s\n",
convertMaskstoString(bucket_select_hi.data(), 64).c_str());
DEBUG_PRINTF("bucket_select_lo %s\n",
convertMaskstoString(bucket_select_lo.data(), 64).c_str());
DEBUG_PRINTF("data_select_mask %s\n",
convertMaskstoString(data_select_mask.data(), 64).c_str());
DEBUG_PRINTF("hi_bits_mask %llx\n", hi_bits_mask);
DEBUG_PRINTF("lo_bits_mask %llx\n", lo_bits_mask);
DEBUG_PRINTF("neg_mask %llx\n", neg_mask);
DEBUG_PRINTF("base_offset %d\n", base_offset);
DEBUG_PRINTF("last_start %d\n", last_start);
// Since we don't have 16x16 now, just call 32x16 instead.
// More than 8 buckets is only expected for the 16 and 32 widths, hence
// the assert below.
if (bit_index > 8) {
assert(multi_len <= 32);
multi_len = 32;
}
const auto *end_inst = program.end_instruction();
assert(multi_len == 16 || multi_len == 32 || multi_len == 64);
if (multi_len == 16) {
// 16-wide, at most 8 buckets: lo_mask and hi_mask (first 16 bytes each)
// are packed into a single 32-byte nibble mask, lo first.
neg_mask &= 0xffff;
assert(!(hi_bits_mask & ~0xffffULL));
assert(!(lo_bits_mask & ~0xffffULL));
assert(bit_index <=8);
array<u8, 32> nib_mask;
copy(begin(lo_mask), begin(lo_mask) + 16, nib_mask.begin());
copy(begin(hi_mask), begin(hi_mask) + 16, nib_mask.begin() + 16);
auto ri = make_unique<RoseInstrCheckMultipathShufti16x8>
(nib_mask, bucket_select_lo, data_select_mask, hi_bits_mask,
lo_bits_mask, neg_mask, base_offset, last_start, end_inst);
program.add_before_end(move(ri));
} else if (multi_len == 32) {
// 32-wide: choose the 8-bucket or 16-bucket form by bucket count.
neg_mask &= 0xffffffff;
assert(!(hi_bits_mask & ~0xffffffffULL));
assert(!(lo_bits_mask & ~0xffffffffULL));
if (bit_index <= 8) {
auto ri = make_unique<RoseInstrCheckMultipathShufti32x8>
(hi_mask, lo_mask, bucket_select_lo, data_select_mask,
hi_bits_mask, lo_bits_mask, neg_mask, base_offset,
last_start, end_inst);
program.add_before_end(move(ri));
} else {
auto ri = make_unique<RoseInstrCheckMultipathShufti32x16>
(hi_mask, lo_mask, bucket_select_hi, bucket_select_lo,
data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask,
base_offset, last_start, end_inst);
program.add_before_end(move(ri));
}
} else {
// 64-wide: the full 64-bit masks are passed through unmodified.
auto ri = make_unique<RoseInstrCheckMultipathShufti64>
(hi_mask, lo_mask, bucket_select_lo, data_select_mask,
hi_bits_mask, lo_bits_mask, neg_mask, base_offset,
last_start, end_inst);
program.add_before_end(move(ri));
}
return true;
}
/**
 * \brief Emit a RoseInstrMultipathLookaround for the given paths.
 *
 * Each element of \p multi_look is one path (a non-empty list of look
 * entries); at most MAX_LOOKAROUND_PATHS paths are expected. The paths are
 * regrouped by offset: for every distinct offset, one vector with
 * MAX_LOOKAROUND_PATHS slots is built in which slot i holds path i's entry
 * at that offset (or a default-constructed entry if path i has none). The
 * regrouped table is registered via addLookaround() and the instruction is
 * added before the end of \p program.
 */
static
void makeRoleMultipathLookaround(build_context &bc,
                                 const vector<vector<LookEntry>> &multi_look,
                                 RoseProgram &program) {
    assert(!multi_look.empty());
    assert(multi_look.size() <= MAX_LOOKAROUND_PATHS);
    assert(!multi_look[0].empty());

    const size_t num_paths = multi_look.size();

    // Gather every distinct offset used by any path, and the latest offset
    // at which any path starts.
    set<s32> all_offsets;
    s32 last_start = multi_look[0][0].offset;
    for (const auto &path : multi_look) {
        assert(!path.empty());
        const s32 path_start = (s32)path.begin()->offset;
        if (last_start < path_start) {
            last_start = path_start;
        }
        for (const auto &entry : path) {
            all_offsets.insert(entry.offset);
        }
    }
    const s32 first_offset = *all_offsets.begin();

    // Start with one bit per path in every slot.
    const u8 all_paths = num_paths < MAX_LOOKAROUND_PATHS
                             ? static_cast<u8>((1 << num_paths) - 1)
                             : static_cast<u8>(0xff);
    array<u8, MULTIPATH_MAX_LEN> start_mask;
    start_mask.fill(all_paths);

    // Clear a path's bit in the slot one past each offset (relative to the
    // smallest offset) at which that path has an entry.
    for (u32 path_idx = 0; path_idx < num_paths; ++path_idx) {
        for (const auto &entry : multi_look[path_idx]) {
            assert(entry.offset >= (int)first_offset);
            const size_t update_offset = entry.offset - first_offset + 1;
            if (update_offset < start_mask.size()) {
                start_mask[update_offset] &= ~(1 << path_idx);
            }
        }
    }

    // Turn the mask into a running AND, so a cleared bit stays cleared for
    // all later slots.
    u8 running = start_mask[0];
    for (u32 i = 1; i < MULTIPATH_MAX_LEN; i++) {
        running &= start_mask[i];
        start_mask[i] = running;
        DEBUG_PRINTF("start_mask[%u] = %x\n", i, start_mask[i]);
    }

    assert(all_offsets.size() <= MULTIPATH_MAX_LEN);
    assert(last_start < 0);

    // Regroup the entries by offset, one slot per path.
    vector<vector<LookEntry>> ordered_look;
    for (const auto &offset : all_offsets) {
        vector<LookEntry> multi_entry(MAX_LOOKAROUND_PATHS);
        for (size_t i = 0; i < num_paths; i++) {
            for (const auto &entry : multi_look[i]) {
                if (entry.offset == offset) {
                    multi_entry[i] = entry;
                }
            }
        }
        ordered_look.push_back(multi_entry);
    }

    u32 look_idx, reach_idx;
    addLookaround(bc, ordered_look, look_idx, reach_idx);
    const u32 look_count = verify_u32(ordered_look.size());

    auto ri = make_unique<RoseInstrMultipathLookaround>(
        look_idx, reach_idx, look_count, last_start, start_mask,
        program.end_instruction());
    program.add_before_end(move(ri));
}
@@ -3187,25 +3561,34 @@ void makeRoleLookaround(const RoseBuildImpl &build, build_context &bc,
return;
}
vector<LookEntry> look;
vector<vector<LookEntry>> looks;
// Lookaround from leftfix (mandatory).
if (contains(bc.leftfix_info, v) && bc.leftfix_info.at(v).has_lookaround) {
DEBUG_PRINTF("using leftfix lookaround\n");
look = bc.leftfix_info.at(v).lookaround;
looks = bc.leftfix_info.at(v).lookaround;
}
// We may be able to find more lookaround info (advisory) and merge it
// in.
vector<LookEntry> look_more;
findLookaroundMasks(build, v, look_more);
mergeLookaround(look, look_more);
if (look.empty()) {
if (looks.size() <= 1) {
vector<LookEntry> look;
vector<LookEntry> look_more;
if (!looks.empty()) {
look = move(looks.front());
}
findLookaroundMasks(build, v, look_more);
mergeLookaround(look, look_more);
if (!look.empty()) {
makeLookaroundInstruction(bc, look, program);
}
return;
}
makeLookaroundInstruction(bc, look, program);
if (!makeRoleMultipathShufti(looks, program)) {
assert(looks.size() <= 8);
makeRoleMultipathLookaround(bc, looks, program);
}
}
static