refactoring of double byte offset accel to use paths and add to mcclellan

This commit is contained in:
Alex Coyte
2016-03-10 09:58:28 +11:00
committed by Matthew Barr
parent 6898dc9864
commit 89d7728f77
8 changed files with 273 additions and 263 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -94,7 +94,6 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) {
DEBUG_PRINTF("unable to accelerate case with %zu outs\n", outs);
}
static
bool isCaselessDouble(const flat_set<pair<u8, u8>> &stop) {
// test for vector containing <A,Z> <A,z> <a,Z> <a,z>
if (stop.size() != 4) {

View File

@@ -56,6 +56,8 @@ struct MultibyteAccelInfo {
multiaccel_type type = MAT_NONE;
};
bool isCaselessDouble(const flat_set<std::pair<u8, u8>> &stop);
struct AccelInfo {
AccelInfo() : single_offset(0U), double_offset(0U),
single_stops(CharReach::dot()),

View File

@@ -566,12 +566,29 @@ bool containsBadSubset(const limex_accel_info &accel,
}
static
void doAccelCommon(NGHolder &g,
ue2::unordered_map<NFAVertex, AccelScheme> &accel_map,
const ue2::unordered_map<NFAVertex, u32> &state_ids,
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
const u32 num_states, limex_accel_info *accel,
const CompileContext &cc) {
/** Reports whether the accel scheme \p as is too wide: returns true when
 * as.cr.count() is greater than MAX_MERGED_ACCEL_STOPS. */
bool is_too_wide(const AccelScheme &as) {
return as.cr.count() > MAX_MERGED_ACCEL_STOPS;
}
static
void fillAccelInfo(build_info &bi) {
if (!bi.do_accel) {
return;
}
NGHolder &g = bi.h;
limex_accel_info &accel = bi.accel;
unordered_map<NFAVertex, AccelScheme> &accel_map = accel.accel_map;
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic = bi.br_cyclic;
const CompileContext &cc = bi.cc;
const unordered_map<NFAVertex, u32> &state_ids = bi.state_ids;
const u32 num_states = bi.num_states;
nfaFindAccelSchemes(g, br_cyclic, &accel_map);
filterAccelStates(g, bi.tops, &accel_map);
assert(accel_map.size() <= NFA_MAX_ACCEL_STATES);
vector<CharReach> refined_cr = reduced_cr(g, br_cyclic);
vector<NFAVertex> astates;
@@ -602,7 +619,7 @@ void doAccelCommon(NGHolder &g,
}
}
if (containsBadSubset(*accel, state_set, effective_sds)) {
if (containsBadSubset(accel, state_set, effective_sds)) {
DEBUG_PRINTF("accel %u has bad subset\n", i);
continue; /* if a subset failed to build we would too */
}
@@ -610,19 +627,20 @@ void doAccelCommon(NGHolder &g,
const bool allow_wide = allow_wide_accel(states, g, sds_or_proxy);
AccelScheme as = nfaFindAccel(g, states, refined_cr, br_cyclic,
allow_wide);
if (as.cr.count() > MAX_MERGED_ACCEL_STOPS) {
allow_wide, true);
if (is_too_wide(as)) {
DEBUG_PRINTF("accel %u too wide (%zu, %d)\n", i,
as.cr.count(), MAX_MERGED_ACCEL_STOPS);
continue;
}
DEBUG_PRINTF("accel %u ok with offset %u\n", i, as.offset);
DEBUG_PRINTF("accel %u ok with offset s%u, d%u\n", i, as.offset,
as.double_offset);
// try multibyte acceleration first
MultibyteAccelInfo mai = nfaCheckMultiAccel(g, states, cc);
precalcAccel &pa = accel->precalc[state_set];
precalcAccel &pa = accel.precalc[state_set];
useful |= state_set;
// if we successfully built a multibyte accel scheme, use that
@@ -635,17 +653,11 @@ void doAccelCommon(NGHolder &g,
pa.single_offset = as.offset;
pa.single_cr = as.cr;
if (states.size() == 1) {
DoubleAccelInfo b = findBestDoubleAccelInfo(g, states.front());
if (pa.single_cr.count() > b.stop1.count()) {
/* insert this information into the precalc accel info as it is
* better than the single scheme */
pa.double_offset = b.offset;
pa.double_lits = b.stop2;
pa.double_cr = b.stop1;
}
}
if (as.double_byte.size() != 0) {
pa.double_offset = as.double_offset;
pa.double_lits = as.double_byte;
pa.double_cr = as.double_cr;
};
}
for (const auto &m : accel_map) {
@@ -663,33 +675,21 @@ void doAccelCommon(NGHolder &g,
state_set.set(state_id);
bool is_multi = false;
auto p_it = accel->precalc.find(state_set);
if (p_it != accel->precalc.end()) {
auto p_it = accel.precalc.find(state_set);
if (p_it != accel.precalc.end()) {
const precalcAccel &pa = p_it->second;
offset = max(pa.double_offset, pa.single_offset);
is_multi = pa.ma_info.type != MultibyteAccelInfo::MAT_NONE;
assert(offset <= MAX_ACCEL_DEPTH);
}
accel->accelerable.insert(v);
if (!is_multi)
findAccelFriends(g, v, br_cyclic, offset, &accel->friends[v]);
accel.accelerable.insert(v);
if (!is_multi) {
findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]);
}
}
}
/** Fills in the acceleration information held in \p bi.
 *
 * Returns immediately when bi.do_accel is false. Otherwise it calls
 * nfaFindAccelSchemes() and filterAccelStates() to build
 * bi.accel.accel_map, then passes that map, together with the state ids,
 * bounded-repeat summary, state count and compile context from \p bi, to
 * doAccelCommon(), which receives a pointer to bi.accel. */
static
void fillAccelInfo(build_info &bi) {
if (!bi.do_accel) {
return;
}
nfaFindAccelSchemes(bi.h, bi.br_cyclic, &bi.accel.accel_map);
filterAccelStates(bi.h, bi.tops, &bi.accel.accel_map);
// after filtering, the map must hold no more than NFA_MAX_ACCEL_STATES entries
assert(bi.accel.accel_map.size() <= NFA_MAX_ACCEL_STATES);
doAccelCommon(bi.h, bi.accel.accel_map, bi.state_ids, bi.br_cyclic,
bi.num_states, &bi.accel, bi.cc);
}
/** The AccelAux structure has large alignment specified, and this makes some
* compilers do odd things unless we specify a custom allocator. */
typedef vector<AccelAux, AlignedAllocator<AccelAux, alignof(AccelAux)> >

View File

@@ -201,7 +201,8 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
void *accel_out) {
AccelAux *accel = (AccelAux *)accel_out;
DEBUG_PRINTF("accelerations scheme has offset %u\n", info.offset);
DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset,
info.outs2_offset);
accel->generic.offset = verify_u8(info.offset);
if (double_byte_ok(info) && info.outs2_single.none()
@@ -209,6 +210,7 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
accel->accel_type = ACCEL_DVERM;
accel->dverm.c1 = info.outs2.begin()->first;
accel->dverm.c2 = info.outs2.begin()->second;
accel->dverm.offset = verify_u8(info.outs2_offset);
DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx);
return;
}
@@ -233,6 +235,7 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
accel->accel_type = ACCEL_DVERM_NOCASE;
accel->dverm.c1 = firstC;
accel->dverm.c2 = secondC;
accel->dverm.offset = verify_u8(info.outs2_offset);
DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx);
return;
}
@@ -240,6 +243,7 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
if (double_byte_ok(info)) {
accel->accel_type = ACCEL_DSHUFTI;
accel->dshufti.offset = verify_u8(info.outs2_offset);
shuftiBuildDoubleMasks(info.outs2_single, info.outs2,
&accel->dshufti.lo1,
&accel->dshufti.hi1,

View File

@@ -60,6 +60,7 @@ struct escape_info {
flat_set<std::pair<u8, u8>> outs2;
bool outs2_broken = false;
u32 offset = 0;
u32 outs2_offset = 0;
};
class dfa_build_strat {

View File

@@ -183,11 +183,18 @@ escape_info look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base,
DEBUG_PRINTF("looking for accel for %hu\n", base);
vector<vector<CharReach> > paths = generate_paths(rdfa, base,
max_allowed_accel_offset + 1);
AccelScheme as = findBestAccelScheme(paths, CharReach());
AccelScheme as = findBestAccelScheme(paths, CharReach(), true);
escape_info rv;
rv.outs2_broken = true;
rv.offset = as.offset;
rv.outs = as.cr;
if (!as.double_byte.empty()) {
rv.outs2_single = as.double_cr;
rv.outs2 = as.double_byte;
rv.outs2_offset = as.double_offset;
rv.outs2_broken = false;
} else {
rv.outs2_broken = true;
}
DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset);
return rv;
}
@@ -308,10 +315,15 @@ map<dstate_id_t, escape_info> populateAccelerationInfo(const raw_dfa &rdfa,
/* provide acceleration states to states in the region of sds */
if (contains(rv, sds_proxy)) {
auto sds_region = find_region(rdfa, sds_proxy, rv[sds_proxy]);
escape_info sds_ei = rv[sds_proxy];
sds_ei.outs2_broken = true; /* region based on single byte scheme
* may differ from double byte */
DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n",
sds_ei.outs.count());
auto sds_region = find_region(rdfa, sds_proxy, sds_ei);
for (auto s : sds_region) {
if (!contains(rv, s) || better(rv[sds_proxy], rv[s])) {
rv[s] = rv[sds_proxy];
if (!contains(rv, s) || better(sds_ei, rv[s])) {
rv[s] = sds_ei;
}
}
}
@@ -395,7 +407,7 @@ escape_info find_mcclellan_escape_info(const raw_dfa &rdfa,
max_allowed_accel_offset);
DEBUG_PRINTF("width %zu vs %zu\n", offset.outs.count(),
rv.outs.count());
if (offset.outs.count() < rv.outs.count()) {
if (double_byte_ok(offset) || offset.outs.count() < rv.outs.count()) {
DEBUG_PRINTF("using offset accel\n");
rv = offset;
}