mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-30 19:47:43 +03:00
refactoring of double byte offset accel to use paths and add to mcclellan
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -94,7 +94,6 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) {
|
||||
DEBUG_PRINTF("unable to accelerate case with %zu outs\n", outs);
|
||||
}
|
||||
|
||||
static
|
||||
bool isCaselessDouble(const flat_set<pair<u8, u8>> &stop) {
|
||||
// test for vector containing <A,Z> <A,z> <a,Z> <a,z>
|
||||
if (stop.size() != 4) {
|
||||
|
@@ -56,6 +56,8 @@ struct MultibyteAccelInfo {
|
||||
multiaccel_type type = MAT_NONE;
|
||||
};
|
||||
|
||||
bool isCaselessDouble(const flat_set<std::pair<u8, u8>> &stop);
|
||||
|
||||
struct AccelInfo {
|
||||
AccelInfo() : single_offset(0U), double_offset(0U),
|
||||
single_stops(CharReach::dot()),
|
||||
|
@@ -566,12 +566,29 @@ bool containsBadSubset(const limex_accel_info &accel,
|
||||
}
|
||||
|
||||
static
|
||||
void doAccelCommon(NGHolder &g,
|
||||
ue2::unordered_map<NFAVertex, AccelScheme> &accel_map,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
||||
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
|
||||
const u32 num_states, limex_accel_info *accel,
|
||||
const CompileContext &cc) {
|
||||
bool is_too_wide(const AccelScheme &as) {
|
||||
return as.cr.count() > MAX_MERGED_ACCEL_STOPS;
|
||||
}
|
||||
|
||||
static
|
||||
void fillAccelInfo(build_info &bi) {
|
||||
if (!bi.do_accel) {
|
||||
return;
|
||||
}
|
||||
|
||||
NGHolder &g = bi.h;
|
||||
limex_accel_info &accel = bi.accel;
|
||||
unordered_map<NFAVertex, AccelScheme> &accel_map = accel.accel_map;
|
||||
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic = bi.br_cyclic;
|
||||
const CompileContext &cc = bi.cc;
|
||||
const unordered_map<NFAVertex, u32> &state_ids = bi.state_ids;
|
||||
const u32 num_states = bi.num_states;
|
||||
|
||||
nfaFindAccelSchemes(g, br_cyclic, &accel_map);
|
||||
filterAccelStates(g, bi.tops, &accel_map);
|
||||
|
||||
assert(accel_map.size() <= NFA_MAX_ACCEL_STATES);
|
||||
|
||||
vector<CharReach> refined_cr = reduced_cr(g, br_cyclic);
|
||||
|
||||
vector<NFAVertex> astates;
|
||||
@@ -602,7 +619,7 @@ void doAccelCommon(NGHolder &g,
|
||||
}
|
||||
}
|
||||
|
||||
if (containsBadSubset(*accel, state_set, effective_sds)) {
|
||||
if (containsBadSubset(accel, state_set, effective_sds)) {
|
||||
DEBUG_PRINTF("accel %u has bad subset\n", i);
|
||||
continue; /* if a subset failed to build we would too */
|
||||
}
|
||||
@@ -610,19 +627,20 @@ void doAccelCommon(NGHolder &g,
|
||||
const bool allow_wide = allow_wide_accel(states, g, sds_or_proxy);
|
||||
|
||||
AccelScheme as = nfaFindAccel(g, states, refined_cr, br_cyclic,
|
||||
allow_wide);
|
||||
if (as.cr.count() > MAX_MERGED_ACCEL_STOPS) {
|
||||
allow_wide, true);
|
||||
if (is_too_wide(as)) {
|
||||
DEBUG_PRINTF("accel %u too wide (%zu, %d)\n", i,
|
||||
as.cr.count(), MAX_MERGED_ACCEL_STOPS);
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("accel %u ok with offset %u\n", i, as.offset);
|
||||
DEBUG_PRINTF("accel %u ok with offset s%u, d%u\n", i, as.offset,
|
||||
as.double_offset);
|
||||
|
||||
// try multibyte acceleration first
|
||||
MultibyteAccelInfo mai = nfaCheckMultiAccel(g, states, cc);
|
||||
|
||||
precalcAccel &pa = accel->precalc[state_set];
|
||||
precalcAccel &pa = accel.precalc[state_set];
|
||||
useful |= state_set;
|
||||
|
||||
// if we successfully built a multibyte accel scheme, use that
|
||||
@@ -635,17 +653,11 @@ void doAccelCommon(NGHolder &g,
|
||||
|
||||
pa.single_offset = as.offset;
|
||||
pa.single_cr = as.cr;
|
||||
|
||||
if (states.size() == 1) {
|
||||
DoubleAccelInfo b = findBestDoubleAccelInfo(g, states.front());
|
||||
if (pa.single_cr.count() > b.stop1.count()) {
|
||||
/* insert this information into the precalc accel info as it is
|
||||
* better than the single scheme */
|
||||
pa.double_offset = b.offset;
|
||||
pa.double_lits = b.stop2;
|
||||
pa.double_cr = b.stop1;
|
||||
}
|
||||
}
|
||||
if (as.double_byte.size() != 0) {
|
||||
pa.double_offset = as.double_offset;
|
||||
pa.double_lits = as.double_byte;
|
||||
pa.double_cr = as.double_cr;
|
||||
};
|
||||
}
|
||||
|
||||
for (const auto &m : accel_map) {
|
||||
@@ -663,33 +675,21 @@ void doAccelCommon(NGHolder &g,
|
||||
state_set.set(state_id);
|
||||
|
||||
bool is_multi = false;
|
||||
auto p_it = accel->precalc.find(state_set);
|
||||
if (p_it != accel->precalc.end()) {
|
||||
auto p_it = accel.precalc.find(state_set);
|
||||
if (p_it != accel.precalc.end()) {
|
||||
const precalcAccel &pa = p_it->second;
|
||||
offset = max(pa.double_offset, pa.single_offset);
|
||||
is_multi = pa.ma_info.type != MultibyteAccelInfo::MAT_NONE;
|
||||
assert(offset <= MAX_ACCEL_DEPTH);
|
||||
}
|
||||
|
||||
accel->accelerable.insert(v);
|
||||
if (!is_multi)
|
||||
findAccelFriends(g, v, br_cyclic, offset, &accel->friends[v]);
|
||||
accel.accelerable.insert(v);
|
||||
if (!is_multi) {
|
||||
findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void fillAccelInfo(build_info &bi) {
|
||||
if (!bi.do_accel) {
|
||||
return;
|
||||
}
|
||||
|
||||
nfaFindAccelSchemes(bi.h, bi.br_cyclic, &bi.accel.accel_map);
|
||||
filterAccelStates(bi.h, bi.tops, &bi.accel.accel_map);
|
||||
assert(bi.accel.accel_map.size() <= NFA_MAX_ACCEL_STATES);
|
||||
doAccelCommon(bi.h, bi.accel.accel_map, bi.state_ids, bi.br_cyclic,
|
||||
bi.num_states, &bi.accel, bi.cc);
|
||||
}
|
||||
|
||||
/** The AccelAux structure has large alignment specified, and this makes some
|
||||
* compilers do odd things unless we specify a custom allocator. */
|
||||
typedef vector<AccelAux, AlignedAllocator<AccelAux, alignof(AccelAux)> >
|
||||
|
@@ -201,7 +201,8 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
||||
void *accel_out) {
|
||||
AccelAux *accel = (AccelAux *)accel_out;
|
||||
|
||||
DEBUG_PRINTF("accelerations scheme has offset %u\n", info.offset);
|
||||
DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset,
|
||||
info.outs2_offset);
|
||||
accel->generic.offset = verify_u8(info.offset);
|
||||
|
||||
if (double_byte_ok(info) && info.outs2_single.none()
|
||||
@@ -209,6 +210,7 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
||||
accel->accel_type = ACCEL_DVERM;
|
||||
accel->dverm.c1 = info.outs2.begin()->first;
|
||||
accel->dverm.c2 = info.outs2.begin()->second;
|
||||
accel->dverm.offset = verify_u8(info.outs2_offset);
|
||||
DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx);
|
||||
return;
|
||||
}
|
||||
@@ -233,6 +235,7 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
||||
accel->accel_type = ACCEL_DVERM_NOCASE;
|
||||
accel->dverm.c1 = firstC;
|
||||
accel->dverm.c2 = secondC;
|
||||
accel->dverm.offset = verify_u8(info.outs2_offset);
|
||||
DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx);
|
||||
return;
|
||||
}
|
||||
@@ -240,6 +243,7 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
||||
|
||||
if (double_byte_ok(info)) {
|
||||
accel->accel_type = ACCEL_DSHUFTI;
|
||||
accel->dshufti.offset = verify_u8(info.outs2_offset);
|
||||
shuftiBuildDoubleMasks(info.outs2_single, info.outs2,
|
||||
&accel->dshufti.lo1,
|
||||
&accel->dshufti.hi1,
|
||||
|
@@ -60,6 +60,7 @@ struct escape_info {
|
||||
flat_set<std::pair<u8, u8>> outs2;
|
||||
bool outs2_broken = false;
|
||||
u32 offset = 0;
|
||||
u32 outs2_offset = 0;
|
||||
};
|
||||
|
||||
class dfa_build_strat {
|
||||
|
@@ -183,11 +183,18 @@ escape_info look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base,
|
||||
DEBUG_PRINTF("looking for accel for %hu\n", base);
|
||||
vector<vector<CharReach> > paths = generate_paths(rdfa, base,
|
||||
max_allowed_accel_offset + 1);
|
||||
AccelScheme as = findBestAccelScheme(paths, CharReach());
|
||||
AccelScheme as = findBestAccelScheme(paths, CharReach(), true);
|
||||
escape_info rv;
|
||||
rv.outs2_broken = true;
|
||||
rv.offset = as.offset;
|
||||
rv.outs = as.cr;
|
||||
if (!as.double_byte.empty()) {
|
||||
rv.outs2_single = as.double_cr;
|
||||
rv.outs2 = as.double_byte;
|
||||
rv.outs2_offset = as.double_offset;
|
||||
rv.outs2_broken = false;
|
||||
} else {
|
||||
rv.outs2_broken = true;
|
||||
}
|
||||
DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset);
|
||||
return rv;
|
||||
}
|
||||
@@ -308,10 +315,15 @@ map<dstate_id_t, escape_info> populateAccelerationInfo(const raw_dfa &rdfa,
|
||||
|
||||
/* provide acceleration states to states in the region of sds */
|
||||
if (contains(rv, sds_proxy)) {
|
||||
auto sds_region = find_region(rdfa, sds_proxy, rv[sds_proxy]);
|
||||
escape_info sds_ei = rv[sds_proxy];
|
||||
sds_ei.outs2_broken = true; /* region based on single byte scheme
|
||||
* may differ from double byte */
|
||||
DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n",
|
||||
sds_ei.outs.count());
|
||||
auto sds_region = find_region(rdfa, sds_proxy, sds_ei);
|
||||
for (auto s : sds_region) {
|
||||
if (!contains(rv, s) || better(rv[sds_proxy], rv[s])) {
|
||||
rv[s] = rv[sds_proxy];
|
||||
if (!contains(rv, s) || better(sds_ei, rv[s])) {
|
||||
rv[s] = sds_ei;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -395,7 +407,7 @@ escape_info find_mcclellan_escape_info(const raw_dfa &rdfa,
|
||||
max_allowed_accel_offset);
|
||||
DEBUG_PRINTF("width %zu vs %zu\n", offset.outs.count(),
|
||||
rv.outs.count());
|
||||
if (offset.outs.count() < rv.outs.count()) {
|
||||
if (double_byte_ok(offset) || offset.outs.count() < rv.outs.count()) {
|
||||
DEBUG_PRINTF("using offset accel\n");
|
||||
rv = offset;
|
||||
}
|
||||
|
Reference in New Issue
Block a user