unify some accel code/structures between limex and mcclellan

This commit is contained in:
Alex Coyte 2016-04-06 16:43:32 +10:00 committed by Matthew Barr
parent 850636dbd6
commit ff721ed8e4
9 changed files with 156 additions and 327 deletions

View File

@ -827,6 +827,7 @@ SET (hs_SRCS
src/rose/rose_in_graph.h src/rose/rose_in_graph.h
src/rose/rose_in_util.cpp src/rose/rose_in_util.cpp
src/rose/rose_in_util.h src/rose/rose_in_util.h
src/util/accel_scheme.h
src/util/alloc.cpp src/util/alloc.cpp
src/util/alloc.h src/util/alloc.h
src/util/bitfield.h src/util/bitfield.h

View File

@ -85,9 +85,9 @@ public:
vector<u32> &reports_eod /* out */, vector<u32> &reports_eod /* out */,
u8 *isSingleReport /* out */, u8 *isSingleReport /* out */,
ReportID *arbReport /* out */) const override; ReportID *arbReport /* out */) const override;
escape_info find_escape_strings(dstate_id_t this_idx) const override; AccelScheme find_escape_strings(dstate_id_t this_idx) const override;
size_t accelSize(void) const override { return sizeof(gough_accel); } size_t accelSize(void) const override { return sizeof(gough_accel); }
void buildAccel(dstate_id_t this_idx, const escape_info &info, void buildAccel(dstate_id_t this_idx, const AccelScheme &info,
void *accel_out) override; void *accel_out) override;
u32 max_allowed_offset_accel() const override { return 0; } u32 max_allowed_offset_accel() const override { return 0; }
@ -1146,31 +1146,32 @@ aligned_unique_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
return gough_dfa; return gough_dfa;
} }
escape_info gough_build_strat::find_escape_strings(dstate_id_t this_idx) const { AccelScheme gough_build_strat::find_escape_strings(dstate_id_t this_idx) const {
escape_info rv; AccelScheme rv;
if (!contains(accel_gough_info, this_idx)) { if (!contains(accel_gough_info, this_idx)) {
rv.outs = CharReach::dot(); rv.cr = CharReach::dot();
rv.outs2_broken = true; rv.double_byte.clear();
return rv; return rv;
} }
rv = mcclellan_build_strat::find_escape_strings(this_idx); rv = mcclellan_build_strat::find_escape_strings(this_idx);
assert(!rv.offset); /* should have been limited by strat */ assert(!rv.offset || rv.cr.all()); /* should have been limited by strat */
if (rv.offset) { if (rv.offset) {
rv.outs = CharReach::dot(); rv.cr = CharReach::dot();
rv.outs2_broken = true; rv.double_byte.clear();
return rv; return rv;
} }
if (!accel_gough_info.at(this_idx).two_byte) { if (rv.double_offset
rv.outs2_broken = true; || !accel_gough_info.at(this_idx).two_byte) {
rv.double_byte.clear();
} }
return rv; return rv;
} }
void gough_build_strat::buildAccel(dstate_id_t this_idx, const escape_info &info, void gough_build_strat::buildAccel(dstate_id_t this_idx, const AccelScheme &info,
void *accel_out) { void *accel_out) {
assert(mcclellan_build_strat::accelSize() == sizeof(AccelAux)); assert(mcclellan_build_strat::accelSize() == sizeof(AccelAux));
gough_accel *accel = (gough_accel *)accel_out; gough_accel *accel = (gough_accel *)accel_out;

View File

@ -49,209 +49,6 @@
#include "util/simd_utils_ssse3.h" #include "util/simd_utils_ssse3.h"
#include "util/shuffle_ssse3.h" #include "util/shuffle_ssse3.h"
/**
 * Run the acceleration scheme described by \p aux over the bytes in
 * [\p ptr, \p end) and return the position at which normal scanning should
 * resume.
 *
 * The scheme-specific scanner selected by aux->accel_type yields a candidate
 * pointer; that pointer is then moved back by the scheme's offset, clamped so
 * that the result is never earlier than the original \p ptr.
 *
 * Preconditions (asserted): \p aux is SIMD aligned, \p end > \p ptr and at
 * least 16 bytes are available to scan.
 *
 * \param aux acceleration scheme to execute.
 * \param ptr first byte to scan.
 * \param end one past the last byte to scan.
 * \return pointer in the scanned range from which to continue; \p ptr itself
 *         when the accel type is not recognised (no acceleration).
 */
static
const u8 *accelScan(const union AccelAux *aux, const u8 *ptr, const u8 *end) {
    assert(ISALIGNED(aux)); // must be SIMD aligned for shufti
    assert(end > ptr);
    assert(end - ptr >= 16); // must be at least 16 bytes to scan

    const u8 *start = ptr;
    u8 offset;

    switch (aux->accel_type) {
    case ACCEL_VERM:
        DEBUG_PRINTF("single vermicelli for 0x%02hhx\n", aux->verm.c);
        offset = aux->verm.offset;
        ptr = vermicelliExec(aux->verm.c, 0, ptr, end);
        break;
    case ACCEL_VERM_NOCASE:
        DEBUG_PRINTF("single vermicelli-nocase for 0x%02hhx\n", aux->verm.c);
        offset = aux->verm.offset;
        ptr = vermicelliExec(aux->verm.c, 1, ptr, end);
        break;
    case ACCEL_DVERM:
        DEBUG_PRINTF("double vermicelli for 0x%02hhx%02hhx\n",
                     aux->dverm.c1, aux->dverm.c2);
        offset = aux->dverm.offset;
        ptr = vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 0, ptr, end);
        break;
    case ACCEL_DVERM_NOCASE:
        DEBUG_PRINTF("double vermicelli-nocase for 0x%02hhx%02hhx\n",
                     aux->dverm.c1, aux->dverm.c2);
        offset = aux->dverm.offset;
        ptr = vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2,
                                   1, ptr, end);
        break;
    case ACCEL_DVERM_MASKED:
        DEBUG_PRINTF("double vermicelli masked for "
                     "0x%02hhx%02hhx/0x%02hhx%02hhx\n",
                     aux->dverm.c1, aux->dverm.c2,
                     aux->dverm.m1, aux->dverm.m2);
        offset = aux->dverm.offset;
        ptr = vermicelliDoubleMaskedExec(aux->dverm.c1, aux->dverm.c2,
                                         aux->dverm.m1, aux->dverm.m2, ptr, end);
        break;
    case ACCEL_MLVERM:
        DEBUG_PRINTF("long vermicelli for 0x%02hhx\n", aux->mverm.c);
        offset = aux->mverm.offset;
        ptr = long_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len);
        break;
    case ACCEL_MLVERM_NOCASE:
        DEBUG_PRINTF("long vermicelli-nocase for 0x%02hhx\n", aux->mverm.c);
        offset = aux->mverm.offset;
        ptr = long_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len);
        break;
    case ACCEL_MLGVERM:
        DEBUG_PRINTF("long grab vermicelli for 0x%02hhx\n", aux->mverm.c);
        offset = aux->mverm.offset;
        ptr = longgrab_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len);
        break;
    case ACCEL_MLGVERM_NOCASE:
        DEBUG_PRINTF("long grab vermicelli-nocase for 0x%02hhx\n", aux->mverm.c);
        offset = aux->mverm.offset;
        ptr = longgrab_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len);
        break;
    case ACCEL_MSVERM:
        DEBUG_PRINTF("shift vermicelli for 0x%02hhx\n", aux->mverm.c);
        offset = aux->mverm.offset;
        ptr = shift_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len);
        break;
    case ACCEL_MSVERM_NOCASE:
        DEBUG_PRINTF("shift vermicelli-nocase for 0x%02hhx\n", aux->mverm.c);
        offset = aux->mverm.offset;
        ptr = shift_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len);
        break;
    case ACCEL_MSGVERM:
        DEBUG_PRINTF("shift grab vermicelli for 0x%02hhx\n", aux->mverm.c);
        offset = aux->mverm.offset;
        ptr = shiftgrab_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len);
        break;
    case ACCEL_MSGVERM_NOCASE:
        DEBUG_PRINTF("shift grab vermicelli-nocase for 0x%02hhx\n", aux->mverm.c);
        offset = aux->mverm.offset;
        ptr = shiftgrab_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len);
        break;
    /* The double shift variants are described by the mdverm member; read
     * every field, including the offset, from that same member. */
    case ACCEL_MDSVERM:
        DEBUG_PRINTF("double shift vermicelli for 0x%02hhx\n", aux->mdverm.c);
        offset = aux->mdverm.offset;
        ptr = doubleshift_vermicelliExec(aux->mdverm.c, 0, ptr, end, aux->mdverm.len1, aux->mdverm.len2);
        break;
    case ACCEL_MDSVERM_NOCASE:
        DEBUG_PRINTF("double shift vermicelli-nocase for 0x%02hhx\n", aux->mdverm.c);
        offset = aux->mdverm.offset;
        ptr = doubleshift_vermicelliExec(aux->mdverm.c, 1, ptr, end, aux->mdverm.len1, aux->mdverm.len2);
        break;
    case ACCEL_MDSGVERM:
        DEBUG_PRINTF("double shift grab vermicelli for 0x%02hhx\n", aux->mdverm.c);
        offset = aux->mdverm.offset;
        ptr = doubleshiftgrab_vermicelliExec(aux->mdverm.c, 0, ptr, end, aux->mdverm.len1, aux->mdverm.len2);
        break;
    case ACCEL_MDSGVERM_NOCASE:
        DEBUG_PRINTF("double shift grab vermicelli-nocase for 0x%02hhx\n", aux->mdverm.c);
        offset = aux->mdverm.offset;
        ptr = doubleshiftgrab_vermicelliExec(aux->mdverm.c, 1, ptr, end, aux->mdverm.len1, aux->mdverm.len2);
        break;
    case ACCEL_SHUFTI:
        DEBUG_PRINTF("single shufti\n");
        offset = aux->shufti.offset;
        ptr = shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end);
        break;
    case ACCEL_DSHUFTI:
        DEBUG_PRINTF("double shufti\n");
        offset = aux->dshufti.offset;
        ptr = shuftiDoubleExec(aux->dshufti.lo1, aux->dshufti.hi1,
                               aux->dshufti.lo2, aux->dshufti.hi2, ptr, end);
        break;
    case ACCEL_MLSHUFTI:
        offset = aux->mshufti.offset;
        ptr = long_shuftiExec(aux->mshufti.lo, aux->mshufti.hi, ptr, end, aux->mshufti.len);
        break;
    case ACCEL_MLGSHUFTI:
        offset = aux->mshufti.offset;
        ptr = longgrab_shuftiExec(aux->mshufti.lo, aux->mshufti.hi, ptr, end, aux->mshufti.len);
        break;
    case ACCEL_MSSHUFTI:
        offset = aux->mshufti.offset;
        ptr = shift_shuftiExec(aux->mshufti.lo, aux->mshufti.hi, ptr, end, aux->mshufti.len);
        break;
    case ACCEL_MSGSHUFTI:
        offset = aux->mshufti.offset;
        ptr = shiftgrab_shuftiExec(aux->mshufti.lo, aux->mshufti.hi, ptr, end, aux->mshufti.len);
        break;
    case ACCEL_MDSSHUFTI:
        offset = aux->mdshufti.offset;
        ptr = doubleshift_shuftiExec(aux->mdshufti.lo, aux->mdshufti.hi, ptr, end,
                                     aux->mdshufti.len1, aux->mdshufti.len2);
        break;
    case ACCEL_MDSGSHUFTI:
        offset = aux->mdshufti.offset;
        ptr = doubleshiftgrab_shuftiExec(aux->mdshufti.lo, aux->mdshufti.hi, ptr, end,
                                         aux->mdshufti.len1, aux->mdshufti.len2);
        break;
    case ACCEL_TRUFFLE:
        DEBUG_PRINTF("truffle shuffle\n");
        offset = aux->truffle.offset;
        ptr = truffleExec(aux->truffle.mask1, aux->truffle.mask2, ptr, end);
        break;
    case ACCEL_MLTRUFFLE:
        DEBUG_PRINTF("long match truffle shuffle\n");
        offset = aux->mtruffle.offset;
        ptr = long_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2,
                               ptr, end, aux->mtruffle.len);
        break;
    case ACCEL_MLGTRUFFLE:
        DEBUG_PRINTF("long grab match truffle shuffle\n");
        offset = aux->mtruffle.offset;
        ptr = longgrab_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2,
                                   ptr, end, aux->mtruffle.len);
        break;
    case ACCEL_MSTRUFFLE:
        DEBUG_PRINTF("shift match truffle shuffle\n");
        offset = aux->mtruffle.offset;
        ptr = shift_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2,
                                ptr, end, aux->mtruffle.len);
        break;
    case ACCEL_MSGTRUFFLE:
        DEBUG_PRINTF("shift grab match truffle shuffle\n");
        offset = aux->mtruffle.offset;
        ptr = shiftgrab_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2,
                                    ptr, end, aux->mtruffle.len);
        break;
    case ACCEL_MDSTRUFFLE:
        DEBUG_PRINTF("double shift match truffle shuffle\n");
        offset = aux->mdtruffle.offset;
        ptr = doubleshift_truffleExec(aux->mdtruffle.mask1,
                                      aux->mdtruffle.mask2, ptr, end,
                                      aux->mdtruffle.len1,
                                      aux->mdtruffle.len2);
        break;
    case ACCEL_MDSGTRUFFLE:
        DEBUG_PRINTF("double shift grab match truffle shuffle\n");
        offset = aux->mdtruffle.offset;
        ptr = doubleshiftgrab_truffleExec(aux->mdtruffle.mask1,
                                          aux->mdtruffle.mask2, ptr, end,
                                          aux->mdtruffle.len1,
                                          aux->mdtruffle.len2);
        break;
    case ACCEL_RED_TAPE:
        ptr = end; /* there is no escape */
        offset = aux->generic.offset;
        break;
    default:
        /* no acceleration, fall through and return current ptr */
        offset = 0;
        break;
    }

    if (offset) {
        /* Back off by the scheme's offset, but never to before where we
         * started. Compare distances first so that we never form a pointer
         * that precedes start (which may be the beginning of the buffer). */
        if (ptr - start < offset) {
            return start;
        }
        ptr -= offset;
    }

    return ptr;
}
static really_inline static really_inline
size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux, size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux,
const u8 *input, u32 idx, size_t i, size_t end) { const u8 *input, u32 idx, size_t i, size_t end) {
@ -272,7 +69,7 @@ size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux,
} }
aux = aux + aux_idx; aux = aux + aux_idx;
const u8 *ptr = accelScan(aux, &input[i], &input[end]); const u8 *ptr = run_accel(aux, &input[i], &input[end]);
assert(ptr >= &input[i]); assert(ptr >= &input[i]);
size_t j = (size_t)(ptr - input); size_t j = (size_t)(ptr - input);
DEBUG_PRINTF("accel skipped %zu of %zu chars\n", (j - i), (end - i)); DEBUG_PRINTF("accel skipped %zu of %zu chars\n", (j - i), (end - i));

View File

@ -128,10 +128,10 @@ mstate_aux *getAux(NFA *n, dstate_id_t i) {
} }
static static
bool double_byte_ok(const escape_info &info) { bool double_byte_ok(const AccelScheme &info) {
return !info.outs2_broken return !info.double_byte.empty()
&& info.outs2_single.count() < info.outs2.size() && info.double_cr.count() < info.double_byte.size()
&& info.outs2_single.count() <= 2 && !info.outs2.empty(); && info.double_cr.count() <= 2 && !info.double_byte.empty();
} }
static static
@ -189,7 +189,7 @@ u32 mcclellan_build_strat::max_allowed_offset_accel() const {
return ACCEL_DFA_MAX_OFFSET_DEPTH; return ACCEL_DFA_MAX_OFFSET_DEPTH;
} }
escape_info mcclellan_build_strat::find_escape_strings(dstate_id_t this_idx) AccelScheme mcclellan_build_strat::find_escape_strings(dstate_id_t this_idx)
const { const {
return find_mcclellan_escape_info(rdfa, this_idx, return find_mcclellan_escape_info(rdfa, this_idx,
max_allowed_offset_accel()); max_allowed_offset_accel());
@ -197,33 +197,33 @@ escape_info mcclellan_build_strat::find_escape_strings(dstate_id_t this_idx)
/** builds acceleration schemes for states */ /** builds acceleration schemes for states */
void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
const escape_info &info, const AccelScheme &info,
void *accel_out) { void *accel_out) {
AccelAux *accel = (AccelAux *)accel_out; AccelAux *accel = (AccelAux *)accel_out;
DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset, DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset,
info.outs2_offset); info.double_offset);
accel->generic.offset = verify_u8(info.offset); accel->generic.offset = verify_u8(info.offset);
if (double_byte_ok(info) && info.outs2_single.none() if (double_byte_ok(info) && info.double_cr.none()
&& info.outs2.size() == 1) { && info.double_byte.size() == 1) {
accel->accel_type = ACCEL_DVERM; accel->accel_type = ACCEL_DVERM;
accel->dverm.c1 = info.outs2.begin()->first; accel->dverm.c1 = info.double_byte.begin()->first;
accel->dverm.c2 = info.outs2.begin()->second; accel->dverm.c2 = info.double_byte.begin()->second;
accel->dverm.offset = verify_u8(info.outs2_offset); accel->dverm.offset = verify_u8(info.double_offset);
DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx); DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx);
return; return;
} }
if (double_byte_ok(info) && info.outs2_single.none() if (double_byte_ok(info) && info.double_cr.none()
&& (info.outs2.size() == 2 || info.outs2.size() == 4)) { && (info.double_byte.size() == 2 || info.double_byte.size() == 4)) {
bool ok = true; bool ok = true;
assert(!info.outs2.empty()); assert(!info.double_byte.empty());
u8 firstC = info.outs2.begin()->first & CASE_CLEAR; u8 firstC = info.double_byte.begin()->first & CASE_CLEAR;
u8 secondC = info.outs2.begin()->second & CASE_CLEAR; u8 secondC = info.double_byte.begin()->second & CASE_CLEAR;
for (const pair<u8, u8> &p : info.outs2) { for (const pair<u8, u8> &p : info.double_byte) {
if ((p.first & CASE_CLEAR) != firstC if ((p.first & CASE_CLEAR) != firstC
|| (p.second & CASE_CLEAR) != secondC) { || (p.second & CASE_CLEAR) != secondC) {
ok = false; ok = false;
@ -235,18 +235,18 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
accel->accel_type = ACCEL_DVERM_NOCASE; accel->accel_type = ACCEL_DVERM_NOCASE;
accel->dverm.c1 = firstC; accel->dverm.c1 = firstC;
accel->dverm.c2 = secondC; accel->dverm.c2 = secondC;
accel->dverm.offset = verify_u8(info.outs2_offset); accel->dverm.offset = verify_u8(info.double_offset);
DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx); DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx);
return; return;
} }
u8 m1; u8 m1;
u8 m2; u8 m2;
if (buildDvermMask(info.outs2, &m1, &m2)) { if (buildDvermMask(info.double_byte, &m1, &m2)) {
accel->accel_type = ACCEL_DVERM_MASKED; accel->accel_type = ACCEL_DVERM_MASKED;
accel->dverm.offset = verify_u8(info.outs2_offset); accel->dverm.offset = verify_u8(info.double_offset);
accel->dverm.c1 = info.outs2.begin()->first & m1; accel->dverm.c1 = info.double_byte.begin()->first & m1;
accel->dverm.c2 = info.outs2.begin()->second & m2; accel->dverm.c2 = info.double_byte.begin()->second & m2;
accel->dverm.m1 = m1; accel->dverm.m1 = m1;
accel->dverm.m2 = m2; accel->dverm.m2 = m2;
DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n", DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n",
@ -256,52 +256,52 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
} }
if (double_byte_ok(info) if (double_byte_ok(info)
&& shuftiBuildDoubleMasks(info.outs2_single, info.outs2, && shuftiBuildDoubleMasks(info.double_cr, info.double_byte,
&accel->dshufti.lo1, &accel->dshufti.hi1, &accel->dshufti.lo1, &accel->dshufti.hi1,
&accel->dshufti.lo2, &accel->dshufti.hi2)) { &accel->dshufti.lo2, &accel->dshufti.hi2)) {
accel->accel_type = ACCEL_DSHUFTI; accel->accel_type = ACCEL_DSHUFTI;
accel->dshufti.offset = verify_u8(info.outs2_offset); accel->dshufti.offset = verify_u8(info.double_offset);
DEBUG_PRINTF("state %hu is double shufti\n", this_idx); DEBUG_PRINTF("state %hu is double shufti\n", this_idx);
return; return;
} }
if (info.outs.none()) { if (info.cr.none()) {
accel->accel_type = ACCEL_RED_TAPE; accel->accel_type = ACCEL_RED_TAPE;
DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape" DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape"
" from which there is no escape\n", this_idx); " from which there is no escape\n", this_idx);
return; return;
} }
if (info.outs.count() == 1) { if (info.cr.count() == 1) {
accel->accel_type = ACCEL_VERM; accel->accel_type = ACCEL_VERM;
accel->verm.c = info.outs.find_first(); accel->verm.c = info.cr.find_first();
DEBUG_PRINTF("state %hu is vermicelli\n", this_idx); DEBUG_PRINTF("state %hu is vermicelli\n", this_idx);
return; return;
} }
if (info.outs.count() == 2 && info.outs.isCaselessChar()) { if (info.cr.count() == 2 && info.cr.isCaselessChar()) {
accel->accel_type = ACCEL_VERM_NOCASE; accel->accel_type = ACCEL_VERM_NOCASE;
accel->verm.c = info.outs.find_first() & CASE_CLEAR; accel->verm.c = info.cr.find_first() & CASE_CLEAR;
DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx); DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx);
return; return;
} }
if (info.outs.count() > ACCEL_DFA_MAX_FLOATING_STOP_CHAR) { if (info.cr.count() > ACCEL_DFA_MAX_FLOATING_STOP_CHAR) {
accel->accel_type = ACCEL_NONE; accel->accel_type = ACCEL_NONE;
DEBUG_PRINTF("state %hu is too broad\n", this_idx); DEBUG_PRINTF("state %hu is too broad\n", this_idx);
return; return;
} }
accel->accel_type = ACCEL_SHUFTI; accel->accel_type = ACCEL_SHUFTI;
if (-1 != shuftiBuildMasks(info.outs, &accel->shufti.lo, if (-1 != shuftiBuildMasks(info.cr, &accel->shufti.lo,
&accel->shufti.hi)) { &accel->shufti.hi)) {
DEBUG_PRINTF("state %hu is shufti\n", this_idx); DEBUG_PRINTF("state %hu is shufti\n", this_idx);
return; return;
} }
assert(!info.outs.none()); assert(!info.cr.none());
accel->accel_type = ACCEL_TRUFFLE; accel->accel_type = ACCEL_TRUFFLE;
truffleBuildMasks(info.outs, &accel->truffle.mask1, &accel->truffle.mask2); truffleBuildMasks(info.cr, &accel->truffle.mask1, &accel->truffle.mask2);
DEBUG_PRINTF("state %hu is truffle\n", this_idx); DEBUG_PRINTF("state %hu is truffle\n", this_idx);
} }
@ -486,7 +486,7 @@ void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset,
} }
static static
void fillAccelOut(const map<dstate_id_t, escape_info> &accel_escape_info, void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info,
set<dstate_id_t> *accel_states) { set<dstate_id_t> *accel_states) {
for (dstate_id_t i : accel_escape_info | map_keys) { for (dstate_id_t i : accel_escape_info | map_keys) {
accel_states->insert(i); accel_states->insert(i);
@ -581,7 +581,7 @@ aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,
unique_ptr<raw_report_info> ri unique_ptr<raw_report_info> ri
= info.strat.gatherReports(reports, reports_eod, &single, &arb); = info.strat.gatherReports(reports, reports_eod, &single, &arb);
map<dstate_id_t, escape_info> accel_escape_info map<dstate_id_t, AccelScheme> accel_escape_info
= populateAccelerationInfo(info.raw, info.strat, cc.grey); = populateAccelerationInfo(info.raw, info.strat, cc.grey);
size_t tran_size = (1 << info.getAlphaShift()) size_t tran_size = (1 << info.getAlphaShift())
@ -748,7 +748,7 @@ void fillInBasicState8(const dfa_info &info, mstate_aux *aux, u8 *succ_table,
static static
void allocateFSN8(dfa_info &info, void allocateFSN8(dfa_info &info,
const map<dstate_id_t, escape_info> &accel_escape_info, const map<dstate_id_t, AccelScheme> &accel_escape_info,
u16 *accel_limit, u16 *accept_limit) { u16 *accel_limit, u16 *accept_limit) {
info.states[0].impl_id = 0; /* dead is always 0 */ info.states[0].impl_id = 0; /* dead is always 0 */
@ -801,7 +801,7 @@ aligned_unique_ptr<NFA> mcclellanCompile8(dfa_info &info,
unique_ptr<raw_report_info> ri unique_ptr<raw_report_info> ri
= info.strat.gatherReports(reports, reports_eod, &single, &arb); = info.strat.gatherReports(reports, reports_eod, &single, &arb);
map<dstate_id_t, escape_info> accel_escape_info map<dstate_id_t, AccelScheme> accel_escape_info
= populateAccelerationInfo(info.raw, info.strat, cc.grey); = populateAccelerationInfo(info.raw, info.strat, cc.grey);
size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size(); size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size();

View File

@ -31,6 +31,7 @@
#include "rdfa.h" #include "rdfa.h"
#include "ue2common.h" #include "ue2common.h"
#include "util/accel_scheme.h"
#include "util/alloc.h" #include "util/alloc.h"
#include "util/charreach.h" #include "util/charreach.h"
#include "util/ue2_containers.h" #include "util/ue2_containers.h"
@ -54,15 +55,6 @@ struct raw_report_info {
std::vector<u32> &ro /* out */) const = 0; std::vector<u32> &ro /* out */) const = 0;
}; };
/**
 * Describes how a DFA state can be escaped, as consumed by the
 * dfa_build_strat::buildAccel() implementations to pick an acceleration
 * scheme. Carries both a single-byte description and an optional double-byte
 * description.
 */
struct escape_info {
/* characters considered for the single-byte schemes (vermicelli, shufti,
 * truffle); accumulated from the reach of symbols examined in
 * find_mcclellan_escape_info(). */
CharReach outs;
/* characters recorded for the double-byte scheme instead of being expanded
 * into pairs in outs2 (used when the number of pairs would be large);
 * NOTE(review): presumably first bytes that escape regardless of the byte
 * that follows -- confirm. */
CharReach outs2_single;
/* (first, second) byte pairs for the double-byte schemes. */
flat_set<std::pair<u8, u8>> outs2;
/* true when the double-byte information must not be used. */
bool outs2_broken = false;
/* offset written to the accel structure for the single-byte scheme. */
u32 offset = 0;
/* offset written to the accel structure for the double-byte scheme. */
u32 outs2_offset = 0;
};
class dfa_build_strat { class dfa_build_strat {
public: public:
virtual ~dfa_build_strat(); virtual ~dfa_build_strat();
@ -72,9 +64,9 @@ public:
std::vector<u32> &reports_eod /* out */, std::vector<u32> &reports_eod /* out */,
u8 *isSingleReport /* out */, u8 *isSingleReport /* out */,
ReportID *arbReport /* out */) const = 0; ReportID *arbReport /* out */) const = 0;
virtual escape_info find_escape_strings(dstate_id_t this_idx) const = 0; virtual AccelScheme find_escape_strings(dstate_id_t this_idx) const = 0;
virtual size_t accelSize(void) const = 0; virtual size_t accelSize(void) const = 0;
virtual void buildAccel(dstate_id_t this_idx, const escape_info &info, virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info,
void *accel_out) = 0; void *accel_out) = 0;
}; };
@ -87,9 +79,9 @@ public:
std::vector<u32> &reports_eod /* out */, std::vector<u32> &reports_eod /* out */,
u8 *isSingleReport /* out */, u8 *isSingleReport /* out */,
ReportID *arbReport /* out */) const override; ReportID *arbReport /* out */) const override;
escape_info find_escape_strings(dstate_id_t this_idx) const override; AccelScheme find_escape_strings(dstate_id_t this_idx) const override;
size_t accelSize(void) const override; size_t accelSize(void) const override;
void buildAccel(dstate_id_t this_idx,const escape_info &info, void buildAccel(dstate_id_t this_idx,const AccelScheme &info,
void *accel_out) override; void *accel_out) override;
virtual u32 max_allowed_offset_accel() const; virtual u32 max_allowed_offset_accel() const;

View File

@ -178,25 +178,14 @@ vector<vector<CharReach> > generate_paths(const raw_dfa &rdfa, dstate_id_t base,
} }
static static
escape_info look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base, AccelScheme look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base,
u32 max_allowed_accel_offset) { u32 max_allowed_accel_offset) {
DEBUG_PRINTF("looking for accel for %hu\n", base); DEBUG_PRINTF("looking for accel for %hu\n", base);
vector<vector<CharReach> > paths = generate_paths(rdfa, base, vector<vector<CharReach> > paths = generate_paths(rdfa, base,
max_allowed_accel_offset + 1); max_allowed_accel_offset + 1);
AccelScheme as = findBestAccelScheme(paths, CharReach(), true); AccelScheme as = findBestAccelScheme(paths, CharReach(), true);
escape_info rv;
rv.offset = as.offset;
rv.outs = as.cr;
if (!as.double_byte.empty()) {
rv.outs2_single = as.double_cr;
rv.outs2 = as.double_byte;
rv.outs2_offset = as.double_offset;
rv.outs2_broken = false;
} else {
rv.outs2_broken = true;
}
DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset); DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset);
return rv; return as;
} }
static static
@ -214,18 +203,18 @@ vector<u16> find_nonexit_symbols(const raw_dfa &rdfa,
static static
set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base, set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base,
const escape_info &ei) { const AccelScheme &ei) {
DEBUG_PRINTF("looking for region around %hu\n", base); DEBUG_PRINTF("looking for region around %hu\n", base);
set<dstate_id_t> region = {base}; set<dstate_id_t> region = {base};
if (!ei.outs2_broken) { if (!ei.double_byte.empty()) {
return region; return region;
} }
DEBUG_PRINTF("accel %s+%u\n", describeClass(ei.outs).c_str(), ei.offset); DEBUG_PRINTF("accel %s+%u\n", describeClass(ei.cr).c_str(), ei.offset);
const CharReach &escape = ei.outs; const CharReach &escape = ei.cr;
auto nonexit_symbols = find_nonexit_symbols(rdfa, escape); auto nonexit_symbols = find_nonexit_symbols(rdfa, escape);
vector<dstate_id_t> pending = {base}; vector<dstate_id_t> pending = {base};
@ -248,16 +237,16 @@ set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base,
} }
static static
bool better(const escape_info &a, const escape_info &b) { bool better(const AccelScheme &a, const AccelScheme &b) {
if (!a.outs2_broken && b.outs2_broken) { if (!a.double_byte.empty() && b.double_byte.empty()) {
return true; return true;
} }
if (!b.outs2_broken) { if (!b.double_byte.empty()) {
return false; return false;
} }
return a.outs.count() < b.outs.count(); return a.cr.count() < b.cr.count();
} }
static static
@ -271,10 +260,10 @@ vector<CharReach> reverse_alpha_remapping(const raw_dfa &rdfa) {
return rv; return rv;
} }
map<dstate_id_t, escape_info> populateAccelerationInfo(const raw_dfa &rdfa, map<dstate_id_t, AccelScheme> populateAccelerationInfo(const raw_dfa &rdfa,
const dfa_build_strat &strat, const dfa_build_strat &strat,
const Grey &grey) { const Grey &grey) {
map<dstate_id_t, escape_info> rv; map<dstate_id_t, AccelScheme> rv;
if (!grey.accelerateDFA) { if (!grey.accelerateDFA) {
return rv; return rv;
} }
@ -283,7 +272,7 @@ map<dstate_id_t, escape_info> populateAccelerationInfo(const raw_dfa &rdfa,
DEBUG_PRINTF("sds %hu\n", sds_proxy); DEBUG_PRINTF("sds %hu\n", sds_proxy);
for (size_t i = 0; i < rdfa.states.size(); i++) { for (size_t i = 0; i < rdfa.states.size(); i++) {
escape_info ei = strat.find_escape_strings(i); AccelScheme ei = strat.find_escape_strings(i);
if (i == DEAD_STATE) { if (i == DEAD_STATE) {
continue; continue;
@ -301,25 +290,25 @@ map<dstate_id_t, escape_info> populateAccelerationInfo(const raw_dfa &rdfa,
: ACCEL_DFA_MAX_STOP_CHAR; : ACCEL_DFA_MAX_STOP_CHAR;
DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit); DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit);
if (ei.outs.count() > single_limit) { if (ei.cr.count() > single_limit) {
DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i, DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i,
ei.outs.count()); ei.cr.count());
continue; continue;
} }
DEBUG_PRINTF("state %zu should be accelerable %zu\n", DEBUG_PRINTF("state %zu should be accelerable %zu\n",
i, ei.outs.count()); i, ei.cr.count());
rv[i] = ei; rv[i] = ei;
} }
/* provide acceleration states to states in the region of sds */ /* provide acceleration states to states in the region of sds */
if (contains(rv, sds_proxy)) { if (contains(rv, sds_proxy)) {
escape_info sds_ei = rv[sds_proxy]; AccelScheme sds_ei = rv[sds_proxy];
sds_ei.outs2_broken = true; /* region based on single byte scheme sds_ei.double_byte.clear(); /* region based on single byte scheme
* may differ from double byte */ * may differ from double byte */
DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n", DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n",
sds_ei.outs.count()); sds_ei.cr.count());
auto sds_region = find_region(rdfa, sds_proxy, sds_ei); auto sds_region = find_region(rdfa, sds_proxy, sds_ei);
for (auto s : sds_region) { for (auto s : sds_region) {
if (!contains(rv, s) || better(sds_ei, rv[s])) { if (!contains(rv, s) || better(sds_ei, rv[s])) {
@ -332,18 +321,20 @@ map<dstate_id_t, escape_info> populateAccelerationInfo(const raw_dfa &rdfa,
} }
static static
bool double_byte_ok(const escape_info &info) { bool double_byte_ok(const AccelScheme &info) {
return !info.outs2_broken return !info.double_byte.empty()
&& info.outs2_single.count() < info.outs2.size() && info.double_cr.count() < info.double_byte.size()
&& info.outs2_single.count() <= 2 && !info.outs2.empty(); && info.double_cr.count() <= 2 && !info.double_byte.empty();
} }
escape_info find_mcclellan_escape_info(const raw_dfa &rdfa, AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx,
dstate_id_t this_idx,
u32 max_allowed_accel_offset) { u32 max_allowed_accel_offset) {
escape_info rv; AccelScheme rv;
rv.cr.clear();
rv.offset = 0;
const dstate &raw = rdfa.states[this_idx]; const dstate &raw = rdfa.states[this_idx];
const vector<CharReach> rev_map = reverse_alpha_remapping(rdfa); const vector<CharReach> rev_map = reverse_alpha_remapping(rdfa);
bool outs2_broken = false;
for (u32 i = 0; i < rev_map.size(); i++) { for (u32 i = 0; i < rev_map.size(); i++) {
if (raw.next[i] == this_idx) { if (raw.next[i] == this_idx) {
@ -352,17 +343,17 @@ escape_info find_mcclellan_escape_info(const raw_dfa &rdfa,
const CharReach &cr_i = rev_map.at(i); const CharReach &cr_i = rev_map.at(i);
rv.outs |= cr_i; rv.cr |= cr_i;
DEBUG_PRINTF("next is %hu\n", raw.next[i]); DEBUG_PRINTF("next is %hu\n", raw.next[i]);
const dstate &raw_next = rdfa.states[raw.next[i]]; const dstate &raw_next = rdfa.states[raw.next[i]];
if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) { if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) {
DEBUG_PRINTF("leads to report\n"); DEBUG_PRINTF("leads to report\n");
rv.outs2_broken = true; /* cannot accelerate over reports */ outs2_broken = true; /* cannot accelerate over reports */
} }
if (rv.outs2_broken) { if (outs2_broken) {
continue; continue;
} }
@ -378,35 +369,39 @@ escape_info find_mcclellan_escape_info(const raw_dfa &rdfa,
} }
if (cr_i.count() * cr_all_j.count() > 8) { if (cr_i.count() * cr_all_j.count() > 8) {
DEBUG_PRINTF("adding sym %u to outs2_single\n", i); DEBUG_PRINTF("adding sym %u to double_cr\n", i);
rv.outs2_single |= cr_i; rv.double_cr |= cr_i;
} else { } else {
for (auto ii = cr_i.find_first(); ii != CharReach::npos; for (auto ii = cr_i.find_first(); ii != CharReach::npos;
ii = cr_i.find_next(ii)) { ii = cr_i.find_next(ii)) {
for (auto jj = cr_all_j.find_first(); jj != CharReach::npos; for (auto jj = cr_all_j.find_first(); jj != CharReach::npos;
jj = cr_all_j.find_next(jj)) { jj = cr_all_j.find_next(jj)) {
rv.outs2.emplace((u8)ii, (u8)jj); rv.double_byte.emplace((u8)ii, (u8)jj);
} }
} }
} }
if (rv.outs2.size() > 8) { if (rv.double_byte.size() > 8) {
DEBUG_PRINTF("outs2 too big\n"); DEBUG_PRINTF("outs2 too big\n");
rv.outs2_broken = true; outs2_broken = true;
} }
} }
if (outs2_broken) {
rv.double_byte.clear();
}
DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa)); DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa));
DEBUG_PRINTF("broken %d\n", rv.outs2_broken); DEBUG_PRINTF("broken %d\n", outs2_broken);
if (!double_byte_ok(rv) && !is_triggered(rdfa.kind) if (!double_byte_ok(rv) && !is_triggered(rdfa.kind)
&& this_idx == rdfa.start_floating && this_idx == rdfa.start_floating
&& this_idx != DEAD_STATE) { && this_idx != DEAD_STATE) {
DEBUG_PRINTF("looking for offset accel at %u\n", this_idx); DEBUG_PRINTF("looking for offset accel at %u\n", this_idx);
auto offset = look_for_offset_accel(rdfa, this_idx, auto offset = look_for_offset_accel(rdfa, this_idx,
max_allowed_accel_offset); max_allowed_accel_offset);
DEBUG_PRINTF("width %zu vs %zu\n", offset.outs.count(), DEBUG_PRINTF("width %zu vs %zu\n", offset.cr.count(),
rv.outs.count()); rv.cr.count());
if (double_byte_ok(offset) || offset.outs.count() < rv.outs.count()) { if (double_byte_ok(offset) || offset.cr.count() < rv.cr.count()) {
DEBUG_PRINTF("using offset accel\n"); DEBUG_PRINTF("using offset accel\n");
rv = offset; rv = offset;
} }

View File

@ -48,11 +48,11 @@ struct Grey;
* than normal states as accelerating sds is important. Matches NFA value */ * than normal states as accelerating sds is important. Matches NFA value */
#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192 #define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192
std::map<dstate_id_t, escape_info> populateAccelerationInfo(const raw_dfa &rdfa, std::map<dstate_id_t, AccelScheme> populateAccelerationInfo(const raw_dfa &rdfa,
const dfa_build_strat &strat, const dfa_build_strat &strat,
const Grey &grey); const Grey &grey);
escape_info find_mcclellan_escape_info(const raw_dfa &rdfa, AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa,
dstate_id_t this_idx, dstate_id_t this_idx,
u32 max_allowed_accel_offset); u32 max_allowed_accel_offset);

View File

@ -37,6 +37,7 @@
#include "ng_misc_opt.h" #include "ng_misc_opt.h"
#include "ue2common.h" #include "ue2common.h"
#include "nfa/accelcompile.h" #include "nfa/accelcompile.h"
#include "util/accel_scheme.h"
#include "util/charreach.h" #include "util/charreach.h"
#include "util/order_check.h" #include "util/order_check.h"
#include "util/ue2_containers.h" #include "util/ue2_containers.h"
@ -47,7 +48,6 @@
namespace ue2 { namespace ue2 {
/* compile time accel defs */ /* compile time accel defs */
#define MAX_ACCEL_DEPTH 4
#define MAX_MERGED_ACCEL_STOPS 200 #define MAX_MERGED_ACCEL_STOPS 200
#define ACCEL_MAX_STOP_CHAR 24 #define ACCEL_MAX_STOP_CHAR 24
#define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */ #define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */
@ -65,14 +65,6 @@ void findAccelFriends(const NGHolder &g, NFAVertex v,
#define DOUBLE_SHUFTI_LIMIT 20 #define DOUBLE_SHUFTI_LIMIT 20
/* Plain aggregate describing one candidate acceleration scheme. Every member
 * has a default, so a default-constructed AccelScheme is fully initialised. */
struct AccelScheme {
/* Set of (first byte, second byte) pairs; presumably the two-byte sequences
 * used for double-byte acceleration -- TODO confirm against the users. */
ue2::flat_set<std::pair<u8, u8> > double_byte;
/* Single-byte character class; starts out as CharReach::dot(). */
CharReach cr = CharReach::dot();
/* Second character class, default-constructed; presumably the single-byte
 * component that accompanies double_byte -- TODO confirm. */
CharReach double_cr;
/* Offset associated with cr. The default of MAX_ACCEL_DEPTH + 1 looks like a
 * "worse than any permitted depth" sentinel -- TODO confirm. */
u32 offset = MAX_ACCEL_DEPTH + 1;
/* Offset associated with the double-byte data; defaults to 0. */
u32 double_offset = 0;
};
NFAVertex get_sds_or_proxy(const NGHolder &g); NFAVertex get_sds_or_proxy(const NGHolder &g);
AccelScheme nfaFindAccel(const NGHolder &g, const std::vector<NFAVertex> &verts, AccelScheme nfaFindAccel(const NGHolder &g, const std::vector<NFAVertex> &verts,

51
src/util/accel_scheme.h Normal file
View File

@ -0,0 +1,51 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ACCEL_SCHEME_H
#define ACCEL_SCHEME_H
#include "util/charreach.h"
#include "util/ue2_containers.h"
#include <utility>
namespace ue2 {
#define MAX_ACCEL_DEPTH 4
/**
 * Description of a candidate acceleration scheme, kept in its own header so
 * that more than one engine compiler can include the same definition.
 *
 * All members carry defaults, so a default-constructed AccelScheme is fully
 * initialised without further setup.
 */
struct AccelScheme {
/* Set of (u8, u8) byte pairs. Presumably the two-byte sequences that drive
 * double-byte acceleration -- TODO confirm against the users. */
flat_set<std::pair<u8, u8> > double_byte;
/* Single-byte character class; initialised to CharReach::dot(). */
CharReach cr = CharReach::dot();
/* Additional character class, default-constructed. Presumably the
 * single-byte part of a double-byte scheme -- TODO confirm. */
CharReach double_cr;
/* Offset that goes with cr. Defaults to MAX_ACCEL_DEPTH + 1, which appears to
 * be one past the deepest permitted value, i.e. a "no usable offset yet"
 * marker -- TODO confirm. */
u32 offset = MAX_ACCEL_DEPTH + 1;
/* Offset that goes with the double-byte data; defaults to 0. */
u32 double_offset = 0;
};
}
#endif