mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
look for normal accel schemes using compressed alpha
This commit is contained in:
parent
f53c093baa
commit
6898dc9864
@ -177,6 +177,7 @@ vector<vector<CharReach> > generate_paths(const raw_dfa &rdfa, dstate_id_t base,
|
|||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
escape_info look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base,
|
escape_info look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base,
|
||||||
u32 max_allowed_accel_offset) {
|
u32 max_allowed_accel_offset) {
|
||||||
DEBUG_PRINTF("looking for accel for %hu\n", base);
|
DEBUG_PRINTF("looking for accel for %hu\n", base);
|
||||||
@ -191,7 +192,6 @@ escape_info look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base,
|
|||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static
|
static
|
||||||
vector<u16> find_nonexit_symbols(const raw_dfa &rdfa,
|
vector<u16> find_nonexit_symbols(const raw_dfa &rdfa,
|
||||||
const CharReach &escape) {
|
const CharReach &escape) {
|
||||||
@ -253,6 +253,17 @@ bool better(const escape_info &a, const escape_info &b) {
|
|||||||
return a.outs.count() < b.outs.count();
|
return a.outs.count() < b.outs.count();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Builds the inverse of rdfa.alpha_remap: element s of the returned vector is
 * the set of input values i in [0, N_CHARS) for which rdfa.alpha_remap[i] == s.
 * The result has rdfa.alpha_size - 1 entries.
 * NOTE(review): presumably the omitted last alphabet symbol is TOP, as the
 * inline comment below suggests - confirm against the raw_dfa definition. */
static
|
||||||
|
vector<CharReach> reverse_alpha_remapping(const raw_dfa &rdfa) {
|
||||||
|
vector<CharReach> rv(rdfa.alpha_size - 1); /* TOP not required */
|
||||||
|
|
||||||
|
for (u32 i = 0; i < N_CHARS; i++) { /* visit every input value once */
|
||||||
|
rv.at(rdfa.alpha_remap[i]).set(i); /* add i to the set kept for its remapped symbol; at() is used rather than operator[] (presumably for bounds checking - confirm vector type) */
|
||||||
|
}
|
||||||
|
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
map<dstate_id_t, escape_info> populateAccelerationInfo(const raw_dfa &rdfa,
|
map<dstate_id_t, escape_info> populateAccelerationInfo(const raw_dfa &rdfa,
|
||||||
const dfa_build_strat &strat,
|
const dfa_build_strat &strat,
|
||||||
const Grey &grey) {
|
const Grey &grey) {
|
||||||
@ -321,45 +332,58 @@ escape_info find_mcclellan_escape_info(const raw_dfa &rdfa,
|
|||||||
u32 max_allowed_accel_offset) {
|
u32 max_allowed_accel_offset) {
|
||||||
escape_info rv;
|
escape_info rv;
|
||||||
const dstate &raw = rdfa.states[this_idx];
|
const dstate &raw = rdfa.states[this_idx];
|
||||||
const auto &alpha_remap = rdfa.alpha_remap;
|
const vector<CharReach> rev_map = reverse_alpha_remapping(rdfa);
|
||||||
|
|
||||||
flat_set<pair<u8, u8>> outs2_local;
|
for (u32 i = 0; i < rev_map.size(); i++) {
|
||||||
for (unsigned i = 0; i < N_CHARS; i++) {
|
if (raw.next[i] == this_idx) {
|
||||||
outs2_local.clear();
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (raw.next[alpha_remap[i]] != this_idx) {
|
const CharReach &cr_i = rev_map.at(i);
|
||||||
rv.outs.set(i);
|
|
||||||
|
|
||||||
DEBUG_PRINTF("next is %hu\n", raw.next[alpha_remap[i]]);
|
rv.outs |= cr_i;
|
||||||
const dstate &raw_next = rdfa.states[raw.next[alpha_remap[i]]];
|
|
||||||
|
DEBUG_PRINTF("next is %hu\n", raw.next[i]);
|
||||||
|
const dstate &raw_next = rdfa.states[raw.next[i]];
|
||||||
|
|
||||||
if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) {
|
if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) {
|
||||||
DEBUG_PRINTF("leads to report\n");
|
DEBUG_PRINTF("leads to report\n");
|
||||||
rv.outs2_broken = true; /* cannot accelerate over reports */
|
rv.outs2_broken = true; /* cannot accelerate over reports */
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned j = 0; !rv.outs2_broken && j < N_CHARS; j++) {
|
if (rv.outs2_broken) {
|
||||||
if (raw_next.next[alpha_remap[j]] == raw.next[alpha_remap[j]]) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("adding %02x %02x -> %hu to 2 \n", i, j,
|
CharReach cr_all_j;
|
||||||
raw_next.next[alpha_remap[j]]);
|
for (u32 j = 0; j < rev_map.size(); j++) {
|
||||||
outs2_local.emplace((u8)i, (u8)j);
|
if (raw_next.next[j] == raw.next[j]) {
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (outs2_local.size() > 8) {
|
DEBUG_PRINTF("adding sym %u sym %u -> %hu to 2 \n", i, j,
|
||||||
DEBUG_PRINTF("adding %02x to outs2_single\n", i);
|
raw_next.next[j]);
|
||||||
rv.outs2_single.set(i);
|
cr_all_j |= rev_map.at(j);
|
||||||
} else {
|
|
||||||
insert(&rv.outs2, outs2_local);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (cr_i.count() * cr_all_j.count() > 8) {
|
||||||
|
DEBUG_PRINTF("adding sym %u to outs2_single\n", i);
|
||||||
|
rv.outs2_single |= cr_i;
|
||||||
|
} else {
|
||||||
|
for (auto ii = cr_i.find_first(); ii != CharReach::npos;
|
||||||
|
ii = cr_i.find_next(ii)) {
|
||||||
|
for (auto jj = cr_all_j.find_first(); jj != CharReach::npos;
|
||||||
|
jj = cr_all_j.find_next(jj)) {
|
||||||
|
rv.outs2.emplace((u8)ii, (u8)jj);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (rv.outs2.size() > 8) {
|
if (rv.outs2.size() > 8) {
|
||||||
DEBUG_PRINTF("outs2 too big\n");
|
DEBUG_PRINTF("outs2 too big\n");
|
||||||
rv.outs2_broken = true;
|
rv.outs2_broken = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa));
|
DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa));
|
||||||
DEBUG_PRINTF("broken %d\n", rv.outs2_broken);
|
DEBUG_PRINTF("broken %d\n", rv.outs2_broken);
|
||||||
|
@ -48,9 +48,6 @@ struct Grey;
|
|||||||
* than normal states as accelerating sds is important. Matches NFA value */
|
* than normal states as accelerating sds is important. Matches NFA value */
|
||||||
#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192
|
#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192
|
||||||
|
|
||||||
escape_info look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base,
|
|
||||||
u32 max_allowed_accel_offset);
|
|
||||||
|
|
||||||
std::map<dstate_id_t, escape_info> populateAccelerationInfo(const raw_dfa &rdfa,
|
std::map<dstate_id_t, escape_info> populateAccelerationInfo(const raw_dfa &rdfa,
|
||||||
const dfa_build_strat &strat,
|
const dfa_build_strat &strat,
|
||||||
const Grey &grey);
|
const Grey &grey);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user