mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
take mask overhang into account for hwlm accel, float min dist
This commit is contained in:
parent
34289eb3b4
commit
d574557200
@ -64,6 +64,28 @@ namespace ue2 {
|
|||||||
static const unsigned int MAX_ACCEL_OFFSET = 16;
|
static const unsigned int MAX_ACCEL_OFFSET = 16;
|
||||||
static const unsigned int MAX_SHUFTI_WIDTH = 240;
|
static const unsigned int MAX_SHUFTI_WIDTH = 240;
|
||||||
|
|
||||||
|
static
|
||||||
|
size_t mask_overhang(const hwlmLiteral &lit) {
|
||||||
|
size_t msk_true_size = lit.msk.size();
|
||||||
|
assert(msk_true_size <= HWLM_MASKLEN);
|
||||||
|
assert(HWLM_MASKLEN <= MAX_ACCEL_OFFSET);
|
||||||
|
for (u8 c : lit.msk) {
|
||||||
|
if (!c) {
|
||||||
|
msk_true_size--;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lit.s.length() >= msk_true_size) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* only short literals should be able to have a mask which overhangs */
|
||||||
|
assert(lit.s.length() < MAX_ACCEL_OFFSET);
|
||||||
|
return msk_true_size - lit.s.length();
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
bool findDVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
|
bool findDVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
|
||||||
const hwlmLiteral &first = *lits.front();
|
const hwlmLiteral &first = *lits.front();
|
||||||
@ -169,7 +191,8 @@ bool findDVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (found) {
|
if (found) {
|
||||||
curr.max_offset = MAX(curr.max_offset, j);
|
assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET);
|
||||||
|
ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -290,8 +313,8 @@ bool findSVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (found) {
|
if (found) {
|
||||||
curr.max_offset = MAX(curr.max_offset, j);
|
assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET);
|
||||||
break;
|
ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -392,13 +415,25 @@ void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (u32 i = 0; i < MAX_ACCEL_OFFSET; i++) {
|
u32 overhang = mask_overhang(lit);
|
||||||
|
for (u32 i = 0; i < overhang; i++) {
|
||||||
|
/* this offset overhangs the start of the real literal; look at the
|
||||||
|
* msk/cmp */
|
||||||
|
for (u32 j = 0; j < N_CHARS; j++) {
|
||||||
|
if ((j & lit.msk[i]) == lit.cmp[i]) {
|
||||||
|
reach[i].set(j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (u32 i = overhang; i < MAX_ACCEL_OFFSET; i++) {
|
||||||
CharReach &reach_i = reach[i];
|
CharReach &reach_i = reach[i];
|
||||||
|
u32 i_effective = i - overhang;
|
||||||
|
|
||||||
if (litGuardedByCharReach(reach_i, lit, i)) {
|
if (litGuardedByCharReach(reach_i, lit, i_effective)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
unsigned char c = i < lit.s.length() ? lit.s[i] : lit.s.back();
|
unsigned char c = i_effective < lit.s.length() ? lit.s[i_effective]
|
||||||
|
: lit.s.back();
|
||||||
if (lit.nocase) {
|
if (lit.nocase) {
|
||||||
reach_i.set(mytoupper(c));
|
reach_i.set(mytoupper(c));
|
||||||
reach_i.set(mytolower(c));
|
reach_i.set(mytolower(c));
|
||||||
|
@ -5031,6 +5031,9 @@ void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) {
|
|||||||
u32 max_d = g[v].max_offset;
|
u32 max_d = g[v].max_offset;
|
||||||
u32 min_d = g[v].min_offset;
|
u32 min_d = g[v].min_offset;
|
||||||
|
|
||||||
|
DEBUG_PRINTF("checking %u: elen %zu min/max %u/%u\n", lit_id,
|
||||||
|
key.elength_including_mask(), min_d, max_d);
|
||||||
|
|
||||||
if (build.literal_info[lit_id].undelayed_id != lit_id) {
|
if (build.literal_info[lit_id].undelayed_id != lit_id) {
|
||||||
/* this is a delayed match; need to update delay properties */
|
/* this is a delayed match; need to update delay properties */
|
||||||
/* TODO: can delayed literals ever be in another table ? */
|
/* TODO: can delayed literals ever be in another table ? */
|
||||||
@ -5050,9 +5053,9 @@ void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) {
|
|||||||
switch (key.table) {
|
switch (key.table) {
|
||||||
case ROSE_FLOATING:
|
case ROSE_FLOATING:
|
||||||
ENSURE_AT_LEAST(&engine->floatingDistance, max_d);
|
ENSURE_AT_LEAST(&engine->floatingDistance, max_d);
|
||||||
if (min_d >= key.elength()) {
|
if (min_d >= key.elength_including_mask()) {
|
||||||
LIMIT_TO_AT_MOST(&engine->floatingMinDistance,
|
LIMIT_TO_AT_MOST(&engine->floatingMinDistance,
|
||||||
min_d - (u32)key.elength());
|
min_d - (u32)key.elength_including_mask());
|
||||||
} else {
|
} else {
|
||||||
/* overlapped literals from rose + anchored table can
|
/* overlapped literals from rose + anchored table can
|
||||||
* cause us to underflow due to sloppiness in
|
* cause us to underflow due to sloppiness in
|
||||||
|
@ -277,6 +277,17 @@ struct rose_literal_id {
|
|||||||
u32 distinctiveness;
|
u32 distinctiveness;
|
||||||
|
|
||||||
size_t elength(void) const { return s.length() + delay; }
|
size_t elength(void) const { return s.length() + delay; }
|
||||||
|
size_t elength_including_mask(void) const {
|
||||||
|
size_t mask_len = msk.size();
|
||||||
|
for (u8 c : msk) {
|
||||||
|
if (!c) {
|
||||||
|
mask_len--;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return MAX(mask_len, s.length()) + delay;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline
|
static inline
|
||||||
|
Loading…
x
Reference in New Issue
Block a user