mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
mcclellan: wide state fixes for sanitisers and accept state construction
This commit is contained in:
parent
6f3a0a323e
commit
805a550a0a
@ -1197,7 +1197,7 @@ char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset,
|
||||
|
||||
// new byte
|
||||
if (m->has_wide) {
|
||||
*((u16 *)state + 1) = 0;
|
||||
unaligned_store_u16((u16 *)state + 1, 0);
|
||||
}
|
||||
|
||||
if (s) {
|
||||
@ -1236,7 +1236,7 @@ void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state,
|
||||
|
||||
// new byte
|
||||
if (m->has_wide) {
|
||||
*((u16 *)state + 1) = 0;
|
||||
unaligned_store_u16((u16 *)state + 1, 0);
|
||||
}
|
||||
} else {
|
||||
s = unaligned_load_u16(state);
|
||||
@ -1285,7 +1285,7 @@ char nfaExecMcClellan16_queueInitState(UNUSED const struct NFA *nfa,
|
||||
|
||||
// new byte
|
||||
if (m->has_wide) {
|
||||
*((u16 *)q->state + 1) = 0;
|
||||
unaligned_store_u16((u16 *)q->state + 1, 0);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -1325,7 +1325,7 @@ char nfaExecMcClellan16_queueCompressState(UNUSED const struct NFA *nfa,
|
||||
|
||||
// new byte
|
||||
if (m->has_wide) {
|
||||
*((u16 *)dest + 1) = *((const u16 *)src + 1);
|
||||
unaligned_store_u16((u16 *)dest + 1, *((const u16 *)src + 1));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -1344,7 +1344,7 @@ char nfaExecMcClellan16_expandState(UNUSED const struct NFA *nfa, void *dest,
|
||||
|
||||
// new byte
|
||||
if (m->has_wide) {
|
||||
*((u16 *)dest + 1) = *((const u16 *)src + 1);
|
||||
*((u16 *)dest + 1) = unaligned_load_u16((const u16 *)src + 1);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -88,7 +88,7 @@ u16 doWide16(const char *wide_entry, const u8 **c_inout, const u8 *end,
|
||||
const u8 *remap, const u16 *s, char *qstate, u16 *offset) {
|
||||
// Internal relative offset after the last visit of the wide state.
|
||||
if (qstate != NULL) { // stream mode
|
||||
*offset = *(const u16 *)(qstate + 2);
|
||||
*offset = unaligned_load_u16((const u16 *)(qstate + 2));
|
||||
}
|
||||
|
||||
u8 successful = 0;
|
||||
|
@ -52,13 +52,13 @@ extern "C"
|
||||
|
||||
#define WIDE_STATE 2
|
||||
#define WIDE_ENTRY_OFFSET8(weo_pos) (2 + (weo_pos))
|
||||
#define WIDE_ENTRY_OFFSET16(weo_pos) (3 + (weo_pos))
|
||||
#define WIDE_ENTRY_OFFSET16(weo_pos) (4 + (weo_pos))
|
||||
|
||||
#define WIDE_WIDTH_OFFSET 0
|
||||
#define WIDE_SYMBOL_OFFSET8 1
|
||||
#define WIDE_TRANSITION_OFFSET8(wto_width) (1 + (wto_width))
|
||||
#define WIDE_SYMBOL_OFFSET16 2
|
||||
#define WIDE_TRANSITION_OFFSET16(wto_width) (2 + (wto_width))
|
||||
#define WIDE_TRANSITION_OFFSET16(wto_width) (2 + ROUNDUP_N(wto_width, 2))
|
||||
|
||||
struct report_list {
|
||||
u32 count;
|
||||
|
@ -261,22 +261,24 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
|
||||
|
||||
// check successful transition
|
||||
u16 next = unaligned_load_u16((u8 *)trans);
|
||||
if (next >= wide_limit) {
|
||||
continue;
|
||||
if (next < wide_limit) {
|
||||
mstate_aux *aux = getAux(n, next);
|
||||
if (aux->accept) {
|
||||
next |= ACCEPT_FLAG;
|
||||
}
|
||||
if (aux->accel_offset) {
|
||||
next |= ACCEL_FLAG;
|
||||
}
|
||||
unaligned_store_u16((u8 *)trans, next);
|
||||
}
|
||||
mstate_aux *aux = getAux(n, next);
|
||||
if (aux->accept) {
|
||||
next |= ACCEPT_FLAG;
|
||||
}
|
||||
if (aux->accel_offset) {
|
||||
next |= ACCEL_FLAG;
|
||||
}
|
||||
unaligned_store_u16((u8 *)trans, next);
|
||||
trans ++;
|
||||
trans++;
|
||||
|
||||
// check failure transition
|
||||
for (symbol_t k = 0; k < alphaSize; k++) {
|
||||
u16 next_k = unaligned_load_u16((u8 *)&trans[k]);
|
||||
if (next_k >= wide_limit) {
|
||||
continue;
|
||||
}
|
||||
mstate_aux *aux_k = getAux(n, next_k);
|
||||
if (aux_k->accept) {
|
||||
next_k |= ACCEPT_FLAG;
|
||||
@ -525,11 +527,12 @@ size_t calcWideRegionSize(const dfa_info &info) {
|
||||
}
|
||||
|
||||
// wide info header
|
||||
size_t rv = info.wide_symbol_chain.size() * sizeof(u32) + 3;
|
||||
size_t rv = info.wide_symbol_chain.size() * sizeof(u32) + 4;
|
||||
|
||||
// wide info body
|
||||
for (const auto &chain : info.wide_symbol_chain) {
|
||||
rv += chain.size() + (info.impl_alpha_size + 1) * sizeof(u16) + 2;
|
||||
rv += ROUNDUP_N(chain.size(), 2) +
|
||||
(info.impl_alpha_size + 1) * sizeof(u16) + 2;
|
||||
}
|
||||
|
||||
return ROUNDUP_16(rv);
|
||||
@ -776,6 +779,7 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
|
||||
|
||||
char *wide_top = wide_base;
|
||||
*(u8 *)(wide_top++) = WIDE_STATE;
|
||||
wide_top = ROUNDUP_PTR(wide_top, 2);
|
||||
*(u16 *)(wide_top) = wide_number;
|
||||
wide_top += 2;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user