mcclellan: wide state fixes for sanitisers and accept state construction

This commit is contained in:
Hong, Yang A 2018-12-24 04:49:47 +08:00 committed by Chang, Harry
parent 6f3a0a323e
commit 805a550a0a
4 changed files with 25 additions and 21 deletions

View File

@@ -1197,7 +1197,7 @@ char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset,
// new byte // new byte
if (m->has_wide) { if (m->has_wide) {
*((u16 *)state + 1) = 0; unaligned_store_u16((u16 *)state + 1, 0);
} }
if (s) { if (s) {
@@ -1236,7 +1236,7 @@ void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state,
// new byte // new byte
if (m->has_wide) { if (m->has_wide) {
*((u16 *)state + 1) = 0; unaligned_store_u16((u16 *)state + 1, 0);
} }
} else { } else {
s = unaligned_load_u16(state); s = unaligned_load_u16(state);
@@ -1285,7 +1285,7 @@ char nfaExecMcClellan16_queueInitState(UNUSED const struct NFA *nfa,
// new byte // new byte
if (m->has_wide) { if (m->has_wide) {
*((u16 *)q->state + 1) = 0; unaligned_store_u16((u16 *)q->state + 1, 0);
} }
return 0; return 0;
} }
@@ -1325,7 +1325,7 @@ char nfaExecMcClellan16_queueCompressState(UNUSED const struct NFA *nfa,
// new byte // new byte
if (m->has_wide) { if (m->has_wide) {
*((u16 *)dest + 1) = *((const u16 *)src + 1); unaligned_store_u16((u16 *)dest + 1, *((const u16 *)src + 1));
} }
return 0; return 0;
} }
@@ -1344,7 +1344,7 @@ char nfaExecMcClellan16_expandState(UNUSED const struct NFA *nfa, void *dest,
// new byte // new byte
if (m->has_wide) { if (m->has_wide) {
*((u16 *)dest + 1) = *((const u16 *)src + 1); *((u16 *)dest + 1) = unaligned_load_u16((const u16 *)src + 1);
} }
return 0; return 0;
} }

View File

@@ -88,7 +88,7 @@ u16 doWide16(const char *wide_entry, const u8 **c_inout, const u8 *end,
const u8 *remap, const u16 *s, char *qstate, u16 *offset) { const u8 *remap, const u16 *s, char *qstate, u16 *offset) {
// Internal relative offset after the last visit of the wide state. // Internal relative offset after the last visit of the wide state.
if (qstate != NULL) { // stream mode if (qstate != NULL) { // stream mode
*offset = *(const u16 *)(qstate + 2); *offset = unaligned_load_u16((const u16 *)(qstate + 2));
} }
u8 successful = 0; u8 successful = 0;

View File

@@ -52,13 +52,13 @@ extern "C"
#define WIDE_STATE 2 #define WIDE_STATE 2
#define WIDE_ENTRY_OFFSET8(weo_pos) (2 + (weo_pos)) #define WIDE_ENTRY_OFFSET8(weo_pos) (2 + (weo_pos))
#define WIDE_ENTRY_OFFSET16(weo_pos) (3 + (weo_pos)) #define WIDE_ENTRY_OFFSET16(weo_pos) (4 + (weo_pos))
#define WIDE_WIDTH_OFFSET 0 #define WIDE_WIDTH_OFFSET 0
#define WIDE_SYMBOL_OFFSET8 1 #define WIDE_SYMBOL_OFFSET8 1
#define WIDE_TRANSITION_OFFSET8(wto_width) (1 + (wto_width)) #define WIDE_TRANSITION_OFFSET8(wto_width) (1 + (wto_width))
#define WIDE_SYMBOL_OFFSET16 2 #define WIDE_SYMBOL_OFFSET16 2
#define WIDE_TRANSITION_OFFSET16(wto_width) (2 + (wto_width)) #define WIDE_TRANSITION_OFFSET16(wto_width) (2 + ROUNDUP_N(wto_width, 2))
struct report_list { struct report_list {
u32 count; u32 count;

View File

@@ -261,9 +261,7 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
// check successful transition // check successful transition
u16 next = unaligned_load_u16((u8 *)trans); u16 next = unaligned_load_u16((u8 *)trans);
if (next >= wide_limit) { if (next < wide_limit) {
continue;
}
mstate_aux *aux = getAux(n, next); mstate_aux *aux = getAux(n, next);
if (aux->accept) { if (aux->accept) {
next |= ACCEPT_FLAG; next |= ACCEPT_FLAG;
@@ -272,11 +270,15 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
next |= ACCEL_FLAG; next |= ACCEL_FLAG;
} }
unaligned_store_u16((u8 *)trans, next); unaligned_store_u16((u8 *)trans, next);
trans ++; }
trans++;
// check failure transition // check failure transition
for (symbol_t k = 0; k < alphaSize; k++) { for (symbol_t k = 0; k < alphaSize; k++) {
u16 next_k = unaligned_load_u16((u8 *)&trans[k]); u16 next_k = unaligned_load_u16((u8 *)&trans[k]);
if (next_k >= wide_limit) {
continue;
}
mstate_aux *aux_k = getAux(n, next_k); mstate_aux *aux_k = getAux(n, next_k);
if (aux_k->accept) { if (aux_k->accept) {
next_k |= ACCEPT_FLAG; next_k |= ACCEPT_FLAG;
@@ -525,11 +527,12 @@ size_t calcWideRegionSize(const dfa_info &info) {
} }
// wide info header // wide info header
size_t rv = info.wide_symbol_chain.size() * sizeof(u32) + 3; size_t rv = info.wide_symbol_chain.size() * sizeof(u32) + 4;
// wide info body // wide info body
for (const auto &chain : info.wide_symbol_chain) { for (const auto &chain : info.wide_symbol_chain) {
rv += chain.size() + (info.impl_alpha_size + 1) * sizeof(u16) + 2; rv += ROUNDUP_N(chain.size(), 2) +
(info.impl_alpha_size + 1) * sizeof(u16) + 2;
} }
return ROUNDUP_16(rv); return ROUNDUP_16(rv);
@@ -776,6 +779,7 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
char *wide_top = wide_base; char *wide_top = wide_base;
*(u8 *)(wide_top++) = WIDE_STATE; *(u8 *)(wide_top++) = WIDE_STATE;
wide_top = ROUNDUP_PTR(wide_top, 2);
*(u16 *)(wide_top) = wide_number; *(u16 *)(wide_top) = wide_number;
wide_top += 2; wide_top += 2;