From 805a550a0aa5b137eb989558d7969cc2964954d6 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Mon, 24 Dec 2018 04:49:47 +0800 Subject: [PATCH] mcclellan: wide state fixes for sanitisers and accept state construction --- src/nfa/mcclellan.c | 10 +++++----- src/nfa/mcclellan_common_impl.h | 2 +- src/nfa/mcclellan_internal.h | 4 ++-- src/nfa/mcclellancompile.cpp | 30 +++++++++++++++++------------- 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/src/nfa/mcclellan.c b/src/nfa/mcclellan.c index 1521de5b..71f71e32 100644 --- a/src/nfa/mcclellan.c +++ b/src/nfa/mcclellan.c @@ -1197,7 +1197,7 @@ char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset, // new byte if (m->has_wide) { - *((u16 *)state + 1) = 0; + unaligned_store_u16((u16 *)state + 1, 0); } if (s) { @@ -1236,7 +1236,7 @@ void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state, // new byte if (m->has_wide) { - *((u16 *)state + 1) = 0; + unaligned_store_u16((u16 *)state + 1, 0); } } else { s = unaligned_load_u16(state); @@ -1285,7 +1285,7 @@ char nfaExecMcClellan16_queueInitState(UNUSED const struct NFA *nfa, // new byte if (m->has_wide) { - *((u16 *)q->state + 1) = 0; + unaligned_store_u16((u16 *)q->state + 1, 0); } return 0; } @@ -1325,7 +1325,7 @@ char nfaExecMcClellan16_queueCompressState(UNUSED const struct NFA *nfa, // new byte if (m->has_wide) { - *((u16 *)dest + 1) = *((const u16 *)src + 1); + unaligned_store_u16((u16 *)dest + 1, *((const u16 *)src + 1)); } return 0; } @@ -1344,7 +1344,7 @@ char nfaExecMcClellan16_expandState(UNUSED const struct NFA *nfa, void *dest, // new byte if (m->has_wide) { - *((u16 *)dest + 1) = *((const u16 *)src + 1); + *((u16 *)dest + 1) = unaligned_load_u16((const u16 *)src + 1); } return 0; } diff --git a/src/nfa/mcclellan_common_impl.h b/src/nfa/mcclellan_common_impl.h index b6af672d..7b0e7f48 100644 --- a/src/nfa/mcclellan_common_impl.h +++ b/src/nfa/mcclellan_common_impl.h @@ -88,7 +88,7 @@ u16 doWide16(const char *wide_entry, const u8 **c_inout, const u8 *end, const u8 *remap, const u16 *s, char *qstate, u16 *offset) { // Internal relative offset after the last visit of the wide state. if (qstate != NULL) { // stream mode - *offset = *(const u16 *)(qstate + 2); + *offset = unaligned_load_u16((const u16 *)(qstate + 2)); } u8 successful = 0; diff --git a/src/nfa/mcclellan_internal.h b/src/nfa/mcclellan_internal.h index 0981f99e..482fdb1b 100644 --- a/src/nfa/mcclellan_internal.h +++ b/src/nfa/mcclellan_internal.h @@ -52,13 +52,13 @@ extern "C" #define WIDE_STATE 2 #define WIDE_ENTRY_OFFSET8(weo_pos) (2 + (weo_pos)) -#define WIDE_ENTRY_OFFSET16(weo_pos) (3 + (weo_pos)) +#define WIDE_ENTRY_OFFSET16(weo_pos) (4 + (weo_pos)) #define WIDE_WIDTH_OFFSET 0 #define WIDE_SYMBOL_OFFSET8 1 #define WIDE_TRANSITION_OFFSET8(wto_width) (1 + (wto_width)) #define WIDE_SYMBOL_OFFSET16 2 -#define WIDE_TRANSITION_OFFSET16(wto_width) (2 + (wto_width)) +#define WIDE_TRANSITION_OFFSET16(wto_width) (2 + ROUNDUP_N(wto_width, 2)) struct report_list { u32 count; diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index db142f86..c1a4f87f 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -261,22 +261,24 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { // check successful transition u16 next = unaligned_load_u16((u8 *)trans); - if (next >= wide_limit) { - continue; + if (next < wide_limit) { + mstate_aux *aux = getAux(n, next); + if (aux->accept) { + next |= ACCEPT_FLAG; + } + if (aux->accel_offset) { + next |= ACCEL_FLAG; + } + unaligned_store_u16((u8 *)trans, next); } - mstate_aux *aux = getAux(n, next); - if (aux->accept) { - next |= ACCEPT_FLAG; - } - if (aux->accel_offset) { - next |= ACCEL_FLAG; - } - unaligned_store_u16((u8 *)trans, next); - trans ++; + trans++; // check failure transition for (symbol_t k = 0; k < alphaSize; k++) { u16 next_k = unaligned_load_u16((u8 *)&trans[k]); + if (next_k >= wide_limit) { + continue; + } mstate_aux *aux_k = getAux(n, next_k); if (aux_k->accept) { next_k |= ACCEPT_FLAG; @@ -525,11 +527,12 @@ size_t calcWideRegionSize(const dfa_info &info) { } // wide info header - size_t rv = info.wide_symbol_chain.size() * sizeof(u32) + 3; + size_t rv = info.wide_symbol_chain.size() * sizeof(u32) + 4; // wide info body for (const auto &chain : info.wide_symbol_chain) { - rv += chain.size() + (info.impl_alpha_size + 1) * sizeof(u16) + 2; + rv += ROUNDUP_N(chain.size(), 2) + + (info.impl_alpha_size + 1) * sizeof(u16) + 2; } return ROUNDUP_16(rv); @@ -776,6 +779,7 @@ bytecode_ptr mcclellanCompile16(dfa_info &info, const CompileContext &cc, char *wide_top = wide_base; *(u8 *)(wide_top++) = WIDE_STATE; + wide_top = ROUNDUP_PTR(wide_top, 2); *(u16 *)(wide_top) = wide_number; wide_top += 2;