diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index e0c459aa..77754e0b 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -1172,12 +1172,13 @@ u32 getReportListIndex(const flat_set &reports, } static -void buildExceptionMap(const build_info &args, - const ue2::unordered_set &exceptional, - map > &exceptionMap, - vector &exceptionReports) { +u32 buildExceptionMap(const build_info &args, + const ue2::unordered_set &exceptional, + map > &exceptionMap, + vector &exceptionReports) { const NGHolder &h = args.h; const u32 num_states = args.num_states; + u32 exceptionCount = 0; ue2::unordered_map pos_trigger; ue2::unordered_map tug_trigger; @@ -1307,10 +1308,13 @@ void buildExceptionMap(const build_info &args, assert(e.succ_states.size() == num_states); assert(e.squash_states.size() == num_states); exceptionMap[e].push_back(i); + exceptionCount++; } } - DEBUG_PRINTF("%zu unique exceptions found.\n", exceptionMap.size()); + DEBUG_PRINTF("%u exceptions found (%zu unique)\n", exceptionCount, + exceptionMap.size()); + return exceptionCount; } static @@ -1642,19 +1646,25 @@ struct Factory { implNFA_t *limex, const u32 exceptionsOffset) { DEBUG_PRINTF("exceptionsOffset=%u\n", exceptionsOffset); - // to make testing easier, we pre-set the exceptionMap to all invalid - // values - memset(limex->exceptionMap, 0xff, sizeof(limex->exceptionMap)); - exception_t *etable = (exception_t *)((char *)limex + exceptionsOffset); assert(ISALIGNED(etable)); - u32 ecount = 0; + map exception_by_state; for (const auto &m : exceptionMap) { const ExceptionProto &proto = m.first; const vector &states = m.second; - DEBUG_PRINTF("exception %u, triggered by %zu states.\n", ecount, - states.size()); + for (u32 i : states) { + assert(!contains(exception_by_state, i)); + exception_by_state.emplace(i, proto); + } + } + + u32 ecount = 0; + for (const auto &m : exception_by_state) { + const ExceptionProto &proto = m.second; + u32 state_id = m.first; + DEBUG_PRINTF("exception %u, triggered by state %u\n", ecount, + state_id); // Write the exception entry. exception_t &e = etable[ecount]; @@ -1668,13 +1678,10 @@ struct Factory { : repeatOffsets[proto.repeat_index]; e.repeatOffset = repeat_offset; - // for each state that can switch it on - for (auto state_id : states) { - // set this bit in the exception mask - maskSetBit(limex->exceptionMask, state_id); - // set this index in the exception map - limex->exceptionMap[state_id] = ecount; - } + // for the state that can switch it on + // set this bit in the exception mask + maskSetBit(limex->exceptionMask, state_id); + ecount++; } @@ -1882,12 +1889,10 @@ struct Factory { map > exceptionMap; vector exceptionReports; - buildExceptionMap(args, exceptional, exceptionMap, exceptionReports); + u32 exceptionCount = buildExceptionMap(args, exceptional, exceptionMap, + exceptionReports); - if (exceptionMap.size() > ~0U) { - DEBUG_PRINTF("too many exceptions!\n"); - return nullptr; - } + assert(exceptionCount <= args.num_states); // Build reach table and character mapping. vector reach; @@ -1942,7 +1947,7 @@ struct Factory { offset = ROUNDUP_CL(offset); const u32 exceptionsOffset = offset; - offset += sizeof(exception_t) * exceptionMap.size(); + offset += sizeof(exception_t) * exceptionCount; const u32 exceptionReportsOffset = offset; offset += sizeof(ReportID) * exceptionReports.size(); diff --git a/src/nfa/limex_dump.cpp b/src/nfa/limex_dump.cpp index 207769a0..2c215feb 100644 --- a/src/nfa/limex_dump.cpp +++ b/src/nfa/limex_dump.cpp @@ -80,6 +80,21 @@ void dumpMask(FILE *f, const char *name, const u8 *mask, u32 mask_bits) { fprintf(f, "MSK %-20s %s\n", name, dumpMask(mask, mask_bits).c_str()); } +template +static +u32 rank_in_mask(mask_t mask, u32 bit) { + u32 chunks[sizeof(mask)/sizeof(u32)]; + memcpy(chunks, &mask, sizeof(mask)); + u32 base_rank = 0; + for (u32 i = 0; i < bit / 32; i++) { + base_rank += popcount32(chunks[i]); + } + u32 chunk = chunks[bit / 32]; + u32 local_bit = bit % 32; + assert(chunk & (1U << local_bit)); + return base_rank + popcount32(chunk & ((1U << local_bit) - 1)); +} + template static void dumpRepeats(const limex_type *limex, u32 model_size, FILE *f) { @@ -338,7 +353,7 @@ struct limex_labeller : public nfa_labeller { return; } - u32 ex_index = limex->exceptionMap[state]; + u32 ex_index = rank_in_mask(limex->exceptionMask, state); const typename limex_traits::exception_type *e = &exceptions[ex_index]; @@ -409,7 +424,7 @@ void dumpExDotInfo(const limex_type *limex, u32 state, FILE *f) { const typename limex_traits::exception_type *exceptions = getExceptionTable(limex); - u32 ex_index = limex->exceptionMap[state]; + u32 ex_index = rank_in_mask(limex->exceptionMask, state); const typename limex_traits::exception_type *e = &exceptions[ex_index]; diff --git a/src/nfa/limex_exceptional.h b/src/nfa/limex_exceptional.h index 26c5e5a5..175ca393 100644 --- a/src/nfa/limex_exceptional.h +++ b/src/nfa/limex_exceptional.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -79,9 +79,13 @@ #ifdef ARCH_64_BIT #define CHUNK_T u64a #define FIND_AND_CLEAR_FN findAndClearLSB_64 +#define POPCOUNT_FN popcount64 +#define RANK_IN_MASK_FN rank_in_mask64 #else #define CHUNK_T u32 #define FIND_AND_CLEAR_FN findAndClearLSB_32 +#define POPCOUNT_FN popcount32 +#define RANK_IN_MASK_FN rank_in_mask32 #endif /** \brief Process a single exception. Returns 1 if exception handling should @@ -206,13 +210,13 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, #ifndef RUN_EXCEPTION_FN_ONLY -/** \brief Process all of the exceptions associated with the states in the \a estate. */ +/** \brief Process all of the exceptions associated with the states in the \a + * estate. */ static really_inline int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, - const struct IMPL_NFA_T *limex, - const u32 *exceptionMap, const EXCEPTION_T *exceptions, - const ReportID *exReports, - u64a offset, struct CONTEXT_T *ctx, char in_rev, char flags) { + const struct IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, + const ReportID *exReports, u64a offset, struct CONTEXT_T *ctx, + char in_rev, char flags) { assert(diffmask > 0); // guaranteed by caller macro if (EQ_STATE(estate, LOAD_STATE(&ctx->cached_estate))) { @@ -237,15 +241,23 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, // A copy of the estate as an array of GPR-sized chunks. CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; + CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; #ifdef ESTATE_ON_STACK memcpy(chunks, &estate, sizeof(STATE_T)); #else memcpy(chunks, estatep, sizeof(STATE_T)); #endif + memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T)); struct proto_cache new_cache = {0, NULL}; enum CacheResult cacheable = CACHE_RESULT; + u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)]; + base_index[0] = 0; + for (u32 i = 0; i < ARRAY_LENGTH(base_index) - 1; i++) { + base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]); + } + do { u32 t = findAndClearLSB_32(&diffmask); #ifdef ARCH_64_BIT @@ -254,10 +266,10 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, assert(t < ARRAY_LENGTH(chunks)); CHUNK_T word = chunks[t]; assert(word != 0); - u32 base = t * sizeof(CHUNK_T) * 8; do { - u32 bit = FIND_AND_CLEAR_FN(&word) + base; - u32 idx = exceptionMap[bit]; + u32 bit = FIND_AND_CLEAR_FN(&word); + u32 local_index = RANK_IN_MASK_FN(emask_chunks[t], bit); + u32 idx = local_index + base_index[t]; const EXCEPTION_T *e = &exceptions[idx]; if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ, diff --git a/src/nfa/limex_internal.h b/src/nfa/limex_internal.h index 6bc9a597..c37f5f40 100644 --- a/src/nfa/limex_internal.h +++ b/src/nfa/limex_internal.h @@ -135,7 +135,6 @@ struct LimExNFA##size { \ u32 exReportOffset; /* rel. to start of LimExNFA */ \ u32 repeatCount; \ u32 repeatOffset; \ - u32 exceptionMap[size]; \ u32 squashOffset; /* rel. to start of LimExNFA; for accept squashing */ \ u32 squashCount; \ u32 topCount; \ diff --git a/src/nfa/limex_native.c b/src/nfa/limex_native.c index e156cb81..8a0a8acd 100644 --- a/src/nfa/limex_native.c +++ b/src/nfa/limex_native.c @@ -74,7 +74,6 @@ static really_inline int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ, const struct LimExNFA32 *limex, - const u32 *exceptionMap, const struct NFAException32 *exceptions, const ReportID *exReports, u64a offset, struct NFAContext32 *ctx, char in_rev, char flags) { @@ -104,7 +103,7 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ, do { u32 bit = findAndClearLSB_32(&estate); - u32 idx = exceptionMap[bit]; + u32 idx = rank_in_mask32(limex->exceptionMask, bit); const struct NFAException32 *e = &exceptions[idx]; if (!runException32(e, s, succ, &local_succ, limex, exReports, offset, ctx, &new_cache, &cacheable, in_rev, flags)) { diff --git a/src/nfa/limex_runtime_impl.h b/src/nfa/limex_runtime_impl.h index d6c28c6f..881e41fd 100644 --- a/src/nfa/limex_runtime_impl.h +++ b/src/nfa/limex_runtime_impl.h @@ -105,8 +105,8 @@ // continue, 1 if an accept was fired and the user instructed us to halt. static really_inline char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, - const ReportID *exReports, const u32 *exceptionMap, - STATE_T s, const STATE_T emask, size_t i, u64a offset, + const ReportID *exReports, STATE_T s, + const STATE_T emask, size_t i, u64a offset, STATE_T *succ, u64a *final_loc, struct CONTEXT_T *ctx, const char flags, const char in_rev, const char first_match) { @@ -133,8 +133,8 @@ char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, char localflags = (!i && !in_rev) ? NO_OUTPUT | FIRST_BYTE : flags; int rv = JOIN(processExceptional, SIZE)( - pass_state, pass_estate, diffmask, succ, limex, exceptionMap, - exceptions, exReports, callback_offset, ctx, in_rev, localflags); + pass_state, pass_estate, diffmask, succ, limex, exceptions, exReports, + callback_offset, ctx, in_rev, localflags); if (rv == PE_RV_HALT) { return 1; // Halt matching. } @@ -176,7 +176,6 @@ char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, (const union AccelAux *)((const char *)limex + limex->accelAuxOffset); const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); const ReportID *exReports = getExReports(limex); - const u32 *exceptionMap = limex->exceptionMap; STATE_T s = LOAD_STATE(&ctx->s); /* assert(ISALIGNED_16(exceptions)); */ @@ -204,9 +203,9 @@ without_accel: STATE_T succ; NFA_EXEC_GET_LIM_SUCC(STATE_T); - if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, exceptionMap, s, - EXCEPTION_MASK, i, offset, &succ, final_loc, ctx, - flags, 0, first_match)) { + if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK, + i, offset, &succ, final_loc, ctx, flags, 0, + first_match)) { return MO_HALT_MATCHING; } @@ -255,9 +254,9 @@ with_accel: STATE_T succ; NFA_EXEC_GET_LIM_SUCC(STATE_T); - if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, exceptionMap, s, - EXCEPTION_MASK, i, offset, &succ, final_loc, ctx, - flags, 0, first_match)) { + if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK, + i, offset, &succ, final_loc, ctx, flags, 0, + first_match)) { return MO_HALT_MATCHING; } @@ -301,7 +300,6 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, #endif const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); const ReportID *exReports = getExReports(limex); - const u32 *exceptionMap = limex->exceptionMap; STATE_T s = LOAD_STATE(&ctx->s); /* assert(ISALIGNED_16(exceptions)); */ @@ -321,7 +319,7 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, STATE_T succ; NFA_EXEC_GET_LIM_SUCC(STATE_T); - if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, exceptionMap, s, + if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK, i, offset, &succ, final_loc, ctx, flags, 1, 0)) { return MO_HALT_MATCHING; diff --git a/src/util/bitutils.h b/src/util/bitutils.h index c863fba9..6f1bcd09 100644 --- a/src/util/bitutils.h +++ b/src/util/bitutils.h @@ -454,4 +454,20 @@ void bf64_unset(u64a *bitfield, u32 i) { *bitfield &= ~(1ULL << i); } +static really_inline +u32 rank_in_mask32(u32 mask, u32 bit) { + assert(bit < sizeof(u32) * 8); + assert(mask & (u32)(1U << bit)); + mask &= (u32)(1U << bit) - 1; + return popcount32(mask); +} + +static really_inline +u32 rank_in_mask64(u64a mask, u32 bit) { + assert(bit < sizeof(u64a) * 8); + assert(mask & (u64a)(1ULL << bit)); + mask &= (u64a)(1ULL << bit) - 1; + return popcount64(mask); +} + #endif // BITUTILS_H diff --git a/unit/internal/bitutils.cpp b/unit/internal/bitutils.cpp index e13270dc..4d476932 100644 --- a/unit/internal/bitutils.cpp +++ b/unit/internal/bitutils.cpp @@ -412,3 +412,27 @@ TEST(BitUtils, bf_it_1) { ASSERT_EQ(~0U, bf64_iterate(1ULL << 63, 63)); } +TEST(BitUtils, rank_in_mask32) { + for (u32 i = 0; i < 32; i++) { + ASSERT_EQ(i, rank_in_mask32(0xffffffff, i)); + ASSERT_EQ(0, rank_in_mask32(1U << i, i)); + } + ASSERT_EQ(0, rank_in_mask32(0xf0f0f0f0, 4)); + ASSERT_EQ(1, rank_in_mask32(0xf0f0f0f0, 5)); + ASSERT_EQ(3, rank_in_mask32(0xf0f0f0f0, 7)); + ASSERT_EQ(7, rank_in_mask32(0xf0f0f0f0, 15)); + ASSERT_EQ(15, rank_in_mask32(0xf0f0f0f0, 31)); +} + +TEST(BitUtils, rank_in_mask64) { + for (u32 i = 0; i < 64; i++) { + ASSERT_EQ(i, rank_in_mask64(0xffffffffffffffffULL, i)); + ASSERT_EQ(0, rank_in_mask64(1ULL << i, i)); + } + ASSERT_EQ(0, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 4)); + ASSERT_EQ(1, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 5)); + ASSERT_EQ(3, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 7)); + ASSERT_EQ(7, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 15)); + ASSERT_EQ(15, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 31)); + ASSERT_EQ(31, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 63)); +}