From f2c0a66b6f91655ad6b287822879403cfc94b39b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 4 Apr 2016 16:04:28 +1000 Subject: [PATCH] Rose: use a multibit for the exhaustion vector Previously, the exhaustion vector was a standard bitvector, which required an expensive memset() call at init for databases with a large number of exhaustion keys. --- src/report.h | 19 ++++++++------ src/rose/program_runtime.h | 2 +- src/rose/rose_build_bytecode.cpp | 4 +-- src/rose/runtime.h | 2 +- src/util/exhaust.h | 44 +++++++++----------------------- 5 files changed, 27 insertions(+), 44 deletions(-) diff --git a/src/report.h b/src/report.h index 96cea32e..6f5cec1b 100644 --- a/src/report.h +++ b/src/report.h @@ -243,7 +243,8 @@ int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch, } } - if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ir->ekey))) { + if (!is_simple && + unlikely(isExhausted(ci->rose, ci->exhaustionVector, ir->ekey))) { DEBUG_PRINTF("ate exhausted match\n"); return MO_CONTINUE_MATCHING; } @@ -296,7 +297,7 @@ exit: } if (!is_simple && ir->ekey != END_EXHAUST) { - markAsMatched(ci->exhaustionVector, ir->ekey); + markAsMatched(ci->rose, ci->exhaustionVector, ir->ekey); return MO_CONTINUE_MATCHING; } else { return ROSE_CONTINUE_MATCHING_NO_EXHAUST; @@ -338,7 +339,8 @@ int roseDeliverReport(u64a offset, UNUSED ReportID id, ReportID onmatch, assert(!ir->quashSom); #endif - assert(ekey == INVALID_EKEY || !isExhausted(ci->exhaustionVector, ekey)); + assert(ekey == INVALID_EKEY || + !isExhausted(ci->rose, ci->exhaustionVector, ekey)); u64a from_offset = 0; u64a to_offset = offset + offset_adjust; @@ -355,7 +357,7 @@ int roseDeliverReport(u64a offset, UNUSED ReportID id, ReportID onmatch, } if (ekey != INVALID_EKEY) { - markAsMatched(ci->exhaustionVector, ekey); + markAsMatched(ci->rose, ci->exhaustionVector, ekey); return MO_CONTINUE_MATCHING; } else { return ROSE_CONTINUE_MATCHING_NO_EXHAUST; @@ -398,7 +400,8 @@ int 
roseSomAdaptor_i(u64a from_offset, u64a to_offset, ReportID id, int halt = 0; - if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ir->ekey))) { + if (!is_simple && + unlikely(isExhausted(ci->rose, ci->exhaustionVector, ir->ekey))) { DEBUG_PRINTF("ate exhausted match\n"); goto exit; } @@ -444,7 +447,7 @@ int roseSomAdaptor_i(u64a from_offset, u64a to_offset, ReportID id, flags, ci->userContext); if (!is_simple) { - markAsMatched(ci->exhaustionVector, ir->ekey); + markAsMatched(ci->rose, ci->exhaustionVector, ir->ekey); } exit: @@ -485,7 +488,7 @@ int roseDeliverSomReport(u64a from_offset, u64a to_offset, assert(!ir->hasBounds || (to_offset >= ir->minOffset && to_offset <= ir->maxOffset)); assert(ir->ekey == INVALID_EKEY || - !isExhausted(ci->exhaustionVector, ir->ekey)); + !isExhausted(ci->rose, ci->exhaustionVector, ir->ekey)); to_offset += ir->offsetAdjust; assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset); @@ -509,7 +512,7 @@ int roseDeliverSomReport(u64a from_offset, u64a to_offset, if (is_exhaustible) { assert(ir->ekey != INVALID_EKEY); - markAsMatched(ci->exhaustionVector, ir->ekey); + markAsMatched(ci->rose, ci->exhaustionVector, ir->ekey); return MO_CONTINUE_MATCHING; } else { return ROSE_CONTINUE_MATCHING_NO_EXHAUST; diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index d816d62e..2dd3ba8b 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -1146,7 +1146,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, assert(ri->ekey != INVALID_EKEY); assert(ri->ekey < t->ekeyCount); const char *evec = scratch->core_info.exhaustionVector; - if (isExhausted(evec, ri->ekey)) { + if (isExhausted(t, evec, ri->ekey)) { DEBUG_PRINTF("ekey %u already set, match is exhausted\n", ri->ekey); assert(ri->fail_jump); // must progress diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index bcf42eed..7fe29538 100644 --- a/src/rose/rose_build_bytecode.cpp +++ 
b/src/rose/rose_build_bytecode.cpp @@ -666,9 +666,9 @@ void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount, so->history = curr_offset; curr_offset += historyRequired; - // Exhausted bit vector. + // Exhaustion multibit. so->exhausted = curr_offset; - curr_offset += ROUNDUP_N(tbi.rm.numEkeys(), 8) / 8; + curr_offset += mmbit_size(tbi.rm.numEkeys()); // SOM locations and valid/writeable multibit structures. if (tbi.ssm.numSomSlots()) { diff --git a/src/rose/runtime.h b/src/rose/runtime.h index 275adfb4..46ccc2a1 100644 --- a/src/rose/runtime.h +++ b/src/rose/runtime.h @@ -123,7 +123,7 @@ char roseSuffixInfoIsExhausted(const struct RoseEngine *t, const u32 *ekeys = (const u32 *)((const char *)t + info->ekeyListOffset); while (*ekeys != END_EXHAUST) { DEBUG_PRINTF("check %u\n", *ekeys); - if (!isExhausted(exhausted, *ekeys)) { + if (!isExhausted(t, exhausted, *ekeys)) { DEBUG_PRINTF("not exhausted -> alive\n"); return 0; } diff --git a/src/util/exhaust.h b/src/util/exhaust.h index e75d1809..3b5bff4e 100644 --- a/src/util/exhaust.h +++ b/src/util/exhaust.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,8 +34,8 @@ #define EXHAUST_H #include "rose/rose_internal.h" +#include "util/multibit.h" #include "ue2common.h" -#include <string.h> /** \brief Sentinel value meaning no further exhaustion keys. */ #define END_EXHAUST (~(u32)0) @@ -43,56 +43,36 @@ /** \brief Test whether the given key (\a eoff) is set in the exhaustion vector * \a evec. 
*/ static really_inline -int isExhausted(const char *evec, u32 eoff) { +int isExhausted(const struct RoseEngine *t, const char *evec, u32 eoff) { DEBUG_PRINTF("checking exhaustion %p %u\n", evec, eoff); - return eoff != END_EXHAUST && (evec[eoff >> 3] & (1 << (eoff % 8))); + return eoff != END_EXHAUST && + mmbit_isset((const u8 *)evec, t->ekeyCount, eoff); } /** \brief Returns 1 if all exhaustion keys in the bitvector are on. */ static really_inline -int isAllExhausted(const struct RoseEngine *t, const char *evec_in) { +int isAllExhausted(const struct RoseEngine *t, const char *evec) { if (!t->canExhaust) { return 0; /* pattern set is inexhaustible */ } - const u8 *evec = (const u8 *)evec_in; - - u32 whole_bytes = t->ekeyCount / 8; - for (u32 i = 0; i < whole_bytes; i++) { - if (evec[i] != 0xff) { - DEBUG_PRINTF("unexhausted pattern in byte %u\n", i); - return 0; - } - } - - u32 rem = t->ekeyCount % 8; - if (t->ekeyCount % 8) { - u8 mask = (1 << rem) - 1; - if (evec[whole_bytes] != (char)mask) { - DEBUG_PRINTF("unexhausted pattern (%hhu) in final byte\n", mask); - return 0; - } - } - - DEBUG_PRINTF("pattern set is exhausted\n"); - return 1; + return mmbit_all((const u8 *)evec, t->ekeyCount); } /** \brief Mark key \a eoff on in the exhaustion vector. */ static really_inline -void markAsMatched(char *evec, u32 eoff) { +void markAsMatched(const struct RoseEngine *t, char *evec, u32 eoff) { if (eoff != END_EXHAUST) { DEBUG_PRINTF("marking as exhausted key %u\n", eoff); - evec[eoff >> 3] |= 1 << (eoff % 8); + mmbit_set((u8 *)evec, t->ekeyCount, eoff); } } /** \brief Clear all keys in the exhaustion vector. */ static really_inline -void clearEvec(char *ev, const struct RoseEngine *t) { - size_t size = (t->ekeyCount + 7) / 8; - DEBUG_PRINTF("clearing evec %p %zu\n", ev, size); - memset(ev, 0, size); +void clearEvec(char *evec, const struct RoseEngine *t) { + DEBUG_PRINTF("clearing evec %p %u\n", evec, t->ekeyCount); + mmbit_clear((u8 *)evec, t->ekeyCount); } #endif