From a6383a54a42847fe5c23682898ebf026c40c421f Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 30 Oct 2015 09:33:04 +1100 Subject: [PATCH 01/54] assignStringsToBuckets: assert that there are lits --- src/fdr/fdr_compile.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 8d658ccd..8f1f3e03 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -245,6 +245,8 @@ void FDRCompiler::assignStringsToBuckets() { typedef pair SCORE_INDEX_PAIR; u32 ls = verify_u32(lits.size()); + assert(ls); // Shouldn't be called with no literals. + // make a vector that contains our literals as pointers or u32 LiteralIndex values vector vli; vli.resize(ls); @@ -292,6 +294,8 @@ void FDRCompiler::assignStringsToBuckets() { currentChunk++; } } + + assert(currentChunk > 0); count[currentChunk - 1] = ls - chunkStartID; // close off chunks with an empty row firstIds[currentChunk] = ls; From cea914e18e32a4c35421b94dc8cc133e5b65d57f Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 30 Oct 2015 09:43:28 +1100 Subject: [PATCH 02/54] Add q_last_type() queue function Analogous to q_cur_type(), asserts that queue indices are within a valid range. --- src/nfa/nfa_api_queue.h | 8 ++++++++ src/rose/catchup.c | 2 +- src/rose/match.c | 4 ++-- src/rose/stream.c | 2 +- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/nfa/nfa_api_queue.h b/src/nfa/nfa_api_queue.h index d8079292..75c15cce 100644 --- a/src/nfa/nfa_api_queue.h +++ b/src/nfa/nfa_api_queue.h @@ -196,6 +196,14 @@ static really_inline s64a q_cur_loc(const struct mq *q) { return q->items[q->cur].location; } +/** \brief Returns the type of the last event in the queue. 
*/ +static really_inline u32 q_last_type(const struct mq *q) { + assert(q->cur < q->end); + assert(q->end > 0); + assert(q->end <= MAX_MQE_LEN); + return q->items[q->end - 1].type; +} + /** \brief Returns the location (relative to the beginning of the current data * buffer) of the last event in the queue. */ static really_inline s64a q_last_loc(const struct mq *q) { diff --git a/src/rose/catchup.c b/src/rose/catchup.c index b9fcd784..77b12b49 100644 --- a/src/rose/catchup.c +++ b/src/rose/catchup.c @@ -379,7 +379,7 @@ void ensureEnd(struct mq *q, UNUSED u32 qi, s64a final_loc) { DEBUG_PRINTF("ensure MQE_END %lld for queue %u\n", final_loc, qi); if (final_loc >= q_last_loc(q)) { /* TODO: ensure situation does not arise */ - assert(q->items[q->end - 1].type != MQE_END); + assert(q_last_type(q) != MQE_END); pushQueueNoMerge(q, MQE_END, final_loc); } } diff --git a/src/rose/match.c b/src/rose/match.c index 32224c24..be9bc35e 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -758,7 +758,7 @@ found_miracle: q_skip_forward_to(q, miracle_loc); - if (q->items[q->end - 1].type == MQE_START) { + if (q_last_type(q) == MQE_START) { DEBUG_PRINTF("miracle caused infix to die\n"); return 0; } @@ -853,7 +853,7 @@ char roseTestLeftfix(const struct RoseEngine *t, const struct RoseRole *tr, } } - if (q_cur_loc(q) < loc || q->items[q->end - 1].type != MQE_START) { + if (q_cur_loc(q) < loc || q_last_type(q) != MQE_START) { if (left->infix) { if (infixTooOld(q, loc)) { DEBUG_PRINTF("infix %u died of old age\n", ri); diff --git a/src/rose/stream.c b/src/rose/stream.c index aab79a29..b100eeae 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -167,7 +167,7 @@ found_miracle: DEBUG_PRINTF("skip q forward, %lld to %lld\n", begin_loc, miracle_loc); q_skip_forward_to(q, miracle_loc); - if (q->items[q->end - 1].type == MQE_START) { + if (q_last_type(q) == MQE_START) { DEBUG_PRINTF("miracle caused infix to die\n"); return MIRACLE_DEAD; } From da2386585da4f7e72e359a1334ce4b78c8892caa 
Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 30 Oct 2015 09:52:49 +1100 Subject: [PATCH 03/54] Init filter members to nullptr Note that BGL filters must be default-constructible. --- src/nfagraph/ng_depth.cpp | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/nfagraph/ng_depth.cpp b/src/nfagraph/ng_depth.cpp index 719c7654..d7945be9 100644 --- a/src/nfagraph/ng_depth.cpp +++ b/src/nfagraph/ng_depth.cpp @@ -55,14 +55,14 @@ namespace ue2 { namespace { /** Distance value used to indicate that the vertex can't be reached. */ -static const int DIST_UNREACHABLE = INT_MAX; +static constexpr int DIST_UNREACHABLE = INT_MAX; /** * Distance value used to indicate that the distance to a vertex is infinite * (for example, it's the max distance and there's a cycle in the path) or so * large that we should consider it effectively infinite. */ -static const int DIST_INFINITY = INT_MAX - 1; +static constexpr int DIST_INFINITY = INT_MAX - 1; // // Filters @@ -71,10 +71,12 @@ static const int DIST_INFINITY = INT_MAX - 1; template struct NodeFilter { typedef typename GraphT::edge_descriptor EdgeT; - NodeFilter() { } + NodeFilter() {} // BGL filters must be default-constructible. NodeFilter(const vector *bad_in, const GraphT *g_in) : bad(bad_in), g(g_in) { } bool operator()(const EdgeT &e) const { + assert(g && bad); + u32 src_idx = (*g)[source(e, *g)].index; u32 tar_idx = (*g)[target(e, *g)].index; @@ -84,16 +86,20 @@ struct NodeFilter { return !(*bad)[src_idx] && !(*bad)[tar_idx]; } - const vector *bad; - const GraphT *g; + +private: + const vector *bad = nullptr; + const GraphT *g = nullptr; }; template struct StartFilter { typedef typename GraphT::edge_descriptor EdgeT; - StartFilter() { } + StartFilter() {} // BGL filters must be default-constructible. 
explicit StartFilter(const GraphT *g_in) : g(g_in) { } bool operator()(const EdgeT &e) const { + assert(g); + u32 src_idx = (*g)[source(e, *g)].index; u32 tar_idx = (*g)[target(e, *g)].index; @@ -107,7 +113,9 @@ struct StartFilter { } return true; } - const GraphT *g; + +private: + const GraphT *g = nullptr; }; } // namespace From 447753f1481ad15c06ee593a04f23d9c2a8b8ba9 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 30 Oct 2015 15:01:20 +1100 Subject: [PATCH 04/54] FDR compiler: assert that all models are < 32 bits --- src/fdr/fdr_compile.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 8f1f3e03..8be44370 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -387,12 +387,14 @@ bool getMultiEntriesAtPosition(const FDREngineDescription &eng, const vector &lits, SuffixPositionInString pos, std::map > &m2) { + assert(eng.bits < 32); + u32 distance = 0; if (eng.bits <= 8) { distance = 1; } else if (eng.bits <= 16) { distance = 2; - } else if (eng.bits <= 32) { + } else { distance = 4; } From c7bebf8836e539c5f21f001a0d3f417c2975a290 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 30 Oct 2015 15:10:03 +1100 Subject: [PATCH 05/54] RoseBuildImpl: init base_id members These are set late in the Rose build process, when final IDs are allocated. 
--- src/rose/rose_build_misc.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 9f309985..f627017c 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -77,6 +77,8 @@ RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, SomSlotManager &ssm_in, hasSom(false), group_weak_end(0), group_end(0), + anchored_base_id(MO_INVALID_IDX), + nonbenefits_base_id(MO_INVALID_IDX), ematcher_region_size(0), floating_direct_report(false), eod_event_literal_id(MO_INVALID_IDX), From 7b6ad2a01a6966f94332854c877d35511d421a79 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Fri, 30 Oct 2015 16:20:18 +1100 Subject: [PATCH 06/54] doComponent: make it obvious that a is never null --- src/nfagraph/ng_puff.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/nfagraph/ng_puff.cpp b/src/nfagraph/ng_puff.cpp index cd190171..501d8f7b 100644 --- a/src/nfagraph/ng_puff.cpp +++ b/src/nfagraph/ng_puff.cpp @@ -316,7 +316,7 @@ bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a, bool unbounded = false; bool exhaustible = can_exhaust(g, rm); - while (a) { + while (true) { if (is_special(a, g)) { DEBUG_PRINTF("stopped puffing due to special vertex\n"); break; @@ -350,9 +350,7 @@ bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a, a = getSoleSourceVertex(g, a); - if (!a) { - break; - } + assert(a); /* already checked that old a had a proper in degree of 1 */ // Snark: we can't handle this case, because we can only handle a // single report ID on a vertex From a255e6b67802a05a33acfde43f71a9b1824f8b81 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 2 Nov 2015 10:28:31 +1100 Subject: [PATCH 07/54] add asserts to make bounds on alphaShift clear --- src/nfa/mcclellancompile.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 39d29359..f75d08b5 100644 --- 
a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -700,7 +700,10 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, ReportID arb; u8 single; u32 accelCount; + u8 alphaShift = info.getAlphaShift(); + assert(alphaShift <= 8); + u16 count_real_states; if (allocateFSN16(info, &count_real_states)) { DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", @@ -843,6 +846,7 @@ void fillInBasicState8(const dfa_info &info, mstate_aux *aux, u8 *succ_table, const vector &reports_eod, u32 i) { dstate_id_t j = info.implId(i); u8 alphaShift = info.getAlphaShift(); + assert(alphaShift <= 8); for (size_t s = 0; s < info.impl_alpha_size; s++) { dstate_id_t raw_succ = info.states[i].next[s]; From 510e999738c2ae755b01e7300c19cac09f689bcc Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 2 Nov 2015 11:22:51 +1100 Subject: [PATCH 08/54] make Automaton_Base ctor protected Makes explicit that Automaton_Base is intended to be used as a only base class --- src/nfagraph/ng_haig.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/nfagraph/ng_haig.cpp b/src/nfagraph/ng_haig.cpp index 7681fa14..78c615c9 100644 --- a/src/nfagraph/ng_haig.cpp +++ b/src/nfagraph/ng_haig.cpp @@ -114,7 +114,7 @@ void populateAccepts(const NGHolder &g, StateSet *accept, StateSet *acceptEod) { } class Automaton_Base { -public: +protected: Automaton_Base(const NGHolder &graph_in, const ue2::unordered_map &state_ids_in) : graph(graph_in), state_ids(state_ids_in) { @@ -122,6 +122,7 @@ public: assert(alphasize <= ALPHABET_SIZE); } +public: static bool canPrune(const flat_set &) { return false; } const NGHolder &graph; From 01498fa8a577625614b2fa57dbb792849706e792 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 2 Nov 2015 10:20:37 +1100 Subject: [PATCH 09/54] LimEx NFA: no need to zero init cached_esucc All of the "exception cache" members are guarded by cached_esucc. 
--- src/nfa/limex_runtime_impl.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/nfa/limex_runtime_impl.h b/src/nfa/limex_runtime_impl.h index 611fe747..703a7d19 100644 --- a/src/nfa/limex_runtime_impl.h +++ b/src/nfa/limex_runtime_impl.h @@ -542,7 +542,6 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { ctx->callback = q->cb; ctx->context = q->context; STORE_STATE(&ctx->cached_estate, ZERO_STATE); - STORE_STATE(&ctx->cached_esucc, ZERO_STATE); assert(q->items[q->cur].location >= 0); DEBUG_PRINTF("LOAD STATE\n"); @@ -638,7 +637,6 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { ctx->callback = q->cb; ctx->context = q->context; STORE_STATE(&ctx->cached_estate, ZERO_STATE); - STORE_STATE(&ctx->cached_esucc, ZERO_STATE); DEBUG_PRINTF("LOAD STATE\n"); STORE_STATE(&ctx->s, LOAD_STATE(q->state)); @@ -730,7 +728,6 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, ctx->callback = NULL; ctx->context = NULL; STORE_STATE(&ctx->cached_estate, ZERO_STATE); - STORE_STATE(&ctx->cached_esucc, ZERO_STATE); DEBUG_PRINTF("LOAD STATE\n"); STORE_STATE(&ctx->s, LOAD_STATE(q->state)); @@ -833,7 +830,6 @@ char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset, ctx->callback = cb; ctx->context = context; STORE_STATE(&ctx->cached_estate, ZERO_STATE); - STORE_STATE(&ctx->cached_esucc, ZERO_STATE); const IMPL_NFA_T *limex = getImplNfa(n); STORE_STATE(&ctx->s, INITIAL_FN(limex, 0)); // always anchored From b5e290e98508c58d699bbc957e44ef2b0bfe315d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 2 Nov 2015 11:57:55 +1100 Subject: [PATCH 10/54] LimEx NFA: no need to zero estate cache in STREAM We believe that we have solved the issues that required zeroing of the exception state in STREAM_FN and REV_STREAM_FN nowadays. 
--- src/nfa/limex_exceptional.h | 3 ++- src/nfa/limex_native.c | 3 ++- src/nfa/limex_runtime_impl.h | 2 -- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/nfa/limex_exceptional.h b/src/nfa/limex_exceptional.h index dcc302d4..26c5e5a5 100644 --- a/src/nfa/limex_exceptional.h +++ b/src/nfa/limex_exceptional.h @@ -218,7 +218,8 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, if (EQ_STATE(estate, LOAD_STATE(&ctx->cached_estate))) { DEBUG_PRINTF("using cached succ from previous state\n"); STORE_STATE(succ, OR_STATE(LOAD_STATE(succ), LOAD_STATE(&ctx->cached_esucc))); - if (ctx->cached_reports) { + if (ctx->cached_reports && (flags & CALLBACK_OUTPUT)) { + DEBUG_PRINTF("firing cached reports from previous state\n"); if (unlikely(limexRunReports(ctx->cached_reports, ctx->callback, ctx->context, offset) == MO_HALT_MATCHING)) { diff --git a/src/nfa/limex_native.c b/src/nfa/limex_native.c index 675c18bd..49c10199 100644 --- a/src/nfa/limex_native.c +++ b/src/nfa/limex_native.c @@ -83,7 +83,8 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ, if (estate == ctx->cached_estate) { DEBUG_PRINTF("using cached succ from previous state\n"); *succ |= ctx->cached_esucc; - if (ctx->cached_reports) { + if (ctx->cached_reports && (flags & CALLBACK_OUTPUT)) { + DEBUG_PRINTF("firing cached reports from previous state\n"); if (unlikely(limexRunReports(ctx->cached_reports, ctx->callback, ctx->context, offset) == MO_HALT_MATCHING)) { diff --git a/src/nfa/limex_runtime_impl.h b/src/nfa/limex_runtime_impl.h index 703a7d19..6ef3bae9 100644 --- a/src/nfa/limex_runtime_impl.h +++ b/src/nfa/limex_runtime_impl.h @@ -179,7 +179,6 @@ char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, assert(ISALIGNED_CL(ctx)); assert(ISALIGNED_CL(&ctx->s)); STATE_T s = LOAD_STATE(&ctx->s); - STORE_STATE(&ctx->cached_estate, ZERO_STATE); /* TODO: understand why this is required */ /* assert(ISALIGNED_16(exceptions)); */ /* 
assert(ISALIGNED_16(reach)); */ @@ -305,7 +304,6 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, const ReportID *exReports = getExReports(limex); const u32 *exceptionMap = limex->exceptionMap; STATE_T s = LOAD_STATE(&ctx->s); - STORE_STATE(&ctx->cached_estate, ZERO_STATE); /* TODO: understand why this is required */ /* assert(ISALIGNED_16(exceptions)); */ /* assert(ISALIGNED_16(reach)); */ From 4311775b436430a9f5a724eee2a78d2fc8101e57 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 2 Nov 2015 12:00:09 +1100 Subject: [PATCH 11/54] LimEx NFA: unify flush br/estate behaviour Make the GPR NFA models only clear cached_estate conditionally based on cached_br, as per the SIMD models. --- src/nfa/limex_native.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/nfa/limex_native.c b/src/nfa/limex_native.c index 49c10199..471e4bf0 100644 --- a/src/nfa/limex_native.c +++ b/src/nfa/limex_native.c @@ -120,7 +120,9 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ, ctx->cached_reports = new_cache.reports; ctx->cached_br = new_cache.br; } else if (cacheable == DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES) { - ctx->cached_estate = 0U; + if (ctx->cached_br) { + ctx->cached_estate = 0U; + } } return 0; From 89660e30b6fad4dd67cf07478e3498eae6f2100d Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 2 Nov 2015 13:35:04 +1100 Subject: [PATCH 12/54] raw_som_dfa: initialize members in constructor --- src/nfa/goughcompile.cpp | 2 +- src/nfa/goughcompile.h | 7 +++++-- src/nfagraph/ng_haig.cpp | 11 +++++------ 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp index 92d09aa5..d2de7b95 100644 --- a/src/nfa/goughcompile.cpp +++ b/src/nfa/goughcompile.cpp @@ -1109,7 +1109,7 @@ aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, u32 total_prog_size = byte_length(temp_blocks); curr_offset += total_prog_size; - 
gi.stream_som_loc_count = slot_count; + gi.stream_som_loc_count = slot_count; gi.stream_som_loc_width = somPrecision; u32 gough_size = ROUNDUP_N(curr_offset, 16); diff --git a/src/nfa/goughcompile.h b/src/nfa/goughcompile.h index b575c75b..9da983d4 100644 --- a/src/nfa/goughcompile.h +++ b/src/nfa/goughcompile.h @@ -70,8 +70,11 @@ struct dstate_som { }; struct raw_som_dfa : public raw_dfa { - raw_som_dfa(nfa_kind k, bool unordered_som_triggers_in) - : raw_dfa(k), unordered_som_triggers(unordered_som_triggers_in) { + raw_som_dfa(nfa_kind k, bool unordered_som_triggers_in, u32 trigger, + u32 stream_som_loc_width_in) + : raw_dfa(k), stream_som_loc_width(stream_som_loc_width_in), + unordered_som_triggers(unordered_som_triggers_in), + trigger_nfa_state(trigger) { assert(!unordered_som_triggers || is_triggered(kind)); } diff --git a/src/nfagraph/ng_haig.cpp b/src/nfagraph/ng_haig.cpp index 78c615c9..d06083bd 100644 --- a/src/nfagraph/ng_haig.cpp +++ b/src/nfagraph/ng_haig.cpp @@ -609,7 +609,6 @@ bool doHaig(const NGHolder &g, } haig_note_starts(g, &rdfa->new_som_nfa_states); - rdfa->trigger_nfa_state = NODE_START; return true; } @@ -639,7 +638,8 @@ unique_ptr attemptToBuildHaig(NGHolder &g, som_type som, return nullptr; } - auto rdfa = ue2::make_unique(g.kind, unordered_som); + auto rdfa = ue2::make_unique(g.kind, unordered_som, NODE_START, + somPrecision); DEBUG_PRINTF("determinising nfa with %u vertices\n", numStates); bool rv; @@ -659,7 +659,6 @@ unique_ptr attemptToBuildHaig(NGHolder &g, som_type som, DEBUG_PRINTF("determinised, building impl dfa (a,f) = (%hu,%hu)\n", rdfa->start_anchored, rdfa->start_floating); - rdfa->stream_som_loc_width = somPrecision; assert(rdfa->kind == g.kind); return rdfa; @@ -783,7 +782,9 @@ unique_ptr attemptToMergeHaig(const vector &df typedef Automaton_Haig_Merge::StateSet StateSet; vector nfa_state_map; - auto rdfa = ue2::make_unique(dfas[0]->kind, unordered_som); + auto rdfa = ue2::make_unique(dfas[0]->kind, unordered_som, + 
NODE_START, + dfas[0]->stream_som_loc_width); int rv = determinise(n, rdfa->states, limit, &nfa_state_map); if (rv) { @@ -831,11 +832,9 @@ unique_ptr attemptToMergeHaig(const vector &df } haig_merge_note_starts(dfas, per_dfa_adj, &rdfa->new_som_nfa_states); - rdfa->trigger_nfa_state = NODE_START; DEBUG_PRINTF("merged, building impl dfa (a,f) = (%hu,%hu)\n", rdfa->start_anchored, rdfa->start_floating); - rdfa->stream_som_loc_width = dfas[0]->stream_som_loc_width; return rdfa; } From 1507b3fd36872f666af077dadd6fd79c235ebdfd Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 2 Nov 2015 14:36:43 +1100 Subject: [PATCH 13/54] move oversize graph check out of Automaton_holder ctor --- src/rose/rose_build_anchored.cpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index 0c556dfe..675f8c68 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -271,16 +271,13 @@ public: typedef Holder_StateSet StateSet; typedef ue2::unordered_map StateMap; - explicit Automaton_Holder(const NGHolder &g_in) : g(g_in), bad(false) { + explicit Automaton_Holder(const NGHolder &g_in) : g(g_in) { for (auto v : vertices_range(g)) { vertexToIndex[v] = indexToVertex.size(); indexToVertex.push_back(v); } - if (indexToVertex.size() > ANCHORED_NFA_STATE_LIMIT) { - bad = true; - return; - } + assert(indexToVertex.size() <= ANCHORED_NFA_STATE_LIMIT); DEBUG_PRINTF("%zu states\n", indexToVertex.size()); init.wdelay = 0; @@ -400,7 +397,6 @@ public: array alpha; array unalpha; u16 alphasize; - bool bad; }; } // namespace @@ -670,13 +666,13 @@ int finalise_out(RoseBuildImpl &tbi, const NGHolder &h, static int addAutomaton(RoseBuildImpl &tbi, const NGHolder &h, ReportID *remap) { - Automaton_Holder autom(h); - - if (autom.bad) { + if (num_vertices(h) > ANCHORED_NFA_STATE_LIMIT) { DEBUG_PRINTF("autom bad!\n"); return ANCHORED_FAIL; } + Automaton_Holder autom(h); + unique_ptr 
out_dfa = ue2::make_unique(NFA_OUTFIX); if (!determinise(autom, out_dfa->states, MAX_DFA_STATES)) { return finalise_out(tbi, h, autom, move(out_dfa), remap); @@ -738,7 +734,6 @@ void buildSimpleDfas(const RoseBuildImpl &tbi, NGHolder h; populate_holder(simple.first, exit_ids, &h); Automaton_Holder autom(h); - assert(!autom.bad); unique_ptr rdfa = ue2::make_unique(NFA_OUTFIX); UNUSED int rv = determinise(autom, rdfa->states, MAX_DFA_STATES); assert(!rv); From e8bfe5478b9073a135e42d2c748db83cd88b27cf Mon Sep 17 00:00:00 2001 From: Xiang Wang Date: Thu, 29 Oct 2015 06:43:47 -0400 Subject: [PATCH 14/54] Optimize max clique analysis Use vectors of state ids to avoid the overhead of subgraph copies --- src/nfa/castlecompile.cpp | 164 ++++++++++++++++---------------------- 1 file changed, 70 insertions(+), 94 deletions(-) diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 5ea0e873..e7e40540 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -145,7 +145,6 @@ struct CliqueVertexProps { u32 stateId = ~0U; u32 parentId = ~0U; bool leftChild = false; /* tells us if it is the left child of its parent */ - bool rightChildVisited = false; /* tells us if its right child is visited */ vector clique1; /* clique for the left branch */ vector indepSet1; /* independent set for the left branch */ @@ -188,56 +187,6 @@ unique_ptr makeCG(const vector> &exclusiveSet) { return cg; } -static -CliqueGraph createSubgraph(const CliqueGraph &cg, - const vector &vertices) { - CliqueGraph g; - map vertexMap; - for (auto u : vertices) { - u32 id = cg[u].stateId; - CliqueVertex v = add_vertex(CliqueVertexProps(id), g); - vertexMap[id] = v; - } - - set found; - for (auto u : vertices) { - u32 srcId = cg[u].stateId; - CliqueVertex src = vertexMap[srcId]; - found.insert(srcId); - for (auto n : adjacent_vertices_range(u, cg)) { - u32 dstId = cg[n].stateId; - if (found.find(dstId) == found.end() && - vertexMap.find(dstId) != vertexMap.end()) { - CliqueVertex 
dst = vertexMap[dstId]; - add_edge(src, dst, g); - } - } - } - return g; -} - -static -void getNeighborInfo(const CliqueGraph &g, vector &neighbor, - vector &nonneighbor, - const CliqueVertex &cv) { - u32 id = g[cv].stateId; - ue2::unordered_set neighborId; - - // find neighbors for cv - for (auto v : adjacent_vertices_range(cv, g)) { - neighbor.push_back(v); - neighborId.insert(g[v].stateId); - } - - // find non-neighbors for cv - for (auto v : vertices_range(g)) { - if (g[v].stateId != id && - neighborId.find(g[v].stateId) == neighborId.end()) { - nonneighbor.push_back(v); - } - } -} - static void updateCliqueInfo(CliqueGraph &cg, const CliqueVertex &n, vector &clique, vector &indepSet) { @@ -257,76 +206,103 @@ void updateCliqueInfo(CliqueGraph &cg, const CliqueVertex &n, } } +static +void getNeighborInfo(const CliqueGraph &g, vector &neighbor, + vector &nonneighbor, const CliqueVertex &cv, + const set &group) { + u32 id = g[cv].stateId; + ue2::unordered_set neighborId; + + // find neighbors for cv + for (const auto &v : adjacent_vertices_range(cv, g)) { + if (g[v].stateId != id && contains(group, g[v].stateId)) { + neighbor.push_back(g[v].stateId); + neighborId.insert(g[v].stateId); + } + } + + neighborId.insert(id); + // find non-neighbors for cv + for (const auto &v : vertices_range(g)) { + if (!contains(neighborId, g[v].stateId) && + contains(group, g[v].stateId)) { + nonneighbor.push_back(g[v].stateId); + } + } +} + static void findCliqueGroup(CliqueGraph &cg, vector &clique, vector &indepSet) { - stack gStack; - gStack.push(cg); + stack> gStack; // create mapping between vertex and id map vertexMap; - for (auto v : vertices_range(cg)) { + vector init; + for (auto &v : vertices_range(cg)) { vertexMap[cg[v].stateId] = v; + init.push_back(cg[v].stateId); } + gStack.push(init); // get the vertex to start from - ue2::unordered_set foundVertexId; + set foundVertexId; + ue2::unordered_set visitedId; CliqueGraph::vertex_iterator vi, ve; tie(vi, ve) = vertices(cg); 
CliqueVertex start = *vi; u32 startId = cg[start].stateId; - + DEBUG_PRINTF("startId:%u\n", startId); bool leftChild = false; u32 prevId = startId; while (!gStack.empty()) { - CliqueGraph g = gStack.top(); - gStack.pop(); + const auto &g = gStack.top(); // choose a vertex from the graph - tie(vi, ve) = vertices(g); - CliqueVertex cv = *vi; - u32 id = g[cv].stateId; + assert(!g.empty()); + u32 id = g[0]; + CliqueVertex &n = vertexMap.at(id); - // corresponding vertex in the original graph - CliqueVertex n = vertexMap.at(id); - - vector neighbor; - vector nonneighbor; - getNeighborInfo(g, neighbor, nonneighbor, cv); - - if (foundVertexId.find(id) != foundVertexId.end()) { + vector neighbor; + vector nonneighbor; + set subgraphId(g.begin(), g.end()); + getNeighborInfo(cg, neighbor, nonneighbor, n, subgraphId); + if (contains(foundVertexId, id)) { prevId = id; - // get graph consisting of non-neighbors for right branch - if (!cg[n].rightChildVisited) { - gStack.push(g); + // get non-neighbors for right branch + if (visitedId.insert(id).second) { + DEBUG_PRINTF("right branch\n"); if (!nonneighbor.empty()) { - const CliqueGraph &nSub = createSubgraph(g, nonneighbor); - gStack.push(nSub); + gStack.push(nonneighbor); leftChild = false; } - cg[n].rightChildVisited = true; - } else if (id != startId) { - // both the left and right branches are visited, - // update its parent's clique and independent sets - u32 parentId = cg[n].parentId; - CliqueVertex parent = vertexMap.at(parentId); - if (cg[n].leftChild) { - updateCliqueInfo(cg, n, cg[parent].clique1, - cg[parent].indepSet1); - } else { - updateCliqueInfo(cg, n, cg[parent].clique2, - cg[parent].indepSet2); + } else { + if (id != startId) { + // both the left and right branches are visited, + // update its parent's clique and independent sets + u32 parentId = cg[n].parentId; + CliqueVertex &parent = vertexMap.at(parentId); + if (cg[n].leftChild) { + updateCliqueInfo(cg, n, cg[parent].clique1, + cg[parent].indepSet1); + } 
else { + updateCliqueInfo(cg, n, cg[parent].clique2, + cg[parent].indepSet2); + } } + gStack.pop(); } } else { foundVertexId.insert(id); - g[n].leftChild = leftChild; - g[n].parentId = prevId; - gStack.push(g); - // get graph consisting of neighbors for left branch + cg[n].leftChild = leftChild; + cg[n].parentId = prevId; + cg[n].clique1.clear(); + cg[n].clique2.clear(); + cg[n].indepSet1.clear(); + cg[n].indepSet2.clear(); + // get neighbors for left branch if (!neighbor.empty()) { - const CliqueGraph &sub = createSubgraph(g, neighbor); - gStack.push(sub); + gStack.push(neighbor); leftChild = true; } prevId = id; @@ -351,12 +327,12 @@ vector removeClique(CliqueGraph &cg) { while (!graph_empty(cg)) { const vector &c = cliquesVec.back(); vector dead; - for (auto v : vertices_range(cg)) { + for (const auto &v : vertices_range(cg)) { if (find(c.begin(), c.end(), cg[v].stateId) != c.end()) { dead.push_back(v); } } - for (auto v : dead) { + for (const auto &v : dead) { clear_vertex(v, cg); remove_vertex(v, cg); } From ae7dbc2472cc1d826dcbcf5cdb5a56a7187e619f Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 2 Nov 2015 14:41:17 +1100 Subject: [PATCH 15/54] repeatRecurTable: no need for u64a return type --- src/nfa/repeatcompile.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/nfa/repeatcompile.cpp b/src/nfa/repeatcompile.cpp index 25e57581..2f187503 100644 --- a/src/nfa/repeatcompile.cpp +++ b/src/nfa/repeatcompile.cpp @@ -75,8 +75,8 @@ u32 calcPackedBytes(u64a val) { } static -u64a repeatRecurTable(struct RepeatStateInfo *info, const depth &repeatMax, - const u32 minPeriod) { +u32 repeatRecurTable(struct RepeatStateInfo *info, const depth &repeatMax, + const u32 minPeriod) { u32 repeatTmp = info->patchCount > 2 ? 64 : (u32)repeatMax; u32 repeat_index = repeatTmp < minPeriod ? 
repeatTmp : minPeriod; for (u32 i = 0; i <= repeat_index; i++) { @@ -93,7 +93,7 @@ u64a repeatRecurTable(struct RepeatStateInfo *info, const depth &repeatMax, static u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax, - const u32 minPeriod, u64a rv) { + const u32 minPeriod, u32 rv) { u32 cnt = 0; u32 patch_bits = 0; u32 total_size = 0; @@ -171,7 +171,7 @@ RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin, assert(minPeriod); assert(repeatMax.is_finite()); { - u64a rv = repeatRecurTable(this, repeatMax, minPeriod); + u32 rv = repeatRecurTable(this, repeatMax, minPeriod); u32 repeatTmp = 0; if ((u32)repeatMax < minPeriod) { repeatTmp = repeatMax; From a083bcfa8dd24792ba436a2818210c71ab3264ee Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 3 Nov 2015 13:18:34 +1100 Subject: [PATCH 16/54] repeat: use u32 arithmetic explicitly In some ring-based models, we know that if the ring is not stale, then all our bounds should fit within 32-bits. This change makes these explicitly u32 rather than implicitly narrowing later on. --- src/nfa/repeat.c | 47 ++++++++++++++++++++++++---------------- unit/internal/repeat.cpp | 6 +++-- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/src/nfa/repeat.c b/src/nfa/repeat.c index f84eed71..0344423b 100644 --- a/src/nfa/repeat.c +++ b/src/nfa/repeat.c @@ -39,6 +39,8 @@ #include "util/pack_bits.h" #include "util/partial_store.h" #include "util/unaligned.h" + +#include #include /** \brief Returns the total capacity of the ring. @@ -709,12 +711,7 @@ enum RepeatMatch repeatHasMatchRing(const struct RepeatInfo *info, dumpRing(info, xs, ring); #endif - // We work in terms of the distance between the current offset and the base - // offset in our history. 
- u64a delta = offset - xs->offset; - DEBUG_PRINTF("delta=%llu\n", delta); - - if (delta < info->repeatMin) { + if (offset - xs->offset < info->repeatMin) { DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n"); return REPEAT_NOMATCH; } @@ -724,17 +721,22 @@ enum RepeatMatch repeatHasMatchRing(const struct RepeatInfo *info, return REPEAT_STALE; } + // If we're not stale, delta fits in the range [repeatMin, lastTop + + // repeatMax], which fits in a u32. + assert(offset - xs->offset < UINT32_MAX); + u32 delta = (u32)(offset - xs->offset); + DEBUG_PRINTF("delta=%u\n", delta); + // Find the bounds on possible matches in the ring buffer. - u64a lower = delta > info->repeatMax ? delta - info->repeatMax : 0; - u64a upper = delta - info->repeatMin + 1; - upper = MIN(upper, ringOccupancy(xs, ringSize)); + u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0; + u32 upper = MIN(delta - info->repeatMin + 1, ringOccupancy(xs, ringSize)); if (lower >= upper) { DEBUG_PRINTF("no matches to check\n"); return REPEAT_NOMATCH; } - DEBUG_PRINTF("possible match indices=[%llu,%llu]\n", lower, upper); + DEBUG_PRINTF("possible match indices=[%u,%u]\n", lower, upper); if (ringHasMatch(xs, ring, ringSize, lower, upper)) { return REPEAT_MATCH; } @@ -1252,17 +1254,19 @@ u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info, } else if (nextOffset > repeatLastTopSparseOptimalP(info, ctrl, state) + info->repeatMax) { + DEBUG_PRINTF("ring is stale\n"); return 0; } else { - u64a delta = nextOffset - xs->offset; - u64a lower = delta > info->repeatMax ? delta - info->repeatMax : 0; + assert(nextOffset - xs->offset < UINT32_MAX); // ring is not stale + u32 delta = (u32)(nextOffset - xs->offset); + u32 lower = delta > info->repeatMax ? 
delta - info->repeatMax : 0; patch = lower / patch_size; tval = lower - patch * patch_size; } DEBUG_PRINTF("patch %u\n", patch); u32 patch_count = info->patchCount; - if (patch >= patch_count){ + if (patch >= patch_count) { return 0; } @@ -1480,7 +1484,7 @@ char sparseHasMatch(const struct RepeatInfo *info, const u8 *state, enum RepeatMatch repeatHasMatchSparseOptimalP(const struct RepeatInfo *info, const union RepeatControl *ctrl, const void *state, u64a offset) { - DEBUG_PRINTF("check for match at %llu corresponding to trigger" + DEBUG_PRINTF("check for match at %llu corresponding to trigger " "at [%llu, %llu]\n", offset, offset - info->repeatMax, offset - info->repeatMin); @@ -1498,15 +1502,20 @@ enum RepeatMatch repeatHasMatchSparseOptimalP(const struct RepeatInfo *info, return REPEAT_STALE; } - u64a delta = offset - xs->offset; - u64a lower = delta > info->repeatMax ? delta - info->repeatMax : 0; - u64a upper = delta - info->repeatMin; + // Our delta between the base offset of the ring and the current offset + // must fit within the range [repeatMin, lastPossibleTop + repeatMax]. This + // range fits comfortably within a u32. + assert(offset - xs->offset <= UINT32_MAX); + + u32 delta = (u32)(offset - xs->offset); u32 patch_size = info->patchSize; u32 patch_count = info->patchCount; u32 occ = ringOccupancy(xs, patch_count); - upper = MIN(upper, occ * patch_size - 1); - DEBUG_PRINTF("lower=%llu, upper=%llu\n", lower, upper); + u32 lower = delta > info->repeatMax ? 
delta - info->repeatMax : 0; + u32 upper = MIN(delta - info->repeatMin, occ * patch_size - 1); + + DEBUG_PRINTF("lower=%u, upper=%u\n", lower, upper); u32 patch_lower = lower / patch_size; u32 patch_upper = upper / patch_size; diff --git a/unit/internal/repeat.cpp b/unit/internal/repeat.cpp index 95408f28..db797d24 100644 --- a/unit/internal/repeat.cpp +++ b/unit/internal/repeat.cpp @@ -505,6 +505,7 @@ const RepeatTestInfo sparseRepeats[] = { { REPEAT_SPARSE_OPTIMAL_P, 4000, 4000 }, { REPEAT_SPARSE_OPTIMAL_P, 4500, 4500 }, { REPEAT_SPARSE_OPTIMAL_P, 5000, 5000 }, + { REPEAT_SPARSE_OPTIMAL_P, 65534, 65534 }, // {N, M} repeats { REPEAT_SPARSE_OPTIMAL_P, 10, 20 }, { REPEAT_SPARSE_OPTIMAL_P, 20, 40 }, @@ -528,7 +529,8 @@ const RepeatTestInfo sparseRepeats[] = { { REPEAT_SPARSE_OPTIMAL_P, 3500, 4000 }, { REPEAT_SPARSE_OPTIMAL_P, 4000, 8000 }, { REPEAT_SPARSE_OPTIMAL_P, 4500, 8000 }, - { REPEAT_SPARSE_OPTIMAL_P, 5000, 5001 } + { REPEAT_SPARSE_OPTIMAL_P, 5000, 5001 }, + { REPEAT_SPARSE_OPTIMAL_P, 60000, 65534 } }; static @@ -802,7 +804,7 @@ TEST_P(SparseOptimalTest, Simple1) { 1000 + info->repeatMax * 2)); ASSERT_EQ(0U, repeatNextMatch(info, ctrl, state, 1000 + info->repeatMax * 2 + 1)); - ASSERT_EQ(0U, repeatNextMatch(info, ctrl, state, 10000)); + ASSERT_EQ(0U, repeatNextMatch(info, ctrl, state, 100000)); } TEST_P(SparseOptimalTest, TwoTopsNeg) { From 2603be3924f155b851851e2076e1cd52a37f99e5 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 3 Nov 2015 14:58:01 +1100 Subject: [PATCH 17/54] storeInitialRingTopPatch: fix large delta bug Check for staleness up front, so that it is safe to use u32 values to handle adding more tops. Adds LargeGap unit tests. 
--- src/nfa/repeat.c | 38 +++++++++++++++++++++++--------------- unit/internal/repeat.cpp | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 15 deletions(-) diff --git a/src/nfa/repeat.c b/src/nfa/repeat.c index 0344423b..59205ae4 100644 --- a/src/nfa/repeat.c +++ b/src/nfa/repeat.c @@ -1165,7 +1165,7 @@ static void storeInitialRingTopPatch(const struct RepeatInfo *info, struct RepeatRingControl *xs, u8 *state, u64a offset) { - DEBUG_PRINTF("set the first patch\n"); + DEBUG_PRINTF("set the first patch, offset=%llu\n", offset); xs->offset = offset; u8 *active = state; @@ -1340,21 +1340,33 @@ void repeatStoreSparseOptimalP(const struct RepeatInfo *info, union RepeatControl *ctrl, void *state, u64a offset, char is_alive) { struct RepeatRingControl *xs = &ctrl->ring; - - u64a delta = offset - xs->offset; - u32 patch_size = info->patchSize; - u32 patch_count = info->patchCount; - u32 encoding_size = info->encodingSize; - u32 patch = delta / patch_size; - DEBUG_PRINTF("offset: %llu encoding_size: %u\n", offset, encoding_size); - u8 *active = (u8 *)state; - if (!is_alive) { + + DEBUG_PRINTF("offset: %llu encoding_size: %u\n", offset, + info->encodingSize); + + // If (a) this is the first top, or (b) the ring is stale, initialize the + // ring and write this offset in as the first top. + if (!is_alive || + offset > + repeatLastTopSparseOptimalP(info, ctrl, state) + info->repeatMax) { storeInitialRingTopPatch(info, xs, active, offset); return; } - assert(offset >= xs->offset); + // Tops should arrive in order, with no duplicates. + assert(offset > repeatLastTopSparseOptimalP(info, ctrl, state)); + + // As the ring is not stale, our delta should fit within a u32. 
+ assert(offset - xs->offset <= UINT32_MAX); + u32 delta = (u32)(offset - xs->offset); + u32 patch_size = info->patchSize; + u32 patch_count = info->patchCount; + u32 encoding_size = info->encodingSize; + u32 patch = delta / patch_size; + + DEBUG_PRINTF("delta=%u, patch_size=%u, patch=%u\n", delta, patch_size, + patch); u8 *ring = active + info->patchesOffset; u32 occ = ringOccupancy(xs, patch_count); @@ -1365,10 +1377,6 @@ void repeatStoreSparseOptimalP(const struct RepeatInfo *info, patch, patch_count, occ); if (patch >= patch_count) { u32 patch_shift_count = patch - patch_count + 1; - if (patch_shift_count >= patch_count) { - storeInitialRingTopPatch(info, xs, active, offset); - return; - } assert(patch >= patch_shift_count); DEBUG_PRINTF("shifting by %u\n", patch_shift_count); xs->offset += patch_size * patch_shift_count; diff --git a/unit/internal/repeat.cpp b/unit/internal/repeat.cpp index db797d24..94f1bdc1 100644 --- a/unit/internal/repeat.cpp +++ b/unit/internal/repeat.cpp @@ -448,6 +448,25 @@ TEST_P(RepeatTest, Pack) { } } +TEST_P(RepeatTest, LargeGap) { + SCOPED_TRACE(testing::Message() << "Repeat: " << info); + + if (info.repeatMax == REPEAT_INF) { + return; // Test not valid for FIRST-type repeats. + } + + for (int i = 0; i < 64; i++) { + u64a top1 = 1000; + repeatStore(&info, ctrl, state, top1, 0); // first top + ASSERT_EQ(top1, repeatLastTop(&info, ctrl, state)); + + // Add a second top after a gap of 2^i bytes. 
+ u64a top2 = top1 + (1ULL << i); + repeatStore(&info, ctrl, state, top2, 1); // second top + ASSERT_EQ(top2, repeatLastTop(&info, ctrl, state)); + } +} + static const u32 sparsePeriods[] = { 2, @@ -895,6 +914,24 @@ TEST_P(SparseOptimalTest, Simple3e) { test_sparse3entryExpire(info, ctrl, state, 2 * info->minPeriod - 1); } +TEST_P(SparseOptimalTest, LargeGap) { + SCOPED_TRACE(testing::Message() << "Repeat: " << *info); + + for (int i = 0; i < 64; i++) { + u64a top1 = 1000; + repeatStore(info, ctrl, state, top1, 0); // first top + ASSERT_EQ(top1, repeatLastTop(info, ctrl, state)); + + // Add a second top after a gap of 2^i bytes. + u64a top2 = top1 + (1ULL << i); + if (top2 - top1 < info->minPeriod) { + continue; // not a valid top + } + repeatStore(info, ctrl, state, top2, 1); // second top + ASSERT_EQ(top2, repeatLastTop(info, ctrl, state)); + } +} + TEST_P(SparseOptimalTest, ThreeTops) { SCOPED_TRACE(testing::Message() << "Repeat: " << *info); From 46ad39f253a4c604d867337206501a4c74dd6dd6 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 3 Nov 2015 15:17:17 +1100 Subject: [PATCH 18/54] Add inlined sparseLastTop This allows the code to be inlined into other sparse optimal repeat functions. 
--- src/nfa/repeat.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/nfa/repeat.c b/src/nfa/repeat.c index 59205ae4..92b61874 100644 --- a/src/nfa/repeat.c +++ b/src/nfa/repeat.c @@ -1199,12 +1199,10 @@ u32 getSparseOptimalTargetValue(const struct RepeatInfo *info, return loc; } -u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - const void *state) { +static +u64a sparseLastTop(const struct RepeatInfo *info, + const struct RepeatRingControl *xs, const u8 *state) { DEBUG_PRINTF("looking for last top\n"); - const struct RepeatRingControl *xs = &ctrl->ring; - u32 patch_size = info->patchSize; u32 patch_count = info->patchCount; u32 encoding_size = info->encodingSize; @@ -1216,7 +1214,7 @@ u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info, } DEBUG_PRINTF("patch%u encoding_size%u occ%u\n", patch, encoding_size, occ); - const u8 *ring = (const u8 *)state + info->patchesOffset; + const u8 *ring = state + info->patchesOffset; u64a val = partial_load_u64a(ring + encoding_size * patch, encoding_size); DEBUG_PRINTF("val:%llu\n", val); @@ -1233,6 +1231,12 @@ u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info, return 0; } +u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + const void *state) { + return sparseLastTop(info, &ctrl->ring, state); +} + u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info, const union RepeatControl *ctrl, const void *state, u64a offset) { @@ -1251,9 +1255,7 @@ u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info, if (nextOffset <= xs->offset + info->repeatMin) { patch = xs->first; tval = 0; - } else if (nextOffset > - repeatLastTopSparseOptimalP(info, ctrl, state) + - info->repeatMax) { + } else if (nextOffset > sparseLastTop(info, xs, state) + info->repeatMax) { DEBUG_PRINTF("ring is stale\n"); return 0; } else { @@ -1348,14 +1350,13 @@ void 
repeatStoreSparseOptimalP(const struct RepeatInfo *info, // If (a) this is the first top, or (b) the ring is stale, initialize the // ring and write this offset in as the first top. if (!is_alive || - offset > - repeatLastTopSparseOptimalP(info, ctrl, state) + info->repeatMax) { + offset > sparseLastTop(info, xs, state) + info->repeatMax) { storeInitialRingTopPatch(info, xs, active, offset); return; } // Tops should arrive in order, with no duplicates. - assert(offset > repeatLastTopSparseOptimalP(info, ctrl, state)); + assert(offset > sparseLastTop(info, xs, state)); // As the ring is not stale, our delta should fit within a u32. assert(offset - xs->offset <= UINT32_MAX); @@ -1504,8 +1505,7 @@ enum RepeatMatch repeatHasMatchSparseOptimalP(const struct RepeatInfo *info, if (offset < xs->offset + info->repeatMin) { DEBUG_PRINTF("too soon\n"); return REPEAT_NOMATCH; - } else if (offset > repeatLastTopSparseOptimalP(info, ctrl, state) + - info->repeatMax) { + } else if (offset > sparseLastTop(info, xs, state) + info->repeatMax) { DEBUG_PRINTF("stale\n"); return REPEAT_STALE; } From 4c53bd46413cd54f0c85863addf675edfca88f9b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 3 Nov 2015 16:11:56 +1100 Subject: [PATCH 19/54] parser: use 'override' keyword in subclasses --- src/parser/check_refs.cpp | 2 +- src/parser/prefilter.cpp | 2 +- src/parser/shortcut_literal.cpp | 2 +- src/parser/unsupported.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/parser/check_refs.cpp b/src/parser/check_refs.cpp index 6307a06a..ad81ae76 100644 --- a/src/parser/check_refs.cpp +++ b/src/parser/check_refs.cpp @@ -57,7 +57,7 @@ public: ReferenceVisitor(size_t num_groups, const flat_set &targets) : num_ids(num_groups), names(targets) {} - ~ReferenceVisitor(); + ~ReferenceVisitor() override; void invalid_index(const char *component, unsigned id) { assert(component); diff --git a/src/parser/prefilter.cpp b/src/parser/prefilter.cpp index de99d9f1..f5a0c66c 100644 
--- a/src/parser/prefilter.cpp +++ b/src/parser/prefilter.cpp @@ -201,7 +201,7 @@ const ComponentSequence *findCapturingGroup(const Component *root, class PrefilterVisitor : public DefaultComponentVisitor { public: PrefilterVisitor(Component *c, const ParseMode &m) : root(c), mode(m) {} - ~PrefilterVisitor(); + ~PrefilterVisitor() override; /** \brief Calls the visitor (recursively) on a new replacement component * we've just created. Takes care of freeing it if the sequence is itself diff --git a/src/parser/shortcut_literal.cpp b/src/parser/shortcut_literal.cpp index d2b77dc5..f6f5d383 100644 --- a/src/parser/shortcut_literal.cpp +++ b/src/parser/shortcut_literal.cpp @@ -64,7 +64,7 @@ namespace ue2 { */ class ConstructLiteralVisitor : public ConstComponentVisitor { public: - ~ConstructLiteralVisitor(); + ~ConstructLiteralVisitor() override; /** \brief Thrown if this component does not represent a literal. */ struct NotLiteral {}; diff --git a/src/parser/unsupported.cpp b/src/parser/unsupported.cpp index b0b15d8d..c97a5750 100644 --- a/src/parser/unsupported.cpp +++ b/src/parser/unsupported.cpp @@ -44,7 +44,7 @@ namespace ue2 { * an unsupported component. 
*/ class UnsupportedVisitor : public DefaultConstComponentVisitor { public: - ~UnsupportedVisitor(); + ~UnsupportedVisitor() override; void pre(const ComponentAssertion &) override { throw ParseError("Zero-width assertions are not supported."); } From 5805ac193c5c20c32930d5d4fd5c62bbbcb8ec7b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 3 Nov 2015 16:12:36 +1100 Subject: [PATCH 20/54] NGWrapper: mark dtor with override --- src/nfagraph/ng.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nfagraph/ng.h b/src/nfagraph/ng.h index fe78254c..52353da9 100644 --- a/src/nfagraph/ng.h +++ b/src/nfagraph/ng.h @@ -64,7 +64,7 @@ public: bool prefilter, const som_type som, ReportID rid, u64a min_offset, u64a max_offset, u64a min_length); - ~NGWrapper(); + ~NGWrapper() override; /** index of the expression represented by this graph, used * - down the track in error handling From fb834114e573602daead103d490f888e2899f566 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 3 Nov 2015 16:13:12 +1100 Subject: [PATCH 21/54] limex_dump: use 'override' keyword in subclass --- src/nfa/limex_dump.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nfa/limex_dump.cpp b/src/nfa/limex_dump.cpp index b836b63b..084f35dd 100644 --- a/src/nfa/limex_dump.cpp +++ b/src/nfa/limex_dump.cpp @@ -317,7 +317,7 @@ template struct limex_labeller : public nfa_labeller { explicit limex_labeller(const limex_type *limex_in) : limex(limex_in) {} - void label_state(FILE *f, u32 state) const { + void label_state(FILE *f, u32 state) const override { const typename limex_traits::exception_type *exceptions = getExceptionTable(limex); if (!testbit((const u8 *)&limex->exceptionMask, From ed4a3cdcf102820367117897af09c1d510d20765 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 3 Nov 2015 16:19:47 +1100 Subject: [PATCH 22/54] compare: always use braces for for/if blocks --- src/util/compare.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 
deletions(-) diff --git a/src/util/compare.h b/src/util/compare.h index b7237ec2..11c01f08 100644 --- a/src/util/compare.h +++ b/src/util/compare.h @@ -98,18 +98,22 @@ u64a theirtoupper64(const u64a x) { static really_inline int cmpNocaseNaive(const u8 *p1, const u8 *p2, size_t len) { const u8 *pEnd = (const u8 *)p1 + len; - for (; p1 < pEnd; p1++, p2++) - if (mytolower(*p1) != mytolower(*p2)) + for (; p1 < pEnd; p1++, p2++) { + if (mytolower(*p1) != mytolower(*p2)) { return 1; + } + } return 0; } static really_inline int cmpCaseNaive(const u8 *p1, const u8 *p2, size_t len) { const u8 *pEnd = (const u8 *)p1 + len; - for (; p1 < pEnd; p1++, p2++) - if (*p1 != *p2) + for (; p1 < pEnd; p1++, p2++) { + if (*p1 != *p2) { return 1; + } + } return 0; } From 51c80200399966fa9c1f0ad6b629071a2d1ec636 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 3 Nov 2015 16:21:56 +1100 Subject: [PATCH 23/54] simplegrep: use correct sign in printf format --- examples/simplegrep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/simplegrep.c b/examples/simplegrep.c index 2fe6e6f3..9e392a8f 100644 --- a/examples/simplegrep.c +++ b/examples/simplegrep.c @@ -109,7 +109,7 @@ static char *readInputData(const char *inputFN, unsigned int *length) { * limit the size of our buffer appropriately. 
*/ if ((unsigned long)dataLen > UINT_MAX) { dataLen = UINT_MAX; - printf("WARNING: clipping data to %lu bytes\n", dataLen); + printf("WARNING: clipping data to %ld bytes\n", dataLen); } else if (dataLen == 0) { fprintf(stderr, "ERROR: input file \"%s\" is empty\n", inputFN); fclose(f); @@ -118,7 +118,7 @@ static char *readInputData(const char *inputFN, unsigned int *length) { char *inputData = malloc(dataLen); if (!inputData) { - fprintf(stderr, "ERROR: unable to malloc %lu bytes\n", dataLen); + fprintf(stderr, "ERROR: unable to malloc %ld bytes\n", dataLen); fclose(f); return NULL; } From 863ea1b2b26603527bb581a6f12d6ce2a7d20939 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 3 Nov 2015 16:22:39 +1100 Subject: [PATCH 24/54] mpv_dump: correct hex escapes in printf format --- src/nfa/mpv_dump.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/nfa/mpv_dump.cpp b/src/nfa/mpv_dump.cpp index b6e702ac..e731df87 100644 --- a/src/nfa/mpv_dump.cpp +++ b/src/nfa/mpv_dump.cpp @@ -70,9 +70,9 @@ void dumpKilo(FILE *f, const mpv *m, const mpv_kilopuff *k) { break; case MPV_VERM: if (!ourisprint(k->u.verm.c)) { - fprintf(f, "verm 0x%hhu\n", k->u.verm.c); + fprintf(f, "verm 0x%02x\n", k->u.verm.c); } else { - fprintf(f, "verm 0x%hhu '%c'\n", k->u.verm.c, k->u.verm.c); + fprintf(f, "verm 0x%02x '%c'\n", k->u.verm.c, k->u.verm.c); } break; case MPV_SHUFTI: @@ -87,9 +87,9 @@ void dumpKilo(FILE *f, const mpv *m, const mpv_kilopuff *k) { break; case MPV_NVERM: if (!ourisprint(k->u.verm.c)) { - fprintf(f, "nverm 0x%hhu\n", k->u.verm.c); + fprintf(f, "nverm 0x%02x\n", k->u.verm.c); } else { - fprintf(f, "nverm 0x%hhu '%c'\n", k->u.verm.c, k->u.verm.c); + fprintf(f, "nverm 0x%02x '%c'\n", k->u.verm.c, k->u.verm.c); } break; default: From 62776b615b32e7c8101fd41a0aa13928bdd7839c Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 3 Nov 2015 16:23:27 +1100 Subject: [PATCH 25/54] nfa_api_queue: debug printf format fix --- src/nfa/nfa_api_queue.h | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nfa/nfa_api_queue.h b/src/nfa/nfa_api_queue.h index 75c15cce..59c18fca 100644 --- a/src/nfa/nfa_api_queue.h +++ b/src/nfa/nfa_api_queue.h @@ -277,7 +277,7 @@ void debugQueue(const struct mq *q) { type = "MQE_TOP_N"; break; } - DEBUG_PRINTF("\tq[%u] %lld %d:%s\n", cur, q->items[cur].location, + DEBUG_PRINTF("\tq[%u] %lld %u:%s\n", cur, q->items[cur].location, q->items[cur].type, type); } } From d9efe071254acd762f12598f16c4a76a71082288 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 3 Nov 2015 16:24:06 +1100 Subject: [PATCH 26/54] depth: correct sign in printf format --- src/util/depth.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/depth.h b/src/util/depth.h index 29ff2819..977fd0c3 100644 --- a/src/util/depth.h +++ b/src/util/depth.h @@ -183,7 +183,7 @@ public: s64a rv = val + d; if (rv < 0 || (u64a)rv >= val_infinity) { - DEBUG_PRINTF("depth %llu too large to represent!\n", rv); + DEBUG_PRINTF("depth %lld too large to represent!\n", rv); throw DepthOverflowError(); } From 9a7b912a5dae8b15de3b932405713733eb22d23b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 9 Nov 2015 10:37:20 +1100 Subject: [PATCH 27/54] Rework parser rejection for POSIX collating elems Implement rejection of POSIX collating elements ("[.ch.]" and "[=ch=]" entirely in the Ragel parser, using the same approach both inside and ouside character classes. Fix buggy rejection of [^.ch.], which we should accept as a character class. 
--- src/parser/Parser.rl | 34 +++++++++++++-------------------- unit/hyperscan/bad_patterns.txt | 4 ++++ 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/src/parser/Parser.rl b/src/parser/Parser.rl index 1481b7d8..37beb765 100644 --- a/src/parser/Parser.rl +++ b/src/parser/Parser.rl @@ -790,10 +790,12 @@ unichar readUtf8CodePoint4c(const u8 *ts) { any => { throw LocatedParseError("Unknown property"); }; *|; charClassGuts := |* - # We don't like POSIX collating elements (neither does PCRE or Perl). - '\[\.' [^\]]* '\.\]' | - '\[=' [^\]]* '=\]' => { - throw LocatedParseError("Unsupported POSIX collating element"); + # We don't support POSIX collating elements (neither does PCRE + # or Perl). These look like [.ch.] or [=ch=]. + '\[\.' ( '\\]' | [^\]] )* '\.\]' | + '\[=' ( '\\]' | [^\]] )* '=\]' => { + throw LocatedParseError("Unsupported POSIX collating " + "element"); }; # Named sets # Adding these may cause the charclass to close, hence the @@ -1090,23 +1092,6 @@ unichar readUtf8CodePoint4c(const u8 *ts) { throwInvalidUtf8(); }; - # dot or equals at the end of a character class could be the end - # of a collating element, like [.blah.] or [=blah=]. - [.=] ']' => { - if (currentCls->getFirstChar() == *ts) { - assert(currentClsBegin); - ostringstream oss; - oss << "Unsupported POSIX collating element at index " - << currentClsBegin - ptr << "."; - throw ParseError(oss.str()); - } - currentCls->add(*ts); - currentCls->finalize(); - currentSeq->addComponent(move(currentCls)); - inCharClass = false; - fgoto main; - }; - # Literal character (any - ']') => { if (currentCls->class_empty()) { @@ -1232,6 +1217,13 @@ unichar readUtf8CodePoint4c(const u8 *ts) { throw LocatedParseError("POSIX named classes are only " "supported inside a class"); }; + # We don't support POSIX collating elements (neither does PCRE + # or Perl). These look like [.ch.] or [=ch=]. + '\[\.' 
( '\\]' | [^\]] )* '\.\]' | + '\[=' ( '\\]' | [^\]] )* '=\]' => { + throw LocatedParseError("Unsupported POSIX collating " + "element"); + }; # Begin eating characters for class '\[' => eatClass; # Begin quoted literal diff --git a/unit/hyperscan/bad_patterns.txt b/unit/hyperscan/bad_patterns.txt index 1ad445b3..837ba871 100644 --- a/unit/hyperscan/bad_patterns.txt +++ b/unit/hyperscan/bad_patterns.txt @@ -128,3 +128,7 @@ 128:/(*UTF8)^fo?ob{ro|nax_off\Qt=10omnax+8Wnah/ρρρρρρρρρρρρρρρρρρρρρρρρρρρ0}l.{1,60}Car*k|npanomnax+8Wnah/ #Expression is not valid UTF-8. 129:/bignum \1111111111111111111/ #Number is too big at index 7. 130:/foo|&{5555555,}/ #Bounded repeat is too large. +131:/[a[..]]/ #Unsupported POSIX collating element at index 2. +132:/[a[==]]/ #Unsupported POSIX collating element at index 2. +133:/[a[.\].]]/ #Unsupported POSIX collating element at index 2. +134:/[a[=\]=]]/ #Unsupported POSIX collating element at index 2. From b1f6a539c7e23771933be57ac97a28f5b458f3aa Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 9 Nov 2015 10:49:19 +1100 Subject: [PATCH 28/54] Remove dead ComponentClass::{get,set}FirstChar --- src/parser/ComponentClass.cpp | 2 +- src/parser/ComponentClass.h | 11 +---------- src/parser/Parser.rl | 3 --- 3 files changed, 2 insertions(+), 14 deletions(-) diff --git a/src/parser/ComponentClass.cpp b/src/parser/ComponentClass.cpp index d90d7db2..4570734d 100644 --- a/src/parser/ComponentClass.cpp +++ b/src/parser/ComponentClass.cpp @@ -420,7 +420,7 @@ unique_ptr getLiteralComponentClass(unsigned char c, ComponentClass::ComponentClass(const ParseMode &mode_in) : m_negate(false), mode(mode_in), in_cand_range(false), - range_start(INVALID_UNICODE), finalized(false), firstChar('\0') {} + range_start(INVALID_UNICODE), finalized(false) {} ComponentClass::~ComponentClass() { } diff --git a/src/parser/ComponentClass.h b/src/parser/ComponentClass.h index 21b51202..ce3b49f3 100644 --- a/src/parser/ComponentClass.h +++ b/src/parser/ComponentClass.h 
@@ -245,9 +245,6 @@ public: bool isNegated() const { return m_negate; } - void setFirstChar(char c) { firstChar = c; } - char getFirstChar() const { return firstChar; } - std::vector first() const override = 0; std::vector last() const override = 0; bool empty() const override { return false; } /* always 1 codepoint wide */ @@ -263,19 +260,13 @@ protected: unichar range_start; bool finalized; - /** Literal character at the start of this character class, e.g. '.' for - * the class [.abc]. Used to identify (unsupported) POSIX collating - * elements. */ - char firstChar; - virtual void createRange(unichar) = 0; // Protected copy ctor. Use clone instead. ComponentClass(const ComponentClass &other) : Component(other), m_negate(other.m_negate), mode(other.mode), in_cand_range(other.in_cand_range), range_start(other.range_start), - finalized(other.finalized), - firstChar(other.firstChar) {} + finalized(other.finalized) {} }; } // namespace ue2 diff --git a/src/parser/Parser.rl b/src/parser/Parser.rl index 37beb765..a0378dce 100644 --- a/src/parser/Parser.rl +++ b/src/parser/Parser.rl @@ -1094,9 +1094,6 @@ unichar readUtf8CodePoint4c(const u8 *ts) { # Literal character (any - ']') => { - if (currentCls->class_empty()) { - currentCls->setFirstChar(*ts); - } currentCls->add(*ts); }; From c68bfe05d860265887101c4196f1c02d15d506b6 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 9 Nov 2015 12:50:52 +1100 Subject: [PATCH 29/54] Don't use class_empty in early class parsing Instead, explicitly track whether we're still in the early class parsing machine. 
--- src/parser/Parser.rl | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/src/parser/Parser.rl b/src/parser/Parser.rl index a0378dce..96656875 100644 --- a/src/parser/Parser.rl +++ b/src/parser/Parser.rl @@ -424,6 +424,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) { assert(!inCharClass); // not reentrant currentCls = getComponentClass(mode); inCharClass = true; + inCharClassEarly = true; currentClsBegin = ts; fgoto readClass; } @@ -474,6 +475,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) { } action is_utf8 { mode.utf8 } action is_ignore_space { mode.ignore_space } + action is_early_charclass { inCharClassEarly } action addNumberedBackRef { if (accumulator == 0) { @@ -1109,25 +1111,24 @@ unichar readUtf8CodePoint4c(const u8 *ts) { # Parser to read stuff from a character class ############################################################# readClass := |* - # the negate and right bracket out the front are special - '\^' => { + # A caret at the beginning of the class means that the rest of the + # class is negated. + '\^' when is_early_charclass => { if (currentCls->isNegated()) { + // Already seen a caret; the second one is not a meta-character. + inCharClassEarly = false; fhold; fgoto charClassGuts; } else { currentCls->negate(); + // Note: we cannot switch off inCharClassEarly here, as /[^]]/ + // needs to use the right square bracket path below. } }; - ']' => { - // if this is the first thing in the class, add it and move along, - // otherwise jump into the char class machine to handle what might - // end up as fail - if (currentCls->class_empty()) { - currentCls->add(']'); - } else { - // leave it for the next machine - fhold; - } - fgoto charClassGuts; + # A right square bracket before anything "real" is interpreted as a + # literal right square bracket. 
+ ']' when is_early_charclass => { + currentCls->add(']'); + inCharClassEarly = false; }; # if we hit a quote before anything "real", handle it #'\\Q' => { fcall readQuotedClass; }; @@ -1137,7 +1138,11 @@ unichar readUtf8CodePoint4c(const u8 *ts) { '\\E' => { /*noop*/}; # time for the real work to happen - any => { fhold; fgoto charClassGuts; }; + any => { + inCharClassEarly = false; + fhold; + fgoto charClassGuts; + }; *|; ############################################################# @@ -1885,6 +1890,11 @@ unique_ptr parse(const char *const c_ptr, ParseMode &globalMode) { // brackets [..]. bool inCharClass = false; + // True if the machine is inside a character class but it has not processed + // any "real" elements yet, i.e. it's still processing meta-characters like + // '^'. + bool inCharClassEarly = false; + // Location at which the current character class began. const u8 *currentClsBegin = p; From 9cffa7666ffda10b2c506c926adf94c11dc8fb44 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 9 Nov 2015 12:59:36 +1100 Subject: [PATCH 30/54] Refine ComponentClass::class_empty ComponentClass::class_empty should only be used on finalized classes to determine whether a given class contains any elements; it should not take the cr_ucp or cps_ucp into account, as they have been folden in by the finalize call. Fixes our failure to identify that the pattern /[^\D\d]/8W can never match. 
--- src/parser/AsciiComponentClass.cpp | 3 ++- src/parser/ComponentClass.cpp | 1 - src/parser/ComponentClass.h | 8 ++++++-- src/parser/Utf8ComponentClass.cpp | 3 ++- unit/hyperscan/bad_patterns.txt | 1 + 5 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/parser/AsciiComponentClass.cpp b/src/parser/AsciiComponentClass.cpp index de7a40d4..44ecb5bb 100644 --- a/src/parser/AsciiComponentClass.cpp +++ b/src/parser/AsciiComponentClass.cpp @@ -52,7 +52,8 @@ AsciiComponentClass *AsciiComponentClass::clone() const { } bool AsciiComponentClass::class_empty(void) const { - return cr.none() && cr_ucp.none(); + assert(finalized); + return cr.none(); } void AsciiComponentClass::createRange(unichar to) { diff --git a/src/parser/ComponentClass.cpp b/src/parser/ComponentClass.cpp index 4570734d..43c05898 100644 --- a/src/parser/ComponentClass.cpp +++ b/src/parser/ComponentClass.cpp @@ -441,7 +441,6 @@ void ComponentClass::addDash(void) { } void ComponentClass::negate() { - assert(class_empty()); m_negate = true; } diff --git a/src/parser/ComponentClass.h b/src/parser/ComponentClass.h index ce3b49f3..1cb1a7d0 100644 --- a/src/parser/ComponentClass.h +++ b/src/parser/ComponentClass.h @@ -232,8 +232,12 @@ public: Component *accept(ComponentVisitor &v) override = 0; void accept(ConstComponentVisitor &v) const override = 0; - /** True iff we have already started adding members to the class. This is - * a different concept to Component::empty */ + /** \brief True if the class contains no members (i.e. it will not match + * against anything). This function can only be called on a finalized + * class. + * + * Note: This is a different concept to Component::empty. 
+ */ virtual bool class_empty(void) const = 0; virtual void add(PredefinedClass c, bool negated) = 0; diff --git a/src/parser/Utf8ComponentClass.cpp b/src/parser/Utf8ComponentClass.cpp index 72fc2146..3a6a85a4 100644 --- a/src/parser/Utf8ComponentClass.cpp +++ b/src/parser/Utf8ComponentClass.cpp @@ -484,7 +484,8 @@ UTF8ComponentClass *UTF8ComponentClass::clone() const { } bool UTF8ComponentClass::class_empty(void) const { - return cps.none() && cps_ucp.none(); + assert(finalized); + return cps.none(); } void UTF8ComponentClass::createRange(unichar to) { diff --git a/unit/hyperscan/bad_patterns.txt b/unit/hyperscan/bad_patterns.txt index 837ba871..3b13e064 100644 --- a/unit/hyperscan/bad_patterns.txt +++ b/unit/hyperscan/bad_patterns.txt @@ -132,3 +132,4 @@ 132:/[a[==]]/ #Unsupported POSIX collating element at index 2. 133:/[a[.\].]]/ #Unsupported POSIX collating element at index 2. 134:/[a[=\]=]]/ #Unsupported POSIX collating element at index 2. +135:/[^\D\d]/8W #Pattern can never match. 
From b9d3b73ab8264aaa6cf2dd66a857afc84731177c Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Thu, 5 Nov 2015 14:46:07 +1100 Subject: [PATCH 31/54] Fix includes to meet our usual guidelines --- unit/internal/nfagraph_find_matches.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unit/internal/nfagraph_find_matches.cpp b/unit/internal/nfagraph_find_matches.cpp index 41d28c52..553d6dc5 100644 --- a/unit/internal/nfagraph_find_matches.cpp +++ b/unit/internal/nfagraph_find_matches.cpp @@ -36,9 +36,9 @@ #include "nfagraph/ng_builder.h" #include "nfagraph/ng.h" #include "nfagraph/ng_asserts.h" -#include "util/target_info.h" #include "hs_compile.h" -#include "ng_find_matches.h" +#include "util/ng_find_matches.h" +#include "util/target_info.h" using namespace std; using namespace testing; From f65170da5b15a77ddd8fb8948c72ab7b00170f06 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Thu, 5 Nov 2015 14:49:04 +1100 Subject: [PATCH 32/54] cmake: improve build paths for nested builds If Hyperscan is built as a subproject of another cmake project, it helps to refer to PROJECT_xx_DIR instead of CMAKE_xx_DIR, etc. --- CMakeLists.txt | 24 +++++++++++++++++------- src/fdr/CMakeLists.txt | 10 +++++----- unit/CMakeLists.txt | 5 +++-- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 67885c93..a1dfcaf2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION}) string (TIMESTAMP BUILD_DATE "%Y-%m-%d") -set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake) +set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) INCLUDE (CheckFunctionExists) @@ -56,8 +56,9 @@ if(CMAKE_GENERATOR STREQUAL Xcode) set(XCODE TRUE) endif() -include_directories(src .) 
-include_directories(${CMAKE_BINARY_DIR}) +set(CMAKE_INCLUDE_CURRENT_DIR 1) +include_directories(${PROJECT_SOURCE_DIR}/src) +include_directories(${PROJECT_BINARY_DIR}) include_directories(SYSTEM include) set(BOOST_USE_STATIC_LIBS OFF) @@ -71,7 +72,7 @@ find_package(Boost ${BOOST_MINVERSION}) if(NOT Boost_FOUND) # we might have boost in tree, so provide a hint and try again message(STATUS "trying include dir for boost") - set(BOOST_INCLUDEDIR "${CMAKE_SOURCE_DIR}/include") + set(BOOST_INCLUDEDIR "${PROJECT_SOURCE_DIR}/include") find_package(Boost ${BOOST_MINVERSION}) if(NOT Boost_FOUND) message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. Either install system pacakges if available, extract Boost headers to ${CMAKE_SOURCE_DIR}/include, or set the CMake BOOST_ROOT variable.") @@ -219,6 +220,15 @@ CHECK_FUNCTION_EXISTS(_aligned_malloc HAVE__ALIGNED_MALLOC) CHECK_C_COMPILER_FLAG(-fvisibility=hidden HAS_C_HIDDEN) CHECK_CXX_COMPILER_FLAG(-fvisibility=hidden HAS_CXX_HIDDEN) +if (RELEASE_BUILD) + if (HAS_C_HIDDEN) + set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fvisibility=hidden") + endif() + if (HAS_CXX_HIDDEN) + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fvisibility=hidden") + endif() +endif() + # testing a builtin takes a little more work CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED) CHECK_CXX_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED) @@ -327,8 +337,8 @@ if (EXISTS ${CMAKE_SOURCE_DIR}/tools) endif() # do substitutions -configure_file(${CMAKE_MODULE_PATH}/config.h.in ${CMAKE_BINARY_DIR}/config.h) -configure_file(src/hs_version.h.in hs_version.h) +configure_file(${CMAKE_MODULE_PATH}/config.h.in ${PROJECT_BINARY_DIR}/config.h) +configure_file(src/hs_version.h.in ${PROJECT_BINARY_DIR}/hs_version.h) if (PKG_CONFIG_FOUND) # we really only need to do this if we 
have pkg-config @@ -345,7 +355,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") # include the autogen targets add_subdirectory(src/fdr) -include_directories(${CMAKE_BINARY_DIR}/src/fdr) +include_directories(${PROJECT_BINARY_DIR}/src/fdr) if(NOT WIN32) set(RAGEL_C_FLAGS "-Wno-unused") diff --git a/src/fdr/CMakeLists.txt b/src/fdr/CMakeLists.txt index 25396689..1436c3fc 100644 --- a/src/fdr/CMakeLists.txt +++ b/src/fdr/CMakeLists.txt @@ -27,11 +27,11 @@ fdr_autogen(teddy_runtime teddy_autogen.c) fdr_autogen(teddy_compiler teddy_autogen_compiler.cpp) set(fdr_GENERATED_SRC -${CMAKE_BINARY_DIR}/src/fdr/fdr_autogen.c -${CMAKE_BINARY_DIR}/src/fdr/fdr_autogen_compiler.cpp -${CMAKE_BINARY_DIR}/src/fdr/teddy_autogen.c -${CMAKE_BINARY_DIR}/src/fdr/teddy_autogen_compiler.cpp -PARENT_SCOPE) + ${PROJECT_BINARY_DIR}/src/fdr/fdr_autogen.c + ${PROJECT_BINARY_DIR}/src/fdr/fdr_autogen_compiler.cpp + ${PROJECT_BINARY_DIR}/src/fdr/teddy_autogen.c + ${PROJECT_BINARY_DIR}/src/fdr/teddy_autogen_compiler.cpp + PARENT_SCOPE) set_source_files_properties(${fdr_GENERATED_SRC} PROPERTIES GENERATED TRUE) include_directories(${CMAKE_CURRENT_BINARY_DIR}) diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index 9bc74e23..a8925a3c 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -7,7 +7,8 @@ if(NOT XCODE) else() set(CMAKE_CXX_FLAGS "-isystem ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CXX_FLAGS}") endif() -include_directories(${CMAKE_SOURCE_DIR}/util) + +include_directories(${PROJECT_SOURCE_DIR}) # remove some warnings # cmake's scope means these only apply here @@ -26,7 +27,7 @@ endif() add_library(gtest ${gtest_SOURCES}) -add_definitions(-DGTEST_HAS_PTHREAD=0 -DSRCDIR=${CMAKE_SOURCE_DIR}) +add_definitions(-DGTEST_HAS_PTHREAD=0 -DSRCDIR=${PROJECT_SOURCE_DIR}) if (NOT RELEASE_BUILD) set(unit_internal_SOURCES From cf3ddd9e88e4ca27991f13a6da73f2b9e748057d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 10 Nov 2015 16:18:42 +1100 Subject: [PATCH 33/54] 
repeatStoreSparseOptimalP: make diff a u32 As delta is a u32, we know diff will always fit within a u32 as well. Silences a warning from Coverity. --- src/nfa/repeat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/nfa/repeat.c b/src/nfa/repeat.c index 92b61874..c1ff5162 100644 --- a/src/nfa/repeat.c +++ b/src/nfa/repeat.c @@ -1414,7 +1414,8 @@ void repeatStoreSparseOptimalP(const struct RepeatInfo *info, } } - u64a diff = delta - patch * patch_size; + assert((u64a)patch * patch_size <= delta); + u32 diff = delta - patch * patch_size; const u64a *repeatTable = getImplTable(info); val += repeatTable[diff]; From 2a2576e90784b473832058ff06c591118e346445 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 12 Nov 2015 15:27:11 +1100 Subject: [PATCH 34/54] Introduce copy_bytes for writing into bytecode Protects memcpy from nullptr sources, which triggers failures in GCC's UB sanitizer. --- src/nfa/goughcompile.cpp | 11 +++------ src/nfa/limex_compile.cpp | 13 ++++------ src/nfagraph/ng_lbr.cpp | 5 ++-- src/rose/rose_build_bytecode.cpp | 41 ++++++++++---------------------- src/util/container.h | 29 ++++++++++++++++++++++ 5 files changed, 51 insertions(+), 48 deletions(-) diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp index d2de7b95..d735c80a 100644 --- a/src/nfa/goughcompile.cpp +++ b/src/nfa/goughcompile.cpp @@ -1136,16 +1136,11 @@ aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, gough_dfa->length = gough_size; /* copy in blocks */ - memcpy((u8 *)gough_dfa.get() + edge_prog_offset, &edge_blocks[0], - byte_length(edge_blocks)); + copy_bytes((u8 *)gough_dfa.get() + edge_prog_offset, edge_blocks); if (top_prog_offset) { - memcpy((u8 *)gough_dfa.get() + top_prog_offset, &top_blocks[0], - byte_length(top_blocks)); - } - if (!temp_blocks.empty()) { - memcpy((u8 *)gough_dfa.get() + prog_base_offset, &temp_blocks[0], - byte_length(temp_blocks)); + copy_bytes((u8 *)gough_dfa.get() + top_prog_offset, top_blocks); } + 
copy_bytes((u8 *)gough_dfa.get() + prog_base_offset, temp_blocks); return gough_dfa; } diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 5cf46334..a6c34cb6 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -1397,8 +1397,7 @@ struct Factory { repeat->horizon = rsi.horizon; repeat->packedCtrlSize = rsi.packedCtrlSize; repeat->stateSize = rsi.stateSize; - memcpy(repeat->packedFieldSizes, rsi.packedFieldSizes.data(), - byte_length(rsi.packedFieldSizes)); + copy_bytes(repeat->packedFieldSizes, rsi.packedFieldSizes); repeat->patchCount = rsi.patchCount; repeat->patchSize = rsi.patchSize; repeat->encodingSize = rsi.encodingSize; @@ -1413,8 +1412,7 @@ struct Factory { // Copy in the sparse lookup table. if (br.type == REPEAT_SPARSE_OPTIMAL_P) { assert(!rsi.table.empty()); - memcpy(info_ptr + tableOffset, rsi.table.data(), - byte_length(rsi.table)); + copy_bytes(info_ptr + tableOffset, rsi.table); } // Fill the tug mask. @@ -1702,6 +1700,7 @@ struct Factory { for (u32 i = 0; i < num_repeats; i++) { repeatOffsets[i] = offset; + assert(repeats[i].first); memcpy((char *)limex + offset, repeats[i].first.get(), repeats[i].second); offset += repeats[i].second; @@ -1709,8 +1708,7 @@ struct Factory { // Write repeat offset lookup table. 
assert(ISALIGNED_N((char *)limex + repeatOffsetsOffset, alignof(u32))); - memcpy((char *)limex + repeatOffsetsOffset, repeatOffsets.data(), - byte_length(repeatOffsets)); + copy_bytes((char *)limex + repeatOffsetsOffset, repeatOffsets); limex->repeatOffset = repeatOffsetsOffset; limex->repeatCount = num_repeats; @@ -1725,8 +1723,7 @@ struct Factory { limex->exReportOffset = exceptionReportsOffset; assert(ISALIGNED_N((char *)limex + exceptionReportsOffset, alignof(ReportID))); - memcpy((char *)limex + exceptionReportsOffset, reports.data(), - byte_length(reports)); + copy_bytes((char *)limex + exceptionReportsOffset, reports); } static diff --git a/src/nfagraph/ng_lbr.cpp b/src/nfagraph/ng_lbr.cpp index 11eded69..b9cacaa7 100644 --- a/src/nfagraph/ng_lbr.cpp +++ b/src/nfagraph/ng_lbr.cpp @@ -98,8 +98,7 @@ void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin, info->packedCtrlSize = rsi.packedCtrlSize; info->horizon = rsi.horizon; info->minPeriod = minPeriod; - memcpy(&info->packedFieldSizes, rsi.packedFieldSizes.data(), - byte_length(rsi.packedFieldSizes)); + copy_bytes(&info->packedFieldSizes, rsi.packedFieldSizes); info->patchCount = rsi.patchCount; info->patchSize = rsi.patchSize; info->encodingSize = rsi.encodingSize; @@ -122,7 +121,7 @@ void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin, nfa->length = verify_u32(len); info->length = verify_u32(sizeof(RepeatInfo) + sizeof(u64a) * (rsi.patchSize + 1)); - memcpy(table, rsi.table.data(), byte_length(rsi.table)); + copy_bytes(table, rsi.table); } } diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index bbc8644e..e17953aa 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2687,12 +2687,6 @@ void fillInReportInfo(RoseEngine *engine, u32 reportOffset, sizeof(internal_report)); } -static -void populateInvDkeyTable(char *ptr, const ReportManager &rm) { - vector table = rm.getDkeyToReportTable(); - 
memcpy(ptr, table.data(), byte_length(table)); -} - static bool hasSimpleReports(const vector &reports) { auto it = find_if(reports.begin(), reports.end(), isComplexReport); @@ -4154,7 +4148,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->ekeyCount = rm.numEkeys(); engine->dkeyCount = rm.numDkeys(); engine->invDkeyOffset = dkeyOffset; - populateInvDkeyTable(ptr + dkeyOffset, rm); + copy_bytes(ptr + dkeyOffset, rm.getDkeyToReportTable()); engine->somHorizon = ssm.somPrecision(); engine->somLocationCount = ssm.numSomSlots(); @@ -4314,33 +4308,22 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { buildLitBenefits(*this, engine.get(), base_lits_benefits_offset); // Copy in other tables - memcpy(ptr + bc.engine_blob_base, bc.engine_blob.data(), - byte_length(bc.engine_blob)); - - memcpy(ptr + engine->literalOffset, literalTable.data(), - byte_length(literalTable)); - memcpy(ptr + engine->roleOffset, bc.roleTable.data(), - byte_length(bc.roleTable)); - copy(leftInfoTable.begin(), leftInfoTable.end(), - (LeftNfaInfo *)(ptr + engine->leftOffset)); + copy_bytes(ptr + bc.engine_blob_base, bc.engine_blob); + copy_bytes(ptr + engine->literalOffset, literalTable); + copy_bytes(ptr + engine->roleOffset, bc.roleTable); + copy_bytes(ptr + engine->leftOffset, leftInfoTable); fillLookaroundTables(ptr + lookaroundTableOffset, ptr + lookaroundReachOffset, bc.lookaround); fillInSomRevNfas(engine.get(), ssm, rev_nfa_table_offset, rev_nfa_offsets); - memcpy(ptr + engine->predOffset, predTable.data(), byte_length(predTable)); - memcpy(ptr + engine->rootRoleOffset, rootRoleTable.data(), - byte_length(rootRoleTable)); - memcpy(ptr + engine->anchoredReportMapOffset, art.data(), byte_length(art)); - memcpy(ptr + engine->anchoredReportInverseMapOffset, arit.data(), - byte_length(arit)); - memcpy(ptr + engine->multidirectOffset, mdr_reports.data(), - byte_length(mdr_reports)); - - copy(activeLeftIter.begin(), activeLeftIter.end(), - 
(mmbit_sparse_iter *)(ptr + engine->activeLeftIterOffset)); - - memcpy(ptr + engine->sideOffset, sideTable.data(), byte_length(sideTable)); + copy_bytes(ptr + engine->predOffset, predTable); + copy_bytes(ptr + engine->rootRoleOffset, rootRoleTable); + copy_bytes(ptr + engine->anchoredReportMapOffset, art); + copy_bytes(ptr + engine->anchoredReportInverseMapOffset, arit); + copy_bytes(ptr + engine->multidirectOffset, mdr_reports); + copy_bytes(ptr + engine->activeLeftIterOffset, activeLeftIter); + copy_bytes(ptr + engine->sideOffset, sideTable); DEBUG_PRINTF("rose done %p\n", engine.get()); return engine; diff --git a/src/util/container.h b/src/util/container.h index b4a10c89..62e841c1 100644 --- a/src/util/container.h +++ b/src/util/container.h @@ -33,8 +33,13 @@ #ifndef UTIL_CONTAINER_H #define UTIL_CONTAINER_H +#include "ue2common.h" + #include +#include +#include #include +#include #include namespace ue2 { @@ -92,11 +97,35 @@ std::set assoc_keys(const C &container) { return keys; } +/** + * \brief Return the length in bytes of the given vector of (POD) objects. + */ template typename std::vector::size_type byte_length(const std::vector &vec) { + static_assert(std::is_pod::value, "should be pod"); return vec.size() * sizeof(T); } +/** + * \brief Copy the given vector of POD objects to the given location in memory. + * It is safe to give this function an empty vector. + */ +template +void *copy_bytes(void *dest, const std::vector &vec) { + static_assert(std::is_pod::value, "should be pod"); + assert(dest); + + // Since we're generally using this function to write into the bytecode, + // dest should be appropriately aligned for T. + assert(ISALIGNED_N(dest, alignof(T))); + + if (vec.empty()) { + return dest; // Protect memcpy against null pointers. 
+ } + assert(vec.data() != nullptr); + return std::memcpy(dest, vec.data(), byte_length(vec)); +} + template bool is_subset_of(const OrderedContainer1 &small, const OrderedContainer2 &big) { static_assert(std::is_same Date: Thu, 12 Nov 2015 13:27:55 +1100 Subject: [PATCH 35/54] Restore \Q..\E support in character classes --- src/parser/Parser.rl | 12 +++--------- unit/hyperscan/bad_patterns.txt | 1 - 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/parser/Parser.rl b/src/parser/Parser.rl index 96656875..65cd7c1a 100644 --- a/src/parser/Parser.rl +++ b/src/parser/Parser.rl @@ -893,11 +893,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) { throw LocatedParseError("Invalid POSIX named class"); }; '\\Q' => { - // fcall readQuotedClass; - ostringstream str; - str << "\\Q..\\E sequences in character classes not supported at index " - << ts - ptr << "."; - throw ParseError(str.str()); + fcall readQuotedClass; }; '\\E' => { /*noop*/}; # Backspace (this is only valid for \b in char classes) @@ -1131,10 +1127,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) { inCharClassEarly = false; }; # if we hit a quote before anything "real", handle it - #'\\Q' => { fcall readQuotedClass; }; - '\\Q' => { - throw LocatedParseError("\\Q..\\E sequences in character classes not supported"); - }; + '\\Q' => { fcall readQuotedClass; }; '\\E' => { /*noop*/}; # time for the real work to happen @@ -1170,6 +1163,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) { # Literal character any => { currentCls->add(*ts); + inCharClassEarly = false; }; *|; diff --git a/unit/hyperscan/bad_patterns.txt b/unit/hyperscan/bad_patterns.txt index 3b13e064..fb2a2357 100644 --- a/unit/hyperscan/bad_patterns.txt +++ b/unit/hyperscan/bad_patterns.txt @@ -85,7 +85,6 @@ 84:/[=\]=]/ #Unsupported POSIX collating element at index 0. 85:/A(?!)+Z/ #Invalid repeat at index 5. 86:/\X/ #\X unsupported at index 0. -87:/[a\Qz\E]/ #\Q..\E sequences in character classes not supported at index 2. 
88:/[A-\d]/ #Invalid range in character class at index 3. 89:/[A-[:digit:]]/ #Invalid range in character class at index 3. 90:/B[--[:digit:]--]+/ #Invalid range in character class at index 4. From abbd54889959425c77689d30d1764079f62a8ecc Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 13 Nov 2015 14:36:28 +1100 Subject: [PATCH 36/54] ng_execute: update interface to use flat_set This changes all the execute_graph() interfaces so that instead of mutating a std::set of vertices, they accept an initial flat_set of states and return a resultant flat_set of states after execution. (Note that internally execute_graph() still uses bitsets) This is both faster and more flexible. --- src/nfagraph/ng_execute.cpp | 86 +++++++++++++++++---------------- src/nfagraph/ng_execute.h | 22 +++++---- src/nfagraph/ng_som.cpp | 15 +++--- src/nfagraph/ng_som_util.cpp | 8 +-- src/rose/rose_build_compile.cpp | 17 ++++--- 5 files changed, 79 insertions(+), 69 deletions(-) diff --git a/src/nfagraph/ng_execute.cpp b/src/nfagraph/ng_execute.cpp index aebfa712..92bef737 100644 --- a/src/nfagraph/ng_execute.cpp +++ b/src/nfagraph/ng_execute.cpp @@ -125,61 +125,62 @@ void execute_graph_i(const NGHolder &g, const vector &info, } static -void fillStateBitset(const NGHolder &g, const set &in, - dynamic_bitset<> &out) { - out.reset(); - for (auto v : in) { +dynamic_bitset<> makeStateBitset(const NGHolder &g, + const flat_set &in) { + dynamic_bitset<> work_states(num_vertices(g)); + for (const auto &v : in) { u32 idx = g[v].index; - out.set(idx); + work_states.set(idx); } + return work_states; } static -void fillVertexSet(const dynamic_bitset<> &in, - const vector &info, set &out) { - out.clear(); +flat_set getVertices(const dynamic_bitset<> &in, + const vector &info) { + flat_set out; for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) { out.insert(info[i].vertex); } + return out; } static -void fillInfoTable(const NGHolder &g, vector &info) { - info.resize(num_vertices(g)); 
+vector makeInfoTable(const NGHolder &g) { + vector info(num_vertices(g)); for (auto v : vertices_range(g)) { u32 idx = g[v].index; const CharReach &cr = g[v].char_reach; assert(idx < info.size()); info[idx] = StateInfo(v, cr); } + return info; } -void execute_graph(const NGHolder &g, const ue2_literal &input, - set *states, bool kill_sds) { +flat_set execute_graph(const NGHolder &g, const ue2_literal &input, + const flat_set &initial_states, + bool kill_sds) { assert(hasCorrectlyNumberedVertices(g)); - vector info; - fillInfoTable(g, info); - dynamic_bitset<> work_states(num_vertices(g)); - fillStateBitset(g, *states, work_states); + auto info = makeInfoTable(g); + auto work_states = makeStateBitset(g, initial_states); execute_graph_i(g, info, input, &work_states, kill_sds); - fillVertexSet(work_states, info, *states); + return getVertices(work_states, info); } -void execute_graph(const NGHolder &g, const vector &input, - set *states) { +flat_set execute_graph(const NGHolder &g, + const vector &input, + const flat_set &initial_states) { assert(hasCorrectlyNumberedVertices(g)); - vector info; - fillInfoTable(g, info); - dynamic_bitset<> work_states(num_vertices(g)); - fillStateBitset(g, *states, work_states); + auto info = makeInfoTable(g); + auto work_states = makeStateBitset(g, initial_states); execute_graph_i(g, info, input, &work_states, false); - fillVertexSet(work_states, info, *states); + return getVertices(work_states, info); } typedef boost::reverse_graph RevNFAGraph; @@ -276,9 +277,10 @@ private: }; } // namespace -void execute_graph(const NGHolder &running_g, const NGHolder &input_dag, - const set &input_start_states, - set *states) { +flat_set execute_graph(const NGHolder &running_g, + const NGHolder &input_dag, + const flat_set &input_start_states, + const flat_set &initial_states) { DEBUG_PRINTF("g has %zu vertices, input_dag has %zu vertices\n", num_vertices(running_g), num_vertices(input_dag)); assert(hasCorrectlyNumberedVertices(running_g)); @@ 
-290,10 +292,8 @@ void execute_graph(const NGHolder &running_g, const NGHolder &input_dag, RevNFAGraph revg(input_dag.g); map > dfs_states; - vector info; - fillInfoTable(running_g, info); - dynamic_bitset<> input_fs(num_vertices(running_g)); - fillStateBitset(running_g, *states, input_fs); + auto info = makeInfoTable(running_g); + auto input_fs = makeStateBitset(running_g, initial_states); for (auto v : input_start_states) { dfs_states[v] = input_fs; @@ -303,21 +303,25 @@ void execute_graph(const NGHolder &running_g, const NGHolder &input_dag, eg_visitor(running_g, info, input_dag, dfs_states), make_assoc_property_map(colours)); - fillVertexSet(dfs_states[input_dag.accept], info, *states); + auto states = getVertices(dfs_states[input_dag.accept], info); #ifdef DEBUG - DEBUG_PRINTF(" output rstates:"); - for (auto v : *states) { - printf(" %u", running_g[v].index); - } - printf("\n"); + DEBUG_PRINTF(" output rstates:"); + for (const auto &v : states) { + printf(" %u", running_g[v].index); + } + printf("\n"); #endif + + return states; } -void execute_graph(const NGHolder &running_g, const NGHolder &input_dag, - set *states) { - set input_start_states = {input_dag.start, input_dag.startDs}; - execute_graph(running_g, input_dag, input_start_states, states); +flat_set execute_graph(const NGHolder &running_g, + const NGHolder &input_dag, + const flat_set &initial_states) { + auto input_start_states = {input_dag.start, input_dag.startDs}; + return execute_graph(running_g, input_dag, input_start_states, + initial_states); } } // namespace ue2 diff --git a/src/nfagraph/ng_execute.h b/src/nfagraph/ng_execute.h index 80fdcbd5..e2c7c72d 100644 --- a/src/nfagraph/ng_execute.h +++ b/src/nfagraph/ng_execute.h @@ -35,8 +35,8 @@ #define NG_EXECUTE_H #include "ng_holder.h" +#include "util/ue2_containers.h" -#include #include namespace ue2 { @@ -44,23 +44,25 @@ namespace ue2 { class CharReach; struct ue2_literal; -void execute_graph(const NGHolder &g, const ue2_literal &input, - 
std::set *states, bool kill_sds = false); +flat_set execute_graph(const NGHolder &g, const ue2_literal &input, + const flat_set &initial, + bool kill_sds = false); -void execute_graph(const NGHolder &g, const std::vector &input, - std::set *states); +flat_set execute_graph(const NGHolder &g, + const std::vector &input, + const flat_set &initial); /** on exit, states contains any state which may still be enabled after * receiving an input which corresponds to some path through the input_dag from * start or startDs to accept. input_dag MUST be acyclic aside from self-loops. */ -void execute_graph(const NGHolder &g, const NGHolder &input_dag, - std::set *states); +flat_set execute_graph(const NGHolder &g, const NGHolder &input_dag, + const flat_set &initial); /* as above, but able to specify the source states for the input graph */ -void execute_graph(const NGHolder &g, const NGHolder &input_dag, - const std::set &input_start_states, - std::set *states); +flat_set execute_graph(const NGHolder &g, const NGHolder &input_dag, + const flat_set &input_start_states, + const flat_set &initial); } // namespace ue2 diff --git a/src/nfagraph/ng_som.cpp b/src/nfagraph/ng_som.cpp index 90ebb5c3..f26b62aa 100644 --- a/src/nfagraph/ng_som.cpp +++ b/src/nfagraph/ng_som.cpp @@ -266,7 +266,7 @@ bool validateEXSL(const NGHolder &g, const vector escapes_vec(1, escapes); const vector notescapes_vec(1, ~escapes); - set states; + ue2::flat_set states; /* turn on all states past the prefix */ DEBUG_PRINTF("region %u is cutover\n", region); for (auto v : vertices_range(g)) { @@ -276,20 +276,20 @@ bool validateEXSL(const NGHolder &g, } /* process the escapes */ - execute_graph(g, escapes_vec, &states); + states = execute_graph(g, escapes_vec, states); /* flood with any number of not escapes */ - set prev_states; + ue2::flat_set prev_states; while (prev_states != states) { prev_states = states; - execute_graph(g, notescapes_vec, &states); + states = execute_graph(g, notescapes_vec, states); 
insert(&states, prev_states); } /* find input starts to use for when we are running the prefix through as * when the escape character arrives we may be in matching the prefix * already */ - set prefix_start_states; + ue2::flat_set prefix_start_states; for (auto v : vertices_range(prefix)) { if (v != prefix.accept && v != prefix.acceptEod /* and as we have already made it past the prefix once */ @@ -298,11 +298,12 @@ bool validateEXSL(const NGHolder &g, } } - execute_graph(prefix, escapes_vec, &prefix_start_states); + prefix_start_states = + execute_graph(prefix, escapes_vec, prefix_start_states); assert(contains(prefix_start_states, prefix.startDs)); /* see what happens after we feed it the prefix */ - execute_graph(g, prefix, prefix_start_states, &states); + states = execute_graph(g, prefix, prefix_start_states, states); for (auto v : states) { assert(v != g.accept && v != g.acceptEod); /* no cr -> should never be diff --git a/src/nfagraph/ng_som_util.cpp b/src/nfagraph/ng_som_util.cpp index 7f487f89..a0829451 100644 --- a/src/nfagraph/ng_som_util.cpp +++ b/src/nfagraph/ng_som_util.cpp @@ -136,7 +136,7 @@ bool firstMatchIsFirst(const NGHolder &p) { return false; } - set states; + ue2::flat_set states; /* turn on all states (except starts - avoid suffix matches) */ /* If we were doing (1) we would also except states leading to accepts - avoid prefix matches */ @@ -149,7 +149,7 @@ bool firstMatchIsFirst(const NGHolder &p) { } /* run the prefix the main graph */ - execute_graph(p, p, &states); + states = execute_graph(p, p, states); for (auto v : states) { /* need to check if this vertex may represent an infix match - ie @@ -313,7 +313,7 @@ bool sentClearsTail(const NGHolder &g, */ u32 first_bad_region = ~0U; - set states; + ue2::flat_set states; /* turn on all states */ DEBUG_PRINTF("region %u is cutover\n", last_head_region); for (auto v : vertices_range(g)) { @@ -327,7 +327,7 @@ bool sentClearsTail(const NGHolder &g, } /* run the prefix the main graph */ - 
execute_graph(g, sent, &states); + states = execute_graph(g, sent, states); /* .. and check if we are left with anything in the tail region */ for (auto v : states) { diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 34e76269..a2bd971e 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -1631,20 +1631,23 @@ bool triggerKillsRoseGraph(const RoseBuildImpl &tbi, const left_id &left, assert(left.graph()); const NGHolder &h = *left.graph(); + ue2::flat_set all_states; + insert(&all_states, vertices(h)); + assert(out_degree(h.startDs, h) == 1); /* triggered don't use sds */ + DEBUG_PRINTF("removing sds\n"); + all_states.erase(h.startDs); + + ue2::flat_set states; + /* check each pred literal to see if they all kill previous graph * state */ for (u32 lit_id : tbi.g[source(e, tbi.g)].literals) { const rose_literal_id &pred_lit = tbi.literals.right.at(lit_id); const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s); - set states; - insert(&states, vertices(h)); - assert(out_degree(h.startDs, h) == 1); /* triggered don't use sds */ - DEBUG_PRINTF("removing sds\n"); - states.erase(h.startDs); DEBUG_PRINTF("running graph %zu\n", states.size()); - execute_graph(h, s, &states, true); - DEBUG_PRINTF("ran\n"); + states = execute_graph(h, s, all_states, true); + DEBUG_PRINTF("ran, %zu states on\n", states.size()); if (!states.empty()) { return false; From 313822c15741487de28c29f05e25dbd368f95424 Mon Sep 17 00:00:00 2001 From: Mohammad Abdul Awal Date: Tue, 17 Nov 2015 17:50:23 +0000 Subject: [PATCH 37/54] FDR runtime simplification Removed static specialisation of domains. 
--- src/fdr/autogen.py | 11 +-- src/fdr/fdr.c | 21 ----- src/fdr/fdr_autogen.py | 84 +++++++++---------- src/fdr/fdr_compile.cpp | 12 +++ src/fdr/fdr_dump.cpp | 1 + src/fdr/fdr_engine_description.cpp | 127 +++++++++++++++-------------- src/fdr/fdr_engine_description.h | 1 - src/fdr/fdr_internal.h | 6 +- 8 files changed, 124 insertions(+), 139 deletions(-) diff --git a/src/fdr/autogen.py b/src/fdr/autogen.py index 36e4c16c..e5b4f39e 100755 --- a/src/fdr/autogen.py +++ b/src/fdr/autogen.py @@ -54,16 +54,11 @@ def produce_fdr_compiles(l): def build_fdr_matchers(): all_matchers = [ ] - domains = [8, 10, 11, 12, 13] - big_domains = [ 14, 15 ] + strides = [ 1, 2, 4 ] common = { "state_width" : 128, "num_buckets" : 8, "extract_frequency" : 8, "arch" : arch_x86_64 } - for d in domains: - all_matchers += [ M3(stride = 1, domain = d, **common) ] - all_matchers += [ M3(stride = 2, domain = d, **common) ] - all_matchers += [ M3(stride = 4, domain = d, **common) ] - for d in big_domains: - all_matchers += [ M3(stride = 1, domain = d, **common) ] + for s in strides: + all_matchers += [ M3(stride = s, **common) ] return all_matchers diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index 082800f1..f83a4265 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -40,27 +40,6 @@ #include "fdr_confirm_runtime.h" #include "fdr_streaming_runtime.h" #include "fdr_loadval.h" - -static really_inline UNUSED -u32 getPreStartVal(const struct FDR_Runtime_Args *a, u32 numBits) { - u32 r = 0; - if (a->start_offset == 0) { - if (numBits <= 8) { - r = a->buf_history[a->len_history - 1]; - } else { - r = a->buf_history[a->len_history - 1]; - r |= (a->buf[0] << 8); - } - } else { - if (numBits <= 8) { - r = a->buf[a->start_offset - 1]; - } else { - r = lv_u16(a->buf + a->start_offset - 1, a->buf, a->buf + a->len); - } - } - return r & ((1 << numBits) - 1); -} - #include "fdr_autogen.c" #define FAKE_HISTORY_SIZE 16 diff --git a/src/fdr/fdr_autogen.py b/src/fdr/fdr_autogen.py index 685cca3b..748d811f 100755 --- 
a/src/fdr/fdr_autogen.py +++ b/src/fdr/fdr_autogen.py @@ -74,12 +74,12 @@ class ValueExtractStep(Step): dsb = m.datasize_bytes modval = offset % dsb - if m.domain > 8 and modval == dsb - 1: + if modval == dsb - 1: # Case 1: reading more than one byte over the end of the bulk load self.latency = 4 if sub_load_cautious: - code_string = "cautious_forward" + code_string = "cautious_forward" else: code_string = "normal" load_string = m.single_load_type.load_expr_data(self.offset, code_string) @@ -101,7 +101,7 @@ class ValueExtractStep(Step): temp_string = "(%s >> %d)" % (lb_var.name, modval*8 - m.reach_shift_adjust) - init_string = "(%s) & 0x%x" % (temp_string, m.reach_mask) + init_string = "(%s) & (domain_mask << %d)" % (temp_string, m.reach_shift_adjust) v_var = self.nv(m.value_extract_type, "v%d" % offset) self.val = v_var.gen_initializer_stmt(init_string) @@ -173,14 +173,10 @@ class ConfirmStep(Step): enable_confirmless = m.stride == 1, do_bailout = False) class M3(MatcherBase): - def get_hash_safety_parameters(self): - h_size = self.single_load_type.size_in_bytes() - return (0, h_size - 1) - def produce_compile_call(self): - print " { %d, %d, %d, %d, %d, %s, %d, %d }," % ( + print " { %d, %d, %d, %d, %s, %d, %d }," % ( self.id, self.state_width, self.num_buckets, - self.stride, self.domain, + self.stride, self.arch.target, self.conf_pull_back, self.conf_top_level_split) def produce_main_loop(self, switch_variant = False): @@ -192,8 +188,8 @@ class M3(MatcherBase): ctxt = CodeGenContext(self) if switch_variant: - print " ptr -= (iterBytes - dist);" - print " { " # need an extra scope around switch variant to stop its globals escaping + print " ptr -= (iterBytes - dist);" + print " { " # need an extra scope around switch variant to stop its globals escaping else: print " if (doMainLoop) {" print " for (; ptr + LOOP_READ_AHEAD < buf + len; ptr += iterBytes) {" @@ -349,25 +345,30 @@ class M3(MatcherBase): shift_expr = "%s = %s" % (state.name, 
state.type.shift_expr(state.name, shift_distance)) s = Template(""" - $TYPENAME s; - if (a->len_history) { - u32 tmp = getPreStartVal(a, $DOMAIN); - s = *((const $TYPENAME *)ft + tmp); - $SHIFT_EXPR; - } else { - s = *(const $TYPENAME *)&fdr->start; - } + $TYPENAME s; + if (a->len_history) { + u32 tmp = 0; + if (a->start_offset == 0) { + tmp = a->buf_history[a->len_history - 1]; + tmp |= (a->buf[0] << 8); + } else { + tmp = lv_u16(a->buf + a->start_offset - 1, a->buf, a->buf + a->len); + } + tmp &= fdr->domainMask; + s = *((const $TYPENAME *)ft + tmp); + $SHIFT_EXPR; + } else { + s = *(const $TYPENAME *)&fdr->start; + } """).substitute(TYPENAME = s_type.get_name(), ZERO_EXPR = s_type.zero_expression(), - DOMAIN = self.domain, SHIFT_EXPR = shift_expr) return s def produce_code(self): - (behind, ahead) = self.get_hash_safety_parameters() - loop_read_behind = behind - loop_read_ahead = self.loop_bytes + ahead + loop_read_behind = 0 + loop_read_ahead = self.loop_bytes + 1 # we set up mask and shift stuff for extracting our masks from registers # @@ -380,7 +381,7 @@ class M3(MatcherBase): ssb = self.state_type.size / 8 # state size in bytes # Intel path - if ssb == 16 and self.domain == 16: + if ssb == 16: # obscure corner - we don't have the room in the register to # do this for all values so we don't. 
domain==16 is pretty # bad anyhow, of course @@ -390,7 +391,6 @@ class M3(MatcherBase): shift_amts = { 1 : 0, 2 : 1, 4 : 2, 8 : 3, 16: 4 } self.reach_shift_adjust = shift_amts[ ssb/self.reach_mult ] - self.reach_mask = ((1 << self.domain) - 1) << self.reach_shift_adjust print self.produce_header(visible = False) @@ -398,21 +398,19 @@ class M3(MatcherBase): print " Arch: " + self.arch.name, print " State type: " + self.state_type.get_name(), print " Num buckets: %d" % self.num_buckets, - print " Domain: %d" % self.domain, print " Stride: %d" % self.stride print self.produce_common_declarations() - print - print "\tconst size_t tabSize = %d;" % self.table_size - print """ - const u8 * ft = (const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR)); - const u32 * confBase = (const u32 *)(ft + tabSize); -""" + print " assert(fdr->domain > 8 && fdr->domain < 16);" + print + print " u64a domain_mask = fdr->domainMask;" + print " const u8 * ft = (const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR));" + print " const u32 * confBase = (const u32 *)(ft + fdr->tabSize);" print self.produce_init_state() - print "\tconst size_t iterBytes = %d;" % self.loop_bytes - print "\tconst size_t START_MOD = %d;" % self.datasize_bytes - print "\tconst size_t LOOP_READ_AHEAD = %d;" % loop_read_ahead + print " const size_t iterBytes = %d;" % self.loop_bytes + print " const size_t START_MOD = %d;" % self.datasize_bytes + print " const size_t LOOP_READ_AHEAD = %d;" % loop_read_ahead print """ while (ptr < buf + len) { @@ -451,9 +449,9 @@ class M3(MatcherBase): print self.produce_footer() def get_name(self): - return "fdr_exec_%s_d%d_s%d_w%d" % (self.arch.name, self.domain, self.stride, self.state_width) + return "fdr_exec_%s_s%d_w%d" % (self.arch.name, self.stride, self.state_width) - def __init__(self, state_width, domain, stride, + def __init__(self, state_width, stride, arch, table_state_width = None, num_buckets = 8, @@ -474,17 +472,9 @@ class M3(MatcherBase): self.table_state_width = state_width 
self.table_state_type = getRequiredType(self.table_state_width) - # domain is the number of bits that we draw from our input to - # index our 'reach' table - if not 8 <= domain <= 16: - fail_out("Unsupported domain: %d" % domain) - self.domain = domain - # this is the load type required for this domain if we want to + # this is the load type required for domain [9:15] if we want to # load it one at a time - self.single_load_type = getRequiredType(self.domain) - - # table size - self.table_size = 2**domain * table_state_width // 8 + self.single_load_type = IntegerType(16) # stride is the frequency with which we make data-driven # accesses to our reach table diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 8be44370..ccf17626 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -184,6 +184,13 @@ aligned_unique_ptr FDRCompiler::setupFDR(pair link) { ptr += floodControlTmp.second; aligned_free(floodControlTmp.first); + /* we are allowing domains 9 to 15 only */ + assert(eng.bits > 8 && eng.bits < 16); + fdr->domain = eng.bits; + fdr->schemeWidthByte = eng.schemeWidth / 8; + fdr->domainMask = (1 << eng.bits) - 1; + fdr->tabSize = (1 << eng.bits) * fdr->schemeWidthByte; + if (link.first) { fdr->link = verify_u32(ptr - fdr_base); memcpy(ptr, link.first, link.second); @@ -534,6 +541,11 @@ fdrBuildTableInternal(const vector &lits, bool make_small, return nullptr; } + // temporary hack for unit testing + if (hint != HINT_INVALID) { + des->bits = 9; + } + FDRCompiler fc(lits, *des, make_small); return fc.build(link); } diff --git a/src/fdr/fdr_dump.cpp b/src/fdr/fdr_dump.cpp index ae246270..158170c2 100644 --- a/src/fdr/fdr_dump.cpp +++ b/src/fdr/fdr_dump.cpp @@ -81,6 +81,7 @@ void fdrPrintStats(const FDR *fdr, FILE *f) { unique_ptr des = getFdrDescription(fdr->engineID); if (des) { + fprintf(f, " domain %u\n", des->bits); fprintf(f, " stride %u\n", des->stride); fprintf(f, " buckets %u\n", des->getNumBuckets()); fprintf(f, " width %u\n", 
des->schemeWidth); diff --git a/src/fdr/fdr_engine_description.cpp b/src/fdr/fdr_engine_description.cpp index 2a6fda79..5d470c7e 100644 --- a/src/fdr/fdr_engine_description.cpp +++ b/src/fdr/fdr_engine_description.cpp @@ -48,7 +48,7 @@ FDREngineDescription::FDREngineDescription(const FDREngineDef &def) : EngineDescription(def.id, targetByArchFeatures(def.cpu_features), def.numBuckets, def.confirmPullBackDistance, def.confirmTopLevelSplit), - schemeWidth(def.schemeWidth), stride(def.stride), bits(def.bits) {} + schemeWidth(def.schemeWidth), stride(def.stride), bits(0) {} u32 FDREngineDescription::getDefaultFloodSuffixLength() const { // rounding up, so that scheme width 32 and 6 buckets is 6 not 5! @@ -105,76 +105,83 @@ unique_ptr chooseEngine(const target_t &target, DEBUG_PRINTF("%zu lits, msl=%zu, desiredStride=%u\n", vl.size(), msl, desiredStride); - const FDREngineDescription *best = nullptr; + FDREngineDescription *best = nullptr; u32 best_score = 0; - for (size_t engineID = 0; engineID < allDescs.size(); engineID++) { - const FDREngineDescription &eng = allDescs[engineID]; - if (!eng.isValidOnTarget(target)) { - continue; - } - if (msl < eng.stride) { - continue; - } - - u32 score = 100; - - score -= absdiff(desiredStride, eng.stride); - - if (eng.stride <= desiredStride) { - score += eng.stride; - } - - u32 effLits = vl.size(); /* * desiredStride;*/ - u32 ideal; - if (effLits < eng.getNumBuckets()) { - if (eng.stride == 1) { - ideal = 8; - } else { - ideal = 10; + for (u32 domain = 9; domain <= 15; domain++) { + for (size_t engineID = 0; engineID < allDescs.size(); engineID++) { + // to make sure that domains >=14 have stride 1 according to origin + if (domain > 13 && engineID > 0) { + continue; + } + FDREngineDescription &eng = allDescs[engineID]; + if (!eng.isValidOnTarget(target)) { + continue; + } + if (msl < eng.stride) { + continue; } - } else if (effLits < 20) { - ideal = 10; - } else if (effLits < 100) { - ideal = 11; - } else if (effLits < 1000) { - 
ideal = 12; - } else if (effLits < 10000) { - ideal = 13; - } else { - ideal = 15; - } - if (ideal != 8 && eng.schemeWidth == 32) { - ideal += 1; - } + u32 score = 100; - if (make_small) { - ideal -= 2; - } + score -= absdiff(desiredStride, eng.stride); - if (eng.stride > 1) { - ideal++; - } + if (eng.stride <= desiredStride) { + score += eng.stride; + } - DEBUG_PRINTF("effLits %u\n", effLits); + u32 effLits = vl.size(); /* * desiredStride;*/ + u32 ideal; + if (effLits < eng.getNumBuckets()) { + if (eng.stride == 1) { + ideal = 8; + } else { + ideal = 10; + } + } else if (effLits < 20) { + ideal = 10; + } else if (effLits < 100) { + ideal = 11; + } else if (effLits < 1000) { + ideal = 12; + } else if (effLits < 10000) { + ideal = 13; + } else { + ideal = 15; + } - if (target.is_atom_class() && !make_small && effLits < 4000) { - /* Unless it is a very heavy case, we want to build smaller tables - * on lightweight machines due to their small caches. */ - ideal -= 2; - } + if (ideal != 8 && eng.schemeWidth == 32) { + ideal += 1; + } - score -= absdiff(ideal, eng.bits); + if (make_small) { + ideal -= 2; + } - DEBUG_PRINTF("fdr %u: width=%u, bits=%u, buckets=%u, stride=%u " - "-> score=%u\n", - eng.getID(), eng.schemeWidth, eng.bits, - eng.getNumBuckets(), eng.stride, score); + if (eng.stride > 1) { + ideal++; + } - if (!best || score > best_score) { - best = &eng; - best_score = score; + DEBUG_PRINTF("effLits %u\n", effLits); + + if (target.is_atom_class() && !make_small && effLits < 4000) { + /* Unless it is a very heavy case, we want to build smaller tables + * on lightweight machines due to their small caches. 
*/ + ideal -= 2; + } + + score -= absdiff(ideal, domain); + + DEBUG_PRINTF("fdr %u: width=%u, bits=%u, buckets=%u, stride=%u " + "-> score=%u\n", + eng.getID(), eng.schemeWidth, eng.bits, + eng.getNumBuckets(), eng.stride, score); + + if (!best || score > best_score) { + eng.bits = domain; + best = &eng; + best_score = score; + } } } diff --git a/src/fdr/fdr_engine_description.h b/src/fdr/fdr_engine_description.h index d936095b..45f64ac0 100644 --- a/src/fdr/fdr_engine_description.h +++ b/src/fdr/fdr_engine_description.h @@ -43,7 +43,6 @@ struct FDREngineDef { u32 schemeWidth; u32 numBuckets; u32 stride; - u32 bits; u64a cpu_features; u32 confirmPullBackDistance; u32 confirmTopLevelSplit; diff --git a/src/fdr/fdr_internal.h b/src/fdr/fdr_internal.h index 6c722777..607e039c 100644 --- a/src/fdr/fdr_internal.h +++ b/src/fdr/fdr_internal.h @@ -76,9 +76,11 @@ struct FDR { * structures (spillover strings and hash table) if we're a secondary * structure. */ u32 link; + u8 domain; /* dynamic domain info */ + u8 schemeWidthByte; /* scheme width in bytes */ + u16 domainMask; /* pre-computed domain mask */ + u32 tabSize; /* pre-computed hashtable size in bytes */ u32 pad1; - u32 pad2; - u32 pad3; union { u32 s_u32; From bdb7a100344d5e500081d061ea42f24e532e1ba4 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 25 Nov 2015 14:53:27 +1100 Subject: [PATCH 40/54] Fix defn of POSIX graph, print, punct classes The POSIX classes [:graph:], [:print:] and [:punct:] are handled specially in UCP mode by PCRE. This change matches that behaviour. 
--- src/parser/ComponentClass.cpp | 7 ++++++- src/parser/ComponentClass.h | 4 +++- src/parser/Utf8ComponentClass.cpp | 24 +++++++++++++++++++++++- src/parser/Utf8ComponentClass.h | 3 +++ 4 files changed, 35 insertions(+), 3 deletions(-) diff --git a/src/parser/ComponentClass.cpp b/src/parser/ComponentClass.cpp index 43c05898..a91ae979 100644 --- a/src/parser/ComponentClass.cpp +++ b/src/parser/ComponentClass.cpp @@ -81,8 +81,9 @@ CharReach getPredefinedCharReach(PredefinedClass c, const ParseMode &mode) { case CLASS_DIGIT: return number; case CLASS_GRAPH: - case CLASS_XGRAPH: return CharReach(0x21, 0x7e); + case CLASS_XGRAPH: + return to_cr(getPredefinedCodePointSet(c, mode)); case CLASS_HORZ: return CharReach("\x09\x20\xA0"); case CLASS_LOWER: @@ -93,11 +94,15 @@ CharReach getPredefinedCharReach(PredefinedClass c, const ParseMode &mode) { } case CLASS_PRINT: return CharReach(0x20, 0x7e); + case CLASS_XPRINT: + return to_cr(getPredefinedCodePointSet(c, mode)); case CLASS_PUNCT: return CharReach(0x21, '0' - 1) | CharReach('9' + 1, 'A' - 1) | CharReach('Z' + 1, 'a' - 1) | CharReach('z' + 1, 126); + case CLASS_XPUNCT: + return to_cr(getPredefinedCodePointSet(c, mode)); case CLASS_SPACE: return CharReach("\x09\x0a\x0c\x0b\x0d\x20"); case CLASS_UPPER: diff --git a/src/parser/ComponentClass.h b/src/parser/ComponentClass.h index 1cb1a7d0..040e6d78 100644 --- a/src/parser/ComponentClass.h +++ b/src/parser/ComponentClass.h @@ -63,7 +63,9 @@ enum PredefinedClass { CLASS_VERT, CLASS_WORD, CLASS_XDIGIT, - CLASS_XGRAPH, + CLASS_XGRAPH, /* [:graph:] in UCP mode */ + CLASS_XPRINT, /* [:print:] in UCP mode */ + CLASS_XPUNCT, /* [:punct:] in UCP mode */ CLASS_UCP_C, CLASS_UCP_CC, CLASS_UCP_CF, diff --git a/src/parser/Utf8ComponentClass.cpp b/src/parser/Utf8ComponentClass.cpp index 3a6a85a4..54f9edb9 100644 --- a/src/parser/Utf8ComponentClass.cpp +++ b/src/parser/Utf8ComponentClass.cpp @@ -75,6 +75,10 @@ PredefinedClass translateForUcpMode(PredefinedClass in, const ParseMode &mode) 
{ } else { return CLASS_UCP_LL; } + case CLASS_PRINT: + return CLASS_XPRINT; + case CLASS_PUNCT: + return CLASS_XPUNCT; case CLASS_SPACE: return CLASS_UCP_XPS; case CLASS_UPPER: @@ -90,7 +94,6 @@ PredefinedClass translateForUcpMode(PredefinedClass in, const ParseMode &mode) { } } -static CodePointSet getPredefinedCodePointSet(PredefinedClass c, const ParseMode &mode) { /* TODO: support properly PCRE_UCP mode and non PCRE_UCP mode */ @@ -117,6 +120,25 @@ CodePointSet getPredefinedCodePointSet(PredefinedClass c, rv |= cf; return rv; } + case CLASS_XPRINT: { + // Same as graph, plus everything with the Zs property. + CodePointSet rv = getPredefinedCodePointSet(CLASS_XGRAPH, mode); + rv |= getUcpZs(); + return rv; + } + case CLASS_XPUNCT: { + // Everything with the P (punctuation) property, plus code points in S + // (symbols) that are < 128. + // NOTE: PCRE versions 8.37 and earlier erroneously use 256 as the + // cut-off here, so we are compatible with that for now. PCRE bug #1718 + // tracks this; once PCRE 8.38 is released we should correct this + // behaviour. 
+ CodePointSet rv = getUcpP(); + CodePointSet symbols = getUcpS(); + symbols.unsetRange(256, MAX_UNICODE); + rv |= symbols; + return rv; + } case CLASS_HORZ: { CodePointSet rv; rv.set(0x0009); /* Horizontal tab */ diff --git a/src/parser/Utf8ComponentClass.h b/src/parser/Utf8ComponentClass.h index b2c402f9..3d21a278 100644 --- a/src/parser/Utf8ComponentClass.h +++ b/src/parser/Utf8ComponentClass.h @@ -110,6 +110,9 @@ private: PredefinedClass translateForUcpMode(PredefinedClass in, const ParseMode &mode); bool isUcp(PredefinedClass c); +CodePointSet getPredefinedCodePointSet(PredefinedClass c, + const ParseMode &mode); + } // namespace #endif // UTF8_COMPONENT_CLASS_H From 25a01e1c3cb31b6bde6c83b82ad41a87fd0f0deb Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 17 Nov 2015 17:23:52 +1100 Subject: [PATCH 39/54] Unify handling of caseless flag in class parser Apply caselessness to each element added to a class, rather than all at finalize time (which required separated ucp dnf and-ucp working data). Unifies the behaviour of AsciiComponentClass and Utf8ComponentClass in this respect. 
--- src/parser/AsciiComponentClass.cpp | 32 +++++++++---------- src/parser/AsciiComponentClass.h | 4 +-- src/parser/Utf8ComponentClass.cpp | 51 ++++++------------------------ src/parser/Utf8ComponentClass.h | 2 -- 4 files changed, 27 insertions(+), 62 deletions(-) diff --git a/src/parser/AsciiComponentClass.cpp b/src/parser/AsciiComponentClass.cpp index 44ecb5bb..7cfa6e11 100644 --- a/src/parser/AsciiComponentClass.cpp +++ b/src/parser/AsciiComponentClass.cpp @@ -61,11 +61,15 @@ void AsciiComponentClass::createRange(unichar to) { unsigned char from = (u8)range_start; if (from > to) { throw LocatedParseError("Range out of order in character class"); - } else { - in_cand_range = false; - cr.setRange(from, to); - range_start = INVALID_UNICODE; } + + in_cand_range = false; + CharReach ncr(from, to); + if (mode.caseless) { + make_caseless(&ncr); + } + cr |= ncr; + range_start = INVALID_UNICODE; } void AsciiComponentClass::notePositions(GlushkovBuildState &bs) { @@ -95,16 +99,13 @@ void AsciiComponentClass::add(PredefinedClass c, bool negative) { c = translateForUcpMode(c, mode); } + // Note: caselessness is handled by getPredefinedCharReach. 
CharReach pcr = getPredefinedCharReach(c, mode); if (negative) { pcr.flip(); } - if (isUcp(c)) { - cr_ucp |= pcr; - } else { - cr |= pcr; - } + cr |= pcr; range_start = INVALID_UNICODE; in_cand_range = false; } @@ -120,7 +121,12 @@ void AsciiComponentClass::add(unichar c) { return; } - cr.set(c); + CharReach ncr(c, c); + if (mode.caseless) { + make_caseless(&ncr); + } + + cr |= ncr; range_start = c; } @@ -136,12 +142,6 @@ void AsciiComponentClass::finalize() { in_cand_range = false; } - if (mode.caseless) { - make_caseless(&cr); - } - - cr |= cr_ucp; /* characters from ucp props don't participate in caseless */ - if (m_negate) { cr.flip(); } diff --git a/src/parser/AsciiComponentClass.h b/src/parser/AsciiComponentClass.h index 2d5ef843..925fa9bf 100644 --- a/src/parser/AsciiComponentClass.h +++ b/src/parser/AsciiComponentClass.h @@ -78,12 +78,10 @@ protected: private: Position position; CharReach cr; - CharReach cr_ucp; // Private copy ctor. Use clone instead. AsciiComponentClass(const AsciiComponentClass &other) - : ComponentClass(other), position(other.position), cr(other.cr), - cr_ucp(other.cr_ucp) {} + : ComponentClass(other), position(other.position), cr(other.cr) {} }; } // namespace ue2 diff --git a/src/parser/Utf8ComponentClass.cpp b/src/parser/Utf8ComponentClass.cpp index 54f9edb9..21707902 100644 --- a/src/parser/Utf8ComponentClass.cpp +++ b/src/parser/Utf8ComponentClass.cpp @@ -515,16 +515,16 @@ void UTF8ComponentClass::createRange(unichar to) { unichar from = range_start; if (from > to) { throw LocatedParseError("Range out of order in character class"); - } else { - in_cand_range = false; - CodePointSet ncps; - ncps.setRange(from, to); - if (mode.caseless) { - make_caseless(&ncps); - } - cps |= ncps; - range_start = INVALID_UNICODE; } + + in_cand_range = false; + CodePointSet ncps; + ncps.setRange(from, to); + if (mode.caseless) { + make_caseless(&ncps); + } + cps |= ncps; + range_start = INVALID_UNICODE; } void UTF8ComponentClass::add(PredefinedClass 
c, bool negative) { @@ -543,11 +543,7 @@ void UTF8ComponentClass::add(PredefinedClass c, bool negative) { pcps.flip(); } - if (isUcp(c)) { - cps_ucp |= pcps; - } else { - cps |= pcps; - } + cps |= pcps; range_start = INVALID_UNICODE; in_cand_range = false; @@ -585,8 +581,6 @@ void UTF8ComponentClass::finalize() { in_cand_range = false; } - cps |= cps_ucp; /* characters from ucp props always case sensitive */ - if (m_negate) { cps.flip(); } @@ -594,31 +588,6 @@ void UTF8ComponentClass::finalize() { finalized = true; } -bool isUcp(PredefinedClass c) { - switch (c) { - case CLASS_ALNUM: - case CLASS_ALPHA: - case CLASS_ANY: - case CLASS_ASCII: - case CLASS_BLANK: - case CLASS_CNTRL: - case CLASS_DIGIT: - case CLASS_GRAPH: - case CLASS_HORZ: - case CLASS_LOWER: - case CLASS_PRINT: - case CLASS_PUNCT: - case CLASS_SPACE: - case CLASS_UPPER: - case CLASS_VERT: - case CLASS_WORD: - case CLASS_XDIGIT: - return false; - default: - return true; - } -} - Position UTF8ComponentClass::getHead(NFABuilder &builder, u8 first_byte) { map::const_iterator it = heads.find(first_byte); if (it != heads.end()) { diff --git a/src/parser/Utf8ComponentClass.h b/src/parser/Utf8ComponentClass.h index 3d21a278..f4e7ea32 100644 --- a/src/parser/Utf8ComponentClass.h +++ b/src/parser/Utf8ComponentClass.h @@ -93,7 +93,6 @@ private: void buildFourByte(GlushkovBuildState &bs); CodePointSet cps; - CodePointSet cps_ucp; std::map heads; Position single_pos; @@ -108,7 +107,6 @@ private: }; PredefinedClass translateForUcpMode(PredefinedClass in, const ParseMode &mode); -bool isUcp(PredefinedClass c); CodePointSet getPredefinedCodePointSet(PredefinedClass c, const ParseMode &mode); From f9b7e806b1ae0067a45f952fb68b77196dc974b0 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 25 Nov 2015 14:53:27 +1100 Subject: [PATCH 40/54] Update defn of class [:punct:] for PCRE 8.38 --- src/parser/Utf8ComponentClass.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git 
a/src/parser/Utf8ComponentClass.cpp b/src/parser/Utf8ComponentClass.cpp index 21707902..b6b161e8 100644 --- a/src/parser/Utf8ComponentClass.cpp +++ b/src/parser/Utf8ComponentClass.cpp @@ -129,13 +129,9 @@ CodePointSet getPredefinedCodePointSet(PredefinedClass c, case CLASS_XPUNCT: { // Everything with the P (punctuation) property, plus code points in S // (symbols) that are < 128. - // NOTE: PCRE versions 8.37 and earlier erroneously use 256 as the - // cut-off here, so we are compatible with that for now. PCRE bug #1718 - // tracks this; once PCRE 8.38 is released we should correct this - // behaviour. CodePointSet rv = getUcpP(); CodePointSet symbols = getUcpS(); - symbols.unsetRange(256, MAX_UNICODE); + symbols.unsetRange(128, MAX_UNICODE); rv |= symbols; return rv; } From 205bc1af7f4111b7757bc2646528df34fb5e54bc Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 25 Nov 2015 17:05:36 +1100 Subject: [PATCH 41/54] PCRE includes U+180E in /[:print:]/8W --- src/parser/Utf8ComponentClass.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parser/Utf8ComponentClass.cpp b/src/parser/Utf8ComponentClass.cpp index b6b161e8..cdfc974a 100644 --- a/src/parser/Utf8ComponentClass.cpp +++ b/src/parser/Utf8ComponentClass.cpp @@ -124,6 +124,7 @@ CodePointSet getPredefinedCodePointSet(PredefinedClass c, // Same as graph, plus everything with the Zs property. CodePointSet rv = getPredefinedCodePointSet(CLASS_XGRAPH, mode); rv |= getUcpZs(); + rv.set(0x180e); // Also included in this class by PCRE 8.38. return rv; } case CLASS_XPUNCT: { From 15c2980948be285b9051b609358d37e6acc397fb Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 27 Nov 2015 13:30:59 +1100 Subject: [PATCH 42/54] Make key 64 bits where large shifts may be used. This fixes a long-standing issue with large multibit structures. 
--- src/util/multibit.c | 18 +++++----- src/util/multibit.h | 35 ++++++++++--------- unit/internal/multi_bit.cpp | 69 ++++++++++++++++++++----------------- 3 files changed, 66 insertions(+), 56 deletions(-) diff --git a/src/util/multibit.c b/src/util/multibit.c index ca5e5656..c22b73ff 100644 --- a/src/util/multibit.c +++ b/src/util/multibit.c @@ -142,23 +142,25 @@ const u32 mmbit_root_offset_from_level[7] = { u32 mmbit_size(u32 total_bits) { MDEBUG_PRINTF("%u\n", total_bits); - // UE-2228: multibit has bugs in very, very large cases that we should be - // protected against at compile time by resource limits. - assert(total_bits <= 1U << 30); - // Flat model multibit structures are just stored as a bit vector. if (total_bits <= MMB_FLAT_MAX_BITS) { return ROUNDUP_N(total_bits, 8) / 8; } - u32 current_level = 1; - u32 total = 0; + u64a current_level = 1; // Number of blocks on current level. + u64a total = 0; // Total number of blocks. while (current_level * MMB_KEY_BITS < total_bits) { total += current_level; current_level <<= MMB_KEY_SHIFT; } - total += (total_bits + MMB_KEY_BITS - 1)/MMB_KEY_BITS; - return sizeof(MMB_TYPE) * total; + + // Last level is a one-for-one bit vector. It needs room for total_bits + // elements, rounded up to the nearest block. 
+ u64a last_level = ((u64a)total_bits + MMB_KEY_BITS - 1) / MMB_KEY_BITS; + total += last_level; + + assert(total * sizeof(MMB_TYPE) <= UINT32_MAX); + return (u32)(total * sizeof(MMB_TYPE)); } #ifdef DUMP_SUPPORT diff --git a/src/util/multibit.h b/src/util/multibit.h index 251316b3..771c158d 100644 --- a/src/util/multibit.h +++ b/src/util/multibit.h @@ -235,18 +235,18 @@ const u8 *mmbit_get_level_root_const(const u8 *bits, u32 level) { /** \brief get the block for this key on the current level as a u8 ptr */ static really_inline u8 *mmbit_get_block_ptr(u8 *bits, u32 max_level, u32 level, u32 key) { - return mmbit_get_level_root(bits, level) + - (key >> (mmbit_get_ks(max_level, level) + MMB_KEY_SHIFT)) * - sizeof(MMB_TYPE); + u8 *level_root = mmbit_get_level_root(bits, level); + u32 ks = mmbit_get_ks(max_level, level); + return level_root + ((u64a)key >> (ks + MMB_KEY_SHIFT)) * sizeof(MMB_TYPE); } /** \brief get the block for this key on the current level as a const u8 ptr */ static really_inline const u8 *mmbit_get_block_ptr_const(const u8 *bits, u32 max_level, u32 level, u32 key) { - return mmbit_get_level_root_const(bits, level) + - (key >> (mmbit_get_ks(max_level, level) + MMB_KEY_SHIFT)) * - sizeof(MMB_TYPE); + const u8 *level_root = mmbit_get_level_root_const(bits, level); + u32 ks = mmbit_get_ks(max_level, level); + return level_root + ((u64a)key >> (ks + MMB_KEY_SHIFT)) * sizeof(MMB_TYPE); } /** \brief get the _byte_ for this key on the current level as a u8 ptr */ @@ -254,7 +254,7 @@ static really_inline u8 *mmbit_get_byte_ptr(u8 *bits, u32 max_level, u32 level, u32 key) { u8 *level_root = mmbit_get_level_root(bits, level); u32 ks = mmbit_get_ks(max_level, level); - return level_root + (key >> (ks + MMB_KEY_SHIFT - 3)); + return level_root + ((u64a)key >> (ks + MMB_KEY_SHIFT - 3)); } /** \brief get our key value for the current level */ @@ -721,11 +721,11 @@ u32 mmbit_iterate_bounded_flat(const u8 *bits, u32 total_bits, u32 begin, } static really_inline 
-MMB_TYPE get_lowhi_masks(u32 level, u32 max_level, u32 block_min, u32 block_max, - u32 block_base) { +MMB_TYPE get_lowhi_masks(u32 level, u32 max_level, u64a block_min, u64a block_max, + u64a block_base) { const u32 level_shift = (max_level - level) * MMB_KEY_SHIFT; - u32 lshift = (block_min - block_base) >> level_shift; - u32 ushift = (block_max - block_base) >> level_shift; + u64a lshift = (block_min - block_base) >> level_shift; + u64a ushift = (block_max - block_base) >> level_shift; MMB_TYPE lmask = lshift < 64 ? ~mmb_mask_zero_to_nocheck(lshift) : 0; MMB_TYPE umask = ushift < 63 ? mmb_mask_zero_to_nocheck(ushift + 1) : MMB_ALL_ONES; @@ -734,7 +734,7 @@ MMB_TYPE get_lowhi_masks(u32 level, u32 max_level, u32 block_min, u32 block_max, static really_inline u32 mmbit_iterate_bounded_big(const u8 *bits, u32 total_bits, u32 it_start, u32 it_end) { - u32 key = 0; + u64a key = 0; u32 ks = mmbit_keyshift(total_bits); const u32 max_level = mmbit_maxlevel_from_keyshift(ks); u32 level = 0; @@ -743,9 +743,9 @@ u32 mmbit_iterate_bounded_big(const u8 *bits, u32 total_bits, u32 it_start, u32 assert(level <= max_level); u32 block_width = MMB_KEY_BITS << ks; - u32 block_base = key*block_width; - u32 block_min = MAX(it_start, block_base); - u32 block_max = MIN(it_end, block_base + block_width - 1); + u64a block_base = key * block_width; + u64a block_min = MAX(it_start, block_base); + u64a block_max = MIN(it_end, block_base + block_width - 1); const u8 *block_ptr = mmbit_get_level_root_const(bits, level) + key * sizeof(MMB_TYPE); MMB_TYPE block = mmb_load(block_ptr); @@ -761,13 +761,14 @@ u32 mmbit_iterate_bounded_big(const u8 *bits, u32 total_bits, u32 it_start, u32 // No bit found, go up a level // we know that this block didn't have any answers, so we can push // our start iterator forward. 
- it_start = block_base + block_width; - if (it_start > it_end) { + u64a next_start = block_base + block_width; + if (next_start > it_end) { break; } if (level-- == 0) { break; } + it_start = next_start; key >>= MMB_KEY_SHIFT; ks += MMB_KEY_SHIFT; } diff --git a/unit/internal/multi_bit.cpp b/unit/internal/multi_bit.cpp index 27725219..3f5c5908 100644 --- a/unit/internal/multi_bit.cpp +++ b/unit/internal/multi_bit.cpp @@ -363,7 +363,9 @@ TEST_P(MultiBitTest, BoundedIteratorSingle) { ASSERT_TRUE(ba != nullptr); // Set one bit on and run some checks. - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { + SCOPED_TRACE(i); + mmbit_clear(ba, test_size); mmbit_set(ba, test_size, i); @@ -381,7 +383,12 @@ TEST_P(MultiBitTest, BoundedIteratorSingle) { // Scanning from one past our bit to the end should find nothing. if (i != test_size - 1) { - ASSERT_EQ(MMB_INVALID, mmbit_iterate_bounded(ba, test_size, i + 1, test_size)); + // Ordinary iterator. + ASSERT_EQ(MMB_INVALID, mmbit_iterate(ba, test_size, i)); + + // Bounded iterator. + ASSERT_EQ(MMB_INVALID, + mmbit_iterate_bounded(ba, test_size, i + 1, test_size)); } } } @@ -393,7 +400,7 @@ TEST_P(MultiBitTest, BoundedIteratorAll) { // Switch everything on. fill_mmbit(ba, test_size); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { if (i != 0) { ASSERT_EQ(0U, mmbit_iterate_bounded(ba, test_size, 0, i)); } @@ -408,13 +415,13 @@ TEST_P(MultiBitTest, BoundedIteratorEven) { // Set every even-numbered bit and see what we can see. mmbit_clear(ba, test_size); - for (u32 i = 0; i < test_size; i += 2) { + for (u64a i = 0; i < test_size; i += 2) { mmbit_set(ba, test_size, i); } u32 even_stride = stride % 2 ? stride + 1 : stride; - for (u32 i = 0; i < test_size; i += even_stride) { + for (u64a i = 0; i < test_size; i += even_stride) { // Scanning from each even bit to the end should find itself. 
ASSERT_EQ(i, mmbit_iterate_bounded(ba, test_size, i, test_size)); @@ -439,13 +446,13 @@ TEST_P(MultiBitTest, BoundedIteratorOdd) { // Set every odd-numbered bit and see what we can see. mmbit_clear(ba, test_size); - for (u32 i = 1; i < test_size; i += 2) { + for (u64a i = 1; i < test_size; i += 2) { mmbit_set(ba, test_size, i); } u32 even_stride = stride % 2 ? stride + 1 : stride; - for (u32 i = 0; i < test_size; i += even_stride) { + for (u64a i = 0; i < test_size; i += even_stride) { // Scanning from each even bit to the end should find i+1. if (i+1 < test_size) { ASSERT_EQ(i+1, mmbit_iterate_bounded(ba, test_size, i, test_size)); @@ -473,7 +480,7 @@ TEST_P(MultiBitTest, Set) { mmbit_clear(ba, test_size); ASSERT_FALSE(mmbit_any(ba, test_size)); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { SCOPED_TRACE(i); // set a bit that wasn't set before @@ -500,7 +507,7 @@ TEST_P(MultiBitTest, Iter) { mmbit_clear(ba, test_size); ASSERT_EQ(MMB_INVALID, mmbit_iterate(ba, test_size, MMB_INVALID)); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { SCOPED_TRACE(i); mmbit_clear(ba, test_size); mmbit_set(ba, test_size, i); @@ -517,13 +524,13 @@ TEST_P(MultiBitTest, IterAll) { ASSERT_EQ(MMB_INVALID, mmbit_iterate(ba, test_size, MMB_INVALID)); // Set all bits. - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { mmbit_set(ba, test_size, i); } // Find all bits. 
u32 it = MMB_INVALID; - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { ASSERT_EQ(i, mmbit_iterate(ba, test_size, it)); it = i; } @@ -536,7 +543,7 @@ TEST_P(MultiBitTest, AnyPrecise) { mmbit_clear(ba, test_size); ASSERT_FALSE(mmbit_any_precise(ba, test_size)); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { SCOPED_TRACE(i); mmbit_clear(ba, test_size); mmbit_set(ba, test_size, i); @@ -551,7 +558,7 @@ TEST_P(MultiBitTest, Any) { mmbit_clear(ba, test_size); ASSERT_FALSE(mmbit_any(ba, test_size)); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { SCOPED_TRACE(i); mmbit_clear(ba, test_size); mmbit_set(ba, test_size, i); @@ -567,7 +574,7 @@ TEST_P(MultiBitTest, UnsetRange1) { fill_mmbit(ba, test_size); // Use mmbit_unset_range to switch off any single bit. - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { SCOPED_TRACE(i); ASSERT_TRUE(mmbit_isset(ba, test_size, i)); mmbit_unset_range(ba, test_size, i, i + 1); @@ -590,7 +597,7 @@ TEST_P(MultiBitTest, UnsetRange2) { // Use mmbit_unset_range to switch off all bits. mmbit_unset_range(ba, test_size, 0, test_size); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { SCOPED_TRACE(i); ASSERT_FALSE(mmbit_isset(ba, test_size, i)); } @@ -601,12 +608,12 @@ TEST_P(MultiBitTest, UnsetRange3) { ASSERT_TRUE(ba != nullptr); // Use mmbit_unset_range to switch off bits in chunks of 3. - for (u32 i = 0; i < test_size - 3; i += stride) { + for (u64a i = 0; i < test_size - 3; i += stride) { // Switch on the bit before, the bits in question, and the bit after. 
if (i > 0) { mmbit_set(ba, test_size, i - 1); } - for (u32 j = i; j < min(i + 4, test_size); j++) { + for (u64a j = i; j < min(i + 4, (u64a)test_size); j++) { mmbit_set(ba, test_size, j); } @@ -635,7 +642,7 @@ TEST_P(MultiBitTest, InitRangeAll) { mmbit_init_range(ba, test_size, 0, test_size); // Make sure they're all set. - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { SCOPED_TRACE(i); ASSERT_TRUE(mmbit_isset(ba, test_size, i)); } @@ -656,7 +663,7 @@ TEST_P(MultiBitTest, InitRangeOne) { SCOPED_TRACE(test_size); ASSERT_TRUE(ba != nullptr); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { mmbit_init_range(ba, test_size, i, i + 1); // Only bit 'i' should be on. @@ -685,7 +692,7 @@ TEST_P(MultiBitTest, InitRangeChunked) { ASSERT_EQ(chunk_begin, mmbit_iterate(ba, test_size, MMB_INVALID)); // All bits in the chunk should be on. - for (u32 i = chunk_begin; i < chunk_end; i += stride) { + for (u64a i = chunk_begin; i < chunk_end; i += stride) { SCOPED_TRACE(i); ASSERT_TRUE(mmbit_isset(ba, test_size, i)); } @@ -985,7 +992,7 @@ TEST_P(MultiBitTest, SparseIteratorBeginAll) { vector it; vector bits; bits.reserve(test_size / stride); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { bits.push_back(i); } mmbBuildSparseIterator(it, bits, test_size); @@ -1032,7 +1039,7 @@ TEST_P(MultiBitTest, SparseIteratorBeginThirds) { // Switch every third bits on in state mmbit_clear(ba, test_size); ASSERT_FALSE(mmbit_any(ba, test_size)); - for (u32 i = 0; i < test_size; i += 3) { + for (u64a i = 0; i < test_size; i += 3) { mmbit_set(ba, test_size, i); } @@ -1044,7 +1051,7 @@ TEST_P(MultiBitTest, SparseIteratorBeginThirds) { ASSERT_EQ(0U, val); ASSERT_EQ(0U, idx); - for (u32 i = 0; i < test_size - 3; i += 3) { + for (u64a i = 0; i < test_size - 3; i += 3) { mmbit_unset(ba, test_size, i); val = mmbit_sparse_iter_begin(ba, test_size, &idx, &it[0], 
&state[0]); ASSERT_EQ(i+3, val); @@ -1060,7 +1067,7 @@ TEST_P(MultiBitTest, SparseIteratorNextAll) { vector it; vector bits; bits.reserve(test_size / stride); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { bits.push_back(i); } mmbBuildSparseIterator(it, bits, test_size); @@ -1103,7 +1110,7 @@ TEST_P(MultiBitTest, SparseIteratorNextExactStrided) { vector it; vector bits; bits.reserve(test_size / stride); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { bits.push_back(i); mmbit_set(ba, test_size, i); } @@ -1135,7 +1142,7 @@ TEST_P(MultiBitTest, SparseIteratorNextNone) { vector it; vector bits; bits.reserve(test_size / stride); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { bits.push_back(i); } mmbBuildSparseIterator(it, bits, test_size); @@ -1164,7 +1171,7 @@ TEST_P(MultiBitTest, SparseIteratorUnsetAll) { vector it; vector bits; bits.reserve(test_size / stride); - for (u32 i = 0; i < test_size; i += stride) { + for (u64a i = 0; i < test_size; i += stride) { bits.push_back(i); } mmbBuildSparseIterator(it, bits, test_size); @@ -1194,10 +1201,10 @@ TEST_P(MultiBitTest, SparseIteratorUnsetHalves) { // Two sparse iterators: one for even bits, one for odd ones vector even, odd; - for (u32 i = 0; i < test_size; i += 2) { + for (u64a i = 0; i < test_size; i += 2) { even.push_back(i); } - for (u32 i = 1; i < test_size; i += 2) { + for (u64a i = 1; i < test_size; i += 2) { odd.push_back(i); } @@ -1277,9 +1284,9 @@ static const MultiBitTestParam multibitTests[] = { { 1U << 28, 15073 }, { 1U << 29, 24413 }, { 1U << 30, 50377 }, + { 1U << 31, 104729 }, - // XXX: cases this large segfault in mmbit_set, FIXME NOW - //{ 1U << 31, 3701 }, + // { UINT32_MAX, 104729 }, // Very slow }; INSTANTIATE_TEST_CASE_P(MultiBit, MultiBitTest, ValuesIn(multibitTests)); From b87590ce9d01d4d09fc1fd1198aea1ff04225137 Mon Sep 17 00:00:00 2001 From: Justin 
Viiret Date: Wed, 2 Dec 2015 09:38:20 +1100 Subject: [PATCH 43/54] castle: simplify find_next_top Tops are no longer sparse in CastleProto, so the linear scan for holes isn't necessary. --- src/nfa/castlecompile.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index e7e40540..b18eb9ad 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -721,10 +721,8 @@ const CharReach &CastleProto::reach() const { static u32 find_next_top(const map &repeats) { - u32 top = 0; - for (; contains(repeats, top); top++) { - // pass - } + u32 top = verify_u32(repeats.size()); + assert(!contains(repeats, top)); return top; } @@ -734,7 +732,7 @@ u32 CastleProto::add(const PureRepeat &pr) { assert(pr.reports.size() == 1); u32 top = find_next_top(repeats); DEBUG_PRINTF("selected unused top %u\n", top); - repeats.insert(make_pair(top, pr)); + repeats.emplace(top, pr); return top; } @@ -783,7 +781,7 @@ bool mergeCastle(CastleProto &c1, const CastleProto &c2, const PureRepeat &pr = m.second; DEBUG_PRINTF("top %u\n", top); u32 new_top = find_next_top(c1.repeats); - c1.repeats.insert(make_pair(new_top, pr)); + c1.repeats.emplace(new_top, pr); top_map[top] = new_top; DEBUG_PRINTF("adding repeat: map %u->%u\n", top, new_top); } @@ -800,7 +798,7 @@ void remapCastleTops(CastleProto &proto, map &top_map) { const u32 top = m.first; const PureRepeat &pr = m.second; u32 new_top = find_next_top(out); - out.insert(make_pair(new_top, pr)); + out.emplace(new_top, pr); top_map[top] = new_top; } From 1267922ca77182bb89ef1704f9d451eadd302239 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 2 Dec 2015 09:47:59 +1100 Subject: [PATCH 44/54] role aliasing: simplify hashRightRoleProperties Using the full report set for a suffix as an input to this hash was very slow at scale. 
--- src/rose/rose_build_role_aliasing.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp index 87b6936c..62db5b2e 100644 --- a/src/rose/rose_build_role_aliasing.cpp +++ b/src/rose/rose_build_role_aliasing.cpp @@ -439,12 +439,16 @@ size_t hashRightRoleProperties(RoseVertex v, const RoseGraph &g) { hash_combine(val, hash_range(begin(props.reports), end(props.reports))); if (props.suffix) { - hash_combine(val, all_reports(props.suffix)); - if (props.suffix.graph) { - hash_combine(val, num_vertices(*props.suffix.graph)); + const auto &suffix = props.suffix; + if (suffix.castle) { + hash_combine(val, suffix.castle->reach()); + hash_combine(val, suffix.castle->repeats.size()); } - if (props.suffix.haig) { - hash_combine(val, hash_dfa(*props.suffix.haig)); + if (suffix.graph) { + hash_combine(val, num_vertices(*suffix.graph)); + } + if (suffix.haig) { + hash_combine(val, hash_dfa(*suffix.haig)); } } From 03953f34b13c38f5597c800c206998a3f8a583bc Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 2 Dec 2015 09:54:55 +1100 Subject: [PATCH 45/54] RoseDedupeAuxImpl: collect unique suffixes first --- src/rose/rose_build_misc.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index f627017c..1d58c241 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -601,6 +601,8 @@ RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in) : tbi(tbi_in) { const RoseGraph &g = tbi.g; + set suffixes; + for (auto v : vertices_range(g)) { // Literals in the small block table don't count as dupes: although // they have copies in the anchored table, the two are never run in the @@ -611,10 +613,16 @@ RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in) } } + // Several vertices may share a suffix, so we collect the set of + // suffixes first to 
avoid repeating work. if (g[v].suffix) { - for (const auto &report_id : all_reports(g[v].suffix)) { - suffix_map[report_id].insert(g[v].suffix); - } + suffixes.insert(g[v].suffix); + } + } + + for (const auto &suffix : suffixes) { + for (const auto &report_id : all_reports(suffix)) { + suffix_map[report_id].insert(suffix); } } From 8dac64d1dcad73a3bd3270215a9fc22e15ad49c8 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 2 Dec 2015 10:24:54 +1100 Subject: [PATCH 46/54] findMinWidth, findMaxWidth: width for a given top Currently only implemented for Castle suffixes. --- src/nfa/castlecompile.cpp | 16 ++++++++++++++++ src/nfa/castlecompile.h | 2 ++ src/rose/rose_build_impl.h | 4 ++++ src/rose/rose_build_misc.cpp | 24 ++++++++++++++++++++++++ src/rose/rose_build_width.cpp | 12 +++++++----- 5 files changed, 53 insertions(+), 5 deletions(-) diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index b18eb9ad..7d37ef81 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -708,6 +708,22 @@ depth findMaxWidth(const CastleProto &proto) { return max_width; } +depth findMinWidth(const CastleProto &proto, u32 top) { + if (!contains(proto.repeats, top)) { + assert(0); // should not happen + return depth::infinity(); + } + return proto.repeats.at(top).bounds.min; +} + +depth findMaxWidth(const CastleProto &proto, u32 top) { + if (!contains(proto.repeats, top)) { + assert(0); // should not happen + return depth(0); + } + return proto.repeats.at(top).bounds.max; +} + CastleProto::CastleProto(const PureRepeat &pr) { assert(pr.reach.any()); assert(pr.reports.size() == 1); diff --git a/src/nfa/castlecompile.h b/src/nfa/castlecompile.h index fbafb606..a35f229d 100644 --- a/src/nfa/castlecompile.h +++ b/src/nfa/castlecompile.h @@ -85,6 +85,8 @@ struct CastleProto { std::set all_reports(const CastleProto &proto); depth findMinWidth(const CastleProto &proto); depth findMaxWidth(const CastleProto &proto); +depth findMinWidth(const CastleProto 
&proto, u32 top); +depth findMaxWidth(const CastleProto &proto, u32 top); /** * \brief Remap tops to be contiguous. diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 26a7c606..3112d639 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -130,6 +130,8 @@ private: friend depth findMinWidth(const suffix_id &s); friend depth findMaxWidth(const suffix_id &s); + friend depth findMinWidth(const suffix_id &s, u32 top); + friend depth findMaxWidth(const suffix_id &s, u32 top); }; std::set all_reports(const suffix_id &s); @@ -138,6 +140,8 @@ bool has_eod_accepts(const suffix_id &s); bool has_non_eod_accepts(const suffix_id &s); depth findMinWidth(const suffix_id &s); depth findMaxWidth(const suffix_id &s); +depth findMinWidth(const suffix_id &s, u32 top); +depth findMaxWidth(const suffix_id &s, u32 top); size_t hash_value(const suffix_id &s); /** \brief represents an engine to the left of a rose role */ diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 1d58c241..b8775912 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -907,6 +907,18 @@ depth findMinWidth(const suffix_id &s) { } } +depth findMinWidth(const suffix_id &s, u32 top) { + assert(s.graph() || s.castle() || s.haig() || s.dfa()); + // TODO: take top into account for non-castle suffixes. + if (s.graph()) { + return findMinWidth(*s.graph()); + } else if (s.castle()) { + return findMinWidth(*s.castle(), top); + } else { + return s.dfa_min_width; + } +} + depth findMaxWidth(const suffix_id &s) { assert(s.graph() || s.castle() || s.haig() || s.dfa()); if (s.graph()) { @@ -918,6 +930,18 @@ depth findMaxWidth(const suffix_id &s) { } } +depth findMaxWidth(const suffix_id &s, u32 top) { + assert(s.graph() || s.castle() || s.haig() || s.dfa()); + // TODO: take top into account for non-castle suffixes. 
+ if (s.graph()) { + return findMaxWidth(*s.graph()); + } else if (s.castle()) { + return findMaxWidth(*s.castle(), top); + } else { + return s.dfa_max_width; + } +} + bool has_eod_accepts(const suffix_id &s) { assert(s.graph() || s.castle() || s.haig() || s.dfa()); if (s.graph()) { diff --git a/src/rose/rose_build_width.cpp b/src/rose/rose_build_width.cpp index 2a7f2bd6..6bfcee48 100644 --- a/src/rose/rose_build_width.cpp +++ b/src/rose/rose_build_width.cpp @@ -94,10 +94,11 @@ u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) { } if (g[v].suffix) { - depth suffix_width = findMinWidth(g[v].suffix); + depth suffix_width = findMinWidth(g[v].suffix, g[v].suffix.top); assert(suffix_width.is_reachable()); - DEBUG_PRINTF("%zu has suffix (width %s), can fire report at %u\n", - g[v].idx, suffix_width.str().c_str(), + DEBUG_PRINTF("%zu has suffix with top %u (width %s), can fire " + "report at %u\n", + g[v].idx, g[v].suffix.top, suffix_width.str().c_str(), w + suffix_width); minWidth = min(minWidth, w + suffix_width); } @@ -146,8 +147,9 @@ u32 findMaxBAWidth(const RoseBuildImpl &tbi) { if (has_non_eod_accepts(g[v].suffix)) { return ROSE_BOUND_INF; } - depth suffix_width = findMaxWidth(g[v].suffix); - DEBUG_PRINTF("suffix max width %s\n", suffix_width.str().c_str()); + depth suffix_width = findMaxWidth(g[v].suffix, g[v].suffix.top); + DEBUG_PRINTF("suffix max width for top %u is %s\n", g[v].suffix.top, + suffix_width.str().c_str()); assert(suffix_width.is_reachable()); if (!suffix_width.is_finite()) { DEBUG_PRINTF("suffix too wide\n"); From da23e8306afda2d5d4706296da177ce9089987a8 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 2 Dec 2015 10:39:32 +1100 Subject: [PATCH 47/54] assignDkeys: use flat_set, not set --- src/nfa/castlecompile.cpp | 3 ++- src/nfa/castlecompile.h | 4 +++- src/rose/rose_build.h | 5 +++-- src/rose/rose_build_misc.cpp | 17 +++++++++-------- src/util/report_manager.cpp | 6 +++--- 5 files changed, 20 insertions(+), 15 
deletions(-) diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 7d37ef81..32dc8f24 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -894,7 +894,8 @@ bool is_equal(const CastleProto &c1, const CastleProto &c2) { return c1.repeats == c2.repeats; } -bool requiresDedupe(const CastleProto &proto, const set &reports) { +bool requiresDedupe(const CastleProto &proto, + const ue2::flat_set &reports) { ue2::unordered_set seen; for (const PureRepeat &pr : proto.repeats | map_values) { for (const ReportID &report : pr.reports) { diff --git a/src/nfa/castlecompile.h b/src/nfa/castlecompile.h index a35f229d..bb5eff59 100644 --- a/src/nfa/castlecompile.h +++ b/src/nfa/castlecompile.h @@ -38,6 +38,7 @@ #include "nfagraph/ng_repeat.h" #include "util/alloc.h" #include "util/depth.h" +#include "util/ue2_containers.h" #include #include @@ -135,7 +136,8 @@ bool is_equal(const CastleProto &c1, const CastleProto &c2); * \brief True if the given castle contains more than a single instance of any * of the reports in the given set. */ -bool requiresDedupe(const CastleProto &proto, const std::set &reports); +bool requiresDedupe(const CastleProto &proto, + const ue2::flat_set &reports); /** * \brief Build an NGHolder from a CastleProto. diff --git a/src/rose/rose_build.h b/src/rose/rose_build.h index 1a1fc223..bef2114f 100644 --- a/src/rose/rose_build.h +++ b/src/rose/rose_build.h @@ -42,6 +42,7 @@ #include "rose_in_graph.h" #include "util/alloc.h" #include "util/charreach.h" +#include "util/ue2_containers.h" #include "util/ue2string.h" #include @@ -72,8 +73,8 @@ public: /** \brief True if we can not establish that at most a single callback will * be generated at a given offset from this set of reports. 
*/ - virtual bool requiresDedupeSupport(const std::set &reports) const - = 0; + virtual bool requiresDedupeSupport(const ue2::flat_set &reports) + const = 0; }; /** \brief Abstract interface intended for callers from elsewhere in the tree, diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index b8775912..e54c0370 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -538,7 +538,7 @@ u32 RoseBuildImpl::getNewLiteralId() { } static -bool requiresDedupe(const NGHolder &h, const set &reports, +bool requiresDedupe(const NGHolder &h, const ue2::flat_set &reports, const Grey &grey) { /* TODO: tighten */ NFAVertex seen_vert = NFAGraph::null_vertex(); @@ -581,13 +581,14 @@ bool requiresDedupe(const NGHolder &h, const set &reports, class RoseDedupeAuxImpl : public RoseDedupeAux { public: explicit RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in); - bool requiresDedupeSupport(const set &reports) const override; + bool requiresDedupeSupport( + const ue2::flat_set &reports) const override; const RoseBuildImpl &tbi; - map > vert_map; - map > suffix_map; - map > outfix_map; - map > puff_map; + map> vert_map; + map> suffix_map; + map> outfix_map; + map> puff_map; }; unique_ptr RoseBuildImpl::generateDedupeAux() const { @@ -644,8 +645,8 @@ RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in) } } -bool RoseDedupeAuxImpl::requiresDedupeSupport(const set &reports) - const { +bool RoseDedupeAuxImpl::requiresDedupeSupport( + const ue2::flat_set &reports) const { /* TODO: this could be expanded to check for offset or character constraints */ diff --git a/src/util/report_manager.cpp b/src/util/report_manager.cpp index eacef005..425f166d 100644 --- a/src/util/report_manager.cpp +++ b/src/util/report_manager.cpp @@ -128,11 +128,9 @@ vector ReportManager::getDkeyToReportTable() const { } void ReportManager::assignDkeys(const RoseBuild *rose) { - unique_ptr dedupe = rose->generateDedupeAux(); - DEBUG_PRINTF("assigning...\n"); - map> 
ext_to_int; + map> ext_to_int; for (u32 i = 0; i < reportIds.size(); i++) { const Report &ir = reportIds[i]; @@ -143,6 +141,8 @@ void ReportManager::assignDkeys(const RoseBuild *rose) { } } + auto dedupe = rose->generateDedupeAux(); + for (const auto &m : ext_to_int) { u32 ext = m.first; From 8427d837809ed0afcdc8db8d54d79d9b0b4c47b4 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 2 Dec 2015 11:31:09 +1100 Subject: [PATCH 48/54] CastleProto: track mapping of reports to tops This allows us to speed up report-based queries, like dedupe checking. --- src/nfa/castlecompile.cpp | 48 ++++++++++++++++++++++++++------------- src/nfa/castlecompile.h | 3 +++ 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 32dc8f24..8d7f3913 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -58,6 +58,7 @@ #include using namespace std; +using boost::adaptors::map_keys; using boost::adaptors::map_values; namespace ue2 { @@ -686,8 +687,8 @@ buildCastle(const CastleProto &proto, set all_reports(const CastleProto &proto) { set reports; - for (const PureRepeat &pr : proto.repeats | map_values) { - reports.insert(pr.reports.begin(), pr.reports.end()); + for (const ReportID &report : proto.report_map | map_keys) { + reports.insert(report); } return reports; } @@ -727,7 +728,11 @@ depth findMaxWidth(const CastleProto &proto, u32 top) { CastleProto::CastleProto(const PureRepeat &pr) { assert(pr.reach.any()); assert(pr.reports.size() == 1); - repeats.insert(make_pair(0, pr)); + u32 top = 0; + repeats.emplace(top, pr); + for (const auto &report : pr.reports) { + report_map[report].insert(top); + } } const CharReach &CastleProto::reach() const { @@ -749,6 +754,9 @@ u32 CastleProto::add(const PureRepeat &pr) { u32 top = find_next_top(repeats); DEBUG_PRINTF("selected unused top %u\n", top); repeats.emplace(top, pr); + for (const auto &report : pr.reports) { + report_map[report].insert(top); + } return 
top; } @@ -796,8 +804,7 @@ bool mergeCastle(CastleProto &c1, const CastleProto &c2, const u32 top = m.first; const PureRepeat &pr = m.second; DEBUG_PRINTF("top %u\n", top); - u32 new_top = find_next_top(c1.repeats); - c1.repeats.emplace(new_top, pr); + u32 new_top = c1.add(pr); top_map[top] = new_top; DEBUG_PRINTF("adding repeat: map %u->%u\n", top, new_top); } @@ -819,6 +826,17 @@ void remapCastleTops(CastleProto &proto, map &top_map) { } proto.repeats.swap(out); + + // Remap report map. + proto.report_map.clear(); + for (const auto &m : proto.repeats) { + const u32 top = m.first; + const PureRepeat &pr = m.second; + for (const auto &report : pr.reports) { + proto.report_map[report].insert(top); + } + } + assert(proto.repeats.size() <= proto.max_occupancy); } @@ -896,17 +914,15 @@ bool is_equal(const CastleProto &c1, const CastleProto &c2) { bool requiresDedupe(const CastleProto &proto, const ue2::flat_set &reports) { - ue2::unordered_set seen; - for (const PureRepeat &pr : proto.repeats | map_values) { - for (const ReportID &report : pr.reports) { - if (contains(reports, report)) { - if (contains(seen, report)) { - DEBUG_PRINTF("castle proto %p has dupe report %u\n", &proto, - report); - return true; - } - seen.insert(report); - } + for (const auto &report : reports) { + auto it = proto.report_map.find(report); + if (it == end(proto.report_map)) { + continue; + } + if (it->second.size() > 1) { + DEBUG_PRINTF("castle proto %p has dupe report %u\n", &proto, + report); + return true; } } return false; diff --git a/src/nfa/castlecompile.h b/src/nfa/castlecompile.h index bb5eff59..8ca3581c 100644 --- a/src/nfa/castlecompile.h +++ b/src/nfa/castlecompile.h @@ -81,6 +81,9 @@ struct CastleProto { /** \brief Mapping from unique top id to repeat. */ std::map repeats; + + /** \brief Mapping from report to associated tops. 
*/ + ue2::unordered_map> report_map; }; std::set all_reports(const CastleProto &proto); From 748d46c12420d7cb3b7b2805283fd52abdbe158d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 3 Dec 2015 09:27:57 +1100 Subject: [PATCH 49/54] CastleProto: track next top explicitly Repeats may be removed (e.g. by pruning in role aliasing passes) leaving "holes" in the top map. Track the next top to use explicitly, rather than using repeats.size(). --- src/nfa/castlecompile.cpp | 21 ++++++++++++--------- src/nfa/castlecompile.h | 10 ++++++++++ src/rose/rose_build_role_aliasing.cpp | 17 ++++++++++------- 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 8d7f3913..cc5c599b 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -740,19 +740,13 @@ const CharReach &CastleProto::reach() const { return repeats.begin()->second.reach; } -static -u32 find_next_top(const map &repeats) { - u32 top = verify_u32(repeats.size()); - assert(!contains(repeats, top)); - return top; -} - u32 CastleProto::add(const PureRepeat &pr) { assert(repeats.size() < max_occupancy); assert(pr.reach == reach()); assert(pr.reports.size() == 1); - u32 top = find_next_top(repeats); + u32 top = next_top++; DEBUG_PRINTF("selected unused top %u\n", top); + assert(!contains(repeats, top)); repeats.emplace(top, pr); for (const auto &report : pr.reports) { report_map[report].insert(top); @@ -760,6 +754,15 @@ u32 CastleProto::add(const PureRepeat &pr) { return top; } +void CastleProto::erase(u32 top) { + DEBUG_PRINTF("erase top %u\n", top); + assert(contains(repeats, top)); + repeats.erase(top); + for (auto &m : report_map) { + m.second.erase(top); + } +} + u32 CastleProto::merge(const PureRepeat &pr) { assert(repeats.size() <= max_occupancy); assert(pr.reach == reach()); @@ -820,7 +823,7 @@ void remapCastleTops(CastleProto &proto, map &top_map) { for (const auto &m : proto.repeats) { const u32 top = m.first; const 
PureRepeat &pr = m.second; - u32 new_top = find_next_top(out); + u32 new_top = out.size(); out.emplace(new_top, pr); top_map[top] = new_top; } diff --git a/src/nfa/castlecompile.h b/src/nfa/castlecompile.h index 8ca3581c..fc4bb991 100644 --- a/src/nfa/castlecompile.h +++ b/src/nfa/castlecompile.h @@ -68,8 +68,12 @@ struct CastleProto { explicit CastleProto(const PureRepeat &pr); const CharReach &reach() const; + /** \brief Add a new repeat. */ u32 add(const PureRepeat &pr); + /** \brief Remove a repeat. */ + void erase(u32 top); + /** * \brief Merge in the given repeat, returning the top used. * @@ -84,6 +88,12 @@ struct CastleProto { /** \brief Mapping from report to associated tops. */ ue2::unordered_map> report_map; + + /** + * \brief Next top id to use. Repeats may be removed without top remapping, + * so we track this explicitly instead of using repeats.size(). + */ + u32 next_top = 1; }; std::set all_reports(const CastleProto &proto); diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp index 62db5b2e..88deaa25 100644 --- a/src/rose/rose_build_role_aliasing.cpp +++ b/src/rose/rose_build_role_aliasing.cpp @@ -751,14 +751,17 @@ void pruneReportIfUnused(const RoseBuildImpl &tbi, shared_ptr h, * Castle. */ static void pruneCastle(CastleProto &castle, ReportID report) { - for (map::iterator it = castle.repeats.begin(); - it != castle.repeats.end(); /* incr inside */) { - if (contains(it->second.reports, report)) { - ++it; - } else { - castle.repeats.erase(it++); + unordered_set dead; // tops to remove. 
+ for (const auto &m : castle.repeats) { + if (!contains(m.second.reports, report)) { + dead.insert(m.first); } } + + for (const auto &top : dead) { + castle.erase(top); + } + assert(!castle.repeats.empty()); } @@ -798,7 +801,7 @@ void pruneUnusedTops(CastleProto &castle, const RoseGraph &g, for (u32 top : assoc_keys(castle.repeats)) { if (!contains(used_tops, top)) { DEBUG_PRINTF("removing unused top %u\n", top); - castle.repeats.erase(top); + castle.erase(top); } } } From 8c09d054c9035966d45a1b09504e090f4b2924aa Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 2 Dec 2015 18:16:49 +1100 Subject: [PATCH 50/54] Add per-top findMinWidth etc for NFA graphs --- src/nfagraph/ng_width.cpp | 68 ++++++++++++++++++++++++++---------- src/nfagraph/ng_width.h | 28 ++++++++++++--- src/rose/rose_build_misc.cpp | 6 ++-- 3 files changed, 75 insertions(+), 27 deletions(-) diff --git a/src/nfagraph/ng_width.cpp b/src/nfagraph/ng_width.cpp index 3ecb391c..470f9343 100644 --- a/src/nfagraph/ng_width.cpp +++ b/src/nfagraph/ng_width.cpp @@ -51,10 +51,16 @@ namespace ue2 { namespace { -/** Filter out edges from start-to-start or accept-to-accept. */ +/** + * Filter out special edges, or in the top-specific variant, start edges that + * don't have the right top set. 
+ */ struct SpecialEdgeFilter { SpecialEdgeFilter() {} - explicit SpecialEdgeFilter(const NGHolder *h_in) : h(h_in) {} + explicit SpecialEdgeFilter(const NGHolder &h_in) : h(&h_in) {} + explicit SpecialEdgeFilter(const NGHolder &h_in, u32 top_in) + : h(&h_in), single_top(true), top(top_in) {} + bool operator()(const NFAEdge &e) const { const NFAGraph &g = h->g; NFAVertex u = source(e, g), v = target(e, g); @@ -62,23 +68,33 @@ struct SpecialEdgeFilter { (is_any_accept(u, g) && is_any_accept(v, g))) { return false; } + if (single_top) { + if (u == h->start && g[e].top != top) { + return false; + } + if (u == h->startDs) { + return false; + } + } return true; } private: const NGHolder *h = nullptr; + bool single_top = false; + u32 top = 0; }; } // namespace static -depth findMinWidth(const NGHolder &h, NFAVertex src) { +depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter, + NFAVertex src) { if (isLeafNode(src, h)) { return depth::unreachable(); } - typedef boost::filtered_graph StartGraph; - StartGraph g(h.g, SpecialEdgeFilter(&h)); + boost::filtered_graph g(h.g, filter); assert(hasCorrectlyNumberedVertices(h)); const size_t num = num_vertices(h); @@ -112,7 +128,8 @@ depth findMinWidth(const NGHolder &h, NFAVertex src) { } static -depth findMaxWidth(const NGHolder &h, NFAVertex src) { +depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter, + NFAVertex src) { if (isLeafNode(src, h.g)) { return depth::unreachable(); } @@ -122,8 +139,7 @@ depth findMaxWidth(const NGHolder &h, NFAVertex src) { return depth::infinity(); } - typedef boost::filtered_graph NodeFilteredGraph; - NodeFilteredGraph g(h.g, SpecialEdgeFilter(&h)); + boost::filtered_graph g(h.g, filter); assert(hasCorrectlyNumberedVertices(h)); const size_t num = num_vertices(h); @@ -164,7 +180,7 @@ depth findMaxWidth(const NGHolder &h, NFAVertex src) { if (d.is_unreachable()) { // If we're actually reachable, we'll have a min width, so we can // return infinity in this case. 
- if (findMinWidth(h, src).is_reachable()) { + if (findMinWidth(h, filter, src).is_reachable()) { return depth::infinity(); } return d; @@ -175,11 +191,10 @@ depth findMaxWidth(const NGHolder &h, NFAVertex src) { return d - depth(1); } -/** Returns the minimum width in bytes of an input that will match the given - * graph. */ -depth findMinWidth(const NGHolder &h) { - depth startDepth = findMinWidth(h, h.start); - depth dotstarDepth = findMinWidth(h, h.startDs); +static +depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter) { + depth startDepth = findMinWidth(h, filter, h.start); + depth dotstarDepth = findMinWidth(h, filter, h.startDs); DEBUG_PRINTF("startDepth=%s, dotstarDepth=%s\n", startDepth.str().c_str(), dotstarDepth.str().c_str()); if (startDepth.is_unreachable()) { @@ -194,11 +209,18 @@ depth findMinWidth(const NGHolder &h) { } } -/** Returns the maximum width in bytes of an input that will match the given - * graph. If there is no maximum width, returns infinity. 
*/ -depth findMaxWidth(const NGHolder &h) { - depth startDepth = findMaxWidth(h, h.start); - depth dotstarDepth = findMaxWidth(h, h.startDs); +depth findMinWidth(const NGHolder &h) { + return findMinWidth(h, SpecialEdgeFilter(h)); +} + +depth findMinWidth(const NGHolder &h, u32 top) { + return findMinWidth(h, SpecialEdgeFilter(h, top)); +} + +static +depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter) { + depth startDepth = findMaxWidth(h, filter, h.start); + depth dotstarDepth = findMaxWidth(h, filter, h.startDs); DEBUG_PRINTF("startDepth=%s, dotstarDepth=%s\n", startDepth.str().c_str(), dotstarDepth.str().c_str()); if (startDepth.is_unreachable()) { @@ -210,4 +232,12 @@ depth findMaxWidth(const NGHolder &h) { } } +depth findMaxWidth(const NGHolder &h) { + return findMaxWidth(h, SpecialEdgeFilter(h)); +} + +depth findMaxWidth(const NGHolder &h, u32 top) { + return findMaxWidth(h, SpecialEdgeFilter(h, top)); +} + } // namespace ue2 diff --git a/src/nfagraph/ng_width.h b/src/nfagraph/ng_width.h index 5c78409b..871e8a93 100644 --- a/src/nfagraph/ng_width.h +++ b/src/nfagraph/ng_width.h @@ -41,14 +41,34 @@ namespace ue2 { class NGHolder; -/** Returns the minimum width in bytes of an input that will match the given - * graph. */ +/** + * \brief Compute the minimum width in bytes of an input that will match the + * given graph. + */ depth findMinWidth(const NGHolder &h); -/** Returns the maximum width in bytes of an input that will match the given - * graph. If there is no maximum width, returns infinity. */ +/** + * \brief Compute the minimum width in bytes of an input that will match the + * given graph, considering only paths activated by the given top. + */ +depth findMinWidth(const NGHolder &h, u32 top); + +/** + * \brief Compute the maximum width in bytes of an input that will match the + * given graph. + * + * If there is no bound on the maximum width, returns infinity. 
+ */ depth findMaxWidth(const NGHolder &h); +/** + * \brief Compute the maximum width in bytes of an input that will match the + * given graph, considering only paths activated by the given top. + * + * If there is no bound on the maximum width, returns infinity. + */ +depth findMaxWidth(const NGHolder &h, u32 top); + } // namespace ue2 #endif // NG_WIDTH_H diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index e54c0370..cc5bbc70 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -910,9 +910,8 @@ depth findMinWidth(const suffix_id &s) { depth findMinWidth(const suffix_id &s, u32 top) { assert(s.graph() || s.castle() || s.haig() || s.dfa()); - // TODO: take top into account for non-castle suffixes. if (s.graph()) { - return findMinWidth(*s.graph()); + return findMinWidth(*s.graph(), top); } else if (s.castle()) { return findMinWidth(*s.castle(), top); } else { @@ -933,9 +932,8 @@ depth findMaxWidth(const suffix_id &s) { depth findMaxWidth(const suffix_id &s, u32 top) { assert(s.graph() || s.castle() || s.haig() || s.dfa()); - // TODO: take top into account for non-castle suffixes. 
if (s.graph()) { - return findMaxWidth(*s.graph()); + return findMaxWidth(*s.graph(), top); } else if (s.castle()) { return findMaxWidth(*s.castle(), top); } else { From 7bcd2b07c92a56ebc7dd537ccfc361d2e0fa824e Mon Sep 17 00:00:00 2001 From: Xiang Wang Date: Wed, 2 Dec 2015 07:24:57 -0500 Subject: [PATCH 51/54] simplify max clique analysis --- src/nfa/castlecompile.cpp | 228 +++++++++----------------------------- 1 file changed, 53 insertions(+), 175 deletions(-) diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index cc5c599b..e5cc9267 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -64,6 +64,7 @@ using boost::adaptors::map_values; namespace ue2 { #define CASTLE_MAX_TOPS 32 +#define CLIQUE_GRAPH_MAX_SIZE 1000 static u32 depth_to_u32(const depth &d) { @@ -107,209 +108,90 @@ void writeCastleScanEngine(const CharReach &cr, Castle *c) { } static -size_t literalOverlap(const vector &a, const vector &b) { +bool literalOverlap(const vector &a, const vector &b, + const size_t dist) { for (size_t i = 0; i < b.size(); i++) { + if (i > dist) { + return true; + } size_t overlap_len = b.size() - i; if (overlap_len <= a.size()) { if (matches(a.end() - overlap_len, a.end(), b.begin(), b.end() - i)) { - return i; + return false; } } else { assert(overlap_len > a.size()); if (matches(a.begin(), a.end(), b.end() - i - a.size(), b.end() - i)) { - return i; + return false; } } } - return b.size(); + return b.size() > dist; } -// UE-2666 case 1: The problem of find largest exclusive subcastles group -// can be reformulated as finding the largest clique (subgraph where every -// vertex is connected to every other vertex) in the graph. We use an -// approximate algorithm here to find the maximum clique. -// References -// ---------- -// [1] Boppana, R., & HalldΓ³rsson, M. M. (1992). -// Approximating maximum independent sets by excluding subgraphs. -// BIT Numerical Mathematics, 32(2), 180–196. Springer. 
-// doi:10.1007/BF01994876 -// ---------- - struct CliqueVertexProps { CliqueVertexProps() {} explicit CliqueVertexProps(u32 state_in) : stateId(state_in) {} u32 stateId = ~0U; - u32 parentId = ~0U; - bool leftChild = false; /* tells us if it is the left child of its parent */ - - vector clique1; /* clique for the left branch */ - vector indepSet1; /* independent set for the left branch */ - vector clique2; /* clique for the right branch */ - vector indepSet2; /* independent set for the right branch */ }; typedef boost::adjacency_list CliqueGraph; typedef CliqueGraph::vertex_descriptor CliqueVertex; -static -unique_ptr makeCG(const vector> &exclusiveSet) { - u32 size = exclusiveSet.size(); - - vector vertices; - unique_ptr cg = make_unique(); - for (u32 i = 0; i < size; ++i) { - CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg); - vertices.push_back(v); - } - - // construct the complement graph, then its maximum independent sets - // are equal to the maximum clique of the original graph - for (u32 i = 0; i < size; ++i) { - CliqueVertex s = vertices[i]; - vector complement(size, 0); - for (u32 j = 0; j < exclusiveSet[i].size(); ++j) { - u32 val = exclusiveSet[i][j]; - complement[val] = 1; - } - - for (u32 k = i + 1; k < size; ++k) { - if (!complement[k]) { - CliqueVertex d = vertices[k]; - add_edge(s, d, *cg); - } - } - } - return cg; -} - -static -void updateCliqueInfo(CliqueGraph &cg, const CliqueVertex &n, - vector &clique, vector &indepSet) { - u32 id = cg[n].stateId; - if (cg[n].clique1.size() + 1 > cg[n].clique2.size()) { - cg[n].clique1.push_back(id); - clique.swap(cg[n].clique1); - } else { - clique.swap(cg[n].clique2); - } - - if (cg[n].indepSet2.size() + 1 > cg[n].indepSet1.size()) { - cg[n].indepSet2.push_back(id); - indepSet.swap(cg[n].indepSet2); - } else { - indepSet.swap(cg[n].indepSet1); - } -} - static void getNeighborInfo(const CliqueGraph &g, vector &neighbor, - vector &nonneighbor, const CliqueVertex &cv, - const set &group) { + const 
CliqueVertex &cv, const set &group) { u32 id = g[cv].stateId; ue2::unordered_set neighborId; // find neighbors for cv for (const auto &v : adjacent_vertices_range(cv, g)) { - if (g[v].stateId != id && contains(group, g[v].stateId)) { + if (g[v].stateId != id && contains(group, g[v].stateId)){ neighbor.push_back(g[v].stateId); neighborId.insert(g[v].stateId); - } - } - - neighborId.insert(id); - // find non-neighbors for cv - for (const auto &v : vertices_range(g)) { - if (!contains(neighborId, g[v].stateId) && - contains(group, g[v].stateId)) { - nonneighbor.push_back(g[v].stateId); + DEBUG_PRINTF("Neighbor:%u\n", g[v].stateId); } } } static -void findCliqueGroup(CliqueGraph &cg, vector &clique, - vector &indepSet) { +void findCliqueGroup(CliqueGraph &cg, vector &clique) { stack> gStack; - // create mapping between vertex and id + // Create mapping between vertex and id map vertexMap; vector init; - for (auto &v : vertices_range(cg)) { + for (const auto &v : vertices_range(cg)) { vertexMap[cg[v].stateId] = v; init.push_back(cg[v].stateId); } gStack.push(init); - // get the vertex to start from - set foundVertexId; - ue2::unordered_set visitedId; + // Get the vertex to start from CliqueGraph::vertex_iterator vi, ve; tie(vi, ve) = vertices(cg); - CliqueVertex start = *vi; - u32 startId = cg[start].stateId; - DEBUG_PRINTF("startId:%u\n", startId); - bool leftChild = false; - u32 prevId = startId; while (!gStack.empty()) { - const auto &g = gStack.top(); + vector g = gStack.top(); + gStack.pop(); - // choose a vertex from the graph - assert(!g.empty()); + // Choose a vertex from the graph u32 id = g[0]; - CliqueVertex &n = vertexMap.at(id); - + const CliqueVertex &n = vertexMap.at(id); + clique.push_back(id); + // Corresponding vertex in the original graph vector neighbor; - vector nonneighbor; set subgraphId(g.begin(), g.end()); - getNeighborInfo(cg, neighbor, nonneighbor, n, subgraphId); - if (contains(foundVertexId, id)) { - prevId = id; - // get non-neighbors for 
right branch - if (visitedId.insert(id).second) { - DEBUG_PRINTF("right branch\n"); - if (!nonneighbor.empty()) { - gStack.push(nonneighbor); - leftChild = false; - } - } else { - if (id != startId) { - // both the left and right branches are visited, - // update its parent's clique and independent sets - u32 parentId = cg[n].parentId; - CliqueVertex &parent = vertexMap.at(parentId); - if (cg[n].leftChild) { - updateCliqueInfo(cg, n, cg[parent].clique1, - cg[parent].indepSet1); - } else { - updateCliqueInfo(cg, n, cg[parent].clique2, - cg[parent].indepSet2); - } - } - gStack.pop(); - } - } else { - foundVertexId.insert(id); - cg[n].leftChild = leftChild; - cg[n].parentId = prevId; - cg[n].clique1.clear(); - cg[n].clique2.clear(); - cg[n].indepSet1.clear(); - cg[n].indepSet2.clear(); - // get neighbors for left branch - if (!neighbor.empty()) { - gStack.push(neighbor); - leftChild = true; - } - prevId = id; + getNeighborInfo(cg, neighbor, n, subgraphId); + // Get graph consisting of neighbors for left branch + if (!neighbor.empty()) { + gStack.push(neighbor); } } - updateCliqueInfo(cg, start, clique, indepSet); } template @@ -322,9 +204,8 @@ bool graph_empty(const Graph &g) { static vector removeClique(CliqueGraph &cg) { vector> cliquesVec(1); - vector> indepSetsVec(1); DEBUG_PRINTF("graph size:%lu\n", num_vertices(cg)); - findCliqueGroup(cg, cliquesVec[0], indepSetsVec[0]); + findCliqueGroup(cg, cliquesVec[0]); while (!graph_empty(cg)) { const vector &c = cliquesVec.back(); vector dead; @@ -341,30 +222,22 @@ vector removeClique(CliqueGraph &cg) { break; } vector clique; - vector indepSet; - findCliqueGroup(cg, clique, indepSet); + findCliqueGroup(cg, clique); cliquesVec.push_back(clique); - indepSetsVec.push_back(indepSet); } // get the independent set with max size size_t max = 0; size_t id = 0; - for (size_t j = 0; j < indepSetsVec.size(); ++j) { - if (indepSetsVec[j].size() > max) { - max = indepSetsVec[j].size(); + for (size_t j = 0; j < cliquesVec.size(); ++j) 
{ + if (cliquesVec[j].size() > max) { + max = cliquesVec[j].size(); id = j; } } - DEBUG_PRINTF("clique size:%lu\n", indepSetsVec[id].size()); - return indepSetsVec[id]; -} - -static -vector findMaxClique(const vector> &exclusiveSet) { - auto cg = makeCG(exclusiveSet); - return removeClique(*cg); + DEBUG_PRINTF("clique size:%lu\n", cliquesVec[id].size()); + return cliquesVec[id]; } // if the location of any reset character in one literal are after @@ -378,10 +251,10 @@ bool findExclusivePair(const u32 id1, const u32 id2, const auto &triggers2 = triggers[id2]; for (u32 i = 0; i < triggers1.size(); ++i) { for (u32 j = 0; j < triggers2.size(); ++j) { - size_t max_overlap1 = literalOverlap(triggers1[i], triggers2[j]); - size_t max_overlap2 = literalOverlap(triggers2[j], triggers1[i]); - if (max_overlap1 <= min_reset_dist[id2][j] || - max_overlap2 <= min_reset_dist[id1][i]) { + if (!literalOverlap(triggers1[i], triggers2[j], + min_reset_dist[id2][j]) || + !literalOverlap(triggers2[j], triggers1[i], + min_reset_dist[id1][i])) { return false; } } @@ -397,28 +270,33 @@ vector checkExclusion(const CharReach &cr, return group; } - vector > min_reset_dist; + vector> min_reset_dist; // get min reset distance for each repeat for (auto it = triggers.begin(); it != triggers.end(); it++) { const vector &tmp_dist = minResetDistToEnd(*it, cr); min_reset_dist.push_back(tmp_dist); } - vector> exclusiveSet; + vector vertices; + unique_ptr cg = make_unique(); + for (u32 i = 0; i < triggers.size(); ++i) { + CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg); + vertices.push_back(v); + } + // find exclusive pair for each repeat for (u32 i = 0; i < triggers.size(); ++i) { - vector repeatIds; + CliqueVertex s = vertices[i]; for (u32 j = i + 1; j < triggers.size(); ++j) { if (findExclusivePair(i, j, min_reset_dist, triggers)) { - repeatIds.push_back(j); + CliqueVertex d = vertices[j]; + add_edge(s, d, *cg); } } - exclusiveSet.push_back(repeatIds); - DEBUG_PRINTF("Exclusive pair 
size:%lu\n", repeatIds.size()); } // find the largest exclusive group - return findMaxClique(exclusiveSet); + return removeClique(*cg); } static @@ -576,7 +454,7 @@ buildCastle(const CastleProto &proto, repeatInfoPair.push_back(make_pair(min_period, is_reset)); - if (is_reset) { + if (is_reset && candidateRepeats.size() < CLIQUE_GRAPH_MAX_SIZE) { candidateTriggers.push_back(triggers.at(top)); candidateRepeats.push_back(i); } @@ -585,7 +463,7 @@ buildCastle(const CastleProto &proto, // Case 1: exclusive repeats bool exclusive = false; bool pureExclusive = false; - u8 activeIdxSize = 0; + u32 activeIdxSize = 0; set exclusiveGroup; if (cc.grey.castleExclusive) { vector tmpGroup = checkExclusion(cr, candidateTriggers); @@ -594,7 +472,7 @@ buildCastle(const CastleProto &proto, // Case 1: mutual exclusive repeats group found, initialize state // sizes exclusive = true; - activeIdxSize = calcPackedBytes(exclusiveSize); + activeIdxSize = calcPackedBytes(numRepeats + 1); if (exclusiveSize == numRepeats) { pureExclusive = true; streamStateSize = 0; @@ -642,7 +520,7 @@ buildCastle(const CastleProto &proto, c->numRepeats = verify_u32(subs.size()); c->exclusive = exclusive; c->pureExclusive = pureExclusive; - c->activeIdxSize = activeIdxSize; + c->activeIdxSize = verify_u8(activeIdxSize); writeCastleScanEngine(cr, c); From 2aa6830c8887e11040bd5c4c7446ac5b62e0eff9 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 16 Dec 2015 14:10:46 +1100 Subject: [PATCH 52/54] Add ChangeLog --- CHANGELOG.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..c137017a --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,36 @@ +# Hyperscan Change Log + +This is a list of notable changes to Hyperscan, in reverse chronological order. + +## [4.1.0] 2015-12-18 +- Update version of PCRE used by testing tools as a syntax and semantic + reference to PCRE 8.38. 
+- Small updates to fix warnings identified by Coverity. +- Clean up and unify exception handling behaviour across GPR and SIMD NFA + models. +- Fix bug in handling of bounded repeat triggers with large gaps between them + for sparse repeat model. +- Correctly reject POSIX collating elements (`[.ch.]`, `[=ch=]`) in the parser. + These are not supported by Hyperscan. +- Add support for quoted sequences (`\Q...\E`) inside character classes. +- Simplify FDR literal matcher runtime by removing some static specialization. +- Fix handling of the POSIX `[:graph:]`, `[:print:]` and `[:punct:]` character + classes to match the behaviour of PCRE 8.38 in both standard operation and + with the UCP flag set. (Note: some bugs were fixed in this area in PCRE + 8.38.) Previously Hyperscan's behaviour was the same as versions of PCRE + before 8.34. +- Improve performance when compiling pattern sets that include a large number + of similar bounded repeat constructs. (github issue #9) + +## [4.0.1] 2015-10-30 +- Minor cleanups to test code. +- CMake and other build system improvements. +- API update: allow `hs_reset_stream()` and `hs_reset_and_copy_stream()` to be + supplied with a NULL scratch pointer if no matches are required. This is in + line with the behaviour of `hs_close_stream()`. +- Disallow bounded repeats with a very large minimum repeat but no maximum, + i.e. {N,} for very large N. +- Reduce compile memory usage in literal set explansion for some large cases. + +## [4.0.0] 2015-10-20 +- Original release of Hyperscan as open-source software. 
From 0f2cbb9ffd395bd984679015221587868b8d40dd Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 16 Dec 2015 14:31:43 +1100 Subject: [PATCH 53/54] Small updates to documentation for 4.1 --- README.md | 21 +++++++++++++++++++++ doc/dev-reference/compilation.rst | 3 +++ 2 files changed, 24 insertions(+) diff --git a/README.md b/README.md index 37fc38ce..1753ecbe 100644 --- a/README.md +++ b/README.md @@ -20,3 +20,24 @@ the [Developer Reference Guide](http://01org.github.io/hyperscan/dev-reference/) Hyperscan is licensed under the BSD License. See the LICENSE file in the project repository. +# Versioning + +The `master` branch on Github will always contain the most recent release of +Hyperscan. Each version released to `master` goes through QA and testing before +it is released; if you're a user, rather than a developer, this is the version +you should be using. + +Further development towards the next release takes place on the `develop` +branch. + +# Get Involved + +The official homepage for Hyperscan is at [01.org/hyperscan](https://01.org/hyperscan). + +If you have questions or comments, we encourage you to [join the mailing +list](https://lists.01.org/mailman/listinfo/hyperscan). Bugs can be filed by +sending email to the list, or by creating an issue on Github. + +If you wish to contact the Hyperscan team at Intel directly, without posting +publicly to the mailing list, send email to +[hyperscan@intel.com](mailto:hyperscan@intel.com). diff --git a/doc/dev-reference/compilation.rst b/doc/dev-reference/compilation.rst index 6e195f6a..f3723dc9 100644 --- a/doc/dev-reference/compilation.rst +++ b/doc/dev-reference/compilation.rst @@ -63,6 +63,9 @@ described at . However, not all constructs available in libpcre are supported. The use of unsupported constructs will result in compilation errors. +The version of PCRE used to validate Hyperscan's interpretation of this syntax +is 8.38. 
+ ==================== Supported Constructs ==================== From a5944067d40945de886bb2f2706b9038abc9f4dc Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Fri, 18 Dec 2015 14:37:29 +1100 Subject: [PATCH 54/54] Bump version number --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a1dfcaf2..b4d81754 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,8 +2,8 @@ cmake_minimum_required (VERSION 2.8.11) project (Hyperscan C CXX) set (HS_MAJOR_VERSION 4) -set (HS_MINOR_VERSION 0) -set (HS_PATCH_VERSION 1) +set (HS_MINOR_VERSION 1) +set (HS_PATCH_VERSION 0) set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION}) string (TIMESTAMP BUILD_DATE "%Y-%m-%d")