From 68a35ff3b8a35c5c1552823c9ef11b4b9474d747 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 22 Dec 2016 16:33:14 +1100 Subject: [PATCH] rose: give longer literals to accel analysis --- src/rose/rose_build_matchers.cpp | 52 +++++++++++++++++++++++++++----- src/rose/rose_build_matchers.h | 14 +++++++-- src/util/container.h | 10 +++++- 3 files changed, 66 insertions(+), 10 deletions(-) diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 8d6f68df..2e7305f7 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -59,6 +59,8 @@ using boost::adaptors::map_values; namespace ue2 { +static const size_t MAX_ACCEL_STRING_LEN = 16; + #ifdef DEBUG static UNUSED string dumpMask(const vector &v) { @@ -652,6 +654,16 @@ map makeFragGroupMap(const RoseBuildImpl &build, return frag_to_group; } +template +void trim_to_suffix(Container &c, size_t len) { + if (c.size() <= len) { + return; + } + + size_t suffix_len = c.size() - len; + c.erase(c.begin(), c.begin() + suffix_len); +} + MatcherProto makeMatcherProto(const RoseBuildImpl &build, const map &final_to_frag_map, rose_literal_table table, size_t max_len, @@ -726,6 +738,7 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, continue; } + mp.accel_lits.emplace_back(s, nocase, msk, cmp, groups); mp.history_required = max(mp.history_required, lit_hist_len); mp.lits.emplace_back(move(s), nocase, noruns, final_id, groups, msk, cmp); @@ -756,6 +769,8 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, continue; } + mp.accel_lits.emplace_back(lit.get_string(), lit.any_nocase(), msk, + cmp, groups); mp.history_required = max(mp.history_required, lit_hist_len); mp.lits.emplace_back(move(s), nocase, noruns, final_id, groups, msk, cmp); @@ -772,12 +787,30 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, lit.groups = frag_group_map.at(lit.id); } - sort(begin(mp.lits), end(mp.lits)); - mp.lits.erase(unique(begin(mp.lits), end(mp.lits)), end(mp.lits)); + sort_and_unique(mp.lits); + + // Literals used for acceleration must be limited to max_len, as that's all + // we can see in history. + for_each(begin(mp.accel_lits), end(mp.accel_lits), + [&max_len](AccelString &a) { + trim_to_suffix(a.s, max_len); + trim_to_suffix(a.msk, max_len); + trim_to_suffix(a.cmp, max_len); + }); + + sort_and_unique(mp.accel_lits); return mp; } +void MatcherProto::insert(const MatcherProto &a) { + ::ue2::insert(&lits, lits.end(), a.lits); + ::ue2::insert(&accel_lits, accel_lits.end(), a.accel_lits); + sort_and_unique(lits); + sort_and_unique(accel_lits); + history_required = max(history_required, a.history_required); +} + aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, const map &final_to_frag_map, @@ -802,7 +835,9 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, throw CompileError("Unable to generate bytecode."); } - buildForwardAccel(hwlm.get(), mp.lits, build.getInitialGroups()); + if (build.cc.grey.hamsterAccelForward) { + buildForwardAccel(hwlm.get(), mp.accel_lits, build.getInitialGroups()); + } if (build.cc.streaming) { DEBUG_PRINTF("history_required=%zu\n", mp.history_required); @@ -851,8 +886,7 @@ buildSmallBlockMatcher(const RoseBuildImpl &build, return nullptr; } - mp.lits.insert(mp.lits.end(), mp_anchored.lits.begin(), - mp_anchored.lits.end()); + mp.insert(mp_anchored); // None of our literals should be longer than the small block limit. assert(all_of(begin(mp.lits), end(mp.lits), [](const hwlmLiteral &lit) { @@ -869,7 +903,9 @@ buildSmallBlockMatcher(const RoseBuildImpl &build, throw CompileError("Unable to generate bytecode."); } - buildForwardAccel(hwlm.get(), mp.lits, build.getInitialGroups()); + if (build.cc.grey.hamsterAccelForward) { + buildForwardAccel(hwlm.get(), mp.accel_lits, build.getInitialGroups()); + } *sbsize = hwlmSize(hwlm.get()); assert(*sbsize); @@ -898,7 +934,9 @@ buildEodAnchoredMatcher(const RoseBuildImpl &build, throw CompileError("Unable to generate bytecode."); } - buildForwardAccel(hwlm.get(), mp.lits, build.getInitialGroups()); + if (build.cc.grey.hamsterAccelForward) { + buildForwardAccel(hwlm.get(), mp.accel_lits, build.getInitialGroups()); + } *esize = hwlmSize(hwlm.get()); assert(*esize); diff --git a/src/rose/rose_build_matchers.h b/src/rose/rose_build_matchers.h index 742e8a14..a1817307 100644 --- a/src/rose/rose_build_matchers.h +++ b/src/rose/rose_build_matchers.h @@ -35,6 +35,8 @@ #define ROSE_BUILD_MATCHERS_H #include "rose_build_impl.h" +#include "rose_build_lit_accel.h" +#include "hwlm/hwlm_literal.h" #include #include @@ -43,11 +45,19 @@ struct HWLM; namespace ue2 { -struct hwlmLiteral; - +/** \brief Prototype for literal matcher construction. */ struct MatcherProto { + /** \brief Literal fragments used to construct the literal matcher. */ std::vector lits; + + /** \brief Longer literals used for acceleration analysis. */ + std::vector accel_lits; + + /** \brief The history required by the literal matcher. */ size_t history_required = 0; + + /** \brief Insert the contents of another MatcherProto. */ + void insert(const MatcherProto &a); }; /** diff --git a/src/util/container.h b/src/util/container.h index e2cfb485..24f01fd2 100644 --- a/src/util/container.h +++ b/src/util/container.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -89,6 +89,14 @@ auto make_vector_from(const std::pair &range) return std::vector(range.first, range.second); } +/** \brief Sort a sequence container and remove duplicates. */ +template +void sort_and_unique(C &container) { + std::sort(std::begin(container), std::end(container)); + container.erase(std::unique(std::begin(container), std::end(container)), + std::end(container)); +} + /** \brief Returns a set containing the keys in the given associative * container. */ template