diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index d59c5cc6..56ed5f41 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -55,7 +55,6 @@ #include "parser/unsupported.h" #include "parser/utf8_validate.h" #include "rose/rose_build.h" -#include "rose/rose_build_dump.h" #include "som/slot_manager_dump.h" #include "util/alloc.h" #include "util/compile_error.h" @@ -310,7 +309,6 @@ aligned_unique_ptr<RoseEngine> generateRoseEngine(NG &ng) { return nullptr; } - dumpRose(*ng.rose, rose.get(), ng.cc.grey); dumpReportManager(ng.rm, ng.cc.grey); dumpSomSlotManager(ng.ssm, ng.cc.grey); dumpSmallWrite(rose.get(), ng.cc.grey); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index e7859405..32a1d075 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -33,6 +33,7 @@ #include "hs_compile.h" // for HS_MODE_* #include "rose_build_add_internal.h" #include "rose_build_anchored.h" +#include "rose_build_dump.h" #include "rose_build_engine_blob.h" #include "rose_build_exclusive.h" #include "rose_build_groups.h" @@ -5582,6 +5583,9 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine = addSmallWriteEngine(*this, move(engine)); DEBUG_PRINTF("rose done %p\n", engine.get()); + + dumpRose(*this, engine.get()); + return engine; } diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index a13fc964..0d05e8ac 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -275,10 +275,8 @@ private: } // namespace -void dumpRoseGraph(const RoseBuild &build_base, const RoseEngine *t, +void dumpRoseGraph(const RoseBuildImpl &build, const RoseEngine *t, const char *filename) { - const RoseBuildImpl &build = dynamic_cast<const RoseBuildImpl &>(build_base); - const Grey &grey = build.cc.grey; /* "early" rose graphs should only be dumped if we are dumping intermediate @@ -497,9 +495,13 @@ string toRegex(const string &lit) { return os.str(); } -static -void 
dumpTestLiterals(const string &filename, const vector<hwlmLiteral> &lits) { - ofstream of(filename.c_str()); +void dumpMatcherLiterals(const vector<hwlmLiteral> &lits, const string &name, + const Grey &grey) { + if (!grey.dumpFlags) { + return; + } + + ofstream of(grey.dumpPath + "rose_" + name + "_test_literals.txt"); // Unique regex index, as literals may share an ID. u32 i = 0; @@ -528,40 +530,6 @@ void dumpTestLiterals(const string &filename, const vector<hwlmLiteral> &lits) { of.close(); } -static -void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) { - size_t historyRequired = build.calcHistoryRequired(); - size_t longLitLengthThreshold = - calcLongLitThreshold(build, historyRequired); - - auto mp = - makeMatcherProto(build, ROSE_ANCHORED, false, longLitLengthThreshold); - dumpTestLiterals(base + "rose_anchored_test_literals.txt", mp.lits); - - mp = makeMatcherProto(build, ROSE_FLOATING, false, longLitLengthThreshold); - dumpTestLiterals(base + "rose_float_test_literals.txt", mp.lits); - - if (build.cc.streaming) { - mp = makeMatcherProto(build, ROSE_FLOATING, true, - longLitLengthThreshold); - dumpTestLiterals(base + "rose_delay_rebuild_test_literals.txt", - mp.lits); - } - - mp = makeMatcherProto(build, ROSE_EOD_ANCHORED, false, - build.ematcher_region_size); - dumpTestLiterals(base + "rose_eod_test_literals.txt", mp.lits); - - if (!build.cc.streaming) { - mp = makeMatcherProto(build, ROSE_FLOATING, false, ROSE_SMALL_BLOCK_LEN, - ROSE_SMALL_BLOCK_LEN); - auto mp2 = makeMatcherProto(build, ROSE_ANCHORED_SMALL_BLOCK, false, - ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); - mp.lits.insert(end(mp.lits), begin(mp2.lits), end(mp2.lits)); - dumpTestLiterals(base + "rose_smallblock_test_literals.txt", mp.lits); - } -} - static const void *loadFromByteCodeOffset(const RoseEngine *t, u32 offset) { if (!offset) { @@ -1894,14 +1862,13 @@ void roseDumpPrograms(const RoseBuildImpl &build, const RoseEngine *t, dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt"); } -void 
dumpRose(const RoseBuild &build_base, const RoseEngine *t, - const Grey &grey) { +void dumpRose(const RoseBuildImpl &build, const RoseEngine *t) { + const Grey &grey = build.cc.grey; + if (!grey.dumpFlags) { return; } - const RoseBuildImpl &build = dynamic_cast<const RoseBuildImpl &>(build_base); - stringstream ss; ss << grey.dumpPath << "rose.txt"; @@ -1929,7 +1896,6 @@ void dumpRose(const RoseBuild &build_base, const RoseEngine *t, ss.clear(); ss << grey.dumpPath << "rose_literals.txt"; dumpRoseLiterals(build, ss.str().c_str()); - dumpRoseTestLiterals(build, grey.dumpPath); f = fopen((grey.dumpPath + "/rose_struct.txt").c_str(), "w"); roseDumpStructRaw(t, f); diff --git a/src/rose/rose_build_dump.h b/src/rose/rose_build_dump.h index 28e9f53a..601f5914 100644 --- a/src/rose/rose_build_dump.h +++ b/src/rose/rose_build_dump.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,30 +29,40 @@ #ifndef ROSE_BUILD_DUMP_H #define ROSE_BUILD_DUMP_H +#include <string> +#include <vector> + struct RoseEngine; namespace ue2 { -class RoseBuild; +class RoseBuildImpl; struct Grey; +struct hwlmLiteral; #ifdef DUMP_SUPPORT // Dump the Rose graph in graphviz representation. 
-void dumpRoseGraph(const RoseBuild &build, const RoseEngine *t, +void dumpRoseGraph(const RoseBuildImpl &build, const RoseEngine *t, const char *filename); -void dumpRose(const RoseBuild &build_base, const RoseEngine *t, - const Grey &grey); +void dumpRose(const RoseBuildImpl &build, const RoseEngine *t); + +void dumpMatcherLiterals(const std::vector &lits, + const std::string &name, const Grey &grey); #else static UNUSED -void dumpRoseGraph(const RoseBuild &, const RoseEngine *, const char *) { +void dumpRoseGraph(const RoseBuildImpl &, const RoseEngine *, const char *) { } static UNUSED -void dumpRose(const RoseBuild &, const RoseEngine *, const Grey &) { +void dumpRose(const RoseBuildImpl &, const RoseEngine *) { } +static UNUSED +void dumpMatcherLiterals(const std::vector &, const std::string &, + const Grey &) { +} #endif } // namespace ue2 diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 50e48a5b..5625437b 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -33,6 +33,7 @@ #include "rose_build_matchers.h" +#include "rose_build_dump.h" #include "rose_build_impl.h" #include "rose_build_lit_accel.h" #include "rose_build_width.h" @@ -645,9 +646,35 @@ void trim_to_suffix(Container &c, size_t len) { c.erase(c.begin(), c.begin() + suffix_len); } +namespace { + +/** \brief Prototype for literal matcher construction. */ +struct MatcherProto { + /** \brief Literal fragments used to construct the literal matcher. */ + vector lits; + + /** \brief Longer literals used for acceleration analysis. */ + vector accel_lits; + + /** \brief The history required by the literal matcher. */ + size_t history_required = 0; + + /** \brief Insert the contents of another MatcherProto. */ + void insert(const MatcherProto &a); +}; +} + +/** + * \brief Build up a vector of literals (and associated other data) for the + * given table. 
+ * + * If max_offset is specified (and not ROSE_BOUND_INF), then literals that can + * only lead to a pattern match after max_offset may be excluded. + */ +static MatcherProto makeMatcherProto(const RoseBuildImpl &build, rose_literal_table table, bool delay_rebuild, - size_t max_len, u32 max_offset) { + size_t max_len, u32 max_offset = ROSE_BOUND_INF) { MatcherProto mp; if (delay_rebuild) { @@ -794,6 +821,7 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, DEBUG_PRINTF("empty floating matcher\n"); return nullptr; } + dumpMatcherLiterals(mp.lits, "floating", build.cc.grey); for (const hwlmLiteral &lit : mp.lits) { *fgroups |= lit.groups; @@ -834,6 +862,7 @@ aligned_unique_ptr buildDelayRebuildMatcher(const RoseBuildImpl &build, DEBUG_PRINTF("empty delay rebuild matcher\n"); return nullptr; } + dumpMatcherLiterals(mp.lits, "delay_rebuild", build.cc.grey); auto hwlm = hwlmBuild(mp.lits, false, build.cc, build.getInitialGroups()); if (!hwlm) { @@ -883,6 +912,7 @@ aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, } mp.insert(mp_anchored); + dumpMatcherLiterals(mp.lits, "smallblock", build.cc.grey); // None of our literals should be longer than the small block limit. assert(all_of(begin(mp.lits), end(mp.lits), [](const hwlmLiteral &lit) { @@ -919,6 +949,7 @@ aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, assert(!build.ematcher_region_size); return nullptr; } + dumpMatcherLiterals(mp.lits, "eod", build.cc.grey); assert(build.ematcher_region_size); diff --git a/src/rose/rose_build_matchers.h b/src/rose/rose_build_matchers.h index 184c2633..cb56037d 100644 --- a/src/rose/rose_build_matchers.h +++ b/src/rose/rose_build_matchers.h @@ -35,42 +35,12 @@ #define ROSE_BUILD_MATCHERS_H #include "rose_build_impl.h" -#include "rose_build_lit_accel.h" -#include "hwlm/hwlm_literal.h" - -#include -#include +struct Grey; struct HWLM; namespace ue2 { -/** \brief Prototype for literal matcher construction. 
*/ -struct MatcherProto { - /** \brief Literal fragments used to construct the literal matcher. */ - std::vector lits; - - /** \brief Longer literals used for acceleration analysis. */ - std::vector accel_lits; - - /** \brief The history required by the literal matcher. */ - size_t history_required = 0; - - /** \brief Insert the contents of another MatcherProto. */ - void insert(const MatcherProto &a); -}; - -/** - * \brief Build up a vector of literals (and associated other data) for the - * given table. - * - * If max_offset is specified (and not ROSE_BOUND_INF), then literals that can - * only lead to a pattern match after max_offset may be excluded. - */ -MatcherProto makeMatcherProto(const RoseBuildImpl &build, - rose_literal_table table, bool delay_rebuild, - size_t max_len, u32 max_offset = ROSE_BOUND_INF); - aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, rose_group *fgroups,