/* * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Intel Corporation nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include "config.h" #include "rose_build_dump.h" #include "rose_build_impl.h" #include "rose_build_matchers.h" #include "rose_internal.h" #include "rose_program.h" #include "ue2common.h" #include "hs_compile.h" #include "hwlm/hwlm_build.h" #include "hwlm/hwlm_dump.h" #include "hwlm/hwlm_literal.h" #include "nfa/castlecompile.h" #include "nfa/nfa_build_util.h" #include "nfa/nfa_dump_api.h" #include "nfa/nfa_internal.h" #include "nfagraph/ng_dump.h" #include "som/slot_manager_dump.h" #include "util/compile_context.h" #include "util/container.h" #include "util/dump_charclass.h" #include "util/dump_util.h" #include "util/graph_range.h" #include "util/multibit.h" #include "util/multibit_build.h" #include "util/ue2string.h" #include #include #include #include #include #include #include #ifndef DUMP_SUPPORT #error No dump support! #endif using namespace std; namespace ue2 { /** \brief Return the kind of a left_id or a suffix_id. */ template string render_kind(const Graph &g) { if (g.graph()) { return to_string(g.graph()->kind); } if (g.dfa()) { return to_string(g.dfa()->kind); } if (g.haig()) { return to_string(g.haig()->kind); } if (g.castle()) { return to_string(g.castle()->kind); } return "UNKNOWN"; } namespace { struct rose_off { explicit rose_off(u32 j) : i(j) {} string str(void) const; u32 i; }; ostream &operator<<(ostream &o, const rose_off &to) { if (to.i == ROSE_BOUND_INF) { o << "inf"; } else { o << to.i; } return o; } string rose_off::str(void) const { ostringstream out; out << *this; return out.str(); } class RoseGraphWriter { public: RoseGraphWriter(const RoseBuildImpl &b_in, const map &frag_map_in, const map &lqm_in, const map &sqm_in, const RoseEngine *t_in) : frag_map(frag_map_in), leftfix_queue_map(lqm_in), suffix_queue_map(sqm_in), build(b_in), t(t_in) { for (const auto &m : build.ghost) { ghost.insert(m.second); } } void operator() (ostream &os, const RoseVertex &v) const { const RoseGraph &g = build.g; if (v == build.root) { os << "[label=\"\"]"; 
return; } if (v == build.anchored_root) { os << "[label=\"<^>\"]"; return; } os << "[label=\""; os << "index=" << g[v].index <<"\\n"; for (u32 lit_id : g[v].literals) { writeLiteral(os, lit_id); os << "\\n"; } os << "min_offset=" << g[v].min_offset; if (g[v].max_offset >= ROSE_BOUND_INF) { os << ", max_offset=inf"; } else { os << ", max_offset=" << g[v].max_offset; } os << "\\n"; if (!g[v].reports.empty()) { if (g[v].eod_accept) { os << "\\nACCEPT_EOD"; } else { os << "\\nACCEPT"; } os << " (rep=" << as_string_list(g[v].reports) << ")"; } if (g[v].suffix) { suffix_id suff(g[v].suffix); os << "\\n" << render_kind(suff) << " (top " << g[v].suffix.top; auto it = suffix_queue_map.find(suff); if (it != end(suffix_queue_map)) { os << ", queue " << it->second; } os << ")"; } if (ghost.find(v) != ghost.end()) { os << "\\nGHOST"; } if (g[v].left) { left_id left(g[v].left); os << "\\n" << render_kind(left) << " (queue "; auto it = leftfix_queue_map.find(left); if (it != end(leftfix_queue_map)) { os << it->second; } else { os << "??"; } os << ", report " << g[v].left.leftfix_report << ")"; } os << "\""; // Roles with a rose prefix get a colour. if (g[v].left) { os << " color=violetred "; } // Our accepts get different colours. if (!g[v].reports.empty()) { os << " color=blue "; } if (g[v].suffix) { os << " color=forestgreen "; } os << "]"; } void operator() (ostream &os, const RoseEdge &e) const { const RoseGraph &g = build.g; // Render the bounds on this edge. u32 minBound = g[e].minBound; u32 maxBound = g[e].maxBound; os << "[label=\""; if (minBound == 0 && maxBound == ROSE_BOUND_INF) { os << ".*"; } else if (minBound == 1 && maxBound == ROSE_BOUND_INF) { os << ".+"; } else { os << ".{" << minBound << ","; if (maxBound != ROSE_BOUND_INF) { os << maxBound; } os << "}"; } // If we lead to an infix, display which top we're using. 
RoseVertex v = target(e, g); if (g[v].left) { os << "\\nROSE TOP " << g[e].rose_top; } switch (g[e].history) { case ROSE_ROLE_HISTORY_NONE: break; case ROSE_ROLE_HISTORY_ANCH: os << "\\nANCH history"; break; case ROSE_ROLE_HISTORY_LAST_BYTE: os << "\\nLAST_BYTE history"; break; case ROSE_ROLE_HISTORY_INVALID: os << "\\nINVALID history"; break; } os << "\"]"; } private: // Render the literal associated with a vertex. void writeLiteral(ostream &os, u32 id) const { os << "lit=" << id; if (contains(frag_map, id)) { os << "/" << frag_map.at(id) << " "; } else { os << "/nofrag "; } const auto &lit = build.literals.at(id); os << '\'' << dotEscapeString(lit.s.get_string()) << '\''; if (lit.s.any_nocase()) { os << " (nocase)"; } if (lit.delay) { os << " +" << lit.delay; } } set ghost; const map &frag_map; const map &leftfix_queue_map; const map &suffix_queue_map; const RoseBuildImpl &build; const RoseEngine *t; }; } // namespace static map makeFragMap(const vector &fragments) { map fm; for (const auto &f : fragments) { for (u32 id : f.lit_ids) { fm[id] = f.fragment_id; } } return fm; } static void dumpRoseGraph(const RoseBuildImpl &build, const RoseEngine *t, const vector &fragments, const map &leftfix_queue_map, const map &suffix_queue_map, const char *filename) { const Grey &grey = build.cc.grey; /* "early" rose graphs should only be dumped if we are dumping intermediate * graphs. Early graphs can be identified by the lack of a RoseEngine. */ u32 flag_test = t ? 
Grey::DUMP_IMPL : Grey::DUMP_INT_GRAPH; if (!(grey.dumpFlags & flag_test)) { return; } stringstream ss; ss << grey.dumpPath << filename; DEBUG_PRINTF("dumping graph to %s\n", ss.str().c_str()); ofstream os(ss.str()); auto frag_map = makeFragMap(fragments); RoseGraphWriter writer(build, frag_map, leftfix_queue_map, suffix_queue_map, t); writeGraphviz(os, build.g, writer, get(boost::vertex_index, build.g)); } void dumpRoseGraph(const RoseBuildImpl &build, const char *filename) { dumpRoseGraph(build, nullptr, {}, {}, {}, filename); } namespace { struct CompareVertexRole { explicit CompareVertexRole(const RoseGraph &g_in) : g(g_in) {} inline bool operator()(const RoseVertex &a, const RoseVertex &b) const { return g[a].index < g[b].index; } private: const RoseGraph &g; }; } static void lit_graph_info(const RoseBuildImpl &build, const rose_literal_info &li, u32 *min_offset, bool *in_root_role) { *min_offset = ~0U; *in_root_role = false; for (auto v : li.vertices) { *in_root_role |= build.isRootSuccessor(v); LIMIT_TO_AT_MOST(min_offset, build.g[v].min_offset); } } static void dumpRoseLiterals(const RoseBuildImpl &build, const vector &fragments, const Grey &grey) { const RoseGraph &g = build.g; map frag_map = makeFragMap(fragments); DEBUG_PRINTF("dumping literals\n"); ofstream os(grey.dumpPath + "rose_literals.txt"); os << "ROSE LITERALS: a total of " << build.literals.size() << " literals and " << num_vertices(g) << " roles." 
<< endl << endl; for (u32 id = 0; id < build.literals.size(); id++) { const auto &lit = build.literals.at(id); const ue2_literal &s = lit.s; const rose_literal_info &lit_info = build.literal_info[id]; switch (lit.table) { case ROSE_ANCHORED: os << "ANCHORED"; break; case ROSE_FLOATING: os << "FLOATING"; break; case ROSE_EOD_ANCHORED: os << "EOD-ANCHORED"; break; case ROSE_ANCHORED_SMALL_BLOCK: os << "SMALL-BLOCK"; break; case ROSE_EVENT: os << "EVENT"; break; } os << " ID " << id; if (contains(frag_map, id)) { os << "/" << frag_map.at(id); } os << ": \"" << escapeString(s.get_string()) << "\"" << " (len " << s.length() << ","; if (s.any_nocase()) { os << " nocase,"; } if (lit_info.requires_benefits) { os << " benefits,"; } if (lit.delay) { os << " delayed "<< lit.delay << ","; } os << " groups 0x" << hex << setw(16) << setfill('0') << lit_info.group_mask << dec << ","; if (lit_info.squash_group) { os << " squashes group,"; } u32 min_offset; bool in_root_role; lit_graph_info(build, lit_info, &min_offset, &in_root_role); os << " min offset " << min_offset; if (in_root_role) { os << " root literal"; } os << ") roles=" << lit_info.vertices.size() << endl; if (!lit_info.delayed_ids.empty()) { os << " Children:"; for (u32 d_id : lit_info.delayed_ids) { os << " " << d_id; } os << endl; } // Temporary vector, so that we can sort the output by role. 
/**
 * \brief Render the bytes in [i, end) as lower-case hex, two digits per
 * element, with no separators.
 *
 * Only the low eight bits of each element are printed.
 */
template<class Iter>
static
std::string toHex(Iter i, const Iter &end) {
    std::ostringstream oss;
    for (; i != end; ++i) {
        // Mask to one byte so that negative char values print as two digits.
        const unsigned byte = static_cast<unsigned>(*i) & 0xffU;
        oss << std::hex << std::setw(2) << std::setfill('0') << byte;
    }
    return oss.str();
}

/**
 * \brief True if \a c is one of the characters that toRegex() escapes with a
 * backslash.
 */
static
bool isMetaChar(char c) {
    switch (c) {
    case '#':
    case '$':
    case '(':
    case ')':
    case '*':
    case '+':
    case '.':
    case '/':
    case '?':
    case '[':
    case '\\':
    case ']':
    case '^':
    case '{':
    case '|':
    case '}':
        return true;
    default:
        return false;
    }
}

/**
 * \brief Render a literal string as regex text.
 *
 * Characters in the range 0x20..0x7e are written as-is, with a leading
 * backslash if isMetaChar() says so. Newline, carriage return and tab become
 * \\n, \\r and \\t. Every other byte is written as a two-digit \\xNN escape.
 */
static
std::string toRegex(const std::string &lit) {
    std::ostringstream os;
    for (char c : lit) {
        if (0x20 <= c && c <= 0x7e) {
            if (isMetaChar(c)) {
                os << "\\" << c;
            } else {
                os << c;
            }
        } else if (c == '\n') {
            os << "\\n";
        } else if (c == '\r') {
            os << "\\r";
        } else if (c == '\t') {
            os << "\\t";
        } else {
            os << "\\x" << std::hex << std::setw(2) << std::setfill('0')
               << static_cast<unsigned>(c & 0xff) << std::dec;
        }
    }
    return os.str();
}
u32 i = 0; for (const hwlmLiteral &lit : lits) { // First, detail in a comment. of << "# id=" << lit.id; if (!lit.msk.empty()) { of << " msk=0x" << toHex(lit.msk.begin(), lit.msk.end()); of << " cmp=0x" << toHex(lit.cmp.begin(), lit.cmp.end()); } of << " groups=0x" << hex << setfill('0') << lit.groups << dec; if (lit.noruns) { of << " noruns"; } of << endl; // Second, literal rendered as a regex. of << i << ":/" << toRegex(lit.s) << (lit.nocase ? "/i" : "/"); of << endl; i++; } of.close(); } static const void *loadFromByteCodeOffset(const RoseEngine *t, u32 offset) { if (!offset) { return nullptr; } const char *lt = (const char *)t + offset; return lt; } static const void *getAnchoredMatcher(const RoseEngine *t) { return loadFromByteCodeOffset(t, t->amatcherOffset); } static const HWLM *getFloatingMatcher(const RoseEngine *t) { return (const HWLM *)loadFromByteCodeOffset(t, t->fmatcherOffset); } static const HWLM *getDelayRebuildMatcher(const RoseEngine *t) { return (const HWLM *)loadFromByteCodeOffset(t, t->drmatcherOffset); } static const HWLM *getEodMatcher(const RoseEngine *t) { return (const HWLM *)loadFromByteCodeOffset(t, t->ematcherOffset); } static const HWLM *getSmallBlockMatcher(const RoseEngine *t) { return (const HWLM *)loadFromByteCodeOffset(t, t->sbmatcherOffset); } static CharReach bitvectorToReach(const u8 *reach) { CharReach cr; for (size_t i = 0; i < N_CHARS; i++) { if (reach[i / 8] & (1U << (i % 8))) { cr.set(i); } } return cr; } static CharReach multiBitvectorToReach(const u8 *reach, u8 path_mask) { CharReach cr; for (size_t i = 0; i < N_CHARS; i++) { if (reach[i] & path_mask) { cr.set(i); } } return cr; } static void dumpLookaround(ofstream &os, const RoseEngine *t, const ROSE_STRUCT_CHECK_LOOKAROUND *ri) { assert(ri); const u8 *base = (const u8 *)t; const s8 *look = (const s8 *)base + ri->look_index; const s8 *look_end = look + ri->count; const u8 *reach = base + ri->reach_index; os << " contents:" << endl; for (; look < look_end; look++, 
reach += REACH_BITVECTOR_LEN) { os << " " << std::setw(4) << std::setfill(' ') << int{*look} << ": "; describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT); os << endl; } } static void dumpMultipathLookaround(ofstream &os, const RoseEngine *t, const ROSE_STRUCT_MULTIPATH_LOOKAROUND *ri) { assert(ri); const u8 *base = (const u8 *)t; const s8 *look_begin = (const s8 *)base + ri->look_index; const s8 *look_end = look_begin + ri->count; const u8 *reach_begin = base + ri->reach_index; os << " contents:" << endl; u32 path_mask = ri->start_mask[0]; while (path_mask) { u32 path = findAndClearLSB_32(&path_mask); os << " Path #" << path << ":" << endl; os << " "; const s8 *look = look_begin; const u8 *reach = reach_begin; for (; look < look_end; look++, reach += MULTI_REACH_BITVECTOR_LEN) { CharReach cr = multiBitvectorToReach(reach, 1U << path); if (cr.any() && !cr.all()) { os << "<" << int(*look) << ": "; describeClass(os, cr, 1000, CC_OUT_TEXT); os << "> "; } } os << endl; } } static vector sparseIterValues(const mmbit_sparse_iter *it, u32 num_bits) { vector keys; if (num_bits == 0) { return keys; } // Populate a multibit structure with all-ones. Note that the multibit // runtime assumes that it is always safe to read 8 bytes, so we must // over-allocate for smaller sizes. const size_t num_bytes = mmbit_size(num_bits); vector bits(max(size_t{8}, num_bytes), u8{0xff}); // All bits on. 
const u8 *b = bits.data(); if (num_bytes < 8) { b += 8 - num_bytes; } vector state(MAX_SPARSE_ITER_STATES); mmbit_sparse_state *s = state.data(); u32 idx = 0; u32 i = mmbit_sparse_iter_begin(b, num_bits, &idx, it, s); while (i != MMB_INVALID) { keys.push_back(i); i = mmbit_sparse_iter_next(b, num_bits, i, &idx, it, s); } return keys; } static void dumpJumpTable(ofstream &os, const RoseEngine *t, const ROSE_STRUCT_SPARSE_ITER_BEGIN *ri) { auto *it = (const mmbit_sparse_iter *)loadFromByteCodeOffset(t, ri->iter_offset); auto *jumps = (const u32 *)loadFromByteCodeOffset(t, ri->jump_table); for (const auto &key : sparseIterValues(it, t->rolesWithStateCount)) { os << " " << std::setw(4) << std::setfill(' ') << key << " : +" << *jumps << endl; ++jumps; } } static void dumpSomOperation(ofstream &os, const som_operation &op) { os << " som (type=" << u32{op.type} << ", onmatch=" << op.onmatch; switch (op.type) { case SOM_EXTERNAL_CALLBACK_REV_NFA: case SOM_INTERNAL_LOC_SET_REV_NFA: case SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET: case SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE: os << ", revNfaIndex=" << op.aux.revNfaIndex; break; default: os << ", somDistance=" << op.aux.somDistance; break; } os << ")" << endl; } static string dumpStrMask(const u8 *mask, size_t len) { ostringstream oss; for (size_t i = 0; i < len; i++) { oss << std::hex << std::setw(2) << std::setfill('0') << u32{mask[i]} << " "; } return oss.str(); } static CharReach shufti2cr(const u8 *lo, const u8 *hi, u8 bucket_mask) { CharReach cr; for (u32 i = 0; i < N_CHARS; i++) { if(lo[i & 0xf] & hi[i >> 4] & bucket_mask) { cr.set(i); } } return cr; } static void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi, const u8 *bucket_mask, u32 neg_mask, s32 offset) { assert(len == 16 || len == 32); os << " contents:" << endl; for (u32 idx = 0; idx < len; idx++) { CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]); if (neg_mask & (1U << idx)) { cr.flip(); } if (cr.any() && !cr.all()) { os << " " << 
std::setw(4) << std::setfill(' ') << int(offset + idx) << ": "; describeClass(os, cr, 1000, CC_OUT_TEXT); os << endl; } } } static void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi, const u8 *lo_2, const u8 *hi_2, const u8 *bucket_mask, const u8 *bucket_mask_2, u32 neg_mask, s32 offset) { assert(len == 16 || len == 32); os << " contents:" << endl; for (u32 idx = 0; idx < len; idx++) { CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]); cr |= shufti2cr(lo_2, hi_2, bucket_mask_2[idx]); if (neg_mask & (1U << idx)) { cr.flip(); } if (cr.any() && !cr.all()) { os << " " << std::setw(4) << std::setfill(' ') << int(offset + idx) << ": "; describeClass(os, cr, 1000, CC_OUT_TEXT); os << endl; } } } static void dumpMultipathShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi, const u8 *bucket_mask, const u8 *data_offset, u64a neg_mask, s32 base_offset) { assert(len == 16 || len == 32 || len == 64); os << " contents:" << endl; u32 path = 0; for (u32 idx = 0; idx < len; idx++) { CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]); if (neg_mask & (1ULL << idx)) { cr.flip(); } if (cr.any() && !cr.all()) { if (idx == 0 || data_offset[idx - 1] > data_offset[idx]) { path++; if (idx) { os << endl; } os << " Path #" << path << ":" << endl; os << " "; } os << "<" << int(base_offset + data_offset[idx]) << ": "; describeClass(os, cr, 1000, CC_OUT_TEXT); os << "> "; } } os << endl; } static void dumpMultipathShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi, const u8 *lo_2, const u8 *hi_2, const u8 *bucket_mask, const u8 *bucket_mask_2, const u8 *data_offset, u32 neg_mask, s32 base_offset) { assert(len == 16 || len == 32 || len == 64); os << " contents:"; u32 path = 0; for (u32 idx = 0; idx < len; idx++) { CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]); cr |= shufti2cr(lo_2, hi_2, bucket_mask_2[idx]); if (neg_mask & (1ULL << idx)) { cr.flip(); } if (cr.any() && !cr.all()) { if (idx == 0 || data_offset[idx - 1] > data_offset[idx]) { path++; os << 
endl; os << " Path #" << path << ":" << endl; os << " "; } os << "<" << int(base_offset + data_offset[idx]) << ": "; describeClass(os, cr, 1000, CC_OUT_TEXT); os << "> "; } } os << endl; } #define PROGRAM_CASE(name) \ case ROSE_INSTR_##name: { \ os << " " << std::setw(4) << std::setfill('0') << (pc - pc_base) \ << ": " #name "\n"; \ const auto *ri = (const struct ROSE_STRUCT_##name *)pc; #define PROGRAM_NEXT_INSTRUCTION \ pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ break; \ } static void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { const char *pc_base = pc; for (;;) { u8 code = *(const u8 *)pc; assert(code <= LAST_ROSE_INSTRUCTION); const size_t offset = pc - pc_base; switch (code) { PROGRAM_CASE(END) { return; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(ANCHORED_DELAY) { os << " groups 0x" << std::hex << ri->groups << std::dec << endl; os << " anch_id " << ri->anch_id << "\n"; os << " done_jump " << offset + ri->done_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_LIT_EARLY) { os << " min_offset " << ri->min_offset << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_GROUPS) { os << " groups 0x" << std::hex << ri->groups << std::dec << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_ONLY_EOD) { os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_BOUNDS) { os << " min_bound " << ri->min_bound << endl; os << " max_bound " << ri->max_bound << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_NOT_HANDLED) { os << " key " << ri->key << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_SINGLE_LOOKAROUND) { os << " offset " << int{ri->offset} << endl; os << " reach_index " << ri->reach_index << endl; os << " fail_jump " << offset + ri->fail_jump << endl; const u8 *reach = (const u8 *)t + ri->reach_index; os << " 
contents "; describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT); os << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_LOOKAROUND) { os << " look_index " << ri->look_index << endl; os << " reach_index " << ri->reach_index << endl; os << " count " << ri->count << endl; os << " fail_jump " << offset + ri->fail_jump << endl; dumpLookaround(os, t, ri); } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_MASK) { os << " and_mask 0x" << std::hex << std::setw(16) << std::setfill('0') << ri->and_mask << std::dec << endl; os << " cmp_mask 0x" << std::hex << std::setw(16) << std::setfill('0') << ri->cmp_mask << std::dec << endl; os << " neg_mask 0x" << std::hex << std::setw(16) << std::setfill('0') << ri->neg_mask << std::dec << endl; os << " offset " << ri->offset << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_MASK_32) { os << " and_mask " << dumpStrMask(ri->and_mask, sizeof(ri->and_mask)) << endl; os << " cmp_mask " << dumpStrMask(ri->cmp_mask, sizeof(ri->cmp_mask)) << endl; os << " neg_mask 0x" << std::hex << std::setw(8) << std::setfill('0') << ri->neg_mask << std::dec << endl; os << " offset " << ri->offset << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_BYTE) { os << " and_mask 0x" << std::hex << std::setw(2) << std::setfill('0') << u32{ri->and_mask} << std::dec << endl; os << " cmp_mask 0x" << std::hex << std::setw(2) << std::setfill('0') << u32{ri->cmp_mask} << std::dec << endl; os << " negation " << u32{ri->negation} << endl; os << " offset " << ri->offset << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_SHUFTI_16x8) { os << " nib_mask " << dumpStrMask(ri->nib_mask, sizeof(ri->nib_mask)) << endl; os << " bucket_select_mask " << dumpStrMask(ri->bucket_select_mask, sizeof(ri->bucket_select_mask)) << endl; os << " neg_mask 0x" << std::hex << std::setw(8) << std::setfill('0') << 
ri->neg_mask << std::dec << endl; os << " offset " << ri->offset << endl; os << " fail_jump " << offset + ri->fail_jump << endl; dumpLookaroundShufti(os, 16, ri->nib_mask, ri->nib_mask + 16, ri->bucket_select_mask, ri->neg_mask, ri->offset); } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_SHUFTI_32x8) { os << " hi_mask " << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) << endl; os << " lo_mask " << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) << endl; os << " bucket_select_mask " << dumpStrMask(ri->bucket_select_mask, sizeof(ri->bucket_select_mask)) << endl; os << " neg_mask 0x" << std::hex << std::setw(8) << std::setfill('0') << ri->neg_mask << std::dec << endl; os << " offset " << ri->offset << endl; os << " fail_jump " << offset + ri->fail_jump << endl; dumpLookaroundShufti(os, 32, ri->lo_mask, ri->hi_mask, ri->bucket_select_mask, ri->neg_mask, ri->offset); } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_SHUFTI_16x16) { os << " hi_mask " << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) << endl; os << " lo_mask " << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) << endl; os << " bucket_select_mask " << dumpStrMask(ri->bucket_select_mask, sizeof(ri->bucket_select_mask)) << endl; os << " neg_mask 0x" << std::hex << std::setw(8) << std::setfill('0') << ri->neg_mask << std::dec << endl; os << " offset " << ri->offset << endl; os << " fail_jump " << offset + ri->fail_jump << endl; dumpLookaroundShufti(os, 16, ri->lo_mask, ri->hi_mask, ri->lo_mask + 16, ri->hi_mask + 16, ri->bucket_select_mask, ri->bucket_select_mask + 16, ri->neg_mask, ri->offset); } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_SHUFTI_32x16) { os << " hi_mask " << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) << endl; os << " lo_mask " << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) << endl; os << " bucket_select_mask_hi " << dumpStrMask(ri->bucket_select_mask_hi, sizeof(ri->bucket_select_mask_hi)) << endl; os << " bucket_select_mask_lo " << dumpStrMask(ri->bucket_select_mask_lo, 
sizeof(ri->bucket_select_mask_lo)) << endl; os << " neg_mask 0x" << std::hex << std::setw(8) << std::setfill('0') << ri->neg_mask << std::dec << endl; os << " offset " << ri->offset << endl; os << " fail_jump " << offset + ri->fail_jump << endl; dumpLookaroundShufti(os, 32, ri->lo_mask, ri->hi_mask, ri->lo_mask + 16, ri->hi_mask + 16, ri->bucket_select_mask_lo, ri->bucket_select_mask_hi, ri->neg_mask, ri->offset); } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_INFIX) { os << " queue " << ri->queue << endl; os << " lag " << ri->lag << endl; os << " report " << ri->report << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_PREFIX) { os << " queue " << ri->queue << endl; os << " lag " << ri->lag << endl; os << " report " << ri->report << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(PUSH_DELAYED) { os << " delay " << u32{ri->delay} << endl; os << " index " << ri->index << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(DUMMY_NOP) {} PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CATCH_UP) {} PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CATCH_UP_MPV) {} PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(SOM_ADJUST) { os << " distance " << ri->distance << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(SOM_LEFTFIX) { os << " queue " << ri->queue << endl; os << " lag " << ri->lag << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(SOM_FROM_REPORT) { dumpSomOperation(os, ri->som); } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(SOM_ZERO) {} PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(TRIGGER_INFIX) { os << " queue " << ri->queue << endl; os << " event " << ri->event << endl; os << " cancel " << u32{ri->cancel} << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(TRIGGER_SUFFIX) { os << " queue " << ri->queue << endl; os << " event " << ri->event << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(DEDUPE) { os << " quash_som " << u32{ri->quash_som} << endl; os << " dkey " << ri->dkey << endl; os << " offset_adjust " << 
ri->offset_adjust << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(DEDUPE_SOM) { os << " quash_som " << u32{ri->quash_som} << endl; os << " dkey " << ri->dkey << endl; os << " offset_adjust " << ri->offset_adjust << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_CHAIN) { os << " event " << ri->event << endl; os << " top_squash_distance " << ri->top_squash_distance << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM_INT) { dumpSomOperation(os, ri->som); } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM_AWARE) { dumpSomOperation(os, ri->som); } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT) { os << " onmatch " << ri->onmatch << endl; os << " offset_adjust " << ri->offset_adjust << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_EXHAUST) { os << " onmatch " << ri->onmatch << endl; os << " offset_adjust " << ri->offset_adjust << endl; os << " ekey " << ri->ekey << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM) { os << " onmatch " << ri->onmatch << endl; os << " offset_adjust " << ri->offset_adjust << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM_EXHAUST) { os << " onmatch " << ri->onmatch << endl; os << " offset_adjust " << ri->offset_adjust << endl; os << " ekey " << ri->ekey << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(DEDUPE_AND_REPORT) { os << " quash_som " << u32{ri->quash_som} << endl; os << " dkey " << ri->dkey << endl; os << " onmatch " << ri->onmatch << endl; os << " offset_adjust " << ri->offset_adjust << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(FINAL_REPORT) { os << " onmatch " << ri->onmatch << endl; os << " offset_adjust " << ri->offset_adjust << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_EXHAUSTED) { os << " ekey " << ri->ekey << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION 
PROGRAM_CASE(CHECK_MIN_LENGTH) { os << " end_adj " << ri->end_adj << endl; os << " min_length " << ri->min_length << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(SET_STATE) { os << " index " << ri->index << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(SET_GROUPS) { os << " groups 0x" << std::hex << ri->groups << std::dec << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(SQUASH_GROUPS) { os << " groups 0x" << std::hex << ri->groups << std::dec << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_STATE) { os << " index " << ri->index << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(SPARSE_ITER_BEGIN) { os << " iter_offset " << ri->iter_offset << endl; os << " jump_table " << ri->jump_table << endl; dumpJumpTable(os, t, ri); os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(SPARSE_ITER_NEXT) { os << " iter_offset " << ri->iter_offset << endl; os << " jump_table " << ri->jump_table << endl; os << " state " << ri->state << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(SPARSE_ITER_ANY) { os << " iter_offset " << ri->iter_offset << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(ENGINES_EOD) { os << " iter_offset " << ri->iter_offset << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(SUFFIXES_EOD) {} PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(MATCHER_EOD) {} PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_LONG_LIT) { os << " lit_offset " << ri->lit_offset << endl; os << " lit_length " << ri->lit_length << endl; const char *lit = (const char *)t + ri->lit_offset; os << " literal: \"" << escapeString(string(lit, ri->lit_length)) << "\"" << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) { os << " lit_offset " << ri->lit_offset << endl; os << " 
lit_length " << ri->lit_length << endl; const char *lit = (const char *)t + ri->lit_offset; os << " literal: \"" << escapeString(string(lit, ri->lit_length)) << "\"" << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_MED_LIT) { os << " lit_offset " << ri->lit_offset << endl; os << " lit_length " << ri->lit_length << endl; const char *lit = (const char *)t + ri->lit_offset; os << " literal: \"" << escapeString(string(lit, ri->lit_length)) << "\"" << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_MED_LIT_NOCASE) { os << " lit_offset " << ri->lit_offset << endl; os << " lit_length " << ri->lit_length << endl; const char *lit = (const char *)t + ri->lit_offset; os << " literal: \"" << escapeString(string(lit, ri->lit_length)) << "\"" << endl; os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CLEAR_WORK_DONE) {} PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(MULTIPATH_LOOKAROUND) { os << " look_index " << ri->look_index << endl; os << " reach_index " << ri->reach_index << endl; os << " count " << ri->count << endl; os << " last_start " << ri->last_start << endl; os << " start_mask " << dumpStrMask(ri->start_mask, sizeof(ri->start_mask)) << endl; os << " fail_jump " << offset + ri->fail_jump << endl; dumpMultipathLookaround(os, t, ri); } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_16x8) { os << " nib_mask " << dumpStrMask(ri->nib_mask, sizeof(ri->nib_mask)) << endl; os << " bucket_select_mask " << dumpStrMask(ri->bucket_select_mask, sizeof(ri->bucket_select_mask)) << endl; os << " data_select_mask " << dumpStrMask(ri->data_select_mask, sizeof(ri->data_select_mask)) << endl; os << " hi_bits_mask 0x" << std::hex << std::setw(4) << std::setfill('0') << ri->hi_bits_mask << std::dec << endl; os << " lo_bits_mask 0x" << std::hex << std::setw(4) << std::setfill('0') << ri->lo_bits_mask << std::dec << endl; os 
<< " neg_mask 0x" << std::hex << std::setw(4) << std::setfill('0') << ri->neg_mask << std::dec << endl; os << " base_offset " << ri->base_offset << endl; os << " last_start " << ri->last_start << endl; os << " fail_jump " << offset + ri->fail_jump << endl; dumpMultipathShufti(os, 16, ri->nib_mask, ri->nib_mask + 16, ri->bucket_select_mask, ri->data_select_mask, ri->neg_mask, ri->base_offset); } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x8) { os << " hi_mask " << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) << endl; os << " lo_mask " << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) << endl; os << " bucket_select_mask " << dumpStrMask(ri->bucket_select_mask, sizeof(ri->bucket_select_mask)) << endl; os << " data_select_mask " << dumpStrMask(ri->data_select_mask, sizeof(ri->data_select_mask)) << endl; os << " hi_bits_mask 0x" << std::hex << std::setw(8) << std::setfill('0') << ri->hi_bits_mask << std::dec << endl; os << " lo_bits_mask 0x" << std::hex << std::setw(8) << std::setfill('0') << ri->lo_bits_mask << std::dec << endl; os << " neg_mask 0x" << std::hex << std::setw(8) << std::setfill('0') << ri->neg_mask << std::dec << endl; os << " base_offset " << ri->base_offset << endl; os << " last_start " << ri->last_start << endl; os << " fail_jump " << offset + ri->fail_jump << endl; dumpMultipathShufti(os, 32, ri->lo_mask, ri->hi_mask, ri->bucket_select_mask, ri->data_select_mask, ri->neg_mask, ri->base_offset); } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x16) { os << " hi_mask " << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) << endl; os << " lo_mask " << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) << endl; os << " bucket_select_mask_hi " << dumpStrMask(ri->bucket_select_mask_hi, sizeof(ri->bucket_select_mask_hi)) << endl; os << " bucket_select_mask_lo " << dumpStrMask(ri->bucket_select_mask_lo, sizeof(ri->bucket_select_mask_lo)) << endl; os << " data_select_mask " << dumpStrMask(ri->data_select_mask, 
sizeof(ri->data_select_mask)) << endl; os << " hi_bits_mask 0x" << std::hex << std::setw(8) << std::setfill('0') << ri->hi_bits_mask << std::dec << endl; os << " lo_bits_mask 0x" << std::hex << std::setw(8) << std::setfill('0') << ri->lo_bits_mask << std::dec << endl; os << " neg_mask 0x" << std::hex << std::setw(8) << std::setfill('0') << ri->neg_mask << std::dec << endl; os << " base_offset " << ri->base_offset << endl; os << " last_start " << ri->last_start << endl; os << " fail_jump " << offset + ri->fail_jump << endl; dumpMultipathShufti(os, 32, ri->lo_mask, ri->hi_mask, ri->lo_mask + 16, ri->hi_mask + 16, ri->bucket_select_mask_lo, ri->bucket_select_mask_hi, ri->data_select_mask, ri->neg_mask, ri->base_offset); } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_64) { os << " hi_mask " << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) << endl; os << " lo_mask " << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) << endl; os << " bucket_select_mask " << dumpStrMask(ri->bucket_select_mask, sizeof(ri->bucket_select_mask)) << endl; os << " data_select_mask " << dumpStrMask(ri->data_select_mask, sizeof(ri->data_select_mask)) << endl; os << " hi_bits_mask 0x" << std::hex << std::setw(16) << std::setfill('0') << ri->hi_bits_mask << std::dec << endl; os << " lo_bits_mask 0x" << std::hex << std::setw(16) << std::setfill('0') << ri->lo_bits_mask << std::dec << endl; os << " neg_mask 0x" << std::hex << std::setw(16) << std::setfill('0') << ri->neg_mask << std::dec << endl; os << " base_offset " << ri->base_offset << endl; os << " last_start " << ri->last_start << endl; os << " fail_jump " << offset + ri->fail_jump << endl; dumpMultipathShufti(os, 64, ri->lo_mask, ri->hi_mask, ri->bucket_select_mask, ri->data_select_mask, ri->neg_mask, ri->base_offset); } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(INCLUDED_JUMP) { os << " child_offset " << ri->child_offset << endl; os << " squash " << (u32)ri->squash << endl; } PROGRAM_NEXT_INSTRUCTION default: os << " UNKNOWN (code " 
<< int{code} << ")" << endl; os << " " << endl; return; } } } #undef PROGRAM_CASE #undef PROGRAM_NEXT_INSTRUCTION static void dumpRoseLitPrograms(const vector &fragments, const RoseEngine *t, const string &filename) { ofstream os(filename); // Collect all programs referenced by a literal fragment. vector programs; for (const auto &frag : fragments) { if (frag.lit_program_offset) { programs.push_back(frag.lit_program_offset); } if (frag.delay_program_offset) { programs.push_back(frag.delay_program_offset); } } sort_and_unique(programs); for (u32 prog_offset : programs) { os << "Program @ " << prog_offset << ":" << endl; const char *prog = (const char *)loadFromByteCodeOffset(t, prog_offset); dumpProgram(os, t, prog); os << endl; } os.close(); } static void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) { ofstream os(filename); const char *base = (const char *)t; if (t->eodProgramOffset) { os << "EOD Program @ " << t->eodProgramOffset << ":" << endl; dumpProgram(os, t, base + t->eodProgramOffset); os << endl; } else { os << "" << endl; } os.close(); } static void dumpRoseReportPrograms(const RoseEngine *t, const string &filename) { ofstream os(filename); const u32 *programs = (const u32 *)loadFromByteCodeOffset(t, t->reportProgramOffset); for (u32 i = 0; i < t->reportProgramCount; i++) { os << "Report " << i << endl; os << "---------------" << endl; if (programs[i]) { os << "Program @ " << programs[i] << ":" << endl; const char *prog = (const char *)loadFromByteCodeOffset(t, programs[i]); dumpProgram(os, t, prog); } else { os << "" << endl; } } os.close(); } static void dumpRoseAnchoredPrograms(const RoseEngine *t, const string &filename) { ofstream os(filename); const u32 *programs = (const u32 *)loadFromByteCodeOffset(t, t->anchoredProgramOffset); for (u32 i = 0; i < t->anchored_count; i++) { os << "Anchored entry " << i << endl; os << "---------------" << endl; if (programs[i]) { os << "Program @ " << programs[i] << ":" << endl; const char *prog 
= (const char *)loadFromByteCodeOffset(t, programs[i]); dumpProgram(os, t, prog); } else { os << "" << endl; } os << endl; } os.close(); } static void dumpRoseDelayPrograms(const RoseEngine *t, const string &filename) { ofstream os(filename); const u32 *programs = (const u32 *)loadFromByteCodeOffset(t, t->delayProgramOffset); for (u32 i = 0; i < t->delay_count; i++) { os << "Delay entry " << i << endl; os << "---------------" << endl; if (programs[i]) { os << "Program @ " << programs[i] << ":" << endl; const char *prog = (const char *)loadFromByteCodeOffset(t, programs[i]); dumpProgram(os, t, prog); } else { os << "" << endl; } os << endl; } os.close(); } static void dumpNfaNotes(ofstream &fout, const RoseEngine *t, const NFA *n) { const u32 qindex = n->queueIndex; if (qindex < t->outfixBeginQueue) { fout << "chained"; return; } if (qindex < t->outfixEndQueue) { fout << "outfix"; return; } const NfaInfo *nfa_info = getNfaInfoByQueue(t, qindex); const NFA *nfa = getNfaByInfo(t, nfa_info); if (nfa_info->eod) { fout << "eod "; } if (qindex < t->leftfixBeginQueue) { fout << "suffix"; return; } const LeftNfaInfo *left = getLeftInfoByQueue(t, qindex); if (left->eager) { fout << "eager "; } if (left->transient) { fout << "transient " << (u32)left->transient << " "; } if (left->infix) { fout << "infix"; u32 maxQueueLen = left->maxQueueLen; if (maxQueueLen != (u32)(-1)) { fout << " maxqlen=" << maxQueueLen; } } else { fout << "prefix"; } fout << " maxlag=" << left->maxLag; if (left->stopTable) { fout << " miracles"; } if (left->countingMiracleOffset) { const RoseCountingMiracle *cm = (const RoseCountingMiracle *)((const char *)t + left->countingMiracleOffset); fout << " counting_miracle:" << (int)cm->count << (cm->shufti ? 
"s" : "v"); } if (nfaSupportsZombie(nfa)) { fout << " zombie"; } if (left->eod_check) { fout << " eod"; } } static void dumpComponentInfo(const RoseEngine *t, const string &base) { stringstream ss; ss << base << "rose_components.txt"; ofstream fout(ss.str().c_str()); fout << "Index Offset\tEngine \tStates S.State Bytes Notes\n"; for (u32 i = 0; i < t->queueCount; i++) { const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); const NFA *n = getNfaByInfo(t, nfa_info); fout << left << setw(6) << i << " "; fout << left << ((const char *)n - (const char *)t) << "\t"; /* offset */ fout << left << setw(16) << describe(*n) << "\t"; fout << left << setw(6) << n->nPositions << " "; fout << left << setw(7) << n->streamStateSize << " "; fout << left << setw(7) << n->length << " "; dumpNfaNotes(fout, t, n); fout << endl; } } static void dumpComponentInfoCsv(const RoseEngine *t, const string &base) { StdioFile f(base + "/rose_components.csv", "w"); fprintf(f, "Index, Offset,Engine Type,States,Stream State," "Bytecode Size,Kind,Notes\n"); for (u32 i = 0; i < t->queueCount; i++) { const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); const NFA *n = getNfaByInfo(t, nfa_info); nfa_kind kind; stringstream notes; if (i < t->outfixBeginQueue) { notes << "chained;"; } if (nfa_info->eod) { notes << "eod;"; } if (i < t->outfixEndQueue) { kind = NFA_OUTFIX; } else if (i < t->leftfixBeginQueue) { kind = NFA_SUFFIX; } else { const LeftNfaInfo *left = getLeftInfoByQueue(t, i); if (left->eager) { notes << "eager;"; } if (left->transient) { notes << "transient " << (u32)left->transient << ";"; } if (left->infix) { kind = NFA_INFIX; u32 maxQueueLen = left->maxQueueLen; if (maxQueueLen != (u32)(-1)) { notes << "maxqlen=" << maxQueueLen << ";"; } } else { kind = NFA_PREFIX; } notes << "maxlag=" << left->maxLag << ";"; if (left->stopTable) { notes << "miracles;"; } if (left->countingMiracleOffset) { auto cm = (const RoseCountingMiracle *) ((const char *)t + left->countingMiracleOffset); notes << 
"counting_miracle:" << (int)cm->count << (cm->shufti ? "s" : "v") << ";"; } if (nfaSupportsZombie(n)) { notes << " zombie;"; } if (left->eod_check) { notes << "left_eod;"; } } fprintf(f, "%u,%zd,\"%s\",%u,%u,%u,%s,%s\n", i, (const char *)n - (const char *)t, describe(*n).c_str(), n->nPositions, n->streamStateSize, n->length, to_string(kind).c_str(), notes.str().c_str()); } } static void dumpExhaust(const RoseEngine *t, const string &base) { StdioFile f(base + "/rose_exhaust.csv", "w"); const NfaInfo *infos = (const NfaInfo *)((const char *)t + t->nfaInfoOffset); u32 queue_count = t->activeArrayCount; for (u32 i = 0; i < queue_count; ++i) { u32 ekey_offset = infos[i].ekeyListOffset; fprintf(f, "%u (%u):", i, ekey_offset); if (ekey_offset) { const u32 *ekeys = (const u32 *)((const char *)t + ekey_offset); while (1) { u32 e = *ekeys; ++ekeys; if (e == ~0U) { break; } fprintf(f, " %u", e); } } fprintf(f, "\n"); } } static void dumpNfas(const RoseEngine *t, bool dump_raw, const string &base) { dumpExhaust(t, base); for (u32 i = 0; i < t->queueCount; i++) { const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); const NFA *n = getNfaByInfo(t, nfa_info); stringstream ssbase; ssbase << base << "rose_nfa_" << i; nfaGenerateDumpFiles(n, ssbase.str()); if (dump_raw) { stringstream ssraw; ssraw << base << "rose_nfa_" << i << ".raw"; StdioFile f(ssraw.str(), "w"); fwrite(n, 1, n->length, f); } } } static void dumpRevComponentInfo(const RoseEngine *t, const string &base) { stringstream ss; ss << base << "som_rev_components.txt"; ofstream fout(ss.str().c_str()); fout << "Index Offset\tEngine \tStates S.State Bytes\n"; const char *tp = (const char *)t; const u32 *rev_offsets = (const u32 *)(tp + t->somRevOffsetOffset); for (u32 i = 0; i < t->somRevCount; i++) { u32 offset = rev_offsets[i]; const NFA *n = (const NFA *)(tp + offset); fout << left << setw(6) << i << " "; fout << left << offset << "\t"; /* offset */ fout << left << setw(16) << describe(*n) << "\t"; fout << left << setw(6) 
<< n->nPositions << " "; fout << left << setw(7) << n->streamStateSize << " "; fout << left << setw(7) << n->length; fout << endl; } } static void dumpRevNfas(const RoseEngine *t, bool dump_raw, const string &base) { const char *tp = (const char *)t; const u32 *rev_offsets = (const u32 *)(tp + t->somRevOffsetOffset); for (u32 i = 0; i < t->somRevCount; i++) { const NFA *n = (const NFA *)(tp + rev_offsets[i]); stringstream ssbase; ssbase << base << "som_rev_nfa_" << i; nfaGenerateDumpFiles(n, ssbase.str()); if (dump_raw) { stringstream ssraw; ssraw << base << "som_rev_nfa_" << i << ".raw"; StdioFile f(ssraw.str(), "w"); fwrite(n, 1, n->length, f); } } } static void dumpAnchored(const RoseEngine *t, const string &base) { u32 i = 0; const anchored_matcher_info *curr = (const anchored_matcher_info *)getALiteralMatcher(t); while (curr) { const NFA *n = (const NFA *)((const char *)curr + sizeof(*curr)); stringstream ssbase; ssbase << base << "anchored_" << i; nfaGenerateDumpFiles(n, ssbase.str()); curr = curr->next_offset ? (const anchored_matcher_info *) ((const char *)curr + curr->next_offset) : nullptr; i++; }; } static void dumpAnchoredStats(const void *atable, FILE *f) { assert(atable); u32 i = 0; const anchored_matcher_info *curr = (const anchored_matcher_info *)atable; while (curr) { const NFA *n = (const NFA *)((const char *)curr + sizeof(*curr)); fprintf(f, " NFA %u: %s, %u states (%u bytes)\n", i, describe(*n).c_str(), n->nPositions, n->length); curr = curr->next_offset ? 
(const anchored_matcher_info *) ((const char *)curr + curr->next_offset) : nullptr; i++; }; } static void dumpLongLiteralSubtable(const RoseLongLitTable *ll_table, const RoseLongLitSubtable *ll_sub, FILE *f) { if (!ll_sub->hashBits) { fprintf(f, " \n"); return; } const char *base = (const char *)ll_table; u32 nbits = ll_sub->hashBits; u32 num_entries = 1U << nbits; const auto *tab = (const RoseLongLitHashEntry *)(base + ll_sub->hashOffset); u32 hash_occ = count_if(tab, tab + num_entries, [](const RoseLongLitHashEntry &ent) { return ent.str_offset != 0; }); float hash_occ_percent = ((float)hash_occ / (float)num_entries) * 100; fprintf(f, " hash table : %u bits, occupancy %u/%u (%0.1f%%)\n", nbits, hash_occ, num_entries, hash_occ_percent); u32 bloom_bits = ll_sub->bloomBits; u32 bloom_size = 1U << bloom_bits; const u8 *bloom = (const u8 *)base + ll_sub->bloomOffset; u32 bloom_occ = accumulate(bloom, bloom + bloom_size / 8, 0, [](const u32 &sum, const u8 &elem) { return sum + popcount32(elem); }); float bloom_occ_percent = ((float)bloom_occ / (float)(bloom_size)) * 100; fprintf(f, " bloom filter : %u bits, occupancy %u/%u (%0.1f%%)\n", bloom_bits, bloom_occ, bloom_size, bloom_occ_percent); } static void dumpLongLiteralTable(const RoseEngine *t, FILE *f) { if (!t->longLitTableOffset) { return; } fprintf(f, "\n"); fprintf(f, "Long literal table (streaming):\n"); const auto *ll_table = (const struct RoseLongLitTable *)loadFromByteCodeOffset( t, t->longLitTableOffset); fprintf(f, " total size : %u bytes\n", ll_table->size); fprintf(f, " longest len : %u\n", ll_table->maxLen); fprintf(f, " stream state : %u bytes\n", ll_table->streamStateBytes); fprintf(f, " caseful:\n"); dumpLongLiteralSubtable(ll_table, &ll_table->caseful, f); fprintf(f, " nocase:\n"); dumpLongLiteralSubtable(ll_table, &ll_table->nocase, f); } static void roseDumpText(const RoseEngine *t, FILE *f) { if (!t) { fprintf(f, "<< no rose >>\n"); return; } const void *atable = getAnchoredMatcher(t); const HWLM 
*ftable = getFloatingMatcher(t); const HWLM *drtable = getDelayRebuildMatcher(t); const HWLM *etable = getEodMatcher(t); const HWLM *sbtable = getSmallBlockMatcher(t); fprintf(f, "Rose:\n\n"); fprintf(f, "mode: : "); switch(t->mode) { case HS_MODE_BLOCK: fprintf(f, "block"); break; case HS_MODE_STREAM: fprintf(f, "streaming"); break; case HS_MODE_VECTORED: fprintf(f, "vectored"); break; } fprintf(f, "\n"); fprintf(f, "properties :"); if (t->canExhaust) { fprintf(f, " canExhaust"); } if (t->hasSom) { fprintf(f, " hasSom"); } if (t->runtimeImpl == ROSE_RUNTIME_PURE_LITERAL) { fprintf(f, " pureLiteral"); } if (t->runtimeImpl == ROSE_RUNTIME_SINGLE_OUTFIX) { fprintf(f, " soleOutfix"); } fprintf(f, "\n"); fprintf(f, "dkey count : %u\n", t->dkeyCount); fprintf(f, "som slot count : %u\n", t->somLocationCount); fprintf(f, "som width : %u bytes\n", t->somHorizon); fprintf(f, "rose count : %u\n", t->roseCount); fprintf(f, "\n"); fprintf(f, "total engine size : %u bytes\n", t->size); fprintf(f, " - anchored matcher : %u bytes over %u bytes\n", t->asize, t->anchoredDistance); fprintf(f, " - floating matcher : %zu bytes%s", ftable ? hwlmSize(ftable) : 0, t->noFloatingRoots ? " (cond)":""); if (t->floatingMinDistance) { fprintf(f, " from %s bytes\n", rose_off(t->floatingMinDistance).str().c_str()); } if (t->floatingDistance != ROSE_BOUND_INF && ftable) { fprintf(f, " over %u bytes\n", t->floatingDistance); } else { fprintf(f, "\n"); } fprintf(f, " - delay-rb matcher : %zu bytes\n", drtable ? hwlmSize(drtable) : 0); fprintf(f, " - eod-anch matcher : %zu bytes over last %u bytes\n", etable ? hwlmSize(etable) : 0, t->ematcherRegionSize); fprintf(f, " - small-blk matcher : %zu bytes over %u bytes\n", sbtable ? 
hwlmSize(sbtable) : 0, t->smallBlockDistance); fprintf(f, " - role state table : %zu bytes\n", t->rolesWithStateCount * sizeof(u32)); fprintf(f, " - nfa info table : %zu bytes\n", t->queueCount * sizeof(NfaInfo)); fprintf(f, "state space required : %u bytes\n", t->stateOffsets.end); fprintf(f, " - history buffer : %u bytes\n", t->historyRequired); fprintf(f, " - exhaustion vector : %u bytes\n", t->stateOffsets.exhausted_size); fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize); fprintf(f, " - long lit matcher : %u bytes\n", t->longLitStreamState); fprintf(f, " - active array : %u bytes\n", t->stateOffsets.activeLeafArray_size); fprintf(f, " - active rose : %u bytes\n", t->stateOffsets.activeLeftArray_size); fprintf(f, " - anchored state : %u bytes\n", t->anchorStateSize); fprintf(f, " - nfa state : %u bytes\n", t->stateOffsets.end - t->stateOffsets.nfaStateBegin); fprintf(f, " - (trans. nfa state): %u bytes\n", t->tStateSize); fprintf(f, " - one whole bytes : %u bytes\n", t->stateOffsets.anchorState - t->stateOffsets.leftfixLagTable); fprintf(f, " - groups : %u bytes\n", t->stateOffsets.groups_size); fprintf(f, "\n"); fprintf(f, "initial groups : 0x%016llx\n", t->initialGroups); fprintf(f, "floating groups : 0x%016llx\n", t->floating_group_mask); fprintf(f, "handled key count : %u\n", t->handledKeyCount); fprintf(f, "\n"); fprintf(f, "total literal count : %u\n", t->totalNumLiterals); fprintf(f, " delayed literals : %u\n", t->delay_count); fprintf(f, "\n"); fprintf(f, " minWidth : %u\n", t->minWidth); fprintf(f, " minWidthExcludingBoundaries : %u\n", t->minWidthExcludingBoundaries); fprintf(f, " maxBiAnchoredWidth : %s\n", rose_off(t->maxBiAnchoredWidth).str().c_str()); fprintf(f, " minFloatLitMatchOffset : %s\n", rose_off(t->floatingMinLiteralMatchOffset).str().c_str()); fprintf(f, " maxFloatingDelayedMatch : %s\n", rose_off(t->maxFloatingDelayedMatch).str().c_str()); if (atable) { fprintf(f, "\nAnchored literal matcher stats:\n\n"); 
dumpAnchoredStats(atable, f); } dumpLongLiteralTable(t, f); } #define DUMP_U8(o, member) \ fprintf(f, " %-32s: %hhu/%hhx\n", #member, o->member, o->member) #define DUMP_U32(o, member) \ fprintf(f, " %-32s: %u/%08x\n", #member, o->member, o->member) #define DUMP_U64(o, member) \ fprintf(f, " %-32s: %llu/%016llx\n", #member, o->member, o->member) static void roseDumpStructRaw(const RoseEngine *t, FILE *f) { fprintf(f, "struct RoseEngine {\n"); DUMP_U8(t, noFloatingRoots); DUMP_U8(t, requiresEodCheck); DUMP_U8(t, hasOutfixesInSmallBlock); DUMP_U8(t, runtimeImpl); DUMP_U8(t, mpvTriggeredByLeaf); DUMP_U8(t, canExhaust); DUMP_U8(t, hasSom); DUMP_U8(t, somHorizon); DUMP_U32(t, mode); DUMP_U32(t, historyRequired); DUMP_U32(t, ekeyCount); DUMP_U32(t, dkeyCount); DUMP_U32(t, dkeyLogSize); DUMP_U32(t, invDkeyOffset); DUMP_U32(t, somLocationCount); DUMP_U32(t, somLocationFatbitSize); DUMP_U32(t, rolesWithStateCount); DUMP_U32(t, stateSize); DUMP_U32(t, anchorStateSize); DUMP_U32(t, tStateSize); DUMP_U32(t, smallWriteOffset); DUMP_U32(t, amatcherOffset); DUMP_U32(t, ematcherOffset); DUMP_U32(t, fmatcherOffset); DUMP_U32(t, drmatcherOffset); DUMP_U32(t, sbmatcherOffset); DUMP_U32(t, longLitTableOffset); DUMP_U32(t, amatcherMinWidth); DUMP_U32(t, fmatcherMinWidth); DUMP_U32(t, eodmatcherMinWidth); DUMP_U32(t, amatcherMaxBiAnchoredWidth); DUMP_U32(t, fmatcherMaxBiAnchoredWidth); DUMP_U32(t, reportProgramOffset); DUMP_U32(t, reportProgramCount); DUMP_U32(t, delayProgramOffset); DUMP_U32(t, anchoredProgramOffset); DUMP_U32(t, activeArrayCount); DUMP_U32(t, activeLeftCount); DUMP_U32(t, queueCount); DUMP_U32(t, activeQueueArraySize); DUMP_U32(t, eagerIterOffset); DUMP_U32(t, handledKeyCount); DUMP_U32(t, handledKeyFatbitSize); DUMP_U32(t, leftOffset); DUMP_U32(t, roseCount); DUMP_U32(t, eodProgramOffset); DUMP_U32(t, lastByteHistoryIterOffset); DUMP_U32(t, minWidth); DUMP_U32(t, minWidthExcludingBoundaries); DUMP_U32(t, maxBiAnchoredWidth); DUMP_U32(t, anchoredDistance); DUMP_U32(t, 
anchoredMinDistance); DUMP_U32(t, floatingDistance); DUMP_U32(t, floatingMinDistance); DUMP_U32(t, smallBlockDistance); DUMP_U32(t, floatingMinLiteralMatchOffset); DUMP_U32(t, nfaInfoOffset); DUMP_U64(t, initialGroups); DUMP_U64(t, floating_group_mask); DUMP_U32(t, size); DUMP_U32(t, delay_count); DUMP_U32(t, delay_fatbit_size); DUMP_U32(t, anchored_count); DUMP_U32(t, anchored_fatbit_size); DUMP_U32(t, maxFloatingDelayedMatch); DUMP_U32(t, delayRebuildLength); DUMP_U32(t, stateOffsets.history); DUMP_U32(t, stateOffsets.exhausted); DUMP_U32(t, stateOffsets.exhausted_size); DUMP_U32(t, stateOffsets.activeLeafArray); DUMP_U32(t, stateOffsets.activeLeafArray_size); DUMP_U32(t, stateOffsets.activeLeftArray); DUMP_U32(t, stateOffsets.activeLeftArray_size); DUMP_U32(t, stateOffsets.leftfixLagTable); DUMP_U32(t, stateOffsets.anchorState); DUMP_U32(t, stateOffsets.groups); DUMP_U32(t, stateOffsets.groups_size); DUMP_U32(t, stateOffsets.longLitState); DUMP_U32(t, stateOffsets.longLitState_size); DUMP_U32(t, stateOffsets.somLocation); DUMP_U32(t, stateOffsets.somValid); DUMP_U32(t, stateOffsets.somWritable); DUMP_U32(t, stateOffsets.somMultibit_size); DUMP_U32(t, stateOffsets.nfaStateBegin); DUMP_U32(t, stateOffsets.end); DUMP_U32(t, boundary.reportEodOffset); DUMP_U32(t, boundary.reportZeroOffset); DUMP_U32(t, boundary.reportZeroEodOffset); DUMP_U32(t, totalNumLiterals); DUMP_U32(t, asize); DUMP_U32(t, outfixBeginQueue); DUMP_U32(t, outfixEndQueue); DUMP_U32(t, leftfixBeginQueue); DUMP_U32(t, initMpvNfa); DUMP_U32(t, rosePrefixCount); DUMP_U32(t, activeLeftIterOffset); DUMP_U32(t, ematcherRegionSize); DUMP_U32(t, somRevCount); DUMP_U32(t, somRevOffsetOffset); fprintf(f, "}\n"); fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine)); } static void roseDumpComponents(const RoseEngine *t, bool dump_raw, const string &base) { dumpComponentInfo(t, base); dumpComponentInfoCsv(t, base); dumpNfas(t, dump_raw, base); dumpAnchored(t, base); dumpRevComponentInfo(t, base); 
dumpRevNfas(t, dump_raw, base); } static void roseDumpPrograms(const vector &fragments, const RoseEngine *t, const string &base) { dumpRoseLitPrograms(fragments, t, base + "/rose_lit_programs.txt"); dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt"); dumpRoseReportPrograms(t, base + "/rose_report_programs.txt"); dumpRoseAnchoredPrograms(t, base + "/rose_anchored_programs.txt"); dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt"); } static void roseDumpLiteralMatchers(const RoseEngine *t, const string &base) { if (const HWLM *hwlm = getFloatingMatcher(t)) { hwlmGenerateDumpFiles(hwlm, base + "/lit_table_floating"); } if (const HWLM *hwlm = getDelayRebuildMatcher(t)) { hwlmGenerateDumpFiles(hwlm, base + "/lit_table_delay_rebuild"); } if (const HWLM *hwlm = getEodMatcher(t)) { hwlmGenerateDumpFiles(hwlm, base + "/lit_table_eod"); } if (const HWLM *hwlm = getSmallBlockMatcher(t)) { hwlmGenerateDumpFiles(hwlm, base + "/lit_table_small_block"); } } void dumpRose(const RoseBuildImpl &build, const vector &fragments, const map &leftfix_queue_map, const map &suffix_queue_map, const RoseEngine *t) { const Grey &grey = build.cc.grey; if (!grey.dumpFlags) { return; } StdioFile f(grey.dumpPath + "/rose.txt", "w"); if (!t) { fprintf(f, "<< no rose >>\n"); return; } // Dump Rose table info roseDumpText(t, f); roseDumpComponents(t, false, grey.dumpPath); roseDumpPrograms(fragments, t, grey.dumpPath); roseDumpLiteralMatchers(t, grey.dumpPath); // Graph. dumpRoseGraph(build, t, fragments, leftfix_queue_map, suffix_queue_map, "rose.dot"); // Literals dumpRoseLiterals(build, fragments, grey); f = StdioFile(grey.dumpPath + "/rose_struct.txt", "w"); roseDumpStructRaw(t, f); } } // namespace ue2