diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp index ba7f2718..3f1614dd 100644 --- a/src/nfa/goughcompile.cpp +++ b/src/nfa/goughcompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -374,7 +374,7 @@ unique_ptr makeCFG(const raw_som_dfa &raw) { } u16 top_sym = raw.alpha_remap[TOP]; - DEBUG_PRINTF("top: %hu, kind %d\n", top_sym, raw.kind); + DEBUG_PRINTF("top: %hu, kind %s\n", top_sym, to_string(raw.kind).c_str()); /* create edges, JOIN variables (on edge targets) */ map seen; diff --git a/src/nfa/mcclellan_internal.h b/src/nfa/mcclellan_internal.h index 549bccf5..5289b074 100644 --- a/src/nfa/mcclellan_internal.h +++ b/src/nfa/mcclellan_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -84,7 +84,7 @@ struct mcclellan { u8 has_accel; /**< 1 iff there are any accel plans */ u8 remap[256]; /**< remaps characters to a smaller alphabet */ ReportID arb_report; /**< one of the accepts that this dfa may raise */ - u32 accel_offset; /**< offset of the accel structures from start of NFA */ + u32 accel_offset; /**< offset of accel structures from start of McClellan */ u32 haig_offset; /**< reserved for use by Haig, relative to start of NFA */ }; diff --git a/src/nfa/mcsheng_internal.h b/src/nfa/mcsheng_internal.h index 81a658e0..bb45ae23 100644 --- a/src/nfa/mcsheng_internal.h +++ b/src/nfa/mcsheng_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -84,7 +84,7 @@ struct mcsheng { u8 has_accel; /**< 1 iff there are any accel plans */ u8 remap[256]; /**< remaps characters to a smaller alphabet */ ReportID arb_report; /**< one of the accepts that this dfa may raise */ - u32 accel_offset; /**< offset of the accel structures from start of NFA */ + u32 accel_offset; /**< offset of accel structures from start of McClellan */ m128 sheng_masks[N_CHARS]; }; diff --git a/src/nfagraph/ng_haig.cpp b/src/nfagraph/ng_haig.cpp index 992faf7c..80545447 100644 --- a/src/nfagraph/ng_haig.cpp +++ b/src/nfagraph/ng_haig.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -73,7 +73,7 @@ static void populateInit(const NGHolder &g, const flat_set &unused, stateset *init, stateset *initDS, vector *v_by_index) { - DEBUG_PRINTF("graph kind: %u\n", (int)g.kind); + DEBUG_PRINTF("graph kind: %s\n", to_string(g.kind).c_str()); for (auto v : vertices_range(g)) { if (contains(unused, v)) { continue; diff --git a/src/nfagraph/ng_mcclellan.cpp b/src/nfagraph/ng_mcclellan.cpp index 091b89b8..4ce5dc15 100644 --- a/src/nfagraph/ng_mcclellan.cpp +++ b/src/nfagraph/ng_mcclellan.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -542,7 +542,8 @@ unique_ptr buildMcClellan(const NGHolder &graph, return nullptr; } - DEBUG_PRINTF("attempting to build ?%d? mcclellan\n", (int)graph.kind); + DEBUG_PRINTF("attempting to build %s mcclellan\n", + to_string(graph.kind).c_str()); assert(allMatchStatesHaveReports(graph)); bool prunable = grey.highlanderPruneDFA && has_managed_reports(graph); diff --git a/src/nfagraph/ng_stop.cpp b/src/nfagraph/ng_stop.cpp index c335540a..5e627bb5 100644 --- a/src/nfagraph/ng_stop.cpp +++ b/src/nfagraph/ng_stop.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -87,7 +87,11 @@ private: /** Find the set of characters that are not present in the reachability of * graph \p g after a certain depth (currently 8). If a character in this set * is encountered, it means that the NFA is either dead or has not progressed - * more than 8 characters from its start states. */ + * more than 8 characters from its start states. + * + * This is only used to guide merging heuristics, use + * findLeftOffsetStopAlphabet for real uses. + */ CharReach findStopAlphabet(const NGHolder &g, som_type som) { const depth max_depth(MAX_STOP_DEPTH); const InitDepths depths(g); diff --git a/src/nfagraph/ng_stop.h b/src/nfagraph/ng_stop.h index da70a4fd..4a889dca 100644 --- a/src/nfagraph/ng_stop.h +++ b/src/nfagraph/ng_stop.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -47,7 +47,11 @@ class NGHolder; /** Find the set of characters that are not present in the reachability of * graph \p g after a certain depth (currently 8). If a character in this set * is encountered, it means that the NFA is either dead or has not progressed - * more than 8 characters from its start states. */ + * more than 8 characters from its start states. + * + * This is only used to guide merging heuristics, use + * findLeftOffsetStopAlphabet for real uses. + */ CharReach findStopAlphabet(const NGHolder &g, som_type som); /** Calculate the stop alphabet for each depth from 0 to MAX_STOP_DEPTH. Then diff --git a/src/rose/match.h b/src/rose/match.h index 0d4fb19c..c03b1ebb 100644 --- a/src/rose/match.h +++ b/src/rose/match.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -66,6 +66,7 @@ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, u64a top_squash_distance, u64a end, char in_catchup); +/** \brief Initialize the queue for a suffix/outfix engine. */ static really_inline void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t, struct hs_scratch *scratch) { @@ -90,6 +91,7 @@ void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t, info->stateOffset, *(u32 *)q->state); } +/** \brief Initialize the queue for a leftfix (prefix/infix) engine. */ static really_inline void initRoseQueue(const struct RoseEngine *t, u32 qi, const struct LeftNfaInfo *left, diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index e6ce9bdb..30ec4bcd 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -501,8 +501,7 @@ hwlmcb_rv_t roseReport(const struct RoseEngine *t, struct hs_scratch *scratch, } /* catches up engines enough to ensure any earlier mpv triggers are enqueued - * and then adds the trigger to the mpv queue. Must not be called during catch - * up */ + * and then adds the trigger to the mpv queue. */ static rose_inline hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t, struct hs_scratch *scratch, diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index 08f253cb..aa043fad 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -453,7 +453,7 @@ RoseVertex tryForAnchoredVertex(RoseBuildImpl *tbi, <= tbi->cc.grey.maxAnchoredRegion) { if (ep.maxBound || ep.minBound) { /* TODO: handle, however these cases are not generated currently by - ng_rose */ + ng_violet */ return RoseGraph::null_vertex(); } max_width = depth(ep.maxBound + iv_info.s.length()); @@ -567,7 +567,7 @@ void doRoseLiteralVertex(RoseBuildImpl *tbi, bool use_eod_table, assert(iv_info.type == RIV_LITERAL); assert(!parents.empty()); /* start vertices should not be here */ - // ng_rose should have ensured that mixed-sensitivity literals are no + // ng_violet should have ensured that mixed-sensitivity literals are no // longer than the benefits max width. assert(iv_info.s.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(iv_info.s)); diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index c0eba22b..cc450a89 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -459,7 +459,7 @@ public: const_iterator end() const { return ordering.end(); } }; -typedef Bouquet RoseBouquet; +typedef Bouquet LeftfixBouquet; typedef Bouquet SuffixBouquet; } // namespace @@ -565,7 +565,7 @@ bool hasSameEngineType(const RoseVertexProps &u_prop, * * Parameters are vectors of literals + lag pairs. * - * Note: if more constaints of when the leftfixes were going to be checked + * Note: if more constraints of when the leftfixes were going to be checked * (mandatory lookarounds passing, offset checks), more merges may be allowed. */ static @@ -599,7 +599,7 @@ bool compatibleLiteralsForMerge( /* An engine requires that all accesses to it are ordered by offsets. (ie, we can not check an engine's state at offset Y, if we have already checked its status at offset X and X > Y). If we can not establish that - the literals used for triggering will statisfy this property, then it is + the literals used for triggering will satisfy this property, then it is not safe to merge the engine. */ for (const auto &ue : ulits) { const rose_literal_id &ul = *ue.first; @@ -1778,7 +1778,7 @@ u32 estimatedAccelStates(const RoseBuildImpl &tbi, const NGHolder &h) { } static -void mergeNfaLeftfixes(RoseBuildImpl &tbi, RoseBouquet &roses) { +void mergeNfaLeftfixes(RoseBuildImpl &tbi, LeftfixBouquet &roses) { RoseGraph &g = tbi.g; DEBUG_PRINTF("%zu nfa rose merge candidates\n", roses.size()); @@ -1894,7 +1894,7 @@ void mergeSmallLeftfixes(RoseBuildImpl &tbi) { RoseGraph &g = tbi.g; - RoseBouquet nfa_roses; + LeftfixBouquet nfa_leftfixes; for (auto v : vertices_range(g)) { if (!g[v].left) { @@ -1939,20 +1939,20 @@ void mergeSmallLeftfixes(RoseBuildImpl &tbi) { continue; } - nfa_roses.insert(left, v); + nfa_leftfixes.insert(left, v); } - deque rose_groups; - chunkBouquets(nfa_roses, rose_groups, MERGE_GROUP_SIZE_MAX); - nfa_roses.clear(); - DEBUG_PRINTF("chunked nfa roses into %zu groups\n", rose_groups.size()); + deque leftfix_groups; + chunkBouquets(nfa_leftfixes, leftfix_groups, MERGE_GROUP_SIZE_MAX); + nfa_leftfixes.clear(); + DEBUG_PRINTF("chunked nfa leftfixes into %zu groups\n", + leftfix_groups.size()); - for (auto &group : rose_groups) { + for (auto &group : leftfix_groups) { mergeNfaLeftfixes(tbi, group); } } - static void mergeCastleChunk(RoseBuildImpl &build, vector &cands, insertion_ordered_map> &eng_verts) { diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index a7332df7..0b0e689c 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -993,15 +993,19 @@ bool canImplementGraphs(const RoseBuildImpl &tbi) { return true; } +/** + * \brief True if there is an engine with a top that is not triggered by a + * vertex in the Rose graph. This is a consistency check used in assertions. + */ bool hasOrphanedTops(const RoseBuildImpl &build) { const RoseGraph &g = build.g; - unordered_map> roses; + unordered_map> leftfixes; unordered_map> suffixes; for (auto v : vertices_range(g)) { if (g[v].left) { - set &tops = roses[g[v].left]; + set &tops = leftfixes[g[v].left]; if (!build.isRootSuccessor(v)) { // Tops for infixes come from the in-edges. for (const auto &e : in_edges_range(v, g)) { @@ -1014,7 +1018,7 @@ bool hasOrphanedTops(const RoseBuildImpl &build) { } } - for (const auto &e : roses) { + for (const auto &e : leftfixes) { if (all_tops(e.first) != e.second) { DEBUG_PRINTF("rose tops (%s) don't match rose graph (%s)\n", as_string_list(all_tops(e.first)).c_str(), diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index 8f350e29..e4e68136 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017, Intel Corporation + * Copyright (c) 2016-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -280,7 +280,7 @@ void stripCheckHandledInstruction(RoseProgram &prog) { } -/** Returns true if the program may read the the interpreter's work_done flag */ +/** Returns true if the program may read the interpreter's work_done flag */ static bool reads_work_done_flag(const RoseProgram &prog) { for (const auto &ri : prog) { @@ -1837,7 +1837,7 @@ void makeRoleEagerEodReports(const RoseBuildImpl &build, program.add_before_end(move(eod_program)); } -/* Makes a program for a role/vertex given a specfic pred/in_edge. */ +/** Makes a program for a role/vertex given a specific pred/in_edge. */ static RoseProgram makeRoleProgram(const RoseBuildImpl &build, const map &leftfix_info, @@ -2045,7 +2045,7 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, } if (lit_id == build.eod_event_literal_id) { - /* Note: does not require the lit intial program */ + /* Note: does not require the lit initial program */ assert(build.eod_event_literal_id != MO_INVALID_IDX); return role_programs; } diff --git a/src/rose/rose_graph.h b/src/rose/rose_graph.h index 2c5ebbe9..499d796a 100644 --- a/src/rose/rose_graph.h +++ b/src/rose/rose_graph.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -41,7 +41,6 @@ #include "rose_build.h" #include "rose_internal.h" #include "nfa/nfa_internal.h" // for MO_INVALID_IDX -#include "util/charreach.h" #include "util/depth.h" #include "util/flat_containers.h" #include "util/ue2_graph.h" diff --git a/src/rose/rose_in_graph.h b/src/rose/rose_in_graph.h index ed4644ae..da0ea08d 100644 --- a/src/rose/rose_in_graph.h +++ b/src/rose/rose_in_graph.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -27,7 +27,7 @@ */ /** \file - * \brief Rose Input Graph: Used for ng_rose -> rose_build_add communication. + * \brief Rose Input Graph: Used for ng_violet -> rose_build_add communication. * * The input graph MUST be a DAG. * There MUST be exactly 1 START or ANCHORED_START vertex. @@ -127,7 +127,7 @@ public: flat_set reports; /**< for RIV_ACCEPT/RIV_ACCEPT_EOD */ u32 min_offset; /**< Minimum offset at which this vertex can match. */ u32 max_offset; /**< Maximum offset at which this vertex can match. */ - size_t index = 0; + size_t index = 0; /**< \brief Unique vertex index. */ }; struct RoseInEdgeProps { @@ -176,7 +176,13 @@ struct RoseInEdgeProps { /** \brief Haig version of graph, if required. */ std::shared_ptr haig; + /** + * \brief Distance behind the match offset for the literal in the target + * vertex that the leftfix needs to be checked at. + */ u32 graph_lag; + + /** \brief Unique edge index. */ size_t index = 0; }; diff --git a/src/util/ue2_graph.h b/src/util/ue2_graph.h index bf719fd7..3879e640 100644 --- a/src/util/ue2_graph.h +++ b/src/util/ue2_graph.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017, Intel Corporation + * Copyright (c) 2016-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -89,7 +89,7 @@ * (1) Deterministic ordering for vertices and edges * boost::adjacency_list<> uses pointer ordering for vertex_descriptors. As * a result, ordering of vertices and edges between runs is - * non-deterministic unless containers, etc use custom comparators. + * non-deterministic unless containers, etc use custom comparators. * * (2) Proper types for descriptors, etc. * No more void * for vertex_descriptors and trying to use it for the wrong @@ -288,7 +288,7 @@ private: vertex_edge_list in_edge_list; /* The out going edges are considered owned by the vertex and - * need to be freed when the graph is begin destroyed */ + * need to be freed when the graph is being destroyed */ vertex_edge_list out_edge_list; /* The destructor only frees memory owned by the vertex and will leave