diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index dc372860..7fa01d8a 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -2187,7 +2187,7 @@ u32 countAccelStates(NGHolder &h, if (!cc.grey.allowLimExNFA) { DEBUG_PRINTF("limex not allowed\n"); - return NFA_MAX_ACCEL_STATES + 1; + return 0; } // Sanity check the input data. @@ -2201,11 +2201,11 @@ u32 countAccelStates(NGHolder &h, do_accel, state_compression, cc, num_states); // Acceleration analysis. - fillAccelInfo(bi); + nfaFindAccelSchemes(bi.h, bi.br_cyclic, &bi.accel.accel_map); - u32 num_accel = verify_u32(bi.accel.accelerable.size()); + u32 num_accel = verify_u32(bi.accel.accel_map.size()); DEBUG_PRINTF("found %u accel states\n", num_accel); - return min(num_accel, (u32)NFA_MAX_ACCEL_STATES); + return num_accel; } } // namespace ue2 diff --git a/src/nfa/limex_compile.h b/src/nfa/limex_compile.h index 0e3fdea0..62a07e10 100644 --- a/src/nfa/limex_compile.h +++ b/src/nfa/limex_compile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -79,11 +79,10 @@ aligned_unique_ptr generate(NGHolder &g, const CompileContext &cc); /** - * \brief For a given graph, count the number of accel states it will have in - * an implementation. + * \brief For a given graph, count the number of accelerable states it has. * - * \return the number of accel states, or NFA_MAX_ACCEL_STATES + 1 if an - * implementation would not be constructible. + * Note that this number may be greater than the number that are actually + * implementable. */ u32 countAccelStates(NGHolder &h, const ue2::unordered_map &states, diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index e89a1772..5b992fcb 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -105,6 +105,10 @@ static const size_t DFA_MERGE_MAX_STATES = 8000; * merging with other graphs. */ static const size_t LARGE_LBR_MIN_VERTICES = 32; +/** \brief In block mode, merge two prefixes even if they don't have identical + * literal sets if they have fewer than this many states and the merged graph + * is also small. */ +static constexpr size_t MAX_BLOCK_PREFIX_MERGE_VERTICES = 32; static size_t small_merge_max_vertices(const CompileContext &cc) { @@ -930,6 +934,99 @@ bool compatibleLiteralsForMerge( return true; } +/** + * True if this graph has few enough accel states to be implemented as an NFA + * with all of those states actually becoming accel schemes. + */ +static +bool isAccelerableLeftfix(const RoseBuildImpl &build, const NGHolder &g) { + u32 num = countAccelStates(g, &build.rm, build.cc); + DEBUG_PRINTF("graph with %zu vertices has %u accel states\n", + num_vertices(g), num); + return num <= NFA_MAX_ACCEL_STATES; +} + +/** + * In block mode, we want to be a little more selective, We will only merge + * prefix engines when the literal sets are the same, or if the merged graph + * has only grown by a small amount. + */ +static +bool safeBlockModeMerge(const RoseBuildImpl &build, RoseVertex u, + RoseVertex v) { + assert(!build.cc.streaming); + assert(build.isRootSuccessor(u) == build.isRootSuccessor(v)); + + // Always merge infixes if we can (subject to the other criteria in + // mergeableRoseVertices). + if (!build.isRootSuccessor(u)) { + return true; + } + + const RoseGraph &g = build.g; + + // Merge prefixes with identical literal sets (as we'd have to run them + // both when we see those literals anyway). + if (g[u].literals == g[v].literals) { + return true; + } + + // The rest of this function only deals with the case when both vertices + // have graph leftfixes. + if (!g[u].left.graph || !g[v].left.graph) { + return false; + } + + const size_t u_count = num_vertices(*g[u].left.graph); + const size_t v_count = num_vertices(*g[v].left.graph); + DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n", + u_count, v_count); + if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES || + v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { + DEBUG_PRINTF("prefixes too big already\n"); + return false; + } + + DEBUG_PRINTF("trying merge\n"); + NGHolder h; + cloneHolder(h, *g[v].left.graph); + if (!mergeNfaPair(*g[u].left.graph, h, nullptr, build.cc)) { + DEBUG_PRINTF("couldn't merge\n"); + return false; + } + + const size_t merged_count = num_vertices(h); + DEBUG_PRINTF("merged result has %zu vertices\n", merged_count); + if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { + DEBUG_PRINTF("exceeded limit\n"); + return false; + } + + // We want to only perform merges that take advantage of some + // commonality in the two input graphs, so we check that the number of + // vertices has only grown a small amount: somewhere between the sum + // (no commonality) and the max (no growth at all) of the vertex counts + // of the input graphs. + const size_t max_size = u_count + v_count; + const size_t min_size = max(u_count, v_count); + const size_t max_growth = ((max_size - min_size) * 25) / 100; + if (merged_count > min_size + max_growth) { + DEBUG_PRINTF("grew too much\n"); + return false; + } + + // We don't want to squander any chances at accelerating. + if (!isAccelerableLeftfix(build, h) && + (isAccelerableLeftfix(build, *g[u].left.graph) || + isAccelerableLeftfix(build, *g[v].left.graph))) { + DEBUG_PRINTF("would lose accel property\n"); + return false; + } + + DEBUG_PRINTF("safe to merge\n"); + return true; +} + bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, RoseVertex v) { assert(u != v); @@ -938,15 +1035,8 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, return false; } - // UE-1675: in block mode, we want to be a little more selective -- only - // merge prefix roses when the literal sets are the same. - if (!tbi.cc.streaming && tbi.isRootSuccessor(u)) { - assert(tbi.isRootSuccessor(v)); - - if (tbi.g[u].literals != tbi.g[v].literals) { - DEBUG_PRINTF("literals aren't identical (block mode prefix)\n"); - return false; - } + if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u, v)) { + return false; } /* We cannot merge prefixes/vertices if they are successors of different @@ -1102,15 +1192,8 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, vector> ulits; /* lit + lag pairs */ for (auto a : verts1) { - // UE-1675: in block mode, we want to be a little more selective -- - // only merge prefix roses when the literal sets are the same. - if (!tbi.cc.streaming && is_prefix) { - assert(tbi.isRootSuccessor(a)); - - if (tbi.g[u_front].literals != tbi.g[a].literals) { - DEBUG_PRINTF("literals aren't identical (block mode prefix)\n"); - return false; - } + if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u_front, a)) { + return false; } u32 ulag = tbi.g[a].left.lag; @@ -1121,15 +1204,8 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, vector> vlits; for (auto a : verts2) { - // UE-1675: in block mode, we want to be a little more selective -- - // only merge prefix roses when the literal sets are the same. - if (!tbi.cc.streaming && is_prefix) { - assert(tbi.isRootSuccessor(a)); - - if (tbi.g[u_front].literals != tbi.g[a].literals) { - DEBUG_PRINTF("literals aren't identical (block mode prefix)\n"); - return false; - } + if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u_front, a)) { + return false; } u32 vlag = tbi.g[a].left.lag;