From 7ca81ff530650859067d98ffe1645bb28fc0f7e2 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 7 Mar 2017 11:47:10 +1100 Subject: [PATCH] ng_find_matches: limit how big we're willing to go Tests which require tracking more than 15K states (including edit distance states) are very, very slow. --- unit/internal/nfagraph_find_matches.cpp | 3 ++- util/ng_find_matches.cpp | 15 ++++++++++++--- util/ng_find_matches.h | 10 +++++++--- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/unit/internal/nfagraph_find_matches.cpp b/unit/internal/nfagraph_find_matches.cpp index 99fdb09e..92c514d8 100644 --- a/unit/internal/nfagraph_find_matches.cpp +++ b/unit/internal/nfagraph_find_matches.cpp @@ -212,7 +212,8 @@ TEST_P(MatchesTest, Check) { bool utf8 = (t.flags & HS_FLAG_UTF8) > 0; set> matches; - findMatches(*g, rm, t.input, matches, 0, t.notEod, utf8); + bool success = findMatches(*g, rm, t.input, matches, 0, t.notEod, utf8); + ASSERT_TRUE(success); set> expected(begin(t.matches), end(t.matches)); diff --git a/util/ng_find_matches.cpp b/util/ng_find_matches.cpp index b3c81574..0890319d 100644 --- a/util/ng_find_matches.cpp +++ b/util/ng_find_matches.cpp @@ -52,6 +52,9 @@ using StateBitSet = boost::dynamic_bitset<>; namespace { +/** \brief Max number of states (taking edit distance into account). */ +static constexpr size_t STATE_COUNT_MAX = 15000; + // returns all successors up to a given depth in a vector of sets, indexed by // zero-based depth from source vertex static @@ -1034,7 +1037,7 @@ void filterMatches(MatchSet &matches) { * * Fills \a matches with offsets into the data stream where a match is found. */ -void findMatches(const NGHolder &g, const ReportManager &rm, +bool findMatches(const NGHolder &g, const ReportManager &rm, const string &input, MatchSet &matches, const u32 edit_distance, const bool notEod, const bool utf8) { assert(hasCorrectlyNumberedVertices(g)); @@ -1042,7 +1045,12 @@ void findMatches(const NGHolder &g, const ReportManager &rm, // compile time, so make it an assert assert(!edit_distance || !utf8); - DEBUG_PRINTF("Finding matches\n"); + const size_t total_states = num_vertices(g) * (3 * edit_distance + 1); + DEBUG_PRINTF("Finding matches (%zu total states)\n", total_states); + if (total_states > STATE_COUNT_MAX) { + DEBUG_PRINTF("too big\n"); + return false; + } GraphCache gc(edit_distance, g); #ifdef DEBUG @@ -1068,7 +1076,7 @@ void findMatches(const NGHolder &g, const ReportManager &rm, state.next.count()); if (state.next.empty()) { filterMatches(matches); - return; + return true; } state.states = state.next; state.prev = state.cur; @@ -1086,4 +1094,5 @@ void findMatches(const NGHolder &g, const ReportManager &rm, getMatches(g, matches, state, !notEod); filterMatches(matches); + return true; } diff --git a/util/ng_find_matches.h b/util/ng_find_matches.h index 39ee3f68..9860c202 100644 --- a/util/ng_find_matches.h +++ b/util/ng_find_matches.h @@ -44,11 +44,15 @@ struct BoundaryReports; } // namespace ue2 -/** \brief Find all matches for a given graph when executed against \a input. +/** + * \brief Find all matches for a given graph when executed against \a input. * - * Fills \a matches with offsets into the data stream where a match is found. + * Fills \a matches with offsets into the data stream where a match is found. + * + * Returns false if this graph is too large to find its matches in reasonable + * time. */ -void findMatches(const ue2::NGHolder &g, const ue2::ReportManager &rm, +bool findMatches(const ue2::NGHolder &g, const ue2::ReportManager &rm, const std::string &input, std::set> &matches, const unsigned int max_edit_distance, const bool notEod,