mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
ng_find_matches: limit how big we're willing to go
Tests that require tracking more than 15K states (including edit distance states) are very, very slow, so findMatches now returns false instead of attempting graphs above that limit.
This commit is contained in:
parent
d2416736cb
commit
7ca81ff530
@ -212,7 +212,8 @@ TEST_P(MatchesTest, Check) {
|
||||
bool utf8 = (t.flags & HS_FLAG_UTF8) > 0;
|
||||
|
||||
set<pair<size_t, size_t>> matches;
|
||||
findMatches(*g, rm, t.input, matches, 0, t.notEod, utf8);
|
||||
bool success = findMatches(*g, rm, t.input, matches, 0, t.notEod, utf8);
|
||||
ASSERT_TRUE(success);
|
||||
|
||||
set<pair<size_t, size_t>> expected(begin(t.matches), end(t.matches));
|
||||
|
||||
|
@ -52,6 +52,9 @@ using StateBitSet = boost::dynamic_bitset<>;
|
||||
|
||||
namespace {
|
||||
|
||||
/** \brief Max number of states (taking edit distance into account). */
|
||||
static constexpr size_t STATE_COUNT_MAX = 15000;
|
||||
|
||||
// returns all successors up to a given depth in a vector of sets, indexed by
|
||||
// zero-based depth from source vertex
|
||||
static
|
||||
@ -1034,7 +1037,7 @@ void filterMatches(MatchSet &matches) {
|
||||
*
|
||||
* Fills \a matches with offsets into the data stream where a match is found.
|
||||
*/
|
||||
void findMatches(const NGHolder &g, const ReportManager &rm,
|
||||
bool findMatches(const NGHolder &g, const ReportManager &rm,
|
||||
const string &input, MatchSet &matches,
|
||||
const u32 edit_distance, const bool notEod, const bool utf8) {
|
||||
assert(hasCorrectlyNumberedVertices(g));
|
||||
@ -1042,7 +1045,12 @@ void findMatches(const NGHolder &g, const ReportManager &rm,
|
||||
// compile time, so make it an assert
|
||||
assert(!edit_distance || !utf8);
|
||||
|
||||
DEBUG_PRINTF("Finding matches\n");
|
||||
const size_t total_states = num_vertices(g) * (3 * edit_distance + 1);
|
||||
DEBUG_PRINTF("Finding matches (%zu total states)\n", total_states);
|
||||
if (total_states > STATE_COUNT_MAX) {
|
||||
DEBUG_PRINTF("too big\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
GraphCache gc(edit_distance, g);
|
||||
#ifdef DEBUG
|
||||
@ -1068,7 +1076,7 @@ void findMatches(const NGHolder &g, const ReportManager &rm,
|
||||
state.next.count());
|
||||
if (state.next.empty()) {
|
||||
filterMatches(matches);
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
state.states = state.next;
|
||||
state.prev = state.cur;
|
||||
@ -1086,4 +1094,5 @@ void findMatches(const NGHolder &g, const ReportManager &rm,
|
||||
getMatches(g, matches, state, !notEod);
|
||||
|
||||
filterMatches(matches);
|
||||
return true;
|
||||
}
|
||||
|
@ -44,11 +44,15 @@ struct BoundaryReports;
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
/** \brief Find all matches for a given graph when executed against \a input.
|
||||
/**
|
||||
* \brief Find all matches for a given graph when executed against \a input.
|
||||
*
|
||||
* Fills \a matches with offsets into the data stream where a match is found.
|
||||
* Fills \a matches with offsets into the data stream where a match is found.
|
||||
*
|
||||
* Returns false if this graph is too large to find its matches in reasonable
|
||||
* time.
|
||||
*/
|
||||
void findMatches(const ue2::NGHolder &g, const ue2::ReportManager &rm,
|
||||
bool findMatches(const ue2::NGHolder &g, const ue2::ReportManager &rm,
|
||||
const std::string &input,
|
||||
std::set<std::pair<size_t, size_t>> &matches,
|
||||
const unsigned int max_edit_distance, const bool notEod,
|
||||
|
Loading…
x
Reference in New Issue
Block a user