mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
ng_find_matches: limit how big we're willing to go
Tests that require tracking more than 15,000 states (including edit-distance states) are very, very slow, so findMatches now returns false for such graphs instead of attempting to compute their matches.
This commit is contained in:
parent
d2416736cb
commit
7ca81ff530
@ -212,7 +212,8 @@ TEST_P(MatchesTest, Check) {
|
|||||||
bool utf8 = (t.flags & HS_FLAG_UTF8) > 0;
|
bool utf8 = (t.flags & HS_FLAG_UTF8) > 0;
|
||||||
|
|
||||||
set<pair<size_t, size_t>> matches;
|
set<pair<size_t, size_t>> matches;
|
||||||
findMatches(*g, rm, t.input, matches, 0, t.notEod, utf8);
|
bool success = findMatches(*g, rm, t.input, matches, 0, t.notEod, utf8);
|
||||||
|
ASSERT_TRUE(success);
|
||||||
|
|
||||||
set<pair<size_t, size_t>> expected(begin(t.matches), end(t.matches));
|
set<pair<size_t, size_t>> expected(begin(t.matches), end(t.matches));
|
||||||
|
|
||||||
|
@ -52,6 +52,9 @@ using StateBitSet = boost::dynamic_bitset<>;
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
/** \brief Max number of states (taking edit distance into account). */
|
||||||
|
static constexpr size_t STATE_COUNT_MAX = 15000;
|
||||||
|
|
||||||
// returns all successors up to a given depth in a vector of sets, indexed by
|
// returns all successors up to a given depth in a vector of sets, indexed by
|
||||||
// zero-based depth from source vertex
|
// zero-based depth from source vertex
|
||||||
static
|
static
|
||||||
@ -1034,7 +1037,7 @@ void filterMatches(MatchSet &matches) {
|
|||||||
*
|
*
|
||||||
* Fills \a matches with offsets into the data stream where a match is found.
|
* Fills \a matches with offsets into the data stream where a match is found.
|
||||||
*/
|
*/
|
||||||
void findMatches(const NGHolder &g, const ReportManager &rm,
|
bool findMatches(const NGHolder &g, const ReportManager &rm,
|
||||||
const string &input, MatchSet &matches,
|
const string &input, MatchSet &matches,
|
||||||
const u32 edit_distance, const bool notEod, const bool utf8) {
|
const u32 edit_distance, const bool notEod, const bool utf8) {
|
||||||
assert(hasCorrectlyNumberedVertices(g));
|
assert(hasCorrectlyNumberedVertices(g));
|
||||||
@ -1042,7 +1045,12 @@ void findMatches(const NGHolder &g, const ReportManager &rm,
|
|||||||
// compile time, so make it an assert
|
// compile time, so make it an assert
|
||||||
assert(!edit_distance || !utf8);
|
assert(!edit_distance || !utf8);
|
||||||
|
|
||||||
DEBUG_PRINTF("Finding matches\n");
|
const size_t total_states = num_vertices(g) * (3 * edit_distance + 1);
|
||||||
|
DEBUG_PRINTF("Finding matches (%zu total states)\n", total_states);
|
||||||
|
if (total_states > STATE_COUNT_MAX) {
|
||||||
|
DEBUG_PRINTF("too big\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
GraphCache gc(edit_distance, g);
|
GraphCache gc(edit_distance, g);
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
@ -1068,7 +1076,7 @@ void findMatches(const NGHolder &g, const ReportManager &rm,
|
|||||||
state.next.count());
|
state.next.count());
|
||||||
if (state.next.empty()) {
|
if (state.next.empty()) {
|
||||||
filterMatches(matches);
|
filterMatches(matches);
|
||||||
return;
|
return true;
|
||||||
}
|
}
|
||||||
state.states = state.next;
|
state.states = state.next;
|
||||||
state.prev = state.cur;
|
state.prev = state.cur;
|
||||||
@ -1086,4 +1094,5 @@ void findMatches(const NGHolder &g, const ReportManager &rm,
|
|||||||
getMatches(g, matches, state, !notEod);
|
getMatches(g, matches, state, !notEod);
|
||||||
|
|
||||||
filterMatches(matches);
|
filterMatches(matches);
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -44,11 +44,15 @@ struct BoundaryReports;
|
|||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
/** \brief Find all matches for a given graph when executed against \a input.
|
/**
|
||||||
|
* \brief Find all matches for a given graph when executed against \a input.
|
||||||
*
|
*
|
||||||
* Fills \a matches with offsets into the data stream where a match is found.
|
* Fills \a matches with offsets into the data stream where a match is found.
|
||||||
|
*
|
||||||
|
* Returns false if this graph is too large to find its matches in reasonable
|
||||||
|
* time.
|
||||||
*/
|
*/
|
||||||
void findMatches(const ue2::NGHolder &g, const ue2::ReportManager &rm,
|
bool findMatches(const ue2::NGHolder &g, const ue2::ReportManager &rm,
|
||||||
const std::string &input,
|
const std::string &input,
|
||||||
std::set<std::pair<size_t, size_t>> &matches,
|
std::set<std::pair<size_t, size_t>> &matches,
|
||||||
const unsigned int max_edit_distance, const bool notEod,
|
const unsigned int max_edit_distance, const bool notEod,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user