Add support for approximate matching in NFA matcher unit tests

This commit is contained in:
Anatoly Burakov 2017-02-10 15:44:16 +00:00 committed by Matthew Barr
parent 4c2b7cc04f
commit 9f72dede5c
3 changed files with 45 additions and 4 deletions

View File

@ -136,3 +136,8 @@
139:/foo(*UTF8)bar/ #(*UTF8) must be at start of expression, encountered at index 5.
140:/(?i)(*UTF8)foobar/ #(*UTF8) must be at start of expression, encountered at index 6.
141:/(*@&/ #Unknown control verb at index 2.
142:/abcd/si{edit_distance=4} #Approximate matching patterns that reduce to vacuous patterns are disallowed.
143:/foobar|hatstand/sL{edit_distance=6} #Approximate matching patterns that reduce to vacuous patterns are disallowed.
144:/abc\b/{edit_distance=1} #Zero-width assertions are disallowed for approximate matching.
145:/abc/8{edit_distance=1} #UTF-8 is disallowed for approximate matching.
146:/(*UTF8)abc/{edit_distance=1} #UTF-8 is disallowed for approximate matching.

View File

@ -168,10 +168,37 @@ static const expected_info ei_test[] = {
// Some cases with extended parameters.
{"^abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0}, 6, 10, 0, 0, 0},
{"^abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0}, 6, UINT_MAX, 0, 0, 0},
{"abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0}, 6, 10, 0, 0, 0},
{"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0}, 100, UINT_MAX, 0, 0, 0},
{"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 5, 0}, 6, UINT_MAX, 0, 0, 0},
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 0, UINT_MAX, 0, 0, 0},
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, UINT_MAX, 0, 0, 0},
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, UINT_MAX, 0, 0, 0},
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2},
10, UINT_MAX, 0, 0, 0},
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2},
4, UINT_MAX, 0, 0, 0},
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2},
4, 6, 0, 0, 0},
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, UINT_MAX, 0, 0, 0},
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, UINT_MAX, 0, 0, 0},
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2},
4, UINT_MAX, 0, 0, 0},
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2},
4, UINT_MAX, 0, 0, 0},
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2},
4, 6, 0, 0, 0},
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, 7, 0, 0, 0},
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, 8, 0, 0, 0},
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 8, 2},
4, 8, 0, 0, 0},
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2},
4, 8, 0, 0, 0},
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2},
4, 6, 0, 0, 0},
};
// Instantiate the ExprInfop parameterized suite (prefix "ExprInfo") with one
// test instance per entry of the ei_test table.
INSTANTIATE_TEST_CASE_P(ExprInfo, ExprInfop, ValuesIn(ei_test));

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -76,7 +76,7 @@ class MatchesTest: public TestWithParam<MatchesTestParams> {
static const MatchesTestParams matchesTests[] = {
// EOD and anchored patterns
// these should produce no matches
// these should produce no matches
{ "^foobar", "foolish", {}, 0, false, true},
{ "^foobar$", "ze foobar", {}, 0, false, true},
{ "^foobar$", "foobar ", {}, 0, false, true},
@ -212,10 +212,19 @@ TEST_P(MatchesTest, Check) {
bool utf8 = (t.flags & HS_FLAG_UTF8) > 0;
set<pair<size_t, size_t>> matches;
findMatches(*g, rm, t.input, matches, t.notEod, t.som, utf8);
findMatches(*g, rm, t.input, matches, 0, t.notEod, utf8);
set<pair<size_t, size_t>> expected(begin(t.matches), end(t.matches));
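// findMatches always reports a start-of-match (SOM) offset for every match.
// When the test case does not request SOM, reset each start offset to zero so
// the results can be compared with the expected (0, end) pairs.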
if (!t.som) {
set<pair<size_t, size_t>> new_matches;
for (auto &m : matches) {
new_matches.emplace(0, m.second);
}
matches.swap(new_matches);
}
ASSERT_EQ(expected, matches) << "Pattern '" << t.pattern
<< "' against input '" << t.input << "'";
}