Add support for approximate matching in NFA matcher unit tests

This commit is contained in:
Anatoly Burakov 2017-02-10 15:44:16 +00:00 committed by Matthew Barr
parent 4c2b7cc04f
commit 9f72dede5c
3 changed files with 45 additions and 4 deletions

View File

@ -136,3 +136,8 @@
139:/foo(*UTF8)bar/ #(*UTF8) must be at start of expression, encountered at index 5. 139:/foo(*UTF8)bar/ #(*UTF8) must be at start of expression, encountered at index 5.
140:/(?i)(*UTF8)foobar/ #(*UTF8) must be at start of expression, encountered at index 6. 140:/(?i)(*UTF8)foobar/ #(*UTF8) must be at start of expression, encountered at index 6.
141:/(*@&/ #Unknown control verb at index 2. 141:/(*@&/ #Unknown control verb at index 2.
142:/abcd/si{edit_distance=4} #Approximate matching patterns that reduce to vacuous patterns are disallowed.
143:/foobar|hatstand/sL{edit_distance=6} #Approximate matching patterns that reduce to vacuous patterns are disallowed.
144:/abc\b/{edit_distance=1} #Zero-width assertions are disallowed for approximate matching.
145:/abc/8{edit_distance=1} #UTF-8 is disallowed for approximate matching.
146:/(*UTF8)abc/{edit_distance=1} #UTF-8 is disallowed for approximate matching.

View File

@ -168,10 +168,37 @@ static const expected_info ei_test[] = {
// Some cases with extended parameters. // Some cases with extended parameters.
{"^abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0}, 6, 10, 0, 0, 0}, {"^abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0}, 6, 10, 0, 0, 0},
{"^abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0}, 6, UINT_MAX, 0, 0, 0},
{"abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0}, 6, 10, 0, 0, 0}, {"abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0}, 6, 10, 0, 0, 0},
{"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0}, 100, UINT_MAX, 0, 0, 0}, {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0}, 100, UINT_MAX, 0, 0, 0},
{"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 5, 0}, 6, UINT_MAX, 0, 0, 0}, {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 5, 0}, 6, UINT_MAX, 0, 0, 0},
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 0, UINT_MAX, 0, 0, 0},
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, UINT_MAX, 0, 0, 0},
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, UINT_MAX, 0, 0, 0},
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2},
10, UINT_MAX, 0, 0, 0},
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2},
4, UINT_MAX, 0, 0, 0},
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2},
4, 6, 0, 0, 0},
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, UINT_MAX, 0, 0, 0},
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, UINT_MAX, 0, 0, 0},
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2},
4, UINT_MAX, 0, 0, 0},
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2},
4, UINT_MAX, 0, 0, 0},
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2},
4, 6, 0, 0, 0},
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, 7, 0, 0, 0},
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, 8, 0, 0, 0},
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 8, 2},
4, 8, 0, 0, 0},
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2},
4, 8, 0, 0, 0},
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2},
4, 6, 0, 0, 0},
}; };
INSTANTIATE_TEST_CASE_P(ExprInfo, ExprInfop, ValuesIn(ei_test)); INSTANTIATE_TEST_CASE_P(ExprInfo, ExprInfop, ValuesIn(ei_test));

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -212,10 +212,19 @@ TEST_P(MatchesTest, Check) {
bool utf8 = (t.flags & HS_FLAG_UTF8) > 0; bool utf8 = (t.flags & HS_FLAG_UTF8) > 0;
set<pair<size_t, size_t>> matches; set<pair<size_t, size_t>> matches;
findMatches(*g, rm, t.input, matches, t.notEod, t.som, utf8); findMatches(*g, rm, t.input, matches, 0, t.notEod, utf8);
set<pair<size_t, size_t>> expected(begin(t.matches), end(t.matches)); set<pair<size_t, size_t>> expected(begin(t.matches), end(t.matches));
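// findMatches returns matches with their SOM (start of match) offset, so when the test case is not a SOM test, zero the start offsets before comparing against the expected set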
if (!t.som) {
set<pair<size_t, size_t>> new_matches;
for (auto &m : matches) {
new_matches.emplace(0, m.second);
}
matches.swap(new_matches);
}
ASSERT_EQ(expected, matches) << "Pattern '" << t.pattern ASSERT_EQ(expected, matches) << "Pattern '" << t.pattern
<< "' against input '" << t.input << "'"; << "' against input '" << t.input << "'";
} }