mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Add support for approximate matching in NFA matcher unit tests
This commit is contained in:
parent
4c2b7cc04f
commit
9f72dede5c
@ -136,3 +136,8 @@
|
|||||||
139:/foo(*UTF8)bar/ #(*UTF8) must be at start of expression, encountered at index 5.
|
139:/foo(*UTF8)bar/ #(*UTF8) must be at start of expression, encountered at index 5.
|
||||||
140:/(?i)(*UTF8)foobar/ #(*UTF8) must be at start of expression, encountered at index 6.
|
140:/(?i)(*UTF8)foobar/ #(*UTF8) must be at start of expression, encountered at index 6.
|
||||||
141:/(*@&/ #Unknown control verb at index 2.
|
141:/(*@&/ #Unknown control verb at index 2.
|
||||||
|
142:/abcd/si{edit_distance=4} #Approximate matching patterns that reduce to vacuous patterns are disallowed.
|
||||||
|
143:/foobar|hatstand/sL{edit_distance=6} #Approximate matching patterns that reduce to vacuous patterns are disallowed.
|
||||||
|
144:/abc\b/{edit_distance=1} #Zero-width assertions are disallowed for approximate matching.
|
||||||
|
145:/abc/8{edit_distance=1} #UTF-8 is disallowed for approximate matching.
|
||||||
|
146:/(*UTF8)abc/{edit_distance=1} #UTF-8 is disallowed for approximate matching.
|
||||||
|
@ -168,10 +168,37 @@ static const expected_info ei_test[] = {
|
|||||||
|
|
||||||
// Some cases with extended parameters.
|
// Some cases with extended parameters.
|
||||||
{"^abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0}, 6, 10, 0, 0, 0},
|
{"^abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0}, 6, 10, 0, 0, 0},
|
||||||
|
{"^abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0}, 6, UINT_MAX, 0, 0, 0},
|
||||||
{"abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0}, 6, 10, 0, 0, 0},
|
{"abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0}, 6, 10, 0, 0, 0},
|
||||||
{"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0}, 100, UINT_MAX, 0, 0, 0},
|
{"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0}, 100, UINT_MAX, 0, 0, 0},
|
||||||
{"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 5, 0}, 6, UINT_MAX, 0, 0, 0},
|
{"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 5, 0}, 6, UINT_MAX, 0, 0, 0},
|
||||||
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 0, UINT_MAX, 0, 0, 0},
|
|
||||||
|
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, UINT_MAX, 0, 0, 0},
|
||||||
|
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, UINT_MAX, 0, 0, 0},
|
||||||
|
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2},
|
||||||
|
10, UINT_MAX, 0, 0, 0},
|
||||||
|
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2},
|
||||||
|
4, UINT_MAX, 0, 0, 0},
|
||||||
|
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2},
|
||||||
|
4, 6, 0, 0, 0},
|
||||||
|
|
||||||
|
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, UINT_MAX, 0, 0, 0},
|
||||||
|
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, UINT_MAX, 0, 0, 0},
|
||||||
|
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2},
|
||||||
|
4, UINT_MAX, 0, 0, 0},
|
||||||
|
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2},
|
||||||
|
4, UINT_MAX, 0, 0, 0},
|
||||||
|
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2},
|
||||||
|
4, 6, 0, 0, 0},
|
||||||
|
|
||||||
|
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, 7, 0, 0, 0},
|
||||||
|
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, 8, 0, 0, 0},
|
||||||
|
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 8, 2},
|
||||||
|
4, 8, 0, 0, 0},
|
||||||
|
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2},
|
||||||
|
4, 8, 0, 0, 0},
|
||||||
|
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2},
|
||||||
|
4, 6, 0, 0, 0},
|
||||||
};
|
};
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(ExprInfo, ExprInfop, ValuesIn(ei_test));
|
INSTANTIATE_TEST_CASE_P(ExprInfo, ExprInfop, ValuesIn(ei_test));
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -212,10 +212,19 @@ TEST_P(MatchesTest, Check) {
|
|||||||
bool utf8 = (t.flags & HS_FLAG_UTF8) > 0;
|
bool utf8 = (t.flags & HS_FLAG_UTF8) > 0;
|
||||||
|
|
||||||
set<pair<size_t, size_t>> matches;
|
set<pair<size_t, size_t>> matches;
|
||||||
findMatches(*g, rm, t.input, matches, t.notEod, t.som, utf8);
|
findMatches(*g, rm, t.input, matches, 0, t.notEod, utf8);
|
||||||
|
|
||||||
set<pair<size_t, size_t>> expected(begin(t.matches), end(t.matches));
|
set<pair<size_t, size_t>> expected(begin(t.matches), end(t.matches));
|
||||||
|
|
||||||
|
// findMatches returns matches with SOM, so zero them out if not SOM
|
||||||
|
if (!t.som) {
|
||||||
|
set<pair<size_t, size_t>> new_matches;
|
||||||
|
for (auto &m : matches) {
|
||||||
|
new_matches.emplace(0, m.second);
|
||||||
|
}
|
||||||
|
matches.swap(new_matches);
|
||||||
|
}
|
||||||
|
|
||||||
ASSERT_EQ(expected, matches) << "Pattern '" << t.pattern
|
ASSERT_EQ(expected, matches) << "Pattern '" << t.pattern
|
||||||
<< "' against input '" << t.input << "'";
|
<< "' against input '" << t.input << "'";
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user