mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-29 19:24:25 +03:00
Add support for Hamming distance approx matching
This commit is contained in:
@@ -144,3 +144,4 @@
|
||||
147:/\b\BMYBt/s{edit_distance=1} #Pattern can never match.
|
||||
148:/\Q<>\Eaaaa/8 #Expression is not valid UTF-8.
|
||||
149:/[\Q<>\Eaaaa]/8 #Expression is not valid UTF-8.
|
||||
150:/abcd/{edit_distance=1,hamming_distance=1} #In hs_expr_ext, cannot have both edit distance and Hamming distance.
|
||||
|
@@ -84,6 +84,13 @@ ostream& operator<<(ostream &os, const hs_expr_ext &ext) {
|
||||
os << "edit_distance=" << ext.edit_distance;
|
||||
first = false;
|
||||
}
|
||||
if (ext.flags & HS_EXT_FLAG_HAMMING_DISTANCE) {
|
||||
if (!first) {
|
||||
os << ", ";
|
||||
}
|
||||
os << "hamming_distance=" << ext.hamming_distance;
|
||||
first = false;
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
@@ -171,7 +178,7 @@ TEST_P(ExprInfop, check_ext_null) {
|
||||
free(info);
|
||||
}
|
||||
|
||||
static const hs_expr_ext NO_EXT_PARAM = { 0, 0, 0, 0, 0 };
|
||||
static const hs_expr_ext NO_EXT_PARAM = { 0, 0, 0, 0, 0, 0 };
|
||||
|
||||
static const expected_info ei_test[] = {
|
||||
{"abc", NO_EXT_PARAM, 3, 3, 0, 0, 0},
|
||||
@@ -214,38 +221,68 @@ static const expected_info ei_test[] = {
|
||||
{"(foo|bar)\\z", NO_EXT_PARAM, 3, 3, 0, 1, 1},
|
||||
|
||||
// Some cases with extended parameters.
|
||||
{"^abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0}, 6, 10, 0, 0, 0},
|
||||
{"^abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0}, 100, UINT_MAX, 0, 0, 0},
|
||||
{"abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0}, 6, 10, 0, 0, 0},
|
||||
{"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0}, 100, UINT_MAX, 0, 0, 0},
|
||||
{"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 5, 0}, 6, UINT_MAX, 0, 0, 0},
|
||||
{"^abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0, 0}, 6, 10, 0, 0, 0},
|
||||
{"^abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0, 0}, 100, UINT_MAX, 0, 0, 0},
|
||||
{"abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0, 0}, 6, 10, 0, 0, 0},
|
||||
{"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0, 0}, 100, UINT_MAX, 0, 0, 0},
|
||||
{"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 5, 0, 0}, 6, UINT_MAX, 0, 0, 0},
|
||||
|
||||
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, UINT_MAX, 0, 0, 0},
|
||||
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, UINT_MAX, 0, 0, 0},
|
||||
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2},
|
||||
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1, 0}, 5, UINT_MAX, 0, 0, 0},
|
||||
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2, 0}, 4, UINT_MAX, 0, 0, 0},
|
||||
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2, 0},
|
||||
10, UINT_MAX, 0, 0, 0},
|
||||
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2},
|
||||
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2, 0},
|
||||
4, UINT_MAX, 0, 0, 0},
|
||||
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2},
|
||||
{"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2, 0},
|
||||
4, 6, 0, 0, 0},
|
||||
|
||||
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, UINT_MAX, 0, 0, 0},
|
||||
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, UINT_MAX, 0, 0, 0},
|
||||
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2},
|
||||
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1, 0}, 5, UINT_MAX, 0, 0, 0},
|
||||
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2, 0}, 4, UINT_MAX, 0, 0, 0},
|
||||
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2, 0},
|
||||
10, UINT_MAX, 0, 0, 0},
|
||||
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2},
|
||||
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2, 0},
|
||||
4, UINT_MAX, 0, 0, 0},
|
||||
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2},
|
||||
{"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2, 0},
|
||||
4, 6, 0, 0, 0},
|
||||
|
||||
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, 7, 0, 0, 0},
|
||||
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, 8, 0, 0, 0},
|
||||
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 8, 2},
|
||||
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1, 0}, 5, 7, 0, 0, 0},
|
||||
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2, 0}, 4, 8, 0, 0, 0},
|
||||
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 8, 2, 0},
|
||||
8, 8, 0, 0, 0},
|
||||
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2},
|
||||
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2, 0},
|
||||
4, 8, 0, 0, 0},
|
||||
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2},
|
||||
{"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2, 0},
|
||||
4, 6, 0, 0, 0},
|
||||
|
||||
{"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 1}, 6, UINT_MAX, 0, 0, 0},
|
||||
{"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 2}, 6, UINT_MAX, 0, 0, 0},
|
||||
{"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 5}, 6, UINT_MAX, 0, 0, 0},
|
||||
{"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 0, 2},
|
||||
10, UINT_MAX, 0, 0, 0},
|
||||
{"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 0, 2},
|
||||
6, UINT_MAX, 0, 0, 0},
|
||||
{"abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 0, 2},
|
||||
6, 6, 0, 0, 0},
|
||||
|
||||
{"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 1}, 6, UINT_MAX, 0, 0, 0},
|
||||
{"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 2}, 6, UINT_MAX, 0, 0, 0},
|
||||
{"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 5}, 6, UINT_MAX, 0, 0, 0},
|
||||
{"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 0, 2},
|
||||
10, UINT_MAX, 0, 0, 0},
|
||||
{"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 0, 2},
|
||||
6, UINT_MAX, 0, 0, 0},
|
||||
{"^abc.*def", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 0, 2},
|
||||
6, 6, 0, 0, 0},
|
||||
|
||||
{"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 1}, 6, 6, 0, 0, 0},
|
||||
{"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 2}, 6, 6, 0, 0, 0},
|
||||
{"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE, 0, 0, 0, 0, 5}, 6, 6, 0, 0, 0},
|
||||
{"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 6, 0, 2},
|
||||
6, 6, 0, 0, 0},
|
||||
{"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 0, 2},
|
||||
6, 6, 0, 0, 0},
|
||||
{"^abcdef", {HS_EXT_FLAG_HAMMING_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 0, 2},
|
||||
6, 6, 0, 0, 0},
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ExprInfo, ExprInfop, ValuesIn(ei_test));
|
||||
|
@@ -213,7 +213,7 @@ TEST_P(MatchesTest, Check) {
|
||||
bool utf8 = (t.flags & HS_FLAG_UTF8) > 0;
|
||||
|
||||
set<pair<size_t, size_t>> matches;
|
||||
bool success = findMatches(*g, rm, t.input, matches, 0, t.notEod, utf8);
|
||||
bool success = findMatches(*g, rm, t.input, matches, 0, 0, t.notEod, utf8);
|
||||
ASSERT_TRUE(success);
|
||||
|
||||
set<pair<size_t, size_t>> expected(begin(t.matches), end(t.matches));
|
||||
|
Reference in New Issue
Block a user