From 9f72dede5cd81bec450e1189633f4e5d3a49b72a Mon Sep 17 00:00:00 2001 From: Anatoly Burakov Date: Fri, 10 Feb 2017 15:44:16 +0000 Subject: [PATCH] Add support for approximate matching in NFA matcher unit tests --- unit/hyperscan/bad_patterns.txt | 5 +++++ unit/hyperscan/expr_info.cpp | 29 ++++++++++++++++++++++++- unit/internal/nfagraph_find_matches.cpp | 15 ++++++++++--- 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/unit/hyperscan/bad_patterns.txt b/unit/hyperscan/bad_patterns.txt index d970761a..09a2f7e1 100644 --- a/unit/hyperscan/bad_patterns.txt +++ b/unit/hyperscan/bad_patterns.txt @@ -136,3 +136,8 @@ 139:/foo(*UTF8)bar/ #(*UTF8) must be at start of expression, encountered at index 5. 140:/(?i)(*UTF8)foobar/ #(*UTF8) must be at start of expression, encountered at index 6. 141:/(*@&/ #Unknown control verb at index 2. +142:/abcd/si{edit_distance=4} #Approximate matching patterns that reduce to vacuous patterns are disallowed. +143:/foobar|hatstand/sL{edit_distance=6} #Approximate matching patterns that reduce to vacuous patterns are disallowed. +144:/abc\b/{edit_distance=1} #Zero-width assertions are disallowed for approximate matching. +145:/abc/8{edit_distance=1} #UTF-8 is disallowed for approximate matching. +146:/(*UTF8)abc/{edit_distance=1} #UTF-8 is disallowed for approximate matching. diff --git a/unit/hyperscan/expr_info.cpp b/unit/hyperscan/expr_info.cpp index aa242798..e6ffa9ea 100644 --- a/unit/hyperscan/expr_info.cpp +++ b/unit/hyperscan/expr_info.cpp @@ -168,10 +168,37 @@ static const expected_info ei_test[] = { // Some cases with extended parameters. 
{"^abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0}, 6, 10, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0}, 6, UINT_MAX, 0, 0, 0}, {"abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0}, 6, 10, 0, 0, 0}, {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0}, 100, UINT_MAX, 0, 0, 0}, {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 5, 0}, 6, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 0, UINT_MAX, 0, 0, 0}, + + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2}, + 10, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2}, + 4, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2}, + 4, 6, 0, 0, 0}, + + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2}, + 4, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2}, + 4, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2}, + 4, 6, 0, 0, 0}, + + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, 7, 0, 0, 0}, + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, 8, 0, 0, 0}, + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 8, 2}, + 4, 8, 0, 0, 0}, + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2}, + 4, 8, 0, 0, 0}, + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2}, + 4, 6, 0, 0, 0}, }; INSTANTIATE_TEST_CASE_P(ExprInfo, ExprInfop, ValuesIn(ei_test)); diff --git a/unit/internal/nfagraph_find_matches.cpp b/unit/internal/nfagraph_find_matches.cpp 
index 553d6dc5..99fdb09e 100644 --- a/unit/internal/nfagraph_find_matches.cpp +++ b/unit/internal/nfagraph_find_matches.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -76,7 +76,7 @@ class MatchesTest: public TestWithParam<MatchesTestParams> { static const MatchesTestParams matchesTests[] = { // EOD and anchored patterns - // these should produce no matches + // these should produce no matches { "^foobar", "foolish", {}, 0, false, true}, { "^foobar$", "ze foobar", {}, 0, false, true}, { "^foobar$", "foobar ", {}, 0, false, true}, @@ -212,10 +212,19 @@ TEST_P(MatchesTest, Check) { bool utf8 = (t.flags & HS_FLAG_UTF8) > 0; set<pair<size_t, size_t>> matches; - findMatches(*g, rm, t.input, matches, t.notEod, t.som, utf8); + findMatches(*g, rm, t.input, matches, 0, t.notEod, utf8); set<pair<size_t, size_t>> expected(begin(t.matches), end(t.matches)); + // findMatches returns matches with SOM, so zero them out if not SOM + if (!t.som) { + set<pair<size_t, size_t>> new_matches; + for (auto &m : matches) { + new_matches.emplace(0, m.second); + } + matches.swap(new_matches); + } + ASSERT_EQ(expected, matches) << "Pattern '" << t.pattern << "' against input '" << t.input << "'"; }