mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Multibyte matcher unit-tests
This commit is contained in:
parent
e6709cee5f
commit
fb932616ca
@ -47,6 +47,7 @@ set(unit_internal_SOURCES
|
||||
internal/limex_nfa.cpp
|
||||
internal/masked_move.cpp
|
||||
internal/multi_bit.cpp
|
||||
internal/multiaccel_matcher.cpp
|
||||
internal/multiaccel_shift.cpp
|
||||
internal/nfagraph_common.h
|
||||
internal/nfagraph_comp.cpp
|
||||
|
289
unit/internal/multiaccel_matcher.cpp
Normal file
289
unit/internal/multiaccel_matcher.cpp
Normal file
@ -0,0 +1,289 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
extern "C" {
|
||||
#include "nfa/accel.h" // wrapping in extern C to make sure run_accel works
|
||||
}
|
||||
|
||||
#include "config.h"
|
||||
#include "src/ue2common.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "nfagraph/ng_limex_accel.h"
|
||||
#include "nfa/accelcompile.h"
|
||||
#include "nfa/multivermicelli.h"
|
||||
#include "nfa/multishufti.h"
|
||||
#include "nfa/multitruffle.h"
|
||||
#include "util/charreach.h"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <stdlib.h>
|
||||
#include <iostream>
|
||||
|
||||
using namespace ue2;
|
||||
using namespace std;
|
||||
using namespace testing;
|
||||
|
||||
/*
|
||||
* Static functions needed for this test's wellbeing
|
||||
*/
|
||||
|
||||
// char generator
|
||||
static inline
|
||||
char getChar(const CharReach &cr, bool match) {
|
||||
char result;
|
||||
do {
|
||||
result = rand() % CharReach::npos;
|
||||
} while (cr.test(result) != match);
|
||||
return result;
|
||||
}
|
||||
|
||||
// appends a string with matches/unmatches according to input match pattern
|
||||
static
|
||||
void getMatch(vector<u8> &result, const string &pattern, const CharReach &cr) {
|
||||
for (const auto &c : pattern) {
|
||||
result.push_back(getChar(cr, c == '1'));
|
||||
}
|
||||
}
|
||||
|
||||
// appends non-matching noise of certain lengths
|
||||
static
|
||||
void getNoise(vector<u8> &result, u32 len, const CharReach &cr) {
|
||||
for (unsigned i = 0; i < len; i++) {
|
||||
result.push_back(getChar(cr, false));
|
||||
}
|
||||
}
|
||||
|
||||
// test parameters structure
|
||||
struct MultiaccelTestParam {
|
||||
string match_pattern;
|
||||
u32 match_pattern_start_idx;
|
||||
u32 match_idx;
|
||||
bool test_all_offsets;
|
||||
u8 match_len1;
|
||||
u8 match_len2;
|
||||
MultibyteAccelInfo::multiaccel_type type;
|
||||
};
|
||||
|
||||
// buffer size is constant
|
||||
static const u32 BUF_SIZE = 200;
|
||||
|
||||
// strings, out of which CharReach will be generated
|
||||
static const string VERM_CR = "a";
|
||||
static const string V_NC_CR = "aA";
|
||||
static const string SHUF_CR = "abcdefghijklmnopqrstuvwxyz";
|
||||
static const string TRUF_CR = "\x11\x22\x33\x44\x55\x66\x77\x88\x99";
|
||||
|
||||
// Parameterized test case for multiaccel patterns.
|
||||
class MultiaccelTest : public TestWithParam<MultiaccelTestParam> {
|
||||
protected:
|
||||
virtual void SetUp() {
|
||||
// set up is deferred until the actual test, since we can't compile
|
||||
// any accel schemes unless we know CharReach
|
||||
const MultiaccelTestParam &p = GetParam();
|
||||
|
||||
// reserve space in our buffer
|
||||
buffer.reserve(BUF_SIZE);
|
||||
|
||||
// store the index where we expect to see the match. note that it may
|
||||
// be different from where the match pattern has started since we may
|
||||
// have a flooded match (i.e. a match preceded by almost-match) or a
|
||||
// no-match (in which case "match" index is at the end of the buffer).
|
||||
match_idx = p.match_idx;
|
||||
|
||||
// make note if we need to test all offsets - sometimes we don't, for
|
||||
// example when testing partial or no-match.
|
||||
test_all_offsets = p.test_all_offsets;
|
||||
}
|
||||
|
||||
// deferred buffer generation, as we don't know CharReach before we run the test
|
||||
void GenerateBuffer(const CharReach &cr) {
|
||||
const MultiaccelTestParam &p = GetParam();
|
||||
|
||||
// step 1: fill prefix with non-matching noise
|
||||
getNoise(buffer, p.match_pattern_start_idx, cr);
|
||||
|
||||
// step 2: add a match
|
||||
getMatch(buffer, p.match_pattern, cr);
|
||||
|
||||
// step 3: fill in the rest of the buffer with non-matching noise
|
||||
getNoise(buffer, BUF_SIZE - p.match_pattern.size() -
|
||||
p.match_pattern_start_idx, cr);
|
||||
}
|
||||
|
||||
// deferred accel scheme generation, as we don't know CharReach before we run the test
|
||||
void CompileAccelScheme(const CharReach &cr, AccelAux *aux) {
|
||||
const MultiaccelTestParam &p = GetParam();
|
||||
|
||||
AccelInfo ai;
|
||||
ai.single_stops = cr; // dummy CharReach to prevent red tape accel
|
||||
ai.ma_len1 = p.match_len1;
|
||||
ai.ma_len2 = p.match_len2;
|
||||
ai.multiaccel_stops = cr;
|
||||
ai.ma_type = p.type;
|
||||
|
||||
buildAccelAux(ai, aux);
|
||||
|
||||
// now, verify we've successfully built our accel scheme, *and* that it's
|
||||
// a multibyte scheme
|
||||
ASSERT_TRUE(aux->accel_type >= ACCEL_MLVERM &&
|
||||
aux->accel_type <= ACCEL_MDSGTRUFFLE);
|
||||
}
|
||||
|
||||
virtual void TearDown() {
|
||||
}
|
||||
|
||||
u32 match_idx;
|
||||
vector<u8> buffer;
|
||||
bool test_all_offsets;
|
||||
};
|
||||
|
||||
static
|
||||
void runTest(const vector<u8> &buffer, AccelAux *aux, unsigned match_idx,
|
||||
bool test_all_offsets) {
|
||||
const u8 *start = buffer.data();
|
||||
const u8 *end = start + buffer.size();
|
||||
const u8 *match = start + match_idx;
|
||||
|
||||
// comparing indexes into the buffer is easier to understand than pointers
|
||||
if (test_all_offsets) {
|
||||
// run_accel can only scan >15 byte buffers
|
||||
u32 end_offset = min(match_idx, (u32) buffer.size() - 15);
|
||||
|
||||
for (unsigned offset = 0; offset < end_offset; offset++) {
|
||||
const u8 *ptr = run_accel(aux, (start + offset), end);
|
||||
unsigned idx = ptr - start;
|
||||
ASSERT_EQ(match_idx, idx);
|
||||
}
|
||||
} else {
|
||||
const u8 *ptr = run_accel(aux, start, end);
|
||||
unsigned idx = ptr - start;
|
||||
ASSERT_EQ(match_idx, idx);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(MultiaccelTest, TestVermicelli) {
|
||||
AccelAux aux = {0};
|
||||
CharReach cr(VERM_CR);
|
||||
|
||||
GenerateBuffer(cr);
|
||||
|
||||
CompileAccelScheme(cr, &aux);
|
||||
|
||||
runTest(buffer, &aux, match_idx, test_all_offsets);
|
||||
}
|
||||
|
||||
TEST_P(MultiaccelTest, TestVermicelliNocase) {
|
||||
AccelAux aux = {0};
|
||||
CharReach cr(V_NC_CR);
|
||||
|
||||
GenerateBuffer(cr);
|
||||
|
||||
CompileAccelScheme(cr, &aux);
|
||||
|
||||
runTest(buffer, &aux, match_idx, test_all_offsets);
|
||||
}
|
||||
|
||||
TEST_P(MultiaccelTest, TestShufti) {
|
||||
AccelAux aux = {0};
|
||||
CharReach cr(SHUF_CR);
|
||||
|
||||
GenerateBuffer(cr);
|
||||
|
||||
CompileAccelScheme(cr, &aux);
|
||||
|
||||
runTest(buffer, &aux, match_idx, test_all_offsets);
|
||||
}
|
||||
|
||||
TEST_P(MultiaccelTest, TestTruffle) {
|
||||
AccelAux aux = {0};
|
||||
CharReach cr(TRUF_CR);
|
||||
|
||||
GenerateBuffer(cr);
|
||||
|
||||
CompileAccelScheme(cr, &aux);
|
||||
|
||||
runTest(buffer, &aux, match_idx, test_all_offsets);
|
||||
}
|
||||
|
||||
static const MultiaccelTestParam multiaccelTests[] = {
|
||||
// long matcher
|
||||
|
||||
// full, partial, flooded, nomatch
|
||||
{"11111", 180, 180, true, 5, 0, MultibyteAccelInfo::MAT_LONG},
|
||||
{"111", 197, 197, true, 5, 0, MultibyteAccelInfo::MAT_LONG},
|
||||
{"1111011111", 177, 182, false, 5, 0, MultibyteAccelInfo::MAT_LONG},
|
||||
{"1111011110", 177, 200, false, 5, 0, MultibyteAccelInfo::MAT_LONG},
|
||||
|
||||
// long-grab matcher
|
||||
|
||||
// full, partial, flooded, nomatch
|
||||
{"111110", 180, 180, true, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB},
|
||||
{"111", 197, 197, true, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB},
|
||||
{"11111111110", 177, 182, false, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB},
|
||||
{"11110111101", 177, 200, false, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB},
|
||||
|
||||
// shift matcher
|
||||
|
||||
// full, partial, flooded, nomatch
|
||||
{"11001", 180, 180, true, 4, 0, MultibyteAccelInfo::MAT_SHIFT},
|
||||
{"110", 197, 197, true, 4, 0, MultibyteAccelInfo::MAT_SHIFT},
|
||||
{"1001011001", 177, 182, false, 4, 0, MultibyteAccelInfo::MAT_SHIFT},
|
||||
{"1101001011", 177, 200, false, 4, 0, MultibyteAccelInfo::MAT_SHIFT},
|
||||
|
||||
// shift-grab matcher
|
||||
|
||||
// full, partial, flooded, nomatch
|
||||
{"10111", 180, 180, true, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB},
|
||||
{"101", 197, 197, true, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB},
|
||||
{"1110010111", 177, 182, false, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB},
|
||||
{"1100101100", 177, 200, false, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB},
|
||||
|
||||
// doubleshift matcher
|
||||
|
||||
// full, partial (one and two shifts), flooded, nomatch
|
||||
{"110111", 180, 180, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
|
||||
{"110", 197, 197, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
|
||||
{"1101", 196, 196, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
|
||||
{"1100100101", 178, 182, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
|
||||
{"1101001101", 177, 200, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
|
||||
|
||||
// doubleshift-grab matcher
|
||||
|
||||
// full, partial (one and two shifts), flooded, nomatch
|
||||
{"100101", 180, 180, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
|
||||
{"100", 197, 197, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
|
||||
{"1011", 196, 196, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
|
||||
{"11111101101", 177, 182, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
|
||||
{"1111110111", 177, 200, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Multiaccel, MultiaccelTest, ValuesIn(multiaccelTests));
|
Loading…
x
Reference in New Issue
Block a user