Multibyte matcher unit-tests

This commit is contained in:
Anatoly Burakov 2015-12-09 13:39:16 +00:00 committed by Matthew Barr
parent e6709cee5f
commit fb932616ca
2 changed files with 290 additions and 0 deletions

View File

@ -47,6 +47,7 @@ set(unit_internal_SOURCES
internal/limex_nfa.cpp
internal/masked_move.cpp
internal/multi_bit.cpp
internal/multiaccel_matcher.cpp
internal/multiaccel_shift.cpp
internal/nfagraph_common.h
internal/nfagraph_comp.cpp

View File

@ -0,0 +1,289 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
extern "C" {
#include "nfa/accel.h" // wrapping in extern C to make sure run_accel works
}
#include "config.h"
#include "src/ue2common.h"
#include "gtest/gtest.h"
#include "nfagraph/ng_limex_accel.h"
#include "nfa/accelcompile.h"
#include "nfa/multivermicelli.h"
#include "nfa/multishufti.h"
#include "nfa/multitruffle.h"
#include "util/charreach.h"
#include <string>
#include <vector>
#include <algorithm>
#include <stdlib.h>
#include <iostream>
using namespace ue2;
using namespace std;
using namespace testing;
/*
* Static functions needed for this test's wellbeing
*/
// char generator
static inline
char getChar(const CharReach &cr, bool match) {
char result;
do {
result = rand() % CharReach::npos;
} while (cr.test(result) != match);
return result;
}
// appends a string with matches/unmatches according to input match pattern
static
void getMatch(vector<u8> &result, const string &pattern, const CharReach &cr) {
for (const auto &c : pattern) {
result.push_back(getChar(cr, c == '1'));
}
}
// appends non-matching noise of certain lengths
static
void getNoise(vector<u8> &result, u32 len, const CharReach &cr) {
for (unsigned i = 0; i < len; i++) {
result.push_back(getChar(cr, false));
}
}
// test parameters structure
struct MultiaccelTestParam {
string match_pattern;
u32 match_pattern_start_idx;
u32 match_idx;
bool test_all_offsets;
u8 match_len1;
u8 match_len2;
MultibyteAccelInfo::multiaccel_type type;
};
// buffer size is constant
static const u32 BUF_SIZE = 200;
// strings, out of which CharReach will be generated
static const string VERM_CR = "a";
static const string V_NC_CR = "aA";
static const string SHUF_CR = "abcdefghijklmnopqrstuvwxyz";
static const string TRUF_CR = "\x11\x22\x33\x44\x55\x66\x77\x88\x99";
// Parameterized test case for multiaccel patterns.
class MultiaccelTest : public TestWithParam<MultiaccelTestParam> {
protected:
virtual void SetUp() {
// set up is deferred until the actual test, since we can't compile
// any accel schemes unless we know CharReach
const MultiaccelTestParam &p = GetParam();
// reserve space in our buffer
buffer.reserve(BUF_SIZE);
// store the index where we expect to see the match. note that it may
// be different from where the match pattern has started since we may
// have a flooded match (i.e. a match preceded by almost-match) or a
// no-match (in which case "match" index is at the end of the buffer).
match_idx = p.match_idx;
// make note if we need to test all offsets - sometimes we don't, for
// example when testing partial or no-match.
test_all_offsets = p.test_all_offsets;
}
// deferred buffer generation, as we don't know CharReach before we run the test
void GenerateBuffer(const CharReach &cr) {
const MultiaccelTestParam &p = GetParam();
// step 1: fill prefix with non-matching noise
getNoise(buffer, p.match_pattern_start_idx, cr);
// step 2: add a match
getMatch(buffer, p.match_pattern, cr);
// step 3: fill in the rest of the buffer with non-matching noise
getNoise(buffer, BUF_SIZE - p.match_pattern.size() -
p.match_pattern_start_idx, cr);
}
// deferred accel scheme generation, as we don't know CharReach before we run the test
void CompileAccelScheme(const CharReach &cr, AccelAux *aux) {
const MultiaccelTestParam &p = GetParam();
AccelInfo ai;
ai.single_stops = cr; // dummy CharReach to prevent red tape accel
ai.ma_len1 = p.match_len1;
ai.ma_len2 = p.match_len2;
ai.multiaccel_stops = cr;
ai.ma_type = p.type;
buildAccelAux(ai, aux);
// now, verify we've successfully built our accel scheme, *and* that it's
// a multibyte scheme
ASSERT_TRUE(aux->accel_type >= ACCEL_MLVERM &&
aux->accel_type <= ACCEL_MDSGTRUFFLE);
}
virtual void TearDown() {
}
u32 match_idx;
vector<u8> buffer;
bool test_all_offsets;
};
static
void runTest(const vector<u8> &buffer, AccelAux *aux, unsigned match_idx,
bool test_all_offsets) {
const u8 *start = buffer.data();
const u8 *end = start + buffer.size();
const u8 *match = start + match_idx;
// comparing indexes into the buffer is easier to understand than pointers
if (test_all_offsets) {
// run_accel can only scan >15 byte buffers
u32 end_offset = min(match_idx, (u32) buffer.size() - 15);
for (unsigned offset = 0; offset < end_offset; offset++) {
const u8 *ptr = run_accel(aux, (start + offset), end);
unsigned idx = ptr - start;
ASSERT_EQ(match_idx, idx);
}
} else {
const u8 *ptr = run_accel(aux, start, end);
unsigned idx = ptr - start;
ASSERT_EQ(match_idx, idx);
}
}
TEST_P(MultiaccelTest, TestVermicelli) {
AccelAux aux = {0};
CharReach cr(VERM_CR);
GenerateBuffer(cr);
CompileAccelScheme(cr, &aux);
runTest(buffer, &aux, match_idx, test_all_offsets);
}
TEST_P(MultiaccelTest, TestVermicelliNocase) {
AccelAux aux = {0};
CharReach cr(V_NC_CR);
GenerateBuffer(cr);
CompileAccelScheme(cr, &aux);
runTest(buffer, &aux, match_idx, test_all_offsets);
}
TEST_P(MultiaccelTest, TestShufti) {
AccelAux aux = {0};
CharReach cr(SHUF_CR);
GenerateBuffer(cr);
CompileAccelScheme(cr, &aux);
runTest(buffer, &aux, match_idx, test_all_offsets);
}
TEST_P(MultiaccelTest, TestTruffle) {
AccelAux aux = {0};
CharReach cr(TRUF_CR);
GenerateBuffer(cr);
CompileAccelScheme(cr, &aux);
runTest(buffer, &aux, match_idx, test_all_offsets);
}
static const MultiaccelTestParam multiaccelTests[] = {
// long matcher
// full, partial, flooded, nomatch
{"11111", 180, 180, true, 5, 0, MultibyteAccelInfo::MAT_LONG},
{"111", 197, 197, true, 5, 0, MultibyteAccelInfo::MAT_LONG},
{"1111011111", 177, 182, false, 5, 0, MultibyteAccelInfo::MAT_LONG},
{"1111011110", 177, 200, false, 5, 0, MultibyteAccelInfo::MAT_LONG},
// long-grab matcher
// full, partial, flooded, nomatch
{"111110", 180, 180, true, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB},
{"111", 197, 197, true, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB},
{"11111111110", 177, 182, false, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB},
{"11110111101", 177, 200, false, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB},
// shift matcher
// full, partial, flooded, nomatch
{"11001", 180, 180, true, 4, 0, MultibyteAccelInfo::MAT_SHIFT},
{"110", 197, 197, true, 4, 0, MultibyteAccelInfo::MAT_SHIFT},
{"1001011001", 177, 182, false, 4, 0, MultibyteAccelInfo::MAT_SHIFT},
{"1101001011", 177, 200, false, 4, 0, MultibyteAccelInfo::MAT_SHIFT},
// shift-grab matcher
// full, partial, flooded, nomatch
{"10111", 180, 180, true, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB},
{"101", 197, 197, true, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB},
{"1110010111", 177, 182, false, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB},
{"1100101100", 177, 200, false, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB},
// doubleshift matcher
// full, partial (one and two shifts), flooded, nomatch
{"110111", 180, 180, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
{"110", 197, 197, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
{"1101", 196, 196, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
{"1100100101", 178, 182, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
{"1101001101", 177, 200, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
// doubleshift-grab matcher
// full, partial (one and two shifts), flooded, nomatch
{"100101", 180, 180, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
{"100", 197, 197, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
{"1011", 196, 196, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
{"11111101101", 177, 182, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
{"1111110111", 177, 200, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
};
INSTANTIATE_TEST_CASE_P(Multiaccel, MultiaccelTest, ValuesIn(multiaccelTests));