Apr 27th Update

This commit is contained in:
Ned Wright
2023-04-27 19:05:49 +00:00
parent cd4fb6e3e8
commit fd2d9fa081
89 changed files with 2175 additions and 544 deletions

View File

@@ -17,6 +17,7 @@
#include <fstream>
#include <algorithm>
#include <string>
#include <unordered_map>
#include "kiss_patterns.h"
#include "kiss_thin_nfa_impl.h"
@@ -132,17 +133,41 @@ PMHook::scanBufWithOffset(const Buffer &buf) const
}
// Scans buf with the compiled pattern matcher and invokes cb for the matches that pass
// the per-pattern callback limit.
// Each raw match is a pair: .first is used as an index into `patterns`, .second is
// forwarded to cb as its first argument.
// NOTE(review): this text is a diff rendering without +/- markers, so pre-change and
// post-change lines appear side by side (the two signatures below, and the two-argument
// cb call at the top of the loop) - confirm against the real file before applying.
void
PMHook::scanBufWithOffsetLambda(const Buffer &buf, function<void(uint, const PMPattern&)> cb) const
PMHook::scanBufWithOffsetLambda(const Buffer &buf, I_PMScan::CBFunction cb) const
{
dbgAssert(handle != nullptr) << "Unusable Pattern Matcher";
// Number of callbacks already issued, keyed by pattern index
unordered_map<uint, uint> match_counts;
vector<pair<uint, uint>> pm_matches;
// Upper bound on callbacks per pattern (not applied to noRegex patterns, see below)
static const uint maxCbCount = 3;
// Number of callbacks actually issued; only reported in the trace at the end
uint totalCount = 0;
kiss_thin_nfa_exec(handle.get(), buf, pm_matches);
dbgTrace(D_PM) << pm_matches.size() << " raw matches found";
for (auto &res : pm_matches) {
cb(res.second, patterns.at(res.first));
uint patIndex = res.first;
// operator[] default-inserts 0 the first time a pattern index is seen
uint cbCount = match_counts[patIndex];
const PMPattern &pat = patterns.at(patIndex);
bool noRegex = pat.isNoRegex();
bool isShort = (pat.size() == 1);
// Limit the max number of callback calls per precondition, unless it's used as a regex substitute
// On the last callback call, make sure to add the pre/post-word associated preconditions
if (noRegex || cbCount < maxCbCount) {
// matchAll is raised on the last permitted callback for a pattern, or right away for
// single-character patterns; it is never raised for noRegex patterns
bool matchAll = !noRegex && (cbCount == maxCbCount-1 || isShort);
totalCount++;
cb(res.second, pat, matchAll);
// After matchAll was signalled, saturate the counter so this pattern gets no more callbacks
if (matchAll)
match_counts[patIndex] = maxCbCount;
else
match_counts[patIndex]++;
}
}
dbgTrace(D_PM) << totalCount << " filtered matches found";
}
bool

View File

@@ -461,20 +461,69 @@ TEST(pm_scan, pm_offsets_test_pat_getIndex_method)
// Checks the (offset, pattern) pairs delivered by scanBufWithOffsetLambda, including the
// pattern index given at construction and the cap on callbacks for repeated matches.
// NOTE(review): this text is a diff rendering without +/- markers, so some pre-change
// lines (the shorter Buffer, the two-argument lambda, the "CDE" entry without a trailing
// comma) appear next to their replacements - confirm against the real file.
TEST(pm_scan, pm_offsets_lambda_test_pat_getIndex_method)
{
set<PMPattern> initPatts;
initPatts.insert(PMPattern("ABC", false, false)); // initialized with the default index 0
initPatts.insert(PMPattern("ABCD", false, false, 4));
initPatts.insert(PMPattern("CDE", false, false, 7));
initPatts.insert(PMPattern("DCB", false, false));
initPatts.insert(PMPattern("*", false, false));
PMHook pm;
EXPECT_TRUE(pm.prepare(initPatts).ok());
Buffer buf("12345ABCDEF5678");
Buffer buf("12345ABCDEF5678 * DCB * DCB * DCB * DCB");
std::set<std::pair<u_int, PMPattern>> results;
pm.scanBufWithOffsetLambda(buf, [&] (uint offset, const PMPattern &pat) { results.emplace(offset, pat); });
pm.scanBufWithOffsetLambda(buf, [&] (uint offset, const PMPattern &pat, bool matchAll)
{ results.emplace(offset, pat); (void)matchAll; } );
// limit to 1 cb call for 1 character long matches, and 3 cb calls for longer matches
std::set<std::pair<uint, PMPattern>> expected{
{8, {"ABCD", false, false, 4}},
{7, {"ABC", false, false, 0}},
{9, {"CDE", false, false, 7}}
{9, {"CDE", false, false, 7}},
{20, {"DCB", false, false, 0}},
{26, {"DCB", false, false, 0}},
{32, {"DCB", false, false, 0}},
{22, {"*", false, false, 0}}
};
EXPECT_EQ(results, expected);
}
TEST(pm_scan, pm_offsets_lambda_test_pat_limit_noregex)
{
set<PMPattern> initPatts;
initPatts.insert(PMPattern("ABC", false, false)); // initialized with the default index 0
initPatts.insert(PMPattern("ABCD", false, false));
initPatts.insert(PMPattern("CDE", false, false));
initPatts.insert(PMPattern("DCB", false, false, 0, true));
initPatts.insert(PMPattern("*", false, false, 0, true));
PMHook pm;
EXPECT_TRUE(pm.prepare(initPatts).ok());
Buffer buf("12345ABCDEF5678 * DCB * DCB * DCB * DCB");
std::set<std::pair<u_int, PMPattern>> results;
pm.scanBufWithOffsetLambda(buf, [&] (uint offset, const PMPattern &pat, bool matchAll)
{
results.emplace(offset, pat);
EXPECT_FALSE(matchAll);
} );
// don't limit no. of cb when noregex is set
std::set<std::pair<uint, PMPattern>> expected{
{8, {"ABCD", false, false, 0}},
{7, {"ABC", false, false, 0}},
{9, {"CDE", false, false, 0}},
{20, {"DCB", false, false, 0, true}},
{26, {"DCB", false, false, 0, true}},
{32, {"DCB", false, false, 0, true}},
{38, {"DCB", false, false, 0, true}},
{16, {"*", false, false, 0, true}},
{22, {"*", false, false, 0, true}},
{28, {"*", false, false, 0, true}},
{34, {"*", false, false, 0, true}}
};
EXPECT_EQ(results, expected);