mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-11-18 10:10:35 +03:00
Initial commit of Hyperscan
This commit is contained in:
363
src/nfagraph/ng_lbr.cpp
Normal file
363
src/nfagraph/ng_lbr.cpp
Normal file
@@ -0,0 +1,363 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Large Bounded Repeat (LBR) engine build code.
|
||||
*/
|
||||
|
||||
#include "ng_lbr.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_repeat.h"
|
||||
#include "ng_reports.h"
|
||||
#include "nfa/shufticompile.h"
|
||||
#include "nfa/trufflecompile.h"
|
||||
#include "nfa/lbr_internal.h"
|
||||
#include "nfa/nfa_internal.h"
|
||||
#include "nfa/repeatcompile.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/bitutils.h" // for lg2
|
||||
#include "util/compile_context.h"
|
||||
#include "util/container.h"
|
||||
#include "util/depth.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
u32 depth_to_u32(const depth &d) {
|
||||
assert(d.is_reachable());
|
||||
if (d.is_infinite()) {
|
||||
return REPEAT_INF;
|
||||
}
|
||||
|
||||
u32 d_val = d;
|
||||
assert(d_val < REPEAT_INF);
|
||||
return d_val;
|
||||
}
|
||||
|
||||
template<class LbrStruct> static
|
||||
u64a* getTable(NFA *nfa) {
|
||||
char *ptr = (char *)nfa + sizeof(struct NFA) + sizeof(LbrStruct) +
|
||||
sizeof(RepeatInfo);
|
||||
ptr = ROUNDUP_PTR(ptr, alignof(u64a));
|
||||
return (u64a *)ptr;
|
||||
}
|
||||
|
||||
template <class LbrStruct> static
|
||||
void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin,
|
||||
const depth &repeatMax, u32 minPeriod, enum RepeatType rtype) {
|
||||
assert(nfa);
|
||||
|
||||
RepeatStateInfo rsi(rtype, repeatMin, repeatMax, minPeriod);
|
||||
|
||||
DEBUG_PRINTF("selected %s model for {%s,%s} repeat\n",
|
||||
repeatTypeName(rtype), repeatMin.str().c_str(),
|
||||
repeatMax.str().c_str());
|
||||
|
||||
// Fill the lbr_common structure first. Note that the RepeatInfo structure
|
||||
// directly follows the LbrStruct.
|
||||
const u32 info_offset = sizeof(LbrStruct);
|
||||
c->repeatInfoOffset = info_offset;
|
||||
c->report = report;
|
||||
|
||||
RepeatInfo *info = (RepeatInfo *)((char *)c + info_offset);
|
||||
info->type = verify_u8(rtype);
|
||||
info->repeatMin = depth_to_u32(repeatMin);
|
||||
info->repeatMax = depth_to_u32(repeatMax);
|
||||
info->stateSize = rsi.stateSize;
|
||||
info->packedCtrlSize = rsi.packedCtrlSize;
|
||||
info->horizon = rsi.horizon;
|
||||
info->minPeriod = minPeriod;
|
||||
memcpy(&info->packedFieldSizes, rsi.packedFieldSizes.data(),
|
||||
byte_length(rsi.packedFieldSizes));
|
||||
info->patchCount = rsi.patchCount;
|
||||
info->patchSize = rsi.patchSize;
|
||||
info->encodingSize = rsi.encodingSize;
|
||||
info->patchesOffset = rsi.patchesOffset;
|
||||
|
||||
// Fill the NFA structure.
|
||||
nfa->nPositions = repeatMin;
|
||||
nfa->streamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize);
|
||||
nfa->scratchStateSize = (u32)sizeof(lbr_state);
|
||||
nfa->minWidth = verify_u32(repeatMin);
|
||||
nfa->maxWidth = repeatMax.is_finite() ? verify_u32(repeatMax) : 0;
|
||||
|
||||
// Fill the lbr table for sparse lbr model.
|
||||
if (rtype == REPEAT_SPARSE_OPTIMAL_P) {
|
||||
u64a *table = getTable<LbrStruct>(nfa);
|
||||
// Adjust table length according to the optimal patch length.
|
||||
size_t len = nfa->length;
|
||||
assert((u32)repeatMax >= rsi.patchSize);
|
||||
len -= sizeof(u64a) * ((u32)repeatMax - rsi.patchSize);
|
||||
nfa->length = verify_u32(len);
|
||||
info->length = verify_u32(sizeof(RepeatInfo)
|
||||
+ sizeof(u64a) * (rsi.patchSize + 1));
|
||||
memcpy(table, rsi.table.data(), byte_length(rsi.table));
|
||||
}
|
||||
}
|
||||
|
||||
template <class LbrStruct> static
|
||||
aligned_unique_ptr<NFA> makeLbrNfa(NFAEngineType nfa_type,
|
||||
enum RepeatType rtype,
|
||||
const depth &repeatMax) {
|
||||
size_t tableLen = 0;
|
||||
if (rtype == REPEAT_SPARSE_OPTIMAL_P) {
|
||||
tableLen = sizeof(u64a) * (repeatMax + 1);
|
||||
}
|
||||
size_t len = sizeof(NFA) + sizeof(LbrStruct) + sizeof(RepeatInfo) +
|
||||
tableLen + sizeof(u64a);
|
||||
aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(len);
|
||||
nfa->type = verify_u8(nfa_type);
|
||||
nfa->length = verify_u32(len);
|
||||
return nfa;
|
||||
}
|
||||
|
||||
static
|
||||
aligned_unique_ptr<NFA> buildLbrDot(const CharReach &cr, const depth &repeatMin,
|
||||
const depth &repeatMax, u32 minPeriod,
|
||||
bool is_reset, ReportID report) {
|
||||
if (!cr.all()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
|
||||
is_reset);
|
||||
aligned_unique_ptr<NFA> nfa
|
||||
= makeLbrNfa<lbr_dot>(LBR_NFA_Dot, rtype, repeatMax);
|
||||
struct lbr_dot *ld = (struct lbr_dot *)getMutableImplNfa(nfa.get());
|
||||
|
||||
fillNfa<lbr_dot>(nfa.get(), &ld->common, report, repeatMin, repeatMax,
|
||||
minPeriod, rtype);
|
||||
|
||||
DEBUG_PRINTF("built dot lbr\n");
|
||||
return nfa;
|
||||
}
|
||||
|
||||
static
|
||||
aligned_unique_ptr<NFA> buildLbrVerm(const CharReach &cr,
|
||||
const depth &repeatMin,
|
||||
const depth &repeatMax, u32 minPeriod,
|
||||
bool is_reset, ReportID report) {
|
||||
const CharReach escapes(~cr);
|
||||
|
||||
if (escapes.count() != 1) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
|
||||
is_reset);
|
||||
aligned_unique_ptr<NFA> nfa
|
||||
= makeLbrNfa<lbr_verm>(LBR_NFA_Verm, rtype, repeatMax);
|
||||
struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get());
|
||||
lv->c = escapes.find_first();
|
||||
|
||||
fillNfa<lbr_verm>(nfa.get(), &lv->common, report, repeatMin, repeatMax,
|
||||
minPeriod, rtype);
|
||||
|
||||
DEBUG_PRINTF("built verm lbr\n");
|
||||
return nfa;
|
||||
}
|
||||
|
||||
static
|
||||
aligned_unique_ptr<NFA> buildLbrNVerm(const CharReach &cr,
|
||||
const depth &repeatMin,
|
||||
const depth &repeatMax, u32 minPeriod,
|
||||
bool is_reset, ReportID report) {
|
||||
const CharReach escapes(cr);
|
||||
|
||||
if (escapes.count() != 1) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
|
||||
is_reset);
|
||||
aligned_unique_ptr<NFA> nfa
|
||||
= makeLbrNfa<lbr_verm>(LBR_NFA_NVerm, rtype, repeatMax);
|
||||
struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get());
|
||||
lv->c = escapes.find_first();
|
||||
|
||||
fillNfa<lbr_verm>(nfa.get(), &lv->common, report, repeatMin, repeatMax,
|
||||
minPeriod, rtype);
|
||||
|
||||
DEBUG_PRINTF("built negated verm lbr\n");
|
||||
return nfa;
|
||||
}
|
||||
|
||||
static
|
||||
aligned_unique_ptr<NFA> buildLbrShuf(const CharReach &cr,
|
||||
const depth &repeatMin,
|
||||
const depth &repeatMax, u32 minPeriod,
|
||||
bool is_reset, ReportID report) {
|
||||
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
|
||||
is_reset);
|
||||
aligned_unique_ptr<NFA> nfa
|
||||
= makeLbrNfa<lbr_shuf>(LBR_NFA_Shuf, rtype, repeatMax);
|
||||
struct lbr_shuf *ls = (struct lbr_shuf *)getMutableImplNfa(nfa.get());
|
||||
|
||||
fillNfa<lbr_shuf>(nfa.get(), &ls->common, report, repeatMin, repeatMax,
|
||||
minPeriod, rtype);
|
||||
|
||||
if (shuftiBuildMasks(~cr, &ls->mask_lo, &ls->mask_hi) == -1) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("built shuf lbr\n");
|
||||
return nfa;
|
||||
}
|
||||
|
||||
static
|
||||
aligned_unique_ptr<NFA> buildLbrTruf(const CharReach &cr,
|
||||
const depth &repeatMin,
|
||||
const depth &repeatMax, u32 minPeriod,
|
||||
bool is_reset, ReportID report) {
|
||||
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
|
||||
is_reset);
|
||||
aligned_unique_ptr<NFA> nfa
|
||||
= makeLbrNfa<lbr_truf>(LBR_NFA_Truf, rtype, repeatMax);
|
||||
struct lbr_truf *lc = (struct lbr_truf *)getMutableImplNfa(nfa.get());
|
||||
|
||||
fillNfa<lbr_truf>(nfa.get(), &lc->common, report, repeatMin, repeatMax,
|
||||
minPeriod, rtype);
|
||||
|
||||
truffleBuildMasks(~cr, &lc->mask1, &lc->mask2);
|
||||
|
||||
DEBUG_PRINTF("built truffle lbr\n");
|
||||
return nfa;
|
||||
}
|
||||
|
||||
static
|
||||
aligned_unique_ptr<NFA> constructLBR(const CharReach &cr,
|
||||
const depth &repeatMin,
|
||||
const depth &repeatMax, u32 minPeriod,
|
||||
bool is_reset, ReportID report) {
|
||||
DEBUG_PRINTF("bounds={%s,%s}, cr=%s (count %zu), report=%u\n",
|
||||
repeatMin.str().c_str(), repeatMax.str().c_str(),
|
||||
describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count(),
|
||||
report);
|
||||
assert(repeatMin <= repeatMax);
|
||||
assert(repeatMax.is_reachable());
|
||||
|
||||
aligned_unique_ptr<NFA> nfa
|
||||
= buildLbrDot(cr, repeatMin, repeatMax, minPeriod, is_reset, report);
|
||||
|
||||
if (!nfa) {
|
||||
nfa = buildLbrVerm(cr, repeatMin, repeatMax, minPeriod, is_reset,
|
||||
report);
|
||||
}
|
||||
if (!nfa) {
|
||||
nfa = buildLbrNVerm(cr, repeatMin, repeatMax, minPeriod, is_reset,
|
||||
report);
|
||||
}
|
||||
if (!nfa) {
|
||||
nfa = buildLbrShuf(cr, repeatMin, repeatMax, minPeriod, is_reset,
|
||||
report);
|
||||
}
|
||||
if (!nfa) {
|
||||
nfa = buildLbrTruf(cr, repeatMin, repeatMax, minPeriod, is_reset,
|
||||
report);
|
||||
}
|
||||
|
||||
if (!nfa) {
|
||||
assert(0);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return nfa;
|
||||
}
|
||||
|
||||
/**
 * \brief Construct an LBR engine from the pure repeat \p repeat.
 *
 * Returns nullptr if LBR is disabled in the grey box or if the repeat does
 * not carry exactly one report. The minimum period and reset flag come from
 * minPeriod() on \p triggers; the reset flag is forced on when that period
 * exceeds the repeat's upper bound.
 */
aligned_unique_ptr<NFA> constructLBR(const PureRepeat &repeat,
                                     const vector<vector<CharReach>> &triggers,
                                     const CompileContext &cc) {
    if (!cc.grey.allowLbr) {
        return nullptr;
    }

    assert(!repeat.reach.none());

    if (repeat.reports.size() != 1) {
        DEBUG_PRINTF("too many reports\n");
        return nullptr;
    }

    // can_reset is written through the pointer handed to minPeriod().
    bool can_reset;
    const u32 trigger_period = minPeriod(triggers, repeat.reach, &can_reset);

    const depth &lower = repeat.bounds.min;
    const depth &upper = repeat.bounds.max;

    if (depth(trigger_period) > upper) {
        DEBUG_PRINTF("trigger is longer than repeat; only need one offset\n");
        can_reset = true;
    }

    const ReportID report = *repeat.reports.begin();

    DEBUG_PRINTF("building LBR %s\n", repeat.bounds.str().c_str());
    return constructLBR(repeat.reach, lower, upper, trigger_period, can_reset,
                        report);
}
|
||||
|
||||
/** \brief Construct an LBR engine from the given graph \p g. */
|
||||
aligned_unique_ptr<NFA> constructLBR(const NGHolder &g,
|
||||
const vector<vector<CharReach>> &triggers,
|
||||
const CompileContext &cc) {
|
||||
if (!cc.grey.allowLbr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
PureRepeat repeat;
|
||||
if (!isPureRepeat(g, repeat)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return constructLBR(repeat, triggers, cc);
|
||||
}
|
||||
|
||||
/** \brief True if graph \p g could be turned into an LBR engine. */
|
||||
bool isLBR(const NGHolder &g, const Grey &grey) {
|
||||
if (!grey.allowLbr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
PureRepeat repeat;
|
||||
if (!isPureRepeat(g, repeat)) {
|
||||
DEBUG_PRINTF("not pure bounded repeat\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (repeat.reports.size() != 1) {
|
||||
DEBUG_PRINTF("too many reports\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
Reference in New Issue
Block a user