mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-10-09 15:52:27 +03:00
Initial commit of Hyperscan
This commit is contained in:
374
src/grey.cpp
Normal file
374
src/grey.cpp
Normal file
@@ -0,0 +1,374 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "grey.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdlib> // exit
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#define DEFAULT_MAX_HISTORY 60
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/**
 * Build a Grey with every tunable set to its stock default.
 *
 * The initialisers below are listed in the same order as before; keep that
 * order in step with the member declarations in grey.h.
 */
Grey::Grey()
    : optimiseComponentTree(true)
    , performGraphSimplification(true)
    , prefilterReductions(true)
    , removeEdgeRedundancy(true)
    , allowGough(true)
    , allowHaigLit(true)
    , allowLitHaig(true)
    , allowLbr(true)
    , allowMcClellan(true)
    , allowPuff(true)
    , allowRose(true)
    , allowExtendedNFA(true) // bounded repeats of course
    , allowLimExNFA(true)
    , allowSidecar(true)
    , allowAnchoredAcyclic(true)
    , allowSmallLiteralSet(true)
    , allowCastle(true)
    , allowDecoratedLiteral(true)
    , allowNoodle(true)
    , fdrAllowTeddy(true)
    , puffImproveHead(true)
    , castleExclusive(true)
    , mergeSEP(true) // short exhaustible passthroughs
    , mergeRose(true) // roses inside rose
    , mergeSuffixes(true) // suffix nfas inside rose
    , mergeOutfixes(true)
    , onlyOneOutfix(false)
    , allowShermanStates(true)
    , allowMcClellan8(true)
    , highlanderPruneDFA(true)
    , minimizeDFA(true)
    , accelerateDFA(true)
    , accelerateNFA(true)
    , reverseAccelerate(true)
    , squashNFA(true)
    , compressNFAState(true)
    , numberNFAStatesWrong(false) // debugging only
    , highlanderSquash(true)
    , allowZombies(true)
    , floodAsPuffette(false)
    , nfaForceSize(0)
    , nfaForceShifts(0)
    , maxHistoryAvailable(DEFAULT_MAX_HISTORY)
    , minHistoryAvailable(0) // debugging only
    , maxAnchoredRegion(63) // for rose's atable to run over
    , minRoseLiteralLength(3)
    , minRoseNetflowLiteralLength(2)
    , maxRoseNetflowEdges(50000) // otherwise no netflow pass.
    , minExtBoundedRepeatSize(32)
    , goughCopyPropagate(true)
    , goughRegisterAllocate(true)
    , shortcutLiterals(true)
    , roseGraphReduction(true)
    , roseRoleAliasing(true)
    , roseMasks(true)
    , roseMaxBadLeafLength(5)
    , roseConvertInfBadLeaves(true)
    , roseConvertFloodProneSuffixes(true)
    , roseMergeRosesDuringAliasing(true)
    , roseMultiTopRoses(true)
    , roseHamsterMasks(true)
    , roseLookaroundMasks(true)
    , roseMcClellanPrefix(1)
    , roseMcClellanSuffix(1)
    , roseMcClellanOutfix(2)
    , roseTransformDelay(true)
    , roseDesiredSplit(4)
    , earlyMcClellanPrefix(true)
    , earlyMcClellanInfix(true)
    , earlyMcClellanSuffix(true)
    , allowCountingMiracles(true)
    , allowSomChain(true)
    , somMaxRevNfaLength(126)
    , hamsterAccelForward(true)
    , hamsterAccelReverse(false)
    , miracleHistoryBonus(16)
    , equivalenceEnable(true)

    , allowSmallWrite(true) // McClellan dfas for small patterns

    // Largest buffer that can be considered a small write; all blocks
    // larger than this are given to rose &co.
    , smallWriteLargestBuffer(70)
    , smallWriteLargestBufferBad(35)
    , limitSmallWriteOutfixSize(1048576) // 1 MB
    , dumpFlags(0)
    , limitPatternCount(8000000) // 8M patterns
    , limitPatternLength(16000) // 16K bytes
    , limitGraphVertices(500000) // 500K vertices
    , limitGraphEdges(1000000) // 1M edges
    , limitReportCount(4*8000000)
    , limitLiteralCount(8000000) // 8M literals
    , limitLiteralLength(16000)
    , limitLiteralMatcherChars(1073741824) // 1 GB
    , limitLiteralMatcherSize(1073741824) // 1 GB
    , limitRoseRoleCount(4*8000000)
    , limitRoseEngineCount(8000000) // 8M engines
    , limitRoseAnchoredSize(1073741824) // 1 GB
    , limitEngineSize(1073741824) // 1 GB
    , limitDFASize(1073741824) // 1 GB
    , limitNFASize(1048576) // 1 MB
    , limitLBRSize(1048576) // 1 MB
{
    // a[lm]_log_sum have limited capacity, so the anchored region must
    // stay below 64.
    assert(maxAnchoredRegion < 64);
}
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#ifndef RELEASE_BUILD
|
||||
|
||||
#include <boost/lexical_cast.hpp>
|
||||
using boost::lexical_cast;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/**
 * Apply a textual list of overrides to a Grey structure.
 *
 * The override string is a sequence of "key:value" entries separated by ';',
 * where each value is parsed as an unsigned integer, e.g.
 * "allowRose:0;roseDesiredSplit:8". Recognised keys are the Grey members
 * listed in the G_UPDATE table below, plus a handful of compound presets
 * ("simple_som", "forceOutfixesNFA", "forceOutfixesDFA", "forceOutfixes")
 * that set several members at once and ignore the supplied value.
 *
 * Passing "help" (or "help:") prints every simple key together with its
 * default value.
 *
 * An unknown key or a value that cannot be parsed as an unsigned integer is
 * reported on stdout, after which the help listing is printed and the
 * process exits with status 1. Parsing stops silently at the first entry
 * that contains no ':'.
 *
 * \param g Grey structure to modify.
 * \param s Override string as described above.
 */
void applyGreyOverrides(Grey *g, const string &s) {
    string::const_iterator p = s.begin();
    string::const_iterator pe = s.end();
    const string help = "help:0";
    bool invalid_key_seen = false;
    const Grey defaultg; // source of the defaults shown by "help"

    if (s == "help" || s == "help:") {
        printf("Valid grey overrides:\n");
        // Re-point the parser at a well-formed "help:0" entry so that the
        // loop below runs once with key == "help".
        p = help.begin();
        pe = help.end();
    }

    while (p != pe) {
        string::const_iterator ke = find(p, pe, ':');

        if (ke == pe) {
            // No "key:value" separator left in the input.
            break;
        }

        const string key(p, ke);

        string::const_iterator ve = find(ke, pe, ';');
        const string value_str(ke + 1, ve);

        // lexical_cast throws on empty or non-numeric text; treat that as a
        // user error instead of letting the exception escape.
        unsigned int value = 0;
        try {
            value = lexical_cast<unsigned int>(value_str);
        } catch (const boost::bad_lexical_cast &) {
            printf("Invalid grey override value %s:%s\n", key.c_str(),
                   value_str.c_str());
            invalid_key_seen = true;
            break;
        }

        bool done = false;

        /* surely there exists a nice template to go with this macro to make
         * all the boring code disappear */
#define G_UPDATE(k) do {                                                \
        if (key == #k) { g->k = value; done = true; }                   \
        if (key == "help") {                                            \
            printf("\t%-30s\tdefault: %s\n", #k,                        \
                   lexical_cast<string>(defaultg.k).c_str());           \
        }                                                               \
    } while (0)

        G_UPDATE(optimiseComponentTree);
        G_UPDATE(performGraphSimplification);
        G_UPDATE(prefilterReductions);
        G_UPDATE(removeEdgeRedundancy);
        G_UPDATE(allowGough);
        G_UPDATE(allowHaigLit);
        G_UPDATE(allowLitHaig);
        G_UPDATE(allowLbr);
        G_UPDATE(allowMcClellan);
        G_UPDATE(allowPuff);
        G_UPDATE(allowRose);
        G_UPDATE(allowExtendedNFA);
        G_UPDATE(allowLimExNFA);
        G_UPDATE(allowSidecar);
        G_UPDATE(allowAnchoredAcyclic);
        G_UPDATE(allowSmallLiteralSet);
        G_UPDATE(allowCastle);
        G_UPDATE(allowDecoratedLiteral);
        G_UPDATE(allowNoodle);
        G_UPDATE(fdrAllowTeddy);
        G_UPDATE(puffImproveHead);
        G_UPDATE(castleExclusive);
        G_UPDATE(mergeSEP);
        G_UPDATE(mergeRose);
        G_UPDATE(mergeSuffixes);
        G_UPDATE(mergeOutfixes);
        G_UPDATE(onlyOneOutfix);
        G_UPDATE(allowShermanStates);
        G_UPDATE(allowMcClellan8);
        G_UPDATE(highlanderPruneDFA);
        G_UPDATE(minimizeDFA);
        G_UPDATE(accelerateDFA);
        G_UPDATE(accelerateNFA);
        G_UPDATE(reverseAccelerate);
        G_UPDATE(squashNFA);
        G_UPDATE(compressNFAState);
        G_UPDATE(numberNFAStatesWrong);
        G_UPDATE(allowZombies);
        G_UPDATE(floodAsPuffette);
        G_UPDATE(nfaForceSize);
        G_UPDATE(nfaForceShifts);
        G_UPDATE(highlanderSquash);
        G_UPDATE(maxHistoryAvailable);
        G_UPDATE(minHistoryAvailable);
        G_UPDATE(maxAnchoredRegion);
        G_UPDATE(minRoseLiteralLength);
        G_UPDATE(minRoseNetflowLiteralLength);
        G_UPDATE(maxRoseNetflowEdges);
        G_UPDATE(minExtBoundedRepeatSize);
        G_UPDATE(goughCopyPropagate);
        G_UPDATE(goughRegisterAllocate);
        G_UPDATE(shortcutLiterals);
        G_UPDATE(roseGraphReduction);
        G_UPDATE(roseRoleAliasing);
        G_UPDATE(roseMasks);
        G_UPDATE(roseMaxBadLeafLength);
        G_UPDATE(roseConvertInfBadLeaves);
        G_UPDATE(roseConvertFloodProneSuffixes);
        G_UPDATE(roseMergeRosesDuringAliasing);
        G_UPDATE(roseMultiTopRoses);
        G_UPDATE(roseHamsterMasks);
        G_UPDATE(roseLookaroundMasks);
        G_UPDATE(roseMcClellanPrefix);
        G_UPDATE(roseMcClellanSuffix);
        G_UPDATE(roseMcClellanOutfix);
        G_UPDATE(roseTransformDelay);
        G_UPDATE(roseDesiredSplit);
        G_UPDATE(earlyMcClellanPrefix);
        G_UPDATE(earlyMcClellanInfix);
        G_UPDATE(earlyMcClellanSuffix);
        G_UPDATE(allowSomChain);
        G_UPDATE(allowCountingMiracles);
        G_UPDATE(somMaxRevNfaLength);
        G_UPDATE(hamsterAccelForward);
        G_UPDATE(hamsterAccelReverse);
        G_UPDATE(miracleHistoryBonus);
        G_UPDATE(equivalenceEnable);
        G_UPDATE(allowSmallWrite);
        G_UPDATE(smallWriteLargestBuffer);
        G_UPDATE(smallWriteLargestBufferBad);
        G_UPDATE(limitSmallWriteOutfixSize);
        G_UPDATE(limitPatternCount);
        G_UPDATE(limitPatternLength);
        G_UPDATE(limitGraphVertices);
        G_UPDATE(limitGraphEdges);
        G_UPDATE(limitReportCount);
        G_UPDATE(limitLiteralCount);
        G_UPDATE(limitLiteralLength);
        G_UPDATE(limitLiteralMatcherChars);
        G_UPDATE(limitLiteralMatcherSize);
        G_UPDATE(limitRoseRoleCount);
        G_UPDATE(limitRoseEngineCount);
        G_UPDATE(limitRoseAnchoredSize);
        G_UPDATE(limitEngineSize);
        G_UPDATE(limitDFASize);
        G_UPDATE(limitNFASize);
        G_UPDATE(limitLBRSize);

#undef G_UPDATE

        // Compound presets: each one sets a fixed group of members and does
        // not use the parsed value.
        if (key == "simple_som") {
            g->allowHaigLit = false;
            g->allowLitHaig = false;
            g->allowSomChain = false;
            g->somMaxRevNfaLength = 0;
            done = true;
        }
        if (key == "forceOutfixesNFA") {
            g->allowAnchoredAcyclic = false;
            g->allowCastle = false;
            g->allowDecoratedLiteral = false;
            g->allowGough = false;
            g->allowHaigLit = false;
            g->allowLbr = false;
            g->allowLimExNFA = true;
            g->allowLitHaig = false;
            g->allowMcClellan = false;
            g->allowPuff = false;
            g->allowRose = false;
            g->allowSmallLiteralSet = false;
            g->roseMasks = false;
            done = true;
        }
        if (key == "forceOutfixesDFA") {
            g->allowAnchoredAcyclic = false;
            g->allowCastle = false;
            g->allowDecoratedLiteral = false;
            g->allowGough = false;
            g->allowHaigLit = false;
            g->allowLbr = false;
            g->allowLimExNFA = false;
            g->allowLitHaig = false;
            g->allowMcClellan = true;
            g->allowPuff = false;
            g->allowRose = false;
            g->allowSmallLiteralSet = false;
            g->roseMasks = false;
            done = true;
        }
        if (key == "forceOutfixes") {
            g->allowAnchoredAcyclic = false;
            g->allowCastle = false;
            g->allowDecoratedLiteral = false;
            g->allowGough = true;
            g->allowHaigLit = false;
            g->allowLbr = false;
            g->allowLimExNFA = true;
            g->allowLitHaig = false;
            g->allowMcClellan = true;
            g->allowPuff = false;
            g->allowRose = false;
            g->allowSmallLiteralSet = false;
            g->roseMasks = false;
            done = true;
        }

        if (!done && key != "help") {
            printf("Invalid grey override key %s:%u\n", key.c_str(), value);
            invalid_key_seen = true;
        }

        p = ve;

        if (p != pe) {
            ++p; // step over the ';' separator
        }
    }

    if (invalid_key_seen) {
        // Show the user what would have been accepted, then bail out.
        applyGreyOverrides(g, "help");
        exit(1);
    }

    assert(g->maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */
}
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
Reference in New Issue
Block a user