mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
408 lines
15 KiB
C++
408 lines
15 KiB
C++
/*
|
|
* Copyright (c) 2015-2017, Intel Corporation
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "grey.h"
|
|
#include "ue2common.h"
|
|
|
|
#include <algorithm>
|
|
#include <cstdlib> // exit
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#define DEFAULT_MAX_HISTORY 110
|
|
|
|
using namespace std;
|
|
|
|
namespace ue2 {
|
|
|
|
Grey::Grey(void) :
|
|
optimiseComponentTree(true),
|
|
calcComponents(true),
|
|
performGraphSimplification(true),
|
|
prefilterReductions(true),
|
|
removeEdgeRedundancy(true),
|
|
allowGough(true),
|
|
allowHaigLit(true),
|
|
allowLitHaig(true),
|
|
allowLbr(true),
|
|
allowMcClellan(true),
|
|
allowSheng(true),
|
|
allowMcSheng(true),
|
|
allowPuff(true),
|
|
allowLiteral(true),
|
|
allowViolet(true),
|
|
allowExtendedNFA(true), /* bounded repeats of course */
|
|
allowLimExNFA(true),
|
|
allowAnchoredAcyclic(true),
|
|
allowSmallLiteralSet(true),
|
|
allowCastle(true),
|
|
allowDecoratedLiteral(true),
|
|
allowApproximateMatching(true),
|
|
allowNoodle(true),
|
|
fdrAllowTeddy(true),
|
|
fdrAllowFlood(true),
|
|
violetAvoidSuffixes(true),
|
|
violetAvoidWeakInfixes(true),
|
|
violetDoubleCut(true),
|
|
violetExtractStrongLiterals(true),
|
|
violetLiteralChains(true),
|
|
violetDoubleCutLiteralLen(3),
|
|
violetEarlyCleanLiteralLen(6),
|
|
puffImproveHead(true),
|
|
castleExclusive(true),
|
|
mergeSEP(true), /* short exhaustible passthroughs */
|
|
mergeRose(true), // roses inside rose
|
|
mergeSuffixes(true), // suffix nfas inside rose
|
|
mergeOutfixes(true),
|
|
onlyOneOutfix(false),
|
|
allowShermanStates(true),
|
|
allowMcClellan8(true),
|
|
highlanderPruneDFA(true),
|
|
minimizeDFA(true),
|
|
accelerateDFA(true),
|
|
accelerateNFA(true),
|
|
reverseAccelerate(true),
|
|
squashNFA(true),
|
|
compressNFAState(true),
|
|
numberNFAStatesWrong(false), /* debugging only */
|
|
highlanderSquash(true),
|
|
allowZombies(true),
|
|
floodAsPuffette(false),
|
|
nfaForceSize(0),
|
|
maxHistoryAvailable(DEFAULT_MAX_HISTORY),
|
|
minHistoryAvailable(0), /* debugging only */
|
|
maxAnchoredRegion(63), /* for rose's atable to run over */
|
|
minRoseLiteralLength(3),
|
|
minRoseNetflowLiteralLength(2),
|
|
maxRoseNetflowEdges(50000), /* otherwise no netflow pass. */
|
|
maxEditDistance(16),
|
|
minExtBoundedRepeatSize(32),
|
|
goughCopyPropagate(true),
|
|
goughRegisterAllocate(true),
|
|
shortcutLiterals(true),
|
|
roseGraphReduction(true),
|
|
roseRoleAliasing(true),
|
|
roseMasks(true),
|
|
roseConvertFloodProneSuffixes(true),
|
|
roseMergeRosesDuringAliasing(true),
|
|
roseMultiTopRoses(true),
|
|
roseHamsterMasks(true),
|
|
roseLookaroundMasks(true),
|
|
roseMcClellanPrefix(1),
|
|
roseMcClellanSuffix(1),
|
|
roseMcClellanOutfix(2),
|
|
roseTransformDelay(true),
|
|
earlyMcClellanPrefix(true),
|
|
earlyMcClellanInfix(true),
|
|
earlyMcClellanSuffix(true),
|
|
allowCountingMiracles(true),
|
|
allowSomChain(true),
|
|
somMaxRevNfaLength(126),
|
|
hamsterAccelForward(true),
|
|
hamsterAccelReverse(false),
|
|
miracleHistoryBonus(16),
|
|
equivalenceEnable(true),
|
|
|
|
allowSmallWrite(true), // McClellan dfas for small patterns
|
|
allowSmallWriteSheng(false), // allow use of Sheng for SMWR
|
|
|
|
smallWriteLargestBuffer(70), // largest buffer that can be
|
|
// considered a small write
|
|
// all blocks larger than this
|
|
// are given to rose &co
|
|
smallWriteLargestBufferBad(35),
|
|
limitSmallWriteOutfixSize(1048576), // 1 MB
|
|
smallWriteMaxPatterns(10000),
|
|
smallWriteMaxLiterals(10000),
|
|
allowTamarama(true), // Tamarama engine
|
|
tamaChunkSize(100),
|
|
dumpFlags(0),
|
|
limitPatternCount(8000000), // 8M patterns
|
|
limitPatternLength(16000), // 16K bytes
|
|
limitGraphVertices(500000), // 500K vertices
|
|
limitGraphEdges(1000000), // 1M edges
|
|
limitReportCount(4*8000000),
|
|
limitLiteralCount(8000000), // 8M literals
|
|
limitLiteralLength(16000),
|
|
limitLiteralMatcherChars(1073741824), // 1 GB
|
|
limitLiteralMatcherSize(1073741824), // 1 GB
|
|
limitRoseRoleCount(4*8000000),
|
|
limitRoseEngineCount(8000000), // 8M engines
|
|
limitRoseAnchoredSize(1073741824), // 1 GB
|
|
limitEngineSize(1073741824), // 1 GB
|
|
limitDFASize(1073741824), // 1 GB
|
|
limitNFASize(1048576), // 1 MB
|
|
limitLBRSize(1048576), // 1 MB
|
|
limitApproxMatchingVertices(5000)
|
|
{
|
|
assert(maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */
|
|
}
|
|
|
|
} // namespace ue2
|
|
|
|
#ifndef RELEASE_BUILD
|
|
|
|
#include <boost/lexical_cast.hpp>
|
|
using boost::lexical_cast;
|
|
|
|
namespace ue2 {
|
|
|
|
void applyGreyOverrides(Grey *g, const string &s) {
|
|
string::const_iterator p = s.begin();
|
|
string::const_iterator pe = s.end();
|
|
string help = "help:0";
|
|
bool invalid_key_seen = false;
|
|
Grey defaultg;
|
|
|
|
if (s == "help" || s == "help:") {
|
|
printf("Valid grey overrides:\n");
|
|
p = help.begin();
|
|
pe = help.end();
|
|
}
|
|
|
|
while (p != pe) {
|
|
string::const_iterator ke = find(p, pe, ':');
|
|
|
|
if (ke == pe) {
|
|
break;
|
|
}
|
|
|
|
string key(p, ke);
|
|
|
|
string::const_iterator ve = find(ke, pe, ';');
|
|
|
|
unsigned int value = lexical_cast<unsigned int>(string(ke + 1, ve));
|
|
bool done = false;
|
|
|
|
/* surely there exists a nice template to go with this macro to make
|
|
* all the boring code disappear */
|
|
#define G_UPDATE(k) do { \
|
|
if (key == ""#k) { g->k = value; done = 1;} \
|
|
if (key == "help") { \
|
|
printf("\t%-30s\tdefault: %s\n", #k, \
|
|
lexical_cast<string>(defaultg.k).c_str()); \
|
|
} \
|
|
} while (0)
|
|
|
|
G_UPDATE(optimiseComponentTree);
|
|
G_UPDATE(calcComponents);
|
|
G_UPDATE(performGraphSimplification);
|
|
G_UPDATE(prefilterReductions);
|
|
G_UPDATE(removeEdgeRedundancy);
|
|
G_UPDATE(allowGough);
|
|
G_UPDATE(allowHaigLit);
|
|
G_UPDATE(allowLitHaig);
|
|
G_UPDATE(allowLbr);
|
|
G_UPDATE(allowMcClellan);
|
|
G_UPDATE(allowSheng);
|
|
G_UPDATE(allowMcSheng);
|
|
G_UPDATE(allowPuff);
|
|
G_UPDATE(allowLiteral);
|
|
G_UPDATE(allowViolet);
|
|
G_UPDATE(allowExtendedNFA);
|
|
G_UPDATE(allowLimExNFA);
|
|
G_UPDATE(allowAnchoredAcyclic);
|
|
G_UPDATE(allowSmallLiteralSet);
|
|
G_UPDATE(allowCastle);
|
|
G_UPDATE(allowDecoratedLiteral);
|
|
G_UPDATE(allowNoodle);
|
|
G_UPDATE(allowApproximateMatching);
|
|
G_UPDATE(fdrAllowTeddy);
|
|
G_UPDATE(fdrAllowFlood);
|
|
G_UPDATE(violetAvoidSuffixes);
|
|
G_UPDATE(violetAvoidWeakInfixes);
|
|
G_UPDATE(violetDoubleCut);
|
|
G_UPDATE(violetExtractStrongLiterals);
|
|
G_UPDATE(violetLiteralChains);
|
|
G_UPDATE(violetDoubleCutLiteralLen);
|
|
G_UPDATE(violetEarlyCleanLiteralLen);
|
|
G_UPDATE(puffImproveHead);
|
|
G_UPDATE(castleExclusive);
|
|
G_UPDATE(mergeSEP);
|
|
G_UPDATE(mergeRose);
|
|
G_UPDATE(mergeSuffixes);
|
|
G_UPDATE(mergeOutfixes);
|
|
G_UPDATE(onlyOneOutfix);
|
|
G_UPDATE(allowShermanStates);
|
|
G_UPDATE(allowMcClellan8);
|
|
G_UPDATE(highlanderPruneDFA);
|
|
G_UPDATE(minimizeDFA);
|
|
G_UPDATE(accelerateDFA);
|
|
G_UPDATE(accelerateNFA);
|
|
G_UPDATE(reverseAccelerate);
|
|
G_UPDATE(squashNFA);
|
|
G_UPDATE(compressNFAState);
|
|
G_UPDATE(numberNFAStatesWrong);
|
|
G_UPDATE(allowZombies);
|
|
G_UPDATE(floodAsPuffette);
|
|
G_UPDATE(nfaForceSize);
|
|
G_UPDATE(highlanderSquash);
|
|
G_UPDATE(maxHistoryAvailable);
|
|
G_UPDATE(minHistoryAvailable);
|
|
G_UPDATE(maxAnchoredRegion);
|
|
G_UPDATE(minRoseLiteralLength);
|
|
G_UPDATE(minRoseNetflowLiteralLength);
|
|
G_UPDATE(maxRoseNetflowEdges);
|
|
G_UPDATE(maxEditDistance);
|
|
G_UPDATE(minExtBoundedRepeatSize);
|
|
G_UPDATE(goughCopyPropagate);
|
|
G_UPDATE(goughRegisterAllocate);
|
|
G_UPDATE(shortcutLiterals);
|
|
G_UPDATE(roseGraphReduction);
|
|
G_UPDATE(roseRoleAliasing);
|
|
G_UPDATE(roseMasks);
|
|
G_UPDATE(roseConvertFloodProneSuffixes);
|
|
G_UPDATE(roseMergeRosesDuringAliasing);
|
|
G_UPDATE(roseMultiTopRoses);
|
|
G_UPDATE(roseHamsterMasks);
|
|
G_UPDATE(roseLookaroundMasks);
|
|
G_UPDATE(roseMcClellanPrefix);
|
|
G_UPDATE(roseMcClellanSuffix);
|
|
G_UPDATE(roseMcClellanOutfix);
|
|
G_UPDATE(roseTransformDelay);
|
|
G_UPDATE(earlyMcClellanPrefix);
|
|
G_UPDATE(earlyMcClellanInfix);
|
|
G_UPDATE(earlyMcClellanSuffix);
|
|
G_UPDATE(allowSomChain);
|
|
G_UPDATE(allowCountingMiracles);
|
|
G_UPDATE(somMaxRevNfaLength);
|
|
G_UPDATE(hamsterAccelForward);
|
|
G_UPDATE(hamsterAccelReverse);
|
|
G_UPDATE(miracleHistoryBonus);
|
|
G_UPDATE(equivalenceEnable);
|
|
G_UPDATE(allowSmallWrite);
|
|
G_UPDATE(allowSmallWriteSheng);
|
|
G_UPDATE(smallWriteLargestBuffer);
|
|
G_UPDATE(smallWriteLargestBufferBad);
|
|
G_UPDATE(limitSmallWriteOutfixSize);
|
|
G_UPDATE(smallWriteMaxPatterns);
|
|
G_UPDATE(smallWriteMaxLiterals);
|
|
G_UPDATE(allowTamarama);
|
|
G_UPDATE(tamaChunkSize);
|
|
G_UPDATE(limitPatternCount);
|
|
G_UPDATE(limitPatternLength);
|
|
G_UPDATE(limitGraphVertices);
|
|
G_UPDATE(limitGraphEdges);
|
|
G_UPDATE(limitReportCount);
|
|
G_UPDATE(limitLiteralCount);
|
|
G_UPDATE(limitLiteralLength);
|
|
G_UPDATE(limitLiteralMatcherChars);
|
|
G_UPDATE(limitLiteralMatcherSize);
|
|
G_UPDATE(limitRoseRoleCount);
|
|
G_UPDATE(limitRoseEngineCount);
|
|
G_UPDATE(limitRoseAnchoredSize);
|
|
G_UPDATE(limitEngineSize);
|
|
G_UPDATE(limitDFASize);
|
|
G_UPDATE(limitNFASize);
|
|
G_UPDATE(limitLBRSize);
|
|
G_UPDATE(limitApproxMatchingVertices);
|
|
|
|
#undef G_UPDATE
|
|
if (key == "simple_som") {
|
|
g->allowHaigLit = false;
|
|
g->allowLitHaig = false;
|
|
g->allowSomChain = false;
|
|
g->somMaxRevNfaLength = 0;
|
|
done = true;
|
|
}
|
|
if (key == "forceOutfixesNFA") {
|
|
g->allowAnchoredAcyclic = false;
|
|
g->allowCastle = false;
|
|
g->allowDecoratedLiteral = false;
|
|
g->allowGough = false;
|
|
g->allowHaigLit = false;
|
|
g->allowLbr = false;
|
|
g->allowLimExNFA = true;
|
|
g->allowLitHaig = false;
|
|
g->allowMcClellan = false;
|
|
g->allowPuff = false;
|
|
g->allowLiteral = false;
|
|
g->allowViolet = false;
|
|
g->allowSmallLiteralSet = false;
|
|
g->roseMasks = false;
|
|
done = true;
|
|
}
|
|
if (key == "forceOutfixesDFA") {
|
|
g->allowAnchoredAcyclic = false;
|
|
g->allowCastle = false;
|
|
g->allowDecoratedLiteral = false;
|
|
g->allowGough = false;
|
|
g->allowHaigLit = false;
|
|
g->allowLbr = false;
|
|
g->allowLimExNFA = false;
|
|
g->allowLitHaig = false;
|
|
g->allowMcClellan = true;
|
|
g->allowPuff = false;
|
|
g->allowLiteral = false;
|
|
g->allowViolet = false;
|
|
g->allowSmallLiteralSet = false;
|
|
g->roseMasks = false;
|
|
done = true;
|
|
}
|
|
if (key == "forceOutfixes") {
|
|
g->allowAnchoredAcyclic = false;
|
|
g->allowCastle = false;
|
|
g->allowDecoratedLiteral = false;
|
|
g->allowGough = true;
|
|
g->allowHaigLit = false;
|
|
g->allowLbr = false;
|
|
g->allowLimExNFA = true;
|
|
g->allowLitHaig = false;
|
|
g->allowMcClellan = true;
|
|
g->allowPuff = false;
|
|
g->allowLiteral = false;
|
|
g->allowViolet = false;
|
|
g->allowSmallLiteralSet = false;
|
|
g->roseMasks = false;
|
|
done = true;
|
|
}
|
|
|
|
if (!done && key != "help") {
|
|
printf("Invalid grey override key %s:%u\n", key.c_str(), value);
|
|
invalid_key_seen = true;
|
|
}
|
|
|
|
p = ve;
|
|
|
|
if (p != pe) {
|
|
++p;
|
|
}
|
|
}
|
|
|
|
if (invalid_key_seen) {
|
|
applyGreyOverrides(g, "help");
|
|
exit(1);
|
|
}
|
|
|
|
assert(g->maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */
|
|
}
|
|
|
|
} // namespace ue2
|
|
|
|
#endif
|