expressions: add much faster limitToSignatures()

This commit is contained in:
Justin Viiret 2017-04-07 17:02:00 +10:00 committed by Matthew Barr
parent 0b8f25a036
commit 083d84cfd6
3 changed files with 22 additions and 26 deletions

View File

@ -756,6 +756,7 @@ int main(int argc, char *argv[]) {
// known expressions together. // known expressions together.
if (sigSets.empty()) { if (sigSets.empty()) {
SignatureSet sigs; SignatureSet sigs;
sigs.reserve(exprMapTemplate.size());
for (auto i : exprMapTemplate | map_keys) { for (auto i : exprMapTemplate | map_keys) {
sigs.push_back(i); sigs.push_back(i);
} }
@ -772,9 +773,7 @@ int main(int argc, char *argv[]) {
} }
for (const auto &s : sigSets) { for (const auto &s : sigSets) {
ExpressionMap exprMap = exprMapTemplate; // copy auto exprMap = limitToSignatures(exprMapTemplate, s.sigs);
limitBySignature(exprMap, s.sigs);
if (exprMap.empty()) { if (exprMap.empty()) {
continue; continue;
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -27,6 +27,10 @@
*/ */
#include "config.h" #include "config.h"
#include "expressions.h"
#include "hs.h"
#include "string_util.h"
#include <algorithm> #include <algorithm>
#include <fstream> #include <fstream>
@ -34,7 +38,6 @@
#include <stdexcept> #include <stdexcept>
#include <string> #include <string>
#include <boost/algorithm/string/trim.hpp>
#include <sys/types.h> #include <sys/types.h>
#include <sys/stat.h> #include <sys/stat.h>
#if !defined(_WIN32) #if !defined(_WIN32)
@ -45,9 +48,7 @@
#include <windows.h> #include <windows.h>
#endif #endif
#include "expressions.h" #include <boost/algorithm/string/trim.hpp>
#include "hs.h"
#include "string_util.h"
using namespace std; using namespace std;
@ -90,7 +91,7 @@ void processLine(string &line, unsigned lineNum,
//cout << "Inserting expr: id=" << id << ", pcre=" << pcre_str << endl; //cout << "Inserting expr: id=" << id << ", pcre=" << pcre_str << endl;
bool ins = exprMap.insert(ExpressionMap::value_type(id, pcre_str)).second; bool ins = exprMap.emplace(id, pcre_str).second;
if (!ins) { if (!ins) {
failLine(lineNum, file, line, "Duplicate ID found."); failLine(lineNum, file, line, "Duplicate ID found.");
} }
@ -278,20 +279,19 @@ void loadSignatureList(const string &inFile,
} }
} }
void limitBySignature(ExpressionMap &exprMap, ExpressionMap limitToSignatures(const ExpressionMap &exprMap,
const SignatureSet &signatures) { const SignatureSet &signatures) {
ExpressionMap keepers; ExpressionMap keepers;
SignatureSet::const_iterator it, ite; for (auto id : signatures) {
for (it = signatures.begin(), ite = signatures.end(); it != ite; ++it) { auto match = exprMap.find(id);
ExpressionMap::const_iterator match = exprMap.find(*it);
if (match == exprMap.end()) { if (match == exprMap.end()) {
cerr << "Unable to find signature " << *it cerr << "Unable to find signature " << id
<< " in expression set!" << endl; << " in expression set!" << endl;
exit(1); exit(1);
} }
keepers.insert(*match); keepers.insert(*match);
} }
exprMap.swap(keepers); return keepers;
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -31,10 +31,10 @@
#include <map> #include <map>
#include <string> #include <string>
#include <list> #include <vector>
typedef std::map<unsigned, std::string> ExpressionMap; using ExpressionMap = std::map<unsigned, std::string>;
typedef std::list<unsigned> SignatureSet; using SignatureSet = std::vector<unsigned>;
// load all of the expressions from the given directory into the given // load all of the expressions from the given directory into the given
// expression map. Exits on failure. // expression map. Exits on failure.
@ -45,11 +45,8 @@ void loadExpressionsFromFile(const std::string &fname, ExpressionMap &exprMap);
// load a list of signature IDs // load a list of signature IDs
void loadSignatureList(const std::string &inFile, SignatureSet &signatures); void loadSignatureList(const std::string &inFile, SignatureSet &signatures);
// produce a new expression map only containing those signatures in the // trim expression map to only the given signatures, returning result
// expression list ExpressionMap limitToSignatures(const ExpressionMap &exprMap,
void generateExprMap(const SignatureSet &signatures, const SignatureSet &signatures);
const ExpressionMap &allExprs, ExpressionMap &out);
// trim expression map to only the given signatures (in-place)
void limitBySignature(ExpressionMap &exprMap, const SignatureSet &signatures);
#endif #endif