mirror of
https://github.com/openappsec/openappsec.git
synced 2025-09-29 19:24:26 +03:00
First release of open-appsec source code
This commit is contained in:
341
components/security_apps/waap/waap_clib/WaapRegexPreconditions.cc
Executable file
341
components/security_apps/waap/waap_clib/WaapRegexPreconditions.cc
Executable file
@@ -0,0 +1,341 @@
|
||||
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "WaapRegexPreconditions.h"
|
||||
#include "Waf2Util.h"
|
||||
#include "debug.h"
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
|
||||
USE_DEBUG_FLAG(D_WAAP_REGEX);
|
||||
|
||||
namespace Waap {
|
||||
// Index reserved for the empty word (""). The constructor registers "" before any other word, so the empty word
// is expected to end up under this index. The same value doubles as the "no such word" marker in lookups.
const RegexPreconditions::WordIndex RegexPreconditions::emptyWordIndex = 0;
|
||||
|
||||
// Builds the precondition tables from the parsed signatures JSON.
//
// jsObj - top-level signatures object. It must contain "precondition_keys" (array of words to load into the
//         Aho-Corasick pattern matcher) and "preconditions" (object mapping each word to its list of actions).
// error - set to true when loading fails: a required key is missing, the Aho-Corasick engine fails to prepare,
//         or the same regex pattern is attached to two different words. It is never written on success.
RegexPreconditions::RegexPreconditions(const picojson::value::object &jsObj, bool &error)
{
    // Register empty string word under known index
    registerWord("");

    // The keys should always be there unless data file is corrupted (but there's a unit test that tests exactly
    // that!)
    const auto preconditionsIt = jsObj.find("preconditions");
    if (preconditionsIt == jsObj.end()) {
        dbgError(D_WAAP_REGEX) << "Error loading regex preconditions (signatures data file corrupt?)...";
        error = true;
        return;
    }

    const auto preconditionKeysIt = jsObj.find("precondition_keys");
    if (preconditionKeysIt == jsObj.end()) {
        dbgError(D_WAAP_REGEX) << "Error loading regex precondition sets (signatures data file corrupt?)...";
        error = true;
        return;
    }

    // The JSON data is only read here, so bind by reference instead of deep-copying the whole sub-tree.
    const auto &preconditions = preconditionsIt->second.get<picojson::value::object>();

    // Build full list of words to load into aho-corasick pattern matcher
    dbgTrace(D_WAAP_REGEX) << "Loading regex precondition_keys into Aho-Corasick pattern matcher...";

    const auto &preconditionKeys = preconditionKeysIt->second.get<picojson::value::array>();
    std::set<PMPattern> pmPatterns;

    for (const auto &preconditionKey : preconditionKeys) {
        const std::string &wordStr = preconditionKey.get<std::string>();

        // Do not load the "empty" word into Aho-Corasick. It's meaningless and Aho prepare() call would fail.
        if (wordStr.empty()) {
            continue;
        }

        WordIndex wordIndex = registerWord(wordStr);
        pmPatterns.insert(PMPattern(wordStr, false, false, wordIndex));
    }

    // Initialize the aho-corasick pattern matcher with the patterns
    Maybe<void> pmHookStatus = m_pmHook.prepare(pmPatterns);

    if (!pmHookStatus.ok()) {
        dbgError(D_WAAP_REGEX) << "Aho-Corasick engine failed to load!";
        error = true;
        return;
    }

    dbgTrace(D_WAAP_REGEX) << "Aho-Corasick engine loaded.";

    // Loop over pre-conditions (rules) and load them
    dbgTrace(D_WAAP_REGEX) << "Loading regex preconditions...";

    for (const auto &precondition : preconditions)
    {
        // Each precondition consists of an aho-corasick pattern matcher word as a key and list of actions
        // (for that word) - as a value.
        const std::string &wordStr = precondition.first;

        // Information from the "empty string" word is not required by the engine to operate
        if (wordStr.empty()) {
            continue;
        }

        WordIndex wordIndex = registerWord(wordStr);

        // Words carrying a boundary-constraint suffix are linked to their base word in both directions.
        // The longest suffix must be tested first because it also ends with "_napre".
        if (boost::algorithm::ends_with(wordStr, "_napost_napre")) {
            WordIndex baseWordIndex = registerWord(wordStr.substr(0, wordStr.size() - strlen("_napost_napre")));
            m_pmWordInfo[baseWordIndex].napostNapreWordIndex = wordIndex;
            m_pmWordInfo[wordIndex].baseWordIndex = baseWordIndex;
        }
        else if (boost::algorithm::ends_with(wordStr, "_napost")) {
            WordIndex baseWordIndex = registerWord(wordStr.substr(0, wordStr.size() - strlen("_napost")));
            m_pmWordInfo[baseWordIndex].napostWordIndex = wordIndex;
            m_pmWordInfo[wordIndex].baseWordIndex = baseWordIndex;
        }
        else if (boost::algorithm::ends_with(wordStr, "_napre")) {
            WordIndex baseWordIndex = registerWord(wordStr.substr(0, wordStr.size() - strlen("_napre")));
            m_pmWordInfo[baseWordIndex].napreWordIndex = wordIndex;
            m_pmWordInfo[wordIndex].baseWordIndex = baseWordIndex;
        }

        // Load actions
        const auto &jsActionsList = precondition.second.get<picojson::value::array>();

        for (const auto &jsAction : jsActionsList) {
            const auto &action = jsAction.get<picojson::value::array>();

            if (action.empty()) {
                continue;
            }

            // The first item in the Action json object (it's a tuple of 1 or more items) is an action type string.
            const std::string &actionType = action[0].get<std::string>();

            // There are currently three action types:
            // 1. "regex" - allow specific regex to be scanned when the Aho-Corasick word is detected
            // 2. "set" - specify another "prefix" (string) to be enabled when the Aho-Corasick word is detected.
            //    if at least one prefix is enabled - it will trigger one or more other regexes.
            // 3. "and_condition" - specify (comma-separated) sorted list of "prefixes" (in one string).
            //    all of these prefixes should come together in order to complete a set to match a
            //    condition and enable one or more other regexes.
            if (actionType == "regex" && action.size() >= 3) {
                const std::string &regexPattern = action[1].get<std::string>();
                const auto existing = m_regexToWordMap.find(regexPattern);

                // A regex pattern may be enabled by exactly one word; a second, different owner is a data error.
                if (existing != m_regexToWordMap.end() && existing->second != wordIndex) {
                    dbgError(D_WAAP_REGEX) << "ERROR: trying to overwrite m_regexToWordMap. pattern='" <<
                        regexPattern << "'. Old wordIndex='" << existing->second << "' new word='"
                        << wordStr << "' (wordIndex=" << wordIndex << ")";
                    error = true;
                    return;
                }

                const std::string &flags = action[2].get<std::string>();

                if (flags == "_noregex") {
                    // Add regex pattern to set of "noRegex" patterns
                    m_noRegexPatterns.insert(regexPattern);
                }

                m_regexToWordMap[regexPattern] = wordIndex;
            }
            else if (actionType == "set" && action.size() >= 2) {
                const std::string &setValueStr = action[1].get<std::string>();
                WordIndex setValueIndex = registerWord(setValueStr);
                std::vector<WordIndex> &prefixSet = m_wordToPrefixSet[wordIndex];

                // Keep the prefix list free of duplicates
                if (std::find(prefixSet.begin(), prefixSet.end(), setValueIndex) == prefixSet.end()) {
                    prefixSet.push_back(setValueIndex);
                }
            }
            else if (actionType == "and_condition" && action.size() >= 2) {
                const std::string &groupValueStr = action[1].get<std::string>();
                WordIndex groupValueIndex = registerWord(groupValueStr);

                // The group value is a comma-separated list of prefixes (not a number), so the amount of
                // distinct words that must be collected to complete the AND condition equals the number of
                // items in that list.
                const size_t expectedCount =
                    static_cast<size_t>(std::count(groupValueStr.begin(), groupValueStr.end(), ',')) + 1;
                auto value(std::make_pair(groupValueIndex, expectedCount));
                std::vector<std::pair<WordIndex, size_t>> &prefixGroup = m_wordToPrefixGroup[wordIndex];

                // Keep the group list free of duplicates
                if (std::find(prefixGroup.begin(), prefixGroup.end(), value) == prefixGroup.end()) {
                    prefixGroup.push_back(value);
                }
            }
        }
    }

    dbgTrace(D_WAAP_REGEX) << "Aho-corasick pattern matching engine initialized!";
}
|
||||
|
||||
bool Waap::RegexPreconditions::isNoRegexPattern(const std::string &pattern) const
|
||||
{
|
||||
return m_noRegexPatterns.find(pattern) != m_noRegexPatterns.end();
|
||||
}
|
||||
|
||||
// Returns the string of the word stored under wordIndex. When the word has a base word recorded (the constructor
// records one for words ending with "_napre", "_napost" or "_napost_napre"), the base word's string is returned
// instead. No bounds checking is done on wordIndex.
const std::string &Waap::RegexPreconditions::getWordStrByWordIndex(WordIndex wordIndex) const
{
    const WordIndex baseWordIndex = m_pmWordInfo[wordIndex].baseWordIndex;
    const bool hasBaseWord = (baseWordIndex != emptyWordIndex);

    return m_pmWordInfo[hasBaseWord ? baseWordIndex : wordIndex].wordStr;
}
|
||||
|
||||
// Check that the regex pattern (string) is known to be related to an Aho-Corasick word/prefix
|
||||
// Returns empty string if not found, or the Aho-Corasick/prefix string otherwise.
|
||||
// This function is called during each Regex object creation and helps to pre-compute data required for a fast
|
||||
// lookup later during traffic processing.
|
||||
Waap::RegexPreconditions::WordIndex RegexPreconditions::getWordByRegex(const std::string ®exPattern) const
|
||||
{
|
||||
const auto &found = m_regexToWordMap.find(regexPattern);
|
||||
|
||||
if (found != m_regexToWordMap.end()) {
|
||||
return found->second;
|
||||
}
|
||||
|
||||
return Waap::RegexPreconditions::emptyWordIndex;
|
||||
}
|
||||
|
||||
// Adds a detected word to wordsSet, preceded by every "OR" prefix that the word enables (the prefixes loaded
// from the word's "set" actions).
void RegexPreconditions::processWord(RegexPreconditions::PmWordSet &wordsSet, WordIndex wordIndex) const
{
    const auto prefixSetIt = m_wordToPrefixSet.find(wordIndex);
    const bool enablesPrefixes = (prefixSetIt != m_wordToPrefixSet.end());

    if (enablesPrefixes) {
        // Each prefix is one of the items in an "OR" condition - detecting this word is enough to enable it
        for (WordIndex enabledPrefix : prefixSetIt->second) {
            wordsSet.insert(enabledPrefix);
        }
    }

    // Finally, add the word reported by the Aho-Corasick scanner itself
    wordsSet.insert(wordIndex);
}
|
||||
|
||||
inline bool isRegexWordChar(u_char c) {
|
||||
return Waap::Util::isAlphaAsciiFast(c) || isdigit(c) || '_' == c;
|
||||
}
|
||||
|
||||
// First pass: scan the buffer with the Aho-Corasick pattern matcher and, for every match, add to wordsSet the
// matched word, those of its boundary-constrained variants that apply at the match position, and the OR
// prefixes of each of them (see processWord).
void RegexPreconditions::pass1(RegexPreconditions::PmWordSet &wordsSet, Buffer &&buffer) const
{
    dbgTrace(D_WAAP_REGEX) << "Rules pass #1: collect OR sets";

    m_pmHook.scanBufWithOffsetLambda(buffer, [this, &wordsSet, &buffer]
        (u_int endMatchOffset, const PMPattern &pmPattern)
    {
        const auto matchLen = pmPattern.size();

        // The reported offset points to the last character of a match - convert it to the match start
        uint matchStart = endMatchOffset + 1 - matchLen;

        // Only the word index of the PMPattern object is needed (not its string part)
        const WordIndex matchedWord = pmPattern.getIndex();

        // Is the match glued to a regex word character (\w) on its left / on its right?
        const bool wordCharBefore = (matchStart != 0) &&
            isRegexWordChar(buffer.data()[matchStart - 1]);
        const bool wordCharAfter = (matchStart + matchLen < buffer.size()) &&
            isRegexWordChar(buffer.data()[matchStart + matchLen]);

        processWord(wordsSet, matchedWord);

        // Additional constraints ([!\w] before, [!\w] after, [!\w] around the match ...)
        const auto &matchedInfo = m_pmWordInfo[matchedWord];

        // No word character before the match: the "_napre" variant applies
        if (!wordCharBefore && matchedInfo.napreWordIndex != emptyWordIndex) {
            processWord(wordsSet, matchedInfo.napreWordIndex);
        }

        // No word character after the match: the "_napost" variant applies
        if (!wordCharAfter && matchedInfo.napostWordIndex != emptyWordIndex) {
            processWord(wordsSet, matchedInfo.napostWordIndex);
        }

        // No word character on either side: the "_napost_napre" variant applies as well
        if (!wordCharBefore && !wordCharAfter && matchedInfo.napostNapreWordIndex != emptyWordIndex) {
            processWord(wordsSet, matchedInfo.napostNapreWordIndex);
        }
    });
}
|
||||
|
||||
// Second pass: evaluate the "AND" groups. For every group prefix, collect the distinct words from wordsSet that
// belong to it; a group whose collected word count equals its expected count is complete and its prefix is
// added to wordsSet.
void RegexPreconditions::pass2(RegexPreconditions::PmWordSet &wordsSet) const
{
    dbgTrace(D_WAAP_REGEX) << "Rules pass #2: collect AND groups";

    // group prefix index -> distinct words seen so far that are members of that group
    std::unordered_map<WordIndex, std::set<WordIndex>> groupMembers;
    std::vector<WordIndex> completedGroups;

    for (WordIndex wordIndex : wordsSet) {
        const auto groupsIt = m_wordToPrefixGroup.find(wordIndex);

        if (groupsIt == m_wordToPrefixGroup.end()) {
            // This word does not take part in any AND group
            continue;
        }

        for (const auto &prefixCountPair : groupsIt->second) {
            const WordIndex groupPrefix = prefixCountPair.first;
            const size_t expectedCount = prefixCountPair.second;

            std::set<WordIndex> &members = groupMembers[groupPrefix];
            members.insert(wordIndex);

            if (members.size() == expectedCount) {
                // Full "AND" condition collected successfully - remember its prefix
                completedGroups.push_back(groupPrefix);
            }
        }
    }

    // wordsSet is extended only now, after the loop that iterates over it has finished
    for (WordIndex groupPrefix : completedGroups) {
        wordsSet.insert(groupPrefix);
    }
}
|
||||
|
||||
// This function scans the buffer with Aho-Corasick scanner and adds all the words found into wordsSet
|
||||
// It then continues and runs two pass algorithm to compute OR and AND conditions over a prefixes data.
|
||||
// The prefix strings are also added to the wordsSet and are looked up in the same database.
|
||||
void RegexPreconditions::pmScan(Buffer &&buffer, RegexPreconditions::PmWordSet &wordsSet) const
|
||||
{
|
||||
wordsSet.clear();
|
||||
pass1(wordsSet, std::move(buffer));
|
||||
pass2(wordsSet);
|
||||
// The empty string key contains all regexes that should always be scanned
|
||||
wordsSet.insert(Waap::RegexPreconditions::emptyWordIndex);
|
||||
}
|
||||
|
||||
// Get known wordIndex by wordStr, or allocate a new wordIndex for words yet unknown
|
||||
Waap::RegexPreconditions::WordIndex RegexPreconditions::registerWord(const std::string &wordStr)
|
||||
{
|
||||
const auto &found = m_wordStrToIndex.find(wordStr);
|
||||
if (found != m_wordStrToIndex.end()) {
|
||||
return found->second;
|
||||
}
|
||||
else {
|
||||
WordIndex wordIndex = m_pmWordInfo.size();
|
||||
m_wordStrToIndex[wordStr] = wordIndex; // index of the new element that will be added below...
|
||||
WordInfo wordInfo;
|
||||
wordInfo.wordStr = wordStr;
|
||||
m_pmWordInfo.push_back(wordInfo);
|
||||
return wordIndex;
|
||||
}
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user