mirror of
https://github.com/openappsec/openappsec.git
synced 2025-11-16 09:21:54 +03:00
My 11th 2023 update
This commit is contained in:
372
components/utils/keywords/pcre_keyword.cc
Normal file
372
components/utils/keywords/pcre_keyword.cc
Normal file
@@ -0,0 +1,372 @@
|
||||
#include "single_keyword.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
#include <pcre2.h>
|
||||
|
||||
#include "output.h"
|
||||
#include "debug.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
USE_DEBUG_FLAG(D_KEYWORD);
|
||||
|
||||
class PCREKeyword : public SingleKeyword
|
||||
{
|
||||
public:
|
||||
explicit PCREKeyword(const vector<KeywordAttr> &attr, const VariablesMapping &known_vars);
|
||||
MatchStatus isMatch(const I_KeywordRuntimeState *prev) const override;
|
||||
|
||||
private:
|
||||
void
|
||||
setOffset(const KeywordAttr &attr, const VariablesMapping &vars)
|
||||
{
|
||||
offset.setAttr(attr, vars, "pcre");
|
||||
}
|
||||
|
||||
void
|
||||
setDepth(const KeywordAttr &attr, const VariablesMapping &vars)
|
||||
{
|
||||
depth.setAttr(attr, vars, "pcre");
|
||||
}
|
||||
|
||||
void
|
||||
setRelative(const KeywordAttr &attr, const VariablesMapping &)
|
||||
{
|
||||
is_relative.setAttr(attr, "pcre");
|
||||
}
|
||||
|
||||
void
|
||||
setCaseInsensitive(const KeywordAttr &attr, const VariablesMapping &)
|
||||
{
|
||||
is_case_insensitive.setAttr(attr, "pcre");
|
||||
}
|
||||
|
||||
void
|
||||
setContext(const KeywordAttr &attr, const VariablesMapping &)
|
||||
{
|
||||
ctx.setAttr(attr, "pcre");
|
||||
}
|
||||
|
||||
string parseString(const string &str);
|
||||
pair<string, string> findExprInStr(const string &str, size_t start, size_t end);
|
||||
void parseOptions(const string &str);
|
||||
void compilePCRE(const string &str);
|
||||
|
||||
pair<uint, uint> getStartOffsetAndLength(uint buf_size, const I_KeywordRuntimeState *prev) const;
|
||||
|
||||
bool
|
||||
isConstant() const
|
||||
{
|
||||
return !is_relative && offset.isConstant() && depth.isConstant();
|
||||
}
|
||||
|
||||
class PCREDelete
|
||||
{
|
||||
public:
|
||||
void
|
||||
operator()(pcre2_code *ptr)
|
||||
{
|
||||
pcre2_code_free(ptr);
|
||||
}
|
||||
};
|
||||
unique_ptr<pcre2_code, PCREDelete> pcre_machine;
|
||||
|
||||
class PCREResultDelete
|
||||
{
|
||||
public:
|
||||
void
|
||||
operator()(pcre2_match_data *ptr)
|
||||
{
|
||||
pcre2_match_data_free(ptr);
|
||||
}
|
||||
};
|
||||
unique_ptr<pcre2_match_data, PCREResultDelete> pcre_result;
|
||||
|
||||
NumericAttr offset;
|
||||
NumericAttr depth;
|
||||
BoolAttr is_negative;
|
||||
BoolAttr is_relative;
|
||||
|
||||
BoolAttr is_case_insensitive;
|
||||
BoolAttr is_multiline;
|
||||
BoolAttr is_dotall;
|
||||
BoolAttr is_extended;
|
||||
BoolAttr is_dollar_endonly;
|
||||
BoolAttr is_anchor;
|
||||
BoolAttr is_ungreedy;
|
||||
|
||||
CtxAttr ctx;
|
||||
|
||||
string pcre_expr;
|
||||
|
||||
static const map<string, void(PCREKeyword::*)(const KeywordAttr &, const VariablesMapping &)> setops;
|
||||
};
|
||||
|
||||
const map<string, void(PCREKeyword::*)(const KeywordAttr&, const VariablesMapping&)> PCREKeyword::setops = {
|
||||
{ "relative", &PCREKeyword::setRelative },
|
||||
{ "offset", &PCREKeyword::setOffset },
|
||||
{ "depth", &PCREKeyword::setDepth },
|
||||
{ "nocase", &PCREKeyword::setCaseInsensitive },
|
||||
{ "part", &PCREKeyword::setContext },
|
||||
};
|
||||
|
||||
PCREKeyword::PCREKeyword(const vector<KeywordAttr> &attrs, const VariablesMapping &known_vars)
|
||||
:
|
||||
offset(),
|
||||
depth()
|
||||
{
|
||||
auto &expr_param = attrs[0].getParams();
|
||||
if (expr_param.size() != 1) throw KeywordError("More than one element in the 'pcre' keyword pattern");
|
||||
auto expr = parseString(expr_param[0]);
|
||||
dbgDebug(D_KEYWORD) << "Creating a new 'pcre' expression: " << expr;
|
||||
|
||||
for (uint i = 1; i<attrs.size(); i++) {
|
||||
auto curr = setops.find(attrs[i].getAttrName());
|
||||
if (curr == setops.end()) {
|
||||
throw KeywordError("Unknown attribute '" + attrs[i].getAttrName() + "' in the 'pcre' keyword");
|
||||
}
|
||||
auto set_func = curr->second;
|
||||
(this->*set_func)(attrs[i], known_vars);
|
||||
}
|
||||
|
||||
compilePCRE(expr);
|
||||
}
|
||||
|
||||
string
|
||||
PCREKeyword::parseString(const string &str)
|
||||
{
|
||||
size_t start_offset = 0, end_offset = str.size();
|
||||
|
||||
if (start_offset<end_offset && str[start_offset]=='!') {
|
||||
is_negative.setAttr("pcre", "negative");
|
||||
start_offset++;
|
||||
}
|
||||
|
||||
if (start_offset+1>=end_offset || str[start_offset]!='"' || str[end_offset-1]!='"') {
|
||||
throw KeywordError("The 'pcre' expression should be enclosed in quotation marks");
|
||||
}
|
||||
start_offset++;
|
||||
end_offset--;
|
||||
|
||||
string expr, options;
|
||||
tie(expr, options) = findExprInStr(str, start_offset, end_offset);
|
||||
|
||||
parseOptions(options);
|
||||
|
||||
return expr;
|
||||
}
|
||||
|
||||
pair<string, string>
|
||||
PCREKeyword::findExprInStr(const string &str, size_t start, size_t end)
|
||||
{
|
||||
if (start>=end) throw KeywordError("The 'pcre' string is empty");
|
||||
|
||||
// There are two way to write the regular expression:
|
||||
// Either between '/' charecters: "/regexp/" (this is the default delimiter)
|
||||
// Or use 'm' to set a delimiter: "mDregexpD" (here 'D' is used as the delimiter)
|
||||
// The switch will set the parameter 'start' so the 'str[start]' is the first delimiter.
|
||||
switch (str[start]) {
|
||||
case '/': {
|
||||
break;
|
||||
}
|
||||
case 'm': {
|
||||
start++;
|
||||
if (start>=end) {
|
||||
throw KeywordError("Failed to detect a delimiter in the 'pcre' keyword regular expression");
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
KeywordError("Bad start for the 'pcre' regular expression");
|
||||
}
|
||||
|
||||
size_t expr_end = str.find_last_of(str[start], end-1);
|
||||
start++;
|
||||
if (expr_end<=start) throw KeywordError("The 'pcre' regular expression is empty");
|
||||
|
||||
auto options_start = expr_end+1;
|
||||
return make_pair(str.substr(start, expr_end-start), str.substr(options_start, end-options_start));
|
||||
}
|
||||
|
||||
void
|
||||
PCREKeyword::parseOptions(const string &options)
|
||||
{
|
||||
for (auto ch : options) {
|
||||
switch (ch) {
|
||||
case 'i': {
|
||||
is_case_insensitive.setAttr("pcre", "nocase");
|
||||
break;
|
||||
}
|
||||
case 'R': {
|
||||
is_relative.setAttr("pcre", "relative");
|
||||
break;
|
||||
}
|
||||
case 'm': {
|
||||
is_multiline.setAttr("pcre", "multiline");
|
||||
break;
|
||||
}
|
||||
case 's': {
|
||||
is_dotall.setAttr("pcre", "dotall");
|
||||
break;
|
||||
}
|
||||
case 'x': {
|
||||
is_extended.setAttr("pcre", "extended");
|
||||
break;
|
||||
}
|
||||
case 'E': {
|
||||
is_dollar_endonly.setAttr("pcre", "dollar_endonly");
|
||||
break;
|
||||
}
|
||||
case 'A': {
|
||||
is_anchor.setAttr("pcre", "anchor");
|
||||
break;
|
||||
}
|
||||
case 'G': {
|
||||
is_ungreedy.setAttr("pcre", "ungreedy");
|
||||
break;
|
||||
}
|
||||
default:
|
||||
throw KeywordError("Unknown option " + dumpHexChar(ch) + " in the 'pcre' keyword");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
PCREKeyword::compilePCRE(const string &expr)
|
||||
{
|
||||
uint32_t options = PCRE2_NO_AUTO_CAPTURE;
|
||||
if (is_case_insensitive) options |= PCRE2_CASELESS;
|
||||
if (is_multiline) options |= PCRE2_MULTILINE;
|
||||
if (is_dotall) options |= PCRE2_DOTALL;
|
||||
if (is_extended) options |= PCRE2_EXTENDED;
|
||||
if (is_dollar_endonly) options |= PCRE2_DOLLAR_ENDONLY;
|
||||
if (is_anchor) options |= PCRE2_ANCHORED;
|
||||
if (is_ungreedy) options |= PCRE2_UNGREEDY;
|
||||
|
||||
int error;
|
||||
PCRE2_SIZE error_offset;
|
||||
auto pattern = reinterpret_cast<PCRE2_SPTR>(expr.c_str());
|
||||
pcre_machine.reset(pcre2_compile(pattern, expr.size(), options, &error, &error_offset, nullptr));
|
||||
if (pcre_machine == nullptr) {
|
||||
vector<u_char> msg;
|
||||
msg.reserve(128);
|
||||
pcre2_get_error_message(error, msg.data(), msg.capacity());
|
||||
|
||||
throw KeywordError(
|
||||
"Failed to compile the 'pcre' at offset "
|
||||
+ to_string(error_offset)
|
||||
+ " with error: "
|
||||
+ reinterpret_cast<char *>(msg.data())
|
||||
);
|
||||
}
|
||||
|
||||
pcre_result.reset(pcre2_match_data_create_from_pattern(pcre_machine.get(), nullptr));
|
||||
if (pcre_result == nullptr) {
|
||||
throw KeywordError("Failed to allocate PCRE results container");
|
||||
}
|
||||
|
||||
pcre_expr = expr;
|
||||
}
|
||||
|
||||
static uint
|
||||
addOffset(uint offset, int add)
|
||||
{
|
||||
if (add<0 && offset<static_cast<uint>(-add)) return 0;
|
||||
return offset + add;
|
||||
}
|
||||
|
||||
pair<uint, uint>
|
||||
PCREKeyword::getStartOffsetAndLength(uint buf_size, const I_KeywordRuntimeState *prev) const
|
||||
{
|
||||
uint keyword_offset = is_relative?prev->getOffset(ctx):0;
|
||||
uint start_offset = addOffset(keyword_offset, offset.evalAttr(prev));
|
||||
|
||||
if (start_offset>=buf_size) return make_pair(0, 0);
|
||||
|
||||
uint length = buf_size-start_offset;
|
||||
|
||||
if (depth.isSet()) {
|
||||
length = min(length, static_cast<uint>(depth.evalAttr(prev)));
|
||||
}
|
||||
|
||||
return make_pair(start_offset, length);
|
||||
}
|
||||
|
||||
MatchStatus
|
||||
PCREKeyword::isMatch(const I_KeywordRuntimeState *prev) const
|
||||
{
|
||||
dbgAssert(pcre_machine!=nullptr) << "Trying to run on an uninitialized keyword 'pcre'";
|
||||
|
||||
auto part = Singleton::Consume<I_Environment>::by<KeywordComp>()->get<Buffer>(static_cast<string>(ctx));
|
||||
|
||||
if (!part.ok()) {
|
||||
if (is_negative) {
|
||||
return runNext(prev);
|
||||
}
|
||||
return MatchStatus::NoMatchFinal;
|
||||
}
|
||||
|
||||
uint offset, length;
|
||||
tie(offset, length) = getStartOffsetAndLength((*part).size(), prev);
|
||||
auto buf = (*part).getPtr(offset, length);
|
||||
|
||||
if (!buf.ok()) {
|
||||
dbgTrace(D_KEYWORD) << "Could not get the buffer for the 'pcre' keyword";
|
||||
return MatchStatus::NoMatchFinal;
|
||||
}
|
||||
const unsigned char *ptr = *buf;
|
||||
|
||||
bool match_found = false;
|
||||
uint buf_offset_found;
|
||||
for (uint buf_pos = 0; buf_pos<length; buf_pos = buf_offset_found) {
|
||||
dbgDebug(D_KEYWORD) << "Looking for expression: " << pcre_expr;
|
||||
dbgTrace(D_KEYWORD) << "Running pcre_exec for expression: " << ptr;
|
||||
int result = pcre2_match(
|
||||
pcre_machine.get(),
|
||||
ptr,
|
||||
length,
|
||||
buf_pos,
|
||||
0,
|
||||
pcre_result.get(),
|
||||
nullptr
|
||||
);
|
||||
|
||||
if (result<0) {
|
||||
// No match (possiblely due to an error)
|
||||
dbgDebug(D_KEYWORD) << "No match, possiblely due to an error in 'pcre_exec'";
|
||||
break;
|
||||
} else {
|
||||
dbgDebug(D_KEYWORD) << "Match found";
|
||||
}
|
||||
|
||||
if (is_negative) {
|
||||
return isConstant()?MatchStatus::NoMatchFinal:MatchStatus::NoMatch;
|
||||
}
|
||||
match_found = true;
|
||||
buf_offset_found = pcre2_get_ovector_pointer(pcre_result.get())[0];
|
||||
OffsetRuntimeState new_offset(prev, ctx, offset+buf_offset_found);
|
||||
auto next_keyword_result = runNext(&new_offset);
|
||||
if (next_keyword_result!=MatchStatus::NoMatch) return next_keyword_result;
|
||||
if (buf_offset_found<=buf_pos) buf_offset_found = buf_pos+1; // Deal with empty matches
|
||||
}
|
||||
|
||||
// No matchs is a success for negative keywords
|
||||
if (is_negative && !match_found) {
|
||||
return runNext(prev);
|
||||
}
|
||||
|
||||
// If there were no matchs and the keyword is an effected by other keywords, then we know that the rule won't match
|
||||
if (isConstant() && !match_found) {
|
||||
return MatchStatus::NoMatchFinal;
|
||||
}
|
||||
|
||||
return MatchStatus::NoMatch;
|
||||
}
|
||||
|
||||
unique_ptr<SingleKeyword>
|
||||
genPCREKeyword(const vector<KeywordAttr> &attr, VariablesMapping &known_vars)
|
||||
{
|
||||
return make_unique<PCREKeyword>(attr, known_vars);
|
||||
}
|
||||
Reference in New Issue
Block a user