diff --git a/src/Makefile.am b/src/Makefile.am
index 623715bc..82e212ca 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -97,6 +97,7 @@ ACTIONS = \
 
 
 UTILS = \
+	utils/acmp.cc \
 	utils/geo_lookup.cc \
 	utils/https_client.cc \
 	utils/ip_tree.cc \
@@ -174,6 +175,7 @@ libmodsecurity_la_CPPFLAGS = \
 	-std=c++11 \
 	-I.. \
 	-g \
+	-fPIC \
 	-O0 \
 	-I ../headers \
 	$(PCRE_CPPFLAGS)
diff --git a/src/operators/pm.cc b/src/operators/pm.cc
index b573aeb2..f893041f 100644
--- a/src/operators/pm.cc
+++ b/src/operators/pm.cc
@@ -16,26 +16,125 @@
 #include "operators/pm.h"
 
 #include <string>
+#include <algorithm>
+#include <iterator>
+#include <sstream>
+#include <vector>
 
 #include "operators/operator.h"
 
 namespace ModSecurity {
 namespace operators {
 
-bool Pm::evaluate(Assay *assay) {
-    /**
-     * @todo Implement the operator Pm.
-     * Reference: https://github.com/SpiderLabs/ModSecurity/wiki/Reference-Manual#pm
-     */
-    return true;
+/**
+ * Releases the Aho-Corasick parser owned by this operator.
+ *
+ * m_p is checked before it is dereferenced, so destroying an operator whose
+ * parser or root node was never allocated does not crash.
+ */
+Pm::~Pm() {
+    if (m_p == NULL) {
+        return;
+    }
+
+    if (m_p->root_node != NULL) {
+        postOrderTraversal(m_p->root_node->btree);
+        free(m_p->root_node);
+        m_p->root_node = NULL;
+    }
+
+    free(m_p);
+    m_p = NULL;
 }
 
 
-Pm::Pm(std::string op, std::string param, bool negation)
-    : Operator() {
-    this->op = op;
-    this->param = param;
+/**
+ * Frees a lookup btree together with the trie nodes it points to.
+ *
+ * Subtrees are released before their parent. A trie node can own a btree
+ * for the next pattern character, so that nested btree is released too;
+ * without this only the first trie level would be freed.
+ *
+ * @param node Root of the btree to release, may be NULL.
+ */
+void Pm::postOrderTraversal(acmp_btree_node_t *node) {
+    if (node == NULL) {
+        return;
+    }
+
+    postOrderTraversal(node->left);
+    postOrderTraversal(node->right);
+
+    acmp_node_t *trie_node = node->node;
+    if (trie_node != NULL) {
+        postOrderTraversal(trie_node->btree);
+        trie_node->btree = NULL;
+
+        if (trie_node->text) {
+            free(trie_node->text);
+            trie_node->text = NULL;
+        }
+        /* acmp_add_pattern() heap-allocates `pattern` only on nodes that
+         * end a pattern; other nodes point at a string literal. */
+        if (trie_node->is_last && trie_node->pattern) {
+            free(trie_node->pattern);
+            trie_node->pattern = NULL;
+        }
+
+        free(trie_node);
+        node->node = NULL;
+    }
+
+    free(node);
 }
+
+/**
+ * Searches the input for any of the configured patterns.
+ *
+ * @param assay Not used by this implementation.
+ * @param input Data to scan.
+ * @return true when acmp_process_quick() reports a match.
+ */
+bool Pm::evaluate(Assay *assay, const std::string &input) {
+    int rc = 0;
+    ACMPT pt;
+    pt.parser = m_p;
+    pt.ptr = NULL;
+    const char *match = NULL;
+
+    rc = acmp_process_quick(&pt, &match, input.c_str(), input.length());
+    if (rc == 1) {
+        // save into tx, etc...
+    }
+
+    return rc == 1;
+}
+
+
+/**
+ * Compiles the operator parameter into the Aho-Corasick parser.
+ *
+ * The parameter is split on whitespace and each word is added as a pattern.
+ *
+ * @param error Not written by this implementation.
+ * @return true when acmp_prepare() succeeds.
+ */
+bool Pm::init(const char **error) {
+    std::vector<std::string> vec;
+
+    std::istringstream iss(param);
+    std::copy(std::istream_iterator<std::string>(iss),
+        std::istream_iterator<std::string>(),
+        std::back_inserter(vec));
+
+    for (auto &a : vec) {
+        acmp_add_pattern(m_p, a.c_str(), NULL, NULL, a.length());
+    }
+
+    return acmp_prepare(m_p) == 1;
+}
+
+
 
 }  // namespace operators
 }  // namespace ModSecurity
diff --git a/src/operators/pm.h b/src/operators/pm.h
index 9540f2bb..ff3b5a29 100644
--- a/src/operators/pm.h
+++ b/src/operators/pm.h
@@ -19,18 +19,32 @@
 #include <string>
 
 #include "operators/operator.h"
+#include "utils/acmp.h"
 
 #ifdef __cplusplus
 namespace ModSecurity {
 namespace operators {
 
+
 class Pm : public Operator {
  public:
     /** @ingroup ModSecurity_Operator */
-    Pm(std::string o, std::string p, bool i);
-    bool evaluate(Assay *assay);
+    Pm(std::string op, std::string param, bool negation)
+        : Operator(op, param, negation) {
+        m_p = acmp_create(0);
+    }
+    ~Pm();
+
+    bool evaluate(Assay *assay, const std::string &input);
+
+    virtual bool init(const char **error);
+    void postOrderTraversal(acmp_btree_node_t *node);
+
+ protected:
+    ACMP *m_p;
 };
 
+
 }  // namespace operators
 }  // namespace ModSecurity
 #endif
diff --git a/src/utils/acmp.cc b/src/utils/acmp.cc
new file mode 100644
index 00000000..b1a94ddc
--- /dev/null
+++ b/src/utils/acmp.cc
@@ -0,0 +1,440 @@
+/*
+* ModSecurity for Apache 2.x, http://www.modsecurity.org/
+* Copyright (c) 2004-2013 Trustwave Holdings, Inc. (http://www.trustwave.com/)
+*
+* You may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* If any of the files related to licensing are missing or if you have any
+* other questions related to licensing please contact Trustwave Holdings, Inc.
+* directly using the email address security@modsecurity.org.
+*/
+
+/* Aho-Corasick Matching */
+
+#include "acmp.h"
+#include <vector>
+
+#include <ctype.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+
+extern "C" {
+
+/*
+ *******************************************************************************
+ *******************************************************************************
+ * Functions for UTF-8 support
+ */
+
+
+/*
+ *******************************************************************************
+ *******************************************************************************
+ * Code for local / static utility functions
+ */
+
+/**
+ * Returns length of given string for parser's encoding
+ */
+static size_t acmp_strlen(ACMP *parser, const char *str) {
+    return strlen(str);
+}
+
+/**
+ * Turns string to array of ucs values, depending on parser's encoding
+ * str - string to convert, doesn't have to be NULL-terminated
+ * ucs_chars - where to write ucs values (each byte is read as unsigned char)
+ * len - length of input string
+ */
+static void acmp_strtoucs(ACMP *parser, const char *str, long *ucs_chars, int len) {
+    int i;
+    const char *c = str;
+
+
+    {
+        for (i = 0; i < len; i++) {
+            *(ucs_chars++) = (unsigned char)*(c++);
+        }
+    }
+}
+
+/**
+ * Returns node with given letter, or null if not found
+ */
+static acmp_node_t *acmp_child_for_code(acmp_node_t *parent_node, long ucs_code) {
+    acmp_node_t *node = parent_node->child;
+    if (node == NULL) return NULL;
+    for (;;) {
+        if (node->letter == ucs_code) return node;
+        node = node->sibling;
+        if (node == NULL) return NULL;
+    }
+}
+
+/**
+ * Adds node to parent node, if it is not already there
+ */
+static void acmp_add_node_to_parent(acmp_node_t *parent, acmp_node_t *child) {
+    acmp_node_t *node = NULL;
+
+    child->parent = parent;
+    if (parent->child == NULL) {
+        parent->child = child;
+        return;
+    }
+
+    node = parent->child;
+    for (;;) {
+        if (node == child) return;
+        if (node->sibling == NULL) {
+            node->sibling = child;
+            return;
+        }
+        node = node->sibling;
+    }
+}
+
+/**
+ * Copies values from one node to another, without child/sibling/fail pointers
+ * and without state variables.
+ */
+static void acmp_clone_node_no_state(acmp_node_t *from, acmp_node_t *to) {
+    memcpy(to, from, sizeof(acmp_node_t));
+    to->child = NULL;
+    to->sibling = NULL;
+    to->fail = NULL;
+    to->hit_count = 0;
+}
+
+static inline acmp_node_t *acmp_btree_find(acmp_node_t *node, long letter) {
+    acmp_btree_node_t *bnode = node->btree;
+    for (;;) {
+        if (bnode == NULL) return NULL;
+        if (bnode->letter == letter) return bnode->node;
+        if (bnode->letter > letter) {
+            bnode = bnode->left;
+        } else {
+            bnode = bnode->right;
+        }
+    }
+}
+
+/**
+ * Returns the child reached from node by letter (btree lookup), or NULL
+ */
+static inline acmp_node_t *acmp_goto(acmp_node_t *node, long letter) {
+    return acmp_btree_find(node, letter);
+}
+
+/**
+ * Connects each node with its first fail node that is end of a phrase.
+ */
+static void acmp_connect_other_matches(ACMP *parser, acmp_node_t *node) {
+    acmp_node_t *child, *om;
+
+    for (child = node->child; child != NULL; child = child->sibling) {
+        if (child->fail == NULL) continue;
+        for (om = child->fail; om != parser->root_node; om = om->fail) {
+            if (om->is_last) {
+                child->o_match = om;
+                break;
+            }
+        }
+    }
+
+    /* Go recursively through children of this node that have a child node */
+    for(child = node->child; child != NULL; child = child->sibling) {
+        if (child->child != NULL) acmp_connect_other_matches(parser, child);
+    }
+}
+
+/**
+ * Adds leaves to binary tree, working from sorted array of keyword tree nodes
+ */
+static void acmp_add_btree_leaves(acmp_btree_node_t *node, acmp_node_t *nodes[],
+    int pos, int lb, int rb) {
+
+    int left = 0, right = 0;
+    if ((pos - lb) > 1) {
+        left = lb + (pos - lb) / 2;
+        node->left =(acmp_btree_node_t *) calloc(1, sizeof(acmp_btree_node_t));
+        /* ENH: Check alloc succeeded */
+        node->left->node = nodes[left];
+        node->left->letter = nodes[left]->letter;
+#ifdef DEBUG_ACMP
+        fprintf(stderr, "%lc ->left %lc\n", (wint_t)node->node->letter, (wint_t)node->left->node->letter);
+#endif
+    }
+    if ((rb - pos) > 1) {
+        right = pos + (rb - pos) / 2;
+        node->right = (acmp_btree_node_t *)calloc(1, sizeof(acmp_btree_node_t));
+        /* ENH: Check alloc succeeded */
+        node->right->node = nodes[right];
+        node->right->letter = nodes[right]->letter;
+#ifdef DEBUG_ACMP
+        fprintf(stderr, "%lc ->right %lc\n", (wint_t)node->node->letter, (wint_t)node->right->node->letter);
+#endif
+    }
+    if (node->right != NULL) {
+        acmp_add_btree_leaves(node->right, nodes, right, pos, rb);
+    }
+    if (node->left != NULL) {
+        acmp_add_btree_leaves(node->left, nodes, left, lb, pos);
+    }
+}
+
+/**
+ * Builds balanced binary tree from children nodes of given node.
+ */
+static void acmp_build_binary_tree(ACMP *parser, acmp_node_t *node) {
+    size_t count, i, j;
+    acmp_node_t *child = node->child;
+    acmp_node_t **nodes;
+    size_t pos;
+
+    /* Build an array big enough */
+    for (count = 0; child != NULL; child = child->sibling) count++;
+    nodes = (acmp_node_t **)calloc(1, count * sizeof(acmp_node_t *));
+    /* ENH: Check alloc succeeded */
+
+    /* ENH: Combine this in the loop below - we do not need two loops */
+    child = node->child;
+    for (i = 0; i < count; i++) {
+        nodes[i] = child;
+        child = child->sibling;
+    };
+
+    /* Sort the children by letter; "i + 1 < count" cannot wrap like
+     * "count - 1" does on an unsigned count of zero */
+    for (i = 0; i + 1 < count; i++)
+        for (j = i + 1; j < count; j++) {
+            acmp_node_t *tmp;
+
+            if (nodes[i]->letter < nodes[j]->letter) continue;
+
+            tmp = nodes[i];
+            nodes[i] = nodes[j];
+            nodes[j] = tmp;
+        }
+    if (node->btree) { free (node->btree); node->btree = NULL; }
+    node->btree = (acmp_btree_node_t *)calloc(1, sizeof(acmp_btree_node_t));
+    /* ENH: Check alloc succeeded */
+    pos = count / 2;
+    node->btree->node = nodes[pos];
+    node->btree->letter = nodes[pos]->letter;
+    acmp_add_btree_leaves(node->btree, nodes, pos, -1, count);
+    for (i = 0; i < count; i++) {
+        if (nodes[i]->child != NULL) acmp_build_binary_tree(parser, nodes[i]);
+    }
+    free(nodes);
+}
+
+/**
+ * Constructs fail paths on keyword trie
+ */
+static int acmp_connect_fail_branches(ACMP *parser) {
+    /* Already connected ? */
+    acmp_node_t *child, *node, *goto_node;
+
+    if (parser->is_failtree_done != 0) return 1;
+
+    std::vector<acmp_node_t *> arr;
+    std::vector<acmp_node_t *> arr2;
+    std::vector<acmp_node_t *> tmp;
+
+    parser->root_node->text = (char *)"";
+
+    parser->root_node->fail = parser->root_node;
+
+    /* All first-level children will fail back to root node */
+    for (child = parser->root_node->child; child != NULL; child = child->sibling) {
+        child->fail = parser->root_node;
+        arr.push_back(child);
+    }
+
+    for (;;) {
+        while (arr.empty() == false) {
+            node = arr.back();
+            arr.pop_back();
+            node->fail = parser->root_node;
+            if (node->parent != parser->root_node) {
+                goto_node = acmp_child_for_code(node->parent->fail, node->letter);
+                node->fail = (goto_node != NULL) ? goto_node : parser->root_node;
+            }
+#ifdef DEBUG_ACMP
+            fprintf(stderr, "fail direction: *%s* => *%s*\n", node->text, node->fail->text);
+#endif
+            child = node->child;
+            while (child != NULL) {
+                arr2.push_back(child);
+                child = child->sibling;
+            }
+        }
+        if (arr2.empty() == true) break;
+
+        tmp = arr;
+        arr = arr2;
+        arr2 = tmp;
+    }
+
+    acmp_connect_other_matches(parser, parser->root_node);
+    if (parser->root_node->child != NULL) acmp_build_binary_tree(parser, parser->root_node);
+    parser->is_failtree_done = 1;
+
+    return 1;
+}
+
+/*
+ *******************************************************************************
+ *******************************************************************************
+ * Code for functions from header file
+ */
+
+
+/**
+ * flags - OR-ed values of ACMP_FLAG constants
+ */
+ACMP *acmp_create(int flags) {
+    int rc;
+    ACMP *parser;
+
+    parser = (ACMP *)calloc(1, sizeof(ACMP));
+    /* ENH: Check alloc succeeded */
+    parser->is_case_sensitive = (flags & ACMP_FLAG_CASE_SENSITIVE) == 0 ? 0 : 1;
+    parser->root_node = (acmp_node_t *)calloc(1, sizeof(acmp_node_t));
+    /* ENH: Check alloc succeeded */
+    return parser;
+}
+
+/**
+ * Creates fail tree and initializes buffer
+ */
+int acmp_prepare(ACMP *parser) {
+    int st;
+
+    if (parser->bp_buff_len < parser->longest_entry) {
+        parser->bp_buff_len = parser->longest_entry * 2;
+        //parser->bp_buffer = (size_t *)calloc(1, sizeof(size_t) * parser->bp_buff_len);
+        /* ENH: Check alloc succeeded */
+    }
+
+    st = acmp_connect_fail_branches(parser);
+    parser->active_node = parser->root_node;
+    if (st != 1) return st;
+    parser->is_active = 1;
+    return 1;
+}
+
+/**
+ * Adds pattern to parser
+ * parser - ACMP parser
+ * pattern - string with pattern to match
+ * callback - Optional, pointer to an acmp_callback_t function
+ * data - pointer to data that will be passed to callback function, only used if callback
+ *   is supplied
+ * len - Length of pattern in characters, if zero string length is used.
+ */
+int acmp_add_pattern(ACMP *parser, const char *pattern,
+        acmp_callback_t callback, void *data, size_t len)
+{
+    size_t length, i, j;
+    long *ucs_chars;
+    acmp_node_t *parent, *child;
+
+    if (parser->is_active != 0) return -1;
+    length = (len == 0) ? acmp_strlen(parser, pattern) : len;
+    ucs_chars = (long *)calloc(1, length * sizeof(long));
+    /* ENH: Check alloc succeeded */
+
+    parent = parser->root_node;
+    acmp_strtoucs(parser, pattern, ucs_chars, length);
+
+    for (i = 0; i < length; i++) {
+        long letter = ucs_chars[i];
+        if (parser->is_case_sensitive == 0) {
+            letter = tolower(letter);
+        }
+        child = acmp_child_for_code(parent, letter);
+        if (child == NULL) {
+            child = (acmp_node_t *) calloc(1, sizeof(acmp_node_t));
+            /* ENH: Check alloc succeeded */
+            child->pattern = (char *)"";
+            child->letter = letter;
+            child->depth = i;
+            child->text = (char *)calloc(1, strlen(pattern) + 2);
+            /* ENH: Check alloc succeeded */
+            for (j = 0; j <= i; j++) child->text[j] = pattern[j];
+        }
+        if (i == length - 1) {
+            if (child->is_last == 0) {
+                parser->dict_count++;
+                child->is_last = 1;
+                child->pattern = (char *)calloc(1, strlen(pattern) + 2);
+                /* ENH: Check alloc succeeded */
+                strcpy(child->pattern, pattern);
+            }
+            child->callback = callback;
+            child->callback_data = data;
+        }
+        acmp_add_node_to_parent(parent, child);
+        parent = child;
+    }
+    if (length > parser->longest_entry) parser->longest_entry = length;
+    parser->is_failtree_done = 0;
+    free(ucs_chars);
+    return 1;
+}
+
+/**
+ * Process the data using ACMPT to keep state, and ACMPT's parser to keep the tree
+ */
+int acmp_process_quick(ACMPT *acmpt, const char **match, const char *data, size_t len) {
+    ACMP *parser;
+    acmp_node_t *node, *go_to;
+    const char *end;
+
+    if (acmpt->parser->is_failtree_done == 0) {
+        acmp_prepare(acmpt->parser);
+    };
+
+    parser = acmpt->parser;
+    if (acmpt->ptr == NULL) acmpt->ptr = parser->root_node;
+    node = (acmp_node_t *)acmpt->ptr;
+    end = data + len;
+
+    while (data < end) {
+        long letter = (unsigned char)*data++;
+
+        if (parser->is_case_sensitive == 0) letter = tolower(letter);
+
+        go_to = NULL;
+        while (go_to == NULL) {
+            go_to = acmp_goto(node, letter);
+            if (go_to != NULL) {
+                if (go_to->is_last) {
+                    *match = go_to->text;
+                    return 1;
+                }
+            }
+            if (node == parser->root_node) break;
+            if (go_to == NULL) node = node->fail;
+        }
+        if (go_to != NULL) node = go_to;
+
+        /* If node has o_match, then we found a pattern */
+        if (node->o_match != NULL) {
+            *match = node->text;
+            return 1;
+        }
+    }
+    acmpt->ptr = node;
+    return 0;
+}
+
+}
\ No newline at end of file
diff --git a/src/utils/acmp.h b/src/utils/acmp.h
new file mode 100644
index 00000000..cf310e3b
--- /dev/null
+++ b/src/utils/acmp.h
@@ -0,0 +1,194 @@
+/*
+* ModSecurity for Apache 2.x, http://www.modsecurity.org/
+* Copyright (c) 2004-2013 Trustwave Holdings, Inc. (http://www.trustwave.com/)
+*
+* You may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* If any of the files related to licensing are missing or if you have any
+* other questions related to licensing please contact Trustwave Holdings, Inc.
+* directly using the email address security@modsecurity.org.
+*/
+
+#ifndef ACMP_H_
+#define ACMP_H_
+
+#define ACMP_FLAG_BYTE 0
+#define ACMP_FLAG_CASE_SENSITIVE 1
+#define ACMP_FLAG_CASE_INSENSITIVE 0
+#ifdef ACMP_USE_UTF8
+#define ACMP_FLAG_UTF8 0x100
+#endif
+
+#include <cstddef>
+
+
+extern "C" {
+/**
+ * Opaque struct with parser data
+ */
+typedef struct ACMP ACMP;
+
+/**
+ * Used to separate state from the trie for acmp_process_quick function
+ */
+typedef struct {
+    ACMP *parser;
+    void *ptr;
+} ACMPT;
+
+/*
+ *******************************************************************************
+ *******************************************************************************
+ * Data structures for acmp parser
+ */
+
+/**
+ * Callback function. Arguments are:
+ * ACMP * - acmp parser that initiated callback
+ * void * - custom data you supplied when adding callback
+ * size_t - position in bytes where pattern was found
+ * size_t - position in chars where pattern was found, for multibyte strings
+ */
+typedef void (*acmp_callback_t)(ACMP *, void *, size_t, size_t);
+
+
+/**
+ * One node in trie
+ */
+typedef struct acmp_node_t acmp_node_t;
+typedef struct acmp_btree_node_t acmp_btree_node_t;
+struct acmp_node_t {
+    long letter;
+    int is_last;
+    acmp_callback_t callback;
+    void *callback_data;
+    int depth;
+
+    acmp_node_t *child;
+    acmp_node_t *sibling;
+    acmp_node_t *fail;
+    acmp_node_t *parent;
+    acmp_node_t *o_match;
+
+    acmp_btree_node_t *btree;
+
+    size_t hit_count;
+
+    char *text;
+    char *pattern;
+};
+
+struct acmp_btree_node_t {
+    long letter;
+    acmp_btree_node_t *left;
+    acmp_btree_node_t *right;
+    acmp_node_t *node;
+};
+
+/**
+ * Data related to parser, not to individual nodes
+ */
+struct ACMP {
+
+    int is_case_sensitive;
+
+    int dict_count;
+    size_t longest_entry;
+
+    acmp_node_t *root_node;
+
+    const char *data_start;
+    const char *data_end;
+    const char *data_pos;
+    size_t data_len;
+
+    size_t *bp_buffer;
+    size_t bp_buff_len;
+
+    acmp_node_t *active_node;
+    char u8_buff[6];
+    size_t u8buff_len;
+    size_t hit_count;
+    int is_failtree_done;
+    int is_active;
+    size_t byte_pos;
+    size_t char_pos;
+};
+
+
+//static long utf8_lcase(long ucs_code);
+
+/**
+ * flags - OR-ed values of ACMP_FLAG constants
+ */
+ACMP *acmp_create(int flags);
+
+/**
+ * Destroys previously created parser (NOTE(review): not defined in acmp.cc yet)
+ */
+void acmp_destroy(ACMP *parser);
+
+/**
+ * Creates parser with same options and same patterns
+ * parser - ACMP parser to duplicate (NOTE(review): not defined in acmp.cc yet)
+ */
+ACMP *acmp_duplicate(ACMP *parser);
+
+/**
+ * Adds pattern to parser. Cannot be done after starting the search.
+ * parser - ACMP parser
+ * pattern - string with pattern to match
+ * callback - Optional, pointer to an acmp_callback_t function
+ * data - pointer to data that will be passed to callback function, only used if callback
+ *   is supplied
+ * len - Length of pattern in characters, if zero string length is used.
+ */
+int acmp_add_pattern(ACMP *parser, const char *pattern,
+        acmp_callback_t callback, void *data, size_t len);
+
+/**
+ * Called to process incoming data stream. You must call acmp_done after sending
+ *   last data packet
+ * NOTE(review): acmp.cc does not define this function yet.
+ * data - ptr to incoming data
+ * len - size of data in bytes
+ */
+int acmp_process(ACMP *parser, const char *data, size_t len);
+
+/**
+ * Returns number of matches on all patterns combined (NOTE(review): not defined in acmp.cc yet)
+ */
+size_t acmp_match_count_total(ACMP *parser);
+
+/**
+ * Returns number of matches for given pattern (NOTE(review): not defined in acmp.cc yet)
+ */
+size_t acmp_match_count(ACMP *parser, const char *pattern);
+
+/**
+ * Resets the state of parser so you can start using it with new set of data,
+ * or add new patterns. (NOTE(review): not defined in acmp.cc yet)
+ */
+void acmp_reset(ACMP *parser);
+
+/**
+ * Creates an ACMPT struct that will use parser's tree, without duplicating its data (NOTE(review): not defined in acmp.cc yet)
+ */
+ACMPT *acmp_duplicate_quick(ACMP *parser);
+
+/**
+ * Scans data with acmpt's parser, keeping state in acmpt; returns 1 and sets *match on the first hit, otherwise 0
+ */
+int acmp_process_quick(ACMPT *acmpt, const char **match, const char *data, size_t len);
+
+/**
+ * Prepares parser for searching; acmp_add_pattern returns -1 once this has run
+ */
+int acmp_prepare(ACMP *parser);
+
+}
+
+#endif /*ACMP_H_*/