From e887faac2b6573e5bd969eb5d09305bd82e0f8fa Mon Sep 17 00:00:00 2001 From: brectanus Date: Wed, 30 May 2007 22:02:35 +0000 Subject: [PATCH] Add @pm/@pmfile operators (parallel patch). See #16. --- CHANGES | 4 + apache2/Makefile | 1 + apache2/Makefile.win | 2 +- apache2/acmp.c | 711 ++++++++++++++++++++++ apache2/acmp.h | 115 ++++ apache2/modules.mk | 4 +- apache2/re_operators.c | 145 +++++ apache2/utf8tables.h | 810 ++++++++++++++++++++++++++ doc/modsecurity2-apache-reference.xml | 34 ++ 9 files changed, 1823 insertions(+), 3 deletions(-) create mode 100644 apache2/acmp.c create mode 100644 apache2/acmp.h create mode 100644 apache2/utf8tables.h diff --git a/CHANGES b/CHANGES index 5d05d88d..6386ead5 100644 --- a/CHANGES +++ b/CHANGES @@ -2,6 +2,10 @@ ?? ??? 2007 - 2.2.0-trunk ------------------------- + * Added new parallel matching operators, @pm and @pmfile. These use + an alternate set based matching engine to perform faster keyword + type matches. + * Cache transformations per-request/phase so they are not repeated. * Fixed problem with subrequests not being intercepted (only logged). 
diff --git a/apache2/Makefile b/apache2/Makefile index c2095a54..9cdbbeca 100644 --- a/apache2/Makefile +++ b/apache2/Makefile @@ -29,6 +29,7 @@ APACHECTL = apachectl INCLUDES = -I /usr/include/libxml2 DEFS = -DWITH_LIBXML2 +#DEFS = -DWITH_LIBXML2 -DDEBUG_CONF #DEFS = -DWITH_LIBXML2 -DCACHE_DEBUG #LIBS = -Lmy/lib/dir -lmylib diff --git a/apache2/Makefile.win b/apache2/Makefile.win index 071741c2..96284cd7 100644 --- a/apache2/Makefile.win +++ b/apache2/Makefile.win @@ -24,7 +24,7 @@ OBJS = mod_security2.obj apache2_config.obj apache2_io.obj apache2_util.obj \ re.obj re_operators.obj re_actions.obj re_tfns.obj re_variables.obj \ msc_logging.obj msc_xml.obj msc_multipart.obj modsecurity.obj msc_parsers.obj \ msc_util.obj msc_pcre.obj persist_dbm.obj msc_reqbody.obj pdf_protect.obj \ - msc_geo.obj + msc_geo.obj acmp.obj all: $(DLL) diff --git a/apache2/acmp.c b/apache2/acmp.c new file mode 100644 index 00000000..d5a6dba8 --- /dev/null +++ b/apache2/acmp.c @@ -0,0 +1,711 @@ +/* + * ModSecurity for Apache 2.x, http://www.modsecurity.org/ + * Copyright (c) 2004-2007 Breach Security, Inc. (http://www.breach.com/) + * + * You should have received a copy of the licence along with this + * program (stored in the file "LICENSE"). If the file is missing, + * or if you have any other questions related to the licence, please + * write to Breach Security, Inc. at support@breach.com. 
+ * + */ +#include "acmp.h" +#include "utf8tables.h" +#include +#include +#include + + +/* + ******************************************************************************* + ******************************************************************************* + * Data structures for acmp parser + */ + + /** + * One node in trie + */ +typedef struct acmp_node_t acmp_node_t; +typedef struct acmp_btree_node_t acmp_btree_node_t; +struct acmp_node_t { + acmp_utf8_char_t letter; + int is_last; + acmp_callback_t callback; + void *callback_data; + int depth; + + acmp_node_t *child; + acmp_node_t *sibling; + acmp_node_t *fail; + acmp_node_t *parent; + acmp_node_t *o_match; + + acmp_btree_node_t *btree; + + apr_size_t hit_count; + + char *text; + char *pattern; +}; + +struct acmp_btree_node_t { + acmp_utf8_char_t letter; + acmp_btree_node_t *left; + acmp_btree_node_t *right; + acmp_node_t *node; +}; + +/** + * Data related to parser, not to individual nodes + */ +struct ACMP { + int is_utf8; + int is_case_sensitive; + apr_pool_t *parent_pool; + apr_pool_t *pool; + + int dict_count; + apr_size_t longest_entry; + + acmp_node_t *root_node; + + const char *data_start; + const char *data_end; + const char *data_pos; + apr_size_t data_len; + + apr_size_t *bp_buffer; + apr_size_t bp_buff_len; + + acmp_node_t *active_node; + char u8_buff[6]; + apr_size_t u8buff_len; + apr_size_t hit_count; + int is_failtree_done; + int is_active; + apr_size_t byte_pos; + apr_size_t char_pos; +}; + +/* + ******************************************************************************* + ******************************************************************************* + * Functions for UTF-8 support + */ + +/** + * Returns length of utf-8 sequence based on its first byte + */ +static int utf8_seq_len(const char *first_byte) { + return utf8_seq_lengths[(unsigned int)(unsigned char)first_byte[0]]; +} + +/** + * Returns length of utf8-encoded text + */ +static size_t utf8_strlen(const char *str) { + int len = 0; 
+ const char *c = str; + while (*c != 0) { + c += utf8_seq_len(c); + len++; + } + return len; +} + +/** + * Returns ucs code for given utf-8 sequence + */ +static acmp_utf8_char_t utf8_decodechar(const char *str) { + int len = utf8_seq_len(str); + acmp_utf8_char_t ch = 0; + switch (len) { + case 6: ch += (unsigned char)*str++; ch <<= 6; + case 5: ch += (unsigned char)*str++; ch <<= 6; + case 4: ch += (unsigned char)*str++; ch <<= 6; + case 3: ch += (unsigned char)*str++; ch <<= 6; + case 2: ch += (unsigned char)*str++; ch <<= 6; + case 1: ch += (unsigned char)*str++; + } + ch -= utf8_offsets[len - 1]; + return ch; +} + +/** + * Returns lowercase for given unicode character. Searches through + * utf8_lcase_map table, if it doesn't find the code assumes + * it doesn't have a lowercase variant and returns code itself. + */ +static long utf8_lcase(acmp_utf8_char_t ucs_code) { + long mid, left, right; + left = 1; /* table is (lowercase, uppercase) pairs: uppercase keys sit at odd indexes */ + right = UTF8_LCASEMAP_LEN * 2 - 1; /* last odd index; the table has UTF8_LCASEMAP_LEN * 2 entries, so anything larger reads past its end */ + + while (left <= right) { + mid = (left + right) >> 1; + mid -= (mid % 2); mid++; + if (ucs_code > utf8_lcase_map[mid]) + left = mid + 2; + else if (ucs_code < utf8_lcase_map[mid]) + right = mid - 2; + else if (ucs_code == utf8_lcase_map[mid]) + return utf8_lcase_map[mid - 1]; + } + return ucs_code; +} + +/* + ******************************************************************************* + ******************************************************************************* + * Code for local / static utility functions + */ + +/** + * Returns length of given string for parser's encoding + */ +static size_t acmp_strlen(ACMP *parser, const char *str) { + return (parser->is_utf8 == 0) ? 
strlen(str) : utf8_strlen(str); +} + +/** + * Turns string to array of ucs values, depending on parser's encoding + * str - string to convert, doesn't have to be NULL-terminated + * ucs_chars - where to write ucs values + * len - length of input string + */ +static void acmp_strtoucs(ACMP *parser, const char *str, acmp_utf8_char_t *ucs_chars, int len) { + int i; + const char *c = str; + + if (parser->is_utf8 == 0) { + for (i = 0; i < len; i++) { + *ucs_chars++ = *c++; + } + } else { + for (i = 0; i < len; i++) { + *ucs_chars++ = utf8_decodechar(c); + c += utf8_seq_len(c); + } + } +} + +/** + * Returns node with given letter, or null if not found + */ +static acmp_node_t *acmp_child_for_code(acmp_node_t *parent_node, acmp_utf8_char_t ucs_code) { + acmp_node_t *node = parent_node->child; + if (node == NULL) return NULL; + for (;;) { + if (node->letter == ucs_code) return node; + node = node->sibling; + if (node == NULL) return NULL; + } +} + +/** + * Adds node to parent node, if it is not already there + */ +static void acmp_add_node_to_parent(acmp_node_t *parent, acmp_node_t *child) { + child->parent = parent; + if (parent->child == NULL) { + parent->child = child; + return; + } + acmp_node_t *node = parent->child; + for (;;) { + if (node == child) return; + if (node->sibling == NULL) { + node->sibling = child; + return; + } + node = node->sibling; + } +} + +/** + * Copies values from one node to another, without child/sibling/fail pointers + * and without state variables. + */ +static void acmp_clone_node_no_state(acmp_node_t *from, acmp_node_t *to) { + memcpy(to, from, sizeof(acmp_node_t)); + to->child = NULL; + to->sibling = NULL; + to->fail = NULL; + to->hit_count = 0; +} + +/** + * Copies sibling nodes and child node for from given "from" node to "to" node. + * Both nodes must already exist. 
+ */ +static void acmp_copy_nodes_recursive(acmp_node_t *from, acmp_node_t *to, apr_pool_t *pool) { + acmp_node_t *old_node = from->child, *new_node, *nn2; + if (old_node == NULL) return; + nn2 = apr_pcalloc(pool, sizeof(acmp_node_t)); + acmp_clone_node_no_state(old_node, nn2); + nn2->parent = to; + to->child = nn2; + acmp_copy_nodes_recursive(from->child, to->child, pool); + + for (;;) { + old_node = old_node->sibling; + if (old_node == NULL) break; + new_node = apr_pcalloc(pool, sizeof(acmp_node_t)); + acmp_clone_node_no_state(old_node, new_node); + new_node->parent = to; + nn2->sibling = new_node; + nn2 = new_node; + acmp_copy_nodes_recursive(old_node, new_node, pool); + } +} + +static inline acmp_node_t *acmp_btree_find(acmp_node_t *node, acmp_utf8_char_t letter) { + acmp_btree_node_t *bnode = node->btree; + for (;;) { + if (bnode == NULL) return NULL; + if (bnode->letter == letter) return bnode->node; + if (bnode->letter > letter) { + bnode = bnode->left; + } else { + bnode = bnode->right; + } + } +} + +/** + * + */ +static inline acmp_node_t *acmp_goto(acmp_node_t *node, acmp_utf8_char_t letter) { + //return acmp_child_for_code(node, letter); + return acmp_btree_find(node, letter); +} + +/** + * Connects each node with its first fail node that is end of a phrase. 
+ */ +static void acmp_connect_other_matches(ACMP *parser, acmp_node_t *node) { + acmp_node_t *child, *om; + + for (child = node->child; child != NULL; child = child->sibling) { + if (child->fail == NULL) continue; + for (om = child->fail; om != parser->root_node; om = om->fail) { + if (om->is_last) { + child->o_match = om; + break; + } + } + } + + /* Go recursively through children of this node that have a child node */ + for(child = node->child; child != NULL; child = child->sibling) { + if (child->child != NULL) acmp_connect_other_matches(parser, child); + } +} + +/** + * Adds leaves to binary tree, working from sorted array of keyword tree nodes + */ +static void acmp_add_btree_leaves(acmp_btree_node_t *node, acmp_node_t *nodes[], + int pos, int lb, int rb, apr_pool_t *pool) { + + int left = 0, right = 0; + if ((pos - lb) > 1) { + left = lb + (pos - lb) / 2; + node->left = apr_pcalloc(pool, sizeof(acmp_btree_node_t)); + node->left->node = nodes[left]; + node->left->letter = nodes[left]->letter; + /* printf("%c ->left %c \n", node->node->letter, node->left->node->letter); */ + } + if ((rb - pos) > 1) { + right = pos + (rb - pos) / 2; + node->right = apr_pcalloc(pool, sizeof(acmp_btree_node_t)); + node->right->node = nodes[right]; + node->right->letter = nodes[right]->letter; + /* printf("%c ->right %c \n", node->node->letter, node->right->node->letter); */ + } + if (node->right != NULL) { + acmp_add_btree_leaves(node->right, nodes, right, pos, rb, pool); + } + if (node->left != NULL) { + acmp_add_btree_leaves(node->left, nodes, left, lb, pos, pool); + } +} + +/** + * Builds balanced binary tree from children nodes of given node. 
+ */ +static void acmp_build_binary_tree(ACMP *parser, acmp_node_t *node) { + apr_size_t count, i, j; + acmp_node_t *child = node->child; + + for (count = 0; child != NULL; child = child->sibling) count++; + acmp_node_t *nodes[count]; + child = node->child; + for (i = 0; i < count; i++) { + nodes[i] = child; + child = child->sibling; + }; + /* We have array with all children of the node and number of those children + */ + for (i = 0; i < count - 1; i++) + for (j = i + 1; j < count; j++) { + if (nodes[i]->letter < nodes[j]->letter) continue; + acmp_node_t *tmp = nodes[i]; + nodes[i] = nodes[j]; + nodes[j] = tmp; + } + node->btree = apr_pcalloc(parser->pool, sizeof(acmp_btree_node_t)); + apr_size_t pos = count / 2; + node->btree->node = nodes[pos]; + node->btree->letter = nodes[pos]->letter; + acmp_add_btree_leaves(node->btree, nodes, pos, -1, count, parser->pool); + for (i = 0; i < count; i++) { + if (nodes[i]->child != NULL) acmp_build_binary_tree(parser, nodes[i]); + } +} + +/** + * Constructs fail paths on keyword trie + */ +static apr_status_t acmp_connect_fail_branches(ACMP *parser) { + /* Already connected ? 
*/ + if (parser->is_failtree_done != 0) return APR_SUCCESS; + acmp_node_t *child, *node, *goto_node; + apr_array_header_t *arr, *arr2, *tmp; + + parser->root_node->text = ""; + arr = apr_array_make(parser->pool, 32, sizeof(acmp_node_t *)); + arr2 = apr_array_make(parser->pool, 32, sizeof(acmp_node_t *)); + + parser->root_node->fail = parser->root_node; + + /* All first-level children will fail back to root node */ + for (child = parser->root_node->child; child != NULL; child = child->sibling) { + child->fail = parser->root_node; + *(acmp_node_t **)apr_array_push(arr) = child; + /* printf("fail direction: *%s* => *%s*\n", child->text, child->fail->text); */ + } + + for (;;) { + while (apr_is_empty_array(arr) == 0) { + node = *(acmp_node_t **)apr_array_pop(arr); + node->fail = parser->root_node; + if (node->parent != parser->root_node) { + goto_node = acmp_child_for_code(node->parent->fail, node->letter); + node->fail = (goto_node != NULL) ? goto_node : parser->root_node; + } + /* printf("fail direction: *%s* => *%s*\n", node->text, node->fail->text); */ + child = node->child; + while (child != NULL) { + *(acmp_node_t **)apr_array_push(arr2) = child; + child = child->sibling; + } + } + if (apr_is_empty_array(arr2) != 0) break; + + tmp = arr; + arr = arr2; + arr2 = tmp; + } + acmp_connect_other_matches(parser, parser->root_node); + if (parser->root_node->child != NULL) acmp_build_binary_tree(parser, parser->root_node); + parser->is_failtree_done = 1; + return APR_SUCCESS; +} + +/** + * Clears hit count of each node, called from acmp_reset() + */ +static void acmp_clear_hit_count_recursive(acmp_node_t *node) { + for (; node != NULL; node = node->sibling) { + node->hit_count = 0; + if (node->child != NULL) acmp_clear_hit_count_recursive(node->child); + } +} + +/** + * Called when a match is found + */ +static void acmp_found(ACMP *parser, acmp_node_t *node) { + if (node->callback) { + node->callback(parser, node->callback_data, + parser->bp_buffer[(parser->char_pos - 
node->depth - 1) % parser->bp_buff_len], + parser->char_pos - node->depth - 1); + } + /* printf("found: %s at position %d\n", node->pattern, parser->char_pos - node->depth - 1); */ + node->hit_count++; + parser->hit_count++; +} + +/* + ******************************************************************************* + ******************************************************************************* + * Code for functions from header file + */ + + +/** + * flags - OR-ed values of ACMP_FLAG constants + * pool - apr_pool to use as parent pool, can be set to NULL + */ +ACMP *acmp_create(int flags, apr_pool_t *pool) { + apr_status_t rc; + apr_pool_t *p; + rc = apr_pool_create(&p, pool); + if (rc != APR_SUCCESS) return NULL; + + ACMP *parser = apr_pcalloc(p, sizeof(ACMP)); + parser->pool = p; + parser->parent_pool = pool; + parser->is_utf8 = (flags & ACMP_FLAG_UTF8) == 0 ? 0 : 1; + parser->is_case_sensitive = (flags & ACMP_FLAG_CASE_SENSITIVE) == 0 ? 0 : 1; + parser->root_node = apr_pcalloc(p, sizeof(acmp_node_t)); + return parser; +} + +/** + * Destroys previously created parser + */ +void acmp_destroy(ACMP *parser) { + /* + * All data is kept in parser's pool (including parser struct itself), so + * destroying the pool will destroy everything + */ + apr_pool_destroy(parser->pool); +} + +/** + * Creates parser with same options and same patterns + * parser - ACMP parser to duplicate + * pool - parent pool to use, if left as NULL original parser's parent pool is used + */ +ACMP *acmp_duplicate(ACMP *parser, apr_pool_t *pool) { + apr_status_t rc; + apr_pool_t *p; + + if (pool == NULL) pool = parser->parent_pool; + rc = apr_pool_create(&p, pool); + if (rc != APR_SUCCESS) return NULL; + + ACMP *new_parser = apr_pcalloc(p, sizeof(ACMP)); + new_parser->pool = p; + new_parser->parent_pool = pool; + new_parser->is_utf8 = parser->is_utf8; + new_parser->is_case_sensitive = parser->is_case_sensitive; + new_parser->root_node = apr_pcalloc(p, sizeof(acmp_node_t)); + 
new_parser->dict_count = parser->dict_count; + new_parser->longest_entry = parser->longest_entry; + acmp_copy_nodes_recursive(parser->root_node, new_parser->root_node, new_parser->pool); + acmp_prepare(new_parser); + return new_parser; +} + +/** + * Creates fail tree and initializes buffer + */ +apr_status_t acmp_prepare(ACMP *parser) { + if (parser->bp_buff_len < parser->longest_entry) { + parser->bp_buff_len = parser->longest_entry * 2; + parser->bp_buffer = apr_pcalloc(parser->pool, sizeof(apr_size_t) * parser->bp_buff_len); + } + apr_status_t st = acmp_connect_fail_branches(parser); + parser->active_node = parser->root_node; + if (st != APR_SUCCESS) return st; + parser->is_active = 1; + return APR_SUCCESS; +} + +/** + * Adds pattern to parser + * parser - ACMP parser + * pattern - string with pattern to match + * callback - Optional, pointer to an acmp_callback_t function + * data - pointer to data that will be passed to callback function, only used if callback + * is supplied + * len - Length of pattern in characters, if zero string length is used. + */ +apr_status_t acmp_add_pattern(ACMP *parser, const char *pattern, + acmp_callback_t callback, void *data, apr_size_t len) +{ + if (parser->is_active != 0) return APR_EGENERAL; + size_t length = (len == 0) ? 
acmp_strlen(parser, pattern) : len; + size_t i, j; + acmp_utf8_char_t ucs_chars[length]; + + acmp_node_t *parent = parser->root_node, *child; + acmp_strtoucs(parser, pattern, ucs_chars, length); + + for (i = 0; i < length; i++) { + acmp_utf8_char_t letter = ucs_chars[i]; + if (parser->is_case_sensitive == 0) { + letter = utf8_lcase(letter); + } + child = acmp_child_for_code(parent, letter); + if (child == NULL) { + child = apr_pcalloc(parser->pool, sizeof(acmp_node_t)); + child->pattern = ""; + child->letter = letter; + child->depth = i; + child->text = apr_pcalloc(parser->pool, strlen(pattern) + 2); + for (j = 0; j <= i; j++) child->text[j] = pattern[j]; + } + if (i == length - 1) { + if (child->is_last == 0) { + parser->dict_count++; + child->is_last = 1; + child->pattern = apr_pcalloc(parser->pool, strlen(pattern) + 2); + strcpy(child->pattern, pattern); + } + child->callback = callback; + child->callback_data = data; + } + acmp_add_node_to_parent(parent, child); + parent = child; + } + if (length > parser->longest_entry) parser->longest_entry = length; + parser->is_failtree_done = 0; + + return APR_SUCCESS; +} + +/** + * Called to process incoming data stream + * data - ptr to incoming data + * len - size of data in bytes + */ +apr_status_t acmp_process(ACMP *parser, const char *data, apr_size_t len) { + if (parser->is_failtree_done == 0) acmp_prepare(parser); + acmp_node_t *node = parser->active_node, *go_to; + apr_size_t seq_length; + const char *end = (data + len); + + while (data < end) { + parser->bp_buffer[parser->char_pos % parser->bp_buff_len] = parser->byte_pos; + acmp_utf8_char_t letter; + if (parser->is_utf8) { + if (parser->u8buff_len > 0) { + /* Resuming partial utf-8 sequence */ + seq_length = utf8_seq_len(parser->u8_buff); + for (;;) { + parser->u8_buff[parser->u8buff_len++] = *data++; + if (parser->u8buff_len == seq_length) { + parser->u8buff_len = 0; + letter = utf8_decodechar(parser->u8_buff); + parser->byte_pos += seq_length; + 
parser->char_pos++; + break; + } + } + } else { + /* not resuming partial sequence, reading from the stream */ + seq_length = utf8_seq_len(data); + if ((data + seq_length) > end) { + while (data < end) parser->u8_buff[parser->u8buff_len++] = *data++; + return APR_SUCCESS; + } else { + letter = utf8_decodechar(data); + data += seq_length; + parser->byte_pos += seq_length; + parser->char_pos++; + } + } + } else { + letter = *data++; + parser->byte_pos++; + parser->char_pos++; + } + if (parser->is_case_sensitive == 0) letter = utf8_lcase(letter); + + go_to = NULL; + while (go_to == NULL) { + acmp_node_t *n2 = acmp_goto(node, letter); + go_to = acmp_child_for_code(node, letter); + if (n2 != go_to) { + n2 = acmp_goto(node, letter); + }; + if (go_to != NULL) { + if (go_to->is_last) { + acmp_found(parser, go_to); + } + } + if (node == parser->root_node) break; + if (go_to == NULL) node = node->fail; + } + if (go_to != NULL) node = go_to; + + /* We need to collect other nodes that are last letters of phrase. These + * will be fail node of current node if it has is_last flag set, and + * fail node of that node, recursively down to root node. + */ + go_to = node; + if (go_to != parser->root_node) { + for (go_to = go_to->o_match; go_to != NULL; go_to = go_to->o_match) { + acmp_found(parser, go_to); + } + } + } + parser->active_node = node; + return parser->hit_count > 0 ? 1 : 0; +} + +/** + * Resets the state of parser so you can start using it with new set of data. + * + * No need to clear buffer since it will be re-initialized at first run of + * acmp_process + */ +void acmp_reset(ACMP *parser) { + parser->is_active = 0; + parser->byte_pos = 0; + parser->char_pos = 0; + parser->hit_count = 0; + parser->u8buff_len = 0; + acmp_clear_hit_count_recursive(parser->root_node); +} + +/** + * Creates an ACMPT struct that will use parser's tree, without duplicating its data + */ +ACMPT *acmp_duplicate_quick(ACMP *parser, apr_pool_t *pool) { + apr_pool_t *p = (pool != NULL) ? 
pool : parser->pool; + ACMPT *dup = apr_pcalloc(p, sizeof(ACMPT)); + dup->parser = parser; + return dup; +} + +/** + * Process the data using ACMPT to keep state, and ACMPT's parser to keep the tree + */ +apr_status_t acmp_process_quick(ACMPT *acmpt, const char **match, const char *data, apr_size_t len) { + if (acmpt->parser->is_failtree_done == 0) { + acmp_prepare(acmpt->parser); + }; + ACMP *parser = acmpt->parser; + if (acmpt->ptr == NULL) acmpt->ptr = parser->root_node; + acmp_node_t *node = acmpt->ptr, *go_to; + const char *end = (data + len); + + while (data < end) { + acmp_utf8_char_t letter = (unsigned char)*data++; + go_to = NULL; + while (go_to == NULL) { + go_to = acmp_goto(node, letter); + if (go_to != NULL) { + if (go_to->is_last) { + *match = go_to->text; + return 1; + } + } + if (node == parser->root_node) break; + if (go_to == NULL) node = node->fail; + } + if (go_to != NULL) node = go_to; + + /* If node has o_match, then we found a pattern */ + if (node->o_match != NULL) { + *match = node->text; + return 1; + } + } + acmpt->ptr = node; + return 0; +} diff --git a/apache2/acmp.h b/apache2/acmp.h new file mode 100644 index 00000000..40865ac7 --- /dev/null +++ b/apache2/acmp.h @@ -0,0 +1,115 @@ +/* + * ModSecurity for Apache 2.x, http://www.modsecurity.org/ + * Copyright (c) 2004-2007 Breach Security, Inc. (http://www.breach.com/) + * + * You should have received a copy of the licence along with this + * program (stored in the file "LICENSE"). If the file is missing, + * or if you have any other questions related to the licence, please + * write to Breach Security, Inc. at support@breach.com. 
+ * + */ +#ifndef ACMP_H_ +#define ACMP_H_ + +#include +#include + +#define ACMP_FLAG_BYTE 0 +#define ACMP_FLAG_UTF8 0x100 +#define ACMP_FLAG_CASE_SENSITIVE 1 +#define ACMP_FLAG_CASE_INSENSITIVE 0 + +/** + * Opaque struct with parser data + */ +typedef struct ACMP ACMP; + +/** + * Used to separate state from the trie for acmp_process_quick function + */ +typedef struct { + ACMP *parser; + void *ptr; +} ACMPT; + +/** + * Callback function. Arguments are: + * ACMP * - acmp parser that initiated callback + * void * - custom data you supplied when adding callback + * apr_size_t - position in bytes where pattern was found + * apr_size_t - position in chars where pattern was found, for multibyte strings + */ +typedef void (*acmp_callback_t)(ACMP *, void *, apr_size_t, apr_size_t); + +/** + * flags - OR-ed values of ACMP_FLAG constants + * pool - apr_pool to use as parent pool, can be set to NULL + */ +ACMP *acmp_create(int flags, apr_pool_t *pool); + +/** + * Destroys previously created parser + */ +void acmp_destroy(ACMP *parser); + +/** + * Creates parser with same options and same patterns + * parser - ACMP parser to duplicate + * pool - parent pool to use, if left as NULL original parser's parent pool is used + */ +ACMP *acmp_duplicate(ACMP *parser, apr_pool_t *pool); + +/** + * Adds pattern to parser. Cannot be done after starting the search. + * parser - ACMP parser + * pattern - string with pattern to match + * callback - Optional, pointer to an acmp_callback_t function + * data - pointer to data that will be passed to callback function, only used if callback + * is supplied + * len - Length of pattern in characters, if zero string length is used. + */ +apr_status_t acmp_add_pattern(ACMP *parser, const char *pattern, + acmp_callback_t callback, void *data, apr_size_t len); + +/** + * Called to process incoming data stream. 
You must call acmp_done after sending + * last data packet + * + * data - ptr to incoming data + * len - size of data in bytes + */ +apr_status_t acmp_process(ACMP *parser, const char *data, apr_size_t len); + +/** + * Returns number of matches on all patterns combined + */ +apr_size_t acmp_match_count_total(ACMP *parser); + +/** + * Returns number of matches for given pattern + */ +apr_size_t acmp_match_count(ACMP *parser, const char *pattern); + +/** + * Resets the state of parser so you can start using it with new set of data, + * or add new patterns. + */ +void acmp_reset(ACMP *parser); + +/** + * Creates an ACMPT struct that will use parser's tree, without duplicating its data + */ +ACMPT *acmp_duplicate_quick(ACMP *parser, apr_pool_t *pool); + +/** + * Process the data using ACMPT to keep state, and ACMPT's parser to keep the tree + */ +apr_status_t acmp_process_quick(ACMPT *acmpt, const char **match, const char *data, apr_size_t len); + +/** + * Prepares parser for searching + */ +apr_status_t acmp_prepare(ACMP *parser); + + +#endif /*ACMP_H_*/ diff --git a/apache2/modules.mk b/apache2/modules.mk index 2f9cfbb4..bce0eb7d 100644 --- a/apache2/modules.mk +++ b/apache2/modules.mk @@ -2,11 +2,11 @@ MOD_SECURITY2 = mod_security2 apache2_config apache2_io apache2_util \ re re_operators re_actions re_tfns re_variables \ msc_logging msc_xml msc_multipart modsecurity msc_parsers msc_util msc_pcre \ - persist_dbm msc_reqbody pdf_protect msc_geo + persist_dbm msc_reqbody pdf_protect msc_geo acmp H = re.h modsecurity.h msc_logging.h msc_multipart.h msc_parsers.h \ msc_pcre.h msc_util.h msc_xml.h persist_dbm.h apache2.h pdf_protect.h \ - msc_geo.h + msc_geo.h acmp.h utf8tables.h ${MOD_SECURITY2:=.slo}: ${H} ${MOD_SECURITY2:=.lo}: ${H} diff --git a/apache2/re_operators.c b/apache2/re_operators.c index 8d9b1ff9..727be9c2 100644 --- a/apache2/re_operators.c +++ b/apache2/re_operators.c @@ -13,7 +13,9 @@ #include "re.h" #include "msc_pcre.h" #include "msc_geo.h" +#include 
"apr_lib.h" #include "apr_strmatch.h" +#include "acmp.h" /** * @@ -179,6 +181,135 @@ static int msre_op_rx_execute(modsec_rec *msr, msre_rule *rule, msre_var *var, c return 0; } +/* pm */ + +static int msre_op_pm_param_init(msre_rule *rule, char **error_msg) { + if ((rule->op_param == NULL)||(strlen(rule->op_param) == 0)) { + *error_msg = apr_psprintf(rule->ruleset->mp, "Missing parameter for operator 'pm'."); + return 0; /* ERROR */ + } + + ACMP *p = acmp_create(0, rule->ruleset->mp); + if (p == NULL) return 0; + + const char *s = rule->op_param; + const char *e = rule->op_param + strlen(rule->op_param); + + for (;;) { + while((isspace(*s) != 0) && (*s != 0)) s++; + if (*s == 0) break; + e = s; + while((isspace(*e) == 0) && (*e != 0)) e++; + acmp_add_pattern(p, s, NULL, NULL, e - s); + s = e; + } + acmp_prepare(p); + rule->op_param_data = p; + return 1; +} + +/* pmfile */ + +static int msre_op_pmfile_param_init(msre_rule *rule, char **error_msg) { + char errstr[1024]; + char buf[HUGE_STRING_LEN + 1]; + char *ptr = NULL; + apr_status_t rc; + apr_file_t *fd; + + if ((rule->op_param == NULL)||(strlen(rule->op_param) == 0)) { + *error_msg = apr_psprintf(rule->ruleset->mp, "Missing parameter for operator 'pm'."); + return 0; /* ERROR */ + } + + ACMP *p = acmp_create(0, rule->ruleset->mp); + if (p == NULL) return 0; + + char *fn = apr_pstrdup(rule->ruleset->mp, rule->op_param); + char *next = fn + strlen(rule->op_param); + + /* Loop through filenames */ + for (;;) { + int line = 0; + + /* Trim whitespace */ + while((isspace(*fn) != 0) && (*fn != 0)) fn++; + if (*fn == '\0') break; + next = fn; + while((isspace(*next) == 0) && (*next != '\0')) next++; + while((isspace(*next) != 0) && (*next != '\0')) *next++ = '\0'; + + /* Open file and read */ + rc = apr_file_open(&fd, fn, APR_READ | APR_FILE_NOCLEANUP, 0, rule->ruleset->mp); + if (rc != APR_SUCCESS) { + *error_msg = apr_psprintf(rule->ruleset->mp, "Could not open pmfile \"%s\": %s", fn, apr_strerror(rc, errstr, 1024)); 
+ return 0; + } + + #ifdef DEBUG_CONF + fprintf(stderr, "Loading pmfile: \"%s\"\n", fn); + #endif + + /* Read one pattern per line skipping empty/commented */ + for(;;) { + line++; + rc = apr_file_gets(buf, HUGE_STRING_LEN, fd); + if (rc == APR_EOF) break; + if (rc != APR_SUCCESS) { + *error_msg = apr_psprintf(rule->ruleset->mp, "Could not read \"%s\" line %d: %s", fn, line, apr_strerror(rc, errstr, 1024)); + apr_file_close(fd); /* opened with APR_FILE_NOCLEANUP, so release it before bailing out */ + return 0; + } + + /* Remove newline */ + ptr = buf + strlen(buf); + if ((ptr > buf) && (*(ptr - 1) == '\n')) *(ptr - 1) = '\0'; + + /* Ignore empty lines and comments */ + ptr = buf; + while((*ptr != '\0') && apr_isspace(*ptr)) ptr++; + if ((*ptr == '\0') || (*ptr == '#')) continue; + + #ifdef DEBUG_CONF + fprintf(stderr, "Adding pmfile pattern: \"%s\"\n", buf); + #endif + + acmp_add_pattern(p, buf, NULL, NULL, strlen(buf)); + } + apr_file_close(fd); /* close every file once it has been read, not only the last one; fd is always a successfully opened file here */ + fn = next; + } + acmp_prepare(p); + rule->op_param_data = p; + return 1; +} + +static int msre_op_pm_execute(modsec_rec *msr, msre_rule *rule, msre_var *var, char **error_msg) { + const char *match = NULL; + apr_status_t rc = 0; + + /* Nothing to read */ + if ((var->value == NULL) || (var->value_len == 0)) return 0; + + ACMPT pt = {(ACMP *)rule->op_param_data, NULL}; + + rc = acmp_process_quick(&pt, &match, var->value, var->value_len); + if (rc) { + char *pattern_escaped = log_escape(msr->mp, match ? match : ""); + + /* This message will be logged. 
*/ + if (strlen(pattern_escaped) > 252) { + *error_msg = apr_psprintf(msr->mp, "Pattern match \"%.252s ...\" at %s.", + pattern_escaped, var->name); + } else { + *error_msg = apr_psprintf(msr->mp, "Pattern match \"%s\" at %s.", + pattern_escaped, var->name); + } + return 1; + } + return rc; +} + /* contains */ static int msre_op_contains_execute(modsec_rec *msr, msre_rule *rule, msre_var *var, char **error_msg) { @@ -1215,6 +1346,20 @@ void msre_engine_register_default_operators(msre_engine *engine) { msre_op_rx_execute ); + /* pm */ + msre_engine_op_register(engine, + "pm", + msre_op_pm_param_init, + msre_op_pm_execute + ); + + /* pmfile */ + msre_engine_op_register(engine, + "pmfile", + msre_op_pmfile_param_init, + msre_op_pm_execute + ); + /* contains */ msre_engine_op_register(engine, "contains", diff --git a/apache2/utf8tables.h b/apache2/utf8tables.h new file mode 100644 index 00000000..f12dc19b --- /dev/null +++ b/apache2/utf8tables.h @@ -0,0 +1,810 @@ +/* + * ModSecurity for Apache 2.x, http://www.modsecurity.org/ + * Copyright (c) 2004-2007 Breach Security, Inc. (http://www.breach.com/) + * + * You should have received a copy of the licence along with this + * program (stored in the file "LICENSE"). If the file is missing, + * or if you have any other questions related to the licence, please + * write to Breach Security, Inc. at support@breach.com. 
+ * + */ +#ifndef UTF8TABLES_H_ +#define UTF8TABLES_H_ + +/** + * This include file is used by acmp.c only, it's not included anywhere else + */ + +typedef long acmp_utf8_char_t; + +static const char utf8_seq_lengths[256] = { + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, + 4,4,4,4,4,4,4,4, 5,5,5,5,6,6,6,6, +}; + +static const acmp_utf8_char_t utf8_offsets[6] = { + 0x00000000UL, 0x00003080UL, 0x000E2080UL, + 0x03C82080UL, 0xFA082080UL, 0x82082080UL +}; + +/** + * How many element pairs are there in utf8_lcase_map + */ +#define UTF8_LCASEMAP_LEN 759 + +/** + * Table mapping is from PHP's mbstring extension, maps uppercase + */ +static const acmp_utf8_char_t utf8_lcase_map[UTF8_LCASEMAP_LEN * 2] = { + 0x00000061, 0x00000041, + 0x00000062, 0x00000042, + 0x00000063, 0x00000043, + 0x00000064, 0x00000044, + 0x00000065, 0x00000045, + 0x00000066, 0x00000046, + 0x00000067, 0x00000047, + 0x00000068, 0x00000048, + 0x00000069, 0x00000049, + 0x0000006a, 0x0000004a, + 0x0000006b, 0x0000004b, + 0x0000006c, 0x0000004c, + 0x0000006d, 0x0000004d, + 0x0000006e, 0x0000004e, + 0x0000006f, 0x0000004f, + 0x00000070, 0x00000050, + 0x00000071, 0x00000051, + 0x00000072, 0x00000052, + 0x00000073, 0x00000053, + 0x00000074, 0x00000054, + 0x00000075, 0x00000055, + 0x00000076, 0x00000056, + 0x00000077, 0x00000057, + 0x00000078, 0x00000058, + 0x00000079, 0x00000059, + 0x0000007a, 0x0000005a, + 0x000000b5, 0x0000039c, + 0x000000e0, 0x000000c0, + 0x000000e1, 0x000000c1, + 0x000000e2, 0x000000c2, + 0x000000e3, 
0x000000c3, + 0x000000e4, 0x000000c4, + 0x000000e5, 0x000000c5, + 0x000000e6, 0x000000c6, + 0x000000e7, 0x000000c7, + 0x000000e8, 0x000000c8, + 0x000000e9, 0x000000c9, + 0x000000ea, 0x000000ca, + 0x000000eb, 0x000000cb, + 0x000000ec, 0x000000cc, + 0x000000ed, 0x000000cd, + 0x000000ee, 0x000000ce, + 0x000000ef, 0x000000cf, + 0x000000f0, 0x000000d0, + 0x000000f1, 0x000000d1, + 0x000000f2, 0x000000d2, + 0x000000f3, 0x000000d3, + 0x000000f4, 0x000000d4, + 0x000000f5, 0x000000d5, + 0x000000f6, 0x000000d6, + 0x000000f8, 0x000000d8, + 0x000000f9, 0x000000d9, + 0x000000fa, 0x000000da, + 0x000000fb, 0x000000db, + 0x000000fc, 0x000000dc, + 0x000000fd, 0x000000dd, + 0x000000fe, 0x000000de, + 0x000000ff, 0x00000178, + 0x00000101, 0x00000100, + 0x00000103, 0x00000102, + 0x00000105, 0x00000104, + 0x00000107, 0x00000106, + 0x00000109, 0x00000108, + 0x0000010b, 0x0000010a, + 0x0000010d, 0x0000010c, + 0x0000010f, 0x0000010e, + 0x00000111, 0x00000110, + 0x00000113, 0x00000112, + 0x00000115, 0x00000114, + 0x00000117, 0x00000116, + 0x00000119, 0x00000118, + 0x0000011b, 0x0000011a, + 0x0000011d, 0x0000011c, + 0x0000011f, 0x0000011e, + 0x00000121, 0x00000120, + 0x00000123, 0x00000122, + 0x00000125, 0x00000124, + 0x00000127, 0x00000126, + 0x00000129, 0x00000128, + 0x0000012b, 0x0000012a, + 0x0000012d, 0x0000012c, + 0x0000012f, 0x0000012e, + 0x00000131, 0x00000049, + 0x00000133, 0x00000132, + 0x00000135, 0x00000134, + 0x00000137, 0x00000136, + 0x0000013a, 0x00000139, + 0x0000013c, 0x0000013b, + 0x0000013e, 0x0000013d, + 0x00000140, 0x0000013f, + 0x00000142, 0x00000141, + 0x00000144, 0x00000143, + 0x00000146, 0x00000145, + 0x00000148, 0x00000147, + 0x0000014b, 0x0000014a, + 0x0000014d, 0x0000014c, + 0x0000014f, 0x0000014e, + 0x00000151, 0x00000150, + 0x00000153, 0x00000152, + 0x00000155, 0x00000154, + 0x00000157, 0x00000156, + 0x00000159, 0x00000158, + 0x0000015b, 0x0000015a, + 0x0000015d, 0x0000015c, + 0x0000015f, 0x0000015e, + 0x00000161, 0x00000160, + 0x00000163, 0x00000162, + 
0x00000165, 0x00000164, + 0x00000167, 0x00000166, + 0x00000169, 0x00000168, + 0x0000016b, 0x0000016a, + 0x0000016d, 0x0000016c, + 0x0000016f, 0x0000016e, + 0x00000171, 0x00000170, + 0x00000173, 0x00000172, + 0x00000175, 0x00000174, + 0x00000177, 0x00000176, + 0x0000017a, 0x00000179, + 0x0000017c, 0x0000017b, + 0x0000017e, 0x0000017d, + 0x0000017f, 0x00000053, + 0x00000183, 0x00000182, + 0x00000185, 0x00000184, + 0x00000188, 0x00000187, + 0x0000018c, 0x0000018b, + 0x00000192, 0x00000191, + 0x00000195, 0x000001f6, + 0x00000199, 0x00000198, + 0x0000019e, 0x00000220, + 0x000001a1, 0x000001a0, + 0x000001a3, 0x000001a2, + 0x000001a5, 0x000001a4, + 0x000001a8, 0x000001a7, + 0x000001ad, 0x000001ac, + 0x000001b0, 0x000001af, + 0x000001b4, 0x000001b3, + 0x000001b6, 0x000001b5, + 0x000001b9, 0x000001b8, + 0x000001bd, 0x000001bc, + 0x000001bf, 0x000001f7, + 0x000001c6, 0x000001c4, + 0x000001c9, 0x000001c7, + 0x000001cc, 0x000001ca, + 0x000001ce, 0x000001cd, + 0x000001d0, 0x000001cf, + 0x000001d2, 0x000001d1, + 0x000001d4, 0x000001d3, + 0x000001d6, 0x000001d5, + 0x000001d8, 0x000001d7, + 0x000001da, 0x000001d9, + 0x000001dc, 0x000001db, + 0x000001dd, 0x0000018e, + 0x000001df, 0x000001de, + 0x000001e1, 0x000001e0, + 0x000001e3, 0x000001e2, + 0x000001e5, 0x000001e4, + 0x000001e7, 0x000001e6, + 0x000001e9, 0x000001e8, + 0x000001eb, 0x000001ea, + 0x000001ed, 0x000001ec, + 0x000001ef, 0x000001ee, + 0x000001f3, 0x000001f1, + 0x000001f5, 0x000001f4, + 0x000001f9, 0x000001f8, + 0x000001fb, 0x000001fa, + 0x000001fd, 0x000001fc, + 0x000001ff, 0x000001fe, + 0x00000201, 0x00000200, + 0x00000203, 0x00000202, + 0x00000205, 0x00000204, + 0x00000207, 0x00000206, + 0x00000209, 0x00000208, + 0x0000020b, 0x0000020a, + 0x0000020d, 0x0000020c, + 0x0000020f, 0x0000020e, + 0x00000211, 0x00000210, + 0x00000213, 0x00000212, + 0x00000215, 0x00000214, + 0x00000217, 0x00000216, + 0x00000219, 0x00000218, + 0x0000021b, 0x0000021a, + 0x0000021d, 0x0000021c, + 0x0000021f, 0x0000021e, + 0x00000223, 0x00000222, 
+ 0x00000225, 0x00000224, + 0x00000227, 0x00000226, + 0x00000229, 0x00000228, + 0x0000022b, 0x0000022a, + 0x0000022d, 0x0000022c, + 0x0000022f, 0x0000022e, + 0x00000231, 0x00000230, + 0x00000233, 0x00000232, + 0x00000253, 0x00000181, + 0x00000254, 0x00000186, + 0x00000256, 0x00000189, + 0x00000257, 0x0000018a, + 0x00000259, 0x0000018f, + 0x0000025b, 0x00000190, + 0x00000260, 0x00000193, + 0x00000263, 0x00000194, + 0x00000268, 0x00000197, + 0x00000269, 0x00000196, + 0x0000026f, 0x0000019c, + 0x00000272, 0x0000019d, + 0x00000275, 0x0000019f, + 0x00000280, 0x000001a6, + 0x00000283, 0x000001a9, + 0x00000288, 0x000001ae, + 0x0000028a, 0x000001b1, + 0x0000028b, 0x000001b2, + 0x00000292, 0x000001b7, + 0x00000345, 0x00000399, + 0x000003ac, 0x00000386, + 0x000003ad, 0x00000388, + 0x000003ae, 0x00000389, + 0x000003af, 0x0000038a, + 0x000003b1, 0x00000391, + 0x000003b2, 0x00000392, + 0x000003b3, 0x00000393, + 0x000003b4, 0x00000394, + 0x000003b5, 0x00000395, + 0x000003b6, 0x00000396, + 0x000003b7, 0x00000397, + 0x000003b8, 0x00000398, + 0x000003b9, 0x00000399, + 0x000003ba, 0x0000039a, + 0x000003bb, 0x0000039b, + 0x000003bc, 0x0000039c, + 0x000003bd, 0x0000039d, + 0x000003be, 0x0000039e, + 0x000003bf, 0x0000039f, + 0x000003c0, 0x000003a0, + 0x000003c1, 0x000003a1, + 0x000003c2, 0x000003a3, + 0x000003c3, 0x000003a3, + 0x000003c4, 0x000003a4, + 0x000003c5, 0x000003a5, + 0x000003c6, 0x000003a6, + 0x000003c7, 0x000003a7, + 0x000003c8, 0x000003a8, + 0x000003c9, 0x000003a9, + 0x000003ca, 0x000003aa, + 0x000003cb, 0x000003ab, + 0x000003cc, 0x0000038c, + 0x000003cd, 0x0000038e, + 0x000003ce, 0x0000038f, + 0x000003d0, 0x00000392, + 0x000003d1, 0x00000398, + 0x000003d5, 0x000003a6, + 0x000003d6, 0x000003a0, + 0x000003d9, 0x000003d8, + 0x000003db, 0x000003da, + 0x000003dd, 0x000003dc, + 0x000003df, 0x000003de, + 0x000003e1, 0x000003e0, + 0x000003e3, 0x000003e2, + 0x000003e5, 0x000003e4, + 0x000003e7, 0x000003e6, + 0x000003e9, 0x000003e8, + 0x000003eb, 0x000003ea, + 0x000003ed, 
0x000003ec, + 0x000003ef, 0x000003ee, + 0x000003f0, 0x0000039a, + 0x000003f1, 0x000003a1, + 0x000003f2, 0x000003a3, + 0x000003f5, 0x00000395, + 0x00000430, 0x00000410, + 0x00000431, 0x00000411, + 0x00000432, 0x00000412, + 0x00000433, 0x00000413, + 0x00000434, 0x00000414, + 0x00000435, 0x00000415, + 0x00000436, 0x00000416, + 0x00000437, 0x00000417, + 0x00000438, 0x00000418, + 0x00000439, 0x00000419, + 0x0000043a, 0x0000041a, + 0x0000043b, 0x0000041b, + 0x0000043c, 0x0000041c, + 0x0000043d, 0x0000041d, + 0x0000043e, 0x0000041e, + 0x0000043f, 0x0000041f, + 0x00000440, 0x00000420, + 0x00000441, 0x00000421, + 0x00000442, 0x00000422, + 0x00000443, 0x00000423, + 0x00000444, 0x00000424, + 0x00000445, 0x00000425, + 0x00000446, 0x00000426, + 0x00000447, 0x00000427, + 0x00000448, 0x00000428, + 0x00000449, 0x00000429, + 0x0000044a, 0x0000042a, + 0x0000044b, 0x0000042b, + 0x0000044c, 0x0000042c, + 0x0000044d, 0x0000042d, + 0x0000044e, 0x0000042e, + 0x0000044f, 0x0000042f, + 0x00000450, 0x00000400, + 0x00000451, 0x00000401, + 0x00000452, 0x00000402, + 0x00000453, 0x00000403, + 0x00000454, 0x00000404, + 0x00000455, 0x00000405, + 0x00000456, 0x00000406, + 0x00000457, 0x00000407, + 0x00000458, 0x00000408, + 0x00000459, 0x00000409, + 0x0000045a, 0x0000040a, + 0x0000045b, 0x0000040b, + 0x0000045c, 0x0000040c, + 0x0000045d, 0x0000040d, + 0x0000045e, 0x0000040e, + 0x0000045f, 0x0000040f, + 0x00000461, 0x00000460, + 0x00000463, 0x00000462, + 0x00000465, 0x00000464, + 0x00000467, 0x00000466, + 0x00000469, 0x00000468, + 0x0000046b, 0x0000046a, + 0x0000046d, 0x0000046c, + 0x0000046f, 0x0000046e, + 0x00000471, 0x00000470, + 0x00000473, 0x00000472, + 0x00000475, 0x00000474, + 0x00000477, 0x00000476, + 0x00000479, 0x00000478, + 0x0000047b, 0x0000047a, + 0x0000047d, 0x0000047c, + 0x0000047f, 0x0000047e, + 0x00000481, 0x00000480, + 0x0000048b, 0x0000048a, + 0x0000048d, 0x0000048c, + 0x0000048f, 0x0000048e, + 0x00000491, 0x00000490, + 0x00000493, 0x00000492, + 0x00000495, 0x00000494, + 
0x00000497, 0x00000496, + 0x00000499, 0x00000498, + 0x0000049b, 0x0000049a, + 0x0000049d, 0x0000049c, + 0x0000049f, 0x0000049e, + 0x000004a1, 0x000004a0, + 0x000004a3, 0x000004a2, + 0x000004a5, 0x000004a4, + 0x000004a7, 0x000004a6, + 0x000004a9, 0x000004a8, + 0x000004ab, 0x000004aa, + 0x000004ad, 0x000004ac, + 0x000004af, 0x000004ae, + 0x000004b1, 0x000004b0, + 0x000004b3, 0x000004b2, + 0x000004b5, 0x000004b4, + 0x000004b7, 0x000004b6, + 0x000004b9, 0x000004b8, + 0x000004bb, 0x000004ba, + 0x000004bd, 0x000004bc, + 0x000004bf, 0x000004be, + 0x000004c2, 0x000004c1, + 0x000004c4, 0x000004c3, + 0x000004c6, 0x000004c5, + 0x000004c8, 0x000004c7, + 0x000004ca, 0x000004c9, + 0x000004cc, 0x000004cb, + 0x000004ce, 0x000004cd, + 0x000004d1, 0x000004d0, + 0x000004d3, 0x000004d2, + 0x000004d5, 0x000004d4, + 0x000004d7, 0x000004d6, + 0x000004d9, 0x000004d8, + 0x000004db, 0x000004da, + 0x000004dd, 0x000004dc, + 0x000004df, 0x000004de, + 0x000004e1, 0x000004e0, + 0x000004e3, 0x000004e2, + 0x000004e5, 0x000004e4, + 0x000004e7, 0x000004e6, + 0x000004e9, 0x000004e8, + 0x000004eb, 0x000004ea, + 0x000004ed, 0x000004ec, + 0x000004ef, 0x000004ee, + 0x000004f1, 0x000004f0, + 0x000004f3, 0x000004f2, + 0x000004f5, 0x000004f4, + 0x000004f9, 0x000004f8, + 0x00000501, 0x00000500, + 0x00000503, 0x00000502, + 0x00000505, 0x00000504, + 0x00000507, 0x00000506, + 0x00000509, 0x00000508, + 0x0000050b, 0x0000050a, + 0x0000050d, 0x0000050c, + 0x0000050f, 0x0000050e, + 0x00000561, 0x00000531, + 0x00000562, 0x00000532, + 0x00000563, 0x00000533, + 0x00000564, 0x00000534, + 0x00000565, 0x00000535, + 0x00000566, 0x00000536, + 0x00000567, 0x00000537, + 0x00000568, 0x00000538, + 0x00000569, 0x00000539, + 0x0000056a, 0x0000053a, + 0x0000056b, 0x0000053b, + 0x0000056c, 0x0000053c, + 0x0000056d, 0x0000053d, + 0x0000056e, 0x0000053e, + 0x0000056f, 0x0000053f, + 0x00000570, 0x00000540, + 0x00000571, 0x00000541, + 0x00000572, 0x00000542, + 0x00000573, 0x00000543, + 0x00000574, 0x00000544, + 0x00000575, 0x00000545, 
+ 0x00000576, 0x00000546, + 0x00000577, 0x00000547, + 0x00000578, 0x00000548, + 0x00000579, 0x00000549, + 0x0000057a, 0x0000054a, + 0x0000057b, 0x0000054b, + 0x0000057c, 0x0000054c, + 0x0000057d, 0x0000054d, + 0x0000057e, 0x0000054e, + 0x0000057f, 0x0000054f, + 0x00000580, 0x00000550, + 0x00000581, 0x00000551, + 0x00000582, 0x00000552, + 0x00000583, 0x00000553, + 0x00000584, 0x00000554, + 0x00000585, 0x00000555, + 0x00000586, 0x00000556, + 0x00001e01, 0x00001e00, + 0x00001e03, 0x00001e02, + 0x00001e05, 0x00001e04, + 0x00001e07, 0x00001e06, + 0x00001e09, 0x00001e08, + 0x00001e0b, 0x00001e0a, + 0x00001e0d, 0x00001e0c, + 0x00001e0f, 0x00001e0e, + 0x00001e11, 0x00001e10, + 0x00001e13, 0x00001e12, + 0x00001e15, 0x00001e14, + 0x00001e17, 0x00001e16, + 0x00001e19, 0x00001e18, + 0x00001e1b, 0x00001e1a, + 0x00001e1d, 0x00001e1c, + 0x00001e1f, 0x00001e1e, + 0x00001e21, 0x00001e20, + 0x00001e23, 0x00001e22, + 0x00001e25, 0x00001e24, + 0x00001e27, 0x00001e26, + 0x00001e29, 0x00001e28, + 0x00001e2b, 0x00001e2a, + 0x00001e2d, 0x00001e2c, + 0x00001e2f, 0x00001e2e, + 0x00001e31, 0x00001e30, + 0x00001e33, 0x00001e32, + 0x00001e35, 0x00001e34, + 0x00001e37, 0x00001e36, + 0x00001e39, 0x00001e38, + 0x00001e3b, 0x00001e3a, + 0x00001e3d, 0x00001e3c, + 0x00001e3f, 0x00001e3e, + 0x00001e41, 0x00001e40, + 0x00001e43, 0x00001e42, + 0x00001e45, 0x00001e44, + 0x00001e47, 0x00001e46, + 0x00001e49, 0x00001e48, + 0x00001e4b, 0x00001e4a, + 0x00001e4d, 0x00001e4c, + 0x00001e4f, 0x00001e4e, + 0x00001e51, 0x00001e50, + 0x00001e53, 0x00001e52, + 0x00001e55, 0x00001e54, + 0x00001e57, 0x00001e56, + 0x00001e59, 0x00001e58, + 0x00001e5b, 0x00001e5a, + 0x00001e5d, 0x00001e5c, + 0x00001e5f, 0x00001e5e, + 0x00001e61, 0x00001e60, + 0x00001e63, 0x00001e62, + 0x00001e65, 0x00001e64, + 0x00001e67, 0x00001e66, + 0x00001e69, 0x00001e68, + 0x00001e6b, 0x00001e6a, + 0x00001e6d, 0x00001e6c, + 0x00001e6f, 0x00001e6e, + 0x00001e71, 0x00001e70, + 0x00001e73, 0x00001e72, + 0x00001e75, 0x00001e74, + 0x00001e77, 
0x00001e76, + 0x00001e79, 0x00001e78, + 0x00001e7b, 0x00001e7a, + 0x00001e7d, 0x00001e7c, + 0x00001e7f, 0x00001e7e, + 0x00001e81, 0x00001e80, + 0x00001e83, 0x00001e82, + 0x00001e85, 0x00001e84, + 0x00001e87, 0x00001e86, + 0x00001e89, 0x00001e88, + 0x00001e8b, 0x00001e8a, + 0x00001e8d, 0x00001e8c, + 0x00001e8f, 0x00001e8e, + 0x00001e91, 0x00001e90, + 0x00001e93, 0x00001e92, + 0x00001e95, 0x00001e94, + 0x00001e9b, 0x00001e60, + 0x00001ea1, 0x00001ea0, + 0x00001ea3, 0x00001ea2, + 0x00001ea5, 0x00001ea4, + 0x00001ea7, 0x00001ea6, + 0x00001ea9, 0x00001ea8, + 0x00001eab, 0x00001eaa, + 0x00001ead, 0x00001eac, + 0x00001eaf, 0x00001eae, + 0x00001eb1, 0x00001eb0, + 0x00001eb3, 0x00001eb2, + 0x00001eb5, 0x00001eb4, + 0x00001eb7, 0x00001eb6, + 0x00001eb9, 0x00001eb8, + 0x00001ebb, 0x00001eba, + 0x00001ebd, 0x00001ebc, + 0x00001ebf, 0x00001ebe, + 0x00001ec1, 0x00001ec0, + 0x00001ec3, 0x00001ec2, + 0x00001ec5, 0x00001ec4, + 0x00001ec7, 0x00001ec6, + 0x00001ec9, 0x00001ec8, + 0x00001ecb, 0x00001eca, + 0x00001ecd, 0x00001ecc, + 0x00001ecf, 0x00001ece, + 0x00001ed1, 0x00001ed0, + 0x00001ed3, 0x00001ed2, + 0x00001ed5, 0x00001ed4, + 0x00001ed7, 0x00001ed6, + 0x00001ed9, 0x00001ed8, + 0x00001edb, 0x00001eda, + 0x00001edd, 0x00001edc, + 0x00001edf, 0x00001ede, + 0x00001ee1, 0x00001ee0, + 0x00001ee3, 0x00001ee2, + 0x00001ee5, 0x00001ee4, + 0x00001ee7, 0x00001ee6, + 0x00001ee9, 0x00001ee8, + 0x00001eeb, 0x00001eea, + 0x00001eed, 0x00001eec, + 0x00001eef, 0x00001eee, + 0x00001ef1, 0x00001ef0, + 0x00001ef3, 0x00001ef2, + 0x00001ef5, 0x00001ef4, + 0x00001ef7, 0x00001ef6, + 0x00001ef9, 0x00001ef8, + 0x00001f00, 0x00001f08, + 0x00001f01, 0x00001f09, + 0x00001f02, 0x00001f0a, + 0x00001f03, 0x00001f0b, + 0x00001f04, 0x00001f0c, + 0x00001f05, 0x00001f0d, + 0x00001f06, 0x00001f0e, + 0x00001f07, 0x00001f0f, + 0x00001f10, 0x00001f18, + 0x00001f11, 0x00001f19, + 0x00001f12, 0x00001f1a, + 0x00001f13, 0x00001f1b, + 0x00001f14, 0x00001f1c, + 0x00001f15, 0x00001f1d, + 0x00001f20, 0x00001f28, + 
0x00001f21, 0x00001f29, + 0x00001f22, 0x00001f2a, + 0x00001f23, 0x00001f2b, + 0x00001f24, 0x00001f2c, + 0x00001f25, 0x00001f2d, + 0x00001f26, 0x00001f2e, + 0x00001f27, 0x00001f2f, + 0x00001f30, 0x00001f38, + 0x00001f31, 0x00001f39, + 0x00001f32, 0x00001f3a, + 0x00001f33, 0x00001f3b, + 0x00001f34, 0x00001f3c, + 0x00001f35, 0x00001f3d, + 0x00001f36, 0x00001f3e, + 0x00001f37, 0x00001f3f, + 0x00001f40, 0x00001f48, + 0x00001f41, 0x00001f49, + 0x00001f42, 0x00001f4a, + 0x00001f43, 0x00001f4b, + 0x00001f44, 0x00001f4c, + 0x00001f45, 0x00001f4d, + 0x00001f51, 0x00001f59, + 0x00001f53, 0x00001f5b, + 0x00001f55, 0x00001f5d, + 0x00001f57, 0x00001f5f, + 0x00001f60, 0x00001f68, + 0x00001f61, 0x00001f69, + 0x00001f62, 0x00001f6a, + 0x00001f63, 0x00001f6b, + 0x00001f64, 0x00001f6c, + 0x00001f65, 0x00001f6d, + 0x00001f66, 0x00001f6e, + 0x00001f67, 0x00001f6f, + 0x00001f70, 0x00001fba, + 0x00001f71, 0x00001fbb, + 0x00001f72, 0x00001fc8, + 0x00001f73, 0x00001fc9, + 0x00001f74, 0x00001fca, + 0x00001f75, 0x00001fcb, + 0x00001f76, 0x00001fda, + 0x00001f77, 0x00001fdb, + 0x00001f78, 0x00001ff8, + 0x00001f79, 0x00001ff9, + 0x00001f7a, 0x00001fea, + 0x00001f7b, 0x00001feb, + 0x00001f7c, 0x00001ffa, + 0x00001f7d, 0x00001ffb, + 0x00001f80, 0x00001f88, + 0x00001f81, 0x00001f89, + 0x00001f82, 0x00001f8a, + 0x00001f83, 0x00001f8b, + 0x00001f84, 0x00001f8c, + 0x00001f85, 0x00001f8d, + 0x00001f86, 0x00001f8e, + 0x00001f87, 0x00001f8f, + 0x00001f90, 0x00001f98, + 0x00001f91, 0x00001f99, + 0x00001f92, 0x00001f9a, + 0x00001f93, 0x00001f9b, + 0x00001f94, 0x00001f9c, + 0x00001f95, 0x00001f9d, + 0x00001f96, 0x00001f9e, + 0x00001f97, 0x00001f9f, + 0x00001fa0, 0x00001fa8, + 0x00001fa1, 0x00001fa9, + 0x00001fa2, 0x00001faa, + 0x00001fa3, 0x00001fab, + 0x00001fa4, 0x00001fac, + 0x00001fa5, 0x00001fad, + 0x00001fa6, 0x00001fae, + 0x00001fa7, 0x00001faf, + 0x00001fb0, 0x00001fb8, + 0x00001fb1, 0x00001fb9, + 0x00001fb3, 0x00001fbc, + 0x00001fbe, 0x00000399, + 0x00001fc3, 0x00001fcc, + 0x00001fd0, 0x00001fd8, 
+ 0x00001fd1, 0x00001fd9, + 0x00001fe0, 0x00001fe8, + 0x00001fe1, 0x00001fe9, + 0x00001fe5, 0x00001fec, + 0x00001ff3, 0x00001ffc, + 0x00002170, 0x00002160, + 0x00002171, 0x00002161, + 0x00002172, 0x00002162, + 0x00002173, 0x00002163, + 0x00002174, 0x00002164, + 0x00002175, 0x00002165, + 0x00002176, 0x00002166, + 0x00002177, 0x00002167, + 0x00002178, 0x00002168, + 0x00002179, 0x00002169, + 0x0000217a, 0x0000216a, + 0x0000217b, 0x0000216b, + 0x0000217c, 0x0000216c, + 0x0000217d, 0x0000216d, + 0x0000217e, 0x0000216e, + 0x0000217f, 0x0000216f, + 0x000024d0, 0x000024b6, + 0x000024d1, 0x000024b7, + 0x000024d2, 0x000024b8, + 0x000024d3, 0x000024b9, + 0x000024d4, 0x000024ba, + 0x000024d5, 0x000024bb, + 0x000024d6, 0x000024bc, + 0x000024d7, 0x000024bd, + 0x000024d8, 0x000024be, + 0x000024d9, 0x000024bf, + 0x000024da, 0x000024c0, + 0x000024db, 0x000024c1, + 0x000024dc, 0x000024c2, + 0x000024dd, 0x000024c3, + 0x000024de, 0x000024c4, + 0x000024df, 0x000024c5, + 0x000024e0, 0x000024c6, + 0x000024e1, 0x000024c7, + 0x000024e2, 0x000024c8, + 0x000024e3, 0x000024c9, + 0x000024e4, 0x000024ca, + 0x000024e5, 0x000024cb, + 0x000024e6, 0x000024cc, + 0x000024e7, 0x000024cd, + 0x000024e8, 0x000024ce, + 0x000024e9, 0x000024cf, + 0x0000ff41, 0x0000ff21, + 0x0000ff42, 0x0000ff22, + 0x0000ff43, 0x0000ff23, + 0x0000ff44, 0x0000ff24, + 0x0000ff45, 0x0000ff25, + 0x0000ff46, 0x0000ff26, + 0x0000ff47, 0x0000ff27, + 0x0000ff48, 0x0000ff28, + 0x0000ff49, 0x0000ff29, + 0x0000ff4a, 0x0000ff2a, + 0x0000ff4b, 0x0000ff2b, + 0x0000ff4c, 0x0000ff2c, + 0x0000ff4d, 0x0000ff2d, + 0x0000ff4e, 0x0000ff2e, + 0x0000ff4f, 0x0000ff2f, + 0x0000ff50, 0x0000ff30, + 0x0000ff51, 0x0000ff31, + 0x0000ff52, 0x0000ff32, + 0x0000ff53, 0x0000ff33, + 0x0000ff54, 0x0000ff34, + 0x0000ff55, 0x0000ff35, + 0x0000ff56, 0x0000ff36, + 0x0000ff57, 0x0000ff37, + 0x0000ff58, 0x0000ff38, + 0x0000ff59, 0x0000ff39, + 0x0000ff5a, 0x0000ff3a, + 0x00010428, 0x00010400, + 0x00010429, 0x00010401, + 0x0001042a, 0x00010402, + 0x0001042b, 
0x00010403, + 0x0001042c, 0x00010404, + 0x0001042d, 0x00010405, + 0x0001042e, 0x00010406, + 0x0001042f, 0x00010407, + 0x00010430, 0x00010408, + 0x00010431, 0x00010409, + 0x00010432, 0x0001040a, + 0x00010433, 0x0001040b, + 0x00010434, 0x0001040c, + 0x00010435, 0x0001040d, + 0x00010436, 0x0001040e, + 0x00010437, 0x0001040f, + 0x00010438, 0x00010410, + 0x00010439, 0x00010411, + 0x0001043a, 0x00010412, + 0x0001043b, 0x00010413, + 0x0001043c, 0x00010414, + 0x0001043d, 0x00010415, + 0x0001043e, 0x00010416, + 0x0001043f, 0x00010417, + 0x00010440, 0x00010418, + 0x00010441, 0x00010419, + 0x00010442, 0x0001041a, + 0x00010443, 0x0001041b, + 0x00010444, 0x0001041c, + 0x00010445, 0x0001041d, + 0x00010446, 0x0001041e, + 0x00010447, 0x0001041f, + 0x00010448, 0x00010420, + 0x00010449, 0x00010421, + 0x0001044a, 0x00010422, + 0x0001044b, 0x00010423, + 0x0001044c, 0x00010424, + 0x0001044d, 0x00010425, +}; + +#endif /*UTF8TABLES_H_*/ diff --git a/doc/modsecurity2-apache-reference.xml b/doc/modsecurity2-apache-reference.xml index 4196c7af..c6221372 100644 --- a/doc/modsecurity2-apache-reference.xml +++ b/doc/modsecurity2-apache-reference.xml @@ -4287,6 +4287,40 @@ SecRule ARGS:route "!@endsWith %{REQUEST_ADDR}" role="bold">@lt 15" +
+ <literal>pm</literal> + + Description: Parallel Match + operator. This operator uses a set based matching engine for faster + matches of keyword lists. + + Example: + + SecRule REQUEST_HEADERS:User-Agent "@pm WebZIP WebCopier Webster WebStripper SiteSnagger ProWebWalker CheeseBot" "deny,status:403" + + The above would deny access with 403 if any of the words matched + within the User-Agent HTTP header value. +
+ +
+ <literal>pmfile</literal> + + Description: Parallel Match + operator. This operator uses a set based matching engine for faster + matches of keyword lists. It is the same as @pm + except that it takes a list of file arguments. The contents of the + files should be one pattern per line. + + Example: + + SecRule REQUEST_HEADERS:User-Agent "@pmfile /path/to/blacklist1 /path/to/blacklist2" "deny,status:403" + + The above would deny access with 403 if any of the patterns in the + two files matched within the User-Agent HTTP header value. +
+
<literal>rbl</literal>