mirror of
https://github.com/owasp-modsecurity/ModSecurity.git
synced 2025-08-14 05:45:59 +03:00
Adds support to the @pm operator
This commit is contained in:
parent
774d897351
commit
95efb99a8c
@ -97,6 +97,7 @@ ACTIONS = \
|
||||
|
||||
|
||||
UTILS = \
|
||||
utils/acmp.cc \
|
||||
utils/geo_lookup.cc \
|
||||
utils/https_client.cc \
|
||||
utils/ip_tree.cc \
|
||||
@ -174,6 +175,7 @@ libmodsecurity_la_CPPFLAGS = \
|
||||
-std=c++11 \
|
||||
-I.. \
|
||||
-g \
|
||||
-fPIC \
|
||||
-O0 \
|
||||
-I ../headers \
|
||||
$(PCRE_CPPFLAGS)
|
||||
|
@ -16,26 +16,80 @@
|
||||
#include "operators/pm.h"
|
||||
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
#include "operators/operator.h"
|
||||
|
||||
namespace ModSecurity {
|
||||
namespace operators {
|
||||
|
||||
bool Pm::evaluate(Assay *assay) {
|
||||
/**
|
||||
* @todo Implement the operator Pm.
|
||||
* Reference: https://github.com/SpiderLabs/ModSecurity/wiki/Reference-Manual#pm
|
||||
*/
|
||||
return true;
|
||||
/**
 * Releases the Aho-Corasick parser owned by this operator.
 *
 * The parser pointer is validated before anything reachable through it is
 * touched: acmp_create() can hand back a parser whose allocation failed, in
 * which case there is nothing to tear down.
 */
Pm::~Pm() {
    if (m_p == NULL) {
        return;
    }

    if (m_p->root_node != NULL) {
        /* Free the lookup tree hanging off the root before the root itself. */
        postOrderTraversal(m_p->root_node->btree);
        free(m_p->root_node);
        m_p->root_node = NULL;
    }

    free(m_p);
    m_p = NULL;
}
|
||||
|
||||
|
||||
Pm::Pm(std::string op, std::string param, bool negation)
|
||||
: Operator() {
|
||||
this->op = op;
|
||||
this->param = param;
|
||||
/**
 * Frees a binary lookup tree together with the trie nodes it references.
 *
 * Each btree entry points at one trie node, and every trie node with
 * children carries its own btree describing them (built by
 * acmp_build_binary_tree). Freeing only the btree passed in would therefore
 * release the first trie level and leak everything below it, so the walk
 * descends into each trie node's btree before freeing the node.
 *
 * @param node root of the btree to release; NULL is accepted and ignored.
 */
void Pm::postOrderTraversal(acmp_btree_node_t *node) {
    if (node == NULL) {
        return;
    }

    postOrderTraversal(node->left);
    postOrderTraversal(node->right);

    acmp_node_t *trie_node = node->node;
    if (trie_node != NULL) {
        /* Release the subtree describing this trie node's children first. */
        postOrderTraversal(trie_node->btree);
        trie_node->btree = NULL;

        /* `pattern` is a heap copy only on nodes that terminate a pattern;
         * everywhere else it still points at the "" literal assigned by
         * acmp_add_pattern and must not be freed. */
        if (trie_node->is_last && trie_node->pattern != NULL) {
            free(trie_node->pattern);
        }
        trie_node->pattern = NULL;

        if (trie_node->text) {
            free(trie_node->text);
            trie_node->text = NULL;
        }

        free(trie_node);
        node->node = NULL;
    }

    free(node);
}
|
||||
|
||||
|
||||
/**
 * Runs the prepared phrase matcher over `input`.
 *
 * A fresh ACMPT cursor (ptr == NULL) is used on every call, so matching
 * always starts from the trie root.
 *
 * @param assay transaction being evaluated (not used here yet).
 * @param input data to scan.
 * @return true when acmp_process_quick reports a hit (returns 1).
 */
bool Pm::evaluate(Assay *assay, const std::string &input) {
    const char *match = NULL;

    ACMPT cursor;
    cursor.parser = m_p;
    cursor.ptr = NULL;

    const int rc = acmp_process_quick(&cursor, &match, input.c_str(),
        input.length());
    const bool matched = (rc == 1);

    if (matched) {
        // save into tx, etc...
    }

    return matched;
}
|
||||
|
||||
|
||||
bool Pm::init(const char **error) {
|
||||
std::vector<std::string> vec;
|
||||
|
||||
std::istringstream iss(param);
|
||||
std::copy(std::istream_iterator<std::string>(iss),
|
||||
std::istream_iterator<std::string>(),
|
||||
back_inserter(vec));
|
||||
|
||||
for (auto &a : vec) {
|
||||
acmp_add_pattern(m_p, a.c_str(), NULL, NULL, a.length());
|
||||
}
|
||||
|
||||
|
||||
acmp_prepare(m_p);
|
||||
}
|
||||
|
||||
|
||||
} // namespace operators
|
||||
} // namespace ModSecurity
|
||||
|
@ -19,18 +19,32 @@
|
||||
#include <string>
|
||||
|
||||
#include "operators/operator.h"
|
||||
#include "utils/acmp.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace ModSecurity {
|
||||
namespace operators {
|
||||
|
||||
|
||||
class Pm : public Operator {
|
||||
public:
|
||||
/** @ingroup ModSecurity_Operator */
|
||||
Pm(std::string o, std::string p, bool i);
|
||||
bool evaluate(Assay *assay);
|
||||
Pm(std::string op, std::string param, bool negation)
|
||||
: Operator(op, param, negation) {
|
||||
m_p = acmp_create(0);
|
||||
}
|
||||
~Pm();
|
||||
|
||||
bool evaluate(Assay *assay, const std::string &input);
|
||||
|
||||
virtual bool init(const char **error);
|
||||
void postOrderTraversal(acmp_btree_node_t *node);
|
||||
|
||||
protected:
|
||||
ACMP *m_p;
|
||||
};
|
||||
|
||||
|
||||
} // namespace operators
|
||||
} // namespace ModSecurity
|
||||
#endif
|
||||
|
440
src/utils/acmp.cc
Normal file
440
src/utils/acmp.cc
Normal file
@ -0,0 +1,440 @@
|
||||
/*
|
||||
* ModSecurity for Apache 2.x, http://www.modsecurity.org/
|
||||
* Copyright (c) 2004-2013 Trustwave Holdings, Inc. (http://www.trustwave.com/)
|
||||
*
|
||||
* You may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* If any of the files related to licensing are missing or if you have any
|
||||
* other questions related to licensing please contact Trustwave Holdings, Inc.
|
||||
* directly using the email address security@modsecurity.org.
|
||||
*/
|
||||
|
||||
/* Aho-Corasick Matching */
|
||||
|
||||
#include "acmp.h"
|
||||
#include <vector>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <cstddef>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
extern "C" {
|
||||
|
||||
/*
|
||||
*******************************************************************************
|
||||
*******************************************************************************
|
||||
* Functions for UTF-8 support
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
*******************************************************************************
|
||||
*******************************************************************************
|
||||
* Code for local / static utility functions
|
||||
*/
|
||||
|
||||
/**
|
||||
* Returns length of given string for parser's encoding
|
||||
*/
|
||||
static size_t acmp_strlen(ACMP *parser, const char *str) {
|
||||
return strlen(str);
|
||||
}
|
||||
|
||||
/**
|
||||
* Turns string to array of ucs values, depending on parser's encoding
|
||||
* str - string to convert, doesn't have to be NULL-terminated
|
||||
* ucs_chars - where to write ucs values
|
||||
* len - length of input string
|
||||
*/
|
||||
static void acmp_strtoucs(ACMP *parser, const char *str, long *ucs_chars, int len) {
|
||||
int i;
|
||||
const char *c = str;
|
||||
|
||||
|
||||
{
|
||||
for (i = 0; i < len; i++) {
|
||||
*(ucs_chars++) = *(c++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns node with given letter, or null if not found
|
||||
*/
|
||||
static acmp_node_t *acmp_child_for_code(acmp_node_t *parent_node, long ucs_code) {
|
||||
acmp_node_t *node = parent_node->child;
|
||||
if (node == NULL) return NULL;
|
||||
for (;;) {
|
||||
if (node->letter == ucs_code) return node;
|
||||
node = node->sibling;
|
||||
if (node == NULL) return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds node to parent node, if it is not already there
|
||||
*/
|
||||
static void acmp_add_node_to_parent(acmp_node_t *parent, acmp_node_t *child) {
|
||||
acmp_node_t *node = NULL;
|
||||
|
||||
child->parent = parent;
|
||||
if (parent->child == NULL) {
|
||||
parent->child = child;
|
||||
return;
|
||||
}
|
||||
|
||||
node = parent->child;
|
||||
for (;;) {
|
||||
if (node == child) return;
|
||||
if (node->sibling == NULL) {
|
||||
node->sibling = child;
|
||||
return;
|
||||
}
|
||||
node = node->sibling;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies values from one node to another, without child/sibling/fail pointers
|
||||
* and without state variables.
|
||||
*/
|
||||
static void acmp_clone_node_no_state(acmp_node_t *from, acmp_node_t *to) {
|
||||
memcpy(to, from, sizeof(acmp_node_t));
|
||||
to->child = NULL;
|
||||
to->sibling = NULL;
|
||||
to->fail = NULL;
|
||||
to->hit_count = 0;
|
||||
}
|
||||
|
||||
/**
 * Binary-search lookup of the child of `node` labelled `letter`, using the
 * node's btree (smaller letters to the left, larger to the right).
 *
 * Returns the trie node, or NULL when no child carries that letter.
 */
static inline acmp_node_t *acmp_btree_find(acmp_node_t *node, long letter) {
    acmp_btree_node_t *cursor = node->btree;

    while (cursor != NULL && cursor->letter != letter) {
        cursor = (cursor->letter > letter) ? cursor->left : cursor->right;
    }

    return (cursor != NULL) ? cursor->node : NULL;
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
static inline acmp_node_t *acmp_goto(acmp_node_t *node, long letter) {
|
||||
return acmp_btree_find(node, letter);
|
||||
}
|
||||
|
||||
/**
|
||||
* Connects each node with its first fail node that is end of a phrase.
|
||||
*/
|
||||
static void acmp_connect_other_matches(ACMP *parser, acmp_node_t *node) {
|
||||
acmp_node_t *child, *om;
|
||||
|
||||
for (child = node->child; child != NULL; child = child->sibling) {
|
||||
if (child->fail == NULL) continue;
|
||||
for (om = child->fail; om != parser->root_node; om = om->fail) {
|
||||
if (om->is_last) {
|
||||
child->o_match = om;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Go recursively through children of this node that have a child node */
|
||||
for(child = node->child; child != NULL; child = child->sibling) {
|
||||
if (child->child != NULL) acmp_connect_other_matches(parser, child);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds leaves to binary tree, working from sorted array of keyword tree nodes
|
||||
*/
|
||||
static void acmp_add_btree_leaves(acmp_btree_node_t *node, acmp_node_t *nodes[],
|
||||
int pos, int lb, int rb) {
|
||||
|
||||
int left = 0, right = 0;
|
||||
if ((pos - lb) > 1) {
|
||||
left = lb + (pos - lb) / 2;
|
||||
node->left =(acmp_btree_node_t *) calloc(1, sizeof(acmp_btree_node_t));
|
||||
/* ENH: Check alloc succeded */
|
||||
node->left->node = nodes[left];
|
||||
node->left->letter = nodes[left]->letter;
|
||||
#ifdef DEBUG_ACMP
|
||||
fprintf(stderr, "%lc ->left %lc\n", (wint_t)node->node->letter, (wint_t)node->left->node->letter);
|
||||
#endif
|
||||
}
|
||||
if ((rb - pos) > 1) {
|
||||
right = pos + (rb - pos) / 2;
|
||||
node->right = (acmp_btree_node_t *)calloc(1, sizeof(acmp_btree_node_t));
|
||||
/* ENH: Check alloc succeded */
|
||||
node->right->node = nodes[right];
|
||||
node->right->letter = nodes[right]->letter;
|
||||
#ifdef DEBUG_ACMP
|
||||
fprintf(stderr, "%lc ->right %lc\n", (wint_t)node->node->letter, (wint_t)node->right->node->letter);
|
||||
#endif
|
||||
}
|
||||
if (node->right != NULL) {
|
||||
acmp_add_btree_leaves(node->right, nodes, right, pos, rb);
|
||||
}
|
||||
if (node->left != NULL) {
|
||||
acmp_add_btree_leaves(node->left, nodes, left, lb, pos);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds balanced binary tree from children nodes of given node.
|
||||
*/
|
||||
static void acmp_build_binary_tree(ACMP *parser, acmp_node_t *node) {
|
||||
size_t count, i, j;
|
||||
acmp_node_t *child = node->child;
|
||||
acmp_node_t **nodes;
|
||||
size_t pos;
|
||||
|
||||
/* Build an array big enough */
|
||||
for (count = 0; child != NULL; child = child->sibling) count++;
|
||||
nodes = (acmp_node_t **)calloc(1, count * sizeof(acmp_node_t *));
|
||||
/* ENH: Check alloc succeded */
|
||||
|
||||
/* ENH: Combine this in the loop below - we do not need two loops */
|
||||
child = node->child;
|
||||
for (i = 0; i < count; i++) {
|
||||
nodes[i] = child;
|
||||
child = child->sibling;
|
||||
};
|
||||
|
||||
/* We have array with all children of the node and number of those children
|
||||
*/
|
||||
for (i = 0; i < count - 1; i++)
|
||||
for (j = i + 1; j < count; j++) {
|
||||
acmp_node_t *tmp;
|
||||
|
||||
if (nodes[i]->letter < nodes[j]->letter) continue;
|
||||
|
||||
tmp = nodes[i];
|
||||
nodes[i] = nodes[j];
|
||||
nodes[j] = tmp;
|
||||
}
|
||||
if (node->btree) { free (node->btree); node->btree = NULL; }
|
||||
node->btree = (acmp_btree_node_t *)calloc(1, sizeof(acmp_btree_node_t));
|
||||
/* ENH: Check alloc succeded */
|
||||
pos = count / 2;
|
||||
node->btree->node = nodes[pos];
|
||||
node->btree->letter = nodes[pos]->letter;
|
||||
acmp_add_btree_leaves(node->btree, nodes, pos, -1, count);
|
||||
for (i = 0; i < count; i++) {
|
||||
if (nodes[i]->child != NULL) acmp_build_binary_tree(parser, nodes[i]);
|
||||
}
|
||||
free(nodes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs fail paths on keyword trie
|
||||
*/
|
||||
static int acmp_connect_fail_branches(ACMP *parser) {
|
||||
/* Already connected ? */
|
||||
acmp_node_t *child, *node, *goto_node;
|
||||
|
||||
if (parser->is_failtree_done != 0) return 1;
|
||||
|
||||
std::vector<acmp_node_t *> arr;
|
||||
std::vector<acmp_node_t *> arr2;
|
||||
std::vector<acmp_node_t *> tmp;
|
||||
|
||||
parser->root_node->text = "";
|
||||
|
||||
parser->root_node->fail = parser->root_node;
|
||||
|
||||
/* All first-level children will fail back to root node */
|
||||
for (child = parser->root_node->child; child != NULL; child = child->sibling) {
|
||||
child->fail = parser->root_node;
|
||||
arr.push_back(child);
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
while (arr.empty() == false) {
|
||||
node = arr.back();
|
||||
arr.pop_back();
|
||||
node->fail = parser->root_node;
|
||||
if (node->parent != parser->root_node) {
|
||||
goto_node = acmp_child_for_code(node->parent->fail, node->letter);
|
||||
node->fail = (goto_node != NULL) ? goto_node : parser->root_node;
|
||||
}
|
||||
#ifdef DEBUG_ACMP
|
||||
fprintf(stderr, "fail direction: *%s* => *%s*\n", node->text, node->fail->text);
|
||||
#endif
|
||||
child = node->child;
|
||||
while (child != NULL) {
|
||||
arr2.push_back(child);
|
||||
child = child->sibling;
|
||||
}
|
||||
}
|
||||
if (arr2.empty() == true) break;
|
||||
|
||||
tmp = arr;
|
||||
arr = arr2;
|
||||
arr2 = tmp;
|
||||
}
|
||||
|
||||
acmp_connect_other_matches(parser, parser->root_node);
|
||||
if (parser->root_node->child != NULL) acmp_build_binary_tree(parser, parser->root_node);
|
||||
parser->is_failtree_done = 1;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
*******************************************************************************
|
||||
*******************************************************************************
|
||||
* Code for functions from header file
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* flags - OR-ed values of ACMP_FLAG constants
|
||||
*/
|
||||
ACMP *acmp_create(int flags) {
|
||||
int rc;
|
||||
ACMP *parser;
|
||||
|
||||
parser = (ACMP *)calloc(1, sizeof(ACMP));
|
||||
/* ENH: Check alloc succeded */
|
||||
parser->is_case_sensitive = (flags & ACMP_FLAG_CASE_SENSITIVE) == 0 ? 0 : 1;
|
||||
parser->root_node = (acmp_node_t *)calloc(1, sizeof(acmp_node_t));
|
||||
/* ENH: Check alloc succeded */
|
||||
return parser;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates fail tree and initializes buffer
|
||||
*/
|
||||
int acmp_prepare(ACMP *parser) {
|
||||
int st;
|
||||
|
||||
if (parser->bp_buff_len < parser->longest_entry) {
|
||||
parser->bp_buff_len = parser->longest_entry * 2;
|
||||
//parser->bp_buffer = (size_t *)calloc(1, sizeof(size_t) * parser->bp_buff_len);
|
||||
/* ENH: Check alloc succeded */
|
||||
}
|
||||
|
||||
st = acmp_connect_fail_branches(parser);
|
||||
parser->active_node = parser->root_node;
|
||||
if (st != 1) return st;
|
||||
parser->is_active = 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds pattern to parser
|
||||
* parser - ACMP parser
|
||||
* pattern - string nwith pattern to match
|
||||
* callback - Optional, pointer to an acmp_callback_t function
|
||||
* data - pointer to data that will be passed to callback function, only used if callback
|
||||
* is supplied
|
||||
* len - Length of pattern in characters, if zero string length is used.
|
||||
*/
|
||||
int acmp_add_pattern(ACMP *parser, const char *pattern,
|
||||
acmp_callback_t callback, void *data, size_t len)
|
||||
{
|
||||
size_t length, i, j;
|
||||
long *ucs_chars;
|
||||
acmp_node_t *parent, *child;
|
||||
|
||||
if (parser->is_active != 0) return -1;
|
||||
length = (len == 0) ? acmp_strlen(parser, pattern) : len;
|
||||
ucs_chars = (long *)calloc(1, length * sizeof(long));
|
||||
/* ENH: Check alloc succeded */
|
||||
|
||||
parent = parser->root_node;
|
||||
acmp_strtoucs(parser, pattern, ucs_chars, length);
|
||||
|
||||
for (i = 0; i < length; i++) {
|
||||
long letter = ucs_chars[i];
|
||||
if (parser->is_case_sensitive == 0) {
|
||||
letter = tolower(letter);
|
||||
}
|
||||
child = acmp_child_for_code(parent, letter);
|
||||
if (child == NULL) {
|
||||
child = (acmp_node_t *) calloc(1, sizeof(acmp_node_t));
|
||||
/* ENH: Check alloc succeded */
|
||||
child->pattern = "";
|
||||
child->letter = letter;
|
||||
child->depth = i;
|
||||
child->text = (char *)calloc(1, strlen(pattern) + 2);
|
||||
/* ENH: Check alloc succeded */
|
||||
for (j = 0; j <= i; j++) child->text[j] = pattern[j];
|
||||
}
|
||||
if (i == length - 1) {
|
||||
if (child->is_last == 0) {
|
||||
parser->dict_count++;
|
||||
child->is_last = 1;
|
||||
child->pattern = (char *)calloc(1, strlen(pattern) + 2);
|
||||
/* ENH: Check alloc succeded */
|
||||
strcpy(child->pattern, pattern);
|
||||
}
|
||||
child->callback = callback;
|
||||
child->callback_data = data;
|
||||
}
|
||||
acmp_add_node_to_parent(parent, child);
|
||||
parent = child;
|
||||
}
|
||||
if (length > parser->longest_entry) parser->longest_entry = length;
|
||||
parser->is_failtree_done = 0;
|
||||
free(ucs_chars);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Process the data using ACMPT to keep state, and ACMPT's parser to keep the tree
|
||||
*/
|
||||
int acmp_process_quick(ACMPT *acmpt, const char **match, const char *data, size_t len) {
|
||||
ACMP *parser;
|
||||
acmp_node_t *node, *go_to;
|
||||
const char *end;
|
||||
|
||||
if (acmpt->parser->is_failtree_done == 0) {
|
||||
acmp_prepare(acmpt->parser);
|
||||
};
|
||||
|
||||
parser = acmpt->parser;
|
||||
if (acmpt->ptr == NULL) acmpt->ptr = parser->root_node;
|
||||
node = (acmp_node_t *)acmpt->ptr;
|
||||
end = data + len;
|
||||
|
||||
while (data < end) {
|
||||
long letter = (unsigned char)*data++;
|
||||
|
||||
if (parser->is_case_sensitive == 0) letter = tolower(letter);
|
||||
|
||||
go_to = NULL;
|
||||
while (go_to == NULL) {
|
||||
go_to = acmp_goto(node, letter);
|
||||
if (go_to != NULL) {
|
||||
if (go_to->is_last) {
|
||||
*match = go_to->text;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
if (node == parser->root_node) break;
|
||||
if (go_to == NULL) node = node->fail;
|
||||
}
|
||||
if (go_to != NULL) node = go_to;
|
||||
|
||||
/* If node has o_match, then we found a pattern */
|
||||
if (node->o_match != NULL) {
|
||||
*match = node->text;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
acmpt->ptr = node;
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
194
src/utils/acmp.h
Normal file
194
src/utils/acmp.h
Normal file
@ -0,0 +1,194 @@
|
||||
/*
|
||||
* ModSecurity for Apache 2.x, http://www.modsecurity.org/
|
||||
* Copyright (c) 2004-2013 Trustwave Holdings, Inc. (http://www.trustwave.com/)
|
||||
*
|
||||
* You may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* If any of the files related to licensing are missing or if you have any
|
||||
* other questions related to licensing please contact Trustwave Holdings, Inc.
|
||||
* directly using the email address security@modsecurity.org.
|
||||
*/
|
||||
|
||||
#ifndef ACMP_H_
|
||||
#define ACMP_H_
|
||||
|
||||
#define ACMP_FLAG_BYTE 0
|
||||
#define ACMP_FLAG_CASE_SENSITIVE 1
|
||||
#define ACMP_FLAG_CASE_INSENSITIVE 0
|
||||
#ifdef ACMP_USE_UTF8
|
||||
#define ACMP_FLAG_UTF8 0x100
|
||||
#endif
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
|
||||
extern "C" {
|
||||
/**
|
||||
* Opaque struct with parser data
|
||||
*/
|
||||
typedef struct ACMP ACMP;
|
||||
|
||||
/**
|
||||
* Used to separate state from the trie for acmp_process_quick function
|
||||
*/
|
||||
/* Per-search cursor: lets several searches share one prepared parser. */
typedef struct {
|
||||
/* Parser whose trie is searched; acmp_process_quick prepares it on demand. */
ACMP *parser;
|
||||
/* Current trie node (an acmp_node_t *). NULL means "start at the root";
 * acmp_process_quick stores the node reached when it returns 0. */
void *ptr;
|
||||
} ACMPT;
|
||||
|
||||
/*
|
||||
*******************************************************************************
|
||||
*******************************************************************************
|
||||
* Data structures for acmp parser
|
||||
*/
|
||||
|
||||
/**
|
||||
* Callback function. Arguments are:
|
||||
* ACMP * - acmp parser that initiated callback
|
||||
* void * - custom data you supplied when adding callback
|
||||
* size_t - position in bytes where pattern was found
|
||||
* size_t - position in chars where pattern was found, for multibyte strings
|
||||
*/
|
||||
typedef void (*acmp_callback_t)(ACMP *, void *, size_t, size_t);
|
||||
|
||||
|
||||
/**
|
||||
* One node in trie
|
||||
*/
|
||||
typedef struct acmp_node_t acmp_node_t;
|
||||
typedef struct acmp_btree_node_t acmp_btree_node_t;
|
||||
/* One node of the keyword trie. Nodes are allocated zero-initialised by
 * acmp_add_pattern / acmp_create. */
struct acmp_node_t {
|
||||
/* Character code on the edge leading into this node. */
long letter;
|
||||
/* Non-zero once some pattern ends at this node. */
int is_last;
|
||||
/* Callback and its argument as passed to acmp_add_pattern for the pattern
 * ending here. */
acmp_callback_t callback;
|
||||
void *callback_data;
|
||||
/* Zero-based index of this letter within the pattern that created the node. */
int depth;
|
||||
|
||||
/* First child; further children follow through `sibling`. */
acmp_node_t *child;
|
||||
/* Next node sharing the same parent. */
acmp_node_t *sibling;
|
||||
/* Failure link, set by acmp_connect_fail_branches. */
acmp_node_t *fail;
|
||||
/* Parent node, set by acmp_add_node_to_parent. */
acmp_node_t *parent;
|
||||
/* First node on the fail chain that ends a pattern, if any; set by
 * acmp_connect_other_matches. */
acmp_node_t *o_match;
|
||||
|
||||
/* Binary search tree over this node's children, keyed by letter; built by
 * acmp_build_binary_tree. */
acmp_btree_node_t *btree;
|
||||
|
||||
/* Cleared by acmp_clone_node_no_state; not otherwise touched in acmp.cc. */
size_t hit_count;
|
||||
|
||||
/* Heap copy of the pattern prefix ending at this node (the root gets the
 * "" literal instead). */
char *text;
|
||||
/* The "" literal until a pattern ends here, then a heap copy of that pattern. */
char *pattern;
|
||||
};
|
||||
|
||||
/* Entry of the per-node binary search tree used to find a child by letter. */
struct acmp_btree_node_t {
|
||||
/* Search key; equals node->letter. */
long letter;
|
||||
/* Subtree with smaller letters. */
acmp_btree_node_t *left;
|
||||
/* Subtree with larger letters. */
acmp_btree_node_t *right;
|
||||
/* Trie node this entry stands for. */
acmp_node_t *node;
|
||||
};
|
||||
|
||||
/**
|
||||
* Data related to parser, not to individual nodes
|
||||
*/
|
||||
/* Parser-wide state. Allocated zero-initialised by acmp_create. */
struct ACMP {
|
||||
|
||||
/* 1 when ACMP_FLAG_CASE_SENSITIVE was given to acmp_create; when 0, letters
 * are lower-cased both when adding patterns and when searching. */
int is_case_sensitive;
|
||||
|
||||
/* Number of distinct pattern end nodes created so far. */
int dict_count;
|
||||
/* Length of the longest pattern added. */
size_t longest_entry;
|
||||
|
||||
/* Root of the keyword trie. */
acmp_node_t *root_node;
|
||||
|
||||
/* The four data_* fields are not referenced by the functions in acmp.cc. */
const char *data_start;
|
||||
const char *data_end;
|
||||
const char *data_pos;
|
||||
size_t data_len;
|
||||
|
||||
/* Never allocated: the allocation in acmp_prepare is commented out. */
size_t *bp_buffer;
|
||||
/* Raised by acmp_prepare to twice longest_entry when smaller than it. */
size_t bp_buff_len;
|
||||
|
||||
/* Reset to root_node by acmp_prepare. */
acmp_node_t *active_node;
|
||||
/* u8_buff, u8buff_len and hit_count are not referenced by the functions in
 * acmp.cc. */
char u8_buff[6];
|
||||
size_t u8buff_len;
|
||||
|
||||
size_t hit_count;
|
||||
/* 1 once fail links and lookup trees are built; cleared by acmp_add_pattern. */
int is_failtree_done;
|
||||
/* Set to 1 by acmp_prepare; acmp_add_pattern refuses new patterns afterwards. */
int is_active;
|
||||
/* byte_pos and char_pos are not referenced by the functions in acmp.cc. */
size_t byte_pos;
|
||||
size_t char_pos;
|
||||
};
|
||||
|
||||
|
||||
//static long utf8_lcase(long ucs_code);
|
||||
|
||||
/**
|
||||
* flags - OR-ed values of ACMP_FLAG constants
|
||||
*/
|
||||
ACMP *acmp_create(int flags);
|
||||
|
||||
/**
|
||||
* Destroys previously created parser
|
||||
*/
|
||||
void acmp_destroy(ACMP *parser);
|
||||
|
||||
/**
|
||||
* Creates parser with same options and same patterns
|
||||
* parser - ACMP parser to duplicate
|
||||
*/
|
||||
ACMP *acmp_duplicate(ACMP *parser);
|
||||
|
||||
/**
|
||||
* Adds pattern to parser. Cannot be done after starting the search.
|
||||
* parser - ACMP parser
|
||||
* pattern - string with pattern to match
|
||||
* callback - Optional, pointer to an acmp_callback_t function
|
||||
* data - pointer to data that will be passed to callback function, only used if callback
|
||||
* is supplied
|
||||
* len - Length of pattern in characters, if zero string length is used.
|
||||
*/
|
||||
int acmp_add_pattern(ACMP *parser, const char *pattern,
|
||||
acmp_callback_t callback, void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Called to process incoming data stream. You must call acmp_done after sending
|
||||
* last data packet
|
||||
*
|
||||
* data - ptr to incoming data
|
||||
* len - size of data in bytes
|
||||
*/
|
||||
int acmp_process(ACMP *parser, const char *data, size_t len);
|
||||
|
||||
/**
|
||||
* Returns number of matches on all patterns combined
|
||||
*/
|
||||
size_t acmp_match_count_total(ACMP *parser);
|
||||
|
||||
/**
|
||||
* Returns number of matches for given pattern
|
||||
*/
|
||||
size_t acmp_match_count(ACMP *parser, const char *pattern);
|
||||
|
||||
/**
|
||||
* Resets the state of parser so you can start using it with new set of data,
|
||||
* or add new patterns.
|
||||
*/
|
||||
void acmp_reset(ACMP *parser);
|
||||
|
||||
/**
|
||||
* Creates an ACMPT struct that will use parser's tree, without duplicating its data
|
||||
*/
|
||||
ACMPT *acmp_duplicate_quick(ACMP *parser);
|
||||
|
||||
/**
|
||||
* Process the data using ACMPT to keep state, and ACMPT's parser to keep the tree
|
||||
*/
|
||||
int acmp_process_quick(ACMPT *acmpt, const char **match, const char *data, size_t len);
|
||||
|
||||
/**
|
||||
* Prepares parser for searching
|
||||
*/
|
||||
int acmp_prepare(ACMP *parser);
|
||||
|
||||
}
|
||||
|
||||
#endif /*ACMP_H_*/
|
Loading…
x
Reference in New Issue
Block a user