mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-30 19:47:43 +03:00
Initial commit of Hyperscan
This commit is contained in:
173
util/ExpressionParser.rl
Normal file
173
util/ExpressionParser.rl
Normal file
@@ -0,0 +1,173 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "ExpressionParser.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "hs_compile.h"
|
||||
|
||||
|
||||
using std::string;
|
||||
|
||||
namespace { // anon

/**
 * Extended parameter currently being parsed inside a "{key=value,...}" list.
 * PARAM_NONE means no key has been recognised for the current entry; the
 * handleExtParam action treats that as a syntax error.
 */
enum ParamKey {
    PARAM_NONE,
    PARAM_MIN_OFFSET,
    PARAM_MAX_OFFSET,
    PARAM_MIN_LENGTH
};

%%{
    machine ExpressionParser;

    # Actions run inline inside readExpression(), so they use its locals
    # (num, key, flags, ext, must_be_ordered) and may return from it.

    # Fold the current decimal digit into num.
    action accumulateNum {
        // A value that does not fit in a u64a is rejected outright; without
        // this check the multiplication would wrap and a truncated number
        // would be stored in the extended parameters.
        if (num > (~static_cast<u64a>(0) - static_cast<u64a>(fc - '0')) / 10) {
            return false;
        }
        num = (num * 10) + (fc - '0');
    }

    # Translate one single-character flag into its HS_FLAG_* bit.
    action handleFlag {
        switch (fc) {
            case 'i': *flags |= HS_FLAG_CASELESS; break;
            case 's': *flags |= HS_FLAG_DOTALL; break;
            case 'm': *flags |= HS_FLAG_MULTILINE; break;
            case 'H': *flags |= HS_FLAG_SINGLEMATCH; break;
            case 'O':
                // Not a compile flag: reported through the optional
                // must_be_ordered output instead of the flags word.
                if (must_be_ordered) {
                    *must_be_ordered = true;
                }
                break;
            case 'V': *flags |= HS_FLAG_ALLOWEMPTY; break;
            case 'W': *flags |= HS_FLAG_UCP; break;
            case '8': *flags |= HS_FLAG_UTF8; break;
            case 'P': *flags |= HS_FLAG_PREFILTER; break;
            case 'L': *flags |= HS_FLAG_SOM_LEFTMOST; break;
            default: fbreak;
        }
    }

    # Store the number just scanned into the field selected by key and mark
    # that field as present in ext->flags.
    action handleExtParam {
        switch (key) {
            case PARAM_MIN_OFFSET:
                ext->flags |= HS_EXT_FLAG_MIN_OFFSET;
                ext->min_offset = num;
                break;
            case PARAM_MAX_OFFSET:
                ext->flags |= HS_EXT_FLAG_MAX_OFFSET;
                ext->max_offset = num;
                break;
            case PARAM_MIN_LENGTH:
                ext->flags |= HS_EXT_FLAG_MIN_LENGTH;
                ext->min_length = num;
                break;
            case PARAM_NONE:
            default:
                // No key specified, syntax invalid.
                return false;
        }
    }

    write data;
}%%

} // namespace
|
||||
|
||||
static
|
||||
void initExt(hs_expr_ext *ext) {
|
||||
memset(ext, 0, sizeof(*ext));
|
||||
ext->max_offset = MAX_OFFSET;
|
||||
}
|
||||
|
||||
/**
 * Split a "/expression/flags{params}" string into its components.
 *
 * The expression is the text between the leading '/' and the last '/' in
 * the input. What follows the last '/' is scanned by the ExpressionParser
 * machine: any number of single-character flags, optionally followed by a
 * brace-enclosed, comma-separated list of key=value extended parameters.
 *
 * \param input            Full string to parse.
 * \param expr             Receives the text between the delimiters. It is
 *                         written as soon as the delimiters are found, so it
 *                         may be modified even when the function goes on to
 *                         return false.
 * \param flags            Receives the HS_FLAG_* bits; must not be null.
 * \param ext              Reset via initExt() and then filled from the
 *                         extended parameters; must not be null.
 * \param must_be_ordered  Optional. When non-null it is cleared up front and
 *                         set by the 'O' flag.
 *
 * \return true when the scanner did not end in its error state and the whole
 *         of the input was consumed; false otherwise.
 */
bool readExpression(const std::string &input, std::string &expr,
                    unsigned int *flags, hs_expr_ext *ext,
                    bool *must_be_ordered) {
    assert(flags);
    assert(ext);

    // Start every output from a known state, before any early return.
    *flags = 0;
    initExt(ext);
    if (must_be_ordered) {
        *must_be_ordered = false;
    }

    // The delimiters are located with plain string operations; only the
    // trailing flags/params section is handed to the Ragel scanner.
    if (input.empty() || input[0] != '/') {
        return false;
    }
    const size_t closing = input.rfind('/');
    if (closing == string::npos || closing == 0) {
        // The only '/' present is the opening one.
        return false;
    }
    expr = input.substr(1, closing - 1);

    // Scanner state. The names p, pe, eof, ts, te, cs and act are the ones
    // the generated Ragel code refers to.
    const char *const base = input.c_str();
    const char *p = base + closing + 1;
    const char *pe = base + input.size();
    UNUSED const char *eof = pe;
    UNUSED const char *ts = p;
    UNUSED const char *te = p;
    int cs;
    UNUSED int act;

    assert(p);
    assert(pe);

    // Scratch values shared with the machine actions: the integer being
    // accumulated and the parameter key it belongs to.
    u64a num = 0;
    enum ParamKey key = PARAM_NONE;

    %%{
        single_flag = [ismW8HPLVO];
        param = ('min_offset' @{ key = PARAM_MIN_OFFSET; } |
                 'max_offset' @{ key = PARAM_MAX_OFFSET; } |
                 'min_length' @{ key = PARAM_MIN_LENGTH; } );

        value = (digit @accumulateNum)+ >{num = 0;};
        param_spec = (' '* param '=' value ' '*) >{ key = PARAM_NONE; }
                      %handleExtParam;

        main := ( single_flag @handleFlag )* # single-char flags
                ( '{' param_spec (',' param_spec)* '}' )? # list of ext params
                $^{ return false; };

        # Initialize and execute.
        write init;
        write exec;
    }%%

    DEBUG_PRINTF("expr='%s', flags=%u\n", expr.c_str(), *flags);

    if (cs == ExpressionParser_error) {
        return false;
    }
    // Anything left unconsumed after the scanner stops is a syntax error.
    return p == pe;
}
|
Reference in New Issue
Block a user