mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-10-01 12:07:42 +03:00
add new Literal API for pure literal expressions:
Add the compile-time APIs hs_compile_lit() and hs_compile_lit_multi() to handle pure literal pattern sets. A corresponding option, --literal-on, is added to the Hyperscan testing suites. Extended parameters and some of the flags are not supported by this API.
This commit is contained in:
committed by
Chang, Harry
parent
8bfbf07f75
commit
23e5f06594
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -56,11 +56,13 @@
|
||||
#include "parser/unsupported.h"
|
||||
#include "parser/utf8_validate.h"
|
||||
#include "rose/rose_build.h"
|
||||
#include "rose/rose_internal.h"
|
||||
#include "som/slot_manager_dump.h"
|
||||
#include "util/bytecode_ptr.h"
|
||||
#include "util/compile_error.h"
|
||||
#include "util/target_info.h"
|
||||
#include "util/verify_types.h"
|
||||
#include "util/ue2string.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
@@ -107,6 +109,46 @@ void validateExt(const hs_expr_ext &ext) {
|
||||
|
||||
}
|
||||
|
||||
void ParsedLitExpression::parseLiteral(const char *expression, size_t len,
|
||||
bool nocase) {
|
||||
const char *c = expression;
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
lit.push_back(*c, nocase);
|
||||
c++;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Build the expression info and literal for one pure literal pattern.
 *
 * The expression info is initialised from the index, the
 * HS_FLAG_SINGLEMATCH bit of \p flags and the report ID; the remaining
 * fields are given fixed values. The flags are then validated and the
 * expression text is converted into \ref lit.
 *
 * \param index_in   Index of this expression.
 * \param expression Start of the literal text.
 * \param expLength  Number of characters in \p expression.
 * \param flags      HS_FLAG_* bits for this expression.
 * \param report     Report ID attached to the expression.
 *
 * \throws CompileError if \p flags has a bit outside HS_FLAG_ALL, or if both
 *         HS_FLAG_SINGLEMATCH and HS_FLAG_SOM_LEFTMOST are set.
 */
ParsedLitExpression::ParsedLitExpression(unsigned index_in,
                                         const char *expression,
                                         size_t expLength, unsigned flags,
                                         ReportID report)
    : expr(index_in, false, flags & HS_FLAG_SINGLEMATCH, false, false,
           SOM_NONE, report, 0, MAX_OFFSET, 0, 0, 0, false) {
    // The flags DOTALL/ALLOWEMPTY/UTF8/UCP/PREFILTER/COMBINATION/QUIET have
    // no use for a pure literal expression.

    // Refuse any bit that is not part of the known flag set.
    if ((flags & ~HS_FLAG_ALL) != 0) {
        DEBUG_PRINTF("Unrecognised flag, flags=%u.\n", flags);
        throw CompileError("Unrecognised flag.");
    }

    const bool wantSingleMatch = (flags & HS_FLAG_SINGLEMATCH) != 0;
    const bool wantLeftmostSom = (flags & HS_FLAG_SOM_LEFTMOST) != 0;

    // FIXME: we disallow highlander + SOM, see UE-1850.
    if (wantSingleMatch && wantLeftmostSom) {
        throw CompileError("HS_FLAG_SINGLEMATCH is not supported in "
                           "combination with HS_FLAG_SOM_LEFTMOST.");
    }

    // Record the requested start-of-match mode.
    if (wantLeftmostSom) {
        expr.som = SOM_LEFT;
    }

    // Convert the raw expression text into the ue2_literal member.
    const bool caseless = (flags & HS_FLAG_CASELESS) != 0;
    parseLiteral(expression, expLength, caseless);
}
|
||||
|
||||
ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
|
||||
unsigned flags, ReportID report,
|
||||
const hs_expr_ext *ext)
|
||||
@@ -345,6 +387,49 @@ void addExpression(NG &ng, unsigned index, const char *expression,
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Add one pure literal expression to the given NG object.
 *
 * The expression is validated (no extended parameters, length within the
 * configured limit, only flags supported by the literal API), converted
 * directly into a ue2_literal and handed to NG::addLiteral().
 *
 * \param ng         The global NG object.
 * \param index      Index of this expression.
 * \param expression Start of the literal text; must not be null.
 * \param flags      HS_FLAG_* bits for this expression.
 * \param ext        Optional extended parameters; any non-zero ext->flags is
 *                   rejected.
 * \param id         Report ID attached to the expression.
 * \param expLength  Number of characters in \p expression.
 *
 * \throws CompileError if extended parameters are supplied, if \p expLength
 *         exceeds the pattern length limit, or if an unsupported flag is set
 *         (ParsedLitExpression may also throw on invalid flags).
 */
void addLitExpression(NG &ng, unsigned index, const char *expression,
                      unsigned flags, const hs_expr_ext *ext, ReportID id,
                      size_t expLength) {
    assert(expression);
    const CompileContext &cc = ng.cc;
    // NOTE(review): '%s' stops at the first NUL and needs a terminator; the
    // literal text may not satisfy either -- confirm this is acceptable for
    // debug output.
    DEBUG_PRINTF("index=%u, id=%u, flags=%u, expr='%s', len='%zu'\n", index,
                 id, flags, expression, expLength);

    // Extended parameters are not supported for pure literal patterns.
    if (ext && ext->flags != 0LLU) {
        throw CompileError("Extended parameters are not supported for pure "
                           "literal matching API.");
    }

    // Ensure that our pattern isn't too long (in characters). The length is
    // given explicitly by the caller and is the same value later used to
    // build the literal, so it is checked directly rather than re-measured
    // with strlen(), which would stop at an embedded NUL.
    if (expLength > cc.grey.limitPatternLength) {
        throw CompileError("Pattern length exceeds limit.");
    }

    // Filter out flags not supported by the pure literal API.
    const u64a not_supported = HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY |
                               HS_FLAG_UTF8 | HS_FLAG_UCP |
                               HS_FLAG_PREFILTER | HS_FLAG_COMBINATION |
                               HS_FLAG_QUIET;

    if (flags & not_supported) {
        throw CompileError("Only HS_FLAG_CASELESS, HS_FLAG_MULTILINE, "
                           "HS_FLAG_SINGLEMATCH and HS_FLAG_SOM_LEFTMOST are "
                           "supported in literal API.");
    }

    // This expression must be a pure literal, we can build ue2_literal
    // directly based on expression text.
    ParsedLitExpression ple(index, expression, expLength, flags, id);

    // Feed the ue2_literal into Rose.
    // NOTE(review): a false return from addLiteral() is not reported to the
    // caller here -- confirm that silently continuing is intended.
    const auto &expr = ple.expr;
    if (ng.addLiteral(ple.lit, expr.index, expr.report, expr.highlander,
                      expr.som, expr.quiet)) {
        DEBUG_PRINTF("took pure literal\n");
        return;
    }
}
|
||||
|
||||
static
|
||||
bytecode_ptr<RoseEngine> generateRoseEngine(NG &ng) {
|
||||
const u32 minWidth =
|
||||
@@ -416,10 +501,13 @@ hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) {
|
||||
}
|
||||
|
||||
|
||||
struct hs_database *build(NG &ng, unsigned int *length) {
|
||||
struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag) {
|
||||
assert(length);
|
||||
|
||||
auto rose = generateRoseEngine(ng);
|
||||
struct RoseEngine *roseHead = rose.get();
|
||||
roseHead->pureLiteral = pureFlag;
|
||||
|
||||
if (!rose) {
|
||||
throw CompileError("Unable to generate bytecode.");
|
||||
}
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -38,6 +38,7 @@
|
||||
#include "compiler/expression_info.h"
|
||||
#include "parser/Component.h"
|
||||
#include "util/noncopyable.h"
|
||||
#include "util/ue2string.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
@@ -66,6 +67,22 @@ public:
|
||||
std::unique_ptr<Component> component;
|
||||
};
|
||||
|
||||
|
||||
/** \brief Class gathering together the pieces of a parsed lit-expression. */
|
||||
class ParsedLitExpression : noncopyable {
|
||||
public:
|
||||
ParsedLitExpression(unsigned index, const char *expression,
|
||||
size_t expLength, unsigned flags, ReportID report);
|
||||
|
||||
void parseLiteral(const char *expression, size_t len, bool nocase);
|
||||
|
||||
/** \brief Expression information (from flags, extparam etc) */
|
||||
ExpressionInfo expr;
|
||||
|
||||
/** \brief Format the lit-expression text into Hyperscan literal type. */
|
||||
ue2_literal lit;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Class gathering together the pieces of an expression that has been
|
||||
* built into an NFA graph.
|
||||
@@ -99,6 +116,10 @@ struct BuiltExpression {
|
||||
void addExpression(NG &ng, unsigned index, const char *expression,
|
||||
unsigned flags, const hs_expr_ext *ext, ReportID report);
|
||||
|
||||
void addLitExpression(NG &ng, unsigned index, const char *expression,
|
||||
unsigned flags, const hs_expr_ext *ext, ReportID id,
|
||||
size_t expLength);
|
||||
|
||||
/**
|
||||
* Build a Hyperscan database out of the expressions we've been given. A
|
||||
* fatal error will result in an exception being thrown.
|
||||
@@ -107,11 +128,13 @@ void addExpression(NG &ng, unsigned index, const char *expression,
|
||||
* The global NG object.
|
||||
* @param[out] length
|
||||
* The number of bytes occupied by the compiled structure.
|
||||
* @param pureFlag
|
||||
* The flag indicating invocation from literal API or not.
|
||||
* @return
|
||||
* The compiled structure. Should be deallocated with the
|
||||
* hs_database_free() function.
|
||||
*/
|
||||
struct hs_database *build(NG &ng, unsigned int *length);
|
||||
struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag);
|
||||
|
||||
/**
|
||||
* Constructs an NFA graph from the given expression tree.
|
||||
|
Reference in New Issue
Block a user