add new Literal API for pure literal expressions:

Design compile time api hs_compile_lit() and hs_compile_lit_multi()
to handle pure literal pattern sets. Corresponding option --literal-on
is added for hyperscan testing suites. Extended parameters and part of
flags are not supported for this api.
This commit is contained in:
Hong, Yang A
2019-07-18 00:29:27 +08:00
committed by Chang, Harry
parent 8bfbf07f75
commit 23e5f06594
36 changed files with 745 additions and 116 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -56,11 +56,13 @@
#include "parser/unsupported.h"
#include "parser/utf8_validate.h"
#include "rose/rose_build.h"
#include "rose/rose_internal.h"
#include "som/slot_manager_dump.h"
#include "util/bytecode_ptr.h"
#include "util/compile_error.h"
#include "util/target_info.h"
#include "util/verify_types.h"
#include "util/ue2string.h"
#include <algorithm>
#include <cassert>
@@ -107,6 +109,46 @@ void validateExt(const hs_expr_ext &ext) {
}
void ParsedLitExpression::parseLiteral(const char *expression, size_t len,
bool nocase) {
const char *c = expression;
for (size_t i = 0; i < len; i++) {
lit.push_back(*c, nocase);
c++;
}
}
ParsedLitExpression::ParsedLitExpression(unsigned index_in,
const char *expression,
size_t expLength, unsigned flags,
ReportID report)
: expr(index_in, false, flags & HS_FLAG_SINGLEMATCH, false, false,
SOM_NONE, report, 0, MAX_OFFSET, 0, 0, 0, false) {
// For pure literal expression, below 'HS_FLAG_'s are unuseful:
// DOTALL/ALLOWEMPTY/UTF8/UCP/PREFILTER/COMBINATION/QUIET
if (flags & ~HS_FLAG_ALL) {
DEBUG_PRINTF("Unrecognised flag, flags=%u.\n", flags);
throw CompileError("Unrecognised flag.");
}
// FIXME: we disallow highlander + SOM, see UE-1850.
if ((flags & HS_FLAG_SINGLEMATCH) && (flags & HS_FLAG_SOM_LEFTMOST)) {
throw CompileError("HS_FLAG_SINGLEMATCH is not supported in "
"combination with HS_FLAG_SOM_LEFTMOST.");
}
// Set SOM type.
if (flags & HS_FLAG_SOM_LEFTMOST) {
expr.som = SOM_LEFT;
}
// Transfer expression text into ue2_literal.
bool nocase = flags & HS_FLAG_CASELESS ? true : false;
parseLiteral(expression, expLength, nocase);
}
ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
unsigned flags, ReportID report,
const hs_expr_ext *ext)
@@ -345,6 +387,49 @@ void addExpression(NG &ng, unsigned index, const char *expression,
}
}
void addLitExpression(NG &ng, unsigned index, const char *expression,
unsigned flags, const hs_expr_ext *ext, ReportID id,
size_t expLength) {
assert(expression);
const CompileContext &cc = ng.cc;
DEBUG_PRINTF("index=%u, id=%u, flags=%u, expr='%s', len='%zu'\n", index,
id, flags, expression, expLength);
// Extended parameters are not supported for pure literal patterns.
if (ext && ext->flags != 0LLU) {
throw CompileError("Extended parameters are not supported for pure "
"literal matching API.");
}
// Ensure that our pattern isn't too long (in characters).
if (strlen(expression) > cc.grey.limitPatternLength) {
throw CompileError("Pattern length exceeds limit.");
}
// filter out flags not supported by pure literal API.
u64a not_supported = HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8 |
HS_FLAG_UCP | HS_FLAG_PREFILTER | HS_FLAG_COMBINATION |
HS_FLAG_QUIET;
if (flags & not_supported) {
throw CompileError("Only HS_FLAG_CASELESS, HS_FLAG_MULTILINE, "
"HS_FLAG_SINGLEMATCH and HS_FLAG_SOM_LEFTMOST are "
"supported in literal API.");
}
// This expression must be a pure literal, we can build ue2_literal
// directly based on expression text.
ParsedLitExpression ple(index, expression, expLength, flags, id);
// Feed the ue2_literal into Rose.
const auto &expr = ple.expr;
if (ng.addLiteral(ple.lit, expr.index, expr.report, expr.highlander,
expr.som, expr.quiet)) {
DEBUG_PRINTF("took pure literal\n");
return;
}
}
static
bytecode_ptr<RoseEngine> generateRoseEngine(NG &ng) {
const u32 minWidth =
@@ -416,10 +501,13 @@ hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) {
}
struct hs_database *build(NG &ng, unsigned int *length) {
struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag) {
assert(length);
auto rose = generateRoseEngine(ng);
struct RoseEngine *roseHead = rose.get();
roseHead->pureLiteral = pureFlag;
if (!rose) {
throw CompileError("Unable to generate bytecode.");
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -38,6 +38,7 @@
#include "compiler/expression_info.h"
#include "parser/Component.h"
#include "util/noncopyable.h"
#include "util/ue2string.h"
#include <memory>
@@ -66,6 +67,22 @@ public:
std::unique_ptr<Component> component;
};
/** \brief Class gathering together the pieces of a parsed lit-expression. */
class ParsedLitExpression : noncopyable {
public:
ParsedLitExpression(unsigned index, const char *expression,
size_t expLength, unsigned flags, ReportID report);
void parseLiteral(const char *expression, size_t len, bool nocase);
/** \brief Expression information (from flags, extparam etc) */
ExpressionInfo expr;
/** \brief Format the lit-expression text into Hyperscan literal type. */
ue2_literal lit;
};
/**
* \brief Class gathering together the pieces of an expression that has been
* built into an NFA graph.
@@ -99,6 +116,10 @@ struct BuiltExpression {
void addExpression(NG &ng, unsigned index, const char *expression,
unsigned flags, const hs_expr_ext *ext, ReportID report);
void addLitExpression(NG &ng, unsigned index, const char *expression,
unsigned flags, const hs_expr_ext *ext, ReportID id,
size_t expLength);
/**
* Build a Hyperscan database out of the expressions we've been given. A
* fatal error will result in an exception being thrown.
@@ -107,11 +128,13 @@ void addExpression(NG &ng, unsigned index, const char *expression,
* The global NG object.
* @param[out] length
* The number of bytes occupied by the compiled structure.
* @param pureFlag
* The flag indicating invocation from literal API or not.
* @return
* The compiled structure. Should be deallocated with the
* hs_database_free() function.
*/
struct hs_database *build(NG &ng, unsigned int *length);
struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag);
/**
* Constructs an NFA graph from the given expression tree.