add new Literal API for pure literal expressions:

Add the compile-time APIs hs_compile_lit() and hs_compile_lit_multi()
to handle pure literal pattern sets. A corresponding option, --literal-on,
is added to the Hyperscan testing suites. Extended parameters and some
flags are not supported by this API.
This commit is contained in:
Hong, Yang A
2019-07-18 00:29:27 +08:00
committed by Chang, Harry
parent 8bfbf07f75
commit 23e5f06594
36 changed files with 745 additions and 116 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -56,11 +56,13 @@
#include "parser/unsupported.h"
#include "parser/utf8_validate.h"
#include "rose/rose_build.h"
#include "rose/rose_internal.h"
#include "som/slot_manager_dump.h"
#include "util/bytecode_ptr.h"
#include "util/compile_error.h"
#include "util/target_info.h"
#include "util/verify_types.h"
#include "util/ue2string.h"
#include <algorithm>
#include <cassert>
@@ -107,6 +109,46 @@ void validateExt(const hs_expr_ext &ext) {
}
/**
 * \brief Append the raw bytes of a pure literal pattern to \ref lit.
 *
 * Exactly \p len bytes are consumed from \p expression; the loop is driven by
 * the length only, so a '\0' byte is copied like any other character.
 *
 * \param expression Start of the pattern text.
 * \param len Number of bytes to copy from \p expression.
 * \param nocase Case-insensitivity marker forwarded with every byte.
 */
void ParsedLitExpression::parseLiteral(const char *expression, size_t len,
                                       bool nocase) {
    for (size_t pos = 0; pos != len; ++pos) {
        lit.push_back(expression[pos], nocase);
    }
}
/**
 * \brief Build the expression info and the literal for one pure literal
 * pattern.
 *
 * \param index_in Index of the pattern, forwarded to \ref expr.
 * \param expression Pattern text; \p expLength bytes are read from it.
 * \param expLength Length of the pattern text in bytes.
 * \param flags HS_FLAG_* bits supplied by the caller.
 * \param report Report ID forwarded to \ref expr.
 *
 * Throws CompileError if \p flags contains a bit outside HS_FLAG_ALL, or if
 * HS_FLAG_SINGLEMATCH and HS_FLAG_SOM_LEFTMOST are both set.
 */
ParsedLitExpression::ParsedLitExpression(unsigned index_in,
                                         const char *expression,
                                         size_t expLength, unsigned flags,
                                         ReportID report)
    : expr(index_in, false, flags & HS_FLAG_SINGLEMATCH, false, false,
           SOM_NONE, report, 0, MAX_OFFSET, 0, 0, 0, false) {
    // The flags DOTALL/ALLOWEMPTY/UTF8/UCP/PREFILTER/COMBINATION/QUIET have
    // no use for a pure literal expression; here we only reject bits that
    // are not known flags at all.
    const bool hasUnknownFlag = (flags & ~HS_FLAG_ALL) != 0;
    if (hasUnknownFlag) {
        DEBUG_PRINTF("Unrecognised flag, flags=%u.\n", flags);
        throw CompileError("Unrecognised flag.");
    }

    const bool wantsSingleMatch = (flags & HS_FLAG_SINGLEMATCH) != 0;
    const bool wantsSomLeftmost = (flags & HS_FLAG_SOM_LEFTMOST) != 0;

    // FIXME: we disallow highlander + SOM, see UE-1850.
    if (wantsSingleMatch && wantsSomLeftmost) {
        throw CompileError("HS_FLAG_SINGLEMATCH is not supported in "
                           "combination with HS_FLAG_SOM_LEFTMOST.");
    }

    // Select the SOM type; it stays at the value set by the initialiser
    // unless leftmost SOM was requested.
    if (wantsSomLeftmost) {
        expr.som = SOM_LEFT;
    }

    // Copy the pattern text, byte by byte, into the ue2_literal.
    const bool caseless = (flags & HS_FLAG_CASELESS) != 0;
    parseLiteral(expression, expLength, caseless);
}
ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
unsigned flags, ReportID report,
const hs_expr_ext *ext)
@@ -345,6 +387,49 @@ void addExpression(NG &ng, unsigned index, const char *expression,
}
}
/**
 * \brief Add one pure literal pattern to the NG object, bypassing the regex
 * parser.
 *
 * \param ng The global NG object that receives the literal.
 * \param index Index of this pattern within the caller's pattern set.
 * \param expression Pattern text; must not be NULL.
 * \param flags HS_FLAG_* bits for this pattern.
 * \param ext Optional extended parameters; rejected if any ext flag is set.
 * \param id Report ID for this pattern.
 * \param expLength Length of the pattern text in bytes. The text may contain
 *        '\0' bytes, so this is the authoritative length.
 *
 * Throws CompileError if extended parameters are supplied, if the pattern is
 * longer than the configured limit, or if a flag not supported by the literal
 * API is set. The ParsedLitExpression constructor may throw further
 * CompileErrors for flag problems.
 */
void addLitExpression(NG &ng, unsigned index, const char *expression,
                      unsigned flags, const hs_expr_ext *ext, ReportID id,
                      size_t expLength) {
    assert(expression);
    const CompileContext &cc = ng.cc;
    DEBUG_PRINTF("index=%u, id=%u, flags=%u, expr='%s', len='%zu'\n", index,
                 id, flags, expression, expLength);

    // Extended parameters are not supported for pure literal patterns.
    if (ext && ext->flags != 0LLU) {
        throw CompileError("Extended parameters are not supported for pure "
                           "literal matching API.");
    }

    // Ensure that our pattern isn't too long (in characters). The explicit
    // length must be used here: a literal may legally contain '\0' bytes, so
    // strlen() would stop at the first one and let an over-long pattern slip
    // past the limit.
    if (expLength > cc.grey.limitPatternLength) {
        throw CompileError("Pattern length exceeds limit.");
    }

    // Filter out flags not supported by pure literal API.
    const u64a not_supported = HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY |
                               HS_FLAG_UTF8 | HS_FLAG_UCP |
                               HS_FLAG_PREFILTER | HS_FLAG_COMBINATION |
                               HS_FLAG_QUIET;
    if (flags & not_supported) {
        throw CompileError("Only HS_FLAG_CASELESS, HS_FLAG_MULTILINE, "
                           "HS_FLAG_SINGLEMATCH and HS_FLAG_SOM_LEFTMOST are "
                           "supported in literal API.");
    }

    // This expression must be a pure literal, we can build ue2_literal
    // directly based on expression text.
    ParsedLitExpression ple(index, expression, expLength, flags, id);

    // Feed the ue2_literal into Rose.
    // NOTE(review): if addLiteral() declines the literal (returns false),
    // this function returns without adding anything and without reporting
    // an error -- confirm that this is the intended behaviour.
    const auto &expr = ple.expr;
    if (ng.addLiteral(ple.lit, expr.index, expr.report, expr.highlander,
                      expr.som, expr.quiet)) {
        DEBUG_PRINTF("took pure literal\n");
        return;
    }
}
static
bytecode_ptr<RoseEngine> generateRoseEngine(NG &ng) {
const u32 minWidth =
@@ -416,10 +501,13 @@ hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) {
}
struct hs_database *build(NG &ng, unsigned int *length) {
struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag) {
assert(length);
auto rose = generateRoseEngine(ng);
struct RoseEngine *roseHead = rose.get();
roseHead->pureLiteral = pureFlag;
if (!rose) {
throw CompileError("Unable to generate bytecode.");
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -38,6 +38,7 @@
#include "compiler/expression_info.h"
#include "parser/Component.h"
#include "util/noncopyable.h"
#include "util/ue2string.h"
#include <memory>
@@ -66,6 +67,22 @@ public:
std::unique_ptr<Component> component;
};
/**
 * \brief Class gathering together the pieces of a parsed lit-expression.
 *
 * Used by the pure literal compile path: the pattern text is not run through
 * the regex parser, but copied byte for byte into \ref lit.
 */
class ParsedLitExpression : noncopyable {
public:
/**
 * \brief Validate \p flags and convert the pattern text into \ref lit.
 *
 * \p expLength bytes are read from \p expression. Throws CompileError on
 * unrecognised flags, or when HS_FLAG_SINGLEMATCH is combined with
 * HS_FLAG_SOM_LEFTMOST.
 */
ParsedLitExpression(unsigned index, const char *expression,
size_t expLength, unsigned flags, ReportID report);
/**
 * \brief Append \p len bytes of \p expression to \ref lit, passing
 * \p nocase along with every byte.
 */
void parseLiteral(const char *expression, size_t len, bool nocase);
/** \brief Expression information (index, report, SOM type and so on, as
 * derived from the flags). */
ExpressionInfo expr;
/** \brief The lit-expression text converted into the Hyperscan literal
 * type. */
ue2_literal lit;
};
/**
* \brief Class gathering together the pieces of an expression that has been
* built into an NFA graph.
@@ -99,6 +116,10 @@ struct BuiltExpression {
void addExpression(NG &ng, unsigned index, const char *expression,
unsigned flags, const hs_expr_ext *ext, ReportID report);
/**
 * \brief Add a pure literal pattern to the given NG object.
 *
 * The pattern text is taken literally (no regex parsing) and is
 * \p expLength bytes long. Throws CompileError if extended parameters are
 * supplied in \p ext, if the pattern exceeds the configured length limit, or
 * if \p flags contains a flag the literal API does not support.
 */
void addLitExpression(NG &ng, unsigned index, const char *expression,
unsigned flags, const hs_expr_ext *ext, ReportID id,
size_t expLength);
/**
* Build a Hyperscan database out of the expressions we've been given. A
* fatal error will result in an exception being thrown.
@@ -107,11 +128,13 @@ void addExpression(NG &ng, unsigned index, const char *expression,
* The global NG object.
* @param[out] length
* The number of bytes occupied by the compiled structure.
* @param pureFlag
* The flag indicating invocation from literal API or not.
* @return
* The compiled structure. Should be deallocated with the
* hs_database_free() function.
*/
struct hs_database *build(NG &ng, unsigned int *length);
struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag);
/**
* Constructs an NFA graph from the given expression tree.

View File

@@ -807,9 +807,6 @@ void findIncludedLits(vector<hwlmLiteral> &lits,
for (size_t i = 0; i < cnt; i++) {
u32 bucket1 = group[i].first;
u32 id1 = group[i].second;
if (lits[id1].pure) {
continue;
}
buildSquashMask(lits, id1, bucket1, i + 1, group, parent_map,
exception_map);
}

View File

@@ -62,7 +62,6 @@ struct LitInfo {
u8 size;
u8 flags; //!< bitfield of flags from FDR_LIT_FLAG_* above.
u8 next;
u8 pure; //!< The pass-on of pure flag from hwlmLiteral.
};
#define FDRC_FLAG_NO_CONFIRM 1

View File

@@ -87,7 +87,6 @@ void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
info.flags = flags;
info.size = verify_u8(max(lit.msk.size(), lit.s.size()));
info.groups = lit.groups;
info.pure = lit.pure;
// these are built up assuming a LE machine
CONF_TYPE msk = all_ones;

View File

@@ -65,7 +65,6 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
u8 oldNext; // initialized in loop
do {
assert(ISALIGNED(li));
scratch->pure = li->pure;
if (unlikely((conf_key & li->msk) != li->v)) {
goto out;
@@ -100,7 +99,6 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
li++;
} while (oldNext);
scratch->fdr_conf = NULL;
scratch->pure = 0;
}
#endif

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -251,7 +251,7 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
ng.rm.logicalKeyRenumber();
unsigned length = 0;
struct hs_database *out = build(ng, &length);
struct hs_database *out = build(ng, &length, 0);
assert(out); // should have thrown exception on error
assert(length);
@@ -281,6 +281,130 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
}
}
/**
 * Compile a set of pure literal patterns into a database.
 *
 * Arguments are validated first; every failure sets *db to NULL (when db is
 * available) and, when comp_error is available, stores an error object in
 * *comp_error. Each pattern is then handed to addLitExpression() and the
 * database is produced by build() with the pure-literal flag set to 1.
 *
 * flags, ids and ext may each be NULL; the corresponding per-pattern value
 * then defaults to 0, 0 and nullptr respectively. lens must not be NULL and
 * supplies the byte length of each pattern.
 *
 * Returns HS_SUCCESS on success, HS_COMPILER_ERROR on any validation or
 * compile failure, and (FAT_RUNTIME builds only) HS_ARCH_ERROR when the
 * check_ssse3() test fails.
 */
hs_error_t
hs_compile_lit_multi_int(const char *const *expressions, const unsigned *flags,
const unsigned *ids, const hs_expr_ext *const *ext,
const size_t *lens, unsigned elements, unsigned mode,
const hs_platform_info_t *platform, hs_database_t **db,
hs_compile_error_t **comp_error, const Grey &g) {
// Check the args: note that it's OK for flags, ids or ext to be null.
if (!comp_error) {
if (db) {
*db = nullptr;
}
// nowhere to write the string, but we can still report an error code
return HS_COMPILER_ERROR;
}
// From here on comp_error is known to be writable.
if (!db) {
*comp_error = generateCompileError("Invalid parameter: db is NULL", -1);
return HS_COMPILER_ERROR;
}
// From here on both db and comp_error are known to be writable.
if (!expressions) {
*db = nullptr;
*comp_error
= generateCompileError("Invalid parameter: expressions is NULL",
-1);
return HS_COMPILER_ERROR;
}
// The array of per-pattern lengths is mandatory for the literal API.
if (!lens) {
*db = nullptr;
*comp_error = generateCompileError("Invalid parameter: len is NULL", -1);
return HS_COMPILER_ERROR;
}
if (elements == 0) {
*db = nullptr;
*comp_error = generateCompileError("Invalid parameter: elements is zero", -1);
return HS_COMPILER_ERROR;
}
#if defined(FAT_RUNTIME)
// Only compiled into fat runtime builds: refuse to continue when the
// check_ssse3() test fails.
if (!check_ssse3()) {
*db = nullptr;
*comp_error = generateCompileError("Unsupported architecture", -1);
return HS_ARCH_ERROR;
}
#endif
if (!checkMode(mode, comp_error)) {
*db = nullptr;
assert(*comp_error); // set by checkMode.
return HS_COMPILER_ERROR;
}
if (!checkPlatform(platform, comp_error)) {
*db = nullptr;
assert(*comp_error); // set by checkPlatform.
return HS_COMPILER_ERROR;
}
// Enforce the configured cap on the number of patterns.
if (elements > g.limitPatternCount) {
*db = nullptr;
*comp_error = generateCompileError("Number of patterns too large", -1);
return HS_COMPILER_ERROR;
}
// This function is simply a wrapper around both the parser and compiler
bool isStreaming = mode & (HS_MODE_STREAM | HS_MODE_VECTORED);
bool isVectored = mode & HS_MODE_VECTORED;
unsigned somPrecision = getSomPrecision(mode);
// Use the caller's platform description if given, otherwise the current
// target.
target_t target_info = platform ? target_t(*platform)
: get_current_target();
try {
CompileContext cc(isStreaming, isVectored, target_info, g);
NG ng(cc, elements, somPrecision);
for (unsigned int i = 0; i < elements; i++) {
// Add this expression to the compiler
try {
addLitExpression(ng, i, expressions[i], flags ? flags[i] : 0,
ext ? ext[i] : nullptr, ids ? ids[i] : 0,
lens[i]);
} catch (CompileError &e) {
/* Caught a compile error for this pattern;
 * throw it upstream as a CompileError with a specific index */
e.setExpressionIndex(i);
throw; /* do not slice */
}
}
// Check sub-expression ids
ng.rm.pl.validateSubIDs(ids, expressions, flags, elements);
// Renumber and assign lkey to reports
ng.rm.logicalKeyRenumber();
unsigned length = 0;
// The final argument (1) is build()'s pureFlag, marking a literal API
// database.
struct hs_database *out = build(ng, &length, 1);
assert(out); //should have thrown exception on error
assert(length);
*db = out;
*comp_error = nullptr;
return HS_SUCCESS;
}
catch (const CompileError &e) {
// Compiler error occurred
*db = nullptr;
*comp_error = generateCompileError(e.reason,
e.hasIndex ? (int)e.index : -1);
return HS_COMPILER_ERROR;
}
catch (const std::bad_alloc &) {
// Out of memory: hand back the shared hs_enomem error object.
*db = nullptr;
*comp_error = const_cast<hs_compile_error_t *>(&hs_enomem);
return HS_COMPILER_ERROR;
}
catch (...) {
// Any other exception is unexpected: assert, then hand back the shared
// hs_einternal error object.
assert(!"Internal errror, unexpected exception");
*db = nullptr;
*comp_error = const_cast<hs_compile_error_t *>(&hs_einternal);
return HS_COMPILER_ERROR;
}
}
} // namespace ue2
extern "C" HS_PUBLIC_API
@@ -326,6 +450,41 @@ hs_error_t HS_CDECL hs_compile_ext_multi(const char * const *expressions,
platform, db, error, Grey());
}
/*
 * Public entry point: compile a single pure literal pattern.
 *
 * The pattern is forwarded to hs_compile_lit_multi_int() as a one-element
 * set with ID zero and no extended parameters. The output pointers are
 * validated before they are written to, in the same order that
 * hs_compile_lit_multi_int() uses, so that a NULL db or error pointer yields
 * HS_COMPILER_ERROR instead of a NULL dereference.
 */
extern "C" HS_PUBLIC_API
hs_error_t HS_CDECL hs_compile_lit(const char *expression, unsigned flags,
                                   const size_t len, unsigned mode,
                                   const hs_platform_info_t *platform,
                                   hs_database_t **db,
                                   hs_compile_error_t **error) {
    if (!error) {
        if (db) {
            *db = nullptr;
        }
        // nowhere to write the string, but we can still report an error code
        return HS_COMPILER_ERROR;
    }
    if (!db) {
        *error = generateCompileError("Invalid parameter: db is NULL", -1);
        return HS_COMPILER_ERROR;
    }
    // This must be checked here: the callee only sees &expression, which is
    // never NULL.
    if (expression == nullptr) {
        *db = nullptr;
        *error = generateCompileError("Invalid parameter: expression is NULL",
                                      -1);
        return HS_COMPILER_ERROR;
    }

    unsigned id = 0; // single expressions get zero as an ID
    const hs_expr_ext * const *ext = nullptr; // unused for this call.

    return hs_compile_lit_multi_int(&expression, &flags, &id, ext, &len, 1,
                                    mode, platform, db, error, Grey());
}
/*
 * Public entry point: compile a set of pure literal patterns.
 *
 * Thin wrapper that forwards every argument to hs_compile_lit_multi_int(),
 * supplying no extended parameters and a default-constructed Grey.
 */
extern "C" HS_PUBLIC_API
hs_error_t HS_CDECL hs_compile_lit_multi(const char * const *expressions,
                                         const unsigned *flags,
                                         const unsigned *ids,
                                         const size_t *lens,
                                         unsigned elements, unsigned mode,
                                         const hs_platform_info_t *platform,
                                         hs_database_t **db,
                                         hs_compile_error_t **error) {
    // Extended parameters are unused for this call, so a null ext array is
    // passed straight through.
    return hs_compile_lit_multi_int(expressions, flags, ids,
                                    /* ext */ nullptr, lens, elements, mode,
                                    platform, db, error, Grey());
}
static
hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
const hs_expr_ext_t *ext, unsigned int mode,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -323,6 +323,10 @@ typedef struct hs_expr_ext {
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
* syntax.
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
* the sub-expressions in logical combinations.
*
* @param mode
* Compiler mode flags that affect the database as a whole. One of @ref
@@ -392,6 +396,10 @@ hs_error_t HS_CDECL hs_compile(const char *expression, unsigned int flags,
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
* syntax.
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
* the sub-expressions in logical combinations.
*
* @param ids
* An array of integers specifying the ID number to be associated with the
@@ -472,6 +480,10 @@ hs_error_t HS_CDECL hs_compile_multi(const char *const *expressions,
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
* syntax.
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
* the sub-expressions in logical combinations.
*
* @param ids
* An array of integers specifying the ID number to be associated with the
@@ -527,6 +539,165 @@ hs_error_t HS_CDECL hs_compile_ext_multi(const char *const *expressions,
const hs_platform_info_t *platform,
hs_database_t **db, hs_compile_error_t **error);
/**
* The basic pure literal expression compiler.
*
* This is the function call with which a pure literal expression (not a
* common regular expression) is compiled into a Hyperscan database which
* can be passed to the runtime functions (such as @ref hs_scan(),
* @ref hs_open_stream(), etc.)
*
* @param expression
* The NULL-terminated expression to parse. Note that this string must
* represent ONLY the pattern to be matched, with no delimiters or flags;
* any global flags should be specified with the @p flags argument. For
* example, the expression `/abc?def/i` should be compiled by providing
* `abc?def` as the @p expression, and @ref HS_FLAG_CASELESS as the @a
* flags. Meanwhile, the string content shall be fully parsed in a literal
* sense without any regular grammars. For example, the @p expression
* `abc?` simply means a char sequence of `a`, `b`, `c`, and `?`. The `?`
* here doesn't mean 0 or 1 quantifier under regular semantics.
*
* @param flags
* Flags which modify the behaviour of the expression. Multiple flags may
* be used by ORing them together. Compared to @ref hs_compile(), fewer
* valid values are provided:
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
* - HS_FLAG_SINGLEMATCH - Only one match will be generated for the
* expression per stream.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
*
* @param len
* The length of the text content of the pure literal expression. As the
* text content indicated by @p expression is treated as single character
* one by one, the special terminating character `\0` should be allowed
* to appear in expression, and not treated as a terminator for a string.
* Thus, the end of a pure literal expression cannot be indicated by
* identifying `\0`, but by counting to the expression length.
*
* @param mode
* Compiler mode flags that affect the database as a whole. One of @ref
* HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
* supplied, to select between the generation of a streaming, block or
* vectored database. In addition, other flags (beginning with HS_MODE_)
* may be supplied to enable specific features. See @ref HS_MODE_FLAG for
* more details.
*
* @param platform
* If not NULL, the platform structure is used to determine the target
* platform for the database. If NULL, a database suitable for running
* on the current host platform is produced.
*
* @param db
* On success, a pointer to the generated database will be returned in
* this parameter, or NULL on failure. The caller is responsible for
* deallocating the buffer using the @ref hs_free_database() function.
*
* @param error
* If the compile fails, a pointer to a @ref hs_compile_error_t will be
* returned, providing details of the error condition. The caller is
* responsible for deallocating the buffer using the @ref
* hs_free_compile_error() function.
*
* @return
* @ref HS_SUCCESS is returned on successful compilation; @ref
* HS_COMPILER_ERROR on failure, with details provided in the error
* parameter.
*/
hs_error_t HS_CDECL hs_compile_lit(const char *expression, unsigned flags,
const size_t len, unsigned mode,
const hs_platform_info_t *platform,
hs_database_t **db,
hs_compile_error_t **error);
/**
* The multiple pure literal expression compiler.
*
* This is the function call with which a set of pure literal expressions is
* compiled into a database which can be passed to the runtime functions (such
* as @ref hs_scan(), @ref hs_open_stream(), etc.) Each expression can be
* labelled with a unique integer which is passed into the match callback to
* identify the pattern that has matched.
*
* @param expressions
* Array of NULL-terminated expressions to compile. Note that (as for
* @ref hs_compile_lit()) these strings must contain only the pattern to be
* matched, with no delimiters or flags; any global flags should be
* specified with the @p flags argument. For example, the expression
* `/abc?def/i` should be compiled by providing `abc?def` as the first
* string in the @p expressions array, and @ref HS_FLAG_CASELESS as the
* first value in the @p flags array. Meanwhile, the string content shall
* be fully parsed in a literal sense without any regular grammars. For
* example, the expression `abc?` simply means a char sequence of `a`, `b`,
* `c`, and `?`. The `?` here doesn't mean 0 or 1 quantifier under regular
* semantics.
*
* @param flags
* Array of flags which modify the behaviour of each expression. Multiple
* flags may be used by ORing them together. Specifying the NULL pointer
* in place of an array will set the flags value for all patterns to zero.
* Compared to @ref hs_compile_multi(), fewer valid values are provided:
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
* - HS_FLAG_SINGLEMATCH - Only one match will be generated for the
* expression per stream.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
*
* @param ids
* An array of integers specifying the ID number to be associated with the
* corresponding pattern in the expressions array. Specifying the NULL
* pointer in place of an array will set the ID value for all patterns to
* zero.
*
* @param lens
* Array of lengths of the text content of each pure literal expression.
* As the text content indicated by @p expression is treated as single
* character one by one, the special terminating character `\0` should be
* allowed to appear in expression, and not treated as a terminator for a
* string. Thus, the end of a pure literal expression cannot be indicated
* by identifying `\0`, but by counting to the expression length.
*
* @param elements
* The number of elements in the input arrays.
*
* @param mode
* Compiler mode flags that affect the database as a whole. One of @ref
* HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
* supplied, to select between the generation of a streaming, block or
* vectored database. In addition, other flags (beginning with HS_MODE_)
* may be supplied to enable specific features. See @ref HS_MODE_FLAG for
* more details.
*
* @param platform
* If not NULL, the platform structure is used to determine the target
* platform for the database. If NULL, a database suitable for running
* on the current host platform is produced.
*
* @param db
* On success, a pointer to the generated database will be returned in
* this parameter, or NULL on failure. The caller is responsible for
* deallocating the buffer using the @ref hs_free_database() function.
*
* @param error
* If the compile fails, a pointer to a @ref hs_compile_error_t will be
* returned, providing details of the error condition. The caller is
* responsible for deallocating the buffer using the @ref
* hs_free_compile_error() function.
*
* @return
* @ref HS_SUCCESS is returned on successful compilation; @ref
* HS_COMPILER_ERROR on failure, with details provided in the error
* parameter.
*/
hs_error_t HS_CDECL hs_compile_lit_multi(const char * const *expressions,
const unsigned *flags,
const unsigned *ids,
const size_t *lens,
unsigned elements, unsigned mode,
const hs_platform_info_t *platform,
hs_database_t **db,
hs_compile_error_t **error);
/**
* Free an error structure generated by @ref hs_compile(), @ref
* hs_compile_multi() or @ref hs_compile_ext_multi().
@@ -579,6 +750,10 @@ hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error);
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
* syntax.
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
* the sub-expressions in logical combinations.
*
* @param info
* On success, a pointer to the pattern information will be returned in
@@ -641,6 +816,10 @@ hs_error_t HS_CDECL hs_expression_info(const char *expression,
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
* syntax.
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
* the sub-expressions in logical combinations.
*
* @param ext
* A pointer to a filled @ref hs_expr_ext_t structure that defines

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -52,6 +52,17 @@ hs_error_t hs_compile_multi_int(const char *const *expressions,
hs_database_t **db,
hs_compile_error_t **comp_error, const Grey &g);
/** \brief Internal use only: takes a Grey argument so that we can use it in
 * tools.
 *
 * Pure literal counterpart of hs_compile_multi_int(). In addition to the
 * usual pattern arrays it takes \p lens, the length of each pattern's text,
 * and an \p ext array; the public literal API wrappers pass NULL for \p ext.
 */
hs_error_t hs_compile_lit_multi_int(const char *const *expressions,
const unsigned *flags, const unsigned *ids,
const hs_expr_ext *const *ext,
const size_t *lens, unsigned elements,
unsigned mode,
const hs_platform_info_t *platform,
hs_database_t **db,
hs_compile_error_t **comp_error,
const Grey &g);
} // namespace ue2
extern "C"

View File

@@ -83,10 +83,9 @@ bool maskIsConsistent(const std::string &s, bool nocase, const vector<u8> &msk,
* \ref HWLM_MASKLEN. */
hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in,
bool noruns_in, u32 id_in, hwlm_group_t groups_in,
const vector<u8> &msk_in, const vector<u8> &cmp_in,
bool pure_in)
const vector<u8> &msk_in, const vector<u8> &cmp_in)
: s(s_in), id(id_in), nocase(nocase_in), noruns(noruns_in),
groups(groups_in), msk(msk_in), cmp(cmp_in), pure(pure_in) {
groups(groups_in), msk(msk_in), cmp(cmp_in) {
assert(s.size() <= HWLM_LITERAL_MAX_LEN);
assert(msk.size() <= HWLM_MASKLEN);
assert(msk.size() == cmp.size());

View File

@@ -113,16 +113,13 @@ struct hwlmLiteral {
*/
std::vector<u8> cmp;
bool pure; //!< \brief The pass-on of pure flag from LitFragment.
/** \brief Complete constructor, takes group information and msk/cmp.
*
* This constructor takes a msk/cmp pair. Both must be vectors of length <=
* \ref HWLM_MASKLEN. */
hwlmLiteral(const std::string &s_in, bool nocase_in, bool noruns_in,
u32 id_in, hwlm_group_t groups_in,
const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in,
bool pure_in = false);
const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in);
/** \brief Simple constructor: no group information, no msk/cmp.
*

View File

@@ -185,7 +185,6 @@ bool shortcutLiteral(NG &ng, const ParsedExpression &pe) {
return false;
}
vis.lit.set_pure();
const ue2_literal &lit = vis.lit;
if (lit.empty()) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:

View File

@@ -238,10 +238,10 @@ hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t,
assert(id && id < t->size); // id is an offset into bytecode
const u64a som = 0;
const u8 flags = 0;
if (!scratch->pure) {
return roseRunProgram(t, scratch, id, som, end, flags);
} else {
if (t->pureLiteral) {
return roseRunProgram_l(t, scratch, id, som, end, flags);
} else {
return roseRunProgram(t, scratch, id, som, end, flags);
}
}
@@ -619,8 +619,12 @@ int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) {
// Our match ID is the program offset.
const u32 program = id;
const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP;
hwlmcb_rv_t rv =
roseRunProgram(rose, scratch, program, start, end, flags);
hwlmcb_rv_t rv;
if (rose->pureLiteral) {
rv = roseRunProgram_l(rose, scratch, program, start, end, flags);
} else {
rv = roseRunProgram(rose, scratch, program, start, end, flags);
}
if (rv == HWLM_TERMINATE_MATCHING) {
return MO_HALT_MATCHING;
}

View File

@@ -2884,6 +2884,7 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
assert(programOffset >= sizeof(struct RoseEngine));
assert(programOffset < t->size);
const char in_catchup = prog_flags & ROSE_PROG_FLAG_IN_CATCHUP;
const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV;
const char *pc_base = getByOffset(t, programOffset);
@@ -2911,6 +2912,56 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(CHECK_GROUPS) {
DEBUG_PRINTF("groups=0x%llx, checking instr groups=0x%llx\n",
tctxt->groups, ri->groups);
if (!(ri->groups & tctxt->groups)) {
DEBUG_PRINTF("halt: no groups are set\n");
return HWLM_CONTINUE_MATCHING;
}
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(CHECK_MASK) {
struct core_info *ci = &scratch->core_info;
if (!roseCheckMask(ci, ri->and_mask, ri->cmp_mask,
ri->neg_mask, ri->offset, end)) {
DEBUG_PRINTF("failed mask check\n");
assert(ri->fail_jump); // must progress
pc += ri->fail_jump;
L_PROGRAM_NEXT_INSTRUCTION_JUMP
}
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(CHECK_MASK_32) {
struct core_info *ci = &scratch->core_info;
if (!roseCheckMask32(ci, ri->and_mask, ri->cmp_mask,
ri->neg_mask, ri->offset, end)) {
assert(ri->fail_jump);
pc += ri->fail_jump;
L_PROGRAM_NEXT_INSTRUCTION_JUMP
}
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(CHECK_BYTE) {
const struct core_info *ci = &scratch->core_info;
if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask,
ri->negation, ri->offset, end)) {
DEBUG_PRINTF("failed byte check\n");
assert(ri->fail_jump); // must progress
pc += ri->fail_jump;
L_PROGRAM_NEXT_INSTRUCTION_JUMP
}
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(PUSH_DELAYED) {
rosePushDelayedMatch(t, scratch, ri->delay, ri->index, end);
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(CATCH_UP) {
if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
@@ -2967,6 +3018,17 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(REPORT_CHAIN) {
// Note: sequence points updated inside this function.
if (roseCatchUpAndHandleChainMatch(
t, scratch, ri->event, ri->top_squash_distance, end,
in_catchup) == HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
work_done = 1;
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(REPORT) {
updateSeqPoint(tctxt, end, from_mpv);
if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust,
@@ -3117,6 +3179,24 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(INCLUDED_JUMP) {
if (scratch->fdr_conf) {
// squash the bucket of included literal
u8 shift = scratch->fdr_conf_offset & ~7U;
u64a mask = ((~(u64a)ri->squash) << shift);
*(scratch->fdr_conf) &= mask;
pc = getByOffset(t, ri->child_offset);
pc_base = pc;
programOffset = (const u8 *)pc_base -(const u8 *)t;
DEBUG_PRINTF("pc_base %p pc %p child_offset %u squash %u\n",
pc_base, pc, ri->child_offset, ri->squash);
work_done = 0;
L_PROGRAM_NEXT_INSTRUCTION_JUMP
}
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(SET_LOGICAL) {
DEBUG_PRINTF("set logical value of lkey %u, offset_adjust=%d\n",
ri->lkey, ri->offset_adjust);

View File

@@ -2843,34 +2843,9 @@ vector<LitFragment> groupByFragment(const RoseBuildImpl &build) {
DEBUG_PRINTF("fragment candidate: lit_id=%u %s\n", lit_id,
dumpString(lit.s).c_str());
/** 0:/xxabcdefgh/ */
/** 1:/yyabcdefgh/ */
/** 2:/yyabcdefgh.+/ */
// Above 3 patterns should firstly convert into RoseLiteralMap with
// 2 elements ("xxabcdefgh" and "yyabcdefgh"), then convert into
// LitFragment with 1 element ("abcdefgh"). Special care should be
// taken to handle the 'pure' flag during the conversion.
rose_literal_id lit_frag = getFragment(lit);
auto it = frag_info.find(lit_frag);
if (it != frag_info.end()) {
if (!lit_frag.s.get_pure() && it->first.s.get_pure()) {
struct FragmentInfo f_info = it->second;
f_info.lit_ids.push_back(lit_id);
f_info.groups |= groups;
frag_info.erase(it->first);
frag_info.emplace(lit_frag, f_info);
} else {
it->second.lit_ids.push_back(lit_id);
it->second.groups |= groups;
}
} else {
struct FragmentInfo f_info;
f_info.lit_ids.push_back(lit_id);
f_info.groups |= groups;
frag_info.emplace(lit_frag, f_info);
}
auto &fi = frag_info[getFragment(lit)];
fi.lit_ids.push_back(lit_id);
fi.groups |= groups;
}
for (auto &m : frag_info) {

View File

@@ -340,14 +340,7 @@ public:
std::pair<u32, bool> insert(const rose_literal_id &lit) {
auto it = lits_index.find(lit);
if (it != lits_index.end()) {
u32 idx = it->second;
auto &l = lits.at(idx);
if (!lit.s.get_pure() && l.s.get_pure()) {
lits_index.erase(l);
l.s.unset_pure();
lits_index.emplace(l, idx);
}
return {idx, false};
return {it->second, false};
}
u32 id = verify_u32(lits.size());
lits.push_back(lit);

View File

@@ -727,7 +727,6 @@ void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp,
const auto &s_final = lit_final.get_string();
bool nocase = lit_final.any_nocase();
bool pure = f.s.get_pure();
DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, cmp=%s\n",
f.fragment_id, escapeString(s_final).c_str(), (int)nocase,
@@ -741,7 +740,7 @@ void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp,
const auto &groups = f.groups;
mp.lits.emplace_back(move(s_final), nocase, noruns, f.fragment_id,
groups, msk, cmp, pure);
groups, msk, cmp);
}
static

View File

@@ -328,6 +328,7 @@ struct RoseBoundaryReports {
* nfas). Rose nfa info table can distinguish the cases.
*/
struct RoseEngine {
u8 pureLiteral; /* Indicator of pure literal API */
u8 noFloatingRoots; /* only need to run the anchored table if something
* matched in the anchored table */
u8 requiresEodCheck; /* stuff happens at eod time */

View File

@@ -141,7 +141,6 @@ void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose,
s->deduper.current_report_offset = ~0ULL;
s->deduper.som_log_dirty = 1; /* som logs have not been cleared */
s->fdr_conf = NULL;
s->pure = 0;
// Rose program execution (used for some report paths) depends on these
// values being initialised.

View File

@@ -137,7 +137,6 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
s->scratchSize = alloc_size;
s->scratch_alloc = (char *)s_tmp;
s->fdr_conf = NULL;
s->pure = 0;
// each of these is at an offset from the previous
char *current = (char *)s + sizeof(*s);

View File

@@ -211,7 +211,6 @@ struct ALIGN_CL_DIRECTIVE hs_scratch {
u64a *fdr_conf; /**< FDR confirm value */
u8 fdr_conf_offset; /**< offset where FDR/Teddy front end matches
* in buffer */
u8 pure; /**< indicator of pure-literal or cutting-literal */
};
/* array of fatbit ptr; TODO: why not an array of fatbits? */

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -283,7 +283,6 @@ ue2_literal &ue2_literal::erase(size_type pos, size_type n) {
}
void ue2_literal::push_back(char c, bool nc) {
assert(!nc || ourisalpha(c));
if (nc) {
c = mytoupper(c);
}

View File

@@ -211,17 +211,10 @@ public:
size_t hash() const;
void set_pure() { pure = true; }
void unset_pure() { pure = false; }
bool get_pure() const { return pure; }
/* TODO: consider existing member functions possibly related with pure. */
private:
friend const_iterator;
std::string s;
boost::dynamic_bitset<> nocase;
bool pure = false; /**< born from cutting or not (pure literal). */
};
/// Return a reversed copy of this literal.