Merge branch develop to master

This commit is contained in:
Chang, Harry 2019-08-13 14:56:02 +08:00
commit 4cebdaa435
63 changed files with 1534 additions and 245 deletions

View File

@ -2,6 +2,21 @@
This is a list of notable changes to Hyperscan, in reverse chronological order.
## [5.2.0] 2019-07-12
- Literal API: add new APIs `hs_compile_lit()` and `hs_compile_lit_multi()` to
process pure literal rule sets. The 2 literal APIs treat each expression text
in a literal sense without recognizing any regular expression grammar.
- Logical combination: add support for purely negative combinations, which
report match at EOD in case of no sub-expressions matched.
- Windows porting: support shared library (DLL) on Windows with available tools
hscheck, hsbench and hsdump.
- Bugfix for issue #148: fix uninitialized use of `scatter_unit_uX` due to
padding.
- Bugfix for issue #155: fix numerical result out of range error.
- Bugfix for issue #165: avoid corruption of pending combination report in
streaming mode.
- Bugfix for issue #174: fix scratch free issue when memory allocation fails.
## [5.1.1] 2019-04-03
- Add extra detection and handling when invalid rose programs are triggered.
- Bugfix for issue #136: fix CMake parsing of CPU architecture for GCC-9.

View File

@ -2,8 +2,8 @@ cmake_minimum_required (VERSION 2.8.11)
project (hyperscan C CXX)
set (HS_MAJOR_VERSION 5)
set (HS_MINOR_VERSION 1)
set (HS_PATCH_VERSION 1)
set (HS_MINOR_VERSION 2)
set (HS_PATCH_VERSION 0)
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
@ -31,6 +31,7 @@ else()
endif()
if(CMAKE_BUILD_TYPE MATCHES RELEASE|RELWITHDEBINFO|MINSIZEREL)
message(STATUS "using release build")
set(RELEASE_BUILD TRUE)
else()
set(RELEASE_BUILD FALSE)
@ -109,11 +110,9 @@ option(BUILD_SHARED_LIBS "Build shared libs instead of static" OFF)
option(BUILD_STATIC_AND_SHARED "Build shared libs as well as static" OFF)
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
if (WIN32)
message(FATAL_ERROR "Windows DLLs currently not supported")
else()
message(STATUS "Building shared libraries")
endif()
else()
message(STATUS "Building static libraries")
endif()
if (NOT BUILD_SHARED_LIBS)
@ -151,9 +150,6 @@ if(MSVC OR MSVC_IDE)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O3 /Qstd=c99 /Qrestrict /wd4267 /Qdiag-disable:remark")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /Qstd=c++11 /Qrestrict /QxHost /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
else()
# todo: change these as required
set(ARCH_C_FLAGS "/arch:AVX2")
set(ARCH_CXX_FLAGS "/arch:AVX2")
set(MSVC_WARNS "/wd4101 /wd4146 /wd4172 /wd4200 /wd4244 /wd4267 /wd4307 /wd4334 /wd4805 /wd4996 -D_CRT_SECURE_NO_WARNINGS")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 ${MSVC_WARNS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 ${MSVC_WARNS} /wd4800 -DBOOST_DETAIL_NO_CONTAINER_FWD")
@ -1298,12 +1294,14 @@ endif()
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
if (NOT FAT_RUNTIME)
add_library(hs_runtime_shared SHARED src/hs_version.c
src/hs_valid_platform.c $<TARGET_OBJECTS:hs_exec_shared>)
src/hs_valid_platform.c $<TARGET_OBJECTS:hs_exec_shared>
hs_runtime.def)
else()
add_library(hs_runtime_shared SHARED src/hs_version.c
src/hs_valid_platform.c
$<TARGET_OBJECTS:hs_exec_common_shared>
${RUNTIME_SHLIBS})
${RUNTIME_SHLIBS}
hs_runtime.def)
endif()
set_target_properties(hs_runtime_shared PROPERTIES
VERSION ${LIB_VERSION}
@ -1349,7 +1347,7 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
${RUNTIME_SHLIBS})
endif ()
add_library(hs_shared SHARED ${hs_shared_SRCS})
add_library(hs_shared SHARED ${hs_shared_SRCS} hs.def)
add_dependencies(hs_shared ragel_Parser)
set_target_properties(hs_shared PROPERTIES

View File

@ -322,7 +322,7 @@ PatternData::PatternData(const char *pattern, u32 flags, u32 idx, u32 id_in,
ch_misc_free(info);
u32 guardflags;
guardflags = (flags | HS_FLAG_PREFILTER) & ~HS_FLAG_SINGLEMATCH;
guardflags = flags | HS_FLAG_PREFILTER;
guard = isHyperscanSupported(pattern, guardflags, platform);
} else {
// We can't even prefilter this pattern, so we're dependent on Big Dumb

View File

@ -54,6 +54,75 @@ version of Hyperscan used to scan with it.
Hyperscan provides support for targeting a database at a particular CPU
platform; see :ref:`instr_specialization` for details.
=====================
Compile Pure Literals
=====================
Pure literal is a special case of regular expression. A character sequence is
regarded as a pure literal if and only if each character is read and
interpreted independently. No syntax association happens between any adjacent
characters.
For example, consider an expression written as :regexp:`/bc?/`. We could say it
is a regular expression, meaning the character ``b`` followed by nothing
or by one character ``c``. Alternatively, we could also say it is a pure
literal expression, meaning a character sequence of 3-byte
length, containing characters ``b``, ``c`` and ``?``. In the regular case, the
question mark character ``?`` has a particular syntax role called 0-1 quantifier,
which has a syntax association with the character ahead of it. Similar
characters exist in regular grammar like ``[``, ``]``, ``(``, ``)``, ``{``,
``}``, ``-``, ``*``, ``+``, ``\``, ``|``, ``/``, ``:``, ``^``, ``.``, ``$``.
In the pure literal case, however, all these meta characters lose their extra
meanings and are treated as ordinary ASCII characters.
Hyperscan was initially designed to process common regular expressions. It is
hence embedded with a complex parser to do comprehensive regular grammar
interpretation. In particular, the identification of the above meta characters
is the basic step for the interpretation of far more complex regular grammars.
However, in real cases, patterns may not always be regular expressions. They
could just be pure literals. Problems arise if the pure literals contain
regular meta characters. If fed directly into the traditional Hyperscan
compile API, all these meta characters will be interpreted in predefined ways,
which is unnecessary and gives results that are not what the user expects. To
avoid such misinterpretation by the traditional API, users have to preprocess
these literal patterns by converting the meta characters into some other format:
either by adding a backslash ``\`` before certain meta characters, or by
converting all the characters into a hexadecimal representation.
In ``v5.2.0``, Hyperscan introduces 2 new compile APIs for pure literal patterns:
#. :c:func:`hs_compile_lit`: compiles a single pure literal into a pattern
database.
#. :c:func:`hs_compile_lit_multi`: compiles an array of pure literals into a
pattern database. All of the supplied patterns will be scanned for
concurrently at scan time, with user-supplied identifiers returned when they
match.
These 2 APIs are designed for use cases where all patterns contained in the
target rule set are pure literals. Users can pass the initial pure literal
content directly into these APIs without worrying about writing regular meta
characters in their patterns. No preprocessing work is needed any more.
For new APIs, the ``length`` of each literal pattern is a newly added parameter.
Hyperscan needs to locate the end position of the input expression via clearly
knowing each literal's length, not by simply identifying character ``\0`` of a
string.
Supported flags: :c:member:`HS_FLAG_CASELESS`, :c:member:`HS_FLAG_MULTILINE`,
:c:member:`HS_FLAG_SINGLEMATCH`, :c:member:`HS_FLAG_SOM_LEFTMOST`.
.. note:: We don't support literal compilation API with :ref:`extparam`. And
for runtime implementation, traditional runtime APIs can still be
used to match pure literal patterns.
.. note:: If the target rule set contains at least one regular expression,
please use traditional compile APIs :c:func:`hs_compile`,
:c:func:`hs_compile_multi` and :c:func:`hs_compile_ext_multi`.
The new literal APIs introduced here are designed for rule sets
containing only pure literal expressions.
***************
Pattern Support
***************

43
hs.def Normal file
View File

@ -0,0 +1,43 @@
; Hyperscan DLL export definitions
;
; Lists every public API symbol exported by the full (compile + runtime)
; Hyperscan shared library. A public function that is not listed under EXPORTS
; here is not exported through this file, so every new public API must be
; added to this list (kept in alphabetical order).
LIBRARY hs
EXPORTS
hs_alloc_scratch
hs_clone_scratch
hs_close_stream
hs_compile
hs_compile_ext_multi
hs_compile_lit
hs_compile_lit_multi
hs_compile_multi
hs_compress_stream
hs_copy_stream
hs_database_info
hs_database_size
hs_deserialize_database
hs_deserialize_database_at
hs_expand_stream
hs_expression_ext_info
hs_expression_info
hs_free_compile_error
hs_free_database
hs_free_scratch
hs_open_stream
hs_populate_platform
hs_reset_and_copy_stream
hs_reset_and_expand_stream
hs_reset_stream
hs_scan
hs_scan_stream
hs_scan_vector
hs_scratch_size
hs_serialize_database
hs_serialized_database_info
hs_serialized_database_size
hs_set_allocator
hs_set_database_allocator
hs_set_misc_allocator
hs_set_scratch_allocator
hs_set_stream_allocator
hs_stream_size
hs_valid_platform
hs_version

36
hs_runtime.def Normal file
View File

@ -0,0 +1,36 @@
; Hyperscan DLL export definitions
;
; Export list for the hs_runtime shared library. Only scanning, stream,
; scratch, database (de)serialization, allocator and version/platform entry
; points are listed; no hs_compile* or hs_expression* symbols appear here.
LIBRARY hs_runtime
EXPORTS
hs_alloc_scratch
hs_clone_scratch
hs_close_stream
hs_compress_stream
hs_copy_stream
hs_database_info
hs_database_size
hs_deserialize_database
hs_deserialize_database_at
hs_expand_stream
hs_free_database
hs_free_scratch
hs_open_stream
hs_reset_and_copy_stream
hs_reset_and_expand_stream
hs_reset_stream
hs_scan
hs_scan_stream
hs_scan_vector
hs_scratch_size
hs_serialize_database
hs_serialized_database_info
hs_serialized_database_size
hs_set_allocator
hs_set_database_allocator
hs_set_misc_allocator
hs_set_scratch_allocator
hs_set_stream_allocator
hs_stream_size
hs_valid_platform
hs_version

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -56,11 +56,13 @@
#include "parser/unsupported.h"
#include "parser/utf8_validate.h"
#include "rose/rose_build.h"
#include "rose/rose_internal.h"
#include "som/slot_manager_dump.h"
#include "util/bytecode_ptr.h"
#include "util/compile_error.h"
#include "util/target_info.h"
#include "util/verify_types.h"
#include "util/ue2string.h"
#include <algorithm>
#include <cassert>
@ -107,6 +109,46 @@ void validateExt(const hs_expr_ext &ext) {
}
/**
 * \brief Append the first \p len bytes of \p expression to the literal.
 *
 * Bytes are copied one at a time, each tagged with the \p nocase setting. The
 * length is explicit, so the copy does not stop at a '\0' byte.
 */
void ParsedLitExpression::parseLiteral(const char *expression, size_t len,
                                       bool nocase) {
    const char *const end = expression + len;
    for (const char *cur = expression; cur != end; ++cur) {
        lit.push_back(*cur, nocase);
    }
}
/**
 * \brief Build the expression info and literal for one pure literal pattern.
 *
 * Validates \p flags, selects leftmost SOM when HS_FLAG_SOM_LEFTMOST is set
 * and converts the first \p expLength bytes of \p expression into \ref lit.
 *
 * Throws CompileError if \p flags contains bits outside HS_FLAG_ALL, or if
 * HS_FLAG_SINGLEMATCH is combined with HS_FLAG_SOM_LEFTMOST.
 */
ParsedLitExpression::ParsedLitExpression(unsigned index_in,
                                         const char *expression,
                                         size_t expLength, unsigned flags,
                                         ReportID report)
    : expr(index_in, false, flags & HS_FLAG_SINGLEMATCH, false, false,
           SOM_NONE, report, 0, MAX_OFFSET, 0, 0, 0, false) {
    // Flags that carry no meaning for a pure literal expression:
    // DOTALL/ALLOWEMPTY/UTF8/UCP/PREFILTER/COMBINATION/QUIET
    const bool hasUnknownBits = (flags & ~HS_FLAG_ALL) != 0;
    if (hasUnknownBits) {
        DEBUG_PRINTF("Unrecognised flag, flags=%u.\n", flags);
        throw CompileError("Unrecognised flag.");
    }

    const bool singleMatch = (flags & HS_FLAG_SINGLEMATCH) != 0;
    const bool somLeftmost = (flags & HS_FLAG_SOM_LEFTMOST) != 0;

    // FIXME: we disallow highlander + SOM, see UE-1850.
    if (singleMatch && somLeftmost) {
        throw CompileError("HS_FLAG_SINGLEMATCH is not supported in "
                           "combination with HS_FLAG_SOM_LEFTMOST.");
    }

    // Leftmost start-of-match was requested for this literal.
    if (somLeftmost) {
        expr.som = SOM_LEFT;
    }

    // Convert the raw expression bytes into the ue2_literal member.
    const bool caseless = (flags & HS_FLAG_CASELESS) != 0;
    parseLiteral(expression, expLength, caseless);
}
ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
unsigned flags, ReportID report,
const hs_expr_ext *ext)
@ -345,6 +387,49 @@ void addExpression(NG &ng, unsigned index, const char *expression,
}
}
/**
 * \brief Add one pure literal expression to the compiler.
 *
 * The expression text is taken verbatim: exactly \p expLength bytes starting
 * at \p expression are turned into a literal and handed to NG::addLiteral().
 *
 * \param ng          The global NG object the literal is added to.
 * \param index       Index of this expression in the caller's array.
 * \param expression  Literal text; must not be NULL.
 * \param flags       HS_FLAG_* flags for this expression.
 * \param ext         Extended parameters; must be NULL or have no flags set.
 * \param id          Report ID associated with this expression.
 * \param expLength   Number of bytes of \p expression that form the literal.
 *
 * Throws CompileError if extended parameters are supplied, if the literal is
 * longer than the configured pattern length limit, or if a flag that the
 * literal API does not support is set.
 */
void addLitExpression(NG &ng, unsigned index, const char *expression,
                      unsigned flags, const hs_expr_ext *ext, ReportID id,
                      size_t expLength) {
    assert(expression);
    const CompileContext &cc = ng.cc;
    DEBUG_PRINTF("index=%u, id=%u, flags=%u, expr='%s', len='%zu'\n", index,
                 id, flags, expression, expLength);

    // Extended parameters are not supported for pure literal patterns.
    if (ext && ext->flags != 0LLU) {
        throw CompileError("Extended parameters are not supported for pure "
                           "literal matching API.");
    }

    // Ensure that our pattern isn't too long (in characters). The length
    // supplied by the caller is authoritative: a pure literal may contain
    // embedded '\0' bytes, so strlen() would under-count and let an oversized
    // literal slip past the limit.
    if (expLength > cc.grey.limitPatternLength) {
        throw CompileError("Pattern length exceeds limit.");
    }

    // filter out flags not supported by pure literal API.
    u64a not_supported = HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8 |
                         HS_FLAG_UCP | HS_FLAG_PREFILTER | HS_FLAG_COMBINATION |
                         HS_FLAG_QUIET;

    if (flags & not_supported) {
        throw CompileError("Only HS_FLAG_CASELESS, HS_FLAG_MULTILINE, "
                           "HS_FLAG_SINGLEMATCH and HS_FLAG_SOM_LEFTMOST are "
                           "supported in literal API.");
    }

    // This expression must be a pure literal, we can build ue2_literal
    // directly based on expression text.
    ParsedLitExpression ple(index, expression, expLength, flags, id);

    // Feed the ue2_literal into Rose.
    // NOTE(review): if addLiteral() returns false the function returns
    // without reporting anything -- confirm that this is intended.
    const auto &expr = ple.expr;
    if (ng.addLiteral(ple.lit, expr.index, expr.report, expr.highlander,
                      expr.som, expr.quiet)) {
        DEBUG_PRINTF("took pure literal\n");
        return;
    }
}
static
bytecode_ptr<RoseEngine> generateRoseEngine(NG &ng) {
const u32 minWidth =
@ -416,10 +501,13 @@ hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) {
}
struct hs_database *build(NG &ng, unsigned int *length) {
struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag) {
assert(length);
auto rose = generateRoseEngine(ng);
struct RoseEngine *roseHead = rose.get();
roseHead->pureLiteral = pureFlag;
if (!rose) {
throw CompileError("Unable to generate bytecode.");
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -38,6 +38,7 @@
#include "compiler/expression_info.h"
#include "parser/Component.h"
#include "util/noncopyable.h"
#include "util/ue2string.h"
#include <memory>
@ -66,6 +67,22 @@ public:
std::unique_ptr<Component> component;
};
/** \brief Class gathering together the pieces of a parsed lit-expression. */
class ParsedLitExpression : noncopyable {
public:
/**
 * \brief Construct from the raw literal text.
 *
 * \param index       Index of the expression.
 * \param expression  Literal text, read as \p expLength bytes.
 * \param expLength   Number of bytes in \p expression.
 * \param flags       HS_FLAG_* flags for this expression.
 * \param report      Report ID for this expression.
 */
ParsedLitExpression(unsigned index, const char *expression,
size_t expLength, unsigned flags, ReportID report);
/**
 * \brief Append \p len bytes of \p expression to \ref lit, each with the
 * given \p nocase setting.
 */
void parseLiteral(const char *expression, size_t len, bool nocase);
/** \brief Expression information (from flags, extparam etc) */
ExpressionInfo expr;
/** \brief Format the lit-expression text into Hyperscan literal type. */
ue2_literal lit;
};
/**
* \brief Class gathering together the pieces of an expression that has been
* built into an NFA graph.
@ -99,6 +116,10 @@ struct BuiltExpression {
void addExpression(NG &ng, unsigned index, const char *expression,
unsigned flags, const hs_expr_ext *ext, ReportID report);
/**
 * \brief Add a pure literal expression to the compiler.
 *
 * The text at \p expression is taken literally, using \p expLength as its
 * length. Throws CompileError if extended parameters are supplied, if the
 * pattern length limit is exceeded, or if a flag not supported by the literal
 * API is set.
 */
void addLitExpression(NG &ng, unsigned index, const char *expression,
unsigned flags, const hs_expr_ext *ext, ReportID id,
size_t expLength);
/**
* Build a Hyperscan database out of the expressions we've been given. A
* fatal error will result in an exception being thrown.
@ -107,11 +128,13 @@ void addExpression(NG &ng, unsigned index, const char *expression,
* The global NG object.
* @param[out] length
* The number of bytes occupied by the compiled structure.
* @param pureFlag
* The flag indicating invocation from literal API or not.
* @return
* The compiled structure. Should be deallocated with the
* hs_database_free() function.
*/
struct hs_database *build(NG &ng, unsigned int *length);
struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag);
/**
* Constructs an NFA graph from the given expression tree.

View File

@ -51,7 +51,7 @@
} \
\
/* resolver */ \
static void(*JOIN(resolve_, NAME)(void)) { \
static RTYPE (*JOIN(resolve_, NAME)(void))(__VA_ARGS__) { \
if (check_avx512()) { \
return JOIN(avx512_, NAME); \
} \

View File

@ -282,7 +282,7 @@ const array<double, 100> Scorer::count_lut{{
}};
const array<double, 9> Scorer::len_lut{{
pow(0, -3.0), pow(1, -3.0), pow(2, -3.0), pow(3, -3.0), pow(4, -3.0),
0, pow(1, -3.0), pow(2, -3.0), pow(3, -3.0), pow(4, -3.0),
pow(5, -3.0), pow(6, -3.0), pow(7, -3.0), pow(8, -3.0)}};
/**
@ -807,9 +807,6 @@ void findIncludedLits(vector<hwlmLiteral> &lits,
for (size_t i = 0; i < cnt; i++) {
u32 bucket1 = group[i].first;
u32 id1 = group[i].second;
if (lits[id1].pure) {
continue;
}
buildSquashMask(lits, id1, bucket1, i + 1, group, parent_map,
exception_map);
}

View File

@ -62,7 +62,6 @@ struct LitInfo {
u8 size;
u8 flags; //!< bitfield of flags from FDR_LIT_FLAG_* above.
u8 next;
u8 pure; //!< The pass-on of pure flag from hwlmLiteral.
};
#define FDRC_FLAG_NO_CONFIRM 1

View File

@ -87,7 +87,6 @@ void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
info.flags = flags;
info.size = verify_u8(max(lit.msk.size(), lit.s.size()));
info.groups = lit.groups;
info.pure = lit.pure;
// these are built up assuming a LE machine
CONF_TYPE msk = all_ones;

View File

@ -65,7 +65,6 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
u8 oldNext; // initialized in loop
do {
assert(ISALIGNED(li));
scratch->pure = li->pure;
if (unlikely((conf_key & li->msk) != li->v)) {
goto out;
@ -100,7 +99,6 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
li++;
} while (oldNext);
scratch->fdr_conf = NULL;
scratch->pure = 0;
}
#endif

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -251,7 +251,7 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
ng.rm.logicalKeyRenumber();
unsigned length = 0;
struct hs_database *out = build(ng, &length);
struct hs_database *out = build(ng, &length, 0);
assert(out); // should have thrown exception on error
assert(length);
@ -281,6 +281,130 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
}
}
/**
 * Internal implementation of the pure literal compile API.
 *
 * Validates the arguments, adds each of the \p elements expressions with
 * addLitExpression() (using lens[i] as its length) and then calls build()
 * with the pure-literal flag set to 1.
 *
 * \p flags, \p ids and \p ext may be NULL; missing flags/ids default to 0 and
 * a missing ext entry to nullptr. \p comp_error and \p db may also be NULL, in
 * which case HS_COMPILER_ERROR is returned.
 *
 * Returns HS_SUCCESS with *db set and *comp_error cleared on success. On
 * failure *db is set to nullptr and HS_COMPILER_ERROR is returned, except for
 * the FAT_RUNTIME SSSE3 check, which returns HS_ARCH_ERROR.
 */
hs_error_t
hs_compile_lit_multi_int(const char *const *expressions, const unsigned *flags,
const unsigned *ids, const hs_expr_ext *const *ext,
const size_t *lens, unsigned elements, unsigned mode,
const hs_platform_info_t *platform, hs_database_t **db,
hs_compile_error_t **comp_error, const Grey &g) {
// Check the args: note that it's OK for flags, ids or ext to be null.
if (!comp_error) {
if (db) {
*db = nullptr;
}
// nowhere to write the string, but we can still report an error code
return HS_COMPILER_ERROR;
}
if (!db) {
*comp_error = generateCompileError("Invalid parameter: db is NULL", -1);
return HS_COMPILER_ERROR;
}
if (!expressions) {
*db = nullptr;
*comp_error
= generateCompileError("Invalid parameter: expressions is NULL",
-1);
return HS_COMPILER_ERROR;
}
// The per-expression length array is mandatory for the literal API.
if (!lens) {
*db = nullptr;
*comp_error = generateCompileError("Invalid parameter: len is NULL", -1);
return HS_COMPILER_ERROR;
}
if (elements == 0) {
*db = nullptr;
*comp_error = generateCompileError("Invalid parameter: elements is zero", -1);
return HS_COMPILER_ERROR;
}
#if defined(FAT_RUNTIME)
// This is the only failure path that returns HS_ARCH_ERROR.
if (!check_ssse3()) {
*db = nullptr;
*comp_error = generateCompileError("Unsupported architecture", -1);
return HS_ARCH_ERROR;
}
#endif
if (!checkMode(mode, comp_error)) {
*db = nullptr;
assert(*comp_error); // set by checkMode.
return HS_COMPILER_ERROR;
}
if (!checkPlatform(platform, comp_error)) {
*db = nullptr;
assert(*comp_error); // set by checkPlatform.
return HS_COMPILER_ERROR;
}
if (elements > g.limitPatternCount) {
*db = nullptr;
*comp_error = generateCompileError("Number of patterns too large", -1);
return HS_COMPILER_ERROR;
}
// This function is simply a wrapper around both the parser and compiler
bool isStreaming = mode & (HS_MODE_STREAM | HS_MODE_VECTORED);
bool isVectored = mode & HS_MODE_VECTORED;
unsigned somPrecision = getSomPrecision(mode);
// Use the caller's platform if given, otherwise the current target.
target_t target_info = platform ? target_t(*platform)
: get_current_target();
try {
CompileContext cc(isStreaming, isVectored, target_info, g);
NG ng(cc, elements, somPrecision);
for (unsigned int i = 0; i < elements; i++) {
// Add this expression to the compiler
try {
addLitExpression(ng, i, expressions[i], flags ? flags[i] : 0,
ext ? ext[i] : nullptr, ids ? ids[i] : 0,
lens[i]);
} catch (CompileError &e) {
/* Caught a parse error;
* throw it upstream as a CompileError with a specific index */
e.setExpressionIndex(i);
throw; /* do not slice */
}
}
// Check sub-expression ids
ng.rm.pl.validateSubIDs(ids, expressions, flags, elements);
// Renumber and assign lkey to reports
ng.rm.logicalKeyRenumber();
unsigned length = 0;
// Third argument 1: this build was invoked from the literal API.
struct hs_database *out = build(ng, &length, 1);
assert(out); //should have thrown exception on error
assert(length);
*db = out;
*comp_error = nullptr;
return HS_SUCCESS;
}
catch (const CompileError &e) {
// Compiler error occurred
*db = nullptr;
*comp_error = generateCompileError(e.reason,
e.hasIndex ? (int)e.index : -1);
return HS_COMPILER_ERROR;
}
catch (const std::bad_alloc &) {
// Report the statically defined hs_enomem error object.
*db = nullptr;
*comp_error = const_cast<hs_compile_error_t *>(&hs_enomem);
return HS_COMPILER_ERROR;
}
catch (...) {
assert(!"Internal errror, unexpected exception");
*db = nullptr;
*comp_error = const_cast<hs_compile_error_t *>(&hs_einternal);
return HS_COMPILER_ERROR;
}
}
} // namespace ue2
extern "C" HS_PUBLIC_API
@ -326,6 +450,41 @@ hs_error_t HS_CDECL hs_compile_ext_multi(const char * const *expressions,
platform, db, error, Grey());
}
/*
 * Public entry point: compile a single pure literal expression.
 *
 * Wraps hs_compile_lit_multi_int() with a one-element array, ID 0 and no
 * extended parameters. A NULL expression is rejected here with
 * HS_COMPILER_ERROR; db and error are only written when they are non-NULL,
 * matching the NULL tolerance of hs_compile_lit_multi_int().
 */
extern "C" HS_PUBLIC_API
hs_error_t HS_CDECL hs_compile_lit(const char *expression, unsigned flags,
                                   const size_t len, unsigned mode,
                                   const hs_platform_info_t *platform,
                                   hs_database_t **db,
                                   hs_compile_error_t **error) {
    if (expression == nullptr) {
        // Never dereference output pointers the caller did not supply.
        if (db) {
            *db = nullptr;
        }
        if (error) {
            *error = generateCompileError(
                "Invalid parameter: expression is NULL", -1);
        }
        return HS_COMPILER_ERROR;
    }

    unsigned id = 0; // single expressions get zero as an ID
    const hs_expr_ext * const *ext = nullptr; // unused for this call.

    return hs_compile_lit_multi_int(&expression, &flags, &id, ext, &len, 1,
                                    mode, platform, db, error, Grey());
}
/*
 * Public entry point: compile an array of pure literal expressions.
 *
 * Forwards every argument unchanged to hs_compile_lit_multi_int(), with no
 * extended parameters and a default-constructed Grey.
 */
extern "C" HS_PUBLIC_API
hs_error_t HS_CDECL hs_compile_lit_multi(const char * const *expressions,
                                         const unsigned *flags,
                                         const unsigned *ids,
                                         const size_t *lens,
                                         unsigned elements, unsigned mode,
                                         const hs_platform_info_t *platform,
                                         hs_database_t **db,
                                         hs_compile_error_t **error) {
    // The literal API takes no extended parameters.
    const hs_expr_ext * const *noExt = nullptr;
    const Grey defaultGrey;

    return hs_compile_lit_multi_int(expressions, flags, ids, noExt, lens,
                                    elements, mode, platform, db, error,
                                    defaultGrey);
}
static
hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
const hs_expr_ext_t *ext, unsigned int mode,

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -323,6 +323,10 @@ typedef struct hs_expr_ext {
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
* syntax.
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
* the sub-expressions in logical combinations.
*
* @param mode
* Compiler mode flags that affect the database as a whole. One of @ref
@ -392,6 +396,10 @@ hs_error_t HS_CDECL hs_compile(const char *expression, unsigned int flags,
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
* syntax.
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
* the sub-expressions in logical combinations.
*
* @param ids
* An array of integers specifying the ID number to be associated with the
@ -472,6 +480,10 @@ hs_error_t HS_CDECL hs_compile_multi(const char *const *expressions,
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
* syntax.
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
* the sub-expressions in logical combinations.
*
* @param ids
* An array of integers specifying the ID number to be associated with the
@ -527,6 +539,165 @@ hs_error_t HS_CDECL hs_compile_ext_multi(const char *const *expressions,
const hs_platform_info_t *platform,
hs_database_t **db, hs_compile_error_t **error);
/**
* The basic pure literal expression compiler.
*
* This is the function call with which a pure literal expression (not a
* common regular expression) is compiled into a Hyperscan database which
* can be passed to the runtime functions (such as @ref hs_scan(),
* @ref hs_open_stream(), etc.)
*
* @param expression
* The expression to parse, whose length is given by @p len (it may contain
* embedded `\0` characters). Note that this string must
* represent ONLY the pattern to be matched, with no delimiters or flags;
* any global flags should be specified with the @p flags argument. For
* example, the expression `/abc?def/i` should be compiled by providing
* `abc?def` as the @p expression, and @ref HS_FLAG_CASELESS as the @a
* flags. Meanwhile, the string content shall be fully parsed in a literal
* sense without any regular grammars. For example, the @p expression
* `abc?` simply means a char sequence of `a`, `b`, `c`, and `?`. The `?`
* here doesn't mean 0 or 1 quantifier under regular semantics.
*
* @param flags
* Flags which modify the behaviour of the expression. Multiple flags may
* be used by ORing them together. Compared to @ref hs_compile(), fewer
* valid values are provided:
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
* - HS_FLAG_SINGLEMATCH - Only one match will be generated for the
* expression per stream.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
*
* @param len
* The length of the text content of the pure literal expression. As the
* text content indicated by @p expression is treated as single character
* one by one, the special terminating character `\0` should be allowed
* to appear in expression, and not treated as a terminator for a string.
* Thus, the end of a pure literal expression cannot be indicated by
* identifying `\0`, but by counting to the expression length.
*
* @param mode
* Compiler mode flags that affect the database as a whole. One of @ref
* HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
* supplied, to select between the generation of a streaming, block or
* vectored database. In addition, other flags (beginning with HS_MODE_)
* may be supplied to enable specific features. See @ref HS_MODE_FLAG for
* more details.
*
* @param platform
* If not NULL, the platform structure is used to determine the target
* platform for the database. If NULL, a database suitable for running
* on the current host platform is produced.
*
* @param db
* On success, a pointer to the generated database will be returned in
* this parameter, or NULL on failure. The caller is responsible for
* deallocating the buffer using the @ref hs_free_database() function.
*
* @param error
* If the compile fails, a pointer to a @ref hs_compile_error_t will be
* returned, providing details of the error condition. The caller is
* responsible for deallocating the buffer using the @ref
* hs_free_compile_error() function.
*
* @return
* @ref HS_SUCCESS is returned on successful compilation; @ref
* HS_COMPILER_ERROR on failure, with details provided in the error
* parameter.
*/
hs_error_t HS_CDECL hs_compile_lit(const char *expression, unsigned flags,
const size_t len, unsigned mode,
const hs_platform_info_t *platform,
hs_database_t **db,
hs_compile_error_t **error);
/**
* The multiple pure literal expression compiler.
*
* This is the function call with which a set of pure literal expressions is
* compiled into a database which can be passed to the runtime functions (such
* as @ref hs_scan(), @ref hs_open_stream(), etc.) Each expression can be
* labelled with a unique integer which is passed into the match callback to
* identify the pattern that has matched.
*
* @param expressions
* Array of expressions to compile; the length of each one is given by the
* corresponding entry of @p lens. Note that each string must
* represent ONLY the pattern to be matched, with no delimiters or flags;
* any global flags should be specified with the @p flags argument. For
* example, the expression `/abc?def/i` should be compiled by providing
* `abc?def` as the @p expression, and @ref HS_FLAG_CASELESS as the @a
* flags. Meanwhile, the string content shall be fully parsed in a literal
* sense without any regular grammars. For example, the @p expression
* `abc?` simply means a char sequence of `a`, `b`, `c`, and `?`. The `?`
* here doesn't mean 0 or 1 quantifier under regular semantics.
*
* @param flags
* Array of flags which modify the behaviour of each expression. Multiple
* flags may be used by ORing them together. Specifying the NULL pointer
* in place of an array will set the flags value for all patterns to zero.
* Compared to @ref hs_compile_multi(), fewer valid values are provided:
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
* - HS_FLAG_SINGLEMATCH - Only one match will be generated for the
* expression per stream.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
*
* @param ids
* An array of integers specifying the ID number to be associated with the
* corresponding pattern in the expressions array. Specifying the NULL
* pointer in place of an array will set the ID value for all patterns to
* zero.
*
* @param lens
* Array of lengths of the text content of each pure literal expression.
* As the text content indicated by @p expression is treated as single
* character one by one, the special terminating character `\0` should be
* allowed to appear in expression, and not treated as a terminator for a
* string. Thus, the end of a pure literal expression cannot be indicated
* by identifying `\0`, but by counting to the expression length.
*
* @param elements
* The number of elements in the input arrays.
*
* @param mode
* Compiler mode flags that affect the database as a whole. One of @ref
* HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
* supplied, to select between the generation of a streaming, block or
* vectored database. In addition, other flags (beginning with HS_MODE_)
* may be supplied to enable specific features. See @ref HS_MODE_FLAG for
* more details.
*
* @param platform
* If not NULL, the platform structure is used to determine the target
* platform for the database. If NULL, a database suitable for running
* on the current host platform is produced.
*
* @param db
* On success, a pointer to the generated database will be returned in
* this parameter, or NULL on failure. The caller is responsible for
* deallocating the buffer using the @ref hs_free_database() function.
*
* @param error
* If the compile fails, a pointer to a @ref hs_compile_error_t will be
* returned, providing details of the error condition. The caller is
* responsible for deallocating the buffer using the @ref
* hs_free_compile_error() function.
*
* @return
* @ref HS_SUCCESS is returned on successful compilation; @ref
* HS_COMPILER_ERROR on failure, with details provided in the error
* parameter.
*/
hs_error_t HS_CDECL hs_compile_lit_multi(const char * const *expressions,
const unsigned *flags,
const unsigned *ids,
const size_t *lens,
unsigned elements, unsigned mode,
const hs_platform_info_t *platform,
hs_database_t **db,
hs_compile_error_t **error);
/**
* Free an error structure generated by @ref hs_compile(), @ref
* hs_compile_multi() or @ref hs_compile_ext_multi().
@ -579,6 +750,10 @@ hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error);
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
* syntax.
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
* the sub-expressions in logical combinations.
*
* @param info
* On success, a pointer to the pattern information will be returned in
@ -641,6 +816,10 @@ hs_error_t HS_CDECL hs_expression_info(const char *expression,
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
* syntax.
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
* the sub-expressions in logical combinations.
*
* @param ext
* A pointer to a filled @ref hs_expr_ext_t structure that defines

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -52,6 +52,17 @@ hs_error_t hs_compile_multi_int(const char *const *expressions,
hs_database_t **db,
hs_compile_error_t **comp_error, const Grey &g);
/** \brief Internal use only: takes a Grey argument so that we can use it in
* tools.
*
* Literal-API variant of the internal multi-pattern compile entry point. In
* addition to the \p expressions, \p flags, \p ids and \p ext arrays it takes
* \p lens, an array with one length per expression; all of these arrays are
* expected to hold \p elements entries (callers such as hsbench fill \p lens
* with the length of each pattern before calling).
*
* \p mode, \p platform, \p db and \p comp_error presumably carry the same
* meaning as in the public hs_compile_lit_multi() API -- confirm against the
* definition. */
hs_error_t hs_compile_lit_multi_int(const char *const *expressions,
const unsigned *flags, const unsigned *ids,
const hs_expr_ext *const *ext,
const size_t *lens, unsigned elements,
unsigned mode,
const hs_platform_info_t *platform,
hs_database_t **db,
hs_compile_error_t **comp_error,
const Grey &g);
} // namespace ue2
extern "C"

View File

@ -83,10 +83,9 @@ bool maskIsConsistent(const std::string &s, bool nocase, const vector<u8> &msk,
* \ref HWLM_MASKLEN. */
hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in,
bool noruns_in, u32 id_in, hwlm_group_t groups_in,
const vector<u8> &msk_in, const vector<u8> &cmp_in,
bool pure_in)
const vector<u8> &msk_in, const vector<u8> &cmp_in)
: s(s_in), id(id_in), nocase(nocase_in), noruns(noruns_in),
groups(groups_in), msk(msk_in), cmp(cmp_in), pure(pure_in) {
groups(groups_in), msk(msk_in), cmp(cmp_in) {
assert(s.size() <= HWLM_LITERAL_MAX_LEN);
assert(msk.size() <= HWLM_MASKLEN);
assert(msk.size() == cmp.size());

View File

@ -113,16 +113,13 @@ struct hwlmLiteral {
*/
std::vector<u8> cmp;
bool pure; //!< \brief The pass-on of pure flag from LitFragment.
/** \brief Complete constructor, takes group information and msk/cmp.
*
* This constructor takes a msk/cmp pair. Both must be vectors of length <=
* \ref HWLM_MASKLEN. */
hwlmLiteral(const std::string &s_in, bool nocase_in, bool noruns_in,
u32 id_in, hwlm_group_t groups_in,
const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in,
bool pure_in = false);
const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in);
/** \brief Simple constructor: no group information, no msk/cmp.
*

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, Intel Corporation
* Copyright (c) 2018-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -254,44 +254,6 @@ void popOperator(vector<LogicalOperator> &op_stack, vector<u32> &subid_stack,
op_stack.pop_back();
}
/** \brief Look up the value recorded in \p lv for key \p ckey.
 *
 * Keys carrying LOGICAL_OP_BIT index into \p lv (with the bit masked off);
 * any key without that bit evaluates to 0. */
static
char getValue(const vector<char> &lv, u32 ckey) {
    const bool has_op_bit = (ckey & LOGICAL_OP_BIT) != 0;
    return has_op_bit ? lv[ckey & ~LOGICAL_OP_BIT] : 0;
}
static
bool hasMatchFromPurelyNegative(const vector<LogicalOp> &tree,
u32 start, u32 result) {
vector<char> lv(tree.size());
assert(start <= result);
for (u32 i = start; i <= result; i++) {
assert(i & LOGICAL_OP_BIT);
const LogicalOp &op = tree[i & ~LOGICAL_OP_BIT];
assert(i == op.id);
switch (op.op) {
case LOGICAL_OP_NOT:
lv[op.id & ~LOGICAL_OP_BIT] = !getValue(lv, op.ro);
break;
case LOGICAL_OP_AND:
lv[op.id & ~LOGICAL_OP_BIT] = getValue(lv, op.lo) &
getValue(lv, op.ro);
break;
case LOGICAL_OP_OR:
lv[op.id & ~LOGICAL_OP_BIT] = getValue(lv, op.lo) |
getValue(lv, op.ro);
break;
default:
assert(0);
break;
}
}
return lv[result & ~LOGICAL_OP_BIT];
}
void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical,
u32 ekey, u64a min_offset,
u64a max_offset) {
@ -366,9 +328,6 @@ void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical,
if (lkey_start == INVALID_LKEY) {
throw CompileError("No logical operation.");
}
if (hasMatchFromPurelyNegative(logicalTree, lkey_start, lkey_result)) {
throw CompileError("Has match from purely negative sub-expressions.");
}
combinationInfoAdd(ckey, id, ekey, lkey_start, lkey_result,
min_offset, max_offset);
}

View File

@ -185,7 +185,6 @@ bool shortcutLiteral(NG &ng, const ParsedExpression &pe) {
return false;
}
vis.lit.set_pure();
const ue2_literal &lit = vis.lit;
if (lit.empty()) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2018, Intel Corporation
* Copyright (c) 2016-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -222,6 +222,58 @@ char isLogicalCombination(const struct RoseEngine *rose, char *lvec,
return getLogicalVal(rose, lvec, result);
}
/** \brief Returns 1 if combination matches when no sub-expression matches. */
static really_inline
char isPurelyNegativeMatch(const struct RoseEngine *rose, char *lvec,
u32 start, u32 result) {
const struct LogicalOp *logicalTree = (const struct LogicalOp *)
((const char *)rose + rose->logicalTreeOffset);
assert(start >= rose->lkeyCount);
assert(start <= result);
assert(result < rose->lkeyCount + rose->lopCount);
for (u32 i = start; i <= result; i++) {
const struct LogicalOp *op = logicalTree + (i - rose->lkeyCount);
assert(i == op->id);
assert(op->op <= LAST_LOGICAL_OP);
switch ((enum LogicalOpType)op->op) {
case LOGICAL_OP_NOT:
if ((op->ro < rose->lkeyCount) &&
getLogicalVal(rose, lvec, op->ro)) {
// sub-expression not negative
return 0;
}
setLogicalVal(rose, lvec, op->id,
!getLogicalVal(rose, lvec, op->ro));
break;
case LOGICAL_OP_AND:
if (((op->lo < rose->lkeyCount) &&
getLogicalVal(rose, lvec, op->lo)) ||
((op->ro < rose->lkeyCount) &&
getLogicalVal(rose, lvec, op->ro))) {
// sub-expression not negative
return 0;
}
setLogicalVal(rose, lvec, op->id,
getLogicalVal(rose, lvec, op->lo) &
getLogicalVal(rose, lvec, op->ro)); // &&
break;
case LOGICAL_OP_OR:
if (((op->lo < rose->lkeyCount) &&
getLogicalVal(rose, lvec, op->lo)) ||
((op->ro < rose->lkeyCount) &&
getLogicalVal(rose, lvec, op->ro))) {
// sub-expression not negative
return 0;
}
setLogicalVal(rose, lvec, op->id,
getLogicalVal(rose, lvec, op->lo) |
getLogicalVal(rose, lvec, op->ro)); // ||
break;
}
}
return getLogicalVal(rose, lvec, result);
}
/** \brief Clear all keys in the logical vector. */
static really_inline
void clearLvec(const struct RoseEngine *rose, char *lvec, char *cvec) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:

View File

@ -238,10 +238,10 @@ hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t,
assert(id && id < t->size); // id is an offset into bytecode
const u64a som = 0;
const u8 flags = 0;
if (!scratch->pure) {
return roseRunProgram(t, scratch, id, som, end, flags);
} else {
if (t->pureLiteral) {
return roseRunProgram_l(t, scratch, id, som, end, flags);
} else {
return roseRunProgram(t, scratch, id, som, end, flags);
}
}
@ -591,6 +591,23 @@ int roseRunFlushCombProgram(const struct RoseEngine *rose,
return MO_CONTINUE_MATCHING;
}
/**
 * \brief Run the last-flush-combination program stored in the engine.
 *
 * The program at rose->lastFlushCombProgramOffset is executed at offset
 * \p end with no start-of-match value and no program flags.
 *
 * Returns MO_HALT_MATCHING if the stream is exhausted or the user has
 * instructed us to halt, or MO_CONTINUE_MATCHING otherwise.
 */
int roseRunLastFlushCombProgram(const struct RoseEngine *rose,
                                struct hs_scratch *scratch, u64a end) {
    const u32 program = rose->lastFlushCombProgramOffset;
    const u64a som = 0;
    const u8 flags = 0;

    const hwlmcb_rv_t rv =
        roseRunProgram(rose, scratch, program, som, end, flags);

    return (rv == HWLM_TERMINATE_MATCHING) ? MO_HALT_MATCHING
                                           : MO_CONTINUE_MATCHING;
}
int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) {
struct hs_scratch *scratch = context;
assert(scratch && scratch->magic == SCRATCH_MAGIC);
@ -602,8 +619,12 @@ int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) {
// Our match ID is the program offset.
const u32 program = id;
const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP;
hwlmcb_rv_t rv =
roseRunProgram(rose, scratch, program, start, end, flags);
hwlmcb_rv_t rv;
if (rose->pureLiteral) {
rv = roseRunProgram_l(rose, scratch, program, start, end, flags);
} else {
rv = roseRunProgram(rose, scratch, program, start, end, flags);
}
if (rv == HWLM_TERMINATE_MATCHING) {
return MO_HALT_MATCHING;
}

View File

@ -480,6 +480,25 @@ hwlmcb_rv_t roseReport(const struct RoseEngine *t, struct hs_scratch *scratch,
return roseHaltIfExhausted(t, scratch);
}
/**
 * \brief Deliver a combination report for \p onmatch at offset \p end.
 *
 * Returns HWLM_TERMINATE_MATCHING if the report delivery asks to halt.
 * When the report has an exhaustion key and delivery did not answer
 * ROSE_CONTINUE_MATCHING_NO_EXHAUST, the result of roseHaltIfExhausted() is
 * returned; otherwise matching continues.
 */
static rose_inline
hwlmcb_rv_t roseReportComb(const struct RoseEngine *t,
                           struct hs_scratch *scratch, u64a end,
                           ReportID onmatch, s32 offset_adjust, u32 ekey) {
    DEBUG_PRINTF("firing callback onmatch=%u, end=%llu\n", onmatch, end);

    const int delivered =
        roseDeliverReport(end, onmatch, offset_adjust, scratch, ekey);
    if (delivered == MO_HALT_MATCHING) {
        DEBUG_PRINTF("termination requested\n");
        return HWLM_TERMINATE_MATCHING;
    }

    // The exhaustion state only needs checking when this report has an
    // exhaustion key and delivery did not rule exhaustion out.
    if (ekey != INVALID_EKEY &&
        delivered != ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
        return roseHaltIfExhausted(t, scratch);
    }

    return HWLM_CONTINUE_MATCHING;
}
/* catches up engines enough to ensure any earlier mpv triggers are enqueued
* and then adds the trigger to the mpv queue. */
static rose_inline
@ -1866,7 +1885,7 @@ hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t,
}
DEBUG_PRINTF("Logical Combination Passed!\n");
if (roseReport(t, scratch, end, ci->id, 0,
if (roseReportComb(t, scratch, end, ci->id, 0,
ci->ekey) == HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
@ -1875,6 +1894,49 @@ hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t,
return HWLM_CONTINUE_MATCHING;
}
/**
 * \brief Report every combination that is satisfied purely by negation.
 *
 * For each combination key in the engine: skip it if \p end lies outside its
 * [min_offset, max_offset] window or its exhaustion key is already set, then
 * evaluate it with isPurelyNegativeMatch() and, if that passes, deliver the
 * report through roseReportComb().
 *
 * Returns HWLM_TERMINATE_MATCHING as soon as a report asks to stop,
 * HWLM_CONTINUE_MATCHING otherwise.
 */
static rose_inline
hwlmcb_rv_t checkPurelyNegatives(const struct RoseEngine *t,
                                 struct hs_scratch *scratch, u64a end) {
    // The combination info table lives at a fixed offset inside the engine.
    const struct CombInfo *combInfoMap = (const struct CombInfo *)
        ((const char *)t + t->combInfoMapOffset);

    for (u32 ckey = 0; ckey < t->ckeyCount; ckey++) {
        const struct CombInfo *ci = &combInfoMap[ckey];

        // Offset window checks: a zero min_offset / MAX_OFFSET max_offset
        // means the respective bound is not enforced.
        if ((ci->min_offset != 0) && (end < ci->min_offset)) {
            DEBUG_PRINTF("halt: before min_offset=%llu\n", ci->min_offset);
            continue;
        }
        if ((ci->max_offset != MAX_OFFSET) && (end > ci->max_offset)) {
            DEBUG_PRINTF("halt: after max_offset=%llu\n", ci->max_offset);
            continue;
        }

        DEBUG_PRINTF("check ekey %u\n", ci->ekey);
        if (ci->ekey != INVALID_EKEY) {
            assert(ci->ekey < t->ekeyCount);
            const char *evec = scratch->core_info.exhaustionVector;
            if (isExhausted(t, evec, ci->ekey)) {
                DEBUG_PRINTF("ekey %u already set, match is exhausted\n",
                             ci->ekey);
                continue;
            }
        }

        DEBUG_PRINTF("check ckey %u purely negative\n", ckey);
        char *lvec = scratch->core_info.logicalVector;
        if (isPurelyNegativeMatch(t, lvec, ci->start, ci->result)) {
            DEBUG_PRINTF("Logical Combination from purely negative Passed!\n");
            if (roseReportComb(t, scratch, end, ci->id, 0, ci->ekey)
                == HWLM_TERMINATE_MATCHING) {
                return HWLM_TERMINATE_MATCHING;
            }
        } else {
            DEBUG_PRINTF("Logical Combination from purely negative Failed!\n");
        }
    }

    return HWLM_CONTINUE_MATCHING;
}
#if !defined(_WIN32)
#define PROGRAM_CASE(name) \
case ROSE_INSTR_##name: { \
@ -2004,7 +2066,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
&&LABEL_ROSE_INSTR_SET_LOGICAL,
&&LABEL_ROSE_INSTR_SET_COMBINATION,
&&LABEL_ROSE_INSTR_FLUSH_COMBINATION,
&&LABEL_ROSE_INSTR_SET_EXHAUST
&&LABEL_ROSE_INSTR_SET_EXHAUST,
&&LABEL_ROSE_INSTR_LAST_FLUSH_COMBINATION
};
#endif
@ -2772,6 +2835,19 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(LAST_FLUSH_COMBINATION) {
assert(end >= tctxt->lastCombMatchOffset);
if (flushActiveCombinations(t, scratch)
== HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
if (checkPurelyNegatives(t, scratch, end)
== HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
}
PROGRAM_NEXT_INSTRUCTION
default: {
assert(0); // unreachable
scratch->core_info.status |= STATUS_ERROR;
@ -2808,6 +2884,7 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
assert(programOffset >= sizeof(struct RoseEngine));
assert(programOffset < t->size);
const char in_catchup = prog_flags & ROSE_PROG_FLAG_IN_CATCHUP;
const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV;
const char *pc_base = getByOffset(t, programOffset);
@ -2835,6 +2912,56 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(CHECK_GROUPS) {
DEBUG_PRINTF("groups=0x%llx, checking instr groups=0x%llx\n",
tctxt->groups, ri->groups);
if (!(ri->groups & tctxt->groups)) {
DEBUG_PRINTF("halt: no groups are set\n");
return HWLM_CONTINUE_MATCHING;
}
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(CHECK_MASK) {
struct core_info *ci = &scratch->core_info;
if (!roseCheckMask(ci, ri->and_mask, ri->cmp_mask,
ri->neg_mask, ri->offset, end)) {
DEBUG_PRINTF("failed mask check\n");
assert(ri->fail_jump); // must progress
pc += ri->fail_jump;
L_PROGRAM_NEXT_INSTRUCTION_JUMP
}
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(CHECK_MASK_32) {
struct core_info *ci = &scratch->core_info;
if (!roseCheckMask32(ci, ri->and_mask, ri->cmp_mask,
ri->neg_mask, ri->offset, end)) {
assert(ri->fail_jump);
pc += ri->fail_jump;
L_PROGRAM_NEXT_INSTRUCTION_JUMP
}
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(CHECK_BYTE) {
const struct core_info *ci = &scratch->core_info;
if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask,
ri->negation, ri->offset, end)) {
DEBUG_PRINTF("failed byte check\n");
assert(ri->fail_jump); // must progress
pc += ri->fail_jump;
L_PROGRAM_NEXT_INSTRUCTION_JUMP
}
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(PUSH_DELAYED) {
rosePushDelayedMatch(t, scratch, ri->delay, ri->index, end);
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(CATCH_UP) {
if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
@ -2891,6 +3018,17 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(REPORT_CHAIN) {
// Note: sequence points updated inside this function.
if (roseCatchUpAndHandleChainMatch(
t, scratch, ri->event, ri->top_squash_distance, end,
in_catchup) == HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
work_done = 1;
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(REPORT) {
updateSeqPoint(tctxt, end, from_mpv);
if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust,
@ -3041,6 +3179,24 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(INCLUDED_JUMP) {
if (scratch->fdr_conf) {
// squash the bucket of included literal
u8 shift = scratch->fdr_conf_offset & ~7U;
u64a mask = ((~(u64a)ri->squash) << shift);
*(scratch->fdr_conf) &= mask;
pc = getByOffset(t, ri->child_offset);
pc_base = pc;
programOffset = (const u8 *)pc_base -(const u8 *)t;
DEBUG_PRINTF("pc_base %p pc %p child_offset %u squash %u\n",
pc_base, pc, ri->child_offset, ri->squash);
work_done = 0;
L_PROGRAM_NEXT_INSTRUCTION_JUMP
}
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(SET_LOGICAL) {
DEBUG_PRINTF("set logical value of lkey %u, offset_adjust=%d\n",
ri->lkey, ri->offset_adjust);
@ -3082,6 +3238,19 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
}
L_PROGRAM_NEXT_INSTRUCTION
L_PROGRAM_CASE(LAST_FLUSH_COMBINATION) {
assert(end >= tctxt->lastCombMatchOffset);
if (flushActiveCombinations(t, scratch)
== HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
if (checkPurelyNegatives(t, scratch, end)
== HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
}
L_PROGRAM_NEXT_INSTRUCTION
default: {
assert(0); // unreachable
scratch->core_info.status |= STATUS_ERROR;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -56,4 +56,7 @@ int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program,
int roseRunFlushCombProgram(const struct RoseEngine *rose,
struct hs_scratch *scratch, u64a end);
int roseRunLastFlushCombProgram(const struct RoseEngine *rose,
struct hs_scratch *scratch, u64a end);
#endif // ROSE_H

View File

@ -2843,34 +2843,9 @@ vector<LitFragment> groupByFragment(const RoseBuildImpl &build) {
DEBUG_PRINTF("fragment candidate: lit_id=%u %s\n", lit_id,
dumpString(lit.s).c_str());
/** 0:/xxabcdefgh/ */
/** 1:/yyabcdefgh/ */
/** 2:/yyabcdefgh.+/ */
// Above 3 patterns should firstly convert into RoseLiteralMap with
// 2 elements ("xxabcdefgh" and "yyabcdefgh"), then convert into
// LitFragment with 1 element ("abcdefgh"). Special care should be
// taken to handle the 'pure' flag during the conversion.
rose_literal_id lit_frag = getFragment(lit);
auto it = frag_info.find(lit_frag);
if (it != frag_info.end()) {
if (!lit_frag.s.get_pure() && it->first.s.get_pure()) {
struct FragmentInfo f_info = it->second;
f_info.lit_ids.push_back(lit_id);
f_info.groups |= groups;
frag_info.erase(it->first);
frag_info.emplace(lit_frag, f_info);
} else {
it->second.lit_ids.push_back(lit_id);
it->second.groups |= groups;
}
} else {
struct FragmentInfo f_info;
f_info.lit_ids.push_back(lit_id);
f_info.groups |= groups;
frag_info.emplace(lit_frag, f_info);
}
auto &fi = frag_info[getFragment(lit)];
fi.lit_ids.push_back(lit_id);
fi.groups |= groups;
}
for (auto &m : frag_info) {
@ -3370,6 +3345,15 @@ RoseProgram makeFlushCombProgram(const RoseEngine &t) {
return program;
}
/** \brief Build the last-flush-combination program for engine \p t.
 *
 * The program stays as default-constructed unless the engine has at least
 * one combination key. */
static
RoseProgram makeLastFlushCombProgram(const RoseEngine &t) {
    RoseProgram program;
    const bool has_combinations = t.ckeyCount != 0;
    if (has_combinations) {
        addLastFlushCombinationProgram(program);
    }
    return program;
}
static
u32 history_required(const rose_literal_id &key) {
if (key.msk.size() < key.s.length()) {
@ -3740,6 +3724,10 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
auto flushComb_prog = makeFlushCombProgram(proto);
proto.flushCombProgramOffset = writeProgram(bc, move(flushComb_prog));
auto lastFlushComb_prog = makeLastFlushCombProgram(proto);
proto.lastFlushCombProgramOffset =
writeProgram(bc, move(lastFlushComb_prog));
// Build anchored matcher.
auto atable = buildAnchoredMatcher(*this, fragments, anchored_dfas);
if (atable) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -1486,6 +1486,9 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(LAST_FLUSH_COMBINATION) {}
PROGRAM_NEXT_INSTRUCTION
default:
os << " UNKNOWN (code " << int{code} << ")" << endl;
os << " <stopping>" << endl;
@ -1557,6 +1560,25 @@ void dumpRoseFlushCombPrograms(const RoseEngine *t, const string &filename) {
os.close();
}
static
void dumpRoseLastFlushCombPrograms(const RoseEngine *t,
const string &filename) {
ofstream os(filename);
const char *base = (const char *)t;
if (t->lastFlushCombProgramOffset) {
os << "Last Flush Combination Program @ "
<< t->lastFlushCombProgramOffset
<< ":" << endl;
dumpProgram(os, t, base + t->lastFlushCombProgramOffset);
os << endl;
} else {
os << "<No Last Flush Combination Program>" << endl;
}
os.close();
}
static
void dumpRoseReportPrograms(const RoseEngine *t, const string &filename) {
ofstream os(filename);
@ -2249,6 +2271,8 @@ void roseDumpPrograms(const vector<LitFragment> &fragments, const RoseEngine *t,
dumpRoseLitPrograms(fragments, t, base + "/rose_lit_programs.txt");
dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt");
dumpRoseFlushCombPrograms(t, base + "/rose_flush_comb_programs.txt");
dumpRoseLastFlushCombPrograms(t,
base + "/rose_last_flush_comb_programs.txt");
dumpRoseReportPrograms(t, base + "/rose_report_programs.txt");
dumpRoseAnchoredPrograms(t, base + "/rose_anchored_programs.txt");
dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt");

View File

@ -340,14 +340,7 @@ public:
std::pair<u32, bool> insert(const rose_literal_id &lit) {
auto it = lits_index.find(lit);
if (it != lits_index.end()) {
u32 idx = it->second;
auto &l = lits.at(idx);
if (!lit.s.get_pure() && l.s.get_pure()) {
lits_index.erase(l);
l.s.unset_pure();
lits_index.emplace(l, idx);
}
return {idx, false};
return {it->second, false};
}
u32 id = verify_u32(lits.size());
lits.push_back(lit);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2018, Intel Corporation
* Copyright (c) 2017-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -48,6 +48,7 @@ RoseInstrMatcherEod::~RoseInstrMatcherEod() = default;
RoseInstrEnd::~RoseInstrEnd() = default;
RoseInstrClearWorkDone::~RoseInstrClearWorkDone() = default;
RoseInstrFlushCombination::~RoseInstrFlushCombination() = default;
RoseInstrLastFlushCombination::~RoseInstrLastFlushCombination() = default;
using OffsetMap = RoseInstruction::OffsetMap;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2018, Intel Corporation
* Copyright (c) 2017-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -2206,6 +2206,14 @@ public:
~RoseInstrFlushCombination() override;
};
/**
* \brief Build-time instruction object for ROSE_INSTR_LAST_FLUSH_COMBINATION.
*
* Derives from RoseInstrBaseTrivial, pairing the opcode with its bytecode
* structure ROSE_STRUCT_LAST_FLUSH_COMBINATION; the class declares no members
* of its own beyond the destructor.
*/
class RoseInstrLastFlushCombination
: public RoseInstrBaseTrivial<ROSE_INSTR_LAST_FLUSH_COMBINATION,
ROSE_STRUCT_LAST_FLUSH_COMBINATION,
RoseInstrLastFlushCombination> {
public:
~RoseInstrLastFlushCombination() override;
};
class RoseInstrSetExhaust
: public RoseInstrBaseNoTargets<ROSE_INSTR_SET_EXHAUST,
ROSE_STRUCT_SET_EXHAUST,

View File

@ -727,7 +727,6 @@ void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp,
const auto &s_final = lit_final.get_string();
bool nocase = lit_final.any_nocase();
bool pure = f.s.get_pure();
DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, cmp=%s\n",
f.fragment_id, escapeString(s_final).c_str(), (int)nocase,
@ -741,7 +740,7 @@ void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp,
const auto &groups = f.groups;
mp.lits.emplace_back(move(s_final), nocase, noruns, f.fragment_id,
groups, msk, cmp, pure);
groups, msk, cmp);
}
static

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2018, Intel Corporation
* Copyright (c) 2016-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -317,6 +317,10 @@ void addFlushCombinationProgram(RoseProgram &program) {
program.add_before_end(make_unique<RoseInstrFlushCombination>());
}
/**
* \brief Add a RoseInstrLastFlushCombination instruction to \p program.
*
* The instruction is inserted with add_before_end(), i.e. presumably ahead of
* the program's terminating instruction -- confirm against RoseProgram.
*/
void addLastFlushCombinationProgram(RoseProgram &program) {
program.add_before_end(make_unique<RoseInstrLastFlushCombination>());
}
static
void makeRoleCheckLeftfix(const RoseBuildImpl &build,
const map<RoseVertex, left_build_info> &leftfix_info,

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2018, Intel Corporation
* Copyright (c) 2016-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -188,6 +188,7 @@ void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program);
void addSuffixesEodProgram(RoseProgram &program);
void addMatcherEodProgram(RoseProgram &program);
void addFlushCombinationProgram(RoseProgram &program);
void addLastFlushCombinationProgram(RoseProgram &program);
static constexpr u32 INVALID_QUEUE = ~0U;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -328,6 +328,7 @@ struct RoseBoundaryReports {
* nfas). Rose nfa info table can distinguish the cases.
*/
struct RoseEngine {
u8 pureLiteral; /* Indicator of pure literal API */
u8 noFloatingRoots; /* only need to run the anchored table if something
* matched in the anchored table */
u8 requiresEodCheck; /* stuff happens at eod time */
@ -426,6 +427,8 @@ struct RoseEngine {
u32 eodProgramOffset; //!< EOD program, otherwise 0.
u32 flushCombProgramOffset; /**< FlushCombination program, otherwise 0 */
u32 lastFlushCombProgramOffset; /**< LastFlushCombination program,
* otherwise 0 */
u32 lastByteHistoryIterOffset; // if non-zero

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -201,7 +201,14 @@ enum RoseInstructionCode {
/** \brief Mark as exhausted instead of report while quiet. */
ROSE_INSTR_SET_EXHAUST,
LAST_ROSE_INSTRUCTION = ROSE_INSTR_SET_EXHAUST //!< Sentinel.
/**
* \brief Calculate any combination's logical value if none of its
* sub-expressions match until EOD, then check whether it complies with
* any logical constraints.
*/
ROSE_INSTR_LAST_FLUSH_COMBINATION,
LAST_ROSE_INSTRUCTION = ROSE_INSTR_LAST_FLUSH_COMBINATION //!< Sentinel.
};
struct ROSE_STRUCT_END {
@ -674,4 +681,8 @@ struct ROSE_STRUCT_SET_EXHAUST {
u8 code; //!< From enum RoseInstructionCode.
u32 ekey; //!< Exhaustion key.
};
/** \brief Bytecode layout of the LAST_FLUSH_COMBINATION instruction: it
* carries no operands, only its opcode. */
struct ROSE_STRUCT_LAST_FLUSH_COMBINATION {
u8 code; //!< From enum RoseInstructionCode.
};
#endif // ROSE_ROSE_PROGRAM_H

View File

@ -141,7 +141,6 @@ void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose,
s->deduper.current_report_offset = ~0ULL;
s->deduper.som_log_dirty = 1; /* som logs have not been cleared */
s->fdr_conf = NULL;
s->pure = 0;
// Rose program execution (used for some report paths) depends on these
// values being initialised.
@ -455,8 +454,9 @@ set_retval:
return HS_UNKNOWN_ERROR;
}
if (rose->flushCombProgramOffset) {
if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) {
if (rose->lastFlushCombProgramOffset) {
if (roseRunLastFlushCombProgram(rose, scratch, length)
== MO_HALT_MATCHING) {
if (unlikely(internal_matching_error(scratch))) {
unmarkScratchInUse(scratch);
return HS_UNKNOWN_ERROR;
@ -651,8 +651,10 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
scratch->core_info.logicalVector = state +
rose->stateOffsets.logicalVec;
scratch->core_info.combVector = state + rose->stateOffsets.combVec;
if (!id->offset) {
scratch->tctxt.lastCombMatchOffset = id->offset;
}
}
if (rose->somLocationCount) {
loadSomFromStream(scratch, id->offset);
@ -698,8 +700,9 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
}
}
if (rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) {
if (rose->lastFlushCombProgramOffset && !told_to_stop_matching(scratch)) {
if (roseRunLastFlushCombProgram(rose, scratch, id->offset)
== MO_HALT_MATCHING) {
DEBUG_PRINTF("told to stop matching\n");
scratch->core_info.status |= STATUS_TERMINATED;
}
@ -906,8 +909,10 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
scratch->core_info.logicalVector = state +
rose->stateOffsets.logicalVec;
scratch->core_info.combVector = state + rose->stateOffsets.combVec;
if (!id->offset) {
scratch->tctxt.lastCombMatchOffset = id->offset;
}
}
assert(scratch->core_info.hlen <= id->offset
&& scratch->core_info.hlen <= rose->historyRequired);
@ -1013,18 +1018,6 @@ hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
unmarkScratchInUse(scratch);
}
if (id->rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
if (roseRunFlushCombProgram(id->rose, scratch, ~0ULL)
== MO_HALT_MATCHING) {
scratch->core_info.status |= STATUS_TERMINATED;
if (unlikely(internal_matching_error(scratch))) {
unmarkScratchInUse(scratch);
return HS_UNKNOWN_ERROR;
}
unmarkScratchInUse(scratch);
}
}
hs_stream_free(id);
return HS_SUCCESS;
@ -1054,18 +1047,6 @@ hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags,
unmarkScratchInUse(scratch);
}
if (id->rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
if (roseRunFlushCombProgram(id->rose, scratch, ~0ULL)
== MO_HALT_MATCHING) {
scratch->core_info.status |= STATUS_TERMINATED;
if (unlikely(internal_matching_error(scratch))) {
unmarkScratchInUse(scratch);
return HS_UNKNOWN_ERROR;
}
unmarkScratchInUse(scratch);
}
}
// history already initialised
init_stream(id, id->rose, 0);

View File

@ -137,7 +137,6 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
s->scratchSize = alloc_size;
s->scratch_alloc = (char *)s_tmp;
s->fdr_conf = NULL;
s->pure = 0;
// each of these is at an offset from the previous
char *current = (char *)s + sizeof(*s);
@ -280,7 +279,9 @@ hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db,
hs_error_t proto_ret = hs_check_alloc(proto_tmp);
if (proto_ret != HS_SUCCESS) {
hs_scratch_free(proto_tmp);
hs_scratch_free(*scratch);
if (*scratch) {
hs_scratch_free((*scratch)->scratch_alloc);
}
*scratch = NULL;
return proto_ret;
}

View File

@ -211,7 +211,6 @@ struct ALIGN_CL_DIRECTIVE hs_scratch {
u64a *fdr_conf; /**< FDR confirm value */
u8 fdr_conf_offset; /**< offset where FDR/Teddy front end matches
* in buffer */
u8 pure; /**< indicator of pure-literal or cutting-literal */
};
/* array of fatbit ptr; TODO: why not an array of fatbits? */

View File

@ -192,11 +192,11 @@ vector<mmbit_sparse_iter> mmbBuildSparseIterator(const vector<u32> &bits,
template<typename T>
static
void add_scatter(vector<T> *out, u32 offset, u64a mask) {
T su;
out->emplace_back();
T &su = out->back();
memset(&su, 0, sizeof(su));
su.offset = offset;
su.val = mask;
out->push_back(su);
DEBUG_PRINTF("add %llu at offset %u\n", mask, offset);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -283,7 +283,6 @@ ue2_literal &ue2_literal::erase(size_type pos, size_type n) {
}
void ue2_literal::push_back(char c, bool nc) {
assert(!nc || ourisalpha(c));
if (nc) {
c = mytoupper(c);
}

View File

@ -211,17 +211,10 @@ public:
size_t hash() const;
void set_pure() { pure = true; }
void unset_pure() { pure = false; }
bool get_pure() const { return pure; }
/* TODO: consider existing member functions possibly related with pure. */
private:
friend const_iterator;
std::string s;
boost::dynamic_bitset<> nocase;
bool pure = false; /**< born from cutting or not (pure literal). */
};
/// Return a reversed copy of this literal.

View File

@ -56,10 +56,7 @@ if (BUILD_CHIMERA)
engine_pcre.cpp
engine_pcre.h
)
endif()
add_executable(hsbench ${hsbench_SOURCES})
if (BUILD_CHIMERA)
include_directories(${PCRE_INCLUDE_DIRS})
if(NOT WIN32)
target_link_libraries(hsbench hs chimera ${PCRE_LDFLAGS} databaseutil
@ -69,6 +66,11 @@ if (BUILD_CHIMERA)
expressionutil ${SQLITE3_LDFLAGS} ${CMAKE_THREAD_LIBS_INIT})
endif()
else()
if(WIN32 AND (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS))
add_executable(hsbench ${hsbench_SOURCES} $<TARGET_OBJECTS:hs_compile_shared> $<TARGET_OBJECTS:hs_exec_shared>)
else()
add_executable(hsbench ${hsbench_SOURCES})
endif()
target_link_libraries(hsbench hs databaseutil expressionutil
${SQLITE3_LDFLAGS} ${CMAKE_THREAD_LIBS_INIT})
endif()

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2018, Intel Corporation
* Copyright (c) 2016-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -41,6 +41,7 @@ extern unsigned int somPrecisionMode;
extern bool forceEditDistance;
extern unsigned editDistance;
extern bool printCompressSize;
extern bool useLiteralApi;
/** Structure for the result of a single complete scan. */
struct ResultEntry {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2018, Intel Corporation
* Copyright (c) 2016-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -411,22 +411,30 @@ buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode,
ext_ptr[i] = &ext[i];
}
Timer timer;
timer.start();
hs_compile_error_t *compile_err;
Timer timer;
#ifndef RELEASE_BUILD
err = hs_compile_multi_int(patterns.data(), flags.data(), ids.data(),
ext_ptr.data(), count, full_mode, nullptr,
&db, &compile_err, grey);
#else
err = hs_compile_ext_multi(patterns.data(), flags.data(), ids.data(),
ext_ptr.data(), count, full_mode, nullptr,
&db, &compile_err);
#endif
if (useLiteralApi) {
// Pattern length computation should be done before timer start.
vector<size_t> lens(count);
for (unsigned int i = 0; i < count; i++) {
lens[i] = strlen(patterns[i]);
}
timer.start();
err = hs_compile_lit_multi_int(patterns.data(), flags.data(),
ids.data(), ext_ptr.data(),
lens.data(), count, full_mode,
nullptr, &db, &compile_err, grey);
timer.complete();
} else {
timer.start();
err = hs_compile_multi_int(patterns.data(), flags.data(),
ids.data(), ext_ptr.data(), count,
full_mode, nullptr, &db, &compile_err,
grey);
timer.complete();
}
compileSecs = timer.seconds();
peakMemorySize = getPeakHeap();

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2018, Intel Corporation
* Copyright (c) 2016-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -87,6 +87,7 @@ unsigned int somPrecisionMode = HS_MODE_SOM_HORIZON_LARGE;
bool forceEditDistance = false;
unsigned editDistance = 0;
bool printCompressSize = false;
bool useLiteralApi = false;
// Globals local to this file.
static bool compressStream = false;
@ -218,6 +219,7 @@ void usage(const char *error) {
printf(" --per-scan Display per-scan Mbit/sec results.\n");
printf(" --echo-matches Display all matches that occur during scan.\n");
printf(" --sql-out FILE Output sqlite db.\n");
printf(" --literal-on Use Hyperscan pure literal matching.\n");
printf(" -S NAME Signature set name (for sqlite db).\n");
printf("\n\n");
@ -250,6 +252,7 @@ void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
int do_echo_matches = 0;
int do_sql_output = 0;
int option_index = 0;
int literalFlag = 0;
vector<string> sigFiles;
static struct option longopts[] = {
@ -257,6 +260,7 @@ void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
{"echo-matches", no_argument, &do_echo_matches, 1},
{"compress-stream", no_argument, &do_compress, 1},
{"sql-out", required_argument, &do_sql_output, 1},
{"literal-on", no_argument, &literalFlag, 1},
{nullptr, 0, nullptr, 0}
};
@ -463,6 +467,8 @@ void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
loadSignatureList(file, sigs);
sigSets.emplace_back(file, move(sigs));
}
useLiteralApi = (bool)literalFlag;
}
/** Start the global timer. */

View File

@ -15,8 +15,12 @@ if (BUILD_CHIMERA)
else()
target_link_libraries(hscheck hs chimera pcre expressionutil)
endif()
else()
if(WIN32 AND (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS))
add_executable(hscheck ${hscheck_SOURCES} $<TARGET_OBJECTS:hs_compile_shared> $<TARGET_OBJECTS:hs_exec_shared>)
else()
add_executable(hscheck ${hscheck_SOURCES})
endif()
if(NOT WIN32)
target_link_libraries(hscheck hs expressionutil pthread)
else()

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -92,6 +92,7 @@ bool g_allSignatures = false;
bool g_forceEditDistance = false;
bool build_sigs = false;
bool check_logical = false;
bool use_literal_api = false;
unsigned int g_signature;
unsigned int g_editDistance;
unsigned int globalFlags = 0;
@ -322,11 +323,26 @@ void checkExpression(UNUSED void *threadarg) {
#if !defined(RELEASE_BUILD)
// This variant is available in non-release builds and allows us to
// modify greybox settings.
err = hs_compile_multi_int(&regexp, &flags, nullptr, &extp, 1, mode,
nullptr, &db, &compile_err, *g_grey);
if (use_literal_api) {
size_t len = strlen(regexp);
err = hs_compile_lit_multi_int(&regexp, &flags, nullptr, &extp,
&len, 1, mode, nullptr, &db,
&compile_err, *g_grey);
} else {
err = hs_compile_multi_int(&regexp, &flags, nullptr, &extp, 1,
mode, nullptr, &db, &compile_err,
*g_grey);
}
#else
err = hs_compile_ext_multi(&regexp, &flags, nullptr, &extp, 1, mode,
nullptr, &db, &compile_err);
if (use_literal_api) {
size_t len = strlen(regexp);
err = hs_compile_lit_multi_int(&regexp, &flags, nullptr, &extp,
&len, 1, mode, nullptr, &db,
&compile_err, *g_grey);
} else {
err = hs_compile_ext_multi(&regexp, &flags, nullptr, &extp, 1,
mode, nullptr, &db, &compile_err);
}
#endif
if (err == HS_SUCCESS) {
@ -381,6 +397,11 @@ void checkLogicalExpression(UNUSED void *threadarg) {
ExprExtMap::const_iterator it;
while (getNextLogicalExpression(it)) {
if (use_literal_api) {
recordSuccess(g_exprMap, it->first);
continue;
}
const ParsedExpr &comb = it->second;
vector<unsigned> subIds;
@ -470,6 +491,7 @@ void usage() {
<< " -h Display this help." << endl
<< " -B Build signature set." << endl
<< " -C Check logical combinations (default: off)." << endl
<< " --literal-on Processing pure literals, no need to check." << endl
<< endl;
}
@ -477,9 +499,15 @@ static
void processArgs(int argc, char *argv[], UNUSED unique_ptr<Grey> &grey) {
const char options[] = "e:E:s:z:hHLNV8G:T:BC";
bool signatureSet = false;
int literalFlag = 0;
static struct option longopts[] = {
{"literal-on", no_argument, &literalFlag, 1},
{nullptr, 0, nullptr, 0}
};
for (;;) {
int c = getopt_long(argc, argv, options, nullptr, nullptr);
int c = getopt_long(argc, argv, options, longopts, nullptr);
if (c < 0) {
break;
}
@ -539,6 +567,9 @@ void processArgs(int argc, char *argv[], UNUSED unique_ptr<Grey> &grey) {
case 'C':
check_logical = true;
break;
case 0:
case 1:
break;
default:
usage();
exit(1);
@ -564,6 +595,8 @@ void processArgs(int argc, char *argv[], UNUSED unique_ptr<Grey> &grey) {
usage();
exit(1);
}
use_literal_api = (bool)literalFlag;
}
static

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -299,6 +299,46 @@ char isLogicalCombination(vector<char> &lv, const vector<LogicalOp> &comb,
return lv[result];
}
/**
 * \brief Returns 1 if combination matches when no sub-expression matches.
 *
 * Evaluates the logical operations numbered \p start through \p result (in
 * order), storing each operation's value in \p lv. Evaluation is abandoned
 * with a result of 0 as soon as an operation directly references an operand
 * whose key is below \p lkeyCount and whose entry in \p lv is already set.
 * Otherwise the value computed for \p result is returned.
 */
static
char isPurelyNegativeMatch(vector<char> &lv, const vector<LogicalOp> &comb,
                           size_t lkeyCount, unsigned start, unsigned result) {
    assert(start <= result);

    // True when the operand key lies below lkeyCount (i.e. it is not the
    // output of another logical operation) and its value in lv is set.
    const auto subExprIsSet = [&lv, lkeyCount](size_t key) -> bool {
        return key < lkeyCount && lv[key];
    };

    unsigned cur = start;
    while (cur <= result) {
        // Operations are stored contiguously, offset by the key count.
        const LogicalOp &node = comb[cur - lkeyCount];
        assert(cur == node.id);

        if (node.op == LOGICAL_OP_NOT) {
            // NOT is unary: only the right operand is inspected.
            if (subExprIsSet(node.ro)) {
                return 0; // sub-expression not negative
            }
            lv[node.id] = !lv[node.ro];
        } else if (node.op == LOGICAL_OP_AND) {
            if (subExprIsSet(node.lo) || subExprIsSet(node.ro)) {
                return 0; // sub-expression not negative
            }
            lv[node.id] = lv[node.lo] & lv[node.ro]; // &&
        } else if (node.op == LOGICAL_OP_OR) {
            if (subExprIsSet(node.lo) || subExprIsSet(node.ro)) {
                return 0; // sub-expression not negative
            }
            lv[node.id] = lv[node.lo] | lv[node.ro]; // ||
        } else {
            // Unknown operation code.
            assert(0);
        }
        cur++;
    }

    return lv[result];
}
bool GraphTruth::run(unsigned, const CompiledNG &cng, const CNGInfo &cngi,
const string &buffer, ResultSet &rs, string &error) {
if (cngi.quiet) {
@ -359,6 +399,13 @@ bool GraphTruth::run(unsigned, const CompiledNG &cng, const CNGInfo &cngi,
}
}
}
if (isPurelyNegativeMatch(lv, comb, m_lkey.size(),
li.start, li.result)) {
u64a to = buffer.length();
if ((to >= cngi.min_offset) && (to <= cngi.max_offset)) {
rs.addMatch(0, to);
}
}
return true;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -43,6 +43,7 @@
#include "parser/Parser.h"
#include "parser/parse_error.h"
#include "util/make_unique.h"
#include "util/string_util.h"
#include "util/unicode_def.h"
#include "util/unordered.h"
@ -111,6 +112,15 @@ bool decodeExprPcre(string &expr, unsigned *flags, bool *highlander,
return false;
}
if (use_literal_api) {
// filter out flags not supported by pure literal API.
u32 not_supported = HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8 |
HS_FLAG_UCP | HS_FLAG_PREFILTER;
hs_flags &= ~not_supported;
force_utf8 = false;
force_prefilter = false;
}
expr.swap(regex);
if (!getPcreFlags(hs_flags, flags, highlander, prefilter, som,
@ -260,9 +270,29 @@ GroundTruth::compile(unsigned id, bool no_callouts) {
throw PcreCompileFailure("Unable to decode flags.");
}
// When hyperscan literal api is on, transfer the regex string into hex.
if (use_literal_api && !combination) {
unsigned char *pat
= reinterpret_cast<unsigned char *>(const_cast<char *>(re.c_str()));
char *str = makeHex(pat, re.length());
if (!str) {
throw PcreCompileFailure("makeHex() malloc failure.");
}
re.assign(str);
free(str);
}
// filter out flags not supported by PCRE
u64a supported = HS_EXT_FLAG_MIN_OFFSET | HS_EXT_FLAG_MAX_OFFSET |
HS_EXT_FLAG_MIN_LENGTH;
if (use_literal_api) {
ext.flags &= 0ULL;
ext.min_offset = 0;
ext.max_offset = MAX_OFFSET;
ext.min_length = 0;
ext.edit_distance = 0;
ext.hamming_distance = 0;
}
if (ext.flags & ~supported) {
// edit distance is a known unsupported flag, so just throw a soft error
if (ext.flags & HS_EXT_FLAG_EDIT_DISTANCE) {
@ -314,7 +344,6 @@ GroundTruth::compile(unsigned id, bool no_callouts) {
return compiled;
}
compiled->bytecode =
pcre_compile2(re.c_str(), flags, &errcode, &errptr, &errloc, nullptr);
@ -557,6 +586,46 @@ char isLogicalCombination(vector<char> &lv, const vector<LogicalOp> &comb,
return lv[result];
}
/**
 * \brief Returns 1 if combination matches when no sub-expression matches.
 *
 * Evaluates the logical operations numbered \p start through \p result (in
 * order), storing each operation's value in \p lv. Evaluation is abandoned
 * with a result of 0 as soon as an operation directly references an operand
 * whose key is below \p lkeyCount and whose entry in \p lv is already set.
 * Otherwise the value computed for \p result is returned.
 */
static
char isPurelyNegativeMatch(vector<char> &lv, const vector<LogicalOp> &comb,
                           size_t lkeyCount, unsigned start, unsigned result) {
    assert(start <= result);

    // True when the operand key lies below lkeyCount (i.e. it is not the
    // output of another logical operation) and its value in lv is set.
    const auto subExprIsSet = [&lv, lkeyCount](size_t key) -> bool {
        return key < lkeyCount && lv[key];
    };

    unsigned cur = start;
    while (cur <= result) {
        // Operations are stored contiguously, offset by the key count.
        const LogicalOp &node = comb[cur - lkeyCount];
        assert(cur == node.id);

        if (node.op == LOGICAL_OP_NOT) {
            // NOT is unary: only the right operand is inspected.
            if (subExprIsSet(node.ro)) {
                return 0; // sub-expression not negative
            }
            lv[node.id] = !lv[node.ro];
        } else if (node.op == LOGICAL_OP_AND) {
            if (subExprIsSet(node.lo) || subExprIsSet(node.ro)) {
                return 0; // sub-expression not negative
            }
            lv[node.id] = lv[node.lo] & lv[node.ro]; // &&
        } else if (node.op == LOGICAL_OP_OR) {
            if (subExprIsSet(node.lo) || subExprIsSet(node.ro)) {
                return 0; // sub-expression not negative
            }
            lv[node.id] = lv[node.lo] | lv[node.ro]; // ||
        } else {
            // Unknown operation code.
            assert(0);
        }
        cur++;
    }

    return lv[result];
}
bool GroundTruth::run(unsigned, const CompiledPcre &compiled,
const string &buffer, ResultSet &rs, string &error) {
if (compiled.quiet) {
@ -616,6 +685,13 @@ bool GroundTruth::run(unsigned, const CompiledPcre &compiled,
}
}
}
if (isPurelyNegativeMatch(lv, comb, m_lkey.size(),
li.start, li.result)) {
u64a to = buffer.length();
if ((to >= compiled.min_offset) && (to <= compiled.max_offset)) {
rs.addMatch(0, to);
}
}
return true;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -32,6 +32,7 @@
#include "ng_corpus_generator.h"
#include "NfaGeneratedCorpora.h"
#include "ExpressionParser.h"
#include "common.h"
#include "grey.h"
#include "hs_compile.h"
@ -44,6 +45,7 @@
#include "util/compile_context.h"
#include "util/compile_error.h"
#include "util/report_manager.h"
#include "util/string_util.h"
#include "util/target_info.h"
#include <string>
@ -80,6 +82,18 @@ void NfaGeneratedCorpora::generate(unsigned id, vector<Corpus> &data) {
throw CorpusFailure("Expression could not be read: " + i->second);
}
// When hyperscan literal api is on, transfer the regex string into hex.
if (use_literal_api && !(hs_flags & HS_FLAG_COMBINATION)) {
unsigned char *pat
= reinterpret_cast<unsigned char *>(const_cast<char *>(re.c_str()));
char *str = makeHex(pat, re.length());
if (!str) {
throw CorpusFailure("makeHex() malloc failure.");
}
re.assign(str);
free(str);
}
// A combination's corpus consists of its sub-expressions' corpora.
if (hs_flags & HS_FLAG_COMBINATION) {
ParsedLogical pl;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -925,11 +925,22 @@ compileHyperscan(vector<const char *> &patterns, vector<unsigned> &flags,
const unsigned count = patterns.size();
hs_database_t *db = nullptr;
hs_compile_error_t *compile_err;
hs_error_t err;
hs_error_t err = hs_compile_multi_int(&patterns[0], &flags[0],
&idsvec[0], ext.c_array(), count,
mode, platform, &db,
if (use_literal_api) {
// Compute length of each pattern.
vector<size_t> lens(count);
for (unsigned int i = 0; i < count; i++) {
lens[i] = strlen(patterns[i]);
}
err = hs_compile_lit_multi_int(&patterns[0], &flags[0], &idsvec[0],
ext.c_array(), &lens[0], count, mode,
platform, &db, &compile_err, grey);
} else {
err = hs_compile_multi_int(&patterns[0], &flags[0], &idsvec[0],
ext.c_array(), count, mode, platform, &db,
&compile_err, grey);
}
if (err != HS_SUCCESS) {
error = compile_err->message;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -116,6 +116,7 @@ void usage(const char *name, const char *error) {
printf(" --abort-on-fail Abort, rather than exit, on failure.\n");
printf(" --no-signal-handler Do not handle handle signals (to generate "
"backtraces).\n");
printf(" --literal-on Use Hyperscan pure literal matching.\n");
printf("\n");
printf("Memory and resource control options:\n");
printf("\n");
@ -174,6 +175,7 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop,
int mangleScratch = 0;
int compressFlag = 0;
int compressResetFlag = 0;
int literalFlag = 0;
static const struct option longopts[] = {
{"copy-scratch", 0, &copyScratch, 1},
{"copy-stream", 0, &copyStream, 1},
@ -187,6 +189,7 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop,
{"compress-expand", 0, &compressFlag, 1},
{"compress-reset-expand", 0, &compressResetFlag, 1},
{"no-groups", 0, &no_groups, 1},
{"literal-on", 0, &literalFlag, 1},
{nullptr, 0, nullptr, 0}};
for (;;) {
@ -589,4 +592,5 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop,
use_mangle_scratch = (bool) mangleScratch;
use_compress_expand = (bool)compressFlag;
use_compress_reset_expand = (bool)compressResetFlag;
use_literal_api = (bool)literalFlag;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -82,6 +82,7 @@ extern bool use_copy_stream;
extern bool use_mangle_scratch;
extern bool use_compress_expand;
extern bool use_compress_reset_expand;
extern bool use_literal_api;
extern int abort_on_failure;
extern int no_signal_handler;
extern bool force_edit_distance;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -118,6 +118,7 @@ bool use_copy_stream = false;
bool use_mangle_scratch = false;
bool use_compress_expand = false;
bool use_compress_reset_expand = false;
bool use_literal_api = false;
int abort_on_failure = 0;
int no_signal_handler = 0;
size_t max_scan_queue_len = 25000;

View File

@ -42,7 +42,10 @@
#ifdef HAVE_BACKTRACE
#include <execinfo.h>
#include <unistd.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h> // for _exit
#endif
#define BACKTRACE_BUFFER_SIZE 200

View File

@ -10,6 +10,10 @@ include_directories(${PROJECT_SOURCE_DIR}/util)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
# Define the hsdump executable. When a shared library is being built on
# Windows, the object files of the hs_compile_shared and hs_exec_shared
# targets are compiled directly into the executable alongside main.cpp.
# NOTE(review): presumably needed because the tool uses symbols the DLL does
# not export -- confirm.
if(WIN32 AND (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS))
    add_executable(hsdump main.cpp $<TARGET_OBJECTS:hs_compile_shared> $<TARGET_OBJECTS:hs_exec_shared>)
else()
    # All other configurations build from main.cpp only.
    add_executable(hsdump main.cpp)
endif()
target_link_libraries(hsdump hs expressionutil crosscompileutil)

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -106,6 +106,8 @@ bool dump_intermediate = true;
bool force_edit_distance = false;
u32 edit_distance = 0;
int use_literal_api = 0;
} // namespace
// Usage statement.
@ -139,6 +141,7 @@ void usage(const char *name, const char *error) {
printf(" -8 Force UTF8 mode on all patterns.\n");
printf(" -L Apply HS_FLAG_SOM_LEFTMOST to all patterns.\n");
printf(" --prefilter Apply HS_FLAG_PREFILTER to all patterns.\n");
printf(" --literal-on Use Hyperscan pure literal matching API.\n");
printf("\n");
printf("Example:\n");
printf("$ %s -e pattern.file -s sigfile\n", name);
@ -163,6 +166,7 @@ void processArgs(int argc, char *argv[], Grey &grey) {
{"utf8", no_argument, nullptr, '8'},
{"prefilter", no_argument, &force_prefilter, 1},
{"som-width", required_argument, nullptr, 'd'},
{"literal-on", no_argument, &use_literal_api, 1},
{nullptr, 0, nullptr, 0}
};
@ -501,9 +505,23 @@ unsigned int dumpDataMulti(const vector<const char *> &patterns,
hs_database_t *db = nullptr;
hs_compile_error_t *compile_err;
hs_error_t err = hs_compile_multi_int(
patterns.data(), flags.data(), ids.data(), ext.c_array(),
patterns.size(), mode, plat_info.get(), &db, &compile_err, grey);
hs_error_t err;
const size_t count = patterns.size();
if (use_literal_api) {
// Compute length of each pattern.
vector<size_t> lens(count);
for (unsigned int i = 0; i < count; i++) {
lens[i] = strlen(patterns[i]);
}
err = hs_compile_lit_multi_int(patterns.data(), flags.data(),
ids.data(), ext.c_array(), lens.data(),
count, mode, plat_info.get(), &db,
&compile_err, grey);
} else {
err = hs_compile_multi_int(patterns.data(), flags.data(), ids.data(),
ext.c_array(), count, mode, plat_info.get(),
&db, &compile_err, grey);
}
if (err != HS_SUCCESS) {
if (compile_err && compile_err->message) {

View File

@ -129,7 +129,11 @@ set(unit_internal_SOURCES
internal/main.cpp
)
# Define the unit-internal test executable. When a shared library is being
# built on Windows, the object files of the hs_compile_shared and
# hs_exec_shared targets are compiled directly into the executable alongside
# the test sources.
# NOTE(review): presumably needed because the internal tests use symbols the
# DLL does not export -- confirm.
if(WIN32 AND (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS))
    add_executable(unit-internal ${unit_internal_SOURCES} $<TARGET_OBJECTS:hs_compile_shared> $<TARGET_OBJECTS:hs_exec_shared>)
else()
    # All other configurations build from the test sources only.
    add_executable(unit-internal ${unit_internal_SOURCES})
endif()
set_target_properties(unit-internal PROPERTIES COMPILE_FLAGS "${HS_CXX_FLAGS}")
target_link_libraries(unit-internal hs corpusomatic)
endif(NOT (RELEASE_BUILD OR FAT_RUNTIME))

View File

@ -155,11 +155,6 @@
158:/141 & (142|!143) )| 144/C #Not enough left parentheses at index 17.
159:/1234567890 & (142|!143 )/C #Expression id too large at index 10.
160:/141 & (142|!143 )|/C #Not enough operand at index 18.
161:/!141/C #Has match from purely negative sub-expressions.
162:/!141 | 142 | 143/C #Has match from purely negative sub-expressions.
163:/!141 & !142 & !143/C #Has match from purely negative sub-expressions.
164:/(141 | !142 & !143)/C #Has match from purely negative sub-expressions.
165:/!(141 | 142 | 143)/C #Has match from purely negative sub-expressions.
166:/141/C #No logical operation.
167:/119 & 121/C #Unknown sub-expression id.
168:/166 & 167/C #Unknown sub-expression id.
161:/141/C #No logical operation.
162:/119 & 121/C #Unknown sub-expression id.
163:/166 & 167/C #Unknown sub-expression id.

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, Intel Corporation
* Copyright (c) 2018-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -694,3 +694,229 @@ TEST(LogicalCombination, MultiCombQuietUniSub5) {
err = hs_free_scratch(scratch);
ASSERT_EQ(HS_SUCCESS, err);
}
// Block-mode scan of a buffer containing only 'x' characters against a single
// combination (id 1002) that includes negated sub-expressions. The test
// expects exactly one match, for the combination itself, at offset 53.
// NOTE(review): offset 53 is presumably the end of the buffer -- confirm.
TEST(LogicalCombination, SingleCombPurelyNegative6) {
    hs_database_t *db = nullptr;
    hs_compile_error_t *compile_err = nullptr;
    CallBackContext c;
    string data = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
    // Five ordinary patterns (ids 201-205) followed by the combination.
    const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}",
                          "ijkl[mMn]", "(!201 | 202 & 203) & (!204 | 205)"};
    unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION};
    unsigned ids[] = {201, 202, 203, 204, 205, 1002};
    hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM,
                                      nullptr, &db, &compile_err);
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_TRUE(db != nullptr);
    hs_scratch_t *scratch = nullptr;
    err = hs_alloc_scratch(db, &scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_TRUE(scratch != nullptr);
    c.halt = 0;
    err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb,
                  (void *)&c);
    ASSERT_EQ(HS_SUCCESS, err);
    // Only the combination is reported.
    ASSERT_EQ(1U, c.matches.size());
    ASSERT_EQ(MatchRecord(53, 1002), c.matches[0]);
    hs_free_database(db);
    err = hs_free_scratch(scratch);
    ASSERT_EQ(HS_SUCCESS, err);
}
// Same patterns and input as SingleCombPurelyNegative6, but the combination
// additionally carries HS_FLAG_QUIET. The test expects no matches to be
// delivered to the callback.
TEST(LogicalCombination, SingleCombQuietPurelyNegative6) {
    hs_database_t *db = nullptr;
    hs_compile_error_t *compile_err = nullptr;
    CallBackContext c;
    string data = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
    // Five ordinary patterns (ids 201-205) followed by the combination.
    const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}",
                          "ijkl[mMn]", "(!201 | 202 & 203) & (!204 | 205)"};
    unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION | HS_FLAG_QUIET};
    unsigned ids[] = {201, 202, 203, 204, 205, 1002};
    hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM,
                                      nullptr, &db, &compile_err);
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_TRUE(db != nullptr);
    hs_scratch_t *scratch = nullptr;
    err = hs_alloc_scratch(db, &scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_TRUE(scratch != nullptr);
    c.halt = 0;
    err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb,
                  (void *)&c);
    ASSERT_EQ(HS_SUCCESS, err);
    // The quiet combination must not be reported.
    ASSERT_EQ(0U, c.matches.size());
    hs_free_database(db);
    err = hs_free_scratch(scratch);
    ASSERT_EQ(HS_SUCCESS, err);
}
// Block-mode scan with fifteen ordinary patterns (ids 101-105, 201-205,
// 301-305) and three combinations (ids 1001-1003). The input contains "fed"
// (pattern 202). The test expects three matches in order: pattern 202 and
// combination 1002 at offset 106, then combination 1001 at offset 300.
// NOTE(review): offset 300 is presumably the end of the buffer -- confirm.
TEST(LogicalCombination, MultiCombPurelyNegativeUniSub6) {
    hs_database_t *db = nullptr;
    hs_compile_error_t *compile_err = nullptr;
    CallBackContext c;
    string data = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
                  "-----------------------------------------------"
                  "xxxfedxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
                  "-----------------------------------------------"
                  "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
                  "------------------------------------------";
    const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}",
                          "ijkl[mMn]", "cba", "fed", "google.*cn",
                          "haystacks{4,8}", "ijkl[oOp]", "cab", "fee",
                          "goobar.*jp", "shockwave{4,6}", "ijkl[rRs]",
                          "(101 & 102 & 103) | (!104 & !105)",
                          "(!201 | 202 & 203) & (!204 | 205)",
                          "((301 | 302) & 303) & (304 | 305)"};
    unsigned flags[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                        HS_FLAG_COMBINATION, HS_FLAG_COMBINATION,
                        HS_FLAG_COMBINATION};
    unsigned ids[] = {101, 102, 103, 104, 105, 201, 202, 203, 204, 205, 301,
                      302, 303, 304, 305, 1001, 1002, 1003};
    hs_error_t err = hs_compile_multi(expr, flags, ids, 18, HS_MODE_NOSTREAM,
                                      nullptr, &db, &compile_err);
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_TRUE(db != nullptr);
    hs_scratch_t *scratch = nullptr;
    err = hs_alloc_scratch(db, &scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_TRUE(scratch != nullptr);
    c.halt = 0;
    err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb,
                  (void *)&c);
    ASSERT_EQ(HS_SUCCESS, err);
    // Expected reports, in delivery order. Combination 1003 is not expected.
    ASSERT_EQ(3U, c.matches.size());
    ASSERT_EQ(MatchRecord(106, 202), c.matches[0]);
    ASSERT_EQ(MatchRecord(106, 1002), c.matches[1]);
    ASSERT_EQ(MatchRecord(300, 1001), c.matches[2]);
    hs_free_database(db);
    err = hs_free_scratch(scratch);
    ASSERT_EQ(HS_SUCCESS, err);
}
// Variant of MultiCombPurelyNegativeUniSub6 in which pattern 102 is "defed",
// pattern 202 ("fed") is compiled with HS_FLAG_MULTILINE, and the input
// contains "defed" twice, the second time as the final bytes of the buffer.
// The test expects eight matches: patterns 102 and 202 plus combinations 1001
// and 1002, reported at offset 106 and again at offset 300.
TEST(LogicalCombination, MultiCombPurelyNegativeUniSubEOD6) {
    hs_database_t *db = nullptr;
    hs_compile_error_t *compile_err = nullptr;
    CallBackContext c;
    string data = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
                  "-----------------------------------------------"
                  "xdefedxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
                  "-----------------------------------------------"
                  "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
                  "-------------------------------------defed";
    const char *expr[] = {"abc", "defed", "foobar.*gh", "teakettle{4,10}",
                          "ijkl[mMn]", "cba", "fed", "google.*cn",
                          "haystacks{4,8}", "ijkl[oOp]", "cab", "fee",
                          "goobar.*jp", "shockwave{4,6}", "ijkl[rRs]",
                          "(101 & 102 & 103) | (!104 & !105)",
                          "(!201 | 202 & 203) & (!204 | 205)",
                          "((301 | 302) & 303) & (304 | 305)"};
    // Only the seventh entry (id 202, "fed") carries a non-zero pattern flag.
    unsigned flags[] = {0, 0, 0, 0, 0, 0, HS_FLAG_MULTILINE,
                        0, 0, 0, 0, 0, 0, 0, 0,
                        HS_FLAG_COMBINATION, HS_FLAG_COMBINATION,
                        HS_FLAG_COMBINATION};
    unsigned ids[] = {101, 102, 103, 104, 105, 201, 202, 203, 204, 205, 301,
                      302, 303, 304, 305, 1001, 1002, 1003};
    hs_error_t err = hs_compile_multi(expr, flags, ids, 18, HS_MODE_NOSTREAM,
                                      nullptr, &db, &compile_err);
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_TRUE(db != nullptr);
    hs_scratch_t *scratch = nullptr;
    err = hs_alloc_scratch(db, &scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_TRUE(scratch != nullptr);
    c.halt = 0;
    err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb,
                  (void *)&c);
    ASSERT_EQ(HS_SUCCESS, err);
    // Expected reports, in delivery order: the same four ids at each of the
    // two offsets. Combination 1003 is not expected.
    ASSERT_EQ(8U, c.matches.size());
    ASSERT_EQ(MatchRecord(106, 102), c.matches[0]);
    ASSERT_EQ(MatchRecord(106, 202), c.matches[1]);
    ASSERT_EQ(MatchRecord(106, 1001), c.matches[2]);
    ASSERT_EQ(MatchRecord(106, 1002), c.matches[3]);
    ASSERT_EQ(MatchRecord(300, 102), c.matches[4]);
    ASSERT_EQ(MatchRecord(300, 202), c.matches[5]);
    ASSERT_EQ(MatchRecord(300, 1001), c.matches[6]);
    ASSERT_EQ(MatchRecord(300, 1002), c.matches[7]);
    hs_free_database(db);
    err = hs_free_scratch(scratch);
    ASSERT_EQ(HS_SUCCESS, err);
}
// Streaming-mode test: eleven chunks are written to a single stream compiled
// with four ordinary patterns (ids 101, 102, 201, 202) and two combinations
// (1001 = "101 & 102", 1002 = "201 & !202"). The test checks the exact
// sequence of eleven matches recorded while scanning; matches raised by
// hs_close_stream go to dummy_cb and are not recorded in `c`.
TEST(LogicalCombination, MultiCombStream1) {
    hs_database_t *db = nullptr;
    hs_compile_error_t *compile_err = nullptr;
    CallBackContext c;
    // Note that the last two chunks end with "zxy" and "z" respectively, so
    // "xyz" and "zxyz" both complete on the final byte of the stream.
    string data[] = {"xxxxxxxabcxxxxxxxdefxxxghixxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                     "xxxxxxxxxxxxxxxxghixxxxxxxxxxxabcxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                     "xxxxxxxxxxxxxxxxdefxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                     "xxxxxxxxxxxxxxxxxyzxxxxxxxxxxxxxxxxxxxxxghixxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                     "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                     "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                     "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                     "xxxxxghixxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                     "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                     "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxzxy",
                     "z"};
    const char *expr[] = {"abc", "def", "xyz", "zxyz",
                          "101 & 102", "201 & !202"};
    unsigned flags[] = {0, 0, 0, 0, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION};
    unsigned ids[] = {101, 102, 201, 202, 1001, 1002};
    hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_STREAM,
                                      nullptr, &db, &compile_err);
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_TRUE(db != nullptr);
    hs_scratch_t *scratch = nullptr;
    err = hs_alloc_scratch(db, &scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_TRUE(scratch != nullptr);
    hs_stream_t *stream = nullptr;
    err = hs_open_stream(db, 0, &stream);
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_TRUE(stream != nullptr);
    c.halt = 0;
    int i;
    // Feed every chunk of data[] (11 entries) to the stream in order.
    for (i = 0; i < 11; i++) {
        err = hs_scan_stream(stream, data[i].c_str(), data[i].size(), 0,
                             scratch, record_cb, (void *)&c);
        ASSERT_EQ(HS_SUCCESS, err);
    }
    err = hs_close_stream(stream, scratch, dummy_cb, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);
    // Expected reports, in delivery order. At offset 761 both 201 and 202
    // are reported and no 1002 report is expected there.
    ASSERT_EQ(11U, c.matches.size());
    ASSERT_EQ(MatchRecord(10, 101), c.matches[0]);
    ASSERT_EQ(MatchRecord(20, 102), c.matches[1]);
    ASSERT_EQ(MatchRecord(20, 1001), c.matches[2]);
    ASSERT_EQ(MatchRecord(109, 101), c.matches[3]);
    ASSERT_EQ(MatchRecord(109, 1001), c.matches[4]);
    ASSERT_EQ(MatchRecord(171, 102), c.matches[5]);
    ASSERT_EQ(MatchRecord(171, 1001), c.matches[6]);
    ASSERT_EQ(MatchRecord(247, 201), c.matches[7]);
    ASSERT_EQ(MatchRecord(247, 1002), c.matches[8]);
    ASSERT_EQ(MatchRecord(761, 201), c.matches[9]);
    ASSERT_EQ(MatchRecord(761, 202), c.matches[10]);
    hs_free_database(db);
    err = hs_free_scratch(scratch);
    ASSERT_EQ(HS_SUCCESS, err);
}

View File

@ -75,7 +75,7 @@ TEST(Uniform, loadstore_u16) {
TEST(Uniform, loadstore_u32) {
for (int i = 0; i < 32; i++) {
u32 in = 1 << i;
u32 in = 1U << i;
const char *cin = (const char *)(&in);
u32 out = load_u32(cin);
EXPECT_EQ(in, out);
@ -106,7 +106,7 @@ TEST(Uniform, loadstore_m128) {
} in;
for (int i = 0; i < 128; i++) {
memset(&in, 0, sizeof(in));
in.words[i/32] = 1 << (i % 32);
in.words[i/32] = 1U << (i % 32);
const char *cin = (const char *)(&in);
m128 out = load_m128(cin);
EXPECT_EQ(0, memcmp(&out, &in, sizeof(out)));
@ -124,7 +124,7 @@ TEST(Uniform, loadstore_m256) {
} in;
for (int i = 0; i < 256; i++) {
memset(&in, 0, sizeof(in));
in.words[i/32] = 1 << (i % 32);
in.words[i/32] = 1U << (i % 32);
const char *cin = (const char *)(&in);
m256 out = load_m256(cin);
EXPECT_EQ(0, memcmp(&out, &in, sizeof(out)));
@ -142,7 +142,7 @@ TEST(Uniform, loadstore_m512) {
} in;
for (int i = 0; i < 512; i++) {
memset(&in, 0, sizeof(in));
in.words[i/32] = 1 << (i % 32);
in.words[i/32] = 1U << (i % 32);
const char *cin = (const char *)(&in);
m512 out = load_m512(cin);
EXPECT_EQ(0, memcmp(&out, &in, sizeof(out)));

View File

@ -56,9 +56,8 @@ std::string inferExpressionPath(const std::string &sigFile) {
// POSIX variant.
// dirname() may modify its argument, so we must make a copy.
std::vector<char> path(sigFile.size() + 1);
memcpy(path.data(), sigFile.c_str(), sigFile.size());
path[sigFile.size()] = 0; // ensure null termination.
std::vector<char> path(sigFile.begin(), sigFile.end());
path.push_back(0); // ensure null termination.
std::string rv = dirname(path.data());
#else

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -127,4 +127,18 @@ void prettyPrintRange(std::ostream &out, it_t begin, it_t end) {
}
}
// Convert the given byte string into a hex-escaped pattern, emitting "\xNN"
// (lower-case hex) for every input byte.
//
// pat:    input bytes; need not be NUL-terminated.
// patlen: number of bytes to read from pat.
//
// Returns a malloc()'d, NUL-terminated buffer that the caller must free(), or
// a null pointer if the allocation fails.
static really_inline
char *makeHex(const unsigned char *pat, unsigned patlen) {
    // Each input byte expands to four output characters. Widen before
    // multiplying so the product is computed in size_t rather than wrapping
    // in unsigned arithmetic.
    const size_t hexlen = (size_t)patlen * 4;
    char *hexbuf = (char *)malloc(hexlen + 1);
    if (!hexbuf) {
        // Report failure to the caller instead of writing through null.
        return nullptr;
    }

    char *buf = hexbuf;
    for (unsigned i = 0; i < patlen; i++, buf += 4) {
        // Writes four characters plus a terminating NUL, which the next
        // iteration (or the final store below) overwrites.
        snprintf(buf, 5, "\\x%02x", (unsigned char)pat[i]);
    }
    hexbuf[hexlen] = '\0';
    return hexbuf;
}
#endif // STRING_UTIL_H