mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Merge branch develop to master
This commit is contained in:
commit
4cebdaa435
15
CHANGELOG.md
15
CHANGELOG.md
@ -2,6 +2,21 @@
|
||||
|
||||
This is a list of notable changes to Hyperscan, in reverse chronological order.
|
||||
|
||||
## [5.2.0] 2019-07-12
|
||||
- Literal API: add new API `hs_compile_lit()` and `hs_compile_lit_multi()` to
|
||||
process pure literal rule sets. The 2 literal APIs treat each expression text
|
||||
in a literal sense without recognizing any regular grammars.
|
||||
- Logical combination: add support for purely negative combinations, which
|
||||
report match at EOD in case of no sub-expressions matched.
|
||||
- Windows porting: support shared library (DLL) on Windows with available tools
|
||||
hscheck, hsbench and hsdump.
|
||||
- Bugfix for issue #148: fix uninitialized use of `scatter_unit_uX` due to
|
||||
padding.
|
||||
- Bugfix for issue #155: fix numerical result out of range error.
|
||||
- Bugfix for issue #165: avoid corruption of pending combination report in
|
||||
streaming mode.
|
||||
- Bugfix for issue #174: fix scratch free issue when memory allocation fails.
|
||||
|
||||
## [5.1.1] 2019-04-03
|
||||
- Add extra detection and handling when invalid rose programs are triggered.
|
||||
- Bugfix for issue #136: fix CMake parsing of CPU architecture for GCC-9.
|
||||
|
@ -2,8 +2,8 @@ cmake_minimum_required (VERSION 2.8.11)
|
||||
project (hyperscan C CXX)
|
||||
|
||||
set (HS_MAJOR_VERSION 5)
|
||||
set (HS_MINOR_VERSION 1)
|
||||
set (HS_PATCH_VERSION 1)
|
||||
set (HS_MINOR_VERSION 2)
|
||||
set (HS_PATCH_VERSION 0)
|
||||
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
|
||||
|
||||
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||
@ -31,6 +31,7 @@ else()
|
||||
endif()
|
||||
|
||||
if(CMAKE_BUILD_TYPE MATCHES RELEASE|RELWITHDEBINFO|MINSIZEREL)
|
||||
message(STATUS "using release build")
|
||||
set(RELEASE_BUILD TRUE)
|
||||
else()
|
||||
set(RELEASE_BUILD FALSE)
|
||||
@ -109,11 +110,9 @@ option(BUILD_SHARED_LIBS "Build shared libs instead of static" OFF)
|
||||
option(BUILD_STATIC_AND_SHARED "Build shared libs as well as static" OFF)
|
||||
|
||||
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
|
||||
if (WIN32)
|
||||
message(FATAL_ERROR "Windows DLLs currently not supported")
|
||||
else()
|
||||
message(STATUS "Building shared libraries")
|
||||
endif()
|
||||
else()
|
||||
message(STATUS "Building static libraries")
|
||||
endif()
|
||||
|
||||
if (NOT BUILD_SHARED_LIBS)
|
||||
@ -151,9 +150,6 @@ if(MSVC OR MSVC_IDE)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O3 /Qstd=c99 /Qrestrict /wd4267 /Qdiag-disable:remark")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /Qstd=c++11 /Qrestrict /QxHost /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
|
||||
else()
|
||||
# todo: change these as required
|
||||
set(ARCH_C_FLAGS "/arch:AVX2")
|
||||
set(ARCH_CXX_FLAGS "/arch:AVX2")
|
||||
set(MSVC_WARNS "/wd4101 /wd4146 /wd4172 /wd4200 /wd4244 /wd4267 /wd4307 /wd4334 /wd4805 /wd4996 -D_CRT_SECURE_NO_WARNINGS")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 ${MSVC_WARNS}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 ${MSVC_WARNS} /wd4800 -DBOOST_DETAIL_NO_CONTAINER_FWD")
|
||||
@ -1298,12 +1294,14 @@ endif()
|
||||
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
|
||||
if (NOT FAT_RUNTIME)
|
||||
add_library(hs_runtime_shared SHARED src/hs_version.c
|
||||
src/hs_valid_platform.c $<TARGET_OBJECTS:hs_exec_shared>)
|
||||
src/hs_valid_platform.c $<TARGET_OBJECTS:hs_exec_shared>
|
||||
hs_runtime.def)
|
||||
else()
|
||||
add_library(hs_runtime_shared SHARED src/hs_version.c
|
||||
src/hs_valid_platform.c
|
||||
$<TARGET_OBJECTS:hs_exec_common_shared>
|
||||
${RUNTIME_SHLIBS})
|
||||
${RUNTIME_SHLIBS}
|
||||
hs_runtime.def)
|
||||
endif()
|
||||
set_target_properties(hs_runtime_shared PROPERTIES
|
||||
VERSION ${LIB_VERSION}
|
||||
@ -1349,7 +1347,7 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
|
||||
${RUNTIME_SHLIBS})
|
||||
endif ()
|
||||
|
||||
add_library(hs_shared SHARED ${hs_shared_SRCS})
|
||||
add_library(hs_shared SHARED ${hs_shared_SRCS} hs.def)
|
||||
|
||||
add_dependencies(hs_shared ragel_Parser)
|
||||
set_target_properties(hs_shared PROPERTIES
|
||||
|
@ -322,7 +322,7 @@ PatternData::PatternData(const char *pattern, u32 flags, u32 idx, u32 id_in,
|
||||
ch_misc_free(info);
|
||||
|
||||
u32 guardflags;
|
||||
guardflags = (flags | HS_FLAG_PREFILTER) & ~HS_FLAG_SINGLEMATCH;
|
||||
guardflags = flags | HS_FLAG_PREFILTER;
|
||||
guard = isHyperscanSupported(pattern, guardflags, platform);
|
||||
} else {
|
||||
// We can't even prefilter this pattern, so we're dependent on Big Dumb
|
||||
|
@ -54,6 +54,75 @@ version of Hyperscan used to scan with it.
|
||||
Hyperscan provides support for targeting a database at a particular CPU
|
||||
platform; see :ref:`instr_specialization` for details.
|
||||
|
||||
=====================
|
||||
Compile Pure Literals
|
||||
=====================
|
||||
|
||||
Pure literal is a special case of regular expression. A character sequence is
|
||||
regarded as a pure literal if and only if each character is read and
|
||||
interpreted independently. No syntax association happens between any adjacent
|
||||
characters.
|
||||
|
||||
For example, given an expression written as :regexp:`/bc?/`. We could say it is
|
||||
a regular expression, with the meaning that character ``b`` followed by nothing
|
||||
or by one character ``c``. On the other view, we could also say it is a pure
|
||||
literal expression, with the meaning that this is a character sequence of 3-byte
|
||||
length, containing characters ``b``, ``c`` and ``?``. In regular case, the
|
||||
question mark character ``?`` has a particular syntax role called 0-1 quantifier,
|
||||
which has a syntax association with the character ahead of it. Similar
|
||||
characters exist in regular grammar like ``[``, ``]``, ``(``, ``)``, ``{``,
|
||||
``}``, ``-``, ``*``, ``+``, ``\``, ``|``, ``/``, ``:``, ``^``, ``.``, ``$``.
|
||||
While in pure literal case, all these meta characters lose their extra meanings
|
||||
except that they are just common ASCII codes.
|
||||
|
||||
Hyperscan was initially designed to process common regular expressions. It is
|
||||
hence embedded with a complex parser to do comprehensive regular grammar
|
||||
interpretation. Particularly, the identification of the above meta characters is the
|
||||
basic step for the interpretation of far more complex regular grammars.
|
||||
|
||||
However in real cases, patterns may not always be regular expressions. They
|
||||
could just be pure literals. Problem will come if the pure literals contain
|
||||
regular meta characters. Supposing fed directly into traditional Hyperscan
|
||||
compile API, all these meta characters will be interpreted in predefined ways,
|
||||
which is unnecessary and the result is totally out of expectation. To avoid
|
||||
such misunderstanding by traditional API, users have to preprocess these
|
||||
literal patterns by converting the meta characters into some other formats:
|
||||
either by adding a backslash ``\`` before certain meta characters, or by
|
||||
converting all the characters into a hexadecimal representation.
|
||||
|
||||
In ``v5.2.0``, Hyperscan introduces 2 new compile APIs for pure literal patterns:
|
||||
|
||||
#. :c:func:`hs_compile_lit`: compiles a single pure literal into a pattern
|
||||
database.
|
||||
|
||||
#. :c:func:`hs_compile_lit_multi`: compiles an array of pure literals into a
|
||||
pattern database. All of the supplied patterns will be scanned for
|
||||
concurrently at scan time, with user-supplied identifiers returned when they
|
||||
match.
|
||||
|
||||
These 2 APIs are designed for use cases where all patterns contained in the
|
||||
target rule set are pure literals. Users can pass the initial pure literal
|
||||
content directly into these APIs without worrying about writing regular meta
|
||||
characters in their patterns. No preprocessing work is needed any more.
|
||||
|
||||
For new APIs, the ``length`` of each literal pattern is a newly added parameter.
|
||||
Hyperscan needs to locate the end position of the input expression via clearly
|
||||
knowing each literal's length, not by simply identifying character ``\0`` of a
|
||||
string.
|
||||
|
||||
Supported flags: :c:member:`HS_FLAG_CASELESS`, :c:member:`HS_FLAG_MULTILINE`,
|
||||
:c:member:`HS_FLAG_SINGLEMATCH`, :c:member:`HS_FLAG_SOM_LEFTMOST`.
|
||||
|
||||
.. note:: We don't support literal compilation API with :ref:`extparam`. And
|
||||
for runtime implementation, traditional runtime APIs can still be
|
||||
used to match pure literal patterns.
|
||||
|
||||
.. note:: If the target rule set contains at least one regular expression,
|
||||
please use traditional compile APIs :c:func:`hs_compile`,
|
||||
:c:func:`hs_compile_multi` and :c:func:`hs_compile_ext_multi`.
|
||||
The new literal APIs introduced here are designed for rule sets
|
||||
containing only pure literal expressions.
|
||||
|
||||
***************
|
||||
Pattern Support
|
||||
***************
|
||||
|
43
hs.def
Normal file
43
hs.def
Normal file
@ -0,0 +1,43 @@
|
||||
; Hyperscan DLL export definitions
;
; Export list for the full (compile + runtime) shared library. This file is
; listed as a source of the hs_shared target in CMakeLists.txt.
; Entries are kept in alphabetical order. Every public API function that must
; be callable from the DLL needs an entry here, including the pure literal
; compile API (hs_compile_lit, hs_compile_lit_multi).

LIBRARY hs

EXPORTS

hs_alloc_scratch
hs_clone_scratch
hs_close_stream
hs_compile
hs_compile_ext_multi
hs_compile_lit
hs_compile_lit_multi
hs_compile_multi
hs_compress_stream
hs_copy_stream
hs_database_info
hs_database_size
hs_deserialize_database
hs_deserialize_database_at
hs_expand_stream
hs_expression_ext_info
hs_expression_info
hs_free_compile_error
hs_free_database
hs_free_scratch
hs_open_stream
hs_populate_platform
hs_reset_and_copy_stream
hs_reset_and_expand_stream
hs_reset_stream
hs_scan
hs_scan_stream
hs_scan_vector
hs_scratch_size
hs_serialize_database
hs_serialized_database_info
hs_serialized_database_size
hs_set_allocator
hs_set_database_allocator
hs_set_misc_allocator
hs_set_scratch_allocator
hs_set_stream_allocator
hs_stream_size
hs_valid_platform
hs_version
|
36
hs_runtime.def
Normal file
36
hs_runtime.def
Normal file
@ -0,0 +1,36 @@
|
||||
; Hyperscan DLL export definitions
;
; Export list for the runtime-only shared library. This file is listed as a
; source of the hs_runtime_shared target in CMakeLists.txt. It contains the
; scanning, stream, scratch, database and allocator entry points only; none
; of the hs_compile* / hs_expression* functions are exported from here.

LIBRARY hs_runtime

EXPORTS

hs_alloc_scratch
hs_clone_scratch
hs_close_stream
hs_compress_stream
hs_copy_stream
hs_database_info
hs_database_size
hs_deserialize_database
hs_deserialize_database_at
hs_expand_stream
hs_free_database
hs_free_scratch
hs_open_stream
hs_reset_and_copy_stream
hs_reset_and_expand_stream
hs_reset_stream
hs_scan
hs_scan_stream
hs_scan_vector
hs_scratch_size
hs_serialize_database
hs_serialized_database_info
hs_serialized_database_size
hs_set_allocator
hs_set_database_allocator
hs_set_misc_allocator
hs_set_scratch_allocator
hs_set_stream_allocator
hs_stream_size
hs_valid_platform
hs_version
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -56,11 +56,13 @@
|
||||
#include "parser/unsupported.h"
|
||||
#include "parser/utf8_validate.h"
|
||||
#include "rose/rose_build.h"
|
||||
#include "rose/rose_internal.h"
|
||||
#include "som/slot_manager_dump.h"
|
||||
#include "util/bytecode_ptr.h"
|
||||
#include "util/compile_error.h"
|
||||
#include "util/target_info.h"
|
||||
#include "util/verify_types.h"
|
||||
#include "util/ue2string.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
@ -107,6 +109,46 @@ void validateExt(const hs_expr_ext &ext) {
|
||||
|
||||
}
|
||||
|
||||
void ParsedLitExpression::parseLiteral(const char *expression, size_t len,
|
||||
bool nocase) {
|
||||
const char *c = expression;
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
lit.push_back(*c, nocase);
|
||||
c++;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Build the expression info and literal for one pure literal pattern.
 *
 * \param index_in   Index of the expression in the caller's array.
 * \param expression Start of the literal text.
 * \param expLength  Number of bytes of literal text.
 * \param flags      HS_FLAG_* bits for this expression.
 * \param report     Report ID forwarded to the ExpressionInfo.
 *
 * \throws CompileError if \p flags has bits outside HS_FLAG_ALL, or if both
 *         HS_FLAG_SINGLEMATCH and HS_FLAG_SOM_LEFTMOST are set.
 */
ParsedLitExpression::ParsedLitExpression(unsigned index_in,
                                         const char *expression,
                                         size_t expLength, unsigned flags,
                                         ReportID report)
    : expr(index_in, false, flags & HS_FLAG_SINGLEMATCH, false, false,
           SOM_NONE, report, 0, MAX_OFFSET, 0, 0, 0, false) {
    // The following HS_FLAG_ values have no use for a pure literal
    // expression: DOTALL/ALLOWEMPTY/UTF8/UCP/PREFILTER/COMBINATION/QUIET

    const bool singleMatch = (flags & HS_FLAG_SINGLEMATCH) != 0;
    const bool somLeftmost = (flags & HS_FLAG_SOM_LEFTMOST) != 0;
    const bool caseless = (flags & HS_FLAG_CASELESS) != 0;

    // Reject any bit that is not a known flag.
    if ((flags & ~HS_FLAG_ALL) != 0) {
        DEBUG_PRINTF("Unrecognised flag, flags=%u.\n", flags);
        throw CompileError("Unrecognised flag.");
    }

    // FIXME: we disallow highlander + SOM, see UE-1850.
    if (singleMatch && somLeftmost) {
        throw CompileError("HS_FLAG_SINGLEMATCH is not supported in "
                           "combination with HS_FLAG_SOM_LEFTMOST.");
    }

    // Record the start-of-match type requested by the flags.
    if (somLeftmost) {
        expr.som = SOM_LEFT;
    }

    // Convert the raw expression bytes into the ue2_literal member.
    parseLiteral(expression, expLength, caseless);
}
|
||||
|
||||
ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
|
||||
unsigned flags, ReportID report,
|
||||
const hs_expr_ext *ext)
|
||||
@ -345,6 +387,49 @@ void addExpression(NG &ng, unsigned index, const char *expression,
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Add one pure literal expression to the compiler.
 *
 * \param ng         The global NG object the literal is added to.
 * \param index      Index of the expression in the caller's array.
 * \param expression Start of the literal text; must not be null.
 * \param flags      HS_FLAG_* bits for this expression.
 * \param ext        Extended parameters; must be null or have no flags set.
 * \param id         Report ID for this expression.
 * \param expLength  Length of the literal text in bytes.
 *
 * \throws CompileError if extended parameters are supplied, if the literal
 *         is longer than the configured pattern length limit, or if a flag
 *         not supported by the literal API is set.
 */
void addLitExpression(NG &ng, unsigned index, const char *expression,
                      unsigned flags, const hs_expr_ext *ext, ReportID id,
                      size_t expLength) {
    assert(expression);
    const CompileContext &cc = ng.cc;
    // NOTE(review): '%s' assumes the text is NUL-terminated and stops at the
    // first embedded '\0'; this only matters for debug output.
    DEBUG_PRINTF("index=%u, id=%u, flags=%u, expr='%s', len='%zu'\n", index,
                 id, flags, expression, expLength);

    // Extended parameters are not supported for pure literal patterns.
    if (ext && ext->flags != 0LLU) {
        throw CompileError("Extended parameters are not supported for pure "
                           "literal matching API.");
    }

    // Ensure that our pattern isn't too long (in characters). The length of
    // a pure literal is given by expLength: the text may contain '\0' bytes,
    // so strlen() would under-count it (and could read past the end of a
    // buffer that has no terminator).
    if (expLength > cc.grey.limitPatternLength) {
        throw CompileError("Pattern length exceeds limit.");
    }

    // filter out flags not supported by pure literal API.
    u64a not_supported = HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8 |
                         HS_FLAG_UCP | HS_FLAG_PREFILTER | HS_FLAG_COMBINATION |
                         HS_FLAG_QUIET;

    if (flags & not_supported) {
        throw CompileError("Only HS_FLAG_CASELESS, HS_FLAG_MULTILINE, "
                           "HS_FLAG_SINGLEMATCH and HS_FLAG_SOM_LEFTMOST are "
                           "supported in literal API.");
    }

    // This expression must be a pure literal, we can build ue2_literal
    // directly based on expression text.
    ParsedLitExpression ple(index, expression, expLength, flags, id);

    // Feed the ue2_literal into Rose.
    // NOTE(review): when addLiteral() returns false nothing further is done
    // with the literal here -- confirm that this is intended.
    const auto &expr = ple.expr;
    if (ng.addLiteral(ple.lit, expr.index, expr.report, expr.highlander,
                      expr.som, expr.quiet)) {
        DEBUG_PRINTF("took pure literal\n");
        return;
    }
}
|
||||
|
||||
static
|
||||
bytecode_ptr<RoseEngine> generateRoseEngine(NG &ng) {
|
||||
const u32 minWidth =
|
||||
@ -416,10 +501,13 @@ hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) {
|
||||
}
|
||||
|
||||
|
||||
struct hs_database *build(NG &ng, unsigned int *length) {
|
||||
struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag) {
|
||||
assert(length);
|
||||
|
||||
auto rose = generateRoseEngine(ng);
|
||||
struct RoseEngine *roseHead = rose.get();
|
||||
roseHead->pureLiteral = pureFlag;
|
||||
|
||||
if (!rose) {
|
||||
throw CompileError("Unable to generate bytecode.");
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -38,6 +38,7 @@
|
||||
#include "compiler/expression_info.h"
|
||||
#include "parser/Component.h"
|
||||
#include "util/noncopyable.h"
|
||||
#include "util/ue2string.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
@ -66,6 +67,22 @@ public:
|
||||
std::unique_ptr<Component> component;
|
||||
};
|
||||
|
||||
|
||||
/** \brief Class gathering together the pieces of a parsed lit-expression. */
|
||||
class ParsedLitExpression : noncopyable {
|
||||
public:
|
||||
ParsedLitExpression(unsigned index, const char *expression,
|
||||
size_t expLength, unsigned flags, ReportID report);
|
||||
|
||||
void parseLiteral(const char *expression, size_t len, bool nocase);
|
||||
|
||||
/** \brief Expression information (from flags, extparam etc) */
|
||||
ExpressionInfo expr;
|
||||
|
||||
/** \brief Format the lit-expression text into Hyperscan literal type. */
|
||||
ue2_literal lit;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Class gathering together the pieces of an expression that has been
|
||||
* built into an NFA graph.
|
||||
@ -99,6 +116,10 @@ struct BuiltExpression {
|
||||
void addExpression(NG &ng, unsigned index, const char *expression,
|
||||
unsigned flags, const hs_expr_ext *ext, ReportID report);
|
||||
|
||||
void addLitExpression(NG &ng, unsigned index, const char *expression,
|
||||
unsigned flags, const hs_expr_ext *ext, ReportID id,
|
||||
size_t expLength);
|
||||
|
||||
/**
|
||||
* Build a Hyperscan database out of the expressions we've been given. A
|
||||
* fatal error will result in an exception being thrown.
|
||||
@ -107,11 +128,13 @@ void addExpression(NG &ng, unsigned index, const char *expression,
|
||||
* The global NG object.
|
||||
* @param[out] length
|
||||
* The number of bytes occupied by the compiled structure.
|
||||
* @param pureFlag
|
||||
* The flag indicating invocation from literal API or not.
|
||||
* @return
|
||||
* The compiled structure. Should be deallocated with the
|
||||
* hs_database_free() function.
|
||||
*/
|
||||
struct hs_database *build(NG &ng, unsigned int *length);
|
||||
struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag);
|
||||
|
||||
/**
|
||||
* Constructs an NFA graph from the given expression tree.
|
||||
|
@ -51,7 +51,7 @@
|
||||
} \
|
||||
\
|
||||
/* resolver */ \
|
||||
static void(*JOIN(resolve_, NAME)(void)) { \
|
||||
static RTYPE (*JOIN(resolve_, NAME)(void))(__VA_ARGS__) { \
|
||||
if (check_avx512()) { \
|
||||
return JOIN(avx512_, NAME); \
|
||||
} \
|
||||
|
@ -282,7 +282,7 @@ const array<double, 100> Scorer::count_lut{{
|
||||
}};
|
||||
|
||||
const array<double, 9> Scorer::len_lut{{
|
||||
pow(0, -3.0), pow(1, -3.0), pow(2, -3.0), pow(3, -3.0), pow(4, -3.0),
|
||||
0, pow(1, -3.0), pow(2, -3.0), pow(3, -3.0), pow(4, -3.0),
|
||||
pow(5, -3.0), pow(6, -3.0), pow(7, -3.0), pow(8, -3.0)}};
|
||||
|
||||
/**
|
||||
@ -807,9 +807,6 @@ void findIncludedLits(vector<hwlmLiteral> &lits,
|
||||
for (size_t i = 0; i < cnt; i++) {
|
||||
u32 bucket1 = group[i].first;
|
||||
u32 id1 = group[i].second;
|
||||
if (lits[id1].pure) {
|
||||
continue;
|
||||
}
|
||||
buildSquashMask(lits, id1, bucket1, i + 1, group, parent_map,
|
||||
exception_map);
|
||||
}
|
||||
|
@ -62,7 +62,6 @@ struct LitInfo {
|
||||
u8 size;
|
||||
u8 flags; //!< bitfield of flags from FDR_LIT_FLAG_* above.
|
||||
u8 next;
|
||||
u8 pure; //!< The pass-on of pure flag from hwlmLiteral.
|
||||
};
|
||||
|
||||
#define FDRC_FLAG_NO_CONFIRM 1
|
||||
|
@ -87,7 +87,6 @@ void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
|
||||
info.flags = flags;
|
||||
info.size = verify_u8(max(lit.msk.size(), lit.s.size()));
|
||||
info.groups = lit.groups;
|
||||
info.pure = lit.pure;
|
||||
|
||||
// these are built up assuming a LE machine
|
||||
CONF_TYPE msk = all_ones;
|
||||
|
@ -65,7 +65,6 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
|
||||
u8 oldNext; // initialized in loop
|
||||
do {
|
||||
assert(ISALIGNED(li));
|
||||
scratch->pure = li->pure;
|
||||
|
||||
if (unlikely((conf_key & li->msk) != li->v)) {
|
||||
goto out;
|
||||
@ -100,7 +99,6 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
|
||||
li++;
|
||||
} while (oldNext);
|
||||
scratch->fdr_conf = NULL;
|
||||
scratch->pure = 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
163
src/hs.cpp
163
src/hs.cpp
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -251,7 +251,7 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
|
||||
ng.rm.logicalKeyRenumber();
|
||||
|
||||
unsigned length = 0;
|
||||
struct hs_database *out = build(ng, &length);
|
||||
struct hs_database *out = build(ng, &length, 0);
|
||||
|
||||
assert(out); // should have thrown exception on error
|
||||
assert(length);
|
||||
@ -281,6 +281,130 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Internal worker behind the pure literal compile API: validates the
 * arguments, adds each literal to an NG object and builds the database.
 *
 * On every failure path *db is set to nullptr (when db is non-null) and, when
 * comp_error is non-null, *comp_error receives the error description. On
 * success *db holds the new database and *comp_error is nullptr.
 *
 * Returns HS_SUCCESS, HS_COMPILER_ERROR, or (FAT_RUNTIME builds only)
 * HS_ARCH_ERROR when check_ssse3() fails.
 */
hs_error_t
hs_compile_lit_multi_int(const char *const *expressions, const unsigned *flags,
                         const unsigned *ids, const hs_expr_ext *const *ext,
                         const size_t *lens, unsigned elements, unsigned mode,
                         const hs_platform_info_t *platform, hs_database_t **db,
                         hs_compile_error_t **comp_error, const Grey &g) {
    // Check the args: note that it's OK for flags, ids or ext to be null.
    if (!comp_error) {
        if (db) {
            *db = nullptr;
        }
        // nowhere to write the string, but we can still report an error code
        return HS_COMPILER_ERROR;
    }
    if (!db) {
        *comp_error = generateCompileError("Invalid parameter: db is NULL", -1);
        return HS_COMPILER_ERROR;
    }
    if (!expressions) {
        *db = nullptr;
        *comp_error
            = generateCompileError("Invalid parameter: expressions is NULL",
                                   -1);
        return HS_COMPILER_ERROR;
    }
    // Unlike flags/ids/ext, the array of literal lengths is mandatory.
    if (!lens) {
        *db = nullptr;
        *comp_error = generateCompileError("Invalid parameter: len is NULL", -1);
        return HS_COMPILER_ERROR;
    }
    if (elements == 0) {
        *db = nullptr;
        *comp_error = generateCompileError("Invalid parameter: elements is zero", -1);
        return HS_COMPILER_ERROR;
    }

#if defined(FAT_RUNTIME)
    if (!check_ssse3()) {
        *db = nullptr;
        *comp_error = generateCompileError("Unsupported architecture", -1);
        return HS_ARCH_ERROR;
    }
#endif

    if (!checkMode(mode, comp_error)) {
        *db = nullptr;
        assert(*comp_error); // set by checkMode.
        return HS_COMPILER_ERROR;
    }

    if (!checkPlatform(platform, comp_error)) {
        *db = nullptr;
        assert(*comp_error); // set by checkPlatform.
        return HS_COMPILER_ERROR;
    }

    if (elements > g.limitPatternCount) {
        *db = nullptr;
        *comp_error = generateCompileError("Number of patterns too large", -1);
        return HS_COMPILER_ERROR;
    }

    // This function is simply a wrapper around both the parser and compiler
    bool isStreaming = mode & (HS_MODE_STREAM | HS_MODE_VECTORED);
    bool isVectored = mode & HS_MODE_VECTORED;
    unsigned somPrecision = getSomPrecision(mode);

    // Use the caller's platform if one was given, otherwise the current host.
    target_t target_info = platform ? target_t(*platform)
                                    : get_current_target();

    try {
        CompileContext cc(isStreaming, isVectored, target_info, g);
        NG ng(cc, elements, somPrecision);

        for (unsigned int i = 0; i < elements; i++) {
            // Add this expression to the compiler
            try {
                // Null flags/ext/ids arrays mean 0/nullptr/0 for every
                // element.
                addLitExpression(ng, i, expressions[i], flags ? flags[i] : 0,
                                 ext ? ext[i] : nullptr, ids ? ids[i] : 0,
                                 lens[i]);
            } catch (CompileError &e) {
                /* Caught a parse error;
                 * throw it upstream as a CompileError with a specific index */
                e.setExpressionIndex(i);
                throw; /* do not slice */
            }
        }

        // Check sub-expression ids
        ng.rm.pl.validateSubIDs(ids, expressions, flags, elements);
        // Renumber and assign lkey to reports
        ng.rm.logicalKeyRenumber();

        unsigned length = 0;
        // The final argument (pureFlag = 1) marks this database as built
        // through the literal API.
        struct hs_database *out = build(ng, &length, 1);

        assert(out);    //should have thrown exception on error
        assert(length);

        *db = out;
        *comp_error = nullptr;

        return HS_SUCCESS;
    }
    catch (const CompileError &e) {
        // Compiler error occurred
        *db = nullptr;
        *comp_error = generateCompileError(e.reason,
                                           e.hasIndex ? (int)e.index : -1);
        return HS_COMPILER_ERROR;
    }
    catch (const std::bad_alloc &) {
        // Out of memory: hand back the shared hs_enomem error object.
        *db = nullptr;
        *comp_error = const_cast<hs_compile_error_t *>(&hs_enomem);
        return HS_COMPILER_ERROR;
    }
    catch (...) {
        // Any other exception is reported with the shared hs_einternal
        // error object.
        assert(!"Internal errror, unexpected exception");
        *db = nullptr;
        *comp_error = const_cast<hs_compile_error_t *>(&hs_einternal);
        return HS_COMPILER_ERROR;
    }
}
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
extern "C" HS_PUBLIC_API
|
||||
@ -326,6 +450,41 @@ hs_error_t HS_CDECL hs_compile_ext_multi(const char * const *expressions,
|
||||
platform, db, error, Grey());
|
||||
}
|
||||
|
||||
/*
 * Public entry point: compile a single pure literal of 'len' bytes into a
 * database. Wraps hs_compile_lit_multi_int() with one element whose ID is 0
 * and with no extended parameters.
 *
 * A null 'expression' is reported as HS_COMPILER_ERROR. In that case the
 * output pointers are only written through when they are non-null, which is
 * the same tolerance hs_compile_lit_multi_int() shows for null 'db' and
 * 'error' arguments.
 */
extern "C" HS_PUBLIC_API
hs_error_t HS_CDECL hs_compile_lit(const char *expression, unsigned flags,
                                   const size_t len, unsigned mode,
                                   const hs_platform_info_t *platform,
                                   hs_database_t **db,
                                   hs_compile_error_t **error) {
    if (expression == nullptr) {
        // Never dereference an output pointer the caller did not supply.
        if (db) {
            *db = nullptr;
        }
        if (error) {
            *error = generateCompileError(
                "Invalid parameter: expression is NULL", -1);
        }
        return HS_COMPILER_ERROR;
    }

    unsigned id = 0; // single expressions get zero as an ID
    const hs_expr_ext * const *ext = nullptr; // unused for this call.

    return hs_compile_lit_multi_int(&expression, &flags, &id, ext, &len, 1,
                                    mode, platform, db, error, Grey());
}
|
||||
|
||||
/*
 * Public entry point: compile an array of pure literals into a database.
 * Forwards every argument to hs_compile_lit_multi_int() with no extended
 * parameters and a default-constructed Grey.
 */
extern "C" HS_PUBLIC_API
hs_error_t HS_CDECL hs_compile_lit_multi(const char * const *expressions,
                                         const unsigned *flags,
                                         const unsigned *ids,
                                         const size_t *lens,
                                         unsigned elements, unsigned mode,
                                         const hs_platform_info_t *platform,
                                         hs_database_t **db,
                                         hs_compile_error_t **error) {
    // The literal API takes no extended parameters.
    const hs_expr_ext *const *const noExt = nullptr;

    const hs_error_t rv =
        hs_compile_lit_multi_int(expressions, flags, ids, noExt, lens,
                                 elements, mode, platform, db, error, Grey());
    return rv;
}
|
||||
|
||||
static
|
||||
hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
|
||||
const hs_expr_ext_t *ext, unsigned int mode,
|
||||
|
181
src/hs_compile.h
181
src/hs_compile.h
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -323,6 +323,10 @@ typedef struct hs_expr_ext {
|
||||
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
|
||||
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
||||
* when a match is found.
|
||||
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
|
||||
* syntax.
|
||||
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
|
||||
* the sub-expressions in logical combinations.
|
||||
*
|
||||
* @param mode
|
||||
* Compiler mode flags that affect the database as a whole. One of @ref
|
||||
@ -392,6 +396,10 @@ hs_error_t HS_CDECL hs_compile(const char *expression, unsigned int flags,
|
||||
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
|
||||
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
||||
* when a match is found.
|
||||
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
|
||||
* syntax.
|
||||
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
|
||||
* the sub-expressions in logical combinations.
|
||||
*
|
||||
* @param ids
|
||||
* An array of integers specifying the ID number to be associated with the
|
||||
@ -472,6 +480,10 @@ hs_error_t HS_CDECL hs_compile_multi(const char *const *expressions,
|
||||
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
|
||||
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
||||
* when a match is found.
|
||||
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
|
||||
* syntax.
|
||||
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
|
||||
* the sub-expressions in logical combinations.
|
||||
*
|
||||
* @param ids
|
||||
* An array of integers specifying the ID number to be associated with the
|
||||
@ -527,6 +539,165 @@ hs_error_t HS_CDECL hs_compile_ext_multi(const char *const *expressions,
|
||||
const hs_platform_info_t *platform,
|
||||
hs_database_t **db, hs_compile_error_t **error);
|
||||
|
||||
/**
|
||||
* The basic pure literal expression compiler.
|
||||
*
|
||||
* This is the function call with which a pure literal expression (not a
|
||||
* common regular expression) is compiled into a Hyperscan database which
|
||||
* can be passed to the runtime functions (such as @ref hs_scan(),
|
||||
* @ref hs_open_stream(), etc.)
|
||||
*
|
||||
* @param expression
|
||||
* The expression to parse, whose length is given by @p len. Note that this string must
|
||||
* represent ONLY the pattern to be matched, with no delimiters or flags;
|
||||
* any global flags should be specified with the @p flags argument. For
|
||||
* example, the expression `/abc?def/i` should be compiled by providing
|
||||
* `abc?def` as the @p expression, and @ref HS_FLAG_CASELESS as the @a
|
||||
* flags. Meanwhile, the string content shall be fully parsed in a literal
|
||||
* sense without any regular grammars. For example, the @p expression
|
||||
* `abc?` simply means a char sequence of `a`, `b`, `c`, and `?`. The `?`
|
||||
* here doesn't mean 0 or 1 quantifier under regular semantics.
|
||||
*
|
||||
* @param flags
|
||||
* Flags which modify the behaviour of the expression. Multiple flags may
|
||||
* be used by ORing them together. Compared to @ref hs_compile(), fewer
|
||||
* valid values are provided:
|
||||
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
|
||||
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
|
||||
* - HS_FLAG_SINGLEMATCH - Only one match will be generated for the
|
||||
* expression per stream.
|
||||
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
||||
* when a match is found.
|
||||
*
|
||||
* @param len
|
||||
* The length of the text content of the pure literal expression. As the
|
||||
* text content indicated by @p expression is treated as single character
|
||||
* one by one, the special terminating character `\0` should be allowed
|
||||
* to appear in expression, and not treated as a terminator for a string.
|
||||
* Thus, the end of a pure literal expression cannot be indicated by
|
||||
* identifying `\0`, but by counting to the expression length.
|
||||
*
|
||||
* @param mode
|
||||
* Compiler mode flags that affect the database as a whole. One of @ref
|
||||
* HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
|
||||
* supplied, to select between the generation of a streaming, block or
|
||||
* vectored database. In addition, other flags (beginning with HS_MODE_)
|
||||
* may be supplied to enable specific features. See @ref HS_MODE_FLAG for
|
||||
* more details.
|
||||
*
|
||||
* @param platform
|
||||
* If not NULL, the platform structure is used to determine the target
|
||||
* platform for the database. If NULL, a database suitable for running
|
||||
* on the current host platform is produced.
|
||||
*
|
||||
* @param db
|
||||
* On success, a pointer to the generated database will be returned in
|
||||
* this parameter, or NULL on failure. The caller is responsible for
|
||||
* deallocating the buffer using the @ref hs_free_database() function.
|
||||
*
|
||||
* @param error
|
||||
* If the compile fails, a pointer to a @ref hs_compile_error_t will be
|
||||
* returned, providing details of the error condition. The caller is
|
||||
* responsible for deallocating the buffer using the @ref
|
||||
* hs_free_compile_error() function.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS is returned on successful compilation; @ref
|
||||
* HS_COMPILER_ERROR on failure, with details provided in the error
|
||||
* parameter.
|
||||
*/
|
||||
hs_error_t HS_CDECL hs_compile_lit(const char *expression, unsigned flags,
|
||||
const size_t len, unsigned mode,
|
||||
const hs_platform_info_t *platform,
|
||||
hs_database_t **db,
|
||||
hs_compile_error_t **error);
|
||||
/**
|
||||
* The multiple pure literal expression compiler.
|
||||
*
|
||||
* This is the function call with which a set of pure literal expressions is
|
||||
* compiled into a database which can be passed to the runtime functions (such
|
||||
* as @ref hs_scan(), @ref hs_open_stream(), etc.) Each expression can be
|
||||
* labelled with a unique integer which is passed into the match callback to
|
||||
* identify the pattern that has matched.
|
||||
*
|
||||
* @param expressions
|
||||
* Array of expressions to parse. Note that each expression string must
|
||||
* represent ONLY the pattern to be matched, with no delimiters or flags;
|
||||
* any global flags should be specified with the @p flags argument. For
|
||||
* example, the expression `/abc?def/i` should be compiled by providing
|
||||
* `abc?def` as the @p expression, and @ref HS_FLAG_CASELESS as the @a
|
||||
* flags. Meanwhile, the string content is parsed entirely in a literal
|
||||
* sense, without any regular expression syntax. For example, the expression
|
||||
* `abc?` simply means the character sequence `a`, `b`, `c`, `?`. The `?`
|
||||
* here is not the zero-or-one quantifier of regular expression semantics.
|
||||
*
|
||||
* @param flags
|
||||
* Array of flags which modify the behaviour of each expression. Multiple
|
||||
* flags may be used by ORing them together. Specifying the NULL pointer
|
||||
* in place of an array will set the flags value for all patterns to zero.
|
||||
* Compared to @ref hs_compile_multi(), fewer valid values are provided:
|
||||
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
|
||||
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
|
||||
* - HS_FLAG_SINGLEMATCH - Only one match will be generated for the
|
||||
* expression per stream.
|
||||
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
||||
* when a match is found.
|
||||
*
|
||||
* @param ids
|
||||
* An array of integers specifying the ID number to be associated with the
|
||||
* corresponding pattern in the expressions array. Specifying the NULL
|
||||
* pointer in place of an array will set the ID value for all patterns to
|
||||
* zero.
|
||||
*
|
||||
* @param lens
|
||||
* Array of lengths of the text content of each pure literal expression.
|
||||
* Because the text content of each entry in @p expressions is treated as a
|
||||
* sequence of single characters, the `\0` character is allowed to appear
|
||||
* in an expression and is not treated as a string terminator. Thus, the
|
||||
* end of a pure literal expression is determined not by finding `\0`, but
|
||||
* by counting up to the corresponding expression length.
|
||||
*
|
||||
* @param elements
|
||||
* The number of elements in the input arrays.
|
||||
*
|
||||
* @param mode
|
||||
* Compiler mode flags that affect the database as a whole. One of @ref
|
||||
* HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
|
||||
* supplied, to select between the generation of a streaming, block or
|
||||
* vectored database. In addition, other flags (beginning with HS_MODE_)
|
||||
* may be supplied to enable specific features. See @ref HS_MODE_FLAG for
|
||||
* more details.
|
||||
*
|
||||
* @param platform
|
||||
* If not NULL, the platform structure is used to determine the target
|
||||
* platform for the database. If NULL, a database suitable for running
|
||||
* on the current host platform is produced.
|
||||
*
|
||||
* @param db
|
||||
* On success, a pointer to the generated database will be returned in
|
||||
* this parameter, or NULL on failure. The caller is responsible for
|
||||
* deallocating the buffer using the @ref hs_free_database() function.
|
||||
*
|
||||
* @param error
|
||||
* If the compile fails, a pointer to a @ref hs_compile_error_t will be
|
||||
* returned, providing details of the error condition. The caller is
|
||||
* responsible for deallocating the buffer using the @ref
|
||||
* hs_free_compile_error() function.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS is returned on successful compilation; @ref
|
||||
* HS_COMPILER_ERROR on failure, with details provided in the error
|
||||
* parameter.
|
||||
*/
|
||||
hs_error_t HS_CDECL hs_compile_lit_multi(const char * const *expressions,
|
||||
const unsigned *flags,
|
||||
const unsigned *ids,
|
||||
const size_t *lens,
|
||||
unsigned elements, unsigned mode,
|
||||
const hs_platform_info_t *platform,
|
||||
hs_database_t **db,
|
||||
hs_compile_error_t **error);
|
||||
|
||||
/**
|
||||
* Free an error structure generated by @ref hs_compile(), @ref
|
||||
* hs_compile_multi() or @ref hs_compile_ext_multi().
|
||||
@ -579,6 +750,10 @@ hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error);
|
||||
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
|
||||
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
||||
* when a match is found.
|
||||
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
|
||||
* syntax.
|
||||
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
|
||||
* the sub-expressions in logical combinations.
|
||||
*
|
||||
* @param info
|
||||
* On success, a pointer to the pattern information will be returned in
|
||||
@ -641,6 +816,10 @@ hs_error_t HS_CDECL hs_expression_info(const char *expression,
|
||||
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
|
||||
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
||||
* when a match is found.
|
||||
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
|
||||
* syntax.
|
||||
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
|
||||
* the sub-expressions in logical combinations.
|
||||
*
|
||||
* @param ext
|
||||
* A pointer to a filled @ref hs_expr_ext_t structure that defines
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -52,6 +52,17 @@ hs_error_t hs_compile_multi_int(const char *const *expressions,
|
||||
hs_database_t **db,
|
||||
hs_compile_error_t **comp_error, const Grey &g);
|
||||
|
||||
/** \brief Internal use only: takes a Grey argument so that we can use it in
|
||||
* tools. */
|
||||
hs_error_t hs_compile_lit_multi_int(const char *const *expressions,
|
||||
const unsigned *flags, const unsigned *ids,
|
||||
const hs_expr_ext *const *ext,
|
||||
const size_t *lens, unsigned elements,
|
||||
unsigned mode,
|
||||
const hs_platform_info_t *platform,
|
||||
hs_database_t **db,
|
||||
hs_compile_error_t **comp_error,
|
||||
const Grey &g);
|
||||
} // namespace ue2
|
||||
|
||||
extern "C"
|
||||
|
@ -83,10 +83,9 @@ bool maskIsConsistent(const std::string &s, bool nocase, const vector<u8> &msk,
|
||||
* \ref HWLM_MASKLEN. */
|
||||
hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in,
|
||||
bool noruns_in, u32 id_in, hwlm_group_t groups_in,
|
||||
const vector<u8> &msk_in, const vector<u8> &cmp_in,
|
||||
bool pure_in)
|
||||
const vector<u8> &msk_in, const vector<u8> &cmp_in)
|
||||
: s(s_in), id(id_in), nocase(nocase_in), noruns(noruns_in),
|
||||
groups(groups_in), msk(msk_in), cmp(cmp_in), pure(pure_in) {
|
||||
groups(groups_in), msk(msk_in), cmp(cmp_in) {
|
||||
assert(s.size() <= HWLM_LITERAL_MAX_LEN);
|
||||
assert(msk.size() <= HWLM_MASKLEN);
|
||||
assert(msk.size() == cmp.size());
|
||||
|
@ -113,16 +113,13 @@ struct hwlmLiteral {
|
||||
*/
|
||||
std::vector<u8> cmp;
|
||||
|
||||
bool pure; //!< \brief The pass-on of pure flag from LitFragment.
|
||||
|
||||
/** \brief Complete constructor, takes group information and msk/cmp.
|
||||
*
|
||||
* This constructor takes a msk/cmp pair. Both must be vectors of length <=
|
||||
* \ref HWLM_MASKLEN. */
|
||||
hwlmLiteral(const std::string &s_in, bool nocase_in, bool noruns_in,
|
||||
u32 id_in, hwlm_group_t groups_in,
|
||||
const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in,
|
||||
bool pure_in = false);
|
||||
const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in);
|
||||
|
||||
/** \brief Simple constructor: no group information, no msk/cmp.
|
||||
*
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
* Copyright (c) 2018-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -254,44 +254,6 @@ void popOperator(vector<LogicalOperator> &op_stack, vector<u32> &subid_stack,
|
||||
op_stack.pop_back();
|
||||
}
|
||||
|
||||
static
|
||||
char getValue(const vector<char> &lv, u32 ckey) {
|
||||
if (ckey & LOGICAL_OP_BIT) {
|
||||
return lv[ckey & ~LOGICAL_OP_BIT];
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
bool hasMatchFromPurelyNegative(const vector<LogicalOp> &tree,
|
||||
u32 start, u32 result) {
|
||||
vector<char> lv(tree.size());
|
||||
assert(start <= result);
|
||||
for (u32 i = start; i <= result; i++) {
|
||||
assert(i & LOGICAL_OP_BIT);
|
||||
const LogicalOp &op = tree[i & ~LOGICAL_OP_BIT];
|
||||
assert(i == op.id);
|
||||
switch (op.op) {
|
||||
case LOGICAL_OP_NOT:
|
||||
lv[op.id & ~LOGICAL_OP_BIT] = !getValue(lv, op.ro);
|
||||
break;
|
||||
case LOGICAL_OP_AND:
|
||||
lv[op.id & ~LOGICAL_OP_BIT] = getValue(lv, op.lo) &
|
||||
getValue(lv, op.ro);
|
||||
break;
|
||||
case LOGICAL_OP_OR:
|
||||
lv[op.id & ~LOGICAL_OP_BIT] = getValue(lv, op.lo) |
|
||||
getValue(lv, op.ro);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return lv[result & ~LOGICAL_OP_BIT];
|
||||
}
|
||||
|
||||
void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical,
|
||||
u32 ekey, u64a min_offset,
|
||||
u64a max_offset) {
|
||||
@ -366,9 +328,6 @@ void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical,
|
||||
if (lkey_start == INVALID_LKEY) {
|
||||
throw CompileError("No logical operation.");
|
||||
}
|
||||
if (hasMatchFromPurelyNegative(logicalTree, lkey_start, lkey_result)) {
|
||||
throw CompileError("Has match from purely negative sub-expressions.");
|
||||
}
|
||||
combinationInfoAdd(ckey, id, ekey, lkey_start, lkey_result,
|
||||
min_offset, max_offset);
|
||||
}
|
||||
|
@ -185,7 +185,6 @@ bool shortcutLiteral(NG &ng, const ParsedExpression &pe) {
|
||||
return false;
|
||||
}
|
||||
|
||||
vis.lit.set_pure();
|
||||
const ue2_literal &lit = vis.lit;
|
||||
|
||||
if (lit.empty()) {
|
||||
|
54
src/report.h
54
src/report.h
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
* Copyright (c) 2016-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -222,6 +222,58 @@ char isLogicalCombination(const struct RoseEngine *rose, char *lvec,
|
||||
return getLogicalVal(rose, lvec, result);
|
||||
}
|
||||
|
||||
/** \brief Returns 1 if combination matches when no sub-expression matches. */
|
||||
static really_inline
|
||||
char isPurelyNegativeMatch(const struct RoseEngine *rose, char *lvec,
|
||||
u32 start, u32 result) {
|
||||
const struct LogicalOp *logicalTree = (const struct LogicalOp *)
|
||||
((const char *)rose + rose->logicalTreeOffset);
|
||||
assert(start >= rose->lkeyCount);
|
||||
assert(start <= result);
|
||||
assert(result < rose->lkeyCount + rose->lopCount);
|
||||
for (u32 i = start; i <= result; i++) {
|
||||
const struct LogicalOp *op = logicalTree + (i - rose->lkeyCount);
|
||||
assert(i == op->id);
|
||||
assert(op->op <= LAST_LOGICAL_OP);
|
||||
switch ((enum LogicalOpType)op->op) {
|
||||
case LOGICAL_OP_NOT:
|
||||
if ((op->ro < rose->lkeyCount) &&
|
||||
getLogicalVal(rose, lvec, op->ro)) {
|
||||
// sub-expression not negative
|
||||
return 0;
|
||||
}
|
||||
setLogicalVal(rose, lvec, op->id,
|
||||
!getLogicalVal(rose, lvec, op->ro));
|
||||
break;
|
||||
case LOGICAL_OP_AND:
|
||||
if (((op->lo < rose->lkeyCount) &&
|
||||
getLogicalVal(rose, lvec, op->lo)) ||
|
||||
((op->ro < rose->lkeyCount) &&
|
||||
getLogicalVal(rose, lvec, op->ro))) {
|
||||
// sub-expression not negative
|
||||
return 0;
|
||||
}
|
||||
setLogicalVal(rose, lvec, op->id,
|
||||
getLogicalVal(rose, lvec, op->lo) &
|
||||
getLogicalVal(rose, lvec, op->ro)); // &&
|
||||
break;
|
||||
case LOGICAL_OP_OR:
|
||||
if (((op->lo < rose->lkeyCount) &&
|
||||
getLogicalVal(rose, lvec, op->lo)) ||
|
||||
((op->ro < rose->lkeyCount) &&
|
||||
getLogicalVal(rose, lvec, op->ro))) {
|
||||
// sub-expression not negative
|
||||
return 0;
|
||||
}
|
||||
setLogicalVal(rose, lvec, op->id,
|
||||
getLogicalVal(rose, lvec, op->lo) |
|
||||
getLogicalVal(rose, lvec, op->ro)); // ||
|
||||
break;
|
||||
}
|
||||
}
|
||||
return getLogicalVal(rose, lvec, result);
|
||||
}
|
||||
|
||||
/** \brief Clear all keys in the logical vector. */
|
||||
static really_inline
|
||||
void clearLvec(const struct RoseEngine *rose, char *lvec, char *cvec) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
|
@ -238,10 +238,10 @@ hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t,
|
||||
assert(id && id < t->size); // id is an offset into bytecode
|
||||
const u64a som = 0;
|
||||
const u8 flags = 0;
|
||||
if (!scratch->pure) {
|
||||
return roseRunProgram(t, scratch, id, som, end, flags);
|
||||
} else {
|
||||
if (t->pureLiteral) {
|
||||
return roseRunProgram_l(t, scratch, id, som, end, flags);
|
||||
} else {
|
||||
return roseRunProgram(t, scratch, id, som, end, flags);
|
||||
}
|
||||
}
|
||||
|
||||
@ -591,6 +591,23 @@ int roseRunFlushCombProgram(const struct RoseEngine *rose,
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Execute last flush combination program.
|
||||
*
|
||||
* Returns MO_HALT_MATCHING if the stream is exhausted or the user has
|
||||
* instructed us to halt, or MO_CONTINUE_MATCHING otherwise.
|
||||
*/
|
||||
int roseRunLastFlushCombProgram(const struct RoseEngine *rose,
|
||||
struct hs_scratch *scratch, u64a end) {
|
||||
hwlmcb_rv_t rv = roseRunProgram(rose, scratch,
|
||||
rose->lastFlushCombProgramOffset,
|
||||
0, end, 0);
|
||||
if (rv == HWLM_TERMINATE_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) {
|
||||
struct hs_scratch *scratch = context;
|
||||
assert(scratch && scratch->magic == SCRATCH_MAGIC);
|
||||
@ -602,8 +619,12 @@ int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) {
|
||||
// Our match ID is the program offset.
|
||||
const u32 program = id;
|
||||
const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP;
|
||||
hwlmcb_rv_t rv =
|
||||
roseRunProgram(rose, scratch, program, start, end, flags);
|
||||
hwlmcb_rv_t rv;
|
||||
if (rose->pureLiteral) {
|
||||
rv = roseRunProgram_l(rose, scratch, program, start, end, flags);
|
||||
} else {
|
||||
rv = roseRunProgram(rose, scratch, program, start, end, flags);
|
||||
}
|
||||
if (rv == HWLM_TERMINATE_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
|
@ -480,6 +480,25 @@ hwlmcb_rv_t roseReport(const struct RoseEngine *t, struct hs_scratch *scratch,
|
||||
return roseHaltIfExhausted(t, scratch);
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
hwlmcb_rv_t roseReportComb(const struct RoseEngine *t,
|
||||
struct hs_scratch *scratch, u64a end,
|
||||
ReportID onmatch, s32 offset_adjust, u32 ekey) {
|
||||
DEBUG_PRINTF("firing callback onmatch=%u, end=%llu\n", onmatch, end);
|
||||
|
||||
int cb_rv = roseDeliverReport(end, onmatch, offset_adjust, scratch, ekey);
|
||||
if (cb_rv == MO_HALT_MATCHING) {
|
||||
DEBUG_PRINTF("termination requested\n");
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
|
||||
if (ekey == INVALID_EKEY || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
return roseHaltIfExhausted(t, scratch);
|
||||
}
|
||||
|
||||
/* catches up engines enough to ensure any earlier mpv triggers are enqueued
|
||||
* and then adds the trigger to the mpv queue. */
|
||||
static rose_inline
|
||||
@ -1866,7 +1885,7 @@ hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t,
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("Logical Combination Passed!\n");
|
||||
if (roseReport(t, scratch, end, ci->id, 0,
|
||||
if (roseReportComb(t, scratch, end, ci->id, 0,
|
||||
ci->ekey) == HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
@ -1875,6 +1894,49 @@ hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t,
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
hwlmcb_rv_t checkPurelyNegatives(const struct RoseEngine *t,
|
||||
struct hs_scratch *scratch, u64a end) {
|
||||
for (u32 i = 0; i < t->ckeyCount; i++) {
|
||||
const struct CombInfo *combInfoMap = (const struct CombInfo *)
|
||||
((const char *)t + t->combInfoMapOffset);
|
||||
const struct CombInfo *ci = combInfoMap + i;
|
||||
if ((ci->min_offset != 0) && (end < ci->min_offset)) {
|
||||
DEBUG_PRINTF("halt: before min_offset=%llu\n", ci->min_offset);
|
||||
continue;
|
||||
}
|
||||
if ((ci->max_offset != MAX_OFFSET) && (end > ci->max_offset)) {
|
||||
DEBUG_PRINTF("halt: after max_offset=%llu\n", ci->max_offset);
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("check ekey %u\n", ci->ekey);
|
||||
if (ci->ekey != INVALID_EKEY) {
|
||||
assert(ci->ekey < t->ekeyCount);
|
||||
const char *evec = scratch->core_info.exhaustionVector;
|
||||
if (isExhausted(t, evec, ci->ekey)) {
|
||||
DEBUG_PRINTF("ekey %u already set, match is exhausted\n",
|
||||
ci->ekey);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("check ckey %u purely negative\n", i);
|
||||
char *lvec = scratch->core_info.logicalVector;
|
||||
if (!isPurelyNegativeMatch(t, lvec, ci->start, ci->result)) {
|
||||
DEBUG_PRINTF("Logical Combination from purely negative Failed!\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("Logical Combination from purely negative Passed!\n");
|
||||
if (roseReportComb(t, scratch, end, ci->id, 0,
|
||||
ci->ekey) == HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
}
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#define PROGRAM_CASE(name) \
|
||||
case ROSE_INSTR_##name: { \
|
||||
@ -2004,7 +2066,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
|
||||
&&LABEL_ROSE_INSTR_SET_LOGICAL,
|
||||
&&LABEL_ROSE_INSTR_SET_COMBINATION,
|
||||
&&LABEL_ROSE_INSTR_FLUSH_COMBINATION,
|
||||
&&LABEL_ROSE_INSTR_SET_EXHAUST
|
||||
&&LABEL_ROSE_INSTR_SET_EXHAUST,
|
||||
&&LABEL_ROSE_INSTR_LAST_FLUSH_COMBINATION
|
||||
};
|
||||
#endif
|
||||
|
||||
@ -2772,6 +2835,19 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(LAST_FLUSH_COMBINATION) {
|
||||
assert(end >= tctxt->lastCombMatchOffset);
|
||||
if (flushActiveCombinations(t, scratch)
|
||||
== HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
if (checkPurelyNegatives(t, scratch, end)
|
||||
== HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
default: {
|
||||
assert(0); // unreachable
|
||||
scratch->core_info.status |= STATUS_ERROR;
|
||||
@ -2808,6 +2884,7 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
|
||||
assert(programOffset >= sizeof(struct RoseEngine));
|
||||
assert(programOffset < t->size);
|
||||
|
||||
const char in_catchup = prog_flags & ROSE_PROG_FLAG_IN_CATCHUP;
|
||||
const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV;
|
||||
|
||||
const char *pc_base = getByOffset(t, programOffset);
|
||||
@ -2835,6 +2912,56 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
|
||||
}
|
||||
L_PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
L_PROGRAM_CASE(CHECK_GROUPS) {
|
||||
DEBUG_PRINTF("groups=0x%llx, checking instr groups=0x%llx\n",
|
||||
tctxt->groups, ri->groups);
|
||||
if (!(ri->groups & tctxt->groups)) {
|
||||
DEBUG_PRINTF("halt: no groups are set\n");
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
}
|
||||
L_PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
L_PROGRAM_CASE(CHECK_MASK) {
|
||||
struct core_info *ci = &scratch->core_info;
|
||||
if (!roseCheckMask(ci, ri->and_mask, ri->cmp_mask,
|
||||
ri->neg_mask, ri->offset, end)) {
|
||||
DEBUG_PRINTF("failed mask check\n");
|
||||
assert(ri->fail_jump); // must progress
|
||||
pc += ri->fail_jump;
|
||||
L_PROGRAM_NEXT_INSTRUCTION_JUMP
|
||||
}
|
||||
}
|
||||
L_PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
L_PROGRAM_CASE(CHECK_MASK_32) {
|
||||
struct core_info *ci = &scratch->core_info;
|
||||
if (!roseCheckMask32(ci, ri->and_mask, ri->cmp_mask,
|
||||
ri->neg_mask, ri->offset, end)) {
|
||||
assert(ri->fail_jump);
|
||||
pc += ri->fail_jump;
|
||||
L_PROGRAM_NEXT_INSTRUCTION_JUMP
|
||||
}
|
||||
}
|
||||
L_PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
L_PROGRAM_CASE(CHECK_BYTE) {
|
||||
const struct core_info *ci = &scratch->core_info;
|
||||
if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask,
|
||||
ri->negation, ri->offset, end)) {
|
||||
DEBUG_PRINTF("failed byte check\n");
|
||||
assert(ri->fail_jump); // must progress
|
||||
pc += ri->fail_jump;
|
||||
L_PROGRAM_NEXT_INSTRUCTION_JUMP
|
||||
}
|
||||
}
|
||||
L_PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
L_PROGRAM_CASE(PUSH_DELAYED) {
|
||||
rosePushDelayedMatch(t, scratch, ri->delay, ri->index, end);
|
||||
}
|
||||
L_PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
L_PROGRAM_CASE(CATCH_UP) {
|
||||
if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
@ -2891,6 +3018,17 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
|
||||
}
|
||||
L_PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
L_PROGRAM_CASE(REPORT_CHAIN) {
|
||||
// Note: sequence points updated inside this function.
|
||||
if (roseCatchUpAndHandleChainMatch(
|
||||
t, scratch, ri->event, ri->top_squash_distance, end,
|
||||
in_catchup) == HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
work_done = 1;
|
||||
}
|
||||
L_PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
L_PROGRAM_CASE(REPORT) {
|
||||
updateSeqPoint(tctxt, end, from_mpv);
|
||||
if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust,
|
||||
@ -3041,6 +3179,24 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
|
||||
}
|
||||
L_PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
L_PROGRAM_CASE(INCLUDED_JUMP) {
|
||||
if (scratch->fdr_conf) {
|
||||
// squash the bucket of included literal
|
||||
u8 shift = scratch->fdr_conf_offset & ~7U;
|
||||
u64a mask = ((~(u64a)ri->squash) << shift);
|
||||
*(scratch->fdr_conf) &= mask;
|
||||
|
||||
pc = getByOffset(t, ri->child_offset);
|
||||
pc_base = pc;
|
||||
programOffset = (const u8 *)pc_base -(const u8 *)t;
|
||||
DEBUG_PRINTF("pc_base %p pc %p child_offset %u squash %u\n",
|
||||
pc_base, pc, ri->child_offset, ri->squash);
|
||||
work_done = 0;
|
||||
L_PROGRAM_NEXT_INSTRUCTION_JUMP
|
||||
}
|
||||
}
|
||||
L_PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
L_PROGRAM_CASE(SET_LOGICAL) {
|
||||
DEBUG_PRINTF("set logical value of lkey %u, offset_adjust=%d\n",
|
||||
ri->lkey, ri->offset_adjust);
|
||||
@ -3082,6 +3238,19 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
|
||||
}
|
||||
L_PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
L_PROGRAM_CASE(LAST_FLUSH_COMBINATION) {
|
||||
assert(end >= tctxt->lastCombMatchOffset);
|
||||
if (flushActiveCombinations(t, scratch)
|
||||
== HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
if (checkPurelyNegatives(t, scratch, end)
|
||||
== HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
}
|
||||
L_PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
default: {
|
||||
assert(0); // unreachable
|
||||
scratch->core_info.status |= STATUS_ERROR;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -56,4 +56,7 @@ int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program,
|
||||
int roseRunFlushCombProgram(const struct RoseEngine *rose,
|
||||
struct hs_scratch *scratch, u64a end);
|
||||
|
||||
int roseRunLastFlushCombProgram(const struct RoseEngine *rose,
|
||||
struct hs_scratch *scratch, u64a end);
|
||||
|
||||
#endif // ROSE_H
|
||||
|
@ -2843,34 +2843,9 @@ vector<LitFragment> groupByFragment(const RoseBuildImpl &build) {
|
||||
|
||||
DEBUG_PRINTF("fragment candidate: lit_id=%u %s\n", lit_id,
|
||||
dumpString(lit.s).c_str());
|
||||
|
||||
/** 0:/xxabcdefgh/ */
|
||||
/** 1:/yyabcdefgh/ */
|
||||
/** 2:/yyabcdefgh.+/ */
|
||||
// Above 3 patterns should firstly convert into RoseLiteralMap with
|
||||
// 2 elements ("xxabcdefgh" and "yyabcdefgh"), then convert into
|
||||
// LitFragment with 1 element ("abcdefgh"). Special care should be
|
||||
// taken to handle the 'pure' flag during the conversion.
|
||||
|
||||
rose_literal_id lit_frag = getFragment(lit);
|
||||
auto it = frag_info.find(lit_frag);
|
||||
if (it != frag_info.end()) {
|
||||
if (!lit_frag.s.get_pure() && it->first.s.get_pure()) {
|
||||
struct FragmentInfo f_info = it->second;
|
||||
f_info.lit_ids.push_back(lit_id);
|
||||
f_info.groups |= groups;
|
||||
frag_info.erase(it->first);
|
||||
frag_info.emplace(lit_frag, f_info);
|
||||
} else {
|
||||
it->second.lit_ids.push_back(lit_id);
|
||||
it->second.groups |= groups;
|
||||
}
|
||||
} else {
|
||||
struct FragmentInfo f_info;
|
||||
f_info.lit_ids.push_back(lit_id);
|
||||
f_info.groups |= groups;
|
||||
frag_info.emplace(lit_frag, f_info);
|
||||
}
|
||||
auto &fi = frag_info[getFragment(lit)];
|
||||
fi.lit_ids.push_back(lit_id);
|
||||
fi.groups |= groups;
|
||||
}
|
||||
|
||||
for (auto &m : frag_info) {
|
||||
@ -3370,6 +3345,15 @@ RoseProgram makeFlushCombProgram(const RoseEngine &t) {
|
||||
return program;
|
||||
}
|
||||
|
||||
static
|
||||
RoseProgram makeLastFlushCombProgram(const RoseEngine &t) {
|
||||
RoseProgram program;
|
||||
if (t.ckeyCount) {
|
||||
addLastFlushCombinationProgram(program);
|
||||
}
|
||||
return program;
|
||||
}
|
||||
|
||||
static
|
||||
u32 history_required(const rose_literal_id &key) {
|
||||
if (key.msk.size() < key.s.length()) {
|
||||
@ -3740,6 +3724,10 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
||||
auto flushComb_prog = makeFlushCombProgram(proto);
|
||||
proto.flushCombProgramOffset = writeProgram(bc, move(flushComb_prog));
|
||||
|
||||
auto lastFlushComb_prog = makeLastFlushCombProgram(proto);
|
||||
proto.lastFlushCombProgramOffset =
|
||||
writeProgram(bc, move(lastFlushComb_prog));
|
||||
|
||||
// Build anchored matcher.
|
||||
auto atable = buildAnchoredMatcher(*this, fragments, anchored_dfas);
|
||||
if (atable) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -1486,6 +1486,9 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(LAST_FLUSH_COMBINATION) {}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
default:
|
||||
os << " UNKNOWN (code " << int{code} << ")" << endl;
|
||||
os << " <stopping>" << endl;
|
||||
@ -1557,6 +1560,25 @@ void dumpRoseFlushCombPrograms(const RoseEngine *t, const string &filename) {
|
||||
os.close();
|
||||
}
|
||||
|
||||
static
|
||||
void dumpRoseLastFlushCombPrograms(const RoseEngine *t,
|
||||
const string &filename) {
|
||||
ofstream os(filename);
|
||||
const char *base = (const char *)t;
|
||||
|
||||
if (t->lastFlushCombProgramOffset) {
|
||||
os << "Last Flush Combination Program @ "
|
||||
<< t->lastFlushCombProgramOffset
|
||||
<< ":" << endl;
|
||||
dumpProgram(os, t, base + t->lastFlushCombProgramOffset);
|
||||
os << endl;
|
||||
} else {
|
||||
os << "<No Last Flush Combination Program>" << endl;
|
||||
}
|
||||
|
||||
os.close();
|
||||
}
|
||||
|
||||
static
|
||||
void dumpRoseReportPrograms(const RoseEngine *t, const string &filename) {
|
||||
ofstream os(filename);
|
||||
@ -2249,6 +2271,8 @@ void roseDumpPrograms(const vector<LitFragment> &fragments, const RoseEngine *t,
|
||||
dumpRoseLitPrograms(fragments, t, base + "/rose_lit_programs.txt");
|
||||
dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt");
|
||||
dumpRoseFlushCombPrograms(t, base + "/rose_flush_comb_programs.txt");
|
||||
dumpRoseLastFlushCombPrograms(t,
|
||||
base + "/rose_last_flush_comb_programs.txt");
|
||||
dumpRoseReportPrograms(t, base + "/rose_report_programs.txt");
|
||||
dumpRoseAnchoredPrograms(t, base + "/rose_anchored_programs.txt");
|
||||
dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt");
|
||||
|
@ -340,14 +340,7 @@ public:
|
||||
std::pair<u32, bool> insert(const rose_literal_id &lit) {
|
||||
auto it = lits_index.find(lit);
|
||||
if (it != lits_index.end()) {
|
||||
u32 idx = it->second;
|
||||
auto &l = lits.at(idx);
|
||||
if (!lit.s.get_pure() && l.s.get_pure()) {
|
||||
lits_index.erase(l);
|
||||
l.s.unset_pure();
|
||||
lits_index.emplace(l, idx);
|
||||
}
|
||||
return {idx, false};
|
||||
return {it->second, false};
|
||||
}
|
||||
u32 id = verify_u32(lits.size());
|
||||
lits.push_back(lit);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2018, Intel Corporation
|
||||
* Copyright (c) 2017-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -48,6 +48,7 @@ RoseInstrMatcherEod::~RoseInstrMatcherEod() = default;
|
||||
RoseInstrEnd::~RoseInstrEnd() = default;
|
||||
RoseInstrClearWorkDone::~RoseInstrClearWorkDone() = default;
|
||||
RoseInstrFlushCombination::~RoseInstrFlushCombination() = default;
|
||||
RoseInstrLastFlushCombination::~RoseInstrLastFlushCombination() = default;
|
||||
|
||||
using OffsetMap = RoseInstruction::OffsetMap;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2018, Intel Corporation
|
||||
* Copyright (c) 2017-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -2206,6 +2206,14 @@ public:
|
||||
~RoseInstrFlushCombination() override;
|
||||
};
|
||||
|
||||
class RoseInstrLastFlushCombination
|
||||
: public RoseInstrBaseTrivial<ROSE_INSTR_LAST_FLUSH_COMBINATION,
|
||||
ROSE_STRUCT_LAST_FLUSH_COMBINATION,
|
||||
RoseInstrLastFlushCombination> {
|
||||
public:
|
||||
~RoseInstrLastFlushCombination() override;
|
||||
};
|
||||
|
||||
class RoseInstrSetExhaust
|
||||
: public RoseInstrBaseNoTargets<ROSE_INSTR_SET_EXHAUST,
|
||||
ROSE_STRUCT_SET_EXHAUST,
|
||||
|
@ -727,7 +727,6 @@ void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp,
|
||||
|
||||
const auto &s_final = lit_final.get_string();
|
||||
bool nocase = lit_final.any_nocase();
|
||||
bool pure = f.s.get_pure();
|
||||
|
||||
DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, cmp=%s\n",
|
||||
f.fragment_id, escapeString(s_final).c_str(), (int)nocase,
|
||||
@ -741,7 +740,7 @@ void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp,
|
||||
const auto &groups = f.groups;
|
||||
|
||||
mp.lits.emplace_back(move(s_final), nocase, noruns, f.fragment_id,
|
||||
groups, msk, cmp, pure);
|
||||
groups, msk, cmp);
|
||||
}
|
||||
|
||||
static
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
* Copyright (c) 2016-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -317,6 +317,10 @@ void addFlushCombinationProgram(RoseProgram &program) {
|
||||
program.add_before_end(make_unique<RoseInstrFlushCombination>());
|
||||
}
|
||||
|
||||
void addLastFlushCombinationProgram(RoseProgram &program) {
|
||||
program.add_before_end(make_unique<RoseInstrLastFlushCombination>());
|
||||
}
|
||||
|
||||
static
|
||||
void makeRoleCheckLeftfix(const RoseBuildImpl &build,
|
||||
const map<RoseVertex, left_build_info> &leftfix_info,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
* Copyright (c) 2016-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -188,6 +188,7 @@ void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program);
|
||||
void addSuffixesEodProgram(RoseProgram &program);
|
||||
void addMatcherEodProgram(RoseProgram &program);
|
||||
void addFlushCombinationProgram(RoseProgram &program);
|
||||
void addLastFlushCombinationProgram(RoseProgram &program);
|
||||
|
||||
static constexpr u32 INVALID_QUEUE = ~0U;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -328,6 +328,7 @@ struct RoseBoundaryReports {
|
||||
* nfas). Rose nfa info table can distinguish the cases.
|
||||
*/
|
||||
struct RoseEngine {
|
||||
u8 pureLiteral; /* Indicator of pure literal API */
|
||||
u8 noFloatingRoots; /* only need to run the anchored table if something
|
||||
* matched in the anchored table */
|
||||
u8 requiresEodCheck; /* stuff happens at eod time */
|
||||
@ -426,6 +427,8 @@ struct RoseEngine {
|
||||
|
||||
u32 eodProgramOffset; //!< EOD program, otherwise 0.
|
||||
u32 flushCombProgramOffset; /**< FlushCombination program, otherwise 0 */
|
||||
u32 lastFlushCombProgramOffset; /**< LastFlushCombination program,
|
||||
* otherwise 0 */
|
||||
|
||||
u32 lastByteHistoryIterOffset; // if non-zero
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -201,7 +201,14 @@ enum RoseInstructionCode {
|
||||
/** \brief Mark as exhausted instead of report while quiet. */
|
||||
ROSE_INSTR_SET_EXHAUST,
|
||||
|
||||
LAST_ROSE_INSTRUCTION = ROSE_INSTR_SET_EXHAUST //!< Sentinel.
|
||||
/**
|
||||
* \brief Calculate any combination's logical value if none of its
|
||||
* sub-expression matches until EOD, then check if compliant with any
|
||||
* logical constraints.
|
||||
*/
|
||||
ROSE_INSTR_LAST_FLUSH_COMBINATION,
|
||||
|
||||
LAST_ROSE_INSTRUCTION = ROSE_INSTR_LAST_FLUSH_COMBINATION //!< Sentinel.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_END {
|
||||
@ -674,4 +681,8 @@ struct ROSE_STRUCT_SET_EXHAUST {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 ekey; //!< Exhaustion key.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_LAST_FLUSH_COMBINATION {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
};
|
||||
#endif // ROSE_ROSE_PROGRAM_H
|
||||
|
@ -141,7 +141,6 @@ void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose,
|
||||
s->deduper.current_report_offset = ~0ULL;
|
||||
s->deduper.som_log_dirty = 1; /* som logs have not been cleared */
|
||||
s->fdr_conf = NULL;
|
||||
s->pure = 0;
|
||||
|
||||
// Rose program execution (used for some report paths) depends on these
|
||||
// values being initialised.
|
||||
@ -455,8 +454,9 @@ set_retval:
|
||||
return HS_UNKNOWN_ERROR;
|
||||
}
|
||||
|
||||
if (rose->flushCombProgramOffset) {
|
||||
if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) {
|
||||
if (rose->lastFlushCombProgramOffset) {
|
||||
if (roseRunLastFlushCombProgram(rose, scratch, length)
|
||||
== MO_HALT_MATCHING) {
|
||||
if (unlikely(internal_matching_error(scratch))) {
|
||||
unmarkScratchInUse(scratch);
|
||||
return HS_UNKNOWN_ERROR;
|
||||
@ -651,8 +651,10 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
|
||||
scratch->core_info.logicalVector = state +
|
||||
rose->stateOffsets.logicalVec;
|
||||
scratch->core_info.combVector = state + rose->stateOffsets.combVec;
|
||||
if (!id->offset) {
|
||||
scratch->tctxt.lastCombMatchOffset = id->offset;
|
||||
}
|
||||
}
|
||||
|
||||
if (rose->somLocationCount) {
|
||||
loadSomFromStream(scratch, id->offset);
|
||||
@ -698,8 +700,9 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
|
||||
}
|
||||
}
|
||||
|
||||
if (rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
|
||||
if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) {
|
||||
if (rose->lastFlushCombProgramOffset && !told_to_stop_matching(scratch)) {
|
||||
if (roseRunLastFlushCombProgram(rose, scratch, id->offset)
|
||||
== MO_HALT_MATCHING) {
|
||||
DEBUG_PRINTF("told to stop matching\n");
|
||||
scratch->core_info.status |= STATUS_TERMINATED;
|
||||
}
|
||||
@ -906,8 +909,10 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
|
||||
scratch->core_info.logicalVector = state +
|
||||
rose->stateOffsets.logicalVec;
|
||||
scratch->core_info.combVector = state + rose->stateOffsets.combVec;
|
||||
if (!id->offset) {
|
||||
scratch->tctxt.lastCombMatchOffset = id->offset;
|
||||
}
|
||||
}
|
||||
assert(scratch->core_info.hlen <= id->offset
|
||||
&& scratch->core_info.hlen <= rose->historyRequired);
|
||||
|
||||
@ -1013,18 +1018,6 @@ hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
|
||||
unmarkScratchInUse(scratch);
|
||||
}
|
||||
|
||||
if (id->rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
|
||||
if (roseRunFlushCombProgram(id->rose, scratch, ~0ULL)
|
||||
== MO_HALT_MATCHING) {
|
||||
scratch->core_info.status |= STATUS_TERMINATED;
|
||||
if (unlikely(internal_matching_error(scratch))) {
|
||||
unmarkScratchInUse(scratch);
|
||||
return HS_UNKNOWN_ERROR;
|
||||
}
|
||||
unmarkScratchInUse(scratch);
|
||||
}
|
||||
}
|
||||
|
||||
hs_stream_free(id);
|
||||
|
||||
return HS_SUCCESS;
|
||||
@ -1054,18 +1047,6 @@ hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags,
|
||||
unmarkScratchInUse(scratch);
|
||||
}
|
||||
|
||||
if (id->rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
|
||||
if (roseRunFlushCombProgram(id->rose, scratch, ~0ULL)
|
||||
== MO_HALT_MATCHING) {
|
||||
scratch->core_info.status |= STATUS_TERMINATED;
|
||||
if (unlikely(internal_matching_error(scratch))) {
|
||||
unmarkScratchInUse(scratch);
|
||||
return HS_UNKNOWN_ERROR;
|
||||
}
|
||||
unmarkScratchInUse(scratch);
|
||||
}
|
||||
}
|
||||
|
||||
// history already initialised
|
||||
init_stream(id, id->rose, 0);
|
||||
|
||||
|
@ -137,7 +137,6 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
|
||||
s->scratchSize = alloc_size;
|
||||
s->scratch_alloc = (char *)s_tmp;
|
||||
s->fdr_conf = NULL;
|
||||
s->pure = 0;
|
||||
|
||||
// each of these is at an offset from the previous
|
||||
char *current = (char *)s + sizeof(*s);
|
||||
@ -280,7 +279,9 @@ hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db,
|
||||
hs_error_t proto_ret = hs_check_alloc(proto_tmp);
|
||||
if (proto_ret != HS_SUCCESS) {
|
||||
hs_scratch_free(proto_tmp);
|
||||
hs_scratch_free(*scratch);
|
||||
if (*scratch) {
|
||||
hs_scratch_free((*scratch)->scratch_alloc);
|
||||
}
|
||||
*scratch = NULL;
|
||||
return proto_ret;
|
||||
}
|
||||
|
@ -211,7 +211,6 @@ struct ALIGN_CL_DIRECTIVE hs_scratch {
|
||||
u64a *fdr_conf; /**< FDR confirm value */
|
||||
u8 fdr_conf_offset; /**< offset where FDR/Teddy front end matches
|
||||
* in buffer */
|
||||
u8 pure; /**< indicator of pure-literal or cutting-literal */
|
||||
};
|
||||
|
||||
/* array of fatbit ptr; TODO: why not an array of fatbits? */
|
||||
|
@ -192,11 +192,11 @@ vector<mmbit_sparse_iter> mmbBuildSparseIterator(const vector<u32> &bits,
|
||||
template<typename T>
|
||||
static
|
||||
void add_scatter(vector<T> *out, u32 offset, u64a mask) {
|
||||
T su;
|
||||
out->emplace_back();
|
||||
T &su = out->back();
|
||||
memset(&su, 0, sizeof(su));
|
||||
su.offset = offset;
|
||||
su.val = mask;
|
||||
out->push_back(su);
|
||||
DEBUG_PRINTF("add %llu at offset %u\n", mask, offset);
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -283,7 +283,6 @@ ue2_literal &ue2_literal::erase(size_type pos, size_type n) {
|
||||
}
|
||||
|
||||
void ue2_literal::push_back(char c, bool nc) {
|
||||
assert(!nc || ourisalpha(c));
|
||||
if (nc) {
|
||||
c = mytoupper(c);
|
||||
}
|
||||
|
@ -211,17 +211,10 @@ public:
|
||||
|
||||
size_t hash() const;
|
||||
|
||||
void set_pure() { pure = true; }
|
||||
void unset_pure() { pure = false; }
|
||||
bool get_pure() const { return pure; }
|
||||
|
||||
/* TODO: consider existing member functions possibly related with pure. */
|
||||
|
||||
private:
|
||||
friend const_iterator;
|
||||
std::string s;
|
||||
boost::dynamic_bitset<> nocase;
|
||||
bool pure = false; /**< born from cutting or not (pure literal). */
|
||||
};
|
||||
|
||||
/// Return a reversed copy of this literal.
|
||||
|
@ -56,10 +56,7 @@ if (BUILD_CHIMERA)
|
||||
engine_pcre.cpp
|
||||
engine_pcre.h
|
||||
)
|
||||
endif()
|
||||
|
||||
add_executable(hsbench ${hsbench_SOURCES})
|
||||
if (BUILD_CHIMERA)
|
||||
add_executable(hsbench ${hsbench_SOURCES})
|
||||
include_directories(${PCRE_INCLUDE_DIRS})
|
||||
if(NOT WIN32)
|
||||
target_link_libraries(hsbench hs chimera ${PCRE_LDFLAGS} databaseutil
|
||||
@ -69,6 +66,11 @@ if (BUILD_CHIMERA)
|
||||
expressionutil ${SQLITE3_LDFLAGS} ${CMAKE_THREAD_LIBS_INIT})
|
||||
endif()
|
||||
else()
|
||||
if(WIN32 AND (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS))
|
||||
add_executable(hsbench ${hsbench_SOURCES} $<TARGET_OBJECTS:hs_compile_shared> $<TARGET_OBJECTS:hs_exec_shared>)
|
||||
else()
|
||||
add_executable(hsbench ${hsbench_SOURCES})
|
||||
endif()
|
||||
target_link_libraries(hsbench hs databaseutil expressionutil
|
||||
${SQLITE3_LDFLAGS} ${CMAKE_THREAD_LIBS_INIT})
|
||||
endif()
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
* Copyright (c) 2016-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -41,6 +41,7 @@ extern unsigned int somPrecisionMode;
|
||||
extern bool forceEditDistance;
|
||||
extern unsigned editDistance;
|
||||
extern bool printCompressSize;
|
||||
extern bool useLiteralApi;
|
||||
|
||||
/** Structure for the result of a single complete scan. */
|
||||
struct ResultEntry {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
* Copyright (c) 2016-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -411,22 +411,30 @@ buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode,
|
||||
ext_ptr[i] = &ext[i];
|
||||
}
|
||||
|
||||
Timer timer;
|
||||
timer.start();
|
||||
|
||||
hs_compile_error_t *compile_err;
|
||||
Timer timer;
|
||||
|
||||
#ifndef RELEASE_BUILD
|
||||
err = hs_compile_multi_int(patterns.data(), flags.data(), ids.data(),
|
||||
ext_ptr.data(), count, full_mode, nullptr,
|
||||
&db, &compile_err, grey);
|
||||
#else
|
||||
err = hs_compile_ext_multi(patterns.data(), flags.data(), ids.data(),
|
||||
ext_ptr.data(), count, full_mode, nullptr,
|
||||
&db, &compile_err);
|
||||
#endif
|
||||
|
||||
if (useLiteralApi) {
|
||||
// Pattern length computation should be done before timer start.
|
||||
vector<size_t> lens(count);
|
||||
for (unsigned int i = 0; i < count; i++) {
|
||||
lens[i] = strlen(patterns[i]);
|
||||
}
|
||||
timer.start();
|
||||
err = hs_compile_lit_multi_int(patterns.data(), flags.data(),
|
||||
ids.data(), ext_ptr.data(),
|
||||
lens.data(), count, full_mode,
|
||||
nullptr, &db, &compile_err, grey);
|
||||
timer.complete();
|
||||
} else {
|
||||
timer.start();
|
||||
err = hs_compile_multi_int(patterns.data(), flags.data(),
|
||||
ids.data(), ext_ptr.data(), count,
|
||||
full_mode, nullptr, &db, &compile_err,
|
||||
grey);
|
||||
timer.complete();
|
||||
}
|
||||
|
||||
compileSecs = timer.seconds();
|
||||
peakMemorySize = getPeakHeap();
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
* Copyright (c) 2016-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -87,6 +87,7 @@ unsigned int somPrecisionMode = HS_MODE_SOM_HORIZON_LARGE;
|
||||
bool forceEditDistance = false;
|
||||
unsigned editDistance = 0;
|
||||
bool printCompressSize = false;
|
||||
bool useLiteralApi = false;
|
||||
|
||||
// Globals local to this file.
|
||||
static bool compressStream = false;
|
||||
@ -218,6 +219,7 @@ void usage(const char *error) {
|
||||
printf(" --per-scan Display per-scan Mbit/sec results.\n");
|
||||
printf(" --echo-matches Display all matches that occur during scan.\n");
|
||||
printf(" --sql-out FILE Output sqlite db.\n");
|
||||
printf(" --literal-on Use Hyperscan pure literal matching.\n");
|
||||
printf(" -S NAME Signature set name (for sqlite db).\n");
|
||||
printf("\n\n");
|
||||
|
||||
@ -250,6 +252,7 @@ void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
|
||||
int do_echo_matches = 0;
|
||||
int do_sql_output = 0;
|
||||
int option_index = 0;
|
||||
int literalFlag = 0;
|
||||
vector<string> sigFiles;
|
||||
|
||||
static struct option longopts[] = {
|
||||
@ -257,6 +260,7 @@ void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
|
||||
{"echo-matches", no_argument, &do_echo_matches, 1},
|
||||
{"compress-stream", no_argument, &do_compress, 1},
|
||||
{"sql-out", required_argument, &do_sql_output, 1},
|
||||
{"literal-on", no_argument, &literalFlag, 1},
|
||||
{nullptr, 0, nullptr, 0}
|
||||
};
|
||||
|
||||
@ -463,6 +467,8 @@ void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
|
||||
loadSignatureList(file, sigs);
|
||||
sigSets.emplace_back(file, move(sigs));
|
||||
}
|
||||
|
||||
useLiteralApi = (bool)literalFlag;
|
||||
}
|
||||
|
||||
/** Start the global timer. */
|
||||
|
@ -16,7 +16,11 @@ if (BUILD_CHIMERA)
|
||||
target_link_libraries(hscheck hs chimera pcre expressionutil)
|
||||
endif()
|
||||
else()
|
||||
if(WIN32 AND (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS))
|
||||
add_executable(hscheck ${hscheck_SOURCES} $<TARGET_OBJECTS:hs_compile_shared> $<TARGET_OBJECTS:hs_exec_shared>)
|
||||
else()
|
||||
add_executable(hscheck ${hscheck_SOURCES})
|
||||
endif()
|
||||
if(NOT WIN32)
|
||||
target_link_libraries(hscheck hs expressionutil pthread)
|
||||
else()
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -92,6 +92,7 @@ bool g_allSignatures = false;
|
||||
bool g_forceEditDistance = false;
|
||||
bool build_sigs = false;
|
||||
bool check_logical = false;
|
||||
bool use_literal_api = false;
|
||||
unsigned int g_signature;
|
||||
unsigned int g_editDistance;
|
||||
unsigned int globalFlags = 0;
|
||||
@ -322,11 +323,26 @@ void checkExpression(UNUSED void *threadarg) {
|
||||
#if !defined(RELEASE_BUILD)
|
||||
// This variant is available in non-release builds and allows us to
|
||||
// modify greybox settings.
|
||||
err = hs_compile_multi_int(&regexp, &flags, nullptr, &extp, 1, mode,
|
||||
nullptr, &db, &compile_err, *g_grey);
|
||||
if (use_literal_api) {
|
||||
size_t len = strlen(regexp);
|
||||
err = hs_compile_lit_multi_int(&regexp, &flags, nullptr, &extp,
|
||||
&len, 1, mode, nullptr, &db,
|
||||
&compile_err, *g_grey);
|
||||
} else {
|
||||
err = hs_compile_multi_int(&regexp, &flags, nullptr, &extp, 1,
|
||||
mode, nullptr, &db, &compile_err,
|
||||
*g_grey);
|
||||
}
|
||||
#else
|
||||
err = hs_compile_ext_multi(&regexp, &flags, nullptr, &extp, 1, mode,
|
||||
nullptr, &db, &compile_err);
|
||||
if (use_literal_api) {
|
||||
size_t len = strlen(regexp);
|
||||
err = hs_compile_lit_multi_int(&regexp, &flags, nullptr, &extp,
|
||||
&len, 1, mode, nullptr, &db,
|
||||
&compile_err, *g_grey);
|
||||
} else {
|
||||
err = hs_compile_ext_multi(&regexp, &flags, nullptr, &extp, 1,
|
||||
mode, nullptr, &db, &compile_err);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (err == HS_SUCCESS) {
|
||||
@ -381,6 +397,11 @@ void checkLogicalExpression(UNUSED void *threadarg) {
|
||||
|
||||
ExprExtMap::const_iterator it;
|
||||
while (getNextLogicalExpression(it)) {
|
||||
if (use_literal_api) {
|
||||
recordSuccess(g_exprMap, it->first);
|
||||
continue;
|
||||
}
|
||||
|
||||
const ParsedExpr &comb = it->second;
|
||||
|
||||
vector<unsigned> subIds;
|
||||
@ -470,6 +491,7 @@ void usage() {
|
||||
<< " -h Display this help." << endl
|
||||
<< " -B Build signature set." << endl
|
||||
<< " -C Check logical combinations (default: off)." << endl
|
||||
<< " --literal-on Processing pure literals, no need to check." << endl
|
||||
<< endl;
|
||||
}
|
||||
|
||||
@ -477,9 +499,15 @@ static
|
||||
void processArgs(int argc, char *argv[], UNUSED unique_ptr<Grey> &grey) {
|
||||
const char options[] = "e:E:s:z:hHLNV8G:T:BC";
|
||||
bool signatureSet = false;
|
||||
int literalFlag = 0;
|
||||
|
||||
static struct option longopts[] = {
|
||||
{"literal-on", no_argument, &literalFlag, 1},
|
||||
{nullptr, 0, nullptr, 0}
|
||||
};
|
||||
|
||||
for (;;) {
|
||||
int c = getopt_long(argc, argv, options, nullptr, nullptr);
|
||||
int c = getopt_long(argc, argv, options, longopts, nullptr);
|
||||
if (c < 0) {
|
||||
break;
|
||||
}
|
||||
@ -539,6 +567,9 @@ void processArgs(int argc, char *argv[], UNUSED unique_ptr<Grey> &grey) {
|
||||
case 'C':
|
||||
check_logical = true;
|
||||
break;
|
||||
case 0:
|
||||
case 1:
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
exit(1);
|
||||
@ -564,6 +595,8 @@ void processArgs(int argc, char *argv[], UNUSED unique_ptr<Grey> &grey) {
|
||||
usage();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
use_literal_api = (bool)literalFlag;
|
||||
}
|
||||
|
||||
static
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -299,6 +299,46 @@ char isLogicalCombination(vector<char> &lv, const vector<LogicalOp> &comb,
|
||||
return lv[result];
|
||||
}
|
||||
|
||||
/** \brief Returns 1 if combination matches when no sub-expression matches. */
|
||||
static
|
||||
char isPurelyNegativeMatch(vector<char> &lv, const vector<LogicalOp> &comb,
|
||||
size_t lkeyCount, unsigned start, unsigned result) {
|
||||
assert(start <= result);
|
||||
for (unsigned i = start; i <= result; i++) {
|
||||
const LogicalOp &op = comb[i - lkeyCount];
|
||||
assert(i == op.id);
|
||||
switch (op.op) {
|
||||
case LOGICAL_OP_NOT:
|
||||
if ((op.ro < lkeyCount) && lv[op.ro]) {
|
||||
// sub-expression not negative
|
||||
return 0;
|
||||
}
|
||||
lv[op.id] = !lv[op.ro];
|
||||
break;
|
||||
case LOGICAL_OP_AND:
|
||||
if (((op.lo < lkeyCount) && lv[op.lo]) ||
|
||||
((op.ro < lkeyCount) && lv[op.ro])) {
|
||||
// sub-expression not negative
|
||||
return 0;
|
||||
}
|
||||
lv[op.id] = lv[op.lo] & lv[op.ro]; // &&
|
||||
break;
|
||||
case LOGICAL_OP_OR:
|
||||
if (((op.lo < lkeyCount) && lv[op.lo]) ||
|
||||
((op.ro < lkeyCount) && lv[op.ro])) {
|
||||
// sub-expression not negative
|
||||
return 0;
|
||||
}
|
||||
lv[op.id] = lv[op.lo] | lv[op.ro]; // ||
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return lv[result];
|
||||
}
|
||||
|
||||
bool GraphTruth::run(unsigned, const CompiledNG &cng, const CNGInfo &cngi,
|
||||
const string &buffer, ResultSet &rs, string &error) {
|
||||
if (cngi.quiet) {
|
||||
@ -359,6 +399,13 @@ bool GraphTruth::run(unsigned, const CompiledNG &cng, const CNGInfo &cngi,
|
||||
}
|
||||
}
|
||||
}
|
||||
if (isPurelyNegativeMatch(lv, comb, m_lkey.size(),
|
||||
li.start, li.result)) {
|
||||
u64a to = buffer.length();
|
||||
if ((to >= cngi.min_offset) && (to <= cngi.max_offset)) {
|
||||
rs.addMatch(0, to);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -43,6 +43,7 @@
|
||||
#include "parser/Parser.h"
|
||||
#include "parser/parse_error.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/string_util.h"
|
||||
#include "util/unicode_def.h"
|
||||
#include "util/unordered.h"
|
||||
|
||||
@ -111,6 +112,15 @@ bool decodeExprPcre(string &expr, unsigned *flags, bool *highlander,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (use_literal_api) {
|
||||
// filter out flags not supported by pure literal API.
|
||||
u32 not_supported = HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8 |
|
||||
HS_FLAG_UCP | HS_FLAG_PREFILTER;
|
||||
hs_flags &= ~not_supported;
|
||||
force_utf8 = false;
|
||||
force_prefilter = false;
|
||||
}
|
||||
|
||||
expr.swap(regex);
|
||||
|
||||
if (!getPcreFlags(hs_flags, flags, highlander, prefilter, som,
|
||||
@ -260,9 +270,29 @@ GroundTruth::compile(unsigned id, bool no_callouts) {
|
||||
throw PcreCompileFailure("Unable to decode flags.");
|
||||
}
|
||||
|
||||
// When hyperscan literal api is on, transfer the regex string into hex.
|
||||
if (use_literal_api && !combination) {
|
||||
unsigned char *pat
|
||||
= reinterpret_cast<unsigned char *>(const_cast<char *>(re.c_str()));
|
||||
char *str = makeHex(pat, re.length());
|
||||
if (!str) {
|
||||
throw PcreCompileFailure("makeHex() malloc failure.");
|
||||
}
|
||||
re.assign(str);
|
||||
free(str);
|
||||
}
|
||||
|
||||
// filter out flags not supported by PCRE
|
||||
u64a supported = HS_EXT_FLAG_MIN_OFFSET | HS_EXT_FLAG_MAX_OFFSET |
|
||||
HS_EXT_FLAG_MIN_LENGTH;
|
||||
if (use_literal_api) {
|
||||
ext.flags &= 0ULL;
|
||||
ext.min_offset = 0;
|
||||
ext.max_offset = MAX_OFFSET;
|
||||
ext.min_length = 0;
|
||||
ext.edit_distance = 0;
|
||||
ext.hamming_distance = 0;
|
||||
}
|
||||
if (ext.flags & ~supported) {
|
||||
// edit distance is a known unsupported flag, so just throw a soft error
|
||||
if (ext.flags & HS_EXT_FLAG_EDIT_DISTANCE) {
|
||||
@ -314,7 +344,6 @@ GroundTruth::compile(unsigned id, bool no_callouts) {
|
||||
return compiled;
|
||||
}
|
||||
|
||||
|
||||
compiled->bytecode =
|
||||
pcre_compile2(re.c_str(), flags, &errcode, &errptr, &errloc, nullptr);
|
||||
|
||||
@ -557,6 +586,46 @@ char isLogicalCombination(vector<char> &lv, const vector<LogicalOp> &comb,
|
||||
return lv[result];
|
||||
}
|
||||
|
||||
/** \brief Returns 1 if combination matches when no sub-expression matches. */
|
||||
static
|
||||
char isPurelyNegativeMatch(vector<char> &lv, const vector<LogicalOp> &comb,
|
||||
size_t lkeyCount, unsigned start, unsigned result) {
|
||||
assert(start <= result);
|
||||
for (unsigned i = start; i <= result; i++) {
|
||||
const LogicalOp &op = comb[i - lkeyCount];
|
||||
assert(i == op.id);
|
||||
switch (op.op) {
|
||||
case LOGICAL_OP_NOT:
|
||||
if ((op.ro < lkeyCount) && lv[op.ro]) {
|
||||
// sub-expression not negative
|
||||
return 0;
|
||||
}
|
||||
lv[op.id] = !lv[op.ro];
|
||||
break;
|
||||
case LOGICAL_OP_AND:
|
||||
if (((op.lo < lkeyCount) && lv[op.lo]) ||
|
||||
((op.ro < lkeyCount) && lv[op.ro])) {
|
||||
// sub-expression not negative
|
||||
return 0;
|
||||
}
|
||||
lv[op.id] = lv[op.lo] & lv[op.ro]; // &&
|
||||
break;
|
||||
case LOGICAL_OP_OR:
|
||||
if (((op.lo < lkeyCount) && lv[op.lo]) ||
|
||||
((op.ro < lkeyCount) && lv[op.ro])) {
|
||||
// sub-expression not negative
|
||||
return 0;
|
||||
}
|
||||
lv[op.id] = lv[op.lo] | lv[op.ro]; // ||
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return lv[result];
|
||||
}
|
||||
|
||||
bool GroundTruth::run(unsigned, const CompiledPcre &compiled,
|
||||
const string &buffer, ResultSet &rs, string &error) {
|
||||
if (compiled.quiet) {
|
||||
@ -616,6 +685,13 @@ bool GroundTruth::run(unsigned, const CompiledPcre &compiled,
|
||||
}
|
||||
}
|
||||
}
|
||||
if (isPurelyNegativeMatch(lv, comb, m_lkey.size(),
|
||||
li.start, li.result)) {
|
||||
u64a to = buffer.length();
|
||||
if ((to >= compiled.min_offset) && (to <= compiled.max_offset)) {
|
||||
rs.addMatch(0, to);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -32,6 +32,7 @@
|
||||
#include "ng_corpus_generator.h"
|
||||
#include "NfaGeneratedCorpora.h"
|
||||
#include "ExpressionParser.h"
|
||||
#include "common.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "hs_compile.h"
|
||||
@ -44,6 +45,7 @@
|
||||
#include "util/compile_context.h"
|
||||
#include "util/compile_error.h"
|
||||
#include "util/report_manager.h"
|
||||
#include "util/string_util.h"
|
||||
#include "util/target_info.h"
|
||||
|
||||
#include <string>
|
||||
@ -80,6 +82,18 @@ void NfaGeneratedCorpora::generate(unsigned id, vector<Corpus> &data) {
|
||||
throw CorpusFailure("Expression could not be read: " + i->second);
|
||||
}
|
||||
|
||||
// When hyperscan literal api is on, transfer the regex string into hex.
|
||||
if (use_literal_api && !(hs_flags & HS_FLAG_COMBINATION)) {
|
||||
unsigned char *pat
|
||||
= reinterpret_cast<unsigned char *>(const_cast<char *>(re.c_str()));
|
||||
char *str = makeHex(pat, re.length());
|
||||
if (!str) {
|
||||
throw CorpusFailure("makeHex() malloc failure.");
|
||||
}
|
||||
re.assign(str);
|
||||
free(str);
|
||||
}
|
||||
|
||||
// Combination's corpus is consist of sub-expressions' corpuses.
|
||||
if (hs_flags & HS_FLAG_COMBINATION) {
|
||||
ParsedLogical pl;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -925,11 +925,22 @@ compileHyperscan(vector<const char *> &patterns, vector<unsigned> &flags,
|
||||
const unsigned count = patterns.size();
|
||||
hs_database_t *db = nullptr;
|
||||
hs_compile_error_t *compile_err;
|
||||
hs_error_t err;
|
||||
|
||||
hs_error_t err = hs_compile_multi_int(&patterns[0], &flags[0],
|
||||
&idsvec[0], ext.c_array(), count,
|
||||
mode, platform, &db,
|
||||
if (use_literal_api) {
|
||||
// Compute length of each pattern.
|
||||
vector<size_t> lens(count);
|
||||
for (unsigned int i = 0; i < count; i++) {
|
||||
lens[i] = strlen(patterns[i]);
|
||||
}
|
||||
err = hs_compile_lit_multi_int(&patterns[0], &flags[0], &idsvec[0],
|
||||
ext.c_array(), &lens[0], count, mode,
|
||||
platform, &db, &compile_err, grey);
|
||||
} else {
|
||||
err = hs_compile_multi_int(&patterns[0], &flags[0], &idsvec[0],
|
||||
ext.c_array(), count, mode, platform, &db,
|
||||
&compile_err, grey);
|
||||
}
|
||||
|
||||
if (err != HS_SUCCESS) {
|
||||
error = compile_err->message;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -116,6 +116,7 @@ void usage(const char *name, const char *error) {
|
||||
printf(" --abort-on-fail Abort, rather than exit, on failure.\n");
|
||||
printf(" --no-signal-handler Do not handle handle signals (to generate "
|
||||
"backtraces).\n");
|
||||
printf(" --literal-on Use Hyperscan pure literal matching.\n");
|
||||
printf("\n");
|
||||
printf("Memory and resource control options:\n");
|
||||
printf("\n");
|
||||
@ -174,6 +175,7 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop,
|
||||
int mangleScratch = 0;
|
||||
int compressFlag = 0;
|
||||
int compressResetFlag = 0;
|
||||
int literalFlag = 0;
|
||||
static const struct option longopts[] = {
|
||||
{"copy-scratch", 0, &copyScratch, 1},
|
||||
{"copy-stream", 0, &copyStream, 1},
|
||||
@ -187,6 +189,7 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop,
|
||||
{"compress-expand", 0, &compressFlag, 1},
|
||||
{"compress-reset-expand", 0, &compressResetFlag, 1},
|
||||
{"no-groups", 0, &no_groups, 1},
|
||||
{"literal-on", 0, &literalFlag, 1},
|
||||
{nullptr, 0, nullptr, 0}};
|
||||
|
||||
for (;;) {
|
||||
@ -589,4 +592,5 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop,
|
||||
use_mangle_scratch = (bool) mangleScratch;
|
||||
use_compress_expand = (bool)compressFlag;
|
||||
use_compress_reset_expand = (bool)compressResetFlag;
|
||||
use_literal_api = (bool)literalFlag;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -82,6 +82,7 @@ extern bool use_copy_stream;
|
||||
extern bool use_mangle_scratch;
|
||||
extern bool use_compress_expand;
|
||||
extern bool use_compress_reset_expand;
|
||||
extern bool use_literal_api;
|
||||
extern int abort_on_failure;
|
||||
extern int no_signal_handler;
|
||||
extern bool force_edit_distance;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -118,6 +118,7 @@ bool use_copy_stream = false;
|
||||
bool use_mangle_scratch = false;
|
||||
bool use_compress_expand = false;
|
||||
bool use_compress_reset_expand = false;
|
||||
bool use_literal_api = false;
|
||||
int abort_on_failure = 0;
|
||||
int no_signal_handler = 0;
|
||||
size_t max_scan_queue_len = 25000;
|
||||
|
@ -42,7 +42,10 @@
|
||||
|
||||
#ifdef HAVE_BACKTRACE
|
||||
#include <execinfo.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h> // for _exit
|
||||
#endif
|
||||
|
||||
#define BACKTRACE_BUFFER_SIZE 200
|
||||
|
@ -10,6 +10,10 @@ include_directories(${PROJECT_SOURCE_DIR}/util)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
|
||||
|
||||
add_executable(hsdump main.cpp)
|
||||
if(WIN32 AND (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS))
|
||||
add_executable(hsdump main.cpp $<TARGET_OBJECTS:hs_compile_shared> $<TARGET_OBJECTS:hs_exec_shared>)
|
||||
else()
|
||||
add_executable(hsdump main.cpp)
|
||||
endif()
|
||||
target_link_libraries(hsdump hs expressionutil crosscompileutil)
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -106,6 +106,8 @@ bool dump_intermediate = true;
|
||||
bool force_edit_distance = false;
|
||||
u32 edit_distance = 0;
|
||||
|
||||
int use_literal_api = 0;
|
||||
|
||||
} // namespace
|
||||
|
||||
// Usage statement.
|
||||
@ -139,6 +141,7 @@ void usage(const char *name, const char *error) {
|
||||
printf(" -8 Force UTF8 mode on all patterns.\n");
|
||||
printf(" -L Apply HS_FLAG_SOM_LEFTMOST to all patterns.\n");
|
||||
printf(" --prefilter Apply HS_FLAG_PREFILTER to all patterns.\n");
|
||||
printf(" --literal-on Use Hyperscan pure literal matching API.\n");
|
||||
printf("\n");
|
||||
printf("Example:\n");
|
||||
printf("$ %s -e pattern.file -s sigfile\n", name);
|
||||
@ -163,6 +166,7 @@ void processArgs(int argc, char *argv[], Grey &grey) {
|
||||
{"utf8", no_argument, nullptr, '8'},
|
||||
{"prefilter", no_argument, &force_prefilter, 1},
|
||||
{"som-width", required_argument, nullptr, 'd'},
|
||||
{"literal-on", no_argument, &use_literal_api, 1},
|
||||
{nullptr, 0, nullptr, 0}
|
||||
};
|
||||
|
||||
@ -501,9 +505,23 @@ unsigned int dumpDataMulti(const vector<const char *> &patterns,
|
||||
hs_database_t *db = nullptr;
|
||||
hs_compile_error_t *compile_err;
|
||||
|
||||
hs_error_t err = hs_compile_multi_int(
|
||||
patterns.data(), flags.data(), ids.data(), ext.c_array(),
|
||||
patterns.size(), mode, plat_info.get(), &db, &compile_err, grey);
|
||||
hs_error_t err;
|
||||
const size_t count = patterns.size();
|
||||
if (use_literal_api) {
|
||||
// Compute length of each pattern.
|
||||
vector<size_t> lens(count);
|
||||
for (unsigned int i = 0; i < count; i++) {
|
||||
lens[i] = strlen(patterns[i]);
|
||||
}
|
||||
err = hs_compile_lit_multi_int(patterns.data(), flags.data(),
|
||||
ids.data(), ext.c_array(), lens.data(),
|
||||
count, mode, plat_info.get(), &db,
|
||||
&compile_err, grey);
|
||||
} else {
|
||||
err = hs_compile_multi_int(patterns.data(), flags.data(), ids.data(),
|
||||
ext.c_array(), count, mode, plat_info.get(),
|
||||
&db, &compile_err, grey);
|
||||
}
|
||||
|
||||
if (err != HS_SUCCESS) {
|
||||
if (compile_err && compile_err->message) {
|
||||
|
@ -129,7 +129,11 @@ set(unit_internal_SOURCES
|
||||
internal/main.cpp
|
||||
)
|
||||
|
||||
if(WIN32 AND (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS))
|
||||
add_executable(unit-internal ${unit_internal_SOURCES} $<TARGET_OBJECTS:hs_compile_shared> $<TARGET_OBJECTS:hs_exec_shared>)
|
||||
else()
|
||||
add_executable(unit-internal ${unit_internal_SOURCES})
|
||||
endif()
|
||||
set_target_properties(unit-internal PROPERTIES COMPILE_FLAGS "${HS_CXX_FLAGS}")
|
||||
target_link_libraries(unit-internal hs corpusomatic)
|
||||
endif(NOT (RELEASE_BUILD OR FAT_RUNTIME))
|
||||
|
@ -155,11 +155,6 @@
|
||||
158:/141 & (142|!143) )| 144/C #Not enough left parentheses at index 17.
|
||||
159:/1234567890 & (142|!143 )/C #Expression id too large at index 10.
|
||||
160:/141 & (142|!143 )|/C #Not enough operand at index 18.
|
||||
161:/!141/C #Has match from purely negative sub-expressions.
|
||||
162:/!141 | 142 | 143/C #Has match from purely negative sub-expressions.
|
||||
163:/!141 & !142 & !143/C #Has match from purely negative sub-expressions.
|
||||
164:/(141 | !142 & !143)/C #Has match from purely negative sub-expressions.
|
||||
165:/!(141 | 142 | 143)/C #Has match from purely negative sub-expressions.
|
||||
166:/141/C #No logical operation.
|
||||
167:/119 & 121/C #Unknown sub-expression id.
|
||||
168:/166 & 167/C #Unknown sub-expression id.
|
||||
161:/141/C #No logical operation.
|
||||
162:/119 & 121/C #Unknown sub-expression id.
|
||||
163:/166 & 167/C #Unknown sub-expression id.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
* Copyright (c) 2018-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -694,3 +694,229 @@ TEST(LogicalCombination, MultiCombQuietUniSub5) {
|
||||
err = hs_free_scratch(scratch);
|
||||
ASSERT_EQ(HS_SUCCESS, err);
|
||||
}
|
||||
|
||||
// Block-mode scan of a buffer in which none of the sub-expressions occur.
// The combination 1002 is expected to be reported exactly once, at offset 53.
TEST(LogicalCombination, SingleCombPurelyNegative6) {
    const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}",
                          "ijkl[mMn]", "(!201 | 202 & 203) & (!204 | 205)"};
    unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION};
    unsigned ids[] = {201, 202, 203, 204, 205, 1002};
    const unsigned numExprs = 6;

    // Compile the five sub-expressions together with the combination.
    hs_database_t *database = nullptr;
    hs_compile_error_t *compileError = nullptr;
    const hs_error_t compileRc =
        hs_compile_multi(expr, flags, ids, numExprs, HS_MODE_NOSTREAM, nullptr,
                         &database, &compileError);
    ASSERT_EQ(HS_SUCCESS, compileRc);
    ASSERT_TRUE(nullptr != database);

    hs_scratch_t *scratch = nullptr;
    ASSERT_EQ(HS_SUCCESS, hs_alloc_scratch(database, &scratch));
    ASSERT_TRUE(nullptr != scratch);

    // Scan the corpus, recording every match through record_cb.
    string corpus = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
    CallBackContext ctx;
    ctx.halt = 0;
    ASSERT_EQ(HS_SUCCESS, hs_scan(database, corpus.c_str(), corpus.size(), 0,
                                  scratch, record_cb, &ctx));

    ASSERT_EQ(1U, ctx.matches.size());
    ASSERT_EQ(MatchRecord(53, 1002), ctx.matches[0]);

    hs_free_database(database);
    ASSERT_EQ(HS_SUCCESS, hs_free_scratch(scratch));
}
|
||||
|
||||
// Same rule set and corpus as the non-quiet variant, but the combination is
// additionally flagged HS_FLAG_QUIET; no match records are expected at all.
TEST(LogicalCombination, SingleCombQuietPurelyNegative6) {
    const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}",
                          "ijkl[mMn]", "(!201 | 202 & 203) & (!204 | 205)"};
    unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION | HS_FLAG_QUIET};
    unsigned ids[] = {201, 202, 203, 204, 205, 1002};
    const unsigned numExprs = 6;

    // Compile the five sub-expressions together with the quiet combination.
    hs_database_t *database = nullptr;
    hs_compile_error_t *compileError = nullptr;
    const hs_error_t compileRc =
        hs_compile_multi(expr, flags, ids, numExprs, HS_MODE_NOSTREAM, nullptr,
                         &database, &compileError);
    ASSERT_EQ(HS_SUCCESS, compileRc);
    ASSERT_TRUE(nullptr != database);

    hs_scratch_t *scratch = nullptr;
    ASSERT_EQ(HS_SUCCESS, hs_alloc_scratch(database, &scratch));
    ASSERT_TRUE(nullptr != scratch);

    // Scan the corpus, recording every match through record_cb.
    string corpus = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
    CallBackContext ctx;
    ctx.halt = 0;
    ASSERT_EQ(HS_SUCCESS, hs_scan(database, corpus.c_str(), corpus.size(), 0,
                                  scratch, record_cb, &ctx));

    ASSERT_EQ(0U, ctx.matches.size());

    hs_free_database(database);
    ASSERT_EQ(HS_SUCCESS, hs_free_scratch(scratch));
}
|
||||
|
||||
// Three combinations over disjoint sub-expression sets, scanned in block
// mode. Expected records, in order: 202 and 1002 at offset 106, then 1001 at
// offset 300.
TEST(LogicalCombination, MultiCombPurelyNegativeUniSub6) {
    const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}",
                          "ijkl[mMn]", "cba", "fed", "google.*cn",
                          "haystacks{4,8}", "ijkl[oOp]", "cab", "fee",
                          "goobar.*jp", "shockwave{4,6}", "ijkl[rRs]",
                          "(101 & 102 & 103) | (!104 & !105)",
                          "(!201 | 202 & 203) & (!204 | 205)",
                          "((301 | 302) & 303) & (304 | 305)"};
    unsigned flags[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                        HS_FLAG_COMBINATION, HS_FLAG_COMBINATION,
                        HS_FLAG_COMBINATION};
    unsigned ids[] = {101, 102, 103, 104, 105, 201, 202, 203, 204, 205, 301,
                      302, 303, 304, 305, 1001, 1002, 1003};
    const unsigned numExprs = 18;

    // Compile all sub-expressions together with the three combinations.
    hs_database_t *database = nullptr;
    hs_compile_error_t *compileError = nullptr;
    const hs_error_t compileRc =
        hs_compile_multi(expr, flags, ids, numExprs, HS_MODE_NOSTREAM, nullptr,
                         &database, &compileError);
    ASSERT_EQ(HS_SUCCESS, compileRc);
    ASSERT_TRUE(nullptr != database);

    hs_scratch_t *scratch = nullptr;
    ASSERT_EQ(HS_SUCCESS, hs_alloc_scratch(database, &scratch));
    ASSERT_TRUE(nullptr != scratch);

    // Scan the corpus, recording every match through record_cb.
    string corpus = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
                    "-----------------------------------------------"
                    "xxxfedxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
                    "-----------------------------------------------"
                    "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
                    "------------------------------------------";
    CallBackContext ctx;
    ctx.halt = 0;
    ASSERT_EQ(HS_SUCCESS, hs_scan(database, corpus.c_str(), corpus.size(), 0,
                                  scratch, record_cb, &ctx));

    ASSERT_EQ(3U, ctx.matches.size());
    ASSERT_EQ(MatchRecord(106, 202), ctx.matches[0]);
    ASSERT_EQ(MatchRecord(106, 1002), ctx.matches[1]);
    ASSERT_EQ(MatchRecord(300, 1001), ctx.matches[2]);

    hs_free_database(database);
    ASSERT_EQ(HS_SUCCESS, hs_free_scratch(scratch));
}
|
||||
|
||||
// Variant of the multi-combination test in which "defed" (id 102) and "fed"
// (id 202, compiled with HS_FLAG_MULTILINE) both match at offset 106 and again
// at the very end of the buffer (offset 300). Eight records are expected.
TEST(LogicalCombination, MultiCombPurelyNegativeUniSubEOD6) {
    const char *expr[] = {"abc", "defed", "foobar.*gh", "teakettle{4,10}",
                          "ijkl[mMn]", "cba", "fed", "google.*cn",
                          "haystacks{4,8}", "ijkl[oOp]", "cab", "fee",
                          "goobar.*jp", "shockwave{4,6}", "ijkl[rRs]",
                          "(101 & 102 & 103) | (!104 & !105)",
                          "(!201 | 202 & 203) & (!204 | 205)",
                          "((301 | 302) & 303) & (304 | 305)"};
    unsigned flags[] = {0, 0, 0, 0, 0, 0, HS_FLAG_MULTILINE,
                        0, 0, 0, 0, 0, 0, 0, 0,
                        HS_FLAG_COMBINATION, HS_FLAG_COMBINATION,
                        HS_FLAG_COMBINATION};
    unsigned ids[] = {101, 102, 103, 104, 105, 201, 202, 203, 204, 205, 301,
                      302, 303, 304, 305, 1001, 1002, 1003};
    const unsigned numExprs = 18;

    // Compile all sub-expressions together with the three combinations.
    hs_database_t *database = nullptr;
    hs_compile_error_t *compileError = nullptr;
    const hs_error_t compileRc =
        hs_compile_multi(expr, flags, ids, numExprs, HS_MODE_NOSTREAM, nullptr,
                         &database, &compileError);
    ASSERT_EQ(HS_SUCCESS, compileRc);
    ASSERT_TRUE(nullptr != database);

    hs_scratch_t *scratch = nullptr;
    ASSERT_EQ(HS_SUCCESS, hs_alloc_scratch(database, &scratch));
    ASSERT_TRUE(nullptr != scratch);

    // Scan the corpus, recording every match through record_cb.
    string corpus = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
                    "-----------------------------------------------"
                    "xdefedxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
                    "-----------------------------------------------"
                    "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
                    "-------------------------------------defed";
    CallBackContext ctx;
    ctx.halt = 0;
    ASSERT_EQ(HS_SUCCESS, hs_scan(database, corpus.c_str(), corpus.size(), 0,
                                  scratch, record_cb, &ctx));

    ASSERT_EQ(8U, ctx.matches.size());
    // First occurrence, ending at offset 106.
    ASSERT_EQ(MatchRecord(106, 102), ctx.matches[0]);
    ASSERT_EQ(MatchRecord(106, 202), ctx.matches[1]);
    ASSERT_EQ(MatchRecord(106, 1001), ctx.matches[2]);
    ASSERT_EQ(MatchRecord(106, 1002), ctx.matches[3]);
    // Second occurrence, ending at offset 300.
    ASSERT_EQ(MatchRecord(300, 102), ctx.matches[4]);
    ASSERT_EQ(MatchRecord(300, 202), ctx.matches[5]);
    ASSERT_EQ(MatchRecord(300, 1001), ctx.matches[6]);
    ASSERT_EQ(MatchRecord(300, 1002), ctx.matches[7]);

    hs_free_database(database);
    ASSERT_EQ(HS_SUCCESS, hs_free_scratch(scratch));
}
|
||||
|
||||
// Streaming-mode test: the corpus is fed to one stream in eleven consecutive
// writes, with the final "zxyz" occurrence split across the last two writes.
// Matches are recorded during the writes; the close call uses dummy_cb.
TEST(LogicalCombination, MultiCombStream1) {
    const char *expr[] = {"abc", "def", "xyz", "zxyz",
                          "101 & 102", "201 & !202"};
    unsigned flags[] = {0, 0, 0, 0, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION};
    unsigned ids[] = {101, 102, 201, 202, 1001, 1002};
    const unsigned numExprs = 6;

    // Compile the four sub-expressions and two combinations for streaming.
    hs_database_t *database = nullptr;
    hs_compile_error_t *compileError = nullptr;
    const hs_error_t compileRc =
        hs_compile_multi(expr, flags, ids, numExprs, HS_MODE_STREAM, nullptr,
                         &database, &compileError);
    ASSERT_EQ(HS_SUCCESS, compileRc);
    ASSERT_TRUE(nullptr != database);

    hs_scratch_t *scratch = nullptr;
    ASSERT_EQ(HS_SUCCESS, hs_alloc_scratch(database, &scratch));
    ASSERT_TRUE(nullptr != scratch);

    hs_stream_t *stream = nullptr;
    ASSERT_EQ(HS_SUCCESS, hs_open_stream(database, 0, &stream));
    ASSERT_TRUE(nullptr != stream);

    string data[] = {"xxxxxxxabcxxxxxxxdefxxxghixxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                     "xxxxxxxxxxxxxxxxghixxxxxxxxxxxabcxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                     "xxxxxxxxxxxxxxxxdefxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                     "xxxxxxxxxxxxxxxxxyzxxxxxxxxxxxxxxxxxxxxxghixxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                     "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                     "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                     "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                     "xxxxxghixxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                     "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                     "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxzxy",
                     "z"};

    // Feed every chunk, in order, into the same stream.
    CallBackContext ctx;
    ctx.halt = 0;
    for (const string &chunk : data) {
        ASSERT_EQ(HS_SUCCESS,
                  hs_scan_stream(stream, chunk.c_str(), chunk.size(), 0,
                                 scratch, record_cb, &ctx));
    }
    ASSERT_EQ(HS_SUCCESS, hs_close_stream(stream, scratch, dummy_cb, nullptr));

    ASSERT_EQ(11U, ctx.matches.size());
    ASSERT_EQ(MatchRecord(10, 101), ctx.matches[0]);
    ASSERT_EQ(MatchRecord(20, 102), ctx.matches[1]);
    ASSERT_EQ(MatchRecord(20, 1001), ctx.matches[2]);
    ASSERT_EQ(MatchRecord(109, 101), ctx.matches[3]);
    ASSERT_EQ(MatchRecord(109, 1001), ctx.matches[4]);
    ASSERT_EQ(MatchRecord(171, 102), ctx.matches[5]);
    ASSERT_EQ(MatchRecord(171, 1001), ctx.matches[6]);
    ASSERT_EQ(MatchRecord(247, 201), ctx.matches[7]);
    ASSERT_EQ(MatchRecord(247, 1002), ctx.matches[8]);
    ASSERT_EQ(MatchRecord(761, 201), ctx.matches[9]);
    ASSERT_EQ(MatchRecord(761, 202), ctx.matches[10]);

    hs_free_database(database);
    ASSERT_EQ(HS_SUCCESS, hs_free_scratch(scratch));
}
|
||||
|
@ -75,7 +75,7 @@ TEST(Uniform, loadstore_u16) {
|
||||
|
||||
TEST(Uniform, loadstore_u32) {
|
||||
for (int i = 0; i < 32; i++) {
|
||||
u32 in = 1 << i;
|
||||
u32 in = 1U << i;
|
||||
const char *cin = (const char *)(&in);
|
||||
u32 out = load_u32(cin);
|
||||
EXPECT_EQ(in, out);
|
||||
@ -106,7 +106,7 @@ TEST(Uniform, loadstore_m128) {
|
||||
} in;
|
||||
for (int i = 0; i < 128; i++) {
|
||||
memset(&in, 0, sizeof(in));
|
||||
in.words[i/32] = 1 << (i % 32);
|
||||
in.words[i/32] = 1U << (i % 32);
|
||||
const char *cin = (const char *)(&in);
|
||||
m128 out = load_m128(cin);
|
||||
EXPECT_EQ(0, memcmp(&out, &in, sizeof(out)));
|
||||
@ -124,7 +124,7 @@ TEST(Uniform, loadstore_m256) {
|
||||
} in;
|
||||
for (int i = 0; i < 256; i++) {
|
||||
memset(&in, 0, sizeof(in));
|
||||
in.words[i/32] = 1 << (i % 32);
|
||||
in.words[i/32] = 1U << (i % 32);
|
||||
const char *cin = (const char *)(&in);
|
||||
m256 out = load_m256(cin);
|
||||
EXPECT_EQ(0, memcmp(&out, &in, sizeof(out)));
|
||||
@ -142,7 +142,7 @@ TEST(Uniform, loadstore_m512) {
|
||||
} in;
|
||||
for (int i = 0; i < 512; i++) {
|
||||
memset(&in, 0, sizeof(in));
|
||||
in.words[i/32] = 1 << (i % 32);
|
||||
in.words[i/32] = 1U << (i % 32);
|
||||
const char *cin = (const char *)(&in);
|
||||
m512 out = load_m512(cin);
|
||||
EXPECT_EQ(0, memcmp(&out, &in, sizeof(out)));
|
||||
|
@ -56,9 +56,8 @@ std::string inferExpressionPath(const std::string &sigFile) {
|
||||
// POSIX variant.
|
||||
|
||||
// dirname() may modify its argument, so we must make a copy.
|
||||
std::vector<char> path(sigFile.size() + 1);
|
||||
memcpy(path.data(), sigFile.c_str(), sigFile.size());
|
||||
path[sigFile.size()] = 0; // ensure null termination.
|
||||
std::vector<char> path(sigFile.begin(), sigFile.end());
|
||||
path.push_back(0); // ensure null termination.
|
||||
|
||||
std::string rv = dirname(path.data());
|
||||
#else
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -127,4 +127,18 @@ void prettyPrintRange(std::ostream &out, it_t begin, it_t end) {
|
||||
}
|
||||
}
|
||||
|
||||
// Transfer given string into a hex-escaped pattern.
|
||||
static really_inline
|
||||
char *makeHex(const unsigned char *pat, unsigned patlen) {
|
||||
size_t hexlen = patlen * 4;
|
||||
char *hexbuf = (char *)malloc(hexlen + 1);
|
||||
unsigned i;
|
||||
char *buf;
|
||||
for (i = 0, buf = hexbuf; i < patlen; i++, buf += 4) {
|
||||
snprintf(buf, 5, "\\x%02x", (unsigned char)pat[i]);
|
||||
}
|
||||
hexbuf[hexlen] = '\0';
|
||||
return hexbuf;
|
||||
}
|
||||
|
||||
#endif // STRING_UTIL_H
|
||||
|
Loading…
x
Reference in New Issue
Block a user