mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Merge develop into master
This commit is contained in:
commit
fe31630221
@ -1,9 +1,9 @@
|
||||
cmake_minimum_required (VERSION 2.8)
|
||||
cmake_minimum_required (VERSION 2.8.11)
|
||||
project (Hyperscan C CXX)
|
||||
|
||||
set (HS_MAJOR_VERSION 4)
|
||||
set (HS_MINOR_VERSION 0)
|
||||
set (HS_PATCH_VERSION 0)
|
||||
set (HS_PATCH_VERSION 1)
|
||||
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
|
||||
|
||||
string (TIMESTAMP BUILD_DATE "%Y-%m-%d")
|
||||
@ -36,19 +36,19 @@ else()
|
||||
set(RELEASE_BUILD FALSE)
|
||||
endif()
|
||||
|
||||
set(BINDIR ${PROJECT_BINARY_DIR}/bin)
|
||||
set(LIBDIR ${PROJECT_BINARY_DIR}/lib)
|
||||
set(BINDIR "${PROJECT_BINARY_DIR}/bin")
|
||||
set(LIBDIR "${PROJECT_BINARY_DIR}/lib")
|
||||
|
||||
# First for the generic no-config case
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${BINDIR})
|
||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${LIBDIR})
|
||||
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${LIBDIR})
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${BINDIR}")
|
||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${LIBDIR}")
|
||||
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${LIBDIR}")
|
||||
# Second, for multi-config builds (e.g. msvc)
|
||||
foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
|
||||
string (TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG)
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${BINDIR})
|
||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${LIBDIR})
|
||||
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${LIBDIR})
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} "${BINDIR}")
|
||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} "${LIBDIR}")
|
||||
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} "${LIBDIR}")
|
||||
endforeach (OUTPUTCONFIG CMAKE_CONFIGURATION_TYPES)
|
||||
|
||||
|
||||
@ -71,13 +71,14 @@ find_package(Boost ${BOOST_MINVERSION})
|
||||
if(NOT Boost_FOUND)
|
||||
# we might have boost in tree, so provide a hint and try again
|
||||
message(STATUS "trying include dir for boost")
|
||||
set(BOOST_INCLUDEDIR ${CMAKE_SOURCE_DIR}/include)
|
||||
set(BOOST_INCLUDEDIR "${CMAKE_SOURCE_DIR}/include")
|
||||
find_package(Boost ${BOOST_MINVERSION})
|
||||
if(NOT Boost_FOUND)
|
||||
message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. Either install system pacakges if available or extract Boost headers to ${CMAKE_SOURCE_DIR}/include")
|
||||
message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. Either install system pacakges if available, extract Boost headers to ${CMAKE_SOURCE_DIR}/include, or set the CMake BOOST_ROOT variable.")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
# -- make this work? set(python_ADDITIONAL_VERSIONS 2.7 2.6)
|
||||
find_package(PythonInterp)
|
||||
find_program(RAGEL ragel)
|
||||
@ -88,6 +89,10 @@ else()
|
||||
message(FATAL_ERROR "No python interpreter found")
|
||||
endif()
|
||||
|
||||
if(${RAGEL} STREQUAL "RAGEL-NOTFOUND")
|
||||
message(FATAL_ERROR "Ragel state machine compiler not found")
|
||||
endif()
|
||||
|
||||
option(OPTIMISE "Turns off compiler optimizations (on by default unless debug output enabled or coverage testing)" TRUE)
|
||||
|
||||
option(DEBUG_OUTPUT "Enable debug output (warning: very verbose)" FALSE)
|
||||
@ -290,10 +295,10 @@ CHECK_CXX_COMPILER_FLAG("-Wunused-variable" CXX_WUNUSED_VARIABLE)
|
||||
endif()
|
||||
|
||||
if (NOT XCODE)
|
||||
include_directories(SYSTEM ${Boost_INCLUDE_DIR})
|
||||
include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
|
||||
else()
|
||||
# cmake doesn't think Xcode supports isystem
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -isystem ${Boost_INCLUDE_DIR}")
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -isystem ${Boost_INCLUDE_DIRS}")
|
||||
endif()
|
||||
|
||||
|
||||
|
@ -125,8 +125,12 @@ Boost Headers
|
||||
|
||||
Compiling Hyperscan depends on a recent version of the Boost C++ header
|
||||
library. If the Boost libraries are installed on the build machine in the
|
||||
usual paths, CMake will find them. An alternative is to put a copy of (or a
|
||||
symlink to) the boost subdirectory in ``<hyperscan-source-path>/include/boost``.
|
||||
usual paths, CMake will find them. If the Boost libraries are not installed,
|
||||
the location of the Boost source tree can be specified during the CMake
|
||||
configuration step using the ``BOOST_ROOT`` variable (described below).
|
||||
|
||||
Another alternative is to put a copy of (or a symlink to) the boost
|
||||
subdirectory in ``<hyperscan-source-path>/include/boost``.
|
||||
|
||||
For example: for the Boost-1.59.0 release: ::
|
||||
|
||||
@ -161,6 +165,8 @@ Common options for CMake include:
|
||||
| BUILD_STATIC_AND_SHARED| Build both static and shared Hyperscan libs. |
|
||||
| | Default off. |
|
||||
+------------------------+----------------------------------------------------+
|
||||
| BOOST_ROOT | Location of Boost source tree. |
|
||||
+------------------------+----------------------------------------------------+
|
||||
| DEBUG_OUTPUT | Enable very verbose debug output. Default off. |
|
||||
+------------------------+----------------------------------------------------+
|
||||
|
||||
|
@ -13,11 +13,11 @@ set(AUTOGEN_PY_FILES
|
||||
function(fdr_autogen type out)
|
||||
add_custom_command (
|
||||
COMMENT "AUTOGEN ${out}"
|
||||
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${out}
|
||||
COMMAND ${PYTHON} ${CMAKE_CURRENT_SOURCE_DIR}/autogen.py ${type} > ${CMAKE_CURRENT_BINARY_DIR}/${out}
|
||||
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${out}"
|
||||
COMMAND ${PYTHON} "${CMAKE_CURRENT_SOURCE_DIR}/autogen.py" ${type} > "${CMAKE_CURRENT_BINARY_DIR}/${out}"
|
||||
DEPENDS ${AUTOGEN_PY_FILES}
|
||||
)
|
||||
add_custom_target(autogen_${type} DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${out})
|
||||
add_custom_target(autogen_${type} DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/${out}")
|
||||
endfunction(fdr_autogen)
|
||||
|
||||
#now build the functions
|
||||
|
@ -250,7 +250,8 @@ hs_error_t hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
|
||||
* for future use and is unused at present.
|
||||
*
|
||||
* @param scratch
|
||||
* A per-thread scratch space allocated by @ref hs_alloc_scratch().
|
||||
* A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
|
||||
* allowed to be NULL only if the @a onEvent callback is also NULL.
|
||||
*
|
||||
* @param onEvent
|
||||
* Pointer to a match event callback function. If a NULL pointer is given,
|
||||
@ -299,7 +300,8 @@ hs_error_t hs_copy_stream(hs_stream_t **to_id, const hs_stream_t *from_id);
|
||||
* The stream (as created by @ref hs_open_stream()) to be copied.
|
||||
*
|
||||
* @param scratch
|
||||
* A per-thread scratch space allocated by @ref hs_alloc_scratch().
|
||||
* A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
|
||||
* allowed to be NULL only if the @a onEvent callback is also NULL.
|
||||
*
|
||||
* @param onEvent
|
||||
* Pointer to a match event callback function. If a NULL pointer is given,
|
||||
|
@ -54,7 +54,7 @@ char nfaExecCastle0_expandState(const struct NFA *nfa, void *dest,
|
||||
|
||||
#define nfaExecCastle0_testEOD NFA_API_NO_IMPL
|
||||
#define nfaExecCastle0_B_Reverse NFA_API_NO_IMPL
|
||||
#define nfaExecCastle0_zombie_status NFA_API_NO_IMPL
|
||||
#define nfaExecCastle0_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -55,7 +55,7 @@ char nfaExecGough8_expandState(const struct NFA *nfa, void *dest,
|
||||
const void *src, u64a offset, u8 key);
|
||||
|
||||
#define nfaExecGough8_B_Reverse NFA_API_NO_IMPL
|
||||
#define nfaExecGough8_zombie_status NFA_API_NO_IMPL
|
||||
#define nfaExecGough8_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||
|
||||
// 16-bit Gough
|
||||
|
||||
@ -77,6 +77,6 @@ char nfaExecGough16_expandState(const struct NFA *nfa, void *dest,
|
||||
const void *src, u64a offset, u8 key);
|
||||
|
||||
#define nfaExecGough16_B_Reverse NFA_API_NO_IMPL
|
||||
#define nfaExecGough16_zombie_status NFA_API_NO_IMPL
|
||||
#define nfaExecGough16_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||
|
||||
#endif
|
||||
|
@ -56,7 +56,7 @@ char nfaExecLbrDot_expandState(const struct NFA *nfa, void *dest,
|
||||
|
||||
#define nfaExecLbrDot_testEOD NFA_API_NO_IMPL
|
||||
#define nfaExecLbrDot_B_Reverse NFA_API_NO_IMPL
|
||||
#define nfaExecLbrDot_zombie_status NFA_API_NO_IMPL
|
||||
#define nfaExecLbrDot_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||
|
||||
// LBR Verm
|
||||
|
||||
@ -76,7 +76,7 @@ char nfaExecLbrVerm_expandState(const struct NFA *nfa, void *dest,
|
||||
|
||||
#define nfaExecLbrVerm_testEOD NFA_API_NO_IMPL
|
||||
#define nfaExecLbrVerm_B_Reverse NFA_API_NO_IMPL
|
||||
#define nfaExecLbrVerm_zombie_status NFA_API_NO_IMPL
|
||||
#define nfaExecLbrVerm_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||
|
||||
// LBR Negated Verm
|
||||
|
||||
@ -96,7 +96,7 @@ char nfaExecLbrNVerm_expandState(const struct NFA *nfa, void *dest,
|
||||
|
||||
#define nfaExecLbrNVerm_testEOD NFA_API_NO_IMPL
|
||||
#define nfaExecLbrNVerm_B_Reverse NFA_API_NO_IMPL
|
||||
#define nfaExecLbrNVerm_zombie_status NFA_API_NO_IMPL
|
||||
#define nfaExecLbrNVerm_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||
|
||||
// LBR Shuf
|
||||
|
||||
@ -116,7 +116,7 @@ char nfaExecLbrShuf_expandState(const struct NFA *nfa, void *dest,
|
||||
|
||||
#define nfaExecLbrShuf_testEOD NFA_API_NO_IMPL
|
||||
#define nfaExecLbrShuf_B_Reverse NFA_API_NO_IMPL
|
||||
#define nfaExecLbrShuf_zombie_status NFA_API_NO_IMPL
|
||||
#define nfaExecLbrShuf_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||
|
||||
// LBR Truffle
|
||||
|
||||
@ -136,7 +136,7 @@ char nfaExecLbrTruf_expandState(const struct NFA *nfa, void *dest,
|
||||
|
||||
#define nfaExecLbrTruf_testEOD NFA_API_NO_IMPL
|
||||
#define nfaExecLbrTruf_B_Reverse NFA_API_NO_IMPL
|
||||
#define nfaExecLbrTruf_zombie_status NFA_API_NO_IMPL
|
||||
#define nfaExecLbrTruf_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -151,12 +151,12 @@ char JOIN(ENGINE_EXEC_NAME, _TopScan)(const struct NFA *nfa, struct mq *q,
|
||||
while (1) {
|
||||
// Find the next top with location >= the last escape we saw.
|
||||
for (; q->cur < q->end && q_cur_loc(q) <= end; q->cur++) {
|
||||
enum mqe_event t = q_cur_type(q);
|
||||
if ((t == MQE_TOP || t == MQE_TOP_FIRST) &&
|
||||
u32 event = q_cur_type(q);
|
||||
if ((event == MQE_TOP || event == MQE_TOP_FIRST) &&
|
||||
q_cur_offset(q) >= lstate->lastEscape) {
|
||||
goto found_top;
|
||||
}
|
||||
DEBUG_PRINTF("skip event type=%d offset=%lld\n", t, q_cur_offset(q));
|
||||
DEBUG_PRINTF("skip event type=%u offset=%lld\n", event, q_cur_offset(q));
|
||||
}
|
||||
|
||||
// No more tops, we're done.
|
||||
|
@ -56,7 +56,7 @@ char nfaExecMcClellan8_expandState(const struct NFA *nfa, void *dest,
|
||||
const void *src, u64a offset, u8 key);
|
||||
|
||||
#define nfaExecMcClellan8_B_Reverse NFA_API_NO_IMPL
|
||||
#define nfaExecMcClellan8_zombie_status NFA_API_NO_IMPL
|
||||
#define nfaExecMcClellan8_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||
|
||||
// 16-bit McClellan
|
||||
|
||||
@ -79,7 +79,7 @@ char nfaExecMcClellan16_expandState(const struct NFA *nfa, void *dest,
|
||||
const void *src, u64a offset, u8 key);
|
||||
|
||||
#define nfaExecMcClellan16_B_Reverse NFA_API_NO_IMPL
|
||||
#define nfaExecMcClellan16_zombie_status NFA_API_NO_IMPL
|
||||
#define nfaExecMcClellan16_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||
|
||||
/**
|
||||
* Simple streaming mode calls:
|
||||
|
@ -50,7 +50,7 @@ char nfaExecMpv0_expandState(const struct NFA *nfa, void *dest, const void *src,
|
||||
#define nfaExecMpv0_QR NFA_API_NO_IMPL
|
||||
#define nfaExecMpv0_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */
|
||||
#define nfaExecMpv0_B_Reverse NFA_API_NO_IMPL
|
||||
#define nfaExecMpv0_zombie_status NFA_API_NO_IMPL
|
||||
#define nfaExecMpv0_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||
|
||||
/**
|
||||
* return 0 if the mpv dies, otherwise returns the location of the next possible
|
||||
|
@ -250,7 +250,7 @@ void fillCounterInfos(vector<mpv_counter_info> *out, u32 *curr_decomp_offset,
|
||||
const map<ClusterKey, vector<raw_puff>> &kilopuffs) {
|
||||
/* first the triggered puffs */
|
||||
map<ClusterKey, vector<raw_puff>>::const_iterator it = kilopuffs.begin();
|
||||
while (it != kilopuffs.end() && it->first.trigger_event != ~0U) {
|
||||
while (it != kilopuffs.end() && it->first.trigger_event != MQE_INVALID) {
|
||||
assert(!it->first.auto_restart);
|
||||
assert(it->first.trigger_event
|
||||
== MQE_TOP_FIRST + distance(kilopuffs.begin(), it));
|
||||
@ -268,7 +268,7 @@ void fillCounterInfos(vector<mpv_counter_info> *out, u32 *curr_decomp_offset,
|
||||
*/
|
||||
map<ClusterKey, vector<raw_puff>>::const_iterator trig_ite = it;
|
||||
while (it != kilopuffs.end() && !it->first.auto_restart) {
|
||||
assert(it->first.trigger_event == ~0U);
|
||||
assert(it->first.trigger_event == MQE_INVALID);
|
||||
|
||||
++it;
|
||||
}
|
||||
@ -278,7 +278,7 @@ void fillCounterInfos(vector<mpv_counter_info> *out, u32 *curr_decomp_offset,
|
||||
kilopuffs, kilopuffs.begin(), it);
|
||||
}
|
||||
while (it != kilopuffs.end() && it->first.auto_restart) {
|
||||
assert(it->first.trigger_event == ~0U);
|
||||
assert(it->first.trigger_event == MQE_INVALID);
|
||||
|
||||
out->push_back(mpv_counter_info());
|
||||
map<ClusterKey, vector<raw_puff>>::const_iterator it_o = it;
|
||||
|
@ -41,24 +41,28 @@ extern "C"
|
||||
#define MAX_MQE_LEN 10
|
||||
|
||||
/** Queue events */
|
||||
enum mqe_event {
|
||||
MQE_START = 0, /**< and begin! Note: stateless engines will start from
|
||||
* this location */
|
||||
MQE_END = 1, /**< stop scanning */
|
||||
MQE_TOP = 2, /**< enable start + start dot star */
|
||||
MQE_TOP_FIRST = 4, /**< first event corresponding to a TOP _N_ */
|
||||
|
||||
/*
|
||||
* Additional tops (in multi-top engines) use the event values from
|
||||
* MQE_TOP_FIRST to something.
|
||||
*/
|
||||
/** Queue event: begin scanning. Note: stateless engines will start from this
|
||||
* location. */
|
||||
#define MQE_START 0U
|
||||
|
||||
MQE_INVALID = ~0U
|
||||
};
|
||||
/** Queue event: stop scanning. */
|
||||
#define MQE_END 1U
|
||||
|
||||
/** Queue event: enable start and start-dot-star. */
|
||||
#define MQE_TOP 2U
|
||||
|
||||
/** Queue event: first event corresponding to a numbered TOP. Additional tops
|
||||
* (in multi-top engines) use the event values from MQE_TOP_FIRST to
|
||||
* MQE_INVALID - 1. */
|
||||
#define MQE_TOP_FIRST 4U
|
||||
|
||||
/** Invalid queue event */
|
||||
#define MQE_INVALID (~0U)
|
||||
|
||||
/** Queue item */
|
||||
struct mq_item {
|
||||
u32 type; /**< event; from mqe_event */
|
||||
u32 type; /**< event type, from MQE_* */
|
||||
s64a location; /**< relative to the start of the current buffer */
|
||||
u64a som; /**< pattern start-of-match corresponding to a top, only used
|
||||
* by som engines. */
|
||||
|
@ -237,16 +237,32 @@ static really_inline
|
||||
int isMultiTopType(u8 t) {
|
||||
return !isDfaType(t) && !isLbrType(t);
|
||||
}
|
||||
/** Macro used in place of unimplemented NFA API functions for a given
|
||||
|
||||
/** Macros used in place of unimplemented NFA API functions for a given
|
||||
* engine. */
|
||||
#if !defined(_WIN32)
|
||||
#define NFA_API_NO_IMPL(...) \
|
||||
|
||||
/* Use for functions that return an integer. */
|
||||
#define NFA_API_NO_IMPL(...) \
|
||||
({ \
|
||||
assert("not implemented for this engine!"); \
|
||||
0; /* return value, for places that need it */ \
|
||||
})
|
||||
|
||||
/* Use for _zombie_status functions. */
|
||||
#define NFA_API_ZOMBIE_NO_IMPL(...) \
|
||||
({ \
|
||||
assert("not implemented for this engine!"); \
|
||||
NFA_ZOMBIE_NO; \
|
||||
})
|
||||
|
||||
#else
|
||||
#define NFA_API_NO_IMPL(...) 0
|
||||
|
||||
/* Simpler implementation for compilers that don't like the GCC extension used
|
||||
* above. */
|
||||
#define NFA_API_NO_IMPL(...) 0
|
||||
#define NFA_API_ZOMBIE_NO_IMPL(...) NFA_ZOMBIE_NO
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -67,6 +67,7 @@ static
|
||||
bool findPaths(const NGHolder &g, vector<Path> &paths) {
|
||||
vector<NFAVertex> order = getTopoOrdering(g);
|
||||
|
||||
vector<size_t> read_count(num_vertices(g));
|
||||
vector<vector<Path>> built(num_vertices(g));
|
||||
|
||||
for (auto it = order.rbegin(); it != order.rend(); ++it) {
|
||||
@ -74,6 +75,11 @@ bool findPaths(const NGHolder &g, vector<Path> &paths) {
|
||||
auto &out = built[g[v].index];
|
||||
assert(out.empty());
|
||||
|
||||
read_count[g[v].index] = out_degree(v, g);
|
||||
|
||||
DEBUG_PRINTF("setting read_count to %zu for %u\n",
|
||||
read_count[g[v].index], g[v].index);
|
||||
|
||||
if (v == g.start || v == g.startDs) {
|
||||
out.push_back({v});
|
||||
continue;
|
||||
@ -94,6 +100,9 @@ bool findPaths(const NGHolder &g, vector<Path> &paths) {
|
||||
continue;
|
||||
}
|
||||
|
||||
assert(!built[g[u].index].empty());
|
||||
assert(read_count[g[u].index]);
|
||||
|
||||
for (const auto &p : built[g[u].index]) {
|
||||
out.push_back(p);
|
||||
out.back().push_back(v);
|
||||
@ -105,6 +114,13 @@ bool findPaths(const NGHolder &g, vector<Path> &paths) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
read_count[g[u].index]--;
|
||||
if (!read_count[g[u].index]) {
|
||||
DEBUG_PRINTF("clearing %u as finished reading\n", g[u].index);
|
||||
built[g[u].index].clear();
|
||||
built[g[u].index].shrink_to_fit();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -118,10 +118,15 @@ bool findLiterals(const NGHolder &g,
|
||||
vector<NFAVertex> order = getTopoOrdering(g);
|
||||
|
||||
vector<set<sls_literal>> built(num_vertices(g));
|
||||
vector<size_t> read_count(num_vertices(g));
|
||||
|
||||
for (auto it = order.rbegin(); it != order.rend(); ++it) {
|
||||
NFAVertex v = *it;
|
||||
set<sls_literal> &out = built[g[v].index];
|
||||
read_count[g[v].index] = out_degree(v, g);
|
||||
|
||||
DEBUG_PRINTF("setting read_count to %zu for %u\n",
|
||||
read_count[g[v].index], g[v].index);
|
||||
|
||||
assert(out.empty());
|
||||
if (v == g.start) {
|
||||
@ -149,7 +154,10 @@ bool findLiterals(const NGHolder &g,
|
||||
}
|
||||
|
||||
set<sls_literal> &in = built[g[u].index];
|
||||
DEBUG_PRINTF("getting from %u (%zu reads to go)\n",
|
||||
g[u].index, read_count[g[u].index]);
|
||||
assert(!in.empty());
|
||||
assert(read_count[g[u].index]);
|
||||
|
||||
for (const sls_literal &lit : in) {
|
||||
if (accept) {
|
||||
@ -171,10 +179,18 @@ bool findLiterals(const NGHolder &g,
|
||||
out.insert(lit.append((u8)c, nocase));
|
||||
|
||||
if (out.size() + literals->size() > MAX_LITERAL_SET_SIZE) {
|
||||
DEBUG_PRINTF("too big %zu + %zu\n", out.size(),
|
||||
literals->size());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
read_count[g[u].index]--;
|
||||
if (!read_count[g[u].index]) {
|
||||
DEBUG_PRINTF("clearing %u as finished reading\n", g[u].index);
|
||||
in.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -206,6 +222,8 @@ bool handleSmallLiteralSets(RoseBuild &rose, const NGHolder &g,
|
||||
return false;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("looking for literals\n");
|
||||
|
||||
map<sls_literal, ue2::flat_set<ReportID>> literals;
|
||||
if (!findLiterals(g, &literals)) {
|
||||
DEBUG_PRINTF(":(\n");
|
||||
|
@ -51,11 +51,11 @@ using namespace std;
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Hard limit on the maximum repeat for bounded repeats. */
|
||||
static const u32 MAX_MAX_BOUND = 32767;
|
||||
static constexpr u32 MAX_REPEAT = 32767;
|
||||
|
||||
/** \brief If expanding a repeat would lead to this many positions being
|
||||
* generated, we fail the pattern. */
|
||||
static const u32 MAX_POSITIONS_EXPANDED = 500000; // arbitrarily huge
|
||||
static constexpr u32 MAX_POSITIONS_EXPANDED = 500000; // arbitrarily huge
|
||||
|
||||
/* no edge priorities means that if our subcomponent can be empty, our min
|
||||
* extent is effectively zero. */
|
||||
@ -67,7 +67,11 @@ ComponentRepeat::ComponentRepeat(unique_ptr<Component> sub_comp_in, u32 min,
|
||||
assert(sub_comp);
|
||||
assert(max > 0);
|
||||
assert(m_min <= m_max);
|
||||
if (m_max < NoLimit && m_max > MAX_MAX_BOUND) {
|
||||
|
||||
if (m_min > MAX_REPEAT) {
|
||||
throw ParseError("Bounded repeat is too large.");
|
||||
}
|
||||
if (m_max != NoLimit && m_max > MAX_REPEAT) {
|
||||
throw ParseError("Bounded repeat is too large.");
|
||||
}
|
||||
}
|
||||
@ -119,7 +123,7 @@ void checkPositions(vector<PositionInfo> &v, const GlushkovBuildState &bs) {
|
||||
|
||||
void ComponentRepeat::notePositions(GlushkovBuildState &bs) {
|
||||
assert(m_max > 0);
|
||||
assert(m_max == NoLimit || m_max < MAX_MAX_BOUND);
|
||||
assert(m_max == NoLimit || m_max < MAX_REPEAT);
|
||||
|
||||
/* Note: We can construct smaller subgraphs if we're not maintaining edge
|
||||
* priorities. */
|
||||
|
@ -391,7 +391,7 @@ hwlmcb_rv_t roseHandleSuffixTrigger(const struct RoseEngine *t,
|
||||
}
|
||||
}
|
||||
|
||||
enum mqe_event top = tr->suffixEvent;
|
||||
u32 top = tr->suffixEvent;
|
||||
assert(top == MQE_TOP || (top >= MQE_TOP_FIRST && top < MQE_INVALID));
|
||||
pushQueueSom(q, top, loc, som);
|
||||
|
||||
@ -977,14 +977,14 @@ void roseTriggerInfixes(const struct RoseEngine *t, const struct RoseRole *tr,
|
||||
do {
|
||||
u32 qi = curr_r->queue;
|
||||
u32 ri = queueToLeftIndex(t, qi);
|
||||
enum mqe_event topEvent = curr_r->event;
|
||||
u32 topEvent = curr_r->event;
|
||||
u8 cancel = curr_r->cancel_prev_top;
|
||||
assert(topEvent < MQE_INVALID);
|
||||
|
||||
const struct LeftNfaInfo *left = getLeftInfoByQueue(t, qi);
|
||||
assert(!left->transient);
|
||||
|
||||
DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, (u32)topEvent);
|
||||
DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, topEvent);
|
||||
|
||||
struct mq *q = tctxtToScratch(tctxt)->queues + qi;
|
||||
const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
|
||||
|
@ -2433,7 +2433,7 @@ vector<RoseTrigger> buildRoseTriggerList(const RoseGraph &g, RoseVertex u,
|
||||
assert(num_tops(g[v].left) == 1);
|
||||
top = MQE_TOP;
|
||||
} else {
|
||||
top = (enum mqe_event)((u32)MQE_TOP_FIRST + g[e].rose_top);
|
||||
top = MQE_TOP_FIRST + g[e].rose_top;
|
||||
assert(top < MQE_INVALID);
|
||||
}
|
||||
|
||||
|
@ -236,7 +236,7 @@ struct LeftNfaInfo {
|
||||
// A list of these is used to trigger prefix/infix roses.
|
||||
struct RoseTrigger {
|
||||
u32 queue; // queue index of leftfix
|
||||
u32 event; // from enum mqe_event
|
||||
u32 event; // queue event, from MQE_*
|
||||
u8 cancel_prev_top;
|
||||
};
|
||||
|
||||
@ -309,7 +309,7 @@ struct RoseRole {
|
||||
ReportID reportId; // report ID, or MO_INVALID_IDX
|
||||
u32 stateIndex; /**< index into state multibit, or MMB_INVALID. Roles do not
|
||||
* require a state bit if they are terminal */
|
||||
u32 suffixEvent; // from enum mqe_event
|
||||
u32 suffixEvent; // queue event, from MQE_
|
||||
u8 depth; /**< depth of this vertex from root in the tree, or 255 if greater.
|
||||
*/
|
||||
u32 suffixOffset; /**< suffix nfa: 0 if no suffix associated with the role,
|
||||
|
@ -1163,11 +1163,10 @@ hs_error_t hs_reset_and_copy_stream(hs_stream_t *to_id,
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
if (!scratch || !validScratch(to_id->rose, scratch)) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
if (onEvent) {
|
||||
if (!scratch || !validScratch(to_id->rose, scratch)) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
report_eod_matches(to_id, scratch, onEvent, context);
|
||||
}
|
||||
|
||||
@ -1406,12 +1405,14 @@ HS_PUBLIC_API
|
||||
hs_error_t hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags,
|
||||
hs_scratch_t *scratch, match_event_handler onEvent,
|
||||
void *context) {
|
||||
if (!id || !scratch || !validScratch(id->rose, scratch)) {
|
||||
if (!id) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
/* user wants eod matches */
|
||||
if (onEvent) {
|
||||
if (!scratch || !validScratch(id->rose, scratch)) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
report_eod_matches(id, scratch, onEvent, context);
|
||||
}
|
||||
|
||||
|
@ -254,7 +254,7 @@ u32 compress32(u32 x, u32 m) {
|
||||
#if defined(__BMI2__)
|
||||
// BMI2 has a single instruction for this operation.
|
||||
return _pext_u32(x, m);
|
||||
#endif
|
||||
#else
|
||||
|
||||
// Return zero quickly on trivial cases
|
||||
if ((x & m) == 0) {
|
||||
@ -281,6 +281,7 @@ u32 compress32(u32 x, u32 m) {
|
||||
}
|
||||
|
||||
return x;
|
||||
#endif
|
||||
}
|
||||
|
||||
static really_inline
|
||||
@ -288,7 +289,7 @@ u64a compress64(u64a x, u64a m) {
|
||||
#if defined(ARCH_X86_64) && defined(__BMI2__)
|
||||
// BMI2 has a single instruction for this operation.
|
||||
return _pext_u64(x, m);
|
||||
#endif
|
||||
#else
|
||||
|
||||
// Return zero quickly on trivial cases
|
||||
if ((x & m) == 0) {
|
||||
@ -316,6 +317,7 @@ u64a compress64(u64a x, u64a m) {
|
||||
}
|
||||
|
||||
return x;
|
||||
#endif
|
||||
}
|
||||
|
||||
static really_inline
|
||||
@ -323,7 +325,7 @@ u32 expand32(u32 x, u32 m) {
|
||||
#if defined(__BMI2__)
|
||||
// BMI2 has a single instruction for this operation.
|
||||
return _pdep_u32(x, m);
|
||||
#endif
|
||||
#else
|
||||
|
||||
// Return zero quickly on trivial cases
|
||||
if (!x || !m) {
|
||||
@ -355,6 +357,7 @@ u32 expand32(u32 x, u32 m) {
|
||||
}
|
||||
|
||||
return x & m0; // clear out extraneous bits
|
||||
#endif
|
||||
}
|
||||
|
||||
static really_inline
|
||||
@ -362,7 +365,7 @@ u64a expand64(u64a x, u64a m) {
|
||||
#if defined(ARCH_X86_64) && defined(__BMI2__)
|
||||
// BMI2 has a single instruction for this operation.
|
||||
return _pdep_u64(x, m);
|
||||
#endif
|
||||
#else
|
||||
|
||||
// Return zero quickly on trivial cases
|
||||
if (!x || !m) {
|
||||
@ -395,6 +398,7 @@ u64a expand64(u64a x, u64a m) {
|
||||
}
|
||||
|
||||
return x & m0; // clear out extraneous bits
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
@ -51,7 +51,7 @@ u32 shuffleDynamic32(u32 x, u32 mask) {
|
||||
#if defined(HAVE_PEXT)
|
||||
// Intel BMI2 can do this operation in one instruction.
|
||||
return _pext_u32(x, mask);
|
||||
#endif
|
||||
#else
|
||||
|
||||
u32 result = 0, num = 1;
|
||||
while (mask != 0) {
|
||||
@ -63,6 +63,7 @@ u32 shuffleDynamic32(u32 x, u32 mask) {
|
||||
num <<= 1;
|
||||
}
|
||||
return result;
|
||||
#endif
|
||||
}
|
||||
|
||||
static really_inline
|
||||
@ -70,7 +71,7 @@ u32 shuffleDynamic64(u64a x, u64a mask) {
|
||||
#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
|
||||
// Intel BMI2 can do this operation in one instruction.
|
||||
return _pext_u64(x, mask);
|
||||
#endif
|
||||
#else
|
||||
|
||||
u32 result = 0, num = 1;
|
||||
while (mask != 0) {
|
||||
@ -82,6 +83,7 @@ u32 shuffleDynamic64(u64a x, u64a mask) {
|
||||
num <<= 1;
|
||||
}
|
||||
return result;
|
||||
#endif
|
||||
}
|
||||
|
||||
#undef HAVE_PEXT
|
||||
|
@ -112,6 +112,7 @@ endif()
|
||||
#
|
||||
# build target to run unit tests
|
||||
#
|
||||
if (NOT RELEASE_BUILD)
|
||||
add_custom_target(
|
||||
unit
|
||||
COMMAND bin/unit-internal
|
||||
@ -119,3 +120,11 @@ add_custom_target(
|
||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
|
||||
DEPENDS unit-internal unit-hyperscan
|
||||
)
|
||||
else ()
|
||||
add_custom_target(
|
||||
unit
|
||||
COMMAND bin/unit-hyperscan
|
||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
|
||||
DEPENDS unit-hyperscan
|
||||
)
|
||||
endif()
|
||||
|
@ -740,7 +740,9 @@ TEST(HyperscanArgChecks, ResetAndCopyStreamSameToId) {
|
||||
hs_free_database(db);
|
||||
}
|
||||
|
||||
TEST(HyperscanArgChecks, ResetAndCopyStreamNoScratch) {
|
||||
// hs_reset_and_copy_stream: You're allowed to reset and copy a stream with no
|
||||
// scratch and no callback.
|
||||
TEST(HyperscanArgChecks, ResetAndCopyStreamNoCallbackOrScratch) {
|
||||
hs_stream_t *stream = nullptr;
|
||||
hs_stream_t *stream_to = nullptr;
|
||||
hs_database_t *db = nullptr;
|
||||
@ -760,6 +762,37 @@ TEST(HyperscanArgChecks, ResetAndCopyStreamNoScratch) {
|
||||
ASSERT_EQ(HS_SUCCESS, err);
|
||||
|
||||
err = hs_reset_and_copy_stream(stream_to, stream, nullptr, nullptr, nullptr);
|
||||
ASSERT_EQ(HS_SUCCESS, err);
|
||||
|
||||
hs_close_stream(stream_to, scratch, nullptr, nullptr);
|
||||
hs_close_stream(stream, scratch, nullptr, nullptr);
|
||||
hs_free_scratch(scratch);
|
||||
hs_free_database(db);
|
||||
}
|
||||
|
||||
// hs_reset_and_copy_stream: If you specify a callback, you must provide
|
||||
// scratch.
|
||||
TEST(HyperscanArgChecks, ResetAndCopyStreamNoScratch) {
|
||||
hs_stream_t *stream = nullptr;
|
||||
hs_stream_t *stream_to = nullptr;
|
||||
hs_database_t *db = nullptr;
|
||||
hs_compile_error_t *compile_err = nullptr;
|
||||
hs_error_t err = hs_compile("foobar", 0, HS_MODE_STREAM, nullptr, &db,
|
||||
&compile_err);
|
||||
ASSERT_EQ(HS_SUCCESS, err);
|
||||
|
||||
hs_scratch_t *scratch = nullptr;
|
||||
err = hs_alloc_scratch(db, &scratch);
|
||||
ASSERT_EQ(HS_SUCCESS, err);
|
||||
|
||||
err = hs_open_stream(db, 0, &stream);
|
||||
ASSERT_EQ(HS_SUCCESS, err);
|
||||
|
||||
err = hs_open_stream(db, 0, &stream_to);
|
||||
ASSERT_EQ(HS_SUCCESS, err);
|
||||
|
||||
err = hs_reset_and_copy_stream(stream_to, stream, nullptr, dummy_cb,
|
||||
nullptr);
|
||||
ASSERT_EQ(HS_INVALID, err);
|
||||
|
||||
hs_close_stream(stream_to, scratch, nullptr, nullptr);
|
||||
@ -793,7 +826,8 @@ TEST(HyperscanArgChecks, ResetAndCopyStreamDiffDb) {
|
||||
err = hs_open_stream(db2, 0, &stream_to);
|
||||
ASSERT_EQ(HS_SUCCESS, err);
|
||||
|
||||
err = hs_reset_and_copy_stream(stream_to, stream, scratch, nullptr, nullptr);
|
||||
err = hs_reset_and_copy_stream(stream_to, stream, scratch, nullptr,
|
||||
nullptr);
|
||||
ASSERT_EQ(HS_INVALID, err);
|
||||
|
||||
hs_close_stream(stream_to, scratch, nullptr, nullptr);
|
||||
@ -2009,6 +2043,7 @@ TEST(HyperscanArgChecks, ScanStreamBadScratch) {
|
||||
free(local_garbage);
|
||||
}
|
||||
|
||||
// hs_reset_stream: bad scratch arg
|
||||
TEST(HyperscanArgChecks, ResetStreamBadScratch) {
|
||||
hs_database_t *db = nullptr;
|
||||
hs_compile_error_t *compile_err = nullptr;
|
||||
@ -2025,7 +2060,7 @@ TEST(HyperscanArgChecks, ResetStreamBadScratch) {
|
||||
ASSERT_EQ(HS_SUCCESS, err);
|
||||
ASSERT_TRUE(stream != nullptr);
|
||||
|
||||
err = hs_reset_stream(stream, 0, scratch, nullptr, nullptr);
|
||||
err = hs_reset_stream(stream, 0, scratch, dummy_cb, nullptr);
|
||||
EXPECT_NE(HS_SUCCESS, err);
|
||||
EXPECT_NE(HS_SCAN_TERMINATED, err);
|
||||
|
||||
|
@ -127,3 +127,4 @@
|
||||
127:/^fo?ob{ro|nax_off\Qt=10omnax+8Wnah/ñññññññññññññññññññññññññññ0}l.{1,60}Car*k|npanomnax+8Wnah/8 #Expression is not valid UTF-8.
|
||||
128:/(*UTF8)^fo?ob{ro|nax_off\Qt=10omnax+8Wnah/ñññññññññññññññññññññññññññ0}l.{1,60}Car*k|npanomnax+8Wnah/ #Expression is not valid UTF-8.
|
||||
129:/bignum \1111111111111111111/ #Number is too big at index 7.
|
||||
130:/foo|&{5555555,}/ #Bounded repeat is too large.
|
||||
|
@ -86,8 +86,7 @@ TEST(HyperscanTestBehaviour, ScanSeveralGigabytesNoMatch) {
|
||||
hs_error_t err;
|
||||
const size_t datalen = 1024 * 1024;
|
||||
size_t megabytes = 5 * 1024;
|
||||
char * data = new char[datalen];
|
||||
memset(data, 'X', datalen);
|
||||
vector<char> data(datalen, 'X');
|
||||
|
||||
// build a database
|
||||
hs_database_t *db = nullptr;
|
||||
@ -110,8 +109,8 @@ TEST(HyperscanTestBehaviour, ScanSeveralGigabytesNoMatch) {
|
||||
ASSERT_TRUE(stream != nullptr);
|
||||
|
||||
while (megabytes-- > 0) {
|
||||
err = hs_scan_stream(stream, data, datalen, 0, scratch, dummyHandler,
|
||||
nullptr);
|
||||
err = hs_scan_stream(stream, data.data(), data.size(), 0, scratch,
|
||||
dummyHandler, nullptr);
|
||||
ASSERT_EQ(HS_SUCCESS, err);
|
||||
}
|
||||
|
||||
@ -121,7 +120,6 @@ TEST(HyperscanTestBehaviour, ScanSeveralGigabytesNoMatch) {
|
||||
// teardown
|
||||
hs_free_scratch(scratch);
|
||||
hs_free_database(db);
|
||||
delete [] data;
|
||||
}
|
||||
|
||||
struct HugeScanMatchingData {
|
||||
@ -141,8 +139,7 @@ TEST_P(HyperscanScanGigabytesMatch, StreamingMatch) {
|
||||
|
||||
hs_error_t err;
|
||||
const size_t datalen = 1024*1024;
|
||||
char * data = new char[datalen];
|
||||
memset(data, 'X', datalen);
|
||||
vector<char> data(datalen, 'X');
|
||||
|
||||
// build a database
|
||||
hs_database_t *db = nullptr;
|
||||
@ -178,7 +175,7 @@ TEST_P(HyperscanScanGigabytesMatch, StreamingMatch) {
|
||||
// streaming mode scan of our megabyte of data gb*1024 times
|
||||
unsigned long remaining = gb * 1024;
|
||||
while (remaining-- > 0) {
|
||||
err = hs_scan_stream(stream, data, datalen, 0, scratch,
|
||||
err = hs_scan_stream(stream, data.data(), data.size(), 0, scratch,
|
||||
singleHandler, nullptr);
|
||||
ASSERT_EQ(HS_SUCCESS, err);
|
||||
ASSERT_EQ(0ULL, lastMatchTo);
|
||||
@ -202,7 +199,6 @@ TEST_P(HyperscanScanGigabytesMatch, StreamingMatch) {
|
||||
// teardown
|
||||
hs_free_scratch(scratch);
|
||||
hs_free_database(db);
|
||||
delete[] data;
|
||||
}
|
||||
|
||||
// Helper function to actually perform scans for BlockMatch test below
|
||||
|
@ -69,7 +69,9 @@ TEST(StreamUtil, reset1) {
|
||||
|
||||
c.matches.clear();
|
||||
|
||||
err = hs_reset_stream(stream, 0, scratch, nullptr, nullptr);
|
||||
// Note: we do not need matches from this reset operation, so we do not
|
||||
// need to supply a callback or scratch space.
|
||||
err = hs_reset_stream(stream, 0, nullptr, nullptr, nullptr);
|
||||
ASSERT_EQ(HS_SUCCESS, err);
|
||||
|
||||
err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb2,
|
||||
@ -107,7 +109,9 @@ TEST(StreamUtil, reset2) {
|
||||
|
||||
c.matches.clear();
|
||||
|
||||
err = hs_reset_stream(stream, 0, scratch, nullptr, nullptr);
|
||||
// Note: we do not need matches from this reset operation, so we do not
|
||||
// need to supply a callback or scratch space.
|
||||
err = hs_reset_stream(stream, 0, nullptr, nullptr, nullptr);
|
||||
ASSERT_EQ(HS_SUCCESS, err);
|
||||
|
||||
err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb,
|
||||
@ -268,7 +272,7 @@ TEST(StreamUtil, copy_reset1) {
|
||||
|
||||
c.matches.clear();
|
||||
|
||||
err = hs_reset_and_copy_stream(stream, stream2, scratch, nullptr, nullptr);
|
||||
err = hs_reset_and_copy_stream(stream, stream2, nullptr, nullptr, nullptr);
|
||||
ASSERT_EQ(HS_SUCCESS, err);
|
||||
|
||||
err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb2,
|
||||
@ -312,7 +316,7 @@ TEST(StreamUtil, copy_reset2) {
|
||||
|
||||
c.matches.clear();
|
||||
|
||||
err = hs_reset_and_copy_stream(stream, stream2, scratch, nullptr, nullptr);
|
||||
err = hs_reset_and_copy_stream(stream, stream2, nullptr, nullptr, nullptr);
|
||||
ASSERT_EQ(HS_SUCCESS, err);
|
||||
|
||||
err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb,
|
||||
@ -355,7 +359,7 @@ TEST(StreamUtil, copy_reset3) {
|
||||
|
||||
c.matches.clear();
|
||||
|
||||
err = hs_reset_and_copy_stream(stream2, stream, scratch, nullptr, nullptr);
|
||||
err = hs_reset_and_copy_stream(stream2, stream, nullptr, nullptr, nullptr);
|
||||
ASSERT_EQ(HS_SUCCESS, err);
|
||||
|
||||
err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb,
|
||||
@ -408,7 +412,7 @@ TEST(StreamUtil, copy_reset4) {
|
||||
|
||||
c.matches.clear();
|
||||
|
||||
err = hs_reset_and_copy_stream(stream2, stream, scratch, nullptr, nullptr);
|
||||
err = hs_reset_and_copy_stream(stream2, stream, nullptr, nullptr, nullptr);
|
||||
ASSERT_EQ(HS_SUCCESS, err);
|
||||
|
||||
err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb,
|
||||
@ -458,7 +462,7 @@ TEST(StreamUtil, copy_reset5) {
|
||||
ASSERT_EQ(HS_SUCCESS, err);
|
||||
ASSERT_EQ(0U, c.matches.size());
|
||||
|
||||
err = hs_reset_and_copy_stream(stream2, stream, scratch, nullptr, nullptr);
|
||||
err = hs_reset_and_copy_stream(stream2, stream, nullptr, nullptr, nullptr);
|
||||
ASSERT_EQ(HS_SUCCESS, err);
|
||||
|
||||
err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb,
|
||||
|
@ -207,7 +207,6 @@ TEST_P(FDRp, SimpleSingle) {
|
||||
TEST_P(FDRp, MultiLocation) {
|
||||
const u32 hint = GetParam();
|
||||
SCOPED_TRACE(hint);
|
||||
u8 * data;
|
||||
|
||||
vector<hwlmLiteral> lits;
|
||||
lits.push_back(hwlmLiteral("abc", 0, 1));
|
||||
@ -216,24 +215,23 @@ TEST_P(FDRp, MultiLocation) {
|
||||
CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
|
||||
|
||||
const u32 testSize = 128;
|
||||
data = (u8 *)malloc(testSize);
|
||||
memset(data, 0, testSize);
|
||||
|
||||
vector<u8> data(testSize, 0);
|
||||
|
||||
for (u32 i = 0; i < testSize - 3; i++) {
|
||||
memcpy(data + i, "abc", 3);
|
||||
memcpy(data.data() + i, "abc", 3);
|
||||
vector<match> matches;
|
||||
fdrExec(fdr.get(), (const u8 *)data, testSize, 0, decentCallback,
|
||||
&matches, HWLM_ALL_GROUPS);
|
||||
fdrExec(fdr.get(), data.data(), testSize, 0, decentCallback, &matches,
|
||||
HWLM_ALL_GROUPS);
|
||||
ASSERT_EQ(1U, matches.size());
|
||||
EXPECT_EQ(match(i, i+2, 1), matches[0]);
|
||||
memset(data + i, 0, 3);
|
||||
memset(data.data() + i, 0, 3);
|
||||
}
|
||||
free(data);
|
||||
}
|
||||
|
||||
TEST_P(FDRp, Flood) {
|
||||
const u32 hint = GetParam();
|
||||
SCOPED_TRACE(hint);
|
||||
u8 * data;
|
||||
|
||||
vector<hwlmLiteral> lits;
|
||||
lits.push_back(hwlmLiteral("aaaa", 0, 1));
|
||||
@ -245,11 +243,10 @@ TEST_P(FDRp, Flood) {
|
||||
CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
|
||||
|
||||
const u32 testSize = 1024;
|
||||
data = (u8 *)malloc(testSize);
|
||||
memset(data, 'a', testSize);
|
||||
vector<u8> data(testSize, 'a');
|
||||
|
||||
vector<match> matches;
|
||||
fdrExec(fdr.get(), (const u8 *)data, testSize, 0, decentCallback, &matches,
|
||||
fdrExec(fdr.get(), data.data(), testSize, 0, decentCallback, &matches,
|
||||
HWLM_ALL_GROUPS);
|
||||
ASSERT_EQ(testSize - 3 + testSize - 7, matches.size());
|
||||
EXPECT_EQ(match(0, 3, 1), matches[0]);
|
||||
@ -266,8 +263,6 @@ TEST_P(FDRp, Flood) {
|
||||
match(i - 3, i, 1) == matches[currentMatch])
|
||||
);
|
||||
}
|
||||
|
||||
free(data);
|
||||
}
|
||||
|
||||
TEST_P(FDRp, NoRepeat1) {
|
||||
@ -483,10 +478,10 @@ TEST_P(FDRp, moveByteStream) {
|
||||
|
||||
size_t size = fdrSize(fdrTable0.get());
|
||||
|
||||
FDR *fdrTable = (FDR *)aligned_zmalloc(size);
|
||||
EXPECT_TRUE(fdrTable);
|
||||
auto fdrTable = aligned_zmalloc_unique<FDR>(size);
|
||||
EXPECT_NE(nullptr, fdrTable);
|
||||
|
||||
memcpy(fdrTable, fdrTable0.get(), size);
|
||||
memcpy(fdrTable.get(), fdrTable0.get(), size);
|
||||
|
||||
// bugger up original
|
||||
for (size_t i = 0 ; i < size; i++) {
|
||||
@ -496,14 +491,13 @@ TEST_P(FDRp, moveByteStream) {
|
||||
// check matches
|
||||
vector<match> matches;
|
||||
|
||||
hwlm_error_t fdrStatus = fdrExec(fdrTable, (const u8 *)data, data_len, 0,
|
||||
decentCallback, &matches, HWLM_ALL_GROUPS);
|
||||
hwlm_error_t fdrStatus = fdrExec(fdrTable.get(), (const u8 *)data,
|
||||
data_len, 0, decentCallback, &matches,
|
||||
HWLM_ALL_GROUPS);
|
||||
ASSERT_EQ(0, fdrStatus);
|
||||
|
||||
ASSERT_EQ(1U, matches.size());
|
||||
EXPECT_EQ(match(12, 17, 0), matches[0]);
|
||||
|
||||
aligned_free(fdrTable);
|
||||
}
|
||||
|
||||
TEST_P(FDRp, Stream1) {
|
||||
|
@ -32,6 +32,7 @@
|
||||
|
||||
#include "config.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "nfagraph_common.h"
|
||||
#include "grey.h"
|
||||
#include "hs.h"
|
||||
#include "compiler/compiler.h"
|
||||
@ -43,17 +44,8 @@
|
||||
using namespace std;
|
||||
using namespace ue2;
|
||||
|
||||
// Helper: build us an NFA graph from a regex
|
||||
static
|
||||
unique_ptr<NGWrapper> constructGraph(const string &expr) {
|
||||
CompileContext cc(false, false, get_current_target(), Grey());
|
||||
ParsedExpression parsed(0, expr.c_str(), 0, 0);
|
||||
ReportManager rm(cc.grey);
|
||||
return buildWrapper(rm, cc, parsed);
|
||||
}
|
||||
|
||||
TEST(NFAGraph, CalcComp1) {
|
||||
auto graph = constructGraph("abc|def|ghi");
|
||||
auto graph = constructGraph("abc|def|ghi", 0);
|
||||
ASSERT_TRUE(graph != nullptr);
|
||||
|
||||
deque<unique_ptr<NGHolder>> comps = calcComponents(*graph);
|
||||
@ -61,7 +53,7 @@ TEST(NFAGraph, CalcComp1) {
|
||||
}
|
||||
|
||||
TEST(NFAGraph, CalcComp2) {
|
||||
auto graph = constructGraph("a|b|c|d|e|f|g|h|i");
|
||||
auto graph = constructGraph("a|b|c|d|e|f|g|h|i", 0);
|
||||
ASSERT_TRUE(graph != nullptr);
|
||||
|
||||
deque<unique_ptr<NGHolder>> comps = calcComponents(*graph);
|
||||
@ -72,7 +64,7 @@ TEST(NFAGraph, CalcComp2) {
|
||||
|
||||
TEST(NFAGraph, RecalcComp1) {
|
||||
deque<unique_ptr<NGHolder>> comps;
|
||||
comps.push_back(constructGraph("abc|def|ghi"));
|
||||
comps.push_back(constructGraph("abc|def|ghi", 0));
|
||||
ASSERT_TRUE(comps.back() != nullptr);
|
||||
|
||||
recalcComponents(comps);
|
||||
|
@ -29,6 +29,8 @@
|
||||
#include "config.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include "nfagraph_common.h"
|
||||
|
||||
#include "compiler/compiler.h"
|
||||
#include "grey.h"
|
||||
#include "nfagraph/ng_builder.h"
|
||||
@ -42,15 +44,10 @@ using namespace std;
|
||||
using namespace testing;
|
||||
using namespace ue2;
|
||||
|
||||
#define NUM_MATCHES 4U
|
||||
#define P(x,y) pair<size_t, size_t>(x, y)
|
||||
#define NO_MATCH P(~0U, ~0U)
|
||||
|
||||
struct MatchesTestParams {
|
||||
string pattern;
|
||||
string input;
|
||||
// max 4 matches per pattern, P(-1,-1) is "no match"
|
||||
pair<size_t, size_t> matches[NUM_MATCHES];
|
||||
vector<pair<size_t, size_t>> matches;
|
||||
unsigned flags;
|
||||
bool notEod;
|
||||
bool som;
|
||||
@ -58,18 +55,11 @@ struct MatchesTestParams {
|
||||
|
||||
// teach google-test how to print a param
|
||||
void PrintTo(const MatchesTestParams &p, ::std::ostream *os) {
|
||||
pair<size_t, size_t> *matches = const_cast<pair<size_t, size_t> *>(p.matches);
|
||||
|
||||
*os << "( \"" << p.pattern << "\", "
|
||||
<< "\"" << p.input << "\", "
|
||||
<< "{";
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (matches[i] == NO_MATCH) {
|
||||
*os << "NO_MATCH,";
|
||||
break;
|
||||
} else {
|
||||
*os << "P(" << matches[i].first << ',' << matches[i].second << "),";
|
||||
}
|
||||
for (const auto &match : p.matches) {
|
||||
*os << "P(" << match.first << ',' << match.second << "),";
|
||||
}
|
||||
*os << "}, ";
|
||||
*os << "flags(" << p.flags << "), "
|
||||
@ -81,192 +71,153 @@ void PrintTo(const MatchesTestParams &p, ::std::ostream *os) {
|
||||
class MatchesTest: public TestWithParam<MatchesTestParams> {
|
||||
};
|
||||
|
||||
#define P(x, y) pair<size_t, size_t>((x), (y))
|
||||
|
||||
static const MatchesTestParams matchesTests[] = {
|
||||
// EOD and anchored patterns
|
||||
|
||||
// these should produce no matches
|
||||
{ "^foobar", "foolish", {NO_MATCH}, 0, false, true},
|
||||
{ "^foobar$", "ze foobar", {NO_MATCH}, 0, false, true},
|
||||
{ "^foobar$", "foobar ", {NO_MATCH}, 0, false, true},
|
||||
{ "^abc\\b", "abcde", {NO_MATCH}, 0, false, true},
|
||||
{ "^a\\b", "aa", {NO_MATCH}, 0, false, true},
|
||||
{ "^foobar\\b", "foobarz", {NO_MATCH}, 0, false, true},
|
||||
{ "^foobar", "fooq", {NO_MATCH}, 0, false, true},
|
||||
{ "^foobar", "foo", {NO_MATCH}, 0, false, true},
|
||||
{ "^foobar", "fooba", {NO_MATCH}, 0, false, true},
|
||||
{ "^foo *bar", "foolishness bar none ", {NO_MATCH}, 0, false, true},
|
||||
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc p", {NO_MATCH}, 0, false, true},
|
||||
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc dez", {NO_MATCH}, 0, false, true},
|
||||
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc ghi", {NO_MATCH}, 0, false, true},
|
||||
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc hij", {NO_MATCH}, 0, false, true},
|
||||
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc klm", {NO_MATCH}, 0, false, true},
|
||||
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abcklmn", {NO_MATCH}, 0, false, true},
|
||||
{ "^.*foobar", "foobaz", {NO_MATCH}, 0, false, true},
|
||||
{ "^.*foobar", "foobaz\n", {NO_MATCH}, 0, false, true},
|
||||
{ "^(foo)|(bar)", "fo baz", {NO_MATCH}, 0, false, true},
|
||||
{ "^((foo)|(bar))", "fo baz", {NO_MATCH}, 0, false, true},
|
||||
{ "aaaaaaaa$", "AAaaAAaa", {NO_MATCH}, 0, false, true},
|
||||
{ "^foo\\z", "foo\n", {NO_MATCH}, 0, false, true},
|
||||
{ "^(foo){2,}", "foo", {NO_MATCH}, 0, false, true},
|
||||
{ "^foobar", "foolish", {}, 0, false, true},
|
||||
{ "^foobar$", "ze foobar", {}, 0, false, true},
|
||||
{ "^foobar$", "foobar ", {}, 0, false, true},
|
||||
{ "^abc\\b", "abcde", {}, 0, false, true},
|
||||
{ "^a\\b", "aa", {}, 0, false, true},
|
||||
{ "^foobar\\b", "foobarz", {}, 0, false, true},
|
||||
{ "^foobar", "fooq", {}, 0, false, true},
|
||||
{ "^foobar", "foo", {}, 0, false, true},
|
||||
{ "^foobar", "fooba", {}, 0, false, true},
|
||||
{ "^foo *bar", "foolishness bar none ", {}, 0, false, true},
|
||||
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc p", {}, 0, false, true},
|
||||
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc dez", {}, 0, false, true},
|
||||
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc ghi", {}, 0, false, true},
|
||||
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc hij", {}, 0, false, true},
|
||||
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc klm", {}, 0, false, true},
|
||||
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abcklmn", {}, 0, false, true},
|
||||
{ "^.*foobar", "foobaz", {}, 0, false, true},
|
||||
{ "^.*foobar", "foobaz\n", {}, 0, false, true},
|
||||
{ "^(foo)|(bar)", "fo baz", {}, 0, false, true},
|
||||
{ "^((foo)|(bar))", "fo baz", {}, 0, false, true},
|
||||
{ "aaaaaaaa$", "AAaaAAaa", {}, 0, false, true},
|
||||
{ "^foo\\z", "foo\n", {}, 0, false, true},
|
||||
{ "^(foo){2,}", "foo", {}, 0, false, true},
|
||||
|
||||
// these should match
|
||||
{ "^abc\\B", "abcde", { P(0,3), NO_MATCH }, 0, false, true},
|
||||
{ "^abc\\b", "abc de", { P(0, 3), NO_MATCH }, 0, false, true},
|
||||
{ "^foobar", "foobar", { P(0, 6), NO_MATCH }, 0, false, true},
|
||||
{ "^foobar$", "foobar", { P(0, 6), NO_MATCH }, 0, false, true},
|
||||
{ "^foobar", "foobarq", { P(0, 6), NO_MATCH }, 0, false, true},
|
||||
{ "^foobar\\B", "foobarz", { P(0, 6), NO_MATCH }, 0, false, true},
|
||||
{ "^foo.*bar", "foobar none ", { P(0, 6), NO_MATCH }, 0, false, true},
|
||||
{ "^foo.*bar", "foo bar none ", { P(0, 7), NO_MATCH }, 0, false, true},
|
||||
{ "^foo.*bar", "foo bar none ", { P(0, 10), NO_MATCH }, 0, false, true},
|
||||
{ "^foo.*bar", "foolishness bar none ", { P(0, 15), NO_MATCH }, 0, false, true},
|
||||
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "klmny", { P(0, 5), NO_MATCH }, 0, false, true},
|
||||
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc dexyklmnxy", { P(0, 17), NO_MATCH }, 0, false, true},
|
||||
{ "^.*foobar", "abcfoobar", { P(0, 9), NO_MATCH }, 0, false, true},
|
||||
{ "^((foo)|(bar))", "foobar", { P(0, 3), NO_MATCH }, 0, false, true},
|
||||
{ "^((foo)|(bar))", "foo bar", { P(0, 3), NO_MATCH }, 0, false, true},
|
||||
{ "^(foo)|(bar)", "foobaz", { P(0, 3), NO_MATCH }, 0, false, true},
|
||||
{ "^(foo)|(bar)", "foo baz", { P(0, 3), NO_MATCH }, 0, false, true},
|
||||
{ "^(f[o0]+o)|(bar)", "fo0o baz", { P(0, 4), NO_MATCH }, 0, false, true},
|
||||
{ "aaaaaaaa$", "AAaaAAaa", { P(0, 8), NO_MATCH }, HS_FLAG_CASELESS, false, true},
|
||||
{ "^foo\\z", "foo", { P(0, 3), NO_MATCH }, 0, false, true},
|
||||
{ "^foo\\Z", "foo", { P(0, 3), NO_MATCH }, 0, false, true},
|
||||
{ "^foo\\Z", "foo\n", { P(0, 3), NO_MATCH }, 0, false, true},
|
||||
{ "^(foo){2,}", "foofoofoo", { P(0, 6), P(0, 9), NO_MATCH}, 0, false, true},
|
||||
{ "^abc\\B", "abcde", { P(0,3) }, 0, false, true},
|
||||
{ "^abc\\b", "abc de", { P(0, 3) }, 0, false, true},
|
||||
{ "^foobar", "foobar", { P(0, 6) }, 0, false, true},
|
||||
{ "^foobar$", "foobar", { P(0, 6) }, 0, false, true},
|
||||
{ "^foobar", "foobarq", { P(0, 6) }, 0, false, true},
|
||||
{ "^foobar\\B", "foobarz", { P(0, 6) }, 0, false, true},
|
||||
{ "^foo.*bar", "foobar none ", { P(0, 6) }, 0, false, true},
|
||||
{ "^foo.*bar", "foo bar none ", { P(0, 7) }, 0, false, true},
|
||||
{ "^foo.*bar", "foo bar none ", { P(0, 10) }, 0, false, true},
|
||||
{ "^foo.*bar", "foolishness bar none ", { P(0, 15) }, 0, false, true},
|
||||
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "klmny", { P(0, 5) }, 0, false, true},
|
||||
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc dexyklmnxy", { P(0, 17) }, 0, false, true},
|
||||
{ "^.*foobar", "abcfoobar", { P(0, 9) }, 0, false, true},
|
||||
{ "^((foo)|(bar))", "foobar", { P(0, 3) }, 0, false, true},
|
||||
{ "^((foo)|(bar))", "foo bar", { P(0, 3) }, 0, false, true},
|
||||
{ "^(foo)|(bar)", "foobaz", { P(0, 3) }, 0, false, true},
|
||||
{ "^(foo)|(bar)", "foo baz", { P(0, 3) }, 0, false, true},
|
||||
{ "^(f[o0]+o)|(bar)", "fo0o baz", { P(0, 4) }, 0, false, true},
|
||||
{ "aaaaaaaa$", "AAaaAAaa", { P(0, 8) }, HS_FLAG_CASELESS, false, true},
|
||||
{ "^foo\\z", "foo", { P(0, 3) }, 0, false, true},
|
||||
{ "^foo\\Z", "foo", { P(0, 3) }, 0, false, true},
|
||||
{ "^foo\\Z", "foo\n", { P(0, 3) }, 0, false, true},
|
||||
{ "^(foo){2,}", "foofoofoo", { P(0, 6), P(0, 9)}, 0, false, true},
|
||||
|
||||
// try multiple matches per pattern
|
||||
{ "^(foo)|(bar)", "foo bar", { P(0, 3), P(4, 7), NO_MATCH }, 0, false, true},
|
||||
{ "^(foo)|(bar)", "foobar", { P(0, 3), P(3, 6), NO_MATCH }, 0, false, true},
|
||||
{ "^(foo)+|(bar)", "foo foobar", { P(0, 3), P(7, 10), NO_MATCH }, 0, false, true},
|
||||
{ "^(foo)+|(bar)", "foofoo bar", { P(0, 3), P(0, 6), P(7, 10), NO_MATCH }, 0, false, true},
|
||||
{ "^(foo)|(bar)", "foobarbaz", { P(0, 3), P(3, 6), NO_MATCH }, 0, false, true},
|
||||
{ "^(f[o0]+o)", "foo0obar", { P(0, 3), P(0, 5), NO_MATCH }, 0, false, true},
|
||||
{ "^(foo)|(bar)", "foo bar", { P(0, 3), P(4, 7) }, 0, false, true},
|
||||
{ "^(foo)|(bar)", "foobar", { P(0, 3), P(3, 6) }, 0, false, true},
|
||||
{ "^(foo)+|(bar)", "foo foobar", { P(0, 3), P(7, 10) }, 0, false, true},
|
||||
{ "^(foo)+|(bar)", "foofoo bar", { P(0, 3), P(0, 6), P(7, 10) }, 0, false, true},
|
||||
{ "^(foo)|(bar)", "foobarbaz", { P(0, 3), P(3, 6) }, 0, false, true},
|
||||
{ "^(f[o0]+o)", "foo0obar", { P(0, 3), P(0, 5) }, 0, false, true},
|
||||
{ "^(f[o0]+)", "foo0obar", { P(0, 2), P(0, 3), P(0, 4), P(0, 5) }, 0, false, true},
|
||||
|
||||
// unanchored patterns
|
||||
{ "\\b4\\B", "444", { P(0, 1), NO_MATCH }, 0, false, true},
|
||||
{ "\\b\\w+\\b", "444 555", { P(0, 3), P(4, 7), NO_MATCH }, 0, false, true},
|
||||
{ "foobar", "veryfoolish", {NO_MATCH}, 0, false, true},
|
||||
{ "foo.*bar", "extreme foolishness bar none ", { P(8, 23), NO_MATCH }, 0, false, true},
|
||||
{ "(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc deyghijfy", { P(0, 13), NO_MATCH }, 0, false, true},
|
||||
{ "(abc\\s+dex?y)?(ghij|klmn).*?x?y", "wegf5tgghij34xy", { P(7, 15), NO_MATCH }, 0, false, true},
|
||||
{ ".*foobar", "verylongfoobaz", {NO_MATCH}, 0, false, true},
|
||||
{ ".*foobar", "foobaz\n", {NO_MATCH}, 0, false, true},
|
||||
{ "(foo)|(bar)", "verylongfo baz", {NO_MATCH}, 0, false, true},
|
||||
{ "(foo)?bar", "foobar", { P(0, 6), NO_MATCH }, 0, false, true},
|
||||
{ "foo?bar", "foobar", { P(0, 6), NO_MATCH }, 0, false, true},
|
||||
{ "(abc)|(bcd)", "abcd", { P(0, 3), P(1, 4), NO_MATCH }, 0, false, true},
|
||||
{ "(abcd)|(bc)", "abcd", { P(0, 4), P(1, 3), NO_MATCH }, 0, false, true},
|
||||
{ "(ab|cd)ef", "abcdef cdabef", { P(2, 6), P(9, 13), NO_MATCH }, 0, false, true},
|
||||
{ "(foo)|(bar)", "verylongfoobbarbaz", { P(8, 11), P(12, 15), NO_MATCH }, 0, false, true},
|
||||
{ "(a[aaaa]aa?((\\B)|[aa])){1,9}", "aaaaa", { P(0, 3), P(0, 4), P(0, 5), NO_MATCH }, 0, false, true},
|
||||
{ "bar\\Z", "foobar\n", { P(3, 6), NO_MATCH }, 0, false, true},
|
||||
{ "\\b4\\B", "444", { P(0, 1) }, 0, false, true},
|
||||
{ "\\b\\w+\\b", "444 555", { P(0, 3), P(4, 7) }, 0, false, true},
|
||||
{ "foobar", "veryfoolish", {}, 0, false, true},
|
||||
{ "foo.*bar", "extreme foolishness bar none ", { P(8, 23) }, 0, false, true},
|
||||
{ "(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc deyghijfy", { P(0, 13) }, 0, false, true},
|
||||
{ "(abc\\s+dex?y)?(ghij|klmn).*?x?y", "wegf5tgghij34xy", { P(7, 15) }, 0, false, true},
|
||||
{ ".*foobar", "verylongfoobaz", {}, 0, false, true},
|
||||
{ ".*foobar", "foobaz\n", {}, 0, false, true},
|
||||
{ "(foo)|(bar)", "verylongfo baz", {}, 0, false, true},
|
||||
{ "(foo)?bar", "foobar", { P(0, 6) }, 0, false, true},
|
||||
{ "foo?bar", "foobar", { P(0, 6) }, 0, false, true},
|
||||
{ "(abc)|(bcd)", "abcd", { P(0, 3), P(1, 4) }, 0, false, true},
|
||||
{ "(abcd)|(bc)", "abcd", { P(0, 4), P(1, 3) }, 0, false, true},
|
||||
{ "(ab|cd)ef", "abcdef cdabef", { P(2, 6), P(9, 13) }, 0, false, true},
|
||||
{ "(foo)|(bar)", "verylongfoobbarbaz", { P(8, 11), P(12, 15) }, 0, false, true},
|
||||
{ "(a[aaaa]aa?((\\B)|[aa])){1,9}", "aaaaa", { P(0, 3), P(0, 4), P(0, 5) }, 0, false, true},
|
||||
{ "bar\\Z", "foobar\n", { P(3, 6) }, 0, false, true},
|
||||
|
||||
// multi-line patterns
|
||||
{ "^foo$", "foo\nbar", { P(0, 3), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
|
||||
{ "^bar$", "foo\nbar", { P(4, 7), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
|
||||
{ "^foo$", "big foo\nbar", {NO_MATCH}, HS_FLAG_MULTILINE, false, true},
|
||||
{ "^foo$", "foo\nfoo", { P(0, 3), P(4, 7), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
|
||||
{ "\\bfoo$", "big foo\nbar", { P(4, 7), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
|
||||
{ "\\Bfoo$", "bigfoo\nbar", { P(3, 6), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
|
||||
{ "^foo\\z", "big\nfoo", { P(4, 7), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
|
||||
{ "^foo\\Z", "big\nfoo\n", { P(4, 7), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
|
||||
{ "^foo$", "foo\nbar", { P(0, 3) }, HS_FLAG_MULTILINE, false, true},
|
||||
{ "^bar$", "foo\nbar", { P(4, 7) }, HS_FLAG_MULTILINE, false, true},
|
||||
{ "^foo$", "big foo\nbar", {}, HS_FLAG_MULTILINE, false, true},
|
||||
{ "^foo$", "foo\nfoo", { P(0, 3), P(4, 7) }, HS_FLAG_MULTILINE, false, true},
|
||||
{ "\\bfoo$", "big foo\nbar", { P(4, 7) }, HS_FLAG_MULTILINE, false, true},
|
||||
{ "\\Bfoo$", "bigfoo\nbar", { P(3, 6) }, HS_FLAG_MULTILINE, false, true},
|
||||
{ "^foo\\z", "big\nfoo", { P(4, 7) }, HS_FLAG_MULTILINE, false, true},
|
||||
{ "^foo\\Z", "big\nfoo\n", { P(4, 7) }, HS_FLAG_MULTILINE, false, true},
|
||||
|
||||
// utf8 patterns
|
||||
{ "ab+", "\x61\x62", { P(0, 2), NO_MATCH }, HS_FLAG_UTF8, false, true},
|
||||
{ "ab.+d", "\x61\x62\xf0\xa4\xad\xa2\x64", { P(0, 7), NO_MATCH }, HS_FLAG_UTF8, false, true},
|
||||
{ "ab+", "\x61\x62", { P(0, 2) }, HS_FLAG_UTF8, false, true},
|
||||
{ "ab.+d", "\x61\x62\xf0\xa4\xad\xa2\x64", { P(0, 7) }, HS_FLAG_UTF8, false, true},
|
||||
|
||||
// noteod patterns
|
||||
{ "^foobar$", "foobar", { NO_MATCH }, 0, true, true},
|
||||
{ "aaaaaaaa$", "AAaaAAaa", { NO_MATCH }, HS_FLAG_CASELESS, true, true},
|
||||
{ "^foo\\z", "foo", { NO_MATCH }, 0, true, true},
|
||||
{ "^foo\\Z", "foo", { NO_MATCH }, 0, true, true},
|
||||
{ "^foo\\Z", "foo\n", { NO_MATCH }, 0, true, true},
|
||||
{ "^foobar$", "foobar", {}, 0, true, true},
|
||||
{ "aaaaaaaa$", "AAaaAAaa", {}, HS_FLAG_CASELESS, true, true},
|
||||
{ "^foo\\z", "foo", {}, 0, true, true},
|
||||
{ "^foo\\Z", "foo", {}, 0, true, true},
|
||||
{ "^foo\\Z", "foo\n", {}, 0, true, true},
|
||||
|
||||
// vacuous patterns (with multiline, utf8 and caseless flags)
|
||||
// vacuous patterns have SOM turned off, so all SOM are zero
|
||||
{ "b*", "abc", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, 0, false, false},
|
||||
{ "b*", "", { P(0, 0), NO_MATCH }, 0, false, false},
|
||||
{ "(aa|b*)", "a", { P(0, 0), P(0, 1), NO_MATCH }, 0, false, false},
|
||||
{ "b*", "", { P(0, 0) }, 0, false, false},
|
||||
{ "(aa|b*)", "a", { P(0, 0), P(0, 1) }, 0, false, false},
|
||||
{ "b*", "bBb", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_CASELESS, false, false},
|
||||
{ "b*", "abc", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_MULTILINE, false, false},
|
||||
{ "b*", "", { P(0, 0), NO_MATCH }, HS_FLAG_MULTILINE, false, false},
|
||||
{ "(aa|b*)", "a", { P(0, 0), P(0, 1), NO_MATCH }, HS_FLAG_MULTILINE, false, false},
|
||||
{ "b*", "", { P(0, 0) }, HS_FLAG_MULTILINE, false, false},
|
||||
{ "(aa|b*)", "a", { P(0, 0), P(0, 1) }, HS_FLAG_MULTILINE, false, false},
|
||||
{ "b*", "bBb", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_MULTILINE | HS_FLAG_CASELESS, false, false},
|
||||
{ "b*", "abc", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_UTF8, false, false},
|
||||
{ "b*", "", { P(0, 0), NO_MATCH }, HS_FLAG_UTF8, false, false},
|
||||
{ "(aa|b*)", "a", { P(0, 0), P(0, 1), NO_MATCH }, HS_FLAG_UTF8, false, false},
|
||||
{ "b*", "", { P(0, 0) }, HS_FLAG_UTF8, false, false},
|
||||
{ "(aa|b*)", "a", { P(0, 0), P(0, 1) }, HS_FLAG_UTF8, false, false},
|
||||
{ "b*", "bBb", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_UTF8 | HS_FLAG_CASELESS, false, false},
|
||||
{ "b*", "bBb", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_UTF8 | HS_FLAG_CASELESS, false, false},
|
||||
{ ".*", "\x61\xf0\xa4\xad\xa2\x64", { P(0, 0), P(0, 1), P(0, 5), P(0, 6) }, HS_FLAG_UTF8, false, false},
|
||||
{ ".*", "\xf0\xa4\xad\xa2\xf0\xa4\xad\xa3\x64", { P(0, 0), P(0, 4), P(0, 8), P(0, 9) }, HS_FLAG_UTF8, false, false},
|
||||
|
||||
// special patterns for detecting various bugs
|
||||
{ "(\\B.|a)*a", "\xf0\xa4\xad\xa2\x61", { P(0, 5), NO_MATCH }, HS_FLAG_UTF8, false, true},
|
||||
{ "(\\B.|a)*a", "\xf0\xa4\xad\xa2\x61", { P(0, 5) }, HS_FLAG_UTF8, false, true},
|
||||
{ ".*", "\xf0\xa4\xad", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, 0, false, true},
|
||||
{ "\\Bfoo", "foo", {NO_MATCH}, 0, false, true},
|
||||
{ "fo\\B", "fo_", { P(0, 2), NO_MATCH}, 0, false, true},
|
||||
{ "\\Bfoo", "foo", {}, 0, false, true},
|
||||
{ "fo\\B", "fo_", { P(0, 2)}, 0, false, true},
|
||||
{ "^.*", "\xee\x80\x80\n\xee\x80\x80", { P(0, 0), P(0, 3), P(0, 4), P(0, 7)}, HS_FLAG_UTF8 | HS_FLAG_MULTILINE, false, false},
|
||||
|
||||
// ignore highlander patterns as they can't be easily checked
|
||||
};
|
||||
|
||||
// by default, all matches initialize to zeroes. this makes it impossible to
|
||||
// test vacuous patterns, among other things, unless we specify every single
|
||||
// match, which is tedious. instead, we set "no match" to NO_MATCH. if the matches
|
||||
// start with NO_MATCH, everything else is set to NO_MATCH. if matches start
|
||||
// with something else, look for the next NO_MATCH and replace everything after
|
||||
// it with NO_MATCH's.
|
||||
static
|
||||
void fixMatches(const pair<size_t, size_t> in_matches[],
|
||||
pair<size_t, size_t> out_matches[], unsigned size) {
|
||||
bool end_matches = false;
|
||||
for (unsigned i = 0; i < size; i++) {
|
||||
if (in_matches[i] == NO_MATCH) {
|
||||
end_matches = true;
|
||||
}
|
||||
if (end_matches) {
|
||||
out_matches[i] = NO_MATCH;
|
||||
}
|
||||
else {
|
||||
out_matches[i] = in_matches[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(MatchesTest, Check) {
|
||||
const MatchesTestParams &t = GetParam();
|
||||
CompileContext cc(false, false, get_current_target(), Grey());
|
||||
ReportManager rm(cc.grey);
|
||||
ParsedExpression parsed(0, t.pattern.c_str(), t.flags, 0);
|
||||
unique_ptr<NGWrapper> g = buildWrapper(rm, cc, parsed);
|
||||
|
||||
set<pair<size_t, size_t> > results, tmp, matches;
|
||||
auto g = buildWrapper(rm, cc, parsed);
|
||||
bool utf8 = (t.flags & HS_FLAG_UTF8) > 0;
|
||||
|
||||
set<pair<size_t, size_t>> matches;
|
||||
findMatches(*g, rm, t.input, matches, t.notEod, t.som, utf8);
|
||||
|
||||
// fix matches and make a set out of them
|
||||
pair<size_t, size_t> tmp_matches[NUM_MATCHES];
|
||||
fixMatches(t.matches, tmp_matches, NUM_MATCHES);
|
||||
tmp = set<pair<size_t, size_t> >(tmp_matches, tmp_matches + NUM_MATCHES);
|
||||
tmp.erase(NO_MATCH);
|
||||
set<pair<size_t, size_t>> expected(begin(t.matches), end(t.matches));
|
||||
|
||||
// create a superset of pattern results and matches to pick up unexpected
|
||||
// matches as well as missed matches.
|
||||
results.insert(tmp.begin(), tmp.end());
|
||||
results.insert(matches.begin(), matches.end());
|
||||
|
||||
// check if we have the same number of matches as in expected results
|
||||
ASSERT_EQ(results.size(), tmp.size())<< "Pattern '" << t.pattern
|
||||
<< "' against input '" << t.input << "': wrong results count";
|
||||
|
||||
// we already know that size of two sets is the same, so now check matches.
|
||||
for (set<pair<size_t, size_t> >::const_iterator it = results.begin();
|
||||
it != results.end(); ++it) {
|
||||
ASSERT_EQ(1, matches.count(*it))<< "Pattern '" << t.pattern
|
||||
<< "' against input '" << t.input << "'";
|
||||
}
|
||||
ASSERT_EQ(expected, matches) << "Pattern '" << t.pattern
|
||||
<< "' against input '" << t.input << "'";
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ng_find_matches, MatchesTest, ValuesIn(matchesTests));
|
||||
|
@ -71,37 +71,35 @@ TEST(Sidecar, ns1) {
|
||||
ASSERT_TRUE(ns != nullptr);
|
||||
ASSERT_LT(0U, sidecarSize(ns.get()));
|
||||
|
||||
struct sidecar_enabled *enabled
|
||||
= (struct sidecar_enabled *)aligned_zmalloc(sidecarEnabledSize(ns.get()));
|
||||
ASSERT_TRUE(enabled);
|
||||
sidecarEnabledInit(ns.get(), enabled);
|
||||
struct sidecar_scratch *scratch
|
||||
= (struct sidecar_scratch *)aligned_zmalloc(sidecarScratchSize(ns.get()));
|
||||
auto enabled =
|
||||
aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
|
||||
sidecarEnabledInit(ns.get(), enabled.get());
|
||||
auto scratch =
|
||||
aligned_zmalloc_unique<sidecar_scratch>(sidecarScratchSize(ns.get()));
|
||||
|
||||
for (u32 i = 0; i < 256; i++) {
|
||||
SCOPED_TRACE(i);
|
||||
u32 seen = 0;
|
||||
memset(data, i, data_len);
|
||||
sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, ns_cb, &seen);
|
||||
sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0,
|
||||
ns_cb, &seen);
|
||||
ASSERT_EQ(0U, seen);
|
||||
}
|
||||
|
||||
sidecarEnabledAdd(ns.get(), enabled, 0);
|
||||
sidecarEnabledAdd(ns.get(), enabled.get(), 0);
|
||||
|
||||
for (u32 i = 0; i < 256; i++) {
|
||||
SCOPED_TRACE(i);
|
||||
u32 seen = 0;
|
||||
memset(data, i, data_len);
|
||||
sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, ns_cb, &seen);
|
||||
sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0,
|
||||
ns_cb, &seen);
|
||||
if (i == 'f') {
|
||||
ASSERT_EQ(1U, seen);
|
||||
} else {
|
||||
ASSERT_EQ(0U, seen);
|
||||
}
|
||||
}
|
||||
|
||||
aligned_free(enabled);
|
||||
aligned_free(scratch);
|
||||
}
|
||||
|
||||
const char* sidecarStrings[] = {
|
||||
@ -186,14 +184,13 @@ TEST_P(SidecarTest, Individual) {
|
||||
ASSERT_TRUE(ns != nullptr);
|
||||
ASSERT_LT(0U, sidecarSize(ns.get()));
|
||||
|
||||
struct sidecar_enabled *enabled
|
||||
= (struct sidecar_enabled *)aligned_zmalloc(sidecarEnabledSize(ns.get()));
|
||||
ASSERT_TRUE(enabled);
|
||||
sidecarEnabledInit(ns.get(), enabled);
|
||||
struct sidecar_enabled *local_enabled
|
||||
= (struct sidecar_enabled *)aligned_zmalloc(sidecarEnabledSize(ns.get()));
|
||||
struct sidecar_scratch *scratch
|
||||
= (struct sidecar_scratch *)aligned_zmalloc(sidecarScratchSize(ns.get()));
|
||||
auto enabled =
|
||||
aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
|
||||
sidecarEnabledInit(ns.get(), enabled.get());
|
||||
auto local_enabled =
|
||||
aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
|
||||
auto scratch =
|
||||
aligned_zmalloc_unique<sidecar_scratch>(sidecarScratchSize(ns.get()));
|
||||
|
||||
const size_t data_len = 1024;
|
||||
u8 data[data_len];
|
||||
@ -203,7 +200,8 @@ TEST_P(SidecarTest, Individual) {
|
||||
SCOPED_TRACE(i);
|
||||
memset(data, i, data_len);
|
||||
set<u32> seen;
|
||||
sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, set_cb, &seen);
|
||||
sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0,
|
||||
set_cb, &seen);
|
||||
ASSERT_TRUE(seen.empty());
|
||||
}
|
||||
|
||||
@ -213,17 +211,18 @@ TEST_P(SidecarTest, Individual) {
|
||||
SCOPED_TRACE(c);
|
||||
|
||||
// build a "compile time" enabled structure and add class j to it.
|
||||
sidecarEnabledInit(ns.get(), local_enabled);
|
||||
sidecarEnabledAdd(ns.get(), local_enabled, j);
|
||||
sidecarEnabledInit(ns.get(), local_enabled.get());
|
||||
sidecarEnabledAdd(ns.get(), local_enabled.get(), j);
|
||||
|
||||
// union class j into our runtime enabled structure.
|
||||
sidecarEnabledUnion(ns.get(), enabled, local_enabled);
|
||||
sidecarEnabledUnion(ns.get(), enabled.get(), local_enabled.get());
|
||||
|
||||
for (u32 i = 0; i < 256; i++) {
|
||||
SCOPED_TRACE(i);
|
||||
memset(data, i, data_len);
|
||||
set<u32> seen;
|
||||
sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, set_cb, &seen);
|
||||
sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(),
|
||||
0, set_cb, &seen);
|
||||
if (i == c) {
|
||||
ASSERT_EQ(1U, seen.size());
|
||||
ASSERT_EQ(j, *seen.begin());
|
||||
@ -232,10 +231,6 @@ TEST_P(SidecarTest, Individual) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
aligned_free(local_enabled);
|
||||
aligned_free(enabled);
|
||||
aligned_free(scratch);
|
||||
}
|
||||
|
||||
TEST_P(SidecarTest, Together) {
|
||||
@ -253,13 +248,13 @@ TEST_P(SidecarTest, Together) {
|
||||
ASSERT_TRUE(ns != nullptr);
|
||||
ASSERT_LT(0U, sidecarSize(ns.get()));
|
||||
|
||||
struct sidecar_enabled *enabled
|
||||
= (struct sidecar_enabled *)aligned_zmalloc(sidecarEnabledSize(ns.get()));
|
||||
ASSERT_TRUE(enabled);
|
||||
struct sidecar_enabled *local_enabled
|
||||
= (struct sidecar_enabled *)aligned_zmalloc(sidecarEnabledSize(ns.get()));
|
||||
struct sidecar_scratch *scratch
|
||||
= (struct sidecar_scratch *)aligned_zmalloc(sidecarScratchSize(ns.get()));
|
||||
auto enabled =
|
||||
aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
|
||||
sidecarEnabledInit(ns.get(), enabled.get());
|
||||
auto local_enabled =
|
||||
aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
|
||||
auto scratch =
|
||||
aligned_zmalloc_unique<sidecar_scratch>(sidecarScratchSize(ns.get()));
|
||||
|
||||
const size_t data_len = 1024;
|
||||
u8 data[data_len];
|
||||
@ -269,21 +264,22 @@ TEST_P(SidecarTest, Together) {
|
||||
SCOPED_TRACE(i);
|
||||
memset(data, i, data_len);
|
||||
set<u32> seen;
|
||||
sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, set_cb, &seen);
|
||||
sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0,
|
||||
set_cb, &seen);
|
||||
ASSERT_TRUE(seen.empty());
|
||||
}
|
||||
|
||||
// test that every char class fires
|
||||
for (u32 j = 0; j < charclasses.size(); j++) {
|
||||
// enable the whole lot
|
||||
sidecarEnabledInit(ns.get(), enabled);
|
||||
sidecarEnabledInit(ns.get(), enabled.get());
|
||||
for (u32 i = 0; i < charclasses.size(); i++) {
|
||||
// build a "compile time" enabled structure and add class j to it.
|
||||
sidecarEnabledInit(ns.get(), local_enabled);
|
||||
sidecarEnabledAdd(ns.get(), local_enabled, i);
|
||||
sidecarEnabledInit(ns.get(), local_enabled.get());
|
||||
sidecarEnabledAdd(ns.get(), local_enabled.get(), i);
|
||||
|
||||
// union class j into our runtime enabled structure.
|
||||
sidecarEnabledUnion(ns.get(), enabled, local_enabled);
|
||||
sidecarEnabledUnion(ns.get(), enabled.get(), local_enabled.get());
|
||||
}
|
||||
|
||||
u32 c = chars[j];
|
||||
@ -293,7 +289,8 @@ TEST_P(SidecarTest, Together) {
|
||||
SCOPED_TRACE(i);
|
||||
memset(data, i, data_len);
|
||||
set<u32> seen;
|
||||
sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, set_cb, &seen);
|
||||
sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(),
|
||||
0, set_cb, &seen);
|
||||
if (i == c) {
|
||||
// seen should contain only `c'
|
||||
ASSERT_EQ(1U, seen.size());
|
||||
@ -306,10 +303,6 @@ TEST_P(SidecarTest, Together) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
aligned_free(local_enabled);
|
||||
aligned_free(enabled);
|
||||
aligned_free(scratch);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Sidecar, SidecarTest,
|
||||
|
@ -31,7 +31,7 @@
|
||||
#include "parser/utf8_validate.h"
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "util/ue2string.h"
|
||||
#include "util/string_util.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
@ -46,7 +46,7 @@ struct ValidUtf8TestInfo {
|
||||
// Helper for gtest.
|
||||
static
|
||||
void PrintTo(const ValidUtf8TestInfo &t, ::std::ostream *os) {
|
||||
*os << "(" << t.str << ", " << t.is_valid << ")";
|
||||
*os << "(\"" << printable(t.str) << "\", " << t.is_valid << ")";
|
||||
}
|
||||
|
||||
static ValidUtf8TestInfo valid_utf8_tests[] = {
|
||||
@ -118,5 +118,5 @@ INSTANTIATE_TEST_CASE_P(ValidUtf8, ValidUtf8Test, ValuesIn(valid_utf8_tests));
|
||||
TEST_P(ValidUtf8Test, check) {
|
||||
const auto &info = GetParam();
|
||||
ASSERT_EQ(info.is_valid, isValidUtf8(info.str.c_str()))
|
||||
<< "String is: " << escapeString(info.str) << std::endl;
|
||||
<< "String is: " << printable(info.str) << std::endl;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user