Merge develop into master

This commit is contained in:
Matthew Barr 2015-10-30 11:29:20 +11:00
commit fe31630221
32 changed files with 384 additions and 331 deletions

View File

@ -1,9 +1,9 @@
cmake_minimum_required (VERSION 2.8)
cmake_minimum_required (VERSION 2.8.11)
project (Hyperscan C CXX)
set (HS_MAJOR_VERSION 4)
set (HS_MINOR_VERSION 0)
set (HS_PATCH_VERSION 0)
set (HS_PATCH_VERSION 1)
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
string (TIMESTAMP BUILD_DATE "%Y-%m-%d")
@ -36,19 +36,19 @@ else()
set(RELEASE_BUILD FALSE)
endif()
set(BINDIR ${PROJECT_BINARY_DIR}/bin)
set(LIBDIR ${PROJECT_BINARY_DIR}/lib)
set(BINDIR "${PROJECT_BINARY_DIR}/bin")
set(LIBDIR "${PROJECT_BINARY_DIR}/lib")
# First for the generic no-config case
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${BINDIR})
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${LIBDIR})
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${LIBDIR})
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${BINDIR}")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${LIBDIR}")
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${LIBDIR}")
# Second, for multi-config builds (e.g. msvc)
foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
string (TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${BINDIR})
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${LIBDIR})
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${LIBDIR})
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} "${BINDIR}")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} "${LIBDIR}")
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} "${LIBDIR}")
endforeach (OUTPUTCONFIG CMAKE_CONFIGURATION_TYPES)
@ -71,13 +71,14 @@ find_package(Boost ${BOOST_MINVERSION})
if(NOT Boost_FOUND)
# we might have boost in tree, so provide a hint and try again
message(STATUS "trying include dir for boost")
set(BOOST_INCLUDEDIR ${CMAKE_SOURCE_DIR}/include)
set(BOOST_INCLUDEDIR "${CMAKE_SOURCE_DIR}/include")
find_package(Boost ${BOOST_MINVERSION})
if(NOT Boost_FOUND)
message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. Either install system pacakges if available or extract Boost headers to ${CMAKE_SOURCE_DIR}/include")
message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. Either install system pacakges if available, extract Boost headers to ${CMAKE_SOURCE_DIR}/include, or set the CMake BOOST_ROOT variable.")
endif()
endif()
# -- make this work? set(python_ADDITIONAL_VERSIONS 2.7 2.6)
find_package(PythonInterp)
find_program(RAGEL ragel)
@ -88,6 +89,10 @@ else()
message(FATAL_ERROR "No python interpreter found")
endif()
if(${RAGEL} STREQUAL "RAGEL-NOTFOUND")
message(FATAL_ERROR "Ragel state machine compiler not found")
endif()
option(OPTIMISE "Turns off compiler optimizations (on by default unless debug output enabled or coverage testing)" TRUE)
option(DEBUG_OUTPUT "Enable debug output (warning: very verbose)" FALSE)
@ -290,10 +295,10 @@ CHECK_CXX_COMPILER_FLAG("-Wunused-variable" CXX_WUNUSED_VARIABLE)
endif()
if (NOT XCODE)
include_directories(SYSTEM ${Boost_INCLUDE_DIR})
include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
else()
# CMake doesn't think Xcode supports -isystem
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -isystem ${Boost_INCLUDE_DIR}")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -isystem ${Boost_INCLUDE_DIRS}")
endif()

View File

@ -125,8 +125,12 @@ Boost Headers
Compiling Hyperscan depends on a recent version of the Boost C++ header
library. If the Boost libraries are installed on the build machine in the
usual paths, CMake will find them. An alternative is to put a copy of (or a
symlink to) the boost subdirectory in ``<hyperscan-source-path>/include/boost``.
usual paths, CMake will find them. If the Boost libraries are not installed,
the location of the Boost source tree can be specified during the CMake
configuration step using the ``BOOST_ROOT`` variable (described below).
Another alternative is to put a copy of (or a symlink to) the boost
subdirectory in ``<hyperscan-source-path>/include/boost``.
For example: for the Boost-1.59.0 release: ::
@ -161,6 +165,8 @@ Common options for CMake include:
| BUILD_STATIC_AND_SHARED| Build both static and shared Hyperscan libs. |
| | Default off. |
+------------------------+----------------------------------------------------+
| BOOST_ROOT | Location of Boost source tree. |
+------------------------+----------------------------------------------------+
| DEBUG_OUTPUT | Enable very verbose debug output. Default off. |
+------------------------+----------------------------------------------------+

View File

@ -13,11 +13,11 @@ set(AUTOGEN_PY_FILES
function(fdr_autogen type out)
add_custom_command (
COMMENT "AUTOGEN ${out}"
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${out}
COMMAND ${PYTHON} ${CMAKE_CURRENT_SOURCE_DIR}/autogen.py ${type} > ${CMAKE_CURRENT_BINARY_DIR}/${out}
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${out}"
COMMAND ${PYTHON} "${CMAKE_CURRENT_SOURCE_DIR}/autogen.py" ${type} > "${CMAKE_CURRENT_BINARY_DIR}/${out}"
DEPENDS ${AUTOGEN_PY_FILES}
)
add_custom_target(autogen_${type} DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${out})
add_custom_target(autogen_${type} DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/${out}")
endfunction(fdr_autogen)
#now build the functions

View File

@ -250,7 +250,8 @@ hs_error_t hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
* for future use and is unused at present.
*
* @param scratch
* A per-thread scratch space allocated by @ref hs_alloc_scratch().
* A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
* allowed to be NULL only if the @a onEvent callback is also NULL.
*
* @param onEvent
* Pointer to a match event callback function. If a NULL pointer is given,
@ -299,7 +300,8 @@ hs_error_t hs_copy_stream(hs_stream_t **to_id, const hs_stream_t *from_id);
* The stream (as created by @ref hs_open_stream()) to be copied.
*
* @param scratch
* A per-thread scratch space allocated by @ref hs_alloc_scratch().
* A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
* allowed to be NULL only if the @a onEvent callback is also NULL.
*
* @param onEvent
* Pointer to a match event callback function. If a NULL pointer is given,

View File

@ -54,7 +54,7 @@ char nfaExecCastle0_expandState(const struct NFA *nfa, void *dest,
#define nfaExecCastle0_testEOD NFA_API_NO_IMPL
#define nfaExecCastle0_B_Reverse NFA_API_NO_IMPL
#define nfaExecCastle0_zombie_status NFA_API_NO_IMPL
#define nfaExecCastle0_zombie_status NFA_API_ZOMBIE_NO_IMPL
#ifdef __cplusplus
}

View File

@ -55,7 +55,7 @@ char nfaExecGough8_expandState(const struct NFA *nfa, void *dest,
const void *src, u64a offset, u8 key);
#define nfaExecGough8_B_Reverse NFA_API_NO_IMPL
#define nfaExecGough8_zombie_status NFA_API_NO_IMPL
#define nfaExecGough8_zombie_status NFA_API_ZOMBIE_NO_IMPL
// 16-bit Gough
@ -77,6 +77,6 @@ char nfaExecGough16_expandState(const struct NFA *nfa, void *dest,
const void *src, u64a offset, u8 key);
#define nfaExecGough16_B_Reverse NFA_API_NO_IMPL
#define nfaExecGough16_zombie_status NFA_API_NO_IMPL
#define nfaExecGough16_zombie_status NFA_API_ZOMBIE_NO_IMPL
#endif

View File

@ -56,7 +56,7 @@ char nfaExecLbrDot_expandState(const struct NFA *nfa, void *dest,
#define nfaExecLbrDot_testEOD NFA_API_NO_IMPL
#define nfaExecLbrDot_B_Reverse NFA_API_NO_IMPL
#define nfaExecLbrDot_zombie_status NFA_API_NO_IMPL
#define nfaExecLbrDot_zombie_status NFA_API_ZOMBIE_NO_IMPL
// LBR Verm
@ -76,7 +76,7 @@ char nfaExecLbrVerm_expandState(const struct NFA *nfa, void *dest,
#define nfaExecLbrVerm_testEOD NFA_API_NO_IMPL
#define nfaExecLbrVerm_B_Reverse NFA_API_NO_IMPL
#define nfaExecLbrVerm_zombie_status NFA_API_NO_IMPL
#define nfaExecLbrVerm_zombie_status NFA_API_ZOMBIE_NO_IMPL
// LBR Negated Verm
@ -96,7 +96,7 @@ char nfaExecLbrNVerm_expandState(const struct NFA *nfa, void *dest,
#define nfaExecLbrNVerm_testEOD NFA_API_NO_IMPL
#define nfaExecLbrNVerm_B_Reverse NFA_API_NO_IMPL
#define nfaExecLbrNVerm_zombie_status NFA_API_NO_IMPL
#define nfaExecLbrNVerm_zombie_status NFA_API_ZOMBIE_NO_IMPL
// LBR Shuf
@ -116,7 +116,7 @@ char nfaExecLbrShuf_expandState(const struct NFA *nfa, void *dest,
#define nfaExecLbrShuf_testEOD NFA_API_NO_IMPL
#define nfaExecLbrShuf_B_Reverse NFA_API_NO_IMPL
#define nfaExecLbrShuf_zombie_status NFA_API_NO_IMPL
#define nfaExecLbrShuf_zombie_status NFA_API_ZOMBIE_NO_IMPL
// LBR Truffle
@ -136,7 +136,7 @@ char nfaExecLbrTruf_expandState(const struct NFA *nfa, void *dest,
#define nfaExecLbrTruf_testEOD NFA_API_NO_IMPL
#define nfaExecLbrTruf_B_Reverse NFA_API_NO_IMPL
#define nfaExecLbrTruf_zombie_status NFA_API_NO_IMPL
#define nfaExecLbrTruf_zombie_status NFA_API_ZOMBIE_NO_IMPL
#ifdef __cplusplus
}

View File

@ -151,12 +151,12 @@ char JOIN(ENGINE_EXEC_NAME, _TopScan)(const struct NFA *nfa, struct mq *q,
while (1) {
// Find the next top with location >= the last escape we saw.
for (; q->cur < q->end && q_cur_loc(q) <= end; q->cur++) {
enum mqe_event t = q_cur_type(q);
if ((t == MQE_TOP || t == MQE_TOP_FIRST) &&
u32 event = q_cur_type(q);
if ((event == MQE_TOP || event == MQE_TOP_FIRST) &&
q_cur_offset(q) >= lstate->lastEscape) {
goto found_top;
}
DEBUG_PRINTF("skip event type=%d offset=%lld\n", t, q_cur_offset(q));
DEBUG_PRINTF("skip event type=%u offset=%lld\n", event, q_cur_offset(q));
}
// No more tops, we're done.

View File

@ -56,7 +56,7 @@ char nfaExecMcClellan8_expandState(const struct NFA *nfa, void *dest,
const void *src, u64a offset, u8 key);
#define nfaExecMcClellan8_B_Reverse NFA_API_NO_IMPL
#define nfaExecMcClellan8_zombie_status NFA_API_NO_IMPL
#define nfaExecMcClellan8_zombie_status NFA_API_ZOMBIE_NO_IMPL
// 16-bit McClellan
@ -79,7 +79,7 @@ char nfaExecMcClellan16_expandState(const struct NFA *nfa, void *dest,
const void *src, u64a offset, u8 key);
#define nfaExecMcClellan16_B_Reverse NFA_API_NO_IMPL
#define nfaExecMcClellan16_zombie_status NFA_API_NO_IMPL
#define nfaExecMcClellan16_zombie_status NFA_API_ZOMBIE_NO_IMPL
/**
* Simple streaming mode calls:

View File

@ -50,7 +50,7 @@ char nfaExecMpv0_expandState(const struct NFA *nfa, void *dest, const void *src,
#define nfaExecMpv0_QR NFA_API_NO_IMPL
#define nfaExecMpv0_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */
#define nfaExecMpv0_B_Reverse NFA_API_NO_IMPL
#define nfaExecMpv0_zombie_status NFA_API_NO_IMPL
#define nfaExecMpv0_zombie_status NFA_API_ZOMBIE_NO_IMPL
/**
* return 0 if the mpv dies, otherwise returns the location of the next possible

View File

@ -250,7 +250,7 @@ void fillCounterInfos(vector<mpv_counter_info> *out, u32 *curr_decomp_offset,
const map<ClusterKey, vector<raw_puff>> &kilopuffs) {
/* first the triggered puffs */
map<ClusterKey, vector<raw_puff>>::const_iterator it = kilopuffs.begin();
while (it != kilopuffs.end() && it->first.trigger_event != ~0U) {
while (it != kilopuffs.end() && it->first.trigger_event != MQE_INVALID) {
assert(!it->first.auto_restart);
assert(it->first.trigger_event
== MQE_TOP_FIRST + distance(kilopuffs.begin(), it));
@ -268,7 +268,7 @@ void fillCounterInfos(vector<mpv_counter_info> *out, u32 *curr_decomp_offset,
*/
map<ClusterKey, vector<raw_puff>>::const_iterator trig_ite = it;
while (it != kilopuffs.end() && !it->first.auto_restart) {
assert(it->first.trigger_event == ~0U);
assert(it->first.trigger_event == MQE_INVALID);
++it;
}
@ -278,7 +278,7 @@ void fillCounterInfos(vector<mpv_counter_info> *out, u32 *curr_decomp_offset,
kilopuffs, kilopuffs.begin(), it);
}
while (it != kilopuffs.end() && it->first.auto_restart) {
assert(it->first.trigger_event == ~0U);
assert(it->first.trigger_event == MQE_INVALID);
out->push_back(mpv_counter_info());
map<ClusterKey, vector<raw_puff>>::const_iterator it_o = it;

View File

@ -41,24 +41,28 @@ extern "C"
#define MAX_MQE_LEN 10
/** Queue events */
enum mqe_event {
MQE_START = 0, /**< and begin! Note: stateless engines will start from
* this location */
MQE_END = 1, /**< stop scanning */
MQE_TOP = 2, /**< enable start + start dot star */
MQE_TOP_FIRST = 4, /**< first event corresponding to a TOP _N_ */
/*
* Additional tops (in multi-top engines) use the event values from
* MQE_TOP_FIRST to something.
*/
/** Queue event: begin scanning. Note: stateless engines will start from this
* location. */
#define MQE_START 0U
MQE_INVALID = ~0U
};
/** Queue event: stop scanning. */
#define MQE_END 1U
/** Queue event: enable start and start-dot-star. */
#define MQE_TOP 2U
/** Queue event: first event corresponding to a numbered TOP. Additional tops
* (in multi-top engines) use the event values from MQE_TOP_FIRST to
* MQE_INVALID - 1. */
#define MQE_TOP_FIRST 4U
/** Invalid queue event */
#define MQE_INVALID (~0U)
/** Queue item */
struct mq_item {
u32 type; /**< event; from mqe_event */
u32 type; /**< event type, from MQE_* */
s64a location; /**< relative to the start of the current buffer */
u64a som; /**< pattern start-of-match corresponding to a top, only used
* by som engines. */

View File

@ -237,16 +237,32 @@ static really_inline
int isMultiTopType(u8 t) {
return !isDfaType(t) && !isLbrType(t);
}
/** Macro used in place of unimplemented NFA API functions for a given
/** Macros used in place of unimplemented NFA API functions for a given
* engine. */
#if !defined(_WIN32)
#define NFA_API_NO_IMPL(...) \
/* Use for functions that return an integer. */
#define NFA_API_NO_IMPL(...) \
({ \
assert("not implemented for this engine!"); \
0; /* return value, for places that need it */ \
})
/* Use for _zombie_status functions. */
#define NFA_API_ZOMBIE_NO_IMPL(...) \
({ \
assert("not implemented for this engine!"); \
NFA_ZOMBIE_NO; \
})
#else
#define NFA_API_NO_IMPL(...) 0
/* Simpler implementation for compilers that don't like the GCC extension used
* above. */
#define NFA_API_NO_IMPL(...) 0
#define NFA_API_ZOMBIE_NO_IMPL(...) NFA_ZOMBIE_NO
#endif
#ifdef __cplusplus

View File

@ -67,6 +67,7 @@ static
bool findPaths(const NGHolder &g, vector<Path> &paths) {
vector<NFAVertex> order = getTopoOrdering(g);
vector<size_t> read_count(num_vertices(g));
vector<vector<Path>> built(num_vertices(g));
for (auto it = order.rbegin(); it != order.rend(); ++it) {
@ -74,6 +75,11 @@ bool findPaths(const NGHolder &g, vector<Path> &paths) {
auto &out = built[g[v].index];
assert(out.empty());
read_count[g[v].index] = out_degree(v, g);
DEBUG_PRINTF("setting read_count to %zu for %u\n",
read_count[g[v].index], g[v].index);
if (v == g.start || v == g.startDs) {
out.push_back({v});
continue;
@ -94,6 +100,9 @@ bool findPaths(const NGHolder &g, vector<Path> &paths) {
continue;
}
assert(!built[g[u].index].empty());
assert(read_count[g[u].index]);
for (const auto &p : built[g[u].index]) {
out.push_back(p);
out.back().push_back(v);
@ -105,6 +114,13 @@ bool findPaths(const NGHolder &g, vector<Path> &paths) {
return false;
}
}
read_count[g[u].index]--;
if (!read_count[g[u].index]) {
DEBUG_PRINTF("clearing %u as finished reading\n", g[u].index);
built[g[u].index].clear();
built[g[u].index].shrink_to_fit();
}
}
}

View File

@ -118,10 +118,15 @@ bool findLiterals(const NGHolder &g,
vector<NFAVertex> order = getTopoOrdering(g);
vector<set<sls_literal>> built(num_vertices(g));
vector<size_t> read_count(num_vertices(g));
for (auto it = order.rbegin(); it != order.rend(); ++it) {
NFAVertex v = *it;
set<sls_literal> &out = built[g[v].index];
read_count[g[v].index] = out_degree(v, g);
DEBUG_PRINTF("setting read_count to %zu for %u\n",
read_count[g[v].index], g[v].index);
assert(out.empty());
if (v == g.start) {
@ -149,7 +154,10 @@ bool findLiterals(const NGHolder &g,
}
set<sls_literal> &in = built[g[u].index];
DEBUG_PRINTF("getting from %u (%zu reads to go)\n",
g[u].index, read_count[g[u].index]);
assert(!in.empty());
assert(read_count[g[u].index]);
for (const sls_literal &lit : in) {
if (accept) {
@ -171,10 +179,18 @@ bool findLiterals(const NGHolder &g,
out.insert(lit.append((u8)c, nocase));
if (out.size() + literals->size() > MAX_LITERAL_SET_SIZE) {
DEBUG_PRINTF("too big %zu + %zu\n", out.size(),
literals->size());
return false;
}
}
}
read_count[g[u].index]--;
if (!read_count[g[u].index]) {
DEBUG_PRINTF("clearing %u as finished reading\n", g[u].index);
in.clear();
}
}
}
@ -206,6 +222,8 @@ bool handleSmallLiteralSets(RoseBuild &rose, const NGHolder &g,
return false;
}
DEBUG_PRINTF("looking for literals\n");
map<sls_literal, ue2::flat_set<ReportID>> literals;
if (!findLiterals(g, &literals)) {
DEBUG_PRINTF(":(\n");

View File

@ -51,11 +51,11 @@ using namespace std;
namespace ue2 {
/** \brief Hard limit on the maximum repeat for bounded repeats. */
static const u32 MAX_MAX_BOUND = 32767;
static constexpr u32 MAX_REPEAT = 32767;
/** \brief If expanding a repeat would lead to this many positions being
* generated, we fail the pattern. */
static const u32 MAX_POSITIONS_EXPANDED = 500000; // arbitrarily huge
static constexpr u32 MAX_POSITIONS_EXPANDED = 500000; // arbitrarily huge
/* no edge priorities means that if our subcomponent can be empty, our min
* extent is effectively zero. */
@ -67,7 +67,11 @@ ComponentRepeat::ComponentRepeat(unique_ptr<Component> sub_comp_in, u32 min,
assert(sub_comp);
assert(max > 0);
assert(m_min <= m_max);
if (m_max < NoLimit && m_max > MAX_MAX_BOUND) {
if (m_min > MAX_REPEAT) {
throw ParseError("Bounded repeat is too large.");
}
if (m_max != NoLimit && m_max > MAX_REPEAT) {
throw ParseError("Bounded repeat is too large.");
}
}
@ -119,7 +123,7 @@ void checkPositions(vector<PositionInfo> &v, const GlushkovBuildState &bs) {
void ComponentRepeat::notePositions(GlushkovBuildState &bs) {
assert(m_max > 0);
assert(m_max == NoLimit || m_max < MAX_MAX_BOUND);
assert(m_max == NoLimit || m_max < MAX_REPEAT);
/* Note: We can construct smaller subgraphs if we're not maintaining edge
* priorities. */

View File

@ -391,7 +391,7 @@ hwlmcb_rv_t roseHandleSuffixTrigger(const struct RoseEngine *t,
}
}
enum mqe_event top = tr->suffixEvent;
u32 top = tr->suffixEvent;
assert(top == MQE_TOP || (top >= MQE_TOP_FIRST && top < MQE_INVALID));
pushQueueSom(q, top, loc, som);
@ -977,14 +977,14 @@ void roseTriggerInfixes(const struct RoseEngine *t, const struct RoseRole *tr,
do {
u32 qi = curr_r->queue;
u32 ri = queueToLeftIndex(t, qi);
enum mqe_event topEvent = curr_r->event;
u32 topEvent = curr_r->event;
u8 cancel = curr_r->cancel_prev_top;
assert(topEvent < MQE_INVALID);
const struct LeftNfaInfo *left = getLeftInfoByQueue(t, qi);
assert(!left->transient);
DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, (u32)topEvent);
DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, topEvent);
struct mq *q = tctxtToScratch(tctxt)->queues + qi;
const struct NfaInfo *info = getNfaInfoByQueue(t, qi);

View File

@ -2433,7 +2433,7 @@ vector<RoseTrigger> buildRoseTriggerList(const RoseGraph &g, RoseVertex u,
assert(num_tops(g[v].left) == 1);
top = MQE_TOP;
} else {
top = (enum mqe_event)((u32)MQE_TOP_FIRST + g[e].rose_top);
top = MQE_TOP_FIRST + g[e].rose_top;
assert(top < MQE_INVALID);
}

View File

@ -236,7 +236,7 @@ struct LeftNfaInfo {
// A list of these is used to trigger prefix/infix roses.
struct RoseTrigger {
u32 queue; // queue index of leftfix
u32 event; // from enum mqe_event
u32 event; // queue event, from MQE_*
u8 cancel_prev_top;
};
@ -309,7 +309,7 @@ struct RoseRole {
ReportID reportId; // report ID, or MO_INVALID_IDX
u32 stateIndex; /**< index into state multibit, or MMB_INVALID. Roles do not
* require a state bit if they are terminal */
u32 suffixEvent; // from enum mqe_event
u32 suffixEvent; // queue event, from MQE_*
u8 depth; /**< depth of this vertex from root in the tree, or 255 if greater.
*/
u32 suffixOffset; /**< suffix nfa: 0 if no suffix associated with the role,

View File

@ -1163,11 +1163,10 @@ hs_error_t hs_reset_and_copy_stream(hs_stream_t *to_id,
return HS_INVALID;
}
if (!scratch || !validScratch(to_id->rose, scratch)) {
return HS_INVALID;
}
if (onEvent) {
if (!scratch || !validScratch(to_id->rose, scratch)) {
return HS_INVALID;
}
report_eod_matches(to_id, scratch, onEvent, context);
}
@ -1406,12 +1405,14 @@ HS_PUBLIC_API
hs_error_t hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags,
hs_scratch_t *scratch, match_event_handler onEvent,
void *context) {
if (!id || !scratch || !validScratch(id->rose, scratch)) {
if (!id) {
return HS_INVALID;
}
/* user wants eod matches */
if (onEvent) {
if (!scratch || !validScratch(id->rose, scratch)) {
return HS_INVALID;
}
report_eod_matches(id, scratch, onEvent, context);
}

View File

@ -254,7 +254,7 @@ u32 compress32(u32 x, u32 m) {
#if defined(__BMI2__)
// BMI2 has a single instruction for this operation.
return _pext_u32(x, m);
#endif
#else
// Return zero quickly on trivial cases
if ((x & m) == 0) {
@ -281,6 +281,7 @@ u32 compress32(u32 x, u32 m) {
}
return x;
#endif
}
static really_inline
@ -288,7 +289,7 @@ u64a compress64(u64a x, u64a m) {
#if defined(ARCH_X86_64) && defined(__BMI2__)
// BMI2 has a single instruction for this operation.
return _pext_u64(x, m);
#endif
#else
// Return zero quickly on trivial cases
if ((x & m) == 0) {
@ -316,6 +317,7 @@ u64a compress64(u64a x, u64a m) {
}
return x;
#endif
}
static really_inline
@ -323,7 +325,7 @@ u32 expand32(u32 x, u32 m) {
#if defined(__BMI2__)
// BMI2 has a single instruction for this operation.
return _pdep_u32(x, m);
#endif
#else
// Return zero quickly on trivial cases
if (!x || !m) {
@ -355,6 +357,7 @@ u32 expand32(u32 x, u32 m) {
}
return x & m0; // clear out extraneous bits
#endif
}
static really_inline
@ -362,7 +365,7 @@ u64a expand64(u64a x, u64a m) {
#if defined(ARCH_X86_64) && defined(__BMI2__)
// BMI2 has a single instruction for this operation.
return _pdep_u64(x, m);
#endif
#else
// Return zero quickly on trivial cases
if (!x || !m) {
@ -395,6 +398,7 @@ u64a expand64(u64a x, u64a m) {
}
return x & m0; // clear out extraneous bits
#endif
}

View File

@ -51,7 +51,7 @@ u32 shuffleDynamic32(u32 x, u32 mask) {
#if defined(HAVE_PEXT)
// Intel BMI2 can do this operation in one instruction.
return _pext_u32(x, mask);
#endif
#else
u32 result = 0, num = 1;
while (mask != 0) {
@ -63,6 +63,7 @@ u32 shuffleDynamic32(u32 x, u32 mask) {
num <<= 1;
}
return result;
#endif
}
static really_inline
@ -70,7 +71,7 @@ u32 shuffleDynamic64(u64a x, u64a mask) {
#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
// Intel BMI2 can do this operation in one instruction.
return _pext_u64(x, mask);
#endif
#else
u32 result = 0, num = 1;
while (mask != 0) {
@ -82,6 +83,7 @@ u32 shuffleDynamic64(u64a x, u64a mask) {
num <<= 1;
}
return result;
#endif
}
#undef HAVE_PEXT

View File

@ -112,6 +112,7 @@ endif()
#
# build target to run unit tests
#
if (NOT RELEASE_BUILD)
add_custom_target(
unit
COMMAND bin/unit-internal
@ -119,3 +120,11 @@ add_custom_target(
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
DEPENDS unit-internal unit-hyperscan
)
else ()
add_custom_target(
unit
COMMAND bin/unit-hyperscan
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
DEPENDS unit-hyperscan
)
endif()

View File

@ -740,7 +740,9 @@ TEST(HyperscanArgChecks, ResetAndCopyStreamSameToId) {
hs_free_database(db);
}
TEST(HyperscanArgChecks, ResetAndCopyStreamNoScratch) {
// hs_reset_and_copy_stream: You're allowed to reset and copy a stream with no
// scratch and no callback.
TEST(HyperscanArgChecks, ResetAndCopyStreamNoCallbackOrScratch) {
hs_stream_t *stream = nullptr;
hs_stream_t *stream_to = nullptr;
hs_database_t *db = nullptr;
@ -760,6 +762,37 @@ TEST(HyperscanArgChecks, ResetAndCopyStreamNoScratch) {
ASSERT_EQ(HS_SUCCESS, err);
err = hs_reset_and_copy_stream(stream_to, stream, nullptr, nullptr, nullptr);
ASSERT_EQ(HS_SUCCESS, err);
hs_close_stream(stream_to, scratch, nullptr, nullptr);
hs_close_stream(stream, scratch, nullptr, nullptr);
hs_free_scratch(scratch);
hs_free_database(db);
}
// hs_reset_and_copy_stream: If you specify a callback, you must provide
// scratch.
TEST(HyperscanArgChecks, ResetAndCopyStreamNoScratch) {
hs_stream_t *stream = nullptr;
hs_stream_t *stream_to = nullptr;
hs_database_t *db = nullptr;
hs_compile_error_t *compile_err = nullptr;
hs_error_t err = hs_compile("foobar", 0, HS_MODE_STREAM, nullptr, &db,
&compile_err);
ASSERT_EQ(HS_SUCCESS, err);
hs_scratch_t *scratch = nullptr;
err = hs_alloc_scratch(db, &scratch);
ASSERT_EQ(HS_SUCCESS, err);
err = hs_open_stream(db, 0, &stream);
ASSERT_EQ(HS_SUCCESS, err);
err = hs_open_stream(db, 0, &stream_to);
ASSERT_EQ(HS_SUCCESS, err);
err = hs_reset_and_copy_stream(stream_to, stream, nullptr, dummy_cb,
nullptr);
ASSERT_EQ(HS_INVALID, err);
hs_close_stream(stream_to, scratch, nullptr, nullptr);
@ -793,7 +826,8 @@ TEST(HyperscanArgChecks, ResetAndCopyStreamDiffDb) {
err = hs_open_stream(db2, 0, &stream_to);
ASSERT_EQ(HS_SUCCESS, err);
err = hs_reset_and_copy_stream(stream_to, stream, scratch, nullptr, nullptr);
err = hs_reset_and_copy_stream(stream_to, stream, scratch, nullptr,
nullptr);
ASSERT_EQ(HS_INVALID, err);
hs_close_stream(stream_to, scratch, nullptr, nullptr);
@ -2009,6 +2043,7 @@ TEST(HyperscanArgChecks, ScanStreamBadScratch) {
free(local_garbage);
}
// hs_reset_stream: bad scratch arg
TEST(HyperscanArgChecks, ResetStreamBadScratch) {
hs_database_t *db = nullptr;
hs_compile_error_t *compile_err = nullptr;
@ -2025,7 +2060,7 @@ TEST(HyperscanArgChecks, ResetStreamBadScratch) {
ASSERT_EQ(HS_SUCCESS, err);
ASSERT_TRUE(stream != nullptr);
err = hs_reset_stream(stream, 0, scratch, nullptr, nullptr);
err = hs_reset_stream(stream, 0, scratch, dummy_cb, nullptr);
EXPECT_NE(HS_SUCCESS, err);
EXPECT_NE(HS_SCAN_TERMINATED, err);

View File

@ -127,3 +127,4 @@
127:/^fo?ob{ro|nax_off\Qt=10omnax+8Wnah/ñññññññññññññññññññññññññññ0}l.{1,60}Car*k|npanomnax+8Wnah/8 #Expression is not valid UTF-8.
128:/(*UTF8)^fo?ob{ro|nax_off\Qt=10omnax+8Wnah/ñññññññññññññññññññññññññññ0}l.{1,60}Car*k|npanomnax+8Wnah/ #Expression is not valid UTF-8.
129:/bignum \1111111111111111111/ #Number is too big at index 7.
130:/foo|&{5555555,}/ #Bounded repeat is too large.

View File

@ -86,8 +86,7 @@ TEST(HyperscanTestBehaviour, ScanSeveralGigabytesNoMatch) {
hs_error_t err;
const size_t datalen = 1024 * 1024;
size_t megabytes = 5 * 1024;
char * data = new char[datalen];
memset(data, 'X', datalen);
vector<char> data(datalen, 'X');
// build a database
hs_database_t *db = nullptr;
@ -110,8 +109,8 @@ TEST(HyperscanTestBehaviour, ScanSeveralGigabytesNoMatch) {
ASSERT_TRUE(stream != nullptr);
while (megabytes-- > 0) {
err = hs_scan_stream(stream, data, datalen, 0, scratch, dummyHandler,
nullptr);
err = hs_scan_stream(stream, data.data(), data.size(), 0, scratch,
dummyHandler, nullptr);
ASSERT_EQ(HS_SUCCESS, err);
}
@ -121,7 +120,6 @@ TEST(HyperscanTestBehaviour, ScanSeveralGigabytesNoMatch) {
// teardown
hs_free_scratch(scratch);
hs_free_database(db);
delete [] data;
}
struct HugeScanMatchingData {
@ -141,8 +139,7 @@ TEST_P(HyperscanScanGigabytesMatch, StreamingMatch) {
hs_error_t err;
const size_t datalen = 1024*1024;
char * data = new char[datalen];
memset(data, 'X', datalen);
vector<char> data(datalen, 'X');
// build a database
hs_database_t *db = nullptr;
@ -178,7 +175,7 @@ TEST_P(HyperscanScanGigabytesMatch, StreamingMatch) {
// streaming mode scan of our megabyte of data gb*1024 times
unsigned long remaining = gb * 1024;
while (remaining-- > 0) {
err = hs_scan_stream(stream, data, datalen, 0, scratch,
err = hs_scan_stream(stream, data.data(), data.size(), 0, scratch,
singleHandler, nullptr);
ASSERT_EQ(HS_SUCCESS, err);
ASSERT_EQ(0ULL, lastMatchTo);
@ -202,7 +199,6 @@ TEST_P(HyperscanScanGigabytesMatch, StreamingMatch) {
// teardown
hs_free_scratch(scratch);
hs_free_database(db);
delete[] data;
}
// Helper function to actually perform scans for BlockMatch test below

View File

@ -69,7 +69,9 @@ TEST(StreamUtil, reset1) {
c.matches.clear();
err = hs_reset_stream(stream, 0, scratch, nullptr, nullptr);
// Note: we do not need matches from this reset operation, so we do not
// need to supply a callback or scratch space.
err = hs_reset_stream(stream, 0, nullptr, nullptr, nullptr);
ASSERT_EQ(HS_SUCCESS, err);
err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb2,
@ -107,7 +109,9 @@ TEST(StreamUtil, reset2) {
c.matches.clear();
err = hs_reset_stream(stream, 0, scratch, nullptr, nullptr);
// Note: we do not need matches from this reset operation, so we do not
// need to supply a callback or scratch space.
err = hs_reset_stream(stream, 0, nullptr, nullptr, nullptr);
ASSERT_EQ(HS_SUCCESS, err);
err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb,
@ -268,7 +272,7 @@ TEST(StreamUtil, copy_reset1) {
c.matches.clear();
err = hs_reset_and_copy_stream(stream, stream2, scratch, nullptr, nullptr);
err = hs_reset_and_copy_stream(stream, stream2, nullptr, nullptr, nullptr);
ASSERT_EQ(HS_SUCCESS, err);
err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb2,
@ -312,7 +316,7 @@ TEST(StreamUtil, copy_reset2) {
c.matches.clear();
err = hs_reset_and_copy_stream(stream, stream2, scratch, nullptr, nullptr);
err = hs_reset_and_copy_stream(stream, stream2, nullptr, nullptr, nullptr);
ASSERT_EQ(HS_SUCCESS, err);
err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb,
@ -355,7 +359,7 @@ TEST(StreamUtil, copy_reset3) {
c.matches.clear();
err = hs_reset_and_copy_stream(stream2, stream, scratch, nullptr, nullptr);
err = hs_reset_and_copy_stream(stream2, stream, nullptr, nullptr, nullptr);
ASSERT_EQ(HS_SUCCESS, err);
err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb,
@ -408,7 +412,7 @@ TEST(StreamUtil, copy_reset4) {
c.matches.clear();
err = hs_reset_and_copy_stream(stream2, stream, scratch, nullptr, nullptr);
err = hs_reset_and_copy_stream(stream2, stream, nullptr, nullptr, nullptr);
ASSERT_EQ(HS_SUCCESS, err);
err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb,
@ -458,7 +462,7 @@ TEST(StreamUtil, copy_reset5) {
ASSERT_EQ(HS_SUCCESS, err);
ASSERT_EQ(0U, c.matches.size());
err = hs_reset_and_copy_stream(stream2, stream, scratch, nullptr, nullptr);
err = hs_reset_and_copy_stream(stream2, stream, nullptr, nullptr, nullptr);
ASSERT_EQ(HS_SUCCESS, err);
err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb,

View File

@ -207,7 +207,6 @@ TEST_P(FDRp, SimpleSingle) {
TEST_P(FDRp, MultiLocation) {
const u32 hint = GetParam();
SCOPED_TRACE(hint);
u8 * data;
vector<hwlmLiteral> lits;
lits.push_back(hwlmLiteral("abc", 0, 1));
@ -216,24 +215,23 @@ TEST_P(FDRp, MultiLocation) {
CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
const u32 testSize = 128;
data = (u8 *)malloc(testSize);
memset(data, 0, testSize);
vector<u8> data(testSize, 0);
for (u32 i = 0; i < testSize - 3; i++) {
memcpy(data + i, "abc", 3);
memcpy(data.data() + i, "abc", 3);
vector<match> matches;
fdrExec(fdr.get(), (const u8 *)data, testSize, 0, decentCallback,
&matches, HWLM_ALL_GROUPS);
fdrExec(fdr.get(), data.data(), testSize, 0, decentCallback, &matches,
HWLM_ALL_GROUPS);
ASSERT_EQ(1U, matches.size());
EXPECT_EQ(match(i, i+2, 1), matches[0]);
memset(data + i, 0, 3);
memset(data.data() + i, 0, 3);
}
free(data);
}
TEST_P(FDRp, Flood) {
const u32 hint = GetParam();
SCOPED_TRACE(hint);
u8 * data;
vector<hwlmLiteral> lits;
lits.push_back(hwlmLiteral("aaaa", 0, 1));
@ -245,11 +243,10 @@ TEST_P(FDRp, Flood) {
CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
const u32 testSize = 1024;
data = (u8 *)malloc(testSize);
memset(data, 'a', testSize);
vector<u8> data(testSize, 'a');
vector<match> matches;
fdrExec(fdr.get(), (const u8 *)data, testSize, 0, decentCallback, &matches,
fdrExec(fdr.get(), data.data(), testSize, 0, decentCallback, &matches,
HWLM_ALL_GROUPS);
ASSERT_EQ(testSize - 3 + testSize - 7, matches.size());
EXPECT_EQ(match(0, 3, 1), matches[0]);
@ -266,8 +263,6 @@ TEST_P(FDRp, Flood) {
match(i - 3, i, 1) == matches[currentMatch])
);
}
free(data);
}
TEST_P(FDRp, NoRepeat1) {
@ -483,10 +478,10 @@ TEST_P(FDRp, moveByteStream) {
size_t size = fdrSize(fdrTable0.get());
FDR *fdrTable = (FDR *)aligned_zmalloc(size);
EXPECT_TRUE(fdrTable);
auto fdrTable = aligned_zmalloc_unique<FDR>(size);
EXPECT_NE(nullptr, fdrTable);
memcpy(fdrTable, fdrTable0.get(), size);
memcpy(fdrTable.get(), fdrTable0.get(), size);
// bugger up original
for (size_t i = 0 ; i < size; i++) {
@ -496,14 +491,13 @@ TEST_P(FDRp, moveByteStream) {
// check matches
vector<match> matches;
hwlm_error_t fdrStatus = fdrExec(fdrTable, (const u8 *)data, data_len, 0,
decentCallback, &matches, HWLM_ALL_GROUPS);
hwlm_error_t fdrStatus = fdrExec(fdrTable.get(), (const u8 *)data,
data_len, 0, decentCallback, &matches,
HWLM_ALL_GROUPS);
ASSERT_EQ(0, fdrStatus);
ASSERT_EQ(1U, matches.size());
EXPECT_EQ(match(12, 17, 0), matches[0]);
aligned_free(fdrTable);
}
TEST_P(FDRp, Stream1) {

View File

@ -32,6 +32,7 @@
#include "config.h"
#include "gtest/gtest.h"
#include "nfagraph_common.h"
#include "grey.h"
#include "hs.h"
#include "compiler/compiler.h"
@ -43,17 +44,8 @@
using namespace std;
using namespace ue2;
// Helper: turn a regex string into an NFA graph wrapper. The expression is
// parsed with no flags, and compilation uses the current target together with
// a default-constructed Grey.
static
unique_ptr<NGWrapper> constructGraph(const string &expr) {
    CompileContext ctx(false, false, get_current_target(), Grey());
    // The report manager shares the Grey settings held by the context.
    ReportManager reports(ctx.grey);
    ParsedExpression expression(0, expr.c_str(), 0, 0);
    return buildWrapper(reports, ctx, expression);
}
TEST(NFAGraph, CalcComp1) {
auto graph = constructGraph("abc|def|ghi");
auto graph = constructGraph("abc|def|ghi", 0);
ASSERT_TRUE(graph != nullptr);
deque<unique_ptr<NGHolder>> comps = calcComponents(*graph);
@ -61,7 +53,7 @@ TEST(NFAGraph, CalcComp1) {
}
TEST(NFAGraph, CalcComp2) {
auto graph = constructGraph("a|b|c|d|e|f|g|h|i");
auto graph = constructGraph("a|b|c|d|e|f|g|h|i", 0);
ASSERT_TRUE(graph != nullptr);
deque<unique_ptr<NGHolder>> comps = calcComponents(*graph);
@ -72,7 +64,7 @@ TEST(NFAGraph, CalcComp2) {
TEST(NFAGraph, RecalcComp1) {
deque<unique_ptr<NGHolder>> comps;
comps.push_back(constructGraph("abc|def|ghi"));
comps.push_back(constructGraph("abc|def|ghi", 0));
ASSERT_TRUE(comps.back() != nullptr);
recalcComponents(comps);

View File

@ -29,6 +29,8 @@
#include "config.h"
#include "gtest/gtest.h"
#include "nfagraph_common.h"
#include "compiler/compiler.h"
#include "grey.h"
#include "nfagraph/ng_builder.h"
@ -42,15 +44,10 @@ using namespace std;
using namespace testing;
using namespace ue2;
#define NUM_MATCHES 4U
#define P(x,y) pair<size_t, size_t>(x, y)
#define NO_MATCH P(~0U, ~0U)
struct MatchesTestParams {
string pattern;
string input;
// max 4 matches per pattern, P(-1,-1) is "no match"
pair<size_t, size_t> matches[NUM_MATCHES];
vector<pair<size_t, size_t>> matches;
unsigned flags;
bool notEod;
bool som;
@ -58,18 +55,11 @@ struct MatchesTestParams {
// teach google-test how to print a param
void PrintTo(const MatchesTestParams &p, ::std::ostream *os) {
pair<size_t, size_t> *matches = const_cast<pair<size_t, size_t> *>(p.matches);
*os << "( \"" << p.pattern << "\", "
<< "\"" << p.input << "\", "
<< "{";
for (int i = 0; i < 4; i++) {
if (matches[i] == NO_MATCH) {
*os << "NO_MATCH,";
break;
} else {
*os << "P(" << matches[i].first << ',' << matches[i].second << "),";
}
for (const auto &match : p.matches) {
*os << "P(" << match.first << ',' << match.second << "),";
}
*os << "}, ";
*os << "flags(" << p.flags << "), "
@ -81,192 +71,153 @@ void PrintTo(const MatchesTestParams &p, ::std::ostream *os) {
// Value-parameterised fixture: each test instance obtains one
// MatchesTestParams entry through GetParam().
class MatchesTest : public TestWithParam<MatchesTestParams> {};
#define P(x, y) pair<size_t, size_t>((x), (y))
static const MatchesTestParams matchesTests[] = {
// EOD and anchored patterns
// these should produce no matches
{ "^foobar", "foolish", {NO_MATCH}, 0, false, true},
{ "^foobar$", "ze foobar", {NO_MATCH}, 0, false, true},
{ "^foobar$", "foobar ", {NO_MATCH}, 0, false, true},
{ "^abc\\b", "abcde", {NO_MATCH}, 0, false, true},
{ "^a\\b", "aa", {NO_MATCH}, 0, false, true},
{ "^foobar\\b", "foobarz", {NO_MATCH}, 0, false, true},
{ "^foobar", "fooq", {NO_MATCH}, 0, false, true},
{ "^foobar", "foo", {NO_MATCH}, 0, false, true},
{ "^foobar", "fooba", {NO_MATCH}, 0, false, true},
{ "^foo *bar", "foolishness bar none ", {NO_MATCH}, 0, false, true},
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc p", {NO_MATCH}, 0, false, true},
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc dez", {NO_MATCH}, 0, false, true},
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc ghi", {NO_MATCH}, 0, false, true},
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc hij", {NO_MATCH}, 0, false, true},
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc klm", {NO_MATCH}, 0, false, true},
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abcklmn", {NO_MATCH}, 0, false, true},
{ "^.*foobar", "foobaz", {NO_MATCH}, 0, false, true},
{ "^.*foobar", "foobaz\n", {NO_MATCH}, 0, false, true},
{ "^(foo)|(bar)", "fo baz", {NO_MATCH}, 0, false, true},
{ "^((foo)|(bar))", "fo baz", {NO_MATCH}, 0, false, true},
{ "aaaaaaaa$", "AAaaAAaa", {NO_MATCH}, 0, false, true},
{ "^foo\\z", "foo\n", {NO_MATCH}, 0, false, true},
{ "^(foo){2,}", "foo", {NO_MATCH}, 0, false, true},
{ "^foobar", "foolish", {}, 0, false, true},
{ "^foobar$", "ze foobar", {}, 0, false, true},
{ "^foobar$", "foobar ", {}, 0, false, true},
{ "^abc\\b", "abcde", {}, 0, false, true},
{ "^a\\b", "aa", {}, 0, false, true},
{ "^foobar\\b", "foobarz", {}, 0, false, true},
{ "^foobar", "fooq", {}, 0, false, true},
{ "^foobar", "foo", {}, 0, false, true},
{ "^foobar", "fooba", {}, 0, false, true},
{ "^foo *bar", "foolishness bar none ", {}, 0, false, true},
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc p", {}, 0, false, true},
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc dez", {}, 0, false, true},
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc ghi", {}, 0, false, true},
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc hij", {}, 0, false, true},
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc klm", {}, 0, false, true},
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abcklmn", {}, 0, false, true},
{ "^.*foobar", "foobaz", {}, 0, false, true},
{ "^.*foobar", "foobaz\n", {}, 0, false, true},
{ "^(foo)|(bar)", "fo baz", {}, 0, false, true},
{ "^((foo)|(bar))", "fo baz", {}, 0, false, true},
{ "aaaaaaaa$", "AAaaAAaa", {}, 0, false, true},
{ "^foo\\z", "foo\n", {}, 0, false, true},
{ "^(foo){2,}", "foo", {}, 0, false, true},
// these should match
{ "^abc\\B", "abcde", { P(0,3), NO_MATCH }, 0, false, true},
{ "^abc\\b", "abc de", { P(0, 3), NO_MATCH }, 0, false, true},
{ "^foobar", "foobar", { P(0, 6), NO_MATCH }, 0, false, true},
{ "^foobar$", "foobar", { P(0, 6), NO_MATCH }, 0, false, true},
{ "^foobar", "foobarq", { P(0, 6), NO_MATCH }, 0, false, true},
{ "^foobar\\B", "foobarz", { P(0, 6), NO_MATCH }, 0, false, true},
{ "^foo.*bar", "foobar none ", { P(0, 6), NO_MATCH }, 0, false, true},
{ "^foo.*bar", "foo bar none ", { P(0, 7), NO_MATCH }, 0, false, true},
{ "^foo.*bar", "foo    bar none ", { P(0, 10), NO_MATCH }, 0, false, true},
{ "^foo.*bar", "foolishness bar none ", { P(0, 15), NO_MATCH }, 0, false, true},
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "klmny", { P(0, 5), NO_MATCH }, 0, false, true},
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc    dexyklmnxy", { P(0, 17), NO_MATCH }, 0, false, true},
{ "^.*foobar", "abcfoobar", { P(0, 9), NO_MATCH }, 0, false, true},
{ "^((foo)|(bar))", "foobar", { P(0, 3), NO_MATCH }, 0, false, true},
{ "^((foo)|(bar))", "foo bar", { P(0, 3), NO_MATCH }, 0, false, true},
{ "^(foo)|(bar)", "foobaz", { P(0, 3), NO_MATCH }, 0, false, true},
{ "^(foo)|(bar)", "foo baz", { P(0, 3), NO_MATCH }, 0, false, true},
{ "^(f[o0]+o)|(bar)", "fo0o baz", { P(0, 4), NO_MATCH }, 0, false, true},
{ "aaaaaaaa$", "AAaaAAaa", { P(0, 8), NO_MATCH }, HS_FLAG_CASELESS, false, true},
{ "^foo\\z", "foo", { P(0, 3), NO_MATCH }, 0, false, true},
{ "^foo\\Z", "foo", { P(0, 3), NO_MATCH }, 0, false, true},
{ "^foo\\Z", "foo\n", { P(0, 3), NO_MATCH }, 0, false, true},
{ "^(foo){2,}", "foofoofoo", { P(0, 6), P(0, 9), NO_MATCH}, 0, false, true},
{ "^abc\\B", "abcde", { P(0,3) }, 0, false, true},
{ "^abc\\b", "abc de", { P(0, 3) }, 0, false, true},
{ "^foobar", "foobar", { P(0, 6) }, 0, false, true},
{ "^foobar$", "foobar", { P(0, 6) }, 0, false, true},
{ "^foobar", "foobarq", { P(0, 6) }, 0, false, true},
{ "^foobar\\B", "foobarz", { P(0, 6) }, 0, false, true},
{ "^foo.*bar", "foobar none ", { P(0, 6) }, 0, false, true},
{ "^foo.*bar", "foo bar none ", { P(0, 7) }, 0, false, true},
{ "^foo.*bar", "foo    bar none ", { P(0, 10) }, 0, false, true},
{ "^foo.*bar", "foolishness bar none ", { P(0, 15) }, 0, false, true},
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "klmny", { P(0, 5) }, 0, false, true},
{ "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc    dexyklmnxy", { P(0, 17) }, 0, false, true},
{ "^.*foobar", "abcfoobar", { P(0, 9) }, 0, false, true},
{ "^((foo)|(bar))", "foobar", { P(0, 3) }, 0, false, true},
{ "^((foo)|(bar))", "foo bar", { P(0, 3) }, 0, false, true},
{ "^(foo)|(bar)", "foobaz", { P(0, 3) }, 0, false, true},
{ "^(foo)|(bar)", "foo baz", { P(0, 3) }, 0, false, true},
{ "^(f[o0]+o)|(bar)", "fo0o baz", { P(0, 4) }, 0, false, true},
{ "aaaaaaaa$", "AAaaAAaa", { P(0, 8) }, HS_FLAG_CASELESS, false, true},
{ "^foo\\z", "foo", { P(0, 3) }, 0, false, true},
{ "^foo\\Z", "foo", { P(0, 3) }, 0, false, true},
{ "^foo\\Z", "foo\n", { P(0, 3) }, 0, false, true},
{ "^(foo){2,}", "foofoofoo", { P(0, 6), P(0, 9)}, 0, false, true},
// try multiple matches per pattern
{ "^(foo)|(bar)", "foo bar", { P(0, 3), P(4, 7), NO_MATCH }, 0, false, true},
{ "^(foo)|(bar)", "foobar", { P(0, 3), P(3, 6), NO_MATCH }, 0, false, true},
{ "^(foo)+|(bar)", "foo foobar", { P(0, 3), P(7, 10), NO_MATCH }, 0, false, true},
{ "^(foo)+|(bar)", "foofoo bar", { P(0, 3), P(0, 6), P(7, 10), NO_MATCH }, 0, false, true},
{ "^(foo)|(bar)", "foobarbaz", { P(0, 3), P(3, 6), NO_MATCH }, 0, false, true},
{ "^(f[o0]+o)", "foo0obar", { P(0, 3), P(0, 5), NO_MATCH }, 0, false, true},
{ "^(foo)|(bar)", "foo bar", { P(0, 3), P(4, 7) }, 0, false, true},
{ "^(foo)|(bar)", "foobar", { P(0, 3), P(3, 6) }, 0, false, true},
{ "^(foo)+|(bar)", "foo foobar", { P(0, 3), P(7, 10) }, 0, false, true},
{ "^(foo)+|(bar)", "foofoo bar", { P(0, 3), P(0, 6), P(7, 10) }, 0, false, true},
{ "^(foo)|(bar)", "foobarbaz", { P(0, 3), P(3, 6) }, 0, false, true},
{ "^(f[o0]+o)", "foo0obar", { P(0, 3), P(0, 5) }, 0, false, true},
{ "^(f[o0]+)", "foo0obar", { P(0, 2), P(0, 3), P(0, 4), P(0, 5) }, 0, false, true},
// unanchored patterns
{ "\\b4\\B", "444", { P(0, 1), NO_MATCH }, 0, false, true},
{ "\\b\\w+\\b", "444 555", { P(0, 3), P(4, 7), NO_MATCH }, 0, false, true},
{ "foobar", "veryfoolish", {NO_MATCH}, 0, false, true},
{ "foo.*bar", "extreme foolishness bar none ", { P(8, 23), NO_MATCH }, 0, false, true},
{ "(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc deyghijfy", { P(0, 13), NO_MATCH }, 0, false, true},
{ "(abc\\s+dex?y)?(ghij|klmn).*?x?y", "wegf5tgghij34xy", { P(7, 15), NO_MATCH }, 0, false, true},
{ ".*foobar", "verylongfoobaz", {NO_MATCH}, 0, false, true},
{ ".*foobar", "foobaz\n", {NO_MATCH}, 0, false, true},
{ "(foo)|(bar)", "verylongfo baz", {NO_MATCH}, 0, false, true},
{ "(foo)?bar", "foobar", { P(0, 6), NO_MATCH }, 0, false, true},
{ "foo?bar", "foobar", { P(0, 6), NO_MATCH }, 0, false, true},
{ "(abc)|(bcd)", "abcd", { P(0, 3), P(1, 4), NO_MATCH }, 0, false, true},
{ "(abcd)|(bc)", "abcd", { P(0, 4), P(1, 3), NO_MATCH }, 0, false, true},
{ "(ab|cd)ef", "abcdef cdabef", { P(2, 6), P(9, 13), NO_MATCH }, 0, false, true},
{ "(foo)|(bar)", "verylongfoobbarbaz", { P(8, 11), P(12, 15), NO_MATCH }, 0, false, true},
{ "(a[aaaa]aa?((\\B)|[aa])){1,9}", "aaaaa", { P(0, 3), P(0, 4), P(0, 5), NO_MATCH }, 0, false, true},
{ "bar\\Z", "foobar\n", { P(3, 6), NO_MATCH }, 0, false, true},
{ "\\b4\\B", "444", { P(0, 1) }, 0, false, true},
{ "\\b\\w+\\b", "444 555", { P(0, 3), P(4, 7) }, 0, false, true},
{ "foobar", "veryfoolish", {}, 0, false, true},
{ "foo.*bar", "extreme foolishness bar none ", { P(8, 23) }, 0, false, true},
{ "(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc deyghijfy", { P(0, 13) }, 0, false, true},
{ "(abc\\s+dex?y)?(ghij|klmn).*?x?y", "wegf5tgghij34xy", { P(7, 15) }, 0, false, true},
{ ".*foobar", "verylongfoobaz", {}, 0, false, true},
{ ".*foobar", "foobaz\n", {}, 0, false, true},
{ "(foo)|(bar)", "verylongfo baz", {}, 0, false, true},
{ "(foo)?bar", "foobar", { P(0, 6) }, 0, false, true},
{ "foo?bar", "foobar", { P(0, 6) }, 0, false, true},
{ "(abc)|(bcd)", "abcd", { P(0, 3), P(1, 4) }, 0, false, true},
{ "(abcd)|(bc)", "abcd", { P(0, 4), P(1, 3) }, 0, false, true},
{ "(ab|cd)ef", "abcdef cdabef", { P(2, 6), P(9, 13) }, 0, false, true},
{ "(foo)|(bar)", "verylongfoobbarbaz", { P(8, 11), P(12, 15) }, 0, false, true},
{ "(a[aaaa]aa?((\\B)|[aa])){1,9}", "aaaaa", { P(0, 3), P(0, 4), P(0, 5) }, 0, false, true},
{ "bar\\Z", "foobar\n", { P(3, 6) }, 0, false, true},
// multi-line patterns
{ "^foo$", "foo\nbar", { P(0, 3), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
{ "^bar$", "foo\nbar", { P(4, 7), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
{ "^foo$", "big foo\nbar", {NO_MATCH}, HS_FLAG_MULTILINE, false, true},
{ "^foo$", "foo\nfoo", { P(0, 3), P(4, 7), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
{ "\\bfoo$", "big foo\nbar", { P(4, 7), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
{ "\\Bfoo$", "bigfoo\nbar", { P(3, 6), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
{ "^foo\\z", "big\nfoo", { P(4, 7), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
{ "^foo\\Z", "big\nfoo\n", { P(4, 7), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
{ "^foo$", "foo\nbar", { P(0, 3) }, HS_FLAG_MULTILINE, false, true},
{ "^bar$", "foo\nbar", { P(4, 7) }, HS_FLAG_MULTILINE, false, true},
{ "^foo$", "big foo\nbar", {}, HS_FLAG_MULTILINE, false, true},
{ "^foo$", "foo\nfoo", { P(0, 3), P(4, 7) }, HS_FLAG_MULTILINE, false, true},
{ "\\bfoo$", "big foo\nbar", { P(4, 7) }, HS_FLAG_MULTILINE, false, true},
{ "\\Bfoo$", "bigfoo\nbar", { P(3, 6) }, HS_FLAG_MULTILINE, false, true},
{ "^foo\\z", "big\nfoo", { P(4, 7) }, HS_FLAG_MULTILINE, false, true},
{ "^foo\\Z", "big\nfoo\n", { P(4, 7) }, HS_FLAG_MULTILINE, false, true},
// utf8 patterns
{ "ab+", "\x61\x62", { P(0, 2), NO_MATCH }, HS_FLAG_UTF8, false, true},
{ "ab.+d", "\x61\x62\xf0\xa4\xad\xa2\x64", { P(0, 7), NO_MATCH }, HS_FLAG_UTF8, false, true},
{ "ab+", "\x61\x62", { P(0, 2) }, HS_FLAG_UTF8, false, true},
{ "ab.+d", "\x61\x62\xf0\xa4\xad\xa2\x64", { P(0, 7) }, HS_FLAG_UTF8, false, true},
// noteod patterns
{ "^foobar$", "foobar", { NO_MATCH }, 0, true, true},
{ "aaaaaaaa$", "AAaaAAaa", { NO_MATCH }, HS_FLAG_CASELESS, true, true},
{ "^foo\\z", "foo", { NO_MATCH }, 0, true, true},
{ "^foo\\Z", "foo", { NO_MATCH }, 0, true, true},
{ "^foo\\Z", "foo\n", { NO_MATCH }, 0, true, true},
{ "^foobar$", "foobar", {}, 0, true, true},
{ "aaaaaaaa$", "AAaaAAaa", {}, HS_FLAG_CASELESS, true, true},
{ "^foo\\z", "foo", {}, 0, true, true},
{ "^foo\\Z", "foo", {}, 0, true, true},
{ "^foo\\Z", "foo\n", {}, 0, true, true},
// vacuous patterns (with multiline, utf8 and caseless flags)
// vacuous patterns have SOM turned off, so all SOM are zero
{ "b*", "abc", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, 0, false, false},
{ "b*", "", { P(0, 0), NO_MATCH }, 0, false, false},
{ "(aa|b*)", "a", { P(0, 0), P(0, 1), NO_MATCH }, 0, false, false},
{ "b*", "", { P(0, 0) }, 0, false, false},
{ "(aa|b*)", "a", { P(0, 0), P(0, 1) }, 0, false, false},
{ "b*", "bBb", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_CASELESS, false, false},
{ "b*", "abc", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_MULTILINE, false, false},
{ "b*", "", { P(0, 0), NO_MATCH }, HS_FLAG_MULTILINE, false, false},
{ "(aa|b*)", "a", { P(0, 0), P(0, 1), NO_MATCH }, HS_FLAG_MULTILINE, false, false},
{ "b*", "", { P(0, 0) }, HS_FLAG_MULTILINE, false, false},
{ "(aa|b*)", "a", { P(0, 0), P(0, 1) }, HS_FLAG_MULTILINE, false, false},
{ "b*", "bBb", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_MULTILINE | HS_FLAG_CASELESS, false, false},
{ "b*", "abc", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_UTF8, false, false},
{ "b*", "", { P(0, 0), NO_MATCH }, HS_FLAG_UTF8, false, false},
{ "(aa|b*)", "a", { P(0, 0), P(0, 1), NO_MATCH }, HS_FLAG_UTF8, false, false},
{ "b*", "", { P(0, 0) }, HS_FLAG_UTF8, false, false},
{ "(aa|b*)", "a", { P(0, 0), P(0, 1) }, HS_FLAG_UTF8, false, false},
{ "b*", "bBb", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_UTF8 | HS_FLAG_CASELESS, false, false},
{ "b*", "bBb", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_UTF8 | HS_FLAG_CASELESS, false, false},
{ ".*", "\x61\xf0\xa4\xad\xa2\x64", { P(0, 0), P(0, 1), P(0, 5), P(0, 6) }, HS_FLAG_UTF8, false, false},
{ ".*", "\xf0\xa4\xad\xa2\xf0\xa4\xad\xa3\x64", { P(0, 0), P(0, 4), P(0, 8), P(0, 9) }, HS_FLAG_UTF8, false, false},
// special patterns for detecting various bugs
{ "(\\B.|a)*a", "\xf0\xa4\xad\xa2\x61", { P(0, 5), NO_MATCH }, HS_FLAG_UTF8, false, true},
{ "(\\B.|a)*a", "\xf0\xa4\xad\xa2\x61", { P(0, 5) }, HS_FLAG_UTF8, false, true},
{ ".*", "\xf0\xa4\xad", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, 0, false, true},
{ "\\Bfoo", "foo", {NO_MATCH}, 0, false, true},
{ "fo\\B", "fo_", { P(0, 2), NO_MATCH}, 0, false, true},
{ "\\Bfoo", "foo", {}, 0, false, true},
{ "fo\\B", "fo_", { P(0, 2)}, 0, false, true},
{ "^.*", "\xee\x80\x80\n\xee\x80\x80", { P(0, 0), P(0, 3), P(0, 4), P(0, 7)}, HS_FLAG_UTF8 | HS_FLAG_MULTILINE, false, false},
// ignore highlander patterns as they can't be easily checked
};
// by default, all matches initialize to zeroes. this makes it impossible to
// test vacuous patterns, among other things, unless we specify every single
// match, which is tedious. instead, we set "no match" to NO_MATCH. if the matches
// start with NO_MATCH, everything else is set to NO_MATCH. if matches start
// with something else, look for the next NO_MATCH and replace everything after
// it with NO_MATCH's.
static
void fixMatches(const pair<size_t, size_t> in_matches[],
pair<size_t, size_t> out_matches[], unsigned size) {
bool end_matches = false;
for (unsigned i = 0; i < size; i++) {
if (in_matches[i] == NO_MATCH) {
end_matches = true;
}
if (end_matches) {
out_matches[i] = NO_MATCH;
}
else {
out_matches[i] = in_matches[i];
}
}
}
TEST_P(MatchesTest, Check) {
const MatchesTestParams &t = GetParam();
CompileContext cc(false, false, get_current_target(), Grey());
ReportManager rm(cc.grey);
ParsedExpression parsed(0, t.pattern.c_str(), t.flags, 0);
unique_ptr<NGWrapper> g = buildWrapper(rm, cc, parsed);
set<pair<size_t, size_t> > results, tmp, matches;
auto g = buildWrapper(rm, cc, parsed);
bool utf8 = (t.flags & HS_FLAG_UTF8) > 0;
set<pair<size_t, size_t>> matches;
findMatches(*g, rm, t.input, matches, t.notEod, t.som, utf8);
// fix matches and make a set out of them
pair<size_t, size_t> tmp_matches[NUM_MATCHES];
fixMatches(t.matches, tmp_matches, NUM_MATCHES);
tmp = set<pair<size_t, size_t> >(tmp_matches, tmp_matches + NUM_MATCHES);
tmp.erase(NO_MATCH);
set<pair<size_t, size_t>> expected(begin(t.matches), end(t.matches));
// create a superset of pattern results and matches to pick up unexpected
// matches as well as missed matches.
results.insert(tmp.begin(), tmp.end());
results.insert(matches.begin(), matches.end());
// check if we have the same number of matches as in expected results
ASSERT_EQ(results.size(), tmp.size())<< "Pattern '" << t.pattern
<< "' against input '" << t.input << "': wrong results count";
// we already know that size of two sets is the same, so now check matches.
for (set<pair<size_t, size_t> >::const_iterator it = results.begin();
it != results.end(); ++it) {
ASSERT_EQ(1, matches.count(*it))<< "Pattern '" << t.pattern
<< "' against input '" << t.input << "'";
}
ASSERT_EQ(expected, matches) << "Pattern '" << t.pattern
<< "' against input '" << t.input << "'";
}
INSTANTIATE_TEST_CASE_P(ng_find_matches, MatchesTest, ValuesIn(matchesTests));

View File

@ -71,37 +71,35 @@ TEST(Sidecar, ns1) {
ASSERT_TRUE(ns != nullptr);
ASSERT_LT(0U, sidecarSize(ns.get()));
struct sidecar_enabled *enabled
= (struct sidecar_enabled *)aligned_zmalloc(sidecarEnabledSize(ns.get()));
ASSERT_TRUE(enabled);
sidecarEnabledInit(ns.get(), enabled);
struct sidecar_scratch *scratch
= (struct sidecar_scratch *)aligned_zmalloc(sidecarScratchSize(ns.get()));
auto enabled =
aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
sidecarEnabledInit(ns.get(), enabled.get());
auto scratch =
aligned_zmalloc_unique<sidecar_scratch>(sidecarScratchSize(ns.get()));
for (u32 i = 0; i < 256; i++) {
SCOPED_TRACE(i);
u32 seen = 0;
memset(data, i, data_len);
sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, ns_cb, &seen);
sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0,
ns_cb, &seen);
ASSERT_EQ(0U, seen);
}
sidecarEnabledAdd(ns.get(), enabled, 0);
sidecarEnabledAdd(ns.get(), enabled.get(), 0);
for (u32 i = 0; i < 256; i++) {
SCOPED_TRACE(i);
u32 seen = 0;
memset(data, i, data_len);
sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, ns_cb, &seen);
sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0,
ns_cb, &seen);
if (i == 'f') {
ASSERT_EQ(1U, seen);
} else {
ASSERT_EQ(0U, seen);
}
}
aligned_free(enabled);
aligned_free(scratch);
}
const char* sidecarStrings[] = {
@ -186,14 +184,13 @@ TEST_P(SidecarTest, Individual) {
ASSERT_TRUE(ns != nullptr);
ASSERT_LT(0U, sidecarSize(ns.get()));
struct sidecar_enabled *enabled
= (struct sidecar_enabled *)aligned_zmalloc(sidecarEnabledSize(ns.get()));
ASSERT_TRUE(enabled);
sidecarEnabledInit(ns.get(), enabled);
struct sidecar_enabled *local_enabled
= (struct sidecar_enabled *)aligned_zmalloc(sidecarEnabledSize(ns.get()));
struct sidecar_scratch *scratch
= (struct sidecar_scratch *)aligned_zmalloc(sidecarScratchSize(ns.get()));
auto enabled =
aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
sidecarEnabledInit(ns.get(), enabled.get());
auto local_enabled =
aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
auto scratch =
aligned_zmalloc_unique<sidecar_scratch>(sidecarScratchSize(ns.get()));
const size_t data_len = 1024;
u8 data[data_len];
@ -203,7 +200,8 @@ TEST_P(SidecarTest, Individual) {
SCOPED_TRACE(i);
memset(data, i, data_len);
set<u32> seen;
sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, set_cb, &seen);
sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0,
set_cb, &seen);
ASSERT_TRUE(seen.empty());
}
@ -213,17 +211,18 @@ TEST_P(SidecarTest, Individual) {
SCOPED_TRACE(c);
// build a "compile time" enabled structure and add class j to it.
sidecarEnabledInit(ns.get(), local_enabled);
sidecarEnabledAdd(ns.get(), local_enabled, j);
sidecarEnabledInit(ns.get(), local_enabled.get());
sidecarEnabledAdd(ns.get(), local_enabled.get(), j);
// union class j into our runtime enabled structure.
sidecarEnabledUnion(ns.get(), enabled, local_enabled);
sidecarEnabledUnion(ns.get(), enabled.get(), local_enabled.get());
for (u32 i = 0; i < 256; i++) {
SCOPED_TRACE(i);
memset(data, i, data_len);
set<u32> seen;
sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, set_cb, &seen);
sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(),
0, set_cb, &seen);
if (i == c) {
ASSERT_EQ(1U, seen.size());
ASSERT_EQ(j, *seen.begin());
@ -232,10 +231,6 @@ TEST_P(SidecarTest, Individual) {
}
}
}
aligned_free(local_enabled);
aligned_free(enabled);
aligned_free(scratch);
}
TEST_P(SidecarTest, Together) {
@ -253,13 +248,13 @@ TEST_P(SidecarTest, Together) {
ASSERT_TRUE(ns != nullptr);
ASSERT_LT(0U, sidecarSize(ns.get()));
struct sidecar_enabled *enabled
= (struct sidecar_enabled *)aligned_zmalloc(sidecarEnabledSize(ns.get()));
ASSERT_TRUE(enabled);
struct sidecar_enabled *local_enabled
= (struct sidecar_enabled *)aligned_zmalloc(sidecarEnabledSize(ns.get()));
struct sidecar_scratch *scratch
= (struct sidecar_scratch *)aligned_zmalloc(sidecarScratchSize(ns.get()));
auto enabled =
aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
sidecarEnabledInit(ns.get(), enabled.get());
auto local_enabled =
aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
auto scratch =
aligned_zmalloc_unique<sidecar_scratch>(sidecarScratchSize(ns.get()));
const size_t data_len = 1024;
u8 data[data_len];
@ -269,21 +264,22 @@ TEST_P(SidecarTest, Together) {
SCOPED_TRACE(i);
memset(data, i, data_len);
set<u32> seen;
sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, set_cb, &seen);
sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0,
set_cb, &seen);
ASSERT_TRUE(seen.empty());
}
// test that every char class fires
for (u32 j = 0; j < charclasses.size(); j++) {
// enable the whole lot
sidecarEnabledInit(ns.get(), enabled);
sidecarEnabledInit(ns.get(), enabled.get());
for (u32 i = 0; i < charclasses.size(); i++) {
// build a "compile time" enabled structure and add class i to it.
sidecarEnabledInit(ns.get(), local_enabled);
sidecarEnabledAdd(ns.get(), local_enabled, i);
sidecarEnabledInit(ns.get(), local_enabled.get());
sidecarEnabledAdd(ns.get(), local_enabled.get(), i);
// union class i into our runtime enabled structure.
sidecarEnabledUnion(ns.get(), enabled, local_enabled);
sidecarEnabledUnion(ns.get(), enabled.get(), local_enabled.get());
}
u32 c = chars[j];
@ -293,7 +289,8 @@ TEST_P(SidecarTest, Together) {
SCOPED_TRACE(i);
memset(data, i, data_len);
set<u32> seen;
sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, set_cb, &seen);
sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(),
0, set_cb, &seen);
if (i == c) {
// seen should contain only `c'
ASSERT_EQ(1U, seen.size());
@ -306,10 +303,6 @@ TEST_P(SidecarTest, Together) {
}
}
}
aligned_free(local_enabled);
aligned_free(enabled);
aligned_free(scratch);
}
INSTANTIATE_TEST_CASE_P(Sidecar, SidecarTest,

View File

@ -31,7 +31,7 @@
#include "parser/utf8_validate.h"
#include "ue2common.h"
#include "util/ue2string.h"
#include "util/string_util.h"
#include "gtest/gtest.h"
@ -46,7 +46,7 @@ struct ValidUtf8TestInfo {
// Helper for gtest.
static
void PrintTo(const ValidUtf8TestInfo &t, ::std::ostream *os) {
*os << "(" << t.str << ", " << t.is_valid << ")";
*os << "(\"" << printable(t.str) << "\", " << t.is_valid << ")";
}
static ValidUtf8TestInfo valid_utf8_tests[] = {
@ -118,5 +118,5 @@ INSTANTIATE_TEST_CASE_P(ValidUtf8, ValidUtf8Test, ValuesIn(valid_utf8_tests));
TEST_P(ValidUtf8Test, check) {
const auto &info = GetParam();
ASSERT_EQ(info.is_valid, isValidUtf8(info.str.c_str()))
<< "String is: " << escapeString(info.str) << std::endl;
<< "String is: " << printable(info.str) << std::endl;
}