Merge develop into master

2025-06-28 16:41:01 +03:00 · 2015-10-30 11:29:20 +11:00 · 2015-10-30 11:29:20 +11:00 · fe31630221
commit fe31630221
parent 904e436f11 91343b00e9
32 changed files with 384 additions and 331 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,9 +1,9 @@
-cmake_minimum_required (VERSION 2.8)
+cmake_minimum_required (VERSION 2.8.11)
 project (Hyperscan C CXX)

 set (HS_MAJOR_VERSION 4)
 set (HS_MINOR_VERSION 0)
-set (HS_PATCH_VERSION 0)
+set (HS_PATCH_VERSION 1)
 set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})

 string (TIMESTAMP BUILD_DATE "%Y-%m-%d")
@ -36,19 +36,19 @@ else()
    set(RELEASE_BUILD FALSE)
 endif()

-set(BINDIR ${PROJECT_BINARY_DIR}/bin)
-set(LIBDIR ${PROJECT_BINARY_DIR}/lib)
+set(BINDIR "${PROJECT_BINARY_DIR}/bin")
+set(LIBDIR "${PROJECT_BINARY_DIR}/lib")

 # First for the generic no-config case
-set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${BINDIR})
-set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${LIBDIR})
-set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${LIBDIR})
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${BINDIR}")
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${LIBDIR}")
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${LIBDIR}")
 # Second, for multi-config builds (e.g. msvc)
 foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
    string (TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG)
-    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${BINDIR})
-    set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${LIBDIR})
-    set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${LIBDIR})
+    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} "${BINDIR}")
+    set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} "${LIBDIR}")
+    set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} "${LIBDIR}")
 endforeach (OUTPUTCONFIG CMAKE_CONFIGURATION_TYPES)


@ -71,13 +71,14 @@ find_package(Boost ${BOOST_MINVERSION})
 if(NOT Boost_FOUND)
    # we might have boost in tree, so provide a hint and try again
    message(STATUS "trying include dir for boost")
-    set(BOOST_INCLUDEDIR ${CMAKE_SOURCE_DIR}/include)
+    set(BOOST_INCLUDEDIR "${CMAKE_SOURCE_DIR}/include")
    find_package(Boost ${BOOST_MINVERSION})
    if(NOT Boost_FOUND)
-        message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. Either install system pacakges if available or extract Boost headers to ${CMAKE_SOURCE_DIR}/include")
+        message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. Either install system packages if available, extract Boost headers to ${CMAKE_SOURCE_DIR}/include, or set the CMake BOOST_ROOT variable.")
    endif()
 endif()

+
 # -- make this work? set(python_ADDITIONAL_VERSIONS 2.7 2.6)
 find_package(PythonInterp)
 find_program(RAGEL ragel)
@ -88,6 +89,10 @@ else()
    message(FATAL_ERROR "No python interpreter found")
 endif()

+if(${RAGEL} STREQUAL "RAGEL-NOTFOUND")
+    message(FATAL_ERROR "Ragel state machine compiler not found")
+endif()
+
 option(OPTIMISE "Turns off compiler optimizations (on by default unless debug output enabled or coverage testing)" TRUE)

 option(DEBUG_OUTPUT "Enable debug output (warning: very verbose)" FALSE)
@ -290,10 +295,10 @@ CHECK_CXX_COMPILER_FLAG("-Wunused-variable" CXX_WUNUSED_VARIABLE)
 endif()

 if (NOT XCODE)
-    include_directories(SYSTEM ${Boost_INCLUDE_DIR})
+    include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
 else()
    # cmake doesn't think Xcode supports isystem
-    set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -isystem ${Boost_INCLUDE_DIR}")
+    set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -isystem ${Boost_INCLUDE_DIRS}")
 endif()


--- a/doc/dev-reference/getting_started.rst
+++ b/doc/dev-reference/getting_started.rst
@ -125,8 +125,12 @@ Boost Headers

 Compiling Hyperscan depends on a recent version of the Boost C++ header
 library. If the Boost libraries are installed on the build machine in the
-usual paths, CMake will find them. An alternative is to put a copy of (or a
-symlink to) the boost subdirectory in ``<hyperscan-source-path>/include/boost``.
+usual paths, CMake will find them. If the Boost libraries are not installed,
+the location of the Boost source tree can be specified during the CMake
+configuration step using the ``BOOST_ROOT`` variable (described below).
+
+Another alternative is to put a copy of (or a symlink to) the boost
+subdirectory in ``<hyperscan-source-path>/include/boost``.

 For example: for the Boost-1.59.0 release: ::

@ -161,6 +165,8 @@ Common options for CMake include:
 | BUILD_STATIC_AND_SHARED| Build both static and shared Hyperscan libs.       |
 |                        | Default off.                                       |
 +------------------------+----------------------------------------------------+
+| BOOST_ROOT             | Location of Boost source tree.                     |
++------------------------+----------------------------------------------------+
 | DEBUG_OUTPUT           | Enable very verbose debug output. Default off.     |
 +------------------------+----------------------------------------------------+

--- a/src/fdr/CMakeLists.txt
+++ b/src/fdr/CMakeLists.txt
@ -13,11 +13,11 @@ set(AUTOGEN_PY_FILES
 function(fdr_autogen type out)
    add_custom_command (
        COMMENT "AUTOGEN ${out}"
-        OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${out}
-        COMMAND ${PYTHON} ${CMAKE_CURRENT_SOURCE_DIR}/autogen.py ${type} > ${CMAKE_CURRENT_BINARY_DIR}/${out}
+        OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${out}"
+        COMMAND ${PYTHON} "${CMAKE_CURRENT_SOURCE_DIR}/autogen.py" ${type} > "${CMAKE_CURRENT_BINARY_DIR}/${out}"
        DEPENDS ${AUTOGEN_PY_FILES}
        )
-    add_custom_target(autogen_${type} DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${out})
+    add_custom_target(autogen_${type} DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/${out}")
 endfunction(fdr_autogen)

 #now build the functions
--- a/src/hs_runtime.h
+++ b/src/hs_runtime.h
@ -250,7 +250,8 @@ hs_error_t hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
 *      for future use and is unused at present.
 *
 * @param scratch
- *      A per-thread scratch space allocated by @ref hs_alloc_scratch().
+ *      A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
+ *      allowed to be NULL only if the @a onEvent callback is also NULL.
 *
 * @param onEvent
 *      Pointer to a match event callback function. If a NULL pointer is given,
@ -299,7 +300,8 @@ hs_error_t hs_copy_stream(hs_stream_t **to_id, const hs_stream_t *from_id);
 *      The stream (as created by @ref hs_open_stream()) to be copied.
 *
 * @param scratch
- *      A per-thread scratch space allocated by @ref hs_alloc_scratch().
+ *      A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
+ *      allowed to be NULL only if the @a onEvent callback is also NULL.
 *
 * @param onEvent
 *      Pointer to a match event callback function. If a NULL pointer is given,
--- a/src/nfa/castle.h
+++ b/src/nfa/castle.h
@ -54,7 +54,7 @@ char nfaExecCastle0_expandState(const struct NFA *nfa, void *dest,

 #define nfaExecCastle0_testEOD NFA_API_NO_IMPL
 #define nfaExecCastle0_B_Reverse NFA_API_NO_IMPL
-#define nfaExecCastle0_zombie_status NFA_API_NO_IMPL
+#define nfaExecCastle0_zombie_status NFA_API_ZOMBIE_NO_IMPL

 #ifdef __cplusplus
 }
--- a/src/nfa/gough.h
+++ b/src/nfa/gough.h
@ -55,7 +55,7 @@ char nfaExecGough8_expandState(const struct NFA *nfa, void *dest,
                               const void *src, u64a offset, u8 key);

 #define nfaExecGough8_B_Reverse NFA_API_NO_IMPL
-#define nfaExecGough8_zombie_status NFA_API_NO_IMPL
+#define nfaExecGough8_zombie_status NFA_API_ZOMBIE_NO_IMPL

 // 16-bit Gough

@ -77,6 +77,6 @@ char nfaExecGough16_expandState(const struct NFA *nfa, void *dest,
                                const void *src, u64a offset, u8 key);

 #define nfaExecGough16_B_Reverse NFA_API_NO_IMPL
-#define nfaExecGough16_zombie_status NFA_API_NO_IMPL
+#define nfaExecGough16_zombie_status NFA_API_ZOMBIE_NO_IMPL

 #endif
--- a/src/nfa/lbr.h
+++ b/src/nfa/lbr.h
@ -56,7 +56,7 @@ char nfaExecLbrDot_expandState(const struct NFA *nfa, void *dest,

 #define nfaExecLbrDot_testEOD NFA_API_NO_IMPL
 #define nfaExecLbrDot_B_Reverse NFA_API_NO_IMPL
-#define nfaExecLbrDot_zombie_status NFA_API_NO_IMPL
+#define nfaExecLbrDot_zombie_status NFA_API_ZOMBIE_NO_IMPL

 // LBR Verm

@ -76,7 +76,7 @@ char nfaExecLbrVerm_expandState(const struct NFA *nfa, void *dest,

 #define nfaExecLbrVerm_testEOD NFA_API_NO_IMPL
 #define nfaExecLbrVerm_B_Reverse NFA_API_NO_IMPL
-#define nfaExecLbrVerm_zombie_status NFA_API_NO_IMPL
+#define nfaExecLbrVerm_zombie_status NFA_API_ZOMBIE_NO_IMPL

 // LBR Negated Verm

@ -96,7 +96,7 @@ char nfaExecLbrNVerm_expandState(const struct NFA *nfa, void *dest,

 #define nfaExecLbrNVerm_testEOD NFA_API_NO_IMPL
 #define nfaExecLbrNVerm_B_Reverse NFA_API_NO_IMPL
-#define nfaExecLbrNVerm_zombie_status NFA_API_NO_IMPL
+#define nfaExecLbrNVerm_zombie_status NFA_API_ZOMBIE_NO_IMPL

 // LBR Shuf

@ -116,7 +116,7 @@ char nfaExecLbrShuf_expandState(const struct NFA *nfa, void *dest,

 #define nfaExecLbrShuf_testEOD NFA_API_NO_IMPL
 #define nfaExecLbrShuf_B_Reverse NFA_API_NO_IMPL
-#define nfaExecLbrShuf_zombie_status NFA_API_NO_IMPL
+#define nfaExecLbrShuf_zombie_status NFA_API_ZOMBIE_NO_IMPL

 // LBR Truffle

@ -136,7 +136,7 @@ char nfaExecLbrTruf_expandState(const struct NFA *nfa, void *dest,

 #define nfaExecLbrTruf_testEOD NFA_API_NO_IMPL
 #define nfaExecLbrTruf_B_Reverse NFA_API_NO_IMPL
-#define nfaExecLbrTruf_zombie_status NFA_API_NO_IMPL
+#define nfaExecLbrTruf_zombie_status NFA_API_ZOMBIE_NO_IMPL

 #ifdef __cplusplus
 }
--- a/src/nfa/lbr_common_impl.h
+++ b/src/nfa/lbr_common_impl.h
@ -151,12 +151,12 @@ char JOIN(ENGINE_EXEC_NAME, _TopScan)(const struct NFA *nfa, struct mq *q,
    while (1) {
        // Find the next top with location >= the last escape we saw.
        for (; q->cur < q->end && q_cur_loc(q) <= end; q->cur++) {
-            enum mqe_event t = q_cur_type(q);
-            if ((t == MQE_TOP || t == MQE_TOP_FIRST) &&
+            u32 event = q_cur_type(q);
+            if ((event == MQE_TOP || event == MQE_TOP_FIRST) &&
                q_cur_offset(q) >= lstate->lastEscape) {
                goto found_top;
            }
-            DEBUG_PRINTF("skip event type=%d offset=%lld\n", t, q_cur_offset(q));
+            DEBUG_PRINTF("skip event type=%u offset=%lld\n", event, q_cur_offset(q));
        }

        // No more tops, we're done.
--- a/src/nfa/mcclellan.h
+++ b/src/nfa/mcclellan.h
@ -56,7 +56,7 @@ char nfaExecMcClellan8_expandState(const struct NFA *nfa, void *dest,
                                   const void *src, u64a offset, u8 key);

 #define nfaExecMcClellan8_B_Reverse NFA_API_NO_IMPL
-#define nfaExecMcClellan8_zombie_status NFA_API_NO_IMPL
+#define nfaExecMcClellan8_zombie_status NFA_API_ZOMBIE_NO_IMPL

 // 16-bit McClellan

@ -79,7 +79,7 @@ char nfaExecMcClellan16_expandState(const struct NFA *nfa, void *dest,
                                    const void *src, u64a offset, u8 key);

 #define nfaExecMcClellan16_B_Reverse NFA_API_NO_IMPL
-#define nfaExecMcClellan16_zombie_status NFA_API_NO_IMPL
+#define nfaExecMcClellan16_zombie_status NFA_API_ZOMBIE_NO_IMPL

 /**
 * Simple streaming mode calls:
--- a/src/nfa/mpv.h
+++ b/src/nfa/mpv.h
@ -50,7 +50,7 @@ char nfaExecMpv0_expandState(const struct NFA *nfa, void *dest, const void *src,
 #define nfaExecMpv0_QR NFA_API_NO_IMPL
 #define nfaExecMpv0_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */
 #define nfaExecMpv0_B_Reverse NFA_API_NO_IMPL
-#define nfaExecMpv0_zombie_status NFA_API_NO_IMPL
+#define nfaExecMpv0_zombie_status NFA_API_ZOMBIE_NO_IMPL

 /**
 * return 0 if the mpv dies, otherwise returns the location of the next possible
--- a/src/nfa/mpvcompile.cpp
+++ b/src/nfa/mpvcompile.cpp
@ -250,7 +250,7 @@ void fillCounterInfos(vector<mpv_counter_info> *out, u32 *curr_decomp_offset,
                      const map<ClusterKey, vector<raw_puff>> &kilopuffs) {
    /* first the triggered puffs */
    map<ClusterKey, vector<raw_puff>>::const_iterator it = kilopuffs.begin();
-    while (it != kilopuffs.end() && it->first.trigger_event != ~0U) {
+    while (it != kilopuffs.end() && it->first.trigger_event != MQE_INVALID) {
        assert(!it->first.auto_restart);
        assert(it->first.trigger_event
               == MQE_TOP_FIRST + distance(kilopuffs.begin(), it));
@ -268,7 +268,7 @@ void fillCounterInfos(vector<mpv_counter_info> *out, u32 *curr_decomp_offset,
     */
    map<ClusterKey, vector<raw_puff>>::const_iterator trig_ite = it;
    while (it != kilopuffs.end() && !it->first.auto_restart) {
-        assert(it->first.trigger_event == ~0U);
+        assert(it->first.trigger_event == MQE_INVALID);

        ++it;
    }
@ -278,7 +278,7 @@ void fillCounterInfos(vector<mpv_counter_info> *out, u32 *curr_decomp_offset,
                        kilopuffs, kilopuffs.begin(), it);
    }
    while (it != kilopuffs.end() && it->first.auto_restart) {
-        assert(it->first.trigger_event == ~0U);
+        assert(it->first.trigger_event == MQE_INVALID);

        out->push_back(mpv_counter_info());
        map<ClusterKey, vector<raw_puff>>::const_iterator it_o = it;
--- a/src/nfa/nfa_api_queue.h
+++ b/src/nfa/nfa_api_queue.h
@ -41,24 +41,28 @@ extern "C"
 #define MAX_MQE_LEN 10

 /** Queue events */
-enum mqe_event {
-    MQE_START = 0,       /**< and begin! Note: stateless engines will start from
-                          *  this location */
-    MQE_END = 1,         /**< stop scanning */
-    MQE_TOP = 2,         /**< enable start + start dot star */
-    MQE_TOP_FIRST = 4, /**< first event corresponding to a TOP _N_ */

-    /*
-     * Additional tops (in multi-top engines) use the event values from
-     * MQE_TOP_FIRST to something.
-     */
+/** Queue event: begin scanning. Note: stateless engines will start from this
+ * location. */
+#define MQE_START 0U

-    MQE_INVALID = ~0U
-};
+/** Queue event: stop scanning. */
+#define MQE_END 1U
+
+/** Queue event: enable start and start-dot-star. */
+#define MQE_TOP 2U
+
+/** Queue event: first event corresponding to a numbered TOP. Additional tops
+ * (in multi-top engines) use the event values from MQE_TOP_FIRST to
+ * MQE_INVALID - 1. */
+#define MQE_TOP_FIRST 4U
+
+/** Invalid queue event */
+#define MQE_INVALID (~0U)

 /** Queue item */
 struct mq_item {
-    u32 type; /**< event; from mqe_event */
+    u32 type; /**< event type, from MQE_* */
    s64a location; /**< relative to the start of the current buffer */
    u64a som; /**< pattern start-of-match corresponding to a top, only used
               * by som engines. */
--- a/src/nfa/nfa_internal.h
+++ b/src/nfa/nfa_internal.h
@ -237,16 +237,32 @@ static really_inline
 int isMultiTopType(u8 t) {
    return !isDfaType(t) && !isLbrType(t);
 }
-/** Macro used in place of unimplemented NFA API functions for a given
+
+/** Macros used in place of unimplemented NFA API functions for a given
 * engine. */
 #if !defined(_WIN32)
-#define NFA_API_NO_IMPL(...)                                                  \
+
+/* Use for functions that return an integer. */
+#define NFA_API_NO_IMPL(...)                                                   \
    ({                                                                         \
        assert("not implemented for this engine!");                            \
        0; /* return value, for places that need it */                         \
    })
+
+/* Use for _zombie_status functions. */
+#define NFA_API_ZOMBIE_NO_IMPL(...)                                            \
+    ({                                                                         \
+        assert("not implemented for this engine!");                            \
+        NFA_ZOMBIE_NO;                                                         \
+    })
+
 #else
-#define NFA_API_NO_IMPL(...) 0
+
+/* Simpler implementation for compilers that don't like the GCC extension used
+ * above. */
+#define NFA_API_NO_IMPL(...)        0
+#define NFA_API_ZOMBIE_NO_IMPL(...) NFA_ZOMBIE_NO
+
 #endif

 #ifdef __cplusplus
--- a/src/nfagraph/ng_literal_decorated.cpp
+++ b/src/nfagraph/ng_literal_decorated.cpp
@ -67,6 +67,7 @@ static
 bool findPaths(const NGHolder &g, vector<Path> &paths) {
    vector<NFAVertex> order = getTopoOrdering(g);

+    vector<size_t> read_count(num_vertices(g));
    vector<vector<Path>> built(num_vertices(g));

    for (auto it = order.rbegin(); it != order.rend(); ++it) {
@ -74,6 +75,11 @@ bool findPaths(const NGHolder &g, vector<Path> &paths) {
        auto &out = built[g[v].index];
        assert(out.empty());

+        read_count[g[v].index] = out_degree(v, g);
+
+        DEBUG_PRINTF("setting read_count to %zu for %u\n",
+                      read_count[g[v].index], g[v].index);
+
        if (v == g.start || v == g.startDs) {
            out.push_back({v});
            continue;
@ -94,6 +100,9 @@ bool findPaths(const NGHolder &g, vector<Path> &paths) {
                continue;
            }

+            assert(!built[g[u].index].empty());
+            assert(read_count[g[u].index]);
+
            for (const auto &p : built[g[u].index]) {
                out.push_back(p);
                out.back().push_back(v);
@ -105,6 +114,13 @@ bool findPaths(const NGHolder &g, vector<Path> &paths) {
                    return false;
                }
            }
+
+            read_count[g[u].index]--;
+            if (!read_count[g[u].index]) {
+                DEBUG_PRINTF("clearing %u as finished reading\n", g[u].index);
+                built[g[u].index].clear();
+                built[g[u].index].shrink_to_fit();
+            }
        }
    }

--- a/src/nfagraph/ng_small_literal_set.cpp
+++ b/src/nfagraph/ng_small_literal_set.cpp
@ -118,10 +118,15 @@ bool findLiterals(const NGHolder &g,
    vector<NFAVertex> order = getTopoOrdering(g);

    vector<set<sls_literal>> built(num_vertices(g));
+    vector<size_t> read_count(num_vertices(g));

    for (auto it = order.rbegin(); it != order.rend(); ++it) {
        NFAVertex v = *it;
        set<sls_literal> &out = built[g[v].index];
+        read_count[g[v].index] = out_degree(v, g);
+
+        DEBUG_PRINTF("setting read_count to %zu for %u\n",
+                      read_count[g[v].index], g[v].index);

        assert(out.empty());
        if (v == g.start) {
@ -149,7 +154,10 @@ bool findLiterals(const NGHolder &g,
            }

            set<sls_literal> &in = built[g[u].index];
+            DEBUG_PRINTF("getting from %u (%zu reads to go)\n",
+                          g[u].index, read_count[g[u].index]);
            assert(!in.empty());
+            assert(read_count[g[u].index]);

            for (const sls_literal &lit : in) {
                if (accept) {
@ -171,10 +179,18 @@ bool findLiterals(const NGHolder &g,
                    out.insert(lit.append((u8)c, nocase));

                    if (out.size() + literals->size() > MAX_LITERAL_SET_SIZE) {
+                        DEBUG_PRINTF("too big %zu + %zu\n", out.size(),
+                                      literals->size());
                        return false;
                    }
                }
            }
+
+            read_count[g[u].index]--;
+            if (!read_count[g[u].index]) {
+                DEBUG_PRINTF("clearing %u as finished reading\n", g[u].index);
+                in.clear();
+            }
        }
    }

@ -206,6 +222,8 @@ bool handleSmallLiteralSets(RoseBuild &rose, const NGHolder &g,
        return false;
    }

+    DEBUG_PRINTF("looking for literals\n");
+
    map<sls_literal, ue2::flat_set<ReportID>> literals;
    if (!findLiterals(g, &literals)) {
        DEBUG_PRINTF(":(\n");
--- a/src/parser/ComponentRepeat.cpp
+++ b/src/parser/ComponentRepeat.cpp
@ -51,11 +51,11 @@ using namespace std;
 namespace ue2 {

 /** \brief Hard limit on the maximum repeat for bounded repeats. */
-static const u32 MAX_MAX_BOUND = 32767;
+static constexpr u32 MAX_REPEAT = 32767;

 /** \brief If expanding a repeat would lead to this many positions being
 * generated, we fail the pattern. */
-static const u32 MAX_POSITIONS_EXPANDED = 500000; // arbitrarily huge
+static constexpr u32 MAX_POSITIONS_EXPANDED = 500000; // arbitrarily huge

 /* no edge priorities means that if our subcomponent can be empty, our min
 * extent is effectively zero. */
@ -67,7 +67,11 @@ ComponentRepeat::ComponentRepeat(unique_ptr<Component> sub_comp_in, u32 min,
    assert(sub_comp);
    assert(max > 0);
    assert(m_min <= m_max);
-    if (m_max < NoLimit && m_max > MAX_MAX_BOUND) {
+
+    if (m_min > MAX_REPEAT) {
+        throw ParseError("Bounded repeat is too large.");
+    }
+    if (m_max != NoLimit && m_max > MAX_REPEAT) {
        throw ParseError("Bounded repeat is too large.");
    }
 }
@ -119,7 +123,7 @@ void checkPositions(vector<PositionInfo> &v, const GlushkovBuildState &bs) {

 void ComponentRepeat::notePositions(GlushkovBuildState &bs) {
    assert(m_max > 0);
-    assert(m_max == NoLimit || m_max < MAX_MAX_BOUND);
+    assert(m_max == NoLimit || m_max < MAX_REPEAT);

    /* Note: We can construct smaller subgraphs if we're not maintaining edge
     * priorities. */
--- a/src/rose/match.c
+++ b/src/rose/match.c
@ -391,7 +391,7 @@ hwlmcb_rv_t roseHandleSuffixTrigger(const struct RoseEngine *t,
        }
    }

-    enum mqe_event top = tr->suffixEvent;
+    u32 top = tr->suffixEvent;
    assert(top == MQE_TOP || (top >= MQE_TOP_FIRST && top < MQE_INVALID));
    pushQueueSom(q, top, loc, som);

@ -977,14 +977,14 @@ void roseTriggerInfixes(const struct RoseEngine *t, const struct RoseRole *tr,
    do {
        u32 qi = curr_r->queue;
        u32 ri = queueToLeftIndex(t, qi);
-        enum mqe_event topEvent = curr_r->event;
+        u32 topEvent = curr_r->event;
        u8 cancel = curr_r->cancel_prev_top;
        assert(topEvent < MQE_INVALID);

        const struct LeftNfaInfo *left = getLeftInfoByQueue(t, qi);
        assert(!left->transient);

-        DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, (u32)topEvent);
+        DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, topEvent);

        struct mq *q = tctxtToScratch(tctxt)->queues + qi;
        const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@ -2433,7 +2433,7 @@ vector<RoseTrigger> buildRoseTriggerList(const RoseGraph &g, RoseVertex u,
            assert(num_tops(g[v].left) == 1);
            top = MQE_TOP;
        } else {
-            top = (enum mqe_event)((u32)MQE_TOP_FIRST + g[e].rose_top);
+            top = MQE_TOP_FIRST + g[e].rose_top;
            assert(top < MQE_INVALID);
        }

--- a/src/rose/rose_internal.h
+++ b/src/rose/rose_internal.h
@ -236,7 +236,7 @@ struct LeftNfaInfo {
 // A list of these is used to trigger prefix/infix roses.
 struct RoseTrigger {
    u32 queue; // queue index of leftfix
-    u32 event; // from enum mqe_event
+    u32 event; // queue event, from MQE_*
    u8 cancel_prev_top;
 };

@ -309,7 +309,7 @@ struct RoseRole {
    ReportID reportId; // report ID, or MO_INVALID_IDX
    u32 stateIndex; /**< index into state multibit, or MMB_INVALID. Roles do not
                     * require a state bit if they are terminal */
-    u32 suffixEvent; // from enum mqe_event
+    u32 suffixEvent; // queue event, from MQE_*
    u8 depth; /**< depth of this vertex from root in the tree, or 255 if greater.
               */
    u32 suffixOffset; /**< suffix nfa: 0 if no suffix associated with the role,
--- a/src/runtime.c
+++ b/src/runtime.c
@ -1163,11 +1163,10 @@ hs_error_t hs_reset_and_copy_stream(hs_stream_t *to_id,
        return HS_INVALID;
    }

-    if (!scratch || !validScratch(to_id->rose, scratch)) {
-        return HS_INVALID;
-    }
-
    if (onEvent) {
+        if (!scratch || !validScratch(to_id->rose, scratch)) {
+            return HS_INVALID;
+        }
        report_eod_matches(to_id, scratch, onEvent, context);
    }

@ -1406,12 +1405,14 @@ HS_PUBLIC_API
 hs_error_t hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags,
                           hs_scratch_t *scratch, match_event_handler onEvent,
                           void *context) {
-    if (!id || !scratch || !validScratch(id->rose, scratch)) {
+    if (!id) {
        return HS_INVALID;
    }

-    /* user wants eod matches */
    if (onEvent) {
+        if (!scratch || !validScratch(id->rose, scratch)) {
+            return HS_INVALID;
+        }
        report_eod_matches(id, scratch, onEvent, context);
    }

--- a/src/util/bitutils.h
+++ b/src/util/bitutils.h
@ -254,7 +254,7 @@ u32 compress32(u32 x, u32 m) {
 #if defined(__BMI2__)
    // BMI2 has a single instruction for this operation.
    return _pext_u32(x, m);
-#endif
+#else

    // Return zero quickly on trivial cases
    if ((x & m) == 0) {
@ -281,6 +281,7 @@ u32 compress32(u32 x, u32 m) {
    }

    return x;
+#endif
 }

 static really_inline
@ -288,7 +289,7 @@ u64a compress64(u64a x, u64a m) {
 #if defined(ARCH_X86_64) && defined(__BMI2__)
    // BMI2 has a single instruction for this operation.
    return _pext_u64(x, m);
-#endif
+#else

    // Return zero quickly on trivial cases
    if ((x & m) == 0) {
@ -316,6 +317,7 @@ u64a compress64(u64a x, u64a m) {
    }

    return x;
+#endif
 }

 static really_inline
@ -323,7 +325,7 @@ u32 expand32(u32 x, u32 m) {
 #if defined(__BMI2__)
    // BMI2 has a single instruction for this operation.
    return _pdep_u32(x, m);
-#endif
+#else

    // Return zero quickly on trivial cases
    if (!x || !m) {
@ -355,6 +357,7 @@ u32 expand32(u32 x, u32 m) {
    }

    return x & m0; // clear out extraneous bits
+#endif
 }

 static really_inline
@ -362,7 +365,7 @@ u64a expand64(u64a x, u64a m) {
 #if defined(ARCH_X86_64) && defined(__BMI2__)
    // BMI2 has a single instruction for this operation.
    return _pdep_u64(x, m);
-#endif
+#else

    // Return zero quickly on trivial cases
    if (!x || !m) {
@ -395,6 +398,7 @@ u64a expand64(u64a x, u64a m) {
    }

    return x & m0; // clear out extraneous bits
+#endif
 }


--- a/src/util/shuffle.h
+++ b/src/util/shuffle.h
@ -51,7 +51,7 @@ u32 shuffleDynamic32(u32 x, u32 mask) {
 #if defined(HAVE_PEXT)
    // Intel BMI2 can do this operation in one instruction.
    return _pext_u32(x, mask);
-#endif
+#else

    u32 result = 0, num = 1;
    while (mask != 0) {
@ -63,6 +63,7 @@ u32 shuffleDynamic32(u32 x, u32 mask) {
        num <<= 1;
    }
    return result;
+#endif
 }

 static really_inline
@ -70,7 +71,7 @@ u32 shuffleDynamic64(u64a x, u64a mask) {
 #if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
    // Intel BMI2 can do this operation in one instruction.
    return _pext_u64(x, mask);
-#endif
+#else

    u32 result = 0, num = 1;
    while (mask != 0) {
@ -82,6 +83,7 @@ u32 shuffleDynamic64(u64a x, u64a mask) {
        num <<= 1;
    }
    return result;
+#endif
 }

 #undef HAVE_PEXT
--- a/unit/CMakeLists.txt
+++ b/unit/CMakeLists.txt
@ -112,6 +112,7 @@ endif()
 #
 # build target to run unit tests
 #
+if (NOT RELEASE_BUILD)
 add_custom_target(
    unit
    COMMAND bin/unit-internal
@ -119,3 +120,11 @@ add_custom_target(
    WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
    DEPENDS unit-internal unit-hyperscan
 )
+else ()
+add_custom_target(
+    unit
+    COMMAND bin/unit-hyperscan
+    WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
+    DEPENDS unit-hyperscan
+)
+endif()
--- a/unit/hyperscan/arg_checks.cpp
+++ b/unit/hyperscan/arg_checks.cpp
@ -740,7 +740,9 @@ TEST(HyperscanArgChecks, ResetAndCopyStreamSameToId) {
    hs_free_database(db);
 }

-TEST(HyperscanArgChecks, ResetAndCopyStreamNoScratch) {
+// hs_reset_and_copy_stream: You're allowed to reset and copy a stream with no
+// scratch and no callback.
+TEST(HyperscanArgChecks, ResetAndCopyStreamNoCallbackOrScratch) {
    hs_stream_t *stream = nullptr;
    hs_stream_t *stream_to = nullptr;
    hs_database_t *db = nullptr;
@ -760,6 +762,37 @@ TEST(HyperscanArgChecks, ResetAndCopyStreamNoScratch) {
    ASSERT_EQ(HS_SUCCESS, err);

    err = hs_reset_and_copy_stream(stream_to, stream, nullptr, nullptr, nullptr);
+    ASSERT_EQ(HS_SUCCESS, err);
+
+    hs_close_stream(stream_to, scratch, nullptr, nullptr);
+    hs_close_stream(stream, scratch, nullptr, nullptr);
+    hs_free_scratch(scratch);
+    hs_free_database(db);
+}
+
+// hs_reset_and_copy_stream: If you specify a callback, you must provide
+// scratch.
+TEST(HyperscanArgChecks, ResetAndCopyStreamNoScratch) {
+    hs_stream_t *stream = nullptr;
+    hs_stream_t *stream_to = nullptr;
+    hs_database_t *db = nullptr;
+    hs_compile_error_t *compile_err = nullptr;
+    hs_error_t err = hs_compile("foobar", 0, HS_MODE_STREAM, nullptr, &db,
+                                &compile_err);
+    ASSERT_EQ(HS_SUCCESS, err);
+
+    hs_scratch_t *scratch = nullptr;
+    err = hs_alloc_scratch(db, &scratch);
+    ASSERT_EQ(HS_SUCCESS, err);
+
+    err = hs_open_stream(db, 0, &stream);
+    ASSERT_EQ(HS_SUCCESS, err);
+
+    err = hs_open_stream(db, 0, &stream_to);
+    ASSERT_EQ(HS_SUCCESS, err);
+
+    err = hs_reset_and_copy_stream(stream_to, stream, nullptr, dummy_cb,
+                                   nullptr);
    ASSERT_EQ(HS_INVALID, err);

    hs_close_stream(stream_to, scratch, nullptr, nullptr);
@ -793,7 +826,8 @@ TEST(HyperscanArgChecks, ResetAndCopyStreamDiffDb) {
    err = hs_open_stream(db2, 0, &stream_to);
    ASSERT_EQ(HS_SUCCESS, err);

-    err = hs_reset_and_copy_stream(stream_to, stream, scratch, nullptr, nullptr);
+    err = hs_reset_and_copy_stream(stream_to, stream, scratch, nullptr,
+                                   nullptr);
    ASSERT_EQ(HS_INVALID, err);

    hs_close_stream(stream_to, scratch, nullptr, nullptr);
@ -2009,6 +2043,7 @@ TEST(HyperscanArgChecks, ScanStreamBadScratch) {
    free(local_garbage);
 }

+// hs_reset_stream: bad scratch arg
 TEST(HyperscanArgChecks, ResetStreamBadScratch) {
    hs_database_t *db = nullptr;
    hs_compile_error_t *compile_err = nullptr;
@ -2025,7 +2060,7 @@ TEST(HyperscanArgChecks, ResetStreamBadScratch) {
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_TRUE(stream != nullptr);

-    err = hs_reset_stream(stream, 0, scratch, nullptr, nullptr);
+    err = hs_reset_stream(stream, 0, scratch, dummy_cb, nullptr);
    EXPECT_NE(HS_SUCCESS, err);
    EXPECT_NE(HS_SCAN_TERMINATED, err);

--- a/unit/hyperscan/bad_patterns.txt
+++ b/unit/hyperscan/bad_patterns.txt
@ -127,3 +127,4 @@
 127:/^fo?ob{ro|nax_off\Qt=10omnax+8Wnah/ñññññññññññññññññññññññññññ0}l.{1,60}Car*k|npanomnax+8Wnah/8 #Expression is not valid UTF-8.
 128:/(*UTF8)^fo?ob{ro|nax_off\Qt=10omnax+8Wnah/ñññññññññññññññññññññññññññ0}l.{1,60}Car*k|npanomnax+8Wnah/ #Expression is not valid UTF-8.
 129:/bignum \1111111111111111111/ #Number is too big at index 7.
+130:/foo|&{5555555,}/ #Bounded repeat is too large.
--- a/unit/hyperscan/behaviour.cpp
+++ b/unit/hyperscan/behaviour.cpp
@ -86,8 +86,7 @@ TEST(HyperscanTestBehaviour, ScanSeveralGigabytesNoMatch) {
    hs_error_t err;
    const size_t datalen = 1024 * 1024;
    size_t megabytes = 5 * 1024;
-    char * data = new char[datalen];
-    memset(data, 'X', datalen);
+    vector<char> data(datalen, 'X');

    // build a database
    hs_database_t *db = nullptr;
@ -110,8 +109,8 @@ TEST(HyperscanTestBehaviour, ScanSeveralGigabytesNoMatch) {
    ASSERT_TRUE(stream != nullptr);

    while (megabytes-- > 0) {
-        err = hs_scan_stream(stream, data, datalen, 0, scratch, dummyHandler,
-                             nullptr);
+        err = hs_scan_stream(stream, data.data(), data.size(), 0, scratch,
+                             dummyHandler, nullptr);
        ASSERT_EQ(HS_SUCCESS, err);
    }

@ -121,7 +120,6 @@ TEST(HyperscanTestBehaviour, ScanSeveralGigabytesNoMatch) {
    // teardown
    hs_free_scratch(scratch);
    hs_free_database(db);
-    delete [] data;
 }

 struct HugeScanMatchingData {
@ -141,8 +139,7 @@ TEST_P(HyperscanScanGigabytesMatch, StreamingMatch) {

    hs_error_t err;
    const size_t datalen = 1024*1024;
-    char * data = new char[datalen];
-    memset(data, 'X', datalen);
+    vector<char> data(datalen, 'X');

    // build a database
    hs_database_t *db = nullptr;
@ -178,7 +175,7 @@ TEST_P(HyperscanScanGigabytesMatch, StreamingMatch) {
        // streaming mode scan of our megabyte of data gb*1024 times
        unsigned long remaining = gb * 1024;
        while (remaining-- > 0) {
-            err = hs_scan_stream(stream, data, datalen, 0, scratch,
+            err = hs_scan_stream(stream, data.data(), data.size(), 0, scratch,
                                 singleHandler, nullptr);
            ASSERT_EQ(HS_SUCCESS, err);
            ASSERT_EQ(0ULL, lastMatchTo);
@ -202,7 +199,6 @@ TEST_P(HyperscanScanGigabytesMatch, StreamingMatch) {
    // teardown
    hs_free_scratch(scratch);
    hs_free_database(db);
-    delete[] data;
 }

 // Helper function to actually perform scans for BlockMatch test below
--- a/unit/hyperscan/stream_op.cpp
+++ b/unit/hyperscan/stream_op.cpp
@ -69,7 +69,9 @@ TEST(StreamUtil, reset1) {

    c.matches.clear();

-    err = hs_reset_stream(stream, 0, scratch, nullptr, nullptr);
+    // Note: we do not need matches from this reset operation, so we do not
+    // need to supply a callback or scratch space.
+    err = hs_reset_stream(stream, 0, nullptr, nullptr, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);

    err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb2,
@ -107,7 +109,9 @@ TEST(StreamUtil, reset2) {

    c.matches.clear();

-    err = hs_reset_stream(stream, 0, scratch, nullptr, nullptr);
+    // Note: we do not need matches from this reset operation, so we do not
+    // need to supply a callback or scratch space.
+    err = hs_reset_stream(stream, 0, nullptr, nullptr, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);

    err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb,
@ -268,7 +272,7 @@ TEST(StreamUtil, copy_reset1) {

    c.matches.clear();

-    err = hs_reset_and_copy_stream(stream, stream2, scratch, nullptr, nullptr);
+    err = hs_reset_and_copy_stream(stream, stream2, nullptr, nullptr, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);

    err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb2,
@ -312,7 +316,7 @@ TEST(StreamUtil, copy_reset2) {

    c.matches.clear();

-    err = hs_reset_and_copy_stream(stream, stream2, scratch, nullptr, nullptr);
+    err = hs_reset_and_copy_stream(stream, stream2, nullptr, nullptr, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);

    err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb,
@ -355,7 +359,7 @@ TEST(StreamUtil, copy_reset3) {

    c.matches.clear();

-    err = hs_reset_and_copy_stream(stream2, stream, scratch, nullptr, nullptr);
+    err = hs_reset_and_copy_stream(stream2, stream, nullptr, nullptr, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);

    err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb,
@ -408,7 +412,7 @@ TEST(StreamUtil, copy_reset4) {

    c.matches.clear();

-    err = hs_reset_and_copy_stream(stream2, stream, scratch, nullptr, nullptr);
+    err = hs_reset_and_copy_stream(stream2, stream, nullptr, nullptr, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);

    err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb,
@ -458,7 +462,7 @@ TEST(StreamUtil, copy_reset5) {
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_EQ(0U, c.matches.size());

-    err = hs_reset_and_copy_stream(stream2, stream, scratch, nullptr, nullptr);
+    err = hs_reset_and_copy_stream(stream2, stream, nullptr, nullptr, nullptr);
    ASSERT_EQ(HS_SUCCESS, err);

    err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb,
--- a/unit/internal/fdr.cpp
+++ b/unit/internal/fdr.cpp
@ -207,7 +207,6 @@ TEST_P(FDRp, SimpleSingle) {
 TEST_P(FDRp, MultiLocation) {
    const u32 hint = GetParam();
    SCOPED_TRACE(hint);
-    u8 * data;

    vector<hwlmLiteral> lits;
    lits.push_back(hwlmLiteral("abc", 0, 1));
@ -216,24 +215,23 @@ TEST_P(FDRp, MultiLocation) {
    CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);

    const u32 testSize = 128;
-    data = (u8 *)malloc(testSize);
-    memset(data, 0, testSize);
+
+    vector<u8> data(testSize, 0);
+
    for (u32 i = 0; i < testSize - 3; i++) {
-        memcpy(data + i, "abc", 3);
+        memcpy(data.data() + i, "abc", 3);
        vector<match> matches;
-        fdrExec(fdr.get(), (const u8 *)data, testSize, 0, decentCallback,
-                &matches, HWLM_ALL_GROUPS);
+        fdrExec(fdr.get(), data.data(), testSize, 0, decentCallback, &matches,
+                HWLM_ALL_GROUPS);
        ASSERT_EQ(1U, matches.size());
        EXPECT_EQ(match(i, i+2, 1), matches[0]);
-        memset(data + i, 0, 3);
+        memset(data.data() + i, 0, 3);
    }
-    free(data);
 }

 TEST_P(FDRp, Flood) {
    const u32 hint = GetParam();
    SCOPED_TRACE(hint);
-    u8 * data;

    vector<hwlmLiteral> lits;
    lits.push_back(hwlmLiteral("aaaa", 0, 1));
@ -245,11 +243,10 @@ TEST_P(FDRp, Flood) {
    CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);

    const u32 testSize = 1024;
-    data = (u8 *)malloc(testSize);
-    memset(data, 'a', testSize);
+    vector<u8> data(testSize, 'a');

    vector<match> matches;
-    fdrExec(fdr.get(), (const u8 *)data, testSize, 0, decentCallback, &matches,
+    fdrExec(fdr.get(), data.data(), testSize, 0, decentCallback, &matches,
            HWLM_ALL_GROUPS);
    ASSERT_EQ(testSize - 3 + testSize - 7, matches.size());
    EXPECT_EQ(match(0, 3, 1), matches[0]);
@ -266,8 +263,6 @@ TEST_P(FDRp, Flood) {
           match(i - 3, i, 1) == matches[currentMatch])
        );
    }
-
-    free(data);
 }

 TEST_P(FDRp, NoRepeat1) {
@ -483,10 +478,10 @@ TEST_P(FDRp, moveByteStream) {

    size_t size = fdrSize(fdrTable0.get());

-    FDR *fdrTable = (FDR *)aligned_zmalloc(size);
-    EXPECT_TRUE(fdrTable);
+    auto fdrTable = aligned_zmalloc_unique<FDR>(size);
+    EXPECT_NE(nullptr, fdrTable);

-    memcpy(fdrTable, fdrTable0.get(), size);
+    memcpy(fdrTable.get(), fdrTable0.get(), size);

    //  bugger up original
    for (size_t i = 0 ; i < size; i++) {
@ -496,14 +491,13 @@ TEST_P(FDRp, moveByteStream) {
    // check matches
    vector<match> matches;

-    hwlm_error_t fdrStatus = fdrExec(fdrTable, (const u8 *)data, data_len, 0,
-                                     decentCallback, &matches, HWLM_ALL_GROUPS);
+    hwlm_error_t fdrStatus = fdrExec(fdrTable.get(), (const u8 *)data,
+                                     data_len, 0, decentCallback, &matches,
+                                     HWLM_ALL_GROUPS);
    ASSERT_EQ(0, fdrStatus);

    ASSERT_EQ(1U, matches.size());
    EXPECT_EQ(match(12, 17, 0), matches[0]);
-
-    aligned_free(fdrTable);
 }

 TEST_P(FDRp, Stream1) {
--- a/unit/internal/nfagraph_comp.cpp
+++ b/unit/internal/nfagraph_comp.cpp
@ -32,6 +32,7 @@

 #include "config.h"
 #include "gtest/gtest.h"
+#include "nfagraph_common.h"
 #include "grey.h"
 #include "hs.h"
 #include "compiler/compiler.h"
@ -43,17 +44,8 @@
 using namespace std;
 using namespace ue2;

-// Helper: build us an NFA graph from a regex
-static
-unique_ptr<NGWrapper> constructGraph(const string &expr) {
-    CompileContext cc(false, false, get_current_target(), Grey());
-    ParsedExpression parsed(0, expr.c_str(), 0, 0);
-    ReportManager rm(cc.grey);
-    return buildWrapper(rm, cc, parsed);
-}
-
 TEST(NFAGraph, CalcComp1) {
-    auto graph = constructGraph("abc|def|ghi");
+    auto graph = constructGraph("abc|def|ghi", 0);
    ASSERT_TRUE(graph != nullptr);

    deque<unique_ptr<NGHolder>> comps = calcComponents(*graph);
@ -61,7 +53,7 @@ TEST(NFAGraph, CalcComp1) {
 }

 TEST(NFAGraph, CalcComp2) {
-    auto graph = constructGraph("a|b|c|d|e|f|g|h|i");
+    auto graph = constructGraph("a|b|c|d|e|f|g|h|i", 0);
    ASSERT_TRUE(graph != nullptr);

    deque<unique_ptr<NGHolder>> comps = calcComponents(*graph);
@ -72,7 +64,7 @@ TEST(NFAGraph, CalcComp2) {

 TEST(NFAGraph, RecalcComp1) {
    deque<unique_ptr<NGHolder>> comps;
-    comps.push_back(constructGraph("abc|def|ghi"));
+    comps.push_back(constructGraph("abc|def|ghi", 0));
    ASSERT_TRUE(comps.back() != nullptr);

    recalcComponents(comps);
--- a/unit/internal/nfagraph_find_matches.cpp
+++ b/unit/internal/nfagraph_find_matches.cpp
@ -29,6 +29,8 @@
 #include "config.h"
 #include "gtest/gtest.h"

+#include "nfagraph_common.h"
+
 #include "compiler/compiler.h"
 #include "grey.h"
 #include "nfagraph/ng_builder.h"
@ -42,15 +44,10 @@ using namespace std;
 using namespace testing;
 using namespace ue2;

-#define NUM_MATCHES 4U
-#define P(x,y) pair<size_t, size_t>(x, y)
-#define NO_MATCH P(~0U, ~0U)
-
 struct MatchesTestParams {
    string pattern;
    string input;
-    // max 4 matches per pattern, P(-1,-1) is "no match"
-    pair<size_t, size_t> matches[NUM_MATCHES];
+    vector<pair<size_t, size_t>> matches;
    unsigned flags;
    bool notEod;
    bool som;
@ -58,18 +55,11 @@ struct MatchesTestParams {

 // teach google-test how to print a param
 void PrintTo(const MatchesTestParams &p, ::std::ostream *os) {
-    pair<size_t, size_t> *matches = const_cast<pair<size_t, size_t> *>(p.matches);
-
    *os << "( \"" << p.pattern << "\", "
        << "\"" << p.input << "\", "
        << "{";
-    for (int i = 0; i < 4; i++) {
-        if (matches[i] == NO_MATCH) {
-            *os << "NO_MATCH,";
-            break;
-        } else {
-            *os << "P(" << matches[i].first << ',' << matches[i].second << "),";
-        }
+    for (const auto &match : p.matches) {
+        *os << "P(" << match.first << ',' << match.second << "),";
    }
    *os << "}, ";
    *os << "flags(" << p.flags << "), "
@ -81,192 +71,153 @@ void PrintTo(const MatchesTestParams &p, ::std::ostream *os) {
 class MatchesTest: public TestWithParam<MatchesTestParams> {
 };

+#define P(x, y) pair<size_t, size_t>((x), (y))
+
 static const MatchesTestParams matchesTests[] = {
    // EOD and anchored patterns

 	// these should produce no matches
-    { "^foobar", "foolish", {NO_MATCH}, 0, false, true},
-    { "^foobar$", "ze foobar", {NO_MATCH}, 0, false, true},
-    { "^foobar$", "foobar ", {NO_MATCH}, 0, false, true},
-    { "^abc\\b", "abcde", {NO_MATCH}, 0, false, true},
-    { "^a\\b", "aa", {NO_MATCH}, 0, false, true},
-    { "^foobar\\b", "foobarz", {NO_MATCH}, 0, false, true},
-    { "^foobar", "fooq", {NO_MATCH}, 0, false, true},
-    { "^foobar", "foo", {NO_MATCH}, 0, false, true},
-    { "^foobar", "fooba", {NO_MATCH}, 0, false, true},
-    { "^foo *bar", "foolishness bar none ", {NO_MATCH}, 0, false, true},
-    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc p", {NO_MATCH}, 0, false, true},
-    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc dez", {NO_MATCH}, 0, false, true},
-    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc ghi", {NO_MATCH}, 0, false, true},
-    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc hij", {NO_MATCH}, 0, false, true},
-    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc klm", {NO_MATCH}, 0, false, true},
-    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abcklmn", {NO_MATCH}, 0, false, true},
-    { "^.*foobar", "foobaz", {NO_MATCH}, 0, false, true},
-    { "^.*foobar", "foobaz\n", {NO_MATCH}, 0, false, true},
-    { "^(foo)|(bar)", "fo baz", {NO_MATCH}, 0, false, true},
-    { "^((foo)|(bar))", "fo baz", {NO_MATCH}, 0, false, true},
-    { "aaaaaaaa$", "AAaaAAaa", {NO_MATCH}, 0, false, true},
-    { "^foo\\z", "foo\n", {NO_MATCH}, 0, false, true},
-    { "^(foo){2,}", "foo", {NO_MATCH}, 0, false, true},
+    { "^foobar", "foolish", {}, 0, false, true},
+    { "^foobar$", "ze foobar", {}, 0, false, true},
+    { "^foobar$", "foobar ", {}, 0, false, true},
+    { "^abc\\b", "abcde", {}, 0, false, true},
+    { "^a\\b", "aa", {}, 0, false, true},
+    { "^foobar\\b", "foobarz", {}, 0, false, true},
+    { "^foobar", "fooq", {}, 0, false, true},
+    { "^foobar", "foo", {}, 0, false, true},
+    { "^foobar", "fooba", {}, 0, false, true},
+    { "^foo *bar", "foolishness bar none ", {}, 0, false, true},
+    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc p", {}, 0, false, true},
+    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc dez", {}, 0, false, true},
+    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc ghi", {}, 0, false, true},
+    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc hij", {}, 0, false, true},
+    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc klm", {}, 0, false, true},
+    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abcklmn", {}, 0, false, true},
+    { "^.*foobar", "foobaz", {}, 0, false, true},
+    { "^.*foobar", "foobaz\n", {}, 0, false, true},
+    { "^(foo)|(bar)", "fo baz", {}, 0, false, true},
+    { "^((foo)|(bar))", "fo baz", {}, 0, false, true},
+    { "aaaaaaaa$", "AAaaAAaa", {}, 0, false, true},
+    { "^foo\\z", "foo\n", {}, 0, false, true},
+    { "^(foo){2,}", "foo", {}, 0, false, true},

    // these should match
-    { "^abc\\B", "abcde", { P(0,3), NO_MATCH }, 0, false, true},
-    { "^abc\\b", "abc de", { P(0, 3), NO_MATCH }, 0, false, true},
-    { "^foobar", "foobar", { P(0, 6), NO_MATCH }, 0, false, true},
-    { "^foobar$", "foobar", { P(0, 6), NO_MATCH }, 0, false, true},
-    { "^foobar", "foobarq", { P(0, 6), NO_MATCH }, 0, false, true},
-    { "^foobar\\B", "foobarz", { P(0, 6), NO_MATCH }, 0, false, true},
-    { "^foo.*bar", "foobar none ", { P(0, 6), NO_MATCH }, 0, false, true},
-    { "^foo.*bar", "foo bar none ", { P(0, 7), NO_MATCH }, 0, false, true},
-    { "^foo.*bar", "foo    bar none ", { P(0, 10), NO_MATCH }, 0, false, true},
-    { "^foo.*bar", "foolishness bar none ", { P(0, 15), NO_MATCH }, 0, false, true},
-    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "klmny", { P(0, 5), NO_MATCH }, 0, false, true},
-    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc    dexyklmnxy", { P(0, 17), NO_MATCH }, 0, false, true},
-    { "^.*foobar", "abcfoobar", { P(0, 9), NO_MATCH }, 0, false, true},
-    { "^((foo)|(bar))", "foobar", { P(0, 3), NO_MATCH }, 0, false, true},
-    { "^((foo)|(bar))", "foo bar", { P(0, 3), NO_MATCH }, 0, false, true},
-    { "^(foo)|(bar)", "foobaz", { P(0, 3), NO_MATCH }, 0, false, true},
-    { "^(foo)|(bar)", "foo baz", { P(0, 3), NO_MATCH }, 0, false, true},
-    { "^(f[o0]+o)|(bar)", "fo0o baz", { P(0, 4), NO_MATCH }, 0, false, true},
-    { "aaaaaaaa$", "AAaaAAaa", { P(0, 8), NO_MATCH }, HS_FLAG_CASELESS, false, true},
-    { "^foo\\z", "foo", { P(0, 3), NO_MATCH }, 0, false, true},
-    { "^foo\\Z", "foo", { P(0, 3), NO_MATCH }, 0, false, true},
-    { "^foo\\Z", "foo\n", { P(0, 3), NO_MATCH }, 0, false, true},
-    { "^(foo){2,}", "foofoofoo", { P(0, 6), P(0, 9), NO_MATCH}, 0, false, true},
+    { "^abc\\B", "abcde", { P(0,3) }, 0, false, true},
+    { "^abc\\b", "abc de", { P(0, 3) }, 0, false, true},
+    { "^foobar", "foobar", { P(0, 6) }, 0, false, true},
+    { "^foobar$", "foobar", { P(0, 6) }, 0, false, true},
+    { "^foobar", "foobarq", { P(0, 6) }, 0, false, true},
+    { "^foobar\\B", "foobarz", { P(0, 6) }, 0, false, true},
+    { "^foo.*bar", "foobar none ", { P(0, 6) }, 0, false, true},
+    { "^foo.*bar", "foo bar none ", { P(0, 7) }, 0, false, true},
+    { "^foo.*bar", "foo    bar none ", { P(0, 10) }, 0, false, true},
+    { "^foo.*bar", "foolishness bar none ", { P(0, 15) }, 0, false, true},
+    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "klmny", { P(0, 5) }, 0, false, true},
+    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc    dexyklmnxy", { P(0, 17) }, 0, false, true},
+    { "^.*foobar", "abcfoobar", { P(0, 9) }, 0, false, true},
+    { "^((foo)|(bar))", "foobar", { P(0, 3) }, 0, false, true},
+    { "^((foo)|(bar))", "foo bar", { P(0, 3) }, 0, false, true},
+    { "^(foo)|(bar)", "foobaz", { P(0, 3) }, 0, false, true},
+    { "^(foo)|(bar)", "foo baz", { P(0, 3) }, 0, false, true},
+    { "^(f[o0]+o)|(bar)", "fo0o baz", { P(0, 4) }, 0, false, true},
+    { "aaaaaaaa$", "AAaaAAaa", { P(0, 8) }, HS_FLAG_CASELESS, false, true},
+    { "^foo\\z", "foo", { P(0, 3) }, 0, false, true},
+    { "^foo\\Z", "foo", { P(0, 3) }, 0, false, true},
+    { "^foo\\Z", "foo\n", { P(0, 3) }, 0, false, true},
+    { "^(foo){2,}", "foofoofoo", { P(0, 6), P(0, 9)}, 0, false, true},

    // try multiple matches per pattern
-    { "^(foo)|(bar)", "foo bar", { P(0, 3), P(4, 7), NO_MATCH }, 0, false, true},
-    { "^(foo)|(bar)", "foobar", { P(0, 3), P(3, 6), NO_MATCH }, 0, false, true},
-    { "^(foo)+|(bar)", "foo foobar", { P(0, 3), P(7, 10), NO_MATCH }, 0, false, true},
-    { "^(foo)+|(bar)", "foofoo bar", { P(0, 3), P(0, 6), P(7, 10), NO_MATCH }, 0, false, true},
-    { "^(foo)|(bar)", "foobarbaz", { P(0, 3), P(3, 6), NO_MATCH }, 0, false, true},
-    { "^(f[o0]+o)", "foo0obar", { P(0, 3), P(0, 5), NO_MATCH }, 0, false, true},
+    { "^(foo)|(bar)", "foo bar", { P(0, 3), P(4, 7) }, 0, false, true},
+    { "^(foo)|(bar)", "foobar", { P(0, 3), P(3, 6) }, 0, false, true},
+    { "^(foo)+|(bar)", "foo foobar", { P(0, 3), P(7, 10) }, 0, false, true},
+    { "^(foo)+|(bar)", "foofoo bar", { P(0, 3), P(0, 6), P(7, 10) }, 0, false, true},
+    { "^(foo)|(bar)", "foobarbaz", { P(0, 3), P(3, 6) }, 0, false, true},
+    { "^(f[o0]+o)", "foo0obar", { P(0, 3), P(0, 5) }, 0, false, true},
    { "^(f[o0]+)", "foo0obar", { P(0, 2), P(0, 3), P(0, 4), P(0, 5) }, 0, false, true},

    // unanchored patterns
-    { "\\b4\\B", "444", { P(0, 1), NO_MATCH }, 0, false, true},
-    { "\\b\\w+\\b", "444 555", { P(0, 3), P(4, 7), NO_MATCH }, 0, false, true},
-    { "foobar", "veryfoolish", {NO_MATCH}, 0, false, true},
-    { "foo.*bar", "extreme foolishness bar none ", { P(8, 23), NO_MATCH }, 0, false, true},
-    { "(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc deyghijfy", { P(0, 13), NO_MATCH }, 0, false, true},
-    { "(abc\\s+dex?y)?(ghij|klmn).*?x?y", "wegf5tgghij34xy", { P(7, 15), NO_MATCH }, 0, false, true},
-    { ".*foobar", "verylongfoobaz", {NO_MATCH}, 0, false, true},
-    { ".*foobar", "foobaz\n", {NO_MATCH}, 0, false, true},
-    { "(foo)|(bar)", "verylongfo baz", {NO_MATCH}, 0, false, true},
-    { "(foo)?bar", "foobar", { P(0, 6), NO_MATCH }, 0, false, true},
-    { "foo?bar", "foobar", { P(0, 6), NO_MATCH }, 0, false, true},
-    { "(abc)|(bcd)", "abcd", { P(0, 3), P(1, 4), NO_MATCH }, 0, false, true},
-    { "(abcd)|(bc)", "abcd", { P(0, 4), P(1, 3), NO_MATCH }, 0, false, true},
-    { "(ab|cd)ef", "abcdef cdabef", { P(2, 6), P(9, 13), NO_MATCH }, 0, false, true},
-    { "(foo)|(bar)", "verylongfoobbarbaz", { P(8, 11), P(12, 15), NO_MATCH }, 0, false, true},
-    { "(a[aaaa]aa?((\\B)|[aa])){1,9}", "aaaaa", { P(0, 3), P(0, 4), P(0, 5), NO_MATCH }, 0, false, true},
-    { "bar\\Z", "foobar\n", { P(3, 6), NO_MATCH }, 0, false, true},
+    { "\\b4\\B", "444", { P(0, 1) }, 0, false, true},
+    { "\\b\\w+\\b", "444 555", { P(0, 3), P(4, 7) }, 0, false, true},
+    { "foobar", "veryfoolish", {}, 0, false, true},
+    { "foo.*bar", "extreme foolishness bar none ", { P(8, 23) }, 0, false, true},
+    { "(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc deyghijfy", { P(0, 13) }, 0, false, true},
+    { "(abc\\s+dex?y)?(ghij|klmn).*?x?y", "wegf5tgghij34xy", { P(7, 15) }, 0, false, true},
+    { ".*foobar", "verylongfoobaz", {}, 0, false, true},
+    { ".*foobar", "foobaz\n", {}, 0, false, true},
+    { "(foo)|(bar)", "verylongfo baz", {}, 0, false, true},
+    { "(foo)?bar", "foobar", { P(0, 6) }, 0, false, true},
+    { "foo?bar", "foobar", { P(0, 6) }, 0, false, true},
+    { "(abc)|(bcd)", "abcd", { P(0, 3), P(1, 4) }, 0, false, true},
+    { "(abcd)|(bc)", "abcd", { P(0, 4), P(1, 3) }, 0, false, true},
+    { "(ab|cd)ef", "abcdef cdabef", { P(2, 6), P(9, 13) }, 0, false, true},
+    { "(foo)|(bar)", "verylongfoobbarbaz", { P(8, 11), P(12, 15) }, 0, false, true},
+    { "(a[aaaa]aa?((\\B)|[aa])){1,9}", "aaaaa", { P(0, 3), P(0, 4), P(0, 5) }, 0, false, true},
+    { "bar\\Z", "foobar\n", { P(3, 6) }, 0, false, true},

    // multi-line patterns
-    { "^foo$", "foo\nbar", { P(0, 3), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
-    { "^bar$", "foo\nbar", { P(4, 7), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
-    { "^foo$", "big foo\nbar", {NO_MATCH}, HS_FLAG_MULTILINE, false, true},
-    { "^foo$", "foo\nfoo", { P(0, 3), P(4, 7), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
-    { "\\bfoo$", "big foo\nbar", { P(4, 7), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
-    { "\\Bfoo$", "bigfoo\nbar", { P(3, 6), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
-    { "^foo\\z", "big\nfoo", { P(4, 7), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
-    { "^foo\\Z", "big\nfoo\n", { P(4, 7), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
+    { "^foo$", "foo\nbar", { P(0, 3) }, HS_FLAG_MULTILINE, false, true},
+    { "^bar$", "foo\nbar", { P(4, 7) }, HS_FLAG_MULTILINE, false, true},
+    { "^foo$", "big foo\nbar", {}, HS_FLAG_MULTILINE, false, true},
+    { "^foo$", "foo\nfoo", { P(0, 3), P(4, 7) }, HS_FLAG_MULTILINE, false, true},
+    { "\\bfoo$", "big foo\nbar", { P(4, 7) }, HS_FLAG_MULTILINE, false, true},
+    { "\\Bfoo$", "bigfoo\nbar", { P(3, 6) }, HS_FLAG_MULTILINE, false, true},
+    { "^foo\\z", "big\nfoo", { P(4, 7) }, HS_FLAG_MULTILINE, false, true},
+    { "^foo\\Z", "big\nfoo\n", { P(4, 7) }, HS_FLAG_MULTILINE, false, true},

    // utf8 patterns
-    { "ab+", "\x61\x62", { P(0, 2), NO_MATCH }, HS_FLAG_UTF8, false, true},
-    { "ab.+d", "\x61\x62\xf0\xa4\xad\xa2\x64", { P(0, 7), NO_MATCH }, HS_FLAG_UTF8, false, true},
+    { "ab+", "\x61\x62", { P(0, 2) }, HS_FLAG_UTF8, false, true},
+    { "ab.+d", "\x61\x62\xf0\xa4\xad\xa2\x64", { P(0, 7) }, HS_FLAG_UTF8, false, true},

    // noteod patterns
-    { "^foobar$", "foobar", { NO_MATCH }, 0, true, true},
-    { "aaaaaaaa$", "AAaaAAaa", { NO_MATCH }, HS_FLAG_CASELESS, true, true},
-    { "^foo\\z", "foo", { NO_MATCH }, 0, true, true},
-    { "^foo\\Z", "foo", { NO_MATCH }, 0, true, true},
-    { "^foo\\Z", "foo\n", { NO_MATCH }, 0, true, true},
+    { "^foobar$", "foobar", {}, 0, true, true},
+    { "aaaaaaaa$", "AAaaAAaa", {}, HS_FLAG_CASELESS, true, true},
+    { "^foo\\z", "foo", {}, 0, true, true},
+    { "^foo\\Z", "foo", {}, 0, true, true},
+    { "^foo\\Z", "foo\n", {}, 0, true, true},

    // vacuous patterns (with multiline, utf8 and caseless flags)
    // vacuous patterns have SOM turned off, so all SOM are zero
    { "b*", "abc", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, 0, false, false},
-    { "b*", "", { P(0, 0), NO_MATCH }, 0, false, false},
-    { "(aa|b*)", "a", { P(0, 0), P(0, 1), NO_MATCH }, 0, false, false},
+    { "b*", "", { P(0, 0) }, 0, false, false},
+    { "(aa|b*)", "a", { P(0, 0), P(0, 1) }, 0, false, false},
    { "b*", "bBb", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_CASELESS, false, false},
    { "b*", "abc", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_MULTILINE, false, false},
-    { "b*", "", { P(0, 0), NO_MATCH }, HS_FLAG_MULTILINE, false, false},
-    { "(aa|b*)", "a", { P(0, 0), P(0, 1), NO_MATCH }, HS_FLAG_MULTILINE, false, false},
+    { "b*", "", { P(0, 0) }, HS_FLAG_MULTILINE, false, false},
+    { "(aa|b*)", "a", { P(0, 0), P(0, 1) }, HS_FLAG_MULTILINE, false, false},
    { "b*", "bBb", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_MULTILINE | HS_FLAG_CASELESS, false, false},
    { "b*", "abc", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_UTF8, false, false},
-    { "b*", "", { P(0, 0), NO_MATCH }, HS_FLAG_UTF8, false, false},
-    { "(aa|b*)", "a", { P(0, 0), P(0, 1), NO_MATCH }, HS_FLAG_UTF8, false, false},
+    { "b*", "", { P(0, 0) }, HS_FLAG_UTF8, false, false},
+    { "(aa|b*)", "a", { P(0, 0), P(0, 1) }, HS_FLAG_UTF8, false, false},
    { "b*", "bBb", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_UTF8 | HS_FLAG_CASELESS, false, false},
    { "b*", "bBb", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_UTF8 | HS_FLAG_CASELESS, false, false},
    { ".*", "\x61\xf0\xa4\xad\xa2\x64", { P(0, 0), P(0, 1), P(0, 5), P(0, 6) }, HS_FLAG_UTF8, false, false},
    { ".*", "\xf0\xa4\xad\xa2\xf0\xa4\xad\xa3\x64", { P(0, 0), P(0, 4), P(0, 8), P(0, 9) }, HS_FLAG_UTF8, false, false},

    // special patterns for detecting various bugs
-    { "(\\B.|a)*a", "\xf0\xa4\xad\xa2\x61", { P(0, 5), NO_MATCH }, HS_FLAG_UTF8, false, true},
+    { "(\\B.|a)*a", "\xf0\xa4\xad\xa2\x61", { P(0, 5) }, HS_FLAG_UTF8, false, true},
    { ".*", "\xf0\xa4\xad", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, 0, false, true},
-    { "\\Bfoo", "foo", {NO_MATCH}, 0, false, true},
-    { "fo\\B", "fo_", { P(0, 2), NO_MATCH}, 0, false, true},
+    { "\\Bfoo", "foo", {}, 0, false, true},
+    { "fo\\B", "fo_", { P(0, 2)}, 0, false, true},
    { "^.*", "\xee\x80\x80\n\xee\x80\x80", { P(0, 0), P(0, 3), P(0, 4), P(0, 7)}, HS_FLAG_UTF8 | HS_FLAG_MULTILINE, false, false},

    // ignore highlander patterns as they can't be easily checked
 };

-// by default, all matches initialize to zeroes. this makes it impossible to
-// test vacuous patterns, among other things, unless we specify every single
-// match, which is tedious. instead, we set "no match" to NO_MATCH. if the matches
-// start with NO_MATCH, everything else is set to NO_MATCH. if matches start
-// with something else, look for the next NO_MATCH and replace everything after
-// it with NO_MATCH's.
-static
-void fixMatches(const pair<size_t, size_t> in_matches[],
-                pair<size_t, size_t> out_matches[], unsigned size) {
-    bool end_matches = false;
-    for (unsigned i = 0; i < size; i++) {
-        if (in_matches[i] == NO_MATCH) {
-            end_matches = true;
-        }
-        if (end_matches) {
-            out_matches[i] = NO_MATCH;
-        }
-        else {
-            out_matches[i] = in_matches[i];
-        }
-    }
-}
-
 TEST_P(MatchesTest, Check) {
    const MatchesTestParams &t = GetParam();
    CompileContext cc(false, false, get_current_target(), Grey());
    ReportManager rm(cc.grey);
    ParsedExpression parsed(0, t.pattern.c_str(), t.flags, 0);
-    unique_ptr<NGWrapper> g = buildWrapper(rm, cc, parsed);
-
-    set<pair<size_t, size_t> > results, tmp, matches;
+    auto g = buildWrapper(rm, cc, parsed);
    bool utf8 = (t.flags & HS_FLAG_UTF8) > 0;

+    set<pair<size_t, size_t>> matches;
    findMatches(*g, rm, t.input, matches, t.notEod, t.som, utf8);

-    // fix matches and make a set out of them
-    pair<size_t, size_t> tmp_matches[NUM_MATCHES];
-    fixMatches(t.matches, tmp_matches, NUM_MATCHES);
-    tmp = set<pair<size_t, size_t> >(tmp_matches, tmp_matches + NUM_MATCHES);
-    tmp.erase(NO_MATCH);
+    set<pair<size_t, size_t>> expected(begin(t.matches), end(t.matches));

-    // create a superset of pattern results and matches to pick up unexpected
-    // matches as well as missed matches.
-    results.insert(tmp.begin(), tmp.end());
-    results.insert(matches.begin(), matches.end());
-
-    // check if we have the same number of matches as in expected results
-    ASSERT_EQ(results.size(), tmp.size())<< "Pattern '" << t.pattern
-    << "' against input '" << t.input << "': wrong results count";
-
-    // we already know that size of two sets is the same, so now check matches.
-    for (set<pair<size_t, size_t> >::const_iterator it = results.begin();
-            it != results.end(); ++it) {
-        ASSERT_EQ(1, matches.count(*it))<< "Pattern '" << t.pattern
-        << "' against input '" << t.input << "'";
-    }
+    ASSERT_EQ(expected, matches) << "Pattern '" << t.pattern
+                                 << "' against input '" << t.input << "'";
 }

 INSTANTIATE_TEST_CASE_P(ng_find_matches, MatchesTest, ValuesIn(matchesTests));
--- a/unit/internal/sidecar.cpp
+++ b/unit/internal/sidecar.cpp
@ -71,37 +71,35 @@ TEST(Sidecar, ns1) {
    ASSERT_TRUE(ns != nullptr);
    ASSERT_LT(0U, sidecarSize(ns.get()));

-    struct sidecar_enabled *enabled
-        = (struct sidecar_enabled *)aligned_zmalloc(sidecarEnabledSize(ns.get()));
-    ASSERT_TRUE(enabled);
-    sidecarEnabledInit(ns.get(), enabled);
-    struct sidecar_scratch *scratch
-        = (struct sidecar_scratch *)aligned_zmalloc(sidecarScratchSize(ns.get()));
+    auto enabled =
+        aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
+    sidecarEnabledInit(ns.get(), enabled.get());
+    auto scratch =
+        aligned_zmalloc_unique<sidecar_scratch>(sidecarScratchSize(ns.get()));

    for (u32 i = 0; i < 256; i++) {
        SCOPED_TRACE(i);
        u32 seen = 0;
        memset(data, i, data_len);
-        sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, ns_cb, &seen);
+        sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0,
+                    ns_cb, &seen);
        ASSERT_EQ(0U, seen);
    }

-    sidecarEnabledAdd(ns.get(), enabled, 0);
+    sidecarEnabledAdd(ns.get(), enabled.get(), 0);

    for (u32 i = 0; i < 256; i++) {
        SCOPED_TRACE(i);
        u32 seen = 0;
        memset(data, i, data_len);
-        sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, ns_cb, &seen);
+        sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0,
+                    ns_cb, &seen);
        if (i == 'f') {
            ASSERT_EQ(1U, seen);
        } else {
            ASSERT_EQ(0U, seen);
        }
    }
-
-    aligned_free(enabled);
-    aligned_free(scratch);
 }

 const char* sidecarStrings[] = {
@ -186,14 +184,13 @@ TEST_P(SidecarTest, Individual) {
    ASSERT_TRUE(ns != nullptr);
    ASSERT_LT(0U, sidecarSize(ns.get()));

-    struct sidecar_enabled *enabled
-        = (struct sidecar_enabled *)aligned_zmalloc(sidecarEnabledSize(ns.get()));
-    ASSERT_TRUE(enabled);
-    sidecarEnabledInit(ns.get(), enabled);
-    struct sidecar_enabled *local_enabled
-        = (struct sidecar_enabled *)aligned_zmalloc(sidecarEnabledSize(ns.get()));
-    struct sidecar_scratch *scratch
-        = (struct sidecar_scratch *)aligned_zmalloc(sidecarScratchSize(ns.get()));
+    auto enabled =
+        aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
+    sidecarEnabledInit(ns.get(), enabled.get());
+    auto local_enabled =
+        aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
+    auto scratch =
+        aligned_zmalloc_unique<sidecar_scratch>(sidecarScratchSize(ns.get()));

    const size_t data_len = 1024;
    u8 data[data_len];
@ -203,7 +200,8 @@ TEST_P(SidecarTest, Individual) {
        SCOPED_TRACE(i);
        memset(data, i, data_len);
        set<u32> seen;
-        sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, set_cb, &seen);
+        sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0,
+                    set_cb, &seen);
        ASSERT_TRUE(seen.empty());
    }

@ -213,17 +211,18 @@ TEST_P(SidecarTest, Individual) {
        SCOPED_TRACE(c);

        // build a "compile time" enabled structure and add class j to it.
-        sidecarEnabledInit(ns.get(), local_enabled);
-        sidecarEnabledAdd(ns.get(), local_enabled, j);
+        sidecarEnabledInit(ns.get(), local_enabled.get());
+        sidecarEnabledAdd(ns.get(), local_enabled.get(), j);

        // union class j into our runtime enabled structure.
-        sidecarEnabledUnion(ns.get(), enabled, local_enabled);
+        sidecarEnabledUnion(ns.get(), enabled.get(), local_enabled.get());

        for (u32 i = 0; i < 256; i++) {
            SCOPED_TRACE(i);
            memset(data, i, data_len);
            set<u32> seen;
-            sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, set_cb, &seen);
+            sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(),
+                        0, set_cb, &seen);
            if (i == c) {
                ASSERT_EQ(1U, seen.size());
                ASSERT_EQ(j, *seen.begin());
@ -232,10 +231,6 @@ TEST_P(SidecarTest, Individual) {
            }
        }
    }
-
-    aligned_free(local_enabled);
-    aligned_free(enabled);
-    aligned_free(scratch);
 }

 TEST_P(SidecarTest, Together) {
@ -253,13 +248,13 @@ TEST_P(SidecarTest, Together) {
    ASSERT_TRUE(ns != nullptr);
    ASSERT_LT(0U, sidecarSize(ns.get()));

-    struct sidecar_enabled *enabled
-        = (struct sidecar_enabled *)aligned_zmalloc(sidecarEnabledSize(ns.get()));
-    ASSERT_TRUE(enabled);
-    struct sidecar_enabled *local_enabled
-        = (struct sidecar_enabled *)aligned_zmalloc(sidecarEnabledSize(ns.get()));
-    struct sidecar_scratch *scratch
-        = (struct sidecar_scratch *)aligned_zmalloc(sidecarScratchSize(ns.get()));
+    auto enabled =
+        aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
+    sidecarEnabledInit(ns.get(), enabled.get());
+    auto local_enabled =
+        aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
+    auto scratch =
+        aligned_zmalloc_unique<sidecar_scratch>(sidecarScratchSize(ns.get()));

    const size_t data_len = 1024;
    u8 data[data_len];
@ -269,21 +264,22 @@ TEST_P(SidecarTest, Together) {
        SCOPED_TRACE(i);
        memset(data, i, data_len);
        set<u32> seen;
-        sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, set_cb, &seen);
+        sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0,
+                    set_cb, &seen);
        ASSERT_TRUE(seen.empty());
    }

    // test that every char class fires
    for (u32 j = 0; j < charclasses.size(); j++) {
        // enable the whole lot
-        sidecarEnabledInit(ns.get(), enabled);
+        sidecarEnabledInit(ns.get(), enabled.get());
        for (u32 i = 0; i < charclasses.size(); i++) {
            // build a "compile time" enabled structure and add class j to it.
-            sidecarEnabledInit(ns.get(), local_enabled);
-            sidecarEnabledAdd(ns.get(), local_enabled, i);
+            sidecarEnabledInit(ns.get(), local_enabled.get());
+            sidecarEnabledAdd(ns.get(), local_enabled.get(), i);

            // union class j into our runtime enabled structure.
-            sidecarEnabledUnion(ns.get(), enabled, local_enabled);
+            sidecarEnabledUnion(ns.get(), enabled.get(), local_enabled.get());
        }

        u32 c = chars[j];
@ -293,7 +289,8 @@ TEST_P(SidecarTest, Together) {
            SCOPED_TRACE(i);
            memset(data, i, data_len);
            set<u32> seen;
-            sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, set_cb, &seen);
+            sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(),
+                        0, set_cb, &seen);
            if (i == c) {
                // seen should contain only `c'
                ASSERT_EQ(1U, seen.size());
@ -306,10 +303,6 @@ TEST_P(SidecarTest, Together) {
            }
        }
    }
-
-    aligned_free(local_enabled);
-    aligned_free(enabled);
-    aligned_free(scratch);
 }

 INSTANTIATE_TEST_CASE_P(Sidecar, SidecarTest,
--- a/unit/internal/utf8_validate.cpp
+++ b/unit/internal/utf8_validate.cpp
@ -31,7 +31,7 @@
 #include "parser/utf8_validate.h"

 #include "ue2common.h"
-#include "util/ue2string.h"
+#include "util/string_util.h"

 #include "gtest/gtest.h"

@ -46,7 +46,7 @@ struct ValidUtf8TestInfo {
 // Helper for gtest.
 static
 void PrintTo(const ValidUtf8TestInfo &t, ::std::ostream *os) {
-    *os << "(" << t.str << ", " << t.is_valid << ")";
+    *os << "(\"" << printable(t.str) << "\", " << t.is_valid << ")";
 }

 static ValidUtf8TestInfo valid_utf8_tests[] = {
@ -118,5 +118,5 @@ INSTANTIATE_TEST_CASE_P(ValidUtf8, ValidUtf8Test, ValuesIn(valid_utf8_tests));
 TEST_P(ValidUtf8Test, check) {
    const auto &info = GetParam();
    ASSERT_EQ(info.is_valid, isValidUtf8(info.str.c_str()))
-        << "String is: " << escapeString(info.str) << std::endl;
+        << "String is: " << printable(info.str) << std::endl;
 }