From a5660ee3d4a789c0dd38ae95cb5031a36a0bb9ea Mon Sep 17 00:00:00 2001
From: Justin Viiret <justin.viiret@intel.com>
Date: Tue, 13 Oct 2015 10:16:54 +1100
Subject: [PATCH 01/17] nfagraph_comp: use common constructGraph

---
 unit/internal/nfagraph_comp.cpp | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)
diff --git a/unit/internal/nfagraph_comp.cpp b/unit/internal/nfagraph_comp.cpp
index 451871f9..41af3f0c 100644
--- a/unit/internal/nfagraph_comp.cpp
+++ b/unit/internal/nfagraph_comp.cpp
@@ -32,6 +32,7 @@
 
 #include "config.h"
 #include "gtest/gtest.h"
+#include "nfagraph_common.h"
 #include "grey.h"
 #include "hs.h"
 #include "compiler/compiler.h"
@@ -43,17 +44,8 @@
 using namespace std;
 using namespace ue2;
 
-// Helper: build us an NFA graph from a regex
-static
-unique_ptr<NGWrapper> constructGraph(const string &expr) {
-    CompileContext cc(false, false, get_current_target(), Grey());
-    ParsedExpression parsed(0, expr.c_str(), 0, 0);
-    ReportManager rm(cc.grey);
-    return buildWrapper(rm, cc, parsed);
-}
-
 TEST(NFAGraph, CalcComp1) {
-    auto graph = constructGraph("abc|def|ghi");
+    auto graph = constructGraph("abc|def|ghi", 0);
     ASSERT_TRUE(graph != nullptr);
 
     deque<unique_ptr<NGHolder>> comps = calcComponents(*graph);
@@ -61,7 +53,7 @@ TEST(NFAGraph, CalcComp1) {
 }
 
 TEST(NFAGraph, CalcComp2) {
-    auto graph = constructGraph("a|b|c|d|e|f|g|h|i");
+    auto graph = constructGraph("a|b|c|d|e|f|g|h|i", 0);
     ASSERT_TRUE(graph != nullptr);
 
     deque<unique_ptr<NGHolder>> comps = calcComponents(*graph);
@@ -72,7 +64,7 @@ TEST(NFAGraph, CalcComp2) {
 
 TEST(NFAGraph, RecalcComp1) {
     deque<unique_ptr<NGHolder>> comps;
-    comps.push_back(constructGraph("abc|def|ghi"));
+    comps.push_back(constructGraph("abc|def|ghi", 0));
     ASSERT_TRUE(comps.back() != nullptr);
 
     recalcComponents(comps);

From 9578ae6ee254ee807fda6c3d3276cd257b72ec11 Mon Sep 17 00:00:00 2001
From: Justin Viiret <justin.viiret@intel.com>
Date: Tue, 13 Oct 2015 10:49:45 +1100
Subject: [PATCH 02/17] nfagraph_find_matches: simplify/cleanup

---
 unit/internal/nfagraph_find_matches.cpp | 259 ++++++++++--------------
 1 file changed, 105 insertions(+), 154 deletions(-)

diff --git a/unit/internal/nfagraph_find_matches.cpp b/unit/internal/nfagraph_find_matches.cpp
index 3e430aff..41d28c52 100644
--- a/unit/internal/nfagraph_find_matches.cpp
+++ b/unit/internal/nfagraph_find_matches.cpp
@@ -29,6 +29,8 @@
 #include "config.h"
 #include "gtest/gtest.h"
 
+#include "nfagraph_common.h"
+
 #include "compiler/compiler.h"
 #include "grey.h"
 #include "nfagraph/ng_builder.h"
@@ -42,15 +44,10 @@ using namespace std;
 using namespace testing;
 using namespace ue2;
 
-#define NUM_MATCHES 4U
-#define P(x,y) pair<size_t, size_t>(x, y)
-#define NO_MATCH P(~0U, ~0U)
-
 struct MatchesTestParams {
     string pattern;
     string input;
-    // max 4 matches per pattern, P(-1,-1) is "no match"
-    pair<size_t, size_t> matches[NUM_MATCHES];
+    vector<pair<size_t, size_t>> matches;
     unsigned flags;
     bool notEod;
     bool som;
@@ -58,18 +55,11 @@ struct MatchesTestParams {
 
 // teach google-test how to print a param
 void PrintTo(const MatchesTestParams &p, ::std::ostream *os) {
-    pair<size_t, size_t> *matches = const_cast<pair<size_t, size_t> *>(p.matches);
-
     *os << "( \"" << p.pattern << "\", "
         << "\"" << p.input << "\", "
         << "{";
-    for (int i = 0; i < 4; i++) {
-        if (matches[i] == NO_MATCH) {
-            *os << "NO_MATCH,";
-            break;
-        } else {
-            *os << "P(" << matches[i].first << ',' << matches[i].second << "),";
-        }
+    for (const auto &match : p.matches) {
+        *os << "P(" << match.first << ',' << match.second << "),";
     }
     *os << "}, ";
     *os << "flags(" << p.flags << "), "
@@ -81,192 +71,153 @@ void PrintTo(const MatchesTestParams &p, ::std::ostream *os) {
 class MatchesTest: public TestWithParam<MatchesTestParams> {
 };
 
+#define P(x, y) pair<size_t, size_t>((x), (y))
+
 static const MatchesTestParams matchesTests[] = {
     // EOD and anchored patterns
 
 	// these should produce no matches
-    { "^foobar", "foolish", {NO_MATCH}, 0, false, true},
-    { "^foobar$", "ze foobar", {NO_MATCH}, 0, false, true},
-    { "^foobar$", "foobar ", {NO_MATCH}, 0, false, true},
-    { "^abc\\b", "abcde", {NO_MATCH}, 0, false, true},
-    { "^a\\b", "aa", {NO_MATCH}, 0, false, true},
-    { "^foobar\\b", "foobarz", {NO_MATCH}, 0, false, true},
-    { "^foobar", "fooq", {NO_MATCH}, 0, false, true},
-    { "^foobar", "foo", {NO_MATCH}, 0, false, true},
-    { "^foobar", "fooba", {NO_MATCH}, 0, false, true},
-    { "^foo *bar", "foolishness bar none ", {NO_MATCH}, 0, false, true},
-    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc p", {NO_MATCH}, 0, false, true},
-    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc dez", {NO_MATCH}, 0, false, true},
-    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc ghi", {NO_MATCH}, 0, false, true},
-    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc hij", {NO_MATCH}, 0, false, true},
-    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc klm", {NO_MATCH}, 0, false, true},
-    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abcklmn", {NO_MATCH}, 0, false, true},
-    { "^.*foobar", "foobaz", {NO_MATCH}, 0, false, true},
-    { "^.*foobar", "foobaz\n", {NO_MATCH}, 0, false, true},
-    { "^(foo)|(bar)", "fo baz", {NO_MATCH}, 0, false, true},
-    { "^((foo)|(bar))", "fo baz", {NO_MATCH}, 0, false, true},
-    { "aaaaaaaa$", "AAaaAAaa", {NO_MATCH}, 0, false, true},
-    { "^foo\\z", "foo\n", {NO_MATCH}, 0, false, true},
-    { "^(foo){2,}", "foo", {NO_MATCH}, 0, false, true},
+    { "^foobar", "foolish", {}, 0, false, true},
+    { "^foobar$", "ze foobar", {}, 0, false, true},
+    { "^foobar$", "foobar ", {}, 0, false, true},
+    { "^abc\\b", "abcde", {}, 0, false, true},
+    { "^a\\b", "aa", {}, 0, false, true},
+    { "^foobar\\b", "foobarz", {}, 0, false, true},
+    { "^foobar", "fooq", {}, 0, false, true},
+    { "^foobar", "foo", {}, 0, false, true},
+    { "^foobar", "fooba", {}, 0, false, true},
+    { "^foo *bar", "foolishness bar none ", {}, 0, false, true},
+    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc p", {}, 0, false, true},
+    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc dez", {}, 0, false, true},
+    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc ghi", {}, 0, false, true},
+    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc hij", {}, 0, false, true},
+    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc klm", {}, 0, false, true},
+    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abcklmn", {}, 0, false, true},
+    { "^.*foobar", "foobaz", {}, 0, false, true},
+    { "^.*foobar", "foobaz\n", {}, 0, false, true},
+    { "^(foo)|(bar)", "fo baz", {}, 0, false, true},
+    { "^((foo)|(bar))", "fo baz", {}, 0, false, true},
+    { "aaaaaaaa$", "AAaaAAaa", {}, 0, false, true},
+    { "^foo\\z", "foo\n", {}, 0, false, true},
+    { "^(foo){2,}", "foo", {}, 0, false, true},
 
     // these should match
-    { "^abc\\B", "abcde", { P(0,3), NO_MATCH }, 0, false, true},
-    { "^abc\\b", "abc de", { P(0, 3), NO_MATCH }, 0, false, true},
-    { "^foobar", "foobar", { P(0, 6), NO_MATCH }, 0, false, true},
-    { "^foobar$", "foobar", { P(0, 6), NO_MATCH }, 0, false, true},
-    { "^foobar", "foobarq", { P(0, 6), NO_MATCH }, 0, false, true},
-    { "^foobar\\B", "foobarz", { P(0, 6), NO_MATCH }, 0, false, true},
-    { "^foo.*bar", "foobar none ", { P(0, 6), NO_MATCH }, 0, false, true},
-    { "^foo.*bar", "foo bar none ", { P(0, 7), NO_MATCH }, 0, false, true},
-    { "^foo.*bar", "foo    bar none ", { P(0, 10), NO_MATCH }, 0, false, true},
-    { "^foo.*bar", "foolishness bar none ", { P(0, 15), NO_MATCH }, 0, false, true},
-    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "klmny", { P(0, 5), NO_MATCH }, 0, false, true},
-    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc    dexyklmnxy", { P(0, 17), NO_MATCH }, 0, false, true},
-    { "^.*foobar", "abcfoobar", { P(0, 9), NO_MATCH }, 0, false, true},
-    { "^((foo)|(bar))", "foobar", { P(0, 3), NO_MATCH }, 0, false, true},
-    { "^((foo)|(bar))", "foo bar", { P(0, 3), NO_MATCH }, 0, false, true},
-    { "^(foo)|(bar)", "foobaz", { P(0, 3), NO_MATCH }, 0, false, true},
-    { "^(foo)|(bar)", "foo baz", { P(0, 3), NO_MATCH }, 0, false, true},
-    { "^(f[o0]+o)|(bar)", "fo0o baz", { P(0, 4), NO_MATCH }, 0, false, true},
-    { "aaaaaaaa$", "AAaaAAaa", { P(0, 8), NO_MATCH }, HS_FLAG_CASELESS, false, true},
-    { "^foo\\z", "foo", { P(0, 3), NO_MATCH }, 0, false, true},
-    { "^foo\\Z", "foo", { P(0, 3), NO_MATCH }, 0, false, true},
-    { "^foo\\Z", "foo\n", { P(0, 3), NO_MATCH }, 0, false, true},
-    { "^(foo){2,}", "foofoofoo", { P(0, 6), P(0, 9), NO_MATCH}, 0, false, true},
+    { "^abc\\B", "abcde", { P(0,3) }, 0, false, true},
+    { "^abc\\b", "abc de", { P(0, 3) }, 0, false, true},
+    { "^foobar", "foobar", { P(0, 6) }, 0, false, true},
+    { "^foobar$", "foobar", { P(0, 6) }, 0, false, true},
+    { "^foobar", "foobarq", { P(0, 6) }, 0, false, true},
+    { "^foobar\\B", "foobarz", { P(0, 6) }, 0, false, true},
+    { "^foo.*bar", "foobar none ", { P(0, 6) }, 0, false, true},
+    { "^foo.*bar", "foo bar none ", { P(0, 7) }, 0, false, true},
+    { "^foo.*bar", "foo    bar none ", { P(0, 10) }, 0, false, true},
+    { "^foo.*bar", "foolishness bar none ", { P(0, 15) }, 0, false, true},
+    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "klmny", { P(0, 5) }, 0, false, true},
+    { "^(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc    dexyklmnxy", { P(0, 17) }, 0, false, true},
+    { "^.*foobar", "abcfoobar", { P(0, 9) }, 0, false, true},
+    { "^((foo)|(bar))", "foobar", { P(0, 3) }, 0, false, true},
+    { "^((foo)|(bar))", "foo bar", { P(0, 3) }, 0, false, true},
+    { "^(foo)|(bar)", "foobaz", { P(0, 3) }, 0, false, true},
+    { "^(foo)|(bar)", "foo baz", { P(0, 3) }, 0, false, true},
+    { "^(f[o0]+o)|(bar)", "fo0o baz", { P(0, 4) }, 0, false, true},
+    { "aaaaaaaa$", "AAaaAAaa", { P(0, 8) }, HS_FLAG_CASELESS, false, true},
+    { "^foo\\z", "foo", { P(0, 3) }, 0, false, true},
+    { "^foo\\Z", "foo", { P(0, 3) }, 0, false, true},
+    { "^foo\\Z", "foo\n", { P(0, 3) }, 0, false, true},
+    { "^(foo){2,}", "foofoofoo", { P(0, 6), P(0, 9)}, 0, false, true},
 
     // try multiple matches per pattern
-    { "^(foo)|(bar)", "foo bar", { P(0, 3), P(4, 7), NO_MATCH }, 0, false, true},
-    { "^(foo)|(bar)", "foobar", { P(0, 3), P(3, 6), NO_MATCH }, 0, false, true},
-    { "^(foo)+|(bar)", "foo foobar", { P(0, 3), P(7, 10), NO_MATCH }, 0, false, true},
-    { "^(foo)+|(bar)", "foofoo bar", { P(0, 3), P(0, 6), P(7, 10), NO_MATCH }, 0, false, true},
-    { "^(foo)|(bar)", "foobarbaz", { P(0, 3), P(3, 6), NO_MATCH }, 0, false, true},
-    { "^(f[o0]+o)", "foo0obar", { P(0, 3), P(0, 5), NO_MATCH }, 0, false, true},
+    { "^(foo)|(bar)", "foo bar", { P(0, 3), P(4, 7) }, 0, false, true},
+    { "^(foo)|(bar)", "foobar", { P(0, 3), P(3, 6) }, 0, false, true},
+    { "^(foo)+|(bar)", "foo foobar", { P(0, 3), P(7, 10) }, 0, false, true},
+    { "^(foo)+|(bar)", "foofoo bar", { P(0, 3), P(0, 6), P(7, 10) }, 0, false, true},
+    { "^(foo)|(bar)", "foobarbaz", { P(0, 3), P(3, 6) }, 0, false, true},
+    { "^(f[o0]+o)", "foo0obar", { P(0, 3), P(0, 5) }, 0, false, true},
     { "^(f[o0]+)", "foo0obar", { P(0, 2), P(0, 3), P(0, 4), P(0, 5) }, 0, false, true},
 
     // unanchored patterns
-    { "\\b4\\B", "444", { P(0, 1), NO_MATCH }, 0, false, true},
-    { "\\b\\w+\\b", "444 555", { P(0, 3), P(4, 7), NO_MATCH }, 0, false, true},
-    { "foobar", "veryfoolish", {NO_MATCH}, 0, false, true},
-    { "foo.*bar", "extreme foolishness bar none ", { P(8, 23), NO_MATCH }, 0, false, true},
-    { "(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc deyghijfy", { P(0, 13), NO_MATCH }, 0, false, true},
-    { "(abc\\s+dex?y)?(ghij|klmn).*?x?y", "wegf5tgghij34xy", { P(7, 15), NO_MATCH }, 0, false, true},
-    { ".*foobar", "verylongfoobaz", {NO_MATCH}, 0, false, true},
-    { ".*foobar", "foobaz\n", {NO_MATCH}, 0, false, true},
-    { "(foo)|(bar)", "verylongfo baz", {NO_MATCH}, 0, false, true},
-    { "(foo)?bar", "foobar", { P(0, 6), NO_MATCH }, 0, false, true},
-    { "foo?bar", "foobar", { P(0, 6), NO_MATCH }, 0, false, true},
-    { "(abc)|(bcd)", "abcd", { P(0, 3), P(1, 4), NO_MATCH }, 0, false, true},
-    { "(abcd)|(bc)", "abcd", { P(0, 4), P(1, 3), NO_MATCH }, 0, false, true},
-    { "(ab|cd)ef", "abcdef cdabef", { P(2, 6), P(9, 13), NO_MATCH }, 0, false, true},
-    { "(foo)|(bar)", "verylongfoobbarbaz", { P(8, 11), P(12, 15), NO_MATCH }, 0, false, true},
-    { "(a[aaaa]aa?((\\B)|[aa])){1,9}", "aaaaa", { P(0, 3), P(0, 4), P(0, 5), NO_MATCH }, 0, false, true},
-    { "bar\\Z", "foobar\n", { P(3, 6), NO_MATCH }, 0, false, true},
+    { "\\b4\\B", "444", { P(0, 1) }, 0, false, true},
+    { "\\b\\w+\\b", "444 555", { P(0, 3), P(4, 7) }, 0, false, true},
+    { "foobar", "veryfoolish", {}, 0, false, true},
+    { "foo.*bar", "extreme foolishness bar none ", { P(8, 23) }, 0, false, true},
+    { "(abc\\s+dex?y)?(ghij|klmn).*?x?y", "abc deyghijfy", { P(0, 13) }, 0, false, true},
+    { "(abc\\s+dex?y)?(ghij|klmn).*?x?y", "wegf5tgghij34xy", { P(7, 15) }, 0, false, true},
+    { ".*foobar", "verylongfoobaz", {}, 0, false, true},
+    { ".*foobar", "foobaz\n", {}, 0, false, true},
+    { "(foo)|(bar)", "verylongfo baz", {}, 0, false, true},
+    { "(foo)?bar", "foobar", { P(0, 6) }, 0, false, true},
+    { "foo?bar", "foobar", { P(0, 6) }, 0, false, true},
+    { "(abc)|(bcd)", "abcd", { P(0, 3), P(1, 4) }, 0, false, true},
+    { "(abcd)|(bc)", "abcd", { P(0, 4), P(1, 3) }, 0, false, true},
+    { "(ab|cd)ef", "abcdef cdabef", { P(2, 6), P(9, 13) }, 0, false, true},
+    { "(foo)|(bar)", "verylongfoobbarbaz", { P(8, 11), P(12, 15) }, 0, false, true},
+    { "(a[aaaa]aa?((\\B)|[aa])){1,9}", "aaaaa", { P(0, 3), P(0, 4), P(0, 5) }, 0, false, true},
+    { "bar\\Z", "foobar\n", { P(3, 6) }, 0, false, true},
 
     // multi-line patterns
-    { "^foo$", "foo\nbar", { P(0, 3), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
-    { "^bar$", "foo\nbar", { P(4, 7), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
-    { "^foo$", "big foo\nbar", {NO_MATCH}, HS_FLAG_MULTILINE, false, true},
-    { "^foo$", "foo\nfoo", { P(0, 3), P(4, 7), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
-    { "\\bfoo$", "big foo\nbar", { P(4, 7), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
-    { "\\Bfoo$", "bigfoo\nbar", { P(3, 6), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
-    { "^foo\\z", "big\nfoo", { P(4, 7), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
-    { "^foo\\Z", "big\nfoo\n", { P(4, 7), NO_MATCH }, HS_FLAG_MULTILINE, false, true},
+    { "^foo$", "foo\nbar", { P(0, 3) }, HS_FLAG_MULTILINE, false, true},
+    { "^bar$", "foo\nbar", { P(4, 7) }, HS_FLAG_MULTILINE, false, true},
+    { "^foo$", "big foo\nbar", {}, HS_FLAG_MULTILINE, false, true},
+    { "^foo$", "foo\nfoo", { P(0, 3), P(4, 7) }, HS_FLAG_MULTILINE, false, true},
+    { "\\bfoo$", "big foo\nbar", { P(4, 7) }, HS_FLAG_MULTILINE, false, true},
+    { "\\Bfoo$", "bigfoo\nbar", { P(3, 6) }, HS_FLAG_MULTILINE, false, true},
+    { "^foo\\z", "big\nfoo", { P(4, 7) }, HS_FLAG_MULTILINE, false, true},
+    { "^foo\\Z", "big\nfoo\n", { P(4, 7) }, HS_FLAG_MULTILINE, false, true},
 
     // utf8 patterns
-    { "ab+", "\x61\x62", { P(0, 2), NO_MATCH }, HS_FLAG_UTF8, false, true},
-    { "ab.+d", "\x61\x62\xf0\xa4\xad\xa2\x64", { P(0, 7), NO_MATCH }, HS_FLAG_UTF8, false, true},
+    { "ab+", "\x61\x62", { P(0, 2) }, HS_FLAG_UTF8, false, true},
+    { "ab.+d", "\x61\x62\xf0\xa4\xad\xa2\x64", { P(0, 7) }, HS_FLAG_UTF8, false, true},
 
     // noteod patterns
-    { "^foobar$", "foobar", { NO_MATCH }, 0, true, true},
-    { "aaaaaaaa$", "AAaaAAaa", { NO_MATCH }, HS_FLAG_CASELESS, true, true},
-    { "^foo\\z", "foo", { NO_MATCH }, 0, true, true},
-    { "^foo\\Z", "foo", { NO_MATCH }, 0, true, true},
-    { "^foo\\Z", "foo\n", { NO_MATCH }, 0, true, true},
+    { "^foobar$", "foobar", {}, 0, true, true},
+    { "aaaaaaaa$", "AAaaAAaa", {}, HS_FLAG_CASELESS, true, true},
+    { "^foo\\z", "foo", {}, 0, true, true},
+    { "^foo\\Z", "foo", {}, 0, true, true},
+    { "^foo\\Z", "foo\n", {}, 0, true, true},
 
     // vacuous patterns (with multiline, utf8 and caseless flags)
     // vacuous patterns have SOM turned off, so all SOM are zero
     { "b*", "abc", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, 0, false, false},
-    { "b*", "", { P(0, 0), NO_MATCH }, 0, false, false},
-    { "(aa|b*)", "a", { P(0, 0), P(0, 1), NO_MATCH }, 0, false, false},
+    { "b*", "", { P(0, 0) }, 0, false, false},
+    { "(aa|b*)", "a", { P(0, 0), P(0, 1) }, 0, false, false},
     { "b*", "bBb", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_CASELESS, false, false},
     { "b*", "abc", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_MULTILINE, false, false},
-    { "b*", "", { P(0, 0), NO_MATCH }, HS_FLAG_MULTILINE, false, false},
-    { "(aa|b*)", "a", { P(0, 0), P(0, 1), NO_MATCH }, HS_FLAG_MULTILINE, false, false},
+    { "b*", "", { P(0, 0) }, HS_FLAG_MULTILINE, false, false},
+    { "(aa|b*)", "a", { P(0, 0), P(0, 1) }, HS_FLAG_MULTILINE, false, false},
     { "b*", "bBb", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_MULTILINE | HS_FLAG_CASELESS, false, false},
     { "b*", "abc", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_UTF8, false, false},
-    { "b*", "", { P(0, 0), NO_MATCH }, HS_FLAG_UTF8, false, false},
-    { "(aa|b*)", "a", { P(0, 0), P(0, 1), NO_MATCH }, HS_FLAG_UTF8, false, false},
+    { "b*", "", { P(0, 0) }, HS_FLAG_UTF8, false, false},
+    { "(aa|b*)", "a", { P(0, 0), P(0, 1) }, HS_FLAG_UTF8, false, false},
     { "b*", "bBb", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_UTF8 | HS_FLAG_CASELESS, false, false},
     { "b*", "bBb", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, HS_FLAG_UTF8 | HS_FLAG_CASELESS, false, false},
     { ".*", "\x61\xf0\xa4\xad\xa2\x64", { P(0, 0), P(0, 1), P(0, 5), P(0, 6) }, HS_FLAG_UTF8, false, false},
     { ".*", "\xf0\xa4\xad\xa2\xf0\xa4\xad\xa3\x64", { P(0, 0), P(0, 4), P(0, 8), P(0, 9) }, HS_FLAG_UTF8, false, false},
 
     // special patterns for detecting various bugs
-    { "(\\B.|a)*a", "\xf0\xa4\xad\xa2\x61", { P(0, 5), NO_MATCH }, HS_FLAG_UTF8, false, true},
+    { "(\\B.|a)*a", "\xf0\xa4\xad\xa2\x61", { P(0, 5) }, HS_FLAG_UTF8, false, true},
     { ".*", "\xf0\xa4\xad", { P(0, 0), P(0, 1), P(0, 2), P(0, 3) }, 0, false, true},
-    { "\\Bfoo", "foo", {NO_MATCH}, 0, false, true},
-    { "fo\\B", "fo_", { P(0, 2), NO_MATCH}, 0, false, true},
+    { "\\Bfoo", "foo", {}, 0, false, true},
+    { "fo\\B", "fo_", { P(0, 2)}, 0, false, true},
     { "^.*", "\xee\x80\x80\n\xee\x80\x80", { P(0, 0), P(0, 3), P(0, 4), P(0, 7)}, HS_FLAG_UTF8 | HS_FLAG_MULTILINE, false, false},
 
     // ignore highlander patterns as they can't be easily checked
 };
 
-// by default, all matches initialize to zeroes. this makes it impossible to
-// test vacuous patterns, among other things, unless we specify every single
-// match, which is tedious. instead, we set "no match" to NO_MATCH. if the matches
-// start with NO_MATCH, everything else is set to NO_MATCH. if matches start
-// with something else, look for the next NO_MATCH and replace everything after
-// it with NO_MATCH's.
-static
-void fixMatches(const pair<size_t, size_t> in_matches[],
-                pair<size_t, size_t> out_matches[], unsigned size) {
-    bool end_matches = false;
-    for (unsigned i = 0; i < size; i++) {
-        if (in_matches[i] == NO_MATCH) {
-            end_matches = true;
-        }
-        if (end_matches) {
-            out_matches[i] = NO_MATCH;
-        }
-        else {
-            out_matches[i] = in_matches[i];
-        }
-    }
-}
-
 TEST_P(MatchesTest, Check) {
     const MatchesTestParams &t = GetParam();
     CompileContext cc(false, false, get_current_target(), Grey());
     ReportManager rm(cc.grey);
     ParsedExpression parsed(0, t.pattern.c_str(), t.flags, 0);
-    unique_ptr<NGWrapper> g = buildWrapper(rm, cc, parsed);
-
-    set<pair<size_t, size_t> > results, tmp, matches;
+    auto g = buildWrapper(rm, cc, parsed);
     bool utf8 = (t.flags & HS_FLAG_UTF8) > 0;
 
+    set<pair<size_t, size_t>> matches;
     findMatches(*g, rm, t.input, matches, t.notEod, t.som, utf8);
 
-    // fix matches and make a set out of them
-    pair<size_t, size_t> tmp_matches[NUM_MATCHES];
-    fixMatches(t.matches, tmp_matches, NUM_MATCHES);
-    tmp = set<pair<size_t, size_t> >(tmp_matches, tmp_matches + NUM_MATCHES);
-    tmp.erase(NO_MATCH);
+    set<pair<size_t, size_t>> expected(begin(t.matches), end(t.matches));
 
-    // create a superset of pattern results and matches to pick up unexpected
-    // matches as well as missed matches.
-    results.insert(tmp.begin(), tmp.end());
-    results.insert(matches.begin(), matches.end());
-
-    // check if we have the same number of matches as in expected results
-    ASSERT_EQ(results.size(), tmp.size())<< "Pattern '" << t.pattern
-    << "' against input '" << t.input << "': wrong results count";
-
-    // we already know that size of two sets is the same, so now check matches.
-    for (set<pair<size_t, size_t> >::const_iterator it = results.begin();
-            it != results.end(); ++it) {
-        ASSERT_EQ(1, matches.count(*it))<< "Pattern '" << t.pattern
-        << "' against input '" << t.input << "'";
-    }
+    ASSERT_EQ(expected, matches) << "Pattern '" << t.pattern
+                                 << "' against input '" << t.input << "'";
 }
 
 INSTANTIATE_TEST_CASE_P(ng_find_matches, MatchesTest, ValuesIn(matchesTests));

From 4a98c664b4caa32f747d6b9311b393a295eb5c4d Mon Sep 17 00:00:00 2001
From: Justin Viiret <justin.viiret@intel.com>
Date: Wed, 14 Oct 2015 12:38:13 +1100
Subject: [PATCH 03/17] HyperscanScanGigabytesMatch: use a vector

---
 unit/hyperscan/behaviour.cpp | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/unit/hyperscan/behaviour.cpp b/unit/hyperscan/behaviour.cpp
index 2ec8a9c9..98371c86 100644
--- a/unit/hyperscan/behaviour.cpp
+++ b/unit/hyperscan/behaviour.cpp
@@ -86,8 +86,7 @@ TEST(HyperscanTestBehaviour, ScanSeveralGigabytesNoMatch) {
     hs_error_t err;
     const size_t datalen = 1024 * 1024;
     size_t megabytes = 5 * 1024;
-    char * data = new char[datalen];
-    memset(data, 'X', datalen);
+    vector<char> data(datalen, 'X');
 
     // build a database
     hs_database_t *db = nullptr;
@@ -110,8 +109,8 @@ TEST(HyperscanTestBehaviour, ScanSeveralGigabytesNoMatch) {
     ASSERT_TRUE(stream != nullptr);
 
     while (megabytes-- > 0) {
-        err = hs_scan_stream(stream, data, datalen, 0, scratch, dummyHandler,
-                             nullptr);
+        err = hs_scan_stream(stream, data.data(), data.size(), 0, scratch,
+                             dummyHandler, nullptr);
         ASSERT_EQ(HS_SUCCESS, err);
     }
 
@@ -121,7 +120,6 @@ TEST(HyperscanTestBehaviour, ScanSeveralGigabytesNoMatch) {
     // teardown
     hs_free_scratch(scratch);
     hs_free_database(db);
-    delete [] data;
 }
 
 struct HugeScanMatchingData {
@@ -141,8 +139,7 @@ TEST_P(HyperscanScanGigabytesMatch, StreamingMatch) {
 
     hs_error_t err;
     const size_t datalen = 1024*1024;
-    char * data = new char[datalen];
-    memset(data, 'X', datalen);
+    vector<char> data(datalen, 'X');
 
     // build a database
     hs_database_t *db = nullptr;
@@ -178,7 +175,7 @@ TEST_P(HyperscanScanGigabytesMatch, StreamingMatch) {
         // streaming mode scan of our megabyte of data gb*1024 times
         unsigned long remaining = gb * 1024;
         while (remaining-- > 0) {
-            err = hs_scan_stream(stream, data, datalen, 0, scratch,
+            err = hs_scan_stream(stream, data.data(), data.size(), 0, scratch,
                                  singleHandler, nullptr);
             ASSERT_EQ(HS_SUCCESS, err);
             ASSERT_EQ(0ULL, lastMatchTo);
@@ -202,7 +199,6 @@ TEST_P(HyperscanScanGigabytesMatch, StreamingMatch) {
     // teardown
     hs_free_scratch(scratch);
     hs_free_database(db);
-    delete[] data;
 }
 
 // Helper function to actually perform scans for BlockMatch test below

From 441329f15a469aecc60c0207304b490e40f503f7 Mon Sep 17 00:00:00 2001
From: Justin Viiret <justin.viiret@intel.com>
Date: Thu, 15 Oct 2015 16:08:51 +1100
Subject: [PATCH 04/17] FDRp tests: less raw malloc/free

---
 unit/internal/fdr.cpp | 36 +++++++++++++++---------------------
 1 file changed, 15 insertions(+), 21 deletions(-)

diff --git a/unit/internal/fdr.cpp b/unit/internal/fdr.cpp
index 90268812..3aada867 100644
--- a/unit/internal/fdr.cpp
+++ b/unit/internal/fdr.cpp
@@ -207,7 +207,6 @@ TEST_P(FDRp, SimpleSingle) {
 TEST_P(FDRp, MultiLocation) {
     const u32 hint = GetParam();
     SCOPED_TRACE(hint);
-    u8 * data;
 
     vector<hwlmLiteral> lits;
     lits.push_back(hwlmLiteral("abc", 0, 1));
@@ -216,24 +215,23 @@ TEST_P(FDRp, MultiLocation) {
     CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
 
     const u32 testSize = 128;
-    data = (u8 *)malloc(testSize);
-    memset(data, 0, testSize);
+
+    vector<u8> data(testSize, 0);
+
     for (u32 i = 0; i < testSize - 3; i++) {
-        memcpy(data + i, "abc", 3);
+        memcpy(data.data() + i, "abc", 3);
         vector<match> matches;
-        fdrExec(fdr.get(), (const u8 *)data, testSize, 0, decentCallback,
-                &matches, HWLM_ALL_GROUPS);
+        fdrExec(fdr.get(), data.data(), testSize, 0, decentCallback, &matches,
+                HWLM_ALL_GROUPS);
         ASSERT_EQ(1U, matches.size());
         EXPECT_EQ(match(i, i+2, 1), matches[0]);
-        memset(data + i, 0, 3);
+        memset(data.data() + i, 0, 3);
     }
-    free(data);
 }
 
 TEST_P(FDRp, Flood) {
     const u32 hint = GetParam();
     SCOPED_TRACE(hint);
-    u8 * data;
 
     vector<hwlmLiteral> lits;
     lits.push_back(hwlmLiteral("aaaa", 0, 1));
@@ -245,11 +243,10 @@ TEST_P(FDRp, Flood) {
     CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
 
     const u32 testSize = 1024;
-    data = (u8 *)malloc(testSize);
-    memset(data, 'a', testSize);
+    vector<u8> data(testSize, 'a');
 
     vector<match> matches;
-    fdrExec(fdr.get(), (const u8 *)data, testSize, 0, decentCallback, &matches,
+    fdrExec(fdr.get(), data.data(), testSize, 0, decentCallback, &matches,
             HWLM_ALL_GROUPS);
     ASSERT_EQ(testSize - 3 + testSize - 7, matches.size());
     EXPECT_EQ(match(0, 3, 1), matches[0]);
@@ -266,8 +263,6 @@ TEST_P(FDRp, Flood) {
            match(i - 3, i, 1) == matches[currentMatch])
         );
     }
-
-    free(data);
 }
 
 TEST_P(FDRp, NoRepeat1) {
@@ -483,10 +478,10 @@ TEST_P(FDRp, moveByteStream) {
 
     size_t size = fdrSize(fdrTable0.get());
 
-    FDR *fdrTable = (FDR *)aligned_zmalloc(size);
-    EXPECT_TRUE(fdrTable);
+    auto fdrTable = aligned_zmalloc_unique<FDR>(size);
+    EXPECT_NE(nullptr, fdrTable);
 
-    memcpy(fdrTable, fdrTable0.get(), size);
+    memcpy(fdrTable.get(), fdrTable0.get(), size);
 
     //  bugger up original
     for (size_t i = 0 ; i < size; i++) {
@@ -496,14 +491,13 @@ TEST_P(FDRp, moveByteStream) {
     // check matches
     vector<match> matches;
 
-    hwlm_error_t fdrStatus = fdrExec(fdrTable, (const u8 *)data, data_len, 0,
-                                     decentCallback, &matches, HWLM_ALL_GROUPS);
+    hwlm_error_t fdrStatus = fdrExec(fdrTable.get(), (const u8 *)data,
+                                     data_len, 0, decentCallback, &matches,
+                                     HWLM_ALL_GROUPS);
     ASSERT_EQ(0, fdrStatus);
 
     ASSERT_EQ(1U, matches.size());
     EXPECT_EQ(match(12, 17, 0), matches[0]);
-
-    aligned_free(fdrTable);
 }
 
 TEST_P(FDRp, Stream1) {

From a2f2e11e608a29616f154da51b74adf2fc4d1a3a Mon Sep 17 00:00:00 2001
From: Justin Viiret <justin.viiret@intel.com>
Date: Tue, 20 Oct 2015 14:37:15 +1100
Subject: [PATCH 05/17] sidecar: use aligned_zmalloc_unique

---
 unit/internal/sidecar.cpp | 85 ++++++++++++++++++---------------------
 1 file changed, 39 insertions(+), 46 deletions(-)

diff --git a/unit/internal/sidecar.cpp b/unit/internal/sidecar.cpp
index b087e46b..bd517926 100644
--- a/unit/internal/sidecar.cpp
+++ b/unit/internal/sidecar.cpp
@@ -71,37 +71,35 @@ TEST(Sidecar, ns1) {
     ASSERT_TRUE(ns != nullptr);
     ASSERT_LT(0U, sidecarSize(ns.get()));
 
-    struct sidecar_enabled *enabled
-        = (struct sidecar_enabled *)aligned_zmalloc(sidecarEnabledSize(ns.get()));
-    ASSERT_TRUE(enabled);
-    sidecarEnabledInit(ns.get(), enabled);
-    struct sidecar_scratch *scratch
-        = (struct sidecar_scratch *)aligned_zmalloc(sidecarScratchSize(ns.get()));
+    auto enabled =
+        aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
+    sidecarEnabledInit(ns.get(), enabled.get());
+    auto scratch =
+        aligned_zmalloc_unique<sidecar_scratch>(sidecarScratchSize(ns.get()));
 
     for (u32 i = 0; i < 256; i++) {
         SCOPED_TRACE(i);
         u32 seen = 0;
         memset(data, i, data_len);
-        sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, ns_cb, &seen);
+        sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0,
+                    ns_cb, &seen);
         ASSERT_EQ(0U, seen);
     }
 
-    sidecarEnabledAdd(ns.get(), enabled, 0);
+    sidecarEnabledAdd(ns.get(), enabled.get(), 0);
 
     for (u32 i = 0; i < 256; i++) {
         SCOPED_TRACE(i);
         u32 seen = 0;
         memset(data, i, data_len);
-        sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, ns_cb, &seen);
+        sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0,
+                    ns_cb, &seen);
         if (i == 'f') {
             ASSERT_EQ(1U, seen);
         } else {
             ASSERT_EQ(0U, seen);
         }
     }
-
-    aligned_free(enabled);
-    aligned_free(scratch);
 }
 
 const char* sidecarStrings[] = {
@@ -186,14 +184,13 @@ TEST_P(SidecarTest, Individual) {
     ASSERT_TRUE(ns != nullptr);
     ASSERT_LT(0U, sidecarSize(ns.get()));
 
-    struct sidecar_enabled *enabled
-        = (struct sidecar_enabled *)aligned_zmalloc(sidecarEnabledSize(ns.get()));
-    ASSERT_TRUE(enabled);
-    sidecarEnabledInit(ns.get(), enabled);
-    struct sidecar_enabled *local_enabled
-        = (struct sidecar_enabled *)aligned_zmalloc(sidecarEnabledSize(ns.get()));
-    struct sidecar_scratch *scratch
-        = (struct sidecar_scratch *)aligned_zmalloc(sidecarScratchSize(ns.get()));
+    auto enabled =
+        aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
+    sidecarEnabledInit(ns.get(), enabled.get());
+    auto local_enabled =
+        aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
+    auto scratch =
+        aligned_zmalloc_unique<sidecar_scratch>(sidecarScratchSize(ns.get()));
 
     const size_t data_len = 1024;
     u8 data[data_len];
@@ -203,7 +200,8 @@ TEST_P(SidecarTest, Individual) {
         SCOPED_TRACE(i);
         memset(data, i, data_len);
         set<u32> seen;
-        sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, set_cb, &seen);
+        sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0,
+                    set_cb, &seen);
         ASSERT_TRUE(seen.empty());
     }
 
@@ -213,17 +211,18 @@ TEST_P(SidecarTest, Individual) {
         SCOPED_TRACE(c);
 
         // build a "compile time" enabled structure and add class j to it.
-        sidecarEnabledInit(ns.get(), local_enabled);
-        sidecarEnabledAdd(ns.get(), local_enabled, j);
+        sidecarEnabledInit(ns.get(), local_enabled.get());
+        sidecarEnabledAdd(ns.get(), local_enabled.get(), j);
 
         // union class j into our runtime enabled structure.
-        sidecarEnabledUnion(ns.get(), enabled, local_enabled);
+        sidecarEnabledUnion(ns.get(), enabled.get(), local_enabled.get());
 
         for (u32 i = 0; i < 256; i++) {
             SCOPED_TRACE(i);
             memset(data, i, data_len);
             set<u32> seen;
-            sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, set_cb, &seen);
+            sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(),
+                        0, set_cb, &seen);
             if (i == c) {
                 ASSERT_EQ(1U, seen.size());
                 ASSERT_EQ(j, *seen.begin());
@@ -232,10 +231,6 @@ TEST_P(SidecarTest, Individual) {
             }
         }
     }
-
-    aligned_free(local_enabled);
-    aligned_free(enabled);
-    aligned_free(scratch);
 }
 
 TEST_P(SidecarTest, Together) {
@@ -253,13 +248,13 @@ TEST_P(SidecarTest, Together) {
     ASSERT_TRUE(ns != nullptr);
     ASSERT_LT(0U, sidecarSize(ns.get()));
 
-    struct sidecar_enabled *enabled
-        = (struct sidecar_enabled *)aligned_zmalloc(sidecarEnabledSize(ns.get()));
-    ASSERT_TRUE(enabled);
-    struct sidecar_enabled *local_enabled
-        = (struct sidecar_enabled *)aligned_zmalloc(sidecarEnabledSize(ns.get()));
-    struct sidecar_scratch *scratch
-        = (struct sidecar_scratch *)aligned_zmalloc(sidecarScratchSize(ns.get()));
+    auto enabled =
+        aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
+    sidecarEnabledInit(ns.get(), enabled.get());
+    auto local_enabled =
+        aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
+    auto scratch =
+        aligned_zmalloc_unique<sidecar_scratch>(sidecarScratchSize(ns.get()));
 
     const size_t data_len = 1024;
     u8 data[data_len];
@@ -269,21 +264,22 @@ TEST_P(SidecarTest, Together) {
         SCOPED_TRACE(i);
         memset(data, i, data_len);
         set<u32> seen;
-        sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, set_cb, &seen);
+        sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0,
+                    set_cb, &seen);
         ASSERT_TRUE(seen.empty());
     }
 
     // test that every char class fires
     for (u32 j = 0; j < charclasses.size(); j++) {
         // enable the whole lot
-        sidecarEnabledInit(ns.get(), enabled);
+        sidecarEnabledInit(ns.get(), enabled.get());
         for (u32 i = 0; i < charclasses.size(); i++) {
             // build a "compile time" enabled structure and add class j to it.
-            sidecarEnabledInit(ns.get(), local_enabled);
-            sidecarEnabledAdd(ns.get(), local_enabled, i);
+            sidecarEnabledInit(ns.get(), local_enabled.get());
+            sidecarEnabledAdd(ns.get(), local_enabled.get(), i);
 
             // union class j into our runtime enabled structure.
-            sidecarEnabledUnion(ns.get(), enabled, local_enabled);
+            sidecarEnabledUnion(ns.get(), enabled.get(), local_enabled.get());
         }
 
         u32 c = chars[j];
@@ -293,7 +289,8 @@ TEST_P(SidecarTest, Together) {
             SCOPED_TRACE(i);
             memset(data, i, data_len);
             set<u32> seen;
-            sidecarExec(ns.get(), data, data_len, enabled, scratch, 0, set_cb, &seen);
+            sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(),
+                        0, set_cb, &seen);
             if (i == c) {
                 // seen should contain only `c'
                 ASSERT_EQ(1U, seen.size());
@@ -306,10 +303,6 @@ TEST_P(SidecarTest, Together) {
             }
         }
     }
-
-    aligned_free(local_enabled);
-    aligned_free(enabled);
-    aligned_free(scratch);
 }
 
 INSTANTIATE_TEST_CASE_P(Sidecar, SidecarTest,

From b59491d0dbab1a8ed47898bb397d4a539382c0e7 Mon Sep 17 00:00:00 2001
From: Justin Viiret <justin.viiret@intel.com>
Date: Fri, 23 Oct 2015 11:39:31 +1100
Subject: [PATCH 06/17] Custom NFA_API_NO_IMPL variant for zombie_status

Silences ICC warning #188: enumerated type mixed with another type.
---
 src/nfa/castle.h       |  2 +-
 src/nfa/gough.h        |  4 ++--
 src/nfa/lbr.h          | 10 +++++-----
 src/nfa/mcclellan.h    |  4 ++--
 src/nfa/mpv.h          |  2 +-
 src/nfa/nfa_internal.h | 22 +++++++++++++++++++---
 6 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/src/nfa/castle.h b/src/nfa/castle.h
index 6a7fe70c..8fc3514b 100644
--- a/src/nfa/castle.h
+++ b/src/nfa/castle.h
@@ -54,7 +54,7 @@ char nfaExecCastle0_expandState(const struct NFA *nfa, void *dest,
 
 #define nfaExecCastle0_testEOD NFA_API_NO_IMPL
 #define nfaExecCastle0_B_Reverse NFA_API_NO_IMPL
-#define nfaExecCastle0_zombie_status NFA_API_NO_IMPL
+#define nfaExecCastle0_zombie_status NFA_API_ZOMBIE_NO_IMPL
 
 #ifdef __cplusplus
 }
diff --git a/src/nfa/gough.h b/src/nfa/gough.h
index 7729c653..41d4cb5a 100644
--- a/src/nfa/gough.h
+++ b/src/nfa/gough.h
@@ -55,7 +55,7 @@ char nfaExecGough8_expandState(const struct NFA *nfa, void *dest,
                                const void *src, u64a offset, u8 key);
 
 #define nfaExecGough8_B_Reverse NFA_API_NO_IMPL
-#define nfaExecGough8_zombie_status NFA_API_NO_IMPL
+#define nfaExecGough8_zombie_status NFA_API_ZOMBIE_NO_IMPL
 
 // 16-bit Gough
 
@@ -77,6 +77,6 @@ char nfaExecGough16_expandState(const struct NFA *nfa, void *dest,
                                 const void *src, u64a offset, u8 key);
 
 #define nfaExecGough16_B_Reverse NFA_API_NO_IMPL
-#define nfaExecGough16_zombie_status NFA_API_NO_IMPL
+#define nfaExecGough16_zombie_status NFA_API_ZOMBIE_NO_IMPL
 
 #endif
diff --git a/src/nfa/lbr.h b/src/nfa/lbr.h
index af7d983d..b770477d 100644
--- a/src/nfa/lbr.h
+++ b/src/nfa/lbr.h
@@ -56,7 +56,7 @@ char nfaExecLbrDot_expandState(const struct NFA *nfa, void *dest,
 
 #define nfaExecLbrDot_testEOD NFA_API_NO_IMPL
 #define nfaExecLbrDot_B_Reverse NFA_API_NO_IMPL
-#define nfaExecLbrDot_zombie_status NFA_API_NO_IMPL
+#define nfaExecLbrDot_zombie_status NFA_API_ZOMBIE_NO_IMPL
 
 // LBR Verm
 
@@ -76,7 +76,7 @@ char nfaExecLbrVerm_expandState(const struct NFA *nfa, void *dest,
 
 #define nfaExecLbrVerm_testEOD NFA_API_NO_IMPL
 #define nfaExecLbrVerm_B_Reverse NFA_API_NO_IMPL
-#define nfaExecLbrVerm_zombie_status NFA_API_NO_IMPL
+#define nfaExecLbrVerm_zombie_status NFA_API_ZOMBIE_NO_IMPL
 
 // LBR Negated Verm
 
@@ -96,7 +96,7 @@ char nfaExecLbrNVerm_expandState(const struct NFA *nfa, void *dest,
 
 #define nfaExecLbrNVerm_testEOD NFA_API_NO_IMPL
 #define nfaExecLbrNVerm_B_Reverse NFA_API_NO_IMPL
-#define nfaExecLbrNVerm_zombie_status NFA_API_NO_IMPL
+#define nfaExecLbrNVerm_zombie_status NFA_API_ZOMBIE_NO_IMPL
 
 // LBR Shuf
 
@@ -116,7 +116,7 @@ char nfaExecLbrShuf_expandState(const struct NFA *nfa, void *dest,
 
 #define nfaExecLbrShuf_testEOD NFA_API_NO_IMPL
 #define nfaExecLbrShuf_B_Reverse NFA_API_NO_IMPL
-#define nfaExecLbrShuf_zombie_status NFA_API_NO_IMPL
+#define nfaExecLbrShuf_zombie_status NFA_API_ZOMBIE_NO_IMPL
 
 // LBR Truffle
 
@@ -136,7 +136,7 @@ char nfaExecLbrTruf_expandState(const struct NFA *nfa, void *dest,
 
 #define nfaExecLbrTruf_testEOD NFA_API_NO_IMPL
 #define nfaExecLbrTruf_B_Reverse NFA_API_NO_IMPL
-#define nfaExecLbrTruf_zombie_status NFA_API_NO_IMPL
+#define nfaExecLbrTruf_zombie_status NFA_API_ZOMBIE_NO_IMPL
 
 #ifdef __cplusplus
 }
diff --git a/src/nfa/mcclellan.h b/src/nfa/mcclellan.h
index 856740b1..6b4ec2d5 100644
--- a/src/nfa/mcclellan.h
+++ b/src/nfa/mcclellan.h
@@ -56,7 +56,7 @@ char nfaExecMcClellan8_expandState(const struct NFA *nfa, void *dest,
                                    const void *src, u64a offset, u8 key);
 
 #define nfaExecMcClellan8_B_Reverse NFA_API_NO_IMPL
-#define nfaExecMcClellan8_zombie_status NFA_API_NO_IMPL
+#define nfaExecMcClellan8_zombie_status NFA_API_ZOMBIE_NO_IMPL
 
 // 16-bit McClellan
 
@@ -79,7 +79,7 @@ char nfaExecMcClellan16_expandState(const struct NFA *nfa, void *dest,
                                     const void *src, u64a offset, u8 key);
 
 #define nfaExecMcClellan16_B_Reverse NFA_API_NO_IMPL
-#define nfaExecMcClellan16_zombie_status NFA_API_NO_IMPL
+#define nfaExecMcClellan16_zombie_status NFA_API_ZOMBIE_NO_IMPL
 
 /**
  * Simple streaming mode calls:
diff --git a/src/nfa/mpv.h b/src/nfa/mpv.h
index 18c52e99..dc5dad6f 100644
--- a/src/nfa/mpv.h
+++ b/src/nfa/mpv.h
@@ -50,7 +50,7 @@ char nfaExecMpv0_expandState(const struct NFA *nfa, void *dest, const void *src,
 #define nfaExecMpv0_QR NFA_API_NO_IMPL
 #define nfaExecMpv0_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */
 #define nfaExecMpv0_B_Reverse NFA_API_NO_IMPL
-#define nfaExecMpv0_zombie_status NFA_API_NO_IMPL
+#define nfaExecMpv0_zombie_status NFA_API_ZOMBIE_NO_IMPL
 
 /**
  * return 0 if the mpv dies, otherwise returns the location of the next possible
diff --git a/src/nfa/nfa_internal.h b/src/nfa/nfa_internal.h
index 14678c2f..e13482b5 100644
--- a/src/nfa/nfa_internal.h
+++ b/src/nfa/nfa_internal.h
@@ -237,16 +237,32 @@ static really_inline
 int isMultiTopType(u8 t) {
     return !isDfaType(t) && !isLbrType(t);
 }
-/** Macro used in place of unimplemented NFA API functions for a given
+
+/** Macros used in place of unimplemented NFA API functions for a given
  * engine. */
 #if !defined(_WIN32)
-#define NFA_API_NO_IMPL(...)                                                  \
+
+/* Use for functions that return an integer. */
+#define NFA_API_NO_IMPL(...)                                                   \
     ({                                                                         \
         assert("not implemented for this engine!");                            \
         0; /* return value, for places that need it */                         \
     })
+
+/* Use for _zombie_status functions. */
+#define NFA_API_ZOMBIE_NO_IMPL(...)                                            \
+    ({                                                                         \
+        assert("not implemented for this engine!");                            \
+        NFA_ZOMBIE_NO;                                                         \
+    })
+
 #else
-#define NFA_API_NO_IMPL(...) 0
+
+/* Simpler implementation for compilers that don't like the GCC extension used
+ * above. */
+#define NFA_API_NO_IMPL(...)        0
+#define NFA_API_ZOMBIE_NO_IMPL(...) NFA_ZOMBIE_NO
+
 #endif
 
 #ifdef __cplusplus

From 9ff1303cd8adf1010fce16e36a803f53ec3a7ff2 Mon Sep 17 00:00:00 2001
From: Justin Viiret <justin.viiret@intel.com>
Date: Fri, 23 Oct 2015 15:32:55 +1100
Subject: [PATCH 07/17] Allow no scratch for stream reset API calls

Bring hs_reset_stream(), hs_reset_and_copy_stream()'s functionality into
line with hs_close_stream() by accepting a NULL scratch if and only if
the match callback is also NULL, indicating that no matches should be
delivered.
---
 src/hs_runtime.h              |  6 +++--
 src/runtime.c                 | 13 ++++++-----
 unit/hyperscan/arg_checks.cpp | 41 ++++++++++++++++++++++++++++++++---
 unit/hyperscan/stream_op.cpp  | 18 +++++++++------
 4 files changed, 60 insertions(+), 18 deletions(-)

diff --git a/src/hs_runtime.h b/src/hs_runtime.h
index 27de00a3..db52f4f5 100644
--- a/src/hs_runtime.h
+++ b/src/hs_runtime.h
@@ -250,7 +250,8 @@ hs_error_t hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
  *      for future use and is unused at present.
  *
  * @param scratch
- *      A per-thread scratch space allocated by @ref hs_alloc_scratch().
+ *      A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
+ *      allowed to be NULL only if the @a onEvent callback is also NULL.
  *
  * @param onEvent
  *      Pointer to a match event callback function. If a NULL pointer is given,
@@ -299,7 +300,8 @@ hs_error_t hs_copy_stream(hs_stream_t **to_id, const hs_stream_t *from_id);
  *      The stream (as created by @ref hs_open_stream()) to be copied.
  *
  * @param scratch
- *      A per-thread scratch space allocated by @ref hs_alloc_scratch().
+ *      A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
+ *      allowed to be NULL only if the @a onEvent callback is also NULL.
  *
  * @param onEvent
  *      Pointer to a match event callback function. If a NULL pointer is given,
diff --git a/src/runtime.c b/src/runtime.c
index 66323789..335a83bc 100644
--- a/src/runtime.c
+++ b/src/runtime.c
@@ -1163,11 +1163,10 @@ hs_error_t hs_reset_and_copy_stream(hs_stream_t *to_id,
         return HS_INVALID;
     }
 
-    if (!scratch || !validScratch(to_id->rose, scratch)) {
-        return HS_INVALID;
-    }
-
     if (onEvent) {
+        if (!scratch || !validScratch(to_id->rose, scratch)) {
+            return HS_INVALID;
+        }
         report_eod_matches(to_id, scratch, onEvent, context);
     }
 
@@ -1406,12 +1405,14 @@ HS_PUBLIC_API
 hs_error_t hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags,
                            hs_scratch_t *scratch, match_event_handler onEvent,
                            void *context) {
-    if (!id || !scratch || !validScratch(id->rose, scratch)) {
+    if (!id) {
         return HS_INVALID;
     }
 
-    /* user wants eod matches */
     if (onEvent) {
+        if (!scratch || !validScratch(id->rose, scratch)) {
+            return HS_INVALID;
+        }
         report_eod_matches(id, scratch, onEvent, context);
     }
 
diff --git a/unit/hyperscan/arg_checks.cpp b/unit/hyperscan/arg_checks.cpp
index f717f0dc..dbc692c5 100644
--- a/unit/hyperscan/arg_checks.cpp
+++ b/unit/hyperscan/arg_checks.cpp
@@ -740,7 +740,9 @@ TEST(HyperscanArgChecks, ResetAndCopyStreamSameToId) {
     hs_free_database(db);
 }
 
-TEST(HyperscanArgChecks, ResetAndCopyStreamNoScratch) {
+// hs_reset_and_copy_stream: You're allowed to reset and copy a stream with no
+// scratch and no callback.
+TEST(HyperscanArgChecks, ResetAndCopyStreamNoCallbackOrScratch) {
     hs_stream_t *stream = nullptr;
     hs_stream_t *stream_to = nullptr;
     hs_database_t *db = nullptr;
@@ -760,6 +762,37 @@ TEST(HyperscanArgChecks, ResetAndCopyStreamNoScratch) {
     ASSERT_EQ(HS_SUCCESS, err);
 
     err = hs_reset_and_copy_stream(stream_to, stream, nullptr, nullptr, nullptr);
+    ASSERT_EQ(HS_SUCCESS, err);
+
+    hs_close_stream(stream_to, scratch, nullptr, nullptr);
+    hs_close_stream(stream, scratch, nullptr, nullptr);
+    hs_free_scratch(scratch);
+    hs_free_database(db);
+}
+
+// hs_reset_and_copy_stream: If you specify a callback, you must provide
+// scratch.
+TEST(HyperscanArgChecks, ResetAndCopyStreamNoScratch) {
+    hs_stream_t *stream = nullptr;
+    hs_stream_t *stream_to = nullptr;
+    hs_database_t *db = nullptr;
+    hs_compile_error_t *compile_err = nullptr;
+    hs_error_t err = hs_compile("foobar", 0, HS_MODE_STREAM, nullptr, &db,
+                                &compile_err);
+    ASSERT_EQ(HS_SUCCESS, err);
+
+    hs_scratch_t *scratch = nullptr;
+    err = hs_alloc_scratch(db, &scratch);
+    ASSERT_EQ(HS_SUCCESS, err);
+
+    err = hs_open_stream(db, 0, &stream);
+    ASSERT_EQ(HS_SUCCESS, err);
+
+    err = hs_open_stream(db, 0, &stream_to);
+    ASSERT_EQ(HS_SUCCESS, err);
+
+    err = hs_reset_and_copy_stream(stream_to, stream, nullptr, dummy_cb,
+                                   nullptr);
     ASSERT_EQ(HS_INVALID, err);
 
     hs_close_stream(stream_to, scratch, nullptr, nullptr);
@@ -793,7 +826,8 @@ TEST(HyperscanArgChecks, ResetAndCopyStreamDiffDb) {
     err = hs_open_stream(db2, 0, &stream_to);
     ASSERT_EQ(HS_SUCCESS, err);
 
-    err = hs_reset_and_copy_stream(stream_to, stream, scratch, nullptr, nullptr);
+    err = hs_reset_and_copy_stream(stream_to, stream, scratch, nullptr,
+                                   nullptr);
     ASSERT_EQ(HS_INVALID, err);
 
     hs_close_stream(stream_to, scratch, nullptr, nullptr);
@@ -2009,6 +2043,7 @@ TEST(HyperscanArgChecks, ScanStreamBadScratch) {
     free(local_garbage);
 }
 
+// hs_reset_stream: bad scratch arg
 TEST(HyperscanArgChecks, ResetStreamBadScratch) {
     hs_database_t *db = nullptr;
     hs_compile_error_t *compile_err = nullptr;
@@ -2025,7 +2060,7 @@ TEST(HyperscanArgChecks, ResetStreamBadScratch) {
     ASSERT_EQ(HS_SUCCESS, err);
     ASSERT_TRUE(stream != nullptr);
 
-    err = hs_reset_stream(stream, 0, scratch, nullptr, nullptr);
+    err = hs_reset_stream(stream, 0, scratch, dummy_cb, nullptr);
     EXPECT_NE(HS_SUCCESS, err);
     EXPECT_NE(HS_SCAN_TERMINATED, err);
 
diff --git a/unit/hyperscan/stream_op.cpp b/unit/hyperscan/stream_op.cpp
index fbec6b98..a78e08a1 100644
--- a/unit/hyperscan/stream_op.cpp
+++ b/unit/hyperscan/stream_op.cpp
@@ -69,7 +69,9 @@ TEST(StreamUtil, reset1) {
 
     c.matches.clear();
 
-    err = hs_reset_stream(stream, 0, scratch, nullptr, nullptr);
+    // Note: we do not need matches from this reset operation, so we do not
+    // need to supply a callback or scratch space.
+    err = hs_reset_stream(stream, 0, nullptr, nullptr, nullptr);
     ASSERT_EQ(HS_SUCCESS, err);
 
     err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb2,
@@ -107,7 +109,9 @@ TEST(StreamUtil, reset2) {
 
     c.matches.clear();
 
-    err = hs_reset_stream(stream, 0, scratch, nullptr, nullptr);
+    // Note: we do not need matches from this reset operation, so we do not
+    // need to supply a callback or scratch space.
+    err = hs_reset_stream(stream, 0, nullptr, nullptr, nullptr);
     ASSERT_EQ(HS_SUCCESS, err);
 
     err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb,
@@ -268,7 +272,7 @@ TEST(StreamUtil, copy_reset1) {
 
     c.matches.clear();
 
-    err = hs_reset_and_copy_stream(stream, stream2, scratch, nullptr, nullptr);
+    err = hs_reset_and_copy_stream(stream, stream2, nullptr, nullptr, nullptr);
     ASSERT_EQ(HS_SUCCESS, err);
 
     err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb2,
@@ -312,7 +316,7 @@ TEST(StreamUtil, copy_reset2) {
 
     c.matches.clear();
 
-    err = hs_reset_and_copy_stream(stream, stream2, scratch, nullptr, nullptr);
+    err = hs_reset_and_copy_stream(stream, stream2, nullptr, nullptr, nullptr);
     ASSERT_EQ(HS_SUCCESS, err);
 
     err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb,
@@ -355,7 +359,7 @@ TEST(StreamUtil, copy_reset3) {
 
     c.matches.clear();
 
-    err = hs_reset_and_copy_stream(stream2, stream, scratch, nullptr, nullptr);
+    err = hs_reset_and_copy_stream(stream2, stream, nullptr, nullptr, nullptr);
     ASSERT_EQ(HS_SUCCESS, err);
 
     err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb,
@@ -408,7 +412,7 @@ TEST(StreamUtil, copy_reset4) {
 
     c.matches.clear();
 
-    err = hs_reset_and_copy_stream(stream2, stream, scratch, nullptr, nullptr);
+    err = hs_reset_and_copy_stream(stream2, stream, nullptr, nullptr, nullptr);
     ASSERT_EQ(HS_SUCCESS, err);
 
     err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb,
@@ -458,7 +462,7 @@ TEST(StreamUtil, copy_reset5) {
     ASSERT_EQ(HS_SUCCESS, err);
     ASSERT_EQ(0U, c.matches.size());
 
-    err = hs_reset_and_copy_stream(stream2, stream, scratch, nullptr, nullptr);
+    err = hs_reset_and_copy_stream(stream2, stream, nullptr, nullptr, nullptr);
     ASSERT_EQ(HS_SUCCESS, err);
 
     err = hs_scan_stream(stream, data1, sizeof(data1), 0, scratch, record_cb,

From 55b357f7d1276cd4935a9034343726211a873d20 Mon Sep 17 00:00:00 2001
From: Justin Viiret <justin.viiret@intel.com>
Date: Fri, 23 Oct 2015 10:59:48 +1100
Subject: [PATCH 08/17] Remove enum mqe_event and use u32 for queue events

We were using intermediate values int he enum and casting back and forth
with a u32; it is cleaner to just use a u32 and define some special
values.

Silences ICC warning #188: enumerated type mixed with another type.
---
 src/nfa/lbr_common_impl.h        |  6 +++---
 src/nfa/mpvcompile.cpp           |  6 +++---
 src/nfa/nfa_api_queue.h          | 30 +++++++++++++++++-------------
 src/rose/match.c                 |  6 +++---
 src/rose/rose_build_bytecode.cpp |  2 +-
 src/rose/rose_internal.h         |  4 ++--
 6 files changed, 29 insertions(+), 25 deletions(-)

diff --git a/src/nfa/lbr_common_impl.h b/src/nfa/lbr_common_impl.h
index ef12f193..917a8e91 100644
--- a/src/nfa/lbr_common_impl.h
+++ b/src/nfa/lbr_common_impl.h
@@ -151,12 +151,12 @@ char JOIN(ENGINE_EXEC_NAME, _TopScan)(const struct NFA *nfa, struct mq *q,
     while (1) {
         // Find the next top with location >= the last escape we saw.
         for (; q->cur < q->end && q_cur_loc(q) <= end; q->cur++) {
-            enum mqe_event t = q_cur_type(q);
-            if ((t == MQE_TOP || t == MQE_TOP_FIRST) &&
+            u32 event = q_cur_type(q);
+            if ((event == MQE_TOP || event == MQE_TOP_FIRST) &&
                 q_cur_offset(q) >= lstate->lastEscape) {
                 goto found_top;
             }
-            DEBUG_PRINTF("skip event type=%d offset=%lld\n", t, q_cur_offset(q));
+            DEBUG_PRINTF("skip event type=%u offset=%lld\n", event, q_cur_offset(q));
         }
 
         // No more tops, we're done.
diff --git a/src/nfa/mpvcompile.cpp b/src/nfa/mpvcompile.cpp
index ce0ac473..e4741ef1 100644
--- a/src/nfa/mpvcompile.cpp
+++ b/src/nfa/mpvcompile.cpp
@@ -250,7 +250,7 @@ void fillCounterInfos(vector<mpv_counter_info> *out, u32 *curr_decomp_offset,
                       const map<ClusterKey, vector<raw_puff>> &kilopuffs) {
     /* first the triggered puffs */
     map<ClusterKey, vector<raw_puff>>::const_iterator it = kilopuffs.begin();
-    while (it != kilopuffs.end() && it->first.trigger_event != ~0U) {
+    while (it != kilopuffs.end() && it->first.trigger_event != MQE_INVALID) {
         assert(!it->first.auto_restart);
         assert(it->first.trigger_event
                == MQE_TOP_FIRST + distance(kilopuffs.begin(), it));
@@ -268,7 +268,7 @@ void fillCounterInfos(vector<mpv_counter_info> *out, u32 *curr_decomp_offset,
      */
     map<ClusterKey, vector<raw_puff>>::const_iterator trig_ite = it;
     while (it != kilopuffs.end() && !it->first.auto_restart) {
-        assert(it->first.trigger_event == ~0U);
+        assert(it->first.trigger_event == MQE_INVALID);
 
         ++it;
     }
@@ -278,7 +278,7 @@ void fillCounterInfos(vector<mpv_counter_info> *out, u32 *curr_decomp_offset,
                         kilopuffs, kilopuffs.begin(), it);
     }
     while (it != kilopuffs.end() && it->first.auto_restart) {
-        assert(it->first.trigger_event == ~0U);
+        assert(it->first.trigger_event == MQE_INVALID);
 
         out->push_back(mpv_counter_info());
         map<ClusterKey, vector<raw_puff>>::const_iterator it_o = it;
diff --git a/src/nfa/nfa_api_queue.h b/src/nfa/nfa_api_queue.h
index 123ce3e4..d8079292 100644
--- a/src/nfa/nfa_api_queue.h
+++ b/src/nfa/nfa_api_queue.h
@@ -41,24 +41,28 @@ extern "C"
 #define MAX_MQE_LEN 10
 
 /** Queue events */
-enum mqe_event {
-    MQE_START = 0,       /**< and begin! Note: stateless engines will start from
-                          *  this location */
-    MQE_END = 1,         /**< stop scanning */
-    MQE_TOP = 2,         /**< enable start + start dot star */
-    MQE_TOP_FIRST = 4, /**< first event corresponding to a TOP _N_ */
 
-    /*
-     * Additional tops (in multi-top engines) use the event values from
-     * MQE_TOP_FIRST to something.
-     */
+/** Queue event: begin scanning. Note: stateless engines will start from this
+ * location. */
+#define MQE_START 0U
 
-    MQE_INVALID = ~0U
-};
+/** Queue event: stop scanning. */
+#define MQE_END 1U
+
+/** Queue event: enable start and start-dot-star. */
+#define MQE_TOP 2U
+
+/** Queue event: first event corresponding to a numbered TOP. Additional tops
+ * (in multi-top engines) use the event values from MQE_TOP_FIRST to
+ * MQE_INVALID - 1. */
+#define MQE_TOP_FIRST 4U
+
+/** Invalid queue event */
+#define MQE_INVALID (~0U)
 
 /** Queue item */
 struct mq_item {
-    u32 type; /**< event; from mqe_event */
+    u32 type; /**< event type, from MQE_* */
     s64a location; /**< relative to the start of the current buffer */
     u64a som; /**< pattern start-of-match corresponding to a top, only used
                * by som engines. */
diff --git a/src/rose/match.c b/src/rose/match.c
index 6903baa5..32224c24 100644
--- a/src/rose/match.c
+++ b/src/rose/match.c
@@ -391,7 +391,7 @@ hwlmcb_rv_t roseHandleSuffixTrigger(const struct RoseEngine *t,
         }
     }
 
-    enum mqe_event top = tr->suffixEvent;
+    u32 top = tr->suffixEvent;
     assert(top == MQE_TOP || (top >= MQE_TOP_FIRST && top < MQE_INVALID));
     pushQueueSom(q, top, loc, som);
 
@@ -977,14 +977,14 @@ void roseTriggerInfixes(const struct RoseEngine *t, const struct RoseRole *tr,
     do {
         u32 qi = curr_r->queue;
         u32 ri = queueToLeftIndex(t, qi);
-        enum mqe_event topEvent = curr_r->event;
+        u32 topEvent = curr_r->event;
         u8 cancel = curr_r->cancel_prev_top;
         assert(topEvent < MQE_INVALID);
 
         const struct LeftNfaInfo *left = getLeftInfoByQueue(t, qi);
         assert(!left->transient);
 
-        DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, (u32)topEvent);
+        DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, topEvent);
 
         struct mq *q = tctxtToScratch(tctxt)->queues + qi;
         const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index 9cc1484f..bbc8644e 100644
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@@ -2433,7 +2433,7 @@ vector<RoseTrigger> buildRoseTriggerList(const RoseGraph &g, RoseVertex u,
             assert(num_tops(g[v].left) == 1);
             top = MQE_TOP;
         } else {
-            top = (enum mqe_event)((u32)MQE_TOP_FIRST + g[e].rose_top);
+            top = MQE_TOP_FIRST + g[e].rose_top;
             assert(top < MQE_INVALID);
         }
 
diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h
index 441680ca..1f4fc93a 100644
--- a/src/rose/rose_internal.h
+++ b/src/rose/rose_internal.h
@@ -236,7 +236,7 @@ struct LeftNfaInfo {
 // A list of these is used to trigger prefix/infix roses.
 struct RoseTrigger {
     u32 queue; // queue index of leftfix
-    u32 event; // from enum mqe_event
+    u32 event; // queue event, from MQE_*
     u8 cancel_prev_top;
 };
 
@@ -309,7 +309,7 @@ struct RoseRole {
     ReportID reportId; // report ID, or MO_INVALID_IDX
     u32 stateIndex; /**< index into state multibit, or MMB_INVALID. Roles do not
                      * require a state bit if they are terminal */
-    u32 suffixEvent; // from enum mqe_event
+    u32 suffixEvent; // queue event, from MQE_
     u8 depth; /**< depth of this vertex from root in the tree, or 255 if greater.
                */
     u32 suffixOffset; /**< suffix nfa: 0 if no suffix associated with the role,

From e67b8032f77ebe337c87aeb3f550089dccfdd990 Mon Sep 17 00:00:00 2001
From: Justin Viiret <justin.viiret@intel.com>
Date: Tue, 20 Oct 2015 13:24:23 +1100
Subject: [PATCH 09/17] Unbreak unit-internal for builds w/o dump support

Use printable, rather than escapeString.
---
 unit/internal/utf8_validate.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/unit/internal/utf8_validate.cpp b/unit/internal/utf8_validate.cpp
index 83358361..6649e6fe 100644
--- a/unit/internal/utf8_validate.cpp
+++ b/unit/internal/utf8_validate.cpp
@@ -31,7 +31,7 @@
 #include "parser/utf8_validate.h"
 
 #include "ue2common.h"
-#include "util/ue2string.h"
+#include "util/string_util.h"
 
 #include "gtest/gtest.h"
 
@@ -46,7 +46,7 @@ struct ValidUtf8TestInfo {
 // Helper for gtest.
 static
 void PrintTo(const ValidUtf8TestInfo &t, ::std::ostream *os) {
-    *os << "(" << t.str << ", " << t.is_valid << ")";
+    *os << "(\"" << printable(t.str) << "\", " << t.is_valid << ")";
 }
 
 static ValidUtf8TestInfo valid_utf8_tests[] = {
@@ -118,5 +118,5 @@ INSTANTIATE_TEST_CASE_P(ValidUtf8, ValidUtf8Test, ValuesIn(valid_utf8_tests));
 TEST_P(ValidUtf8Test, check) {
     const auto &info = GetParam();
     ASSERT_EQ(info.is_valid, isValidUtf8(info.str.c_str()))
-        << "String is: " << escapeString(info.str) << std::endl;
+        << "String is: " << printable(info.str) << std::endl;
 }

From b77613802d257162e826e2e3bfeca5d25b5c946f Mon Sep 17 00:00:00 2001
From: Matthew Barr <matthew.barr@intel.com>
Date: Mon, 26 Oct 2015 15:53:55 +1100
Subject: [PATCH 10/17] Update CMake required min version to 2.8.11

RedHat/CentOS 7 ship with 2.8.11 so this is a sane minimum.
---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0dd02942..9760d36a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required (VERSION 2.8)
+cmake_minimum_required (VERSION 2.8.11)
 project (Hyperscan C CXX)
 
 set (HS_MAJOR_VERSION 4)

From 1afc591c30b8171df82bdcdcc613e280558604d6 Mon Sep 17 00:00:00 2001
From: Justin Viiret <justin.viiret@intel.com>
Date: Thu, 29 Oct 2015 09:08:40 +1100
Subject: [PATCH 11/17] Check for (and throw on) large min repeat

We were only checking for large maximum bounds, which meant that we
would attempt to compile A{N,} where N is huge.
---
 src/parser/ComponentRepeat.cpp  | 12 ++++++++----
 unit/hyperscan/bad_patterns.txt |  1 +
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/parser/ComponentRepeat.cpp b/src/parser/ComponentRepeat.cpp
index 325114d8..8cd88372 100644
--- a/src/parser/ComponentRepeat.cpp
+++ b/src/parser/ComponentRepeat.cpp
@@ -51,11 +51,11 @@ using namespace std;
 namespace ue2 {
 
 /** \brief Hard limit on the maximum repeat for bounded repeats. */
-static const u32 MAX_MAX_BOUND = 32767;
+static constexpr u32 MAX_REPEAT = 32767;
 
 /** \brief If expanding a repeat would lead to this many positions being
  * generated, we fail the pattern. */
-static const u32 MAX_POSITIONS_EXPANDED = 500000; // arbitrarily huge
+static constexpr u32 MAX_POSITIONS_EXPANDED = 500000; // arbitrarily huge
 
 /* no edge priorities means that if our subcomponent can be empty, our min
  * extent is effectively zero. */
@@ -67,7 +67,11 @@ ComponentRepeat::ComponentRepeat(unique_ptr<Component> sub_comp_in, u32 min,
     assert(sub_comp);
     assert(max > 0);
     assert(m_min <= m_max);
-    if (m_max < NoLimit && m_max > MAX_MAX_BOUND) {
+
+    if (m_min > MAX_REPEAT) {
+        throw ParseError("Bounded repeat is too large.");
+    }
+    if (m_max != NoLimit && m_max > MAX_REPEAT) {
         throw ParseError("Bounded repeat is too large.");
     }
 }
@@ -119,7 +123,7 @@ void checkPositions(vector<PositionInfo> &v, const GlushkovBuildState &bs) {
 
 void ComponentRepeat::notePositions(GlushkovBuildState &bs) {
     assert(m_max > 0);
-    assert(m_max == NoLimit || m_max < MAX_MAX_BOUND);
+    assert(m_max == NoLimit || m_max < MAX_REPEAT);
 
     /* Note: We can construct smaller subgraphs if we're not maintaining edge
      * priorities. */
diff --git a/unit/hyperscan/bad_patterns.txt b/unit/hyperscan/bad_patterns.txt
index 53a4dcc0..1ad445b3 100644
--- a/unit/hyperscan/bad_patterns.txt
+++ b/unit/hyperscan/bad_patterns.txt
@@ -127,3 +127,4 @@
 127:/^fo?ob{ro|nax_off\Qt=10omnax+8Wnah/���������������������������0}l.{1,60}Car*k|npanomnax+8Wnah/8 #Expression is not valid UTF-8.
 128:/(*UTF8)^fo?ob{ro|nax_off\Qt=10omnax+8Wnah/���������������������������0}l.{1,60}Car*k|npanomnax+8Wnah/ #Expression is not valid UTF-8.
 129:/bignum \1111111111111111111/ #Number is too big at index 7.
+130:/foo|&{5555555,}/ #Bounded repeat is too large.

From 629be086834197bc4d704730f07072ad6ee85e16 Mon Sep 17 00:00:00 2001
From: Alex Coyte <a.coyte@intel.com>
Date: Thu, 29 Oct 2015 14:35:02 +1100
Subject: [PATCH 12/17] reduce memory use in
 ng_small_literal_set/ng_literal_decorated

These passes kept temporary strings/paths alive longer than was needed which
lead to high memory usage during these passes in pathological cases.
---
 src/nfagraph/ng_literal_decorated.cpp | 16 ++++++++++++++++
 src/nfagraph/ng_small_literal_set.cpp | 18 ++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/src/nfagraph/ng_literal_decorated.cpp b/src/nfagraph/ng_literal_decorated.cpp
index 652fd14a..02b25a73 100644
--- a/src/nfagraph/ng_literal_decorated.cpp
+++ b/src/nfagraph/ng_literal_decorated.cpp
@@ -67,6 +67,7 @@ static
 bool findPaths(const NGHolder &g, vector<Path> &paths) {
     vector<NFAVertex> order = getTopoOrdering(g);
 
+    vector<size_t> read_count(num_vertices(g));
     vector<vector<Path>> built(num_vertices(g));
 
     for (auto it = order.rbegin(); it != order.rend(); ++it) {
@@ -74,6 +75,11 @@ bool findPaths(const NGHolder &g, vector<Path> &paths) {
         auto &out = built[g[v].index];
         assert(out.empty());
 
+        read_count[g[v].index] = out_degree(v, g);
+
+        DEBUG_PRINTF("setting read_count to %zu for %u\n",
+                      read_count[g[v].index], g[v].index);
+
         if (v == g.start || v == g.startDs) {
             out.push_back({v});
             continue;
@@ -94,6 +100,9 @@ bool findPaths(const NGHolder &g, vector<Path> &paths) {
                 continue;
             }
 
+            assert(!built[g[u].index].empty());
+            assert(read_count[g[u].index]);
+
             for (const auto &p : built[g[u].index]) {
                 out.push_back(p);
                 out.back().push_back(v);
@@ -105,6 +114,13 @@ bool findPaths(const NGHolder &g, vector<Path> &paths) {
                     return false;
                 }
             }
+
+            read_count[g[u].index]--;
+            if (!read_count[g[u].index]) {
+                DEBUG_PRINTF("clearing %u as finished reading\n", g[u].index);
+                built[g[u].index].clear();
+                built[g[u].index].shrink_to_fit();
+            }
         }
     }
 
diff --git a/src/nfagraph/ng_small_literal_set.cpp b/src/nfagraph/ng_small_literal_set.cpp
index 89cb0ff8..b5867bb9 100644
--- a/src/nfagraph/ng_small_literal_set.cpp
+++ b/src/nfagraph/ng_small_literal_set.cpp
@@ -118,10 +118,15 @@ bool findLiterals(const NGHolder &g,
     vector<NFAVertex> order = getTopoOrdering(g);
 
     vector<set<sls_literal>> built(num_vertices(g));
+    vector<size_t> read_count(num_vertices(g));
 
     for (auto it = order.rbegin(); it != order.rend(); ++it) {
         NFAVertex v = *it;
         set<sls_literal> &out = built[g[v].index];
+        read_count[g[v].index] = out_degree(v, g);
+
+        DEBUG_PRINTF("setting read_count to %zu for %u\n",
+                      read_count[g[v].index], g[v].index);
 
         assert(out.empty());
         if (v == g.start) {
@@ -149,7 +154,10 @@ bool findLiterals(const NGHolder &g,
             }
 
             set<sls_literal> &in = built[g[u].index];
+            DEBUG_PRINTF("getting from %u (%zu reads to go)\n",
+                          g[u].index, read_count[g[u].index]);
             assert(!in.empty());
+            assert(read_count[g[u].index]);
 
             for (const sls_literal &lit : in) {
                 if (accept) {
@@ -171,10 +179,18 @@ bool findLiterals(const NGHolder &g,
                     out.insert(lit.append((u8)c, nocase));
 
                     if (out.size() + literals->size() > MAX_LITERAL_SET_SIZE) {
+                        DEBUG_PRINTF("too big %zu + %zu\n", out.size(),
+                                      literals->size());
                         return false;
                     }
                 }
             }
+
+            read_count[g[u].index]--;
+            if (!read_count[g[u].index]) {
+                DEBUG_PRINTF("clearing %u as finished reading\n", g[u].index);
+                in.clear();
+            }
         }
     }
 
@@ -206,6 +222,8 @@ bool handleSmallLiteralSets(RoseBuild &rose, const NGHolder &g,
         return false;
     }
 
+    DEBUG_PRINTF("looking for literals\n");
+
     map<sls_literal, ue2::flat_set<ReportID>> literals;
     if (!findLiterals(g, &literals)) {
         DEBUG_PRINTF(":(\n");

From ba0b2b788bcf11c64b5503e1a4c5287327e92cc0 Mon Sep 17 00:00:00 2001
From: Matthew Barr <matthew.barr@intel.com>
Date: Thu, 29 Oct 2015 17:29:06 +1100
Subject: [PATCH 13/17] cmake: collection of fixes

---
 CMakeLists.txt         | 29 +++++++++++++++++------------
 src/fdr/CMakeLists.txt |  6 +++---
 2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9760d36a..77e04a5e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,19 +36,19 @@ else()
     set(RELEASE_BUILD FALSE)
 endif()
 
-set(BINDIR ${PROJECT_BINARY_DIR}/bin)
-set(LIBDIR ${PROJECT_BINARY_DIR}/lib)
+set(BINDIR "${PROJECT_BINARY_DIR}/bin")
+set(LIBDIR "${PROJECT_BINARY_DIR}/lib")
 
 # First for the generic no-config case
-set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${BINDIR})
-set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${LIBDIR})
-set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${LIBDIR})
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${BINDIR}")
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${LIBDIR}")
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${LIBDIR}")
 # Second, for multi-config builds (e.g. msvc)
 foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
     string (TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG)
-    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${BINDIR})
-    set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${LIBDIR})
-    set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${LIBDIR})
+    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} "${BINDIR}")
+    set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} "${LIBDIR}")
+    set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} "${LIBDIR}")
 endforeach (OUTPUTCONFIG CMAKE_CONFIGURATION_TYPES)
 
 
@@ -71,13 +71,14 @@ find_package(Boost ${BOOST_MINVERSION})
 if(NOT Boost_FOUND)
     # we might have boost in tree, so provide a hint and try again
     message(STATUS "trying include dir for boost")
-    set(BOOST_INCLUDEDIR ${CMAKE_SOURCE_DIR}/include)
+    set(BOOST_INCLUDEDIR "${CMAKE_SOURCE_DIR}/include")
     find_package(Boost ${BOOST_MINVERSION})
     if(NOT Boost_FOUND)
-        message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. Either install system pacakges if available or extract Boost headers to ${CMAKE_SOURCE_DIR}/include")
+        message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. Either install system pacakges if available, extract Boost headers to ${CMAKE_SOURCE_DIR}/include, or set the CMake BOOST_ROOT variable.")
     endif()
 endif()
 
+
 # -- make this work? set(python_ADDITIONAL_VERSIONS 2.7 2.6)
 find_package(PythonInterp)
 find_program(RAGEL ragel)
@@ -88,6 +89,10 @@ else()
     message(FATAL_ERROR "No python interpreter found")
 endif()
 
+if(${RAGEL} STREQUAL "RAGEL-NOTFOUND")
+    message(FATAL_ERROR "Ragel state machine compiler not found")
+endif()
+
 option(OPTIMISE "Turns off compiler optimizations (on by default unless debug output enabled or coverage testing)" TRUE)
 
 option(DEBUG_OUTPUT "Enable debug output (warning: very verbose)" FALSE)
@@ -290,10 +295,10 @@ CHECK_CXX_COMPILER_FLAG("-Wunused-variable" CXX_WUNUSED_VARIABLE)
 endif()
 
 if (NOT XCODE)
-    include_directories(SYSTEM ${Boost_INCLUDE_DIR})
+    include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
 else()
     # cmake doesn't think Xcode supports isystem
-    set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -isystem ${Boost_INCLUDE_DIR}")
+    set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -isystem ${Boost_INCLUDE_DIRS}")
 endif()
 
 
diff --git a/src/fdr/CMakeLists.txt b/src/fdr/CMakeLists.txt
index 4790c527..25396689 100644
--- a/src/fdr/CMakeLists.txt
+++ b/src/fdr/CMakeLists.txt
@@ -13,11 +13,11 @@ set(AUTOGEN_PY_FILES
 function(fdr_autogen type out)
     add_custom_command (
         COMMENT "AUTOGEN ${out}"
-        OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${out}
-        COMMAND ${PYTHON} ${CMAKE_CURRENT_SOURCE_DIR}/autogen.py ${type} > ${CMAKE_CURRENT_BINARY_DIR}/${out}
+        OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${out}"
+        COMMAND ${PYTHON} "${CMAKE_CURRENT_SOURCE_DIR}/autogen.py" ${type} > "${CMAKE_CURRENT_BINARY_DIR}/${out}"
         DEPENDS ${AUTOGEN_PY_FILES}
         )
-    add_custom_target(autogen_${type} DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${out})
+    add_custom_target(autogen_${type} DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/${out}")
 endfunction(fdr_autogen)
 
 #now build the functions

From aafbd96a1dc6863aefb85e94737b8f6dddaf0d79 Mon Sep 17 00:00:00 2001
From: Matthew Barr <matthew.barr@intel.com>
Date: Thu, 29 Oct 2015 17:29:24 +1100
Subject: [PATCH 14/17] docs: describe BOOST_ROOT cmake variable

---
 doc/dev-reference/getting_started.rst | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/doc/dev-reference/getting_started.rst b/doc/dev-reference/getting_started.rst
index 7e43d10a..826349a7 100644
--- a/doc/dev-reference/getting_started.rst
+++ b/doc/dev-reference/getting_started.rst
@@ -125,8 +125,12 @@ Boost Headers
 
 Compiling Hyperscan depends on a recent version of the Boost C++ header
 library. If the Boost libraries are installed on the build machine in the
-usual paths, CMake will find them. An alternative is to put a copy of (or a
-symlink to) the boost subdirectory in ``<hyperscan-source-path>/include/boost``.
+usual paths, CMake will find them. If the Boost libraries are not installed,
+the location of the Boost source tree can be specified during the CMake
+configuration step using the ``BOOST_ROOT`` variable (described below).
+
+Another alternative is to put a copy of (or a symlink to) the boost
+subdirectory in ``<hyperscan-source-path>/include/boost``.
 
 For example: for the Boost-1.59.0 release: ::
 
@@ -161,6 +165,8 @@ Common options for CMake include:
 | BUILD_STATIC_AND_SHARED| Build both static and shared Hyperscan libs.       |
 |                        | Default off.                                       |
 +------------------------+----------------------------------------------------+
+| BOOST_ROOT             | Location of Boost source tree.                     |
++------------------------+----------------------------------------------------+
 | DEBUG_OUTPUT           | Enable very verbose debug output. Default off.     |
 +------------------------+----------------------------------------------------+
 

From 1f47b8210635c6d3a76b99c1b09ce4f5c4744f45 Mon Sep 17 00:00:00 2001
From: Matthew Barr <matthew.barr@intel.com>
Date: Fri, 30 Oct 2015 10:43:43 +1100
Subject: [PATCH 15/17] Remove unneeded code at preproc stage

If we know we have BMI2 we shouldn't produce the fallback code.
---
 src/util/bitutils.h | 12 ++++++++----
 src/util/shuffle.h  |  6 ++++--
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/util/bitutils.h b/src/util/bitutils.h
index 6fdafa71..979a2c04 100644
--- a/src/util/bitutils.h
+++ b/src/util/bitutils.h
@@ -254,7 +254,7 @@ u32 compress32(u32 x, u32 m) {
 #if defined(__BMI2__)
     // BMI2 has a single instruction for this operation.
     return _pext_u32(x, m);
-#endif
+#else
 
     // Return zero quickly on trivial cases
     if ((x & m) == 0) {
@@ -281,6 +281,7 @@ u32 compress32(u32 x, u32 m) {
     }
 
     return x;
+#endif
 }
 
 static really_inline
@@ -288,7 +289,7 @@ u64a compress64(u64a x, u64a m) {
 #if defined(ARCH_X86_64) && defined(__BMI2__)
     // BMI2 has a single instruction for this operation.
     return _pext_u64(x, m);
-#endif
+#else
 
     // Return zero quickly on trivial cases
     if ((x & m) == 0) {
@@ -316,6 +317,7 @@ u64a compress64(u64a x, u64a m) {
     }
 
     return x;
+#endif
 }
 
 static really_inline
@@ -323,7 +325,7 @@ u32 expand32(u32 x, u32 m) {
 #if defined(__BMI2__)
     // BMI2 has a single instruction for this operation.
     return _pdep_u32(x, m);
-#endif
+#else
 
     // Return zero quickly on trivial cases
     if (!x || !m) {
@@ -355,6 +357,7 @@ u32 expand32(u32 x, u32 m) {
     }
 
     return x & m0; // clear out extraneous bits
+#endif
 }
 
 static really_inline
@@ -362,7 +365,7 @@ u64a expand64(u64a x, u64a m) {
 #if defined(ARCH_X86_64) && defined(__BMI2__)
     // BMI2 has a single instruction for this operation.
     return _pdep_u64(x, m);
-#endif
+#else
 
     // Return zero quickly on trivial cases
     if (!x || !m) {
@@ -395,6 +398,7 @@ u64a expand64(u64a x, u64a m) {
     }
 
     return x & m0; // clear out extraneous bits
+#endif
 }
 
 
diff --git a/src/util/shuffle.h b/src/util/shuffle.h
index 847d07a0..ba85fb5d 100644
--- a/src/util/shuffle.h
+++ b/src/util/shuffle.h
@@ -51,7 +51,7 @@ u32 shuffleDynamic32(u32 x, u32 mask) {
 #if defined(HAVE_PEXT)
     // Intel BMI2 can do this operation in one instruction.
     return _pext_u32(x, mask);
-#endif
+#else
 
     u32 result = 0, num = 1;
     while (mask != 0) {
@@ -63,6 +63,7 @@ u32 shuffleDynamic32(u32 x, u32 mask) {
         num <<= 1;
     }
     return result;
+#endif
 }
 
 static really_inline
@@ -70,7 +71,7 @@ u32 shuffleDynamic64(u64a x, u64a mask) {
 #if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
     // Intel BMI2 can do this operation in one instruction.
     return _pext_u64(x, mask);
-#endif
+#else
 
     u32 result = 0, num = 1;
     while (mask != 0) {
@@ -82,6 +83,7 @@ u32 shuffleDynamic64(u64a x, u64a mask) {
         num <<= 1;
     }
     return result;
+#endif
 }
 
 #undef HAVE_PEXT

From aa674e4e4724e917b822935e1b64a07d5fe7fe06 Mon Sep 17 00:00:00 2001
From: Matthew Barr <matthew.barr@intel.com>
Date: Fri, 30 Oct 2015 11:14:32 +1100
Subject: [PATCH 16/17] unit: Don't run unit-internal in release build

---
 unit/CMakeLists.txt | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt
index 21416a25..9bc74e23 100644
--- a/unit/CMakeLists.txt
+++ b/unit/CMakeLists.txt
@@ -112,6 +112,7 @@ endif()
 #
 # build target to run unit tests
 #
+if (NOT RELEASE_BUILD)
 add_custom_target(
     unit
     COMMAND bin/unit-internal
@@ -119,3 +120,11 @@ add_custom_target(
     WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
     DEPENDS unit-internal unit-hyperscan
 )
+else ()
+add_custom_target(
+    unit
+    COMMAND bin/unit-hyperscan
+    WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
+    DEPENDS unit-hyperscan
+)
+endif()

From 91343b00e94dbc4a3997324e281b06a5846cca9b Mon Sep 17 00:00:00 2001
From: Matthew Barr <matthew.barr@intel.com>
Date: Fri, 30 Oct 2015 11:20:28 +1100
Subject: [PATCH 17/17] Bump version number

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 77e04a5e..67885c93 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,7 +3,7 @@ project (Hyperscan C CXX)
 
 set (HS_MAJOR_VERSION 4)
 set (HS_MINOR_VERSION 0)
-set (HS_PATCH_VERSION 0)
+set (HS_PATCH_VERSION 1)
 set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
 
 string (TIMESTAMP BUILD_DATE "%Y-%m-%d")