tamarama: add container engine for exclusive nfas

Add the new Tamarama engine that acts as a container for infix/suffix engines that can be proven to run exclusively of one another. This reduces stream state for pattern sets with many exclusive engines.
2026-01-17 16:00:26 +03:00 · 2015-08-11 05:23:12 -04:00
parent 013dbd3b3c
commit 9087d59be5
38 changed files with 2418 additions and 56 deletions
--- a/src/nfa/castle_dump.cpp
+++ b/src/nfa/castle_dump.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -48,7 +48,8 @@

 namespace ue2 {

-void nfaExecCastle0_dumpDot(const struct NFA *, FILE *) {
+void nfaExecCastle0_dumpDot(const struct NFA *, FILE *,
+                            UNUSED const std::string &base) {
    // No GraphViz output for Castles.
 }

--- a/src/nfa/castle_dump.h
+++ b/src/nfa/castle_dump.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -32,12 +32,14 @@
 #if defined(DUMP_SUPPORT)

 #include <cstdio>
+#include <string>

 struct NFA;

 namespace ue2 {

-void nfaExecCastle0_dumpDot(const NFA *nfa, FILE *file);
+void nfaExecCastle0_dumpDot(const NFA *nfa, FILE *file,
+                            const std::string &base);
 void nfaExecCastle0_dumpText(const NFA *nfa, FILE *file);

 } // namespace ue2
--- a/src/nfa/goughdump.cpp
+++ b/src/nfa/goughdump.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -259,7 +259,8 @@ void dumpTransitions(const NFA *nfa, FILE *f,
    fprintf(f, "\n");
 }

-void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f) {
+void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f,
+                           UNUSED const string &base) {
    assert(nfa->type == GOUGH_NFA_8);
    const mcclellan *m = (const mcclellan *)getImplNfa(nfa);

@@ -302,7 +303,8 @@ void nfaExecGough8_dumpText(const struct NFA *nfa, FILE *f) {
    dumpTextReverse(nfa, f);
 }

-void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f) {
+void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f,
+                            UNUSED const string &base) {
    assert(nfa->type == GOUGH_NFA_16);
    const mcclellan *m = (const mcclellan *)getImplNfa(nfa);

--- a/src/nfa/goughdump.h
+++ b/src/nfa/goughdump.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -33,12 +33,16 @@

 #include "ue2common.h"

+#include <string>
+
 struct NFA;

 namespace ue2 {

-void nfaExecGough8_dumpDot(const NFA *nfa, FILE *file);
-void nfaExecGough16_dumpDot(const NFA *nfa, FILE *file);
+void nfaExecGough8_dumpDot(const NFA *nfa, FILE *file,
+                           const std::string &base);
+void nfaExecGough16_dumpDot(const NFA *nfa, FILE *file,
+                            const std::string &base);
 void nfaExecGough8_dumpText(const NFA *nfa, FILE *file);
 void nfaExecGough16_dumpText(const NFA *nfa, FILE *file);

--- a/src/nfa/lbr_dump.cpp
+++ b/src/nfa/lbr_dump.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -49,23 +49,28 @@

 namespace ue2 {

-void nfaExecLbrDot_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) {
+void nfaExecLbrDot_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
+                           UNUSED const std::string &base) {
    // No impl
 }

-void nfaExecLbrVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) {
+void nfaExecLbrVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
+                            UNUSED const std::string &base) {
    // No impl
 }

-void nfaExecLbrNVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) {
+void nfaExecLbrNVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
+                             UNUSED const std::string &base) {
    // No impl
 }

-void nfaExecLbrShuf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) {
+void nfaExecLbrShuf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
+                            UNUSED const std::string &base) {
    // No impl
 }

-void nfaExecLbrTruf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) {
+void nfaExecLbrTruf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
+                            UNUSED const std::string &base) {
    // No impl
 }

--- a/src/nfa/lbr_dump.h
+++ b/src/nfa/lbr_dump.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -32,16 +32,22 @@
 #ifdef DUMP_SUPPORT

 #include <cstdio>
+#include <string>

 struct NFA;

 namespace ue2 {

-void nfaExecLbrDot_dumpDot(const struct NFA *nfa, FILE *file);
-void nfaExecLbrVerm_dumpDot(const struct NFA *nfa, FILE *file);
-void nfaExecLbrNVerm_dumpDot(const struct NFA *nfa, FILE *file);
-void nfaExecLbrShuf_dumpDot(const struct NFA *nfa, FILE *file);
-void nfaExecLbrTruf_dumpDot(const struct NFA *nfa, FILE *file);
+void nfaExecLbrDot_dumpDot(const struct NFA *nfa, FILE *file,
+                           const std::string &base);
+void nfaExecLbrVerm_dumpDot(const struct NFA *nfa, FILE *file,
+                            const std::string &base);
+void nfaExecLbrNVerm_dumpDot(const struct NFA *nfa, FILE *file,
+                            const std::string &base);
+void nfaExecLbrShuf_dumpDot(const struct NFA *nfa, FILE *file,
+                            const std::string &base);
+void nfaExecLbrTruf_dumpDot(const struct NFA *nfa, FILE *file,
+                            const std::string &base);
 void nfaExecLbrDot_dumpText(const struct NFA *nfa, FILE *file);
 void nfaExecLbrVerm_dumpText(const struct NFA *nfa, FILE *file);
 void nfaExecLbrNVerm_dumpText(const struct NFA *nfa, FILE *file);
--- a/src/nfa/limex.h
+++ b/src/nfa/limex.h
@@ -30,6 +30,7 @@
 #define LIMEX_H

 #ifdef __cplusplus
+#include <string>
 extern "C"
 {
 #endif
@@ -40,7 +41,8 @@ extern "C"
 #define GENERATE_NFA_DUMP_DECL(gf_name)                                        \
    } /* extern "C" */                                                         \
    namespace ue2 {                                                            \
-    void gf_name##_dumpDot(const struct NFA *nfa, FILE *file);                 \
+    void gf_name##_dumpDot(const struct NFA *nfa, FILE *file,                  \
+                           const std::string &base);                           \
    void gf_name##_dumpText(const struct NFA *nfa, FILE *file);                \
    } /* namespace ue2 */                                                      \
    extern "C" {
--- a/src/nfa/limex_dump.cpp
+++ b/src/nfa/limex_dump.cpp
@@ -448,7 +448,8 @@ void dumpLimDotInfo(const limex_type *limex, u32 state, FILE *f) {
    }

 #define DUMP_DOT_FN(ddf_n)                                                     \
-    void nfaExecLimEx##ddf_n##_dumpDot(const NFA *nfa, FILE *f) {              \
+    void nfaExecLimEx##ddf_n##_dumpDot(const NFA *nfa, FILE *f,                \
+                                       UNUSED const string &base) {            \
        const LimExNFA##ddf_n *limex =                                         \
            (const LimExNFA##ddf_n *)getImplNfa(nfa);                          \
                                                                               \
--- a/src/nfa/mcclellandump.cpp
+++ b/src/nfa/mcclellandump.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -267,7 +267,8 @@ void dumpDotPreambleDfa(FILE *f) {
    fprintf(f, "0 [style=invis];\n");
 }

-void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f) {
+void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f,
+                                UNUSED const string &base) {
    assert(nfa->type == MCCLELLAN_NFA_16);
    const mcclellan *m = (const mcclellan *)getImplNfa(nfa);

@@ -286,7 +287,8 @@ void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f) {
    fprintf(f, "}\n");
 }

-void nfaExecMcClellan8_dumpDot(const NFA *nfa, FILE *f) {
+void nfaExecMcClellan8_dumpDot(const NFA *nfa, FILE *f,
+                               UNUSED const string &base) {
    assert(nfa->type == MCCLELLAN_NFA_8);
    const mcclellan *m = (const mcclellan *)getImplNfa(nfa);

--- a/src/nfa/mcclellandump.h
+++ b/src/nfa/mcclellandump.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -34,6 +34,7 @@
 #include "rdfa.h"

 #include <cstdio>
+#include <string>

 struct mcclellan;
 struct mstate_aux;
@@ -42,8 +43,10 @@ union AccelAux;

 namespace ue2 {

-void nfaExecMcClellan8_dumpDot(const struct NFA *nfa, FILE *file);
-void nfaExecMcClellan16_dumpDot(const struct NFA *nfa, FILE *file);
+void nfaExecMcClellan8_dumpDot(const struct NFA *nfa, FILE *file,
+                               const std::string &base);
+void nfaExecMcClellan16_dumpDot(const struct NFA *nfa, FILE *file,
+                                const std::string &base);
 void nfaExecMcClellan8_dumpText(const struct NFA *nfa, FILE *file);
 void nfaExecMcClellan16_dumpText(const struct NFA *nfa, FILE *file);

--- a/src/nfa/mpv_dump.cpp
+++ b/src/nfa/mpv_dump.cpp
@@ -48,7 +48,8 @@

 namespace ue2 {

-void nfaExecMpv0_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *file) {
+void nfaExecMpv0_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *file,
+                         UNUSED const std::string &base) {
 }

 static really_inline
--- a/src/nfa/mpv_dump.h
+++ b/src/nfa/mpv_dump.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -32,12 +32,14 @@
 #if defined(DUMP_SUPPORT)

 #include <cstdio>
+#include <string>

 struct NFA;

 namespace ue2 {

-void nfaExecMpv0_dumpDot(const struct NFA *nfa, FILE *file);
+void nfaExecMpv0_dumpDot(const struct NFA *nfa, FILE *file,
+                         const std::string &base);
 void nfaExecMpv0_dumpText(const struct NFA *nfa, FILE *file);

 } // namespace ue2
--- a/src/nfa/nfa_api.h
+++ b/src/nfa/nfa_api.h
@@ -120,6 +120,13 @@ char nfaInitCompressedState(const struct NFA *nfa, u64a offset, void *state,
 */
 char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end);

+/**
+ * Main execution function that doesn't perform the checks and optimisations of
+ * nfaQueueExec() and just dispatches directly to the nfa implementations. It is
+ * intended to be used by the Tamarama engine.
+ */
+char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end);
+
 /** Return value indicating that the engine is alive. */
 #define MO_ALIVE 1

@@ -155,6 +162,13 @@ char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end);
 */
 char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end);

+/**
+ * Main execution function that doesn't perform the checks and optimisations of
+ * nfaQueueExecToMatch() and just dispatches directly to the nfa
+ * implementations. It is intended to be used by the Tamarama engine.
+ */
+char nfaQueueExec2_raw(const struct NFA *nfa, struct mq *q, s64a end);
+
 /**
 * Report matches at the current queue location.
 *
--- a/src/nfa/nfa_api_dispatch.c
+++ b/src/nfa/nfa_api_dispatch.c
@@ -42,6 +42,7 @@
 #include "limex.h"
 #include "mcclellan.h"
 #include "mpv.h"
+#include "tamarama.h"

 #define DISPATCH_CASE(dc_ltype, dc_ftype, dc_subtype, dc_func_call) \
    case dc_ltype##_NFA_##dc_subtype:                               \
@@ -68,6 +69,7 @@
        DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func);             \
        DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func);             \
        DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func);          \
+        DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func);      \
    default:                                                  \
        assert(0);                                            \
    }
@@ -105,6 +107,14 @@ char nfaQueueExec2_i(const struct NFA *nfa, struct mq *q, s64a end) {
    return 0;
 }

+char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end) {
+    return nfaQueueExec_i(nfa, q, end);
+}
+
+char nfaQueueExec2_raw(const struct NFA *nfa, struct mq *q, s64a end) {
+    return nfaQueueExec2_i(nfa, q, end);
+}
+
 static really_inline
 char nfaQueueExecRose_i(const struct NFA *nfa, struct mq *q, ReportID report) {
    DISPATCH_BY_NFA_TYPE(_QR(nfa, q, report));
--- a/src/nfa/nfa_build_util.cpp
+++ b/src/nfa/nfa_build_util.cpp
@@ -300,6 +300,18 @@ const has_accel_fn NFATraits<LBR_NFA_Truf>::has_accel = has_accel_generic;
 const char *NFATraits<LBR_NFA_Truf>::name = "Lim Bounded Repeat (M)";
 #endif

+template<> struct NFATraits<TAMARAMA_NFA_0> {
+    UNUSED static const char *name;
+    static const NFACategory category = NFA_OTHER;
+    static const u32 stateAlign = 32;
+    static const bool fast = true;
+    static const has_accel_fn has_accel;
+};
+const has_accel_fn NFATraits<TAMARAMA_NFA_0>::has_accel = has_accel_generic;
+#if defined(DUMP_SUPPORT)
+const char *NFATraits<TAMARAMA_NFA_0>::name = "Tamarama";
+#endif
+
 } // namespace

 #if defined(DUMP_SUPPORT)
--- a/src/nfa/nfa_dump_api.h
+++ b/src/nfa/nfa_dump_api.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -36,6 +36,7 @@
 #if defined(DUMP_SUPPORT)

 #include <cstdio>
+#include <string>

 struct NFA;

@@ -45,7 +46,7 @@ namespace ue2 {
 * \brief Dump (in Graphviz 'dot' format) a representation of the NFA into the
 * file pointed to by dotFile.
 */
-void nfaDumpDot(const struct NFA *nfa, FILE *dotFile);
+void nfaDumpDot(const struct NFA *nfa, FILE *dotFile, const std::string &base);

 /** \brief Dump a textual representation of the NFA. */
 void nfaDumpText(const struct NFA *fact, FILE *textFile);
--- a/src/nfa/nfa_dump_dispatch.cpp
+++ b/src/nfa/nfa_dump_dispatch.cpp
@@ -40,6 +40,7 @@
 #include "limex.h"
 #include "mcclellandump.h"
 #include "mpv_dump.h"
+#include "tamarama_dump.h"

 #ifndef DUMP_SUPPORT
 #error "no dump support"
@@ -73,12 +74,14 @@ namespace ue2 {
        DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func);             \
        DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func);             \
        DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func);          \
+        DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func);      \
    default:                                                  \
        assert(0);                                            \
    }

-void nfaDumpDot(const struct NFA *nfa, FILE *dotFile) {
-    DISPATCH_BY_NFA_TYPE(_dumpDot(nfa, dotFile));
+void nfaDumpDot(const struct NFA *nfa, FILE *dotFile,
+                const std::string &base) {
+    DISPATCH_BY_NFA_TYPE(_dumpDot(nfa, dotFile, base));
 }

 void nfaDumpText(const struct NFA *nfa, FILE *txtFile) {
--- a/src/nfa/nfa_internal.h
+++ b/src/nfa/nfa_internal.h
@@ -67,6 +67,7 @@ enum NFAEngineType {
    LBR_NFA_Shuf,       /**< magic pseudo nfa */
    LBR_NFA_Truf,       /**< magic pseudo nfa */
    CASTLE_NFA_0,       /**< magic pseudo nfa */
+    TAMARAMA_NFA_0,     /**< magic nfa container */
    /** \brief bogus NFA - not used */
    INVALID_NFA
 };
@@ -173,6 +174,12 @@ int isLbrType(u8 t) {
           t == LBR_NFA_Shuf || t == LBR_NFA_Truf;
 }

+/** \brief True if the given type (from NFA::type) is a container engine. */
+static really_inline
+int isContainerType(u8 t) {
+    return t == TAMARAMA_NFA_0;
+}
+
 static really_inline
 int isMultiTopType(u8 t) {
    return !isDfaType(t) && !isLbrType(t);
--- a/src/nfa/tamarama.c
+++ b/src/nfa/tamarama.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+    \brief Tamarama: container engine for exclusive engines, runtime code.
+*/
+#include "config.h"
+
+#include "tamarama.h"
+
+#include "tamarama_internal.h"
+#include "nfa_api.h"
+#include "nfa_api_queue.h"
+#include "nfa_api_util.h"
+#include "nfa_internal.h"
+#include "scratch.h"
+#include "util/partial_store.h"
+
+static really_inline
+u32 getSubOffset(const struct Tamarama *t, u32 num) {
+    DEBUG_PRINTF("subengine:%u\n", num);
+    assert(num < t->numSubEngines);
+    const u32 *sub =
+        (const u32 *)((const char *)t + sizeof(struct Tamarama) +
+                      t->numSubEngines * sizeof(u32));
+    assert(ISALIGNED(sub));
+    return sub[num];
+}
+
+static
+const struct NFA *getSubEngine(const struct Tamarama *t,
+                               const u32 activeIdx) {
+    const u32 offset = getSubOffset(t, activeIdx);
+    DEBUG_PRINTF("activeIdx:%u offsets:%u\n", activeIdx, offset);
+    const char *base = (const char *)t;
+    return (const struct NFA *)(base + offset);
+}
+
+static
+void storeActiveIdx(const struct Tamarama *t, char *state,
+                    const u32 idx) {
+    assert(idx <= t->numSubEngines);
+    partial_store_u32(state, idx, t->activeIdxSize);
+}
+
+static
+u32 loadActiveIdx(const char *state,
+                  const u32 activeIdxSize) {
+    return partial_load_u32(state, activeIdxSize);
+}
+
+static really_inline
+void copyQueueProperties(const struct mq *q1, struct mq *q2,
+                         const u32 activeIdxSize) {
+    q2->state = q1->state;
+    q2->streamState = q1->streamState + activeIdxSize;
+    q2->offset = q1->offset;
+    q2->buffer = q1->buffer;
+    q2->length = q1->length;
+    q2->history = q1->history;
+    q2->hlength = q1->hlength;
+    q2->cb = q1->cb;
+    q2->som_cb = q1->som_cb;
+    q2->context = q1->context;
+    q2->scratch = q1->scratch;
+    q2->report_current = q1->report_current;
+}
+
+static
+void copyQueueItems(const struct Tamarama *t, const struct NFA *sub,
+                    struct mq *q1, struct mq *q2, const u32 activeIdx) {
+    const u32 *baseTop = (const u32 *)((const char *)t +
+                                       sizeof(struct Tamarama));
+
+    u32 lower = baseTop[activeIdx];
+    u32 upper = activeIdx == t->numSubEngines - 1 ?
+                    ~0U : baseTop[activeIdx + 1];
+    u32 event_base = isMultiTopType(sub->type) ? MQE_TOP_FIRST : MQE_TOP;
+    while (q1->cur < q1->end) {
+        u32 type = q1->items[q1->cur].type;
+        s64a loc = q1->items[q1->cur].location;
+        DEBUG_PRINTF("type:%u lower:%u upper:%u\n", type, lower, upper);
+        if (type >= lower && type < upper) {
+            u32 event = event_base;
+            if (event == MQE_TOP_FIRST) {
+                event += type - lower;
+            }
+            pushQueue(q2, event, loc);
+        } else {
+            pushQueueNoMerge(q2, MQE_END, loc);
+            break;
+        }
+        q1->cur++;
+    }
+}
+
+static
+void copyQueue(const struct Tamarama *t, const struct NFA *sub,
+               struct mq *q1, struct mq *q2, const u32 activeIdx) {
+    copyQueueProperties(q1, q2, t->activeIdxSize);
+
+    // copy MQE_START item
+    u32 cur = q1->cur++;
+    q2->cur = cur;
+    q2->items[cur] = q1->items[cur];
+    q2->end = cur + 1;
+
+    copyQueueItems(t, sub, q1, q2, activeIdx);
+    // restore cur index of the main queue
+    q1->cur = cur;
+}
+
+static
+u32 findEngineForTop(const u32 *baseTop, const u32 cur,
+                     const u32 numSubEngines) {
+    u32 i;
+    for (i = 0; i < numSubEngines; ++i) {
+        DEBUG_PRINTF("cur:%u base:%u\n", cur, baseTop[i]);
+        if (cur >= baseTop[i] &&
+            (i == numSubEngines - 1 || cur < baseTop[i + 1])) {
+            break;
+        }
+    }
+    return i;
+}
+
+static
+void initSubQueue(const struct Tamarama *t, struct mq *q1,
+                  struct mq *q2, const u32 lastActiveIdx,
+                  const u32 activeIdx) {
+    // Push events to the new queue
+    const struct NFA *sub = getSubEngine(t, activeIdx);
+    assert(!isContainerType(sub->type));
+    q2->nfa = sub;
+
+    // Reinitialize state if the last active subengine is different
+    // from current one
+    if (lastActiveIdx == t->numSubEngines ||
+        lastActiveIdx != activeIdx) {
+        nfaQueueInitState(q2->nfa, q2);
+    }
+
+    copyQueueItems(t, sub, q1, q2, activeIdx);
+    if (q1->items[q1->cur].type == MQE_END) {
+        q1->cur++;
+    }
+    DEBUG_PRINTF("update lastIdx:%u\n", activeIdx);
+    storeActiveIdx(t, q1->streamState, activeIdx);
+}
+
+static
+void updateQueues(const struct Tamarama *t, struct mq *q1, struct mq *q2) {
+    q2->cur = q2->end = 0;
+    copyQueueProperties(q1, q2, t->activeIdxSize);
+
+    const u32 numSubEngines = t->numSubEngines;
+    u32 lastActiveIdx = loadActiveIdx(q1->streamState,
+                                      t->activeIdxSize);
+#ifdef DEBUG
+    DEBUG_PRINTF("external queue\n");
+    debugQueue(q1);
+#endif
+
+    // Push MQE_START event to the subqueue
+    s64a loc = q1->items[q1->cur].location;
+    pushQueueAt(q2, 0, MQE_START, loc);
+    char hasStart = 0;
+    if (q1->items[q1->cur].type == MQE_START) {
+        hasStart = 1;
+        q1->cur++;
+    }
+
+    u32 activeIdx = lastActiveIdx;
+    // If we have top events in the main queue, update current active id
+    if (q1->cur < q1->end - 1) {
+        const u32 *baseTop = (const u32 *)((const char *)t +
+                                           sizeof(struct Tamarama));
+        u32 curTop = q1->items[q1->cur].type;
+        activeIdx = findEngineForTop(baseTop, curTop, numSubEngines);
+    }
+
+    assert(activeIdx < numSubEngines);
+    DEBUG_PRINTF("last id:%u, current id:%u, num of subengines:%u\n",
+                 lastActiveIdx, activeIdx, numSubEngines);
+    // Handle unfinished last alive subengine
+    if (lastActiveIdx != activeIdx &&
+        lastActiveIdx != numSubEngines && hasStart) {
+        loc = q1->items[q1->cur].location;
+        pushQueueNoMerge(q2, MQE_END, loc);
+        q2->nfa = getSubEngine(t, lastActiveIdx);
+        return;
+    }
+
+    initSubQueue(t, q1, q2, lastActiveIdx, activeIdx);
+    DEBUG_PRINTF("finish queues\n");
+}
+
+// After the subengine has processed its subqueue, copy any items still left in
+// the subqueue back to the Tamarama main queue
+static
+void copyBack(const struct  Tamarama *t, struct mq *q, struct mq *q1) {
+    DEBUG_PRINTF("copy back %u, %u\n", q1->cur, q1->end);
+    q->report_current = q1->report_current;
+    if (q->cur >= q->end && q1->cur >= q1->end) {
+        return;
+    }
+
+    const u32 *baseTop = (const u32 *)((const char *)t +
+                                        sizeof(struct Tamarama));
+    const u32 lastIdx = loadActiveIdx(q->streamState,
+                                      t->activeIdxSize);
+    u32 base = 0, event_base = 0;
+    if (lastIdx != t->numSubEngines) {
+        base = baseTop[lastIdx];
+        const struct NFA *sub = getSubEngine(t, lastIdx);
+        event_base = isMultiTopType(sub->type) ? MQE_TOP_FIRST : MQE_TOP;
+    }
+
+    u32 numItems = q1->end > q1->cur + 1 ? q1->end - q1->cur - 1 : 1;
+    // Also need to copy MQE_END if the main queue is empty
+    if (q->cur == q->end) {
+        numItems++;
+    }
+    u32 cur = q->cur - numItems;
+    q->items[cur] = q1->items[q1->cur++];
+    q->items[cur].type = MQE_START;
+    q->cur = cur++;
+    for (u32 i = 0; i < numItems - 1; ++i) {
+        u32 type = q1->items[q1->cur].type;
+        if (type > MQE_END) {
+            q1->items[q1->cur].type = type - event_base + base;
+        }
+        q->items[cur++] = q1->items[q1->cur++];
+    }
+
+#ifdef DEBUG
+    DEBUG_PRINTF("external queue\n");
+    debugQueue(q);
+#endif
+}
+
+char nfaExecTamarama0_testEOD(const struct NFA *n, const char *state,
+                              const char *streamState, u64a offset,
+                              NfaCallback callback, SomNfaCallback som_cb,
+                              void *context) {
+    const struct Tamarama *t = getImplNfa(n);
+    u32 activeIdx = loadActiveIdx(streamState, t->activeIdxSize);
+    if (activeIdx == t->numSubEngines) {
+        return MO_CONTINUE_MATCHING;
+    }
+
+    const struct NFA *sub = getSubEngine(t, activeIdx);
+    if (nfaAcceptsEod(sub)) {
+        assert(!isContainerType(sub->type));
+        const char *subStreamState = streamState + t->activeIdxSize;
+        return nfaCheckFinalState(sub, state, subStreamState,
+                                  offset, callback, som_cb, context);
+    }
+
+    return MO_CONTINUE_MATCHING;
+}
+
+char nfaExecTamarama0_QR(const struct NFA *n, struct mq *q,
+                         ReportID report) {
+    DEBUG_PRINTF("exec rose\n");
+    struct mq q1;
+    q1.cur = q1.end = 0;
+    char rv = 0;
+    const struct Tamarama *t = getImplNfa(n);
+    while (q->cur < q->end) {
+        updateQueues(t, q, &q1);
+    }
+
+    if (q1.cur < q1.end) {
+        rv = nfaQueueExecRose(q1.nfa, &q1, report);
+    }
+
+    DEBUG_PRINTF("exec rose rv:%u\n", rv);
+    return rv;
+}
+
+char nfaExecTamarama0_reportCurrent(const struct NFA *n, struct mq *q) {
+    const struct Tamarama *t = getImplNfa(n);
+    u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize);
+    if (activeIdx == t->numSubEngines) {
+        return 1;
+    }
+
+    const struct NFA *sub = getSubEngine(t, activeIdx);
+    struct mq q1;
+    copyQueue(t, sub, q, &q1, activeIdx);
+    return nfaReportCurrentMatches(sub, &q1);
+}
+
+char nfaExecTamarama0_inAccept(const struct NFA *n, ReportID report,
+                               struct mq *q) {
+    const struct Tamarama *t = getImplNfa(n);
+    u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize);
+    if (activeIdx == t->numSubEngines) {
+        return 0;
+    }
+    const struct NFA *sub = getSubEngine(t, activeIdx);
+
+    struct mq q1;
+    copyQueue(t, sub, q, &q1, activeIdx);
+    return nfaInAcceptState(sub, report, &q1);
+}
+
+char nfaExecTamarama0_inAnyAccept(const struct NFA *n, struct mq *q) {
+    const struct Tamarama *t = getImplNfa(n);
+    u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize);
+    if (activeIdx == t->numSubEngines) {
+        return 0;
+    }
+    const struct NFA *sub = getSubEngine(t, activeIdx);
+
+    struct mq q1;
+    copyQueue(t, sub, q, &q1, activeIdx);
+    return nfaInAnyAcceptState(sub, &q1);
+}
+
+char nfaExecTamarama0_queueInitState(const struct NFA *n, struct mq *q) {
+    DEBUG_PRINTF("init state\n");
+    const struct Tamarama *t = getImplNfa(n);
+    char *ptr = q->streamState;
+    // Use numSubEngines as a sentinel value and initialize the state to
+    // an invalid engine as nothing has been triggered yet
+    storeActiveIdx(t, ptr, t->numSubEngines);
+    return 0;
+}
+
+char nfaExecTamarama0_queueCompressState(const struct NFA *n,
+                                         const struct mq *q, s64a loc) {
+    const struct Tamarama *t = getImplNfa(n);
+    u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize);
+    if (activeIdx == t->numSubEngines) {
+        return 0;
+    }
+
+    const struct NFA *sub = getSubEngine(t, activeIdx);
+
+    struct mq q1;
+    copyQueueProperties(q, &q1, t->activeIdxSize);
+    return nfaQueueCompressState(sub, &q1, loc);
+}
+
+char nfaExecTamarama0_expandState(const struct NFA *n, void *dest,
+                                  const void *src, u64a offset, u8 key) {
+    const struct Tamarama *t = getImplNfa(n);
+    u32 activeIdx = loadActiveIdx(src, t->activeIdxSize);
+    if (activeIdx == t->numSubEngines) {
+        return 0;
+    }
+
+    const struct NFA *sub = getSubEngine(t, activeIdx);
+
+    const char *subStreamState = (const char *)src + t->activeIdxSize;
+    return nfaExpandState(sub, dest, subStreamState, offset, key);
+}
+
+enum nfa_zombie_status nfaExecTamarama0_zombie_status(const struct NFA *n,
+                                                      struct mq *q, s64a loc) {
+    const struct Tamarama *t = getImplNfa(n);
+    u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize);
+    if (activeIdx == t->numSubEngines) {
+        return NFA_ZOMBIE_NO;
+    }
+    const struct NFA *sub = getSubEngine(t, activeIdx);
+
+    struct mq q1;
+    copyQueue(t, sub, q, &q1, activeIdx);
+    return nfaGetZombieStatus(sub, &q1, loc);
+}
+
+char nfaExecTamarama0_Q(const struct NFA *n, struct mq *q, s64a end) {
+    DEBUG_PRINTF("exec\n");
+    struct mq q1;
+    char rv = MO_ALIVE;
+    char copy = 0;
+    const struct Tamarama *t = getImplNfa(n);
+    while (q->cur < q->end && q_cur_loc(q) <= end) {
+        updateQueues(t, q, &q1);
+        rv = nfaQueueExec_raw(q1.nfa, &q1, end);
+        q->report_current = q1.report_current;
+        copy = 1;
+        if (can_stop_matching(q->scratch)) {
+            break;
+        }
+    }
+    if (copy) {
+        copyBack(t, q, &q1);
+    }
+    return rv;
+}
+
+char nfaExecTamarama0_Q2(const struct NFA *n,
+                         struct mq *q, s64a end) {
+    DEBUG_PRINTF("exec to match\n");
+    struct mq q1;
+    char rv = 0;
+    char copy = 0;
+    const struct Tamarama *t = getImplNfa(n);
+    while (q->cur < q->end && q_cur_loc(q) <= end &&
+           rv != MO_MATCHES_PENDING) {
+        updateQueues(t, q, &q1);
+        rv = nfaQueueExec2_raw(q1.nfa, &q1, end);
+        q->report_current = q1.report_current;
+        copy = 1;
+    }
+    if (copy) {
+        copyBack(t, q, &q1);
+    }
+    return rv;
+}
+
--- a/src/nfa/tamarama.h
+++ b/src/nfa/tamarama.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TAMARAMA_H
+#define TAMARAMA_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "callback.h"
+#include "ue2common.h"
+
+struct mq;
+struct NFA;
+struct hs_scratch;
+
+char nfaExecTamarama0_testEOD(const struct NFA *n, const char *state,
+                              const char *streamState, u64a offset,
+                              NfaCallback callback, SomNfaCallback som_cb,
+                              void *context);
+char nfaExecTamarama0_QR(const struct NFA *n, struct mq *q, ReportID report);
+char nfaExecTamarama0_reportCurrent(const struct NFA *n, struct mq *q);
+char nfaExecTamarama0_inAccept(const struct NFA *n, ReportID report,
+                               struct mq *q);
+char nfaExecTamarama0_inAnyAccept(const struct NFA *n, struct mq *q);
+char nfaExecTamarama0_queueInitState(const struct NFA *n, struct mq *q);
+char nfaExecTamarama0_queueCompressState(const struct NFA *n,
+                                         const struct mq *q,
+                                         s64a loc);
+char nfaExecTamarama0_expandState(const struct NFA *n, void *dest,
+                                  const void *src, u64a offset, u8 key);
+enum nfa_zombie_status nfaExecTamarama0_zombie_status(const struct NFA *n,
+                                                      struct mq *q, s64a loc);
+char nfaExecTamarama0_Q(const struct NFA *nfa, struct mq *q, s64a end);
+char nfaExecTamarama0_Q2(const struct NFA *nfa, struct mq *q, s64a end);
+
+// only used by outfix and miracles, no implementation for tamarama
+#define nfaExecTamarama0_initCompressedState NFA_API_NO_IMPL
+#define nfaExecTamarama0_B_Reverse NFA_API_NO_IMPL
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/src/nfa/tamarama_dump.cpp
+++ b/src/nfa/tamarama_dump.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Tamarama: container engine for exclusive engines, dump code.
+ */
+
+#include "config.h"
+
+#include "tamarama_dump.h"
+
+#include "tamarama_internal.h"
+#include "nfa_dump_api.h"
+#include "nfa_dump_internal.h"
+#include "nfa_internal.h"
+
+#include <string>
+#include <sstream>
+
+#ifndef DUMP_SUPPORT
+#error No dump support!
+#endif
+
+namespace ue2 {
+
+// Dump each subengine graph to "<base>rose_nfa_<queue>_sub_<i>.dot".
+void nfaExecTamarama0_dumpDot(const struct NFA *nfa, UNUSED FILE *f,
+                              const std::string &base) {
+    const Tamarama *t = (const Tamarama *)getImplNfa(nfa);
+    const char *t_base = (const char *)t; // offsets are Tamarama-relative
+    const u32 *subOffset = (const u32 *)(t_base + sizeof(struct Tamarama) +
+                                         t->numSubEngines * sizeof(u32));
+    for (u32 i = 0; i < t->numSubEngines; i++) {
+        const std::string name = "rose_nfa_" + std::to_string(nfa->queueIndex) +
+                                 "_sub_" + std::to_string(i) + ".dot";
+        FILE *f1 = fopen((base + name).c_str(), "w");
+        if (f1) { // skip this subengine if its file cannot be created
+            nfaDumpDot((const NFA *)(t_base + subOffset[i]), f1, base);
+            fclose(f1);
+        }
+    }
+}
+
+// Write a text summary of the container, then dump every subengine in turn.
+void nfaExecTamarama0_dumpText(const struct NFA *nfa, FILE *f) {
+    const Tamarama *t = (const Tamarama *)getImplNfa(nfa);
+
+    fprintf(f, "Tamarama container engine\n");
+    fprintf(f, "\n");
+    fprintf(f, "Number of subengine tenants:  %u\n", t->numSubEngines);
+    fprintf(f, "\n");
+    dumpTextReverse(nfa, f);
+    fprintf(f, "\n");
+
+    // Subengine offsets are stored relative to struct Tamarama (not the
+    // enclosing NFA); the table follows the per-subengine top base table.
+    const char *t_base = (const char *)t;
+    const u32 *subOffset = (const u32 *)(t_base + sizeof(struct Tamarama) +
+                                         t->numSubEngines * sizeof(u32));
+    for (u32 i = 0; i < t->numSubEngines; i++) {
+        fprintf(f, "Sub %u:\n", i);
+        nfaDumpText((const NFA *)(t_base + subOffset[i]), f);
+        fprintf(f, "\n");
+    }
+}
+
+} // namespace ue2
--- a/src/nfa/tamarama_dump.h
+++ b/src/nfa/tamarama_dump.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TAMARAMA_DUMP_H
+#define TAMARAMA_DUMP_H
+
+#if defined(DUMP_SUPPORT)
+
+#include <cstdio>
+#include <string>
+
+struct NFA;
+
+namespace ue2 {
+
+void nfaExecTamarama0_dumpDot(const NFA *nfa, FILE *file,
+                              const std::string &base);
+void nfaExecTamarama0_dumpText(const NFA *nfa, FILE *file);
+
+} // namespace ue2
+
+#endif // DUMP_SUPPORT
+
+#endif
--- a/src/nfa/tamarama_internal.h
+++ b/src/nfa/tamarama_internal.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ *\brief Tamarama: container engine for exclusive engines,
+ *                 data structures.
+ */
+
+/* Tamarama bytecode layout:
+ * * |-----|
+ * * |     | struct NFA
+ * * |-----|
+ * * |     | struct Tamarama
+ * * |     |
+ * * |-----|
+ * * |     | top remapping table:
+ * * |     | stores top base for each subengine.
+ * * |     | old_top = remapped_top - top_base;
+ * * |     | The size of table is equal to the number of subengines.
+ * * ...
+ * * |     |
+ * * |-----|
+ * * |     | offsets from the start of struct Tamarama to subengines --\
+ * * ...                                                               |
+ * * |     |                                          -----------\     |
+ * * |-----|                                                     |     |
+ * * ||--| | subengine 1 (struct NFA + rest of subengine)     <--/     |
+ * * ||  | |                                                           |
+ * * ||--| |                                                           |
+ * * ||  | |                                                           |
+ * * ||  | |                                                           |
+ * * ||--| |                                                           |
+ * * |     |                                                           |
+ * * ||--| | subengine 2 (struct NFA + rest of subengine)      <-------/
+ * * ||  | |
+ * * ||--| |
+ * * ||  | |
+ * * ||  | |
+ * * ||--| |
+ * * |     |
+ * * ...
+ * * |     |
+ * * |-----| total size of tamarama
+ * *
+ * * Tamarama stream state:
+ * *
+ * * |---|
+ * * |   | active subengine id
+ * * |---|
+ * * |   | common pool of stream state for each engine
+ * * |   |
+ * * |   |
+ * * ...
+ * * |   |
+ * * |   |
+ * * |---|
+ * *
+ * * Tamarama scratch space:
+ * *
+ * * |---|
+ * * |   | common pool of scratch for each engine
+ * * |   |
+ * * |   |
+ * * ...
+ * * |   |
+ * * |   |
+ * * |---|
+ * */
+
+#ifndef NFA_TAMARAMA_INTERNAL_H
+#define NFA_TAMARAMA_INTERNAL_H
+
+#include "ue2common.h"
+
+struct ALIGN_AVX_DIRECTIVE Tamarama {
+    u32 numSubEngines; /* number of subengines; also the "none active" index */
+    u8 activeIdxSize;  /* bytes of stream state holding the active index */
+};
+
+#endif // NFA_TAMARAMA_INTERNAL_H
--- a/src/nfa/tamaramacompile.cpp
+++ b/src/nfa/tamaramacompile.cpp
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Tamarama: container engine for exclusive engines,
+ *                  compiler code.
+ */
+
+#include "config.h"
+
+#include "tamaramacompile.h"
+
+#include "tamarama_internal.h"
+#include "nfa_internal.h"
+#include "nfa_api_queue.h"
+#include "repeatcompile.h"
+#include "util/container.h"
+#include "util/verify_types.h"
+
+using namespace std;
+
+namespace ue2 {
+
+/** Give each subengine its own, non-overlapping range of remapped tops. */
+static
+void remapTops(const TamaInfo &tamaInfo, vector<u32> &top_base,
+               map<pair<const NFA *, u32>, u32> &out_top_remap) {
+    u32 next_free = 0; // first remapped top not yet handed out
+    for (u32 idx = 0; idx < tamaInfo.subengines.size(); idx++) {
+        const NFA *engine = tamaInfo.subengines[idx];
+        const u32 first = next_free;
+        top_base.push_back(first + MQE_TOP_FIRST);
+        DEBUG_PRINTF("subengine:%u\n", idx);
+        for (const u32 top : tamaInfo.tops[idx]) {
+            next_free = first + top;
+            DEBUG_PRINTF("top remapping %u:%u\n", top, next_free);
+            out_top_remap.emplace(make_pair(engine, top), next_free++);
+        }
+    }
+}
+
+/**
+ * Copy every subengine into the Tamarama bytecode, recording its offset from
+ * base_offset, and fold the subengines' properties into the container: flags
+ * are OR'd together, the state sizes are the per-subengine maxima (stream
+ * state additionally holds the active index) and maxWidth is zero as soon
+ * as any subengine has a zero maxWidth.
+ */
+static
+void copyInSubnfas(const char *base_offset, NFA &nfa,
+                   const TamaInfo &tamaInfo, u32 *offsets,
+                   char *sub_nfa_offset, const u32 activeIdxSize) {
+    u32 stream_sz = 0;
+    u32 scratch_sz = 0;
+    bool unbounded = false;
+    auto widest = nfa.maxWidth;
+    char *dest = ROUNDUP_PTR(sub_nfa_offset, 64);
+    for (NFA *engine : tamaInfo.subengines) {
+        stream_sz = max(stream_sz, verify_u32(engine->streamStateSize));
+        scratch_sz = max(scratch_sz, verify_u32(engine->scratchStateSize));
+        // stamp the queue index first so that the copy below carries it
+        engine->queueIndex = nfa.queueIndex;
+        memcpy(dest, engine, engine->length);
+
+        const u32 rel = verify_u32(dest - base_offset);
+        *offsets++ = rel;
+        DEBUG_PRINTF("type:%u offsets:%u\n", engine->type, rel);
+        dest += ROUNDUP_CL(engine->length);
+
+        nfa.flags |= engine->flags;
+        if (engine->maxWidth) {
+            widest = max(widest, engine->maxWidth);
+        } else {
+            unbounded = true;
+        }
+    }
+
+    nfa.maxWidth = unbounded ? 0 : widest;
+    nfa.maxBiAnchoredWidth = 0;
+    nfa.streamStateSize = activeIdxSize + stream_sz;
+    nfa.scratchStateSize = scratch_sz;
+}
+
+/**
+ * Take in a collection of exclusive subengines and produce a tamarama; also
+ * returns, via out_top_remap, a mapping indicating how tops in the subengines
+ * relate to the tamarama's tops.
+ */
+aligned_unique_ptr<NFA> buildTamarama(const TamaInfo &tamaInfo, const u32 queue,
+                        map<pair<const NFA *, u32>, u32> &out_top_remap) {
+    vector<u32> top_base;
+    remapTops(tamaInfo, top_base, out_top_remap);
+
+    size_t subSize = tamaInfo.subengines.size();
+    DEBUG_PRINTF("subSize:%zu\n", subSize);
+    size_t total_size =
+        sizeof(NFA) +               // initial NFA structure
+        sizeof(Tamarama) +          // Tamarama structure
+        sizeof(u32) * subSize +     // base top event value for subengines,
+                                    // used for top remapping at runtime
+        sizeof(u32) * subSize + 64; // offsets to subengines in bytecode and
+                                    // padding for subengines
+
+    for (const auto &sub : tamaInfo.subengines) {
+        total_size += ROUNDUP_CL(sub->length);
+    }
+
+    // use subSize as a sentinel value for no active subengines,
+    // so add one to subSize here
+    u32 activeIdxSize = calcPackedBytes(subSize + 1);
+    aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);
+    nfa->type = verify_u8(TAMARAMA_NFA_0);
+    nfa->length = verify_u32(total_size);
+    nfa->queueIndex = queue;
+
+    char *ptr = (char *)nfa.get() + sizeof(NFA);
+    char *base_offset = ptr;
+    Tamarama *t = (Tamarama *)ptr;
+    t->numSubEngines = verify_u32(subSize);
+    t->activeIdxSize = verify_u8(activeIdxSize);
+
+    ptr += sizeof(Tamarama);
+    copy_bytes(ptr, top_base);
+    ptr += byte_length(top_base);
+
+    u32 *offsets = (u32*)ptr;
+    char *sub_nfa_offset = ptr + sizeof(u32) * subSize;
+    copyInSubnfas(base_offset, *nfa, tamaInfo, offsets, sub_nfa_offset,
+                  activeIdxSize);
+    assert((size_t)(sub_nfa_offset - (char *)nfa.get()) <= total_size);
+    return nfa;
+}
+
+set<ReportID> all_reports(const TamaProto &proto) {
+    return proto.reports; // returns a copy of the proto's report set
+}
+
+void TamaInfo::add(NFA *sub, const set<u32> &top) {
+    assert(subengines.size() < max_occupancy); // caller must respect the cap
+    subengines.push_back(sub);
+    tops.push_back(top); // kept parallel: tops[i] belongs to subengines[i]
+}
+
+void TamaProto::add(const NFA *n, const u32 id, const u32 top, // id: vertex id
+                    const map<pair<const NFA *, u32>, u32> &out_top_remap) {
+    top_remap.emplace(make_pair(id, top), out_top_remap.at(make_pair(n, top)));
+}
+
+} // namespace ue2
+
--- a/src/nfa/tamaramacompile.h
+++ b/src/nfa/tamaramacompile.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ *  \brief Tamarama: container engine for exclusive engines, compiler code.
+ */
+
+#ifndef NFA_TAMARAMACOMPILE_H
+#define NFA_TAMARAMACOMPILE_H
+
+#include "ue2common.h"
+#include "util/alloc.h"
+
+#include <map>
+#include <set>
+#include <vector>
+
+struct NFA;
+
+namespace ue2 {
+
+/**
+ * \brief Top remapping and report info for a Tamarama. add() records the
+ * remapped value of (n, top) from out_top_remap under the key (id, top).
+ */
+struct TamaProto {
+    void add(const NFA *n, const u32 id, const u32 top,
+             const std::map<std::pair<const NFA *, u32>, u32> &out_top_remap);
+    /** Top remapping between <vertex id, top value> and
+     ** remapped top value. */
+    std::map<std::pair<u32, u32>, u32> top_remap;
+
+    /** All the reports in subengines */
+    std::set<ReportID> reports;
+};
+
+/**
+ * \brief Construction info for a Tamarama engine:
+ * contains at least two subengines.
+ *
+ * A TamaInfo is converted into a single NFA, with each top triggering a
+ * subengine. A TamaInfo can contain at most TamaInfo::max_occupancy
+ * subengines.
+ */
+struct TamaInfo {
+    static constexpr size_t max_occupancy = 65536; // arbitrary limit
+
+    /** \brief Add a new subengine together with its set of tops. */
+    void add(NFA* sub, const std::set<u32> &top);
+
+    /** \brief All the subengines */
+    std::vector<NFA *> subengines;
+
+    /** \brief Tops of subengines; tops[i] belongs to subengines[i] */
+    std::vector<std::set<u32>> tops;
+};
+
+std::set<ReportID> all_reports(const TamaProto &proto);
+
+/**
+ * Take in a collection of exclusive subengines and produce a tamarama; also
+ * returns, via out_top_remap, a mapping indicating how tops in the subengines
+ * relate to the tamarama's tops.
+ */
+ue2::aligned_unique_ptr<NFA> buildTamarama(const TamaInfo &tamaInfo,
+                      const u32 queue,
+                      std::map<std::pair<const NFA *, u32>, u32> &out_top_remap);
+} // namespace ue2
+
+#endif // NFA_TAMARAMACOMPILE_H