From 9087d59be54a9e9dc0d4c3d95b0ff624c3ae08f1 Mon Sep 17 00:00:00 2001 From: Xiang Wang Date: Tue, 11 Aug 2015 05:23:12 -0400 Subject: [PATCH] tamarama: add container engine for exclusive nfas Add the new Tamarama engine that acts as a container for infix/suffix engines that can be proven to run exclusively of one another. This reduces stream state for pattern sets with many exclusive engines. --- CMakeLists.txt | 11 + src/grey.cpp | 4 + src/grey.h | 4 + src/nfa/castle_dump.cpp | 5 +- src/nfa/castle_dump.h | 6 +- src/nfa/goughdump.cpp | 8 +- src/nfa/goughdump.h | 10 +- src/nfa/lbr_dump.cpp | 17 +- src/nfa/lbr_dump.h | 18 +- src/nfa/limex.h | 4 +- src/nfa/limex_dump.cpp | 3 +- src/nfa/mcclellandump.cpp | 8 +- src/nfa/mcclellandump.h | 9 +- src/nfa/mpv_dump.cpp | 3 +- src/nfa/mpv_dump.h | 6 +- src/nfa/nfa_api.h | 14 + src/nfa/nfa_api_dispatch.c | 10 + src/nfa/nfa_build_util.cpp | 12 + src/nfa/nfa_dump_api.h | 5 +- src/nfa/nfa_dump_dispatch.cpp | 7 +- src/nfa/nfa_internal.h | 7 + src/nfa/tamarama.c | 440 ++++++++++++++++++++++++++++ src/nfa/tamarama.h | 72 +++++ src/nfa/tamarama_dump.cpp | 92 ++++++ src/nfa/tamarama_dump.h | 49 ++++ src/nfa/tamarama_internal.h | 105 +++++++ src/nfa/tamaramacompile.cpp | 175 +++++++++++ src/nfa/tamaramacompile.h | 94 ++++++ src/rose/rose_build_bytecode.cpp | 443 +++++++++++++++++++++++++++- src/rose/rose_build_exclusive.cpp | 446 +++++++++++++++++++++++++++++ src/rose/rose_build_exclusive.h | 144 ++++++++++ src/rose/rose_build_impl.h | 23 +- src/rose/rose_build_misc.cpp | 10 +- src/rose/rose_dump.cpp | 6 +- src/rose/rose_graph.h | 9 +- src/smallwrite/smallwrite_dump.cpp | 4 +- src/util/clique.cpp | 131 +++++++++ src/util/clique.h | 60 ++++ 38 files changed, 2418 insertions(+), 56 deletions(-) create mode 100644 src/nfa/tamarama.c create mode 100644 src/nfa/tamarama.h create mode 100644 src/nfa/tamarama_dump.cpp create mode 100644 src/nfa/tamarama_dump.h create mode 100644 src/nfa/tamarama_internal.h create mode 100644 src/nfa/tamaramacompile.cpp create mode 100644 src/nfa/tamaramacompile.h create mode 100644 src/rose/rose_build_exclusive.cpp create mode 100644 src/rose/rose_build_exclusive.h create mode 100644 src/util/clique.cpp create mode 100644 src/util/clique.h diff --git a/CMakeLists.txt b/CMakeLists.txt index ba3b29fa..94a54241 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -478,6 +478,9 @@ set (hs_exec_SRCS src/nfa/shufti_common.h src/nfa/shufti.c src/nfa/shufti.h + src/nfa/tamarama.c + src/nfa/tamarama.h + src/nfa/tamarama_internal.h src/nfa/truffle_common.h src/nfa/truffle.c src/nfa/truffle.h @@ -639,6 +642,8 @@ SET (hs_SRCS src/nfa/repeatcompile.h src/nfa/shufticompile.cpp src/nfa/shufticompile.h + src/nfa/tamaramacompile.cpp + src/nfa/tamaramacompile.h src/nfa/trufflecompile.cpp src/nfa/trufflecompile.h src/nfagraph/ng.cpp @@ -823,6 +828,8 @@ SET (hs_SRCS src/rose/rose_build_compile.cpp src/rose/rose_build_convert.cpp src/rose/rose_build_convert.h + src/rose/rose_build_exclusive.cpp + src/rose/rose_build_exclusive.h src/rose/rose_build_groups.cpp src/rose/rose_build_groups.h src/rose/rose_build_impl.h @@ -853,6 +860,8 @@ SET (hs_SRCS src/util/charreach.cpp src/util/charreach.h src/util/charreach_util.h + src/util/clique.cpp + src/util/clique.h src/util/compare.h src/util/compile_context.cpp src/util/compile_context.h @@ -916,6 +925,8 @@ set(hs_dump_SRCS src/nfa/nfa_dump_dispatch.cpp src/nfa/nfa_dump_internal.cpp src/nfa/nfa_dump_internal.h + src/nfa/tamarama_dump.cpp + src/nfa/tamarama_dump.h src/parser/dump.cpp src/parser/dump.h 
src/parser/position_dump.h diff --git a/src/grey.cpp b/src/grey.cpp index 1f2fd904..f4a67677 100644 --- a/src/grey.cpp +++ b/src/grey.cpp @@ -127,6 +127,8 @@ Grey::Grey(void) : limitSmallWriteOutfixSize(1048576), // 1 MB smallWriteMaxPatterns(10000), smallWriteMaxLiterals(10000), + allowTamarama(true), // Tamarama engine + tamaChunkSize(100), dumpFlags(0), limitPatternCount(8000000), // 8M patterns limitPatternLength(16000), // 16K bytes @@ -275,6 +277,8 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(limitSmallWriteOutfixSize); G_UPDATE(smallWriteMaxPatterns); G_UPDATE(smallWriteMaxLiterals); + G_UPDATE(allowTamarama); + G_UPDATE(tamaChunkSize); G_UPDATE(limitPatternCount); G_UPDATE(limitPatternLength); G_UPDATE(limitGraphVertices); diff --git a/src/grey.h b/src/grey.h index 634fa3a7..03e40ed5 100644 --- a/src/grey.h +++ b/src/grey.h @@ -145,6 +145,10 @@ struct Grey { u32 smallWriteMaxPatterns; // only try small writes if fewer patterns u32 smallWriteMaxLiterals; // only try small writes if fewer literals + // Tamarama engine + bool allowTamarama; + u32 tamaChunkSize; //!< max chunk size for exclusivity analysis in Tamarama + enum DumpFlags { DUMP_NONE = 0, DUMP_BASICS = 1 << 0, // Dump basic textual data diff --git a/src/nfa/castle_dump.cpp b/src/nfa/castle_dump.cpp index dd0e369f..fd1521a5 100644 --- a/src/nfa/castle_dump.cpp +++ b/src/nfa/castle_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,7 +48,8 @@ namespace ue2 { -void nfaExecCastle0_dumpDot(const struct NFA *, FILE *) { +void nfaExecCastle0_dumpDot(const struct NFA *, FILE *, + UNUSED const std::string &base) { // No GraphViz output for Castles. 
} diff --git a/src/nfa/castle_dump.h b/src/nfa/castle_dump.h index c0b1f899..94dadec0 100644 --- a/src/nfa/castle_dump.h +++ b/src/nfa/castle_dump.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,12 +32,14 @@ #if defined(DUMP_SUPPORT) #include <cstdio> +#include <string> struct NFA; namespace ue2 { -void nfaExecCastle0_dumpDot(const NFA *nfa, FILE *file); +void nfaExecCastle0_dumpDot(const NFA *nfa, FILE *file, + const std::string &base); void nfaExecCastle0_dumpText(const NFA *nfa, FILE *file); } // namespace ue2 diff --git a/src/nfa/goughdump.cpp b/src/nfa/goughdump.cpp index f4f15eea..4e6e5425 100644 --- a/src/nfa/goughdump.cpp +++ b/src/nfa/goughdump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -259,7 +259,8 @@ void dumpTransitions(const NFA *nfa, FILE *f, fprintf(f, "\n"); } -void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f) { +void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f, + UNUSED const string &base) { assert(nfa->type == GOUGH_NFA_8); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); @@ -302,7 +303,8 @@ void nfaExecGough8_dumpText(const struct NFA *nfa, FILE *f) { dumpTextReverse(nfa, f); } -void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f) { +void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f, + UNUSED const string &base) { assert(nfa->type == GOUGH_NFA_16); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); diff --git a/src/nfa/goughdump.h b/src/nfa/goughdump.h index 5e15356d..b96938e4 100644 --- a/src/nfa/goughdump.h +++ b/src/nfa/goughdump.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,12 +33,16 @@ #include "ue2common.h" +#include <string> + struct NFA; namespace ue2 { -void nfaExecGough8_dumpDot(const NFA *nfa, FILE *file); -void nfaExecGough16_dumpDot(const NFA *nfa, FILE *file); +void nfaExecGough8_dumpDot(const NFA *nfa, FILE *file, + const std::string &base); +void nfaExecGough16_dumpDot(const NFA *nfa, FILE *file, + const std::string &base); void nfaExecGough8_dumpText(const NFA *nfa, FILE *file); void nfaExecGough16_dumpText(const NFA *nfa, FILE *file); diff --git a/src/nfa/lbr_dump.cpp b/src/nfa/lbr_dump.cpp index 3de75333..3412ddf5 100644 --- a/src/nfa/lbr_dump.cpp +++ b/src/nfa/lbr_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -49,23 +49,28 @@ namespace ue2 { -void nfaExecLbrDot_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) { +void nfaExecLbrDot_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f, + UNUSED const std::string &base) { // No impl } -void nfaExecLbrVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) { +void nfaExecLbrVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f, + UNUSED const std::string &base) { // No impl } -void nfaExecLbrNVerm_dumpDot(UNUSED const NFA *nfa, UNUSED
FILE *f) { +void nfaExecLbrNVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f, + UNUSED const std::string &base) { // No impl } -void nfaExecLbrShuf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) { +void nfaExecLbrShuf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f, + UNUSED const std::string &base) { // No impl } -void nfaExecLbrTruf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) { +void nfaExecLbrTruf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f, + UNUSED const std::string &base) { // No impl } diff --git a/src/nfa/lbr_dump.h b/src/nfa/lbr_dump.h index 5f6dd261..06ed51e2 100644 --- a/src/nfa/lbr_dump.h +++ b/src/nfa/lbr_dump.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,16 +32,22 @@ #ifdef DUMP_SUPPORT #include <cstdio> +#include <string> struct NFA; namespace ue2 { -void nfaExecLbrDot_dumpDot(const struct NFA *nfa, FILE *file); -void nfaExecLbrVerm_dumpDot(const struct NFA *nfa, FILE *file); -void nfaExecLbrNVerm_dumpDot(const struct NFA *nfa, FILE *file); -void nfaExecLbrShuf_dumpDot(const struct NFA *nfa, FILE *file); -void nfaExecLbrTruf_dumpDot(const struct NFA *nfa, FILE *file); +void nfaExecLbrDot_dumpDot(const struct NFA *nfa, FILE *file, + const std::string &base); +void nfaExecLbrVerm_dumpDot(const struct NFA *nfa, FILE *file, + const std::string &base); +void nfaExecLbrNVerm_dumpDot(const struct NFA *nfa, FILE *file, + const std::string &base); +void nfaExecLbrShuf_dumpDot(const struct NFA *nfa, FILE *file, + const std::string &base); +void nfaExecLbrTruf_dumpDot(const struct NFA *nfa, FILE *file, + const std::string &base); void nfaExecLbrDot_dumpText(const struct NFA *nfa, FILE *file); void nfaExecLbrVerm_dumpText(const struct NFA *nfa, FILE *file); void nfaExecLbrNVerm_dumpText(const struct NFA *nfa, FILE *file); diff --git a/src/nfa/limex.h b/src/nfa/limex.h index 3d4d258b..9266b5de 100644 --- a/src/nfa/limex.h +++ b/src/nfa/limex.h @@ -30,6 +30,7 @@ #define LIMEX_H #ifdef __cplusplus +#include <string> extern "C" { #endif @@ -40,7 +41,8 @@ extern "C" #define GENERATE_NFA_DUMP_DECL(gf_name) \ } /* extern "C" */ \ namespace ue2 { \ - void gf_name##_dumpDot(const struct NFA *nfa, FILE *file); \ + void gf_name##_dumpDot(const struct NFA *nfa, FILE *file, \ + const std::string &base); \ void gf_name##_dumpText(const struct NFA *nfa, FILE *file); \ } /* namespace ue2 */ \ extern "C" { diff --git a/src/nfa/limex_dump.cpp b/src/nfa/limex_dump.cpp index 8e1ee219..207769a0 100644 --- a/src/nfa/limex_dump.cpp +++ b/src/nfa/limex_dump.cpp @@ -448,7 +448,8 @@ void dumpLimDotInfo(const limex_type *limex, u32 state, FILE *f) { } #define DUMP_DOT_FN(ddf_n) \ - void nfaExecLimEx##ddf_n##_dumpDot(const NFA *nfa, FILE *f) { \ + void nfaExecLimEx##ddf_n##_dumpDot(const NFA *nfa, FILE *f, \ + UNUSED const string &base) { \ const LimExNFA##ddf_n *limex = \ (const LimExNFA##ddf_n *)getImplNfa(nfa); \ \ diff --git a/src/nfa/mcclellandump.cpp b/src/nfa/mcclellandump.cpp index 52711bf1..dcbb0915 100644 --- a/src/nfa/mcclellandump.cpp +++ b/src/nfa/mcclellandump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -267,7 +267,8 @@ void dumpDotPreambleDfa(FILE *f) { fprintf(f, "0 [style=invis];\n"); }
-void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f) { +void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f, + UNUSED const string &base) { assert(nfa->type == MCCLELLAN_NFA_16); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); @@ -286,7 +287,8 @@ void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f) { fprintf(f, "}\n"); } -void nfaExecMcClellan8_dumpDot(const NFA *nfa, FILE *f) { +void nfaExecMcClellan8_dumpDot(const NFA *nfa, FILE *f, + UNUSED const string &base) { assert(nfa->type == MCCLELLAN_NFA_8); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); diff --git a/src/nfa/mcclellandump.h b/src/nfa/mcclellandump.h index d74a6b6d..efa61544 100644 --- a/src/nfa/mcclellandump.h +++ b/src/nfa/mcclellandump.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,6 +34,7 @@ #include "rdfa.h" #include <cstdio> +#include <string> struct mcclellan; struct mstate_aux; @@ -42,8 +43,10 @@ union AccelAux; namespace ue2 { -void nfaExecMcClellan8_dumpDot(const struct NFA *nfa, FILE *file); -void nfaExecMcClellan16_dumpDot(const struct NFA *nfa, FILE *file); +void nfaExecMcClellan8_dumpDot(const struct NFA *nfa, FILE *file, + const std::string &base); +void nfaExecMcClellan16_dumpDot(const struct NFA *nfa, FILE *file, + const std::string &base); void nfaExecMcClellan8_dumpText(const struct NFA *nfa, FILE *file); void nfaExecMcClellan16_dumpText(const struct NFA *nfa, FILE *file); diff --git a/src/nfa/mpv_dump.cpp b/src/nfa/mpv_dump.cpp index 504cc677..da21d7cf 100644 --- a/src/nfa/mpv_dump.cpp +++ b/src/nfa/mpv_dump.cpp @@ -48,7 +48,8 @@ namespace ue2 { -void nfaExecMpv0_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *file) { +void nfaExecMpv0_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *file, + UNUSED const std::string &base) { } static really_inline diff --git a/src/nfa/mpv_dump.h b/src/nfa/mpv_dump.h index 5dcd9f8b..23910dce 100644 --- a/src/nfa/mpv_dump.h +++ b/src/nfa/mpv_dump.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,12 +32,14 @@ #if defined(DUMP_SUPPORT) #include <cstdio> +#include <string> struct NFA; namespace ue2 { -void nfaExecMpv0_dumpDot(const struct NFA *nfa, FILE *file); +void nfaExecMpv0_dumpDot(const struct NFA *nfa, FILE *file, + const std::string &base); void nfaExecMpv0_dumpText(const struct NFA *nfa, FILE *file); } // namespace ue2 diff --git a/src/nfa/nfa_api.h b/src/nfa/nfa_api.h index dad3894a..3ef6dfca 100644 --- a/src/nfa/nfa_api.h +++ b/src/nfa/nfa_api.h @@ -120,6 +120,13 @@ char nfaInitCompressedState(const struct NFA *nfa, u64a offset, void *state, */ char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end); +/** + * Main execution function that doesn't perform the checks and optimisations of + * nfaQueueExec() and just dispatches directly to the nfa implementations. It is + * intended to be used by the Tamarama engine. + */ +char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end); + /** Return value indicating that the engine is alive.
*/ #define MO_ALIVE 1 @@ -155,6 +162,13 @@ char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end); */ char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end); +/** + * Main execution function that doesn't perform the checks and optimisations of + * nfaQueueExecToMatch() and just dispatches directly to the nfa + * implementations. It is intended to be used by the Tamarama engine. + */ +char nfaQueueExec2_raw(const struct NFA *nfa, struct mq *q, s64a end); + /** * Report matches at the current queue location. * diff --git a/src/nfa/nfa_api_dispatch.c b/src/nfa/nfa_api_dispatch.c index 9591cad5..b9c9f2ea 100644 --- a/src/nfa/nfa_api_dispatch.c +++ b/src/nfa/nfa_api_dispatch.c @@ -42,6 +42,7 @@ #include "limex.h" #include "mcclellan.h" #include "mpv.h" +#include "tamarama.h" #define DISPATCH_CASE(dc_ltype, dc_ftype, dc_subtype, dc_func_call) \ case dc_ltype##_NFA_##dc_subtype: \ @@ -68,6 +69,7 @@ DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \ DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \ DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \ + DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func); \ default: \ assert(0); \ } @@ -105,6 +107,14 @@ char nfaQueueExec2_i(const struct NFA *nfa, struct mq *q, s64a end) { return 0; } +char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end) { + return nfaQueueExec_i(nfa, q, end); +} + +char nfaQueueExec2_raw(const struct NFA *nfa, struct mq *q, s64a end) { + return nfaQueueExec2_i(nfa, q, end); +} + static really_inline char nfaQueueExecRose_i(const struct NFA *nfa, struct mq *q, ReportID report) { DISPATCH_BY_NFA_TYPE(_QR(nfa, q, report)); diff --git a/src/nfa/nfa_build_util.cpp b/src/nfa/nfa_build_util.cpp index 96d0dabe..9244dcfb 100644 --- a/src/nfa/nfa_build_util.cpp +++ b/src/nfa/nfa_build_util.cpp @@ -300,6 +300,18 @@ const has_accel_fn NFATraits<LBR_NFA_Truf>::has_accel = has_accel_generic; const char *NFATraits<LBR_NFA_Truf>::name = "Lim Bounded Repeat (M)"; #endif +template<> struct NFATraits<TAMARAMA_NFA_0> { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 32; + static const bool fast = true; + static const has_accel_fn has_accel; +}; +const has_accel_fn NFATraits<TAMARAMA_NFA_0>::has_accel = has_accel_generic; +#if defined(DUMP_SUPPORT) +const char *NFATraits<TAMARAMA_NFA_0>::name = "Tamarama"; +#endif + } // namespace #if defined(DUMP_SUPPORT) diff --git a/src/nfa/nfa_dump_api.h b/src/nfa/nfa_dump_api.h index 8675dd5d..1054a204 100644 --- a/src/nfa/nfa_dump_api.h +++ b/src/nfa/nfa_dump_api.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,6 +36,7 @@ #if defined(DUMP_SUPPORT) #include <cstdio> +#include <string> struct NFA; namespace ue2 { * \brief Dump (in Graphviz 'dot' format) a representation of the NFA into the * file pointed to by dotFile. */ -void nfaDumpDot(const struct NFA *nfa, FILE *dotFile); +void nfaDumpDot(const struct NFA *nfa, FILE *dotFile, const std::string &base); /** \brief Dump a textual representation of the NFA.
*/ void nfaDumpText(const struct NFA *fact, FILE *textFile); diff --git a/src/nfa/nfa_dump_dispatch.cpp b/src/nfa/nfa_dump_dispatch.cpp index 577c2fd0..cf2aa7f5 100644 --- a/src/nfa/nfa_dump_dispatch.cpp +++ b/src/nfa/nfa_dump_dispatch.cpp @@ -40,6 +40,7 @@ #include "limex.h" #include "mcclellandump.h" #include "mpv_dump.h" +#include "tamarama_dump.h" #ifndef DUMP_SUPPORT #error "no dump support" @@ -73,12 +74,14 @@ namespace ue2 { DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \ DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \ DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \ + DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func); \ default: \ assert(0); \ } -void nfaDumpDot(const struct NFA *nfa, FILE *dotFile) { - DISPATCH_BY_NFA_TYPE(_dumpDot(nfa, dotFile)); +void nfaDumpDot(const struct NFA *nfa, FILE *dotFile, + const std::string &base) { + DISPATCH_BY_NFA_TYPE(_dumpDot(nfa, dotFile, base)); } void nfaDumpText(const struct NFA *nfa, FILE *txtFile) { diff --git a/src/nfa/nfa_internal.h b/src/nfa/nfa_internal.h index d0a4ca0b..a3703cb5 100644 --- a/src/nfa/nfa_internal.h +++ b/src/nfa/nfa_internal.h @@ -67,6 +67,7 @@ enum NFAEngineType { LBR_NFA_Shuf, /**< magic pseudo nfa */ LBR_NFA_Truf, /**< magic pseudo nfa */ CASTLE_NFA_0, /**< magic pseudo nfa */ + TAMARAMA_NFA_0, /**< magic nfa container */ /** \brief bogus NFA - not used */ INVALID_NFA }; @@ -173,6 +174,12 @@ int isLbrType(u8 t) { t == LBR_NFA_Shuf || t == LBR_NFA_Truf; } +/** \brief True if the given type (from NFA::type) is a container engine. */ +static really_inline +int isContainerType(u8 t) { + return t == TAMARAMA_NFA_0; +} + static really_inline int isMultiTopType(u8 t) { return !isDfaType(t) && !isLbrType(t); diff --git a/src/nfa/tamarama.c b/src/nfa/tamarama.c new file mode 100644 index 00000000..e8dd7690 --- /dev/null +++ b/src/nfa/tamarama.c @@ -0,0 +1,440 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + \brief Tamarama: container engine for exclusive engines, runtime code. 
+*/ +#include "config.h" + +#include "tamarama.h" + +#include "tamarama_internal.h" +#include "nfa_api.h" +#include "nfa_api_queue.h" +#include "nfa_api_util.h" +#include "nfa_internal.h" +#include "scratch.h" +#include "util/partial_store.h" + +static really_inline +u32 getSubOffset(const struct Tamarama *t, u32 num) { + DEBUG_PRINTF("subengine:%u\n", num); + assert(num < t->numSubEngines); + const u32 *sub = + (const u32 *)((const char *)t + sizeof(struct Tamarama) + + t->numSubEngines * sizeof(u32)); + assert(ISALIGNED(sub)); + return sub[num]; +} + +static +const struct NFA *getSubEngine(const struct Tamarama *t, + const u32 activeIdx) { + const u32 offset = getSubOffset(t, activeIdx); + DEBUG_PRINTF("activeIdx:%u offsets:%u\n", activeIdx, offset); + const char *base = (const char *)t; + return (const struct NFA *)(base + offset); +} + +static +void storeActiveIdx(const struct Tamarama *t, char *state, + const u32 idx) { + assert(idx <= t->numSubEngines); + partial_store_u32(state, idx, t->activeIdxSize); +} + +static +u32 loadActiveIdx(const char *state, + const u32 activeIdxSize) { + return partial_load_u32(state, activeIdxSize); +} + +static really_inline +void copyQueueProperties(const struct mq *q1, struct mq *q2, + const u32 activeIdxSize) { + q2->state = q1->state; + q2->streamState = q1->streamState + activeIdxSize; + q2->offset = q1->offset; + q2->buffer = q1->buffer; + q2->length = q1->length; + q2->history = q1->history; + q2->hlength = q1->hlength; + q2->cb = q1->cb; + q2->som_cb = q1->som_cb; + q2->context = q1->context; + q2->scratch = q1->scratch; + q2->report_current = q1->report_current; +} + +static +void copyQueueItems(const struct Tamarama *t, const struct NFA *sub, + struct mq *q1, struct mq *q2, const u32 activeIdx) { + const u32 *baseTop = (const u32 *)((const char *)t + + sizeof(struct Tamarama)); + + u32 lower = baseTop[activeIdx]; + u32 upper = activeIdx == t->numSubEngines - 1 ? + ~0U : baseTop[activeIdx + 1]; + u32 event_base = isMultiTopType(sub->type) ? 
MQE_TOP_FIRST : MQE_TOP; + while (q1->cur < q1->end) { + u32 type = q1->items[q1->cur].type; + s64a loc = q1->items[q1->cur].location; + DEBUG_PRINTF("type:%u lower:%u upper:%u\n", type, lower, upper); + if (type >= lower && type < upper) { + u32 event = event_base; + if (event == MQE_TOP_FIRST) { + event += type - lower; + } + pushQueue(q2, event, loc); + } else { + pushQueueNoMerge(q2, MQE_END, loc); + break; + } + q1->cur++; + } +} + +static +void copyQueue(const struct Tamarama *t, const struct NFA *sub, + struct mq *q1, struct mq *q2, const u32 activeIdx) { + copyQueueProperties(q1, q2, t->activeIdxSize); + + // copy MQE_START item + u32 cur = q1->cur++; + q2->cur = cur; + q2->items[cur] = q1->items[cur]; + q2->end = cur + 1; + + copyQueueItems(t, sub, q1, q2, activeIdx); + // restore cur index of the main queue + q1->cur = cur; +} + +static +u32 findEngineForTop(const u32 *baseTop, const u32 cur, + const u32 numSubEngines) { + u32 i; + for (i = 0; i < numSubEngines; ++i) { + DEBUG_PRINTF("cur:%u base:%u\n", cur, baseTop[i]); + if (cur >= baseTop[i] && + (i == numSubEngines - 1 || cur < baseTop[i + 1])) { + break; + } + } + return i; +} + +static +void initSubQueue(const struct Tamarama *t, struct mq *q1, + struct mq *q2, const u32 lastActiveIdx, + const u32 activeIdx) { + // Push events to the new queue + const struct NFA *sub = getSubEngine(t, activeIdx); + assert(!isContainerType(sub->type)); + q2->nfa = sub; + + // Reinitialize state if the last active subengine is different + // from current one + if (lastActiveIdx == t->numSubEngines || + lastActiveIdx != activeIdx) { + nfaQueueInitState(q2->nfa, q2); + } + + copyQueueItems(t, sub, q1, q2, activeIdx); + if (q1->items[q1->cur].type == MQE_END) { + q1->cur++; + } + DEBUG_PRINTF("update lastIdx:%u\n", activeIdx); + storeActiveIdx(t, q1->streamState, activeIdx); +} + +static +void updateQueues(const struct Tamarama *t, struct mq *q1, struct mq *q2) { + q2->cur = q2->end = 0; + copyQueueProperties(q1, q2, t->activeIdxSize); + + const u32 numSubEngines = t->numSubEngines; + u32 lastActiveIdx = loadActiveIdx(q1->streamState, + t->activeIdxSize); +#ifdef DEBUG + DEBUG_PRINTF("external queue\n"); + debugQueue(q1); +#endif + + // Push MQE_START event to the subqueue + s64a loc = q1->items[q1->cur].location; + pushQueueAt(q2, 0, MQE_START, loc); + char hasStart = 0; + if (q1->items[q1->cur].type == MQE_START) { + hasStart = 1; + q1->cur++; + } + + u32 activeIdx = lastActiveIdx; + // If we have top events in the main queue, update current active id + if (q1->cur < q1->end - 1) { + const u32 *baseTop = (const u32 *)((const char *)t + + sizeof(struct Tamarama)); + u32 curTop = q1->items[q1->cur].type; + activeIdx = findEngineForTop(baseTop, curTop, numSubEngines); + } + + assert(activeIdx < numSubEngines); + DEBUG_PRINTF("last id:%u, current id:%u, num of subengines:%u\n", + lastActiveIdx, activeIdx, numSubEngines); + // Handle unfinished last alive subengine + if (lastActiveIdx != activeIdx && + lastActiveIdx != numSubEngines && hasStart) { + loc = q1->items[q1->cur].location; + pushQueueNoMerge(q2, MQE_END, loc); + q2->nfa = getSubEngine(t, lastActiveIdx); + return; + } + + initSubQueue(t, q1, q2, lastActiveIdx, activeIdx); + DEBUG_PRINTF("finish queues\n"); +} + +// After processing the subqueue for a subengine, copy any remaining +// subqueue items back to the Tamarama main queue +static +void copyBack(const struct Tamarama *t, struct mq *q, struct mq *q1) { + DEBUG_PRINTF("copy back %u, %u\n", q1->cur, q1->end); +
q->report_current = q1->report_current; + if (q->cur >= q->end && q1->cur >= q1->end) { + return; + } + + const u32 *baseTop = (const u32 *)((const char *)t + + sizeof(struct Tamarama)); + const u32 lastIdx = loadActiveIdx(q->streamState, + t->activeIdxSize); + u32 base = 0, event_base = 0; + if (lastIdx != t->numSubEngines) { + base = baseTop[lastIdx]; + const struct NFA *sub = getSubEngine(t, lastIdx); + event_base = isMultiTopType(sub->type) ? MQE_TOP_FIRST : MQE_TOP; + } + + u32 numItems = q1->end > q1->cur + 1 ? q1->end - q1->cur - 1 : 1; + // Also need to copy MQE_END if the main queue is empty + if (q->cur == q->end) { + numItems++; + } + u32 cur = q->cur - numItems; + q->items[cur] = q1->items[q1->cur++]; + q->items[cur].type = MQE_START; + q->cur = cur++; + for (u32 i = 0; i < numItems - 1; ++i) { + u32 type = q1->items[q1->cur].type; + if (type > MQE_END) { + q1->items[q1->cur].type = type - event_base + base; + } + q->items[cur++] = q1->items[q1->cur++]; + } + +#ifdef DEBUG + DEBUG_PRINTF("external queue\n"); + debugQueue(q); +#endif +} + +char nfaExecTamarama0_testEOD(const struct NFA *n, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, SomNfaCallback som_cb, + void *context) { + const struct Tamarama *t = getImplNfa(n); + u32 activeIdx = loadActiveIdx(streamState, t->activeIdxSize); + if (activeIdx == t->numSubEngines) { + return MO_CONTINUE_MATCHING; + } + + const struct NFA *sub = getSubEngine(t, activeIdx); + if (nfaAcceptsEod(sub)) { + assert(!isContainerType(sub->type)); + const char *subStreamState = streamState + t->activeIdxSize; + return nfaCheckFinalState(sub, state, subStreamState, + offset, callback, som_cb, context); + } + + return MO_CONTINUE_MATCHING; +} + +char nfaExecTamarama0_QR(const struct NFA *n, struct mq *q, + ReportID report) { + DEBUG_PRINTF("exec rose\n"); + struct mq q1; + q1.cur = q1.end = 0; + char rv = 0; + const struct Tamarama *t = getImplNfa(n); + while (q->cur < q->end) { + updateQueues(t, q, &q1); + } + + if (q1.cur < q1.end) { + rv = nfaQueueExecRose(q1.nfa, &q1, report); + } + + DEBUG_PRINTF("exec rose rv:%u\n", rv); + return rv; +} + +char nfaExecTamarama0_reportCurrent(const struct NFA *n, struct mq *q) { + const struct Tamarama *t = getImplNfa(n); + u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); + if (activeIdx == t->numSubEngines) { + return 1; + } + + const struct NFA *sub = getSubEngine(t, activeIdx); + struct mq q1; + copyQueue(t, sub, q, &q1, activeIdx); + return nfaReportCurrentMatches(sub, &q1); +} + +char nfaExecTamarama0_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { + const struct Tamarama *t = getImplNfa(n); + u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); + if (activeIdx == t->numSubEngines) { + return 0; + } + const struct NFA *sub = getSubEngine(t, activeIdx); + + struct mq q1; + copyQueue(t, sub, q, &q1, activeIdx); + return nfaInAcceptState(sub, report, &q1); +} + +char nfaExecTamarama0_inAnyAccept(const struct NFA *n, struct mq *q) { + const struct Tamarama *t = getImplNfa(n); + u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); + if (activeIdx == t->numSubEngines) { + return 0; + } + const struct NFA *sub = getSubEngine(t, activeIdx); + + struct mq q1; + copyQueue(t, sub, q, &q1, activeIdx); + return nfaInAnyAcceptState(sub, &q1); +}
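The packed active-index header read and written above is the only stream state Tamarama adds on top of the widest subengine's state. A minimal standalone sketch of that packing, assuming a little-endian host; partialStore()/partialLoad() are simplified stand-ins for the library's partial_store_u32()/partial_load_u32() from util/partial_store.h:

#include <cassert>
#include <cstdint>
#include <cstring>

// Stand-ins for partial_store_u32()/partial_load_u32(), little-endian host.
static void partialStore(char *ptr, uint32_t val, unsigned bytes) {
    assert(bytes >= 1 && bytes <= 4);
    std::memcpy(ptr, &val, bytes); // low-order bytes carry the value
}

static uint32_t partialLoad(const char *ptr, unsigned bytes) {
    uint32_t val = 0;
    std::memcpy(&val, ptr, bytes);
    return val;
}

int main() {
    // Three subengines: packing 3 + 1 possible values needs one header byte.
    char streamState[1];
    partialStore(streamState, 3, 1);          // 3 == numSubEngines: sentinel,
    assert(partialLoad(streamState, 1) == 3); // i.e. no subengine active yet
    partialStore(streamState, 1, 1);          // a top has woken subengine 1
    assert(partialLoad(streamState, 1) == 1);
    return 0;
}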
+ +char nfaExecTamarama0_queueInitState(const struct NFA *n, struct mq *q) { + DEBUG_PRINTF("init state\n"); + const struct Tamarama *t = getImplNfa(n); + char *ptr = q->streamState; + // Use numSubEngines as a sentinel value and initialize the state to + // an invalid engine as nothing has been triggered yet + storeActiveIdx(t, ptr, t->numSubEngines); + return 0; +} + +char nfaExecTamarama0_queueCompressState(const struct NFA *n, + const struct mq *q, s64a loc) { + const struct Tamarama *t = getImplNfa(n); + u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); + if (activeIdx == t->numSubEngines) { + return 0; + } + + const struct NFA *sub = getSubEngine(t, activeIdx); + + struct mq q1; + copyQueueProperties(q, &q1, t->activeIdxSize); + return nfaQueueCompressState(sub, &q1, loc); +} + +char nfaExecTamarama0_expandState(const struct NFA *n, void *dest, + const void *src, u64a offset, u8 key) { + const struct Tamarama *t = getImplNfa(n); + u32 activeIdx = loadActiveIdx(src, t->activeIdxSize); + if (activeIdx == t->numSubEngines) { + return 0; + } + + const struct NFA *sub = getSubEngine(t, activeIdx); + + const char *subStreamState = (const char *)src + t->activeIdxSize; + return nfaExpandState(sub, dest, subStreamState, offset, key); +} + +enum nfa_zombie_status nfaExecTamarama0_zombie_status(const struct NFA *n, + struct mq *q, s64a loc) { + const struct Tamarama *t = getImplNfa(n); + u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); + if (activeIdx == t->numSubEngines) { + return NFA_ZOMBIE_NO; + } + const struct NFA *sub = getSubEngine(t, activeIdx); + + struct mq q1; + copyQueue(t, sub, q, &q1, activeIdx); + return nfaGetZombieStatus(sub, &q1, loc); +} + +char nfaExecTamarama0_Q(const struct NFA *n, struct mq *q, s64a end) { + DEBUG_PRINTF("exec\n"); + struct mq q1; + char rv = MO_ALIVE; + char copy = 0; + const struct Tamarama *t = getImplNfa(n); + while (q->cur < q->end && q_cur_loc(q) <= end) { + updateQueues(t, q, &q1); + rv = nfaQueueExec_raw(q1.nfa, &q1, end); + q->report_current = q1.report_current; + copy = 1; + if (can_stop_matching(q->scratch)) { + break; + } + } + if (copy) { + copyBack(t, q, &q1); + } + return rv; +} + +char nfaExecTamarama0_Q2(const struct NFA *n, + struct mq *q, s64a end) { + DEBUG_PRINTF("exec to match\n"); + struct mq q1; + char rv = 0; + char copy = 0; + const struct Tamarama *t = getImplNfa(n); + while (q->cur < q->end && q_cur_loc(q) <= end && + rv != MO_MATCHES_PENDING) { + updateQueues(t, q, &q1); + rv = nfaQueueExec2_raw(q1.nfa, &q1, end); + q->report_current = q1.report_current; + copy = 1; + } + if (copy) { + copyBack(t, q, &q1); + } + return rv; +} + diff --git a/src/nfa/tamarama.h b/src/nfa/tamarama.h new file mode 100644 index 00000000..c39639a6 --- /dev/null +++ b/src/nfa/tamarama.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef TAMARAMA_H +#define TAMARAMA_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include "callback.h" +#include "ue2common.h" + +struct mq; +struct NFA; +struct hs_scratch; + +char nfaExecTamarama0_testEOD(const struct NFA *n, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, SomNfaCallback som_cb, + void *context); +char nfaExecTamarama0_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecTamarama0_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecTamarama0_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecTamarama0_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecTamarama0_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecTamarama0_queueCompressState(const struct NFA *n, + const struct mq *q, + s64a loc); +char nfaExecTamarama0_expandState(const struct NFA *n, void *dest, + const void *src, u64a offset, u8 key); +enum nfa_zombie_status nfaExecTamarama0_zombie_status(const struct NFA *n, + struct mq *q, s64a loc); +char nfaExecTamarama0_Q(const struct NFA *nfa, struct mq *q, s64a end); +char nfaExecTamarama0_Q2(const struct NFA *nfa, struct mq *q, s64a end); + +// only used by outfix and miracles, no implementation for tamarama +#define nfaExecTamarama0_initCompressedState NFA_API_NO_IMPL +#define nfaExecTamarama0_B_Reverse NFA_API_NO_IMPL + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/nfa/tamarama_dump.cpp b/src/nfa/tamarama_dump.cpp new file mode 100644 index 00000000..ed2f1cb1 --- /dev/null +++ b/src/nfa/tamarama_dump.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Tamarama: container engine for exclusive engines, dump code. + */ + +#include "config.h" + +#include "tamarama_dump.h" + +#include "tamarama_internal.h" +#include "nfa_dump_api.h" +#include "nfa_dump_internal.h" +#include "nfa_internal.h" + +#include <sstream> +#include <string> + +#ifndef DUMP_SUPPORT +#error No dump support! +#endif + +namespace ue2 { + +void nfaExecTamarama0_dumpDot(const struct NFA *nfa, UNUSED FILE *f, + const std::string &base) { + const Tamarama *t = (const Tamarama *)getImplNfa(nfa); + const u32 *subOffset = + (const u32 *)((const char *)t + sizeof(struct Tamarama) + + t->numSubEngines * sizeof(u32)); + const char *offset = (const char *)t; // offsets are relative to Tamarama + for (u32 i = 0; i < t->numSubEngines; i++) { + std::stringstream ssdot; + ssdot << base << "rose_nfa_" << nfa->queueIndex + << "_sub_" << i << ".dot"; + const NFA *sub = (const struct NFA *)(offset + subOffset[i]); + FILE *f1 = fopen(ssdot.str().c_str(), "w"); + nfaDumpDot(sub, f1, base); + fclose(f1); + } +} + +void nfaExecTamarama0_dumpText(const struct NFA *nfa, FILE *f) { + const Tamarama *t = (const Tamarama *)getImplNfa(nfa); + + fprintf(f, "Tamarama container engine\n"); + fprintf(f, "\n"); + fprintf(f, "Number of subengine tenants: %u\n", t->numSubEngines); + + fprintf(f, "\n"); + dumpTextReverse(nfa, f); + fprintf(f, "\n"); + + const u32 *subOffset = + (const u32 *)((const char *)t + sizeof(struct Tamarama) + + t->numSubEngines * sizeof(u32)); + const char *offset = (const char *)t; // offsets are relative to Tamarama + for (u32 i = 0; i < t->numSubEngines; i++) { + fprintf(f, "Sub %u:\n", i); + const NFA *sub = (const struct NFA *)(offset + subOffset[i]); + nfaDumpText(sub, f); + fprintf(f, "\n"); + } +} + +} // namespace ue2 diff --git a/src/nfa/tamarama_dump.h b/src/nfa/tamarama_dump.h new file mode 100644 index 00000000..dc976004 --- /dev/null +++ b/src/nfa/tamarama_dump.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef TAMARAMA_DUMP_H +#define TAMARAMA_DUMP_H + +#if defined(DUMP_SUPPORT) + +#include <cstdio> +#include <string> + +struct NFA; + +namespace ue2 { + +void nfaExecTamarama0_dumpDot(const NFA *nfa, FILE *file, + const std::string &base); +void nfaExecTamarama0_dumpText(const NFA *nfa, FILE *file); + +} // namespace ue2 + +#endif // DUMP_SUPPORT + +#endif diff --git a/src/nfa/tamarama_internal.h b/src/nfa/tamarama_internal.h new file mode 100644 index 00000000..5cdc70d4 --- /dev/null +++ b/src/nfa/tamarama_internal.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Tamarama: container engine for exclusive engines, + * data structures. + */ + +/* Tamarama bytecode layout: + * * |-----| + * * | | struct NFA + * * |-----| + * * | | struct Tamarama + * * | | + * * |-----| + * * | | top remapping table: + * * | | stores top base for each subengine. + * * | | old_top = remapped_top - top_base; + * * | | The size of the table is equal to the number of subengines. + * * ... + * * | | + * * |-----| + * * | | offsets from the start of struct Tamarama to subengines --\ + * * ... | + * * | | -----------\ | + * * |-----| | | + * * ||--| | subengine 1 (struct NFA + rest of subengine) <--/ | + * * || | | | + * * ||--| | | + * * || | | | + * * || | | | + * * ||--| | | + * * | | | + * * ||--| | subengine 2 (struct NFA + rest of subengine) <-------/ + * * || | | + * * ||--| | + * * || | | + * * || | | + * * ||--| | + * * | | + * * ... + * * | | + * * |-----| total size of tamarama + * * + * * Tamarama stream state: + * * + * * |---| + * * | | active subengine id + * * |---| + * * | | common pool of stream state for each engine + * * | | + * * | | + * * ... + * * | | + * * | | + * * |---| + * * + * * Tamarama scratch space: + * * + * * |---| + * * | | common pool of scratch for each engine + * * | | + * * | | + * * ... + * * | | + * * | | + * * |---| + * */
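A sketch of walking the layout documented above, mirroring getSubOffset()/getSubEngine() in tamarama.c: skip past the top remapping table to reach the offset table, then jump to the subengine. The struct definitions here are trimmed stand-ins (the real Tamarama carries an alignment directive), and offsets are relative to the start of struct Tamarama, as in the compiler code:

#include <cstdint>

struct NFA;                                        // opaque engine header
struct Tamarama { uint32_t numSubEngines; uint8_t activeIdxSize; };

static const NFA *subEngineAt(const Tamarama *t, uint32_t idx) {
    const char *base = (const char *)t;
    // The top remapping table (numSubEngines u32s) sits right after the
    // Tamarama header; the offset table follows it.
    const uint32_t *offsets =
        (const uint32_t *)(base + sizeof(Tamarama) +
                           t->numSubEngines * sizeof(uint32_t));
    return (const NFA *)(base + offsets[idx]);     // offset relative to t
}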
+ +#ifndef NFA_TAMARAMA_INTERNAL_H +#define NFA_TAMARAMA_INTERNAL_H + +#include "ue2common.h" + +struct ALIGN_AVX_DIRECTIVE Tamarama { + u32 numSubEngines; + u8 activeIdxSize; +}; + +#endif // NFA_TAMARAMA_INTERNAL_H diff --git a/src/nfa/tamaramacompile.cpp b/src/nfa/tamaramacompile.cpp new file mode 100644 index 00000000..73d19595 --- /dev/null +++ b/src/nfa/tamaramacompile.cpp @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Tamarama: container engine for exclusive engines, + * compiler code. + */ + +#include "config.h" + +#include "tamaramacompile.h" + +#include "tamarama_internal.h" +#include "nfa_internal.h" +#include "nfa_api_queue.h" +#include "repeatcompile.h" +#include "util/container.h" +#include "util/verify_types.h" + +using namespace std; + +namespace ue2 { + +static +void remapTops(const TamaInfo &tamaInfo, + vector<u32> &top_base, + map<pair<const NFA *, u32>, u32> &out_top_remap) { + u32 i = 0; + u32 cur = 0; + for (const auto &sub : tamaInfo.subengines) { + u32 base = cur; + top_base.push_back(base + MQE_TOP_FIRST); + DEBUG_PRINTF("subengine:%u\n", i); + for (const auto &t : tamaInfo.tops[i++]) { + cur = base + t; + DEBUG_PRINTF("top remapping %u:%u\n", t, cur); + out_top_remap.emplace(make_pair(sub, t), cur++); + } + } +}
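To make the rebasing arithmetic concrete, here is a toy re-run of the out_top_remap part of remapTops(), with plain ints standing in for the NFA pointers (the MQE_TOP_FIRST offset applied to top_base is omitted): subengine 0 with tops {2, 3} and subengine 1 with top {1} end up with remapped tops 2, 3 and 5, so the two engines' top ranges never overlap.

#include <cstdio>
#include <map>
#include <set>
#include <utility>
#include <vector>

int main() {
    std::vector<std::set<unsigned>> tops = {{2, 3}, {1}}; // two subengines
    std::map<std::pair<int, unsigned>, unsigned> remap;
    unsigned cur = 0;
    for (int sub = 0; sub < 2; sub++) {
        unsigned base = cur;            // each engine rebases above the last
        for (unsigned t : tops[sub]) {
            cur = base + t;
            remap[{sub, t}] = cur++;
        }
    }
    // Prints: (0,2)->2 (0,3)->3 (1,1)->5 -- subengine 1's range starts
    // above subengine 0's highest remapped top.
    for (const auto &m : remap) {
        std::printf("(%d,%u)->%u\n", m.first.first, m.first.second, m.second);
    }
    return 0;
}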
+ */ +static +void copyInSubnfas(const char *base_offset, NFA &nfa, + const TamaInfo &tamaInfo, u32 *offsets, + char *sub_nfa_offset, const u32 activeIdxSize) { + u32 maxStreamStateSize = 0; + u32 maxScratchStateSize = 0; + sub_nfa_offset = ROUNDUP_PTR(sub_nfa_offset, 64); + bool infinite_max_width = false; + for (auto &sub : tamaInfo.subengines) { + u32 streamStateSize = verify_u32(sub->streamStateSize); + u32 scratchStateSize = verify_u32(sub->scratchStateSize); + maxStreamStateSize = max(maxStreamStateSize, streamStateSize); + maxScratchStateSize = max(maxScratchStateSize, scratchStateSize); + sub->queueIndex = nfa.queueIndex; + + memcpy(sub_nfa_offset, sub, sub->length); + *offsets = verify_u32(sub_nfa_offset - base_offset); + DEBUG_PRINTF("type:%u offsets:%u\n", sub->type, *offsets); + ++offsets; + sub_nfa_offset += ROUNDUP_CL(sub->length); + + // update nfa properties + nfa.flags |= sub->flags; + if (!sub->maxWidth) { + infinite_max_width = true; + } else if (!infinite_max_width) { + nfa.maxWidth = max(nfa.maxWidth, sub->maxWidth); + } + } + + if (infinite_max_width) { + nfa.maxWidth = 0; + } + nfa.maxBiAnchoredWidth = 0; + nfa.streamStateSize = activeIdxSize + maxStreamStateSize; + nfa.scratchStateSize = maxScratchStateSize; +} + +/** + * Take in a collection of exclusive sub engines and produces a tamarama, also + * returns via out_top_remap, a mapping indicating how tops in the subengines in + * relate to the tamarama's tops. + */ +aligned_unique_ptr buildTamarama(const TamaInfo &tamaInfo, const u32 queue, + map, u32> &out_top_remap) { + vector top_base; + remapTops(tamaInfo, top_base, out_top_remap); + + size_t subSize = tamaInfo.subengines.size(); + DEBUG_PRINTF("subSize:%lu\n", subSize); + size_t total_size = + sizeof(NFA) + // initial NFA structure + sizeof(Tamarama) + // Tamarama structure + sizeof(u32) * subSize + // base top event value for subengines, + // used for top remapping at runtime + sizeof(u32) * subSize + 64; // offsets to subengines in bytecode and + // padding for subengines + + for (const auto &sub : tamaInfo.subengines) { + total_size += ROUNDUP_CL(sub->length); + } + + // use subSize as a sentinel value for no active subengines, + // so add one to subSize here + u32 activeIdxSize = calcPackedBytes(subSize + 1); + aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size); + nfa->type = verify_u8(TAMARAMA_NFA_0); + nfa->length = verify_u32(total_size); + nfa->queueIndex = queue; + + char *ptr = (char *)nfa.get() + sizeof(NFA); + char *base_offset = ptr; + Tamarama *t = (Tamarama *)ptr; + t->numSubEngines = verify_u32(subSize); + t->activeIdxSize = verify_u8(activeIdxSize); + + ptr += sizeof(Tamarama); + copy_bytes(ptr, top_base); + ptr += byte_length(top_base); + + u32 *offsets = (u32*)ptr; + char *sub_nfa_offset = ptr + sizeof(u32) * subSize; + copyInSubnfas(base_offset, *nfa, tamaInfo, offsets, sub_nfa_offset, + activeIdxSize); + assert((size_t)(sub_nfa_offset - (char *)nfa.get()) <= total_size); + return nfa; +} + +set all_reports(const TamaProto &proto) { + return proto.reports; +} + +void TamaInfo::add(NFA *sub, const set &top) { + assert(subengines.size() < max_occupancy); + subengines.push_back(sub); + tops.push_back(top); +} + +void TamaProto::add(const NFA *n, const u32 id, const u32 top, + const map, u32> &out_top_remap) { + top_remap.emplace(make_pair(id, top), out_top_remap.at(make_pair(n, top))); +} + +} // namespace ue2 + diff --git a/src/nfa/tamaramacompile.h b/src/nfa/tamaramacompile.h new file mode 100644 index 00000000..048b966b --- 
diff --git a/src/nfa/tamaramacompile.h b/src/nfa/tamaramacompile.h new file mode 100644 index 00000000..048b966b --- /dev/null +++ b/src/nfa/tamaramacompile.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Tamarama: container engine for exclusive engines, compiler code. + */ + +#ifndef NFA_TAMARAMACOMPILE_H +#define NFA_TAMARAMACOMPILE_H + +#include "ue2common.h" +#include "util/alloc.h" + +#include <map> +#include <set> +#include <vector> + +struct NFA; + +namespace ue2 { + +/** + * \brief A TamaProto that contains top remapping and reports info + */ +struct TamaProto { + void add(const NFA *n, const u32 id, const u32 top, + const std::map<std::pair<const NFA *, u32>, u32> &out_top_remap); + /** Top remapping between <vertex id, top value> and + ** remapped top value. */ + std::map<std::pair<u32, u32>, u32> top_remap; + + /** All the reports in subengines */ + std::set<ReportID> reports; +}; + +/** + * \brief Construction info for a Tamarama engine: + * contains at least two subengines. + * + * A TamaInfo is converted into a single NFA, with each top triggering a + * subengine. A TamaInfo can contain at most TamaInfo::max_occupancy + * subengines. + */ +struct TamaInfo { + static constexpr size_t max_occupancy = 65536; // arbitrary limit + + /** \brief Add a new subengine. */ + void add(NFA* sub, const std::set<u32> &top); + + /** \brief All the subengines */ + std::vector<NFA *> subengines; + + /** \brief Tops of subengines */ + std::vector<std::set<u32>> tops; +}; + +std::set<ReportID> all_reports(const TamaProto &proto); +
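The two maps above are deliberately keyed differently: buildTamarama() reports out_top_remap keyed by (subengine pointer, original top), while TamaProto::top_remap is keyed by (vertex id, top) for the Rose compiler's use. A toy translation between the two, with ints standing in for the NFA pointers and all concrete values invented for illustration:

#include <cassert>
#include <map>
#include <utility>

int main() {
    using Key = std::pair<int, unsigned>;
    // out_top_remap as filled by buildTamarama(): (engine, old top) -> top.
    std::map<Key, unsigned> out_top_remap = {
        {{7, 2}, 2}, {{7, 3}, 3}, {{9, 1}, 5}}; // engines "7" and "9"
    // TamaProto::top_remap: keyed by (vertex id, old top) instead.
    std::map<Key, unsigned> top_remap;
    top_remap[{0, 3}] = out_top_remap.at({7, 3}); // vertex 0 uses engine 7
    top_remap[{1, 1}] = out_top_remap.at({9, 1}); // vertex 1 uses engine 9
    assert(top_remap.at({0, 3}) == 3 && top_remap.at({1, 1}) == 5);
    return 0;
}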
+/** + * Takes in a collection of exclusive subengines and produces a tamarama; it + * also returns, via out_top_remap, a mapping indicating how tops in the + * subengines relate to the tamarama's tops. + */ +ue2::aligned_unique_ptr<NFA> buildTamarama(const TamaInfo &tamaInfo, + const u32 queue, + std::map<std::pair<const NFA *, u32>, u32> &out_top_remap); +} // namespace ue2 + +#endif // NFA_TAMARAMACOMPILE_H diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 3f56b101..23e025d0 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -33,6 +33,7 @@ #include "hs_compile.h" // for HS_MODE_* #include "rose_build_add_internal.h" #include "rose_build_anchored.h" +#include "rose_build_exclusive.h" #include "rose_build_groups.h" #include "rose_build_infix.h" #include "rose_build_lookaround.h" @@ -50,6 +51,8 @@ #include "nfa/nfa_build_util.h" #include "nfa/nfa_internal.h" #include "nfa/shufticompile.h" +#include "nfa/tamaramacompile.h" +#include "nfa/tamarama_internal.h" #include "nfagraph/ng_execute.h" #include "nfagraph/ng_holder.h" #include "nfagraph/ng_lbr.h" @@ -71,6 +74,7 @@ #include "util/compile_error.h" #include "util/container.h" #include "util/graph_range.h" +#include "util/make_unique.h" #include "util/multibit_build.h" #include "util/order_check.h" #include "util/queue_index_factory.h" @@ -1422,6 +1426,296 @@ bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi, return true; } +static +unique_ptr<TamaInfo> constructTamaInfo(const RoseGraph &g, + const vector<ExclusiveSubengine> &subengines, + const bool is_suffix) { + unique_ptr<TamaInfo> tamaInfo = ue2::make_unique<TamaInfo>(); + for (const auto &sub : subengines) { + const auto &rose_vertices = sub.vertices; + NFA *nfa = sub.nfa.get(); + set<u32> tops; + for (const auto &v : rose_vertices) { + if (is_suffix) { + tops.insert(g[v].suffix.top); + } else { + for (const auto &e : in_edges_range(v, g)) { + tops.insert(g[e].rose_top); + } + } + } + tamaInfo->add(nfa, tops); + } + + return tamaInfo; +} + +static +void updateTops(const RoseGraph &g, const TamaInfo &tamaInfo, + TamaProto &tamaProto, + const vector<ExclusiveSubengine> &subengines, + const map<pair<const NFA *, u32>, u32> &out_top_remap, + const bool is_suffix) { + u32 i = 0; + for (const auto &n : tamaInfo.subengines) { + for (const auto &v : subengines[i].vertices) { + if (is_suffix) { + tamaProto.add(n, g[v].idx, g[v].suffix.top, + out_top_remap); + } else { + for (const auto &e : in_edges_range(v, g)) { + tamaProto.add(n, g[v].idx, g[e].rose_top, + out_top_remap); + } + } + } + i++; + } +} + +static +shared_ptr<TamaProto> constructContainerEngine(const RoseGraph &g, + build_context &bc, + const ExclusiveInfo &info, + const u32 queue, + const bool is_suffix) { + const auto &subengines = info.subengines; + auto tamaInfo = + constructTamaInfo(g, subengines, is_suffix); + + map<pair<const NFA *, u32>, u32> out_top_remap; + auto n = buildTamarama(*tamaInfo, queue, out_top_remap); + add_nfa_to_blob(bc, *n); + + DEBUG_PRINTF("queue id:%u\n", queue); + shared_ptr<TamaProto> tamaProto = make_shared<TamaProto>(); + tamaProto->reports = info.reports; + updateTops(g, *tamaInfo, *tamaProto, subengines, + out_top_remap, is_suffix); + return tamaProto; +} + +static +void buildInfixContainer(RoseGraph &g, build_context &bc, + const vector<ExclusiveInfo> &exclusive_info) { + // Build tamarama engine + for (const auto &info : exclusive_info) { + const u32 queue = info.queue; + const auto &subengines = info.subengines; + auto tamaProto = + constructContainerEngine(g, bc, info, queue, false); + + for (const auto &sub : subengines) { + const auto &verts = sub.vertices; + for (const auto &v : verts) { + DEBUG_PRINTF("vert id:%zu\n", g[v].idx); + g[v].left.tamarama = tamaProto; + } + } + } +} + +static +void buildSuffixContainer(RoseGraph &g, build_context &bc, + const vector<ExclusiveInfo> &exclusive_info) {
+    // Build tamarama engine
+    for (const auto &info : exclusive_info) {
+        const u32 queue = info.queue;
+        const auto &subengines = info.subengines;
+        auto tamaProto = constructContainerEngine(g, bc, info, queue, true);
+        for (const auto &sub : subengines) {
+            const auto &verts = sub.vertices;
+            for (const auto &v : verts) {
+                DEBUG_PRINTF("vert id:%zu\n", g[v].idx);
+                g[v].suffix.tamarama = tamaProto;
+            }
+            const auto &v = verts[0];
+            suffix_id newSuffix(g[v].suffix);
+            bc.suffixes.emplace(newSuffix, queue);
+        }
+    }
+}
+
+static
+void updateExclusiveInfixProperties(const RoseBuildImpl &build,
+                                build_context &bc,
+                                const vector<ExclusiveInfo> &exclusive_info,
+                                set<u32> *no_retrigger_queues) {
+    const RoseGraph &g = build.g;
+    for (const auto &info : exclusive_info) {
+        // Set leftfix optimisations, disabled for tamarama subengines
+        rose_group squash_mask = ~rose_group{0};
+        // Leftfixes can have stop alphabets.
+        vector<u8> stop(N_CHARS, 0);
+        // Infix NFAs can have bounds on their queue lengths.
+        u32 max_queuelen = 0;
+        u32 max_width = 0;
+        u8 cm_count = 0;
+        CharReach cm_cr;
+
+        const auto &qi = info.queue;
+        const auto &subengines = info.subengines;
+        bool no_retrigger = true;
+        for (const auto &sub : subengines) {
+            const auto &verts = sub.vertices;
+            const auto &v_first = verts[0];
+            left_id leftfix(g[v_first].left);
+            if (leftfix.haig() || !leftfix.graph() ||
+                !nfaStuckOn(*leftfix.graph())) {
+                no_retrigger = false;
+            }
+
+            for (const auto &v : verts) {
+                set<ue2_literal> lits;
+                for (auto u : inv_adjacent_vertices_range(v, build.g)) {
+                    for (u32 lit_id : build.g[u].literals) {
+                        lits.insert(build.literals.right.at(lit_id).s);
+                    }
+                }
+                DEBUG_PRINTF("%zu literals\n", lits.size());
+
+                u32 queuelen = findMaxInfixMatches(leftfix, lits);
+                if (queuelen < UINT32_MAX) {
+                    queuelen++;
+                }
+                max_queuelen = max(max_queuelen, queuelen);
+            }
+        }
+
+        if (no_retrigger) {
+            no_retrigger_queues->insert(qi);
+        }
+
+        for (const auto &sub : subengines) {
+            const auto &verts = sub.vertices;
+            for (const auto &v : verts) {
+                u32 lag = g[v].left.lag;
+                bc.leftfix_info.emplace(
+                    v, left_build_info(qi, lag, max_width, squash_mask, stop,
+                                       max_queuelen, cm_count, cm_cr));
+            }
+        }
+    }
+}
+
+static
+void updateExclusiveSuffixProperties(const RoseBuildImpl &build,
+                                const vector<ExclusiveInfo> &exclusive_info,
+                                set<u32> *no_retrigger_queues) {
+    const RoseGraph &g = build.g;
+    for (auto &info : exclusive_info) {
+        const auto &qi = info.queue;
+        const auto &subengines = info.subengines;
+        bool no_retrigger = true;
+        for (const auto &sub : subengines) {
+            const auto &v_first = sub.vertices[0];
+            suffix_id suffix(g[v_first].suffix);
+            if (!suffix.graph() || !nfaStuckOn(*suffix.graph())) {
+                no_retrigger = false;
+                break;
+            }
+        }
+
+        if (no_retrigger) {
+            no_retrigger_queues->insert(qi);
+        }
+    }
+}
+
+static
+void buildExclusiveInfixes(RoseBuildImpl &build, build_context &bc,
+                           QueueIndexFactory &qif,
+                           const map<left_id, set<PredTopPair>> &infixTriggers,
+                           const map<u32, vector<RoseVertex>> &vertex_map,
+                           const vector<vector<u32>> &groups,
+                           set<u32> *no_retrigger_queues) {
+    RoseGraph &g = build.g;
+    const CompileContext &cc = build.cc;
+
+    vector<ExclusiveInfo> exclusive_info;
+    for (const auto &gp : groups) {
+        ExclusiveInfo info;
+        for (const auto &id : gp) {
+            const auto &verts = vertex_map.at(id);
+            left_id leftfix(g[verts[0]].left);
+
+            bool is_transient = false;
+            auto n = makeLeftNfa(build, leftfix, false, is_transient,
+                                 infixTriggers, cc);
+            assert(n);
+
+            setLeftNfaProperties(*n, leftfix);
+
+            ExclusiveSubengine engine;
+            engine.nfa = move(n);
+            engine.vertices = verts;
+            info.subengines.push_back(move(engine));
+        }
+        info.queue = qif.get_queue();
+        exclusive_info.push_back(move(info));
+    }
+    updateExclusiveInfixProperties(build, bc, exclusive_info,
+                                   no_retrigger_queues);
+    buildInfixContainer(g, bc, exclusive_info);
+}
+
+static
+void findExclusiveInfixes(RoseBuildImpl &build, build_context &bc,
+                          QueueIndexFactory &qif,
+                          const map<left_id, set<PredTopPair>> &infixTriggers,
+                          set<u32> *no_retrigger_queues) {
+    const RoseGraph &g = build.g;
+
+    set<RoleInfo<left_id>> roleInfoSet;
+    map<u32, vector<RoseVertex>> vertex_map;
+
+    u32 role_id = 0;
+    map<left_id, u32> leftfixes;
+    for (auto v : vertices_range(g)) {
+        if (!g[v].left || build.isRootSuccessor(v)) {
+            continue;
+        }
+
+        left_id leftfix(g[v].left);
+
+        // Sanity check: our NFA should contain each of the tops mentioned on
+        // our in-edges.
+        assert(roseHasTops(g, v));
+
+        if (contains(leftfixes, leftfix)) {
+            // NFA already built.
+            u32 id = leftfixes[leftfix];
+            if (contains(vertex_map, id)) {
+                vertex_map[id].push_back(v);
+            }
+            DEBUG_PRINTF("sharing leftfix, id=%u\n", id);
+            continue;
+        }
+
+        if (leftfix.graph() || leftfix.castle()) {
+            leftfixes.emplace(leftfix, role_id);
+            vertex_map[role_id].push_back(v);
+
+            map<u32, vector<vector<CharReach>>> triggers;
+            findTriggerSequences(build, infixTriggers.at(leftfix), &triggers);
+            RoleInfo<left_id> info(leftfix, role_id);
+            if (setTriggerLiteralsInfix(info, triggers)) {
+                roleInfoSet.insert(info);
+            }
+            role_id++;
+        }
+    }
+
+    if (leftfixes.size() > 1) {
+        DEBUG_PRINTF("leftfix size:%zu\n", leftfixes.size());
+        vector<vector<u32>> groups;
+        exclusiveAnalysisInfix(build, vertex_map, roleInfoSet, groups);
+        buildExclusiveInfixes(build, bc, qif, infixTriggers, vertex_map,
+                              groups, no_retrigger_queues);
+    }
+}
+
 static
 bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc,
                     QueueIndexFactory &qif, set<u32> *no_retrigger_queues,
@@ -1434,8 +1728,13 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc,
     unordered_map<left_id, vector<RoseVertex> > succs;
     findInfixTriggers(tbi, &infixTriggers);
 
+    if (cc.grey.allowTamarama && cc.streaming && !do_prefix) {
+        findExclusiveInfixes(tbi, bc, qif, infixTriggers,
+                             no_retrigger_queues);
+    }
+
     for (auto v : vertices_range(g)) {
-        if (!g[v].left) {
+        if (!g[v].left || g[v].left.tamarama) {
             continue;
         }
@@ -1753,11 +2052,111 @@ void setSuffixProperties(NFA &n, const suffix_id &suff,
 
 static
-bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc,
-                   set<u32> *no_retrigger_queues) {
-    map<suffix_id, set<PredTopPair> > suffixTriggers;
-    findSuffixTriggers(tbi, &suffixTriggers);
+void buildExclusiveSuffixes(RoseBuildImpl &build, build_context &bc,
+                            QueueIndexFactory &qif,
+                            map<suffix_id, set<PredTopPair>> &suffixTriggers,
+                            const map<u32, vector<RoseVertex>> &vertex_map,
+                            const vector<vector<u32>> &groups,
+                            set<u32> *no_retrigger_queues) {
+    RoseGraph &g = build.g;
+    vector<ExclusiveInfo> exclusive_info;
+    for (const auto &gp : groups) {
+        ExclusiveInfo info;
+        for (const auto &id : gp) {
+            const auto &verts = vertex_map.at(id);
+            suffix_id s(g[verts[0]].suffix);
+
+            const set<PredTopPair> &s_triggers = suffixTriggers.at(s);
+
+            map<u32, u32> fixed_depth_tops;
+            findFixedDepthTops(g, s_triggers, &fixed_depth_tops);
+
+            map<u32, vector<vector<CharReach>>> triggers;
+            findTriggerSequences(build, s_triggers, &triggers);
+
+            auto n = buildSuffix(build.rm, build.ssm, fixed_depth_tops,
+                                 triggers, s, build.cc);
+            assert(n);
+
+            setSuffixProperties(*n, s, build.rm);
+
+            ExclusiveSubengine engine;
+            engine.nfa = move(n);
+            engine.vertices = verts;
+            info.subengines.push_back(move(engine));
+
+            const auto &reports = all_reports(s);
+            info.reports.insert(reports.begin(), reports.end());
+        }
+        info.queue = qif.get_queue();
+        exclusive_info.push_back(move(info));
+    }
+    updateExclusiveSuffixProperties(build, exclusive_info,
+                                    no_retrigger_queues);
+    buildSuffixContainer(g, bc, exclusive_info);
+}
+
+static
+void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc,
+                           QueueIndexFactory &qif,
+                           map<suffix_id, set<PredTopPair>> &suffixTriggers,
+                           set<u32> *no_retrigger_queues) {
+    const RoseGraph &g = tbi.g;
+
+    map<suffix_id, u32> suffixes;
+    set<RoleInfo<suffix_id>> roleInfoSet;
+    map<u32, vector<RoseVertex>> vertex_map;
+    u32 role_id = 0;
+    for (auto v : vertices_range(g)) {
+        if (!g[v].suffix) {
+            continue;
+        }
+
+        const suffix_id s(g[v].suffix);
+
+        DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].idx, s.graph());
+
+        // We may have already built this NFA.
+        if (contains(suffixes, s)) {
+            u32 id = suffixes[s];
+            if (!tbi.isInETable(v)) {
+                vertex_map[id].push_back(v);
+            }
+            continue;
+        }
+
+        // Currently disable eod suffixes for exclusive analysis
+        if (!tbi.isInETable(v) && (s.graph() || s.castle())) {
+            DEBUG_PRINTF("assigning %p to id %u\n", s.graph(), role_id);
+            suffixes.emplace(s, role_id);
+
+            vertex_map[role_id].push_back(v);
+            const set<PredTopPair> &s_triggers = suffixTriggers.at(s);
+            map<u32, vector<vector<CharReach>>> triggers;
+            findTriggerSequences(tbi, s_triggers, &triggers);
+
+            RoleInfo<suffix_id> info(s, role_id);
+            if (setTriggerLiteralsSuffix(info, triggers)) {
+                roleInfoSet.insert(info);
+            }
+            role_id++;
+        }
+    }
+
+    if (suffixes.size() > 1) {
+        DEBUG_PRINTF("suffix size:%zu\n", suffixes.size());
+        vector<vector<u32>> groups;
+        exclusiveAnalysisSuffix(tbi, vertex_map, roleInfoSet, groups);
+        buildExclusiveSuffixes(tbi, bc, qif, suffixTriggers, vertex_map,
+                               groups, no_retrigger_queues);
+    }
+}
+
+static
+bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc,
+                   set<u32> *no_retrigger_queues,
+                   const map<suffix_id, set<PredTopPair>> &suffixTriggers) {
     // To ensure compile determinism, build suffix engines in order of their
     // (unique) queue indices, so that we call add_nfa_to_blob in the same
     // order.
@@ -1770,6 +2169,11 @@ bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc,
     for (const auto &e : ordered) {
         const u32 queue = e.first;
         const suffix_id &s = e.second;
+
+        if (s.tamarama()) {
+            continue;
+        }
+
         const set<PredTopPair> &s_triggers = suffixTriggers.at(s);
 
         map<u32, u32> fixed_depth_tops;
@@ -1860,11 +2264,20 @@ static
 bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif,
                set<u32> *no_retrigger_queues, set<u32> *eager_queues,
                u32 *leftfixBeginQueue) {
+    map<suffix_id, set<PredTopPair>> suffixTriggers;
+    findSuffixTriggers(tbi, &suffixTriggers);
+
+    if (tbi.cc.grey.allowTamarama && tbi.cc.streaming) {
+        findExclusiveSuffixes(tbi, bc, qif, suffixTriggers,
+                              no_retrigger_queues);
+    }
+
     assignSuffixQueues(tbi, bc);
 
-    if (!buildSuffixes(tbi, bc, no_retrigger_queues)) {
+    if (!buildSuffixes(tbi, bc, no_retrigger_queues, suffixTriggers)) {
         return false;
     }
+    suffixTriggers.clear();
 
     *leftfixBeginQueue = qif.allocated_count();
 
@@ -3205,7 +3618,15 @@ void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v,
     assert(contains(bc.engineOffsets, qi));
     const NFA *nfa = get_nfa_from_blob(bc, qi);
     u32 suffixEvent;
-    if (isMultiTopType(nfa->type)) {
+    if (isContainerType(nfa->type)) {
+        auto tamaProto = g[v].suffix.tamarama.get();
+        assert(tamaProto);
+        u32 top = (u32)MQE_TOP_FIRST +
+                  tamaProto->top_remap.at(make_pair(g[v].idx,
+                                                    g[v].suffix.top));
+        assert(top < MQE_INVALID);
+        suffixEvent = top;
+    } else if (isMultiTopType(nfa->type)) {
         assert(!g[v].suffix.haig);
         u32 top = (u32)MQE_TOP_FIRST + g[v].suffix.top;
         assert(top < MQE_INVALID);
@@ -3283,7 +3704,13 @@ void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc,
 
     // DFAs have no TOP_N support, so they get a classic MQE_TOP event.
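+    // For container engines the event is derived from the remapped top.
+    // A worked example (hypothetical values): if vertex idx 7 enters this
+    // infix via rose_top 1 and the tamarama remapped (7, 1) to 3, the queue
+    // event computed below is MQE_TOP_FIRST + 3.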
u32 top; - if (!isMultiTopType(nfa->type)) { + if (isContainerType(nfa->type)) { + auto tamaProto = g[v].left.tamarama.get(); + assert(tamaProto); + top = MQE_TOP_FIRST + tamaProto->top_remap.at( + make_pair(g[v].idx, g[e].rose_top)); + assert(top < MQE_INVALID); + } else if (!isMultiTopType(nfa->type)) { assert(num_tops(g[v].left) == 1); top = MQE_TOP; } else { diff --git a/src/rose/rose_build_exclusive.cpp b/src/rose/rose_build_exclusive.cpp new file mode 100644 index 00000000..c9e8d215 --- /dev/null +++ b/src/rose/rose_build_exclusive.cpp @@ -0,0 +1,446 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "ue2common.h"
+
+#include "rose_build_exclusive.h"
+#include "rose_build_merge.h"
+#include "nfa/castlecompile.h"
+#include "nfagraph/ng_execute.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_util.h"
+#include "util/clique.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/graph.h"
+#include "util/make_unique.h"
+
+using namespace std;
+
+namespace ue2 {
+
+template<typename role_id>
+struct RoleChunk {
+    vector<RoleInfo<role_id>> roles;
+};
+
+static
+CharReach getReachability(const NGHolder &h) {
+    CharReach cr;
+    for (const auto &v : vertices_range(h)) {
+        if (!is_special(v, h)) {
+            cr |= h[v].char_reach;
+        }
+    }
+    return cr;
+}
+
+template<typename role_id>
+static
+vector<RoleChunk<role_id>> divideIntoChunks(const RoseBuildImpl &build,
+                                set<RoleInfo<role_id>> &roleInfoSet) {
+    u32 chunkSize = build.cc.grey.tamaChunkSize;
+    u32 cnt = 1;
+    vector<RoleChunk<role_id>> chunks;
+    RoleChunk<role_id> roleChunk;
+    for (const auto &roleInfo : roleInfoSet) {
+        if (cnt == chunkSize) {
+            cnt -= chunkSize;
+            chunks.push_back(roleChunk);
+            roleChunk.roles.clear();
+        }
+        roleChunk.roles.push_back(roleInfo);
+        cnt++;
+    }
+
+    if (cnt > 1) {
+        chunks.push_back(roleChunk);
+    }
+
+    return chunks;
+}
+
+/* add prefix literals to engine graph */
+static
+bool addPrefixLiterals(NGHolder &h, ue2::unordered_set<u32> &tailId,
+                       const vector<vector<CharReach>> &triggers) {
+    DEBUG_PRINTF("add literals to graph\n");
+
+    NFAVertex start = h.start;
+    vector<NFAVertex> heads;
+    vector<NFAVertex> tails;
+    for (const auto &lit : triggers) {
+        NFAVertex last = start;
+        if (lit.empty()) {
+            return false;
+        }
+        u32 i = 0;
+        for (const auto &c : lit) {
+            DEBUG_PRINTF("lit:%s \n", c.to_string().c_str());
+            NFAVertex u = add_vertex(h);
+            h[u].char_reach = c;
+            if (!i++) {
+                heads.push_back(u);
+                last = u;
+                continue;
+            }
+            add_edge(last, u, h);
+            last = u;
+        }
+        tails.push_back(last);
+        tailId.insert(h[last].index);
+    }
+
+    for (auto v : adjacent_vertices_range(start, h)) {
+        if (v != h.startDs) {
+            for (auto &t : tails) {
+                add_edge(t, v, h);
+            }
+        }
+    }
+
+    clear_out_edges(start, h);
+    add_edge(h.start, h.start, h);
+    for (auto &t : heads) {
+        add_edge(start, t, h);
+    }
+
+    DEBUG_PRINTF("literals addition done\n");
+    return true;
+}
+
+/* check if one literal is a suffix of another */
+static
+bool isSuffix(const vector<vector<CharReach>> &triggers1,
+              const vector<vector<CharReach>> &triggers2) {
+    // literal suffix test
+    for (const auto &lit1 : triggers1) {
+        for (const auto &lit2 : triggers2) {
+            const size_t len = min(lit1.size(), lit2.size());
+            if (equal(lit1.rbegin(), lit1.rbegin() + len,
+                      lit2.rbegin(), overlaps)) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+/* prepare initial infix or suffix graph used for exclusive analysis */
+template<typename role_id>
+static
+u32 prepareRoleGraph(NGHolder &h, const role_id &s1) {
+    u32 num = 0;
+    if (s1.castle()) {
+        num = num_vertices(h);
+        NFAVertex u = add_vertex(h);
+        h[u].char_reach = s1.castle()->reach();
+        add_edge(h.startDs, u, h);
+        // add self loop to repeat characters
+        add_edge(u, u, h);
+    } else if (s1.graph()) {
+        const NGHolder &g = *s1.graph();
+        cloneHolder(h, g);
+        num = num_vertices(h);
+    } else {
+        // only infixes and suffixes with graph properties are possible
+        // candidates; other cases were already filtered out before
+        // exclusive analysis
+        assert(0);
+    }
+
+    return num;
+}
+
+/* get the suffix of the literal that follows the last reset character */
+static
+vector<CharReach> findStartPos(const CharReach &cr1,
+                               const vector<CharReach> &lit) {
+    auto it = lit.rbegin(), ite = lit.rend();
+    u32 pos = lit.size();
+    for (; it != ite; it++) {
+        if (!overlaps(cr1, *it)) {
+            break;
+        }
+        pos--;
+    }
+
+    return vector<CharReach>(lit.begin() + pos, lit.end());
+}
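+
+// A worked example of findStartPos (hypothetical values): with cr1 = [a-f]
+// and lit = "xab", scanning from the back, 'b' and 'a' overlap cr1 but 'x'
+// does not, so the returned subliteral is "ab" -- the part of the trigger
+// that can still matter after the reset character 'x'.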
+
+template<typename role_id>
+static
+bool isExclusive(const NGHolder &h,
+                 const u32 num, ue2::unordered_set<u32> &tailId,
+                 map<u32, ue2::unordered_set<u32>> &skipList,
+                 const RoleInfo<role_id> &role1,
+                 const RoleInfo<role_id> &role2) {
+    const u32 id1 = role1.id;
+    const u32 id2 = role2.id;
+
+    if (contains(skipList, id1) && contains(skipList[id1], id2)) {
+        return false;
+    }
+
+    const auto &triggers1 = role1.literals;
+    const auto &triggers2 = role2.literals;
+    if (isSuffix(triggers1, triggers2)) {
+        skipList[id2].insert(id1);
+        return false;
+    }
+
+    DEBUG_PRINTF("role id2:%u\n", id2);
+    const auto &cr1 = role1.cr;
+    if (overlaps(cr1, role2.last_cr)) {
+        CharReach cr = cr1 | role1.prefix_cr;
+        for (const auto &lit : triggers2) {
+            auto lit1 = findStartPos(cr, lit);
+            if (lit1.empty()) {
+                continue;
+            }
+            u32 lower_bound = 0;
+            if (lit1.size() < lit.size()) {
+                lower_bound = ~0U;
+            }
+
+            ue2::flat_set<NFAVertex> states;
+            for (const auto &v : vertices_range(h)) {
+                if (h[v].index >= lower_bound || h[v].index < 2) {
+                    states.insert(v);
+                }
+            }
+
+            auto activeStates = execute_graph(h, lit1, states);
+            // Check if only literal states are on
+            for (const auto &s : activeStates) {
+                u32 stateId = h[s].index;
+                if ((stateId > 1 && stateId <= num) ||
+                    contains(tailId, stateId)) {
+                    skipList[id2].insert(id1);
+                    return false;
+                }
+            }
+        }
+    }
+
+    return true;
+}
+
+template<typename role_id>
+static
+ue2::unordered_set<u32> checkExclusivity(const NGHolder &h,
+                 const u32 num, ue2::unordered_set<u32> &tailId,
+                 map<u32, ue2::unordered_set<u32>> &skipList,
+                 const RoleInfo<role_id> &role1,
+                 const RoleChunk<role_id> &roleChunk) {
+    ue2::unordered_set<u32> info;
+    const u32 id1 = role1.id;
+    for (const auto &role2 : roleChunk.roles) {
+        const u32 id2 = role2.id;
+        if (id1 != id2 && isExclusive(h, num, tailId, skipList,
+                                      role1, role2)) {
+            info.insert(id2);
+        }
+    }
+
+    return info;
+}
+
+static
+void findCliques(const map<u32, set<u32>> &exclusiveGroups,
+                 vector<vector<u32>> &exclusive_roles) {
+    if (exclusiveGroups.empty()) {
+        return;
+    }
+    // Construct the exclusivity graph
+    map<u32, CliqueVertex> vertex_map;
+    unique_ptr<CliqueGraph> cg = make_unique<CliqueGraph>();
+
+    // Add vertices representing infixes/suffixes
+    for (const auto &e : exclusiveGroups) {
+        const u32 id = e.first;
+        CliqueVertex v1 = add_vertex(CliqueVertexProps(id), *cg);
+        vertex_map[id] = v1;
+    }
+
+    // Wire exclusive pairs
+    for (const auto &e1 : exclusiveGroups) {
+        const u32 literalId1 = e1.first;
+        CliqueVertex lv = vertex_map[literalId1];
+        const set<u32> &exclusiveSet = e1.second;
+        for (const auto &e2 : exclusiveGroups) {
+            const u32 literalId2 = e2.first;
+            if (literalId1 < literalId2 &&
+                contains(exclusiveSet, literalId2)) {
+                add_edge(lv, vertex_map[literalId2], *cg);
+                DEBUG_PRINTF("Wire %u:%u\n", literalId1, literalId2);
+            }
+        }
+    }
+
+    // Find clique groups
+    const auto &clique = removeClique(*cg);
+    for (const auto &i : clique) {
+        DEBUG_PRINTF("cliq:%zu\n", i.size());
+        if (i.size() > 1) {
+            exclusive_roles.push_back(i);
+        }
+    }
+    DEBUG_PRINTF("Clique graph size:%zu\n", exclusive_roles.size());
+}
+
+static
+map<u32, set<u32>> findExclusiveGroups(const RoseBuildImpl &build,
+            const map<u32, ue2::unordered_set<u32>> &exclusiveInfo,
+            const map<u32, vector<RoseVertex>> &vertex_map,
+            const bool is_infix) {
+    map<u32, set<u32>> exclusiveGroups;
+    for (const auto &e : exclusiveInfo) {
+        u32 i = e.first;
+        const auto &s = e.second;
+        set<u32> group;
+        set<RoseVertex> q1(vertex_map.at(i).begin(),
+                           vertex_map.at(i).end());
+        DEBUG_PRINTF("vertex set:%zu\n", q1.size());
+        for (const auto &val : s) {
+            set<RoseVertex> q2(vertex_map.at(val).begin(),
+                               vertex_map.at(val).end());
+            if (contains(exclusiveInfo.at(val), i) &&
+                (!is_infix || mergeableRoseVertices(build, q1, q2))) {
+                group.insert(val);
+            }
+        }
+        if (!group.empty()) {
+            exclusiveGroups[i] = group;
+        }
+    }
+
+    return exclusiveGroups;
+}
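+
+// The grouping above requires exclusivity to be mutual. A hypothetical
+// illustration: if exclusiveInfo[0] == {1, 2} and exclusiveInfo[1] == {0},
+// but exclusiveInfo[2] does not contain 0, then only role 1 joins role 0's
+// group, i.e. exclusiveGroups[0] == {1}.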
+
+template<typename role_id>
+static
+bool setTriggerLiterals(RoleInfo<role_id> &roleInfo,
+        const map<u32, vector<vector<CharReach>>> &triggers) {
+    u32 minLiteralLen = ~0U;
+    for (const auto &tr : triggers) {
+        for (const auto &lit : tr.second) {
+            if (lit.empty()) {
+                return false;
+            }
+            minLiteralLen = min(minLiteralLen, (u32)lit.size());
+            roleInfo.last_cr |= lit.back();
+            for (const auto &c : lit) {
+                roleInfo.prefix_cr |= c;
+            }
+            roleInfo.literals.push_back(lit);
+        }
+    }
+
+    if (roleInfo.role.graph()) {
+        const NGHolder &g = *roleInfo.role.graph();
+        roleInfo.cr = getReachability(g);
+    } else if (roleInfo.role.castle()) {
+        roleInfo.cr = roleInfo.role.castle()->reach();
+    }
+
+    // test the score of this engine
+    roleInfo.score = 256 - roleInfo.cr.count() + minLiteralLen;
+    if (roleInfo.score < 20) {
+        return false;
+    }
+
+    return true;
+}
+
+bool setTriggerLiteralsInfix(RoleInfo<left_id> &roleInfo,
+        const map<u32, vector<vector<CharReach>>> &triggers) {
+    return setTriggerLiterals(roleInfo, triggers);
+}
+
+bool setTriggerLiteralsSuffix(RoleInfo<suffix_id> &roleInfo,
+        const map<u32, vector<vector<CharReach>>> &triggers) {
+    return setTriggerLiterals(roleInfo, triggers);
+}
+
+template<typename role_id>
+static
+void exclusiveAnalysis(const RoseBuildImpl &build,
+               const map<u32, vector<RoseVertex>> &vertex_map,
+               set<RoleInfo<role_id>> &roleInfoSet,
+               vector<vector<u32>> &exclusive_roles, const bool is_infix) {
+    const auto &chunks = divideIntoChunks(build, roleInfoSet);
+    DEBUG_PRINTF("Exclusivity analysis entry\n");
+    map<u32, ue2::unordered_set<u32>> exclusiveInfo;
+
+    for (const auto &roleChunk : chunks) {
+        map<u32, ue2::unordered_set<u32>> skipList;
+        for (const auto &role1 : roleChunk.roles) {
+            const u32 id1 = role1.id;
+            const role_id &s1 = role1.role;
+            const auto &triggers1 = role1.literals;
+
+            NGHolder h;
+            u32 num = prepareRoleGraph(h, s1);
+            DEBUG_PRINTF("role id1:%u\n", id1);
+            ue2::unordered_set<u32> tailId;
+            if (!addPrefixLiterals(h, tailId, triggers1)) {
+                continue;
+            }
+
+            exclusiveInfo[id1] = checkExclusivity(h, num, tailId,
+                                                  skipList, role1, roleChunk);
+        }
+    }
+
+    // Create final candidate exclusive groups
+    const auto exclusiveGroups =
+        findExclusiveGroups(build, exclusiveInfo, vertex_map, is_infix);
+    exclusiveInfo.clear();
+
+    // Find cliques for each exclusive group
+    findCliques(exclusiveGroups, exclusive_roles);
+}
+
+void exclusiveAnalysisInfix(const RoseBuildImpl &build,
+               const map<u32, vector<RoseVertex>> &vertex_map,
+               set<RoleInfo<left_id>> &roleInfoSet,
+               vector<vector<u32>> &exclusive_roles) {
+    exclusiveAnalysis(build, vertex_map, roleInfoSet, exclusive_roles,
+                      true);
+}
+
+void exclusiveAnalysisSuffix(const RoseBuildImpl &build,
+               const map<u32, vector<RoseVertex>> &vertex_map,
+               set<RoleInfo<suffix_id>> &roleInfoSet,
+               vector<vector<u32>> &exclusive_roles) {
+    exclusiveAnalysis(build, vertex_map, roleInfoSet, exclusive_roles,
+                      false);
+}
+
+} // namespace ue2
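The score test in setTriggerLiterals above acts as a cheap admission filter:
engines with very broad reachability and short trigger literals are unlikely
to prove exclusive, so they are dropped before the expensive graph execution.
Worked arithmetic (hypothetical values): a graph reaching 200 of the 256 byte
values with a 4-byte minimum trigger scores 256 - 200 + 4 = 60 and is kept;
one reaching 250 byte values with a 3-byte trigger scores 256 - 250 + 3 = 9,
falling below the threshold of 20, and is rejected.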
diff --git a/src/rose/rose_build_exclusive.h b/src/rose/rose_build_exclusive.h
new file mode 100644
index 00000000..a6772f7f
--- /dev/null
+++ b/src/rose/rose_build_exclusive.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Exclusive analysis for infix and suffix engines.
+ *
+ * Two engines are considered exclusive if they can never be alive at the
+ * same time. The analysis exploits the pairing of triggering literal and
+ * engine graph: if the triggering literals of each engine kill all the
+ * states in the other engine's graph, the two engines are exclusive.
+ */
+#ifndef ROSE_BUILD_EXCLUSIVE_H
+#define ROSE_BUILD_EXCLUSIVE_H
+
+#include "ue2common.h"
+
+#include "rose_build_impl.h"
+#include "util/alloc.h"
+#include "util/charreach.h"
+
+#include <map>
+#include <set>
+#include <vector>
+
+namespace ue2 {
+
+/** \brief Subengine info, including the built engine and the
+ *  corresponding triggering rose vertices. */
+struct ExclusiveSubengine {
+    aligned_unique_ptr<NFA> nfa;
+    std::vector<RoseVertex> vertices;
+};
+
+/** \brief Exclusivity info used to build a tamarama. */
+struct ExclusiveInfo {
+    // subengine info
+    std::vector<ExclusiveSubengine> subengines;
+    // all the reports in the tamarama
+    std::set<ReportID> reports;
+    // assigned queue id
+    u32 queue;
+};
+
+/** \brief Role info structure for exclusive analysis. */
+template<typename role_id>
+struct RoleInfo {
+    RoleInfo(role_id role_in, u32 id_in) : role(role_in), id(id_in) {}
+    bool operator==(const RoleInfo &b) const {
+        return id == b.id;
+    }
+    bool operator!=(const RoleInfo &b) const { return !(*this == b); }
+    bool operator<(const RoleInfo &b) const {
+        const RoleInfo &a = *this;
+        if (a.score != b.score) {
+            return a.score > b.score;
+        }
+        ORDER_CHECK(id);
+        return false;
+    }
+
+    std::vector<std::vector<CharReach>> literals; // prefix literals
+    CharReach prefix_cr; // reach of prefix literals
+    CharReach last_cr; // reach of the last character of literals
+    CharReach cr; // reach of engine graph
+    const role_id role; // infix or suffix info
+    const u32 id; // infix or suffix id
+    u32 score; // score for exclusive analysis
+};
+
+/**
+ * \brief Add triggering literals to infix info.
+ */
+bool setTriggerLiteralsInfix(RoleInfo<left_id> &roleInfo,
+        const std::map<u32, std::vector<std::vector<CharReach>>> &triggers);
+
+/**
+ * \brief Add triggering literals to suffix info.
+ */
+bool setTriggerLiteralsSuffix(RoleInfo<suffix_id> &roleInfo,
+        const std::map<u32, std::vector<std::vector<CharReach>>> &triggers);
+
+/**
+ * Exclusive analysis for infix engines.
+ *
+ * @param build rose build info, used here mainly to read the exclusive
+ *        chunk size
+ * @param vertex_map mapping between engine id and the rose vertices
+ *        related to this engine
+ * @param roleInfoSet structure containing role properties, including infix
+ *        info, triggering literals and literal reachabilities;
+ *        used for exclusive analysis
+ * @param exclusive_roles output mapping between engine id and its exclusive
+ *        group id
+ */
+void exclusiveAnalysisInfix(const RoseBuildImpl &build,
+        const std::map<u32, std::vector<RoseVertex>> &vertex_map,
+        std::set<RoleInfo<left_id>> &roleInfoSet,
+        std::vector<std::vector<u32>> &exclusive_roles);
+
+/**
+ * Exclusive analysis for suffix engines.
+ *
+ * @param build rose build info, used here mainly to read the exclusive
+ *        chunk size
+ * @param vertex_map mapping between engine id and the rose vertices
+ *        related to this engine
+ * @param roleInfoSet structure containing role properties, including suffix
+ *        info, triggering literals and literal reachabilities;
+ *        used for exclusive analysis
+ * @param exclusive_roles output mapping between engine id and its exclusive
+ *        group id
+ */
+void exclusiveAnalysisSuffix(const RoseBuildImpl &build,
+        const std::map<u32, std::vector<RoseVertex>> &vertex_map,
+        std::set<RoleInfo<suffix_id>> &roleInfoSet,
+        std::vector<std::vector<u32>> &exclusive_roles);
+
+} // namespace ue2
+
+#endif // ROSE_BUILD_EXCLUSIVE_H
diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h
index 71940e07..ca1b64e2 100644
--- a/src/rose/rose_build_impl.h
+++ b/src/rose/rose_build_impl.h
@@ -65,12 +65,13 @@ class SomSlotManager;
 struct suffix_id {
     suffix_id(const RoseSuffixInfo &in)
         : g(in.graph.get()), c(in.castle.get()), d(in.rdfa.get()),
-          h(in.haig.get()), dfa_min_width(in.dfa_min_width),
+          h(in.haig.get()), t(in.tamarama.get()),
+          dfa_min_width(in.dfa_min_width),
           dfa_max_width(in.dfa_max_width) {
         assert(!g || g->kind == NFA_SUFFIX);
     }
     bool operator==(const suffix_id &b) const {
-        bool rv = g == b.g && c == b.c && h == b.h && d == b.d;
+        bool rv = g == b.g && c == b.c && h == b.h && d == b.d && t == b.t;
         assert(!rv || dfa_min_width == b.dfa_min_width);
         assert(!rv || dfa_max_width == b.dfa_max_width);
         return rv;
@@ -82,6 +83,7 @@ struct suffix_id {
         ORDER_CHECK(c);
         ORDER_CHECK(d);
         ORDER_CHECK(h);
+        ORDER_CHECK(t);
         return false;
     }
 
@@ -113,6 +115,22 @@ struct suffix_id {
         }
         return c;
     }
+    TamaProto *tamarama() {
+        if (!d && !h) {
+            assert(dfa_min_width == depth(0));
+            assert(dfa_max_width == depth::infinity());
+        }
+        return t;
+    }
+    const TamaProto *tamarama() const {
+        if (!d && !h) {
+            assert(dfa_min_width == depth(0));
+            assert(dfa_max_width == depth::infinity());
+        }
+        return t;
+    }
+
     raw_som_dfa *haig() { return h; }
     const raw_som_dfa *haig() const { return h; }
     raw_dfa *dfa() { return d; }
@@ -125,6 +143,7 @@ private:
     CastleProto *c;
     raw_dfa *d;
     raw_som_dfa *h;
+    TamaProto *t;
    depth dfa_min_width;
    depth dfa_max_width;
diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp
index b16e3a69..f430f731 100644
--- a/src/rose/rose_build_misc.cpp
+++ b/src/rose/rose_build_misc.cpp
@@ -34,6 +34,7 @@
 #include "nfa/mcclellancompile_util.h"
 #include "nfa/nfa_api.h"
 #include "nfa/rdfa.h"
+#include "nfa/tamaramacompile.h"
 #include "nfagraph/ng_holder.h"
 #include "nfagraph/ng_limex.h"
 #include "nfagraph/ng_reports.h"
@@ -909,7 +910,7 @@ set<ReportID> all_reports(const OutfixInfo &outfix) {
 
 bool RoseSuffixInfo::operator==(const RoseSuffixInfo &b) const {
     return top == b.top && graph == b.graph && castle == b.castle &&
-           rdfa == b.rdfa && haig == b.haig;
+           rdfa == b.rdfa && haig == b.haig && tamarama == b.tamarama;
 }
 
 bool RoseSuffixInfo::operator<(const RoseSuffixInfo &b) const {
@@ -919,6 +920,7 @@ bool RoseSuffixInfo::operator<(const RoseSuffixInfo &b) const {
     ORDER_CHECK(castle);
     ORDER_CHECK(haig);
     ORDER_CHECK(rdfa);
+    ORDER_CHECK(tamarama);
     assert(a.dfa_min_width == b.dfa_min_width);
     assert(a.dfa_max_width == b.dfa_max_width);
     return false;
@@ -931,13 +933,16 @@ void
RoseSuffixInfo::reset(void) {
     castle.reset();
     rdfa.reset();
     haig.reset();
+    tamarama.reset();
     dfa_min_width = 0;
     dfa_max_width = depth::infinity();
 }
 
 std::set<ReportID> all_reports(const suffix_id &s) {
     assert(s.graph() || s.castle() || s.haig() || s.dfa());
-    if (s.graph()) {
+    if (s.tamarama()) {
+        return all_reports(*s.tamarama());
+    } else if (s.graph()) {
         return all_reports(*s.graph());
     } else if (s.castle()) {
         return all_reports(*s.castle());
@@ -1149,6 +1154,7 @@ void LeftEngInfo::reset(void) {
     castle.reset();
     dfa.reset();
     haig.reset();
+    tamarama.reset();
     lag = 0;
     leftfix_report = MO_INVALID_IDX;
     dfa_min_width = 0;
diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp
index 1d63c71a..19d8414d 100644
--- a/src/rose/rose_dump.cpp
+++ b/src/rose/rose_dump.cpp
@@ -718,7 +718,7 @@ void dumpNfas(const RoseEngine *t, bool dump_raw, const string &base) {
 
         FILE *f;
         f = fopen(ssdot.str().c_str(), "w");
-        nfaDumpDot(n, f);
+        nfaDumpDot(n, f, base);
         fclose(f);
 
         f = fopen(sstxt.str().c_str(), "w");
@@ -778,7 +778,7 @@ void dumpRevNfas(const RoseEngine *t, bool dump_raw, const string &base) {
 
         FILE *f;
         f = fopen(ssdot.str().c_str(), "w");
-        nfaDumpDot(n, f);
+        nfaDumpDot(n, f, base);
         fclose(f);
 
         f = fopen(sstxt.str().c_str(), "w");
@@ -809,7 +809,7 @@ void dumpAnchored(const RoseEngine *t, const string &base) {
 
         FILE *f;
         f = fopen(ssdot.str().c_str(), "w");
-        nfaDumpDot(n, f);
+        nfaDumpDot(n, f, base);
         fclose(f);
 
         f = fopen(sstxt.str().c_str(), "w");
diff --git a/src/rose/rose_graph.h b/src/rose/rose_graph.h
index b0ac8d11..6abe629b 100644
--- a/src/rose/rose_graph.h
+++ b/src/rose/rose_graph.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -55,6 +55,7 @@ namespace ue2 {
 struct CastleProto;
 struct raw_dfa;
 struct raw_som_dfa;
+struct TamaProto;
 
 /** \brief Table type for a literal. */
 enum rose_literal_table {
@@ -82,6 +83,7 @@ struct LeftEngInfo {
     std::shared_ptr<CastleProto> castle;
     std::shared_ptr<raw_dfa> dfa;
     std::shared_ptr<raw_som_dfa> haig;
+    std::shared_ptr<TamaProto> tamarama;
     u32 lag = 0U;
     ReportID leftfix_report = MO_INVALID_IDX;
     depth dfa_min_width = 0;
@@ -92,6 +94,7 @@ struct LeftEngInfo {
             && other.castle == castle
             && other.dfa == dfa
             && other.haig == haig
+            && other.tamarama == tamarama
             && other.lag == lag
             && other.leftfix_report == leftfix_report;
     }
@@ -104,6 +107,7 @@ struct LeftEngInfo {
         ORDER_CHECK(castle);
         ORDER_CHECK(dfa);
         ORDER_CHECK(haig);
+        ORDER_CHECK(tamarama);
         ORDER_CHECK(lag);
         ORDER_CHECK(leftfix_report);
         return false;
@@ -121,6 +125,7 @@ struct RoseSuffixInfo {
     std::shared_ptr<CastleProto> castle;
     std::shared_ptr<raw_som_dfa> haig;
     std::shared_ptr<raw_dfa> rdfa;
+    std::shared_ptr<TamaProto> tamarama;
     depth dfa_min_width = 0;
     depth dfa_max_width = depth::infinity();
 
@@ -128,7 +133,7 @@ struct RoseSuffixInfo {
     bool operator!=(const RoseSuffixInfo &b) const { return !(*this == b); }
     bool operator<(const RoseSuffixInfo &b) const;
     void reset(void);
-    operator bool() const { return graph || castle || haig || rdfa; }
+    operator bool() const { return graph || castle || haig || rdfa || tamarama; }
 };
 
 /** \brief Properties attached to each Rose graph vertex.
*/ diff --git a/src/smallwrite/smallwrite_dump.cpp b/src/smallwrite/smallwrite_dump.cpp index 8987e8b3..0db97df5 100644 --- a/src/smallwrite/smallwrite_dump.cpp +++ b/src/smallwrite/smallwrite_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -73,7 +73,7 @@ void smwrDumpNFA(const SmallWriteEngine *smwr, bool dump_raw, FILE *f; f = fopen((base + "smallwrite_nfa.dot").c_str(), "w"); - nfaDumpDot(n, f); + nfaDumpDot(n, f, base); fclose(f); f = fopen((base + "smallwrite_nfa.txt").c_str(), "w"); diff --git a/src/util/clique.cpp b/src/util/clique.cpp new file mode 100644 index 00000000..ea22779c --- /dev/null +++ b/src/util/clique.cpp @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief An algorithm to find cliques. 
+ */
+
+#include "clique.h"
+#include "container.h"
+#include "graph_range.h"
+#include "make_unique.h"
+#include "ue2_containers.h"
+
+#include <map>
+#include <set>
+#include <stack>
+
+using namespace std;
+
+namespace ue2 {
+
+static
+vector<u32> getNeighborInfo(const CliqueGraph &g,
+                const CliqueVertex &cv, const set<u32> &group) {
+    u32 id = g[cv].stateId;
+    vector<u32> neighbor;
+    // find neighbors for cv
+    for (const auto &v : adjacent_vertices_range(cv, g)) {
+        if (g[v].stateId != id && contains(group, g[v].stateId)) {
+            neighbor.push_back(g[v].stateId);
+            DEBUG_PRINTF("Neighbor:%u\n", g[v].stateId);
+        }
+    }
+
+    return neighbor;
+}
+
+static
+vector<u32> findCliqueGroup(CliqueGraph &cg) {
+    stack<vector<u32>> gStack;
+
+    // Create mapping between vertex and id
+    map<u32, CliqueVertex> vertexMap;
+    vector<u32> init;
+    for (const auto &v : vertices_range(cg)) {
+        vertexMap[cg[v].stateId] = v;
+        init.push_back(cg[v].stateId);
+    }
+    gStack.push(init);
+
+    // Get the vertex to start from
+    vector<u32> clique;
+    while (!gStack.empty()) {
+        vector<u32> g = move(gStack.top());
+        gStack.pop();
+
+        // Choose a vertex from the graph
+        u32 id = g[0];
+        CliqueVertex &n = vertexMap.at(id);
+        clique.push_back(id);
+        // Ids of the vertices in the current subgraph
+        set<u32> subgraphId(g.begin(), g.end());
+        auto neighbor = getNeighborInfo(cg, n, subgraphId);
+        // Get graph consisting of neighbors for left branch
+        if (!neighbor.empty()) {
+            gStack.push(neighbor);
+        }
+    }
+
+    return clique;
+}
+
+template<typename Graph>
+bool graph_empty(const Graph &g) {
+    typename Graph::vertex_iterator vi, ve;
+    tie(vi, ve) = vertices(g);
+    return vi == ve;
+}
+
+vector<vector<u32>> removeClique(CliqueGraph &cg) {
+    DEBUG_PRINTF("graph size:%zu\n", num_vertices(cg));
+    vector<vector<u32>> cliquesVec = {findCliqueGroup(cg)};
+    while (!graph_empty(cg)) {
+        const vector<u32> &c = cliquesVec.back();
+        vector<CliqueVertex> dead;
+        for (const auto &v : vertices_range(cg)) {
+            u32 id = cg[v].stateId;
+            if (find(c.begin(), c.end(), id) != c.end()) {
+                dead.push_back(v);
+            }
+        }
+        for (const auto &v : dead) {
+            clear_vertex(v, cg);
+            remove_vertex(v, cg);
+        }
+        if (graph_empty(cg)) {
+            break;
+        }
+        auto clique = findCliqueGroup(cg);
+        cliquesVec.push_back(clique);
+    }
+
+    return cliquesVec;
+}
+
+} // namespace ue2
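A minimal, self-contained sketch of driving this clique API (the graph below
is hypothetical; removeClique is greedy, so it yields a clique cover rather
than guaranteed-maximum cliques):

    #include "util/clique.h"
    #include <map>

    using namespace ue2;

    static std::vector<std::vector<u32>> tinyCover() {
        CliqueGraph cg;
        std::map<u32, CliqueVertex> vmap; // hypothetical id -> vertex map
        for (u32 id = 0; id < 4; id++) {
            vmap[id] = add_vertex(CliqueVertexProps(id), cg);
        }
        // Triangle 0-1-2; vertex 3 is isolated.
        add_edge(vmap[0], vmap[1], cg);
        add_edge(vmap[0], vmap[2], cg);
        add_edge(vmap[1], vmap[2], cg);
        return removeClique(cg); // e.g. {{0, 1, 2}, {3}} with this ordering
    }

Callers such as findCliques in rose_build_exclusive.cpp then discard
singleton cliques, since a group of one saves nothing.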
diff --git a/src/util/clique.h b/src/util/clique.h
new file mode 100644
index 00000000..89c6d4ed
--- /dev/null
+++ b/src/util/clique.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief An algorithm to find cliques.
+ */
+
+#ifndef CLIQUE_H
+#define CLIQUE_H
+
+#include "ue2common.h"
+
+#include <vector>
+
+#include <boost/graph/adjacency_list.hpp>
+
+namespace ue2 {
+
+struct CliqueVertexProps {
+    CliqueVertexProps() {}
+    explicit CliqueVertexProps(u32 state_in) : stateId(state_in) {}
+
+    u32 stateId = ~0U;
+};
+
+typedef boost::adjacency_list<boost::listS, boost::listS, boost::undirectedS,
+                              CliqueVertexProps> CliqueGraph;
+typedef CliqueGraph::vertex_descriptor CliqueVertex;
+
+/** \brief Returns a vector of cliques found in a graph. */
+std::vector<std::vector<u32>> removeClique(CliqueGraph &cg);
+
+} // namespace ue2
+
+#endif // CLIQUE_H
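To close with a concrete (hypothetical) illustration of the whole idea:
consider two stream-mode infixes, one matching /[a-f]+/ triggered only by the
literal "abc", the other matching /[0-9]+/ triggered only by "123". Prefixing
each graph with its own trigger (addPrefixLiterals) and executing the other
engine's trigger over it (execute_graph) leaves no literal or graph states
alive, since "123" shares no characters with [a-f] or "abc" and vice versa.
isExclusive therefore holds in both directions, the pair survives
findExclusiveGroups, forms a two-vertex clique, and the two NFAs are packed
into a single Tamarama container that needs stream state for only one of them
at a time.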