multibit, fatbit: make _size build-time only

This commit makes mmbit_size() and fatbit_size() compile-time only, and
adds a resource limit for very large multibits.
Justin Viiret, 2016-12-05 16:20:52 +11:00 (committed by Matthew Barr)
parent 8b7b06d2a4
commit e271781d95
19 changed files with 201 additions and 129 deletions
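At a high level, the change moves all fatbit/multibit sizing to compile time: the compiler calls fatbit_size()/mmbit_size() once, stores the resulting byte counts in the RoseEngine bytecode (delay_fatbit_size, handledKeyFatbitSize, activeQueueArraySize, and so on), and scratch allocation then just reads those stored values. A minimal standalone sketch of the pattern, using simplified stand-in structs and a placeholder size function rather than the real RoseEngine/hs_scratch definitions:

#include <stdint.h>
#include <stdlib.h>

/* Placeholder for the compile-time helper now declared in util/fatbit_build.h;
 * the real function defers to mmbit_size() and clamps to sizeof(struct fatbit). */
static uint32_t placeholder_fatbit_size(uint32_t total_bits) {
    uint32_t bytes = (total_bits + 7) / 8;
    return bytes < 32 ? 32 : bytes;
}

/* Simplified stand-ins for RoseEngine and hs_scratch. */
struct engine_sketch { uint32_t delay_count; uint32_t delay_fatbit_size; };
struct scratch_sketch { unsigned char *delay_log; };

/* Compile time: size computed once, serialized into the bytecode. */
static void build_sketch(struct engine_sketch *e, uint32_t delay_count) {
    e->delay_count = delay_count;
    e->delay_fatbit_size = placeholder_fatbit_size(delay_count);
}

/* Run time: no size math, just consume the stored byte count. */
static int alloc_sketch(struct scratch_sketch *s, const struct engine_sketch *e) {
    s->delay_log = malloc(e->delay_fatbit_size);
    return s->delay_log != NULL;
}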

View File

@ -553,7 +553,6 @@ set (hs_exec_SRCS
src/util/copybytes.h
src/util/exhaust.h
src/util/fatbit.h
src/util/fatbit.c
src/util/join.h
src/util/masked_move.h
src/util/multibit.h
@ -924,6 +923,8 @@ SET (hs_SRCS
src/util/determinise.h
src/util/dump_mask.cpp
src/util/dump_mask.h
src/util/fatbit_build.cpp
src/util/fatbit_build.h
src/util/graph.h
src/util/hash.h
src/util/multibit_build.cpp

View File

@ -49,7 +49,6 @@
#include "util/graph.h"
#include "util/make_unique.h"
#include "util/multibit_build.h"
#include "util/multibit_internal.h"
#include "util/report_manager.h"
#include "util/ue2_containers.h"
#include "util/verify_types.h"

View File

@ -34,7 +34,7 @@
#include "shufticompile.h"
#include "trufflecompile.h"
#include "util/alloc.h"
#include "util/multibit_internal.h"
#include "util/multibit_build.h"
#include "util/order_check.h"
#include "util/report_manager.h"
#include "util/verify_types.h"

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -34,7 +34,7 @@
#include "util/charreach.h"
#include "util/depth.h"
#include "util/dump_charclass.h"
#include "util/multibit_internal.h"
#include "util/multibit_build.h"
#include "util/verify_types.h"
#include <algorithm>

View File

@ -81,6 +81,7 @@
#include "util/compile_context.h"
#include "util/compile_error.h"
#include "util/container.h"
#include "util/fatbit_build.h"
#include "util/graph_range.h"
#include "util/make_unique.h"
#include "util/multibit_build.h"
@ -5435,11 +5436,13 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
engine->ekeyCount = rm.numEkeys();
engine->dkeyCount = rm.numDkeys();
engine->dkeyLogSize = fatbit_size(engine->dkeyCount);
engine->invDkeyOffset = dkeyOffset;
copy_bytes(ptr + dkeyOffset, rm.getDkeyToReportTable());
engine->somHorizon = ssm.somPrecision();
engine->somLocationCount = ssm.numSomSlots();
engine->somLocationFatbitSize = fatbit_size(engine->somLocationCount);
engine->needsCatchup = bc.needs_catchup ? 1 : 0;
@ -5454,8 +5457,10 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
engine->activeArrayCount = activeArrayCount;
engine->activeLeftCount = activeLeftCount;
engine->queueCount = queue_count;
engine->activeQueueArraySize = fatbit_size(queue_count);
engine->eagerIterOffset = eagerIterOffset;
engine->handledKeyCount = bc.handledKeys.size();
engine->handledKeyFatbitSize = fatbit_size(engine->handledKeyCount);
engine->rolesWithStateCount = bc.numStates;
@ -5475,11 +5480,13 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
engine->lastByteHistoryIterOffset = lastByteOffset;
u32 delay_count = verify_u32(final_id_to_literal.size() - delay_base_id);
engine->delay_count = delay_count;
engine->delay_count =
verify_u32(final_id_to_literal.size() - delay_base_id);
engine->delay_fatbit_size = fatbit_size(engine->delay_count);
engine->delay_base_id = delay_base_id;
engine->anchored_base_id = anchored_base_id;
engine->anchored_count = delay_base_id - anchored_base_id;
engine->anchored_fatbit_size = fatbit_size(engine->anchored_count);
engine->rosePrefixCount = rosePrefixCount;

View File

@ -42,7 +42,7 @@
#include "nfa/nfa_internal.h"
#include "nfa/nfa_kind.h"
#include "util/dump_charclass.h"
#include "util/multibit_internal.h"
#include "util/multibit_build.h"
#include "util/multibit.h"
#include <algorithm>
@ -1232,8 +1232,10 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
DUMP_U32(t, historyRequired);
DUMP_U32(t, ekeyCount);
DUMP_U32(t, dkeyCount);
DUMP_U32(t, dkeyLogSize);
DUMP_U32(t, invDkeyOffset);
DUMP_U32(t, somLocationCount);
DUMP_U32(t, somLocationFatbitSize);
DUMP_U32(t, rolesWithStateCount);
DUMP_U32(t, stateSize);
DUMP_U32(t, anchorStateSize);
@ -1258,8 +1260,10 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
DUMP_U32(t, activeArrayCount);
DUMP_U32(t, activeLeftCount);
DUMP_U32(t, queueCount);
DUMP_U32(t, activeQueueArraySize);
DUMP_U32(t, eagerIterOffset);
DUMP_U32(t, handledKeyCount);
DUMP_U32(t, handledKeyFatbitSize);
DUMP_U32(t, leftOffset);
DUMP_U32(t, roseCount);
DUMP_U32(t, lookaroundTableOffset);
@ -1280,8 +1284,10 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
DUMP_U64(t, floating_group_mask);
DUMP_U32(t, size);
DUMP_U32(t, delay_count);
DUMP_U32(t, delay_fatbit_size);
DUMP_U32(t, delay_base_id);
DUMP_U32(t, anchored_count);
DUMP_U32(t, anchored_fatbit_size);
DUMP_U32(t, anchored_base_id);
DUMP_U32(t, maxFloatingDelayedMatch);
DUMP_U32(t, delayRebuildLength);

View File

@ -309,9 +309,11 @@ struct RoseEngine {
u32 historyRequired; /**< max amount of history required for streaming */
u32 ekeyCount; /**< number of exhaustion keys */
u32 dkeyCount; /**< number of dedupe keys */
u32 dkeyLogSize; /**< size of fatbit for storing dkey log (bytes) */
u32 invDkeyOffset; /**< offset to table mapping from dkeys to the external
* report ids */
u32 somLocationCount; /**< number of som locations required */
u32 somLocationFatbitSize; /**< size of SOM location fatbit (bytes) */
u32 rolesWithStateCount; // number of roles with entries in state bitset
u32 stateSize; /* size of the state bitset
* WARNING: not the size of the rose state */
@ -370,14 +372,18 @@ struct RoseEngine {
u32 activeArrayCount; //number of nfas tracked in the active array
u32 activeLeftCount; //number of nfas tracked in the active rose array
u32 queueCount; /**< number of nfa queues */
u32 activeQueueArraySize; //!< size of fatbit for active queues (bytes)
u32 eagerIterOffset; /**< offset to sparse iter for eager prefixes or 0 if
* none */
/** \brief Number of keys used by CHECK_SET_HANDLED instructions in role
* programs. Used to size the handled_roles fatbit in scratch. */
* programs. */
u32 handledKeyCount;
/** \brief Size of the handled keys fatbit in scratch (bytes). */
u32 handledKeyFatbitSize;
u32 leftOffset;
u32 roseCount;
u32 lookaroundTableOffset; //!< base of lookaround offset list (of s8 values)
@ -412,9 +418,11 @@ struct RoseEngine {
rose_group floating_group_mask; /* groups that are used by the ftable */
u32 size; // (bytes)
u32 delay_count; /* number of delayed literal ids. */
u32 delay_fatbit_size; //!< size of each delay fatbit in scratch (bytes)
u32 delay_base_id; /* literal id of the first delayed literal.
* delayed literal ids are contiguous */
u32 anchored_count; /* number of anchored literal ids */
u32 anchored_fatbit_size; //!< size of each anch fatbit in scratch (bytes)
u32 anchored_base_id; /* literal id of the first literal in the A table.
* anchored literal ids are contiguous */
u32 maxFloatingDelayedMatch; /* max offset that a delayed literal can

View File

@ -55,7 +55,6 @@
#include "state.h"
#include "ue2common.h"
#include "util/exhaust.h"
#include "util/fatbit.h"
#include "util/multibit.h"
static really_inline

View File

@ -43,17 +43,19 @@
#include "nfa/nfa_api_queue.h"
#include "rose/rose_internal.h"
#include "util/fatbit.h"
#include "util/multibit.h"
/**
* Determine the space required for a correctly aligned array of fatbit
* structures, laid out as:
*
* - an array of num_entries pointers, each to a fatbit.
* - an array of fatbit structures, each of size fatbit_size(num_keys).
* - an array of fatbit structures, each of size fatbit_len.
*
* fatbit_len should have been determined at compile time, via the
* fatbit_size() call.
*/
static
size_t fatbit_array_size(u32 num_entries, u32 num_keys) {
size_t fatbit_array_size(u32 num_entries, u32 fatbit_len) {
size_t len = 0;
// Array of pointers to each fatbit entry.
@ -61,7 +63,7 @@ size_t fatbit_array_size(u32 num_entries, u32 num_keys) {
// Fatbit entries themselves.
len = ROUNDUP_N(len, alignof(struct fatbit));
len += (size_t)fatbit_size(num_keys) * num_entries;
len += (size_t)fatbit_len * num_entries;
return ROUNDUP_N(len, 8); // Round up for potential padding.
}
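The hunks above only change the parameter from a key count to a precomputed byte length; the layout of the region is unchanged. As a rough self-contained illustration of that layout (the names and alignment handling below are simplified for the sketch, not the real scratch.c code): the pointer array sits at the front, the fatbit bodies follow, and each pointer is patched to its own slice whose stride is the precomputed fatbit length.

#include <stdint.h>
#include <stddef.h>

#define SKETCH_ROUNDUP(x, n) ((((x) + (n) - 1) / (n)) * (n))

struct fatbit_sketch { uint64_t raw[4]; }; /* stand-in for struct fatbit */

/* Mirror of fatbit_array_size(): pointers, then bodies, padded to 8 bytes. */
static size_t sketch_fatbit_array_size(uint32_t num_entries, uint32_t fatbit_len) {
    size_t len = num_entries * sizeof(struct fatbit_sketch *);
    len = SKETCH_ROUNDUP(len, sizeof(struct fatbit_sketch));
    len += (size_t)fatbit_len * num_entries;
    return SKETCH_ROUNDUP(len, 8);
}

/* Carve the region: fatbit_len was computed by fatbit_size() at compile time. */
static struct fatbit_sketch **sketch_carve(char *region, uint32_t num_entries,
                                           uint32_t fatbit_len) {
    struct fatbit_sketch **ptrs = (struct fatbit_sketch **)region;
    char *current = region + SKETCH_ROUNDUP(num_entries * sizeof(*ptrs),
                                            sizeof(struct fatbit_sketch));
    for (uint32_t i = 0; i < num_entries; i++) {
        ptrs[i] = (struct fatbit_sketch *)current;
        current += fatbit_len;
    }
    return ptrs;
}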
@ -71,17 +73,19 @@ size_t fatbit_array_size(u32 num_entries, u32 num_keys) {
static
hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
u32 queueCount = proto->queueCount;
u32 deduperCount = proto->deduper.log_size;
u32 activeQueueArraySize = proto->activeQueueArraySize;
u32 deduperCount = proto->deduper.dkey_count;
u32 deduperLogSize = proto->deduper.log_size;
u32 bStateSize = proto->bStateSize;
u32 tStateSize = proto->tStateSize;
u32 fullStateSize = proto->fullStateSize;
u32 anchored_literal_region_len = proto->anchored_literal_region_len;
u32 anchored_literal_region_width = proto->anchored_literal_count;
u32 anchored_literal_fatbit_size = proto->anchored_literal_fatbit_size;
u32 som_store_size = proto->som_store_count * sizeof(u64a);
u32 som_attempted_store_size = proto->som_store_count * sizeof(u64a);
u32 som_now_size = fatbit_size(proto->som_store_count);
u32 som_attempted_size = fatbit_size(proto->som_store_count);
u32 som_now_size = proto->som_fatbit_size;
u32 som_attempted_size = proto->som_fatbit_size;
struct hs_scratch *s;
struct hs_scratch *s_tmp;
@ -91,18 +95,18 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
assert(anchored_literal_region_len < 8 * sizeof(s->al_log_sum));
size_t anchored_literal_region_size = fatbit_array_size(
anchored_literal_region_len, anchored_literal_region_width);
anchored_literal_region_len, proto->anchored_literal_fatbit_size);
size_t delay_region_size =
fatbit_array_size(DELAY_SLOT_COUNT, proto->delay_count);
fatbit_array_size(DELAY_SLOT_COUNT, proto->delay_fatbit_size);
// the size is all the allocated stuff, not including the struct itself
size_t size = queue_size + 63
+ bStateSize + tStateSize
+ fullStateSize + 63 /* cacheline padding */
+ fatbit_size(proto->handledKeyCount) /* handled roles */
+ fatbit_size(queueCount) /* active queue array */
+ 2 * fatbit_size(deduperCount) /* need odd and even logs */
+ 2 * fatbit_size(deduperCount) /* ditto som logs */
+ proto->handledKeyFatbitSize /* handled roles */
+ activeQueueArraySize /* active queue array */
+ 2 * deduperLogSize /* need odd and even logs */
+ 2 * deduperLogSize /* ditto som logs */
+ 2 * sizeof(u64a) * deduperCount /* start offsets for som */
+ anchored_literal_region_size + qmpq_size
+ delay_region_size
@ -157,7 +161,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
for (u32 i = 0; i < DELAY_SLOT_COUNT; i++) {
s->delay_slots[i] = (struct fatbit *)current;
assert(ISALIGNED(s->delay_slots[i]));
current += fatbit_size(proto->delay_count);
current += proto->delay_fatbit_size;
}
current = ROUNDUP_PTR(current, alignof(struct fatbit *));
@ -167,7 +171,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
for (u32 i = 0; i < anchored_literal_region_len; i++) {
s->al_log[i] = (struct fatbit *)current;
assert(ISALIGNED(s->al_log[i]));
current += fatbit_size(anchored_literal_region_width);
current += anchored_literal_fatbit_size;
}
current = ROUNDUP_PTR(current, 8);
@ -193,22 +197,22 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
assert(ISALIGNED_N(current, 8));
s->aqa = (struct fatbit *)current;
current += fatbit_size(queueCount);
current += activeQueueArraySize;
s->handled_roles = (struct fatbit *)current;
current += fatbit_size(proto->handledKeyCount);
current += proto->handledKeyFatbitSize;
s->deduper.log[0] = (struct fatbit *)current;
current += fatbit_size(deduperCount);
current += deduperLogSize;
s->deduper.log[1] = (struct fatbit *)current;
current += fatbit_size(deduperCount);
current += deduperLogSize;
s->deduper.som_log[0] = (struct fatbit *)current;
current += fatbit_size(deduperCount);
current += deduperLogSize;
s->deduper.som_log[1] = (struct fatbit *)current;
current += fatbit_size(deduperCount);
current += deduperLogSize;
s->som_set_now = (struct fatbit *)current;
current += som_now_size;
@ -293,19 +297,19 @@ hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) {
proto->anchored_literal_region_len = rose->anchoredDistance;
}
if (rose->anchored_count > proto->anchored_literal_count) {
if (rose->anchored_fatbit_size > proto->anchored_literal_fatbit_size) {
resize = 1;
proto->anchored_literal_count = rose->anchored_count;
proto->anchored_literal_fatbit_size = rose->anchored_fatbit_size;
}
if (rose->delay_count > proto->delay_count) {
if (rose->delay_fatbit_size > proto->delay_fatbit_size) {
resize = 1;
proto->delay_count = rose->delay_count;
proto->delay_fatbit_size = rose->delay_fatbit_size;
}
if (rose->handledKeyCount > proto->handledKeyCount) {
if (rose->handledKeyFatbitSize > proto->handledKeyFatbitSize) {
resize = 1;
proto->handledKeyCount = rose->handledKeyCount;
proto->handledKeyFatbitSize = rose->handledKeyFatbitSize;
}
if (rose->tStateSize > proto->tStateSize) {
@ -319,12 +323,22 @@ hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) {
proto->som_store_count = som_store_count;
}
if (rose->somLocationFatbitSize > proto->som_fatbit_size) {
resize = 1;
proto->som_fatbit_size = rose->somLocationFatbitSize;
}
u32 queueCount = rose->queueCount;
if (queueCount > proto->queueCount) {
resize = 1;
proto->queueCount = queueCount;
}
if (rose->activeQueueArraySize > proto->activeQueueArraySize) {
resize = 1;
proto->activeQueueArraySize = rose->activeQueueArraySize;
}
u32 bStateSize = 0;
if (rose->mode == HS_MODE_BLOCK) {
bStateSize = rose->stateOffsets.end;
@ -344,9 +358,10 @@ hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) {
proto->fullStateSize = fullStateSize;
}
if (rose->dkeyCount > proto->deduper.log_size) {
if (rose->dkeyCount > proto->deduper.dkey_count) {
resize = 1;
proto->deduper.log_size = rose->dkeyCount;
proto->deduper.dkey_count = rose->dkeyCount;
proto->deduper.log_size = rose->dkeyLogSize;
}
if (resize) {
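These checks exist because one scratch can be grown to cover several databases: each hs_alloc_scratch() call bumps the prototype's stored sizes (delay_fatbit_size, handledKeyFatbitSize, and so on) to the maximum seen so far and reallocates if anything grew. Typical caller-side usage, with error handling kept minimal:

#include <hs.h>

/* One scratch region sized to fit whichever of the two databases needs more. */
static hs_scratch_t *alloc_shared_scratch(const hs_database_t *db1,
                                          const hs_database_t *db2) {
    hs_scratch_t *scratch = NULL;
    if (hs_alloc_scratch(db1, &scratch) != HS_SUCCESS) {
        return NULL;
    }
    /* Second call may resize the same scratch in place. */
    if (hs_alloc_scratch(db2, &scratch) != HS_SUCCESS) {
        hs_free_scratch(scratch);
        return NULL;
    }
    return scratch;
}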

View File

@ -148,6 +148,7 @@ struct match_deduper {
struct fatbit *log[2]; /**< even, odd logs */
struct fatbit *som_log[2]; /**< even, odd fatbit logs for som */
u64a *som_start_log[2]; /**< even, odd start offset logs for som */
u32 dkey_count;
u32 log_size;
u64a current_report_offset;
u8 som_log_dirty;
@ -162,6 +163,7 @@ struct ALIGN_CL_DIRECTIVE hs_scratch {
u32 magic;
u8 in_use; /**< non-zero when being used by an API call. */
u32 queueCount;
u32 activeQueueArraySize; /**< size of active queue array fatbit in bytes */
u32 bStateSize; /**< sizeof block mode states */
u32 tStateSize; /**< sizeof transient rose states */
u32 fullStateSize; /**< size of uncompressed nfa state */
@ -179,7 +181,7 @@ struct ALIGN_CL_DIRECTIVE hs_scratch {
struct core_info core_info;
struct match_deduper deduper;
u32 anchored_literal_region_len;
u32 anchored_literal_count;
u32 anchored_literal_fatbit_size; /**< size of each anch fatbit in bytes */
struct fatbit *handled_roles; /**< fatbit of ROLES (not states) already
* handled by this literal */
u64a *som_store; /**< array of som locations */
@ -191,8 +193,9 @@ struct ALIGN_CL_DIRECTIVE hs_scratch {
* location had been writable */
u64a som_set_now_offset; /**< offset at which som_set_now represents */
u32 som_store_count;
u32 handledKeyCount;
u32 delay_count;
u32 som_fatbit_size; /**< size of som location fatbit structures in bytes */
u32 handledKeyFatbitSize; /**< size of handled_keys fatbit in bytes */
u32 delay_fatbit_size; /**< size of each delay fatbit in bytes */
u32 scratchSize;
char *scratch_alloc; /* user allocated scratch object */
u8 ALIGN_DIRECTIVE fdr_temp_buf[FDR_TEMP_BUF_SIZE];

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -32,7 +32,7 @@
#include "scratch_dump.h"
#include "hs_internal.h"
#include "ue2common.h"
#include "util/multibit_internal.h"
#include "util/multibit_build.h"
#include "nfa/nfa_api_queue.h"
#include "rose/rose_internal.h"
@ -54,12 +54,11 @@ void dumpScratch(const struct hs_scratch *s, FILE *f) {
fprintf(f, " queues : %zu bytes\n",
s->queueCount * sizeof(struct mq));
fprintf(f, " bStateSize : %u bytes\n", s->bStateSize);
fprintf(f, " active queue array : %u bytes\n",
mmbit_size(s->queueCount));
fprintf(f, " active queue array : %u bytes\n", s->activeQueueArraySize);
fprintf(f, " qmpq : %zu bytes\n",
s->queueCount * sizeof(struct queue_match));
fprintf(f, " delay info : %u bytes\n",
mmbit_size(s->delay_count) * DELAY_SLOT_COUNT);
s->delay_fatbit_size * DELAY_SLOT_COUNT);
}
} // namespace ue2

View File

@ -40,6 +40,10 @@
#include "multibit.h"
#include "ue2common.h"
#ifdef __cplusplus
extern "C" {
#endif
#define MIN_FAT_SIZE 32
struct fatbit {
@ -82,11 +86,8 @@ u32 fatbit_iterate(const struct fatbit *bits, u32 total_bits, u32 it_in) {
return mmbit_iterate(bits->fb_int.raw, total_bits, it_in);
}
/** \brief Return the size in bytes of a fatbit that can store the given
* number of bits.
*
* Not for use in performance-critical code, implementation is in fatbit.c.
*/
u32 fatbit_size(u32 total_bits);
#ifdef __cplusplus
} // extern "C"
#endif
#endif

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -26,9 +26,19 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "fatbit_build.h"
#include "fatbit.h"
#include "multibit.h"
#include "multibit_build.h"
#include <algorithm>
using namespace std;
namespace ue2 {
u32 fatbit_size(u32 total_bits) {
return MAX(sizeof(struct fatbit), mmbit_size(total_bits));
return max(u32{sizeof(struct fatbit)}, mmbit_size(total_bits));
}
} // namespace ue2
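For illustration, a hedged example of what the new compile-time helper returns (the 32-byte floor assumes sizeof(struct fatbit) equals the MIN_FAT_SIZE of 32 defined in fatbit.h):

#include "util/fatbit_build.h" // ue2::fatbit_size()

// Indicative values only; see the assumption about sizeof(struct fatbit) above.
static void fatbit_size_examples() {
    u32 tiny = ue2::fatbit_size(8);         // mmbit_size(8) is 1 byte, but the
                                            // result is clamped up to 32 bytes
    u32 large = ue2::fatbit_size(1u << 20); // dominated by mmbit_size(), ~160 KB
    (void)tiny;
    (void)large;
}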

src/util/fatbit_build.h (new file, 48 lines)
View File

@ -0,0 +1,48 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Fatbit: build code
*/
#ifndef FATBIT_BUILD_H
#define FATBIT_BUILD_H
#include "ue2common.h"
namespace ue2 {
/**
* \brief Return the size in bytes of a fatbit that can store the given
* number of bits.
*/
u32 fatbit_size(u32 total_bits);
} // namespace ue2
#endif // FATBIT_BUILD_H

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -138,62 +138,3 @@ const u32 mmbit_root_offset_from_level[7] = {
1 + (1 << MMB_KEY_SHIFT) + (1 << MMB_KEY_SHIFT * 2) + (1 << MMB_KEY_SHIFT * 3) + (1 << MMB_KEY_SHIFT * 4),
1 + (1 << MMB_KEY_SHIFT) + (1 << MMB_KEY_SHIFT * 2) + (1 << MMB_KEY_SHIFT * 3) + (1 << MMB_KEY_SHIFT * 4) + (1 << MMB_KEY_SHIFT * 5),
};
u32 mmbit_size(u32 total_bits) {
MDEBUG_PRINTF("%u\n", total_bits);
// Flat model multibit structures are just stored as a bit vector.
if (total_bits <= MMB_FLAT_MAX_BITS) {
return ROUNDUP_N(total_bits, 8) / 8;
}
u64a current_level = 1; // Number of blocks on current level.
u64a total = 0; // Total number of blocks.
while (current_level * MMB_KEY_BITS < total_bits) {
total += current_level;
current_level <<= MMB_KEY_SHIFT;
}
// Last level is a one-for-one bit vector. It needs room for total_bits
// elements, rounded up to the nearest block.
u64a last_level = ((u64a)total_bits + MMB_KEY_BITS - 1) / MMB_KEY_BITS;
total += last_level;
assert(total * sizeof(MMB_TYPE) <= UINT32_MAX);
return (u32)(total * sizeof(MMB_TYPE));
}
#ifdef DUMP_SUPPORT
#include <stdio.h>
#include <stdlib.h>
/** \brief Dump a sparse iterator's keys to stdout. */
void mmbit_sparse_iter_dump(const struct mmbit_sparse_iter *it,
u32 total_bits) {
// Expediency and future-proofing: create a temporary multibit of the right
// size with all the bits on, then walk it with this sparse iterator.
size_t bytes = mmbit_size(total_bits);
u8 *bits = malloc(bytes);
if (!bits) {
printf("Failed to alloc %zu bytes for temp multibit", bytes);
return;
}
for (u32 i = 0; i < total_bits; i++) {
mmbit_set_i(bits, total_bits, i);
}
struct mmbit_sparse_state s[MAX_SPARSE_ITER_STATES];
u32 idx = 0;
for (u32 i = mmbit_sparse_iter_begin(bits, total_bits, &idx, it, s);
i != MMB_INVALID;
i = mmbit_sparse_iter_next(bits, total_bits, i, &idx, it, s)) {
printf("%u ", i);
}
printf("(%u keys)", idx + 1);
free(bits);
}
#endif // DUMP_SUPPORT

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -34,6 +34,7 @@
#include "scatter.h"
#include "ue2common.h"
#include "rose/rose_build_scatter.h"
#include "util/compile_error.h"
#include <cassert>
#include <cstring> // for memset
@ -45,6 +46,32 @@ using namespace std;
namespace ue2 {
u32 mmbit_size(u32 total_bits) {
if (total_bits > MMB_MAX_BITS) {
throw ResourceLimitError();
}
// Flat model multibit structures are just stored as a bit vector.
if (total_bits <= MMB_FLAT_MAX_BITS) {
return ROUNDUP_N(total_bits, 8) / 8;
}
u64a current_level = 1; // Number of blocks on current level.
u64a total = 0; // Total number of blocks.
while (current_level * MMB_KEY_BITS < total_bits) {
total += current_level;
current_level <<= MMB_KEY_SHIFT;
}
// Last level is a one-for-one bit vector. It needs room for total_bits
// elements, rounded up to the nearest block.
u64a last_level = ((u64a)total_bits + MMB_KEY_BITS - 1) / MMB_KEY_BITS;
total += last_level;
assert(total * sizeof(MMB_TYPE) <= UINT32_MAX);
return (u32)(total * sizeof(MMB_TYPE));
}
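A worked example of the arithmetic above, assuming MMB_TYPE is a 64-bit word (so MMB_KEY_BITS is 64 and each block is 8 bytes):

/*
 * mmbit_size(10000):
 *   level 0:  1 block,  64 addressable bits   -> total = 1
 *   level 1: 64 blocks, 4096 addressable bits -> total = 65
 *   (loop stops: 4096 blocks would address 262144 bits >= 10000)
 *   last level: ceil(10000 / 64) = 157 blocks -> total = 222
 *   size = 222 blocks * 8 bytes = 1776 bytes
 */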
namespace {
struct TreeNode {
MMB_TYPE mask = 0;
@ -133,6 +160,7 @@ void mmbBuildSparseIterator(vector<mmbit_sparse_iter> &out,
assert(out.empty());
assert(!bits.empty());
assert(total_bits > 0);
assert(total_bits <= MMB_MAX_BITS);
DEBUG_PRINTF("building sparse iter for %zu of %u bits\n",
bits.size(), total_bits);

View File

@ -50,6 +50,15 @@ size_t hash_value(const mmbit_sparse_iter &iter) {
namespace ue2 {
/**
* \brief Return the size in bytes of a multibit that can store the given
* number of bits.
*
* This will throw a resource limit assertion if the requested mmbit is too
* large.
*/
u32 mmbit_size(u32 total_bits);
/** \brief Construct a sparse iterator over the values in \a bits for a
* multibit of size \a total_bits. */
void mmbBuildSparseIterator(std::vector<mmbit_sparse_iter> &out,

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -47,6 +47,9 @@ extern "C" {
typedef u64a MMB_TYPE; /**< Basic block type for mmbit operations. */
#define MMB_MAX_LEVEL 6 /**< Maximum level in the mmbit pyramid. */
/** \brief Maximum number of keys (bits) in a multibit. */
#define MMB_MAX_BITS (1U << 31)
/** \brief Sparse iterator record type.
*
* A sparse iterator is a tree of these records, where val identifies the
@ -71,13 +74,6 @@ struct mmbit_sparse_state {
/** \brief Maximum number of \ref mmbit_sparse_state that could be needed. */
#define MAX_SPARSE_ITER_STATES (6 + 1)
/** \brief Return the size in bytes of a multibit that can store the given
* number of bits.
*
* Not for use in performance-critical code, implementation is in multibit.c.
*/
u32 mmbit_size(u32 total_bits);
#ifdef __cplusplus
} // extern "C"
#endif

View File

@ -30,10 +30,10 @@
#include "gtest/gtest.h"
#include "ue2common.h"
#include "util/compile_error.h"
#include "util/make_unique.h"
#include "util/multibit.h"
#include "util/multibit_build.h"
#include "util/target_info.h"
#include <algorithm>
#include <memory>
@ -1303,9 +1303,11 @@ static const MultiBitTestParam multibitTests[] = {
{ 1U << 29, 24413 },
{ 1U << 30, 50377 },
{ 1U << 31, 104729 },
// { UINT32_MAX, 104729 }, // Very slow
};
INSTANTIATE_TEST_CASE_P(MultiBit, MultiBitTest, ValuesIn(multibitTests));
TEST(MultiBit, SizeTooBig) {
ASSERT_NO_THROW(mmbit_size(MMB_MAX_BITS));
ASSERT_THROW(mmbit_size(MMB_MAX_BITS + 1), ResourceLimitError);
}