vectorscan/src/util/multibit.c
Justin Viiret 15c2980948 Make key 64 bits where large shifts may be used.
This fixes a long-standing issue with large multibit structures.
2015-12-07 09:38:32 +11:00

200 lines
6.0 KiB
C

/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Multibit: lookup tables and support code.
*
* This C file contains the constant tables used by multibit, so we don't end
* up creating copies of them for every unit that uses it.
*/
#include "multibit.h"
#include "ue2common.h"
/**
 * \brief Key shift lookup table.
 *
 * Every entry is a multiple of six in the range [0, 30]; the values step down
 * by six as the index grows.
 *
 * NOTE(review): the index is presumably a leading-zero count derived from the
 * multibit's size in bits -- confirm against the lookup in multibit.h.
 */
const u8 mmbit_keyshift_lut[32] = {
    30, 30,                 /* [ 0.. 1] */
    24, 24, 24, 24, 24, 24, /* [ 2.. 7] */
    18, 18, 18, 18, 18, 18, /* [ 8..13] */
    12, 12, 12, 12, 12, 12, /* [14..19] */
    6,  6,  6,  6,  6,  6,  /* [20..25] */
    0,  0,  0,  0,  0,  0   /* [26..31] */
};
/**
 * \brief Maximum level for a given key shift: entry ks holds ks / 6.
 *
 * Only the key shift values that appear in mmbit_keyshift_lut (multiples of
 * six) are actually valid inputs, but a table covering every value in
 * [0, 31] is used because a division is just too expensive.
 */
const u8 mmbit_maxlevel_from_keyshift_lut[32] = {
    0, 0, 0, 0, 0, 0, /* ks  0.. 5 */
    1, 1, 1, 1, 1, 1, /* ks  6..11 */
    2, 2, 2, 2, 2, 2, /* ks 12..17 */
    3, 3, 3, 3, 3, 3, /* ks 18..23 */
    4, 4, 4, 4, 4, 4, /* ks 24..29 */
    5, 5              /* ks 30..31 */
};
/**
 * \brief Maximum level lookup table, laid out like mmbit_keyshift_lut.
 *
 * Each entry equals the mmbit_keyshift_lut entry at the same index divided by
 * six, so the maximum level can be read in a single lookup.
 */
const u8 mmbit_maxlevel_direct_lut[32] = {
    5, 5,             /* [ 0.. 1] */
    4, 4, 4, 4, 4, 4, /* [ 2.. 7] */
    3, 3, 3, 3, 3, 3, /* [ 8..13] */
    2, 2, 2, 2, 2, 2, /* [14..19] */
    1, 1, 1, 1, 1, 1, /* [20..25] */
    0, 0, 0, 0, 0, 0  /* [26..31] */
};
/** \brief Mask with the low \a x bits set; only usable for x in [0, 63]. */
#define ZERO_TO_LUT(x) ((1ULL << (x)) - 1)

/**
 * \brief Low-bit masks: entry n has bits [0, n) set, for n in [0, 64].
 *
 * The final entry is written as ~0ULL rather than via ZERO_TO_LUT because a
 * shift by 64 is undefined where unsigned long long is 64 bits wide.
 */
const u64a mmbit_zero_to_lut[65] = {
    ZERO_TO_LUT(0),  ZERO_TO_LUT(1),  ZERO_TO_LUT(2),  ZERO_TO_LUT(3),
    ZERO_TO_LUT(4),  ZERO_TO_LUT(5),  ZERO_TO_LUT(6),  ZERO_TO_LUT(7),
    ZERO_TO_LUT(8),  ZERO_TO_LUT(9),  ZERO_TO_LUT(10), ZERO_TO_LUT(11),
    ZERO_TO_LUT(12), ZERO_TO_LUT(13), ZERO_TO_LUT(14), ZERO_TO_LUT(15),
    ZERO_TO_LUT(16), ZERO_TO_LUT(17), ZERO_TO_LUT(18), ZERO_TO_LUT(19),
    ZERO_TO_LUT(20), ZERO_TO_LUT(21), ZERO_TO_LUT(22), ZERO_TO_LUT(23),
    ZERO_TO_LUT(24), ZERO_TO_LUT(25), ZERO_TO_LUT(26), ZERO_TO_LUT(27),
    ZERO_TO_LUT(28), ZERO_TO_LUT(29), ZERO_TO_LUT(30), ZERO_TO_LUT(31),
    ZERO_TO_LUT(32), ZERO_TO_LUT(33), ZERO_TO_LUT(34), ZERO_TO_LUT(35),
    ZERO_TO_LUT(36), ZERO_TO_LUT(37), ZERO_TO_LUT(38), ZERO_TO_LUT(39),
    ZERO_TO_LUT(40), ZERO_TO_LUT(41), ZERO_TO_LUT(42), ZERO_TO_LUT(43),
    ZERO_TO_LUT(44), ZERO_TO_LUT(45), ZERO_TO_LUT(46), ZERO_TO_LUT(47),
    ZERO_TO_LUT(48), ZERO_TO_LUT(49), ZERO_TO_LUT(50), ZERO_TO_LUT(51),
    ZERO_TO_LUT(52), ZERO_TO_LUT(53), ZERO_TO_LUT(54), ZERO_TO_LUT(55),
    ZERO_TO_LUT(56), ZERO_TO_LUT(57), ZERO_TO_LUT(58), ZERO_TO_LUT(59),
    ZERO_TO_LUT(60), ZERO_TO_LUT(61), ZERO_TO_LUT(62), ZERO_TO_LUT(63),
    ~0ULL
};
/* Local helper: 1 << (MMB_KEY_SHIFT * lvl). Undefined again after the table. */
#define MMB_ROOT_OFFSET_TERM(lvl) (1 << (MMB_KEY_SHIFT * (lvl)))

/**
 * \brief Cumulative offsets by level.
 *
 * Entry 0 is 0. For k >= 1, entry k is the sum of (1 << (MMB_KEY_SHIFT * j))
 * for every j in [0, k - 1].
 *
 * NOTE(review): presumably the distance, in blocks, from the root of the
 * multibit to the first block of level k -- confirm against the users in
 * multibit.h.
 */
const u32 mmbit_root_offset_from_level[7] = {
    0,
    1,
    1 + MMB_ROOT_OFFSET_TERM(1),
    1 + MMB_ROOT_OFFSET_TERM(1) + MMB_ROOT_OFFSET_TERM(2),
    1 + MMB_ROOT_OFFSET_TERM(1) + MMB_ROOT_OFFSET_TERM(2)
      + MMB_ROOT_OFFSET_TERM(3),
    1 + MMB_ROOT_OFFSET_TERM(1) + MMB_ROOT_OFFSET_TERM(2)
      + MMB_ROOT_OFFSET_TERM(3) + MMB_ROOT_OFFSET_TERM(4),
    1 + MMB_ROOT_OFFSET_TERM(1) + MMB_ROOT_OFFSET_TERM(2)
      + MMB_ROOT_OFFSET_TERM(3) + MMB_ROOT_OFFSET_TERM(4)
      + MMB_ROOT_OFFSET_TERM(5),
};

#undef MMB_ROOT_OFFSET_TERM
/**
 * \brief Number of bytes of storage required by a multibit of \a total_bits
 * bits.
 *
 * \param total_bits Number of bits the multibit must be able to hold.
 * \return Size of the multibit in bytes.
 */
u32 mmbit_size(u32 total_bits) {
    MDEBUG_PRINTF("%u\n", total_bits);

    // Small ("flat model") multibits are a plain bit vector: one bit per key,
    // rounded up to a whole number of bytes.
    if (total_bits <= MMB_FLAT_MAX_BITS) {
        return ROUNDUP_N(total_bits, 8) / 8;
    }

    // Count the blocks in every level above the last one. Each level has
    // (1 << MMB_KEY_SHIFT) times as many blocks as the one before it, and we
    // stop at the first level wide enough to cover total_bits by itself.
    u64a num_blocks = 0;
    for (u64a level_blocks = 1; level_blocks * MMB_KEY_BITS < total_bits;
         level_blocks <<= MMB_KEY_SHIFT) {
        num_blocks += level_blocks;
    }

    // The last level is a one-for-one bit vector with room for total_bits
    // elements, rounded up to the nearest block.
    num_blocks += ((u64a)total_bits + MMB_KEY_BITS - 1) / MMB_KEY_BITS;

    const u64a num_bytes = num_blocks * sizeof(MMB_TYPE);
    assert(num_bytes <= UINT32_MAX);
    return (u32)num_bytes;
}
#ifdef DUMP_SUPPORT
#include <stdio.h>
#include <stdlib.h>
/**
 * \brief Dump a sparse iterator's keys to stdout.
 *
 * Creates a temporary multibit of \a total_bits bits with every bit switched
 * on, walks it with \a it, and prints each key the iterator yields followed by
 * a "(N keys)" summary taken from the iterator's running index.
 *
 * If the temporary multibit cannot be allocated, a message is printed and
 * nothing else is dumped.
 *
 * \param it Sparse iterator to walk.
 * \param total_bits Size in bits of the multibit the iterator is walked over.
 */
void mmbit_sparse_iter_dump(const struct mmbit_sparse_iter *it,
                            u32 total_bits) {
    // Expediency and future-proofing: create a temporary multibit of the right
    // size with all the bits on, then walk it with this sparse iterator.
    size_t bytes = mmbit_size(total_bits);

    // Allocate zeroed storage: memory from malloc() is indeterminate, and the
    // set operations below update the buffer in place, so it must start from a
    // defined state.
    // NOTE(review): assumes an all-zero buffer is a valid empty multibit --
    // confirm against mmbit_clear().
    u8 *bits = calloc(1, bytes);
    if (!bits) {
        printf("Failed to alloc %zu bytes for temp multibit", bytes);
        return;
    }

    for (u32 i = 0; i < total_bits; i++) {
        mmbit_set_i(bits, total_bits, i);
    }

    struct mmbit_sparse_state s[MAX_SPARSE_ITER_STATES];
    u32 idx = 0;
    for (u32 i = mmbit_sparse_iter_begin(bits, total_bits, &idx, it, s);
         i != MMB_INVALID;
         i = mmbit_sparse_iter_next(bits, total_bits, i, &idx, it, s)) {
        printf("%u ", i);
    }

    printf("(%u keys)", idx + 1);

    free(bits);
}
#endif // DUMP_SUPPORT