mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-29 11:16:29 +03:00
Make key 64 bits where large shifts may be used.
This fixes a long-standing issue with large multibit structures.
This commit is contained in:
committed by
Matthew Barr
parent
205bc1af7f
commit
15c2980948
@@ -142,23 +142,25 @@ const u32 mmbit_root_offset_from_level[7] = {
|
||||
u32 mmbit_size(u32 total_bits) {
|
||||
MDEBUG_PRINTF("%u\n", total_bits);
|
||||
|
||||
// UE-2228: multibit has bugs in very, very large cases that we should be
|
||||
// protected against at compile time by resource limits.
|
||||
assert(total_bits <= 1U << 30);
|
||||
|
||||
// Flat model multibit structures are just stored as a bit vector.
|
||||
if (total_bits <= MMB_FLAT_MAX_BITS) {
|
||||
return ROUNDUP_N(total_bits, 8) / 8;
|
||||
}
|
||||
|
||||
u32 current_level = 1;
|
||||
u32 total = 0;
|
||||
u64a current_level = 1; // Number of blocks on current level.
|
||||
u64a total = 0; // Total number of blocks.
|
||||
while (current_level * MMB_KEY_BITS < total_bits) {
|
||||
total += current_level;
|
||||
current_level <<= MMB_KEY_SHIFT;
|
||||
}
|
||||
total += (total_bits + MMB_KEY_BITS - 1)/MMB_KEY_BITS;
|
||||
return sizeof(MMB_TYPE) * total;
|
||||
|
||||
// Last level is a one-for-one bit vector. It needs room for total_bits
|
||||
// elements, rounded up to the nearest block.
|
||||
u64a last_level = ((u64a)total_bits + MMB_KEY_BITS - 1) / MMB_KEY_BITS;
|
||||
total += last_level;
|
||||
|
||||
assert(total * sizeof(MMB_TYPE) <= UINT32_MAX);
|
||||
return (u32)(total * sizeof(MMB_TYPE));
|
||||
}
|
||||
|
||||
#ifdef DUMP_SUPPORT
|
||||
|
@@ -235,18 +235,18 @@ const u8 *mmbit_get_level_root_const(const u8 *bits, u32 level) {
|
||||
/** \brief get the block for this key on the current level as a u8 ptr */
|
||||
static really_inline
|
||||
u8 *mmbit_get_block_ptr(u8 *bits, u32 max_level, u32 level, u32 key) {
|
||||
return mmbit_get_level_root(bits, level) +
|
||||
(key >> (mmbit_get_ks(max_level, level) + MMB_KEY_SHIFT)) *
|
||||
sizeof(MMB_TYPE);
|
||||
u8 *level_root = mmbit_get_level_root(bits, level);
|
||||
u32 ks = mmbit_get_ks(max_level, level);
|
||||
return level_root + ((u64a)key >> (ks + MMB_KEY_SHIFT)) * sizeof(MMB_TYPE);
|
||||
}
|
||||
|
||||
/** \brief get the block for this key on the current level as a const u8 ptr */
|
||||
static really_inline
|
||||
const u8 *mmbit_get_block_ptr_const(const u8 *bits, u32 max_level, u32 level,
|
||||
u32 key) {
|
||||
return mmbit_get_level_root_const(bits, level) +
|
||||
(key >> (mmbit_get_ks(max_level, level) + MMB_KEY_SHIFT)) *
|
||||
sizeof(MMB_TYPE);
|
||||
const u8 *level_root = mmbit_get_level_root_const(bits, level);
|
||||
u32 ks = mmbit_get_ks(max_level, level);
|
||||
return level_root + ((u64a)key >> (ks + MMB_KEY_SHIFT)) * sizeof(MMB_TYPE);
|
||||
}
|
||||
|
||||
/** \brief get the _byte_ for this key on the current level as a u8 ptr */
|
||||
@@ -254,7 +254,7 @@ static really_inline
|
||||
u8 *mmbit_get_byte_ptr(u8 *bits, u32 max_level, u32 level, u32 key) {
|
||||
u8 *level_root = mmbit_get_level_root(bits, level);
|
||||
u32 ks = mmbit_get_ks(max_level, level);
|
||||
return level_root + (key >> (ks + MMB_KEY_SHIFT - 3));
|
||||
return level_root + ((u64a)key >> (ks + MMB_KEY_SHIFT - 3));
|
||||
}
|
||||
|
||||
/** \brief get our key value for the current level */
|
||||
@@ -721,11 +721,11 @@ u32 mmbit_iterate_bounded_flat(const u8 *bits, u32 total_bits, u32 begin,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
MMB_TYPE get_lowhi_masks(u32 level, u32 max_level, u32 block_min, u32 block_max,
|
||||
u32 block_base) {
|
||||
MMB_TYPE get_lowhi_masks(u32 level, u32 max_level, u64a block_min, u64a block_max,
|
||||
u64a block_base) {
|
||||
const u32 level_shift = (max_level - level) * MMB_KEY_SHIFT;
|
||||
u32 lshift = (block_min - block_base) >> level_shift;
|
||||
u32 ushift = (block_max - block_base) >> level_shift;
|
||||
u64a lshift = (block_min - block_base) >> level_shift;
|
||||
u64a ushift = (block_max - block_base) >> level_shift;
|
||||
MMB_TYPE lmask = lshift < 64 ? ~mmb_mask_zero_to_nocheck(lshift) : 0;
|
||||
MMB_TYPE umask =
|
||||
ushift < 63 ? mmb_mask_zero_to_nocheck(ushift + 1) : MMB_ALL_ONES;
|
||||
@@ -734,7 +734,7 @@ MMB_TYPE get_lowhi_masks(u32 level, u32 max_level, u32 block_min, u32 block_max,
|
||||
|
||||
static really_inline
|
||||
u32 mmbit_iterate_bounded_big(const u8 *bits, u32 total_bits, u32 it_start, u32 it_end) {
|
||||
u32 key = 0;
|
||||
u64a key = 0;
|
||||
u32 ks = mmbit_keyshift(total_bits);
|
||||
const u32 max_level = mmbit_maxlevel_from_keyshift(ks);
|
||||
u32 level = 0;
|
||||
@@ -743,9 +743,9 @@ u32 mmbit_iterate_bounded_big(const u8 *bits, u32 total_bits, u32 it_start, u32
|
||||
assert(level <= max_level);
|
||||
|
||||
u32 block_width = MMB_KEY_BITS << ks;
|
||||
u32 block_base = key*block_width;
|
||||
u32 block_min = MAX(it_start, block_base);
|
||||
u32 block_max = MIN(it_end, block_base + block_width - 1);
|
||||
u64a block_base = key * block_width;
|
||||
u64a block_min = MAX(it_start, block_base);
|
||||
u64a block_max = MIN(it_end, block_base + block_width - 1);
|
||||
const u8 *block_ptr =
|
||||
mmbit_get_level_root_const(bits, level) + key * sizeof(MMB_TYPE);
|
||||
MMB_TYPE block = mmb_load(block_ptr);
|
||||
@@ -761,13 +761,14 @@ u32 mmbit_iterate_bounded_big(const u8 *bits, u32 total_bits, u32 it_start, u32
|
||||
// No bit found, go up a level
|
||||
// we know that this block didn't have any answers, so we can push
|
||||
// our start iterator forward.
|
||||
it_start = block_base + block_width;
|
||||
if (it_start > it_end) {
|
||||
u64a next_start = block_base + block_width;
|
||||
if (next_start > it_end) {
|
||||
break;
|
||||
}
|
||||
if (level-- == 0) {
|
||||
break;
|
||||
}
|
||||
it_start = next_start;
|
||||
key >>= MMB_KEY_SHIFT;
|
||||
ks += MMB_KEY_SHIFT;
|
||||
}
|
||||
|
Reference in New Issue
Block a user