/*
 * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief API for handling bounded repeats.
 *
 * This file provides an internal API for handling bounded repeats of character
 * classes. It is used by the Large Bounded Repeat (LBR) engine and by the
 * bounded repeat handling in the LimEx NFA engine as well.
 */

#include "repeat.h"
#include "util/bitutils.h"
#include "util/multibit.h"
#include "util/pack_bits.h"
#include "util/partial_store.h"
#include "util/unaligned.h"

#include <stdint.h>
#include <string.h>

/** \brief Returns the total capacity of the ring.
 * Note that it's currently one greater than repeatMax so that we can handle
 * cases where the tug and pos triggers overlap. */
static
u32 ringCapacity(const struct RepeatInfo *info) {
    return info->repeatMax + 1;
}
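
/*
 * Example: for a {2,4} repeat, ringCapacity() == 4 + 1 == 5; the extra slot
 * beyond repeatMax is what copes with overlapping tug and pos triggers, as
 * noted above.
 */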

/** \brief Returns the number of elements currently in the ring. Note that if
 * the first and last indices are equal, the ring is full. */
static
u32 ringOccupancy(const struct RepeatRingControl *xs, const u32 ringSize) {
    if (xs->last > xs->first) {
        return xs->last - xs->first;
    } else { // wrapped
        return ringSize - (xs->first - xs->last);
    }
}
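
/*
 * Worked example: with ringSize == 5, first == 3 and last == 1, the ring has
 * wrapped, so occupancy is 5 - (3 - 1) == 3 (the elements at indices 3, 4
 * and 0). With first == last the wrapped branch yields ringSize, i.e. full.
 */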

/** \brief Returns the offset of the _last_ top stored in the ring. */
static
u64a ringLastTop(const struct RepeatRingControl *xs, const u32 ringSize) {
    return xs->offset + ringOccupancy(xs, ringSize) - 1;
}

#if !defined(NDEBUG) || defined(DUMP_SUPPORT)
/** \brief For debugging: returns the total capacity of the range list. */
static UNUSED
u32 rangeListCapacity(const struct RepeatInfo *info) {
    u32 d = info->repeatMax - info->repeatMin;
    assert(d > 0); // should be in a RING model!
    return 2 * ((info->repeatMax / d) + 1);
}
#endif

#ifdef DEBUG
static
void dumpRing(const struct RepeatInfo *info, const struct RepeatRingControl *xs,
              const u8 *ring) {
    const u32 ringSize = ringCapacity(info);
    DEBUG_PRINTF("ring (occ %u/%u, %u->%u): ", ringOccupancy(xs, ringSize),
                 ringSize, xs->first, xs->last);

    u16 i = xs->first, n = 0;
    do {
        if (mmbit_isset(ring, ringSize, i)) {
            u64a ringOffset = xs->offset + n;
            printf("%llu ", ringOffset);
        }
        ++i, ++n;
        if (i == ringSize) {
            i = 0;
        }
    } while (i != xs->last);
    printf("\n");
}

static
void dumpRange(const struct RepeatInfo *info,
               const struct RepeatRangeControl *xs, const u16 *ring) {
    const u32 ringSize = rangeListCapacity(info);
    DEBUG_PRINTF("ring (occ %u/%u): ", xs->num, ringSize);

    if (xs->num) {
        for (u32 i = 0; i < xs->num; i++) {
            printf("%llu ", xs->offset + unaligned_load_u16(ring + i));
        }
    } else {
        printf("empty");
    }
    printf("\n");
}

static
void dumpBitmap(const struct RepeatBitmapControl *xs) {
    DEBUG_PRINTF("bitmap (base=%llu): ", xs->offset);
    u64a bitmap = xs->bitmap;
    while (bitmap) {
        printf("%llu ", xs->offset + findAndClearLSB_64(&bitmap));
    }
    printf("\n");
}

static
void dumpTrailer(const struct RepeatInfo *info,
                 const struct RepeatTrailerControl *xs) {
    const u64a m_width = info->repeatMax - info->repeatMin;
    DEBUG_PRINTF("trailer: current extent is [%llu,%llu]", xs->offset,
                 xs->offset + m_width);
    u64a bitmap = xs->bitmap;
    if (bitmap) {
        printf(", also matches at: ");
        while (bitmap) {
            u32 idx = findAndClearMSB_64(&bitmap);
            printf("%llu ", xs->offset - idx - 1);
        }
    } else {
        printf(", no earlier matches");
    }
    printf("\n");
}

#endif // DEBUG

#ifndef NDEBUG
/** \brief For debugging: returns true if the range is ordered with no dupes. */
static UNUSED
int rangeListIsOrdered(const struct RepeatRangeControl *xs, const u16 *ring) {
    for (u32 i = 1; i < xs->num; i++) {
        u16 a = unaligned_load_u16(ring + i - 1);
        u16 b = unaligned_load_u16(ring + i);
        if (a >= b) {
            return 0;
        }
    }
    return 1;
}
#endif

u64a repeatLastTopRing(const struct RepeatInfo *info,
                       const union RepeatControl *ctrl) {
    const u32 ringSize = ringCapacity(info);
    return ringLastTop(&ctrl->ring, ringSize);
}

u64a repeatLastTopRange(const union RepeatControl *ctrl, const void *state) {
    const u16 *ring = (const u16 *)state;
    const struct RepeatRangeControl *xs = &ctrl->range;
    assert(xs->num);
    return xs->offset + unaligned_load_u16(ring + xs->num - 1);
}

u64a repeatLastTopBitmap(const union RepeatControl *ctrl) {
    const struct RepeatBitmapControl *xs = &ctrl->bitmap;
    if (!xs->bitmap) {
        /* last top was too long ago */
        return 0;
    }
    return xs->offset + 63 - clz64(xs->bitmap);
}
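
/*
 * Worked example: with xs->offset == 100 and xs->bitmap == 0x5 (bits 0 and 2
 * set, i.e. tops at 100 and 102), clz64(0x5) == 61, so the last top is
 * 100 + 63 - 61 == 102.
 */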

u64a repeatLastTopTrailer(const struct RepeatInfo *info,
                          const union RepeatControl *ctrl) {
    const struct RepeatTrailerControl *xs = &ctrl->trailer;
    assert(xs->offset >= info->repeatMin);
    return xs->offset - info->repeatMin;
}

u64a repeatNextMatchRing(const struct RepeatInfo *info,
                         const union RepeatControl *ctrl, const void *state,
                         u64a offset) {
    const struct RepeatRingControl *xs = &ctrl->ring;
    const u8 *ring = (const u8 *)state;
    const u32 ringSize = ringCapacity(info);

    // We should have at least one top stored.
    assert(mmbit_any(ring, ringSize));
    assert(info->repeatMax < REPEAT_INF);

    // Increment offset, as we want the NEXT match.
    offset++;

    const u64a base_offset = xs->offset;
    DEBUG_PRINTF("offset=%llu, base_offset=%llu\n", offset, base_offset);

    u64a delta = offset - base_offset;
    if (offset < base_offset || delta < info->repeatMin) {
        DEBUG_PRINTF("before min repeat\n");
        return base_offset + info->repeatMin;
    }
    if (offset > ringLastTop(xs, ringSize) + info->repeatMax) {
        DEBUG_PRINTF("ring is stale\n");
        return 0; // no more matches
    }

    DEBUG_PRINTF("delta=%llu\n", delta);
    u64a lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
    DEBUG_PRINTF("lower=%llu\n", lower);

    assert(lower < ringSize);

    // First scan, either to xs->last if there's no wrap-around or ringSize
    // (end of the underlying multibit) if we are wrapping.

    u32 begin = xs->first + lower;
    if (begin >= ringSize) {
        // This branch and sub tested a lot faster than using % (integer div).
        begin -= ringSize;
    }
    const u32 end = begin >= xs->last ? ringSize : xs->last;
    u32 i = mmbit_iterate_bounded(ring, ringSize, begin, end);
    if (i != MMB_INVALID) {
        u32 j = i - begin + lower;
        return MAX(offset, base_offset + j + info->repeatMin);
    }

    // A second scan is necessary if we need to cope with wrap-around in the
    // ring buffer.

    if (begin >= xs->last) {
        i = mmbit_iterate_bounded(ring, ringSize, 0, xs->last);
        if (i != MMB_INVALID) {
            u32 j = i + (ringSize - begin) + lower;
            return MAX(offset, base_offset + j + info->repeatMin);
        }
    }

    return 0;
}

u64a repeatNextMatchRange(const struct RepeatInfo *info,
                          const union RepeatControl *ctrl, const void *state,
                          u64a offset) {
    const struct RepeatRangeControl *xs = &ctrl->range;
    const u16 *ring = (const u16 *)state;

    assert(xs->num > 0);
    assert(xs->num <= rangeListCapacity(info));
    assert(rangeListIsOrdered(xs, ring));
    assert(info->repeatMax < REPEAT_INF);

    for (u32 i = 0; i < xs->num; i++) {
        u64a base = xs->offset + unaligned_load_u16(ring + i);
        u64a first = base + info->repeatMin;
        if (offset < first) {
            return first;
        }
        if (offset < base + info->repeatMax) {
            return offset + 1;
        }
    }

    return 0;
}
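
/*
 * Worked example: for a {3,5} repeat with list entries at absolute offsets 10
 * and 20, a query at offset 14 checks the first entry: its match window is
 * [13,15], and since 14 < 13 is false but 14 < 15, the next match is 15.
 */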

u64a repeatNextMatchBitmap(const struct RepeatInfo *info,
                           const union RepeatControl *ctrl, u64a offset) {
    const struct RepeatBitmapControl *xs = &ctrl->bitmap;
    const u64a base = xs->offset;
    u64a bitmap = xs->bitmap;

    // FIXME: quick exit if there is no match, based on last top in bitmap?

    while (bitmap) {
        u64a top = base + findAndClearLSB_64(&bitmap);
        if (offset < top + info->repeatMin) {
            return top + info->repeatMin;
        }
        if (offset < top + info->repeatMax) {
            return offset + 1;
        }
    }

    return 0; // No more matches.
}
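
/*
 * Worked example: for a {2,4} repeat with base == 10 and bits 0 and 3 set
 * (tops at 10 and 13), a query at offset 12 first considers the top at 10:
 * 12 < 10 + 2 is false, but 12 < 10 + 4, so the next match is 13.
 */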

u64a repeatNextMatchTrailer(const struct RepeatInfo *info,
                            const union RepeatControl *ctrl, u64a offset) {
    const struct RepeatTrailerControl *xs = &ctrl->trailer;
    const u32 m_width = info->repeatMax - info->repeatMin;

    DEBUG_PRINTF("offset=%llu, xs->offset=%llu\n", offset, xs->offset);
    DEBUG_PRINTF("{%u,%u} repeat, m_width=%u\n", info->repeatMin,
                 info->repeatMax, m_width);

    assert(xs->offset >= info->repeatMin);

    if (offset >= xs->offset + m_width) {
        DEBUG_PRINTF("no more matches\n");
        return 0;
    }

    if (offset >= xs->offset) {
        DEBUG_PRINTF("inside most recent match window, next match %llu\n",
                     offset + 1);
        return offset + 1;
    }

    // Offset is before the match window, we need to consult the bitmap of
    // earlier match offsets.
    u64a bitmap = xs->bitmap;

    u64a diff = xs->offset - offset;
    DEBUG_PRINTF("diff=%llu\n", diff);
    if (diff <= 64) {
        assert(diff);
        bitmap &= (1ULL << (diff - 1)) - 1;
    }
    DEBUG_PRINTF("bitmap = 0x%llx\n", bitmap);
    if (bitmap) {
        u32 idx = 63 - clz64(bitmap);
        DEBUG_PRINTF("clz=%u, idx = %u -> offset %llu\n", clz64(bitmap), idx,
                     xs->offset - idx);
        DEBUG_PRINTF("next match at %llu\n", xs->offset - idx - 1);
        u64a next_match = xs->offset - idx - 1;
        assert(next_match > offset);
        return next_match;
    }

    DEBUG_PRINTF("next match is start of match window, %llu\n", xs->offset);
    return xs->offset;
}

/** \brief Store the first top in the ring buffer. */
static
void storeInitialRingTop(struct RepeatRingControl *xs, u8 *ring,
                         u64a offset, const u32 ringSize) {
    DEBUG_PRINTF("ring=%p, ringSize=%u\n", ring, ringSize);
    xs->offset = offset;
    mmbit_clear(ring, ringSize);
    mmbit_set(ring, ringSize, 0);
    xs->first = 0;
    xs->last = 1;
}

static really_inline
char ringIsStale(const struct RepeatRingControl *xs, const u32 ringSize,
                 const u64a offset) {
    u64a finalMatch = ringLastTop(xs, ringSize);
    if (offset - finalMatch >= ringSize) {
        DEBUG_PRINTF("all matches in ring are stale\n");
        return 1;
    }

    return 0;
}

void repeatStoreRing(const struct RepeatInfo *info, union RepeatControl *ctrl,
                     void *state, u64a offset, char is_alive) {
    struct RepeatRingControl *xs = &ctrl->ring;
    u8 *ring = (u8 *)state;
    const u32 ringSize = ringCapacity(info);
    assert(ringSize > 0);

    DEBUG_PRINTF("storing top for offset %llu in ring\n", offset);

    if (!is_alive || ringIsStale(xs, ringSize, offset)) {
        storeInitialRingTop(xs, ring, offset, ringSize);
    } else {
        assert(offset > ringLastTop(xs, ringSize)); // Dupe or out of order.
        u32 occ = ringOccupancy(xs, ringSize);
        u64a diff = offset - xs->offset;
        DEBUG_PRINTF("diff=%llu, occ=%u\n", diff, occ);
        if (diff >= ringSize) {
            u32 push = diff - ringSize + 1;
            DEBUG_PRINTF("push ring %u\n", push);
            xs->first += push;
            if (xs->first >= ringSize) {
                xs->first -= ringSize;
            }
            xs->offset += push;
            diff -= push;
            occ -= push;
        }

        // There's now room in the ring for this top, so we write a run of
        // zeroes, then a one.
        DEBUG_PRINTF("diff=%llu, occ=%u\n", diff, occ);
        assert(diff < ringSize);
        assert(diff >= occ);
        u32 n = diff - occ;

        u32 i = xs->last + n;

        mmbit_unset_range(ring, ringSize, xs->last, MIN(i, ringSize));
        if (i >= ringSize) {
            i -= ringSize;
            mmbit_unset_range(ring, ringSize, 0, i);
        }

        assert(i != xs->first);
        DEBUG_PRINTF("set bit %u\n", i);
        mmbit_set(ring, ringSize, i);
        xs->last = i + 1;
        if (xs->last == ringSize) {
            xs->last = 0;
        }
    }

    // Our ring indices shouldn't have spiraled off into uncharted space.
    assert(xs->first < ringSize);
    assert(xs->last < ringSize);

#ifdef DEBUG
    DEBUG_PRINTF("post-store ring state\n");
    dumpRing(info, xs, ring);
#endif

    // The final top stored in our ring should be the one we just wrote in.
    assert(ringLastTop(xs, ringSize) == offset);
}

static really_inline
void storeInitialRangeTop(struct RepeatRangeControl *xs, u16 *ring,
                          u64a offset) {
    xs->offset = offset;
    xs->num = 1;
    unaligned_store_u16(ring, 0);
}

void repeatStoreRange(const struct RepeatInfo *info, union RepeatControl *ctrl,
                      void *state, u64a offset, char is_alive) {
    struct RepeatRangeControl *xs = &ctrl->range;
    u16 *ring = (u16 *)state;

    if (!is_alive) {
        DEBUG_PRINTF("storing initial top at %llu\n", offset);
        storeInitialRangeTop(xs, ring, offset);
        return;
    }

    DEBUG_PRINTF("storing top at %llu, list currently has %u/%u elements\n",
                 offset, xs->num, rangeListCapacity(info));

#ifdef DEBUG
    dumpRange(info, xs, ring);
#endif

    // Walk ring from front. Identify the number of stale elements, and shift
    // the whole ring to delete them.
    u32 i = 0;
    for (; i < xs->num; i++) {
        u64a this_offset = xs->offset + unaligned_load_u16(ring + i);
        DEBUG_PRINTF("this_offset=%llu, diff=%llu\n", this_offset,
                     offset - this_offset);
        if (offset - this_offset <= info->repeatMax) {
            break;
        }
    }

    if (i == xs->num) {
        DEBUG_PRINTF("whole ring is stale\n");
        storeInitialRangeTop(xs, ring, offset);
        return;
    } else if (i > 0) {
        DEBUG_PRINTF("expiring %u stale tops\n", i);
        u16 first_offset = unaligned_load_u16(ring + i); // first live top
        for (u32 j = 0; j < xs->num - i; j++) {
            u16 val = unaligned_load_u16(ring + i + j);
            assert(val >= first_offset);
            unaligned_store_u16(ring + j, val - first_offset);
        }
        xs->offset += first_offset;
        xs->num -= i;
    }

#ifdef DEBUG
    DEBUG_PRINTF("post-expire:\n");
    dumpRange(info, xs, ring);
#endif

    if (xs->num == 1) {
        goto append;
    }

    // Let d = repeatMax - repeatMin
    // Examine penultimate entry x[-2].
    // If (offset - x[-2] <= d), then last entry x[-1] can be replaced with
    // entry for offset.
    assert(xs->num >= 2);
    u32 d = info->repeatMax - info->repeatMin;
    u64a penultimate_offset =
        xs->offset + unaligned_load_u16(ring + xs->num - 2);
    if (offset - penultimate_offset <= d) {
        assert(offset - xs->offset <= (u16)-1);
        unaligned_store_u16(ring + xs->num - 1, offset - xs->offset);
        goto done;
    }

    // Otherwise, write a new entry for offset and return.

append:
    assert(offset - xs->offset <= (u16)-1);
    assert(xs->num < rangeListCapacity(info));
    unaligned_store_u16(ring + xs->num, offset - xs->offset);
    xs->num++;

done:
    assert(rangeListIsOrdered(xs, ring));
}

void repeatStoreBitmap(const struct RepeatInfo *info, union RepeatControl *ctrl,
                       u64a offset, char is_alive) {
    DEBUG_PRINTF("{%u,%u} repeat, storing top at %llu\n", info->repeatMin,
                 info->repeatMax, offset);

    struct RepeatBitmapControl *xs = &ctrl->bitmap;
    if (!is_alive || !xs->bitmap) {
        DEBUG_PRINTF("storing initial top at %llu\n", offset);
        xs->offset = offset;
        xs->bitmap = 1U;
        return;
    }

#ifdef DEBUG
    DEBUG_PRINTF("pre-store:\n");
    dumpBitmap(xs);
#endif

    assert(offset >= xs->offset);

    u64a last_top = xs->offset + 63 - clz64(xs->bitmap);
    if (offset > last_top + info->repeatMax) {
        DEBUG_PRINTF("bitmap stale, storing initial top\n");
        xs->offset = offset;
        xs->bitmap = 1U;
        return;
    }

    u64a diff = offset - xs->offset;
    if (diff >= info->repeatMax + 1) {
        DEBUG_PRINTF("need expire, diff=%llu\n", diff);
        u64a push = diff - info->repeatMax;
        xs->offset += push;
        xs->bitmap = push >= 64 ? 0 : xs->bitmap >> push;
        DEBUG_PRINTF("pushed xs->offset to %llu\n", xs->offset);
    }

    // Write a new entry.
    diff = offset - xs->offset;
    assert(diff < 64);
    xs->bitmap |= (1ULL << diff);

#ifdef DEBUG
    DEBUG_PRINTF("post-store:\n");
    dumpBitmap(xs);
#endif
}

/** \brief Returns 1 if the ring has a match between (logical) index \a lower
 * and \a upper, excluding \a upper. */
static
int ringHasMatch(const struct RepeatRingControl *xs, const u8 *ring,
                 const u32 ringSize, u32 lower, u32 upper) {
    assert(lower < upper);
    assert(lower < ringSize);
    assert(upper <= ringSize);

    u32 i = xs->first + lower;
    if (i >= ringSize) {
        i -= ringSize;
    }

    // Performance tweak: if we're looking at a fixed repeat, we can just use
    // mmbit_isset.
    if (lower + 1 == upper) {
        return mmbit_isset(ring, ringSize, i);
    }

    u32 end = xs->first + upper;
    if (end >= ringSize) {
        end -= ringSize;
    }

    // First scan, either to end if there's no wrap-around or ringSize (end of
    // the underlying multibit) if we are wrapping.

    u32 scan_end = i < end ? end : ringSize;
    u32 m = mmbit_iterate_bounded(ring, ringSize, i, scan_end);
    if (m != MMB_INVALID) {
        return 1;
    }

    // A second scan is necessary if we need to cope with wrap-around in the
    // ring buffer.

    if (i >= end) {
        m = mmbit_iterate_bounded(ring, ringSize, 0, end);
        return m != MMB_INVALID;
    }

    return 0;
}

/** Return a mask of ones in bit positions [0..v]. */
static really_inline
u64a mask_ones_to(u32 v) {
    if (v < 63) {
        return (1ULL << (v + 1)) - 1;
    } else {
        return ~(0ULL);
    }
}
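
/*
 * For example, mask_ones_to(2) == 0b111 == 7, and mask_ones_to(63) yields all
 * sixty-four bits set; the explicit branch avoids a shift by 64, which would
 * be undefined behaviour.
 */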

void repeatStoreTrailer(const struct RepeatInfo *info,
                        union RepeatControl *ctrl, u64a offset, char is_alive) {
    DEBUG_PRINTF("{%u,%u} repeat, top at %llu\n", info->repeatMin,
                 info->repeatMax, offset);

    struct RepeatTrailerControl *xs = &ctrl->trailer;

    /* The TRAILER repeat model stores the following data in its control block:
     *
     * 1. offset, which is the min extent of the most recent match window
     *    (i.e. corresponding to the most recent top)
     * 2. bitmap, which is a bitmap of up to repeatMin matches before
     *    the min extent offset.
     */

    const u64a next_extent = offset + info->repeatMin;

    if (!is_alive) {
        xs->offset = next_extent;
        xs->bitmap = 0;
        DEBUG_PRINTF("initial top, set extent to %llu\n", next_extent);
        return;
    }

#ifdef DEBUG
    DEBUG_PRINTF("pre-store:\n");
    dumpTrailer(info, xs);
#endif

    const u32 m_width = info->repeatMax - info->repeatMin;
    DEBUG_PRINTF("most recent match window is [%llu,%llu]\n", xs->offset,
                 xs->offset + m_width);

    assert(next_extent > xs->offset);
    u64a diff = next_extent - xs->offset;
    DEBUG_PRINTF("diff=%llu, m_width=%u\n", diff, m_width);

    assert(diff);
    xs->bitmap = diff < 64 ? xs->bitmap << diff : 0;

    // Switch on bits in the bitmask corresponding to matches in the previous
    // match window.
    if (diff <= m_width) {
        u64a m = mask_ones_to(diff - 1);
        xs->bitmap |= m;
    } else {
        u64a shift = diff - m_width - 1;
        if (shift < 64) {
            u64a m = mask_ones_to(m_width);
            m <<= shift;
            xs->bitmap |= m;
        }
    }

    DEBUG_PRINTF("bitmap=0x%llx\n", xs->bitmap);

    // Update the min extent to that of the new match window.
    xs->offset = next_extent;

    // Trim stale history: we only need repeatMin bytes of history.
    if (info->repeatMin < 63) {
        u64a mask = (1ULL << (info->repeatMin + 1)) - 1;
        xs->bitmap &= mask;
    }

#ifdef DEBUG
    DEBUG_PRINTF("post-store:\n");
    dumpTrailer(info, xs);
#endif
}
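
/*
 * Worked example for a {2,4} repeat (m_width == 2): suppose the previous top
 * gave xs->offset == 10 (window [10,12]) and a new top arrives at offset 11,
 * so next_extent == 13 and diff == 3. The bitmap is shifted left by 3 and,
 * since diff > m_width, mask_ones_to(2) == 0b111 is OR'd in at shift 0: bits
 * 0..2 now record matches at 13-1, 13-2 and 13-3, i.e. the old window
 * [10,12], before xs->offset is advanced to 13.
 */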

enum RepeatMatch repeatHasMatchRing(const struct RepeatInfo *info,
                                    const union RepeatControl *ctrl,
                                    const void *state, u64a offset) {
    const struct RepeatRingControl *xs = &ctrl->ring;
    const u8 *ring = (const u8 *)state;
    const u32 ringSize = ringCapacity(info);

    assert(mmbit_any(ring, ringSize));
    assert(offset >= xs->offset);

    DEBUG_PRINTF("check: offset=%llu, repeat=[%u,%u]\n", offset,
                 info->repeatMin, info->repeatMax);
#ifdef DEBUG
    DEBUG_PRINTF("ring state\n");
    dumpRing(info, xs, ring);
#endif

    if (offset - xs->offset < info->repeatMin) {
        DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n");
        return REPEAT_NOMATCH;
    }

    if (offset - ringLastTop(xs, ringSize) >= ringSize) {
        DEBUG_PRINTF("ring is stale\n");
        return REPEAT_STALE;
    }

    // If we're not stale, delta fits in the range [repeatMin, lastTop +
    // repeatMax], which fits in a u32.
    assert(offset - xs->offset < UINT32_MAX);
    u32 delta = (u32)(offset - xs->offset);
    DEBUG_PRINTF("delta=%u\n", delta);

    // Find the bounds on possible matches in the ring buffer.
    u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
    u32 upper = MIN(delta - info->repeatMin + 1, ringOccupancy(xs, ringSize));

    if (lower >= upper) {
        DEBUG_PRINTF("no matches to check\n");
        return REPEAT_NOMATCH;
    }

    DEBUG_PRINTF("possible match indices=[%u,%u]\n", lower, upper);
    if (ringHasMatch(xs, ring, ringSize, lower, upper)) {
        return REPEAT_MATCH;
    }

    return REPEAT_NOMATCH;
}
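
/*
 * Worked example of the bounds above: for a {2,4} repeat with delta == 7 and
 * three tops in the ring, lower == 7 - 4 == 3 and
 * upper == MIN(7 - 2 + 1, 3) == 3, so lower >= upper and there is nothing to
 * check (every stored top is more than repeatMax behind the query offset).
 */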

enum RepeatMatch repeatHasMatchRange(const struct RepeatInfo *info,
                                     const union RepeatControl *ctrl,
                                     const void *state, u64a offset) {
    const struct RepeatRangeControl *xs = &ctrl->range;
    const u16 *ring = (const u16 *)state;

    assert(xs->num > 0);
    assert(xs->num <= rangeListCapacity(info));
    assert(rangeListIsOrdered(xs, ring));

    // Walk the ring. For each entry x:
    // if (offset - x) falls inside repeat bounds, return success.

    // It may be worth doing tests on first and last elements first to bail
    // early if the whole ring is too young or stale.

    DEBUG_PRINTF("check %u (of %u) elements, offset %llu, bounds={%u,%u}\n",
                 xs->num, rangeListCapacity(info), offset,
                 info->repeatMin, info->repeatMax);
#ifdef DEBUG
    dumpRange(info, xs, ring);
#endif

    // Quick pre-check for minimum.
    assert(offset >= xs->offset);
    if (offset - xs->offset < info->repeatMin) {
        DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n");
        return REPEAT_NOMATCH;
    }

    // We check the most recent offset first, as we can establish staleness.
    u64a match = xs->offset + unaligned_load_u16(ring + xs->num - 1);
    assert(offset >= match);
    u64a diff = offset - match;
    if (diff > info->repeatMax) {
        DEBUG_PRINTF("range list is stale\n");
        return REPEAT_STALE;
    } else if (diff >= info->repeatMin) {
        return REPEAT_MATCH;
    }

    // Check the other offsets in the list.
    u32 count = xs->num - 1;
    for (u32 i = 0; i < count; i++) {
        match = xs->offset + unaligned_load_u16(ring + i);
        assert(offset >= match);
        diff = offset - match;
        if (diff >= info->repeatMin && diff <= info->repeatMax) {
            return REPEAT_MATCH;
        }
    }

    return REPEAT_NOMATCH;
}

enum RepeatMatch repeatHasMatchBitmap(const struct RepeatInfo *info,
                                      const union RepeatControl *ctrl,
                                      u64a offset) {
    const struct RepeatBitmapControl *xs = &ctrl->bitmap;

    DEBUG_PRINTF("checking if offset=%llu is a match\n", offset);

#ifdef DEBUG
    dumpBitmap(xs);
#endif

    u64a bitmap = xs->bitmap;
    if (!bitmap) {
        DEBUG_PRINTF("no tops; stale\n");
        return REPEAT_STALE;
    }

    // Quick pre-check for minimum.
    const u64a base = xs->offset;
    assert(offset >= base);
    if (offset - base < info->repeatMin) {
        DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n");
        return REPEAT_NOMATCH;
    }

    // We check the most recent offset first, as we can establish staleness.
    u64a match = base + findAndClearMSB_64(&bitmap);
    DEBUG_PRINTF("offset=%llu, last_match %llu\n", offset, match);
    assert(offset >= match);
    u64a diff = offset - match;
    if (diff > info->repeatMax) {
        DEBUG_PRINTF("stale\n");
        return REPEAT_STALE;
    } else if (diff >= info->repeatMin) {
        return REPEAT_MATCH;
    }

    while (bitmap) {
        match = base + findAndClearLSB_64(&bitmap);
        DEBUG_PRINTF("offset=%llu, last_match %llu\n", offset, match);
        assert(offset >= match);
        diff = offset - match;
        if (diff >= info->repeatMin && diff <= info->repeatMax) {
            return REPEAT_MATCH;
        }
    }

    return REPEAT_NOMATCH;
}

enum RepeatMatch repeatHasMatchTrailer(const struct RepeatInfo *info,
                                       const union RepeatControl *ctrl,
                                       u64a offset) {
    const struct RepeatTrailerControl *xs = &ctrl->trailer;
    const u32 m_width = info->repeatMax - info->repeatMin;

    DEBUG_PRINTF("offset=%llu, xs->offset=%llu, xs->bitmap=0x%llx\n", offset,
                 xs->offset, xs->bitmap);

    if (offset > xs->offset + m_width) {
        DEBUG_PRINTF("stale\n");
        return REPEAT_STALE;
    }

    if (offset >= xs->offset) {
        DEBUG_PRINTF("in match window\n");
        return REPEAT_MATCH;
    }

    if (offset >= xs->offset - info->repeatMin) {
        u32 idx = xs->offset - offset - 1;
        DEBUG_PRINTF("check bitmap idx %u\n", idx);
        assert(idx < 64);
        if (xs->bitmap & (1ULL << idx)) {
            DEBUG_PRINTF("match in bitmap\n");
            return REPEAT_MATCH;
        }
    }

    DEBUG_PRINTF("no match\n");
    return REPEAT_NOMATCH;
}

/** \brief True if the given value can be packed into len bytes. */
static really_inline
int fits_in_len_bytes(u64a val, u32 len) {
    if (len >= 8) {
        return 1;
    }
    return val <= (1ULL << (len * 8));
}

static really_inline
void storePackedRelative(char *dest, u64a val, u64a offset, u64a max, u32 len) {
    assert(val <= offset);
    assert(fits_in_len_bytes(max, len));
    u64a delta = offset - val;
    if (delta >= max) {
        delta = max;
    }
    DEBUG_PRINTF("delta %llu\n", delta);
    assert(fits_in_len_bytes(delta, len));
    partial_store_u64a(dest, delta, len);
}
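
/*
 * Worked example: packing val == 990 against offset == 1000 stores the delta
 * 10 in len bytes; loadPackedRelative() below reverses this as
 * 1000 - 10 == 990. Deltas larger than max (the repeat's horizon) are clamped
 * to max before being stored.
 */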

static
void repeatPackRing(char *dest, const struct RepeatInfo *info,
                    const union RepeatControl *ctrl, u64a offset) {
    const struct RepeatRingControl *xs = &ctrl->ring;
    const u32 ring_indices_len = info->repeatMax < 254 ? 2 : 4;
    const u32 offset_len = info->packedCtrlSize - ring_indices_len;

    // Write out packed relative base offset.
    assert(info->packedCtrlSize > ring_indices_len);
    storePackedRelative(dest, xs->offset, offset, info->horizon, offset_len);

    // Write out ring indices.
    if (ring_indices_len == 4) {
        unaligned_store_u16(dest + offset_len, xs->first);
        unaligned_store_u16(dest + offset_len + 2, xs->last);
    } else {
        assert(xs->first < 256 && xs->last < 256);
        u8 *indices = (u8 *)dest + offset_len;
        indices[0] = xs->first;
        indices[1] = xs->last;
    }
}

static
void repeatPackOffset(char *dest, const struct RepeatInfo *info,
                      const union RepeatControl *ctrl, u64a offset) {
    const struct RepeatOffsetControl *xs = &ctrl->offset;
    DEBUG_PRINTF("packing offset %llu [h %u]\n", xs->offset, info->horizon);
    if (!info->packedCtrlSize) {
        assert(info->type == REPEAT_ALWAYS);
        DEBUG_PRINTF("externally guarded .*\n");
        return;
    }
    storePackedRelative(dest, xs->offset, offset, info->horizon,
                        info->packedCtrlSize);
}

static
void repeatPackRange(char *dest, const struct RepeatInfo *info,
                     const union RepeatControl *ctrl, u64a offset) {
    const struct RepeatRangeControl *xs = &ctrl->range;

    // Write out packed relative base offset.
    assert(info->packedCtrlSize > 1);
    storePackedRelative(dest, xs->offset, offset, info->horizon,
                        info->packedCtrlSize - 1);

    // Write out range number of elements.
    dest[info->packedCtrlSize - 1] = xs->num;
}

static
void repeatPackBitmap(char *dest, const struct RepeatInfo *info,
                      const union RepeatControl *ctrl, u64a offset) {
    const struct RepeatBitmapControl *xs = &ctrl->bitmap;
    const u32 bound = info->repeatMax;

    assert(offset >= xs->offset);
    u64a new_base = offset > bound ? offset - bound : 0;

    // Shift bitmap to begin at new_base rather than xs->offset.
    u64a bitmap = xs->bitmap;
    if (new_base >= xs->offset) {
        u64a shift = new_base - xs->offset;
        bitmap = shift < 64 ? bitmap >> shift : 0;
    } else {
        u64a shift = xs->offset - new_base;
        bitmap = shift < 64 ? bitmap << shift : 0;
    }

    DEBUG_PRINTF("packing %llu into %u bytes\n", bitmap, info->packedCtrlSize);

    // Write out packed bitmap.
    assert(fits_in_len_bytes(bitmap, info->packedCtrlSize));
    partial_store_u64a(dest, bitmap, info->packedCtrlSize);
}

static
void repeatPackSparseOptimalP(char *dest, const struct RepeatInfo *info,
                              const union RepeatControl *ctrl, u64a offset) {
    const struct RepeatRingControl *xs = &ctrl->ring;
    // set ring index pointer according to patch count
    const u32 ring_indices_len = info->patchCount < 254 ? 2 : 4;
    const u32 offset_len = info->packedCtrlSize - ring_indices_len;

    // Write out packed relative base offset.
    assert(info->packedCtrlSize > ring_indices_len);
    storePackedRelative(dest, xs->offset, offset, info->horizon, offset_len);

    // Write out ring indices.
    if (ring_indices_len == 4) {
        unaligned_store_u16(dest + offset_len, xs->first);
        unaligned_store_u16(dest + offset_len + 2, xs->last);
    } else {
        assert(xs->first < 256 && xs->last < 256);
        u8 *indices = (u8 *)dest + offset_len;
        indices[0] = xs->first;
        indices[1] = xs->last;
    }
}

static
void repeatPackTrailer(char *dest, const struct RepeatInfo *info,
                       const union RepeatControl *ctrl, u64a offset) {
    const struct RepeatTrailerControl *xs = &ctrl->trailer;

    DEBUG_PRINTF("saving: offset=%llu, xs->offset=%llu, xs->bitmap=0x%llx\n",
                 offset, xs->offset, xs->bitmap);

    // XXX: xs->offset may be zero in the NFA path (effectively uninitialized).
    u64a top;
    if (xs->offset) {
        assert(xs->offset >= info->repeatMin);
        top = xs->offset - info->repeatMin;
    } else {
        top = 0;
    }

    top = offset - top; // Pack top relative to offset.

    u64a v[2];
    v[0] = MIN(top, info->horizon);
    v[1] = xs->bitmap;

    pack_bits_64(dest, v, info->packedFieldSizes, 2);
}

void repeatPack(char *dest, const struct RepeatInfo *info,
                const union RepeatControl *ctrl, u64a offset) {
    assert(dest && info && ctrl);

    switch ((enum RepeatType)info->type) {
    case REPEAT_RING:
        repeatPackRing(dest, info, ctrl, offset);
        break;
    case REPEAT_FIRST:
    case REPEAT_LAST:
        repeatPackOffset(dest, info, ctrl, offset);
        break;
    case REPEAT_RANGE:
        repeatPackRange(dest, info, ctrl, offset);
        break;
    case REPEAT_BITMAP:
        repeatPackBitmap(dest, info, ctrl, offset);
        break;
    case REPEAT_SPARSE_OPTIMAL_P:
        repeatPackSparseOptimalP(dest, info, ctrl, offset);
        break;
    case REPEAT_TRAILER:
        repeatPackTrailer(dest, info, ctrl, offset);
        break;
    case REPEAT_ALWAYS:
        /* nothing to do - no state */
        break;
    }
}
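
/*
 * Illustrative usage sketch (not taken from a caller in this file): a repeat
 * engine packs its control block into compressed stream state at a stream
 * boundary and restores it before the next scan, supplying the same current
 * offset to both calls so the relative encodings round-trip:
 *
 *     repeatPack(stream_state, info, &ctrl, offset);
 *     ...
 *     repeatUnpack(stream_state, info, offset, &ctrl);
 */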

static really_inline
u64a loadPackedRelative(const char *src, u64a offset, u32 len) {
    u64a delta = partial_load_u64a(src, len);
    DEBUG_PRINTF("delta %llu\n", delta);
    assert(offset >= delta);
    return offset - delta;
}

static
void repeatUnpackRing(const char *src, const struct RepeatInfo *info,
                      u64a offset, union RepeatControl *ctrl) {
    struct RepeatRingControl *xs = &ctrl->ring;
    const u32 ring_indices_len = info->repeatMax < 254 ? 2 : 4;
    const u32 offset_len = info->packedCtrlSize - ring_indices_len;
    xs->offset = loadPackedRelative(src, offset, offset_len);
    if (ring_indices_len == 4) {
        xs->first = unaligned_load_u16(src + offset_len);
        xs->last = unaligned_load_u16(src + offset_len + 2);
    } else {
        const u8 *indices = (const u8 *)src + offset_len;
        xs->first = indices[0];
        xs->last = indices[1];
    }
}

static
void repeatUnpackOffset(const char *src, const struct RepeatInfo *info,
                        u64a offset, union RepeatControl *ctrl) {
    struct RepeatOffsetControl *xs = &ctrl->offset;
    if (!info->packedCtrlSize) {
        assert(info->type == REPEAT_ALWAYS);
        DEBUG_PRINTF("externally guarded .*\n");
        xs->offset = 0;
    } else {
        xs->offset = loadPackedRelative(src, offset, info->packedCtrlSize);
    }
    DEBUG_PRINTF("unpacking offset %llu [h%u]\n", xs->offset,
                 info->horizon);
}

static
void repeatUnpackRange(const char *src, const struct RepeatInfo *info,
                       u64a offset, union RepeatControl *ctrl) {
    struct RepeatRangeControl *xs = &ctrl->range;
    xs->offset = loadPackedRelative(src, offset, info->packedCtrlSize - 1);
    xs->num = src[info->packedCtrlSize - 1];
}

static
void repeatUnpackBitmap(const char *src, const struct RepeatInfo *info,
                        u64a offset, union RepeatControl *ctrl) {
    struct RepeatBitmapControl *xs = &ctrl->bitmap;
    xs->offset = offset > info->repeatMax ? offset - info->repeatMax : 0;
    xs->bitmap = partial_load_u64a(src, info->packedCtrlSize);
}

static
void repeatUnpackSparseOptimalP(const char *src, const struct RepeatInfo *info,
                                u64a offset, union RepeatControl *ctrl) {
    struct RepeatRingControl *xs = &ctrl->ring;
    const u32 ring_indices_len = info->patchCount < 254 ? 2 : 4;
    const u32 offset_len = info->packedCtrlSize - ring_indices_len;
    xs->offset = loadPackedRelative(src, offset, offset_len);
    if (ring_indices_len == 4) {
        xs->first = unaligned_load_u16(src + offset_len);
        xs->last = unaligned_load_u16(src + offset_len + 2);
    } else {
        const u8 *indices = (const u8 *)src + offset_len;
        xs->first = indices[0];
        xs->last = indices[1];
    }
}

static
void repeatUnpackTrailer(const char *src, const struct RepeatInfo *info,
                         u64a offset, union RepeatControl *ctrl) {
    struct RepeatTrailerControl *xs = &ctrl->trailer;

    u64a v[2];
    unpack_bits_64(v, (const u8 *)src, info->packedFieldSizes, 2);

    xs->offset = offset - v[0] + info->repeatMin;
    xs->bitmap = v[1];

    DEBUG_PRINTF("loaded: xs->offset=%llu, xs->bitmap=0x%llx\n", xs->offset,
                 xs->bitmap);
}

void repeatUnpack(const char *src, const struct RepeatInfo *info, u64a offset,
                  union RepeatControl *ctrl) {
    assert(src && info && ctrl);

    switch ((enum RepeatType)info->type) {
    case REPEAT_RING:
        repeatUnpackRing(src, info, offset, ctrl);
        break;
    case REPEAT_FIRST:
    case REPEAT_LAST:
        repeatUnpackOffset(src, info, offset, ctrl);
        break;
    case REPEAT_RANGE:
        repeatUnpackRange(src, info, offset, ctrl);
        break;
    case REPEAT_BITMAP:
        repeatUnpackBitmap(src, info, offset, ctrl);
        break;
    case REPEAT_SPARSE_OPTIMAL_P:
        repeatUnpackSparseOptimalP(src, info, offset, ctrl);
        break;
    case REPEAT_TRAILER:
        repeatUnpackTrailer(src, info, offset, ctrl);
        break;
    case REPEAT_ALWAYS:
        /* nothing to do - no state */
        break;
    }
}

static really_inline
const u64a *getImplTable(const struct RepeatInfo *info) {
    const u64a *table = ((const u64a *)(ROUNDUP_PTR(
                                        ((const char *)(info) +
                                        sizeof(*info)),
                                        alignof(u64a))));
    return table;
}

static
void storeInitialRingTopPatch(const struct RepeatInfo *info,
                              struct RepeatRingControl *xs,
                              u8 *state, u64a offset) {
    DEBUG_PRINTF("set the first patch, offset=%llu\n", offset);
    xs->offset = offset;

    u8 *active = state;
    u32 patch_count = info->patchCount;
    mmbit_clear(active, patch_count);
    mmbit_set(active, patch_count, 0);

    u8 *ring = active + info->patchesOffset;
    u32 encoding_size = info->encodingSize;
    partial_store_u64a(ring, 1ull, encoding_size);
    xs->first = 0;
    xs->last = 1;
}

static
u32 getSparseOptimalTargetValue(const struct RepeatInfo *info,
                                const u32 tval, u64a *val) {
    u32 patch_size = info->patchSize;
    const u64a *repeatTable = getImplTable(info);
    u32 loc = 0;
    DEBUG_PRINTF("val:%llu \n", *val);
    for (u32 i = 1; i <= patch_size - tval; i++) {
        u64a tmp = repeatTable[patch_size - i];
        if (*val >= tmp) {
            *val -= tmp;
            loc = i;
            i += (info->minPeriod - 1);
        }
    }

    return loc;
}

static
u64a sparseLastTop(const struct RepeatInfo *info,
                   const struct RepeatRingControl *xs, const u8 *state) {
    DEBUG_PRINTF("looking for last top\n");
    u32 patch_size = info->patchSize;
    u32 patch_count = info->patchCount;
    u32 encoding_size = info->encodingSize;

    u32 occ = ringOccupancy(xs, patch_count);
    u32 patch = xs->first + occ - 1;
    if (patch >= patch_count) {
        patch -= patch_count;
    }

    DEBUG_PRINTF("patch%u encoding_size%u occ%u\n", patch, encoding_size, occ);
    const u8 *ring = state + info->patchesOffset;
    u64a val = partial_load_u64a(ring + encoding_size * patch, encoding_size);

    DEBUG_PRINTF("val:%llu\n", val);
    const u64a *repeatTable = getImplTable(info);
    for (s32 i = patch_size - 1; i >= 0; i--) {
        if (val >= repeatTable[i]) {
            DEBUG_PRINTF("xs->offset%llu v%u p%llu\n",
                         xs->offset, i, repeatTable[i]);
            return xs->offset + i + (occ - 1) * patch_size;
        }
    }

    assert(0);
    return 0;
}

u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info,
                                 const union RepeatControl *ctrl,
                                 const void *state) {
    return sparseLastTop(info, &ctrl->ring, state);
}

u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info,
                                   const union RepeatControl *ctrl,
                                   const void *state, u64a offset) {
    const struct RepeatRingControl *xs = &ctrl->ring;

    DEBUG_PRINTF("repeat [%u, %u] looking for match after %llu\n",
                 info->repeatMin, info->repeatMax, offset);

    assert(offset >= xs->offset);

    u64a nextOffset = offset + 1;

    u32 patch_size = info->patchSize;
    u32 patch;
    u32 tval;
    if (nextOffset <= xs->offset + info->repeatMin) {
        patch = xs->first;
        tval = 0;
    } else if (nextOffset > sparseLastTop(info, xs, state) + info->repeatMax) {
        DEBUG_PRINTF("ring is stale\n");
        return 0;
    } else {
        assert(nextOffset - xs->offset < UINT32_MAX); // ring is not stale
        u32 delta = (u32)(nextOffset - xs->offset);
        u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
        patch = lower / patch_size;
        tval = lower - patch * patch_size;
    }

    DEBUG_PRINTF("patch %u\n", patch);
    u32 patch_count = info->patchCount;
    if (patch >= patch_count) {
        return 0;
    }

    DEBUG_PRINTF("initial test for %u\n", tval);

    u32 begin = xs->first + patch;
    if (begin >= patch_count) {
        begin -= patch_count;
    }

    const u8 *active = (const u8 *)state;
    const u8 *ring = active + info->patchesOffset;
    u32 encoding_size = info->encodingSize;
    const u32 end = begin >= xs->last ? patch_count : xs->last;
    u32 low = tval;
    u64a diff = 0, loc = 0;
    DEBUG_PRINTF("begin %u end %u\n", begin, end);
    for (u32 p = mmbit_iterate_bounded(active, patch_count, begin, end);
         p != MMB_INVALID; p = mmbit_iterate_bounded(active, patch_count,
                                                     p + 1, end)) {
        if (p != begin) {
            low = 0;
        }

        u64a val = partial_load_u64a(ring + encoding_size * p, encoding_size);
        u32 p1 = 0;
        if (p >= xs->first) {
            p1 = p - xs->first;
        } else {
            p1 = p + patch_count - xs->first;
        }

        if (val) {
            loc = getSparseOptimalTargetValue(info, low, &val);
            diff = (p1 + 1) * patch_size - loc;
        }
        if (loc) {
            u64a rv = MAX(nextOffset, xs->offset + info->repeatMin + diff);
            DEBUG_PRINTF("offset%llu next match at %llu\n", xs->offset, rv);
            return rv;
        }
        low = 0;
    }

    low = 0;
    if (begin >= xs->last) {
        for (u32 p = mmbit_iterate_bounded(active, patch_count, 0, xs->last);
             p != MMB_INVALID; p = mmbit_iterate_bounded(active, patch_count,
                                                         p + 1, xs->last)) {

            u64a val = partial_load_u64a(ring + encoding_size * p,
                                         encoding_size);
            if (val) {
                loc = getSparseOptimalTargetValue(info, low, &val);
                diff = (p + 1) * patch_size - loc;
            }
            if (loc) {
                u64a rv = MAX(nextOffset, xs->offset + info->repeatMin +
                              diff + (end - xs->first) * patch_size);
                DEBUG_PRINTF("next match at %llu\n", rv);
                return rv;
            }
        }
    }

    DEBUG_PRINTF("next match\n");
    return 0;
}

void repeatStoreSparseOptimalP(const struct RepeatInfo *info,
                               union RepeatControl *ctrl, void *state,
                               u64a offset, char is_alive) {
    struct RepeatRingControl *xs = &ctrl->ring;
    u8 *active = (u8 *)state;

    DEBUG_PRINTF("offset: %llu encoding_size: %u\n", offset,
                 info->encodingSize);

    // If (a) this is the first top, or (b) the ring is stale, initialize the
    // ring and write this offset in as the first top.
    if (!is_alive ||
        offset > sparseLastTop(info, xs, state) + info->repeatMax) {
        storeInitialRingTopPatch(info, xs, active, offset);
        return;
    }

    // Tops should arrive in order, with no duplicates.
    assert(offset > sparseLastTop(info, xs, state));

    // As the ring is not stale, our delta should fit within a u32.
    assert(offset - xs->offset <= UINT32_MAX);
    u32 delta = (u32)(offset - xs->offset);
    u32 patch_size = info->patchSize;
    u32 patch_count = info->patchCount;
    u32 encoding_size = info->encodingSize;
    u32 patch = delta / patch_size;

    DEBUG_PRINTF("delta=%u, patch_size=%u, patch=%u\n", delta, patch_size,
                 patch);

    u8 *ring = active + info->patchesOffset;
    u32 occ = ringOccupancy(xs, patch_count);
    u64a val = 0;
    u32 idx;

    DEBUG_PRINTF("patch: %u patch_count: %u occ: %u\n",
                 patch, patch_count, occ);
    if (patch >= patch_count) {
        u32 patch_shift_count = patch - patch_count + 1;
        assert(patch >= patch_shift_count);
        DEBUG_PRINTF("shifting by %u\n", patch_shift_count);
        xs->offset += patch_size * patch_shift_count;
        xs->first += patch_shift_count;
        if (xs->first >= patch_count) {
            xs->first -= patch_count;
        }
        idx = xs->last + patch - occ;
        mmbit_unset_range(active, patch_count, xs->last,
                          MIN(idx, patch_count));
        if (idx >= patch_count) {
            idx -= patch_count;
            mmbit_unset_range(active, patch_count, 0, idx + 1);
        }
        xs->last = idx + 1;
        if (xs->last == patch_count) {
            xs->last = 0;
        }
    } else if (patch < occ) {
        assert(patch == occ - 1);
        idx = xs->last == 0 ? patch_count - 1 : (u32)xs->last - 1;
        val = partial_load_u64a(ring + encoding_size * idx, encoding_size);
    } else {
        idx = xs->last + patch - occ;
        mmbit_unset_range(active, patch_count, xs->last,
                          MIN(idx, patch_count));
        if (idx >= patch_count) {
            idx -= patch_count;
            mmbit_unset_range(active, patch_count, 0, idx + 1);
        }
        xs->last = idx + 1;
        if (xs->last == patch_count) {
            xs->last = 0;
        }
    }

    assert((u64a)patch * patch_size <= delta);
    u32 diff = delta - patch * patch_size;
    const u64a *repeatTable = getImplTable(info);
    val += repeatTable[diff];

    DEBUG_PRINTF("patch=%u, occ=%u\n", patch, occ);
    DEBUG_PRINTF("xs->first:%u xs->last:%u patch:%u\n",
                 xs->first, xs->last, patch);
    DEBUG_PRINTF("value:%llu\n", val);
    assert(fits_in_len_bytes(val, encoding_size));
    partial_store_u64a(ring + encoding_size * idx, val, encoding_size);
    mmbit_set(active, patch_count, idx);
}

static
char sparseHasMatch(const struct RepeatInfo *info, const u8 *state,
                    u32 lower, u32 upper) {
    u32 patch_size = info->patchSize;
    u32 patch_count = info->patchCount;
    u32 encoding_size = info->encodingSize;
    u32 patch_lower = lower / patch_size;
    u32 patch_upper = upper / patch_size;
    u32 diff = lower - patch_lower * patch_size;

    DEBUG_PRINTF("lower=%u, upper=%u\n", lower, upper);
    const u64a *repeatTable = getImplTable(info);

    const u8 *ring = state + info->patchesOffset;
    const u8 *active = state;
    u64a val;
    // test the first patch
    if (mmbit_isset(active, patch_count, patch_lower)) {
        val = partial_load_u64a(ring + encoding_size * patch_lower,
                                encoding_size);
        DEBUG_PRINTF("patch_size=%u, diff=%u, table=%llu\n",
                     patch_size, diff, repeatTable[diff]);
        DEBUG_PRINTF("patch_lower=%u, patch_upper=%u\n",
                     patch_lower, patch_upper);
        if (patch_upper == patch_lower) {
            u32 limit = upper - patch_lower * patch_size;
            getSparseOptimalTargetValue(info, limit + 1, &val);
        }
        if (val >= repeatTable[diff]) {
            return 1;
        }
    }

    if (patch_lower == patch_upper) {
        return 0;
    }

    // test the patches between first and last
    u32 m = mmbit_iterate_bounded(active, patch_count,
                                  patch_lower + 1, patch_upper);
    if (m != MMB_INVALID) {
        return 1;
    }

    if (patch_upper == patch_count) {
        return 0;
    }

    // test the last patch
    if (!mmbit_isset(active, patch_count, patch_upper)) {
        return 0;
    }
    diff = (patch_upper + 1) * patch_size - upper;
    DEBUG_PRINTF("diff=%u\n", diff);
    val = partial_load_u64a(ring + encoding_size * patch_upper, encoding_size);
    getSparseOptimalTargetValue(info, patch_size - diff + 1, &val);
    if (val) {
        DEBUG_PRINTF("last patch: val=%llu\n", val);
        return 1;
    }

    return 0;
}

enum RepeatMatch repeatHasMatchSparseOptimalP(const struct RepeatInfo *info,
                                              const union RepeatControl *ctrl,
                                              const void *state, u64a offset) {
    DEBUG_PRINTF("check for match at %llu corresponding to trigger "
                 "at [%llu, %llu]\n", offset, offset - info->repeatMax,
                 offset - info->repeatMin);

    const struct RepeatRingControl *xs = &ctrl->ring;
    const u8 *ring = (const u8 *)state;

    assert(offset >= xs->offset);

    if (offset < xs->offset + info->repeatMin) {
        DEBUG_PRINTF("too soon\n");
        return REPEAT_NOMATCH;
    } else if (offset > sparseLastTop(info, xs, state) + info->repeatMax) {
        DEBUG_PRINTF("stale\n");
        return REPEAT_STALE;
    }

    // Our delta between the base offset of the ring and the current offset
    // must fit within the range [repeatMin, lastPossibleTop + repeatMax]. This
    // range fits comfortably within a u32.
    assert(offset - xs->offset <= UINT32_MAX);

    u32 delta = (u32)(offset - xs->offset);
    u32 patch_size = info->patchSize;
    u32 patch_count = info->patchCount;
    u32 occ = ringOccupancy(xs, patch_count);

    u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
    u32 upper = MIN(delta - info->repeatMin, occ * patch_size - 1);

    DEBUG_PRINTF("lower=%u, upper=%u\n", lower, upper);
    u32 patch_lower = lower / patch_size;
    u32 patch_upper = upper / patch_size;

    if (patch_lower >= occ) {
        DEBUG_PRINTF("too late\n");
        return REPEAT_NOMATCH;
    }

    u32 remaining_lower = lower - patch_lower * patch_size;
    u32 remaining_upper = upper - patch_upper * patch_size;
    patch_lower += xs->first;
    patch_upper += xs->first;
    if (patch_lower >= patch_count) {
        patch_lower -= patch_count;
        patch_upper -= patch_count;
    } else if (patch_upper >= patch_count) {
        patch_upper -= patch_count;
    }

    DEBUG_PRINTF("xs->first:%u xs->last:%u patch_lower:%u, patch_upper:%u\n",
                 xs->first, xs->last, patch_lower, patch_upper);

    u32 scan_end;
    const char is_not_wrapped = (patch_lower <= patch_upper);
    if (is_not_wrapped) {
        scan_end = patch_upper * patch_size + remaining_upper;
    } else {
        scan_end = patch_count * patch_size;
    }

    lower = patch_lower * patch_size + remaining_lower;
    if (sparseHasMatch(info, ring, lower, scan_end)) {
        return REPEAT_MATCH;
    }

    if (!is_not_wrapped) {
        upper -= (patch_count - xs->first) * patch_size;
        if (sparseHasMatch(info, ring, 0, upper)) {
            return REPEAT_MATCH;
        }
    }

    return REPEAT_NOMATCH;
}