mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Introduce REPEAT_ALWAYS model for {0,} castle repeats
Because Castle itself guards these repeats, no additional repeat state is needed for them.
This commit is contained in:
parent
5e0d10d805
commit
05beadf52f
@ -316,7 +316,7 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
|
||||
bool is_reset = repeatInfoPair[i].second;
|
||||
|
||||
enum RepeatType rtype = chooseRepeatType(pr.bounds.min, pr.bounds.max,
|
||||
min_period, is_reset);
|
||||
min_period, is_reset, true);
|
||||
RepeatStateInfo rsi(rtype, pr.bounds.min, pr.bounds.max, min_period);
|
||||
|
||||
DEBUG_PRINTF("sub %u: selected %s model for %s repeat\n", i,
|
||||
|
@ -130,6 +130,9 @@ char repeatIsDead(const struct RepeatInfo *info,
|
||||
return lstate->ctrl.ring.offset == REPEAT_DEAD;
|
||||
case REPEAT_TRAILER:
|
||||
return lstate->ctrl.trailer.offset == REPEAT_DEAD;
|
||||
case REPEAT_ALWAYS:
|
||||
assert(!"REPEAT_ALWAYS should only be used by Castle");
|
||||
return 0;
|
||||
}
|
||||
|
||||
assert(0);
|
||||
|
@ -922,6 +922,11 @@ void repeatPackOffset(char *dest, const struct RepeatInfo *info,
|
||||
const union RepeatControl *ctrl, u64a offset) {
|
||||
const struct RepeatOffsetControl *xs = &ctrl->offset;
|
||||
DEBUG_PRINTF("packing offset %llu [h %u]\n", xs->offset, info->horizon);
|
||||
if (!info->packedCtrlSize) {
|
||||
assert(info->type == REPEAT_ALWAYS);
|
||||
DEBUG_PRINTF("externally guarded .*\n");
|
||||
return;
|
||||
}
|
||||
storePackedRelative(dest, xs->offset, offset, info->horizon,
|
||||
info->packedCtrlSize);
|
||||
}
|
||||
@ -1040,6 +1045,9 @@ void repeatPack(char *dest, const struct RepeatInfo *info,
|
||||
case REPEAT_TRAILER:
|
||||
repeatPackTrailer(dest, info, ctrl, offset);
|
||||
break;
|
||||
case REPEAT_ALWAYS:
|
||||
/* nothing to do - no state */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1072,7 +1080,13 @@ static
|
||||
void repeatUnpackOffset(const char *src, const struct RepeatInfo *info,
|
||||
u64a offset, union RepeatControl *ctrl) {
|
||||
struct RepeatOffsetControl *xs = &ctrl->offset;
|
||||
xs->offset = loadPackedRelative(src, offset, info->packedCtrlSize);
|
||||
if (!info->packedCtrlSize) {
|
||||
assert(info->type == REPEAT_ALWAYS);
|
||||
DEBUG_PRINTF("externally guarded .*\n");
|
||||
xs->offset = 0;
|
||||
} else {
|
||||
xs->offset = loadPackedRelative(src, offset, info->packedCtrlSize);
|
||||
}
|
||||
DEBUG_PRINTF("unpacking offset %llu [h%u]\n", xs->offset,
|
||||
info->horizon);
|
||||
}
|
||||
@ -1149,6 +1163,9 @@ void repeatUnpack(const char *src, const struct RepeatInfo *info, u64a offset,
|
||||
case REPEAT_TRAILER:
|
||||
repeatUnpackTrailer(src, info, offset, ctrl);
|
||||
break;
|
||||
case REPEAT_ALWAYS:
|
||||
/* nothing to do - no state */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -135,6 +135,8 @@ u64a repeatLastTop(const struct RepeatInfo *info,
|
||||
return repeatLastTopSparseOptimalP(info, ctrl, state);
|
||||
case REPEAT_TRAILER:
|
||||
return repeatLastTopTrailer(info, ctrl);
|
||||
case REPEAT_ALWAYS:
|
||||
return 0;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("bad repeat type %u\n", info->type);
|
||||
@ -200,6 +202,8 @@ u64a repeatNextMatch(const struct RepeatInfo *info,
|
||||
return repeatNextMatchSparseOptimalP(info, ctrl, state, offset);
|
||||
case REPEAT_TRAILER:
|
||||
return repeatNextMatchTrailer(info, ctrl, offset);
|
||||
case REPEAT_ALWAYS:
|
||||
return offset + 1;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("bad repeat type %u\n", info->type);
|
||||
@ -275,6 +279,9 @@ void repeatStore(const struct RepeatInfo *info, union RepeatControl *ctrl,
|
||||
case REPEAT_TRAILER:
|
||||
repeatStoreTrailer(info, ctrl, offset, is_alive);
|
||||
break;
|
||||
case REPEAT_ALWAYS:
|
||||
/* nothing to do - no state */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -348,6 +355,8 @@ enum RepeatMatch repeatHasMatch(const struct RepeatInfo *info,
|
||||
return repeatHasMatchSparseOptimalP(info, ctrl, state, offset);
|
||||
case REPEAT_TRAILER:
|
||||
return repeatHasMatchTrailer(info, ctrl, offset);
|
||||
case REPEAT_ALWAYS:
|
||||
return REPEAT_MATCH;
|
||||
}
|
||||
|
||||
assert(0);
|
||||
|
@ -47,26 +47,26 @@ enum RepeatType {
|
||||
/** General mechanism for tracking {N,M} repeats. Stores the first top as
|
||||
* an absolute offset, then subsequent tops in the {N,M} range as a ring of
|
||||
* relative top indices stored in a multibit. */
|
||||
REPEAT_RING = 0,
|
||||
REPEAT_RING,
|
||||
|
||||
/** Used to track {N,} repeats. Uses the \ref RepeatOffsetControl structure,
|
||||
* since only the first top encountered needs to be stored. */
|
||||
REPEAT_FIRST = 1,
|
||||
REPEAT_FIRST,
|
||||
|
||||
/** Used to track {0,N} repeats. Much like ::REPEAT_FIRST, except that we
|
||||
* store the most recent top encountered. */
|
||||
REPEAT_LAST = 2,
|
||||
REPEAT_LAST,
|
||||
|
||||
/** Like ::REPEAT_RING, this is also used for {N,M} repeats, but for cases
|
||||
* where there is a large difference between N and M, and developed to
|
||||
* reduce the state requirements of this case (relative to the RING model).
|
||||
* Uses a small ordered array of top indices relative to \ref
|
||||
* RepeatRangeControl::offset. */
|
||||
REPEAT_RANGE = 3,
|
||||
REPEAT_RANGE,
|
||||
|
||||
/** Used for {N,M} repeats where 0 < M <= 64. Uses the \ref
|
||||
* RepeatBitmapControl structure at runtime. */
|
||||
REPEAT_BITMAP = 4,
|
||||
REPEAT_BITMAP,
|
||||
|
||||
/** Optimal mechanism for tracking {N,M} repeats when there is a bound on
|
||||
* how frequently they can be retriggered.
|
||||
@ -78,13 +78,17 @@ enum RepeatType {
|
||||
* referencing a table that stores values from f(0, min) to f(repeat, min)
|
||||
* eg: repeat = 5, min = 2. 10001 => f(4,2) + f(0,2) = 9.
|
||||
* We search the optimal patch size between min and repeat in advance and
|
||||
* use the scheme above to do encoding and decoding to reduce stream state size
|
||||
* */
|
||||
REPEAT_SPARSE_OPTIMAL_P = 5,
|
||||
* use the scheme above to do encoding and decoding to reduce stream state
|
||||
* size. */
|
||||
REPEAT_SPARSE_OPTIMAL_P,
|
||||
|
||||
/** Used for {N,M} repeats where 0 < N < 64. Uses the \ref RepeatTrailerControl
|
||||
* structure at runtime. */
|
||||
REPEAT_TRAILER = 6,
|
||||
/** Used for {N,M} repeats where 0 < N < 64. Uses the
|
||||
* \ref RepeatTrailerControl structure at runtime. */
|
||||
REPEAT_TRAILER,
|
||||
|
||||
/** Degenerate repeat that always returns true. Used by castle for pseudo
|
||||
* [^X]* repeats. */
|
||||
REPEAT_ALWAYS,
|
||||
};
|
||||
|
||||
/**
|
||||
@ -204,6 +208,8 @@ const char *repeatTypeName(u8 type) {
|
||||
return "SPARSE_OPTIMAL_P";
|
||||
case REPEAT_TRAILER:
|
||||
return "TRAILER";
|
||||
case REPEAT_ALWAYS:
|
||||
return "ALWAYS";
|
||||
}
|
||||
assert(0);
|
||||
return "UNKNOWN";
|
||||
|
@ -206,6 +206,13 @@ RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin,
|
||||
packedFieldSizes[1] = repeatMin;
|
||||
packedCtrlSize = (packedFieldSizes[0] + packedFieldSizes[1] + 7U) / 8U;
|
||||
break;
|
||||
case REPEAT_ALWAYS:
|
||||
assert(repeatMin == 0ULL);
|
||||
assert(repeatMax.is_infinite());
|
||||
stateSize = 0; // everything is in the control block.
|
||||
horizon = 0;
|
||||
packedCtrlSize = 0;
|
||||
break;
|
||||
}
|
||||
DEBUG_PRINTF("stateSize=%u, packedCtrlSize=%u, horizon=%u\n", stateSize,
|
||||
packedCtrlSize, horizon);
|
||||
@ -232,9 +239,14 @@ u32 streamStateSize(enum RepeatType type, const depth &repeatMin,
|
||||
}
|
||||
|
||||
enum RepeatType chooseRepeatType(const depth &repeatMin, const depth &repeatMax,
|
||||
u32 minPeriod, bool is_reset) {
|
||||
u32 minPeriod, bool is_reset,
|
||||
bool has_external_guard) {
|
||||
if (repeatMax.is_infinite()) {
|
||||
return REPEAT_FIRST;
|
||||
if (has_external_guard && !repeatMin) {
|
||||
return REPEAT_ALWAYS;
|
||||
} else {
|
||||
return REPEAT_FIRST;
|
||||
}
|
||||
}
|
||||
|
||||
if (repeatMin == depth(0) || is_reset) {
|
||||
|
@ -68,7 +68,8 @@ struct RepeatStateInfo {
|
||||
* type.
|
||||
*/
|
||||
enum RepeatType chooseRepeatType(const depth &repeatMin, const depth &repeatMax,
|
||||
u32 minPeriod, bool is_reset);
|
||||
u32 minPeriod, bool is_reset,
|
||||
bool has_external_guard = false);
|
||||
|
||||
u32 calcPackedBytes(u64a val);
|
||||
|
||||
|
@ -193,7 +193,9 @@ static const RepeatTestInfo repeatTests[] = {
|
||||
{ REPEAT_FIRST, 100, depth::infinity() },
|
||||
{ REPEAT_FIRST, 1000, depth::infinity() },
|
||||
{ REPEAT_FIRST, 3000, depth::infinity() },
|
||||
{ REPEAT_FIRST, 10000, depth::infinity() }
|
||||
{ REPEAT_FIRST, 10000, depth::infinity() },
|
||||
// {0,} repeats -- always
|
||||
{ REPEAT_ALWAYS, 0, depth::infinity() },
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Repeat, RepeatTest, ValuesIn(repeatTests));
|
||||
@ -289,6 +291,10 @@ TEST_P(RepeatTest, FillRing) {
|
||||
|
||||
TEST_P(RepeatTest, FindTops) {
|
||||
SCOPED_TRACE(testing::Message() << "Repeat: " << info);
|
||||
/* REPEAT_ALWAYS has no state and so does not track top locations */
|
||||
if (info.type == REPEAT_ALWAYS) {
|
||||
return;
|
||||
}
|
||||
|
||||
repeatStore(&info, ctrl, state, 1000, 0);
|
||||
ASSERT_EQ(1000, repeatLastTop(&info, ctrl, state));
|
||||
@ -364,7 +370,8 @@ TEST_P(RepeatTest, TwoTops) {
|
||||
SCOPED_TRACE(testing::Message() << "Repeat: " << info);
|
||||
|
||||
// Only appropriate for tests that store more than one top.
|
||||
if (info.type == REPEAT_FIRST || info.type == REPEAT_LAST) {
|
||||
if (info.type == REPEAT_FIRST || info.type == REPEAT_LAST
|
||||
|| info.type == REPEAT_ALWAYS) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user