mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Introduce REPEAT_ALWAYS model for {0,} castle repeats
Because Castle itself guards these repeats, they need no additional repeat state.
This commit is contained in:
parent
5e0d10d805
commit
05beadf52f
@ -316,7 +316,7 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
|
|||||||
bool is_reset = repeatInfoPair[i].second;
|
bool is_reset = repeatInfoPair[i].second;
|
||||||
|
|
||||||
enum RepeatType rtype = chooseRepeatType(pr.bounds.min, pr.bounds.max,
|
enum RepeatType rtype = chooseRepeatType(pr.bounds.min, pr.bounds.max,
|
||||||
min_period, is_reset);
|
min_period, is_reset, true);
|
||||||
RepeatStateInfo rsi(rtype, pr.bounds.min, pr.bounds.max, min_period);
|
RepeatStateInfo rsi(rtype, pr.bounds.min, pr.bounds.max, min_period);
|
||||||
|
|
||||||
DEBUG_PRINTF("sub %u: selected %s model for %s repeat\n", i,
|
DEBUG_PRINTF("sub %u: selected %s model for %s repeat\n", i,
|
||||||
|
@ -130,6 +130,9 @@ char repeatIsDead(const struct RepeatInfo *info,
|
|||||||
return lstate->ctrl.ring.offset == REPEAT_DEAD;
|
return lstate->ctrl.ring.offset == REPEAT_DEAD;
|
||||||
case REPEAT_TRAILER:
|
case REPEAT_TRAILER:
|
||||||
return lstate->ctrl.trailer.offset == REPEAT_DEAD;
|
return lstate->ctrl.trailer.offset == REPEAT_DEAD;
|
||||||
|
case REPEAT_ALWAYS:
|
||||||
|
assert(!"REPEAT_ALWAYS should only be used by Castle");
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(0);
|
assert(0);
|
||||||
|
@ -922,6 +922,11 @@ void repeatPackOffset(char *dest, const struct RepeatInfo *info,
|
|||||||
const union RepeatControl *ctrl, u64a offset) {
|
const union RepeatControl *ctrl, u64a offset) {
|
||||||
const struct RepeatOffsetControl *xs = &ctrl->offset;
|
const struct RepeatOffsetControl *xs = &ctrl->offset;
|
||||||
DEBUG_PRINTF("packing offset %llu [h %u]\n", xs->offset, info->horizon);
|
DEBUG_PRINTF("packing offset %llu [h %u]\n", xs->offset, info->horizon);
|
||||||
|
if (!info->packedCtrlSize) {
|
||||||
|
assert(info->type == REPEAT_ALWAYS);
|
||||||
|
DEBUG_PRINTF("externally guarded .*\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
storePackedRelative(dest, xs->offset, offset, info->horizon,
|
storePackedRelative(dest, xs->offset, offset, info->horizon,
|
||||||
info->packedCtrlSize);
|
info->packedCtrlSize);
|
||||||
}
|
}
|
||||||
@ -1040,6 +1045,9 @@ void repeatPack(char *dest, const struct RepeatInfo *info,
|
|||||||
case REPEAT_TRAILER:
|
case REPEAT_TRAILER:
|
||||||
repeatPackTrailer(dest, info, ctrl, offset);
|
repeatPackTrailer(dest, info, ctrl, offset);
|
||||||
break;
|
break;
|
||||||
|
case REPEAT_ALWAYS:
|
||||||
|
/* nothing to do - no state */
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1072,7 +1080,13 @@ static
|
|||||||
void repeatUnpackOffset(const char *src, const struct RepeatInfo *info,
|
void repeatUnpackOffset(const char *src, const struct RepeatInfo *info,
|
||||||
u64a offset, union RepeatControl *ctrl) {
|
u64a offset, union RepeatControl *ctrl) {
|
||||||
struct RepeatOffsetControl *xs = &ctrl->offset;
|
struct RepeatOffsetControl *xs = &ctrl->offset;
|
||||||
xs->offset = loadPackedRelative(src, offset, info->packedCtrlSize);
|
if (!info->packedCtrlSize) {
|
||||||
|
assert(info->type == REPEAT_ALWAYS);
|
||||||
|
DEBUG_PRINTF("externally guarded .*\n");
|
||||||
|
xs->offset = 0;
|
||||||
|
} else {
|
||||||
|
xs->offset = loadPackedRelative(src, offset, info->packedCtrlSize);
|
||||||
|
}
|
||||||
DEBUG_PRINTF("unpacking offset %llu [h%u]\n", xs->offset,
|
DEBUG_PRINTF("unpacking offset %llu [h%u]\n", xs->offset,
|
||||||
info->horizon);
|
info->horizon);
|
||||||
}
|
}
|
||||||
@ -1149,6 +1163,9 @@ void repeatUnpack(const char *src, const struct RepeatInfo *info, u64a offset,
|
|||||||
case REPEAT_TRAILER:
|
case REPEAT_TRAILER:
|
||||||
repeatUnpackTrailer(src, info, offset, ctrl);
|
repeatUnpackTrailer(src, info, offset, ctrl);
|
||||||
break;
|
break;
|
||||||
|
case REPEAT_ALWAYS:
|
||||||
|
/* nothing to do - no state */
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -135,6 +135,8 @@ u64a repeatLastTop(const struct RepeatInfo *info,
|
|||||||
return repeatLastTopSparseOptimalP(info, ctrl, state);
|
return repeatLastTopSparseOptimalP(info, ctrl, state);
|
||||||
case REPEAT_TRAILER:
|
case REPEAT_TRAILER:
|
||||||
return repeatLastTopTrailer(info, ctrl);
|
return repeatLastTopTrailer(info, ctrl);
|
||||||
|
case REPEAT_ALWAYS:
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("bad repeat type %u\n", info->type);
|
DEBUG_PRINTF("bad repeat type %u\n", info->type);
|
||||||
@ -200,6 +202,8 @@ u64a repeatNextMatch(const struct RepeatInfo *info,
|
|||||||
return repeatNextMatchSparseOptimalP(info, ctrl, state, offset);
|
return repeatNextMatchSparseOptimalP(info, ctrl, state, offset);
|
||||||
case REPEAT_TRAILER:
|
case REPEAT_TRAILER:
|
||||||
return repeatNextMatchTrailer(info, ctrl, offset);
|
return repeatNextMatchTrailer(info, ctrl, offset);
|
||||||
|
case REPEAT_ALWAYS:
|
||||||
|
return offset + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("bad repeat type %u\n", info->type);
|
DEBUG_PRINTF("bad repeat type %u\n", info->type);
|
||||||
@ -275,6 +279,9 @@ void repeatStore(const struct RepeatInfo *info, union RepeatControl *ctrl,
|
|||||||
case REPEAT_TRAILER:
|
case REPEAT_TRAILER:
|
||||||
repeatStoreTrailer(info, ctrl, offset, is_alive);
|
repeatStoreTrailer(info, ctrl, offset, is_alive);
|
||||||
break;
|
break;
|
||||||
|
case REPEAT_ALWAYS:
|
||||||
|
/* nothing to do - no state */
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -348,6 +355,8 @@ enum RepeatMatch repeatHasMatch(const struct RepeatInfo *info,
|
|||||||
return repeatHasMatchSparseOptimalP(info, ctrl, state, offset);
|
return repeatHasMatchSparseOptimalP(info, ctrl, state, offset);
|
||||||
case REPEAT_TRAILER:
|
case REPEAT_TRAILER:
|
||||||
return repeatHasMatchTrailer(info, ctrl, offset);
|
return repeatHasMatchTrailer(info, ctrl, offset);
|
||||||
|
case REPEAT_ALWAYS:
|
||||||
|
return REPEAT_MATCH;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(0);
|
assert(0);
|
||||||
|
@ -47,26 +47,26 @@ enum RepeatType {
|
|||||||
/** General mechanism for tracking {N,M} repeats. Stores the first top as
|
/** General mechanism for tracking {N,M} repeats. Stores the first top as
|
||||||
* an absolute offset, then subsequent tops in the {N,M} range as a ring of
|
* an absolute offset, then subsequent tops in the {N,M} range as a ring of
|
||||||
* relative top indices stored in a multibit. */
|
* relative top indices stored in a multibit. */
|
||||||
REPEAT_RING = 0,
|
REPEAT_RING,
|
||||||
|
|
||||||
/** Used to track {N,} repeats. Uses the \ref RepeatOffsetControl structure,
|
/** Used to track {N,} repeats. Uses the \ref RepeatOffsetControl structure,
|
||||||
* since only the first top encountered needs to be stored. */
|
* since only the first top encountered needs to be stored. */
|
||||||
REPEAT_FIRST = 1,
|
REPEAT_FIRST,
|
||||||
|
|
||||||
/** Used to track {0,N} repeats. Much like ::REPEAT_FIRST, except that we
|
/** Used to track {0,N} repeats. Much like ::REPEAT_FIRST, except that we
|
||||||
* store the most recent top encountered. */
|
* store the most recent top encountered. */
|
||||||
REPEAT_LAST = 2,
|
REPEAT_LAST,
|
||||||
|
|
||||||
/** Like ::REPEAT_RING, this is also used for {N,M} repeats, but for cases
|
/** Like ::REPEAT_RING, this is also used for {N,M} repeats, but for cases
|
||||||
* where there is a large difference between N and M, and developed to
|
* where there is a large difference between N and M, and developed to
|
||||||
* reduce the state requirements of this case (relative to the RING model).
|
* reduce the state requirements of this case (relative to the RING model).
|
||||||
* Uses a small ordered array of top indices relative to \ref
|
* Uses a small ordered array of top indices relative to \ref
|
||||||
* RepeatRangeControl::offset. */
|
* RepeatRangeControl::offset. */
|
||||||
REPEAT_RANGE = 3,
|
REPEAT_RANGE,
|
||||||
|
|
||||||
/** Used for {N,M} repeats where 0 < M <= 64. Uses the \ref
|
/** Used for {N,M} repeats where 0 < M <= 64. Uses the \ref
|
||||||
* RepeatBitmapControl structure at runtime. */
|
* RepeatBitmapControl structure at runtime. */
|
||||||
REPEAT_BITMAP = 4,
|
REPEAT_BITMAP,
|
||||||
|
|
||||||
/** Optimal mechanism for tracking {N,M} repeats when there is a bound on
|
/** Optimal mechanism for tracking {N,M} repeats when there is a bound on
|
||||||
* how frequently they can be retriggered.
|
* how frequently they can be retriggered.
|
||||||
@ -78,13 +78,17 @@ enum RepeatType {
|
|||||||
* referencing a table that stores values from f(0, min) to f(repeat, min)
|
* referencing a table that stores values from f(0, min) to f(repeat, min)
|
||||||
* eg: repeat = 5, min = 2. 10001 => f(4,2) + f(0,2) = 9.
|
* eg: repeat = 5, min = 2. 10001 => f(4,2) + f(0,2) = 9.
|
||||||
* We search the optimal patch size between min and repeat in advance and
|
* We search the optimal patch size between min and repeat in advance and
|
||||||
* use the scheme above to do encoding and decoding to reduce stream state size
|
* use the scheme above to do encoding and decoding to reduce stream state
|
||||||
* */
|
* size. */
|
||||||
REPEAT_SPARSE_OPTIMAL_P = 5,
|
REPEAT_SPARSE_OPTIMAL_P,
|
||||||
|
|
||||||
/** Used for {N,M} repeats where 0 < N < 64. Uses the \ref RepeatTrailerControl
|
/** Used for {N,M} repeats where 0 < N < 64. Uses the
|
||||||
* structure at runtime. */
|
* \ref RepeatTrailerControl structure at runtime. */
|
||||||
REPEAT_TRAILER = 6,
|
REPEAT_TRAILER,
|
||||||
|
|
||||||
|
/** Degenerate repeat that always returns true. Used by castle for pseudo
|
||||||
|
* [^X]* repeats. */
|
||||||
|
REPEAT_ALWAYS,
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -204,6 +208,8 @@ const char *repeatTypeName(u8 type) {
|
|||||||
return "SPARSE_OPTIMAL_P";
|
return "SPARSE_OPTIMAL_P";
|
||||||
case REPEAT_TRAILER:
|
case REPEAT_TRAILER:
|
||||||
return "TRAILER";
|
return "TRAILER";
|
||||||
|
case REPEAT_ALWAYS:
|
||||||
|
return "ALWAYS";
|
||||||
}
|
}
|
||||||
assert(0);
|
assert(0);
|
||||||
return "UNKNOWN";
|
return "UNKNOWN";
|
||||||
|
@ -206,6 +206,13 @@ RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin,
|
|||||||
packedFieldSizes[1] = repeatMin;
|
packedFieldSizes[1] = repeatMin;
|
||||||
packedCtrlSize = (packedFieldSizes[0] + packedFieldSizes[1] + 7U) / 8U;
|
packedCtrlSize = (packedFieldSizes[0] + packedFieldSizes[1] + 7U) / 8U;
|
||||||
break;
|
break;
|
||||||
|
case REPEAT_ALWAYS:
|
||||||
|
assert(repeatMin == 0ULL);
|
||||||
|
assert(repeatMax.is_infinite());
|
||||||
|
stateSize = 0; // no state at all: the repeat is guarded externally by Castle.
|
||||||
|
horizon = 0;
|
||||||
|
packedCtrlSize = 0;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
DEBUG_PRINTF("stateSize=%u, packedCtrlSize=%u, horizon=%u\n", stateSize,
|
DEBUG_PRINTF("stateSize=%u, packedCtrlSize=%u, horizon=%u\n", stateSize,
|
||||||
packedCtrlSize, horizon);
|
packedCtrlSize, horizon);
|
||||||
@ -232,9 +239,14 @@ u32 streamStateSize(enum RepeatType type, const depth &repeatMin,
|
|||||||
}
|
}
|
||||||
|
|
||||||
enum RepeatType chooseRepeatType(const depth &repeatMin, const depth &repeatMax,
|
enum RepeatType chooseRepeatType(const depth &repeatMin, const depth &repeatMax,
|
||||||
u32 minPeriod, bool is_reset) {
|
u32 minPeriod, bool is_reset,
|
||||||
|
bool has_external_guard) {
|
||||||
if (repeatMax.is_infinite()) {
|
if (repeatMax.is_infinite()) {
|
||||||
return REPEAT_FIRST;
|
if (has_external_guard && !repeatMin) {
|
||||||
|
return REPEAT_ALWAYS;
|
||||||
|
} else {
|
||||||
|
return REPEAT_FIRST;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (repeatMin == depth(0) || is_reset) {
|
if (repeatMin == depth(0) || is_reset) {
|
||||||
|
@ -68,7 +68,8 @@ struct RepeatStateInfo {
|
|||||||
* type.
|
* type.
|
||||||
*/
|
*/
|
||||||
enum RepeatType chooseRepeatType(const depth &repeatMin, const depth &repeatMax,
|
enum RepeatType chooseRepeatType(const depth &repeatMin, const depth &repeatMax,
|
||||||
u32 minPeriod, bool is_reset);
|
u32 minPeriod, bool is_reset,
|
||||||
|
bool has_external_guard = false);
|
||||||
|
|
||||||
u32 calcPackedBytes(u64a val);
|
u32 calcPackedBytes(u64a val);
|
||||||
|
|
||||||
|
@ -193,7 +193,9 @@ static const RepeatTestInfo repeatTests[] = {
|
|||||||
{ REPEAT_FIRST, 100, depth::infinity() },
|
{ REPEAT_FIRST, 100, depth::infinity() },
|
||||||
{ REPEAT_FIRST, 1000, depth::infinity() },
|
{ REPEAT_FIRST, 1000, depth::infinity() },
|
||||||
{ REPEAT_FIRST, 3000, depth::infinity() },
|
{ REPEAT_FIRST, 3000, depth::infinity() },
|
||||||
{ REPEAT_FIRST, 10000, depth::infinity() }
|
{ REPEAT_FIRST, 10000, depth::infinity() },
|
||||||
|
// {,} repeats -- always
|
||||||
|
{ REPEAT_ALWAYS, 0, depth::infinity() },
|
||||||
};
|
};
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Repeat, RepeatTest, ValuesIn(repeatTests));
|
INSTANTIATE_TEST_CASE_P(Repeat, RepeatTest, ValuesIn(repeatTests));
|
||||||
@ -289,6 +291,10 @@ TEST_P(RepeatTest, FillRing) {
|
|||||||
|
|
||||||
TEST_P(RepeatTest, FindTops) {
|
TEST_P(RepeatTest, FindTops) {
|
||||||
SCOPED_TRACE(testing::Message() << "Repeat: " << info);
|
SCOPED_TRACE(testing::Message() << "Repeat: " << info);
|
||||||
|
/* REPEAT_ALWAYS has no state and so does not track top locations */
|
||||||
|
if (info.type == REPEAT_ALWAYS) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
repeatStore(&info, ctrl, state, 1000, 0);
|
repeatStore(&info, ctrl, state, 1000, 0);
|
||||||
ASSERT_EQ(1000, repeatLastTop(&info, ctrl, state));
|
ASSERT_EQ(1000, repeatLastTop(&info, ctrl, state));
|
||||||
@ -364,7 +370,8 @@ TEST_P(RepeatTest, TwoTops) {
|
|||||||
SCOPED_TRACE(testing::Message() << "Repeat: " << info);
|
SCOPED_TRACE(testing::Message() << "Repeat: " << info);
|
||||||
|
|
||||||
// Only appropriate for tests that store more than one top.
|
// Only appropriate for tests that store more than one top.
|
||||||
if (info.type == REPEAT_FIRST || info.type == REPEAT_LAST) {
|
if (info.type == REPEAT_FIRST || info.type == REPEAT_LAST
|
||||||
|
|| info.type == REPEAT_ALWAYS) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user