diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index e5cc9267..2b0f6141 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -316,7 +316,7 @@ void buildSubcastles(const CastleProto &proto, vector &subs, bool is_reset = repeatInfoPair[i].second; enum RepeatType rtype = chooseRepeatType(pr.bounds.min, pr.bounds.max, - min_period, is_reset); + min_period, is_reset, true); RepeatStateInfo rsi(rtype, pr.bounds.min, pr.bounds.max, min_period); DEBUG_PRINTF("sub %u: selected %s model for %s repeat\n", i, diff --git a/src/nfa/lbr.c b/src/nfa/lbr.c index de93d4a3..0d69cc2a 100644 --- a/src/nfa/lbr.c +++ b/src/nfa/lbr.c @@ -130,6 +130,9 @@ char repeatIsDead(const struct RepeatInfo *info, return lstate->ctrl.ring.offset == REPEAT_DEAD; case REPEAT_TRAILER: return lstate->ctrl.trailer.offset == REPEAT_DEAD; + case REPEAT_ALWAYS: + assert(!"REPEAT_ALWAYS should only be used by Castle"); + return 0; } assert(0); diff --git a/src/nfa/repeat.c b/src/nfa/repeat.c index c1ff5162..d12bc5a1 100644 --- a/src/nfa/repeat.c +++ b/src/nfa/repeat.c @@ -922,6 +922,11 @@ void repeatPackOffset(char *dest, const struct RepeatInfo *info, const union RepeatControl *ctrl, u64a offset) { const struct RepeatOffsetControl *xs = &ctrl->offset; DEBUG_PRINTF("packing offset %llu [h %u]\n", xs->offset, info->horizon); + if (!info->packedCtrlSize) { + assert(info->type == REPEAT_ALWAYS); + DEBUG_PRINTF("externally guarded .*\n"); + return; + } storePackedRelative(dest, xs->offset, offset, info->horizon, info->packedCtrlSize); } @@ -1040,6 +1045,9 @@ void repeatPack(char *dest, const struct RepeatInfo *info, case REPEAT_TRAILER: repeatPackTrailer(dest, info, ctrl, offset); break; + case REPEAT_ALWAYS: + /* nothing to do - no state */ + break; } } @@ -1072,7 +1080,13 @@ static void repeatUnpackOffset(const char *src, const struct RepeatInfo *info, u64a offset, union RepeatControl *ctrl) { struct RepeatOffsetControl *xs = &ctrl->offset; - xs->offset = 
loadPackedRelative(src, offset, info->packedCtrlSize); + if (!info->packedCtrlSize) { + assert(info->type == REPEAT_ALWAYS); + DEBUG_PRINTF("externally guarded .*\n"); + xs->offset = 0; + } else { + xs->offset = loadPackedRelative(src, offset, info->packedCtrlSize); + } DEBUG_PRINTF("unpacking offset %llu [h%u]\n", xs->offset, info->horizon); } @@ -1149,6 +1163,9 @@ void repeatUnpack(const char *src, const struct RepeatInfo *info, u64a offset, case REPEAT_TRAILER: repeatUnpackTrailer(src, info, offset, ctrl); break; + case REPEAT_ALWAYS: + /* nothing to do - no state */ + break; } } diff --git a/src/nfa/repeat.h b/src/nfa/repeat.h index 37374d02..d4f84ea0 100644 --- a/src/nfa/repeat.h +++ b/src/nfa/repeat.h @@ -135,6 +135,8 @@ u64a repeatLastTop(const struct RepeatInfo *info, return repeatLastTopSparseOptimalP(info, ctrl, state); case REPEAT_TRAILER: return repeatLastTopTrailer(info, ctrl); + case REPEAT_ALWAYS: + return 0; } DEBUG_PRINTF("bad repeat type %u\n", info->type); @@ -200,6 +202,8 @@ u64a repeatNextMatch(const struct RepeatInfo *info, return repeatNextMatchSparseOptimalP(info, ctrl, state, offset); case REPEAT_TRAILER: return repeatNextMatchTrailer(info, ctrl, offset); + case REPEAT_ALWAYS: + return offset + 1; } DEBUG_PRINTF("bad repeat type %u\n", info->type); @@ -275,6 +279,9 @@ void repeatStore(const struct RepeatInfo *info, union RepeatControl *ctrl, case REPEAT_TRAILER: repeatStoreTrailer(info, ctrl, offset, is_alive); break; + case REPEAT_ALWAYS: + /* nothing to do - no state */ + break; } } @@ -348,6 +355,8 @@ enum RepeatMatch repeatHasMatch(const struct RepeatInfo *info, return repeatHasMatchSparseOptimalP(info, ctrl, state, offset); case REPEAT_TRAILER: return repeatHasMatchTrailer(info, ctrl, offset); + case REPEAT_ALWAYS: + return REPEAT_MATCH; } assert(0); diff --git a/src/nfa/repeat_internal.h b/src/nfa/repeat_internal.h index bf479d1f..9e3f455c 100644 --- a/src/nfa/repeat_internal.h +++ b/src/nfa/repeat_internal.h @@ -47,26 +47,26 @@ enum 
RepeatType { /** General mechanism for tracking {N,M} repeats. Stores the first top as * an absolute offset, then subsequent tops in the {N,M} range as a ring of * relative top indices stored in a multibit. */ - REPEAT_RING = 0, + REPEAT_RING, /** Used to track {N,} repeats. Uses the \ref RepeatOffsetControl structure, * since only the first top encountered needs to be stored. */ - REPEAT_FIRST = 1, + REPEAT_FIRST, /** Used to track {0,N} repeats. Much like ::REPEAT_FIRST, except that we * store the most recent top encountered. */ - REPEAT_LAST = 2, + REPEAT_LAST, /** Like ::REPEAT_RING, this is also used for {N,M} repeats, but for cases * where there is a large difference between N and M, and developed to * reduce the state requirements of this case (relative to the RING model). * Uses a small ordered array of top indices relative to \ref * RepeatRangeControl::offset. */ - REPEAT_RANGE = 3, + REPEAT_RANGE, /** Used for {N,M} repeats where 0 < M <= 64. Uses the \ref * RepeatBitmapControl structure at runtime. */ - REPEAT_BITMAP = 4, + REPEAT_BITMAP, /** Optimal mechanism for tracking {N,M} repeats when there is a bound on * how frequently they can be retriggered. @@ -78,13 +78,17 @@ enum RepeatType { * referencing a table that stores values from f(0, min) to f(repeat, min) * eg: repeat = 5, min = 2. 10001 => f(4,2) + f(0,2) = 9. * We search the optimal patch size between min and repeat in advance and - * use the scheme above to do encoding and decoding to reduce stream state size - * */ - REPEAT_SPARSE_OPTIMAL_P = 5, + * use the scheme above to do encoding and decoding to reduce stream state + * size. */ + REPEAT_SPARSE_OPTIMAL_P, - /** Used for {N,M} repeats where 0 < N < 64. Uses the \ref RepeatTrailerControl - * structure at runtime. */ - REPEAT_TRAILER = 6, + /** Used for {N,M} repeats where 0 < N < 64. Uses the + * \ref RepeatTrailerControl structure at runtime. */ + REPEAT_TRAILER, + + /** Degenerate repeat that always returns true. 
Used by castle for pseudo + * [^X]* repeats. */ + REPEAT_ALWAYS, }; /** @@ -204,6 +208,8 @@ const char *repeatTypeName(u8 type) { return "SPARSE_OPTIMAL_P"; case REPEAT_TRAILER: return "TRAILER"; + case REPEAT_ALWAYS: + return "ALWAYS"; } assert(0); return "UNKNOWN"; diff --git a/src/nfa/repeatcompile.cpp b/src/nfa/repeatcompile.cpp index 2f187503..2e1010bb 100644 --- a/src/nfa/repeatcompile.cpp +++ b/src/nfa/repeatcompile.cpp @@ -206,6 +206,13 @@ RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin, packedFieldSizes[1] = repeatMin; packedCtrlSize = (packedFieldSizes[0] + packedFieldSizes[1] + 7U) / 8U; break; + case REPEAT_ALWAYS: + assert(repeatMin == 0ULL); + assert(repeatMax.is_infinite()); + stateSize = 0; // no state: nothing is tracked for this repeat. + horizon = 0; + packedCtrlSize = 0; + break; } DEBUG_PRINTF("stateSize=%u, packedCtrlSize=%u, horizon=%u\n", stateSize, packedCtrlSize, horizon); @@ -232,9 +239,14 @@ u32 streamStateSize(enum RepeatType type, const depth &repeatMin, } enum RepeatType chooseRepeatType(const depth &repeatMin, const depth &repeatMax, - u32 minPeriod, bool is_reset) { + u32 minPeriod, bool is_reset, + bool has_external_guard) { if (repeatMax.is_infinite()) { - return REPEAT_FIRST; + if (has_external_guard && !repeatMin) { + return REPEAT_ALWAYS; + } else { + return REPEAT_FIRST; + } } if (repeatMin == depth(0) || is_reset) { diff --git a/src/nfa/repeatcompile.h b/src/nfa/repeatcompile.h index 2800ccdb..fe9a7106 100644 --- a/src/nfa/repeatcompile.h +++ b/src/nfa/repeatcompile.h @@ -68,7 +68,8 @@ struct RepeatStateInfo { * type. 
*/ enum RepeatType chooseRepeatType(const depth &repeatMin, const depth &repeatMax, - u32 minPeriod, bool is_reset); + u32 minPeriod, bool is_reset, + bool has_external_guard = false); u32 calcPackedBytes(u64a val); diff --git a/unit/internal/repeat.cpp b/unit/internal/repeat.cpp index 94f1bdc1..7f245e62 100644 --- a/unit/internal/repeat.cpp +++ b/unit/internal/repeat.cpp @@ -193,7 +193,9 @@ static const RepeatTestInfo repeatTests[] = { { REPEAT_FIRST, 100, depth::infinity() }, { REPEAT_FIRST, 1000, depth::infinity() }, { REPEAT_FIRST, 3000, depth::infinity() }, - { REPEAT_FIRST, 10000, depth::infinity() } + { REPEAT_FIRST, 10000, depth::infinity() }, + // {,} repeats -- always + { REPEAT_ALWAYS, 0, depth::infinity() }, }; INSTANTIATE_TEST_CASE_P(Repeat, RepeatTest, ValuesIn(repeatTests)); @@ -289,6 +291,10 @@ TEST_P(RepeatTest, FillRing) { TEST_P(RepeatTest, FindTops) { SCOPED_TRACE(testing::Message() << "Repeat: " << info); + /* REPEAT_ALWAYS has no state and so does not track top locations */ + if (info.type == REPEAT_ALWAYS) { + return; + } repeatStore(&info, ctrl, state, 1000, 0); ASSERT_EQ(1000, repeatLastTop(&info, ctrl, state)); @@ -364,7 +370,8 @@ TEST_P(RepeatTest, TwoTops) { SCOPED_TRACE(testing::Message() << "Repeat: " << info); // Only appropriate for tests that store more than one top. - if (info.type == REPEAT_FIRST || info.type == REPEAT_LAST) { + if (info.type == REPEAT_FIRST || info.type == REPEAT_LAST + || info.type == REPEAT_ALWAYS) { return; }