diff --git a/src/nfa/castle.c b/src/nfa/castle.c
index 274e5705..5558381d 100644
--- a/src/nfa/castle.c
+++ b/src/nfa/castle.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -111,17 +111,22 @@ int castleReportCurrent(const struct Castle *c, struct mq *q) {
     DEBUG_PRINTF("offset=%llu\n", offset);
 
     if (c->exclusive) {
-        const u32 activeIdx = partial_load_u32(q->streamState,
-                                               c->activeIdxSize);
-        DEBUG_PRINTF("subcastle %u\n", activeIdx);
-        if (activeIdx < c->numRepeats && subCastleReportCurrent(c, q,
-                offset, activeIdx) == MO_HALT_MATCHING) {
-            return MO_HALT_MATCHING;
+        u8 *active = (u8 *)q->streamState;
+        u8 *groups = active + c->groupIterOffset;
+        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
+             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
+            u8 *cur = active + i * c->activeIdxSize;
+            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
+            DEBUG_PRINTF("subcastle %u\n", activeIdx);
+            if (subCastleReportCurrent(c, q,
+                    offset, activeIdx) == MO_HALT_MATCHING) {
+                return MO_HALT_MATCHING;
+            }
         }
     }
 
-    if (!c->pureExclusive) {
-        const u8 *active = (const u8 *)q->streamState + c->activeIdxSize;
+    if (c->exclusive != PURE_EXCLUSIVE) {
+        const u8 *active = (const u8 *)q->streamState + c->activeOffset;
         for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
              i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
             DEBUG_PRINTF("subcastle %u\n", i);
@@ -168,9 +173,12 @@ char castleInAccept(const struct Castle *c, struct mq *q,
     }
 
     if (c->exclusive) {
-        const u32 activeIdx = partial_load_u32(q->streamState,
-                                               c->activeIdxSize);
-        if (activeIdx < c->numRepeats) {
+        u8 *active = (u8 *)q->streamState;
+        u8 *groups = active + c->groupIterOffset;
+        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
+             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
+            u8 *cur = active + i * c->activeIdxSize;
+            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
             DEBUG_PRINTF("subcastle %u\n", activeIdx);
             if (subCastleInAccept(c, q, report, offset, activeIdx)) {
                 return 1;
@@ -178,11 +186,10 @@ char castleInAccept(const struct Castle *c, struct mq *q,
         }
     }
 
-    if (!c->pureExclusive) {
-        const u8 *active = (const u8 *)q->streamState + c->activeIdxSize;
+    if (c->exclusive != PURE_EXCLUSIVE) {
+        const u8 *active = (const u8 *)q->streamState + c->activeOffset;
         for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
-             i != MMB_INVALID;
-             i = mmbit_iterate(active, c->numRepeats, i)) {
+             i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
             DEBUG_PRINTF("subcastle %u\n", i);
             if (subCastleInAccept(c, q, report, offset, i)) {
                 return 1;
@@ -197,7 +204,6 @@ static really_inline
 void subCastleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
                                   void *full_state, void *stream_state,
                                   const u32 subIdx) {
-    u8 *active = (u8 *)stream_state;
     const struct SubCastle *sub = getSubCastle(c, subIdx);
     const struct RepeatInfo *info = getRepeatInfo(sub);
 
@@ -207,10 +213,13 @@ void subCastleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
 
     if (repeatHasMatch(info, rctrl, rstate, offset) == REPEAT_STALE) {
         DEBUG_PRINTF("sub %u is stale at offset %llu\n", subIdx, offset);
-        if (sub->exclusive) {
-            partial_store_u32(stream_state, c->numRepeats, c->activeIdxSize);
+        if (sub->exclusiveId < c->numRepeats) {
+            u8 *active = (u8 *)stream_state;
+            u8 *groups = active + c->groupIterOffset;
+            mmbit_unset(groups, c->numGroups, sub->exclusiveId);
         } else {
-            mmbit_unset(active + c->activeIdxSize, c->numRepeats, subIdx);
+            u8 *active = (u8 *)stream_state + c->activeOffset;
+            mmbit_unset(active, c->numRepeats, subIdx);
         }
     }
 }
@@ -226,16 +235,20 @@ void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
     }
 
     if (c->exclusive) {
-        const u32 activeIdx = partial_load_u32(stream_state, c->activeIdxSize);
-        if (activeIdx < c->numRepeats) {
+        u8 *active = (u8 *)stream_state;
+        u8 *groups = active + c->groupIterOffset;
+        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
+             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
+            u8 *cur = active + i * c->activeIdxSize;
+            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
             DEBUG_PRINTF("subcastle %u\n", activeIdx);
             subCastleDeactivateStaleSubs(c, offset, full_state,
                                          stream_state, activeIdx);
         }
     }
 
-    if (!c->pureExclusive) {
-        const u8 *active = (const u8 *)stream_state + c->activeIdxSize;
+    if (c->exclusive != PURE_EXCLUSIVE) {
+        const u8 *active = (const u8 *)stream_state + c->activeOffset;
         const struct mmbit_sparse_iter *it
             = (const void *)((const char *)c + c->staleIterOffset);
@@ -266,12 +279,20 @@ void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset,
                  info->packedCtrlSize;
 
     char is_alive = 0;
-    if (sub->exclusive) {
-        const u32 activeIdx = partial_load_u32(stream_state, c->activeIdxSize);
-        is_alive = (activeIdx == top);
-        partial_store_u32(stream_state, top, c->activeIdxSize);
+    u8 *active = (u8 *)stream_state;
+    if (sub->exclusiveId < c->numRepeats) {
+        u8 *groups = active + c->groupIterOffset;
+        active += sub->exclusiveId * c->activeIdxSize;
+        if (mmbit_set(groups, c->numGroups, sub->exclusiveId)) {
+            const u32 activeIdx = partial_load_u32(active, c->activeIdxSize);
+            is_alive = (activeIdx == top);
+        }
+
+        if (!is_alive) {
+            partial_store_u32(active, top, c->activeIdxSize);
+        }
     } else {
-        u8 *active = (u8 *)stream_state + c->activeIdxSize;
+        active += c->activeOffset;
         is_alive = mmbit_set(active, c->numRepeats, top);
     }
@@ -309,11 +330,11 @@ void subCastleFindMatch(const struct Castle *c, const u64a begin,
     u64a match = repeatNextMatch(info, rctrl, rstate, begin);
     if (match == 0) {
         DEBUG_PRINTF("no more matches for sub %u\n", subIdx);
-        if (sub->exclusive) {
-            partial_store_u32(stream_state, c->numRepeats,
-                              c->activeIdxSize);
+        if (sub->exclusiveId < c->numRepeats) {
+            u8 *groups = (u8 *)stream_state + c->groupIterOffset;
+            mmbit_unset(groups, c->numGroups, sub->exclusiveId);
         } else {
-            u8 *active = (u8 *)stream_state + c->activeIdxSize;
+            u8 *active = (u8 *)stream_state + c->activeOffset;
             mmbit_unset(active, c->numRepeats, subIdx);
         }
         return;
@@ -346,16 +367,20 @@ char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end,
     *mloc = 0;
 
     if (c->exclusive) {
-        const u32 activeIdx = partial_load_u32(stream_state, c->activeIdxSize);
-        if (activeIdx < c->numRepeats) {
+        u8 *active = (u8 *)stream_state;
+        u8 *groups = active + c->groupIterOffset;
+        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
+             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
+            u8 *cur = active + i * c->activeIdxSize;
+            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
             DEBUG_PRINTF("subcastle %u\n", activeIdx);
             subCastleFindMatch(c, begin, end, full_state, stream_state, mloc,
                                &found, activeIdx);
         }
     }
 
-    if (!c->pureExclusive) {
-        u8 *active = (u8 *)stream_state + c->activeIdxSize;
+    if (c->exclusive != PURE_EXCLUSIVE) {
+        u8 *active = (u8 *)stream_state + c->activeOffset;
         for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
              i != MMB_INVALID;
             i = mmbit_iterate(active, c->numRepeats, i)) {
@@ -384,31 +409,38 @@ u64a subCastleNextMatch(const struct Castle *c, void *full_state,
     return repeatNextMatch(info, rctrl, rstate, loc);
 }
 
+static really_inline
+void set_matching(const struct Castle *c, const u64a match, u8 *active,
+                  u8 *matching, const u32 active_size, const u32 active_id,
+                  const u32 matching_id, u64a *offset, const u64a end) {
+    if (match == 0) {
+        DEBUG_PRINTF("no more matches\n");
+        mmbit_unset(active, active_size, active_id);
+    } else if (match > end) {
+        // If we had a local copy of the active mmbit, we could skip
+        // looking at this repeat again. But we don't, so we just move
+        // on.
+    } else if (match == *offset) {
+        mmbit_set(matching, c->numRepeats, matching_id);
+    } else if (match < *offset) {
+        // New minimum offset.
+        *offset = match;
+        mmbit_clear(matching, c->numRepeats);
+        mmbit_set(matching, c->numRepeats, matching_id);
+    }
+}
+
 static really_inline
 void subCastleMatchLoop(const struct Castle *c, void *full_state,
                         void *stream_state, const u64a end,
                         const u64a loc, u64a *offset) {
-    u8 *active = (u8 *)stream_state + c->activeIdxSize;
+    u8 *active = (u8 *)stream_state + c->activeOffset;
     u8 *matching = full_state;
-    mmbit_clear(matching, c->numRepeats);
     for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
          i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
         u64a match = subCastleNextMatch(c, full_state, stream_state, loc, i);
-        if (match == 0) {
-            DEBUG_PRINTF("no more matches\n");
-            mmbit_unset(active, c->numRepeats, i);
-        } else if (match > end) {
-            // If we had a local copy of the active mmbit, we could skip
-            // looking at this repeat again. But we don't, so we just move
-            // on.
-        } else if (match == *offset) {
-            mmbit_set(matching, c->numRepeats, i);
-        } else if (match < *offset) {
-            // New minimum offset.
-            *offset = match;
-            mmbit_clear(matching, c->numRepeats);
-            mmbit_set(matching, c->numRepeats, i);
-        }
+        set_matching(c, match, active, matching, c->numRepeats, i,
+                     i, offset, end);
     }
 }
@@ -451,61 +483,37 @@ char castleMatchLoop(const struct Castle *c, const u64a begin, const u64a end,
         // full_state (scratch).
 
         u64a offset = end; // min offset of next match
-        char found = 0;
         u32 activeIdx = 0;
+        mmbit_clear(matching, c->numRepeats);
         if (c->exclusive) {
-            activeIdx = partial_load_u32(stream_state, c->activeIdxSize);
-            if (activeIdx < c->numRepeats) {
-                u32 i = activeIdx;
-                DEBUG_PRINTF("subcastle %u\n", i);
+            u8 *active = (u8 *)stream_state;
+            u8 *groups = active + c->groupIterOffset;
+            for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
+                 i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
+                u8 *cur = active + i * c->activeIdxSize;
+                activeIdx = partial_load_u32(cur, c->activeIdxSize);
                 u64a match = subCastleNextMatch(c, full_state, stream_state,
-                                                loc, i);
-
-                if (match == 0) {
-                    DEBUG_PRINTF("no more matches\n");
-                    partial_store_u32(stream_state, c->numRepeats,
-                                      c->activeIdxSize);
-                } else if (match > end) {
-                    // If we had a local copy of the active mmbit, we could skip
-                    // looking at this repeat again. But we don't, so we just move
-                    // on.
-                } else if (match <= offset) {
-                    if (match < offset) {
-                        // New minimum offset.
-                        offset = match;
-                    }
-                    found = 1;
-                }
+                                                loc, activeIdx);
+                set_matching(c, match, groups, matching, c->numGroups, i,
+                             activeIdx, &offset, end);
             }
         }
 
-        const char hasMatch = found;
-        u64a newOffset = offset;
-        if (!c->pureExclusive) {
+        if (c->exclusive != PURE_EXCLUSIVE) {
             subCastleMatchLoop(c, full_state, stream_state,
-                               end, loc, &newOffset);
-
-            DEBUG_PRINTF("offset=%llu\n", newOffset);
-            if (mmbit_any(matching, c->numRepeats)) {
-                found = 1;
-                if (subCastleFireMatch(c, full_state, stream_state,
-                        cb, ctx, newOffset) == MO_HALT_MATCHING) {
-                    return MO_HALT_MATCHING;
-                }
-            }
+                               end, loc, &offset);
         }
-
-        if (!found) {
+        DEBUG_PRINTF("offset=%llu\n", offset);
+        if (!mmbit_any(matching, c->numRepeats)) {
+            DEBUG_PRINTF("no more matches\n");
             break;
-        } else if (hasMatch && offset == newOffset) {
-            const struct SubCastle *sub = getSubCastle(c, activeIdx);
-            DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, activeIdx);
-            if (cb(offset, sub->report, ctx) == MO_HALT_MATCHING) {
-                DEBUG_PRINTF("caller told us to halt\n");
-                return MO_HALT_MATCHING;
-            }
         }
-        loc = newOffset;
+
+        if (subCastleFireMatch(c, full_state, stream_state,
+                               cb, ctx, offset) == MO_HALT_MATCHING) {
+            return MO_HALT_MATCHING;
+        }
+        loc = offset;
     }
 
     return MO_CONTINUE_MATCHING;
@@ -564,7 +572,8 @@ char castleScanShufti(const struct Castle *c, const u8 *buf, const size_t begin,
 static really_inline
 char castleScanTruffle(const struct Castle *c, const u8 *buf, const size_t begin,
                        const size_t end, size_t *loc) {
-    const u8 *ptr = truffleExec(c->u.truffle.mask1, c->u.truffle.mask2, buf + begin, buf + end);
+    const u8 *ptr = truffleExec(c->u.truffle.mask1, c->u.truffle.mask2,
+                                buf + begin, buf + end);
     if (ptr == buf + end) {
         DEBUG_PRINTF("no escape found\n");
         return 0;
@@ -725,10 +734,11 @@ static really_inline
 void clear_repeats(const struct Castle *c, const struct mq *q, u8 *active) {
     DEBUG_PRINTF("clearing active repeats due to escape\n");
     if (c->exclusive) {
-        partial_store_u32(q->streamState, c->numRepeats, c->activeIdxSize);
+        u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
+        mmbit_clear(groups, c->numGroups);
     }
 
-    if (!c->pureExclusive) {
+    if (c->exclusive != PURE_EXCLUSIVE) {
         mmbit_clear(active, c->numRepeats);
     }
 }
@@ -755,7 +765,7 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
         return 1;
     }
 
-    u8 *active = (u8 *)q->streamState + c->activeIdxSize; // active multibit
+    u8 *active = (u8 *)q->streamState + c->activeOffset;// active multibit
 
     assert(q->cur + 1 < q->end); // require at least two items
     assert(q_cur_type(q) == MQE_START);
@@ -769,14 +779,8 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
 
     char found = 0;
     if (c->exclusive) {
-        const u32 activeIdx = partial_load_u32(q->streamState,
-                                               c->activeIdxSize);
-        if (activeIdx < c->numRepeats) {
-            found = 1;
-        } else if (c->pureExclusive) {
-            DEBUG_PRINTF("castle is dead\n");
-            goto scan_done;
-        }
+        u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
+        found = mmbit_any(groups, c->numGroups);
     }
 
     if (!found && !mmbit_any(active, c->numRepeats)) {
@@ -842,10 +846,9 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
     }
 
     if (c->exclusive) {
-        const u32 activeIdx = partial_load_u32(q->streamState,
-                                               c->activeIdxSize);
-        if (c->pureExclusive || activeIdx < c->numRepeats) {
-            return activeIdx < c->numRepeats;
+        u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
+        if (mmbit_any_precise(groups, c->numGroups)) {
+            return 1;
         }
     }
 
@@ -905,7 +908,7 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) {
     assert(q_cur_type(q) == MQE_START);
 
     const struct Castle *c = getImplNfa(n);
-    u8 *active = (u8 *)q->streamState + c->activeIdxSize;
+    u8 *active = (u8 *)q->streamState + c->activeOffset;
 
     u64a end_offset = q_last_loc(q) + q->offset;
     s64a last_kill_loc = castleLastKillLoc(c, q);
@@ -938,14 +941,9 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) {
 
     char found = 0;
     if (c->exclusive) {
-        const u32 activeIdx = partial_load_u32(q->streamState,
-                                               c->activeIdxSize);
-        if (activeIdx < c->numRepeats) {
-            found = 1;
-        } else if (c->pureExclusive) {
-            DEBUG_PRINTF("castle is dead\n");
-            return 0;
-        }
+        u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
+        found = mmbit_any_precise(groups, c->numGroups);
+
     }
 
     if (!found && !mmbit_any_precise(active, c->numRepeats)) {
@@ -988,11 +986,12 @@ char nfaExecCastle0_queueInitState(UNUSED const struct NFA *n, struct mq *q) {
     const struct Castle *c = getImplNfa(n);
     assert(q->streamState);
     if (c->exclusive) {
-        partial_store_u32(q->streamState, c->numRepeats, c->activeIdxSize);
+        u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
+        mmbit_clear(groups, c->numGroups);
     }
 
-    if (!c->pureExclusive) {
-        u8 *active = (u8 *)q->streamState + c->activeIdxSize;
+    if (c->exclusive != PURE_EXCLUSIVE) {
+        u8 *active = (u8 *)q->streamState + c->activeOffset;
         mmbit_clear(active, c->numRepeats);
     }
     return 0;
@@ -1006,11 +1005,12 @@ char nfaExecCastle0_initCompressedState(const struct NFA *n, UNUSED u64a offset,
     const struct Castle *c = getImplNfa(n);
 
     if (c->exclusive) {
-        partial_store_u32(state, c->numRepeats, c->activeIdxSize);
+        u8 *groups = (u8 *)state + c->groupIterOffset;
+        mmbit_clear(groups, c->numGroups);
     }
 
-    if (!c->pureExclusive) {
-        u8 *active = (u8 *)state + c->activeIdxSize;
+    if (c->exclusive != PURE_EXCLUSIVE) {
+        u8 *active = (u8 *)state + c->activeOffset;
         mmbit_clear(active, c->numRepeats);
     }
     return 0;
@@ -1041,16 +1041,19 @@ char nfaExecCastle0_queueCompressState(const struct NFA *n, const struct mq *q,
     const u64a offset = q->offset + loc;
     DEBUG_PRINTF("offset=%llu\n", offset);
     if (c->exclusive) {
-        const u32 activeIdx = partial_load_u32(q->streamState,
-                                               c->activeIdxSize);
-        if (activeIdx < c->numRepeats) {
+        u8 *active = (u8 *)q->streamState;
+        u8 *groups = active + c->groupIterOffset;
+        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
+             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
+            u8 *cur = active + i * c->activeIdxSize;
+            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
             DEBUG_PRINTF("packing state for sub %u\n", activeIdx);
             subCastleQueueCompressState(c, activeIdx, q, offset);
         }
     }
 
-    if (!c->pureExclusive) {
-        const u8 *active = (const u8 *)q->streamState + c->activeIdxSize;
+    if (c->exclusive != PURE_EXCLUSIVE) {
+        const u8 *active = (const u8 *)q->streamState + c->activeOffset;
         for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
              i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
             DEBUG_PRINTF("packing state for sub %u\n", i);
@@ -1084,15 +1087,19 @@ char nfaExecCastle0_expandState(const struct NFA *n, void *dest,
     const struct Castle *c = getImplNfa(n);
 
     if (c->exclusive) {
-        const u32 activeIdx = partial_load_u32(src, c->activeIdxSize);
-        if (activeIdx < c->numRepeats) {
+        const u8 *active = (const u8 *)src;
+        const u8 *groups = active + c->groupIterOffset;
+        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
+             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
+            const u8 *cur = active + i * c->activeIdxSize;
+            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
             subCastleExpandState(c, activeIdx, dest, src, offset);
         }
     }
 
-    if (!c->pureExclusive) {
+    if (c->exclusive != PURE_EXCLUSIVE) {
         // Unpack state for all active repeats.
-        const u8 *active = (const u8 *)src + c->activeIdxSize;
+        const u8 *active = (const u8 *)src + c->activeOffset;
         for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
              i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
             subCastleExpandState(c, i, dest, src, offset);
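[Editor's note] To make the new addressing in castle.c easier to follow, here is a minimal standalone sketch (not part of the patch) of how stream state is laid out and read when a castle has multiple exclusive groups. Everything in it is illustrative: toy_mmbit_set/toy_mmbit_isset and load_partial() merely stand in for Hyperscan's mmbit_* and partial_load_u32(), and the geometry is invented for a two-group castle rather than produced by a real compile.

/* Illustrative only -- stand-ins, not Hyperscan internals. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef uint8_t u8;
typedef uint32_t u32;

/* stand-in for partial_load_u32(): little-endian load of sz bytes */
static u32 load_partial(const u8 *p, u32 sz) {
    u32 v = 0;
    memcpy(&v, p, sz); /* assumes a little-endian host */
    return v;
}

/* stand-ins for the mmbit_* multibit: one bit per exclusive group */
static void toy_mmbit_set(u8 *bits, u32 i) {
    bits[i / 8] |= (u8)(1u << (i % 8));
}
static int toy_mmbit_isset(const u8 *bits, u32 i) {
    return !!(bits[i / 8] & (1u << (i % 8)));
}

int main(void) {
    /* invented geometry: two exclusive groups, 1-byte packed active ids */
    const u32 numGroups = 2, activeIdxSize = 1;
    const u32 activeOffset = numGroups * activeIdxSize; /* as buildCastle computes it */
    const u32 groupIterOffset = activeOffset + 1;       /* after non-exclusive multibit */
    u8 stream[8] = {0};

    /* a top fires for subcastle 5, which belongs to exclusive group 1:
     * mark the group alive and store the active subcastle id */
    toy_mmbit_set(stream + groupIterOffset, 1);
    stream[1 * activeIdxSize] = 5;

    /* read-back, shaped like the new loops in castleReportCurrent() */
    for (u32 g = 0; g < numGroups; g++) {
        if (!toy_mmbit_isset(stream + groupIterOffset, g)) {
            continue;
        }
        u32 activeIdx = load_partial(stream + g * activeIdxSize, activeIdxSize);
        printf("group %u -> active subcastle %u\n", g, activeIdx);
    }
    return 0;
}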
diff --git a/src/nfa/castle_internal.h b/src/nfa/castle_internal.h
index 54578d67..429c232f 100644
--- a/src/nfa/castle_internal.h
+++ b/src/nfa/castle_internal.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -42,7 +42,9 @@ struct SubCastle {
     u32 streamStateOffset; //!< offset within stream state
     u32 repeatInfoOffset;  //!< offset of RepeatInfo structure
                            // relative to the start of SubCastle
-    char exclusive; //!< exclusive info of this SubCastle
+    u32 exclusiveId; //!< exclusive group id of this SubCastle,
+                     // set to the number of SubCastles in Castle
+                     // if it is not exclusive
 };
 
 #define CASTLE_DOT 0
@@ -51,6 +53,12 @@ struct SubCastle {
 #define CASTLE_SHUFTI 3
 #define CASTLE_TRUFFLE 4
 
+enum ExclusiveType {
+    NOT_EXCLUSIVE,  //!< no subcastles are exclusive
+    EXCLUSIVE,      //!< a subset of subcastles are exclusive
+    PURE_EXCLUSIVE  //!< all subcastles are exclusive
+};
+
 /**
  * \brief Castle engine structure.
  *
@@ -66,26 +74,56 @@ struct SubCastle {
  * - sparse iterator for subcastles that may be stale
  *
 * Castle stores an "active repeats" multibit in stream state, followed by the
- * packed repeat state for each SubCastle. If all SubCastles are mutual
- * exclusive, we store current active SubCastle id instead of "active repeats"
- * multibit in stream state. If there are both exclusive and non-exclusive
- * SubCastle groups, we use an active id for the exclusive group and a multibit
- * for the non-exclusive group.
+ * packed repeat state for each SubCastle. If there are both exclusive and
+ * non-exclusive SubCastle groups, we use an active id for each exclusive group
+ * and a multibit for the non-exclusive group. We also store an "active
+ * exclusive groups" multibit for exclusive groups. If all SubCastles are mutual
+ * exclusive, we remove "active repeats" multibit from stream state.
+ *
+ * Castle stream state:
+ * *
+ * * |---|
+ * * |   | active subengine id for exclusive group 1
+ * * |---|
+ * * |   | active subengine id for exclusive group 2(if necessary)
+ * * |---|
+ * * ...
+ * * |---|
+ * * |   | "active repeats" multibit for non-exclusive subcastles
+ * * |   | (if not all subcastles are exclusive)
+ * * |---|
+ * * |   | active multibit for exclusive groups
+ * * |   |
+ * * |---|
+ * * ||-|| common pool of stream state for exclusive group 1
+ * * ||-||
+ * * |---|
+ * * ||-|| common pool of stream state for exclusive group 2(if necessary)
+ * * ||-||
+ * * |---|
+ * * ...
+ * * |---|
+ * * |   | stream state for each non-exclusive subcastles
+ * * ...
+ * * |   |
+ * * |---|
 *
 * In full state (stored in scratch space) it stores a temporary multibit over
 * the repeats (used by \ref castleMatchLoop), followed by the repeat control
- * blocks for each SubCastle. If all SubCastles are mutual exclusive, we only
- * need to store the repeat control blocks for each SubCastle.
+ * blocks for each SubCastle.
 */
 struct ALIGN_AVX_DIRECTIVE Castle {
-    u32 numRepeats;
-    u8 type; //!< tells us which scanning mechanism (below) to use
-    char exclusive; //!< tells us if there are mutual exclusive SubCastles
-    char pureExclusive; //!< tells us if all SubCastles are mutual exclusive
-    u8 activeIdxSize; //!< number of bytes in stream state to store
-                      // active SubCastle id for exclusive mode
-    u32 staleIterOffset; //!< offset to a sparse iterator to check for stale
-                         // sub castles
+    u32 numRepeats; //!< number of repeats in Castle
+    u32 numGroups;  //!< number of exclusive groups
+    u8 type; //!< tells us which scanning mechanism (below) to use
+    s8 exclusive; //!< tells us if there are mutual exclusive SubCastles
+    u8 activeIdxSize; //!< number of bytes in stream state to store
+                      // active SubCastle id for exclusive mode
+    u32 activeOffset; //!< offset to active multibit for non-exclusive
+                      // SubCastles
+    u32 staleIterOffset; //!< offset to a sparse iterator to check for stale
+                         // sub castles
+    u32 groupIterOffset; //!< offset to an iterator to check the aliveness of
+                         // exclusive groups
     union {
         struct {
             char c;
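[Editor's note] The header replaces the two flags exclusive/pureExclusive with a single three-valued ExclusiveType plus a per-subcastle sentinel. A short hedged sketch of the two tests the runtime code now performs; the helper names here are invented (castle.c writes the checks inline):

/* Illustrative only -- helper names are hypothetical; checks mirror castle.c. */
enum ExclusiveType { NOT_EXCLUSIVE, EXCLUSIVE, PURE_EXCLUSIVE };

/* A subcastle is exclusive iff its exclusiveId is a real group id;
 * buildCastle() stores numRepeats as the "not exclusive" sentinel. */
static int sub_is_exclusive(unsigned exclusiveId, unsigned numRepeats) {
    return exclusiveId < numRepeats;
}

/* The "active repeats" multibit only exists while at least one subcastle
 * is non-exclusive, hence the c->exclusive != PURE_EXCLUSIVE guards. */
static int has_nonexclusive_multibit(enum ExclusiveType t) {
    return t != PURE_EXCLUSIVE;
}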
diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp
--- a/src/nfa/castlecompile.cpp
+++ b/src/nfa/castlecompile.cpp
@@ ... @@ removeClique(CliqueGraph &cg) {
 // the end locations where it overlaps with other literals,
 // then the literals are mutual exclusive
 static
-bool findExclusivePair(const u32 id1, const u32 id2,
+bool findExclusivePair(const size_t id1, const size_t id2,
+                       const size_t lower,
                        const vector<vector<size_t>> &min_reset_dist,
                        const vector<vector<vector<CharReach>>> &triggers) {
     const auto &triggers1 = triggers[id1];
     const auto &triggers2 = triggers[id2];
-    for (u32 i = 0; i < triggers1.size(); ++i) {
-        for (u32 j = 0; j < triggers2.size(); ++j) {
+    for (size_t i = 0; i < triggers1.size(); ++i) {
+        for (size_t j = 0; j < triggers2.size(); ++j) {
             if (!literalOverlap(triggers1[i], triggers2[j],
-                                min_reset_dist[id2][j]) ||
+                                min_reset_dist[id2 - lower][j]) ||
                 !literalOverlap(triggers2[j], triggers1[i],
-                                min_reset_dist[id1][i])) {
+                                min_reset_dist[id1 - lower][i])) {
                 return false;
             }
         }
@@ -264,40 +265,75 @@ bool findExclusivePair(const u32 id1, const u32 id2,
 }
 
 static
-vector<u32> checkExclusion(const CharReach &cr,
-                           const vector<vector<vector<CharReach>>> &triggers) {
-    vector<u32> group;
-    if (!triggers.size() || triggers.size() == 1) {
-        return group;
-    }
+vector<vector<u32>> checkExclusion(u32 &streamStateSize,
+                                   const CharReach &cr,
+                                   const vector<vector<vector<CharReach>>> &triggers,
+                                   enum ExclusiveType &exclusive,
+                                   const size_t numRepeats) {
+    vector<vector<u32>> groups;
+    size_t trigSize = triggers.size();
+    DEBUG_PRINTF("trigSize %zu\n", trigSize);
 
-    vector<vector<size_t>> min_reset_dist;
-    // get min reset distance for each repeat
-    for (auto it = triggers.begin(); it != triggers.end(); it++) {
-        const vector<size_t> &tmp_dist = minResetDistToEnd(*it, cr);
-        min_reset_dist.push_back(tmp_dist);
-    }
+    size_t lower = 0;
+    size_t total = 0;
+    while (lower < trigSize) {
+        vector<CliqueVertex> vertices;
+        unique_ptr<CliqueGraph> cg = make_unique<CliqueGraph>();
 
-    vector<CliqueVertex> vertices;
-    unique_ptr<CliqueGraph> cg = make_unique<CliqueGraph>();
-    for (u32 i = 0; i < triggers.size(); ++i) {
-        CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg);
-        vertices.push_back(v);
-    }
+        vector<vector<size_t>> min_reset_dist;
+        size_t upper = min(lower + CLIQUE_GRAPH_MAX_SIZE, trigSize);
+        // get min reset distance for each repeat
+        for (size_t i = lower; i < upper; i++) {
+            CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg);
+            vertices.push_back(v);
 
-    // find exclusive pair for each repeat
-    for (u32 i = 0; i < triggers.size(); ++i) {
-        CliqueVertex s = vertices[i];
-        for (u32 j = i + 1; j < triggers.size(); ++j) {
-            if (findExclusivePair(i, j, min_reset_dist, triggers)) {
-                CliqueVertex d = vertices[j];
-                add_edge(s, d, *cg);
+            const vector<size_t> &tmp_dist =
+                minResetDistToEnd(triggers[i], cr);
+            min_reset_dist.push_back(tmp_dist);
+        }
+
+        // find exclusive pair for each repeat
+        for (size_t i = lower; i < upper; i++) {
+            CliqueVertex s = vertices[i - lower];
+            for (size_t j = i + 1; j < upper; j++) {
+                if (findExclusivePair(i, j, lower, min_reset_dist,
+                                      triggers)) {
+                    CliqueVertex d = vertices[j - lower];
+                    add_edge(s, d, *cg);
+                }
             }
         }
-    }
 
-    // find the largest exclusive group
-    return removeClique(*cg);
+        // find the largest exclusive group
+        auto clique = removeClique(*cg);
+        size_t cliqueSize = clique.size();
+        if (cliqueSize > 1) {
+            groups.push_back(clique);
+            exclusive = EXCLUSIVE;
+            total += cliqueSize;
+        }
+
+        lower += CLIQUE_GRAPH_MAX_SIZE;
+    }
+    DEBUG_PRINTF("clique size %lu, num of repeats %lu\n",
+                 total, numRepeats);
+    if (total == numRepeats) {
+        exclusive = PURE_EXCLUSIVE;
+        streamStateSize = 0;
+    };
+
+    return groups;
+}
+
+namespace {
+struct ExclusiveInfo {
+
+    /** Mapping between top and exclusive group id */
+    map<u32, u32> groupId;
+
+    /** Number of exclusive groups */
+    u32 numGroups = 0;
+};
+}
 
 static
@@ -306,10 +342,12 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
                      const vector<pair<depth, bool>> &repeatInfoPair,
                      u32 &scratchStateSize, u32 &streamStateSize,
                      u32 &tableSize, vector<u64a> &tables, u32 &sparseRepeats,
-                     const set<u32> &exclusiveGroup, vector<u32> &may_stale) {
+                     const ExclusiveInfo &exclusiveInfo,
+                     vector<u32> &may_stale) {
     u32 i = 0;
-    u32 maxStreamSize = 0;
-    bool exclusive = exclusiveGroup.size() > 1;
+    const auto &groupId = exclusiveInfo.groupId;
+    const auto &numGroups = exclusiveInfo.numGroups;
+    vector<u32> maxStreamSize(numGroups, 0);
 
     for (auto it = proto.repeats.begin(), ite = proto.repeats.end();
          it != ite; ++it, ++i) {
         const PureRepeat &pr = it->second;
@@ -330,8 +368,9 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
         RepeatInfo &info = infos[i];
 
         // handle exclusive case differently
-        if (exclusive && exclusiveGroup.find(i) != exclusiveGroup.end()) {
-            maxStreamSize = MAX(maxStreamSize, rsi.packedCtrlSize);
+        if (contains(groupId, i)) {
+            u32 id = groupId.at(i);
+            maxStreamSize[id] = MAX(maxStreamSize[id], rsi.packedCtrlSize);
         } else {
             subScratchStateSize = verify_u32(sizeof(RepeatControl));
             subStreamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize);
@@ -366,25 +405,34 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
         sub.report = *pr.reports.begin();
 
         if (rtype == REPEAT_SPARSE_OPTIMAL_P) {
-                for (u32 j = 0; j < rsi.patchSize; j++) {
-                    tables.push_back(rsi.table[j]);
-                }
-                sparseRepeats++;
-                patchSize[i] = rsi.patchSize;
-                tableSize += rsi.patchSize;
+            for (u32 j = 0; j < rsi.patchSize; j++) {
+                tables.push_back(rsi.table[j]);
+            }
+            sparseRepeats++;
+            patchSize[i] = rsi.patchSize;
+            tableSize += rsi.patchSize;
         }
     }
 
-    if (exclusive) {
-        for (auto k : exclusiveGroup) {
-            SubCastle &sub = subs[k];
-            RepeatInfo &info = infos[k];
-            info.packedCtrlSize = maxStreamSize;
+    vector<u32> scratchOffset(numGroups, 0);
+    vector<u32> streamOffset(numGroups, 0);
+    for (const auto &j : groupId) {
+        u32 top = j.first;
+        u32 id = j.second;
+        SubCastle &sub = subs[top];
+        RepeatInfo &info = infos[top];
+        info.packedCtrlSize = maxStreamSize[id];
+        if (!scratchOffset[id]) {
             sub.fullStateOffset = scratchStateSize;
             sub.streamStateOffset = streamStateSize;
+            scratchOffset[id] = scratchStateSize;
+            streamOffset[id] = streamStateSize;
+            scratchStateSize += verify_u32(sizeof(RepeatControl));
+            streamStateSize += maxStreamSize[id];
+        } else {
+            sub.fullStateOffset = scratchOffset[id];
+            sub.streamStateOffset = streamOffset[id];
         }
-        scratchStateSize += verify_u32(sizeof(RepeatControl));
-        streamStateSize += maxStreamSize;
     }
 }
@@ -423,8 +471,9 @@ buildCastle(const CastleProto &proto,
     depth maxWidth(0);
 
     u32 i = 0;
-    vector<u32> candidateRepeats;
+    ExclusiveInfo exclusiveInfo;
     vector<vector<vector<CharReach>>> candidateTriggers;
+    vector<u32> candidateRepeats;
     vector<pair<depth, bool>> repeatInfoPair;
     for (auto it = proto.repeats.begin(), ite = proto.repeats.end();
          it != ite; ++it, ++i) {
@@ -459,38 +508,40 @@
 
         repeatInfoPair.push_back(make_pair(min_period, is_reset));
 
-        if (is_reset && candidateRepeats.size() < CLIQUE_GRAPH_MAX_SIZE) {
-            candidateTriggers.push_back(triggers.at(top));
-            candidateRepeats.push_back(i);
-        }
+        candidateTriggers.push_back(triggers.at(top));
+        candidateRepeats.push_back(i);
     }
 
     // Case 1: exclusive repeats
-    bool exclusive = false;
-    bool pureExclusive = false;
+    enum ExclusiveType exclusive = NOT_EXCLUSIVE;
     u32 activeIdxSize = 0;
-    set<u32> exclusiveGroup;
+    u32 groupIterOffset = 0;
    if (cc.grey.castleExclusive) {
-        vector<u32> tmpGroup = checkExclusion(cr, candidateTriggers);
-        const u32 exclusiveSize = tmpGroup.size();
-        if (exclusiveSize > 1) {
-            // Case 1: mutual exclusive repeats group found, initialize state
-            // sizes
-            exclusive = true;
+        auto cliqueGroups =
+            checkExclusion(streamStateSize, cr, candidateTriggers,
+                           exclusive, numRepeats);
+        for (const auto &group : cliqueGroups) {
+            // mutual exclusive repeats group found,
+            // update state sizes
             activeIdxSize = calcPackedBytes(numRepeats + 1);
-            if (exclusiveSize == numRepeats) {
-                pureExclusive = true;
-                streamStateSize = 0;
-                scratchStateSize = 0;
-            }
             streamStateSize += activeIdxSize;
             // replace with top values
-            for (const auto &val : tmpGroup) {
-                exclusiveGroup.insert(candidateRepeats[val]);
+            for (const auto &val : group) {
+                const u32 top = candidateRepeats[val];
+                exclusiveInfo.groupId[top] = exclusiveInfo.numGroups;
             }
+            exclusiveInfo.numGroups++;
         }
+
+        if (exclusive) {
+            groupIterOffset = streamStateSize;
+            streamStateSize += mmbit_size(exclusiveInfo.numGroups);
+        }
+
+        DEBUG_PRINTF("num of groups:%u\n", exclusiveInfo.numGroups);
     }
+    candidateRepeats.clear();
 
     DEBUG_PRINTF("reach %s exclusive %u\n", describeClass(cr).c_str(),
                  exclusive);
@@ -501,7 +552,7 @@ buildCastle(const CastleProto &proto,
 
     buildSubcastles(proto, subs, infos, patchSize, repeatInfoPair,
                     scratchStateSize, streamStateSize, tableSize,
-                    tables, sparseRepeats, exclusiveGroup, may_stale);
+                    tables, sparseRepeats, exclusiveInfo, may_stale);
 
     DEBUG_PRINTF("%zu subcastles may go stale\n", may_stale.size());
     vector<mmbit_sparse_iter> stale_iter;
@@ -536,9 +587,11 @@
     char *ptr = base_ptr;
     Castle *c = (Castle *)ptr;
     c->numRepeats = verify_u32(subs.size());
-    c->exclusive = exclusive;
-    c->pureExclusive = pureExclusive;
+    c->numGroups = exclusiveInfo.numGroups;
+    c->exclusive = verify_s8(exclusive);
     c->activeIdxSize = verify_u8(activeIdxSize);
+    c->activeOffset = verify_u32(c->numGroups * activeIdxSize);
+    c->groupIterOffset = groupIterOffset;
 
     writeCastleScanEngine(cr, c);
@@ -572,10 +625,10 @@
         }
 
         // set exclusive group info
-        if (exclusiveGroup.find(i) != exclusiveGroup.end()) {
-            sub->exclusive = 1;
+        if (contains(exclusiveInfo.groupId, i)) {
+            sub->exclusiveId = exclusiveInfo.groupId[i];
         } else {
-            sub->exclusive = 0;
+            sub->exclusiveId = numRepeats;
        }
    }
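[Editor's note] On the compile side, checkExclusion() now examines candidates in windows of CLIQUE_GRAPH_MAX_SIZE (whose value is defined elsewhere in castlecompile.cpp and is not shown in this patch) instead of refusing to analyse castles past that size. That is why findExclusivePair() rebases indices by `lower` before touching the window-local min_reset_dist. A toy sketch of just that windowing arithmetic, with the clique search stubbed out and a deliberately tiny window:

/* Illustrative only -- WINDOW stands in for CLIQUE_GRAPH_MAX_SIZE, and the
 * "clique" is faked as the whole window; the real code runs removeClique()
 * on a per-window graph. */
#include <stdio.h>
#include <stddef.h>

#define WINDOW 4 /* assumed value, chosen small so the demo prints */

int main(void) {
    const size_t numRepeats = 10; /* pretend all 10 candidates are exclusive */
    size_t lower = 0, total = 0;
    while (lower < numRepeats) {
        size_t upper = lower + WINDOW < numRepeats ? lower + WINDOW : numRepeats;
        size_t cliqueSize = upper - lower; /* stub for removeClique() result */
        if (cliqueSize > 1) {
            /* global id i maps to window-local slot i - lower, the same
             * rebasing findExclusivePair() applies to min_reset_dist */
            printf("group over repeats [%zu, %zu)\n", lower, upper);
            total += cliqueSize;
        }
        lower += WINDOW;
    }
    if (total == numRepeats) {
        printf("every repeat grouped -> PURE_EXCLUSIVE, multibit dropped\n");
    }
    return 0;
}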