Castle: exclusive analysis for multiple subcastle chunks

Apply clique analysis to chunks of subcastles when the number of
subcastles is large, and check the status of each chunk separately
at runtime.
Authored by Xiang Wang on 2015-12-11 13:27:53 -05:00; committed by Matthew Barr
parent 6bcccb4c5d
commit a7daa70942
3 changed files with 333 additions and 235 deletions
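
The compile-side idea in the commit message is to stop limiting exclusion analysis to a single clique over at most CLIQUE_GRAPH_MAX_SIZE candidates: the candidate repeats are split into fixed-size chunks, one exclusive group is extracted per chunk, and each group gets its own packed active-id slot that the runtime checks separately. Below is a minimal, self-contained sketch of that chunking step; the greedy clique growth and the names chunkedExclusionGroups/isExclusive are illustrative stand-ins for the CliqueGraph/removeClique/findExclusivePair machinery shown in the diff, not Hyperscan API.

// Sketch only: chunked exclusion analysis with a greedy clique per chunk.
#include <algorithm>
#include <cstddef>
#include <functional>
#include <vector>

static constexpr std::size_t CHUNK_SIZE = 50; // stands in for CLIQUE_GRAPH_MAX_SIZE

// Returns groups of mutually exclusive repeats; each group can then share a
// single packed "active subcastle id" slot in stream state.
std::vector<std::vector<std::size_t>>
chunkedExclusionGroups(std::size_t numCandidates,
                       const std::function<bool(std::size_t, std::size_t)> &isExclusive,
                       bool &pureExclusive) {
    std::vector<std::vector<std::size_t>> groups;
    std::size_t covered = 0;

    for (std::size_t lower = 0; lower < numCandidates; lower += CHUNK_SIZE) {
        const std::size_t upper = std::min(lower + CHUNK_SIZE, numCandidates);

        // Greedily grow a clique within [lower, upper): every new member must
        // be pairwise exclusive with every member already chosen.
        std::vector<std::size_t> clique;
        for (std::size_t i = lower; i < upper; i++) {
            bool ok = true;
            for (std::size_t j : clique) {
                if (!isExclusive(i, j)) {
                    ok = false;
                    break;
                }
            }
            if (ok) {
                clique.push_back(i);
            }
        }

        if (clique.size() > 1) { // a group of one saves nothing
            covered += clique.size();
            groups.push_back(std::move(clique));
        }
    }

    // If every candidate landed in an exclusive group, the non-exclusive
    // "active repeats" multibit can be dropped from stream state entirely.
    pureExclusive = (covered == numCandidates);
    return groups;
}

In the real checkExclusion() below, the per-chunk group comes from removeClique() on a CliqueGraph, and covering every repeat flips the castle to PURE_EXCLUSIVE so that the non-exclusive "active repeats" multibit is removed from stream state.
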

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -111,17 +111,22 @@ int castleReportCurrent(const struct Castle *c, struct mq *q) {
DEBUG_PRINTF("offset=%llu\n", offset);
if (c->exclusive) {
const u32 activeIdx = partial_load_u32(q->streamState,
c->activeIdxSize);
DEBUG_PRINTF("subcastle %u\n", activeIdx);
if (activeIdx < c->numRepeats && subCastleReportCurrent(c, q,
offset, activeIdx) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
u8 *active = (u8 *)q->streamState;
u8 *groups = active + c->groupIterOffset;
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
u8 *cur = active + i * c->activeIdxSize;
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
DEBUG_PRINTF("subcastle %u\n", activeIdx);
if (subCastleReportCurrent(c, q,
offset, activeIdx) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
}
}
if (!c->pureExclusive) {
const u8 *active = (const u8 *)q->streamState + c->activeIdxSize;
if (c->exclusive != PURE_EXCLUSIVE) {
const u8 *active = (const u8 *)q->streamState + c->activeOffset;
for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
DEBUG_PRINTF("subcastle %u\n", i);
@@ -168,9 +173,12 @@ char castleInAccept(const struct Castle *c, struct mq *q,
}
if (c->exclusive) {
const u32 activeIdx = partial_load_u32(q->streamState,
c->activeIdxSize);
if (activeIdx < c->numRepeats) {
u8 *active = (u8 *)q->streamState;
u8 *groups = active + c->groupIterOffset;
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
u8 *cur = active + i * c->activeIdxSize;
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
DEBUG_PRINTF("subcastle %u\n", activeIdx);
if (subCastleInAccept(c, q, report, offset, activeIdx)) {
return 1;
@@ -178,11 +186,10 @@ char castleInAccept(const struct Castle *c, struct mq *q,
}
}
if (!c->pureExclusive) {
const u8 *active = (const u8 *)q->streamState + c->activeIdxSize;
if (c->exclusive != PURE_EXCLUSIVE) {
const u8 *active = (const u8 *)q->streamState + c->activeOffset;
for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
i != MMB_INVALID;
i = mmbit_iterate(active, c->numRepeats, i)) {
i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
DEBUG_PRINTF("subcastle %u\n", i);
if (subCastleInAccept(c, q, report, offset, i)) {
return 1;
@@ -197,7 +204,6 @@ static really_inline
void subCastleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
void *full_state, void *stream_state,
const u32 subIdx) {
u8 *active = (u8 *)stream_state;
const struct SubCastle *sub = getSubCastle(c, subIdx);
const struct RepeatInfo *info = getRepeatInfo(sub);
@@ -207,10 +213,13 @@ void subCastleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
if (repeatHasMatch(info, rctrl, rstate, offset) == REPEAT_STALE) {
DEBUG_PRINTF("sub %u is stale at offset %llu\n", subIdx, offset);
if (sub->exclusive) {
partial_store_u32(stream_state, c->numRepeats, c->activeIdxSize);
if (sub->exclusiveId < c->numRepeats) {
u8 *active = (u8 *)stream_state;
u8 *groups = active + c->groupIterOffset;
mmbit_unset(groups, c->numGroups, sub->exclusiveId);
} else {
mmbit_unset(active + c->activeIdxSize, c->numRepeats, subIdx);
u8 *active = (u8 *)stream_state + c->activeOffset;
mmbit_unset(active, c->numRepeats, subIdx);
}
}
}
@@ -226,16 +235,20 @@ void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
}
if (c->exclusive) {
const u32 activeIdx = partial_load_u32(stream_state, c->activeIdxSize);
if (activeIdx < c->numRepeats) {
u8 *active = (u8 *)stream_state;
u8 *groups = active + c->groupIterOffset;
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
u8 *cur = active + i * c->activeIdxSize;
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
DEBUG_PRINTF("subcastle %u\n", activeIdx);
subCastleDeactivateStaleSubs(c, offset, full_state,
stream_state, activeIdx);
}
}
if (!c->pureExclusive) {
const u8 *active = (const u8 *)stream_state + c->activeIdxSize;
if (c->exclusive != PURE_EXCLUSIVE) {
const u8 *active = (const u8 *)stream_state + c->activeOffset;
const struct mmbit_sparse_iter *it
= (const void *)((const char *)c + c->staleIterOffset);
@@ -266,12 +279,20 @@ void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset,
info->packedCtrlSize;
char is_alive = 0;
if (sub->exclusive) {
const u32 activeIdx = partial_load_u32(stream_state, c->activeIdxSize);
is_alive = (activeIdx == top);
partial_store_u32(stream_state, top, c->activeIdxSize);
u8 *active = (u8 *)stream_state;
if (sub->exclusiveId < c->numRepeats) {
u8 *groups = active + c->groupIterOffset;
active += sub->exclusiveId * c->activeIdxSize;
if (mmbit_set(groups, c->numGroups, sub->exclusiveId)) {
const u32 activeIdx = partial_load_u32(active, c->activeIdxSize);
is_alive = (activeIdx == top);
}
if (!is_alive) {
partial_store_u32(active, top, c->activeIdxSize);
}
} else {
u8 *active = (u8 *)stream_state + c->activeIdxSize;
active += c->activeOffset;
is_alive = mmbit_set(active, c->numRepeats, top);
}
@@ -309,11 +330,11 @@ void subCastleFindMatch(const struct Castle *c, const u64a begin,
u64a match = repeatNextMatch(info, rctrl, rstate, begin);
if (match == 0) {
DEBUG_PRINTF("no more matches for sub %u\n", subIdx);
if (sub->exclusive) {
partial_store_u32(stream_state, c->numRepeats,
c->activeIdxSize);
if (sub->exclusiveId < c->numRepeats) {
u8 *groups = (u8 *)stream_state + c->groupIterOffset;
mmbit_unset(groups, c->numGroups, sub->exclusiveId);
} else {
u8 *active = (u8 *)stream_state + c->activeIdxSize;
u8 *active = (u8 *)stream_state + c->activeOffset;
mmbit_unset(active, c->numRepeats, subIdx);
}
return;
@@ -346,16 +367,20 @@ char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end,
*mloc = 0;
if (c->exclusive) {
const u32 activeIdx = partial_load_u32(stream_state, c->activeIdxSize);
if (activeIdx < c->numRepeats) {
u8 *active = (u8 *)stream_state;
u8 *groups = active + c->groupIterOffset;
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
u8 *cur = active + i * c->activeIdxSize;
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
DEBUG_PRINTF("subcastle %u\n", activeIdx);
subCastleFindMatch(c, begin, end, full_state, stream_state, mloc,
&found, activeIdx);
}
}
if (!c->pureExclusive) {
u8 *active = (u8 *)stream_state + c->activeIdxSize;
if (c->exclusive != PURE_EXCLUSIVE) {
u8 *active = (u8 *)stream_state + c->activeOffset;
for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
i != MMB_INVALID;
i = mmbit_iterate(active, c->numRepeats, i)) {
@@ -384,31 +409,38 @@ u64a subCastleNextMatch(const struct Castle *c, void *full_state,
return repeatNextMatch(info, rctrl, rstate, loc);
}
static really_inline
void set_matching(const struct Castle *c, const u64a match, u8 *active,
u8 *matching, const u32 active_size, const u32 active_id,
const u32 matching_id, u64a *offset, const u64a end) {
if (match == 0) {
DEBUG_PRINTF("no more matches\n");
mmbit_unset(active, active_size, active_id);
} else if (match > end) {
// If we had a local copy of the active mmbit, we could skip
// looking at this repeat again. But we don't, so we just move
// on.
} else if (match == *offset) {
mmbit_set(matching, c->numRepeats, matching_id);
} else if (match < *offset) {
// New minimum offset.
*offset = match;
mmbit_clear(matching, c->numRepeats);
mmbit_set(matching, c->numRepeats, matching_id);
}
}
static really_inline
void subCastleMatchLoop(const struct Castle *c, void *full_state,
void *stream_state, const u64a end,
const u64a loc, u64a *offset) {
u8 *active = (u8 *)stream_state + c->activeIdxSize;
u8 *active = (u8 *)stream_state + c->activeOffset;
u8 *matching = full_state;
mmbit_clear(matching, c->numRepeats);
for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
u64a match = subCastleNextMatch(c, full_state, stream_state, loc, i);
if (match == 0) {
DEBUG_PRINTF("no more matches\n");
mmbit_unset(active, c->numRepeats, i);
} else if (match > end) {
// If we had a local copy of the active mmbit, we could skip
// looking at this repeat again. But we don't, so we just move
// on.
} else if (match == *offset) {
mmbit_set(matching, c->numRepeats, i);
} else if (match < *offset) {
// New minimum offset.
*offset = match;
mmbit_clear(matching, c->numRepeats);
mmbit_set(matching, c->numRepeats, i);
}
set_matching(c, match, active, matching, c->numRepeats, i,
i, offset, end);
}
}
@@ -451,61 +483,37 @@ char castleMatchLoop(const struct Castle *c, const u64a begin, const u64a end,
// full_state (scratch).
u64a offset = end; // min offset of next match
char found = 0;
u32 activeIdx = 0;
mmbit_clear(matching, c->numRepeats);
if (c->exclusive) {
activeIdx = partial_load_u32(stream_state, c->activeIdxSize);
if (activeIdx < c->numRepeats) {
u32 i = activeIdx;
DEBUG_PRINTF("subcastle %u\n", i);
u8 *active = (u8 *)stream_state;
u8 *groups = active + c->groupIterOffset;
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
u8 *cur = active + i * c->activeIdxSize;
activeIdx = partial_load_u32(cur, c->activeIdxSize);
u64a match = subCastleNextMatch(c, full_state, stream_state,
loc, i);
if (match == 0) {
DEBUG_PRINTF("no more matches\n");
partial_store_u32(stream_state, c->numRepeats,
c->activeIdxSize);
} else if (match > end) {
// If we had a local copy of the active mmbit, we could skip
// looking at this repeat again. But we don't, so we just move
// on.
} else if (match <= offset) {
if (match < offset) {
// New minimum offset.
offset = match;
}
found = 1;
}
loc, activeIdx);
set_matching(c, match, groups, matching, c->numGroups, i,
activeIdx, &offset, end);
}
}
const char hasMatch = found;
u64a newOffset = offset;
if (!c->pureExclusive) {
if (c->exclusive != PURE_EXCLUSIVE) {
subCastleMatchLoop(c, full_state, stream_state,
end, loc, &newOffset);
DEBUG_PRINTF("offset=%llu\n", newOffset);
if (mmbit_any(matching, c->numRepeats)) {
found = 1;
if (subCastleFireMatch(c, full_state, stream_state,
cb, ctx, newOffset) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
}
end, loc, &offset);
}
if (!found) {
DEBUG_PRINTF("offset=%llu\n", offset);
if (!mmbit_any(matching, c->numRepeats)) {
DEBUG_PRINTF("no more matches\n");
break;
} else if (hasMatch && offset == newOffset) {
const struct SubCastle *sub = getSubCastle(c, activeIdx);
DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, activeIdx);
if (cb(offset, sub->report, ctx) == MO_HALT_MATCHING) {
DEBUG_PRINTF("caller told us to halt\n");
return MO_HALT_MATCHING;
}
}
loc = newOffset;
if (subCastleFireMatch(c, full_state, stream_state,
cb, ctx, offset) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
loc = offset;
}
return MO_CONTINUE_MATCHING;
@@ -564,7 +572,8 @@ char castleScanShufti(const struct Castle *c, const u8 *buf, const size_t begin,
static really_inline
char castleScanTruffle(const struct Castle *c, const u8 *buf, const size_t begin,
const size_t end, size_t *loc) {
const u8 *ptr = truffleExec(c->u.truffle.mask1, c->u.truffle.mask2, buf + begin, buf + end);
const u8 *ptr = truffleExec(c->u.truffle.mask1, c->u.truffle.mask2,
buf + begin, buf + end);
if (ptr == buf + end) {
DEBUG_PRINTF("no escape found\n");
return 0;
@@ -725,10 +734,11 @@ static really_inline
void clear_repeats(const struct Castle *c, const struct mq *q, u8 *active) {
DEBUG_PRINTF("clearing active repeats due to escape\n");
if (c->exclusive) {
partial_store_u32(q->streamState, c->numRepeats, c->activeIdxSize);
u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
mmbit_clear(groups, c->numGroups);
}
if (!c->pureExclusive) {
if (c->exclusive != PURE_EXCLUSIVE) {
mmbit_clear(active, c->numRepeats);
}
}
@@ -755,7 +765,7 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
return 1;
}
u8 *active = (u8 *)q->streamState + c->activeIdxSize; // active multibit
u8 *active = (u8 *)q->streamState + c->activeOffset;// active multibit
assert(q->cur + 1 < q->end); // require at least two items
assert(q_cur_type(q) == MQE_START);
@@ -769,14 +779,8 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
char found = 0;
if (c->exclusive) {
const u32 activeIdx = partial_load_u32(q->streamState,
c->activeIdxSize);
if (activeIdx < c->numRepeats) {
found = 1;
} else if (c->pureExclusive) {
DEBUG_PRINTF("castle is dead\n");
goto scan_done;
}
u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
found = mmbit_any(groups, c->numGroups);
}
if (!found && !mmbit_any(active, c->numRepeats)) {
@@ -842,10 +846,9 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
}
if (c->exclusive) {
const u32 activeIdx = partial_load_u32(q->streamState,
c->activeIdxSize);
if (c->pureExclusive || activeIdx < c->numRepeats) {
return activeIdx < c->numRepeats;
u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
if (mmbit_any_precise(groups, c->numGroups)) {
return 1;
}
}
@@ -905,7 +908,7 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) {
assert(q_cur_type(q) == MQE_START);
const struct Castle *c = getImplNfa(n);
u8 *active = (u8 *)q->streamState + c->activeIdxSize;
u8 *active = (u8 *)q->streamState + c->activeOffset;
u64a end_offset = q_last_loc(q) + q->offset;
s64a last_kill_loc = castleLastKillLoc(c, q);
@@ -938,14 +941,9 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) {
char found = 0;
if (c->exclusive) {
const u32 activeIdx = partial_load_u32(q->streamState,
c->activeIdxSize);
if (activeIdx < c->numRepeats) {
found = 1;
} else if (c->pureExclusive) {
DEBUG_PRINTF("castle is dead\n");
return 0;
}
u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
found = mmbit_any_precise(groups, c->numGroups);
}
if (!found && !mmbit_any_precise(active, c->numRepeats)) {
@@ -988,11 +986,12 @@ char nfaExecCastle0_queueInitState(UNUSED const struct NFA *n, struct mq *q) {
const struct Castle *c = getImplNfa(n);
assert(q->streamState);
if (c->exclusive) {
partial_store_u32(q->streamState, c->numRepeats, c->activeIdxSize);
u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
mmbit_clear(groups, c->numGroups);
}
if (!c->pureExclusive) {
u8 *active = (u8 *)q->streamState + c->activeIdxSize;
if (c->exclusive != PURE_EXCLUSIVE) {
u8 *active = (u8 *)q->streamState + c->activeOffset;
mmbit_clear(active, c->numRepeats);
}
return 0;
@@ -1006,11 +1005,12 @@ char nfaExecCastle0_initCompressedState(const struct NFA *n, UNUSED u64a offset,
const struct Castle *c = getImplNfa(n);
if (c->exclusive) {
partial_store_u32(state, c->numRepeats, c->activeIdxSize);
u8 *groups = (u8 *)state + c->groupIterOffset;
mmbit_clear(groups, c->numGroups);
}
if (!c->pureExclusive) {
u8 *active = (u8 *)state + c->activeIdxSize;
if (c->exclusive != PURE_EXCLUSIVE) {
u8 *active = (u8 *)state + c->activeOffset;
mmbit_clear(active, c->numRepeats);
}
return 0;
@@ -1041,16 +1041,19 @@ char nfaExecCastle0_queueCompressState(const struct NFA *n, const struct mq *q,
const u64a offset = q->offset + loc;
DEBUG_PRINTF("offset=%llu\n", offset);
if (c->exclusive) {
const u32 activeIdx = partial_load_u32(q->streamState,
c->activeIdxSize);
if (activeIdx < c->numRepeats) {
u8 *active = (u8 *)q->streamState;
u8 *groups = active + c->groupIterOffset;
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
u8 *cur = active + i * c->activeIdxSize;
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
DEBUG_PRINTF("packing state for sub %u\n", activeIdx);
subCastleQueueCompressState(c, activeIdx, q, offset);
}
}
if (!c->pureExclusive) {
const u8 *active = (const u8 *)q->streamState + c->activeIdxSize;
if (c->exclusive != PURE_EXCLUSIVE) {
const u8 *active = (const u8 *)q->streamState + c->activeOffset;
for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
DEBUG_PRINTF("packing state for sub %u\n", i);
@@ -1084,15 +1087,19 @@ char nfaExecCastle0_expandState(const struct NFA *n, void *dest,
const struct Castle *c = getImplNfa(n);
if (c->exclusive) {
const u32 activeIdx = partial_load_u32(src, c->activeIdxSize);
if (activeIdx < c->numRepeats) {
const u8 *active = (const u8 *)src;
const u8 *groups = active + c->groupIterOffset;
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
const u8 *cur = active + i * c->activeIdxSize;
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
subCastleExpandState(c, activeIdx, dest, src, offset);
}
}
if (!c->pureExclusive) {
if (c->exclusive != PURE_EXCLUSIVE) {
// Unpack state for all active repeats.
const u8 *active = (const u8 *)src + c->activeIdxSize;
const u8 *active = (const u8 *)src + c->activeOffset;
for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
subCastleExpandState(c, i, dest, src, offset);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -42,7 +42,9 @@ struct SubCastle {
u32 streamStateOffset; //!< offset within stream state
u32 repeatInfoOffset; //!< offset of RepeatInfo structure
// relative to the start of SubCastle
char exclusive; //!< exclusive info of this SubCastle
u32 exclusiveId; //!< exclusive group id of this SubCastle,
// set to the number of SubCastles in Castle
// if it is not exclusive
};
#define CASTLE_DOT 0
@@ -51,6 +53,12 @@ struct SubCastle {
#define CASTLE_SHUFTI 3
#define CASTLE_TRUFFLE 4
enum ExclusiveType {
NOT_EXCLUSIVE, //!< no subcastles are exclusive
EXCLUSIVE, //!< a subset of subcastles are exclusive
PURE_EXCLUSIVE //!< all subcastles are exclusive
};
/**
* \brief Castle engine structure.
*
@@ -66,26 +74,56 @@ struct SubCastle {
* - sparse iterator for subcastles that may be stale
*
* Castle stores an "active repeats" multibit in stream state, followed by the
* packed repeat state for each SubCastle. If all SubCastles are mutual
* exclusive, we store current active SubCastle id instead of "active repeats"
* multibit in stream state. If there are both exclusive and non-exclusive
* SubCastle groups, we use an active id for the exclusive group and a multibit
* for the non-exclusive group.
* packed repeat state for each SubCastle. If there are both exclusive and
* non-exclusive SubCastle groups, we use an active id for each exclusive group
* and a multibit for the non-exclusive group. We also store an "active
* exclusive groups" multibit for exclusive groups. If all SubCastles are mutual
* exclusive, we remove "active repeats" multibit from stream state.
* * Castle stream state:
* *
* * |---|
* * | | active subengine id for exclusive group 1
* * |---|
* * | | active subengine id for exclusive group 2(if necessary)
* * |---|
* * ...
* * |---|
* * | | "active repeats" multibit for non-exclusive subcastles
* * | | (if not all subcastles are exclusive)
* * |---|
* * | | active multibit for exclusive groups
* * | |
* * |---|
* * ||-|| common pool of stream state for exclusive group 1
* * ||-||
* * |---|
* * ||-|| common pool of stream state for exclusive group 2(if necessary)
* * ||-||
* * |---|
* * ...
* * |---|
* * | | stream state for each non-exclusive subcastles
* * ...
* * | |
* * |---|
*
* In full state (stored in scratch space) it stores a temporary multibit over
* the repeats (used by \ref castleMatchLoop), followed by the repeat control
* blocks for each SubCastle. If all SubCastles are mutual exclusive, we only
* need to store the repeat control blocks for each SubCastle.
* blocks for each SubCastle.
*/
struct ALIGN_AVX_DIRECTIVE Castle {
u32 numRepeats;
u8 type; //!< tells us which scanning mechanism (below) to use
char exclusive; //!< tells us if there are mutual exclusive SubCastles
char pureExclusive; //!< tells us if all SubCastles are mutual exclusive
u8 activeIdxSize; //!< number of bytes in stream state to store
// active SubCastle id for exclusive mode
u32 staleIterOffset; //<! offset to a sparse iterator to check for stale
// sub castles
u32 numRepeats; //!< number of repeats in Castle
u32 numGroups; //!< number of exclusive groups
u8 type; //!< tells us which scanning mechanism (below) to use
u8 exclusive; //!< tells us if there are mutual exclusive SubCastles
u8 activeIdxSize; //!< number of bytes in stream state to store
// active SubCastle id for exclusive mode
u32 activeOffset; //!< offset to active multibit for non-exclusive
// SubCastles
u32 staleIterOffset; //!< offset to a sparse iterator to check for stale
// sub castles
u32 groupIterOffset; //!< offset to a iterator to check the aliveness of
// exclusive groups
union {
struct {
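
With this layout, every runtime routine in the castle.c hunks above follows the same traversal: walk the "active exclusive groups" multibit at groupIterOffset, unpack each live group's current subcastle id from its activeIdxSize-wide slot, and then, unless the castle is PURE_EXCLUSIVE, walk the ordinary "active repeats" multibit at activeOffset. A simplified model of that traversal, with plain vectors as stand-ins for the mmbit and partial_load_u32 helpers (illustrative types, not the real structures):

// Sketch only: simplified model of the per-group stream-state traversal.
#include <cstddef>
#include <cstdint>
#include <vector>

struct CastleModel {
    std::vector<std::uint32_t> activeIdPerGroup; // one packed id slot per exclusive group
    std::vector<bool> groupAlive;                // "active exclusive groups" multibit
    std::vector<bool> repeatAlive;               // non-exclusive "active repeats" multibit
    bool pureExclusive;                          // true when every subcastle is exclusive
};

// Visit every live subcastle the way the runtime loops do: first the current
// subcastle of each live exclusive group, then (unless the castle is purely
// exclusive) each live non-exclusive subcastle.
template <typename Visit>
void forEachLiveSubcastle(const CastleModel &c, Visit visit) {
    for (std::size_t g = 0; g < c.groupAlive.size(); g++) {
        if (c.groupAlive[g]) {
            visit(c.activeIdPerGroup[g]); // id unpacked from the group's slot
        }
    }
    if (!c.pureExclusive) {
        for (std::size_t i = 0; i < c.repeatAlive.size(); i++) {
            if (c.repeatAlive[i]) {
                visit(static_cast<std::uint32_t>(i));
            }
        }
    }
}
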

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -245,17 +245,18 @@ vector<u32> removeClique(CliqueGraph &cg) {
// the end locations where it overlaps with other literals,
// then the literals are mutual exclusive
static
bool findExclusivePair(const u32 id1, const u32 id2,
bool findExclusivePair(const size_t id1, const size_t id2,
const size_t lower,
const vector<vector<size_t>> &min_reset_dist,
const vector<vector<vector<CharReach>>> &triggers) {
const auto &triggers1 = triggers[id1];
const auto &triggers2 = triggers[id2];
for (u32 i = 0; i < triggers1.size(); ++i) {
for (u32 j = 0; j < triggers2.size(); ++j) {
for (size_t i = 0; i < triggers1.size(); ++i) {
for (size_t j = 0; j < triggers2.size(); ++j) {
if (!literalOverlap(triggers1[i], triggers2[j],
min_reset_dist[id2][j]) ||
min_reset_dist[id2 - lower][j]) ||
!literalOverlap(triggers2[j], triggers1[i],
min_reset_dist[id1][i])) {
min_reset_dist[id1 - lower][i])) {
return false;
}
}
@@ -264,40 +265,75 @@ bool findExclusivePair(const u32 id1, const u32 id2,
}
static
vector<u32> checkExclusion(const CharReach &cr,
const vector<vector<vector<CharReach>>> &triggers) {
vector<u32> group;
if (!triggers.size() || triggers.size() == 1) {
return group;
}
vector<vector<u32>> checkExclusion(u32 &streamStateSize,
const CharReach &cr,
const vector<vector<vector<CharReach>>> &triggers,
enum ExclusiveType &exclusive,
const size_t numRepeats) {
vector<vector<u32>> groups;
size_t trigSize = triggers.size();
DEBUG_PRINTF("trigSize %zu\n", trigSize);
vector<vector<size_t>> min_reset_dist;
// get min reset distance for each repeat
for (auto it = triggers.begin(); it != triggers.end(); it++) {
const vector<size_t> &tmp_dist = minResetDistToEnd(*it, cr);
min_reset_dist.push_back(tmp_dist);
}
size_t lower = 0;
size_t total = 0;
while (lower < trigSize) {
vector<CliqueVertex> vertices;
unique_ptr<CliqueGraph> cg = make_unique<CliqueGraph>();
vector<CliqueVertex> vertices;
unique_ptr<CliqueGraph> cg = make_unique<CliqueGraph>();
for (u32 i = 0; i < triggers.size(); ++i) {
CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg);
vertices.push_back(v);
}
vector<vector<size_t>> min_reset_dist;
size_t upper = min(lower + CLIQUE_GRAPH_MAX_SIZE, trigSize);
// get min reset distance for each repeat
for (size_t i = lower; i < upper; i++) {
CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg);
vertices.push_back(v);
// find exclusive pair for each repeat
for (u32 i = 0; i < triggers.size(); ++i) {
CliqueVertex s = vertices[i];
for (u32 j = i + 1; j < triggers.size(); ++j) {
if (findExclusivePair(i, j, min_reset_dist, triggers)) {
CliqueVertex d = vertices[j];
add_edge(s, d, *cg);
const vector<size_t> &tmp_dist =
minResetDistToEnd(triggers[i], cr);
min_reset_dist.push_back(tmp_dist);
}
// find exclusive pair for each repeat
for (size_t i = lower; i < upper; i++) {
CliqueVertex s = vertices[i - lower];
for (size_t j = i + 1; j < upper; j++) {
if (findExclusivePair(i, j, lower, min_reset_dist,
triggers)) {
CliqueVertex d = vertices[j - lower];
add_edge(s, d, *cg);
}
}
}
}
// find the largest exclusive group
return removeClique(*cg);
// find the largest exclusive group
auto clique = removeClique(*cg);
size_t cliqueSize = clique.size();
if (cliqueSize > 1) {
groups.push_back(clique);
exclusive = EXCLUSIVE;
total += cliqueSize;
}
lower += CLIQUE_GRAPH_MAX_SIZE;
}
DEBUG_PRINTF("clique size %lu, num of repeats %lu\n",
total, numRepeats);
if (total == numRepeats) {
exclusive = PURE_EXCLUSIVE;
streamStateSize = 0;
};
return groups;
}
namespace {
struct ExclusiveInfo {
/** Mapping between top and exclusive group id */
map<u32, u32> groupId;
/** Number of exclusive groups */
u32 numGroups = 0;
};
}
static
@@ -306,10 +342,12 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
const vector<pair<depth, bool>> &repeatInfoPair,
u32 &scratchStateSize, u32 &streamStateSize,
u32 &tableSize, vector<u64a> &tables, u32 &sparseRepeats,
const set<u32> &exclusiveGroup, vector<u32> &may_stale) {
const ExclusiveInfo &exclusiveInfo,
vector<u32> &may_stale) {
u32 i = 0;
u32 maxStreamSize = 0;
bool exclusive = exclusiveGroup.size() > 1;
const auto &groupId = exclusiveInfo.groupId;
const auto &numGroups = exclusiveInfo.numGroups;
vector<u32> maxStreamSize(numGroups, 0);
for (auto it = proto.repeats.begin(), ite = proto.repeats.end();
it != ite; ++it, ++i) {
const PureRepeat &pr = it->second;
@@ -330,8 +368,9 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
RepeatInfo &info = infos[i];
// handle exclusive case differently
if (exclusive && exclusiveGroup.find(i) != exclusiveGroup.end()) {
maxStreamSize = MAX(maxStreamSize, rsi.packedCtrlSize);
if (contains(groupId, i)) {
u32 id = groupId.at(i);
maxStreamSize[id] = MAX(maxStreamSize[id], rsi.packedCtrlSize);
} else {
subScratchStateSize = verify_u32(sizeof(RepeatControl));
subStreamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize);
@@ -366,25 +405,34 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
sub.report = *pr.reports.begin();
if (rtype == REPEAT_SPARSE_OPTIMAL_P) {
for (u32 j = 0; j < rsi.patchSize; j++) {
tables.push_back(rsi.table[j]);
}
sparseRepeats++;
patchSize[i] = rsi.patchSize;
tableSize += rsi.patchSize;
for (u32 j = 0; j < rsi.patchSize; j++) {
tables.push_back(rsi.table[j]);
}
sparseRepeats++;
patchSize[i] = rsi.patchSize;
tableSize += rsi.patchSize;
}
}
if (exclusive) {
for (auto k : exclusiveGroup) {
SubCastle &sub = subs[k];
RepeatInfo &info = infos[k];
info.packedCtrlSize = maxStreamSize;
vector<u32> scratchOffset(numGroups, 0);
vector<u32> streamOffset(numGroups, 0);
for (const auto &j : groupId) {
u32 top = j.first;
u32 id = j.second;
SubCastle &sub = subs[top];
RepeatInfo &info = infos[top];
info.packedCtrlSize = maxStreamSize[id];
if (!scratchOffset[id]) {
sub.fullStateOffset = scratchStateSize;
sub.streamStateOffset = streamStateSize;
scratchOffset[id] = scratchStateSize;
streamOffset[id] = streamStateSize;
scratchStateSize += verify_u32(sizeof(RepeatControl));
streamStateSize += maxStreamSize[id];
} else {
sub.fullStateOffset = scratchOffset[id];
sub.streamStateOffset = streamOffset[id];
}
scratchStateSize += verify_u32(sizeof(RepeatControl));
streamStateSize += maxStreamSize;
}
}
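
This hunk is where the state saving lands: members of one exclusive group are never live simultaneously, so they share a single RepeatControl slot in scratch and a single stream-state slot sized for the group's largest packed control block; the first member encountered claims the offsets and later members reuse them. A hedged, self-contained sketch of that assignment with simplified stand-in types (not the real SubCastle/RepeatInfo handling):

// Sketch only: per-group sharing of scratch and stream offsets.
#include <cstdint>
#include <map>
#include <vector>

struct SubOffsets {
    std::uint32_t fullStateOffset;   // offset into scratch (RepeatControl pool)
    std::uint32_t streamStateOffset; // offset into packed stream state
};

// groupId maps each exclusive top to its group id; maxStreamSize holds the
// largest packed control size seen in each group.
void assignExclusiveOffsets(const std::map<std::uint32_t, std::uint32_t> &groupId,
                            const std::vector<std::uint32_t> &maxStreamSize,
                            std::uint32_t repeatControlSize,
                            std::uint32_t &scratchStateSize,
                            std::uint32_t &streamStateSize,
                            std::map<std::uint32_t, SubOffsets> &subs) {
    std::vector<bool> claimed(maxStreamSize.size(), false);
    std::vector<SubOffsets> slot(maxStreamSize.size());

    for (const auto &entry : groupId) {
        const std::uint32_t top = entry.first;
        const std::uint32_t id = entry.second;
        if (!claimed[id]) {
            // The first member of a group claims a fresh scratch slot and a
            // fresh stream slot sized for the group's largest control block.
            slot[id] = {scratchStateSize, streamStateSize};
            scratchStateSize += repeatControlSize;
            streamStateSize += maxStreamSize[id];
            claimed[id] = true;
        }
        subs[top] = slot[id]; // later members reuse the shared slot
    }
}
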
@@ -423,8 +471,9 @@ buildCastle(const CastleProto &proto,
depth maxWidth(0);
u32 i = 0;
vector<u32> candidateRepeats;
ExclusiveInfo exclusiveInfo;
vector<vector<vector<CharReach>>> candidateTriggers;
vector<u32> candidateRepeats;
vector<pair<depth, bool>> repeatInfoPair;
for (auto it = proto.repeats.begin(), ite = proto.repeats.end();
it != ite; ++it, ++i) {
@@ -459,38 +508,40 @@ buildCastle(const CastleProto &proto,
repeatInfoPair.push_back(make_pair(min_period, is_reset));
if (is_reset && candidateRepeats.size() < CLIQUE_GRAPH_MAX_SIZE) {
candidateTriggers.push_back(triggers.at(top));
candidateRepeats.push_back(i);
}
candidateTriggers.push_back(triggers.at(top));
candidateRepeats.push_back(i);
}
// Case 1: exclusive repeats
bool exclusive = false;
bool pureExclusive = false;
enum ExclusiveType exclusive = NOT_EXCLUSIVE;
u32 activeIdxSize = 0;
set<u32> exclusiveGroup;
u32 groupIterOffset = 0;
if (cc.grey.castleExclusive) {
vector<u32> tmpGroup = checkExclusion(cr, candidateTriggers);
const u32 exclusiveSize = tmpGroup.size();
if (exclusiveSize > 1) {
// Case 1: mutual exclusive repeats group found, initialize state
// sizes
exclusive = true;
auto cliqueGroups =
checkExclusion(streamStateSize, cr, candidateTriggers,
exclusive, numRepeats);
for (const auto &group : cliqueGroups) {
// mutual exclusive repeats group found,
// update state sizes
activeIdxSize = calcPackedBytes(numRepeats + 1);
if (exclusiveSize == numRepeats) {
pureExclusive = true;
streamStateSize = 0;
scratchStateSize = 0;
}
streamStateSize += activeIdxSize;
// replace with top values
for (const auto &val : tmpGroup) {
exclusiveGroup.insert(candidateRepeats[val]);
for (const auto &val : group) {
const u32 top = candidateRepeats[val];
exclusiveInfo.groupId[top] = exclusiveInfo.numGroups;
}
exclusiveInfo.numGroups++;
}
if (exclusive) {
groupIterOffset = streamStateSize;
streamStateSize += mmbit_size(exclusiveInfo.numGroups);
}
DEBUG_PRINTF("num of groups:%u\n", exclusiveInfo.numGroups);
}
candidateRepeats.clear();
DEBUG_PRINTF("reach %s exclusive %u\n", describeClass(cr).c_str(),
exclusive);
@@ -501,7 +552,7 @@ buildCastle(const CastleProto &proto,
buildSubcastles(proto, subs, infos, patchSize, repeatInfoPair,
scratchStateSize, streamStateSize, tableSize,
tables, sparseRepeats, exclusiveGroup, may_stale);
tables, sparseRepeats, exclusiveInfo, may_stale);
DEBUG_PRINTF("%zu subcastles may go stale\n", may_stale.size());
vector<mmbit_sparse_iter> stale_iter;
@@ -536,9 +587,11 @@ buildCastle(const CastleProto &proto,
char *ptr = base_ptr;
Castle *c = (Castle *)ptr;
c->numRepeats = verify_u32(subs.size());
c->exclusive = exclusive;
c->pureExclusive = pureExclusive;
c->numGroups = exclusiveInfo.numGroups;
c->exclusive = verify_s8(exclusive);
c->activeIdxSize = verify_u8(activeIdxSize);
c->activeOffset = verify_u32(c->numGroups * activeIdxSize);
c->groupIterOffset = groupIterOffset;
writeCastleScanEngine(cr, c);
@@ -572,10 +625,10 @@ buildCastle(const CastleProto &proto,
}
// set exclusive group info
if (exclusiveGroup.find(i) != exclusiveGroup.end()) {
sub->exclusive = 1;
if (contains(exclusiveInfo.groupId, i)) {
sub->exclusiveId = exclusiveInfo.groupId[i];
} else {
sub->exclusive = 0;
sub->exclusiveId = numRepeats;
}
}