Castle: exclusive analysis for multiple subcastle chunks

If the number of subcastles is large, apply the clique analysis to chunks of
subcastles rather than to the whole set, and check the status of each chunk
separately at runtime.
Xiang Wang 2015-12-11 13:27:53 -05:00 committed by Matthew Barr
parent 6bcccb4c5d
commit a7daa70942
3 changed files with 333 additions and 235 deletions
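
In outline, the compile-side change splits the candidate repeats into fixed-size chunks before running the clique analysis, so a castle with many subcastles no longer builds one huge graph; each chunk can contribute its own exclusive group. The sketch below is illustrative only and is not the Hyperscan code: the greedy clique search, analyseChunks and MAX_CHUNK are invented stand-ins for removeClique, checkExclusion and CLIQUE_GRAPH_MAX_SIZE.

// Sketch only: chunk the candidate repeats, analyse each chunk on its own,
// and let each chunk contribute at most one exclusive group.
#include <algorithm>
#include <cstddef>
#include <functional>
#include <vector>

using Group = std::vector<std::size_t>;
using ExclusivePred = std::function<bool(std::size_t, std::size_t)>;

// Stand-in for CLIQUE_GRAPH_MAX_SIZE; the real constant lives in the compiler.
static const std::size_t MAX_CHUNK = 8;

// Greedy clique approximation inside [lower, upper): every member of the
// returned group is pairwise exclusive with every other member.
static Group largestCliqueInChunk(std::size_t lower, std::size_t upper,
                                  const ExclusivePred &exclusivePair) {
    Group best;
    for (std::size_t seed = lower; seed < upper; seed++) {
        Group clique{seed};
        for (std::size_t cand = lower; cand < upper; cand++) {
            if (cand == seed) {
                continue;
            }
            bool compatible = true;
            for (std::size_t member : clique) {
                if (!exclusivePair(member, cand)) {
                    compatible = false;
                    break;
                }
            }
            if (compatible) {
                clique.push_back(cand);
            }
        }
        if (clique.size() > best.size()) {
            best = clique;
        }
    }
    return best;
}

// One pass over the candidates in fixed-size chunks, mirroring the shape of
// the new checkExclusion() loop: analyse each chunk, keep its best clique.
std::vector<Group> analyseChunks(std::size_t numCandidates,
                                 const ExclusivePred &exclusivePair) {
    std::vector<Group> groups;
    for (std::size_t lower = 0; lower < numCandidates; lower += MAX_CHUNK) {
        std::size_t upper = std::min(lower + MAX_CHUNK, numCandidates);
        Group g = largestCliqueInChunk(lower, upper, exclusivePair);
        if (g.size() > 1) { // singleton "groups" buy nothing
            groups.push_back(g);
        }
    }
    return groups;
}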

View File

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -111,17 +111,22 @@ int castleReportCurrent(const struct Castle *c, struct mq *q) {
     DEBUG_PRINTF("offset=%llu\n", offset);
     if (c->exclusive) {
-        const u32 activeIdx = partial_load_u32(q->streamState,
-                                               c->activeIdxSize);
-        DEBUG_PRINTF("subcastle %u\n", activeIdx);
-        if (activeIdx < c->numRepeats && subCastleReportCurrent(c, q,
-                offset, activeIdx) == MO_HALT_MATCHING) {
-            return MO_HALT_MATCHING;
-        }
+        u8 *active = (u8 *)q->streamState;
+        u8 *groups = active + c->groupIterOffset;
+        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
+             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
+            u8 *cur = active + i * c->activeIdxSize;
+            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
+            DEBUG_PRINTF("subcastle %u\n", activeIdx);
+            if (subCastleReportCurrent(c, q,
+                    offset, activeIdx) == MO_HALT_MATCHING) {
+                return MO_HALT_MATCHING;
+            }
+        }
     }
-    if (!c->pureExclusive) {
-        const u8 *active = (const u8 *)q->streamState + c->activeIdxSize;
+    if (c->exclusive != PURE_EXCLUSIVE) {
+        const u8 *active = (const u8 *)q->streamState + c->activeOffset;
         for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
              i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
             DEBUG_PRINTF("subcastle %u\n", i);
@@ -168,9 +173,12 @@ char castleInAccept(const struct Castle *c, struct mq *q,
     }
     if (c->exclusive) {
-        const u32 activeIdx = partial_load_u32(q->streamState,
-                                               c->activeIdxSize);
-        if (activeIdx < c->numRepeats) {
+        u8 *active = (u8 *)q->streamState;
+        u8 *groups = active + c->groupIterOffset;
+        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
+             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
+            u8 *cur = active + i * c->activeIdxSize;
+            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
             DEBUG_PRINTF("subcastle %u\n", activeIdx);
             if (subCastleInAccept(c, q, report, offset, activeIdx)) {
                 return 1;
@@ -178,11 +186,10 @@ char castleInAccept(const struct Castle *c, struct mq *q,
         }
     }
-    if (!c->pureExclusive) {
-        const u8 *active = (const u8 *)q->streamState + c->activeIdxSize;
+    if (c->exclusive != PURE_EXCLUSIVE) {
+        const u8 *active = (const u8 *)q->streamState + c->activeOffset;
         for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
-             i != MMB_INVALID;
-             i = mmbit_iterate(active, c->numRepeats, i)) {
+             i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
             DEBUG_PRINTF("subcastle %u\n", i);
             if (subCastleInAccept(c, q, report, offset, i)) {
                 return 1;
@@ -197,7 +204,6 @@ static really_inline
 void subCastleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
                                   void *full_state, void *stream_state,
                                   const u32 subIdx) {
-    u8 *active = (u8 *)stream_state;
     const struct SubCastle *sub = getSubCastle(c, subIdx);
     const struct RepeatInfo *info = getRepeatInfo(sub);
@@ -207,10 +213,13 @@ void subCastleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
     if (repeatHasMatch(info, rctrl, rstate, offset) == REPEAT_STALE) {
         DEBUG_PRINTF("sub %u is stale at offset %llu\n", subIdx, offset);
-        if (sub->exclusive) {
-            partial_store_u32(stream_state, c->numRepeats, c->activeIdxSize);
+        if (sub->exclusiveId < c->numRepeats) {
+            u8 *active = (u8 *)stream_state;
+            u8 *groups = active + c->groupIterOffset;
+            mmbit_unset(groups, c->numGroups, sub->exclusiveId);
         } else {
-            mmbit_unset(active + c->activeIdxSize, c->numRepeats, subIdx);
+            u8 *active = (u8 *)stream_state + c->activeOffset;
+            mmbit_unset(active, c->numRepeats, subIdx);
         }
     }
 }
@@ -226,16 +235,20 @@ void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
     }
     if (c->exclusive) {
-        const u32 activeIdx = partial_load_u32(stream_state, c->activeIdxSize);
-        if (activeIdx < c->numRepeats) {
+        u8 *active = (u8 *)stream_state;
+        u8 *groups = active + c->groupIterOffset;
+        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
+             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
+            u8 *cur = active + i * c->activeIdxSize;
+            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
             DEBUG_PRINTF("subcastle %u\n", activeIdx);
             subCastleDeactivateStaleSubs(c, offset, full_state,
                                          stream_state, activeIdx);
         }
     }
-    if (!c->pureExclusive) {
-        const u8 *active = (const u8 *)stream_state + c->activeIdxSize;
+    if (c->exclusive != PURE_EXCLUSIVE) {
+        const u8 *active = (const u8 *)stream_state + c->activeOffset;
         const struct mmbit_sparse_iter *it
             = (const void *)((const char *)c + c->staleIterOffset);
@@ -266,12 +279,20 @@ void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset,
                           info->packedCtrlSize;
     char is_alive = 0;
-    if (sub->exclusive) {
-        const u32 activeIdx = partial_load_u32(stream_state, c->activeIdxSize);
-        is_alive = (activeIdx == top);
-        partial_store_u32(stream_state, top, c->activeIdxSize);
+    u8 *active = (u8 *)stream_state;
+    if (sub->exclusiveId < c->numRepeats) {
+        u8 *groups = active + c->groupIterOffset;
+        active += sub->exclusiveId * c->activeIdxSize;
+        if (mmbit_set(groups, c->numGroups, sub->exclusiveId)) {
+            const u32 activeIdx = partial_load_u32(active, c->activeIdxSize);
+            is_alive = (activeIdx == top);
+        }
+        if (!is_alive) {
+            partial_store_u32(active, top, c->activeIdxSize);
+        }
     } else {
-        u8 *active = (u8 *)stream_state + c->activeIdxSize;
+        active += c->activeOffset;
         is_alive = mmbit_set(active, c->numRepeats, top);
     }
@@ -309,11 +330,11 @@ void subCastleFindMatch(const struct Castle *c, const u64a begin,
     u64a match = repeatNextMatch(info, rctrl, rstate, begin);
     if (match == 0) {
         DEBUG_PRINTF("no more matches for sub %u\n", subIdx);
-        if (sub->exclusive) {
-            partial_store_u32(stream_state, c->numRepeats,
-                              c->activeIdxSize);
+        if (sub->exclusiveId < c->numRepeats) {
+            u8 *groups = (u8 *)stream_state + c->groupIterOffset;
+            mmbit_unset(groups, c->numGroups, sub->exclusiveId);
         } else {
-            u8 *active = (u8 *)stream_state + c->activeIdxSize;
+            u8 *active = (u8 *)stream_state + c->activeOffset;
             mmbit_unset(active, c->numRepeats, subIdx);
         }
         return;
@@ -346,16 +367,20 @@ char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end,
     *mloc = 0;
     if (c->exclusive) {
-        const u32 activeIdx = partial_load_u32(stream_state, c->activeIdxSize);
-        if (activeIdx < c->numRepeats) {
+        u8 *active = (u8 *)stream_state;
+        u8 *groups = active + c->groupIterOffset;
+        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
+             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
+            u8 *cur = active + i * c->activeIdxSize;
+            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
             DEBUG_PRINTF("subcastle %u\n", activeIdx);
             subCastleFindMatch(c, begin, end, full_state, stream_state, mloc,
                                &found, activeIdx);
         }
     }
-    if (!c->pureExclusive) {
-        u8 *active = (u8 *)stream_state + c->activeIdxSize;
+    if (c->exclusive != PURE_EXCLUSIVE) {
+        u8 *active = (u8 *)stream_state + c->activeOffset;
         for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
              i != MMB_INVALID;
              i = mmbit_iterate(active, c->numRepeats, i)) {
@@ -385,30 +410,37 @@ u64a subCastleNextMatch(const struct Castle *c, void *full_state,
 }
 
 static really_inline
-void subCastleMatchLoop(const struct Castle *c, void *full_state,
-                        void *stream_state, const u64a end,
-                        const u64a loc, u64a *offset) {
-    u8 *active = (u8 *)stream_state + c->activeIdxSize;
-    u8 *matching = full_state;
-    mmbit_clear(matching, c->numRepeats);
-    for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
-         i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
-        u64a match = subCastleNextMatch(c, full_state, stream_state, loc, i);
+void set_matching(const struct Castle *c, const u64a match, u8 *active,
+                  u8 *matching, const u32 active_size, const u32 active_id,
+                  const u32 matching_id, u64a *offset, const u64a end) {
     if (match == 0) {
         DEBUG_PRINTF("no more matches\n");
-        mmbit_unset(active, c->numRepeats, i);
+        mmbit_unset(active, active_size, active_id);
     } else if (match > end) {
         // If we had a local copy of the active mmbit, we could skip
         // looking at this repeat again. But we don't, so we just move
         // on.
     } else if (match == *offset) {
-        mmbit_set(matching, c->numRepeats, i);
+        mmbit_set(matching, c->numRepeats, matching_id);
     } else if (match < *offset) {
         // New minimum offset.
         *offset = match;
         mmbit_clear(matching, c->numRepeats);
-        mmbit_set(matching, c->numRepeats, i);
+        mmbit_set(matching, c->numRepeats, matching_id);
     }
+}
+
+static really_inline
+void subCastleMatchLoop(const struct Castle *c, void *full_state,
+                        void *stream_state, const u64a end,
+                        const u64a loc, u64a *offset) {
+    u8 *active = (u8 *)stream_state + c->activeOffset;
+    u8 *matching = full_state;
+    for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
+         i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
+        u64a match = subCastleNextMatch(c, full_state, stream_state, loc, i);
+        set_matching(c, match, active, matching, c->numRepeats, i,
+                     i, offset, end);
     }
 }
@@ -451,61 +483,37 @@ char castleMatchLoop(const struct Castle *c, const u64a begin, const u64a end,
     // full_state (scratch).
 
         u64a offset = end; // min offset of next match
-        char found = 0;
         u32 activeIdx = 0;
+        mmbit_clear(matching, c->numRepeats);
         if (c->exclusive) {
-            activeIdx = partial_load_u32(stream_state, c->activeIdxSize);
-            if (activeIdx < c->numRepeats) {
-                u32 i = activeIdx;
-                DEBUG_PRINTF("subcastle %u\n", i);
+            u8 *active = (u8 *)stream_state;
+            u8 *groups = active + c->groupIterOffset;
+            for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
+                 i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
+                u8 *cur = active + i * c->activeIdxSize;
+                activeIdx = partial_load_u32(cur, c->activeIdxSize);
                 u64a match = subCastleNextMatch(c, full_state, stream_state,
-                                                loc, i);
-
-                if (match == 0) {
-                    DEBUG_PRINTF("no more matches\n");
-                    partial_store_u32(stream_state, c->numRepeats,
-                                      c->activeIdxSize);
-                } else if (match > end) {
-                    // If we had a local copy of the active mmbit, we could skip
-                    // looking at this repeat again. But we don't, so we just move
-                    // on.
-                } else if (match <= offset) {
-                    if (match < offset) {
-                        // New minimum offset.
-                        offset = match;
-                    }
-                    found = 1;
-                }
+                                                loc, activeIdx);
+                set_matching(c, match, groups, matching, c->numGroups, i,
+                             activeIdx, &offset, end);
             }
         }
 
-        const char hasMatch = found;
-        u64a newOffset = offset;
-        if (!c->pureExclusive) {
+        if (c->exclusive != PURE_EXCLUSIVE) {
             subCastleMatchLoop(c, full_state, stream_state,
-                               end, loc, &newOffset);
-
-            DEBUG_PRINTF("offset=%llu\n", newOffset);
-            if (mmbit_any(matching, c->numRepeats)) {
-                found = 1;
-                if (subCastleFireMatch(c, full_state, stream_state,
-                        cb, ctx, newOffset) == MO_HALT_MATCHING) {
-                    return MO_HALT_MATCHING;
-                }
-            }
-        }
-
-        if (!found) {
+                               end, loc, &offset);
+        }
+        DEBUG_PRINTF("offset=%llu\n", offset);
+        if (!mmbit_any(matching, c->numRepeats)) {
+            DEBUG_PRINTF("no more matches\n");
             break;
-        } else if (hasMatch && offset == newOffset) {
-            const struct SubCastle *sub = getSubCastle(c, activeIdx);
-            DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, activeIdx);
-            if (cb(offset, sub->report, ctx) == MO_HALT_MATCHING) {
-                DEBUG_PRINTF("caller told us to halt\n");
-                return MO_HALT_MATCHING;
-            }
         }
-        loc = newOffset;
+
+        if (subCastleFireMatch(c, full_state, stream_state,
+                               cb, ctx, offset) == MO_HALT_MATCHING) {
+            return MO_HALT_MATCHING;
+        }
+        loc = offset;
     }
 
     return MO_CONTINUE_MATCHING;
@@ -564,7 +572,8 @@ char castleScanShufti(const struct Castle *c, const u8 *buf, const size_t begin,
 static really_inline
 char castleScanTruffle(const struct Castle *c, const u8 *buf, const size_t begin,
                        const size_t end, size_t *loc) {
-    const u8 *ptr = truffleExec(c->u.truffle.mask1, c->u.truffle.mask2, buf + begin, buf + end);
+    const u8 *ptr = truffleExec(c->u.truffle.mask1, c->u.truffle.mask2,
+                                buf + begin, buf + end);
     if (ptr == buf + end) {
         DEBUG_PRINTF("no escape found\n");
         return 0;
@@ -725,10 +734,11 @@ static really_inline
 void clear_repeats(const struct Castle *c, const struct mq *q, u8 *active) {
     DEBUG_PRINTF("clearing active repeats due to escape\n");
     if (c->exclusive) {
-        partial_store_u32(q->streamState, c->numRepeats, c->activeIdxSize);
+        u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
+        mmbit_clear(groups, c->numGroups);
     }
-    if (!c->pureExclusive) {
+    if (c->exclusive != PURE_EXCLUSIVE) {
         mmbit_clear(active, c->numRepeats);
     }
 }
@@ -755,7 +765,7 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
         return 1;
     }
-    u8 *active = (u8 *)q->streamState + c->activeIdxSize; // active multibit
+    u8 *active = (u8 *)q->streamState + c->activeOffset; // active multibit
     assert(q->cur + 1 < q->end); // require at least two items
     assert(q_cur_type(q) == MQE_START);
@@ -769,14 +779,8 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
     char found = 0;
     if (c->exclusive) {
-        const u32 activeIdx = partial_load_u32(q->streamState,
-                                               c->activeIdxSize);
-        if (activeIdx < c->numRepeats) {
-            found = 1;
-        } else if (c->pureExclusive) {
-            DEBUG_PRINTF("castle is dead\n");
-            goto scan_done;
-        }
+        u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
+        found = mmbit_any(groups, c->numGroups);
     }
     if (!found && !mmbit_any(active, c->numRepeats)) {
@@ -842,10 +846,9 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
     }
     if (c->exclusive) {
-        const u32 activeIdx = partial_load_u32(q->streamState,
-                                               c->activeIdxSize);
-        if (c->pureExclusive || activeIdx < c->numRepeats) {
-            return activeIdx < c->numRepeats;
+        u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
+        if (mmbit_any_precise(groups, c->numGroups)) {
+            return 1;
         }
     }
@@ -905,7 +908,7 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) {
     assert(q_cur_type(q) == MQE_START);
 
     const struct Castle *c = getImplNfa(n);
-    u8 *active = (u8 *)q->streamState + c->activeIdxSize;
+    u8 *active = (u8 *)q->streamState + c->activeOffset;
     u64a end_offset = q_last_loc(q) + q->offset;
     s64a last_kill_loc = castleLastKillLoc(c, q);
@@ -938,14 +941,9 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) {
     char found = 0;
     if (c->exclusive) {
-        const u32 activeIdx = partial_load_u32(q->streamState,
-                                               c->activeIdxSize);
-        if (activeIdx < c->numRepeats) {
-            found = 1;
-        } else if (c->pureExclusive) {
-            DEBUG_PRINTF("castle is dead\n");
-            return 0;
-        }
+        u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
+        found = mmbit_any_precise(groups, c->numGroups);
     }
     if (!found && !mmbit_any_precise(active, c->numRepeats)) {
@@ -988,11 +986,12 @@ char nfaExecCastle0_queueInitState(UNUSED const struct NFA *n, struct mq *q) {
     const struct Castle *c = getImplNfa(n);
     assert(q->streamState);
     if (c->exclusive) {
-        partial_store_u32(q->streamState, c->numRepeats, c->activeIdxSize);
+        u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
+        mmbit_clear(groups, c->numGroups);
     }
-    if (!c->pureExclusive) {
-        u8 *active = (u8 *)q->streamState + c->activeIdxSize;
+    if (c->exclusive != PURE_EXCLUSIVE) {
+        u8 *active = (u8 *)q->streamState + c->activeOffset;
         mmbit_clear(active, c->numRepeats);
     }
     return 0;
@@ -1006,11 +1005,12 @@ char nfaExecCastle0_initCompressedState(const struct NFA *n, UNUSED u64a offset,
     const struct Castle *c = getImplNfa(n);
     if (c->exclusive) {
-        partial_store_u32(state, c->numRepeats, c->activeIdxSize);
+        u8 *groups = (u8 *)state + c->groupIterOffset;
+        mmbit_clear(groups, c->numGroups);
     }
-    if (!c->pureExclusive) {
-        u8 *active = (u8 *)state + c->activeIdxSize;
+    if (c->exclusive != PURE_EXCLUSIVE) {
+        u8 *active = (u8 *)state + c->activeOffset;
         mmbit_clear(active, c->numRepeats);
     }
     return 0;
@@ -1041,16 +1041,19 @@ char nfaExecCastle0_queueCompressState(const struct NFA *n, const struct mq *q,
     const u64a offset = q->offset + loc;
     DEBUG_PRINTF("offset=%llu\n", offset);
     if (c->exclusive) {
-        const u32 activeIdx = partial_load_u32(q->streamState,
-                                               c->activeIdxSize);
-        if (activeIdx < c->numRepeats) {
+        u8 *active = (u8 *)q->streamState;
+        u8 *groups = active + c->groupIterOffset;
+        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
+             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
+            u8 *cur = active + i * c->activeIdxSize;
+            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
             DEBUG_PRINTF("packing state for sub %u\n", activeIdx);
             subCastleQueueCompressState(c, activeIdx, q, offset);
         }
     }
-    if (!c->pureExclusive) {
-        const u8 *active = (const u8 *)q->streamState + c->activeIdxSize;
+    if (c->exclusive != PURE_EXCLUSIVE) {
+        const u8 *active = (const u8 *)q->streamState + c->activeOffset;
         for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
              i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
             DEBUG_PRINTF("packing state for sub %u\n", i);
@@ -1084,15 +1087,19 @@ char nfaExecCastle0_expandState(const struct NFA *n, void *dest,
     const struct Castle *c = getImplNfa(n);
     if (c->exclusive) {
-        const u32 activeIdx = partial_load_u32(src, c->activeIdxSize);
-        if (activeIdx < c->numRepeats) {
+        const u8 *active = (const u8 *)src;
+        const u8 *groups = active + c->groupIterOffset;
+        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
+             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
+            const u8 *cur = active + i * c->activeIdxSize;
+            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
             subCastleExpandState(c, activeIdx, dest, src, offset);
         }
     }
-    if (!c->pureExclusive) {
+    if (c->exclusive != PURE_EXCLUSIVE) {
         // Unpack state for all active repeats.
-        const u8 *active = (const u8 *)src + c->activeIdxSize;
+        const u8 *active = (const u8 *)src + c->activeOffset;
         for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
              i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
             subCastleExpandState(c, i, dest, src, offset);
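
The runtime hunks above all follow the same pattern: walk the live-exclusive-groups multibit, load the packed active subcastle id for each live group, then walk the ordinary multibit for the non-exclusive subcastles. The toy program below models that walk with plain byte arrays; loadPacked and the one-byte-per-entry "multibit" are simplifications standing in for partial_load_u32 and the real mmbit routines, and the layout constants are invented for the example rather than taken from the engine.

// Toy model of the walk, with one byte per "multibit" entry and a plain
// little-endian load standing in for mmbit_iterate()/partial_load_u32().
#include <cstdio>
#include <vector>

struct ToyCastle {
    unsigned numRepeats;      // total subcastles
    unsigned numGroups;       // exclusive groups
    unsigned activeIdxSize;   // bytes used to pack one active subcastle id
    unsigned activeOffset;    // where the non-exclusive "active" bits start
    unsigned groupIterOffset; // where the live-groups bits start
};

static unsigned loadPacked(const unsigned char *p, unsigned bytes) {
    unsigned v = 0;
    for (unsigned i = 0; i < bytes; i++) {
        v |= (unsigned)p[i] << (8 * i);
    }
    return v;
}

// Visit every live subcastle: the packed active id of each live exclusive
// group first, then every set bit of the non-exclusive multibit.
static void forEachActive(const ToyCastle &c, const unsigned char *state) {
    const unsigned char *groups = state + c.groupIterOffset;
    for (unsigned g = 0; g < c.numGroups; g++) {
        if (groups[g]) {
            const unsigned char *cur = state + g * c.activeIdxSize;
            printf("group %u -> subcastle %u\n", g,
                   loadPacked(cur, c.activeIdxSize));
        }
    }
    const unsigned char *active = state + c.activeOffset;
    for (unsigned i = 0; i < c.numRepeats; i++) {
        if (active[i]) {
            printf("non-exclusive subcastle %u\n", i);
        }
    }
}

int main() {
    // Invented layout: [id grp0][id grp1][non-exclusive bits x6][group bits x2]
    ToyCastle c = {6, 2, 1, 2, 8};
    std::vector<unsigned char> state(c.groupIterOffset + c.numGroups, 0);
    state[0] = 3;                     // group 0 currently runs subcastle 3
    state[c.groupIterOffset + 0] = 1; // group 0 is live
    state[c.activeOffset + 5] = 1;    // non-exclusive subcastle 5 is live
    forEachActive(c, state.data());
    return 0;
}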

View File

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -42,7 +42,9 @@ struct SubCastle {
     u32 streamStateOffset; //!< offset within stream state
     u32 repeatInfoOffset;  //!< offset of RepeatInfo structure
                            // relative to the start of SubCastle
-    char exclusive; //!< exclusive info of this SubCastle
+    u32 exclusiveId; //!< exclusive group id of this SubCastle,
+                     //   set to the number of SubCastles in Castle
+                     //   if it is not exclusive
 };
 
 #define CASTLE_DOT 0
@@ -51,6 +53,12 @@ struct SubCastle {
 #define CASTLE_SHUFTI 3
 #define CASTLE_TRUFFLE 4
 
+enum ExclusiveType {
+    NOT_EXCLUSIVE,  //!< no subcastles are exclusive
+    EXCLUSIVE,      //!< a subset of subcastles are exclusive
+    PURE_EXCLUSIVE  //!< all subcastles are exclusive
+};
+
 /**
  * \brief Castle engine structure.
  *
@@ -66,26 +74,56 @@ struct SubCastle {
  * - sparse iterator for subcastles that may be stale
  *
  * Castle stores an "active repeats" multibit in stream state, followed by the
- * packed repeat state for each SubCastle. If all SubCastles are mutual
- * exclusive, we store current active SubCastle id instead of "active repeats"
- * multibit in stream state. If there are both exclusive and non-exclusive
- * SubCastle groups, we use an active id for the exclusive group and a multibit
- * for the non-exclusive group.
+ * packed repeat state for each SubCastle. If there are both exclusive and
+ * non-exclusive SubCastle groups, we use an active id for each exclusive group
+ * and a multibit for the non-exclusive group. We also store an "active
+ * exclusive groups" multibit for exclusive groups. If all SubCastles are mutual
+ * exclusive, we remove "active repeats" multibit from stream state.
+ * * Castle stream state:
+ * *
+ * * |---|
+ * * |   | active subengine id for exclusive group 1
+ * * |---|
+ * * |   | active subengine id for exclusive group 2(if necessary)
+ * * |---|
+ * * ...
+ * * |---|
+ * * |   | "active repeats" multibit for non-exclusive subcastles
+ * * |   | (if not all subcastles are exclusive)
+ * * |---|
+ * * |   | active multibit for exclusive groups
+ * * |   |
+ * * |---|
+ * * ||-|| common pool of stream state for exclusive group 1
+ * * ||-||
+ * * |---|
+ * * ||-|| common pool of stream state for exclusive group 2(if necessary)
+ * * ||-||
+ * * |---|
+ * * ...
+ * * |---|
+ * * |   | stream state for each non-exclusive subcastles
+ * * ...
+ * * |   |
+ * * |---|
  *
 * In full state (stored in scratch space) it stores a temporary multibit over
  * the repeats (used by \ref castleMatchLoop), followed by the repeat control
- * blocks for each SubCastle. If all SubCastles are mutual exclusive, we only
- * need to store the repeat control blocks for each SubCastle.
+ * blocks for each SubCastle.
  */
 struct ALIGN_AVX_DIRECTIVE Castle {
-    u32 numRepeats;
+    u32 numRepeats;  //!< number of repeats in Castle
+    u32 numGroups;   //!< number of exclusive groups
     u8 type; //!< tells us which scanning mechanism (below) to use
-    char exclusive; //!< tells us if there are mutual exclusive SubCastles
-    char pureExclusive; //!< tells us if all SubCastles are mutual exclusive
+    u8 exclusive; //!< tells us if there are mutual exclusive SubCastles
    u8 activeIdxSize; //!< number of bytes in stream state to store
                      // active SubCastle id for exclusive mode
-    u32 staleIterOffset; //<! offset to a sparse iterator to check for stale
+    u32 activeOffset; //!< offset to active multibit for non-exclusive
+                      // SubCastles
+    u32 staleIterOffset; //!< offset to a sparse iterator to check for stale
                          // sub castles
+    u32 groupIterOffset; //!< offset to a iterator to check the aliveness of
+                         // exclusive groups
     union {
         struct {
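
As a rough worked example of the layout described above, stream state now costs one packed active id per exclusive group, one live-groups multibit, one shared state pool per group, and the usual multibit plus per-repeat packed state for whatever remains non-exclusive. The sketch below just adds those pieces up; it uses a flat ceil(n/8) bitvector where the real engine uses its multibit encoding, and every number in it is invented for illustration.

// All numbers here are invented; the flat ceil(n/8) bitvector is only a
// stand-in for the engine's real multibit encoding.
#include <cstdio>

static unsigned flatBitvectorBytes(unsigned n) {
    return (n + 7) / 8;
}

unsigned toyStreamStateSize(unsigned numRepeats, unsigned numGroups,
                            unsigned numNonExclusive, unsigned activeIdxSize,
                            unsigned groupPoolBytes, unsigned nonExclBytes) {
    unsigned size = numGroups * activeIdxSize;  // packed active id per group
    if (numNonExclusive) {
        size += flatBitvectorBytes(numRepeats); // "active repeats" bits
    }
    size += flatBitvectorBytes(numGroups);      // live-groups bits
    size += numGroups * groupPoolBytes;         // shared pool per group
    size += numNonExclusive * nonExclBytes;     // per-repeat packed state
    return size;
}

int main() {
    // e.g. 40 repeats, two exclusive groups covering 30 of them, 10 left over
    printf("%u bytes\n", toyStreamStateSize(40, 2, 10, 1, 4, 6));
    return 0;
}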

View File

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -245,17 +245,18 @@ vector<u32> removeClique(CliqueGraph &cg) {
 // the end locations where it overlaps with other literals,
 // then the literals are mutual exclusive
 static
-bool findExclusivePair(const u32 id1, const u32 id2,
+bool findExclusivePair(const size_t id1, const size_t id2,
+                       const size_t lower,
                        const vector<vector<size_t>> &min_reset_dist,
                        const vector<vector<vector<CharReach>>> &triggers) {
     const auto &triggers1 = triggers[id1];
     const auto &triggers2 = triggers[id2];
-    for (u32 i = 0; i < triggers1.size(); ++i) {
-        for (u32 j = 0; j < triggers2.size(); ++j) {
+    for (size_t i = 0; i < triggers1.size(); ++i) {
+        for (size_t j = 0; j < triggers2.size(); ++j) {
             if (!literalOverlap(triggers1[i], triggers2[j],
-                                min_reset_dist[id2][j]) ||
+                                min_reset_dist[id2 - lower][j]) ||
                 !literalOverlap(triggers2[j], triggers1[i],
-                                min_reset_dist[id1][i])) {
+                                min_reset_dist[id1 - lower][i])) {
                 return false;
             }
         }
@@ -264,40 +265,75 @@ bool findExclusivePair(const u32 id1, const u32 id2,
 }
 
 static
-vector<u32> checkExclusion(const CharReach &cr,
-                           const vector<vector<vector<CharReach>>> &triggers) {
-    vector<u32> group;
-    if (!triggers.size() || triggers.size() == 1) {
-        return group;
-    }
+vector<vector<u32>> checkExclusion(u32 &streamStateSize,
+                                   const CharReach &cr,
+                                   const vector<vector<vector<CharReach>>> &triggers,
+                                   enum ExclusiveType &exclusive,
+                                   const size_t numRepeats) {
+    vector<vector<u32>> groups;
+    size_t trigSize = triggers.size();
+    DEBUG_PRINTF("trigSize %zu\n", trigSize);
 
-    vector<vector<size_t>> min_reset_dist;
-    // get min reset distance for each repeat
-    for (auto it = triggers.begin(); it != triggers.end(); it++) {
-        const vector<size_t> &tmp_dist = minResetDistToEnd(*it, cr);
-        min_reset_dist.push_back(tmp_dist);
-    }
+    size_t lower = 0;
+    size_t total = 0;
+    while (lower < trigSize) {
+        vector<CliqueVertex> vertices;
+        unique_ptr<CliqueGraph> cg = make_unique<CliqueGraph>();
 
-    vector<CliqueVertex> vertices;
-    unique_ptr<CliqueGraph> cg = make_unique<CliqueGraph>();
-    for (u32 i = 0; i < triggers.size(); ++i) {
-        CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg);
-        vertices.push_back(v);
-    }
+        vector<vector<size_t>> min_reset_dist;
+        size_t upper = min(lower + CLIQUE_GRAPH_MAX_SIZE, trigSize);
+        // get min reset distance for each repeat
+        for (size_t i = lower; i < upper; i++) {
+            CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg);
+            vertices.push_back(v);
+            const vector<size_t> &tmp_dist =
+                minResetDistToEnd(triggers[i], cr);
+            min_reset_dist.push_back(tmp_dist);
+        }
 
-    // find exclusive pair for each repeat
-    for (u32 i = 0; i < triggers.size(); ++i) {
-        CliqueVertex s = vertices[i];
-        for (u32 j = i + 1; j < triggers.size(); ++j) {
-            if (findExclusivePair(i, j, min_reset_dist, triggers)) {
-                CliqueVertex d = vertices[j];
-                add_edge(s, d, *cg);
+        // find exclusive pair for each repeat
+        for (size_t i = lower; i < upper; i++) {
+            CliqueVertex s = vertices[i - lower];
+            for (size_t j = i + 1; j < upper; j++) {
+                if (findExclusivePair(i, j, lower, min_reset_dist,
+                                      triggers)) {
+                    CliqueVertex d = vertices[j - lower];
+                    add_edge(s, d, *cg);
+                }
             }
         }
-    }
 
-    // find the largest exclusive group
-    return removeClique(*cg);
+        // find the largest exclusive group
+        auto clique = removeClique(*cg);
+        size_t cliqueSize = clique.size();
+        if (cliqueSize > 1) {
+            groups.push_back(clique);
+            exclusive = EXCLUSIVE;
+            total += cliqueSize;
+        }
+        lower += CLIQUE_GRAPH_MAX_SIZE;
+    }
+    DEBUG_PRINTF("clique size %lu, num of repeats %lu\n",
+                 total, numRepeats);
+    if (total == numRepeats) {
+        exclusive = PURE_EXCLUSIVE;
+        streamStateSize = 0;
+    }
+    return groups;
+}
+
+namespace {
+struct ExclusiveInfo {
+    /** Mapping between top and exclusive group id */
+    map<u32, u32> groupId;
+    /** Number of exclusive groups */
+    u32 numGroups = 0;
+};
 }
 
 static
@@ -306,10 +342,12 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
                      const vector<pair<depth, bool>> &repeatInfoPair,
                      u32 &scratchStateSize, u32 &streamStateSize,
                      u32 &tableSize, vector<u64a> &tables, u32 &sparseRepeats,
-                     const set<u32> &exclusiveGroup, vector<u32> &may_stale) {
+                     const ExclusiveInfo &exclusiveInfo,
+                     vector<u32> &may_stale) {
     u32 i = 0;
-    u32 maxStreamSize = 0;
-    bool exclusive = exclusiveGroup.size() > 1;
+    const auto &groupId = exclusiveInfo.groupId;
+    const auto &numGroups = exclusiveInfo.numGroups;
+    vector<u32> maxStreamSize(numGroups, 0);
     for (auto it = proto.repeats.begin(), ite = proto.repeats.end();
          it != ite; ++it, ++i) {
         const PureRepeat &pr = it->second;
@@ -330,8 +368,9 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
         RepeatInfo &info = infos[i];
 
         // handle exclusive case differently
-        if (exclusive && exclusiveGroup.find(i) != exclusiveGroup.end()) {
-            maxStreamSize = MAX(maxStreamSize, rsi.packedCtrlSize);
+        if (contains(groupId, i)) {
+            u32 id = groupId.at(i);
+            maxStreamSize[id] = MAX(maxStreamSize[id], rsi.packedCtrlSize);
         } else {
             subScratchStateSize = verify_u32(sizeof(RepeatControl));
             subStreamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize);
@@ -375,16 +414,25 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
         }
     }
 
-    if (exclusive) {
-        for (auto k : exclusiveGroup) {
-            SubCastle &sub = subs[k];
-            RepeatInfo &info = infos[k];
-            info.packedCtrlSize = maxStreamSize;
+    vector<u32> scratchOffset(numGroups, 0);
+    vector<u32> streamOffset(numGroups, 0);
+    for (const auto &j : groupId) {
+        u32 top = j.first;
+        u32 id = j.second;
+        SubCastle &sub = subs[top];
+        RepeatInfo &info = infos[top];
+        info.packedCtrlSize = maxStreamSize[id];
+        if (!scratchOffset[id]) {
             sub.fullStateOffset = scratchStateSize;
             sub.streamStateOffset = streamStateSize;
-        }
-        scratchStateSize += verify_u32(sizeof(RepeatControl));
-        streamStateSize += maxStreamSize;
+            scratchOffset[id] = scratchStateSize;
+            streamOffset[id] = streamStateSize;
+            scratchStateSize += verify_u32(sizeof(RepeatControl));
+            streamStateSize += maxStreamSize[id];
+        } else {
+            sub.fullStateOffset = scratchOffset[id];
+            sub.streamStateOffset = streamOffset[id];
+        }
     }
 }
@@ -423,8 +471,9 @@ buildCastle(const CastleProto &proto,
     depth maxWidth(0);
 
     u32 i = 0;
-    vector<u32> candidateRepeats;
+    ExclusiveInfo exclusiveInfo;
     vector<vector<vector<CharReach>>> candidateTriggers;
+    vector<u32> candidateRepeats;
     vector<pair<depth, bool>> repeatInfoPair;
     for (auto it = proto.repeats.begin(), ite = proto.repeats.end();
          it != ite; ++it, ++i) {
@@ -459,39 +508,41 @@ buildCastle(const CastleProto &proto,
         repeatInfoPair.push_back(make_pair(min_period, is_reset));
 
-        if (is_reset && candidateRepeats.size() < CLIQUE_GRAPH_MAX_SIZE) {
-            candidateTriggers.push_back(triggers.at(top));
-            candidateRepeats.push_back(i);
-        }
+        candidateTriggers.push_back(triggers.at(top));
+        candidateRepeats.push_back(i);
     }
 
     // Case 1: exclusive repeats
-    bool exclusive = false;
-    bool pureExclusive = false;
+    enum ExclusiveType exclusive = NOT_EXCLUSIVE;
     u32 activeIdxSize = 0;
-    set<u32> exclusiveGroup;
+    u32 groupIterOffset = 0;
     if (cc.grey.castleExclusive) {
-        vector<u32> tmpGroup = checkExclusion(cr, candidateTriggers);
-        const u32 exclusiveSize = tmpGroup.size();
-        if (exclusiveSize > 1) {
-            // Case 1: mutual exclusive repeats group found, initialize state
-            // sizes
-            exclusive = true;
+        auto cliqueGroups =
+            checkExclusion(streamStateSize, cr, candidateTriggers,
+                           exclusive, numRepeats);
+        for (const auto &group : cliqueGroups) {
+            // mutual exclusive repeats group found,
+            // update state sizes
             activeIdxSize = calcPackedBytes(numRepeats + 1);
-            if (exclusiveSize == numRepeats) {
-                pureExclusive = true;
-                streamStateSize = 0;
-                scratchStateSize = 0;
-            }
             streamStateSize += activeIdxSize;
 
             // replace with top values
-            for (const auto &val : tmpGroup) {
-                exclusiveGroup.insert(candidateRepeats[val]);
+            for (const auto &val : group) {
+                const u32 top = candidateRepeats[val];
+                exclusiveInfo.groupId[top] = exclusiveInfo.numGroups;
             }
+            exclusiveInfo.numGroups++;
+        }
+
+        if (exclusive) {
+            groupIterOffset = streamStateSize;
+            streamStateSize += mmbit_size(exclusiveInfo.numGroups);
         }
+        DEBUG_PRINTF("num of groups:%u\n", exclusiveInfo.numGroups);
     }
+    candidateRepeats.clear();
 
     DEBUG_PRINTF("reach %s exclusive %u\n", describeClass(cr).c_str(),
                  exclusive);
@@ -501,7 +552,7 @@ buildCastle(const CastleProto &proto,
     buildSubcastles(proto, subs, infos, patchSize, repeatInfoPair,
                     scratchStateSize, streamStateSize, tableSize,
-                    tables, sparseRepeats, exclusiveGroup, may_stale);
+                    tables, sparseRepeats, exclusiveInfo, may_stale);
 
     DEBUG_PRINTF("%zu subcastles may go stale\n", may_stale.size());
     vector<mmbit_sparse_iter> stale_iter;
@@ -536,9 +587,11 @@ buildCastle(const CastleProto &proto,
     char *ptr = base_ptr;
     Castle *c = (Castle *)ptr;
     c->numRepeats = verify_u32(subs.size());
-    c->exclusive = exclusive;
-    c->pureExclusive = pureExclusive;
+    c->numGroups = exclusiveInfo.numGroups;
+    c->exclusive = verify_s8(exclusive);
     c->activeIdxSize = verify_u8(activeIdxSize);
+    c->activeOffset = verify_u32(c->numGroups * activeIdxSize);
+    c->groupIterOffset = groupIterOffset;
     writeCastleScanEngine(cr, c);
@@ -572,10 +625,10 @@ buildCastle(const CastleProto &proto,
         }
 
         // set exclusive group info
-        if (exclusiveGroup.find(i) != exclusiveGroup.end()) {
-            sub->exclusive = 1;
+        if (contains(exclusiveInfo.groupId, i)) {
+            sub->exclusiveId = exclusiveInfo.groupId[i];
         } else {
-            sub->exclusive = 0;
+            sub->exclusiveId = numRepeats;
         }
     }