allow some prefixes that may squash the literal match to run eagerly

commit f166bc5658 (parent f9ded59361)
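
In summary, this commit lets rose run selected prefixes ("eager" prefixes) over the start of a block before any literal matching, so that a prefix which dies, or provably cannot match, can squash the group mask of the literals it guards and spare the literal matcher useless work. The diff below builds that up in four pieces: every NFA engine (castle, gough, LBR, LimEx, McClellan, MPV) gains an `_inAnyAccept` entry point plus a top-level `nfaInAnyAcceptState()` dispatcher that asks "is any accept state live?" without naming a ReportID; a new `NFA_EAGER_PREFIX` engine kind is introduced along with an `inspects_states_for_accepts()` predicate (and `generates_callbacks()` is split from the new `has_managed_reports()` at the graph level); `can_die_early()` analyses are added for both NFA graphs and raw DFAs so the compiler can pick prefixes worth running eagerly; and the rose block runtime gains `runEagerPrefixesBlock()`, wired in ahead of anchored/floating literal scanning.

A minimal sketch (not code from this commit) of the new accept query; `nfa` and `q` stand for a built engine and its initialized queue:

    /* Sketch only: nfa/q are assumed to be a valid engine and queue. */
    if (nfaInAnyAcceptState(nfa, q)) {
        /* some accept state is live: the prefix may be matching here,
         * so the literals it guards must stay enabled */
    } else {
        /* no accept state is live at the current queue location */
    }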
@@ -979,6 +979,46 @@ char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report,
     return castleInAccept(c, q, report, q_cur_offset(q));
 }
 
+char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q) {
+    assert(n && q);
+    assert(n->type == CASTLE_NFA_0);
+    DEBUG_PRINTF("entry\n");
+
+    const struct Castle *c = getImplNfa(n);
+    const u64a offset = q_cur_offset(q);
+    DEBUG_PRINTF("offset=%llu\n", offset);
+
+    if (c->exclusive) {
+        u8 *active = (u8 *)q->streamState;
+        u8 *groups = active + c->groupIterOffset;
+        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
+             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
+            u8 *cur = active + i * c->activeIdxSize;
+            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
+            DEBUG_PRINTF("subcastle %u\n", activeIdx);
+            const struct SubCastle *sub = getSubCastle(c, activeIdx);
+            if (subCastleInAccept(c, q, sub->report, offset, activeIdx)) {
+                return 1;
+            }
+        }
+    }
+
+    if (c->exclusive != PURE_EXCLUSIVE) {
+        const u8 *active = (const u8 *)q->streamState + c->activeOffset;
+        for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
+             i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
+            DEBUG_PRINTF("subcastle %u\n", i);
+            const struct SubCastle *sub = getSubCastle(c, i);
+            if (subCastleInAccept(c, q, sub->report, offset, i)) {
+                return 1;
+            }
+        }
+    }
+
+    return 0;
+}
+
 char nfaExecCastle0_queueInitState(UNUSED const struct NFA *n, struct mq *q) {
     assert(n && q);
     assert(n->type == CASTLE_NFA_0);
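
Both loops above use the multibit iteration idiom that recurs throughout this diff: `mmbit_iterate(bits, total, MMB_INVALID)` yields the first set index, passing the previous index yields the next, and `MMB_INVALID` signals exhaustion. Schematically (assuming `bits`/`total` describe an initialized multibit):

    for (u32 i = mmbit_iterate(bits, total, MMB_INVALID); i != MMB_INVALID;
         i = mmbit_iterate(bits, total, i)) {
        /* index i is set: inspect subengine i */
    }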
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -44,6 +44,7 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report);
 char nfaExecCastle0_reportCurrent(const struct NFA *n, struct mq *q);
 char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report,
                              struct mq *q);
+char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q);
 char nfaExecCastle0_queueInitState(const struct NFA *n, struct mq *q);
 char nfaExecCastle0_initCompressedState(const struct NFA *n, u64a offset,
                                         void *state, u8 key);
@@ -1048,6 +1048,14 @@ char nfaExecGough16_inAccept(const struct NFA *n, ReportID report,
     return nfaExecMcClellan16_inAccept(n, report, q);
 }
 
+char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q) {
+    return nfaExecMcClellan8_inAnyAccept(n, q);
+}
+
+char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q) {
+    return nfaExecMcClellan16_inAnyAccept(n, q);
+}
+
 static
 char goughCheckEOD(const struct NFA *nfa, u16 s,
                    const struct gough_som_info *som,
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -46,6 +46,7 @@ char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end);
 char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report);
 char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q);
 char nfaExecGough8_inAccept(const struct NFA *n, ReportID report, struct mq *q);
+char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q);
 char nfaExecGough8_queueInitState(const struct NFA *n, struct mq *q);
 char nfaExecGough8_initCompressedState(const struct NFA *n, u64a offset,
                                        void *state, u8 key);
@@ -68,6 +69,7 @@ char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end);
 char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report);
 char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q);
 char nfaExecGough16_inAccept(const struct NFA *n, ReportID report, struct mq *q);
+char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q);
 char nfaExecGough16_queueInitState(const struct NFA *n, struct mq *q);
 char nfaExecGough16_initCompressedState(const struct NFA *n, u64a offset,
                                         void *state, u8 key);
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -46,6 +46,7 @@ char nfaExecLbrDot_Q2(const struct NFA *n, struct mq *q, s64a end);
 char nfaExecLbrDot_QR(const struct NFA *n, struct mq *q, ReportID report);
 char nfaExecLbrDot_reportCurrent(const struct NFA *n, struct mq *q);
 char nfaExecLbrDot_inAccept(const struct NFA *n, ReportID report, struct mq *q);
+char nfaExecLbrDot_inAnyAccept(const struct NFA *n, struct mq *q);
 char nfaExecLbrDot_queueInitState(const struct NFA *n, struct mq *q);
 char nfaExecLbrDot_initCompressedState(const struct NFA *n, u64a offset,
                                        void *state, u8 key);
@@ -66,6 +67,7 @@ char nfaExecLbrVerm_QR(const struct NFA *n, struct mq *q, ReportID report);
 char nfaExecLbrVerm_reportCurrent(const struct NFA *n, struct mq *q);
 char nfaExecLbrVerm_inAccept(const struct NFA *n, ReportID report,
                              struct mq *q);
+char nfaExecLbrVerm_inAnyAccept(const struct NFA *n, struct mq *q);
 char nfaExecLbrVerm_queueInitState(const struct NFA *n, struct mq *q);
 char nfaExecLbrVerm_initCompressedState(const struct NFA *n, u64a offset,
                                         void *state, u8 key);
@@ -86,6 +88,7 @@ char nfaExecLbrNVerm_QR(const struct NFA *n, struct mq *q, ReportID report);
 char nfaExecLbrNVerm_reportCurrent(const struct NFA *n, struct mq *q);
 char nfaExecLbrNVerm_inAccept(const struct NFA *n, ReportID report,
                               struct mq *q);
+char nfaExecLbrNVerm_inAnyAccept(const struct NFA *n, struct mq *q);
 char nfaExecLbrNVerm_queueInitState(const struct NFA *n, struct mq *q);
 char nfaExecLbrNVerm_initCompressedState(const struct NFA *n, u64a offset,
                                          void *state, u8 key);
@@ -106,6 +109,7 @@ char nfaExecLbrShuf_QR(const struct NFA *n, struct mq *q, ReportID report);
 char nfaExecLbrShuf_reportCurrent(const struct NFA *n, struct mq *q);
 char nfaExecLbrShuf_inAccept(const struct NFA *n, ReportID report,
                              struct mq *q);
+char nfaExecLbrShuf_inAnyAccept(const struct NFA *n, struct mq *q);
 char nfaExecLbrShuf_queueInitState(const struct NFA *n, struct mq *q);
 char nfaExecLbrShuf_initCompressedState(const struct NFA *n, u64a offset,
                                         void *state, u8 key);
@@ -126,6 +130,7 @@ char nfaExecLbrTruf_QR(const struct NFA *n, struct mq *q, ReportID report);
 char nfaExecLbrTruf_reportCurrent(const struct NFA *n, struct mq *q);
 char nfaExecLbrTruf_inAccept(const struct NFA *n, ReportID report,
                              struct mq *q);
+char nfaExecLbrTruf_inAnyAccept(const struct NFA *n, struct mq *q);
 char nfaExecLbrTruf_queueInitState(const struct NFA *n, struct mq *q);
 char nfaExecLbrTruf_initCompressedState(const struct NFA *n, u64a offset,
                                         void *state, u8 key);
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -94,6 +94,15 @@ char JOIN(ENGINE_EXEC_NAME, _inAccept)(const struct NFA *nfa,
     return lbrInAccept(l, lstate, q->streamState, offset, report);
 }
 
+char JOIN(ENGINE_EXEC_NAME, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
+    assert(nfa && q);
+    assert(isLbrType(nfa->type));
+    DEBUG_PRINTF("entry\n");
+
+    const struct lbr_common *l = getImplNfa(nfa);
+    return JOIN(ENGINE_EXEC_NAME, _inAccept)(nfa, l->report, q);
+}
+
 char JOIN(ENGINE_EXEC_NAME, _queueInitState)(const struct NFA *nfa,
                                              struct mq *q) {
     assert(nfa && q);
@@ -60,6 +60,7 @@ extern "C"
     char gf_name##_reportCurrent(const struct NFA *n, struct mq *q); \
     char gf_name##_inAccept(const struct NFA *n, ReportID report, \
                             struct mq *q); \
+    char gf_name##_inAnyAccept(const struct NFA *n, struct mq *q); \
     char gf_name##_queueInitState(const struct NFA *n, struct mq *q); \
     char gf_name##_initCompressedState(const struct NFA *n, u64a offset, \
                                        void *state, u8 key); \
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -40,6 +40,7 @@
 #define TESTEOD_FN JOIN(moNfaTestEod, SIZE)
 #define TESTEOD_REV_FN JOIN(moNfaRevTestEod, SIZE)
 #define LIMEX_INACCEPT_FN JOIN(limexInAccept, SIZE)
+#define LIMEX_INANYACCEPT_FN JOIN(limexInAnyAccept, SIZE)
 #define EXPIRE_ESTATE_FN JOIN(limexExpireExtendedState, SIZE)
 #define REPORTCURRENT_FN JOIN(moNfaReportCurrent, SIZE)
 #define INITIAL_FN JOIN(moNfaInitial, SIZE)
@@ -374,11 +375,32 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
     return 0;
 }
 
+static really_inline
+char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
+                          union RepeatControl *repeat_ctrl, char *repeat_state,
+                          u64a offset) {
+    assert(limex);
+
+    const STATE_T acceptMask = LOAD_STATE(&limex->accept);
+    STATE_T accstate = AND_STATE(state, acceptMask);
+
+    // Are we in an accept state?
+    if (ISZERO_STATE(accstate)) {
+        DEBUG_PRINTF("no accept states are on\n");
+        return 0;
+    }
+
+    SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &accstate);
+
+    return ISNONZERO_STATE(accstate);
+}
+
 #undef TESTEOD_FN
 #undef TESTEOD_REV_FN
 #undef REPORTCURRENT_FN
 #undef EXPIRE_ESTATE_FN
 #undef LIMEX_INACCEPT_FN
+#undef LIMEX_INANYACCEPT_FN
 #undef INITIAL_FN
 #undef TOP_FN
 #undef TOPN_FN
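
`LIMEX_INANYACCEPT_FN` ANDs the live state vector against the engine's precomputed accept mask; `SQUASH_UNTUG_BR_FN` then clears accept bits owned by bounded repeats that are not actually at a reportable count, and anything still set means some accept state is live. A simplified scalar sketch of the core test (assuming the state fits in one 64-bit word and ignoring the bounded-repeat squash):

    #include <stdint.h>

    /* Sketch: the real code uses SIZE-parameterised STATE_T vector ops and
     * squashes stale bounded-repeat accept bits before this test. */
    static inline int in_any_accept64(uint64_t state, uint64_t accept_mask) {
        return (state & accept_mask) != 0; /* non-zero => an accept bit is on */
    }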
@@ -1008,7 +1008,8 @@ void findMaskedCompressionStates(const build_info &args,
     // Suffixes and outfixes can mask out leaf states, which should all be
     // accepts. Right now we can only do this when there is nothing in initDs,
     // as we switch that on unconditionally in the expand call.
-    if (generates_callbacks(h) && !hasInitDsStates(h, args.state_ids)) {
+    if (!inspects_states_for_accepts(h)
+        && !hasInitDsStates(h, args.state_ids)) {
         NFAStateSet nonleaf(args.num_states);
         for (const auto &e : edges_range(h)) {
             u32 from = args.state_ids.at(source(e, h));
@@ -650,7 +650,27 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
         ep = MIN(ep, end_abs);
         assert(ep >= sp);
 
-        assert(sp >= offset); // We no longer do history buffer scans here.
+        if (sp < offset) {
+            DEBUG_PRINTF("HISTORY BUFFER SCAN\n");
+            assert(offset - sp <= q->hlength);
+            u64a local_ep = MIN(offset, ep);
+            u64a final_look = 0;
+            /* we are starting inside the history buffer */
+            if (STREAMFIRST_FN(limex, q->history + q->hlength + sp - offset,
+                               local_ep - sp, &ctx, sp,
+                               &final_look) == MO_HALT_MATCHING) {
+                DEBUG_PRINTF("final_look:%llu sp:%llu end_abs:%llu "
+                             "offset:%llu\n", final_look, sp, end_abs, offset);
+                assert(q->cur);
+                q->cur--;
+                q->items[q->cur].type = MQE_START;
+                q->items[q->cur].location = sp + final_look - offset;
+                STORE_STATE(q->state, LOAD_STATE(&ctx.s));
+                return MO_MATCHES_PENDING;
+            }
+
+            sp = local_ep;
+        }
+
         if (sp >= ep) {
             goto scan_done;
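
The history-buffer indexing in the new branch is worth unpacking: `q->history` holds the `q->hlength` bytes of stream immediately before `offset`, so a stream position `sp < offset` lives at index `q->hlength - (offset - sp)`, which is exactly the `q->history + q->hlength + sp - offset` pointer passed to `STREAMFIRST_FN`. With hypothetical numbers:

    /* Illustrative values only:
     *   offset  = 100  (stream position of the current buffer's start)
     *   hlength = 16   (history covers stream positions 84..99)
     *   sp      = 90   (scan start, inside the history)
     * => q->history + 16 + 90 - 100 = q->history + 6, i.e. position 90. */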
@@ -868,6 +888,21 @@ char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa,
                              offset, report);
 }
 
+char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
+    assert(nfa && q);
+    assert(q->state && q->streamState);
+
+    const IMPL_NFA_T *limex = getImplNfa(nfa);
+    union RepeatControl *repeat_ctrl =
+        getRepeatControlBase(q->state, sizeof(STATE_T));
+    char *repeat_state = q->streamState + limex->stateSize;
+    STATE_T state = LOAD_STATE(q->state);
+    u64a offset = q->offset + q_last_loc(q) + 1;
+
+    return JOIN(limexInAnyAccept, SIZE)(limex, state, repeat_ctrl, repeat_state,
+                                        offset);
+}
+
 enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
                                                         const struct NFA *nfa,
                                                         struct mq *q,
@@ -850,7 +850,7 @@ char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) {
 }
 
 char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) {
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+    const struct mcclellan *m = getImplNfa(n);
     NfaCallback cb = q->cb;
     void *ctxt = q->context;
     u16 s = *(u16 *)q->state;
@@ -905,7 +905,7 @@ char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
                                 struct mq *q) {
     assert(n && q);
 
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+    const struct mcclellan *m = getImplNfa(n);
     u8 s = *(u8 *)q->state;
     DEBUG_PRINTF("checking accepts for %hhu\n", s);
     if (s < m->accept_limit_8) {
@@ -915,25 +915,45 @@ char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
     return mcclellanHasAccept(m, get_aux(m, s), report);
 }
 
+char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q) {
+    assert(n && q);
+
+    const struct mcclellan *m = getImplNfa(n);
+    u8 s = *(u8 *)q->state;
+    DEBUG_PRINTF("checking accepts for %hhu\n", s);
+    assert(s < m->accept_limit_8 || get_aux(m, s)->accept);
+
+    return s >= m->accept_limit_8;
+}
+
 char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report,
                                  struct mq *q) {
     assert(n && q);
 
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+    const struct mcclellan *m = getImplNfa(n);
     u16 s = *(u16 *)q->state;
     DEBUG_PRINTF("checking accepts for %hu\n", s);
 
     return mcclellanHasAccept(m, get_aux(m, s), report);
 }
 
+char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) {
+    assert(n && q);
+
+    const struct mcclellan *m = getImplNfa(n);
+    u16 s = *(u16 *)q->state;
+    DEBUG_PRINTF("checking accepts for %hu\n", s);
+
+    return !!get_aux(m, s)->accept;
+}
+
 char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end) {
     u64a offset = q->offset;
     const u8 *buffer = q->buffer;
     NfaCallback cb = q->cb;
     void *context = q->context;
     assert(n->type == MCCLELLAN_NFA_8);
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+    const struct mcclellan *m = getImplNfa(n);
     const u8 *hend = q->history + q->hlength;
 
     return nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q,
@@ -947,7 +967,7 @@ char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end) {
     NfaCallback cb = q->cb;
     void *context = q->context;
     assert(n->type == MCCLELLAN_NFA_16);
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+    const struct mcclellan *m = getImplNfa(n);
     const u8 *hend = q->history + q->hlength;
 
     return nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q,
@@ -961,7 +981,7 @@ char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report) {
     NfaCallback cb = q->cb;
     void *context = q->context;
     assert(n->type == MCCLELLAN_NFA_8);
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+    const struct mcclellan *m = getImplNfa(n);
     const u8 *hend = q->history + q->hlength;
 
     char rv = nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q,
@@ -980,7 +1000,7 @@ char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report) {
     NfaCallback cb = q->cb;
     void *context = q->context;
     assert(n->type == MCCLELLAN_NFA_16);
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+    const struct mcclellan *m = getImplNfa(n);
     const u8 *hend = q->history + q->hlength;
 
     char rv = nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q,
@@ -996,7 +1016,7 @@ char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report) {
 
 char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset,
                                            void *state, UNUSED u8 key) {
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
+    const struct mcclellan *m = getImplNfa(nfa);
     u8 s = offset ? m->start_floating : m->start_anchored;
     if (s) {
         *(u8 *)state = s;
@@ -1007,7 +1027,7 @@ char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset,
 
 char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset,
                                             void *state, UNUSED u8 key) {
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
+    const struct mcclellan *m = getImplNfa(nfa);
     u16 s = offset ? m->start_floating : m->start_anchored;
     if (s) {
         unaligned_store_u16(state, s);
@@ -1019,7 +1039,7 @@ char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset,
 void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state,
                                   const u8 *buf, char top, size_t start_off,
                                   size_t len, NfaCallback cb, void *ctxt) {
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
+    const struct mcclellan *m = getImplNfa(nfa);
 
     u8 s = top ? m->start_anchored : *(u8 *)state;
 
@@ -1037,7 +1057,7 @@ void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state,
 void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state,
                                    const u8 *buf, char top, size_t start_off,
                                    size_t len, NfaCallback cb, void *ctxt) {
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
+    const struct mcclellan *m = getImplNfa(nfa);
 
     u16 s = top ? m->start_anchored : unaligned_load_u16(state);
 
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -47,6 +47,7 @@ char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report);
|
|||||||
char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q);
|
char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
|
char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
|
||||||
struct mq *q);
|
struct mq *q);
|
||||||
|
char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecMcClellan8_queueInitState(const struct NFA *n, struct mq *q);
|
char nfaExecMcClellan8_queueInitState(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecMcClellan8_initCompressedState(const struct NFA *n, u64a offset,
|
char nfaExecMcClellan8_initCompressedState(const struct NFA *n, u64a offset,
|
||||||
void *state, u8 key);
|
void *state, u8 key);
|
||||||
@ -70,6 +71,7 @@ char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report);
|
|||||||
char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q);
|
char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report,
|
char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report,
|
||||||
struct mq *q);
|
struct mq *q);
|
||||||
|
char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecMcClellan16_queueInitState(const struct NFA *n, struct mq *q);
|
char nfaExecMcClellan16_queueInitState(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecMcClellan16_initCompressedState(const struct NFA *n, u64a offset,
|
char nfaExecMcClellan16_initCompressedState(const struct NFA *n, u64a offset,
|
||||||
void *state, u8 key);
|
void *state, u8 key);
|
||||||
|
@@ -395,4 +395,36 @@ dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
     }
 }
 
+static
+bool can_die_early(const raw_dfa &raw, dstate_id_t s,
+                   map<dstate_id_t, u32> &visited, u32 age_limit) {
+    if (contains(visited, s) && visited[s] >= age_limit) {
+        /* we have already visited (or are in the process of visiting) here with
+         * a looser limit. */
+        return false;
+    }
+    visited[s] = age_limit;
+
+    if (s == DEAD_STATE) {
+        return true;
+    }
+
+    if (age_limit == 0) {
+        return false;
+    }
+
+    for (const auto &next : raw.states[s].next) {
+        if (can_die_early(raw, next, visited, age_limit - 1)) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+bool can_die_early(const raw_dfa &raw, u32 age_limit) {
+    map<dstate_id_t, u32> visited;
+    return can_die_early(raw, raw.start_anchored, visited, age_limit);
+}
+
 } // namespace ue2
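
`can_die_early()` asks whether the DFA can reach `DEAD_STATE` within `age_limit` input bytes, which is the compile-time test for whether a prefix is worth running eagerly: only a prefix that can die early ever pays back the up-front scan by squashing literal groups. The `visited` map memoizes the loosest budget each state has been searched with, so a state is only re-expanded when revisited with a larger remaining budget, bounding the search at roughly states × age_limit. A self-contained C++ sketch of the same search over a toy transition table (names and representation hypothetical):

    #include <cstdint>
    #include <map>
    #include <vector>

    using State = uint32_t;
    static const State DEAD = 0; // toy convention: state 0 is the dead state

    // Can DEAD be reached from s within age_limit steps? `visited` memoizes
    // the largest budget each state has already been searched with.
    static bool canDieEarly(const std::vector<std::vector<State>> &next,
                            State s, std::map<State, uint32_t> &visited,
                            uint32_t age_limit) {
        auto it = visited.find(s);
        if (it != visited.end() && it->second >= age_limit) {
            return false; // already searched with an equal or looser budget
        }
        visited[s] = age_limit;
        if (s == DEAD) {
            return true;
        }
        if (age_limit == 0) {
            return false;
        }
        for (State t : next[s]) {
            if (canDieEarly(next, t, visited, age_limit - 1)) {
                return true;
            }
        }
        return false;
    }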
@@ -57,6 +57,8 @@ size_t hash_dfa(const raw_dfa &rdfa);
 
 dstate_id_t get_sds_or_proxy(const raw_dfa &raw);
 
+bool can_die_early(const raw_dfa &raw, u32 age_limit);
+
 } // namespace ue2
 
 #endif
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -36,7 +36,6 @@ struct NFA;
 
 char nfaExecMpv0_Q(const struct NFA *n, struct mq *q, s64a end);
 char nfaExecMpv0_reportCurrent(const struct NFA *n, struct mq *q);
-char nfaExecMpv0_inAccept(const struct NFA *n, ReportID report, struct mq *q);
 char nfaExecMpv0_queueInitState(const struct NFA *n, struct mq *q);
 char nfaExecMpv0_initCompressedState(const struct NFA *n, u64a offset,
                                      void *state, u8 key);
@@ -47,6 +46,7 @@ char nfaExecMpv0_expandState(const struct NFA *nfa, void *dest, const void *src,
 
 #define nfaExecMpv0_testEOD NFA_API_NO_IMPL
 #define nfaExecMpv0_inAccept NFA_API_NO_IMPL
+#define nfaExecMpv0_inAnyAccept NFA_API_NO_IMPL
 #define nfaExecMpv0_QR NFA_API_NO_IMPL
 #define nfaExecMpv0_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */
 #define nfaExecMpv0_B_Reverse NFA_API_NO_IMPL
@@ -175,10 +175,16 @@ char nfaReportCurrentMatches(const struct NFA *nfa, struct mq *q);
  */
 char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q);
 
+/**
+ * Returns non-zero if the NFA is in any accept state regardless of report
+ * ID.
+ */
+char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q);
+
 /**
  * Process the queued commands on the given NFA up to end or the first match.
  *
- * Note: This version is meant for rose prefix NFAs:
+ * Note: This version is meant for rose prefix/infix NFAs:
  * - never uses a callback
  * - loading of state at a point in history is not special cased
  *
@@ -187,9 +193,9 @@ char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q);
 * end with some variant of end. The location field of the events must
 * be monotonically increasing. If not all the data was processed during
 * the call, the queue is updated to reflect the remaining work.
- * @param report we are interested in, if set at the end of the scan returns
- * @ref MO_MATCHES_PENDING. If no report is desired, MO_INVALID_IDX should
- * be passed in.
+ * @param report we are interested in. If the given report will be raised at
+ * the end location, the function returns @ref MO_MATCHES_PENDING. If no
+ * match information is desired, MO_INVALID_IDX should be passed in.
 * @return @ref MO_ALIVE if the nfa is still active with no matches pending,
 * and @ref MO_MATCHES_PENDING if there are matches pending, 0 if not
 * alive
@@ -228,7 +228,6 @@ char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end) {
 
     assert(q);
     assert(end >= 0);
-    assert(q->context);
     assert(q->state);
     assert(q->cur < q->end);
     assert(q->end <= MAX_MQE_LEN);
@@ -285,6 +284,11 @@ char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q) {
     return 0;
 }
 
+char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q) {
+    DISPATCH_BY_NFA_TYPE(_inAnyAccept(nfa, q));
+    return 0;
+}
+
 char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID r) {
     DEBUG_PRINTF("nfa=%p\n", nfa);
 #ifdef DEBUG
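
The one-line body of the new `nfaInAnyAcceptState()` works because `DISPATCH_BY_NFA_TYPE` expands to a switch over `nfa->type` that returns through the matching engine-specific function. Roughly what that expansion boils down to for this call (engine list abridged; the real macro covers every engine and size variant):

    /* Abridged sketch of the dispatch expansion. */
    char nfaInAnyAcceptState_sketch(const struct NFA *nfa, struct mq *q) {
        switch (nfa->type) {
        case MCCLELLAN_NFA_8:
            return nfaExecMcClellan8_inAnyAccept(nfa, q);
        case CASTLE_NFA_0:
            return nfaExecCastle0_inAnyAccept(nfa, q);
        /* ... one case per engine type ... */
        default:
            return 0;
        }
    }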
@@ -47,6 +47,7 @@ enum nfa_kind {
     NFA_OUTFIX, //!< "outfix" nfa not triggered by external events
     NFA_OUTFIX_RAW, //!< "outfix", but with unmanaged reports
     NFA_REV_PREFIX, //! reverse running prefixes (for som)
+    NFA_EAGER_PREFIX, //!< rose prefix that is also run up to matches
 };
 
 /** \brief True if this kind of engine is triggered by a top event. */
@@ -63,8 +64,10 @@ bool is_triggered(enum nfa_kind k) {
 }
 
 /**
- * \brief True if this kind of engine generates callback events when it
- * enters accept states.
+ * \brief True if this kind of engine generates actively checks for accept
+ * states either to halt matching or to raise a callback. Only these engines
+ * generated with this property should call nfaQueueExec() or
+ * nfaQueueExecToMatch().
  */
 inline
 bool generates_callbacks(enum nfa_kind k) {
@@ -73,6 +76,24 @@ bool generates_callbacks(enum nfa_kind k) {
     case NFA_OUTFIX:
     case NFA_OUTFIX_RAW:
     case NFA_REV_PREFIX:
+    case NFA_EAGER_PREFIX:
+        return true;
+    default:
+        return false;
+    }
+}
+
+/**
+ * \brief True if this kind of engine has its state inspected to see if it is in
+ * an accept state. Engines generated with this property will commonly call
+ * nfaQueueExecRose(), nfaInAcceptState(), and nfaInAnyAcceptState().
+ */
+inline
+bool inspects_states_for_accepts(enum nfa_kind k) {
+    switch (k) {
+    case NFA_PREFIX:
+    case NFA_INFIX:
+    case NFA_EAGER_PREFIX:
         return true;
     default:
         return false;
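
Note that `NFA_EAGER_PREFIX` deliberately lands in both predicates: it is executed up front via `nfaQueueExecToMatch()` like an outfix (hence `generates_callbacks()`), yet rose still interrogates its state for accepts at literal-confirm time like any other prefix (hence `inspects_states_for_accepts()`). Illustrative checks, assuming the definitions above are in scope:

    #include <cassert>

    /* Illustrative only: an eager prefix participates in both styles. */
    void check_kind_predicates(void) {
        assert(generates_callbacks(NFA_EAGER_PREFIX));
        assert(inspects_states_for_accepts(NFA_EAGER_PREFIX));
        assert(!generates_callbacks(NFA_PREFIX)); /* plain prefix: inspected only */
        assert(inspects_states_for_accepts(NFA_PREFIX));
    }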
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -324,4 +324,49 @@ flat_set<NFAVertex> execute_graph(const NGHolder &running_g,
                          initial_states);
 }
 
+static
+bool can_die_early(const NGHolder &g, const vector<StateInfo> &info,
+                   const dynamic_bitset<> &s,
+                   map<dynamic_bitset<>, u32> &visited, u32 age_limit) {
+    if (contains(visited, s) && visited[s] >= age_limit) {
+        /* we have already (or are in the process) of visiting here with a
+         * looser limit. */
+        return false;
+    }
+    visited[s] = age_limit;
+
+    if (s.none()) {
+        DEBUG_PRINTF("dead\n");
+        return true;
+    }
+
+    if (age_limit == 0) {
+        return false;
+    }
+
+    dynamic_bitset<> all_succ(s.size());
+    step(g, info, s, &all_succ);
+    all_succ.reset(NODE_START_DOTSTAR);
+
+    for (u32 i = 0; i < N_CHARS; i++) {
+        dynamic_bitset<> next = all_succ;
+        filter_by_reach(info, &next, CharReach(i));
+        if (can_die_early(g, info, next, visited, age_limit - 1)) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+bool can_die_early(const NGHolder &g, u32 age_limit) {
+    if (proper_out_degree(g.startDs, g)) {
+        return false;
+    }
+    const vector<StateInfo> &info = makeInfoTable(g);
+    map<dynamic_bitset<>, u32> visited;
+    return can_die_early(g, info, makeStateBitset(g, {g.start}), visited,
+                         age_limit);
+}
+
 } // namespace ue2
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -64,6 +64,9 @@ flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag,
                                   const flat_set<NFAVertex> &input_start_states,
                                   const flat_set<NFAVertex> &initial);
 
+/* returns true if it is possible for the nfa to die within age_limit bytes */
+bool can_die_early(const NGHolder &g, u32 age_limit);
+
 } // namespace ue2
 
 #endif
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -315,15 +315,26 @@ void remove_edges(const Container &c, NGHolder &h, bool renumber = true) {
     remove_edges(c.begin(), c.end(), h, renumber);
 }
 
-static UNUSED
+inline
 bool is_triggered(const NGHolder &g) {
     return is_triggered(g.kind);
 }
 
-static UNUSED
+inline
 bool generates_callbacks(const NGHolder &g) {
     return generates_callbacks(g.kind);
 }
 
+inline
+bool has_managed_reports(const NGHolder &g) {
+    return has_managed_reports(g.kind);
+}
+
+inline
+bool inspects_states_for_accepts(const NGHolder &g) {
+    return inspects_states_for_accepts(g.kind);
+}
+
 } // namespace ue2
 
 #endif
|
|||||||
const map<u32, vector<vector<CharReach>>> &triggers,
|
const map<u32, vector<vector<CharReach>>> &triggers,
|
||||||
bool compress_state, bool do_accel, bool impl_test_only, u32 hint,
|
bool compress_state, bool do_accel, bool impl_test_only, u32 hint,
|
||||||
const CompileContext &cc) {
|
const CompileContext &cc) {
|
||||||
if (!generates_callbacks(h_in)) {
|
if (!has_managed_reports(h_in)) {
|
||||||
rm = nullptr;
|
rm = nullptr;
|
||||||
} else {
|
} else {
|
||||||
assert(rm);
|
assert(rm);
|
||||||
@ -413,7 +413,7 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm,
|
|||||||
|
|
||||||
set<NFAVertex> zombies = findZombies(*h, br_cyclic, state_ids, cc);
|
set<NFAVertex> zombies = findZombies(*h, br_cyclic, state_ids, cc);
|
||||||
|
|
||||||
if (generates_callbacks(*h)) {
|
if (has_managed_reports(*h)) {
|
||||||
assert(rm);
|
assert(rm);
|
||||||
remapReportsToPrograms(*h, *rm);
|
remapReportsToPrograms(*h, *rm);
|
||||||
}
|
}
|
||||||
@ -508,7 +508,7 @@ u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!generates_callbacks(g)) {
|
if (!has_managed_reports(g)) {
|
||||||
rm = nullptr;
|
rm = nullptr;
|
||||||
} else {
|
} else {
|
||||||
assert(rm);
|
assert(rm);
|
||||||
@ -547,7 +547,7 @@ void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm
|
|||||||
|
|
||||||
removeRedundancy(g, som);
|
removeRedundancy(g, som);
|
||||||
|
|
||||||
if (rm && generates_callbacks(g)) {
|
if (rm && has_managed_reports(g)) {
|
||||||
pruneHighlanderDominated(g, *rm);
|
pruneHighlanderDominated(g, *rm);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -560,7 +560,7 @@ void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm
|
|||||||
|
|
||||||
u32 countAccelStates(const NGHolder &g, const ReportManager *rm,
|
u32 countAccelStates(const NGHolder &g, const ReportManager *rm,
|
||||||
const CompileContext &cc) {
|
const CompileContext &cc) {
|
||||||
if (!generates_callbacks(g)) {
|
if (!has_managed_reports(g)) {
|
||||||
rm = nullptr;
|
rm = nullptr;
|
||||||
} else {
|
} else {
|
||||||
assert(rm);
|
assert(rm);
|
||||||
|
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -531,9 +531,9 @@ unique_ptr<raw_dfa> buildMcClellan(const NGHolder &graph,
     DEBUG_PRINTF("attempting to build ?%d? mcclellan\n", (int)graph.kind);
     assert(allMatchStatesHaveReports(graph));
 
-    bool prunable = grey.highlanderPruneDFA && generates_callbacks(graph);
-    assert(rm || !generates_callbacks(graph));
-    if (!generates_callbacks(graph)) {
+    bool prunable = grey.highlanderPruneDFA && has_managed_reports(graph);
+    assert(rm || !has_managed_reports(graph));
+    if (!has_managed_reports(graph)) {
         rm = nullptr;
     }
 
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -112,6 +112,12 @@ void splitLHS(const NGHolder &base, const vector<NFAVertex> &pivots,
     case NFA_SUFFIX:
         lhs->kind = NFA_INFIX;
         break;
+    case NFA_EAGER_PREFIX:
+        /* Current code should not be assigning eager until well after all the
+         * splitting is done. */
+        assert(0);
+        lhs->kind = NFA_EAGER_PREFIX;
+        break;
     case NFA_REV_PREFIX:
     case NFA_OUTFIX_RAW:
         assert(0);
@@ -154,6 +160,12 @@ void splitRHS(const NGHolder &base, const vector<NFAVertex> &pivots,
     case NFA_OUTFIX:
         rhs->kind = NFA_SUFFIX;
         break;
+    case NFA_EAGER_PREFIX:
+        /* Current code should not be assigning eager until well after all the
+         * splitting is done. */
+        assert(0);
+        rhs->kind = NFA_INFIX;
+        break;
     case NFA_REV_PREFIX:
     case NFA_OUTFIX_RAW:
         assert(0);
@@ -266,6 +266,86 @@ int roseBlockFloating(const struct RoseEngine *t, struct hs_scratch *scratch) {
     return can_stop_matching(scratch);
 }
 
+static rose_inline
+void runEagerPrefixesBlock(const struct RoseEngine *t,
+                           struct hs_scratch *scratch) {
+    if (!t->eagerIterOffset) {
+        return;
+    }
+
+    char *state = scratch->core_info.state;
+    u8 *ara = getActiveLeftArray(t, state); /* indexed by offsets into
+                                             * left_table */
+    const u32 arCount = t->activeLeftCount;
+    const u32 qCount = t->queueCount;
+    const struct LeftNfaInfo *left_table = getLeftTable(t);
+    const struct mmbit_sparse_iter *it = getByOffset(t, t->eagerIterOffset);
+
+    struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
+
+    u32 idx = 0;
+    u32 ri = mmbit_sparse_iter_begin(ara, arCount, &idx, it, si_state);
+    for (; ri != MMB_INVALID;
+         ri = mmbit_sparse_iter_next(ara, arCount, ri, &idx, it, si_state)) {
+        const struct LeftNfaInfo *left = left_table + ri;
+        u32 qi = ri + t->leftfixBeginQueue;
+        DEBUG_PRINTF("leftfix %u/%u, maxLag=%u\n", ri, arCount, left->maxLag);
+
+        assert(!fatbit_isset(scratch->aqa, qCount, qi));
+        assert(left->eager);
+        assert(!left->infix);
+
+        struct mq *q = scratch->queues + qi;
+        const struct NFA *nfa = getNfaByQueue(t, qi);
+
+        if (scratch->core_info.len < nfa->minWidth) {
+            /* we know that there is not enough data for this to ever match, so
+             * we can immediately squash/ */
+            mmbit_unset(ara, arCount, ri);
+            scratch->tctxt.groups &= left->squash_mask;
+        }
+
+        s64a loc = MIN(scratch->core_info.len, EAGER_STOP_OFFSET);
+
+        fatbit_set(scratch->aqa, qCount, qi);
+        initRoseQueue(t, qi, left, scratch);
+
+        pushQueueAt(q, 0, MQE_START, 0);
+        pushQueueAt(q, 1, MQE_TOP, 0);
+        pushQueueAt(q, 2, MQE_END, loc);
+        nfaQueueInitState(nfa, q);
+
+        char alive = nfaQueueExecToMatch(q->nfa, q, loc);
+
+        if (!alive) {
+            DEBUG_PRINTF("queue %u dead, squashing\n", qi);
+            mmbit_unset(ara, arCount, ri);
+            fatbit_unset(scratch->aqa, qCount, qi);
+            scratch->tctxt.groups &= left->squash_mask;
+        } else if (q->cur == q->end) {
+            assert(alive != MO_MATCHES_PENDING);
+            if (loc == (s64a)scratch->core_info.len) {
+                /* We know that the prefix does not match in the block so we
+                 * can squash the groups anyway even though it did not die */
+                /* TODO: if we knew the minimum lag the leftfix is checked at we
+                 * could make this check tighter */
+                DEBUG_PRINTF("queue %u has no match in block, squashing\n", qi);
+                mmbit_unset(ara, arCount, ri);
+                fatbit_unset(scratch->aqa, qCount, qi);
+                scratch->tctxt.groups &= left->squash_mask;
+            } else {
+                DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
+                q->cur = q->end = 0;
+                pushQueueAt(q, 0, MQE_START, loc);
+            }
+        } else {
+            assert(alive == MO_MATCHES_PENDING);
+            DEBUG_PRINTF("queue %u unfinished, nfa lives\n", qi);
+            q->end--; /* remove end item */
+        }
+    }
+}
+
 void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
     assert(t);
     assert(scratch);
@@ -314,6 +394,8 @@ void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
         hwlmExec(sbtable, scratch->core_info.buf, sblen, 0, roseCallback,
                  scratch, tctxt->groups);
     } else {
+        runEagerPrefixesBlock(t, scratch);
+
         if (roseBlockAnchored(t, scratch)) {
            return;
        }
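
Each eager prefix is run over at most the first `EAGER_STOP_OFFSET` bytes of the block before any literal scanning, using the standard queue protocol: START and TOP at offset 0, END at the stop location, then `nfaQueueExecToMatch()`. The three outcomes map directly onto the branches above, condensed here for reference (setup elided):

    pushQueueAt(q, 0, MQE_START, 0);  /* scan begins at offset 0 */
    pushQueueAt(q, 1, MQE_TOP, 0);    /* switch the prefix on at offset 0 */
    pushQueueAt(q, 2, MQE_END, loc);  /* loc = MIN(len, EAGER_STOP_OFFSET) */
    nfaQueueInitState(nfa, q);
    char alive = nfaQueueExecToMatch(q->nfa, q, loc);
    /* !alive                      -> prefix died: squash its groups
     * alive, queue fully consumed -> no match pending; if loc covered the
     *                                whole block, squash anyway
     * alive == MO_MATCHES_PENDING -> a match is pending: leave groups on */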
@@ -424,7 +424,7 @@ char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch,
     }
 
     s64a loc = (s64a)end - ci->buf_offset - leftfixLag;
-    assert(loc >= q_cur_loc(q));
+    assert(loc >= q_cur_loc(q) || left->eager);
     assert(leftfixReport != MO_INVALID_IDX);
 
     if (!is_infix && left->transient) {
@@ -471,7 +471,13 @@ char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch,
         DEBUG_PRINTF("checking for report %u\n", leftfixReport);
         DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv);
         return rv == MO_MATCHES_PENDING;
+    } else if (q_cur_loc(q) > loc) {
+        /* an eager leftfix may have already progressed past loc if there is no
+         * match at loc. */
+        assert(left->eager);
+        return 0;
     } else {
+        assert(q_cur_loc(q) == loc);
         DEBUG_PRINTF("checking for report %u\n", leftfixReport);
         char rv = nfaInAcceptState(q->nfa, leftfixReport, q);
         DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv);
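
`roseTestLeftfix()` previously assumed it always had to advance the leftfix forward to `loc` before asking `nfaInAcceptState()`; an eager leftfix breaks that invariant because it was already executed at the start of the block, so its queue location may sit beyond `loc` when no match was pending there. The relaxed assert and the new `q_cur_loc(q) > loc` branch encode exactly that case:

    /* Illustrative offsets only: an eager prefix already ran to, say, 64 up
     * front; a literal now needs the prefix's accept status at loc = 10.
     * q_cur_loc(q) == 64 > 10 with no pending match at 10 => report 0. */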
@@ -1038,6 +1038,7 @@ bool canImplementGraph(RoseBuildImpl *tbi, const RoseInGraph &in, NGHolder &h,
             return false;
         }
         break;
+    case NFA_EAGER_PREFIX:
     case NFA_REV_PREFIX:
     case NFA_OUTFIX_RAW:
         DEBUG_PRINTF("kind %u\n", (u32)h.kind);
@@ -50,6 +50,7 @@
 #include "nfa/nfa_build_util.h"
 #include "nfa/nfa_internal.h"
 #include "nfa/shufticompile.h"
+#include "nfagraph/ng_execute.h"
 #include "nfagraph/ng_holder.h"
 #include "nfagraph/ng_lbr.h"
 #include "nfagraph/ng_limex.h"
@@ -1046,8 +1047,9 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left,
     // streaming mode.
     const bool compress_state = !is_transient;

-    assert(!left.graph()
-           || left.graph()->kind == (is_prefix ? NFA_PREFIX : NFA_INFIX));
+    assert(is_prefix || !left.graph() || left.graph()->kind == NFA_INFIX);
+    assert(!is_prefix || !left.graph() || left.graph()->kind == NFA_PREFIX
+           || left.graph()->kind == NFA_EAGER_PREFIX);

     // Holder should be implementable as an NFA at the very least.
     if (!left.dfa() && left.graph()) {
@@ -1089,7 +1091,9 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left,

     if (!n && left.graph()) {
         map<u32, vector<vector<CharReach>>> triggers;
-        findTriggerSequences(tbi, infixTriggers.at(left), &triggers);
+        if (left.graph()->kind == NFA_INFIX) {
+            findTriggerSequences(tbi, infixTriggers.at(left), &triggers);
+        }
         n = constructNFA(*left.graph(), nullptr, fixed_depth_tops, triggers,
                          compress_state, cc);
     }
@@ -1125,17 +1129,309 @@ void setLeftNfaProperties(NFA &n, const left_id &left) {
     // graph.
 }

+static
+void appendTailToHolder(NGHolder &h, const flat_set<ReportID> &reports,
+                        const vector<NFAVertex> &starts,
+                        const vector<CharReach> &tail) {
+    assert(!tail.empty());
+    NFAVertex curr = add_vertex(h);
+    for (NFAVertex v : starts) {
+        assert(!edge(v, h.acceptEod, h).second);
+        assert(h[v].reports == reports);
+        h[v].reports.clear();
+        remove_edge(v, h.accept, h);
+        add_edge(v, curr, h);
+    }
+    auto it = tail.begin();
+    h[curr].char_reach = *it;
+    ++it;
+    while (it != tail.end()) {
+        NFAVertex old = curr;
+        curr = add_vertex(h);
+        add_edge(old, curr, h);
+        assert(!it->none());
+        h[curr].char_reach = *it;
+        ++it;
+    }
+
+    h[curr].reports = reports;
+    add_edge(curr, h.accept, h);
+}
+
+static
+void appendTailToHolder(NGHolder &h, const vector<CharReach> &tail) {
+    assert(in_degree(h.acceptEod, h) == 1);
+    assert(!tail.empty());
+
+    map<flat_set<ReportID>, vector<NFAVertex> > reporters;
+    for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
+        reporters[h[v].reports].push_back(v);
+    }
+
+    for (const auto &e : reporters) {
+        appendTailToHolder(h, e.first, e.second, tail);
+    }
+
+    h.renumberEdges();
+}
+
+static
+u32 decreaseLag(const RoseBuildImpl &build, NGHolder &h,
+                const vector<RoseVertex> &succs) {
+    const RoseGraph &rg = build.g;
+    static const size_t MAX_RESTORE_LEN = 5;
+
+    vector<CharReach> restored(MAX_RESTORE_LEN);
+    for (RoseVertex v : succs) {
+        u32 lag = rg[v].left.lag;
+        for (u32 lit_id : rg[v].literals) {
+            u32 delay = build.literals.right.at(lit_id).delay;
+            const ue2_literal &literal = build.literals.right.at(lit_id).s;
+            assert(lag <= literal.length() + delay);
+            size_t base = literal.length() + delay - lag;
+            if (base >= literal.length()) {
+                return 0;
+            }
+            size_t len = literal.length() - base;
+            len = MIN(len, restored.size());
+            restored.resize(len);
+            auto lit_it = literal.begin() + base;
+            for (u32 i = 0; i < len; i++) {
+                assert(lit_it != literal.end());
+                restored[i] |= *lit_it;
+                ++lit_it;
+            }
+        }
+    }
+
+    assert(!restored.empty());
+
+    appendTailToHolder(h, restored);
+
+    return restored.size();
+}
+
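Note: decreaseLag() reconstructs the character reach of the trailing literal bytes and glues them onto the prefix graph, so the eager prefix can run right up to the literal match point. A worked sketch of the tail arithmetic under assumed values (std::bitset<256> standing in for CharReach; not the build code itself):

    #include <algorithm>
    #include <bitset>
    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
        // hypothetical inputs: literal "foobar", no delay, lag of 3
        const std::string literal = "foobar";
        const size_t delay = 0, lag = 3, MAX_RESTORE_LEN = 5;

        size_t base = literal.size() + delay - lag;   // 3
        if (base >= literal.size()) {
            return 0;                                 // nothing restorable
        }
        size_t len = std::min(literal.size() - base, MAX_RESTORE_LEN); // 3

        // reach of the restored tail: 'b', 'a', 'r'
        std::vector<std::bitset<256>> restored(len);
        for (size_t i = 0; i < len; i++) {
            restored[i].set((unsigned char)literal[base + i]);
        }

        printf("appended %zu chars; new lag = %zu\n", len, lag - len); // 3, 0
        return 0;
    }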
+#define EAGER_DIE_BEFORE_LIMIT 10
+
+struct eager_info {
+    shared_ptr<NGHolder> new_graph;
+    u32 lag_adjust = 0;
+};
+
+static
+bool checkSuitableForEager(bool is_prefix, const left_id &left,
+                           const RoseBuildImpl &build,
+                           const vector<RoseVertex> &succs,
+                           rose_group squash_mask, rose_group initial_groups,
+                           eager_info &ei, const CompileContext &cc) {
+    DEBUG_PRINTF("checking prefix --> %016llx...\n", squash_mask);
+
+    const RoseGraph &rg = build.g;
+
+    if (!is_prefix) {
+        DEBUG_PRINTF("not prefix\n");
+        return false; /* only prefixes (for now...) */
+    }
+
+    if ((initial_groups & squash_mask) == initial_groups) {
+        DEBUG_PRINTF("no squash -- useless\n");
+        return false;
+    }
+
+    for (RoseVertex s : succs) {
+        if (build.isInETable(s)
+            || contains(rg[s].literals, build.eod_event_literal_id)) {
+            return false; /* Ignore EOD related prefixes */
+        }
+    }
+
+    if (left.dfa()) {
+        const raw_dfa &dfa = *left.dfa();
+        if (dfa.start_floating != DEAD_STATE) {
+            return false; /* not purely anchored */
+        }
+        if (!dfa.states[dfa.start_anchored].reports.empty()) {
+            return false; /* vacuous (todo: handle?) */
+        }
+
+        if (!can_die_early(dfa, EAGER_DIE_BEFORE_LIMIT)) {
+            return false;
+        }
+        ei.new_graph = rg[succs[0]].left.graph;
+    } else if (left.graph()) {
+        const NGHolder &g = *left.graph();
+        if (proper_out_degree(g.startDs, g)) {
+            return false; /* not purely anchored */
+        }
+        if (is_match_vertex(g.start, g)) {
+            return false; /* vacuous (todo: handle?) */
+        }
+
+        ei.new_graph = cloneHolder(*left.graph());
+        auto gg = ei.new_graph;
+        gg->kind = NFA_EAGER_PREFIX;
+
+        ei.lag_adjust = decreaseLag(build, *gg, succs);
+
+        if (!can_die_early(*gg, EAGER_DIE_BEFORE_LIMIT)) {
+            DEBUG_PRINTF("not eager as stuck alive\n");
+            return false;
+        }
+
+        /* We need to ensure that adding in the literals does not cause us to no
+         * longer be able to build an nfa. */
+        bool ok = isImplementableNFA(*gg, nullptr, cc);
+        if (!ok) {
+            return false;
+        }
+    } else {
+        DEBUG_PRINTF("unable to determine if good for eager running\n");
+        return false;
+    }
+
+    DEBUG_PRINTF("eager prefix\n");
+    return true;
+}
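Note: eligibility hinges on can_die_early(), i.e. the prefix must be provably dead or matched within EAGER_DIE_BEFORE_LIMIT bytes of an anchored start. A simplified illustration of why a purely anchored pattern has this property (toy scanner, not the real NFA analysis):

    #include <cstdio>
    #include <cstring>

    // returns the offset at which the anchored literal prefix is known
    // dead, or -1 if it matched
    int scan_anchored(const char *pat, const char *buf, size_t len) {
        for (size_t i = 0; pat[i]; i++) {
            if (i >= len || buf[i] != pat[i]) {
                // dead: nothing later in the buffer can revive an
                // anchored match
                return (int)i;
            }
        }
        return -1; // matched
    }

    int main() {
        const char *buf = "abXdefgh";
        int dead_at = scan_anchored("abc", buf, strlen(buf));
        printf("dead at offset %d\n", dead_at); // 2, well under a 10-byte limit
        return 0;
    }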
+
+static
+left_id updateLeftfixWithEager(RoseGraph &g, const eager_info &ei,
+                               const vector<RoseVertex> &succs) {
+    u32 lag_adjust = ei.lag_adjust;
+    auto gg = ei.new_graph;
+    for (RoseVertex v : succs) {
+        g[v].left.graph = gg;
+        assert(g[v].left.lag >= lag_adjust);
+        g[v].left.lag -= lag_adjust;
+        DEBUG_PRINTF("added %u literal chars back, new lag %u\n", lag_adjust,
+                     g[v].left.lag);
+    }
+    left_id leftfix = g[succs[0]].left;
+
+    if (leftfix.graph()) {
+        assert(leftfix.graph()->kind == NFA_PREFIX
+               || leftfix.graph()->kind == NFA_EAGER_PREFIX);
+        leftfix.graph()->kind = NFA_EAGER_PREFIX;
+    }
+    if (leftfix.dfa()) {
+        assert(leftfix.dfa()->kind == NFA_PREFIX);
+        leftfix.dfa()->kind = NFA_EAGER_PREFIX;
+    }
+
+    return leftfix;
+}
+
+static
+bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi,
+                  const map<left_id, set<PredTopPair> > &infixTriggers,
+                  set<u32> *no_retrigger_queues, set<u32> *eager_queues,
+                  const map<left_id, eager_info> &eager,
+                  const vector<RoseVertex> &succs, left_id leftfix) {
+    RoseGraph &g = build.g;
+    const CompileContext &cc = build.cc;
+    const ReportManager &rm = build.rm;
+
+    bool is_transient = contains(build.transient, leftfix);
+    rose_group squash_mask = build.rose_squash_masks.at(leftfix);
+
+    DEBUG_PRINTF("making %sleftfix\n", is_transient ? "transient " : "");
+
+    if (contains(eager, leftfix)) {
+        eager_queues->insert(qi);
+        leftfix = updateLeftfixWithEager(g, eager.at(leftfix), succs);
+    }
+
+    aligned_unique_ptr<NFA> nfa;
+    // Need to build NFA, which is either predestined to be a Haig (in SOM mode)
+    // or could be all manner of things.
+    if (leftfix.haig()) {
+        nfa = goughCompile(*leftfix.haig(), build.ssm.somPrecision(), cc, rm);
+    } else {
+        nfa = makeLeftNfa(build, leftfix, prefix, is_transient, infixTriggers,
+                          cc);
+    }
+
+    if (!nfa) {
+        assert(!"failed to build leftfix");
+        return false;
+    }
+
+    setLeftNfaProperties(*nfa, leftfix);
+
+    build.leftfix_queue_map.emplace(leftfix, qi);
+    nfa->queueIndex = qi;
+
+    if (!prefix && !leftfix.haig() && leftfix.graph()
+        && nfaStuckOn(*leftfix.graph())) {
+        DEBUG_PRINTF("%u sticks on\n", qi);
+        no_retrigger_queues->insert(qi);
+    }
+
+    DEBUG_PRINTF("built leftfix, qi=%u\n", qi);
+    add_nfa_to_blob(bc, *nfa);
+
+    // Leftfixes can have stop alphabets.
+    vector<u8> stop(N_CHARS, 0);
+    /* haigs track som information - need more care */
+    som_type som = leftfix.haig() ? SOM_LEFT : SOM_NONE;
+    if (leftfix.graph()) {
+        stop = findLeftOffsetStopAlphabet(*leftfix.graph(), som);
+    } else if (leftfix.castle()) {
+        stop = findLeftOffsetStopAlphabet(*leftfix.castle(), som);
+    }
+
+    // Infix NFAs can have bounds on their queue lengths.
+    u32 max_queuelen = UINT32_MAX;
+    if (!prefix) {
+        set<ue2_literal> lits;
+        for (RoseVertex v : succs) {
+            for (auto u : inv_adjacent_vertices_range(v, g)) {
+                for (u32 lit_id : g[u].literals) {
+                    lits.insert(build.literals.right.at(lit_id).s);
+                }
+            }
+        }
+        DEBUG_PRINTF("%zu literals\n", lits.size());
+        max_queuelen = findMaxInfixMatches(leftfix, lits);
+        if (max_queuelen < UINT32_MAX) {
+            max_queuelen++;
+        }
+    }
+
+    u32 max_width;
+    if (is_transient) {
+        depth d = findMaxWidth(leftfix);
+        assert(d.is_finite());
+        max_width = d;
+    } else {
+        max_width = 0;
+    }
+
+    u8 cm_count = 0;
+    CharReach cm_cr;
+    if (cc.grey.allowCountingMiracles) {
+        findCountingMiracleInfo(leftfix, stop, &cm_count, &cm_cr);
+    }
+
+    for (RoseVertex v : succs) {
+        bc.leftfix_info.emplace(v, left_build_info(qi, g[v].left.lag, max_width,
+                                                   squash_mask, stop,
+                                                   max_queuelen, cm_count,
+                                                   cm_cr));
+    }
+
+    return true;
+}
+
 static
 bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc,
                     QueueIndexFactory &qif, set<u32> *no_retrigger_queues,
-                    bool do_prefix) {
-    const RoseGraph &g = tbi.g;
+                    set<u32> *eager_queues, bool do_prefix) {
+    RoseGraph &g = tbi.g;
     const CompileContext &cc = tbi.cc;
-    const ReportManager &rm = tbi.rm;
-
-    ue2::unordered_map<left_id, u32> seen; // already built queue indices

     map<left_id, set<PredTopPair> > infixTriggers;
+    vector<left_id> order;
+    unordered_map<left_id, vector<RoseVertex> > succs;
     findInfixTriggers(tbi, &infixTriggers);

     for (auto v : vertices_range(g)) {
@@ -1143,6 +1439,7 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc,
             continue;
         }

+        assert(tbi.isNonRootSuccessor(v) != tbi.isRootSuccessor(v));
         bool is_prefix = tbi.isRootSuccessor(v);

         if (do_prefix != is_prefix) {
@@ -1156,8 +1453,6 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc,
         // our in-edges.
         assert(roseHasTops(g, v));

-        u32 qi; // queue index, set below.
-        u32 lag = g[v].left.lag;
         bool is_transient = contains(tbi.transient, leftfix);

         // Transient leftfixes can sometimes be implemented solely with
@@ -1173,95 +1468,42 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc,
             }
         }

-        if (contains(seen, leftfix)) {
-            // NFA already built.
-            qi = seen[leftfix];
-            assert(contains(bc.engineOffsets, qi));
-            DEBUG_PRINTF("sharing leftfix, qi=%u\n", qi);
-        } else {
-            DEBUG_PRINTF("making %sleftfix\n", is_transient ? "transient " : "");
-
-            aligned_unique_ptr<NFA> nfa;
-
-            // Need to build NFA, which is either predestined to be a Haig (in
-            // SOM mode) or could be all manner of things.
-            if (leftfix.haig()) {
-                nfa = goughCompile(*leftfix.haig(), tbi.ssm.somPrecision(), cc,
-                                   rm);
-            } else {
-                assert(tbi.isNonRootSuccessor(v) != tbi.isRootSuccessor(v));
-                nfa = makeLeftNfa(tbi, leftfix, is_prefix, is_transient,
-                                  infixTriggers, cc);
-            }
-
-            if (!nfa) {
-                assert(!"failed to build leftfix");
-                return false;
-            }
-
-            setLeftNfaProperties(*nfa, leftfix);
-
-            qi = qif.get_queue();
-            tbi.leftfix_queue_map.emplace(leftfix, qi);
-            nfa->queueIndex = qi;
-
-            if (!is_prefix && !leftfix.haig() && leftfix.graph() &&
-                nfaStuckOn(*leftfix.graph())) {
-                DEBUG_PRINTF("%u sticks on\n", qi);
-                no_retrigger_queues->insert(qi);
-            }
-
-            DEBUG_PRINTF("built leftfix, qi=%u\n", qi);
-            add_nfa_to_blob(bc, *nfa);
-            seen.emplace(leftfix, qi);
-        }
-
-        rose_group squash_mask = tbi.rose_squash_masks.at(leftfix);
-
-        // Leftfixes can have stop alphabets.
-        vector<u8> stop(N_CHARS, 0);
-        /* haigs track som information - need more care */
-        som_type som = leftfix.haig() ? SOM_LEFT : SOM_NONE;
-        if (leftfix.graph()) {
-            stop = findLeftOffsetStopAlphabet(*leftfix.graph(), som);
-        } else if (leftfix.castle()) {
-            stop = findLeftOffsetStopAlphabet(*leftfix.castle(), som);
-        }
-
-        // Infix NFAs can have bounds on their queue lengths.
-        u32 max_queuelen = UINT32_MAX;
-        if (!is_prefix) {
-            set<ue2_literal> lits;
-            for (auto u : inv_adjacent_vertices_range(v, tbi.g)) {
-                for (u32 lit_id : tbi.g[u].literals) {
-                    lits.insert(tbi.literals.right.at(lit_id).s);
-                }
-            }
-            DEBUG_PRINTF("%zu literals\n", lits.size());
-            max_queuelen = findMaxInfixMatches(leftfix, lits);
-            if (max_queuelen < UINT32_MAX) {
-                max_queuelen++;
-            }
-        }
-
-        u32 max_width;
-        if (is_transient) {
-            depth d = findMaxWidth(leftfix);
-            assert(d.is_finite());
-            max_width = d;
-        } else {
-            max_width = 0;
-        }
-
-        u8 cm_count = 0;
-        CharReach cm_cr;
-        if (cc.grey.allowCountingMiracles) {
-            findCountingMiracleInfo(leftfix, stop, &cm_count, &cm_cr);
-        }
-
-        bc.leftfix_info.emplace(
-            v, left_build_info(qi, lag, max_width, squash_mask, stop,
-                               max_queuelen, cm_count, cm_cr));
+        if (!contains(succs, leftfix)) {
+            order.push_back(leftfix);
+        }
+
+        succs[leftfix].push_back(v);
+    }
+
+    rose_group initial_groups = tbi.getInitialGroups();
+    rose_group combined_eager_squashed_mask = ~0ULL;
+
+    map<left_id, eager_info> eager;
+
+    for (const left_id &leftfix : order) {
+        const auto &left_succs = succs[leftfix];
+
+        rose_group squash_mask = tbi.rose_squash_masks.at(leftfix);
+        eager_info ei;
+
+        if (checkSuitableForEager(do_prefix, leftfix, tbi, left_succs,
+                                  squash_mask, initial_groups, ei, cc)) {
+            eager[leftfix] = ei;
+            combined_eager_squashed_mask &= squash_mask;
+            DEBUG_PRINTF("combo %016llx...\n", combined_eager_squashed_mask);
+        }
+    }
+
+    if (do_prefix && combined_eager_squashed_mask & initial_groups) {
+        DEBUG_PRINTF("eager groups won't squash everyone - be lazy\n");
+        eager_queues->clear();
+        eager.clear();
+    }
+
+    for (const left_id &leftfix : order) {
+        buildLeftfix(tbi, bc, do_prefix, qif.get_queue(), infixTriggers,
+                     no_retrigger_queues, eager_queues, eager, succs[leftfix],
+                     leftfix);
     }

     return true;
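Note: the combined-mask test above makes eagerness all-or-nothing for a prefix pass: it only pays if the eager set, should every member die, can squash every initially-enabled group. A small sketch of that decision with hypothetical masks:

    #include <cstdint>
    #include <cstdio>

    int main() {
        uint64_t initial_groups = 0x7;  // groups 0-2 on at start of scan
        uint64_t combined = ~0ULL;

        // squash masks of two candidate eager prefixes
        uint64_t squash_masks[] = {~0x3ULL, ~0x4ULL};
        for (uint64_t m : squash_masks) {
            combined &= m;
        }

        if (combined & initial_groups) {
            printf("be lazy: some group would stay on regardless\n");
        } else {
            printf("go eager: combined squash covers all initial groups\n");
        }
        return 0;
    }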
@@ -1613,9 +1855,11 @@ void buildCountingMiracles(RoseBuildImpl &build, build_context &bc) {
     }
 }

+/* Note: buildNfas may reduce the lag for vertices that have prefixes */
 static
 bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif,
-               set<u32> *no_retrigger_queues, u32 *leftfixBeginQueue) {
+               set<u32> *no_retrigger_queues, set<u32> *eager_queues,
+               u32 *leftfixBeginQueue) {
     assignSuffixQueues(tbi, bc);

     if (!buildSuffixes(tbi, bc, no_retrigger_queues)) {
@@ -1624,11 +1868,13 @@ bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif,

     *leftfixBeginQueue = qif.allocated_count();

-    if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, true)) {
+    if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, eager_queues,
+                        true)) {
         return false;
     }

-    if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, false)) {
+    if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, eager_queues,
+                        false)) {
         return false;
     }

@@ -1672,10 +1918,10 @@ static
 void findTransientQueues(const map<RoseVertex, left_build_info> &leftfix_info,
                          set<u32> *out) {
     DEBUG_PRINTF("curating transient queues\n");
-    for (const auto &rbi : leftfix_info | map_values) {
-        if (rbi.transient) {
-            DEBUG_PRINTF("q %u is transient\n", rbi.queue);
-            out->insert(rbi.queue);
+    for (const auto &build : leftfix_info | map_values) {
+        if (build.transient) {
+            DEBUG_PRINTF("q %u is transient\n", build.queue);
+            out->insert(build.queue);
         }
     }
 }
@@ -3301,9 +3547,9 @@ void assignStateIndices(const RoseBuildImpl &build, build_context &bc) {
 }

 static
-bool hasUsefulStops(const left_build_info &rbi) {
+bool hasUsefulStops(const left_build_info &build) {
     for (u32 i = 0; i < N_CHARS; i++) {
-        if (rbi.stopAlphabet[i]) {
+        if (build.stopAlphabet[i]) {
             return true;
         }
     }
@@ -3312,6 +3558,7 @@ bool hasUsefulStops(const left_build_info &rbi) {

 static
 void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc,
+                        const set<u32> &eager_queues,
                         u32 leftfixBeginQueue, u32 leftfixCount,
                         vector<LeftNfaInfo> &leftTable, u32 *laggedRoseCount,
                         size_t *history) {
@@ -3371,6 +3618,7 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc,
             DEBUG_PRINTF("mw = %u\n", lbi.transient);
             left.transient = verify_u8(lbi.transient);
             left.infix = tbi.isNonRootSuccessor(v);
+            left.eager = contains(eager_queues, lbi.queue);

             // A rose has a lagIndex if it's non-transient and we are
             // streaming.
@@ -4271,6 +4519,25 @@ void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) {
     }
 }

+static
+u32 buildEagerQueueIter(const set<u32> &eager, u32 leftfixBeginQueue,
+                        u32 queue_count,
+                        build_context &bc) {
+    if (eager.empty()) {
+        return 0;
+    }
+
+    vector<u32> vec;
+    for (u32 q : eager) {
+        assert(q >= leftfixBeginQueue);
+        vec.push_back(q - leftfixBeginQueue);
+    }
+
+    vector<mmbit_sparse_iter> iter;
+    mmbBuildSparseIterator(iter, vec, queue_count - leftfixBeginQueue);
+    return addIteratorToTable(bc, iter);
+}
+
 aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     DerivedBoundaryReports dboundary(boundary);
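Note: buildEagerQueueIter() bakes the eager queue subset into a multibit sparse iterator so the runtime never scans non-eager slots of the active-leftfix array. A stand-in model of that precomputed-subset iteration using standard containers (not the multibit structures):

    #include <cstdio>
    #include <vector>

    int main() {
        // compile time: slots 1 and 4 (relative to leftfixBeginQueue)
        // hold eager prefixes
        std::vector<unsigned> eager_slots = {1, 4};

        // run time: which leftfixes are currently active
        std::vector<bool> active = {true, true, false, true, true, false};

        for (unsigned ri : eager_slots) { // visit the sparse subset only
            if (active[ri]) {
                printf("run eager leftfix in slot %u\n", ri);
            }
        }
        return 0;
    }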
@@ -4305,7 +4572,10 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     u32 outfixEndQueue = qif.allocated_count();
     u32 leftfixBeginQueue = outfixEndQueue;

-    if (!buildNfas(*this, bc, qif, &no_retrigger_queues,
+    set<u32> eager_queues;
+
+    /* Note: buildNfas may reduce the lag for vertices that have prefixes */
+    if (!buildNfas(*this, bc, qif, &no_retrigger_queues, &eager_queues,
                    &leftfixBeginQueue)) {
         return nullptr;
     }
@@ -4325,7 +4595,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {

     u32 laggedRoseCount = 0;
     vector<LeftNfaInfo> leftInfoTable;
-    buildLeftInfoTable(*this, bc, leftfixBeginQueue,
+    buildLeftInfoTable(*this, bc, eager_queues, leftfixBeginQueue,
                        queue_count - leftfixBeginQueue, leftInfoTable,
                        &laggedRoseCount, &historyRequired);

@@ -4340,6 +4610,8 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     buildActiveLeftIter(leftInfoTable, activeLeftIter);

     u32 lastByteOffset = buildLastByteIter(g, bc);
+    u32 eagerIterOffset = buildEagerQueueIter(eager_queues, leftfixBeginQueue,
+                                              queue_count, bc);

     // Enforce role table resource limit.
     if (num_vertices(g) > cc.grey.limitRoseRoleCount) {
@@ -4513,6 +4785,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     engine->activeArrayCount = activeArrayCount;
     engine->activeLeftCount = activeLeftCount;
     engine->queueCount = queue_count;
+    engine->eagerIterOffset = eagerIterOffset;
     engine->handledKeyCount = bc.handledKeys.size();

     engine->group_weak_end = group_weak_end;
@@ -76,6 +76,8 @@ string to_string(nfa_kind k) {
         return "REV_PREFIX";
     case NFA_OUTFIX_RAW:
         return "OUTFIX_RAW";
+    case NFA_EAGER_PREFIX:
+        return "EAGER_PREFIX";
     }
     assert(0);
     return "?";
@@ -150,7 +150,7 @@ struct left_id {
         : g(in.graph.get()), c(in.castle.get()), d(in.dfa.get()),
           h(in.haig.get()), dfa_min_width(in.dfa_min_width),
           dfa_max_width(in.dfa_max_width) {
-        assert(!g || !generates_callbacks(*g));
+        assert(!g || !has_managed_reports(*g));
     }
     bool operator==(const left_id &b) const {
         bool rv = g == b.g && c == b.c && h == b.h && d == b.d;
@@ -605,6 +605,9 @@ void dumpNfaNotes(ofstream &fout, const RoseEngine *t, const NFA *n) {
     }

     const LeftNfaInfo *left = getLeftInfoByQueue(t, qindex);
+    if (left->eager) {
+        fout << "eager ";
+    }
     if (left->transient) {
         fout << "transient " << (u32)left->transient << " ";
     }
@@ -1018,6 +1021,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
     DUMP_U32(t, activeArrayCount);
     DUMP_U32(t, activeLeftCount);
     DUMP_U32(t, queueCount);
+    DUMP_U32(t, eagerIterOffset);
     DUMP_U32(t, handledKeyCount);
     DUMP_U32(t, leftOffset);
     DUMP_U32(t, roseCount);
@@ -144,6 +144,7 @@ struct LeftNfaInfo {
     u32 stopTable; // stop table index, or ROSE_OFFSET_INVALID
     u8 transient; /**< 0 if not transient, else max width of transient prefix */
     char infix; /* TODO: make flags */
+    char eager; /**< nfa should be run eagerly to first match or death */
     char eod_check; /**< nfa is used by the event eod literal */
     u32 countingMiracleOffset; /** if not 0, offset to RoseCountingMiracle. */
     rose_group squash_mask; /* & mask applied when rose nfa dies */
@@ -366,6 +367,9 @@ struct RoseEngine {
     u32 activeLeftCount; //number of nfas tracked in the active rose array
     u32 queueCount; /**< number of nfa queues */

+    u32 eagerIterOffset; /**< offset to sparse iter for eager prefixes or 0 if
+                          * none */
+
     /** \brief Number of keys used by CHECK_SET_HANDLED instructions in role
      * programs. Used to size the handled_roles fatbit in scratch. */
     u32 handledKeyCount;
@@ -55,6 +55,11 @@

 #define rose_inline really_inline

+/* Maximum offset that we will eagerly run prefixes to. Beyond this point, eager
+ * prefixes are always run in exactly the same way as normal prefixes. */
+#define EAGER_STOP_OFFSET 64
+
+
 static really_inline
 const void *getByOffset(const struct RoseEngine *t, u32 offset) {
     assert(offset < t->size);
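Note: EAGER_STOP_OFFSET caps eager running at an absolute stream offset. A sketch of the clamp that runEagerPrefixesStream() applies per block (assumed offsets, standard library only):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    #define EAGER_STOP_OFFSET 64

    int main() {
        uint64_t buf_offset = 40; // absolute offset of this stream block
        int64_t len = 100;        // bytes in this block

        if (buf_offset >= EAGER_STOP_OFFSET) {
            printf("past the eager window entirely\n");
            return 0;
        }
        // scan the smaller of the block length and the remaining budget
        int64_t loc = std::min<int64_t>(len, EAGER_STOP_OFFSET - buf_offset);
        printf("eagerly scan first %lld bytes of this block\n",
               (long long)loc); // 24
        return 0;
    }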
@@ -423,6 +423,92 @@ void do_rebuild(const struct RoseEngine *t, const struct HWLM *ftable,
     assert(!can_stop_matching(scratch));
 }

+static rose_inline
+void runEagerPrefixesStream(const struct RoseEngine *t,
+                            struct hs_scratch *scratch) {
+    if (!t->eagerIterOffset
+        || scratch->core_info.buf_offset >= EAGER_STOP_OFFSET) {
+        return;
+    }
+
+    char *state = scratch->core_info.state;
+    u8 *ara = getActiveLeftArray(t, state); /* indexed by offsets into
+                                             * left_table */
+    const u32 arCount = t->activeLeftCount;
+    const u32 qCount = t->queueCount;
+    const struct LeftNfaInfo *left_table = getLeftTable(t);
+    const struct mmbit_sparse_iter *it = getByOffset(t, t->eagerIterOffset);
+
+    struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
+
+    u32 idx = 0;
+    u32 ri = mmbit_sparse_iter_begin(ara, arCount, &idx, it, si_state);
+    for (; ri != MMB_INVALID;
+           ri = mmbit_sparse_iter_next(ara, arCount, ri, &idx, it, si_state)) {
+        const struct LeftNfaInfo *left = left_table + ri;
+        u32 qi = ri + t->leftfixBeginQueue;
+        DEBUG_PRINTF("leftfix %u of %u, maxLag=%u\n", ri, arCount, left->maxLag);
+
+        assert(!fatbit_isset(scratch->aqa, qCount, qi));
+        assert(left->eager);
+        assert(!left->infix);
+
+        struct mq *q = scratch->queues + qi;
+        const struct NFA *nfa = getNfaByQueue(t, qi);
+        s64a loc = MIN(scratch->core_info.len,
+                       EAGER_STOP_OFFSET - scratch->core_info.buf_offset);
+
+        fatbit_set(scratch->aqa, qCount, qi);
+        initRoseQueue(t, qi, left, scratch);
+
+        if (scratch->core_info.buf_offset) {
+            s64a sp = left->transient ? -(s64a)scratch->core_info.hlen
+                                      : -(s64a)loadRoseDelay(t, state, left);
+            pushQueueAt(q, 0, MQE_START, sp);
+            if (scratch->core_info.buf_offset + sp > 0) {
+                loadStreamState(nfa, q, sp);
+                /* if the leftfix fix is currently in a match state, we cannot
+                 * advance it. */
+                if (nfaInAnyAcceptState(nfa, q)) {
+                    continue;
+                }
+                pushQueueAt(q, 1, MQE_END, loc);
+            } else {
+                pushQueueAt(q, 1, MQE_TOP, sp);
+                pushQueueAt(q, 2, MQE_END, loc);
+                nfaQueueInitState(q->nfa, q);
+            }
+        } else {
+            pushQueueAt(q, 0, MQE_START, 0);
+            pushQueueAt(q, 1, MQE_TOP, 0);
+            pushQueueAt(q, 2, MQE_END, loc);
+            nfaQueueInitState(nfa, q);
+        }
+
+        char alive = nfaQueueExecToMatch(q->nfa, q, loc);
+
+        if (!alive) {
+            DEBUG_PRINTF("queue %u dead, squashing\n", qi);
+            mmbit_unset(ara, arCount, ri);
+            fatbit_unset(scratch->aqa, qCount, qi);
+            scratch->tctxt.groups &= left->squash_mask;
+        } else if (q->cur == q->end) {
+            assert(alive != MO_MATCHES_PENDING);
+            /* unlike in block mode we cannot squash groups if there is no match
+             * in this block as we need the groups on for later stream writes */
+            /* TODO: investigate possibility of a method to suppress groups for
+             * a single stream block. */
+            DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
+            q->cur = q->end = 0;
+            pushQueueAt(q, 0, MQE_START, loc);
+        } else {
+            assert(alive == MO_MATCHES_PENDING);
+            DEBUG_PRINTF("queue %u unfinished, nfa lives\n", qi);
+            q->end--; /* remove end item */
+        }
+    }
+}
+
 void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
     DEBUG_PRINTF("OH HAI\n");
     assert(t);
@@ -472,6 +558,8 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
         streamInitSufPQ(t, state, scratch);
     }

+    runEagerPrefixesStream(t, scratch);
+
     u32 alen = t->anchoredDistance > offset ?
         MIN(length + offset, t->anchoredDistance) - offset : 0;
