mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
literal matching: separate path for pure literal patterns
This commit is contained in:
parent
63e7d89fcc
commit
f68723a606
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -807,6 +807,9 @@ void findIncludedLits(vector<hwlmLiteral> &lits,
|
||||
for (size_t i = 0; i < cnt; i++) {
|
||||
u32 bucket1 = group[i].first;
|
||||
u32 id1 = group[i].second;
|
||||
if (lits[id1].pure) {
|
||||
continue;
|
||||
}
|
||||
buildSquashMask(lits, id1, bucket1, i + 1, group, parent_map,
|
||||
exception_map);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -62,6 +62,7 @@ struct LitInfo {
|
||||
u8 size;
|
||||
u8 flags; //!< bitfield of flags from FDR_LIT_FLAG_* above.
|
||||
u8 next;
|
||||
u8 pure; //!< The pass-on of pure flag from hwlmLiteral.
|
||||
};
|
||||
|
||||
#define FDRC_FLAG_NO_CONFIRM 1
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -87,6 +87,7 @@ void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
|
||||
info.flags = flags;
|
||||
info.size = verify_u8(max(lit.msk.size(), lit.s.size()));
|
||||
info.groups = lit.groups;
|
||||
info.pure = lit.pure;
|
||||
|
||||
// these are built up assuming a LE machine
|
||||
CONF_TYPE msk = all_ones;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -65,6 +65,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
|
||||
u8 oldNext; // initialized in loop
|
||||
do {
|
||||
assert(ISALIGNED(li));
|
||||
scratch->pure = li->pure;
|
||||
|
||||
if (unlikely((conf_key & li->msk) != li->v)) {
|
||||
goto out;
|
||||
@ -99,6 +100,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
|
||||
li++;
|
||||
} while (oldNext);
|
||||
scratch->fdr_conf = NULL;
|
||||
scratch->pure = 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -83,9 +83,10 @@ bool maskIsConsistent(const std::string &s, bool nocase, const vector<u8> &msk,
|
||||
* \ref HWLM_MASKLEN. */
|
||||
hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in,
|
||||
bool noruns_in, u32 id_in, hwlm_group_t groups_in,
|
||||
const vector<u8> &msk_in, const vector<u8> &cmp_in)
|
||||
const vector<u8> &msk_in, const vector<u8> &cmp_in,
|
||||
bool pure_in)
|
||||
: s(s_in), id(id_in), nocase(nocase_in), noruns(noruns_in),
|
||||
groups(groups_in), msk(msk_in), cmp(cmp_in) {
|
||||
groups(groups_in), msk(msk_in), cmp(cmp_in), pure(pure_in) {
|
||||
assert(s.size() <= HWLM_LITERAL_MAX_LEN);
|
||||
assert(msk.size() <= HWLM_MASKLEN);
|
||||
assert(msk.size() == cmp.size());
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -113,15 +113,20 @@ struct hwlmLiteral {
|
||||
*/
|
||||
std::vector<u8> cmp;
|
||||
|
||||
bool pure; //!< \brief The pass-on of pure flag from LitFragment.
|
||||
|
||||
/** \brief Complete constructor, takes group information and msk/cmp.
|
||||
*
|
||||
* This constructor takes a msk/cmp pair. Both must be vectors of length <=
|
||||
* \ref HWLM_MASKLEN. */
|
||||
hwlmLiteral(const std::string &s_in, bool nocase_in, bool noruns_in,
|
||||
u32 id_in, hwlm_group_t groups_in,
|
||||
const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in);
|
||||
const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in,
|
||||
bool pure_in = false);
|
||||
|
||||
/** \brief Simple constructor: no group information, no msk/cmp. */
|
||||
/** \brief Simple constructor: no group information, no msk/cmp.
|
||||
*
|
||||
* This constructor is only used in internal unit tests. */
|
||||
hwlmLiteral(const std::string &s_in, bool nocase_in, u32 id_in)
|
||||
: hwlmLiteral(s_in, nocase_in, false, id_in, HWLM_ALL_GROUPS, {}, {}) {}
|
||||
};
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -185,6 +185,7 @@ bool shortcutLiteral(NG &ng, const ParsedExpression &pe) {
|
||||
return false;
|
||||
}
|
||||
|
||||
vis.lit.set_pure();
|
||||
const ue2_literal &lit = vis.lit;
|
||||
|
||||
if (lit.empty()) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -238,7 +238,11 @@ hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t,
|
||||
assert(id && id < t->size); // id is an offset into bytecode
|
||||
const u64a som = 0;
|
||||
const u8 flags = 0;
|
||||
return roseRunProgram(t, scratch, id, som, end, flags);
|
||||
if (!scratch->pure) {
|
||||
return roseRunProgram(t, scratch, id, som, end, flags);
|
||||
} else {
|
||||
return roseRunProgram_l(t, scratch, id, som, end, flags);
|
||||
}
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -2778,6 +2778,293 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
/* Opens the switch case for Rose instruction `name` inside roseRunProgram_l.
 * Expands to `case ROSE_INSTR_<name>: {`, a DEBUG_PRINTF trace of the current
 * program counter, and a local `ri` that views `pc` as the instruction's
 * ROSE_STRUCT_<name>. Relies on `pc`, `pc_base` and `programOffset` being in
 * scope at the expansion site. The brace opened here is left open; it is
 * closed by L_PROGRAM_NEXT_INSTRUCTION. */
#define L_PROGRAM_CASE(name) \
|
||||
case ROSE_INSTR_##name: { \
|
||||
DEBUG_PRINTF("l_instruction: " #name " (pc=%u)\n", \
|
||||
programOffset + (u32)(pc - pc_base)); \
|
||||
const struct ROSE_STRUCT_##name *ri = \
|
||||
(const struct ROSE_STRUCT_##name *)pc;
|
||||
|
||||
/* Ends a case opened with L_PROGRAM_CASE: advances `pc` past the current
 * instruction (its struct size rounded up to ROSE_INSTR_MIN_ALIGN), breaks
 * out of the switch, and closes the brace that L_PROGRAM_CASE opened. */
#define L_PROGRAM_NEXT_INSTRUCTION \
|
||||
pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \
|
||||
break; \
|
||||
}
|
||||
|
||||
/* Used after `pc` has already been moved by an explicit jump (e.g. a
 * fail_jump): restarts the enclosing interpreter loop at the new `pc`
 * without applying the size-based advance of L_PROGRAM_NEXT_INSTRUCTION. */
#define L_PROGRAM_NEXT_INSTRUCTION_JUMP continue;
|
||||
|
||||
/**
 * \brief Reduced Rose program interpreter used for pure literal matches.
 *
 * Executes the Rose program located at \p programOffset within the bytecode
 * \p t. roseProcessMatchInline selects this interpreter instead of
 * roseRunProgram when scratch->pure is set. Only the instructions listed in
 * the switch below are understood; any other opcode is treated as an error.
 *
 * \param t              Rose engine bytecode.
 * \param scratch        Per-scan scratch space; its tctxt and core_info are
 *                       read and updated by the instruction handlers.
 * \param programOffset  Offset of the program within \p t. Must not be
 *                       ROSE_INVALID_PROG_OFFSET and must lie in
 *                       [sizeof(struct RoseEngine), t->size).
 * \param som            Start-of-match offset; may be overwritten by
 *                       SOM_FROM_REPORT.
 * \param end            End-of-match offset.
 * \param prog_flags     Program flags; only ROSE_PROG_FLAG_FROM_MPV is
 *                       inspected here.
 *
 * \return HWLM_CONTINUE_MATCHING when the program runs to END or
 *         FINAL_REPORT; HWLM_TERMINATE_MATCHING when a handler requests
 *         termination or an unsupported instruction is encountered.
 */
hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
                             struct hs_scratch *scratch, u32 programOffset,
                             u64a som, u64a end, u8 prog_flags) {
    DEBUG_PRINTF("program=%u, offsets [%llu,%llu], flags=%u\n", programOffset,
                 som, end, prog_flags);

    assert(programOffset != ROSE_INVALID_PROG_OFFSET);
    assert(programOffset >= sizeof(struct RoseEngine));
    assert(programOffset < t->size);

    const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV;

    const char *pc_base = getByOffset(t, programOffset);
    const char *pc = pc_base;

    struct RoseContext *tctxt = &scratch->tctxt;

    // A program must contain at least one real instruction before END.
    assert(*(const u8 *)pc != ROSE_INSTR_END);

    for (;;) {
        assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN));
        assert(pc >= pc_base);
        assert((size_t)(pc - pc_base) < t->size);
        const u8 code = *(const u8 *)pc;
        assert(code <= LAST_ROSE_INSTRUCTION);

        switch ((enum RoseInstructionCode)code) {
            L_PROGRAM_CASE(END) {
                DEBUG_PRINTF("finished\n");
                return HWLM_CONTINUE_MATCHING;
            }
            L_PROGRAM_NEXT_INSTRUCTION

            L_PROGRAM_CASE(CATCH_UP) {
                if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) {
                    return HWLM_TERMINATE_MATCHING;
                }
            }
            L_PROGRAM_NEXT_INSTRUCTION

            L_PROGRAM_CASE(SOM_FROM_REPORT) {
                som = handleSomExternal(scratch, &ri->som, end);
                DEBUG_PRINTF("som from report %u is %llu\n", ri->som.onmatch,
                             som);
            }
            L_PROGRAM_NEXT_INSTRUCTION

            L_PROGRAM_CASE(DEDUPE) {
                updateSeqPoint(tctxt, end, from_mpv);
                const char do_som = t->hasSom; // TODO: constant propagate
                const char is_external_report = 1;
                enum DedupeResult rv =
                    dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust,
                                  ri->dkey, ri->offset_adjust,
                                  is_external_report, ri->quash_som, do_som);
                switch (rv) {
                case DEDUPE_HALT:
                    return HWLM_TERMINATE_MATCHING;
                case DEDUPE_SKIP:
                    assert(ri->fail_jump); // must progress
                    pc += ri->fail_jump;
                    L_PROGRAM_NEXT_INSTRUCTION_JUMP
                case DEDUPE_CONTINUE:
                    break;
                }
            }
            L_PROGRAM_NEXT_INSTRUCTION

            L_PROGRAM_CASE(DEDUPE_SOM) {
                updateSeqPoint(tctxt, end, from_mpv);
                const char is_external_report = 0;
                const char do_som = 1;
                enum DedupeResult rv =
                    dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust,
                                  ri->dkey, ri->offset_adjust,
                                  is_external_report, ri->quash_som, do_som);
                switch (rv) {
                case DEDUPE_HALT:
                    return HWLM_TERMINATE_MATCHING;
                case DEDUPE_SKIP:
                    assert(ri->fail_jump); // must progress
                    pc += ri->fail_jump;
                    L_PROGRAM_NEXT_INSTRUCTION_JUMP
                case DEDUPE_CONTINUE:
                    break;
                }
            }
            L_PROGRAM_NEXT_INSTRUCTION

            L_PROGRAM_CASE(REPORT) {
                updateSeqPoint(tctxt, end, from_mpv);
                if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust,
                               INVALID_EKEY) == HWLM_TERMINATE_MATCHING) {
                    return HWLM_TERMINATE_MATCHING;
                }
            }
            L_PROGRAM_NEXT_INSTRUCTION

            L_PROGRAM_CASE(REPORT_EXHAUST) {
                updateSeqPoint(tctxt, end, from_mpv);
                if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust,
                               ri->ekey) == HWLM_TERMINATE_MATCHING) {
                    return HWLM_TERMINATE_MATCHING;
                }
            }
            L_PROGRAM_NEXT_INSTRUCTION

            L_PROGRAM_CASE(REPORT_SOM) {
                updateSeqPoint(tctxt, end, from_mpv);
                if (roseReportSom(t, scratch, som, end, ri->onmatch,
                                  ri->offset_adjust,
                                  INVALID_EKEY) == HWLM_TERMINATE_MATCHING) {
                    return HWLM_TERMINATE_MATCHING;
                }
            }
            L_PROGRAM_NEXT_INSTRUCTION

            L_PROGRAM_CASE(DEDUPE_AND_REPORT) {
                updateSeqPoint(tctxt, end, from_mpv);
                const char do_som = t->hasSom; // TODO: constant propagate
                const char is_external_report = 1;
                enum DedupeResult rv =
                    dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust,
                                  ri->dkey, ri->offset_adjust,
                                  is_external_report, ri->quash_som, do_som);
                switch (rv) {
                case DEDUPE_HALT:
                    return HWLM_TERMINATE_MATCHING;
                case DEDUPE_SKIP:
                    assert(ri->fail_jump); // must progress
                    pc += ri->fail_jump;
                    L_PROGRAM_NEXT_INSTRUCTION_JUMP
                case DEDUPE_CONTINUE:
                    break;
                }

                const u32 ekey = INVALID_EKEY;
                if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust,
                               ekey) == HWLM_TERMINATE_MATCHING) {
                    return HWLM_TERMINATE_MATCHING;
                }
            }
            L_PROGRAM_NEXT_INSTRUCTION

            L_PROGRAM_CASE(FINAL_REPORT) {
                updateSeqPoint(tctxt, end, from_mpv);
                if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust,
                               INVALID_EKEY) == HWLM_TERMINATE_MATCHING) {
                    return HWLM_TERMINATE_MATCHING;
                }
                /* One-shot specialisation: this instruction always terminates
                 * execution of the program. */
                return HWLM_CONTINUE_MATCHING;
            }
            L_PROGRAM_NEXT_INSTRUCTION

            L_PROGRAM_CASE(CHECK_EXHAUSTED) {
                DEBUG_PRINTF("check ekey %u\n", ri->ekey);
                assert(ri->ekey != INVALID_EKEY);
                assert(ri->ekey < t->ekeyCount);
                const char *evec = scratch->core_info.exhaustionVector;
                if (isExhausted(t, evec, ri->ekey)) {
                    DEBUG_PRINTF("ekey %u already set, match is exhausted\n",
                                 ri->ekey);
                    assert(ri->fail_jump); // must progress
                    pc += ri->fail_jump;
                    L_PROGRAM_NEXT_INSTRUCTION_JUMP
                }
            }
            L_PROGRAM_NEXT_INSTRUCTION

            L_PROGRAM_CASE(CHECK_LONG_LIT) {
                const char nocase = 0;
                if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset,
                                          ri->lit_length, nocase)) {
                    DEBUG_PRINTF("failed long lit check\n");
                    assert(ri->fail_jump); // must progress
                    pc += ri->fail_jump;
                    L_PROGRAM_NEXT_INSTRUCTION_JUMP
                }
            }
            L_PROGRAM_NEXT_INSTRUCTION

            L_PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) {
                const char nocase = 1;
                if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset,
                                          ri->lit_length, nocase)) {
                    DEBUG_PRINTF("failed nocase long lit check\n");
                    assert(ri->fail_jump); // must progress
                    pc += ri->fail_jump;
                    L_PROGRAM_NEXT_INSTRUCTION_JUMP
                }
            }
            L_PROGRAM_NEXT_INSTRUCTION

            L_PROGRAM_CASE(CHECK_MED_LIT) {
                const char nocase = 0;
                if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset,
                                            ri->lit_length, nocase)) {
                    DEBUG_PRINTF("failed lit check\n");
                    assert(ri->fail_jump); // must progress
                    pc += ri->fail_jump;
                    L_PROGRAM_NEXT_INSTRUCTION_JUMP
                }
            }
            L_PROGRAM_NEXT_INSTRUCTION

            L_PROGRAM_CASE(CHECK_MED_LIT_NOCASE) {
                const char nocase = 1;
                if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset,
                                            ri->lit_length, nocase)) {
                    // Message previously said "long lit" (copy-paste slip).
                    DEBUG_PRINTF("failed nocase lit check\n");
                    assert(ri->fail_jump); // must progress
                    pc += ri->fail_jump;
                    L_PROGRAM_NEXT_INSTRUCTION_JUMP
                }
            }
            L_PROGRAM_NEXT_INSTRUCTION

            L_PROGRAM_CASE(SET_LOGICAL) {
                DEBUG_PRINTF("set logical value of lkey %u, offset_adjust=%d\n",
                             ri->lkey, ri->offset_adjust);
                assert(ri->lkey != INVALID_LKEY);
                assert(ri->lkey < t->lkeyCount);
                char *lvec = scratch->core_info.logicalVector;
                setLogicalVal(t, lvec, ri->lkey, 1);
                updateLastCombMatchOffset(tctxt, end + ri->offset_adjust);
            }
            L_PROGRAM_NEXT_INSTRUCTION

            L_PROGRAM_CASE(SET_COMBINATION) {
                DEBUG_PRINTF("set ckey %u as active\n", ri->ckey);
                assert(ri->ckey != INVALID_CKEY);
                assert(ri->ckey < t->ckeyCount);
                char *cvec = scratch->core_info.combVector;
                setCombinationActive(t, cvec, ri->ckey);
            }
            L_PROGRAM_NEXT_INSTRUCTION

            L_PROGRAM_CASE(FLUSH_COMBINATION) {
                assert(end >= tctxt->lastCombMatchOffset);
                if (end > tctxt->lastCombMatchOffset) {
                    if (flushActiveCombinations(t, scratch)
                            == HWLM_TERMINATE_MATCHING) {
                        return HWLM_TERMINATE_MATCHING;
                    }
                }
            }
            L_PROGRAM_NEXT_INSTRUCTION

            L_PROGRAM_CASE(SET_EXHAUST) {
                updateSeqPoint(tctxt, end, from_mpv);
                if (roseSetExhaust(t, scratch, ri->ekey)
                        == HWLM_TERMINATE_MATCHING) {
                    return HWLM_TERMINATE_MATCHING;
                }
            }
            L_PROGRAM_NEXT_INSTRUCTION

            default: {
                assert(0); // unreachable
                /* When asserts are compiled out, leaving the switch here
                 * would re-dispatch the same pc forever because nothing
                 * advances it. Halt matching instead of spinning. */
                return HWLM_TERMINATE_MATCHING;
            }
        }
    }

    assert(0); // unreachable
    return HWLM_CONTINUE_MATCHING;
}
|
||||
|
||||
#undef L_PROGRAM_CASE
|
||||
#undef L_PROGRAM_NEXT_INSTRUCTION
|
||||
#undef L_PROGRAM_NEXT_INSTRUCTION_JUMP
|
||||
|
||||
#undef PROGRAM_CASE
|
||||
#undef PROGRAM_NEXT_INSTRUCTION
|
||||
#undef PROGRAM_NEXT_INSTRUCTION_JUMP
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -54,4 +54,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
|
||||
struct hs_scratch *scratch, u32 programOffset,
|
||||
u64a som, u64a end, u8 prog_flags);
|
||||
|
||||
/**
 * \brief Variant of roseRunProgram with an identical signature; it takes the
 * engine, scratch, program offset, SOM/end offsets and program flags and
 * returns an hwlmcb_rv_t.
 */
hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
|
||||
struct hs_scratch *scratch, u32 programOffset,
|
||||
u64a som, u64a end, u8 prog_flags);
|
||||
|
||||
#endif // PROGRAM_RUNTIME_H
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -2843,9 +2843,34 @@ vector<LitFragment> groupByFragment(const RoseBuildImpl &build) {
|
||||
|
||||
DEBUG_PRINTF("fragment candidate: lit_id=%u %s\n", lit_id,
|
||||
dumpString(lit.s).c_str());
|
||||
auto &fi = frag_info[getFragment(lit)];
|
||||
fi.lit_ids.push_back(lit_id);
|
||||
fi.groups |= groups;
|
||||
|
||||
/** 0:/xxabcdefgh/ */
|
||||
/** 1:/yyabcdefgh/ */
|
||||
/** 2:/yyabcdefgh.+/ */
|
||||
// The 3 patterns above are first converted into a RoseLiteralMap with
|
||||
// 2 elements ("xxabcdefgh" and "yyabcdefgh"), and then into a single
|
||||
// LitFragment ("abcdefgh"). Special care must be
|
||||
// taken to handle the 'pure' flag during the conversion.
|
||||
|
||||
rose_literal_id lit_frag = getFragment(lit);
|
||||
auto it = frag_info.find(lit_frag);
|
||||
if (it != frag_info.end()) {
|
||||
if (!lit_frag.s.get_pure() && it->first.s.get_pure()) {
|
||||
struct FragmentInfo f_info = it->second;
|
||||
f_info.lit_ids.push_back(lit_id);
|
||||
f_info.groups |= groups;
|
||||
frag_info.erase(it->first);
|
||||
frag_info.emplace(lit_frag, f_info);
|
||||
} else {
|
||||
it->second.lit_ids.push_back(lit_id);
|
||||
it->second.groups |= groups;
|
||||
}
|
||||
} else {
|
||||
struct FragmentInfo f_info;
|
||||
f_info.lit_ids.push_back(lit_id);
|
||||
f_info.groups |= groups;
|
||||
frag_info.emplace(lit_frag, f_info);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &m : frag_info) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -340,7 +340,14 @@ public:
|
||||
std::pair<u32, bool> insert(const rose_literal_id &lit) {
|
||||
auto it = lits_index.find(lit);
|
||||
if (it != lits_index.end()) {
|
||||
return {it->second, false};
|
||||
u32 idx = it->second;
|
||||
auto &l = lits.at(idx);
|
||||
if (!lit.s.get_pure() && l.s.get_pure()) {
|
||||
lits_index.erase(l);
|
||||
l.s.unset_pure();
|
||||
lits_index.emplace(l, idx);
|
||||
}
|
||||
return {idx, false};
|
||||
}
|
||||
u32 id = verify_u32(lits.size());
|
||||
lits.push_back(lit);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
* Copyright (c) 2016-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -727,6 +727,7 @@ void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp,
|
||||
|
||||
const auto &s_final = lit_final.get_string();
|
||||
bool nocase = lit_final.any_nocase();
|
||||
bool pure = f.s.get_pure();
|
||||
|
||||
DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, cmp=%s\n",
|
||||
f.fragment_id, escapeString(s_final).c_str(), (int)nocase,
|
||||
@ -740,7 +741,7 @@ void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp,
|
||||
const auto &groups = f.groups;
|
||||
|
||||
mp.lits.emplace_back(move(s_final), nocase, noruns, f.fragment_id,
|
||||
groups, msk, cmp);
|
||||
groups, msk, cmp, pure);
|
||||
}
|
||||
|
||||
static
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -141,6 +141,7 @@ void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose,
|
||||
s->deduper.current_report_offset = ~0ULL;
|
||||
s->deduper.som_log_dirty = 1; /* som logs have not been cleared */
|
||||
s->fdr_conf = NULL;
|
||||
s->pure = 0;
|
||||
|
||||
// Rose program execution (used for some report paths) depends on these
|
||||
// values being initialised.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -137,6 +137,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
|
||||
s->scratchSize = alloc_size;
|
||||
s->scratch_alloc = (char *)s_tmp;
|
||||
s->fdr_conf = NULL;
|
||||
s->pure = 0;
|
||||
|
||||
// each of these is at an offset from the previous
|
||||
char *current = (char *)s + sizeof(*s);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -208,6 +208,7 @@ struct ALIGN_CL_DIRECTIVE hs_scratch {
|
||||
u64a *fdr_conf; /**< FDR confirm value */
|
||||
u8 fdr_conf_offset; /**< offset where FDR/Teddy front end matches
|
||||
* in buffer */
|
||||
u8 pure; /**< indicator of pure-literal or cutting-literal */
|
||||
};
|
||||
|
||||
/* array of fatbit ptr; TODO: why not an array of fatbits? */
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -211,10 +211,17 @@ public:
|
||||
|
||||
size_t hash() const;
|
||||
|
||||
/** \brief Mark this literal as pure by setting the `pure` member to true. */
void set_pure() { pure = true; }
|
||||
/** \brief Clear the pure flag by setting the `pure` member to false. */
void unset_pure() { pure = false; }
|
||||
/** \brief Return the current value of the `pure` member. */
bool get_pure() const { return pure; }
|
||||
|
||||
/* TODO: consider existing member functions possibly related with pure. */
|
||||
|
||||
private:
|
||||
friend const_iterator;
|
||||
std::string s;
|
||||
boost::dynamic_bitset<> nocase;
|
||||
bool pure = false; /**< born from cutting or not (pure literal). */
|
||||
};
|
||||
|
||||
/// Return a reversed copy of this literal.
|
||||
|
Loading…
x
Reference in New Issue
Block a user