diff --git a/src/parser/logical_combination.cpp b/src/parser/logical_combination.cpp index b78390b0..49e060c9 100644 --- a/src/parser/logical_combination.cpp +++ b/src/parser/logical_combination.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, Intel Corporation + * Copyright (c) 2018-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -254,44 +254,6 @@ void popOperator(vector &op_stack, vector &subid_stack, op_stack.pop_back(); } -static -char getValue(const vector &lv, u32 ckey) { - if (ckey & LOGICAL_OP_BIT) { - return lv[ckey & ~LOGICAL_OP_BIT]; - } else { - return 0; - } -} - -static -bool hasMatchFromPurelyNegative(const vector &tree, - u32 start, u32 result) { - vector lv(tree.size()); - assert(start <= result); - for (u32 i = start; i <= result; i++) { - assert(i & LOGICAL_OP_BIT); - const LogicalOp &op = tree[i & ~LOGICAL_OP_BIT]; - assert(i == op.id); - switch (op.op) { - case LOGICAL_OP_NOT: - lv[op.id & ~LOGICAL_OP_BIT] = !getValue(lv, op.ro); - break; - case LOGICAL_OP_AND: - lv[op.id & ~LOGICAL_OP_BIT] = getValue(lv, op.lo) & - getValue(lv, op.ro); - break; - case LOGICAL_OP_OR: - lv[op.id & ~LOGICAL_OP_BIT] = getValue(lv, op.lo) | - getValue(lv, op.ro); - break; - default: - assert(0); - break; - } - } - return lv[result & ~LOGICAL_OP_BIT]; -} - void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical, u32 ekey, u64a min_offset, u64a max_offset) { @@ -366,9 +328,6 @@ void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical, if (lkey_start == INVALID_LKEY) { throw CompileError("No logical operation."); } - if (hasMatchFromPurelyNegative(logicalTree, lkey_start, lkey_result)) { - throw CompileError("Has match from purely negative sub-expressions."); - } combinationInfoAdd(ckey, id, ekey, lkey_start, lkey_result, min_offset, max_offset); } diff --git a/src/report.h b/src/report.h index a2e2d0f3..b35f4c05 100644 --- a/src/report.h +++ b/src/report.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018, Intel Corporation + * Copyright (c) 2016-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -222,6 +222,58 @@ char isLogicalCombination(const struct RoseEngine *rose, char *lvec, return getLogicalVal(rose, lvec, result); } +/** \brief Returns 1 if combination matches when no sub-expression matches. */ +static really_inline +char isPurelyNegativeMatch(const struct RoseEngine *rose, char *lvec, + u32 start, u32 result) { + const struct LogicalOp *logicalTree = (const struct LogicalOp *) + ((const char *)rose + rose->logicalTreeOffset); + assert(start >= rose->lkeyCount); + assert(start <= result); + assert(result < rose->lkeyCount + rose->lopCount); + for (u32 i = start; i <= result; i++) { + const struct LogicalOp *op = logicalTree + (i - rose->lkeyCount); + assert(i == op->id); + assert(op->op <= LAST_LOGICAL_OP); + switch ((enum LogicalOpType)op->op) { + case LOGICAL_OP_NOT: + if ((op->ro < rose->lkeyCount) && + getLogicalVal(rose, lvec, op->ro)) { + // sub-expression not negative + return 0; + } + setLogicalVal(rose, lvec, op->id, + !getLogicalVal(rose, lvec, op->ro)); + break; + case LOGICAL_OP_AND: + if (((op->lo < rose->lkeyCount) && + getLogicalVal(rose, lvec, op->lo)) || + ((op->ro < rose->lkeyCount) && + getLogicalVal(rose, lvec, op->ro))) { + // sub-expression not negative + return 0; + } + setLogicalVal(rose, lvec, op->id, + getLogicalVal(rose, lvec, op->lo) & + getLogicalVal(rose, lvec, op->ro)); // && + break; + case LOGICAL_OP_OR: + if (((op->lo < rose->lkeyCount) && + getLogicalVal(rose, lvec, op->lo)) || + ((op->ro < rose->lkeyCount) && + getLogicalVal(rose, lvec, op->ro))) { + // sub-expression not negative + return 0; + } + setLogicalVal(rose, lvec, op->id, + getLogicalVal(rose, lvec, op->lo) | + getLogicalVal(rose, lvec, op->ro)); // || + break; + } + } + return getLogicalVal(rose, lvec, result); +} + /** \brief Clear all keys in the logical vector. */ static really_inline void clearLvec(const struct RoseEngine *rose, char *lvec, char *cvec) { diff --git a/src/rose/match.c b/src/rose/match.c index 192b4709..c91b2a50 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -591,6 +591,23 @@ int roseRunFlushCombProgram(const struct RoseEngine *rose, return MO_CONTINUE_MATCHING; } +/** + * \brief Execute last flush combination program. + * + * Returns MO_HALT_MATCHING if the stream is exhausted or the user has + * instructed us to halt, or MO_CONTINUE_MATCHING otherwise. + */ +int roseRunLastFlushCombProgram(const struct RoseEngine *rose, + struct hs_scratch *scratch, u64a end) { + hwlmcb_rv_t rv = roseRunProgram(rose, scratch, + rose->lastFlushCombProgramOffset, + 0, end, 0); + if (rv == HWLM_TERMINATE_MATCHING) { + return MO_HALT_MATCHING; + } + return MO_CONTINUE_MATCHING; +} + int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) { struct hs_scratch *scratch = context; assert(scratch && scratch->magic == SCRATCH_MAGIC); diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index 7f5150e0..4c487062 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -1875,6 +1875,49 @@ hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t, return HWLM_CONTINUE_MATCHING; } +static rose_inline +hwlmcb_rv_t checkPurelyNegatives(const struct RoseEngine *t, + struct hs_scratch *scratch, u64a end) { + for (u32 i = 0; i < t->ckeyCount; i++) { + const struct CombInfo *combInfoMap = (const struct CombInfo *) + ((const char *)t + t->combInfoMapOffset); + const struct CombInfo *ci = combInfoMap + i; + if ((ci->min_offset != 0) && (end < ci->min_offset)) { + DEBUG_PRINTF("halt: before min_offset=%llu\n", ci->min_offset); + continue; + } + if ((ci->max_offset != MAX_OFFSET) && (end > ci->max_offset)) { + DEBUG_PRINTF("halt: after max_offset=%llu\n", ci->max_offset); + continue; + } + + DEBUG_PRINTF("check ekey %u\n", ci->ekey); + if (ci->ekey != INVALID_EKEY) { + assert(ci->ekey < t->ekeyCount); + const char *evec = scratch->core_info.exhaustionVector; + if (isExhausted(t, evec, ci->ekey)) { + DEBUG_PRINTF("ekey %u already set, match is exhausted\n", + ci->ekey); + continue; + } + } + + DEBUG_PRINTF("check ckey %u purely negative\n", i); + char *lvec = scratch->core_info.logicalVector; + if (!isPurelyNegativeMatch(t, lvec, ci->start, ci->result)) { + DEBUG_PRINTF("Logical Combination from purely negative Failed!\n"); + continue; + } + + DEBUG_PRINTF("Logical Combination from purely negative Passed!\n"); + if (roseReport(t, scratch, end, ci->id, 0, + ci->ekey) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + return HWLM_CONTINUE_MATCHING; +} + #if !defined(_WIN32) #define PROGRAM_CASE(name) \ case ROSE_INSTR_##name: { \ @@ -2004,7 +2047,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, &&LABEL_ROSE_INSTR_SET_LOGICAL, &&LABEL_ROSE_INSTR_SET_COMBINATION, &&LABEL_ROSE_INSTR_FLUSH_COMBINATION, - &&LABEL_ROSE_INSTR_SET_EXHAUST + &&LABEL_ROSE_INSTR_SET_EXHAUST, + &&LABEL_ROSE_INSTR_LAST_FLUSH_COMBINATION }; #endif @@ -2772,6 +2816,19 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(LAST_FLUSH_COMBINATION) { + assert(end >= tctxt->lastCombMatchOffset); + if (flushActiveCombinations(t, scratch) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + if (checkPurelyNegatives(t, scratch, end) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + default: { assert(0); // unreachable scratch->core_info.status |= STATUS_ERROR; @@ -3082,6 +3139,19 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, } L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(LAST_FLUSH_COMBINATION) { + assert(end >= tctxt->lastCombMatchOffset); + if (flushActiveCombinations(t, scratch) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + if (checkPurelyNegatives(t, scratch, end) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + L_PROGRAM_NEXT_INSTRUCTION + default: { assert(0); // unreachable scratch->core_info.status |= STATUS_ERROR; diff --git a/src/rose/rose.h b/src/rose/rose.h index c2b682f6..409b7002 100644 --- a/src/rose/rose.h +++ b/src/rose/rose.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -56,4 +56,7 @@ int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program, int roseRunFlushCombProgram(const struct RoseEngine *rose, struct hs_scratch *scratch, u64a end); +int roseRunLastFlushCombProgram(const struct RoseEngine *rose, + struct hs_scratch *scratch, u64a end); + #endif // ROSE_H diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 0ef20f21..908d13c1 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -3370,6 +3370,15 @@ RoseProgram makeFlushCombProgram(const RoseEngine &t) { return program; } +static +RoseProgram makeLastFlushCombProgram(const RoseEngine &t) { + RoseProgram program; + if (t.ckeyCount) { + addLastFlushCombinationProgram(program); + } + return program; +} + static u32 history_required(const rose_literal_id &key) { if (key.msk.size() < key.s.length()) { @@ -3740,6 +3749,10 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { auto flushComb_prog = makeFlushCombProgram(proto); proto.flushCombProgramOffset = writeProgram(bc, move(flushComb_prog)); + auto lastFlushComb_prog = makeLastFlushCombProgram(proto); + proto.lastFlushCombProgramOffset = + writeProgram(bc, move(lastFlushComb_prog)); + // Build anchored matcher. auto atable = buildAnchoredMatcher(*this, fragments, anchored_dfas); if (atable) { diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 2eb7bb51..8999daef 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -1486,6 +1486,9 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(LAST_FLUSH_COMBINATION) {} + PROGRAM_NEXT_INSTRUCTION + default: os << " UNKNOWN (code " << int{code} << ")" << endl; os << " " << endl; @@ -1557,6 +1560,25 @@ void dumpRoseFlushCombPrograms(const RoseEngine *t, const string &filename) { os.close(); } +static +void dumpRoseLastFlushCombPrograms(const RoseEngine *t, + const string &filename) { + ofstream os(filename); + const char *base = (const char *)t; + + if (t->lastFlushCombProgramOffset) { + os << "Last Flush Combination Program @ " + << t->lastFlushCombProgramOffset + << ":" << endl; + dumpProgram(os, t, base + t->lastFlushCombProgramOffset); + os << endl; + } else { + os << "" << endl; + } + + os.close(); +} + static void dumpRoseReportPrograms(const RoseEngine *t, const string &filename) { ofstream os(filename); @@ -2249,6 +2271,8 @@ void roseDumpPrograms(const vector &fragments, const RoseEngine *t, dumpRoseLitPrograms(fragments, t, base + "/rose_lit_programs.txt"); dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt"); dumpRoseFlushCombPrograms(t, base + "/rose_flush_comb_programs.txt"); + dumpRoseLastFlushCombPrograms(t, + base + "/rose_last_flush_comb_programs.txt"); dumpRoseReportPrograms(t, base + "/rose_report_programs.txt"); dumpRoseAnchoredPrograms(t, base + "/rose_anchored_programs.txt"); dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt"); diff --git a/src/rose/rose_build_instructions.cpp b/src/rose/rose_build_instructions.cpp index 2fe53455..c503f731 100644 --- a/src/rose/rose_build_instructions.cpp +++ b/src/rose/rose_build_instructions.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, Intel Corporation + * Copyright (c) 2017-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,6 +48,7 @@ RoseInstrMatcherEod::~RoseInstrMatcherEod() = default; RoseInstrEnd::~RoseInstrEnd() = default; RoseInstrClearWorkDone::~RoseInstrClearWorkDone() = default; RoseInstrFlushCombination::~RoseInstrFlushCombination() = default; +RoseInstrLastFlushCombination::~RoseInstrLastFlushCombination() = default; using OffsetMap = RoseInstruction::OffsetMap; diff --git a/src/rose/rose_build_instructions.h b/src/rose/rose_build_instructions.h index 61e6d7a6..306a4166 100644 --- a/src/rose/rose_build_instructions.h +++ b/src/rose/rose_build_instructions.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, Intel Corporation + * Copyright (c) 2017-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -2206,6 +2206,14 @@ public: ~RoseInstrFlushCombination() override; }; +class RoseInstrLastFlushCombination + : public RoseInstrBaseTrivial { +public: + ~RoseInstrLastFlushCombination() override; +}; + class RoseInstrSetExhaust : public RoseInstrBaseNoTargets()); } +void addLastFlushCombinationProgram(RoseProgram &program) { + program.add_before_end(make_unique()); +} + static void makeRoleCheckLeftfix(const RoseBuildImpl &build, const map &leftfix_info, diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h index 8c8c37ed..7d781f31 100644 --- a/src/rose/rose_build_program.h +++ b/src/rose/rose_build_program.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018, Intel Corporation + * Copyright (c) 2016-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -188,6 +188,7 @@ void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program); void addSuffixesEodProgram(RoseProgram &program); void addMatcherEodProgram(RoseProgram &program); void addFlushCombinationProgram(RoseProgram &program); +void addLastFlushCombinationProgram(RoseProgram &program); static constexpr u32 INVALID_QUEUE = ~0U; diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 386b035c..ff24a9cc 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -426,6 +426,8 @@ struct RoseEngine { u32 eodProgramOffset; //!< EOD program, otherwise 0. u32 flushCombProgramOffset; /**< FlushCombination program, otherwise 0 */ + u32 lastFlushCombProgramOffset; /**< LastFlushCombination program, + * otherwise 0 */ u32 lastByteHistoryIterOffset; // if non-zero diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 7feee04f..e5485476 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -201,7 +201,14 @@ enum RoseInstructionCode { /** \brief Mark as exhausted instead of report while quiet. */ ROSE_INSTR_SET_EXHAUST, - LAST_ROSE_INSTRUCTION = ROSE_INSTR_SET_EXHAUST //!< Sentinel. + /** + * \brief Calculate any combination's logical value if none of its + * sub-expression matches until EOD, then check if compliant with any + * logical constraints. + */ + ROSE_INSTR_LAST_FLUSH_COMBINATION, + + LAST_ROSE_INSTRUCTION = ROSE_INSTR_LAST_FLUSH_COMBINATION //!< Sentinel. }; struct ROSE_STRUCT_END { @@ -674,4 +681,8 @@ struct ROSE_STRUCT_SET_EXHAUST { u8 code; //!< From enum RoseInstructionCode. u32 ekey; //!< Exhaustion key. }; + +struct ROSE_STRUCT_LAST_FLUSH_COMBINATION { + u8 code; //!< From enum RoseInstructionCode. +}; #endif // ROSE_ROSE_PROGRAM_H diff --git a/src/runtime.c b/src/runtime.c index cfcd0f7c..43cdab09 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -455,8 +455,9 @@ set_retval: return HS_UNKNOWN_ERROR; } - if (rose->flushCombProgramOffset) { - if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) { + if (rose->lastFlushCombProgramOffset) { + if (roseRunLastFlushCombProgram(rose, scratch, length) + == MO_HALT_MATCHING) { if (unlikely(internal_matching_error(scratch))) { unmarkScratchInUse(scratch); return HS_UNKNOWN_ERROR; @@ -698,8 +699,9 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch, } } - if (rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) { - if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) { + if (rose->lastFlushCombProgramOffset && !told_to_stop_matching(scratch)) { + if (roseRunLastFlushCombProgram(rose, scratch, id->offset) + == MO_HALT_MATCHING) { DEBUG_PRINTF("told to stop matching\n"); scratch->core_info.status |= STATUS_TERMINATED; } @@ -1000,31 +1002,22 @@ hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch, if (onEvent) { if (!scratch || !validScratch(id->rose, scratch)) { + hs_stream_free(id); return HS_INVALID; } if (unlikely(markScratchInUse(scratch))) { + hs_stream_free(id); return HS_SCRATCH_IN_USE; } report_eod_matches(id, scratch, onEvent, context); if (unlikely(internal_matching_error(scratch))) { unmarkScratchInUse(scratch); + hs_stream_free(id); return HS_UNKNOWN_ERROR; } unmarkScratchInUse(scratch); } - if (id->rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) { - if (roseRunFlushCombProgram(id->rose, scratch, ~0ULL) - == MO_HALT_MATCHING) { - scratch->core_info.status |= STATUS_TERMINATED; - if (unlikely(internal_matching_error(scratch))) { - unmarkScratchInUse(scratch); - return HS_UNKNOWN_ERROR; - } - unmarkScratchInUse(scratch); - } - } - hs_stream_free(id); return HS_SUCCESS; @@ -1054,18 +1047,6 @@ hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags, unmarkScratchInUse(scratch); } - if (id->rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) { - if (roseRunFlushCombProgram(id->rose, scratch, ~0ULL) - == MO_HALT_MATCHING) { - scratch->core_info.status |= STATUS_TERMINATED; - if (unlikely(internal_matching_error(scratch))) { - unmarkScratchInUse(scratch); - return HS_UNKNOWN_ERROR; - } - unmarkScratchInUse(scratch); - } - } - // history already initialised init_stream(id, id->rose, 0); diff --git a/tools/hscollider/GraphTruth.cpp b/tools/hscollider/GraphTruth.cpp index b4b3f809..0b67b11c 100644 --- a/tools/hscollider/GraphTruth.cpp +++ b/tools/hscollider/GraphTruth.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -299,6 +299,46 @@ char isLogicalCombination(vector &lv, const vector &comb, return lv[result]; } +/** \brief Returns 1 if combination matches when no sub-expression matches. */ +static +char isPurelyNegativeMatch(vector &lv, const vector &comb, + size_t lkeyCount, unsigned start, unsigned result) { + assert(start <= result); + for (unsigned i = start; i <= result; i++) { + const LogicalOp &op = comb[i - lkeyCount]; + assert(i == op.id); + switch (op.op) { + case LOGICAL_OP_NOT: + if ((op.ro < lkeyCount) && lv[op.ro]) { + // sub-expression not negative + return 0; + } + lv[op.id] = !lv[op.ro]; + break; + case LOGICAL_OP_AND: + if (((op.lo < lkeyCount) && lv[op.lo]) || + ((op.ro < lkeyCount) && lv[op.ro])) { + // sub-expression not negative + return 0; + } + lv[op.id] = lv[op.lo] & lv[op.ro]; // && + break; + case LOGICAL_OP_OR: + if (((op.lo < lkeyCount) && lv[op.lo]) || + ((op.ro < lkeyCount) && lv[op.ro])) { + // sub-expression not negative + return 0; + } + lv[op.id] = lv[op.lo] | lv[op.ro]; // || + break; + default: + assert(0); + break; + } + } + return lv[result]; +} + bool GraphTruth::run(unsigned, const CompiledNG &cng, const CNGInfo &cngi, const string &buffer, ResultSet &rs, string &error) { if (cngi.quiet) { @@ -359,6 +399,13 @@ bool GraphTruth::run(unsigned, const CompiledNG &cng, const CNGInfo &cngi, } } } + if (isPurelyNegativeMatch(lv, comb, m_lkey.size(), + li.start, li.result)) { + u64a to = buffer.length(); + if ((to >= cngi.min_offset) && (to <= cngi.max_offset)) { + rs.addMatch(0, to); + } + } return true; } diff --git a/tools/hscollider/GroundTruth.cpp b/tools/hscollider/GroundTruth.cpp index fe038c81..5a4bdc00 100644 --- a/tools/hscollider/GroundTruth.cpp +++ b/tools/hscollider/GroundTruth.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -557,6 +557,46 @@ char isLogicalCombination(vector &lv, const vector &comb, return lv[result]; } +/** \brief Returns 1 if combination matches when no sub-expression matches. */ +static +char isPurelyNegativeMatch(vector &lv, const vector &comb, + size_t lkeyCount, unsigned start, unsigned result) { + assert(start <= result); + for (unsigned i = start; i <= result; i++) { + const LogicalOp &op = comb[i - lkeyCount]; + assert(i == op.id); + switch (op.op) { + case LOGICAL_OP_NOT: + if ((op.ro < lkeyCount) && lv[op.ro]) { + // sub-expression not negative + return 0; + } + lv[op.id] = !lv[op.ro]; + break; + case LOGICAL_OP_AND: + if (((op.lo < lkeyCount) && lv[op.lo]) || + ((op.ro < lkeyCount) && lv[op.ro])) { + // sub-expression not negative + return 0; + } + lv[op.id] = lv[op.lo] & lv[op.ro]; // && + break; + case LOGICAL_OP_OR: + if (((op.lo < lkeyCount) && lv[op.lo]) || + ((op.ro < lkeyCount) && lv[op.ro])) { + // sub-expression not negative + return 0; + } + lv[op.id] = lv[op.lo] | lv[op.ro]; // || + break; + default: + assert(0); + break; + } + } + return lv[result]; +} + bool GroundTruth::run(unsigned, const CompiledPcre &compiled, const string &buffer, ResultSet &rs, string &error) { if (compiled.quiet) { @@ -616,6 +656,13 @@ bool GroundTruth::run(unsigned, const CompiledPcre &compiled, } } } + if (isPurelyNegativeMatch(lv, comb, m_lkey.size(), + li.start, li.result)) { + u64a to = buffer.length(); + if ((to >= compiled.min_offset) && (to <= compiled.max_offset)) { + rs.addMatch(0, to); + } + } return true; } diff --git a/unit/hyperscan/bad_patterns.txt b/unit/hyperscan/bad_patterns.txt index 6d4283da..c4a9f13c 100644 --- a/unit/hyperscan/bad_patterns.txt +++ b/unit/hyperscan/bad_patterns.txt @@ -155,11 +155,6 @@ 158:/141 & (142|!143) )| 144/C #Not enough left parentheses at index 17. 159:/1234567890 & (142|!143 )/C #Expression id too large at index 10. 160:/141 & (142|!143 )|/C #Not enough operand at index 18. -161:/!141/C #Has match from purely negative sub-expressions. -162:/!141 | 142 | 143/C #Has match from purely negative sub-expressions. -163:/!141 & !142 & !143/C #Has match from purely negative sub-expressions. -164:/(141 | !142 & !143)/C #Has match from purely negative sub-expressions. -165:/!(141 | 142 | 143)/C #Has match from purely negative sub-expressions. -166:/141/C #No logical operation. -167:/119 & 121/C #Unknown sub-expression id. -168:/166 & 167/C #Unknown sub-expression id. +161:/141/C #No logical operation. +162:/119 & 121/C #Unknown sub-expression id. +163:/166 & 167/C #Unknown sub-expression id. diff --git a/unit/hyperscan/logical_combination.cpp b/unit/hyperscan/logical_combination.cpp index 169de333..5b1c1ec2 100644 --- a/unit/hyperscan/logical_combination.cpp +++ b/unit/hyperscan/logical_combination.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, Intel Corporation + * Copyright (c) 2018-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -694,3 +694,113 @@ TEST(LogicalCombination, MultiCombQuietUniSub5) { err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); } + +TEST(LogicalCombination, SingleCombPurelyNegative6) { + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + CallBackContext c; + string data = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"; + const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", + "ijkl[mMn]", "(!201 | 202 & 203) & (!204 | 205)"}; + unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION}; + unsigned ids[] = {201, 202, 203, 204, 205, 1002}; + hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM, + nullptr, &db, &compile_err); + + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + c.halt = 0; + err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb, + (void *)&c); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(1U, c.matches.size()); + ASSERT_EQ(MatchRecord(53, 1002), c.matches[0]); + + hs_free_database(db); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(LogicalCombination, SingleCombQuietPurelyNegative6) { + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + CallBackContext c; + string data = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"; + const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", + "ijkl[mMn]", "(!201 | 202 & 203) & (!204 | 205)"}; + unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION | HS_FLAG_QUIET}; + unsigned ids[] = {201, 202, 203, 204, 205, 1002}; + hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM, + nullptr, &db, &compile_err); + + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + c.halt = 0; + err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb, + (void *)&c); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(0U, c.matches.size()); + + hs_free_database(db); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(LogicalCombination, MultiCombPurelyNegativeUniSub6) { + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + CallBackContext c; + string data = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "-----------------------------------------------" + "xxxfedxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "-----------------------------------------------" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "------------------------------------------"; + const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", + "ijkl[mMn]", "cba", "fed", "google.*cn", + "haystacks{4,8}", "ijkl[oOp]", "cab", "fee", + "goobar.*jp", "shockwave{4,6}", "ijkl[rRs]", + "(101 & 102 & 103) | (!104 & !105)", + "(!201 | 202 & 203) & (!204 | 205)", + "((301 | 302) & 303) & (304 | 305)"}; + unsigned flags[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + HS_FLAG_COMBINATION, HS_FLAG_COMBINATION, + HS_FLAG_COMBINATION}; + unsigned ids[] = {101, 102, 103, 104, 105, 201, 202, 203, 204, 205, 301, + 302, 303, 304, 305, 1001, 1002, 1003}; + hs_error_t err = hs_compile_multi(expr, flags, ids, 18, HS_MODE_NOSTREAM, + nullptr, &db, &compile_err); + + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + c.halt = 0; + err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb, + (void *)&c); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(3U, c.matches.size()); + ASSERT_EQ(MatchRecord(106, 202), c.matches[0]); + ASSERT_EQ(MatchRecord(106, 1002), c.matches[1]); + ASSERT_EQ(MatchRecord(300, 1001), c.matches[2]); + + hs_free_database(db); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +}