diff --git a/CMakeLists.txt b/CMakeLists.txt index bbc1797b..e33655be 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -914,6 +914,8 @@ SET (hs_compile_SRCS src/parser/check_refs.h src/parser/control_verbs.cpp src/parser/control_verbs.h + src/parser/logical_combination.cpp + src/parser/logical_combination.h src/parser/parse_error.cpp src/parser/parse_error.h src/parser/parser_util.cpp diff --git a/doc/dev-reference/compilation.rst b/doc/dev-reference/compilation.rst index df304187..7a7f37ec 100644 --- a/doc/dev-reference/compilation.rst +++ b/doc/dev-reference/compilation.rst @@ -471,3 +471,93 @@ matching support. Here they are, in a nutshell: Approximate matching is always disabled by default, and can be enabled on a per-pattern basis by using an extended parameter described in :ref:`extparam`. + +.. _logical_combinations: + +******************** +Logical Combinations +******************** + +For situations when a user requires behaviour that depends on the presence or +absence of matches from groups of patterns, Hyperscan provides support for the +logical combination of patterns in a given pattern set, with three operators: +``NOT``, ``AND`` and ``OR``. + +The logical value of such a combination is based on each expression's matching +status at a given offset. The matching status of any expression has a boolean +value: *false* if the expression has not yet matched or *true* if the expression +has already matched. In particular, the value of a ``NOT`` operation at a given +offset is *true* if the expression it refers to is *false* at this offset. + +For example, ``NOT 101`` means that expression 101 has not yet matched at this +offset. + +A logical combination is passed to Hyperscan at compile time as an expression. +This combination expression will raise matches at every offset where one of its +sub-expressions matches and the logical value of the whole expression is *true*. 
+ +To illustrate, here is an example combination expression: :: + + ((301 OR 302) AND 303) AND (304 OR NOT 305) + +If expression 301 matches at offset 10, the logical value of 301 is *true* +while the other patterns' values are *false*. Hence, the whole combination's value is +*false*. + +Then expression 303 matches at offset 20. Now the values of 301 and 303 are +*true* while the other patterns' values are still *false*. In this case, the +combination's value is *true*, so the combination expression raises a match at +offset 20. + +Finally, expression 305 matches at offset 30. Now the values of 301, 303 and 305 +are *true* while the other patterns' values are still *false*. In this case, the +combination's value is *false* and no match is raised. + +**Using Logical Combinations** + +In logical combination syntax, an expression is written in infix notation; it +consists of operands, operators and parentheses. The operands are expression +IDs, and operators are ``!`` (NOT), ``&`` (AND) or ``|`` (OR). For example, the +combination described in the previous section would be written as: :: + + ((301 | 302) & 303) & (304 | !305) + +In a logical combination expression: + + * The priority of operators is ``!`` > ``&`` > ``|``. For example: + - ``A&B|C`` is treated as ``(A&B)|C``, + - ``A|B&C`` is treated as ``A|(B&C)``, + - ``A&!B`` is treated as ``A&(!B)``. + * Extra parentheses are allowed. For example: + - ``(A)&!(B)`` is the same as ``A&!B``, + - ``(A&B)|C`` is the same as ``A&B|C``. + * Whitespace is ignored. + +To use a logical combination expression, it must be passed to one of the +Hyperscan compile functions (:c:func:`hs_compile_multi`, +:c:func:`hs_compile_ext_multi`) along with the :c:member:`HS_FLAG_COMBINATION` flag, +which identifies the pattern as a logical combination expression. The patterns +referred to in the logical combination expression must be compiled together in +the same pattern set as the combination expression. 
+ +When an expression has the :c:member:`HS_FLAG_COMBINATION` flag set, it ignores +all other flags except the :c:member:`HS_FLAG_SINGLEMATCH` flag and the +:c:member:`HS_FLAG_QUIET` flag. + +Hyperscan will reject logical combination expressions at compile time that +evaluate to *true* when no patterns have matched; for example: :: + + !101 + !101|102 + !101&!102 + !(101&102) + +Patterns that are referred to as operands within a logical combination (for +example, 301 through 305 in the examples above) may also use the +:c:member:`HS_FLAG_QUIET` flag to silence the reporting of individual matches +for those patterns. In the absence of this flag, all matches (for +both individual patterns and their logical combinations) will be reported. + +When an expression has both the :c:member:`HS_FLAG_COMBINATION` flag and the +:c:member:`HS_FLAG_QUIET` flag set, no matches for this logical combination +will be reported. diff --git a/doc/dev-reference/tools.rst b/doc/dev-reference/tools.rst index 9c2ce6eb..e0465fc6 100644 --- a/doc/dev-reference/tools.rst +++ b/doc/dev-reference/tools.rst @@ -246,6 +246,8 @@ Character API Flag Description ``W`` :c:member:`HS_FLAG_UCP` Unicode property support ``P`` :c:member:`HS_FLAG_PREFILTER` Prefiltering mode ``L`` :c:member:`HS_FLAG_SOM_LEFTMOST` Leftmost start of match reporting +``C`` :c:member:`HS_FLAG_COMBINATION` Logical combination of patterns +``Q`` :c:member:`HS_FLAG_QUIET` Quiet at matching ========= ================================= =========== In addition to the set of flags above, :ref:`extparam` can be supplied diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index c71ee4b9..a34eadd0 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: 
@@ -45,6 +45,7 @@ #include "parser/buildstate.h" #include "parser/dump.h" #include "parser/Component.h" +#include "parser/logical_combination.h" #include "parser/parse_error.h" #include "parser/Parser.h" // for flags #include "parser/position.h" @@ -111,7 +112,13 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, const hs_expr_ext *ext) : expr(index_in, flags & HS_FLAG_ALLOWEMPTY, flags & HS_FLAG_SINGLEMATCH, false, flags & HS_FLAG_PREFILTER, SOM_NONE, report, 0, MAX_OFFSET, - 0, 0, 0) { + 0, 0, 0, flags & HS_FLAG_QUIET) { + // We disallow SOM + Quiet. + if ((flags & HS_FLAG_QUIET) && (flags & HS_FLAG_SOM_LEFTMOST)) { + throw CompileError("HS_FLAG_QUIET is not supported in " + "combination with HS_FLAG_SOM_LEFTMOST."); + } + flags &= ~HS_FLAG_QUIET; ParseMode mode(flags); component = parse(expression, mode); @@ -234,6 +241,45 @@ void addExpression(NG &ng, unsigned index, const char *expression, DEBUG_PRINTF("index=%u, id=%u, flags=%u, expr='%s'\n", index, id, flags, expression); + if (flags & HS_FLAG_COMBINATION) { + if (flags & ~(HS_FLAG_COMBINATION | HS_FLAG_QUIET | + HS_FLAG_SINGLEMATCH)) { + throw CompileError("only HS_FLAG_QUIET and HS_FLAG_SINGLEMATCH " + "are supported in combination " + "with HS_FLAG_COMBINATION."); + } + if (flags & HS_FLAG_QUIET) { + DEBUG_PRINTF("skip QUIET logical combination expression %u\n", id); + } else { + u32 ekey = INVALID_EKEY; + u64a min_offset = 0; + u64a max_offset = MAX_OFFSET; + if (flags & HS_FLAG_SINGLEMATCH) { + ekey = ng.rm.getExhaustibleKey(id); + } + if (ext) { + validateExt(*ext); + if (ext->flags & ~(HS_EXT_FLAG_MIN_OFFSET | + HS_EXT_FLAG_MAX_OFFSET)) { + throw CompileError("only HS_EXT_FLAG_MIN_OFFSET and " + "HS_EXT_FLAG_MAX_OFFSET extra flags " + "are supported in combination " + "with HS_FLAG_COMBINATION."); + } + if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) { + min_offset = ext->min_offset; + } + if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) { + max_offset = ext->max_offset; + } + } + 
ng.rm.pl.parseLogicalCombination(id, expression, ekey, min_offset, + max_offset); + DEBUG_PRINTF("parsed logical combination expression %u\n", id); + } + return; + } + // Ensure that our pattern isn't too long (in characters). if (strlen(expression) > cc.grey.limitPatternLength) { throw CompileError("Pattern length exceeds limit."); diff --git a/src/compiler/expression_info.h b/src/compiler/expression_info.h index 45d18cbf..fefb3b58 100644 --- a/src/compiler/expression_info.h +++ b/src/compiler/expression_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Intel Corporation + * Copyright (c) 2017-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -46,12 +46,12 @@ public: bool highlander_in, bool utf8_in, bool prefilter_in, som_type som_in, ReportID report_in, u64a min_offset_in, u64a max_offset_in, u64a min_length_in, u32 edit_distance_in, - u32 hamm_distance_in) + u32 hamm_distance_in, bool quiet_in) : index(index_in), report(report_in), allow_vacuous(allow_vacuous_in), highlander(highlander_in), utf8(utf8_in), prefilter(prefilter_in), som(som_in), min_offset(min_offset_in), max_offset(max_offset_in), min_length(min_length_in), edit_distance(edit_distance_in), - hamm_distance(hamm_distance_in) {} + hamm_distance(hamm_distance_in), quiet(quiet_in) {} /** * \brief Index of the expression represented by this graph. @@ -98,6 +98,9 @@ public: */ u32 edit_distance; u32 hamm_distance; + + /** \brief Quiet on match. 
*/ + bool quiet; }; } diff --git a/src/hs.cpp b/src/hs.cpp index 04ffb479..329702d4 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -245,6 +245,11 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags, } } + // Check sub-expression ids + ng.rm.pl.validateSubIDs(ids, expressions, flags, elements); + // Renumber and assign lkey to reports + ng.rm.logicalKeyRenumber(); + unsigned length = 0; struct hs_database *out = build(ng, &length); diff --git a/src/hs_compile.h b/src/hs_compile.h index dc9ba307..c8dcfdf2 100644 --- a/src/hs_compile.h +++ b/src/hs_compile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -811,6 +811,28 @@ hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform); */ #define HS_FLAG_SOM_LEFTMOST 256 +/** + * Compile flag: Logical combination. + * + * This flag instructs Hyperscan to parse this expression as logical + * combination syntax. + * Logical constraints consist of operands, operators and parentheses. + * The operands are expression IDs, and operators can be + * '!'(NOT), '&'(AND) or '|'(OR). + * For example: + * (101&102&103)|(104&!105) + * ((301|302)&303)&(304|305) + */ +#define HS_FLAG_COMBINATION 512 + +/** + * Compile flag: Don't do any match reporting. + * + * This flag instructs Hyperscan to ignore match reporting for this expression. + * It is designed to be used on the sub-expressions in logical combinations. 
+ */ +#define HS_FLAG_QUIET 1024 + /** @} */ /** diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp index 8b7e4f91..8dccf986 100644 --- a/src/nfagraph/ng.cpp +++ b/src/nfagraph/ng.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -577,7 +577,8 @@ bool NG::addHolder(NGHolder &g) { } bool NG::addLiteral(const ue2_literal &literal, u32 expr_index, - u32 external_report, bool highlander, som_type som) { + u32 external_report, bool highlander, som_type som, + bool quiet) { assert(!literal.empty()); if (!cc.grey.shortcutLiterals) { @@ -605,7 +606,7 @@ bool NG::addLiteral(const ue2_literal &literal, u32 expr_index, } else { u32 ekey = highlander ? rm.getExhaustibleKey(external_report) : INVALID_EKEY; - Report r = makeECallback(external_report, 0, ekey); + Report r = makeECallback(external_report, 0, ekey, quiet); id = rm.getInternalId(r); } diff --git a/src/nfagraph/ng.h b/src/nfagraph/ng.h index a1304583..a5a9077d 100644 --- a/src/nfagraph/ng.h +++ b/src/nfagraph/ng.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -77,7 +77,7 @@ public: /** \brief Adds a literal to Rose, used by literal shortcut passes (instead * of using \ref addGraph) */ bool addLiteral(const ue2_literal &lit, u32 expr_index, u32 external_report, - bool highlander, som_type som); + bool highlander, som_type som, bool quiet); /** \brief Maximum history in bytes available for use by SOM reverse NFAs, * a hack for pattern support (see UE-1903). 
This is always set to the max diff --git a/src/parser/logical_combination.cpp b/src/parser/logical_combination.cpp new file mode 100644 index 00000000..b78390b0 --- /dev/null +++ b/src/parser/logical_combination.cpp @@ -0,0 +1,376 @@ +/* + * Copyright (c) 2018, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Parse and build ParsedLogical::logicalTree and combInfoMap. 
+ */ +#include "logical_combination.h" +#include "parser/parse_error.h" +#include "util/container.h" +#include "hs_compile.h" + +#include + +using namespace std; + +namespace ue2 { + +u32 ParsedLogical::getLogicalKey(u32 a) { + auto it = toLogicalKeyMap.find(a); + if (it == toLogicalKeyMap.end()) { + // get size before assigning to avoid wacky LHS shenanigans + u32 size = toLogicalKeyMap.size(); + bool inserted; + tie(it, inserted) = toLogicalKeyMap.emplace(a, size); + assert(inserted); + } + DEBUG_PRINTF("%u -> lkey %u\n", it->first, it->second); + return it->second; +} + +u32 ParsedLogical::getCombKey(u32 a) { + auto it = toCombKeyMap.find(a); + if (it == toCombKeyMap.end()) { + u32 size = toCombKeyMap.size(); + bool inserted; + tie(it, inserted) = toCombKeyMap.emplace(a, size); + assert(inserted); + } + DEBUG_PRINTF("%u -> ckey %u\n", it->first, it->second); + return it->second; +} + +void ParsedLogical::addRelateCKey(u32 lkey, u32 ckey) { + auto it = lkey2ckeys.find(lkey); + if (it == lkey2ckeys.end()) { + bool inserted; + tie(it, inserted) = lkey2ckeys.emplace(lkey, set()); + assert(inserted); + } + it->second.insert(ckey); + DEBUG_PRINTF("lkey %u belongs to combination key %u\n", + it->first, ckey); +} + +#define TRY_RENUM_OP(ckey) \ +do { \ + if (ckey & LOGICAL_OP_BIT) { \ + ckey = (ckey & ~LOGICAL_OP_BIT) + toLogicalKeyMap.size(); \ + } \ +} while(0) + +u32 ParsedLogical::logicalTreeAdd(u32 op, u32 left, u32 right) { + LogicalOp lop; + assert((LOGICAL_OP_BIT & (u32)logicalTree.size()) == 0); + lop.id = LOGICAL_OP_BIT | (u32)logicalTree.size(); + lop.op = op; + lop.lo = left; + lop.ro = right; + logicalTree.push_back(lop); + return lop.id; +} + +void ParsedLogical::combinationInfoAdd(UNUSED u32 ckey, u32 id, u32 ekey, + u32 lkey_start, u32 lkey_result, + u64a min_offset, u64a max_offset) { + assert(ckey == combInfoMap.size()); + CombInfo ci; + ci.id = id; + ci.ekey = ekey; + ci.start = lkey_start; + ci.result = lkey_result; + ci.min_offset = min_offset; + 
ci.max_offset = max_offset; + combInfoMap.push_back(ci); + + DEBUG_PRINTF("ckey %u (id %u) -> lkey %u..%u, ekey=0x%x\n", ckey, ci.id, + ci.start, ci.result, ci.ekey); +} + +void ParsedLogical::validateSubIDs(const unsigned *ids, + const char *const *expressions, + const unsigned *flags, + unsigned elements) { + for (const auto &it : toLogicalKeyMap) { + bool unknown = true; + u32 i = 0; + for (i = 0; i < elements; i++) { + if ((ids ? ids[i] : 0) == it.first) { + unknown = false; + break; + } + } + if (unknown) { + throw CompileError("Unknown sub-expression id."); + } + if (contains(toCombKeyMap, it.first)) { + throw CompileError("Have combination of combination."); + } + if (flags && (flags[i] & HS_FLAG_SOM_LEFTMOST)) { + throw CompileError("Have SOM flag in sub-expression."); + } + if (flags && (flags[i] & HS_FLAG_PREFILTER)) { + throw CompileError("Have PREFILTER flag in sub-expression."); + } + hs_compile_error_t *compile_err = NULL; + hs_expr_info_t *info = NULL; + hs_error_t err = hs_expression_info(expressions[i], flags ? flags[i] : 0, + &info, &compile_err); // flags may be NULL, as guarded above + if (err != HS_SUCCESS) { + hs_free_compile_error(compile_err); + throw CompileError("Run hs_expression_info() failed."); + } + if (!info) { + throw CompileError("Get hs_expr_info_t failed."); + } + const bool unordered = info->unordered_matches; + free(info); // release before throwing so info is never leaked + if (unordered) { + throw CompileError("Have unordered match in sub-expressions."); + } + } +} + +void ParsedLogical::logicalKeyRenumber() { + // renumber operation lkey in op vector + for (auto &op : logicalTree) { + TRY_RENUM_OP(op.id); + TRY_RENUM_OP(op.lo); + TRY_RENUM_OP(op.ro); + } + // renumber operation lkey in info map + for (auto &ci : combInfoMap) { + TRY_RENUM_OP(ci.start); + TRY_RENUM_OP(ci.result); + } +} + +struct LogicalOperator { + LogicalOperator(u32 op_in, u32 paren_in) + : op(op_in), paren(paren_in) {} + u32 op; + u32 paren; +}; + +static +u32 toOperator(char c) { + u32 op = UNKNOWN_OP; + switch (c) { + case '!' 
: + op = LOGICAL_OP_NOT; + break; + case '&' : + op = LOGICAL_OP_AND; + break; + case '|' : + op = LOGICAL_OP_OR; + break; + default: + break; + }; + return op; +} + +static +bool cmpOperator(const LogicalOperator &op1, const LogicalOperator &op2) { + if (op1.paren < op2.paren) { + return false; + } + if (op1.paren > op2.paren) { + return true; + } + assert(op1.paren == op2.paren); + if (op1.op > op2.op) { + return false; + } + if (op1.op < op2.op) { + return true; + } + return true; +} + +static +u32 fetchSubID(const char *logical, u32 &digit, u32 end) { + if (digit == (u32)-1) { // no digit parsing in progress + return (u32)-1; + } + assert(end > digit); + if (end - digit > 9) { + throw LocatedParseError("Expression id too large"); + } + u32 mult = 1; + u32 sum = 0; + for (u32 j = end - 1; (j >= digit) && (j != (u32)-1) ; j--) { + assert(isdigit(logical[j])); + sum += (logical[j] - '0') * mult; + mult *= 10; + } + digit = (u32)-1; + return sum; +} + +static +void popOperator(vector &op_stack, vector &subid_stack, + ParsedLogical &pl) { + if (subid_stack.empty()) { + throw LocatedParseError("Not enough operand"); + } + u32 right = subid_stack.back(); + subid_stack.pop_back(); + u32 left = 0; + if (op_stack.back().op != LOGICAL_OP_NOT) { + if (subid_stack.empty()) { + throw LocatedParseError("Not enough operand"); + } + left = subid_stack.back(); + subid_stack.pop_back(); + } + subid_stack.push_back(pl.logicalTreeAdd(op_stack.back().op, left, right)); + op_stack.pop_back(); +} + +static +char getValue(const vector &lv, u32 ckey) { + if (ckey & LOGICAL_OP_BIT) { + return lv[ckey & ~LOGICAL_OP_BIT]; + } else { + return 0; + } +} + +static +bool hasMatchFromPurelyNegative(const vector &tree, + u32 start, u32 result) { + vector lv(tree.size()); + assert(start <= result); + for (u32 i = start; i <= result; i++) { + assert(i & LOGICAL_OP_BIT); + const LogicalOp &op = tree[i & ~LOGICAL_OP_BIT]; + assert(i == op.id); + switch (op.op) { + case LOGICAL_OP_NOT: + lv[op.id & 
~LOGICAL_OP_BIT] = !getValue(lv, op.ro); + break; + case LOGICAL_OP_AND: + lv[op.id & ~LOGICAL_OP_BIT] = getValue(lv, op.lo) & + getValue(lv, op.ro); + break; + case LOGICAL_OP_OR: + lv[op.id & ~LOGICAL_OP_BIT] = getValue(lv, op.lo) | + getValue(lv, op.ro); + break; + default: + assert(0); + break; + } + } + return lv[result & ~LOGICAL_OP_BIT]; +} + +void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical, + u32 ekey, u64a min_offset, + u64a max_offset) { + u32 ckey = getCombKey(id); + vector op_stack; + vector subid_stack; + u32 lkey_start = INVALID_LKEY; // logical operation's lkey + u32 paren = 0; // parentheses + u32 digit = (u32)-1; // digit start offset, invalid offset is -1 + u32 subid = (u32)-1; + u32 i; + try { + for (i = 0; logical[i]; i++) { + if (isdigit(logical[i])) { + if (digit == (u32)-1) { // new digit start + digit = i; + } + } else { + if ((subid = fetchSubID(logical, digit, i)) != (u32)-1) { + subid_stack.push_back(getLogicalKey(subid)); + addRelateCKey(subid_stack.back(), ckey); + } + if (logical[i] == ' ') { // skip whitespace + continue; + } + if (logical[i] == '(') { + paren += 1; + } else if (logical[i] == ')') { + if (paren <= 0) { + throw LocatedParseError("Not enough left parentheses"); + } + paren -= 1; + } else { + u32 prio = toOperator(logical[i]); + if (prio != UNKNOWN_OP) { + LogicalOperator op(prio, paren); + while (!op_stack.empty() + && cmpOperator(op_stack.back(), op)) { + popOperator(op_stack, subid_stack, *this); + if (lkey_start == INVALID_LKEY) { + lkey_start = subid_stack.back(); + } + } + op_stack.push_back(op); + } else { + throw LocatedParseError("Unknown character"); + } + } + } + } + if (paren != 0) { + throw LocatedParseError("Not enough right parentheses"); + } + if ((subid = fetchSubID(logical, digit, i)) != (u32)-1) { + subid_stack.push_back(getLogicalKey(subid)); + addRelateCKey(subid_stack.back(), ckey); + } + while (!op_stack.empty()) { + popOperator(op_stack, subid_stack, *this); + if 
(lkey_start == INVALID_LKEY) { + lkey_start = subid_stack.back(); + } + } + if (subid_stack.size() != 1) { + throw LocatedParseError("Not enough operator"); + } + } catch (LocatedParseError &error) { + error.locate(i); + throw; + } + u32 lkey_result = subid_stack.back(); // logical operation's lkey + if (lkey_start == INVALID_LKEY) { + throw CompileError("No logical operation."); + } + if (hasMatchFromPurelyNegative(logicalTree, lkey_start, lkey_result)) { + throw CompileError("Has match from purely negative sub-expressions."); + } + combinationInfoAdd(ckey, id, ekey, lkey_start, lkey_result, + min_offset, max_offset); +} + +} // namespace ue2 diff --git a/src/parser/logical_combination.h b/src/parser/logical_combination.h new file mode 100644 index 00000000..7c8eb36e --- /dev/null +++ b/src/parser/logical_combination.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2018, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Parse and build ParsedLogical::logicalTree and combInfoMap. + */ + +#ifndef LOGICAL_COMBINATION_H +#define LOGICAL_COMBINATION_H + +#include "util/logical.h" + +#include +#include +#include + +namespace ue2 { + +class ParsedLogical { + friend class ReportManager; +public: + /** \brief Parse 1 logical expression \a logical, assign temporary ckey. */ + void parseLogicalCombination(unsigned id, const char *logical, u32 ekey, + u64a min_offset, u64a max_offset); + + /** \brief Check if all sub-expression id in combinations are valid. */ + void validateSubIDs(const unsigned *ids, const char *const *expressions, + const unsigned *flags, unsigned elements); + + /** \brief Renumber and assign final lkey for each logical operation + * after parsed all logical expressions. */ + void logicalKeyRenumber(); + + /** \brief Fetch the lkey associated with the given expression id, + * assigning one if necessary. */ + u32 getLogicalKey(u32 expressionId); + + /** \brief Fetch the ckey associated with the given expression id, + * assigning one if necessary. */ + u32 getCombKey(u32 expressionId); + + /** \brief Add lkey's corresponding combination id. */ + void addRelateCKey(u32 lkey, u32 ckey); + + /** \brief Add one Logical Operation. */ + u32 logicalTreeAdd(u32 op, u32 left, u32 right); + + /** \brief Assign the combination info associated with the given ckey. 
*/ + void combinationInfoAdd(u32 ckey, u32 id, u32 ekey, u32 lkey_start, + u32 lkey_result, u64a min_offset, u64a max_offset); + + const std::map &getLkeyMap() const { + return toLogicalKeyMap; + } + + const std::vector &getLogicalTree() const { + return logicalTree; + } + + CombInfo getCombInfoById(u32 id) const { + u32 ckey = toCombKeyMap.at(id); + assert(ckey < combInfoMap.size()); + return combInfoMap.at(ckey); + } + +private: + /** \brief Mapping from ckey to combination info. */ + std::vector combInfoMap; + + /** \brief Mapping from combination expression id to combination key, + * combination key is used in combination bit-vector cache. */ + std::map toCombKeyMap; + + /** \brief Mapping from expression id to logical key, logical key is used + * as index in LogicalOp array. */ + std::map toLogicalKeyMap; + + /** \brief Mapping from logical key to related combination keys. */ + std::map> lkey2ckeys; + + /** \brief Logical constraints, each operation from postfix notation. */ + std::vector logicalTree; +}; + +} // namespace ue2 + +#endif diff --git a/src/parser/shortcut_literal.cpp b/src/parser/shortcut_literal.cpp index 82679c88..7a7ab6ee 100644 --- a/src/parser/shortcut_literal.cpp +++ b/src/parser/shortcut_literal.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -199,7 +199,7 @@ bool shortcutLiteral(NG &ng, const ParsedExpression &pe) { DEBUG_PRINTF("constructed literal %s\n", dumpString(lit).c_str()); return ng.addLiteral(lit, expr.index, expr.report, expr.highlander, - expr.som); + expr.som, expr.quiet); } } // namespace ue2 diff --git a/src/report.h b/src/report.h index 4a5f401e..a2e2d0f3 100644 --- a/src/report.h +++ b/src/report.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2018, Intel Corporation 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,6 +42,7 @@ #include "rose/runtime.h" #include "som/som_runtime.h" #include "util/exhaust.h" +#include "util/logical.h" #include "util/fatbit.h" enum DedupeResult { @@ -151,6 +152,93 @@ void clearEvec(const struct RoseEngine *rose, char *evec) { mmbit_clear((u8 *)evec, rose->ekeyCount); } +/** \brief Test whether the given key (\a lkey) is set in the logical vector + * \a lvec. */ +static really_inline +char getLogicalVal(const struct RoseEngine *rose, const char *lvec, u32 lkey) { + DEBUG_PRINTF("checking lkey matching %p %u\n", lvec, lkey); + assert(lkey != INVALID_LKEY); + assert(lkey < rose->lkeyCount + rose->lopCount); + return mmbit_isset((const u8 *)lvec, rose->lkeyCount + rose->lopCount, + lkey); +} + +/** \brief Set key \a lkey in the logical vector on (\a val != 0) or off. */ +static really_inline +void setLogicalVal(const struct RoseEngine *rose, char *lvec, u32 lkey, + char val) { + DEBUG_PRINTF("marking as matched logical key %u\n", lkey); + assert(lkey != INVALID_LKEY); + assert(lkey < rose->lkeyCount + rose->lopCount); + switch (val) { + case 0: + mmbit_unset((u8 *)lvec, rose->lkeyCount + rose->lopCount, lkey); + break; + default: + mmbit_set((u8 *)lvec, rose->lkeyCount + rose->lopCount, lkey); + break; + } +} + +/** \brief Mark key \a ckey on in the combination vector. */ +static really_inline +void setCombinationActive(const struct RoseEngine *rose, char *cvec, u32 ckey) { + DEBUG_PRINTF("marking as active combination key %u\n", ckey); + assert(ckey != INVALID_CKEY); + assert(ckey < rose->ckeyCount); + mmbit_set((u8 *)cvec, rose->ckeyCount, ckey); +} + +/** \brief Returns 1 if compliant to all logical combinations. 
+ */ +static really_inline +char isLogicalCombination(const struct RoseEngine *rose, char *lvec, + u32 start, u32 result) { + const struct LogicalOp *logicalTree = (const struct LogicalOp *) + ((const char *)rose + rose->logicalTreeOffset); + assert(start >= rose->lkeyCount); + assert(start <= result); + assert(result < rose->lkeyCount + rose->lopCount); + for (u32 i = start; i <= result; i++) { + const struct LogicalOp *op = logicalTree + (i - rose->lkeyCount); + assert(i == op->id); + assert(op->op <= LAST_LOGICAL_OP); + switch ((enum LogicalOpType)op->op) { + case LOGICAL_OP_NOT: + setLogicalVal(rose, lvec, op->id, + !getLogicalVal(rose, lvec, op->ro)); + break; + case LOGICAL_OP_AND: + setLogicalVal(rose, lvec, op->id, + getLogicalVal(rose, lvec, op->lo) & + getLogicalVal(rose, lvec, op->ro)); // && + break; + case LOGICAL_OP_OR: + setLogicalVal(rose, lvec, op->id, + getLogicalVal(rose, lvec, op->lo) | + getLogicalVal(rose, lvec, op->ro)); // || + break; + } + } + return getLogicalVal(rose, lvec, result); +} + +/** \brief Clear all keys in the logical vector and the combination vector. */ +static really_inline +void clearLvec(const struct RoseEngine *rose, char *lvec, char *cvec) { + DEBUG_PRINTF("clearing lvec %p %u\n", lvec, + rose->lkeyCount + rose->lopCount); + DEBUG_PRINTF("clearing cvec %p %u\n", cvec, rose->ckeyCount); + mmbit_clear((u8 *)lvec, rose->lkeyCount + rose->lopCount); + mmbit_clear((u8 *)cvec, rose->ckeyCount); +} + +/** \brief Clear all keys in the combination vector. */ +static really_inline +void clearCvec(const struct RoseEngine *rose, char *cvec) { + DEBUG_PRINTF("clearing cvec %p %u\n", cvec, rose->ckeyCount); + mmbit_clear((u8 *)cvec, rose->ckeyCount); +} + /** * \brief Deliver the given report to the user callback. 
* diff --git a/src/rose/block.c b/src/rose/block.c index 2c493219..a32113f4 100644 --- a/src/rose/block.c +++ b/src/rose/block.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -145,6 +145,7 @@ void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch, tctxt->lastEndOffset = 0; tctxt->filledDelayedSlots = 0; tctxt->lastMatchOffset = 0; + tctxt->lastCombMatchOffset = 0; tctxt->minMatchOffset = 0; tctxt->minNonMpvMatchOffset = 0; tctxt->next_mpv_offset = 0; diff --git a/src/rose/catchup.c b/src/rose/catchup.c index 9e36d091..7a6648da 100644 --- a/src/rose/catchup.c +++ b/src/rose/catchup.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -424,6 +424,12 @@ hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc, } done: + if (t->flushCombProgramOffset) { + if (roseRunFlushCombProgram(t, scratch, mpv_exec_end) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } updateMinMatchOffsetFromMpv(&scratch->tctxt, mpv_exec_end); scratch->tctxt.next_mpv_offset = MAX(next_pos_match_loc + scratch->core_info.buf_offset, diff --git a/src/rose/catchup.h b/src/rose/catchup.h index 24b843f5..8188d5af 100644 --- a/src/rose/catchup.h +++ b/src/rose/catchup.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -51,6 +51,7 @@ #include "hwlm/hwlm.h" #include "runtime.h" #include "scratch.h" +#include "rose.h" 
#include "rose_common.h" #include "rose_internal.h" #include "ue2common.h" @@ -105,6 +106,12 @@ hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, s64a loc, assert(!can_stop_matching(scratch)); if (canSkipCatchUpMPV(t, scratch, cur_offset)) { + if (t->flushCombProgramOffset) { + if (roseRunFlushCombProgram(t, scratch, cur_offset) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } updateMinMatchOffsetFromMpv(&scratch->tctxt, cur_offset); return HWLM_CONTINUE_MATCHING; } @@ -139,6 +146,12 @@ hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t, hwlmcb_rv_t rv; if (!t->activeArrayCount || !mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) { + if (t->flushCombProgramOffset) { + if (roseRunFlushCombProgram(t, scratch, end) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } updateMinMatchOffset(&scratch->tctxt, end); rv = HWLM_CONTINUE_MATCHING; } else { diff --git a/src/rose/match.c b/src/rose/match.c index 5d1b6e07..97e93c93 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -571,6 +571,22 @@ int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program, return MO_CONTINUE_MATCHING; } +/** + * \brief Execute a flush combination program. + * + * Returns MO_HALT_MATCHING if the stream is exhausted or the user has + * instructed us to halt, or MO_CONTINUE_MATCHING otherwise. 
+ */ +int roseRunFlushCombProgram(const struct RoseEngine *rose, + struct hs_scratch *scratch, u64a end) { + hwlmcb_rv_t rv = roseRunProgram(rose, scratch, rose->flushCombProgramOffset, + 0, end, 0); + if (rv == HWLM_TERMINATE_MATCHING) { + return MO_HALT_MATCHING; + } + return MO_CONTINUE_MATCHING; +} + int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) { struct hs_scratch *scratch = context; assert(scratch && scratch->magic == SCRATCH_MAGIC); diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 30ec4bcd..3c11300b 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -557,6 +557,22 @@ void roseHandleSomSom(struct hs_scratch *scratch, setSomFromSomAware(scratch, sr, start, end); } +static rose_inline +hwlmcb_rv_t roseSetExhaust(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 ekey) { + assert(scratch); + assert(scratch->magic == SCRATCH_MAGIC); + + struct core_info *ci = &scratch->core_info; + + assert(!can_stop_matching(scratch)); + assert(!isExhausted(ci->rose, ci->exhaustionVector, ekey)); + + markAsMatched(ci->rose, ci->exhaustionVector, ekey); + + return roseHaltIfExhausted(t, scratch); +} + static really_inline int reachHasBit(const u8 *reach, u8 c) { return !!(reach[c / 8U] & (u8)1U << (c % 8U)); @@ -1822,6 +1838,56 @@ void updateSeqPoint(struct RoseContext *tctxt, u64a offset, } } +static rose_inline +hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t, + struct hs_scratch *scratch) { + u8 *cvec = (u8 *)scratch->core_info.combVector; + if (!mmbit_any(cvec, t->ckeyCount)) { + return HWLM_CONTINUE_MATCHING; + } + u64a end = scratch->tctxt.lastCombMatchOffset; + for (u32 i = mmbit_iterate(cvec, t->ckeyCount, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(cvec, t->ckeyCount, i)) { + const struct CombInfo *combInfoMap = (const struct CombInfo *) + ((const char *)t + t->combInfoMapOffset); + const struct CombInfo *ci = combInfoMap + i; + if ((ci->min_offset != 0) && (end 
< ci->min_offset)) { + DEBUG_PRINTF("halt: before min_offset=%llu\n", ci->min_offset); + continue; + } + if ((ci->max_offset != MAX_OFFSET) && (end > ci->max_offset)) { + DEBUG_PRINTF("halt: after max_offset=%llu\n", ci->max_offset); + continue; + } + + DEBUG_PRINTF("check ekey %u\n", ci->ekey); + if (ci->ekey != INVALID_EKEY) { + assert(ci->ekey < t->ekeyCount); + const char *evec = scratch->core_info.exhaustionVector; + if (isExhausted(t, evec, ci->ekey)) { + DEBUG_PRINTF("ekey %u already set, match is exhausted\n", + ci->ekey); + continue; + } + } + + DEBUG_PRINTF("check ckey %u\n", i); + char *lvec = scratch->core_info.logicalVector; + if (!isLogicalCombination(t, lvec, ci->start, ci->result)) { + DEBUG_PRINTF("Logical Combination Failed!\n"); + continue; + } + + DEBUG_PRINTF("Logical Combination Passed!\n"); + if (roseReport(t, scratch, end, ci->id, 0, + ci->ekey) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + clearCvec(t, (char *)cvec); + return HWLM_CONTINUE_MATCHING; +} + #define PROGRAM_CASE(name) \ case ROSE_INSTR_##name: { \ DEBUG_PRINTF("instruction: " #name " (pc=%u)\n", \ @@ -2587,6 +2653,47 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, } } PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_LOGICAL) { + DEBUG_PRINTF("set logical value of lkey %u, offset_adjust=%d\n", + ri->lkey, ri->offset_adjust); + assert(ri->lkey != INVALID_LKEY); + assert(ri->lkey < t->lkeyCount); + char *lvec = scratch->core_info.logicalVector; + setLogicalVal(t, lvec, ri->lkey, 1); + updateLastCombMatchOffset(tctxt, end + ri->offset_adjust); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_COMBINATION) { + DEBUG_PRINTF("set ckey %u as active\n", ri->ckey); + assert(ri->ckey != INVALID_CKEY); + assert(ri->ckey < t->ckeyCount); + char *cvec = scratch->core_info.combVector; + setCombinationActive(t, cvec, ri->ckey); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(FLUSH_COMBINATION) { + assert(end >= tctxt->lastCombMatchOffset); + if (end > 
tctxt->lastCombMatchOffset) { + if (flushActiveCombinations(t, scratch) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_EXHAUST) { + updateSeqPoint(tctxt, end, from_mpv); + if (roseSetExhaust(t, scratch, ri->ekey) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION } } diff --git a/src/rose/rose.h b/src/rose/rose.h index b29519b6..c2b682f6 100644 --- a/src/rose/rose.h +++ b/src/rose/rose.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -53,4 +53,7 @@ int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context); int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program, u64a stream_offset, struct hs_scratch *scratch); +int roseRunFlushCombProgram(const struct RoseEngine *rose, + struct hs_scratch *scratch, u64a end); + #endif // ROSE_H diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 9a546ae4..2c0a9b28 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -426,6 +426,17 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount, curr_offset += mmbit_size(build.rm.numEkeys()); so->exhausted_size = mmbit_size(build.rm.numEkeys()); + // Logical multibit. + so->logicalVec = curr_offset; + so->logicalVec_size = mmbit_size(build.rm.numLogicalKeys() + + build.rm.numLogicalOps()); + curr_offset += so->logicalVec_size; + + // Combination multibit. 
+ so->combVec = curr_offset; + so->combVec_size = mmbit_size(build.rm.numCkeys()); + curr_offset += so->combVec_size; + // SOM locations and valid/writeable multibit structures. if (build.ssm.numSomSlots()) { const u32 somWidth = build.ssm.somPrecision(); @@ -2469,6 +2480,18 @@ void writeLeftInfo(RoseEngineBlob &engine_blob, RoseEngine &proto, proto.rosePrefixCount = countRosePrefixes(leftInfoTable); } +static +void writeLogicalInfo(const ReportManager &rm, RoseEngineBlob &engine_blob, + RoseEngine &proto) { + const auto &tree = rm.getLogicalTree(); + proto.logicalTreeOffset = engine_blob.add_range(tree); + const auto &combMap = rm.getCombInfoMap(); + proto.combInfoMapOffset = engine_blob.add_range(combMap); + proto.lkeyCount = rm.numLogicalKeys(); + proto.lopCount = rm.numLogicalOps(); + proto.ckeyCount = rm.numCkeys(); +} + static void writeNfaInfo(const RoseBuildImpl &build, build_context &bc, RoseEngine &proto, const set &no_retrigger_queues) { @@ -3313,6 +3336,15 @@ RoseProgram makeEodProgram(const RoseBuildImpl &build, build_context &bc, return program; } +static +RoseProgram makeFlushCombProgram(const RoseEngine &t) { + RoseProgram program; + if (t.ckeyCount) { + addFlushCombinationProgram(program); + } + return program; +} + static u32 history_required(const rose_literal_id &key) { if (key.msk.size() < key.s.length()) { @@ -3678,6 +3710,10 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { writeDkeyInfo(rm, bc.engine_blob, proto); writeLeftInfo(bc.engine_blob, proto, leftInfoTable); + writeLogicalInfo(rm, bc.engine_blob, proto); + + auto flushComb_prog = makeFlushCombProgram(proto); + proto.flushCombProgramOffset = writeProgram(bc, move(flushComb_prog)); // Build anchored matcher. 
auto atable = buildAnchoredMatcher(*this, fragments, anchored_dfas); diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index b70112f2..0cc5b5c3 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -1469,6 +1469,25 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SET_LOGICAL) { + os << " lkey " << ri->lkey << endl; + os << " offset_adjust " << ri->offset_adjust << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_COMBINATION) { + os << " ckey " << ri->ckey << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(FLUSH_COMBINATION) {} + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_EXHAUST) { + os << " ekey " << ri->ekey << endl; + } + PROGRAM_NEXT_INSTRUCTION + default: os << " UNKNOWN (code " << int{code} << ")" << endl; os << " " << endl; @@ -1523,6 +1542,23 @@ void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) { os.close(); } +static +void dumpRoseFlushCombPrograms(const RoseEngine *t, const string &filename) { + ofstream os(filename); + const char *base = (const char *)t; + + if (t->flushCombProgramOffset) { + os << "Flush Combination Program @ " << t->flushCombProgramOffset + << ":" << endl; + dumpProgram(os, t, base + t->flushCombProgramOffset); + os << endl; + } else { + os << "" << endl; + } + + os.close(); +} + static void dumpRoseReportPrograms(const RoseEngine *t, const string &filename) { ofstream os(filename); @@ -2028,6 +2064,10 @@ void roseDumpText(const RoseEngine *t, FILE *f) { fprintf(f, " - history buffer : %u bytes\n", t->historyRequired); fprintf(f, " - exhaustion vector : %u bytes\n", t->stateOffsets.exhausted_size); + fprintf(f, " - logical 
vector : %u bytes\n", + t->stateOffsets.logicalVec_size); + fprintf(f, " - combination vector: %u bytes\n", + t->stateOffsets.combVec_size); fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize); fprintf(f, " - long lit matcher : %u bytes\n", t->longLitStreamState); fprintf(f, " - active array : %u bytes\n", @@ -2092,6 +2132,11 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, mode); DUMP_U32(t, historyRequired); DUMP_U32(t, ekeyCount); + DUMP_U32(t, lkeyCount); + DUMP_U32(t, lopCount); + DUMP_U32(t, ckeyCount); + DUMP_U32(t, logicalTreeOffset); + DUMP_U32(t, combInfoMapOffset); DUMP_U32(t, dkeyCount); DUMP_U32(t, dkeyLogSize); DUMP_U32(t, invDkeyOffset); @@ -2127,6 +2172,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, leftOffset); DUMP_U32(t, roseCount); DUMP_U32(t, eodProgramOffset); + DUMP_U32(t, flushCombProgramOffset); DUMP_U32(t, lastByteHistoryIterOffset); DUMP_U32(t, minWidth); DUMP_U32(t, minWidthExcludingBoundaries); @@ -2150,6 +2196,10 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, stateOffsets.history); DUMP_U32(t, stateOffsets.exhausted); DUMP_U32(t, stateOffsets.exhausted_size); + DUMP_U32(t, stateOffsets.logicalVec); + DUMP_U32(t, stateOffsets.logicalVec_size); + DUMP_U32(t, stateOffsets.combVec); + DUMP_U32(t, stateOffsets.combVec_size); DUMP_U32(t, stateOffsets.activeLeafArray); DUMP_U32(t, stateOffsets.activeLeafArray_size); DUMP_U32(t, stateOffsets.activeLeftArray); @@ -2200,6 +2250,7 @@ void roseDumpPrograms(const vector &fragments, const RoseEngine *t, const string &base) { dumpRoseLitPrograms(fragments, t, base + "/rose_lit_programs.txt"); dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt"); + dumpRoseFlushCombPrograms(t, base + "/rose_flush_comb_programs.txt"); dumpRoseReportPrograms(t, base + "/rose_report_programs.txt"); dumpRoseAnchoredPrograms(t, base + "/rose_anchored_programs.txt"); dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt"); diff --git 
a/src/rose/rose_build_instructions.cpp b/src/rose/rose_build_instructions.cpp index 8af08298..2fe53455 100644 --- a/src/rose/rose_build_instructions.cpp +++ b/src/rose/rose_build_instructions.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Intel Corporation + * Copyright (c) 2017-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -47,6 +47,7 @@ RoseInstrSuffixesEod::~RoseInstrSuffixesEod() = default; RoseInstrMatcherEod::~RoseInstrMatcherEod() = default; RoseInstrEnd::~RoseInstrEnd() = default; RoseInstrClearWorkDone::~RoseInstrClearWorkDone() = default; +RoseInstrFlushCombination::~RoseInstrFlushCombination() = default; using OffsetMap = RoseInstruction::OffsetMap; @@ -644,4 +645,26 @@ void RoseInstrIncludedJump::write(void *dest, RoseEngineBlob &blob, inst->squash = squash; } +void RoseInstrSetLogical::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->lkey = lkey; + inst->offset_adjust = offset_adjust; +} + +void RoseInstrSetCombination::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->ckey = ckey; +} + +void RoseInstrSetExhaust::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->ekey = ekey; +} + } diff --git a/src/rose/rose_build_instructions.h b/src/rose/rose_build_instructions.h index d3ede29b..61e6d7a6 100644 --- a/src/rose/rose_build_instructions.h +++ b/src/rose/rose_build_instructions.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Intel Corporation + * Copyright (c) 2017-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are 
permitted provided that the following conditions are met: @@ -2144,6 +2144,94 @@ public: } }; +class RoseInstrSetLogical + : public RoseInstrBaseNoTargets { +public: + u32 lkey; + s32 offset_adjust; + + RoseInstrSetLogical(u32 lkey_in, s32 offset_adjust_in) + : lkey(lkey_in), offset_adjust(offset_adjust_in) {} + + bool operator==(const RoseInstrSetLogical &ri) const { + return lkey == ri.lkey && offset_adjust == ri.offset_adjust; + } + + size_t hash() const override { + return hash_all(opcode, lkey, offset_adjust); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSetLogical &ri, const OffsetMap &, + const OffsetMap &) const { + return lkey == ri.lkey && offset_adjust == ri.offset_adjust; + } +}; + +class RoseInstrSetCombination + : public RoseInstrBaseNoTargets { +public: + u32 ckey; + + RoseInstrSetCombination(u32 ckey_in) : ckey(ckey_in) {} + + bool operator==(const RoseInstrSetCombination &ri) const { + return ckey == ri.ckey; + } + + size_t hash() const override { + return hash_all(opcode, ckey); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSetCombination &ri, const OffsetMap &, + const OffsetMap &) const { + return ckey == ri.ckey; + } +}; + +class RoseInstrFlushCombination + : public RoseInstrBaseTrivial { +public: + ~RoseInstrFlushCombination() override; +}; + +class RoseInstrSetExhaust + : public RoseInstrBaseNoTargets { +public: + u32 ekey; + + RoseInstrSetExhaust(u32 ekey_in) : ekey(ekey_in) {} + + bool operator==(const RoseInstrSetExhaust &ri) const { + return ekey == ri.ekey; + } + + size_t hash() const override { + return hash_all(opcode, ekey); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSetExhaust &ri, const OffsetMap &, + const OffsetMap &) const { + return ekey == ri.ekey; + } +}; + class 
RoseInstrEnd : public RoseInstrBaseTrivial { diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index e4e68136..2a6581e9 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -313,6 +313,10 @@ void addMatcherEodProgram(RoseProgram &program) { program.add_block(move(block)); } +void addFlushCombinationProgram(RoseProgram &program) { + program.add_before_end(make_unique()); +} + static void makeRoleCheckLeftfix(const RoseBuildImpl &build, const map &leftfix_info, @@ -496,6 +500,23 @@ void writeSomOperation(const Report &report, som_operation *op) { } } +static +void addLogicalSetRequired(const Report &report, ReportManager &rm, + RoseProgram &program) { + if (report.lkey == INVALID_LKEY) { + return; + } + // set matching status of current lkey + auto risl = make_unique(report.lkey, + report.offsetAdjust); + program.add_before_end(move(risl)); + // set current lkey's corresponding ckeys active, pending to check + for (auto ckey : rm.getRelateCKeys(report.lkey)) { + auto risc = make_unique(ckey); + program.add_before_end(move(risc)); + } +} + static void makeReport(const RoseBuildImpl &build, const ReportID id, const bool has_som, RoseProgram &program) { @@ -542,38 +563,62 @@ void makeReport(const RoseBuildImpl &build, const ReportID id, switch (report.type) { case EXTERNAL_CALLBACK: + if (build.rm.numCkeys()) { + addFlushCombinationProgram(report_block); + } if (!has_som) { // Dedupe is only necessary if this report has a dkey, or if there // are SOM reports to catch up. 
bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom; if (report.ekey == INVALID_EKEY) { if (needs_dedupe) { - report_block.add_before_end( - make_unique( - report.quashSom, build.rm.getDkey(report), - report.onmatch, report.offsetAdjust, end_inst)); + if (!report.quiet) { + report_block.add_before_end( + make_unique( + report.quashSom, build.rm.getDkey(report), + report.onmatch, report.offsetAdjust, end_inst)); + } else { + makeDedupe(build.rm, report, report_block); + } } else { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust)); + if (!report.quiet) { + report_block.add_before_end( + make_unique( + report.onmatch, report.offsetAdjust)); + } } } else { if (needs_dedupe) { makeDedupe(build.rm, report, report_block); } - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust, report.ekey)); + if (!report.quiet) { + report_block.add_before_end( + make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); + } else { + report_block.add_before_end( + make_unique(report.ekey)); + } } } else { // has_som makeDedupeSom(build.rm, report, report_block); if (report.ekey == INVALID_EKEY) { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust)); + if (!report.quiet) { + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); + } } else { - report_block.add_before_end( - make_unique( - report.onmatch, report.offsetAdjust, report.ekey)); + if (!report.quiet) { + report_block.add_before_end( + make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); + } else { + report_block.add_before_end( + make_unique(report.ekey)); + } } } + addLogicalSetRequired(report, build.rm, report_block); break; case INTERNAL_SOM_LOC_SET: case INTERNAL_SOM_LOC_SET_IF_UNSET: @@ -586,6 +631,9 @@ void makeReport(const RoseBuildImpl &build, const ReportID id, case INTERNAL_SOM_LOC_MAKE_WRITABLE: case INTERNAL_SOM_LOC_SET_FROM: case 
INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: + if (build.rm.numCkeys()) { + addFlushCombinationProgram(report_block); + } if (has_som) { auto ri = make_unique(); writeSomOperation(report, &ri->som); @@ -605,24 +653,48 @@ void makeReport(const RoseBuildImpl &build, const ReportID id, case EXTERNAL_CALLBACK_SOM_STORED: case EXTERNAL_CALLBACK_SOM_ABS: case EXTERNAL_CALLBACK_SOM_REV_NFA: + if (build.rm.numCkeys()) { + addFlushCombinationProgram(report_block); + } makeDedupeSom(build.rm, report, report_block); if (report.ekey == INVALID_EKEY) { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust)); + if (!report.quiet) { + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); + } } else { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust, report.ekey)); + if (!report.quiet) { + report_block.add_before_end( + make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); + } else { + report_block.add_before_end( + make_unique(report.ekey)); + } } + addLogicalSetRequired(report, build.rm, report_block); break; case EXTERNAL_CALLBACK_SOM_PASS: + if (build.rm.numCkeys()) { + addFlushCombinationProgram(report_block); + } makeDedupeSom(build.rm, report, report_block); if (report.ekey == INVALID_EKEY) { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust)); + if (!report.quiet) { + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); + } } else { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust, report.ekey)); + if (!report.quiet) { + report_block.add_before_end( + make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); + } else { + report_block.add_before_end( + make_unique(report.ekey)); + } } + addLogicalSetRequired(report, build.rm, report_block); break; default: @@ -630,7 +702,6 @@ void makeReport(const RoseBuildImpl &build, const ReportID id, throw CompileError("Unable to generate 
bytecode."); } - assert(!report_block.empty()); program.add_block(move(report_block)); } diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h index cc59303f..8c8c37ed 100644 --- a/src/rose/rose_build_program.h +++ b/src/rose/rose_build_program.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017, Intel Corporation + * Copyright (c) 2016-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -187,6 +187,7 @@ struct ProgramBuild : noncopyable { void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program); void addSuffixesEodProgram(RoseProgram &program); void addMatcherEodProgram(RoseProgram &program); +void addFlushCombinationProgram(RoseProgram &program); static constexpr u32 INVALID_QUEUE = ~0U; diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index d38ee8c0..386b035c 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -199,9 +199,25 @@ struct RoseStateOffsets { * reports with that ekey should not be delivered to the user. */ u32 exhausted; - /** size of exhausted multibit */ + /** size in bytes of exhausted multibit */ u32 exhausted_size; + /** Logical multibit. + * + * entry per logical key(operand/operator) (used by Logical Combination). */ + u32 logicalVec; + + /** size in bytes of logical multibit */ + u32 logicalVec_size; + + /** Combination multibit. + * + * entry per combination key (used by Logical Combination). */ + u32 combVec; + + /** size in bytes of combination multibit */ + u32 combVec_size; + /** Multibit for active suffix/outfix engines. 
*/ u32 activeLeafArray; @@ -327,6 +343,11 @@ struct RoseEngine { u32 mode; /**< scanning mode, one of HS_MODE_{BLOCK,STREAM,VECTORED} */ u32 historyRequired; /**< max amount of history required for streaming */ u32 ekeyCount; /**< number of exhaustion keys */ + u32 lkeyCount; /**< number of logical keys */ + u32 lopCount; /**< number of logical ops */ + u32 ckeyCount; /**< number of combination keys */ + u32 logicalTreeOffset; /**< offset to mapping from lkey to LogicalOp */ + u32 combInfoMapOffset; /**< offset to mapping from ckey to combInfo */ u32 dkeyCount; /**< number of dedupe keys */ u32 dkeyLogSize; /**< size of fatbit for storing dkey log (bytes) */ u32 invDkeyOffset; /**< offset to table mapping from dkeys to the external @@ -404,6 +425,7 @@ struct RoseEngine { u32 roseCount; u32 eodProgramOffset; //!< EOD program, otherwise 0. + u32 flushCombProgramOffset; /**< FlushCombination program, otherwise 0 */ u32 lastByteHistoryIterOffset; // if non-zero diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index eeebfed1..7feee04f 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -183,7 +183,25 @@ enum RoseInstructionCode { */ ROSE_INSTR_INCLUDED_JUMP, - LAST_ROSE_INSTRUCTION = ROSE_INSTR_INCLUDED_JUMP //!< Sentinel. + /** + * \brief Set matching status of a sub-expression. + */ + ROSE_INSTR_SET_LOGICAL, + + /** + * \brief Set combination status pending checking. + */ + ROSE_INSTR_SET_COMBINATION, + + /** + * \brief Check if compliant with any logical constraints. + */ + ROSE_INSTR_FLUSH_COMBINATION, + + /** \brief Mark as exhausted instead of report while quiet. */ + ROSE_INSTR_SET_EXHAUST, + + LAST_ROSE_INSTRUCTION = ROSE_INSTR_SET_EXHAUST //!< Sentinel. 
}; struct ROSE_STRUCT_END { @@ -636,4 +654,24 @@ struct ROSE_STRUCT_INCLUDED_JUMP { u8 squash; //!< FDR confirm squash mask for included literal. u32 child_offset; //!< Program offset of included literal. }; + +struct ROSE_STRUCT_SET_LOGICAL { + u8 code; //!< From enum RoseInstructionCode. + u32 lkey; //!< Logical key to set. + s32 offset_adjust; //!< offsetAdjust from struct Report triggers the flush. +}; + +struct ROSE_STRUCT_SET_COMBINATION { + u8 code; //!< From enum RoseInstructionCode. + u32 ckey; //!< Combination key to set. +}; + +struct ROSE_STRUCT_FLUSH_COMBINATION { + u8 code; //!< From enum RoseInstructionCode. +}; + +struct ROSE_STRUCT_SET_EXHAUST { + u8 code; //!< From enum RoseInstructionCode. + u32 ekey; //!< Exhaustion key. +}; #endif // ROSE_ROSE_PROGRAM_H diff --git a/src/rose/runtime.h b/src/rose/runtime.h index 88342b53..5fbb2b74 100644 --- a/src/rose/runtime.h +++ b/src/rose/runtime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -127,6 +127,15 @@ void updateLastMatchOffset(struct RoseContext *tctxt, u64a offset) { tctxt->lastMatchOffset = offset; } +static really_inline +void updateLastCombMatchOffset(struct RoseContext *tctxt, u64a offset) { + DEBUG_PRINTF("match @%llu, last match @%llu\n", offset, + tctxt->lastCombMatchOffset); + + assert(offset >= tctxt->lastCombMatchOffset); + tctxt->lastCombMatchOffset = offset; +} + static really_inline void updateMinMatchOffset(struct RoseContext *tctxt, u64a offset) { DEBUG_PRINTF("min match now @%llu, was @%llu\n", offset, diff --git a/src/rose/stream.c b/src/rose/stream.c index d667ae56..26268dd5 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and 
use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -578,6 +578,7 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { tctxt->lastEndOffset = offset; tctxt->filledDelayedSlots = 0; tctxt->lastMatchOffset = 0; + tctxt->lastCombMatchOffset = offset; tctxt->minMatchOffset = offset; tctxt->minNonMpvMatchOffset = offset; tctxt->next_mpv_offset = 0; @@ -700,6 +701,7 @@ void roseStreamInitEod(const struct RoseEngine *t, u64a offset, tctxt->lastEndOffset = offset; tctxt->filledDelayedSlots = 0; tctxt->lastMatchOffset = 0; + tctxt->lastCombMatchOffset = offset; /* DO NOT set 0 here! */ tctxt->minMatchOffset = offset; tctxt->minNonMpvMatchOffset = offset; tctxt->next_mpv_offset = offset; diff --git a/src/runtime.c b/src/runtime.c index c384c031..df276551 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -356,6 +356,15 @@ hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data, length, NULL, 0, 0, 0, flags); clearEvec(rose, scratch->core_info.exhaustionVector); + if (rose->ckeyCount) { + scratch->core_info.logicalVector = scratch->bstate + + rose->stateOffsets.logicalVec; + scratch->core_info.combVector = scratch->bstate + + rose->stateOffsets.combVec; + scratch->tctxt.lastCombMatchOffset = 0; + clearLvec(rose, scratch->core_info.logicalVector, + scratch->core_info.combVector); + } if (!length) { if (rose->boundary.reportZeroEodOffset) { @@ -436,6 +445,13 @@ done_scan: scratch); } + if (rose->flushCombProgramOffset) { + if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) { + unmarkScratchInUse(scratch); + return HS_SCAN_TERMINATED; + } + } + set_retval: DEBUG_PRINTF("done. 
told_to_stop_matching=%d\n", told_to_stop_matching(scratch)); @@ -500,6 +516,10 @@ void init_stream(struct hs_stream *s, const struct RoseEngine *rose, roseInitState(rose, state); clearEvec(rose, state + rose->stateOffsets.exhausted); + if (rose->ckeyCount) { + clearLvec(rose, state + rose->stateOffsets.logicalVec, + state + rose->stateOffsets.combVec); + } // SOM state multibit structures. initSomState(rose, state); @@ -614,6 +634,13 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch, getHistory(state, rose, id->offset), getHistoryAmount(rose, id->offset), id->offset, status, 0); + if (rose->ckeyCount) { + scratch->core_info.logicalVector = state + + rose->stateOffsets.logicalVec; + scratch->core_info.combVector = state + rose->stateOffsets.combVec; + scratch->tctxt.lastCombMatchOffset = id->offset; + } + if (rose->somLocationCount) { loadSomFromStream(scratch, id->offset); } @@ -657,6 +684,13 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch, scratch->core_info.status |= STATUS_TERMINATED; } } + + if (rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) { + if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) { + DEBUG_PRINTF("told to stop matching\n"); + scratch->core_info.status |= STATUS_TERMINATED; + } + } } HS_PUBLIC_API @@ -849,6 +883,12 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, populateCoreInfo(scratch, rose, state, onEvent, context, data, length, getHistory(state, rose, id->offset), historyAmount, id->offset, status, flags); + if (rose->ckeyCount) { + scratch->core_info.logicalVector = state + + rose->stateOffsets.logicalVec; + scratch->core_info.combVector = state + rose->stateOffsets.combVec; + scratch->tctxt.lastCombMatchOffset = id->offset; + } assert(scratch->core_info.hlen <= id->offset && scratch->core_info.hlen <= rose->historyRequired); @@ -894,6 +934,12 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, } } + if 
(rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) { + if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) { + scratch->core_info.status |= STATUS_TERMINATED; + } + } + setStreamStatus(state, scratch->core_info.status); if (likely(!can_stop_matching(scratch))) { diff --git a/src/scratch.h b/src/scratch.h index fa998e84..0653b743 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -94,6 +94,8 @@ struct core_info { const struct RoseEngine *rose; char *state; /**< full stream state */ char *exhaustionVector; /**< pointer to evec for this stream */ + char *logicalVector; /**< pointer to lvec for this stream */ + char *combVector; /**< pointer to cvec for this stream */ const u8 *buf; /**< main scan buffer */ size_t len; /**< length of main scan buffer in bytes */ const u8 *hbuf; /**< history buffer */ @@ -115,6 +117,7 @@ struct RoseContext { * stream */ u64a lastMatchOffset; /**< last match offset report up out of rose; * used _only_ for debugging, asserts */ + u64a lastCombMatchOffset; /**< last match offset of active combinations */ u64a minMatchOffset; /**< the earliest offset that we are still allowed to * report */ u64a minNonMpvMatchOffset; /**< the earliest offset that non-mpv engines are diff --git a/src/stream_compress_impl.h b/src/stream_compress_impl.h index 54aebd71..d1ccf5e6 100644 --- a/src/stream_compress_impl.h +++ b/src/stream_compress_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Intel Corporation + * Copyright (c) 2017-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -148,6 +148,13 @@ size_t JOIN(sc_, FN_SUFFIX)(const struct 
RoseEngine *rose, /* copy the exhaustion multibit */ COPY_MULTIBIT(stream_body + so->exhausted, rose->ekeyCount); + /* copy the logical multibit */ + COPY_MULTIBIT(stream_body + so->logicalVec, + rose->lkeyCount + rose->lopCount); + + /* copy the combination multibit */ + COPY_MULTIBIT(stream_body + so->combVec, rose->ckeyCount); + /* copy nfa stream state for endfixes */ /* Note: in the expand case the active array has already been copied into * the stream. */ diff --git a/src/util/logical.h b/src/util/logical.h new file mode 100644 index 00000000..0c8b6469 --- /dev/null +++ b/src/util/logical.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2018, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Inline functions for manipulating logical combinations. + */ + +#ifndef LOGICAL_H +#define LOGICAL_H + +#include "ue2common.h" + +/** Index meaning a given logical key is invalid. */ +#define INVALID_LKEY (~(u32)0) +#define INVALID_CKEY INVALID_LKEY + +/** Logical operation type, the priority is from high to low. */ +enum LogicalOpType { + LOGICAL_OP_NOT, + LOGICAL_OP_AND, + LOGICAL_OP_OR, + LAST_LOGICAL_OP = LOGICAL_OP_OR //!< Sentinel. +}; + +#define UNKNOWN_OP (~(u32)0) + +/** Logical Operation is consist of 4 parts. */ +struct LogicalOp { + u32 id; //!< logical operator/operation id + u32 op; //!< LogicalOpType + u32 lo; //!< left operand + u32 ro; //!< right operand +}; + +/** Each logical combination has its info: + * It occupies a region in LogicalOp vector. + * It has an exhaustion key for single-match mode. */ +struct CombInfo { + u32 id; + u32 ekey; //!< exhaustion key + u32 start; //!< ckey of logical operation to start calculating + u32 result; //!< ckey of logical operation to give final result + u64a min_offset; + u64a max_offset; +}; + +/** Temporarily use to seperate operations' id from reports' lkey + * when building logicalTree in shunting yard algorithm, + * operations' id will be finally renumbered following reports' lkey. 
*/ +#define LOGICAL_OP_BIT 0x80000000UL + +#endif diff --git a/src/util/report.h b/src/util/report.h index 0d5e69b8..ee830d0f 100644 --- a/src/util/report.h +++ b/src/util/report.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,6 +36,7 @@ #include "ue2common.h" #include "util/exhaust.h" // for INVALID_EKEY +#include "util/logical.h" // for INVALID_LKEY #include "util/hash.h" #include "util/order_check.h" @@ -107,6 +108,16 @@ struct Report { * exhaustible, this will be INVALID_EKEY. */ u32 ekey = INVALID_EKEY; + /** \brief Logical Combination key in each combination. + * + * If in Logical Combination, the lkey to check before reporting a match. + * Additionally before checking the lkey will be set. If not + * in Logical Combination, this will be INVALID_LKEY. */ + u32 lkey = INVALID_LKEY; + + /** \brief Quiet flag for expressions in any logical combination. */ + bool quiet = false; + /** \brief Adjustment to add to the match offset when we report a match. 
* * This is usually used for reports attached to states that form part of a @@ -207,16 +218,17 @@ bool operator==(const Report &a, const Report &b) { } static inline -Report makeECallback(u32 report, s32 offsetAdjust, u32 ekey) { +Report makeECallback(u32 report, s32 offsetAdjust, u32 ekey, bool quiet) { Report ir(EXTERNAL_CALLBACK, report); ir.offsetAdjust = offsetAdjust; ir.ekey = ekey; + ir.quiet = (u8)quiet; return ir; } static inline Report makeCallback(u32 report, s32 offsetAdjust) { - return makeECallback(report, offsetAdjust, INVALID_EKEY); + return makeECallback(report, offsetAdjust, INVALID_EKEY, false); } static inline diff --git a/src/util/report_manager.cpp b/src/util/report_manager.cpp index c0e9ee15..78b9b73d 100644 --- a/src/util/report_manager.cpp +++ b/src/util/report_manager.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -95,6 +95,31 @@ u32 ReportManager::getExhaustibleKey(u32 a) { return it->second; } +const set &ReportManager::getRelateCKeys(u32 lkey) { + auto it = pl.lkey2ckeys.find(lkey); + assert(it != pl.lkey2ckeys.end()); + return it->second; +} + +void ReportManager::logicalKeyRenumber() { + pl.logicalKeyRenumber(); + // assign to corresponding report + for (u32 i = 0; i < reportIds.size(); i++) { + Report &ir = reportIds[i]; + if (contains(pl.toLogicalKeyMap, ir.onmatch)) { + ir.lkey = pl.toLogicalKeyMap.at(ir.onmatch); + } + } +} + +const vector &ReportManager::getLogicalTree() const { + return pl.logicalTree; +} + +const vector &ReportManager::getCombInfoMap() const { + return pl.combInfoMap; +} + u32 ReportManager::getUnassociatedExhaustibleKey(void) { u32 rv = toExhaustibleKeyMap.size(); bool inserted; @@ -115,6 +140,18 @@ u32 ReportManager::numEkeys() const { return (u32) toExhaustibleKeyMap.size(); } +u32 
ReportManager::numLogicalKeys() const { + return (u32) pl.toLogicalKeyMap.size(); +} + +u32 ReportManager::numLogicalOps() const { + return (u32) pl.logicalTree.size(); +} + +u32 ReportManager::numCkeys() const { + return (u32) pl.toCombKeyMap.size(); +} + bool ReportManager::patternSetCanExhaust() const { return global_exhaust && !toExhaustibleKeyMap.empty(); } @@ -219,7 +256,7 @@ Report ReportManager::getBasicInternalReport(const ExpressionInfo &expr, ekey = getExhaustibleKey(expr.report); } - return makeECallback(expr.report, adj, ekey); + return makeECallback(expr.report, adj, ekey, expr.quiet); } void ReportManager::setProgramOffset(ReportID id, u32 programOffset) { diff --git a/src/util/report_manager.h b/src/util/report_manager.h index aa359ed7..015dc9c8 100644 --- a/src/util/report_manager.h +++ b/src/util/report_manager.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,6 +38,7 @@ #include "util/compile_error.h" #include "util/noncopyable.h" #include "util/report.h" +#include "parser/logical_combination.h" #include #include @@ -80,6 +81,15 @@ public: /** \brief Total number of exhaustion keys. */ u32 numEkeys() const; + /** \brief Total number of logical keys. */ + u32 numLogicalKeys() const; + + /** \brief Total number of logical operators. */ + u32 numLogicalOps() const; + + /** \brief Total number of combination keys. */ + u32 numCkeys() const; + /** \brief True if the pattern set can exhaust (i.e. all patterns are * highlander). */ bool patternSetCanExhaust() const; @@ -110,6 +120,19 @@ public: * assigning one if necessary. */ u32 getExhaustibleKey(u32 expressionIndex); + /** \brief Get lkey's corresponding ckeys. 
*/ + const std::set &getRelateCKeys(u32 lkey); + + /** \brief Renumber lkey for logical operations, after parsed + * all logical expressions. */ + void logicalKeyRenumber(); + + /** \brief Used in Rose for writing bytecode. */ + const std::vector &getLogicalTree() const; + + /** \brief Used in Rose for writing bytecode. */ + const std::vector &getCombInfoMap() const; + /** \brief Fetch the dedupe key associated with the given report. Returns * ~0U if no dkey is needed. */ u32 getDkey(const Report &r) const; @@ -122,6 +145,9 @@ public: * set. */ u32 getProgramOffset(ReportID id) const; + /** \brief Parsed logical combination structure. */ + ParsedLogical pl; + private: /** \brief Grey box ref, for checking resource limits. */ const Grey &grey; diff --git a/tools/hscheck/main.cpp b/tools/hscheck/main.cpp index 59f80244..73687e2a 100644 --- a/tools/hscheck/main.cpp +++ b/tools/hscheck/main.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -82,12 +82,30 @@ string g_signatureFile(""); bool g_allSignatures = false; bool g_forceEditDistance = false; bool build_sigs = false; +bool check_logical = false; unsigned int g_signature; unsigned int g_editDistance; unsigned int globalFlags = 0; unsigned int num_of_threads = 1; unsigned int countFailures = 0; +class ParsedExpr { +public: + ParsedExpr(string regex_in, unsigned int flags_in, hs_expr_ext ext_in) + : regex(regex_in), flags(flags_in), ext(ext_in) {} + ~ParsedExpr() {} + string regex; + unsigned int flags; + hs_expr_ext ext; +}; + +typedef map ExprExtMap; +ExprExtMap g_combs; +ExprExtMap g_validSubs; + +// Iterator pointing to next logical expression to process. +ExprExtMap::const_iterator comb_read_it; + // Global greybox structure, used in non-release builds. 
unique_ptr g_grey; @@ -106,6 +124,12 @@ std::mutex lk_read; // Mutex serialising access to output map and stdout. std::mutex lk_output; +// Mutex guarding access to write g_combs. +std::mutex lk_write_comb; + +// Mutex guarding access to write g_validSubs. +std::mutex lk_write_sub; + // Possible values for pattern check results. enum ExprStatus {NOT_PROCESSED, SUCCESS, FAILURE}; @@ -126,6 +150,32 @@ bool getNextExpressionId(ExpressionMap::const_iterator &it) { } } +static +bool getNextLogicalExpression(ExprExtMap::const_iterator &it) { + lock_guard lock(lk_read); + if (comb_read_it != g_combs.end()) { + it = comb_read_it; + ++comb_read_it; + return true; + } else { + return false; + } +} + +static +void cacheCombExpr(unsigned id, const string ®ex, unsigned int flags, + const hs_expr_ext &ext) { + lock_guard lock(lk_write_comb); + g_combs.emplace(id, ParsedExpr(regex, flags, ext)); +} + +static +void cacheSubExpr(unsigned id, const string ®ex, unsigned int flags, + const hs_expr_ext &ext) { + lock_guard lock(lk_write_sub); + g_validSubs.emplace(id, ParsedExpr(regex, flags, ext)); +} + // This function prints the Pattern IDs order // It creates the output for build sigs // Caller is required to hold lk_output when calling this function @@ -221,6 +271,15 @@ void checkExpression(UNUSED void *threadarg) { ext.flags |= HS_EXT_FLAG_EDIT_DISTANCE; } + if (flags & HS_FLAG_COMBINATION) { + if (check_logical) { + cacheCombExpr(it->first, regex, flags, ext); + } else { + recordFailure(g_exprMap, it->first, "Unsupported flag used."); + } + continue; + } + // Try and compile a database. 
const char *regexp = regex.c_str(); const hs_expr_ext *extp = &ext; @@ -239,6 +298,112 @@ void checkExpression(UNUSED void *threadarg) { nullptr, &db, &compile_err); #endif + if (err == HS_SUCCESS) { + assert(db); + recordSuccess(g_exprMap, it->first); + hs_free_database(db); + if (check_logical) { + cacheSubExpr(it->first, regex, flags, ext); + } + } else { + assert(!db); + assert(compile_err); + recordFailure(g_exprMap, it->first, compile_err->message); + hs_free_compile_error(compile_err); + } + } +} + +static +bool fetchSubIds(const char *logical, vector &ids) { + unsigned mult = 1; + unsigned id = 0; + for (int i = strlen(logical) - 1; i >= 0; i--) { + if (isdigit(logical[i])) { + if (mult > 100000000) { + return false; + } + id += (logical[i] - '0') * mult; + mult *= 10; + } else if (mult > 1) { + ids.push_back(id); + mult = 1; + id = 0; + } + } + if (mult > 1) { + ids.push_back(id); + } + return true; +} + +static +void checkLogicalExpression(UNUSED void *threadarg) { + unsigned int mode = g_streaming ? HS_MODE_STREAM + : g_vectored ? HS_MODE_VECTORED + : HS_MODE_BLOCK; + if (g_streaming) { + // Use SOM mode, for permissiveness' sake. 
+ mode |= HS_MODE_SOM_HORIZON_LARGE; + } + + ExprExtMap::const_iterator it; + while (getNextLogicalExpression(it)) { + const ParsedExpr &comb = it->second; + + vector subIds; + if (!fetchSubIds(comb.regex.c_str(), subIds)) { + recordFailure(g_exprMap, it->first, "Sub-expression id too large."); + continue; + } + + vector regexv; + vector flagsv; + vector idv; + vector extv; + bool valid = true; + + for (const auto i : subIds) { + ExprExtMap::const_iterator jt = g_validSubs.find(i); + if (jt != g_validSubs.end()) { + const ParsedExpr &sub = jt->second; + regexv.push_back(sub.regex.c_str()); + flagsv.push_back(sub.flags); + idv.push_back(i); + extv.push_back(&sub.ext); + } else { + valid = false; + break; + } + } + + if (valid) { + regexv.push_back(comb.regex.c_str()); + flagsv.push_back(comb.flags); + idv.push_back(it->first); + extv.push_back(&comb.ext); + } else { + recordFailure(g_exprMap, it->first, "Sub-expression id not valid."); + continue; + } + + // Try and compile a database. + hs_error_t err; + hs_compile_error_t *compile_err; + hs_database_t *db = nullptr; + +#if !defined(RELEASE_BUILD) + // This variant is available in non-release builds and allows us to + // modify greybox settings. + err = hs_compile_multi_int(regexv.data(), flagsv.data(), idv.data(), + extv.data(), regexv.size(), mode, + nullptr, &db, &compile_err, *g_grey); +#else + err = hs_compile_ext_multi(regexv.data(), flagsv.data(), idv.data(), + extv.data(), regexv.size(), mode, + nullptr, &db, &compile_err); +#endif + if (err == HS_SUCCESS) { assert(db); recordSuccess(g_exprMap, it->first); @@ -269,12 +434,13 @@ void usage() { << " -T NUM Run with NUM threads." << endl << " -h Display this help." << endl << " -B Build signature set." << endl + << " -C Check logical combinations (default: off)." 
<< endl << endl; } static void processArgs(int argc, char *argv[], UNUSED unique_ptr &grey) { - const char options[] = "e:E:s:z:hLNV8G:T:B"; + const char options[] = "e:E:s:z:hLNV8G:T:BC"; bool signatureSet = false; for (;;) { @@ -332,6 +498,9 @@ void processArgs(int argc, char *argv[], UNUSED unique_ptr &grey) { case 'B': build_sigs = true; break; + case 'C': + check_logical = true; + break; default: usage(); exit(1); @@ -468,6 +637,18 @@ int main(int argc, char **argv) { threads[i].join(); } + if (check_logical) { + comb_read_it = g_combs.begin(); + + for (unsigned int i = 0; i < num_of_threads; i++) { + threads[i] = thread(checkLogicalExpression, nullptr); + } + + for (unsigned int i = 0; i < num_of_threads; i++) { + threads[i].join(); + } + } + if (!g_exprMap.empty() && !build_sigs) { cout << "SUMMARY: " << countFailures << " of " << g_exprMap.size() << " failed." << endl; diff --git a/tools/hscollider/GraphTruth.cpp b/tools/hscollider/GraphTruth.cpp index 5c4cd8e7..b4b3f809 100644 --- a/tools/hscollider/GraphTruth.cpp +++ b/tools/hscollider/GraphTruth.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,6 +48,7 @@ #include "nfagraph/ng_util.h" #include "parser/Parser.h" #include "parser/unsupported.h" +#include "parser/logical_combination.h" #include "util/compile_context.h" #include "util/make_unique.h" #include "util/report_manager.h" @@ -69,8 +70,11 @@ public: CompiledNG(unique_ptr g_in, unique_ptr rm_in) : g(std::move(g_in)), rm(std::move(rm_in)) {} + CompiledNG(unique_ptr pl_in) + : pl(std::move(pl_in)) {} unique_ptr g; unique_ptr rm; + unique_ptr pl; }; static @@ -126,6 +130,14 @@ void CNGInfo::compile() { } try { + if (combination) { + auto pl = ue2::make_unique(); + pl->parseLogicalCombination(id, re.c_str(), ~0U, 0, ~0ULL); + 
pl->logicalKeyRenumber(); + cng = make_unique(move(pl)); + return; + } + bool isStreaming = colliderMode == MODE_STREAMING; bool isVectored = colliderMode == MODE_VECTORED; CompileContext cc(isStreaming, isVectored, get_current_target(), @@ -199,6 +211,8 @@ unique_ptr GraphTruth::preprocess(unsigned id, bool highlander = false; bool prefilter = false; bool som = false; + bool combination = false; + bool quiet = false; auto i = m_expr.find(id); if (i == m_expr.end()) { @@ -214,7 +228,8 @@ unique_ptr GraphTruth::preprocess(unsigned id, throw NGCompileFailure("Cannot parse expression flags."); } // read PCRE flags - if (!getPcreFlags(hs_flags, &flags, &highlander, &prefilter, &som)) { + if (!getPcreFlags(hs_flags, &flags, &highlander, &prefilter, &som, + &combination, &quiet)) { throw NGCompileFailure("Cannot get PCRE flags."); } if (force_utf8) { @@ -247,6 +262,8 @@ unique_ptr GraphTruth::preprocess(unsigned id, cngi->highlander = highlander; cngi->prefilter = prefilter; cngi->som = som; + cngi->combination = combination; + cngi->quiet = quiet; cngi->min_offset = ext.min_offset; cngi->max_offset = ext.max_offset; cngi->min_length = ext.min_length; @@ -256,8 +273,95 @@ unique_ptr GraphTruth::preprocess(unsigned id, return cngi; } +/** \brief Returns 1 if compliant to all logical combinations. 
*/ +static +char isLogicalCombination(vector &lv, const vector &comb, + size_t lkeyCount, unsigned start, unsigned result) { + assert(start <= result); + for (unsigned i = start; i <= result; i++) { + const LogicalOp &op = comb[i - lkeyCount]; + assert(i == op.id); + switch (op.op) { + case LOGICAL_OP_NOT: + lv[op.id] = !lv[op.ro]; + break; + case LOGICAL_OP_AND: + lv[op.id] = lv[op.lo] & lv[op.ro]; // && + break; + case LOGICAL_OP_OR: + lv[op.id] = lv[op.lo] | lv[op.ro]; // || + break; + default: + assert(0); + break; + } + } + return lv[result]; +} + bool GraphTruth::run(unsigned, const CompiledNG &cng, const CNGInfo &cngi, - const string &buffer, ResultSet &rs, string &) { + const string &buffer, ResultSet &rs, string &error) { + if (cngi.quiet) { + return true; + } + + if (cngi.combination) { + // Compile and run sub-expressions, store match results. + map> offset_to_matches; + map> offset_to_lkeys; + set sub_exps; + const auto &m_lkey = cng.pl->getLkeyMap(); + for (const auto &it_lkey : m_lkey) { + if (sub_exps.find(it_lkey.first) == sub_exps.end()) { + sub_exps.emplace(it_lkey.first); + ResultSet sub_rs(RESULT_FROM_PCRE); + shared_ptr sub_cngi = preprocess(it_lkey.first); + const CompiledNG *sub_cng; + try { + sub_cng = sub_cngi->get(); + } + catch (const NGCompileFailure &err) { + return false; + } + catch (const NGUnsupportedFailure &err) { + return false; + } + sub_cngi->quiet = false; // force not quiet in sub-exp. + if (!run(it_lkey.first, *sub_cng, *sub_cngi, buffer, sub_rs, error)) { + rs.clear(); + return false; + } + for (const auto &it_mr : sub_rs.matches) { + offset_to_matches[it_mr.to].emplace(it_mr); + offset_to_lkeys[it_mr.to].emplace(it_lkey.second); + if (sub_cngi->highlander) { + break; + } + } + } + } + // Calculate rs for combination expression. 
+ vector lv; + const auto &comb = cng.pl->getLogicalTree(); + lv.resize(m_lkey.size() + comb.size()); + const auto &li = cng.pl->getCombInfoById(cngi.id); + for (const auto &it : offset_to_lkeys) { + for (auto report : it.second) { + lv[report] = 1; + } + if (isLogicalCombination(lv, comb, m_lkey.size(), + li.start, li.result)) { + for (const auto &mr : offset_to_matches.at(it.first)) { + if ((mr.to >= cngi.min_offset) && + (mr.to <= cngi.max_offset)) { + rs.addMatch(mr.from, mr.to); + } + } + } + } + return true; + } + set> matches; if (g_streamOffset) { diff --git a/tools/hscollider/GraphTruth.h b/tools/hscollider/GraphTruth.h index 5f53899c..e9f601db 100644 --- a/tools/hscollider/GraphTruth.h +++ b/tools/hscollider/GraphTruth.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -106,6 +106,10 @@ public: bool highlander = false; bool prefilter = false; bool som = false; + bool combination = false; + bool quiet = false; + + unsigned id; private: void compile(); // If NFA graph scan failed for some reason, we mark it as bad and skip @@ -116,8 +120,6 @@ private: std::unique_ptr cng; // compiled NFA graph std::mutex cng_mutex; // serialised accesses to NFA graph - unsigned id; - // Our expression map const ExpressionMap &m_expr; }; diff --git a/tools/hscollider/GroundTruth.cpp b/tools/hscollider/GroundTruth.cpp index b0fe384d..abd54778 100644 --- a/tools/hscollider/GroundTruth.cpp +++ b/tools/hscollider/GroundTruth.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -100,7 +100,8 @@ int pcreCallOut(pcre_callout_block *block) { static bool 
decodeExprPcre(string &expr, unsigned *flags, bool *highlander, - bool *prefilter, bool *som, hs_expr_ext *ext) { + bool *prefilter, bool *som, bool *combination, + bool *quiet, hs_expr_ext *ext) { string regex; unsigned int hs_flags = 0; if (!readExpression(expr, regex, &hs_flags, ext)) { @@ -109,7 +110,8 @@ bool decodeExprPcre(string &expr, unsigned *flags, bool *highlander, expr.swap(regex); - if (!getPcreFlags(hs_flags, flags, highlander, prefilter, som)) { + if (!getPcreFlags(hs_flags, flags, highlander, prefilter, som, + combination, quiet)) { return false; } @@ -221,6 +223,8 @@ GroundTruth::compile(unsigned id, bool no_callouts) { bool highlander = false; bool prefilter = false; bool som = false; + bool combination = false; + bool quiet = false; // we can still match approximate matching patterns with PCRE if edit // distance 0 is requested @@ -238,7 +242,8 @@ GroundTruth::compile(unsigned id, bool no_callouts) { hs_expr_ext ext; // Decode the flags - if (!decodeExprPcre(re, &flags, &highlander, &prefilter, &som, &ext)) { + if (!decodeExprPcre(re, &flags, &highlander, &prefilter, &som, + &combination, &quiet, &ext)) { throw PcreCompileFailure("Unable to decode flags."); } @@ -261,7 +266,7 @@ GroundTruth::compile(unsigned id, bool no_callouts) { som |= !!somFlags; // For traditional Hyperscan, add global callout to pattern. 
- if (!no_callouts) { + if (!combination && !no_callouts) { addCallout(re); } @@ -275,12 +280,22 @@ GroundTruth::compile(unsigned id, bool no_callouts) { compiled->highlander = highlander; compiled->prefilter = prefilter; compiled->som = som; + compiled->combination = combination; + compiled->quiet = quiet; compiled->min_offset = ext.min_offset; compiled->max_offset = ext.max_offset; compiled->min_length = ext.min_length; compiled->expression = i->second; // original PCRE flags |= PCRE_NO_AUTO_POSSESS; + if (compiled->combination) { + compiled->pl.parseLogicalCombination(id, re.c_str(), ~0U, 0, ~0ULL); + compiled->pl.logicalKeyRenumber(); + compiled->report = id; + return compiled; + } + + compiled->bytecode = pcre_compile2(re.c_str(), flags, &errcode, &errptr, &errloc, nullptr); @@ -424,8 +439,94 @@ int scanOffset(const CompiledPcre &compiled, const string &buffer, return ret; } +/** \brief Returns 1 if compliant to all logical combinations. */ +static +char isLogicalCombination(vector &lv, const vector &comb, + size_t lkeyCount, unsigned start, unsigned result) { + assert(start <= result); + for (unsigned i = start; i <= result; i++) { + const LogicalOp &op = comb[i - lkeyCount]; + assert(i == op.id); + switch (op.op) { + case LOGICAL_OP_NOT: + lv[op.id] = !lv[op.ro]; + break; + case LOGICAL_OP_AND: + lv[op.id] = lv[op.lo] & lv[op.ro]; // && + break; + case LOGICAL_OP_OR: + lv[op.id] = lv[op.lo] | lv[op.ro]; // || + break; + default: + assert(0); + break; + } + } + return lv[result]; +} + bool GroundTruth::run(unsigned, const CompiledPcre &compiled, const string &buffer, ResultSet &rs, string &error) { + if (compiled.quiet) { + return true; + } + + if (compiled.combination) { + // Compile and run sub-expressions, store match results. 
+ map> offset_to_matches; + map> offset_to_lkeys; + set sub_exps; + const auto &m_lkey = compiled.pl.getLkeyMap(); + for (const auto &it_lkey : m_lkey) { + if (sub_exps.find(it_lkey.first) == sub_exps.end()) { + sub_exps.emplace(it_lkey.first); + ResultSet sub_rs(RESULT_FROM_PCRE); + shared_ptr sub_pcre; + try { + sub_pcre = compile(it_lkey.first); + } + catch (const SoftPcreCompileFailure &err) { + return false; + } + catch (const PcreCompileFailure &err) { + return false; + } + sub_pcre->quiet = false; // force not quiet in sub-exp. + if (!run(it_lkey.first, *sub_pcre, buffer, sub_rs, error)) { + rs.clear(); + return false; + } + for (const auto &it_mr : sub_rs.matches) { + offset_to_matches[it_mr.to].emplace(it_mr); + offset_to_lkeys[it_mr.to].emplace(it_lkey.second); + if (sub_pcre->highlander) { + break; + } + } + } + } + // Calculate rs for combination expression. + vector lv; + const auto &comb = compiled.pl.getLogicalTree(); + lv.resize(m_lkey.size() + comb.size()); + const auto &li = compiled.pl.getCombInfoById(compiled.report); + for (const auto &it : offset_to_lkeys) { + for (auto report : it.second) { + lv[report] = 1; + } + if (isLogicalCombination(lv, comb, m_lkey.size(), + li.start, li.result)) { + for (const auto &mr : offset_to_matches.at(it.first)) { + if ((mr.to >= compiled.min_offset) && + (mr.to <= compiled.max_offset)) { + rs.addMatch(mr.from, mr.to); + } + } + } + } + return true; + } + CalloutContext ctx(out); pcre_extra extra; diff --git a/tools/hscollider/GroundTruth.h b/tools/hscollider/GroundTruth.h index bcab5599..1607ef1d 100644 --- a/tools/hscollider/GroundTruth.h +++ b/tools/hscollider/GroundTruth.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,6 +31,7 @@ #include "expressions.h" #include "ResultSet.h" +#include 
"parser/logical_combination.h" #include #include @@ -85,6 +86,14 @@ public: bool highlander = false; bool prefilter = false; bool som = false; + bool combination = false; + bool quiet = false; + + // Parsed logical combinations. + ue2::ParsedLogical pl; + + // Combination expression report id. + unsigned report; private: // If a PCRE has hit its match recursion limit when scanning a corpus, we diff --git a/tools/hscollider/NfaGeneratedCorpora.cpp b/tools/hscollider/NfaGeneratedCorpora.cpp index 32933be4..b7c77ee1 100644 --- a/tools/hscollider/NfaGeneratedCorpora.cpp +++ b/tools/hscollider/NfaGeneratedCorpora.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -80,6 +80,39 @@ void NfaGeneratedCorpora::generate(unsigned id, vector &data) { throw CorpusFailure("Expression could not be read: " + i->second); } + // Combination's corpus is consist of sub-expressions' corpuses. 
+ if (hs_flags & HS_FLAG_COMBINATION) { + ParsedLogical pl; + pl.parseLogicalCombination(id, re.c_str(), ~0U, 0, ~0ULL); + pl.logicalKeyRenumber(); + const auto &m_lkey = pl.getLkeyMap(); + assert(!m_lkey.empty()); + u32 a_subid; // arbitrary sub id + unordered_map> m_data; + for (const auto &it : m_lkey) { + a_subid = it.first; + vector sub_data; + generate(a_subid, sub_data); + m_data.emplace(a_subid, move(sub_data)); + } + assert(!m_data.empty()); + size_t num_corpus = m_data[a_subid].size(); + data.reserve(data.size() + num_corpus); + while (num_corpus) { + string cc; // 1 combination corpus + for (const auto &it : m_lkey) { + assert(!m_data[it.first].empty()); + cc += m_data[it.first].back().data; + if (m_data[it.first].size() > 1) { + m_data[it.first].pop_back(); + } + } + data.push_back(Corpus(cc)); + num_corpus--; + } + return; + } + if (force_utf8_mode) { hs_flags |= HS_FLAG_UTF8; } diff --git a/tools/hscollider/ResultSet.h b/tools/hscollider/ResultSet.h index 23c628ec..b7736d52 100644 --- a/tools/hscollider/ResultSet.h +++ b/tools/hscollider/ResultSet.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -114,6 +114,13 @@ public: } } + // Clear all matches. + void clear() { + matches.clear(); + dupe_matches.clear(); + matches_by_block.clear(); + } + // Unexpected out of order match seen. 
bool uoom = false; diff --git a/tools/hscollider/pcre_util.cpp b/tools/hscollider/pcre_util.cpp index 0e1aa0ec..da8dbd11 100644 --- a/tools/hscollider/pcre_util.cpp +++ b/tools/hscollider/pcre_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,7 +34,8 @@ #include /* for pcre flags */ bool getPcreFlags(unsigned int hs_flags, unsigned int *flags, - bool *highlander, bool *prefilter, bool *som) { + bool *highlander, bool *prefilter, bool *som, + bool *combination, bool *quiet) { assert(flags); assert(highlander); assert(prefilter); @@ -76,6 +77,14 @@ bool getPcreFlags(unsigned int hs_flags, unsigned int *flags, *som = true; hs_flags &= ~HS_FLAG_SOM_LEFTMOST; } + if (hs_flags & HS_FLAG_COMBINATION) { + *combination = true; + hs_flags &= ~HS_FLAG_COMBINATION; + } + if (hs_flags & HS_FLAG_QUIET) { + *quiet = true; + hs_flags &= ~HS_FLAG_QUIET; + } // Flags that are irrelevant to PCRE. hs_flags &= ~HS_FLAG_ALLOWEMPTY; diff --git a/tools/hscollider/pcre_util.h b/tools/hscollider/pcre_util.h index 87758873..4355579b 100644 --- a/tools/hscollider/pcre_util.h +++ b/tools/hscollider/pcre_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,7 +35,8 @@ * Returns false if an unknown hyperscan flag is encountered. 
*/ bool getPcreFlags(unsigned int hs_flags, unsigned int *pcre_flags, - bool *highlander, bool *prefilter, bool *som); + bool *highlander, bool *prefilter, bool *som, + bool *combination = nullptr, bool *quiet = nullptr); #endif /* PCRE_UTIL_H */ diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index 7c39ae90..61418510 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -42,6 +42,7 @@ set(unit_hyperscan_SOURCES hyperscan/extparam.cpp hyperscan/identical.cpp hyperscan/literals.cpp + hyperscan/logical_combination.cpp hyperscan/main.cpp hyperscan/multi.cpp hyperscan/order.cpp diff --git a/unit/hyperscan/arg_checks.cpp b/unit/hyperscan/arg_checks.cpp index 0ff4ce5f..2cbd0842 100644 --- a/unit/hyperscan/arg_checks.cpp +++ b/unit/hyperscan/arg_checks.cpp @@ -171,7 +171,9 @@ TEST(HyperscanArgChecks, SingleCompileBogusFlags) { nullptr, &db, &compile_err); EXPECT_EQ(HS_COMPILER_ERROR, err); EXPECT_TRUE(compile_err != nullptr); - EXPECT_STREQ("Unrecognised flag.", compile_err->message); + EXPECT_STREQ("only HS_FLAG_QUIET and HS_FLAG_SINGLEMATCH " + "are supported in combination " + "with HS_FLAG_COMBINATION.", compile_err->message); hs_free_compile_error(compile_err); } diff --git a/unit/hyperscan/bad_patterns.txt b/unit/hyperscan/bad_patterns.txt index 7cc03834..6d4283da 100644 --- a/unit/hyperscan/bad_patterns.txt +++ b/unit/hyperscan/bad_patterns.txt @@ -145,3 +145,21 @@ 148:/\QÀ\Eaaaa/8 #Expression is not valid UTF-8. 149:/[\QÀ\Eaaaa]/8 #Expression is not valid UTF-8. 150:/abcd/{edit_distance=1,hamming_distance=1} #In hs_expr_ext, cannot have both edit distance and Hamming distance. +151:/141 | abc/C #Unknown character at index 6. +152:/141 & | 142/C #Not enough operand at index 6. +153:/141 142 & 143/C #Not enough operator at index 13. +154:/141 !142/C #Not enough operator at index 8. +155:/141 & 142 |/C #Not enough operand at index 11. +156:/)141 & 142 /C #Not enough left parentheses at index 0. 
+157:/(141 & (142|!143) |144/C #Not enough right parentheses at index 22. +158:/141 & (142|!143) )| 144/C #Not enough left parentheses at index 17. +159:/1234567890 & (142|!143 )/C #Expression id too large at index 10. +160:/141 & (142|!143 )|/C #Not enough operand at index 18. +161:/!141/C #Has match from purely negative sub-expressions. +162:/!141 | 142 | 143/C #Has match from purely negative sub-expressions. +163:/!141 & !142 & !143/C #Has match from purely negative sub-expressions. +164:/(141 | !142 & !143)/C #Has match from purely negative sub-expressions. +165:/!(141 | 142 | 143)/C #Has match from purely negative sub-expressions. +166:/141/C #No logical operation. +167:/119 & 121/C #Unknown sub-expression id. +168:/166 & 167/C #Unknown sub-expression id. diff --git a/unit/hyperscan/logical_combination.cpp b/unit/hyperscan/logical_combination.cpp new file mode 100644 index 00000000..169de333 --- /dev/null +++ b/unit/hyperscan/logical_combination.cpp @@ -0,0 +1,696 @@ +/* + * Copyright (c) 2018, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "hs.h" +#include "config.h" +#include "test_util.h" + +using namespace std; + +TEST(LogicalCombination, SingleComb1) { + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + CallBackContext c; + string data = "abcdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef"; + const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", + "ijkl[mMn]", "(101 & 102 & 103) | (104 & !105)"}; + unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION}; + unsigned ids[] = {101, 102, 103, 104, 105, 1001}; + hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM, + nullptr, &db, &compile_err); + + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + c.halt = 0; + err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb, + (void *)&c); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(16U, c.matches.size()); + ASSERT_EQ(MatchRecord(3, 101), c.matches[0]); + ASSERT_EQ(MatchRecord(6, 102), c.matches[1]); + 
ASSERT_EQ(MatchRecord(18, 103), c.matches[2]); + ASSERT_EQ(MatchRecord(18, 1001), c.matches[3]); + ASSERT_EQ(MatchRecord(21, 101), c.matches[4]); + ASSERT_EQ(MatchRecord(21, 1001), c.matches[5]); + ASSERT_EQ(MatchRecord(25, 102), c.matches[6]); + ASSERT_EQ(MatchRecord(25, 1001), c.matches[7]); + ASSERT_EQ(MatchRecord(38, 104), c.matches[8]); + ASSERT_EQ(MatchRecord(38, 1001), c.matches[9]); + ASSERT_EQ(MatchRecord(39, 104), c.matches[10]); + ASSERT_EQ(MatchRecord(39, 1001), c.matches[11]); + ASSERT_EQ(MatchRecord(48, 105), c.matches[12]); + ASSERT_EQ(MatchRecord(48, 1001), c.matches[13]); + ASSERT_EQ(MatchRecord(53, 102), c.matches[14]); + ASSERT_EQ(MatchRecord(53, 1001), c.matches[15]); + + hs_free_database(db); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(LogicalCombination, SingleCombQuietSub1) { + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + CallBackContext c; + string data = "abcdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef"; + const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", + "ijkl[mMn]", "(101 & 102 & 103) | (104 & !105)"}; + unsigned flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET, + HS_FLAG_QUIET, 0, HS_FLAG_COMBINATION}; + unsigned ids[] = {101, 102, 103, 104, 105, 1001}; + hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM, + nullptr, &db, &compile_err); + + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + c.halt = 0; + err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb, + (void *)&c); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(8U, c.matches.size()); + ASSERT_EQ(MatchRecord(18, 1001), c.matches[0]); + ASSERT_EQ(MatchRecord(21, 1001), c.matches[1]); + ASSERT_EQ(MatchRecord(25, 1001), c.matches[2]); + ASSERT_EQ(MatchRecord(38, 1001), c.matches[3]); + ASSERT_EQ(MatchRecord(39, 
1001), c.matches[4]); + ASSERT_EQ(MatchRecord(48, 105), c.matches[5]); + ASSERT_EQ(MatchRecord(48, 1001), c.matches[6]); + ASSERT_EQ(MatchRecord(53, 1001), c.matches[7]); + + hs_free_database(db); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(LogicalCombination, MultiCombQuietSub1) { + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + CallBackContext c; + string data = "abcdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef"; + const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", + "ijkl[mMn]", "(101 & 102 & 103) | (104 & !105)", + "!101 & 102", "!(!101 | 102)", "101 & !102"}; + unsigned flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET, + HS_FLAG_QUIET, 0, HS_FLAG_COMBINATION, + HS_FLAG_COMBINATION, HS_FLAG_COMBINATION, + HS_FLAG_COMBINATION}; + unsigned ids[] = {101, 102, 103, 104, 105, 1001, 1002, 1003, 1004}; + hs_error_t err = hs_compile_multi(expr, flags, ids, 9, HS_MODE_NOSTREAM, + nullptr, &db, &compile_err); + + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + c.halt = 0; + err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb, + (void *)&c); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(10U, c.matches.size()); + ASSERT_EQ(MatchRecord(3, 1003), c.matches[0]); + ASSERT_EQ(MatchRecord(3, 1004), c.matches[1]); + ASSERT_EQ(MatchRecord(18, 1001), c.matches[2]); + ASSERT_EQ(MatchRecord(21, 1001), c.matches[3]); + ASSERT_EQ(MatchRecord(25, 1001), c.matches[4]); + ASSERT_EQ(MatchRecord(38, 1001), c.matches[5]); + ASSERT_EQ(MatchRecord(39, 1001), c.matches[6]); + ASSERT_EQ(MatchRecord(48, 105), c.matches[7]); + ASSERT_EQ(MatchRecord(48, 1001), c.matches[8]); + ASSERT_EQ(MatchRecord(53, 1001), c.matches[9]); + + hs_free_database(db); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(LogicalCombination, 
MultiHighlanderCombQuietSub1) { + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + CallBackContext c; + string data = "abcdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef"; + const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", + "ijkl[mMn]", "(101 & 102 & 103) | (104 & !105)", + "!101 & 102", "!(!101 | 102)", "101 & !102"}; + unsigned flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET, + HS_FLAG_QUIET, 0, + HS_FLAG_COMBINATION | HS_FLAG_SINGLEMATCH, + HS_FLAG_COMBINATION, + HS_FLAG_COMBINATION | HS_FLAG_SINGLEMATCH, + HS_FLAG_COMBINATION | HS_FLAG_SINGLEMATCH}; + unsigned ids[] = {101, 102, 103, 104, 105, 1001, 1002, 1003, 1004}; + hs_error_t err = hs_compile_multi(expr, flags, ids, 9, HS_MODE_NOSTREAM, + nullptr, &db, &compile_err); + + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + c.halt = 0; + err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb, + (void *)&c); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(4U, c.matches.size()); + ASSERT_EQ(MatchRecord(3, 1003), c.matches[0]); + ASSERT_EQ(MatchRecord(3, 1004), c.matches[1]); + ASSERT_EQ(MatchRecord(18, 1001), c.matches[2]); + ASSERT_EQ(MatchRecord(48, 105), c.matches[3]); + + hs_free_database(db); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(LogicalCombination, MultiQuietCombQuietSub1) { + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + CallBackContext c; + string data = "abcdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef"; + const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", + "ijkl[mMn]", "(101 & 102 & 103) | (104 & !105)", + "!101 & 102", "!(!101 | 102)", "101 & !102"}; + unsigned flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET, + HS_FLAG_QUIET, 0, HS_FLAG_COMBINATION | HS_FLAG_QUIET, + HS_FLAG_COMBINATION, 
HS_FLAG_COMBINATION, + HS_FLAG_COMBINATION | HS_FLAG_QUIET}; + unsigned ids[] = {101, 102, 103, 104, 105, 1001, 1002, 1003, 1004}; + hs_error_t err = hs_compile_multi(expr, flags, ids, 9, HS_MODE_NOSTREAM, + nullptr, &db, &compile_err); + + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + c.halt = 0; + err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb, + (void *)&c); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(2U, c.matches.size()); + ASSERT_EQ(MatchRecord(3, 1003), c.matches[0]); + ASSERT_EQ(MatchRecord(48, 105), c.matches[1]); + + hs_free_database(db); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(LogicalCombination, SingleComb2) { + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + CallBackContext c; + string data = "abbdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef"; + const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", + "ijkl[mMn]", "(201 | 202 & 203) & (!204 | 205)"}; + unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION}; + unsigned ids[] = {201, 202, 203, 204, 205, 1002}; + hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM, + nullptr, &db, &compile_err); + + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + c.halt = 0; + err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb, + (void *)&c); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(13U, c.matches.size()); + ASSERT_EQ(MatchRecord(6, 202), c.matches[0]); + ASSERT_EQ(MatchRecord(18, 203), c.matches[1]); + ASSERT_EQ(MatchRecord(18, 1002), c.matches[2]); + ASSERT_EQ(MatchRecord(21, 201), c.matches[3]); + ASSERT_EQ(MatchRecord(21, 1002), c.matches[4]); + ASSERT_EQ(MatchRecord(25, 
202), c.matches[5]); + ASSERT_EQ(MatchRecord(25, 1002), c.matches[6]); + ASSERT_EQ(MatchRecord(38, 204), c.matches[7]); + ASSERT_EQ(MatchRecord(39, 204), c.matches[8]); + ASSERT_EQ(MatchRecord(48, 205), c.matches[9]); + ASSERT_EQ(MatchRecord(48, 1002), c.matches[10]); + ASSERT_EQ(MatchRecord(53, 202), c.matches[11]); + ASSERT_EQ(MatchRecord(53, 1002), c.matches[12]); + + hs_free_database(db); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(LogicalCombination, SingleCombQuietSub2) { + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + CallBackContext c; + string data = "abbdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef"; + const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", + "ijkl[mMn]", "(201 | 202 & 203) & (!204 | 205)"}; + unsigned flags[] = {0, HS_FLAG_QUIET, HS_FLAG_QUIET, 0, HS_FLAG_QUIET, + HS_FLAG_COMBINATION}; + unsigned ids[] = {201, 202, 203, 204, 205, 1002}; + hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM, + nullptr, &db, &compile_err); + + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + c.halt = 0; + err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb, + (void *)&c); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(8U, c.matches.size()); + ASSERT_EQ(MatchRecord(18, 1002), c.matches[0]); + ASSERT_EQ(MatchRecord(21, 201), c.matches[1]); + ASSERT_EQ(MatchRecord(21, 1002), c.matches[2]); + ASSERT_EQ(MatchRecord(25, 1002), c.matches[3]); + ASSERT_EQ(MatchRecord(38, 204), c.matches[4]); + ASSERT_EQ(MatchRecord(39, 204), c.matches[5]); + ASSERT_EQ(MatchRecord(48, 1002), c.matches[6]); + ASSERT_EQ(MatchRecord(53, 1002), c.matches[7]); + + hs_free_database(db); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(LogicalCombination, SingleComb3) { + hs_database_t *db = nullptr; + 
hs_compile_error_t *compile_err = nullptr; + CallBackContext c; + string data = "abcijklndefxxfoobarrrghabcxdefxteakettleeeeexxxxijklnxxdef"; + const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", + "ijkl[mMn]", "((301 | 302) & 303) & (304 | 305)"}; + unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION}; + unsigned ids[] = {301, 302, 303, 304, 305, 1003}; + hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM, + nullptr, &db, &compile_err); + + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + c.halt = 0; + err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb, + (void *)&c); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(17U, c.matches.size()); + ASSERT_EQ(MatchRecord(3, 301), c.matches[0]); + ASSERT_EQ(MatchRecord(8, 305), c.matches[1]); + ASSERT_EQ(MatchRecord(11, 302), c.matches[2]); + ASSERT_EQ(MatchRecord(23, 303), c.matches[3]); + ASSERT_EQ(MatchRecord(23, 1003), c.matches[4]); + ASSERT_EQ(MatchRecord(26, 301), c.matches[5]); + ASSERT_EQ(MatchRecord(26, 1003), c.matches[6]); + ASSERT_EQ(MatchRecord(30, 302), c.matches[7]); + ASSERT_EQ(MatchRecord(30, 1003), c.matches[8]); + ASSERT_EQ(MatchRecord(43, 304), c.matches[9]); + ASSERT_EQ(MatchRecord(43, 1003), c.matches[10]); + ASSERT_EQ(MatchRecord(44, 304), c.matches[11]); + ASSERT_EQ(MatchRecord(44, 1003), c.matches[12]); + ASSERT_EQ(MatchRecord(53, 305), c.matches[13]); + ASSERT_EQ(MatchRecord(53, 1003), c.matches[14]); + ASSERT_EQ(MatchRecord(58, 302), c.matches[15]); + ASSERT_EQ(MatchRecord(58, 1003), c.matches[16]); + + hs_free_database(db); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(LogicalCombination, SingleCombQuietSub3) { + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + CallBackContext c; + string data = 
"abcijklndefxxfoobarrrghabcxdefxteakettleeeeexxxxijklnxxdef"; + const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", + "ijkl[mMn]", "((301 | 302) & 303) & (304 | 305)"}; + unsigned flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, 0, HS_FLAG_QUIET, + HS_FLAG_QUIET, HS_FLAG_COMBINATION}; + unsigned ids[] = {301, 302, 303, 304, 305, 1003}; + hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM, + nullptr, &db, &compile_err); + + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + c.halt = 0; + err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb, + (void *)&c); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(8U, c.matches.size()); + ASSERT_EQ(MatchRecord(23, 303), c.matches[0]); + ASSERT_EQ(MatchRecord(23, 1003), c.matches[1]); + ASSERT_EQ(MatchRecord(26, 1003), c.matches[2]); + ASSERT_EQ(MatchRecord(30, 1003), c.matches[3]); + ASSERT_EQ(MatchRecord(43, 1003), c.matches[4]); + ASSERT_EQ(MatchRecord(44, 1003), c.matches[5]); + ASSERT_EQ(MatchRecord(53, 1003), c.matches[6]); + ASSERT_EQ(MatchRecord(58, 1003), c.matches[7]); + + hs_free_database(db); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(LogicalCombination, MultiCombDupSub4) { + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + CallBackContext c; + string data = "abbdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef"; + const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", + "ijkl[mMn]", "(201 & 202 & 203) | (204 & !205)", + "(201 | 202 & 203) & (!204 | 205)", + "((201 | 202) & 203) & (204 | 205)"}; + unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION, + HS_FLAG_COMBINATION, HS_FLAG_COMBINATION}; + unsigned ids[] = {201, 202, 203, 204, 205, 1001, 1002, 1003}; + hs_error_t err = hs_compile_multi(expr, flags, ids, 8, HS_MODE_NOSTREAM, + nullptr, &db, 
&compile_err); + + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + c.halt = 0; + err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb, + (void *)&c); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(23U, c.matches.size()); + ASSERT_EQ(MatchRecord(6, 202), c.matches[0]); + ASSERT_EQ(MatchRecord(18, 203), c.matches[1]); + ASSERT_EQ(MatchRecord(18, 1002), c.matches[2]); + ASSERT_EQ(MatchRecord(21, 201), c.matches[3]); + ASSERT_EQ(MatchRecord(21, 1001), c.matches[4]); + ASSERT_EQ(MatchRecord(21, 1002), c.matches[5]); + ASSERT_EQ(MatchRecord(25, 202), c.matches[6]); + ASSERT_EQ(MatchRecord(25, 1001), c.matches[7]); + ASSERT_EQ(MatchRecord(25, 1002), c.matches[8]); + ASSERT_EQ(MatchRecord(38, 204), c.matches[9]); + ASSERT_EQ(MatchRecord(38, 1001), c.matches[10]); + ASSERT_EQ(MatchRecord(38, 1003), c.matches[11]); + ASSERT_EQ(MatchRecord(39, 204), c.matches[12]); + ASSERT_EQ(MatchRecord(39, 1001), c.matches[13]); + ASSERT_EQ(MatchRecord(39, 1003), c.matches[14]); + ASSERT_EQ(MatchRecord(48, 205), c.matches[15]); + ASSERT_EQ(MatchRecord(48, 1001), c.matches[16]); + ASSERT_EQ(MatchRecord(48, 1002), c.matches[17]); + ASSERT_EQ(MatchRecord(48, 1003), c.matches[18]); + ASSERT_EQ(MatchRecord(53, 202), c.matches[19]); + ASSERT_EQ(MatchRecord(53, 1001), c.matches[20]); + ASSERT_EQ(MatchRecord(53, 1002), c.matches[21]); + ASSERT_EQ(MatchRecord(53, 1003), c.matches[22]); + + hs_free_database(db); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(LogicalCombination, MultiCombQuietDupSub4) { + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + CallBackContext c; + string data = "abbdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef"; + const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", + "ijkl[mMn]", "(201 & 202 & 203) | (204 & !205)", + "(201 | 202 & 203) 
& (!204 | 205)", + "((201 | 202) & 203) & (204 | 205)"}; + unsigned flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET, 0, + HS_FLAG_QUIET, HS_FLAG_COMBINATION, + HS_FLAG_COMBINATION, HS_FLAG_COMBINATION}; + unsigned ids[] = {201, 202, 203, 204, 205, 1001, 1002, 1003}; + hs_error_t err = hs_compile_multi(expr, flags, ids, 8, HS_MODE_NOSTREAM, + nullptr, &db, &compile_err); + + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + c.halt = 0; + err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb, + (void *)&c); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(17U, c.matches.size()); + ASSERT_EQ(MatchRecord(18, 1002), c.matches[0]); + ASSERT_EQ(MatchRecord(21, 1001), c.matches[1]); + ASSERT_EQ(MatchRecord(21, 1002), c.matches[2]); + ASSERT_EQ(MatchRecord(25, 1001), c.matches[3]); + ASSERT_EQ(MatchRecord(25, 1002), c.matches[4]); + ASSERT_EQ(MatchRecord(38, 204), c.matches[5]); + ASSERT_EQ(MatchRecord(38, 1001), c.matches[6]); + ASSERT_EQ(MatchRecord(38, 1003), c.matches[7]); + ASSERT_EQ(MatchRecord(39, 204), c.matches[8]); + ASSERT_EQ(MatchRecord(39, 1001), c.matches[9]); + ASSERT_EQ(MatchRecord(39, 1003), c.matches[10]); + ASSERT_EQ(MatchRecord(48, 1001), c.matches[11]); + ASSERT_EQ(MatchRecord(48, 1002), c.matches[12]); + ASSERT_EQ(MatchRecord(48, 1003), c.matches[13]); + ASSERT_EQ(MatchRecord(53, 1001), c.matches[14]); + ASSERT_EQ(MatchRecord(53, 1002), c.matches[15]); + ASSERT_EQ(MatchRecord(53, 1003), c.matches[16]); + + hs_free_database(db); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(LogicalCombination, MultiCombUniSub5) { + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + CallBackContext c; + string data = "abcdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef" + "-----------------------------------------------" + 
"cbbfedxxgoogleeecncbaxfedxhaystacksssssxxxxijkloxxfed" + "-----------------------------------------------" + "cabijklRfeexxgoobarrrjpcabxfeexshockwaveeeeexxxxijklsxxfee" + "------------------------------------------"; + const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", + "ijkl[mMn]", "cba", "fed", "google.*cn", + "haystacks{4,8}", "ijkl[oOp]", "cab", "fee", + "goobar.*jp", "shockwave{4,6}", "ijkl[rRs]", + "(101 & 102 & 103) | (104 & !105)", + "(201 | 202 & 203) & (!204 | 205)", + "((301 | 302) & 303) & (304 | 305)"}; + unsigned flags[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + HS_FLAG_COMBINATION, HS_FLAG_COMBINATION, + HS_FLAG_COMBINATION}; + unsigned ids[] = {101, 102, 103, 104, 105, 201, 202, 203, 204, 205, 301, + 302, 303, 304, 305, 1001, 1002, 1003}; + hs_error_t err = hs_compile_multi(expr, flags, ids, 18, HS_MODE_NOSTREAM, + nullptr, &db, &compile_err); + + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + c.halt = 0; + err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb, + (void *)&c); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(46U, c.matches.size()); + ASSERT_EQ(MatchRecord(3, 101), c.matches[0]); + ASSERT_EQ(MatchRecord(6, 102), c.matches[1]); + ASSERT_EQ(MatchRecord(18, 103), c.matches[2]); + ASSERT_EQ(MatchRecord(18, 1001), c.matches[3]); + ASSERT_EQ(MatchRecord(21, 101), c.matches[4]); + ASSERT_EQ(MatchRecord(21, 1001), c.matches[5]); + ASSERT_EQ(MatchRecord(25, 102), c.matches[6]); + ASSERT_EQ(MatchRecord(25, 1001), c.matches[7]); + ASSERT_EQ(MatchRecord(38, 104), c.matches[8]); + ASSERT_EQ(MatchRecord(38, 1001), c.matches[9]); + ASSERT_EQ(MatchRecord(39, 104), c.matches[10]); + ASSERT_EQ(MatchRecord(39, 1001), c.matches[11]); + ASSERT_EQ(MatchRecord(48, 105), c.matches[12]); + ASSERT_EQ(MatchRecord(48, 1001), c.matches[13]); + ASSERT_EQ(MatchRecord(53, 102), 
c.matches[14]); + ASSERT_EQ(MatchRecord(53, 1001), c.matches[15]); + ASSERT_EQ(MatchRecord(106, 202), c.matches[16]); + ASSERT_EQ(MatchRecord(118, 203), c.matches[17]); + ASSERT_EQ(MatchRecord(118, 1002), c.matches[18]); + ASSERT_EQ(MatchRecord(121, 201), c.matches[19]); + ASSERT_EQ(MatchRecord(121, 1002), c.matches[20]); + ASSERT_EQ(MatchRecord(125, 202), c.matches[21]); + ASSERT_EQ(MatchRecord(125, 1002), c.matches[22]); + ASSERT_EQ(MatchRecord(138, 204), c.matches[23]); + ASSERT_EQ(MatchRecord(139, 204), c.matches[24]); + ASSERT_EQ(MatchRecord(148, 205), c.matches[25]); + ASSERT_EQ(MatchRecord(148, 1002), c.matches[26]); + ASSERT_EQ(MatchRecord(153, 202), c.matches[27]); + ASSERT_EQ(MatchRecord(153, 1002), c.matches[28]); + ASSERT_EQ(MatchRecord(203, 301), c.matches[29]); + ASSERT_EQ(MatchRecord(208, 305), c.matches[30]); + ASSERT_EQ(MatchRecord(211, 302), c.matches[31]); + ASSERT_EQ(MatchRecord(223, 303), c.matches[32]); + ASSERT_EQ(MatchRecord(223, 1003), c.matches[33]); + ASSERT_EQ(MatchRecord(226, 301), c.matches[34]); + ASSERT_EQ(MatchRecord(226, 1003), c.matches[35]); + ASSERT_EQ(MatchRecord(230, 302), c.matches[36]); + ASSERT_EQ(MatchRecord(230, 1003), c.matches[37]); + ASSERT_EQ(MatchRecord(243, 304), c.matches[38]); + ASSERT_EQ(MatchRecord(243, 1003), c.matches[39]); + ASSERT_EQ(MatchRecord(244, 304), c.matches[40]); + ASSERT_EQ(MatchRecord(244, 1003), c.matches[41]); + ASSERT_EQ(MatchRecord(253, 305), c.matches[42]); + ASSERT_EQ(MatchRecord(253, 1003), c.matches[43]); + ASSERT_EQ(MatchRecord(258, 302), c.matches[44]); + ASSERT_EQ(MatchRecord(258, 1003), c.matches[45]); + + hs_free_database(db); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(LogicalCombination, MultiCombQuietUniSub5) { + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + CallBackContext c; + string data = "abcdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef" + "-----------------------------------------------" + 
"cbbfedxxgoogleeecncbaxfedxhaystacksssssxxxxijkloxxfed" + "-----------------------------------------------" + "cabijklRfeexxgoobarrrjpcabxfeexshockwaveeeeexxxxijklsxxfee" + "------------------------------------------"; + const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", + "ijkl[mMn]", "cba", "fed", "google.*cn", + "haystacks{4,8}", "ijkl[oOp]", "cab", "fee", + "goobar.*jp", "shockwave{4,6}", "ijkl[rRs]", + "(101 & 102 & 103) | (104 & !105)", + "(201 | 202 & 203) & (!204 | 205)", + "((301 | 302) & 303) & (304 | 305)"}; + unsigned flags[] = {0, HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET, 0, + HS_FLAG_QUIET, 0, HS_FLAG_QUIET, 0, HS_FLAG_QUIET, + HS_FLAG_QUIET, HS_FLAG_QUIET, 0, HS_FLAG_QUIET, 0, + HS_FLAG_COMBINATION, HS_FLAG_COMBINATION, + HS_FLAG_COMBINATION}; + unsigned ids[] = {101, 102, 103, 104, 105, 201, 202, 203, 204, 205, 301, + 302, 303, 304, 305, 1001, 1002, 1003}; + hs_error_t err = hs_compile_multi(expr, flags, ids, 18, HS_MODE_NOSTREAM, + nullptr, &db, &compile_err); + + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + c.halt = 0; + err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb, + (void *)&c); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(30U, c.matches.size()); + ASSERT_EQ(MatchRecord(3, 101), c.matches[0]); + ASSERT_EQ(MatchRecord(18, 1001), c.matches[1]); + ASSERT_EQ(MatchRecord(21, 101), c.matches[2]); + ASSERT_EQ(MatchRecord(21, 1001), c.matches[3]); + ASSERT_EQ(MatchRecord(25, 1001), c.matches[4]); + ASSERT_EQ(MatchRecord(38, 1001), c.matches[5]); + ASSERT_EQ(MatchRecord(39, 1001), c.matches[6]); + ASSERT_EQ(MatchRecord(48, 105), c.matches[7]); + ASSERT_EQ(MatchRecord(48, 1001), c.matches[8]); + ASSERT_EQ(MatchRecord(53, 1001), c.matches[9]); + ASSERT_EQ(MatchRecord(106, 202), c.matches[10]); + ASSERT_EQ(MatchRecord(118, 1002), c.matches[11]); + 
ASSERT_EQ(MatchRecord(121, 1002), c.matches[12]); + ASSERT_EQ(MatchRecord(125, 202), c.matches[13]); + ASSERT_EQ(MatchRecord(125, 1002), c.matches[14]); + ASSERT_EQ(MatchRecord(138, 204), c.matches[15]); + ASSERT_EQ(MatchRecord(139, 204), c.matches[16]); + ASSERT_EQ(MatchRecord(148, 1002), c.matches[17]); + ASSERT_EQ(MatchRecord(153, 202), c.matches[18]); + ASSERT_EQ(MatchRecord(153, 1002), c.matches[19]); + ASSERT_EQ(MatchRecord(208, 305), c.matches[20]); + ASSERT_EQ(MatchRecord(223, 303), c.matches[21]); + ASSERT_EQ(MatchRecord(223, 1003), c.matches[22]); + ASSERT_EQ(MatchRecord(226, 1003), c.matches[23]); + ASSERT_EQ(MatchRecord(230, 1003), c.matches[24]); + ASSERT_EQ(MatchRecord(243, 1003), c.matches[25]); + ASSERT_EQ(MatchRecord(244, 1003), c.matches[26]); + ASSERT_EQ(MatchRecord(253, 305), c.matches[27]); + ASSERT_EQ(MatchRecord(253, 1003), c.matches[28]); + ASSERT_EQ(MatchRecord(258, 1003), c.matches[29]); + + hs_free_database(db); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +} diff --git a/util/ExpressionParser.rl b/util/ExpressionParser.rl index 94d03508..fec47922 100644 --- a/util/ExpressionParser.rl +++ b/util/ExpressionParser.rl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -76,6 +76,8 @@ enum ParamKey { case '8': *flags |= HS_FLAG_UTF8; break; case 'P': *flags |= HS_FLAG_PREFILTER; break; case 'L': *flags |= HS_FLAG_SOM_LEFTMOST; break; + case 'C': *flags |= HS_FLAG_COMBINATION; break; + case 'Q': *flags |= HS_FLAG_QUIET; break; default: fbreak; } } @@ -159,7 +161,7 @@ bool HS_CDECL readExpression(const std::string &input, std::string &expr, enum ParamKey key = PARAM_NONE; %%{ - single_flag = [ismW8HPLVO]; + single_flag = [ismW8HPLVOCQ]; param = ('min_offset' @{ key = PARAM_MIN_OFFSET; } | 'max_offset' @{ key = 
PARAM_MAX_OFFSET; } | 'min_length' @{ key = PARAM_MIN_LENGTH; } |