From 12921b7c979b52151e9c6c358ccadfb358bf2dbc Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 23 Oct 2015 17:15:24 +1100 Subject: [PATCH] Add hs_expression_ext_info() API function This is a variant of hs_expression_info() that can also accept extended parameters. --- src/hs.cpp | 18 +++- src/hs_compile.h | 63 +++++++++++++- src/nfagraph/ng_expr_info.cpp | 33 ++++++-- unit/hyperscan/arg_checks.cpp | 31 +++++++ unit/hyperscan/expr_info.cpp | 150 ++++++++++++++++++++++++---------- 5 files changed, 239 insertions(+), 56 deletions(-) diff --git a/src/hs.cpp b/src/hs.cpp index e665539b..3680e79e 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -39,6 +39,7 @@ #include "compiler/error.h" #include "nfagraph/ng.h" #include "nfagraph/ng_expr_info.h" +#include "nfagraph/ng_extparam.h" #include "parser/parse_error.h" #include "parser/Parser.h" #include "parser/prefilter.h" @@ -310,7 +311,8 @@ hs_error_t hs_compile_ext_multi(const char * const *expressions, static hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, - unsigned int mode, hs_expr_info_t **info, + const hs_expr_ext_t *ext, unsigned int mode, + hs_expr_info_t **info, hs_compile_error_t **error) { if (!error) { // nowhere to write an error, but we can still return an error code. @@ -347,7 +349,7 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, } ReportManager rm(cc.grey); - ParsedExpression pe(0, expression, flags, 0); + ParsedExpression pe(0, expression, flags, 0, ext); assert(pe.component); // Apply prefiltering transformations if desired. @@ -362,6 +364,7 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, throw ParseError("Internal error."); } + handleExtendedParams(rm, *g, cc); fillExpressionInfo(rm, *g, &local_info); } catch (const CompileError &e) { @@ -394,7 +397,16 @@ extern "C" HS_PUBLIC_API hs_error_t hs_expression_info(const char *expression, unsigned int flags, hs_expr_info_t **info, hs_compile_error_t **error) { - return hs_expression_info_int(expression, flags, HS_MODE_BLOCK, info, + return hs_expression_info_int(expression, flags, nullptr, HS_MODE_BLOCK, + info, error); +} + +extern "C" HS_PUBLIC_API +hs_error_t hs_expression_ext_info(const char *expression, unsigned int flags, + const hs_expr_ext_t *ext, + hs_expr_info_t **info, + hs_compile_error_t **error) { + return hs_expression_info_int(expression, flags, ext, HS_MODE_BLOCK, info, error); } diff --git a/src/hs_compile.h b/src/hs_compile.h index 68f38584..48168cc2 100644 --- a/src/hs_compile.h +++ b/src/hs_compile.h @@ -158,7 +158,7 @@ typedef struct hs_platform_info { /** * A type containing information related to an expression that is returned by - * @ref hs_expression_info(). + * @ref hs_expression_info() or @ref hs_expression_ext_info. */ typedef struct hs_expr_info { /** @@ -201,7 +201,8 @@ typedef struct hs_expr_info { /** * A structure containing additional parameters related to an expression, - * passed in at build time to @ref hs_compile_ext_multi(). + * passed in at build time to @ref hs_compile_ext_multi() or @ref + * hs_expression_ext_info. * * These parameters allow the set of matches produced by a pattern to be * constrained at compile time, rather than relying on the application to @@ -401,7 +402,7 @@ hs_error_t hs_compile_multi(const char *const *expressions, hs_database_t **db, hs_compile_error_t **error); /** - * The multiple regular expression compiler with extended pattern support. + * The multiple regular expression compiler with extended parameter support. * * This function call compiles a group of expressions into a database in the * same way as @ref hs_compile_multi(), but allows additional parameters to be @@ -550,6 +551,62 @@ hs_error_t hs_expression_info(const char *expression, unsigned int flags, hs_expr_info_t **info, hs_compile_error_t **error); +/** + * Utility function providing information about a regular expression, with + * extended parameter support. The information provided in @ref hs_expr_info_t + * includes the minimum and maximum width of a pattern match. + * + * @param expression + * The NULL-terminated expression to parse. Note that this string must + * represent ONLY the pattern to be matched, with no delimiters or flags; + * any global flags should be specified with the @a flags argument. For + * example, the expression `/abc?def/i` should be compiled by providing + * `abc?def` as the @a expression, and @ref HS_FLAG_CASELESS as the @a + * flags. + * + * @param flags + * Flags which modify the behaviour of the expression. Multiple flags may + * be used by ORing them together. Valid values are: + * - HS_FLAG_CASELESS - Matching will be performed case-insensitively. + * - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines. + * - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data. + * - HS_FLAG_SINGLEMATCH - Only one match will be generated by the + * expression per stream. + * - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an + * empty string, such as `.*`. + * - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters. + * - HS_FLAG_UCP - Use Unicode properties for character classes. + * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode. + * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset + * when a match is found. + * + * @param ext + * A pointer to a filled @ref hs_expr_ext_t structure that defines + * extended behaviour for this pattern. NULL may be specified if no + * extended parameters are needed. + * + * @param info + * On success, a pointer to the pattern information will be returned in + * this parameter, or NULL on failure. This structure is allocated using + * the allocator supplied in @ref hs_set_allocator() (or malloc() if no + * allocator was set) and should be freed by the caller. + * + * @param error + * If the call fails, a pointer to a @ref hs_compile_error_t will be + * returned, providing details of the error condition. The caller is + * responsible for deallocating the buffer using the @ref + * hs_free_compile_error() function. + * + * @return + * @ref HS_SUCCESS is returned on successful compilation; @ref + * HS_COMPILER_ERROR on failure, with details provided in the error + * parameter. + */ +hs_error_t hs_expression_ext_info(const char *expression, unsigned int flags, + const hs_expr_ext_t *ext, + hs_expr_info_t **info, + hs_compile_error_t **error); + /** * Populates the platform information based on the current host. * diff --git a/src/nfagraph/ng_expr_info.cpp b/src/nfagraph/ng_expr_info.cpp index 2afb568b..cfd34ce6 100644 --- a/src/nfagraph/ng_expr_info.cpp +++ b/src/nfagraph/ng_expr_info.cpp @@ -94,11 +94,34 @@ void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v, const DepthMinMax &d = depths.at(idx); for (ReportID report_id : w[v].reports) { - const Report &ir = rm.getReport(report_id); - assert(ir.type == EXTERNAL_CALLBACK); - s32 adjust = ir.offsetAdjust; - info.min = min(info.min, d.min + adjust); - info.max = max(info.max, d.max + adjust); + const Report &report = rm.getReport(report_id); + assert(report.type == EXTERNAL_CALLBACK); + + DepthMinMax rd = d; + + // Compute graph width to this report, taking any offset adjustment + // into account. + rd.min += report.offsetAdjust; + rd.max += report.offsetAdjust; + + // A min_length param is a lower bound for match width. + if (report.minLength && report.minLength <= depth::max_value()) { + depth min_len((u32)report.minLength); + rd.min = max(rd.min, min_len); + rd.max = max(rd.max, min_len); + } + + // A max_offset param is an upper bound for match width. + if (report.maxOffset && report.maxOffset <= depth::max_value()) { + depth max_offset((u32)report.maxOffset); + rd.min = min(rd.min, max_offset); + rd.max = min(rd.max, max_offset); + } + + DEBUG_PRINTF("vertex %u report %u: %s\n", w[v].index, report_id, + rd.str().c_str()); + + info = unionDepthMinMax(info, rd); } } diff --git a/unit/hyperscan/arg_checks.cpp b/unit/hyperscan/arg_checks.cpp index dbc692c5..6d4e5fa9 100644 --- a/unit/hyperscan/arg_checks.cpp +++ b/unit/hyperscan/arg_checks.cpp @@ -2154,6 +2154,37 @@ TEST(HyperscanArgChecks, ExprInfoNullErrPtr) { EXPECT_TRUE(info == nullptr); } +// hs_expression_ext_info: Compile a NULL pattern +TEST(HyperscanArgChecks, ExprExtInfoNullExpression) { + hs_expr_info_t *info = nullptr; + hs_compile_error_t *compile_err = nullptr; + hs_error_t err = + hs_expression_ext_info(nullptr, 0, nullptr, &info, &compile_err); + EXPECT_EQ(HS_COMPILER_ERROR, err); + EXPECT_TRUE(info == nullptr); + EXPECT_TRUE(compile_err != nullptr); + hs_free_compile_error(compile_err); +} + +// hs_expression_ext_info: NULL info block ptr +TEST(HyperscanArgChecks, ExprExtInfoNullInfoPtr) { + hs_compile_error_t *compile_err = nullptr; + hs_error_t err = + hs_expression_ext_info("foobar", 0, nullptr, nullptr, &compile_err); + EXPECT_EQ(HS_COMPILER_ERROR, err); + EXPECT_TRUE(compile_err != nullptr); + hs_free_compile_error(compile_err); +} + +// hs_expression_ext_info: No compiler error block +TEST(HyperscanArgChecks, ExprExtInfoNullErrPtr) { + hs_expr_info_t *info = nullptr; + hs_error_t err = + hs_expression_ext_info("foobar", 0, nullptr, &info, nullptr); + EXPECT_EQ(HS_COMPILER_ERROR, err); + EXPECT_TRUE(info == nullptr); +} + TEST(HyperscanArgChecks, hs_free_database_null) { hs_error_t err = hs_free_database(nullptr); ASSERT_EQ(HS_SUCCESS, err); diff --git a/unit/hyperscan/expr_info.cpp b/unit/hyperscan/expr_info.cpp index 4d4a1c97..984104c5 100644 --- a/unit/hyperscan/expr_info.cpp +++ b/unit/hyperscan/expr_info.cpp @@ -42,6 +42,8 @@ namespace /* anonymous */ { struct expected_info { const char *pattern; + hs_expr_ext ext; + unsigned min; unsigned max; char unordered_matches; @@ -52,10 +54,25 @@ struct expected_info { class ExprInfop : public TestWithParam { }; -TEST_P(ExprInfop, width) { +static +void check_info(const expected_info &ei, const hs_expr_info_t *info) { + EXPECT_EQ(ei.min, info->min_width); + EXPECT_EQ(ei.max, info->max_width); + EXPECT_EQ(ei.unordered_matches, info->unordered_matches); + EXPECT_EQ(ei.matches_at_eod, info->matches_at_eod); + EXPECT_EQ(ei.matches_only_at_eod, info->matches_only_at_eod); +} + +// Check with hs_expression_info function. +TEST_P(ExprInfop, check_no_ext) { const expected_info &ei = GetParam(); SCOPED_TRACE(ei.pattern); + if (ei.ext.flags) { + // This is an extparam test, skip it. + return; + } + hs_expr_info_t *info = nullptr; hs_compile_error_t *c_err = nullptr; hs_error_t err = hs_expression_info(ei.pattern, 0, &info, &c_err); @@ -63,54 +80,97 @@ TEST_P(ExprInfop, width) { ASSERT_TRUE(info != nullptr); ASSERT_TRUE(c_err == nullptr); - EXPECT_EQ(ei.min, info->min_width); - EXPECT_EQ(ei.max, info->max_width); - EXPECT_EQ(ei.unordered_matches, info->unordered_matches); - EXPECT_EQ(ei.matches_at_eod, info->matches_at_eod); - EXPECT_EQ(ei.matches_only_at_eod, info->matches_only_at_eod); - + check_info(ei, info); free(info); } +// Check with hs_expression_ext_info function. +TEST_P(ExprInfop, check_ext) { + const expected_info &ei = GetParam(); + SCOPED_TRACE(ei.pattern); + + hs_expr_info_t *info = nullptr; + hs_compile_error_t *c_err = nullptr; + hs_error_t err = + hs_expression_ext_info(ei.pattern, 0, &ei.ext, &info, &c_err); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(info != nullptr); + ASSERT_TRUE(c_err == nullptr); + + check_info(ei, info); + free(info); +} + +// Check with hs_expression_ext_info function and a nullptr ext param, for +// cases where ext.flags == 0. Functionally identical to check_no_ext above. +TEST_P(ExprInfop, check_ext_null) { + const expected_info &ei = GetParam(); + SCOPED_TRACE(ei.pattern); + + if (ei.ext.flags) { + // This is an extparam test, skip it. + return; + } + + hs_expr_info_t *info = nullptr; + hs_compile_error_t *c_err = nullptr; + hs_error_t err = + hs_expression_ext_info(ei.pattern, 0, nullptr, &info, &c_err); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(info != nullptr); + ASSERT_TRUE(c_err == nullptr); + + check_info(ei, info); + free(info); +} + +static const hs_expr_ext NO_EXT_PARAM = { 0, 0, 0, 0 }; + static const expected_info ei_test[] = { - {"abc", 3, 3, 0, 0, 0}, - {"abc.*def", 6, UINT_MAX, 0, 0, 0}, - {"abc|defghi", 3, 6, 0, 0, 0}, - {"abc(def)?", 3, 6, 0, 0, 0}, - {"abc(def){0,3}", 3, 12, 0, 0, 0}, - {"abc(def){1,4}", 6, 15, 0, 0, 0}, - {"", 0, 0, 0, 0, 0}, - {"^", 0, 0, 0, 0, 0}, - {"^\\b", 0, 0, 1, 0, 0}, - {"\\b$", 0, 0, 1, 1, 1}, - {"(?m)\\b$", 0, 0, 1, 1, 0}, - {"\\A", 0, 0, 0, 0, 0}, - {"\\z", 0, 0, 0, 1, 1}, - {"\\Z", 0, 0, 1, 1, 1}, - {"$", 0, 0, 1, 1, 1}, - {"(?m)$", 0, 0, 1, 1, 0}, - {"^foo", 3, 3, 0, 0, 0}, - {"^foo.*bar", 6, UINT_MAX, 0, 0, 0}, - {"^foo.*bar?", 5, UINT_MAX, 0, 0, 0}, - {"^foo.*bar$", 6, UINT_MAX, 1, 1, 1}, - {"^foobar$", 6, 6, 1, 1, 1}, - {"foobar$", 6, 6, 1, 1, 1}, - {"^.*foo", 3, UINT_MAX, 0, 0, 0}, - {"foo\\b", 3, 3, 1, 1, 0}, - {"foo.{1,13}bar", 7, 19, 0, 0, 0}, - {"foo.{10,}bar", 16, UINT_MAX, 0, 0, 0}, - {"foo.{0,10}bar", 6, 16, 0, 0, 0}, - {"foo.{,10}bar", 12, 12, 0, 0, 0}, - {"foo.{10}bar", 16, 16, 0, 0, 0}, - {"(^|\n)foo", 3, 4, 0, 0, 0}, - {"(^\n|)foo", 3, 4, 0, 0, 0}, - {"(?m)^foo", 3, 3, 0, 0, 0}, - {"\\bfoo", 3, 3, 0, 0, 0}, - {"^\\bfoo", 3, 3, 0, 0, 0}, - {"(?m)^\\bfoo", 3, 3, 0, 0, 0}, - {"\\Bfoo", 3, 3, 0, 0, 0}, - {"(foo|bar\\z)", 3, 3, 0, 1, 0}, - {"(foo|bar)\\z", 3, 3, 0, 1, 1}, + {"abc", NO_EXT_PARAM, 3, 3, 0, 0, 0}, + {"abc.*def", NO_EXT_PARAM, 6, UINT_MAX, 0, 0, 0}, + {"abc|defghi", NO_EXT_PARAM, 3, 6, 0, 0, 0}, + {"abc(def)?", NO_EXT_PARAM, 3, 6, 0, 0, 0}, + {"abc(def){0,3}", NO_EXT_PARAM, 3, 12, 0, 0, 0}, + {"abc(def){1,4}", NO_EXT_PARAM, 6, 15, 0, 0, 0}, + {"", NO_EXT_PARAM, 0, 0, 0, 0, 0}, + {"^", NO_EXT_PARAM, 0, 0, 0, 0, 0}, + {"^\\b", NO_EXT_PARAM, 0, 0, 1, 0, 0}, + {"\\b$", NO_EXT_PARAM, 0, 0, 1, 1, 1}, + {"(?m)\\b$", NO_EXT_PARAM, 0, 0, 1, 1, 0}, + {"\\A", NO_EXT_PARAM, 0, 0, 0, 0, 0}, + {"\\z", NO_EXT_PARAM, 0, 0, 0, 1, 1}, + {"\\Z", NO_EXT_PARAM, 0, 0, 1, 1, 1}, + {"$", NO_EXT_PARAM, 0, 0, 1, 1, 1}, + {"(?m)$", NO_EXT_PARAM, 0, 0, 1, 1, 0}, + {"^foo", NO_EXT_PARAM, 3, 3, 0, 0, 0}, + {"^foo.*bar", NO_EXT_PARAM, 6, UINT_MAX, 0, 0, 0}, + {"^foo.*bar?", NO_EXT_PARAM, 5, UINT_MAX, 0, 0, 0}, + {"^foo.*bar$", NO_EXT_PARAM, 6, UINT_MAX, 1, 1, 1}, + {"^foobar$", NO_EXT_PARAM, 6, 6, 1, 1, 1}, + {"foobar$", NO_EXT_PARAM, 6, 6, 1, 1, 1}, + {"^.*foo", NO_EXT_PARAM, 3, UINT_MAX, 0, 0, 0}, + {"foo\\b", NO_EXT_PARAM, 3, 3, 1, 1, 0}, + {"foo.{1,13}bar", NO_EXT_PARAM, 7, 19, 0, 0, 0}, + {"foo.{10,}bar", NO_EXT_PARAM, 16, UINT_MAX, 0, 0, 0}, + {"foo.{0,10}bar", NO_EXT_PARAM, 6, 16, 0, 0, 0}, + {"foo.{,10}bar", NO_EXT_PARAM, 12, 12, 0, 0, 0}, + {"foo.{10}bar", NO_EXT_PARAM, 16, 16, 0, 0, 0}, + {"(^|\n)foo", NO_EXT_PARAM, 3, 4, 0, 0, 0}, + {"(^\n|)foo", NO_EXT_PARAM, 3, 4, 0, 0, 0}, + {"(?m)^foo", NO_EXT_PARAM, 3, 3, 0, 0, 0}, + {"\\bfoo", NO_EXT_PARAM, 3, 3, 0, 0, 0}, + {"^\\bfoo", NO_EXT_PARAM, 3, 3, 0, 0, 0}, + {"(?m)^\\bfoo", NO_EXT_PARAM, 3, 3, 0, 0, 0}, + {"\\Bfoo", NO_EXT_PARAM, 3, 3, 0, 0, 0}, + {"(foo|bar\\z)", NO_EXT_PARAM, 3, 3, 0, 1, 0}, + {"(foo|bar)\\z", NO_EXT_PARAM, 3, 3, 0, 1, 1}, + + // Some cases with extended parameters. + {"^abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0}, 6, 10, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0}, 6, 10, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100}, 100, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 5}, 6, UINT_MAX, 0, 0, 0}, }; INSTANTIATE_TEST_CASE_P(ExprInfo, ExprInfop, ValuesIn(ei_test));