Add hs_expression_ext_info() API function

This is a variant of hs_expression_info() that can also accept extended
parameters.
This commit is contained in:
Justin Viiret 2015-10-23 17:15:24 +11:00 committed by Matthew Barr
parent 67b9784dae
commit 12921b7c97
5 changed files with 239 additions and 56 deletions

View File

@ -39,6 +39,7 @@
#include "compiler/error.h"
#include "nfagraph/ng.h"
#include "nfagraph/ng_expr_info.h"
#include "nfagraph/ng_extparam.h"
#include "parser/parse_error.h"
#include "parser/Parser.h"
#include "parser/prefilter.h"
@ -310,7 +311,8 @@ hs_error_t hs_compile_ext_multi(const char * const *expressions,
static
hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
unsigned int mode, hs_expr_info_t **info,
const hs_expr_ext_t *ext, unsigned int mode,
hs_expr_info_t **info,
hs_compile_error_t **error) {
if (!error) {
// nowhere to write an error, but we can still return an error code.
@ -347,7 +349,7 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
}
ReportManager rm(cc.grey);
ParsedExpression pe(0, expression, flags, 0);
ParsedExpression pe(0, expression, flags, 0, ext);
assert(pe.component);
// Apply prefiltering transformations if desired.
@ -362,6 +364,7 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
throw ParseError("Internal error.");
}
handleExtendedParams(rm, *g, cc);
fillExpressionInfo(rm, *g, &local_info);
}
catch (const CompileError &e) {
@ -394,7 +397,16 @@ extern "C" HS_PUBLIC_API
hs_error_t hs_expression_info(const char *expression, unsigned int flags,
hs_expr_info_t **info,
hs_compile_error_t **error) {
return hs_expression_info_int(expression, flags, HS_MODE_BLOCK, info,
return hs_expression_info_int(expression, flags, nullptr, HS_MODE_BLOCK,
info, error);
}
// Public API entry point: variant of hs_expression_info() that additionally
// accepts an extended-parameter block (ext). It does no work of its own; all
// arguments are forwarded to hs_expression_info_int() with the mode fixed to
// HS_MODE_BLOCK, and that call's result is returned unchanged.
extern "C" HS_PUBLIC_API
hs_error_t hs_expression_ext_info(const char *expression, unsigned int flags,
const hs_expr_ext_t *ext,
hs_expr_info_t **info,
hs_compile_error_t **error) {
return hs_expression_info_int(expression, flags, ext, HS_MODE_BLOCK, info,
error);
}

View File

@ -158,7 +158,7 @@ typedef struct hs_platform_info {
/**
* A type containing information related to an expression that is returned by
* @ref hs_expression_info().
* @ref hs_expression_info() or @ref hs_expression_ext_info().
*/
typedef struct hs_expr_info {
/**
@ -201,7 +201,8 @@ typedef struct hs_expr_info {
/**
* A structure containing additional parameters related to an expression,
* passed in at build time to @ref hs_compile_ext_multi().
* passed in at build time to @ref hs_compile_ext_multi() or @ref
* hs_expression_ext_info().
*
* These parameters allow the set of matches produced by a pattern to be
* constrained at compile time, rather than relying on the application to
@ -401,7 +402,7 @@ hs_error_t hs_compile_multi(const char *const *expressions,
hs_database_t **db, hs_compile_error_t **error);
/**
* The multiple regular expression compiler with extended pattern support.
* The multiple regular expression compiler with extended parameter support.
*
* This function call compiles a group of expressions into a database in the
* same way as @ref hs_compile_multi(), but allows additional parameters to be
@ -550,6 +551,62 @@ hs_error_t hs_expression_info(const char *expression, unsigned int flags,
hs_expr_info_t **info,
hs_compile_error_t **error);
/**
* Utility function providing information about a regular expression, with
* extended parameter support. The information provided in @ref hs_expr_info_t
* includes the minimum and maximum width of a pattern match.
*
* @param expression
* The NULL-terminated expression to parse. Note that this string must
* represent ONLY the pattern to be matched, with no delimiters or flags;
* any global flags should be specified with the @a flags argument. For
* example, the expression `/abc?def/i` should be compiled by providing
* `abc?def` as the @a expression, and @ref HS_FLAG_CASELESS as the @a
* flags.
*
* @param flags
* Flags which modify the behaviour of the expression. Multiple flags may
* be used by ORing them together. Valid values are:
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
* - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
* - HS_FLAG_SINGLEMATCH - Only one match will be generated by the
* expression per stream.
* - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
* empty string, such as `.*`.
* - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
* - HS_FLAG_UCP - Use Unicode properties for character classes.
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
*
* @param ext
* A pointer to a filled @ref hs_expr_ext_t structure that defines
* extended behaviour for this pattern. NULL may be specified if no
* extended parameters are needed.
*
* @param info
* On success, a pointer to the pattern information will be returned in
* this parameter, or NULL on failure. This structure is allocated using
* the allocator supplied in @ref hs_set_allocator() (or malloc() if no
* allocator was set) and should be freed by the caller.
*
* @param error
* If the call fails, a pointer to a @ref hs_compile_error_t will be
* returned, providing details of the error condition. The caller is
* responsible for deallocating the buffer using the @ref
* hs_free_compile_error() function.
*
* @return
* @ref HS_SUCCESS is returned on successful compilation; @ref
* HS_COMPILER_ERROR on failure, with details provided in the error
* parameter.
*/
hs_error_t hs_expression_ext_info(const char *expression, unsigned int flags,
const hs_expr_ext_t *ext,
hs_expr_info_t **info,
hs_compile_error_t **error);
/**
* Populates the platform information based on the current host.
*

View File

@ -94,11 +94,34 @@ void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v,
const DepthMinMax &d = depths.at(idx);
for (ReportID report_id : w[v].reports) {
const Report &ir = rm.getReport(report_id);
assert(ir.type == EXTERNAL_CALLBACK);
s32 adjust = ir.offsetAdjust;
info.min = min(info.min, d.min + adjust);
info.max = max(info.max, d.max + adjust);
const Report &report = rm.getReport(report_id);
assert(report.type == EXTERNAL_CALLBACK);
DepthMinMax rd = d;
// Compute graph width to this report, taking any offset adjustment
// into account.
rd.min += report.offsetAdjust;
rd.max += report.offsetAdjust;
// A min_length param is a lower bound for match width.
if (report.minLength && report.minLength <= depth::max_value()) {
depth min_len((u32)report.minLength);
rd.min = max(rd.min, min_len);
rd.max = max(rd.max, min_len);
}
// A max_offset param is an upper bound for match width.
if (report.maxOffset && report.maxOffset <= depth::max_value()) {
depth max_offset((u32)report.maxOffset);
rd.min = min(rd.min, max_offset);
rd.max = min(rd.max, max_offset);
}
DEBUG_PRINTF("vertex %u report %u: %s\n", w[v].index, report_id,
rd.str().c_str());
info = unionDepthMinMax(info, rd);
}
}

View File

@ -2154,6 +2154,37 @@ TEST(HyperscanArgChecks, ExprInfoNullErrPtr) {
EXPECT_TRUE(info == nullptr);
}
// hs_expression_ext_info: a NULL expression pointer must be rejected with
// HS_COMPILER_ERROR. No info block may be returned, and a compile error
// object must be produced.
TEST(HyperscanArgChecks, ExprExtInfoNullExpression) {
hs_expr_info_t *info = nullptr;
hs_compile_error_t *compile_err = nullptr;
hs_error_t err =
hs_expression_ext_info(nullptr, 0, nullptr, &info, &compile_err);
EXPECT_EQ(HS_COMPILER_ERROR, err);
EXPECT_TRUE(info == nullptr);
EXPECT_TRUE(compile_err != nullptr);
// The returned error object is released by the test.
hs_free_compile_error(compile_err);
}
// hs_expression_ext_info: a NULL info output pointer must be rejected with
// HS_COMPILER_ERROR even though the pattern itself ("foobar") is valid, and a
// compile error object must be produced.
TEST(HyperscanArgChecks, ExprExtInfoNullInfoPtr) {
hs_compile_error_t *compile_err = nullptr;
hs_error_t err =
hs_expression_ext_info("foobar", 0, nullptr, nullptr, &compile_err);
EXPECT_EQ(HS_COMPILER_ERROR, err);
EXPECT_TRUE(compile_err != nullptr);
// The returned error object is released by the test.
hs_free_compile_error(compile_err);
}
// hs_expression_ext_info: a NULL error output pointer must be rejected with
// HS_COMPILER_ERROR. There is nowhere to store an error object, so only the
// return code and the untouched info pointer are checked.
TEST(HyperscanArgChecks, ExprExtInfoNullErrPtr) {
hs_expr_info_t *info = nullptr;
hs_error_t err =
hs_expression_ext_info("foobar", 0, nullptr, &info, nullptr);
EXPECT_EQ(HS_COMPILER_ERROR, err);
EXPECT_TRUE(info == nullptr);
}
TEST(HyperscanArgChecks, hs_free_database_null) {
hs_error_t err = hs_free_database(nullptr);
ASSERT_EQ(HS_SUCCESS, err);

View File

@ -42,6 +42,8 @@ namespace /* anonymous */ {
struct expected_info {
const char *pattern;
hs_expr_ext ext;
unsigned min;
unsigned max;
char unordered_matches;
@ -52,10 +54,25 @@ struct expected_info {
class ExprInfop : public TestWithParam<expected_info> {
};
TEST_P(ExprInfop, width) {
// Shared verification helper: compares every field of the returned
// hs_expr_info_t against the expected values in the test parameter. Uses
// non-fatal EXPECT_EQ so that all mismatching fields are reported in one run.
static
void check_info(const expected_info &ei, const hs_expr_info_t *info) {
EXPECT_EQ(ei.min, info->min_width);
EXPECT_EQ(ei.max, info->max_width);
EXPECT_EQ(ei.unordered_matches, info->unordered_matches);
EXPECT_EQ(ei.matches_at_eod, info->matches_at_eod);
EXPECT_EQ(ei.matches_only_at_eod, info->matches_only_at_eod);
}
// Check with hs_expression_info function.
TEST_P(ExprInfop, check_no_ext) {
const expected_info &ei = GetParam();
SCOPED_TRACE(ei.pattern);
if (ei.ext.flags) {
// This is an extparam test, skip it.
return;
}
hs_expr_info_t *info = nullptr;
hs_compile_error_t *c_err = nullptr;
hs_error_t err = hs_expression_info(ei.pattern, 0, &info, &c_err);
@ -63,54 +80,97 @@ TEST_P(ExprInfop, width) {
ASSERT_TRUE(info != nullptr);
ASSERT_TRUE(c_err == nullptr);
EXPECT_EQ(ei.min, info->min_width);
EXPECT_EQ(ei.max, info->max_width);
EXPECT_EQ(ei.unordered_matches, info->unordered_matches);
EXPECT_EQ(ei.matches_at_eod, info->matches_at_eod);
EXPECT_EQ(ei.matches_only_at_eod, info->matches_only_at_eod);
check_info(ei, info);
free(info);
}
// Check with hs_expression_ext_info function. Runs for every table entry,
// passing the entry's ext block (which may have flags == 0) and no pattern
// flags, then compares the result with the expected values.
TEST_P(ExprInfop, check_ext) {
const expected_info &ei = GetParam();
// Attach the pattern text to any failure reported below.
SCOPED_TRACE(ei.pattern);
hs_expr_info_t *info = nullptr;
hs_compile_error_t *c_err = nullptr;
hs_error_t err =
hs_expression_ext_info(ei.pattern, 0, &ei.ext, &info, &c_err);
// Fatal assertions: check_info() dereferences info, so stop on failure.
ASSERT_EQ(HS_SUCCESS, err);
ASSERT_TRUE(info != nullptr);
ASSERT_TRUE(c_err == nullptr);
check_info(ei, info);
// NOTE(review): released with free(); presumably relies on the default
// allocator being in effect -- confirm.
free(info);
}
// Check with hs_expression_ext_info function and a nullptr ext param, for
// cases where ext.flags == 0. Functionally identical to check_no_ext above.
// Entries with a non-zero ext.flags are skipped by returning early.
TEST_P(ExprInfop, check_ext_null) {
const expected_info &ei = GetParam();
// Attach the pattern text to any failure reported below.
SCOPED_TRACE(ei.pattern);
if (ei.ext.flags) {
// This is an extparam test, skip it.
return;
}
hs_expr_info_t *info = nullptr;
hs_compile_error_t *c_err = nullptr;
hs_error_t err =
hs_expression_ext_info(ei.pattern, 0, nullptr, &info, &c_err);
// Fatal assertions: check_info() dereferences info, so stop on failure.
ASSERT_EQ(HS_SUCCESS, err);
ASSERT_TRUE(info != nullptr);
ASSERT_TRUE(c_err == nullptr);
check_info(ei, info);
// NOTE(review): released with free(); presumably relies on the default
// allocator being in effect -- confirm.
free(info);
}
// All-zero extended-parameter block used for table entries that carry no
// extended parameters. The tests treat ext.flags == 0 as "no extparams".
// NOTE(review): presumably the four initialisers are flags, min_offset,
// max_offset and min_length, in that order -- confirm against hs_expr_ext.
static const hs_expr_ext NO_EXT_PARAM = { 0, 0, 0, 0 };
static const expected_info ei_test[] = {
{"abc", 3, 3, 0, 0, 0},
{"abc.*def", 6, UINT_MAX, 0, 0, 0},
{"abc|defghi", 3, 6, 0, 0, 0},
{"abc(def)?", 3, 6, 0, 0, 0},
{"abc(def){0,3}", 3, 12, 0, 0, 0},
{"abc(def){1,4}", 6, 15, 0, 0, 0},
{"", 0, 0, 0, 0, 0},
{"^", 0, 0, 0, 0, 0},
{"^\\b", 0, 0, 1, 0, 0},
{"\\b$", 0, 0, 1, 1, 1},
{"(?m)\\b$", 0, 0, 1, 1, 0},
{"\\A", 0, 0, 0, 0, 0},
{"\\z", 0, 0, 0, 1, 1},
{"\\Z", 0, 0, 1, 1, 1},
{"$", 0, 0, 1, 1, 1},
{"(?m)$", 0, 0, 1, 1, 0},
{"^foo", 3, 3, 0, 0, 0},
{"^foo.*bar", 6, UINT_MAX, 0, 0, 0},
{"^foo.*bar?", 5, UINT_MAX, 0, 0, 0},
{"^foo.*bar$", 6, UINT_MAX, 1, 1, 1},
{"^foobar$", 6, 6, 1, 1, 1},
{"foobar$", 6, 6, 1, 1, 1},
{"^.*foo", 3, UINT_MAX, 0, 0, 0},
{"foo\\b", 3, 3, 1, 1, 0},
{"foo.{1,13}bar", 7, 19, 0, 0, 0},
{"foo.{10,}bar", 16, UINT_MAX, 0, 0, 0},
{"foo.{0,10}bar", 6, 16, 0, 0, 0},
{"foo.{,10}bar", 12, 12, 0, 0, 0},
{"foo.{10}bar", 16, 16, 0, 0, 0},
{"(^|\n)foo", 3, 4, 0, 0, 0},
{"(^\n|)foo", 3, 4, 0, 0, 0},
{"(?m)^foo", 3, 3, 0, 0, 0},
{"\\bfoo", 3, 3, 0, 0, 0},
{"^\\bfoo", 3, 3, 0, 0, 0},
{"(?m)^\\bfoo", 3, 3, 0, 0, 0},
{"\\Bfoo", 3, 3, 0, 0, 0},
{"(foo|bar\\z)", 3, 3, 0, 1, 0},
{"(foo|bar)\\z", 3, 3, 0, 1, 1},
{"abc", NO_EXT_PARAM, 3, 3, 0, 0, 0},
{"abc.*def", NO_EXT_PARAM, 6, UINT_MAX, 0, 0, 0},
{"abc|defghi", NO_EXT_PARAM, 3, 6, 0, 0, 0},
{"abc(def)?", NO_EXT_PARAM, 3, 6, 0, 0, 0},
{"abc(def){0,3}", NO_EXT_PARAM, 3, 12, 0, 0, 0},
{"abc(def){1,4}", NO_EXT_PARAM, 6, 15, 0, 0, 0},
{"", NO_EXT_PARAM, 0, 0, 0, 0, 0},
{"^", NO_EXT_PARAM, 0, 0, 0, 0, 0},
{"^\\b", NO_EXT_PARAM, 0, 0, 1, 0, 0},
{"\\b$", NO_EXT_PARAM, 0, 0, 1, 1, 1},
{"(?m)\\b$", NO_EXT_PARAM, 0, 0, 1, 1, 0},
{"\\A", NO_EXT_PARAM, 0, 0, 0, 0, 0},
{"\\z", NO_EXT_PARAM, 0, 0, 0, 1, 1},
{"\\Z", NO_EXT_PARAM, 0, 0, 1, 1, 1},
{"$", NO_EXT_PARAM, 0, 0, 1, 1, 1},
{"(?m)$", NO_EXT_PARAM, 0, 0, 1, 1, 0},
{"^foo", NO_EXT_PARAM, 3, 3, 0, 0, 0},
{"^foo.*bar", NO_EXT_PARAM, 6, UINT_MAX, 0, 0, 0},
{"^foo.*bar?", NO_EXT_PARAM, 5, UINT_MAX, 0, 0, 0},
{"^foo.*bar$", NO_EXT_PARAM, 6, UINT_MAX, 1, 1, 1},
{"^foobar$", NO_EXT_PARAM, 6, 6, 1, 1, 1},
{"foobar$", NO_EXT_PARAM, 6, 6, 1, 1, 1},
{"^.*foo", NO_EXT_PARAM, 3, UINT_MAX, 0, 0, 0},
{"foo\\b", NO_EXT_PARAM, 3, 3, 1, 1, 0},
{"foo.{1,13}bar", NO_EXT_PARAM, 7, 19, 0, 0, 0},
{"foo.{10,}bar", NO_EXT_PARAM, 16, UINT_MAX, 0, 0, 0},
{"foo.{0,10}bar", NO_EXT_PARAM, 6, 16, 0, 0, 0},
{"foo.{,10}bar", NO_EXT_PARAM, 12, 12, 0, 0, 0},
{"foo.{10}bar", NO_EXT_PARAM, 16, 16, 0, 0, 0},
{"(^|\n)foo", NO_EXT_PARAM, 3, 4, 0, 0, 0},
{"(^\n|)foo", NO_EXT_PARAM, 3, 4, 0, 0, 0},
{"(?m)^foo", NO_EXT_PARAM, 3, 3, 0, 0, 0},
{"\\bfoo", NO_EXT_PARAM, 3, 3, 0, 0, 0},
{"^\\bfoo", NO_EXT_PARAM, 3, 3, 0, 0, 0},
{"(?m)^\\bfoo", NO_EXT_PARAM, 3, 3, 0, 0, 0},
{"\\Bfoo", NO_EXT_PARAM, 3, 3, 0, 0, 0},
{"(foo|bar\\z)", NO_EXT_PARAM, 3, 3, 0, 1, 0},
{"(foo|bar)\\z", NO_EXT_PARAM, 3, 3, 0, 1, 1},
// Some cases with extended parameters.
{"^abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0}, 6, 10, 0, 0, 0},
{"abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0}, 6, 10, 0, 0, 0},
{"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100}, 100, UINT_MAX, 0, 0, 0},
{"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 5}, 6, UINT_MAX, 0, 0, 0},
};
INSTANTIATE_TEST_CASE_P(ExprInfo, ExprInfop, ValuesIn(ei_test));