mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Add hs_expression_ext_info() API function
This is a variant of hs_expression_info() that can also accept extended parameters.
This commit is contained in:
parent
67b9784dae
commit
12921b7c97
18
src/hs.cpp
18
src/hs.cpp
@ -39,6 +39,7 @@
|
||||
#include "compiler/error.h"
|
||||
#include "nfagraph/ng.h"
|
||||
#include "nfagraph/ng_expr_info.h"
|
||||
#include "nfagraph/ng_extparam.h"
|
||||
#include "parser/parse_error.h"
|
||||
#include "parser/Parser.h"
|
||||
#include "parser/prefilter.h"
|
||||
@ -310,7 +311,8 @@ hs_error_t hs_compile_ext_multi(const char * const *expressions,
|
||||
|
||||
static
|
||||
hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
|
||||
unsigned int mode, hs_expr_info_t **info,
|
||||
const hs_expr_ext_t *ext, unsigned int mode,
|
||||
hs_expr_info_t **info,
|
||||
hs_compile_error_t **error) {
|
||||
if (!error) {
|
||||
// nowhere to write an error, but we can still return an error code.
|
||||
@ -347,7 +349,7 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
|
||||
}
|
||||
|
||||
ReportManager rm(cc.grey);
|
||||
ParsedExpression pe(0, expression, flags, 0);
|
||||
ParsedExpression pe(0, expression, flags, 0, ext);
|
||||
assert(pe.component);
|
||||
|
||||
// Apply prefiltering transformations if desired.
|
||||
@ -362,6 +364,7 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
|
||||
throw ParseError("Internal error.");
|
||||
}
|
||||
|
||||
handleExtendedParams(rm, *g, cc);
|
||||
fillExpressionInfo(rm, *g, &local_info);
|
||||
}
|
||||
catch (const CompileError &e) {
|
||||
@ -394,7 +397,16 @@ extern "C" HS_PUBLIC_API
|
||||
hs_error_t hs_expression_info(const char *expression, unsigned int flags,
|
||||
hs_expr_info_t **info,
|
||||
hs_compile_error_t **error) {
|
||||
return hs_expression_info_int(expression, flags, HS_MODE_BLOCK, info,
|
||||
return hs_expression_info_int(expression, flags, nullptr, HS_MODE_BLOCK,
|
||||
info, error);
|
||||
}
|
||||
|
||||
extern "C" HS_PUBLIC_API
|
||||
hs_error_t hs_expression_ext_info(const char *expression, unsigned int flags,
|
||||
const hs_expr_ext_t *ext,
|
||||
hs_expr_info_t **info,
|
||||
hs_compile_error_t **error) {
|
||||
return hs_expression_info_int(expression, flags, ext, HS_MODE_BLOCK, info,
|
||||
error);
|
||||
}
|
||||
|
||||
|
@ -158,7 +158,7 @@ typedef struct hs_platform_info {
|
||||
|
||||
/**
|
||||
* A type containing information related to an expression that is returned by
|
||||
* @ref hs_expression_info().
|
||||
* @ref hs_expression_info() or @ref hs_expression_ext_info().
|
||||
*/
|
||||
typedef struct hs_expr_info {
|
||||
/**
|
||||
@ -201,7 +201,8 @@ typedef struct hs_expr_info {
|
||||
|
||||
/**
|
||||
* A structure containing additional parameters related to an expression,
|
||||
* passed in at build time to @ref hs_compile_ext_multi().
|
||||
* passed in at build time to @ref hs_compile_ext_multi() or @ref
|
||||
* hs_expression_ext_info().
|
||||
*
|
||||
* These parameters allow the set of matches produced by a pattern to be
|
||||
* constrained at compile time, rather than relying on the application to
|
||||
@ -401,7 +402,7 @@ hs_error_t hs_compile_multi(const char *const *expressions,
|
||||
hs_database_t **db, hs_compile_error_t **error);
|
||||
|
||||
/**
|
||||
* The multiple regular expression compiler with extended pattern support.
|
||||
* The multiple regular expression compiler with extended parameter support.
|
||||
*
|
||||
* This function call compiles a group of expressions into a database in the
|
||||
* same way as @ref hs_compile_multi(), but allows additional parameters to be
|
||||
@ -550,6 +551,62 @@ hs_error_t hs_expression_info(const char *expression, unsigned int flags,
|
||||
hs_expr_info_t **info,
|
||||
hs_compile_error_t **error);
|
||||
|
||||
/**
|
||||
* Utility function providing information about a regular expression, with
|
||||
* extended parameter support. The information provided in @ref hs_expr_info_t
|
||||
* includes the minimum and maximum width of a pattern match.
|
||||
*
|
||||
* @param expression
|
||||
* The NULL-terminated expression to parse. Note that this string must
|
||||
* represent ONLY the pattern to be matched, with no delimiters or flags;
|
||||
* any global flags should be specified with the @a flags argument. For
|
||||
* example, the expression `/abc?def/i` should be compiled by providing
|
||||
* `abc?def` as the @a expression, and @ref HS_FLAG_CASELESS as the @a
|
||||
* flags.
|
||||
*
|
||||
* @param flags
|
||||
* Flags which modify the behaviour of the expression. Multiple flags may
|
||||
* be used by ORing them together. Valid values are:
|
||||
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
|
||||
* - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
|
||||
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
|
||||
* - HS_FLAG_SINGLEMATCH - Only one match will be generated by the
|
||||
* expression per stream.
|
||||
* - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
|
||||
* empty string, such as `.*`.
|
||||
* - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
|
||||
* - HS_FLAG_UCP - Use Unicode properties for character classes.
|
||||
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
|
||||
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
||||
* when a match is found.
|
||||
*
|
||||
* @param ext
|
||||
* A pointer to a filled @ref hs_expr_ext_t structure that defines
|
||||
* extended behaviour for this pattern. NULL may be specified if no
|
||||
* extended parameters are needed.
|
||||
*
|
||||
* @param info
|
||||
* On success, a pointer to the pattern information will be returned in
|
||||
* this parameter, or NULL on failure. This structure is allocated using
|
||||
* the allocator supplied in @ref hs_set_allocator() (or malloc() if no
|
||||
* allocator was set) and should be freed by the caller.
|
||||
*
|
||||
* @param error
|
||||
* If the call fails, a pointer to a @ref hs_compile_error_t will be
|
||||
* returned, providing details of the error condition. The caller is
|
||||
* responsible for deallocating the buffer using the @ref
|
||||
* hs_free_compile_error() function.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS is returned on successful compilation; @ref
|
||||
* HS_COMPILER_ERROR on failure, with details provided in the error
|
||||
* parameter.
|
||||
*/
|
||||
hs_error_t hs_expression_ext_info(const char *expression, unsigned int flags,
|
||||
const hs_expr_ext_t *ext,
|
||||
hs_expr_info_t **info,
|
||||
hs_compile_error_t **error);
|
||||
|
||||
/**
|
||||
* Populates the platform information based on the current host.
|
||||
*
|
||||
|
@ -94,11 +94,34 @@ void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v,
|
||||
const DepthMinMax &d = depths.at(idx);
|
||||
|
||||
for (ReportID report_id : w[v].reports) {
|
||||
const Report &ir = rm.getReport(report_id);
|
||||
assert(ir.type == EXTERNAL_CALLBACK);
|
||||
s32 adjust = ir.offsetAdjust;
|
||||
info.min = min(info.min, d.min + adjust);
|
||||
info.max = max(info.max, d.max + adjust);
|
||||
const Report &report = rm.getReport(report_id);
|
||||
assert(report.type == EXTERNAL_CALLBACK);
|
||||
|
||||
DepthMinMax rd = d;
|
||||
|
||||
// Compute graph width to this report, taking any offset adjustment
|
||||
// into account.
|
||||
rd.min += report.offsetAdjust;
|
||||
rd.max += report.offsetAdjust;
|
||||
|
||||
// A min_length param is a lower bound for match width.
|
||||
if (report.minLength && report.minLength <= depth::max_value()) {
|
||||
depth min_len((u32)report.minLength);
|
||||
rd.min = max(rd.min, min_len);
|
||||
rd.max = max(rd.max, min_len);
|
||||
}
|
||||
|
||||
// A max_offset param is an upper bound for match width.
|
||||
if (report.maxOffset && report.maxOffset <= depth::max_value()) {
|
||||
depth max_offset((u32)report.maxOffset);
|
||||
rd.min = min(rd.min, max_offset);
|
||||
rd.max = min(rd.max, max_offset);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("vertex %u report %u: %s\n", w[v].index, report_id,
|
||||
rd.str().c_str());
|
||||
|
||||
info = unionDepthMinMax(info, rd);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2154,6 +2154,37 @@ TEST(HyperscanArgChecks, ExprInfoNullErrPtr) {
|
||||
EXPECT_TRUE(info == nullptr);
|
||||
}
|
||||
|
||||
// hs_expression_ext_info: Compile a NULL pattern
|
||||
TEST(HyperscanArgChecks, ExprExtInfoNullExpression) {
|
||||
hs_expr_info_t *info = nullptr;
|
||||
hs_compile_error_t *compile_err = nullptr;
|
||||
hs_error_t err =
|
||||
hs_expression_ext_info(nullptr, 0, nullptr, &info, &compile_err);
|
||||
EXPECT_EQ(HS_COMPILER_ERROR, err);
|
||||
EXPECT_TRUE(info == nullptr);
|
||||
EXPECT_TRUE(compile_err != nullptr);
|
||||
hs_free_compile_error(compile_err);
|
||||
}
|
||||
|
||||
// hs_expression_ext_info: NULL info block ptr
|
||||
TEST(HyperscanArgChecks, ExprExtInfoNullInfoPtr) {
|
||||
hs_compile_error_t *compile_err = nullptr;
|
||||
hs_error_t err =
|
||||
hs_expression_ext_info("foobar", 0, nullptr, nullptr, &compile_err);
|
||||
EXPECT_EQ(HS_COMPILER_ERROR, err);
|
||||
EXPECT_TRUE(compile_err != nullptr);
|
||||
hs_free_compile_error(compile_err);
|
||||
}
|
||||
|
||||
// hs_expression_ext_info: No compiler error block
|
||||
TEST(HyperscanArgChecks, ExprExtInfoNullErrPtr) {
|
||||
hs_expr_info_t *info = nullptr;
|
||||
hs_error_t err =
|
||||
hs_expression_ext_info("foobar", 0, nullptr, &info, nullptr);
|
||||
EXPECT_EQ(HS_COMPILER_ERROR, err);
|
||||
EXPECT_TRUE(info == nullptr);
|
||||
}
|
||||
|
||||
TEST(HyperscanArgChecks, hs_free_database_null) {
|
||||
hs_error_t err = hs_free_database(nullptr);
|
||||
ASSERT_EQ(HS_SUCCESS, err);
|
||||
|
@ -42,6 +42,8 @@ namespace /* anonymous */ {
|
||||
|
||||
struct expected_info {
|
||||
const char *pattern;
|
||||
hs_expr_ext ext;
|
||||
|
||||
unsigned min;
|
||||
unsigned max;
|
||||
char unordered_matches;
|
||||
@ -52,10 +54,25 @@ struct expected_info {
|
||||
class ExprInfop : public TestWithParam<expected_info> {
|
||||
};
|
||||
|
||||
TEST_P(ExprInfop, width) {
|
||||
static
|
||||
void check_info(const expected_info &ei, const hs_expr_info_t *info) {
|
||||
EXPECT_EQ(ei.min, info->min_width);
|
||||
EXPECT_EQ(ei.max, info->max_width);
|
||||
EXPECT_EQ(ei.unordered_matches, info->unordered_matches);
|
||||
EXPECT_EQ(ei.matches_at_eod, info->matches_at_eod);
|
||||
EXPECT_EQ(ei.matches_only_at_eod, info->matches_only_at_eod);
|
||||
}
|
||||
|
||||
// Check with hs_expression_info function.
|
||||
TEST_P(ExprInfop, check_no_ext) {
|
||||
const expected_info &ei = GetParam();
|
||||
SCOPED_TRACE(ei.pattern);
|
||||
|
||||
if (ei.ext.flags) {
|
||||
// This is an extparam test, skip it.
|
||||
return;
|
||||
}
|
||||
|
||||
hs_expr_info_t *info = nullptr;
|
||||
hs_compile_error_t *c_err = nullptr;
|
||||
hs_error_t err = hs_expression_info(ei.pattern, 0, &info, &c_err);
|
||||
@ -63,54 +80,97 @@ TEST_P(ExprInfop, width) {
|
||||
ASSERT_TRUE(info != nullptr);
|
||||
ASSERT_TRUE(c_err == nullptr);
|
||||
|
||||
EXPECT_EQ(ei.min, info->min_width);
|
||||
EXPECT_EQ(ei.max, info->max_width);
|
||||
EXPECT_EQ(ei.unordered_matches, info->unordered_matches);
|
||||
EXPECT_EQ(ei.matches_at_eod, info->matches_at_eod);
|
||||
EXPECT_EQ(ei.matches_only_at_eod, info->matches_only_at_eod);
|
||||
|
||||
check_info(ei, info);
|
||||
free(info);
|
||||
}
|
||||
|
||||
// Check with hs_expression_ext_info function.
|
||||
TEST_P(ExprInfop, check_ext) {
|
||||
const expected_info &ei = GetParam();
|
||||
SCOPED_TRACE(ei.pattern);
|
||||
|
||||
hs_expr_info_t *info = nullptr;
|
||||
hs_compile_error_t *c_err = nullptr;
|
||||
hs_error_t err =
|
||||
hs_expression_ext_info(ei.pattern, 0, &ei.ext, &info, &c_err);
|
||||
ASSERT_EQ(HS_SUCCESS, err);
|
||||
ASSERT_TRUE(info != nullptr);
|
||||
ASSERT_TRUE(c_err == nullptr);
|
||||
|
||||
check_info(ei, info);
|
||||
free(info);
|
||||
}
|
||||
|
||||
// Check with hs_expression_ext_info function and a nullptr ext param, for
|
||||
// cases where ext.flags == 0. Functionally identical to check_no_ext above.
|
||||
TEST_P(ExprInfop, check_ext_null) {
|
||||
const expected_info &ei = GetParam();
|
||||
SCOPED_TRACE(ei.pattern);
|
||||
|
||||
if (ei.ext.flags) {
|
||||
// This is an extparam test, skip it.
|
||||
return;
|
||||
}
|
||||
|
||||
hs_expr_info_t *info = nullptr;
|
||||
hs_compile_error_t *c_err = nullptr;
|
||||
hs_error_t err =
|
||||
hs_expression_ext_info(ei.pattern, 0, nullptr, &info, &c_err);
|
||||
ASSERT_EQ(HS_SUCCESS, err);
|
||||
ASSERT_TRUE(info != nullptr);
|
||||
ASSERT_TRUE(c_err == nullptr);
|
||||
|
||||
check_info(ei, info);
|
||||
free(info);
|
||||
}
|
||||
|
||||
static const hs_expr_ext NO_EXT_PARAM = { 0, 0, 0, 0 };
|
||||
|
||||
static const expected_info ei_test[] = {
|
||||
{"abc", 3, 3, 0, 0, 0},
|
||||
{"abc.*def", 6, UINT_MAX, 0, 0, 0},
|
||||
{"abc|defghi", 3, 6, 0, 0, 0},
|
||||
{"abc(def)?", 3, 6, 0, 0, 0},
|
||||
{"abc(def){0,3}", 3, 12, 0, 0, 0},
|
||||
{"abc(def){1,4}", 6, 15, 0, 0, 0},
|
||||
{"", 0, 0, 0, 0, 0},
|
||||
{"^", 0, 0, 0, 0, 0},
|
||||
{"^\\b", 0, 0, 1, 0, 0},
|
||||
{"\\b$", 0, 0, 1, 1, 1},
|
||||
{"(?m)\\b$", 0, 0, 1, 1, 0},
|
||||
{"\\A", 0, 0, 0, 0, 0},
|
||||
{"\\z", 0, 0, 0, 1, 1},
|
||||
{"\\Z", 0, 0, 1, 1, 1},
|
||||
{"$", 0, 0, 1, 1, 1},
|
||||
{"(?m)$", 0, 0, 1, 1, 0},
|
||||
{"^foo", 3, 3, 0, 0, 0},
|
||||
{"^foo.*bar", 6, UINT_MAX, 0, 0, 0},
|
||||
{"^foo.*bar?", 5, UINT_MAX, 0, 0, 0},
|
||||
{"^foo.*bar$", 6, UINT_MAX, 1, 1, 1},
|
||||
{"^foobar$", 6, 6, 1, 1, 1},
|
||||
{"foobar$", 6, 6, 1, 1, 1},
|
||||
{"^.*foo", 3, UINT_MAX, 0, 0, 0},
|
||||
{"foo\\b", 3, 3, 1, 1, 0},
|
||||
{"foo.{1,13}bar", 7, 19, 0, 0, 0},
|
||||
{"foo.{10,}bar", 16, UINT_MAX, 0, 0, 0},
|
||||
{"foo.{0,10}bar", 6, 16, 0, 0, 0},
|
||||
{"foo.{,10}bar", 12, 12, 0, 0, 0},
|
||||
{"foo.{10}bar", 16, 16, 0, 0, 0},
|
||||
{"(^|\n)foo", 3, 4, 0, 0, 0},
|
||||
{"(^\n|)foo", 3, 4, 0, 0, 0},
|
||||
{"(?m)^foo", 3, 3, 0, 0, 0},
|
||||
{"\\bfoo", 3, 3, 0, 0, 0},
|
||||
{"^\\bfoo", 3, 3, 0, 0, 0},
|
||||
{"(?m)^\\bfoo", 3, 3, 0, 0, 0},
|
||||
{"\\Bfoo", 3, 3, 0, 0, 0},
|
||||
{"(foo|bar\\z)", 3, 3, 0, 1, 0},
|
||||
{"(foo|bar)\\z", 3, 3, 0, 1, 1},
|
||||
{"abc", NO_EXT_PARAM, 3, 3, 0, 0, 0},
|
||||
{"abc.*def", NO_EXT_PARAM, 6, UINT_MAX, 0, 0, 0},
|
||||
{"abc|defghi", NO_EXT_PARAM, 3, 6, 0, 0, 0},
|
||||
{"abc(def)?", NO_EXT_PARAM, 3, 6, 0, 0, 0},
|
||||
{"abc(def){0,3}", NO_EXT_PARAM, 3, 12, 0, 0, 0},
|
||||
{"abc(def){1,4}", NO_EXT_PARAM, 6, 15, 0, 0, 0},
|
||||
{"", NO_EXT_PARAM, 0, 0, 0, 0, 0},
|
||||
{"^", NO_EXT_PARAM, 0, 0, 0, 0, 0},
|
||||
{"^\\b", NO_EXT_PARAM, 0, 0, 1, 0, 0},
|
||||
{"\\b$", NO_EXT_PARAM, 0, 0, 1, 1, 1},
|
||||
{"(?m)\\b$", NO_EXT_PARAM, 0, 0, 1, 1, 0},
|
||||
{"\\A", NO_EXT_PARAM, 0, 0, 0, 0, 0},
|
||||
{"\\z", NO_EXT_PARAM, 0, 0, 0, 1, 1},
|
||||
{"\\Z", NO_EXT_PARAM, 0, 0, 1, 1, 1},
|
||||
{"$", NO_EXT_PARAM, 0, 0, 1, 1, 1},
|
||||
{"(?m)$", NO_EXT_PARAM, 0, 0, 1, 1, 0},
|
||||
{"^foo", NO_EXT_PARAM, 3, 3, 0, 0, 0},
|
||||
{"^foo.*bar", NO_EXT_PARAM, 6, UINT_MAX, 0, 0, 0},
|
||||
{"^foo.*bar?", NO_EXT_PARAM, 5, UINT_MAX, 0, 0, 0},
|
||||
{"^foo.*bar$", NO_EXT_PARAM, 6, UINT_MAX, 1, 1, 1},
|
||||
{"^foobar$", NO_EXT_PARAM, 6, 6, 1, 1, 1},
|
||||
{"foobar$", NO_EXT_PARAM, 6, 6, 1, 1, 1},
|
||||
{"^.*foo", NO_EXT_PARAM, 3, UINT_MAX, 0, 0, 0},
|
||||
{"foo\\b", NO_EXT_PARAM, 3, 3, 1, 1, 0},
|
||||
{"foo.{1,13}bar", NO_EXT_PARAM, 7, 19, 0, 0, 0},
|
||||
{"foo.{10,}bar", NO_EXT_PARAM, 16, UINT_MAX, 0, 0, 0},
|
||||
{"foo.{0,10}bar", NO_EXT_PARAM, 6, 16, 0, 0, 0},
|
||||
{"foo.{,10}bar", NO_EXT_PARAM, 12, 12, 0, 0, 0},
|
||||
{"foo.{10}bar", NO_EXT_PARAM, 16, 16, 0, 0, 0},
|
||||
{"(^|\n)foo", NO_EXT_PARAM, 3, 4, 0, 0, 0},
|
||||
{"(^\n|)foo", NO_EXT_PARAM, 3, 4, 0, 0, 0},
|
||||
{"(?m)^foo", NO_EXT_PARAM, 3, 3, 0, 0, 0},
|
||||
{"\\bfoo", NO_EXT_PARAM, 3, 3, 0, 0, 0},
|
||||
{"^\\bfoo", NO_EXT_PARAM, 3, 3, 0, 0, 0},
|
||||
{"(?m)^\\bfoo", NO_EXT_PARAM, 3, 3, 0, 0, 0},
|
||||
{"\\Bfoo", NO_EXT_PARAM, 3, 3, 0, 0, 0},
|
||||
{"(foo|bar\\z)", NO_EXT_PARAM, 3, 3, 0, 1, 0},
|
||||
{"(foo|bar)\\z", NO_EXT_PARAM, 3, 3, 0, 1, 1},
|
||||
|
||||
// Some cases with extended parameters.
|
||||
{"^abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0}, 6, 10, 0, 0, 0},
|
||||
{"abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0}, 6, 10, 0, 0, 0},
|
||||
{"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100}, 100, UINT_MAX, 0, 0, 0},
|
||||
{"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 5}, 6, UINT_MAX, 0, 0, 0},
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ExprInfo, ExprInfop, ValuesIn(ei_test));
|
||||
|
Loading…
x
Reference in New Issue
Block a user