Add hs_expression_ext_info() API function

This is a variant of hs_expression_info() that can also accept extended
parameters.
This commit is contained in:
Justin Viiret
2015-10-23 17:15:24 +11:00
committed by Matthew Barr
parent 67b9784dae
commit 12921b7c97
5 changed files with 239 additions and 56 deletions

View File

@@ -39,6 +39,7 @@
#include "compiler/error.h"
#include "nfagraph/ng.h"
#include "nfagraph/ng_expr_info.h"
#include "nfagraph/ng_extparam.h"
#include "parser/parse_error.h"
#include "parser/Parser.h"
#include "parser/prefilter.h"
@@ -310,7 +311,8 @@ hs_error_t hs_compile_ext_multi(const char * const *expressions,
static
hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
unsigned int mode, hs_expr_info_t **info,
const hs_expr_ext_t *ext, unsigned int mode,
hs_expr_info_t **info,
hs_compile_error_t **error) {
if (!error) {
// nowhere to write an error, but we can still return an error code.
@@ -347,7 +349,7 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
}
ReportManager rm(cc.grey);
ParsedExpression pe(0, expression, flags, 0);
ParsedExpression pe(0, expression, flags, 0, ext);
assert(pe.component);
// Apply prefiltering transformations if desired.
@@ -362,6 +364,7 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
throw ParseError("Internal error.");
}
handleExtendedParams(rm, *g, cc);
fillExpressionInfo(rm, *g, &local_info);
}
catch (const CompileError &e) {
@@ -394,7 +397,16 @@ extern "C" HS_PUBLIC_API
hs_error_t hs_expression_info(const char *expression, unsigned int flags,
hs_expr_info_t **info,
hs_compile_error_t **error) {
return hs_expression_info_int(expression, flags, HS_MODE_BLOCK, info,
return hs_expression_info_int(expression, flags, nullptr, HS_MODE_BLOCK,
info, error);
}
// Public entry point: the same as hs_expression_info(), except that the
// caller's extended parameters (ext, which may be NULL) are forwarded to the
// shared implementation as well.
extern "C" HS_PUBLIC_API
hs_error_t hs_expression_ext_info(const char *expression, unsigned int flags,
                                  const hs_expr_ext_t *ext,
                                  hs_expr_info_t **info,
                                  hs_compile_error_t **error) {
    // This wrapper always requests block mode from the internal helper.
    const unsigned int mode = HS_MODE_BLOCK;
    const hs_error_t rv =
        hs_expression_info_int(expression, flags, ext, mode, info, error);
    return rv;
}

View File

@@ -158,7 +158,7 @@ typedef struct hs_platform_info {
/**
* A type containing information related to an expression that is returned by
* @ref hs_expression_info().
* @ref hs_expression_info() or @ref hs_expression_ext_info().
*/
typedef struct hs_expr_info {
/**
@@ -201,7 +201,8 @@ typedef struct hs_expr_info {
/**
* A structure containing additional parameters related to an expression,
* passed in at build time to @ref hs_compile_ext_multi().
* passed in at build time to @ref hs_compile_ext_multi() or @ref
* hs_expression_ext_info().
*
* These parameters allow the set of matches produced by a pattern to be
* constrained at compile time, rather than relying on the application to
@@ -401,7 +402,7 @@ hs_error_t hs_compile_multi(const char *const *expressions,
hs_database_t **db, hs_compile_error_t **error);
/**
* The multiple regular expression compiler with extended pattern support.
* The multiple regular expression compiler with extended parameter support.
*
* This function call compiles a group of expressions into a database in the
* same way as @ref hs_compile_multi(), but allows additional parameters to be
@@ -550,6 +551,62 @@ hs_error_t hs_expression_info(const char *expression, unsigned int flags,
hs_expr_info_t **info,
hs_compile_error_t **error);
/**
* Utility function providing information about a regular expression, with
* extended parameter support. The information provided in @ref hs_expr_info_t
* includes the minimum and maximum width of a pattern match.
*
* @param expression
* The NULL-terminated expression to parse. Note that this string must
* represent ONLY the pattern to be matched, with no delimiters or flags;
* any global flags should be specified with the @a flags argument. For
* example, the expression `/abc?def/i` should be compiled by providing
* `abc?def` as the @a expression, and @ref HS_FLAG_CASELESS as the @a
* flags.
*
* @param flags
* Flags which modify the behaviour of the expression. Multiple flags may
* be used by ORing them together. Valid values are:
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
* - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
* - HS_FLAG_SINGLEMATCH - Only one match will be generated by the
* expression per stream.
* - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
* empty string, such as `.*`.
* - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
* - HS_FLAG_UCP - Use Unicode properties for character classes.
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
*
* @param ext
* A pointer to a filled @ref hs_expr_ext_t structure that defines
* extended behaviour for this pattern. NULL may be specified if no
* extended parameters are needed.
*
* @param info
* On success, a pointer to the pattern information will be returned in
* this parameter, or NULL on failure. This structure is allocated using
* the allocator supplied in @ref hs_set_allocator() (or malloc() if no
* allocator was set) and should be freed by the caller.
*
* @param error
* If the call fails, a pointer to a @ref hs_compile_error_t will be
* returned, providing details of the error condition. The caller is
* responsible for deallocating the buffer using the @ref
* hs_free_compile_error() function.
*
* @return
* @ref HS_SUCCESS is returned on successful compilation; @ref
* HS_COMPILER_ERROR on failure, with details provided in the error
* parameter.
*/
hs_error_t hs_expression_ext_info(const char *expression, unsigned int flags,
const hs_expr_ext_t *ext,
hs_expr_info_t **info,
hs_compile_error_t **error);
/**
* Populates the platform information based on the current host.
*

View File

@@ -94,11 +94,34 @@ void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v,
const DepthMinMax &d = depths.at(idx);
for (ReportID report_id : w[v].reports) {
const Report &ir = rm.getReport(report_id);
assert(ir.type == EXTERNAL_CALLBACK);
s32 adjust = ir.offsetAdjust;
info.min = min(info.min, d.min + adjust);
info.max = max(info.max, d.max + adjust);
const Report &report = rm.getReport(report_id);
assert(report.type == EXTERNAL_CALLBACK);
DepthMinMax rd = d;
// Compute graph width to this report, taking any offset adjustment
// into account.
rd.min += report.offsetAdjust;
rd.max += report.offsetAdjust;
// A min_length param is a lower bound for match width.
if (report.minLength && report.minLength <= depth::max_value()) {
depth min_len((u32)report.minLength);
rd.min = max(rd.min, min_len);
rd.max = max(rd.max, min_len);
}
// A max_offset param is an upper bound for match width.
if (report.maxOffset && report.maxOffset <= depth::max_value()) {
depth max_offset((u32)report.maxOffset);
rd.min = min(rd.min, max_offset);
rd.max = min(rd.max, max_offset);
}
DEBUG_PRINTF("vertex %u report %u: %s\n", w[v].index, report_id,
rd.str().c_str());
info = unionDepthMinMax(info, rd);
}
}