mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2026-01-01 14:15:43 +03:00
add new Literal API for pure literal expressions:
Design compile time api hs_compile_lit() and hs_compile_lit_multi() to handle pure literal pattern sets. Corresponding option --literal-on is added for hyperscan testing suites. Extended parameters and part of flags are not supported for this api.
This commit is contained in:
committed by
Chang, Harry
parent
8bfbf07f75
commit
23e5f06594
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -56,11 +56,13 @@
|
||||
#include "parser/unsupported.h"
|
||||
#include "parser/utf8_validate.h"
|
||||
#include "rose/rose_build.h"
|
||||
#include "rose/rose_internal.h"
|
||||
#include "som/slot_manager_dump.h"
|
||||
#include "util/bytecode_ptr.h"
|
||||
#include "util/compile_error.h"
|
||||
#include "util/target_info.h"
|
||||
#include "util/verify_types.h"
|
||||
#include "util/ue2string.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
@@ -107,6 +109,46 @@ void validateExt(const hs_expr_ext &ext) {
|
||||
|
||||
}
|
||||
|
||||
void ParsedLitExpression::parseLiteral(const char *expression, size_t len,
|
||||
bool nocase) {
|
||||
const char *c = expression;
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
lit.push_back(*c, nocase);
|
||||
c++;
|
||||
}
|
||||
}
|
||||
|
||||
ParsedLitExpression::ParsedLitExpression(unsigned index_in,
|
||||
const char *expression,
|
||||
size_t expLength, unsigned flags,
|
||||
ReportID report)
|
||||
: expr(index_in, false, flags & HS_FLAG_SINGLEMATCH, false, false,
|
||||
SOM_NONE, report, 0, MAX_OFFSET, 0, 0, 0, false) {
|
||||
// For pure literal expression, below 'HS_FLAG_'s are unuseful:
|
||||
// DOTALL/ALLOWEMPTY/UTF8/UCP/PREFILTER/COMBINATION/QUIET
|
||||
|
||||
if (flags & ~HS_FLAG_ALL) {
|
||||
DEBUG_PRINTF("Unrecognised flag, flags=%u.\n", flags);
|
||||
throw CompileError("Unrecognised flag.");
|
||||
}
|
||||
|
||||
// FIXME: we disallow highlander + SOM, see UE-1850.
|
||||
if ((flags & HS_FLAG_SINGLEMATCH) && (flags & HS_FLAG_SOM_LEFTMOST)) {
|
||||
throw CompileError("HS_FLAG_SINGLEMATCH is not supported in "
|
||||
"combination with HS_FLAG_SOM_LEFTMOST.");
|
||||
}
|
||||
|
||||
// Set SOM type.
|
||||
if (flags & HS_FLAG_SOM_LEFTMOST) {
|
||||
expr.som = SOM_LEFT;
|
||||
}
|
||||
|
||||
// Transfer expression text into ue2_literal.
|
||||
bool nocase = flags & HS_FLAG_CASELESS ? true : false;
|
||||
parseLiteral(expression, expLength, nocase);
|
||||
|
||||
}
|
||||
|
||||
ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
|
||||
unsigned flags, ReportID report,
|
||||
const hs_expr_ext *ext)
|
||||
@@ -345,6 +387,49 @@ void addExpression(NG &ng, unsigned index, const char *expression,
|
||||
}
|
||||
}
|
||||
|
||||
void addLitExpression(NG &ng, unsigned index, const char *expression,
|
||||
unsigned flags, const hs_expr_ext *ext, ReportID id,
|
||||
size_t expLength) {
|
||||
assert(expression);
|
||||
const CompileContext &cc = ng.cc;
|
||||
DEBUG_PRINTF("index=%u, id=%u, flags=%u, expr='%s', len='%zu'\n", index,
|
||||
id, flags, expression, expLength);
|
||||
|
||||
// Extended parameters are not supported for pure literal patterns.
|
||||
if (ext && ext->flags != 0LLU) {
|
||||
throw CompileError("Extended parameters are not supported for pure "
|
||||
"literal matching API.");
|
||||
}
|
||||
|
||||
// Ensure that our pattern isn't too long (in characters).
|
||||
if (strlen(expression) > cc.grey.limitPatternLength) {
|
||||
throw CompileError("Pattern length exceeds limit.");
|
||||
}
|
||||
|
||||
// filter out flags not supported by pure literal API.
|
||||
u64a not_supported = HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8 |
|
||||
HS_FLAG_UCP | HS_FLAG_PREFILTER | HS_FLAG_COMBINATION |
|
||||
HS_FLAG_QUIET;
|
||||
|
||||
if (flags & not_supported) {
|
||||
throw CompileError("Only HS_FLAG_CASELESS, HS_FLAG_MULTILINE, "
|
||||
"HS_FLAG_SINGLEMATCH and HS_FLAG_SOM_LEFTMOST are "
|
||||
"supported in literal API.");
|
||||
}
|
||||
|
||||
// This expression must be a pure literal, we can build ue2_literal
|
||||
// directly based on expression text.
|
||||
ParsedLitExpression ple(index, expression, expLength, flags, id);
|
||||
|
||||
// Feed the ue2_literal into Rose.
|
||||
const auto &expr = ple.expr;
|
||||
if (ng.addLiteral(ple.lit, expr.index, expr.report, expr.highlander,
|
||||
expr.som, expr.quiet)) {
|
||||
DEBUG_PRINTF("took pure literal\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
bytecode_ptr<RoseEngine> generateRoseEngine(NG &ng) {
|
||||
const u32 minWidth =
|
||||
@@ -416,10 +501,13 @@ hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) {
|
||||
}
|
||||
|
||||
|
||||
struct hs_database *build(NG &ng, unsigned int *length) {
|
||||
struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag) {
|
||||
assert(length);
|
||||
|
||||
auto rose = generateRoseEngine(ng);
|
||||
struct RoseEngine *roseHead = rose.get();
|
||||
roseHead->pureLiteral = pureFlag;
|
||||
|
||||
if (!rose) {
|
||||
throw CompileError("Unable to generate bytecode.");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user