add new Literal API for pure literal expressions:

Introduce the compile-time APIs hs_compile_lit() and hs_compile_lit_multi()
to handle pure literal pattern sets. A corresponding option, --literal-on,
is added to the Hyperscan testing suites. Extended parameters and some
flags are not supported by this API.
This commit is contained in:
Hong, Yang A
2019-07-18 00:29:27 +08:00
committed by Chang, Harry
parent 8bfbf07f75
commit 23e5f06594
36 changed files with 745 additions and 116 deletions

View File

@@ -43,6 +43,7 @@
#include "parser/Parser.h"
#include "parser/parse_error.h"
#include "util/make_unique.h"
#include "util/string_util.h"
#include "util/unicode_def.h"
#include "util/unordered.h"
@@ -111,6 +112,15 @@ bool decodeExprPcre(string &expr, unsigned *flags, bool *highlander,
return false;
}
if (use_literal_api) {
// filter out flags not supported by pure literal API.
u32 not_supported = HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8 |
HS_FLAG_UCP | HS_FLAG_PREFILTER;
hs_flags &= ~not_supported;
force_utf8 = false;
force_prefilter = false;
}
expr.swap(regex);
if (!getPcreFlags(hs_flags, flags, highlander, prefilter, som,
@@ -260,9 +270,29 @@ GroundTruth::compile(unsigned id, bool no_callouts) {
throw PcreCompileFailure("Unable to decode flags.");
}
// When the Hyperscan literal API is on, convert the regex string into hex.
if (use_literal_api && !combination) {
unsigned char *pat
= reinterpret_cast<unsigned char *>(const_cast<char *>(re.c_str()));
char *str = makeHex(pat, re.length());
if (!str) {
throw PcreCompileFailure("makeHex() malloc failure.");
}
re.assign(str);
free(str);
}
// filter out flags not supported by PCRE
u64a supported = HS_EXT_FLAG_MIN_OFFSET | HS_EXT_FLAG_MAX_OFFSET |
HS_EXT_FLAG_MIN_LENGTH;
if (use_literal_api) {
ext.flags &= 0ULL;
ext.min_offset = 0;
ext.max_offset = MAX_OFFSET;
ext.min_length = 0;
ext.edit_distance = 0;
ext.hamming_distance = 0;
}
if (ext.flags & ~supported) {
// edit distance is a known unsupported flag, so just throw a soft error
if (ext.flags & HS_EXT_FLAG_EDIT_DISTANCE) {
@@ -314,7 +344,6 @@ GroundTruth::compile(unsigned id, bool no_callouts) {
return compiled;
}
compiled->bytecode =
pcre_compile2(re.c_str(), flags, &errcode, &errptr, &errloc, nullptr);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,7 @@
#include "ng_corpus_generator.h"
#include "NfaGeneratedCorpora.h"
#include "ExpressionParser.h"
#include "common.h"
#include "grey.h"
#include "hs_compile.h"
@@ -44,6 +45,7 @@
#include "util/compile_context.h"
#include "util/compile_error.h"
#include "util/report_manager.h"
#include "util/string_util.h"
#include "util/target_info.h"
#include <string>
@@ -80,6 +82,18 @@ void NfaGeneratedCorpora::generate(unsigned id, vector<Corpus> &data) {
throw CorpusFailure("Expression could not be read: " + i->second);
}
// When the Hyperscan literal API is on, convert the regex string into hex.
if (use_literal_api && !(hs_flags & HS_FLAG_COMBINATION)) {
unsigned char *pat
= reinterpret_cast<unsigned char *>(const_cast<char *>(re.c_str()));
char *str = makeHex(pat, re.length());
if (!str) {
throw CorpusFailure("makeHex() malloc failure.");
}
re.assign(str);
free(str);
}
// A combination's corpus consists of its sub-expressions' corpora.
if (hs_flags & HS_FLAG_COMBINATION) {
ParsedLogical pl;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -925,11 +925,22 @@ compileHyperscan(vector<const char *> &patterns, vector<unsigned> &flags,
const unsigned count = patterns.size();
hs_database_t *db = nullptr;
hs_compile_error_t *compile_err;
hs_error_t err;
hs_error_t err = hs_compile_multi_int(&patterns[0], &flags[0],
&idsvec[0], ext.c_array(), count,
mode, platform, &db,
&compile_err, grey);
if (use_literal_api) {
// Compute length of each pattern.
vector<size_t> lens(count);
for (unsigned int i = 0; i < count; i++) {
lens[i] = strlen(patterns[i]);
}
err = hs_compile_lit_multi_int(&patterns[0], &flags[0], &idsvec[0],
ext.c_array(), &lens[0], count, mode,
platform, &db, &compile_err, grey);
} else {
err = hs_compile_multi_int(&patterns[0], &flags[0], &idsvec[0],
ext.c_array(), count, mode, platform, &db,
&compile_err, grey);
}
if (err != HS_SUCCESS) {
error = compile_err->message;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -116,6 +116,7 @@ void usage(const char *name, const char *error) {
printf(" --abort-on-fail Abort, rather than exit, on failure.\n");
printf(" --no-signal-handler Do not handle handle signals (to generate "
"backtraces).\n");
printf(" --literal-on Use Hyperscan pure literal matching.\n");
printf("\n");
printf("Memory and resource control options:\n");
printf("\n");
@@ -174,6 +175,7 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop,
int mangleScratch = 0;
int compressFlag = 0;
int compressResetFlag = 0;
int literalFlag = 0;
static const struct option longopts[] = {
{"copy-scratch", 0, &copyScratch, 1},
{"copy-stream", 0, &copyStream, 1},
@@ -187,6 +189,7 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop,
{"compress-expand", 0, &compressFlag, 1},
{"compress-reset-expand", 0, &compressResetFlag, 1},
{"no-groups", 0, &no_groups, 1},
{"literal-on", 0, &literalFlag, 1},
{nullptr, 0, nullptr, 0}};
for (;;) {
@@ -589,4 +592,5 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop,
use_mangle_scratch = (bool) mangleScratch;
use_compress_expand = (bool)compressFlag;
use_compress_reset_expand = (bool)compressResetFlag;
use_literal_api = (bool)literalFlag;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -82,6 +82,7 @@ extern bool use_copy_stream;
extern bool use_mangle_scratch;
extern bool use_compress_expand;
extern bool use_compress_reset_expand;
extern bool use_literal_api;
extern int abort_on_failure;
extern int no_signal_handler;
extern bool force_edit_distance;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -118,6 +118,7 @@ bool use_copy_stream = false;
bool use_mangle_scratch = false;
bool use_compress_expand = false;
bool use_compress_reset_expand = false;
bool use_literal_api = false;
int abort_on_failure = 0;
int no_signal_handler = 0;
size_t max_scan_queue_len = 25000;