mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-29 19:24:25 +03:00
add new Literal API for pure literal expressions:
Add the compile-time APIs hs_compile_lit() and hs_compile_lit_multi() to handle pure literal pattern sets. A corresponding option, --literal-on, is added to the Hyperscan testing suites. Extended parameters and some flags are not supported by this API.
This commit is contained in:
committed by
Chang, Harry
parent
8bfbf07f75
commit
23e5f06594
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
|
@@ -238,10 +238,10 @@ hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t,
|
||||
assert(id && id < t->size); // id is an offset into bytecode
|
||||
const u64a som = 0;
|
||||
const u8 flags = 0;
|
||||
if (!scratch->pure) {
|
||||
return roseRunProgram(t, scratch, id, som, end, flags);
|
||||
} else {
|
||||
if (t->pureLiteral) {
|
||||
return roseRunProgram_l(t, scratch, id, som, end, flags);
|
||||
} else {
|
||||
return roseRunProgram(t, scratch, id, som, end, flags);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -619,8 +619,12 @@ int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) {
|
||||
// Our match ID is the program offset.
|
||||
const u32 program = id;
|
||||
const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP;
|
||||
hwlmcb_rv_t rv =
|
||||
roseRunProgram(rose, scratch, program, start, end, flags);
|
||||
hwlmcb_rv_t rv;
|
||||
if (rose->pureLiteral) {
|
||||
rv = roseRunProgram_l(rose, scratch, program, start, end, flags);
|
||||
} else {
|
||||
rv = roseRunProgram(rose, scratch, program, start, end, flags);
|
||||
}
|
||||
if (rv == HWLM_TERMINATE_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
|
@@ -2884,6 +2884,7 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
|
||||
assert(programOffset >= sizeof(struct RoseEngine));
|
||||
assert(programOffset < t->size);
|
||||
|
||||
const char in_catchup = prog_flags & ROSE_PROG_FLAG_IN_CATCHUP;
|
||||
const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV;
|
||||
|
||||
const char *pc_base = getByOffset(t, programOffset);
|
||||
@@ -2911,6 +2912,56 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
|
||||
}
|
||||
L_PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
L_PROGRAM_CASE(CHECK_GROUPS) {
|
||||
DEBUG_PRINTF("groups=0x%llx, checking instr groups=0x%llx\n",
|
||||
tctxt->groups, ri->groups);
|
||||
if (!(ri->groups & tctxt->groups)) {
|
||||
DEBUG_PRINTF("halt: no groups are set\n");
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
}
|
||||
L_PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
L_PROGRAM_CASE(CHECK_MASK) {
|
||||
struct core_info *ci = &scratch->core_info;
|
||||
if (!roseCheckMask(ci, ri->and_mask, ri->cmp_mask,
|
||||
ri->neg_mask, ri->offset, end)) {
|
||||
DEBUG_PRINTF("failed mask check\n");
|
||||
assert(ri->fail_jump); // must progress
|
||||
pc += ri->fail_jump;
|
||||
L_PROGRAM_NEXT_INSTRUCTION_JUMP
|
||||
}
|
||||
}
|
||||
L_PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
L_PROGRAM_CASE(CHECK_MASK_32) {
|
||||
struct core_info *ci = &scratch->core_info;
|
||||
if (!roseCheckMask32(ci, ri->and_mask, ri->cmp_mask,
|
||||
ri->neg_mask, ri->offset, end)) {
|
||||
assert(ri->fail_jump);
|
||||
pc += ri->fail_jump;
|
||||
L_PROGRAM_NEXT_INSTRUCTION_JUMP
|
||||
}
|
||||
}
|
||||
L_PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
L_PROGRAM_CASE(CHECK_BYTE) {
|
||||
const struct core_info *ci = &scratch->core_info;
|
||||
if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask,
|
||||
ri->negation, ri->offset, end)) {
|
||||
DEBUG_PRINTF("failed byte check\n");
|
||||
assert(ri->fail_jump); // must progress
|
||||
pc += ri->fail_jump;
|
||||
L_PROGRAM_NEXT_INSTRUCTION_JUMP
|
||||
}
|
||||
}
|
||||
L_PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
L_PROGRAM_CASE(PUSH_DELAYED) {
|
||||
rosePushDelayedMatch(t, scratch, ri->delay, ri->index, end);
|
||||
}
|
||||
L_PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
L_PROGRAM_CASE(CATCH_UP) {
|
||||
if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
@@ -2967,6 +3018,17 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
|
||||
}
|
||||
L_PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
L_PROGRAM_CASE(REPORT_CHAIN) {
|
||||
// Note: sequence points updated inside this function.
|
||||
if (roseCatchUpAndHandleChainMatch(
|
||||
t, scratch, ri->event, ri->top_squash_distance, end,
|
||||
in_catchup) == HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
work_done = 1;
|
||||
}
|
||||
L_PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
L_PROGRAM_CASE(REPORT) {
|
||||
updateSeqPoint(tctxt, end, from_mpv);
|
||||
if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust,
|
||||
@@ -3117,6 +3179,24 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
|
||||
}
|
||||
L_PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
L_PROGRAM_CASE(INCLUDED_JUMP) {
|
||||
if (scratch->fdr_conf) {
|
||||
// squash the bucket of included literal
|
||||
u8 shift = scratch->fdr_conf_offset & ~7U;
|
||||
u64a mask = ((~(u64a)ri->squash) << shift);
|
||||
*(scratch->fdr_conf) &= mask;
|
||||
|
||||
pc = getByOffset(t, ri->child_offset);
|
||||
pc_base = pc;
|
||||
programOffset = (const u8 *)pc_base -(const u8 *)t;
|
||||
DEBUG_PRINTF("pc_base %p pc %p child_offset %u squash %u\n",
|
||||
pc_base, pc, ri->child_offset, ri->squash);
|
||||
work_done = 0;
|
||||
L_PROGRAM_NEXT_INSTRUCTION_JUMP
|
||||
}
|
||||
}
|
||||
L_PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
L_PROGRAM_CASE(SET_LOGICAL) {
|
||||
DEBUG_PRINTF("set logical value of lkey %u, offset_adjust=%d\n",
|
||||
ri->lkey, ri->offset_adjust);
|
||||
|
@@ -2843,34 +2843,9 @@ vector<LitFragment> groupByFragment(const RoseBuildImpl &build) {
|
||||
|
||||
DEBUG_PRINTF("fragment candidate: lit_id=%u %s\n", lit_id,
|
||||
dumpString(lit.s).c_str());
|
||||
|
||||
/** 0:/xxabcdefgh/ */
|
||||
/** 1:/yyabcdefgh/ */
|
||||
/** 2:/yyabcdefgh.+/ */
|
||||
// Above 3 patterns should firstly convert into RoseLiteralMap with
|
||||
// 2 elements ("xxabcdefgh" and "yyabcdefgh"), then convert into
|
||||
// LitFragment with 1 element ("abcdefgh"). Special care should be
|
||||
// taken to handle the 'pure' flag during the conversion.
|
||||
|
||||
rose_literal_id lit_frag = getFragment(lit);
|
||||
auto it = frag_info.find(lit_frag);
|
||||
if (it != frag_info.end()) {
|
||||
if (!lit_frag.s.get_pure() && it->first.s.get_pure()) {
|
||||
struct FragmentInfo f_info = it->second;
|
||||
f_info.lit_ids.push_back(lit_id);
|
||||
f_info.groups |= groups;
|
||||
frag_info.erase(it->first);
|
||||
frag_info.emplace(lit_frag, f_info);
|
||||
} else {
|
||||
it->second.lit_ids.push_back(lit_id);
|
||||
it->second.groups |= groups;
|
||||
}
|
||||
} else {
|
||||
struct FragmentInfo f_info;
|
||||
f_info.lit_ids.push_back(lit_id);
|
||||
f_info.groups |= groups;
|
||||
frag_info.emplace(lit_frag, f_info);
|
||||
}
|
||||
auto &fi = frag_info[getFragment(lit)];
|
||||
fi.lit_ids.push_back(lit_id);
|
||||
fi.groups |= groups;
|
||||
}
|
||||
|
||||
for (auto &m : frag_info) {
|
||||
|
@@ -340,14 +340,7 @@ public:
|
||||
std::pair<u32, bool> insert(const rose_literal_id &lit) {
|
||||
auto it = lits_index.find(lit);
|
||||
if (it != lits_index.end()) {
|
||||
u32 idx = it->second;
|
||||
auto &l = lits.at(idx);
|
||||
if (!lit.s.get_pure() && l.s.get_pure()) {
|
||||
lits_index.erase(l);
|
||||
l.s.unset_pure();
|
||||
lits_index.emplace(l, idx);
|
||||
}
|
||||
return {idx, false};
|
||||
return {it->second, false};
|
||||
}
|
||||
u32 id = verify_u32(lits.size());
|
||||
lits.push_back(lit);
|
||||
|
@@ -727,7 +727,6 @@ void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp,
|
||||
|
||||
const auto &s_final = lit_final.get_string();
|
||||
bool nocase = lit_final.any_nocase();
|
||||
bool pure = f.s.get_pure();
|
||||
|
||||
DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, cmp=%s\n",
|
||||
f.fragment_id, escapeString(s_final).c_str(), (int)nocase,
|
||||
@@ -741,7 +740,7 @@ void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp,
|
||||
const auto &groups = f.groups;
|
||||
|
||||
mp.lits.emplace_back(move(s_final), nocase, noruns, f.fragment_id,
|
||||
groups, msk, cmp, pure);
|
||||
groups, msk, cmp);
|
||||
}
|
||||
|
||||
static
|
||||
|
@@ -328,6 +328,7 @@ struct RoseBoundaryReports {
|
||||
* nfas). Rose nfa info table can distinguish the cases.
|
||||
*/
|
||||
struct RoseEngine {
|
||||
u8 pureLiteral; /* Indicator of pure literal API */
|
||||
u8 noFloatingRoots; /* only need to run the anchored table if something
|
||||
* matched in the anchored table */
|
||||
u8 requiresEodCheck; /* stuff happens at eod time */
|
||||
|
Reference in New Issue
Block a user