rose: remap reports to program offsets

This commit is contained in:
Justin Viiret 2016-04-18 14:40:58 +10:00 committed by Matthew Barr
parent f40aa6bd40
commit 611579511c
13 changed files with 252 additions and 26 deletions

View File

@ -593,6 +593,8 @@ SET (hs_SRCS
src/nfa/goughcompile_accel.cpp
src/nfa/goughcompile_internal.h
src/nfa/goughcompile_reg.cpp
src/nfa/goughcompile_util.cpp
src/nfa/goughcompile_util.h
src/nfa/mcclellan.h
src/nfa/mcclellan_internal.h
src/nfa/mcclellancompile.cpp

View File

@ -50,6 +50,7 @@
#include "util/make_unique.h"
#include "util/multibit_build.h"
#include "util/multibit_internal.h"
#include "util/report_manager.h"
#include "util/ue2_containers.h"
#include "util/verify_types.h"
#include "grey.h"
@ -990,4 +991,29 @@ unique_ptr<NGHolder> makeHolder(const CastleProto &proto, nfa_kind kind,
return g;
}
/** \brief Replace every report ID in a repeat's report set with the value
 * that ReportManager::getProgramOffset() yields for it. */
static
void remapReportsToPrograms(PureRepeat &pr, const ReportManager &rm) {
    auto &live = pr.reports;
    if (!live.empty()) {
        // Walk a snapshot so the live set can be rebuilt in place.
        const auto snapshot = live;
        live.clear();
        for (const auto &report_id : snapshot) {
            live.insert(rm.getProgramOffset(report_id));
        }
    }
}
/** \brief Remap all report IDs held by a castle prototype to the values
 * returned by ReportManager::getProgramOffset().
 *
 * Both the per-repeat report sets and the keys of report_map are rewritten.
 * Entries of report_map whose keys map to the same value are merged. */
void remapReportsToPrograms(CastleProto &castle, const ReportManager &rm) {
    // Per-repeat report sets.
    for (auto &repeat : castle.repeats) {
        remapReportsToPrograms(repeat.second, rm);
    }

    // Re-key report_map, working from a copy so the map can be rebuilt.
    const auto previous_map = castle.report_map;
    auto &rebuilt = castle.report_map;
    rebuilt.clear();
    for (const auto &entry : previous_map) {
        const u32 program = rm.getProgramOffset(entry.first);
        auto &merged = rebuilt[program];
        merged.insert(begin(entry.second), end(entry.second));
    }
}
} // namespace ue2

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -51,6 +51,7 @@ namespace ue2 {
class CharReach;
class NGHolder;
class ReportManager;
struct CompileContext;
/**
@ -158,6 +159,8 @@ bool requiresDedupe(const CastleProto &proto,
std::unique_ptr<NGHolder> makeHolder(const CastleProto &castle, nfa_kind kind,
const CompileContext &cc);
void remapReportsToPrograms(CastleProto &castle, const ReportManager &rm);
} // namespace ue2
#endif // NFA_CASTLECOMPILE_H

View File

@ -0,0 +1,68 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "goughcompile.h"
#include "goughcompile_util.h"
#include "mcclellancompile_util.h"
#include "util/report_manager.h"
#include "ue2common.h"
using namespace std;
using namespace ue2;
namespace ue2 {
/** \brief Rewrite a set of som_report entries so that each entry's report
 * field holds the value ReportManager::getProgramOffset() returns for it.
 * The slot field of every entry is carried over unchanged. */
static
void remapReportsToPrograms(set<som_report> &reports,
                            const ReportManager &rm) {
    if (!reports.empty()) {
        // Iterate a snapshot; the set itself is emptied and refilled.
        const auto snapshot = reports;
        reports.clear();
        for (const auto &entry : snapshot) {
            const u32 program = rm.getProgramOffset(entry.report);
            reports.emplace(program, entry.slot);
        }
    }
}
/** \brief Remap the reports of a raw_som_dfa: first the reports and
 * reports_eod sets of each state_som entry, then the reports reached through
 * the raw_dfa overload. */
void remapReportsToPrograms(raw_som_dfa &haig, const ReportManager &rm) {
    DEBUG_PRINTF("remap haig reports\n");

    for (auto &som_state : haig.state_som) {
        remapReportsToPrograms(som_state.reports, rm);
        remapReportsToPrograms(som_state.reports_eod, rm);
    }

    // The McClellan-style reports are handled by the raw_dfa overload, so
    // pass the same object viewed as a raw_dfa.
    remapReportsToPrograms(static_cast<raw_dfa &>(haig), rm);
}
} // namespace ue2

View File

@ -0,0 +1,41 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef GOUGHCOMPILE_UTIL_H
#define GOUGHCOMPILE_UTIL_H
namespace ue2 {
struct raw_som_dfa;
class ReportManager;
void remapReportsToPrograms(raw_som_dfa &haig, const ReportManager &rm);
} // namespace ue2
#endif // GOUGHCOMPILE_UTIL_H

View File

@ -395,4 +395,27 @@ dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
}
}
/** \brief Replace each ReportID in the set with the value that
 * ReportManager::getProgramOffset() returns for it. */
static
void remapReportsToPrograms(flat_set<ReportID> &reports,
                            const ReportManager &rm) {
    if (!reports.empty()) {
        // Iterate a snapshot; the set itself is emptied and refilled.
        const auto snapshot = reports;
        reports.clear();
        for (const ReportID &report_id : snapshot) {
            const u32 program = rm.getProgramOffset(report_id);
            reports.insert(program);
        }
    }
}
/** \brief Remap both report sets (reports and reports_eod) of every state in
 * a raw_dfa. */
void remapReportsToPrograms(raw_dfa &rdfa, const ReportManager &rm) {
    DEBUG_PRINTF("remap dfa reports\n");

    for (auto &state : rdfa.states) {
        auto &on_match = state.reports;
        auto &on_eod = state.reports_eod;
        remapReportsToPrograms(on_match, rm);
        remapReportsToPrograms(on_eod, rm);
    }
}
} // namespace ue2

View File

@ -31,6 +31,7 @@
#include "rdfa.h"
#include "ue2common.h"
#include "util/report_manager.h"
#include <set>
@ -57,6 +58,8 @@ size_t hash_dfa(const raw_dfa &rdfa);
dstate_id_t get_sds_or_proxy(const raw_dfa &raw);
void remapReportsToPrograms(raw_dfa &rdfa, const ReportManager &rm);
} // namespace ue2
#endif

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -50,8 +50,9 @@
#include "util/compile_context.h"
#include "util/container.h"
#include "util/graph_range.h"
#include "util/verify_types.h"
#include "util/report_manager.h"
#include "util/ue2_containers.h"
#include "util/verify_types.h"
#include <map>
#include <vector>
@ -346,6 +347,25 @@ prepareGraph(const NGHolder &h_in, const ReportManager *rm,
return h;
}
/** \brief For every vertex of the holder that has reports, replace each
 * ReportID with the value ReportManager::getProgramOffset() returns for it.
 * Vertices with an empty report set are left alone. */
static
void remapReportsToPrograms(NGHolder &h, const ReportManager &rm) {
    for (const auto &v : vertices_range(h)) {
        auto &vertex_reports = h[v].reports;
        if (!vertex_reports.empty()) {
            // Keep the previous contents: they drive the rebuild and are
            // also printed in the debug output below.
            const auto before = vertex_reports;
            vertex_reports.clear();
            for (const ReportID &report_id : before) {
                const u32 program = rm.getProgramOffset(report_id);
                vertex_reports.insert(program);
            }
            DEBUG_PRINTF("vertex %u: remapped reports {%s} to programs {%s}\n",
                         h[v].index, as_string_list(before).c_str(),
                         as_string_list(vertex_reports).c_str());
        }
    }
}
static
aligned_unique_ptr<NFA>
constructNFA(const NGHolder &h_in, const ReportManager *rm,
@ -393,6 +413,11 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm,
set<NFAVertex> zombies = findZombies(*h, br_cyclic, state_ids, cc);
if (generates_callbacks(*h)) {
assert(rm);
remapReportsToPrograms(*h, *rm);
}
if (!cc.streaming || !cc.grey.compressNFAState) {
compress_state = false;
}

View File

@ -49,14 +49,15 @@ typedef struct queue_match PQ_T;
// Runs a Rose program via roseRunProgram() for a match with report ID `id`
// at `offset`, with in_anchored = 0 and in_catchup = 1.
//
// Returns MO_HALT_MATCHING if can_stop_matching(scratch) holds after the
// program has run, MO_CONTINUE_MATCHING otherwise.
//
// NOTE(review): this hunk appears to carry both forms of the lookup -- the
// programs[id] table read and the direct use of id as the program offset.
// Presumably the table read is the pre-change form; confirm.
static really_inline
int roseNfaRunProgram(const struct RoseEngine *rose, struct hs_scratch *scratch,
u64a som, u64a offset, ReportID id, const char from_mpv) {
assert(id < rose->reportProgramCount);
const u32 *programs = getByOffset(rose, rose->reportProgramOffset);
// The incoming ID is used as the program offset itself.
const u32 program = id;
// The offset must be non-zero and a multiple of ROSE_INSTR_MIN_ALIGN.
assert(program > 0);
assert(program % ROSE_INSTR_MIN_ALIGN == 0);
const size_t match_len = 0; // Unused in this path.
const char in_anchored = 0;
const char in_catchup = 1;
roseRunProgram(rose, scratch, programs[id], som, offset, match_len,
in_anchored, in_catchup, from_mpv, 0);
roseRunProgram(rose, scratch, program, som, offset, match_len, in_anchored,
in_catchup, from_mpv, 0);
return can_stop_matching(scratch) ? MO_HALT_MATCHING : MO_CONTINUE_MATCHING;
}

View File

@ -644,16 +644,15 @@ int roseReportAdaptor_i(u64a som, u64a offset, ReportID id, void *context) {
const struct RoseEngine *rose = scratch->core_info.rose;
assert(id < rose->reportProgramCount);
const u32 *programs = getByOffset(rose, rose->reportProgramOffset);
// Our match ID is the program offset.
const u32 program = id;
const size_t match_len = 0; // Unused in this path.
const char in_anchored = 0;
const char in_catchup = 0;
const char from_mpv = 0;
const char skip_mpv_catchup = 1;
hwlmcb_rv_t rv =
roseRunProgram(rose, scratch, programs[id], som, offset, match_len,
roseRunProgram(rose, scratch, program, som, offset, match_len,
in_anchored, in_catchup, from_mpv, skip_mpv_catchup);
if (rv == HWLM_TERMINATE_MATCHING) {
return MO_HALT_MATCHING;

View File

@ -43,7 +43,9 @@
#include "hwlm/hwlm.h" /* engine types */
#include "nfa/castlecompile.h"
#include "nfa/goughcompile.h"
#include "nfa/goughcompile_util.h"
#include "nfa/mcclellancompile.h"
#include "nfa/mcclellancompile_util.h"
#include "nfa/nfa_api_queue.h"
#include "nfa/nfa_build_util.h"
#include "nfa/nfa_internal.h"
@ -885,19 +887,25 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm,
const map<u32, vector<vector<CharReach>>> &triggers,
suffix_id suff, const CompileContext &cc) {
if (suff.castle()) {
auto n = buildRepeatEngine(*suff.castle(), triggers, cc);
auto remapped_castle = *suff.castle();
remapReportsToPrograms(remapped_castle, rm);
auto n = buildRepeatEngine(remapped_castle, triggers, cc);
assert(n);
return n;
}
if (suff.haig()) {
auto n = goughCompile(*suff.haig(), ssm.somPrecision(), cc);
auto remapped_haig = *suff.haig();
remapReportsToPrograms(remapped_haig, rm);
auto n = goughCompile(remapped_haig, ssm.somPrecision(), cc);
assert(n);
return n;
}
if (suff.dfa()) {
auto d = mcclellanCompile(*suff.dfa(), cc);
auto remapped_rdfa = *suff.dfa();
remapReportsToPrograms(remapped_rdfa, rm);
auto d = mcclellanCompile(remapped_rdfa, cc);
assert(d);
return d;
}
@ -910,7 +918,9 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm,
// Take a shot at the LBR engine.
if (oneTop) {
auto lbr = constructLBR(holder, triggers.at(0), cc);
auto remapped_holder = cloneHolder(holder);
remapReportsToPrograms(*remapped_holder, rm);
auto lbr = constructLBR(*remapped_holder, triggers.at(0), cc);
if (lbr) {
return lbr;
}
@ -926,6 +936,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm,
auto rdfa = buildMcClellan(holder, &rm, false, triggers.at(0),
cc.grey);
if (rdfa) {
remapReportsToPrograms(*rdfa, rm);
auto d = mcclellanCompile(*rdfa, cc);
assert(d);
if (cc.grey.roseMcClellanSuffix != 2) {
@ -1267,12 +1278,16 @@ public:
// Builds an NFA engine from a raw_dfa with mcclellanCompile().
// NOTE(review): the first return below looks like the pre-change line of this
// hunk; read literally it makes the statements after it unreachable -- confirm.
aligned_unique_ptr<NFA> operator()(unique_ptr<raw_dfa> &rdfa) const {
// Unleash the McClellan!
return mcclellanCompile(*rdfa, build.cc);
// Remap reports on a copy, so the raw_dfa behind `rdfa` is not modified.
raw_dfa tmp(*rdfa);
remapReportsToPrograms(tmp, build.rm);
return mcclellanCompile(tmp, build.cc);
}
// Builds an NFA engine from a raw_som_dfa with goughCompile(), using the SOM
// precision reported by build.ssm.
// NOTE(review): the first return below looks like the pre-change line of this
// hunk; read literally it makes the statements after it unreachable -- confirm.
aligned_unique_ptr<NFA> operator()(unique_ptr<raw_som_dfa> &haig) const {
// Unleash the Goughfish!
return goughCompile(*haig, build.ssm.somPrecision(), build.cc);
// Remap reports on a copy, so the raw_som_dfa behind `haig` is not modified.
raw_som_dfa tmp(*haig);
remapReportsToPrograms(tmp, build.rm);
return goughCompile(tmp, build.ssm.somPrecision(), build.cc);
}
aligned_unique_ptr<NFA> operator()(unique_ptr<NGHolder> &holder) const {
@ -1327,6 +1342,16 @@ aligned_unique_ptr<NFA> buildOutfix(RoseBuildImpl &build, OutfixInfo &outfix) {
return n;
}
/** \brief Overwrite the report field of every entry in puffettes and
 * triggered_puffettes with the value ReportManager::getProgramOffset()
 * returns for it. */
static
void remapReportsToPrograms(MpvProto &mpv, const ReportManager &rm) {
    for (auto &entry : mpv.puffettes) {
        const auto program = rm.getProgramOffset(entry.report);
        entry.report = program;
    }

    for (auto &entry : mpv.triggered_puffettes) {
        const auto program = rm.getProgramOffset(entry.report);
        entry.report = program;
    }
}
static
void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired,
bool *mpv_as_outfix) {
@ -1349,7 +1374,9 @@ void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired,
}
auto *mpv = mpv_outfix->mpv();
auto nfa = mpvCompile(mpv->puffettes, mpv->triggered_puffettes);
auto tmp = *mpv; // copy
remapReportsToPrograms(tmp, tbi.rm);
auto nfa = mpvCompile(tmp.puffettes, tmp.triggered_puffettes);
assert(nfa);
if (!nfa) {
throw CompileError("Unable to generate bytecode.");
@ -4000,6 +4027,8 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
auto boundary_out = makeBoundaryPrograms(*this, bc, boundary, dboundary);
u32 reportProgramOffset = buildReportPrograms(*this, bc);
// Build NFAs
set<u32> no_retrigger_queues;
bool mpv_as_outfix;
@ -4045,8 +4074,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
u32 eodIterOffset;
tie(eodIterProgramOffset, eodIterOffset) = buildEodAnchorProgram(*this, bc);
u32 reportProgramOffset = buildReportPrograms(*this, bc);
vector<mmbit_sparse_iter> activeLeftIter;
buildActiveLeftIter(leftInfoTable, activeLeftIter);

View File

@ -376,6 +376,8 @@ SmallWriteBuildImpl::build(u32 roseQuality) {
DEBUG_PRINTF("building rdfa %p\n", rdfa.get());
remapReportsToPrograms(*rdfa, rm);
u32 start_offset;
u32 small_region;
auto nfa = prepEngine(*rdfa, roseQuality, cc, &start_offset, &small_region);

View File

@ -46,7 +46,9 @@ using namespace std;
using namespace testing;
using namespace ue2;
static const string SCAN_DATA = "___foo______\n___foofoo_foo_^^^^^^^^^^^^^^^^^^^^^^__bar_bar______0_______z_____bar";
static const string SCAN_DATA = "___foo______\n___foofoo_foo_^^^^^^^^^^^^^^^^^^"
"^^^^__bar_bar______0_______z_____bar";
static const u32 MATCH_REPORT = 1024;
static
int onMatch(u64a, ReportID, void *ctx) {
@ -75,6 +77,8 @@ protected:
unique_ptr<NGWrapper> g = buildWrapper(rm, cc, parsed);
ASSERT_TRUE(g != nullptr);
rm.setProgramOffset(0, MATCH_REPORT);
const map<u32, u32> fixed_depth_tops;
const map<u32, vector<vector<CharReach>>> triggers;
bool compress_state = false;
@ -223,7 +227,7 @@ TEST_P(LimExModelTest, QueueExecToMatch) {
char rv = nfaQueueExecToMatch(nfa.get(), &q, end);
ASSERT_EQ(MO_MATCHES_PENDING, rv);
ASSERT_EQ(0, matches);
ASSERT_NE(0, nfaInAcceptState(nfa.get(), 0, &q));
ASSERT_NE(0, nfaInAcceptState(nfa.get(), MATCH_REPORT, &q));
nfaReportCurrentMatches(nfa.get(), &q);
ASSERT_EQ(1, matches);
@ -232,7 +236,7 @@ TEST_P(LimExModelTest, QueueExecToMatch) {
rv = nfaQueueExecToMatch(nfa.get(), &q, end);
ASSERT_EQ(MO_MATCHES_PENDING, rv);
ASSERT_EQ(1, matches);
ASSERT_NE(0, nfaInAcceptState(nfa.get(), 0, &q));
ASSERT_NE(0, nfaInAcceptState(nfa.get(), MATCH_REPORT, &q));
nfaReportCurrentMatches(nfa.get(), &q);
ASSERT_EQ(2, matches);
@ -241,7 +245,7 @@ TEST_P(LimExModelTest, QueueExecToMatch) {
rv = nfaQueueExecToMatch(nfa.get(), &q, end);
ASSERT_EQ(MO_MATCHES_PENDING, rv);
ASSERT_EQ(2, matches);
ASSERT_NE(0, nfaInAcceptState(nfa.get(), 0, &q));
ASSERT_NE(0, nfaInAcceptState(nfa.get(), MATCH_REPORT, &q));
nfaReportCurrentMatches(nfa.get(), &q);
ASSERT_EQ(3, matches);
@ -267,10 +271,10 @@ TEST_P(LimExModelTest, QueueExecRose) {
pushQueue(&q, MQE_TOP, 0);
pushQueue(&q, MQE_END, end);
char rv = nfaQueueExecRose(nfa.get(), &q, 0 /* report id */);
char rv = nfaQueueExecRose(nfa.get(), &q, MATCH_REPORT);
ASSERT_EQ(MO_MATCHES_PENDING, rv);
pushQueue(&q, MQE_START, end);
ASSERT_NE(0, nfaInAcceptState(nfa.get(), 0, &q));
ASSERT_NE(0, nfaInAcceptState(nfa.get(), MATCH_REPORT, &q));
}
TEST_P(LimExModelTest, CheckFinalState) {
@ -367,6 +371,8 @@ protected:
unique_ptr<NGWrapper> g = buildWrapper(rm, cc, parsed);
ASSERT_TRUE(g != nullptr);
rm.setProgramOffset(0, MATCH_REPORT);
const map<u32, u32> fixed_depth_tops;
const map<u32, vector<vector<CharReach>>> triggers;
bool compress_state = false;