From 5354b7a5ca559232d4ad99f2394921ce776341e0 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 6 Apr 2016 15:06:48 +1000 Subject: [PATCH] mpv: fire only one report when simple-exhaustible --- src/nfa/mpv.c | 6 ++++-- src/nfa/mpv_dump.cpp | 5 ++++- src/nfa/mpv_internal.h | 11 ++++++++++- src/nfa/mpvcompile.cpp | 9 ++++----- src/nfa/mpvcompile.h | 9 ++++++--- src/nfagraph/ng_puff.cpp | 10 +++++++--- 6 files changed, 35 insertions(+), 15 deletions(-) diff --git a/src/nfa/mpv.c b/src/nfa/mpv.c index 7ae15d24..4bae7b18 100644 --- a/src/nfa/mpv.c +++ b/src/nfa/mpv.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -124,7 +124,7 @@ char processReports(const struct mpv *m, u8 *reporters, DEBUG_PRINTF("report %u at %llu\n", curr->report, report_offset); - if (curr->unbounded) { + if (curr->unbounded && !curr->simple_exhaust) { assert(rl_count < m->puffette_count); *rl = curr->report; ++rl; @@ -176,6 +176,8 @@ char processReportsForRange(const struct mpv *m, u8 *reporters, return MO_CONTINUE_MATCHING; } + DEBUG_PRINTF("length=%zu, rl_count=%u\n", length, rl_count); + for (size_t i = 2; i <= length; i++) { for (u32 j = 0; j < rl_count; j++) { if (cb(first_offset + i, rl[j], ctxt) == MO_HALT_MATCHING) { diff --git a/src/nfa/mpv_dump.cpp b/src/nfa/mpv_dump.cpp index e731df87..504cc677 100644 --- a/src/nfa/mpv_dump.cpp +++ b/src/nfa/mpv_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -108,6 +108,9 @@ void dumpKilo(FILE *f, const mpv *m, const mpv_kilopuff *k) { fprintf(f, " Puffette %u\n", i); fprintf(f, " repeats: %u%s\n", p[i].repeats, 
p[i].unbounded ? "," : ""); + if (p[i].simple_exhaust) { + fprintf(f, " simple exhaustible\n"); + } fprintf(f, " report id: %u\n", p[i].report); } diff --git a/src/nfa/mpv_internal.h b/src/nfa/mpv_internal.h index 7a1a2273..a52853dc 100644 --- a/src/nfa/mpv_internal.h +++ b/src/nfa/mpv_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,6 +40,15 @@ struct mpv_puffette { u32 repeats; char unbounded; + + /** + * \brief Report is simple-exhaustible. + * + * If this is true, we do best-effort suppression of runs of reports, only + * delivering the first one. + */ + char simple_exhaust; + ReportID report; }; diff --git a/src/nfa/mpvcompile.cpp b/src/nfa/mpvcompile.cpp index e4741ef1..7521afef 100644 --- a/src/nfa/mpvcompile.cpp +++ b/src/nfa/mpvcompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -53,10 +53,8 @@ namespace ue2 { namespace { struct pcomp { bool operator()(const raw_puff &a, const raw_puff &b) const { - ORDER_CHECK(repeats); - ORDER_CHECK(unbounded); - ORDER_CHECK(report); - return false; + return tie(a.repeats, a.unbounded, a.simple_exhaust, a.report) < + tie(b.repeats, b.unbounded, b.simple_exhaust, b.report); } }; @@ -89,6 +87,7 @@ void writePuffette(mpv_puffette *out, const raw_puff &rp) { rp.report, out); out->repeats = rp.repeats; out->unbounded = rp.unbounded; + out->simple_exhaust = rp.simple_exhaust; out->report = rp.report; } diff --git a/src/nfa/mpvcompile.h b/src/nfa/mpvcompile.h index 01c0de79..ff4906ee 100644 --- a/src/nfa/mpvcompile.h +++ b/src/nfa/mpvcompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 
Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,12 +42,15 @@ namespace ue2 { struct raw_puff { raw_puff(u32 repeats_in, bool unbounded_in, ReportID report_in, - const CharReach &reach_in, bool auto_restart_in = false) + const CharReach &reach_in, bool auto_restart_in = false, + bool simple_exhaust_in = false) : repeats(repeats_in), unbounded(unbounded_in), - auto_restart(auto_restart_in), report(report_in), reach(reach_in) {} + auto_restart(auto_restart_in), simple_exhaust(simple_exhaust_in), + report(report_in), reach(reach_in) {} u32 repeats; /**< report match after this many matching bytes */ bool unbounded; /**< keep producing matches after repeats are reached */ bool auto_restart; /**< for /[^X]{n}/ type patterns */ + bool simple_exhaust; /**< first report will exhaust us */ ReportID report; CharReach reach; /**< = ~escapes */ }; diff --git a/src/nfagraph/ng_puff.cpp b/src/nfagraph/ng_puff.cpp index 501d8f7b..c8b6843d 100644 --- a/src/nfagraph/ng_puff.cpp +++ b/src/nfagraph/ng_puff.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -270,12 +270,16 @@ void constructPuff(NGHolder &g, const NFAVertex a, const NFAVertex puffv, DEBUG_PRINTF("constructing Puff for report %u\n", report); DEBUG_PRINTF("a = %u\n", g[a].index); + const Report &puff_report = rm.getReport(report); + const bool simple_exhaust = isSimpleExhaustible(puff_report); + const bool pureAnchored = a == g.start && singleStart(g); if (!pureAnchored) { if (a == g.startDs || a == g.start) { DEBUG_PRINTF("add outfix ar(false)\n"); - raw_puff rp(width, unbounded, report, cr, auto_restart); + raw_puff rp(width, unbounded, 
report, cr, auto_restart, + simple_exhaust); rose.addOutfix(rp); return; } @@ -300,7 +304,7 @@ void constructPuff(NGHolder &g, const NFAVertex a, const NFAVertex puffv, } else { DEBUG_PRINTF("add outfix ar(%d)\n", (int)auto_restart); assert(!auto_restart || unbounded); - raw_puff rp(width, unbounded, report, cr, auto_restart); + raw_puff rp(width, unbounded, report, cr, auto_restart, simple_exhaust); rose.addOutfix(rp); } }