From 4ce268af47aa74f8dcd0ce50eb06646af2d208b5 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 18 Jul 2016 12:41:31 +1000 Subject: [PATCH] ng: ensure that only match states have reports Ensure (and assert) that vertices without an edge to {accept, acceptEod} do not have reports set. --- src/nfagraph/ng.cpp | 5 +++++ src/nfagraph/ng_asserts.cpp | 3 ++- src/nfagraph/ng_calc_components.cpp | 6 ++++++ src/nfagraph/ng_limex.cpp | 10 +++++++--- src/nfagraph/ng_rose.cpp | 1 + src/nfagraph/ng_util.cpp | 20 ++++++++++++++------ src/nfagraph/ng_util.h | 22 ++++++++++++++++++---- unit/internal/lbr.cpp | 2 ++ unit/internal/limex_nfa.cpp | 11 ++++++++--- unit/internal/nfagraph_repeat.cpp | 10 +++++++--- 10 files changed, 70 insertions(+), 20 deletions(-) diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp index 35b2eb35..deca3fd5 100644 --- a/src/nfagraph/ng.cpp +++ b/src/nfagraph/ng.cpp @@ -105,6 +105,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w, DEBUG_PRINTF("doing som\n"); dumpComponent(g, "03_presom", w.expressionIndex, comp_id, ng.cc.grey); assert(hasCorrectlyNumberedVertices(g)); + assert(allMatchStatesHaveReports(w)); // First, we try the "SOM chain" support in ng_som.cpp. @@ -208,6 +209,8 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, dumpComponent(g, "01_begin", w.expressionIndex, comp_id, ng.cc.grey); + assert(allMatchStatesHaveReports(w)); + reduceGraph(g, som, w.utf8, cc); dumpComponent(g, "02_reduced", w.expressionIndex, comp_id, ng.cc.grey); @@ -232,6 +235,8 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, } } + assert(allMatchStatesHaveReports(w)); + if (splitOffAnchoredAcyclic(*ng.rose, g, cc)) { return true; } diff --git a/src/nfagraph/ng_asserts.cpp b/src/nfagraph/ng_asserts.cpp index 2d02751f..e9e39345 100644 --- a/src/nfagraph/ng_asserts.cpp +++ b/src/nfagraph/ng_asserts.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -553,6 +553,7 @@ void ensureCodePointStart(ReportManager &rm, NGWrapper &g) { add_edge(g.startDs, v_4, g); remove_edge(orig, g); g.renumberEdges(); + clearReports(g); } } diff --git a/src/nfagraph/ng_calc_components.cpp b/src/nfagraph/ng_calc_components.cpp index 9365cfb3..658e7001 100644 --- a/src/nfagraph/ng_calc_components.cpp +++ b/src/nfagraph/ng_calc_components.cpp @@ -363,6 +363,12 @@ void splitIntoComponents(const NGHolder &g, deque> &comps, *shell_comp = true; } + // Ensure that only vertices with accept edges have reports. + for (auto &gc : comps) { + assert(gc); + clearReports(*gc); + } + // We should never produce empty component graphs. assert(all_of(begin(comps), end(comps), [](const unique_ptr &g_comp) { diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp index a82d18b6..a8a5113d 100644 --- a/src/nfagraph/ng_limex.cpp +++ b/src/nfagraph/ng_limex.cpp @@ -79,13 +79,17 @@ bool sanityCheckGraph(const NGHolder &g, } } - // Vertices with edges to accept or acceptEod must have reports. + // Vertices with edges to accept or acceptEod must have reports and + // other vertices must not have them. if (is_match_vertex(v, g) && v != g.accept) { if (g[v].reports.empty()) { - DEBUG_PRINTF("vertex %u has no reports\n", - g[v].index); + DEBUG_PRINTF("vertex %u has no reports\n", g[v].index); return false; } + } else if (!g[v].reports.empty()) { + DEBUG_PRINTF("vertex %u has reports but no accept edge\n", + g[v].index); + return false; } // Participant vertices should have distinct state indices. diff --git a/src/nfagraph/ng_rose.cpp b/src/nfagraph/ng_rose.cpp index 4b16364a..aba4a7c3 100644 --- a/src/nfagraph/ng_rose.cpp +++ b/src/nfagraph/ng_rose.cpp @@ -872,6 +872,7 @@ u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, } clear_in_edges(g.accept, g); + clearReports(g); vector verts(pred.begin(), pred.end()); sort(verts.begin(), verts.end(), VertexIndexOrdering(g)); diff --git a/src/nfagraph/ng_util.cpp b/src/nfagraph/ng_util.cpp index e9f6be55..c629d553 100644 --- a/src/nfagraph/ng_util.cpp +++ b/src/nfagraph/ng_util.cpp @@ -631,16 +631,18 @@ unique_ptr cloneHolder(const NGHolder &in) { } #ifndef NDEBUG -/** \brief Used in sanity-checking assertions: returns true if all vertices - * leading to accept or acceptEod have at least one report ID. */ + bool allMatchStatesHaveReports(const NGHolder &g) { + unordered_set reporters; for (auto v : inv_adjacent_vertices_range(g.accept, g)) { if (g[v].reports.empty()) { DEBUG_PRINTF("vertex %u has no reports!\n", g[v].index); return false; } + reporters.insert(v); } + for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { if (v == g.accept) { continue; // stylised edge @@ -650,12 +652,20 @@ bool allMatchStatesHaveReports(const NGHolder &g) { g[v].index); return false; } + reporters.insert(v); } + + for (auto v : vertices_range(g)) { + if (!contains(reporters, v) && !g[v].reports.empty()) { + DEBUG_PRINTF("vertex %u is not a match state, but has reports!\n", + g[v].index); + return false; + } + } + return true; } -/** Assertion: returns true if the vertices in this graph are contiguously (and - * uniquely) numbered from zero. */ bool hasCorrectlyNumberedVertices(const NGHolder &g) { size_t count = num_vertices(g); vector ids(count, false); @@ -670,8 +680,6 @@ bool hasCorrectlyNumberedVertices(const NGHolder &g) { && num_vertices(g) == num_vertices(g.g); } -/** Assertion: returns true if the edges in this graph are contiguously (and - * uniquely) numbered from zero. */ bool hasCorrectlyNumberedEdges(const NGHolder &g) { size_t count = num_edges(g); vector ids(count, false); diff --git a/src/nfagraph/ng_util.h b/src/nfagraph/ng_util.h index 833523c7..4f58dc45 100644 --- a/src/nfagraph/ng_util.h +++ b/src/nfagraph/ng_util.h @@ -297,15 +297,29 @@ void clearReports(NGHolder &g); void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new); #ifndef NDEBUG -// Assertions: only available in internal builds -/** \brief Used in sanity-checking assertions: returns true if all vertices - * leading to accept or acceptEod have at least one report ID. */ +// Assertions: only available in internal builds. + +/** + * Used in sanity-checking assertions: returns true if all vertices + * with edges to accept or acceptEod have at least one report ID. Additionally, + * checks that ONLY vertices with edges to accept or acceptEod has reports. + */ bool allMatchStatesHaveReports(const NGHolder &g); +/** + * Assertion: returns true if the vertices in this graph are contiguously (and + * uniquely) numbered from zero. + */ bool hasCorrectlyNumberedVertices(const NGHolder &g); + +/** + * Assertion: returns true if the edges in this graph are contiguously (and + * uniquely) numbered from zero. + */ bool hasCorrectlyNumberedEdges(const NGHolder &g); -#endif + +#endif // NDEBUG } // namespace ue2 diff --git a/unit/internal/lbr.cpp b/unit/internal/lbr.cpp index bd799c0f..e40bda02 100644 --- a/unit/internal/lbr.cpp +++ b/unit/internal/lbr.cpp @@ -36,6 +36,7 @@ #include "nfa/nfa_internal.h" #include "nfa/nfa_api_util.h" #include "nfagraph/ng_lbr.h" +#include "nfagraph/ng_util.h" #include "util/alloc.h" #include "util/compile_context.h" #include "grey.h" @@ -97,6 +98,7 @@ protected: ParsedExpression parsed(0, pattern.c_str(), flags, 0); unique_ptr g = buildWrapper(rm, cc, parsed); ASSERT_TRUE(g != nullptr); + clearReports(*g); ASSERT_TRUE(isLBR(*g, grey)); diff --git a/unit/internal/limex_nfa.cpp b/unit/internal/limex_nfa.cpp index 926bf6eb..6bb4fcb9 100644 --- a/unit/internal/limex_nfa.cpp +++ b/unit/internal/limex_nfa.cpp @@ -31,14 +31,15 @@ #include "grey.h" #include "compiler/compiler.h" -#include "nfagraph/ng.h" -#include "nfagraph/ng_limex.h" -#include "nfagraph/ng_restructuring.h" #include "nfa/limex_context.h" #include "nfa/limex_internal.h" #include "nfa/nfa_api.h" #include "nfa/nfa_api_util.h" #include "nfa/nfa_internal.h" +#include "nfagraph/ng.h" +#include "nfagraph/ng_limex.h" +#include "nfagraph/ng_restructuring.h" +#include "nfagraph/ng_util.h" #include "util/alloc.h" #include "util/target_info.h" @@ -76,6 +77,7 @@ protected: ParsedExpression parsed(0, expr.c_str(), flags, 0); unique_ptr g = buildWrapper(rm, cc, parsed); ASSERT_TRUE(g != nullptr); + clearReports(*g); rm.setProgramOffset(0, MATCH_REPORT); @@ -310,10 +312,12 @@ protected: ParsedExpression parsed(0, expr.c_str(), flags, 0); unique_ptr g = buildWrapper(rm, cc, parsed); ASSERT_TRUE(g != nullptr); + clearReports(*g); // Reverse the graph and add some reports on the accept vertices. NGHolder g_rev(NFA_REV_PREFIX); reverseHolder(*g, g_rev); + clearReports(g_rev); for (NFAVertex v : inv_adjacent_vertices_range(g_rev.accept, g_rev)) { g_rev[v].reports.insert(0); } @@ -367,6 +371,7 @@ protected: ReportManager rm(cc.grey); unique_ptr g = buildWrapper(rm, cc, parsed); ASSERT_TRUE(g != nullptr); + clearReports(*g); rm.setProgramOffset(0, MATCH_REPORT); diff --git a/unit/internal/nfagraph_repeat.cpp b/unit/internal/nfagraph_repeat.cpp index 2473d755..b34d1271 100644 --- a/unit/internal/nfagraph_repeat.cpp +++ b/unit/internal/nfagraph_repeat.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,6 +32,7 @@ #include "gtest/gtest.h" #include "nfagraph/ng_repeat.h" +#include "nfagraph/ng_util.h" #include "util/depth.h" #include "hs_compile.h" @@ -89,12 +90,15 @@ static const PureRepeatTest pureRepeatTests[] = { { "^..?..?..?..?..?", 5, 10 } }; -INSTANTIATE_TEST_CASE_P(PureRepeat, NFAPureRepeatTest, ValuesIn(pureRepeatTests)); +INSTANTIATE_TEST_CASE_P(PureRepeat, NFAPureRepeatTest, + ValuesIn(pureRepeatTests)); TEST_P(NFAPureRepeatTest, Check) { const PureRepeatTest &t = GetParam(); SCOPED_TRACE(testing::Message() << "Pattern: " << t.pattern); - unique_ptr w(constructGraph(t.pattern, HS_FLAG_ALLOWEMPTY)); + auto w = constructGraph(t.pattern, HS_FLAG_ALLOWEMPTY); + ASSERT_TRUE(w != nullptr); + clearReports(*w); PureRepeat repeat; bool result = isPureRepeat(*w, repeat);