violet: initial implementation

This commit is contained in:
Alex Coyte
2016-07-15 13:07:00 +10:00
committed by Matthew Barr
parent b13a90e5d2
commit 5c5ec905cc
31 changed files with 3171 additions and 201 deletions

View File

@@ -57,6 +57,7 @@
#include "ng_small_literal_set.h"
#include "ng_som.h"
#include "ng_vacuous.h"
#include "ng_violet.h"
#include "ng_utf8.h"
#include "ng_util.h"
#include "ng_width.h"
@@ -244,6 +245,10 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
return true;
}
if (doViolet(*ng.rose, g, w.prefilter, cc)) {
return true;
}
if (splitOffRose(*ng.rose, g, w.prefilter, cc)) {
return true;
}
@@ -261,6 +266,10 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
return true;
}
if (doViolet(*ng.rose, g, w.prefilter, cc)) {
return true;
}
if (splitOffRose(*ng.rose, g, w.prefilter, cc)) {
return true;
}

View File

@@ -239,6 +239,16 @@ vertices(const NGHolder &h) {
*/
void clear_graph(NGHolder &h);
/** \brief Free-function form of NGHolder::renumberEdges(): simply forwards to
 * the member function on \a h. */
inline
void renumber_edges(NGHolder &h) {
h.renumberEdges();
}
/** \brief Free-function form of NGHolder::renumberVertices(): simply forwards
 * to the member function on \a h. */
inline
void renumber_vertices(NGHolder &h) {
h.renumberVertices();
}
/*
* \brief Clear and remove all of the vertices pointed to by the given iterator
* range.

View File

@@ -339,6 +339,12 @@ void processWorkQueue(const NGHolder &g, const NFAEdge &e,
g[source(e, g)].index, g[target(e, g)].index, s.size());
}
/* Returns true when \p s is reported as mixed sensitivity by
 * mixed_sensitivity() and is longer than MAX_MASK2_WIDTH. */
bool bad_mixed_sensitivity(const ue2_literal &s) {
    /* TODO: if the mixed-case region lies entirely within MAX_MASK2_WIDTH of
     * the end of the literal, we should be able to handle it */
    if (!mixed_sensitivity(s)) {
        return false;
    }

    return s.length() > MAX_MASK2_WIDTH;
}
static
u64a litUniqueness(const string &s) {
CharReach seen(s);
@@ -624,6 +630,48 @@ u64a compressAndScore(set<ue2_literal> &s) {
return score;
}
/* Variant of compressAndScore(): every literal flagged by
 * bad_mixed_sensitivity() is first swapped out for something weaker, then the
 * resulting set is compressed and scored. */
u64a sanitizeAndCompressAndScore(set<ue2_literal> &lits) {
    const size_t maxExploded = 8; // only case-explode this far

    /* TODO: the whole compression thing could be made better by systematically
     * considering replacing literal sets not just by common suffixes but also
     * by nocase literals. */

    vector<ue2_literal> replacements;

    auto it = lits.begin();
    while (it != lits.end()) {
        /* advance before inspecting: curr may be erased from the set below */
        auto curr = it++;
        if (!bad_mixed_sensitivity(*curr)) {
            continue;
        }

        /* *curr cannot stay in the set: copy it out and remove the original */
        ue2_literal bad = *curr;
        lits.erase(curr);

        /* try enumerating the case variants, giving up once there are more
         * than maxExploded of them */
        vector<ue2_literal> exploded;
        bool too_many = false;
        for (auto cit = caseIterateBegin(bad); cit != caseIterateEnd(); ++cit) {
            exploded.emplace_back(*cit, false);
            if (exploded.size() > maxExploded) {
                too_many = true;
                break;
            }
        }

        if (too_many) {
            /* too many variants: fall back to a single nocase literal */
            make_nocase(&bad);
            replacements.push_back(bad);
        } else {
            insert(&replacements, replacements.end(), exploded);
        }
    }

    insert(&lits, replacements);
    return compressAndScore(lits);
}
u64a scoreSet(const set<ue2_literal> &s) {
if (s.empty()) {
return NO_LITERAL_AT_EDGE_SCORE;
@@ -674,7 +722,7 @@ set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAVertex &v,
return s;
}
vector<u64a> scoreEdges(const NGHolder &g) {
vector<u64a> scoreEdges(const NGHolder &g, const flat_set<NFAEdge> &known_bad) {
assert(hasCorrectlyNumberedEdges(g));
vector<u64a> scores(num_edges(g));
@@ -682,8 +730,12 @@ vector<u64a> scoreEdges(const NGHolder &g) {
for (const auto &e : edges_range(g)) {
u32 eidx = g[e].index;
assert(eidx < scores.size());
set<ue2_literal> ls = getLiteralSet(g, e);
scores[eidx] = compressAndScore(ls);
if (contains(known_bad, e)) {
scores[eidx] = NO_LITERAL_AT_EDGE_SCORE;
} else {
set<ue2_literal> ls = getLiteralSet(g, e);
scores[eidx] = compressAndScore(ls);
}
}
return scores;
@@ -842,4 +894,49 @@ bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out) {
return true;
}
bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit) {
NFAVertex v = g.accept;
for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
NGHolder::inv_adjacency_iterator ai, ae;
tie(ai, ae) = inv_adjacent_vertices(v, g);
if (ai == ae) {
assert(0); // no predecessors?
return false;
}
v = *ai++;
if (ai != ae) {
DEBUG_PRINTF("branch, fail\n");
return false;
}
if (is_special(v, g)) {
DEBUG_PRINTF("special found, fail\n");
return false;
}
const CharReach &cr_g = g[v].char_reach;
const CharReach &cr_l = *it;
if (!cr_l.isSubsetOf(cr_g)) {
/* running over the prefix is needed to prevent false postives */
DEBUG_PRINTF("reach fail\n");
return false;
}
}
// Our last value for v should have only start states for predecessors.
for (auto u : inv_adjacent_vertices_range(v, g)) {
if (!is_any_start(u, g)) {
DEBUG_PRINTF("pred is not start\n");
return false;
}
}
assert(num_vertices(g) == lit.length() + N_SPECIALS);
DEBUG_PRINTF("ok\n");
return true;
}
} // namespace ue2

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -42,9 +42,7 @@
namespace ue2 {
#define NO_LITERAL_AT_EDGE_SCORE 10000000ULL
/* Score for special-to-special edges */
#define INVALID_EDGE_CAP 100000000ULL
#define INVALID_EDGE_CAP 100000000ULL /* special-to-special score */
class NGHolder;
@@ -59,9 +57,20 @@ std::set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAVertex &v,
bool only_first_encounter = true);
std::set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAEdge &e);
/** Score all the edges in the given graph, returning them in \p scores indexed
/**
* Returns true if we are unable to use a mixed sensitivity literal in rose (as
* our literal matchers are generally either case sensitive or not).
*
* Shortish mixed sensitivity literals can be handled by confirm checks in rose
* and are not flagged as bad.
*/
bool bad_mixed_sensitivity(const ue2_literal &s);
/**
* Score all the edges in the given graph, returning them in \p scores indexed
* by edge_index. */
std::vector<u64a> scoreEdges(const NGHolder &h);
std::vector<u64a> scoreEdges(const NGHolder &h,
const flat_set<NFAEdge> &known_bad = {});
/** Returns a score for a literal set. Lower scores are better. */
u64a scoreSet(const std::set<ue2_literal> &s);
@@ -69,6 +78,12 @@ u64a scoreSet(const std::set<ue2_literal> &s);
/** Compress a literal set to fewer literals. */
u64a compressAndScore(std::set<ue2_literal> &s);
/**
* Compress a literal set to fewer literals and replace any long mixed
* sensitivity literals with supported literals.
*/
u64a sanitizeAndCompressAndScore(std::set<ue2_literal> &s);
bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out,
NGHolder *rhs);
@@ -77,6 +92,10 @@ bool splitOffAnchoredLeadingLiteral(const NGHolder &g, ue2_literal *lit_out,
bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out);
/** \brief Returns true if the given literal is the only thing in the graph,
* from (start or startDs) to accept. */
bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit);
} // namespace ue2
#endif

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -186,7 +186,7 @@ bool splitOffLiteral(NG &ng, NGWrapper &g, NFAVertex v, const bool anchored,
/** \brief Split off literals. True if any changes were made to the graph. */
bool splitOffLiterals(NG &ng, NGWrapper &g) {
if (!ng.cc.grey.allowRose) {
if (!ng.cc.grey.allowLiteral) {
return false;
}

View File

@@ -773,51 +773,6 @@ unique_ptr<VertLitInfo> LitCollection::pickNext() {
}
/** \brief Returns true if the given literal is the only thing in the graph,
* from start to accept.
*
* Walks backwards from accept, one vertex per literal character (last
* character first). Fails on any branch, on meeting a special vertex, or when
* a vertex's reach is not exactly equal to the literal character's reach. */
static
bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit) {
NFAVertex v = g.accept;
for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
NGHolder::inv_adjacency_iterator ai, ae;
tie(ai, ae) = inv_adjacent_vertices(v, g);
if (ai == ae) {
assert(0); // no predecessors?
return false;
}
// Step to the first predecessor; any further predecessor is a branch.
v = *ai++;
if (ai != ae) {
DEBUG_PRINTF("branch, fail\n");
return false;
}
if (is_special(v, g)) {
DEBUG_PRINTF("special found, fail\n");
return false;
}
const CharReach &cr = g[v].char_reach;
// Exact equality is required here, not just containment.
if (cr != *it) {
DEBUG_PRINTF("reach fail\n");
return false;
}
}
// Our last value for v should have only start states for predecessors.
for (auto u : inv_adjacent_vertices_range(v, g)) {
if (!is_any_start(u, g)) {
DEBUG_PRINTF("pred is not start\n");
return false;
}
}
// Sanity check: one vertex per literal character plus the specials.
assert(num_vertices(g) == lit.length() + N_SPECIALS);
DEBUG_PRINTF("ok\n");
return true;
}
static
bool can_match(const NGHolder &g, const ue2_literal &lit, bool overhang_ok) {
set<NFAVertex> curr, next;
@@ -933,20 +888,11 @@ u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
return delay;
}
static
void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
u32 delay) {
u32 delay, const vector<NFAVertex> &preds) {
assert(delay <= lit.length());
DEBUG_PRINTF("adding on '%s' %u\n", ((const string &)lit).c_str(), delay);
vector<NFAVertex> preds;
insert(&preds, preds.end(), inv_adjacent_vertices(g.accept, g));
clear_in_edges(g.accept, g);
for (auto v : preds) {
g[v].reports.clear(); /* clear report from old accepts */
}
NFAVertex prev = g.accept;
auto it = lit.rbegin();
while (delay--) {
@@ -972,6 +918,19 @@ void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
assert(allMatchStatesHaveReports(g));
}
/* Convenience overload: detaches accept from its current predecessors,
 * clears the reports on those predecessors and then hands them to the
 * overload that takes an explicit predecessor list. */
void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
                                  u32 delay) {
    /* snapshot accept's predecessors before its in-edges are removed */
    NGHolder::inv_adjacency_iterator pi, pe;
    tie(pi, pe) = inv_adjacent_vertices(g.accept, g);
    vector<NFAVertex> old_preds(pi, pe);

    clear_in_edges(g.accept, g);

    for (auto u : old_preds) {
        g[u].reports.clear(); /* clear report from old accepts */
    }

    restoreTrailingLiteralStates(g, lit, delay, old_preds);
}
/* return false if we should get rid of the edge altogether */
static
bool removeLiteralFromLHS(RoseInGraph &ig, const RoseInEdge &lhs,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -33,8 +33,11 @@
#ifndef NG_ROSE_H
#define NG_ROSE_H
#include "ng_holder.h"
#include "ue2common.h"
#include <vector>
namespace ue2 {
class NGHolder;
@@ -65,6 +68,13 @@ bool checkRose(const ReportManager &rm, const NGHolder &h, bool prefilter,
u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
u32 max_delay, bool overhang_ok = true);
void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
u32 delay);
void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
u32 delay,
const std::vector<NFAVertex> &preds);
} // namespace ue2
#endif // NG_ROSE_H

View File

@@ -2064,8 +2064,7 @@ sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
ReportManager &rm = ng.rm;
SomSlotManager &ssm = ng.ssm;
// This approach relies on Rose.
if (!cc.grey.allowRose) {
if (!cc.grey.allowHaigLit) {
return SOMBE_FAIL;
}

View File

@@ -100,7 +100,12 @@ void splitLHS(const NGHolder &base, const vector<NFAVertex> &pivots,
add_edge((*lhs_map)[pivot], lhs->accept, *lhs);
}
pruneUseless(*lhs);
/* should do the renumbering unconditionally as we know edges are already
* misnumbered */
pruneUseless(*lhs, false);
renumber_edges(*lhs);
renumber_vertices(*lhs);
filterSplitMap(*lhs, lhs_map);
switch (base.kind) {
@@ -148,7 +153,12 @@ void splitRHS(const NGHolder &base, const vector<NFAVertex> &pivots,
assert(contains(*rhs_map, pivot));
add_edge(rhs->start, (*rhs_map)[pivot], *rhs);
}
pruneUseless(*rhs);
/* should do the renumbering unconditionally as we know edges are already
* misnumbered */
pruneUseless(*rhs, false);
renumber_edges(*rhs);
renumber_vertices(*rhs);
filterSplitMap(*rhs, rhs_map);
switch (base.kind) {

View File

@@ -209,6 +209,15 @@ bool isAnchored(const NGHolder &g) {
return true;
}
/* True if every successor of start is either startDs itself or also has an
 * edge from startDs. */
bool isFloating(const NGHolder &g) {
    for (auto succ : adjacent_vertices_range(g.start, g)) {
        if (succ == g.startDs) {
            continue;
        }

        /* a successor reachable from start but not from startDs */
        if (!edge(g.startDs, succ, g).second) {
            return false;
        }
    }

    return true;
}
bool isAcyclic(const NGHolder &g) {
try {
depth_first_search(
@@ -657,7 +666,8 @@ bool hasCorrectlyNumberedVertices(const NGHolder &g) {
}
ids[id] = true;
}
return find(ids.begin(), ids.end(), false) == ids.end();
return find(ids.begin(), ids.end(), false) == ids.end()
&& num_vertices(g) == num_vertices(g.g);
}
/** Assertion: returns true if the edges in this graph are contiguously (and
@@ -672,8 +682,10 @@ bool hasCorrectlyNumberedEdges(const NGHolder &g) {
}
ids[id] = true;
}
return find(ids.begin(), ids.end(), false) == ids.end();
return find(ids.begin(), ids.end(), false) == ids.end()
&& num_edges(g) == num_edges(g.g);
}
#endif // NDEBUG
} // namespace ue2

View File

@@ -228,6 +228,10 @@ bool isVacuous(const NGHolder &h);
* proper successors). */
bool isAnchored(const NGHolder &h);
/** \brief True if the graph contains no anchored vertices (start has no
* successors aside from startDs or vertices connected to startDs). */
bool isFloating(const NGHolder &h);
/** True if the graph contains no back-edges at all, other than the
* startDs self-loop. */
bool isAcyclic(const NGHolder &g);

2642
src/nfagraph/ng_violet.cpp Normal file

File diff suppressed because it is too large Load Diff

52
src/nfagraph/ng_violet.h Normal file
View File

@@ -0,0 +1,52 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Violet method of rose construction from NGHolder.
*/
#ifndef NG_VIOLET_H
#define NG_VIOLET_H
#include "ue2common.h"
namespace ue2 {
class NGHolder;
class RoseBuild;
struct CompileContext;
/** \brief Attempt to consume the entire pattern in graph \a h with Rose.
* Returns true if successful.
*
* \param rose Rose builder passed in by the caller.
* \param h Graph holding the pattern; taken by const reference.
* \param prefilter NOTE(review): presumably whether the pattern is being
*        compiled in prefilter mode -- confirm against the implementation.
* \param cc Compile context.
* \return True if successful.
*/
bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter,
const CompileContext &cc);
} // namespace ue2
#endif