diff --git a/src/nfagraph/ng_prefilter.cpp b/src/nfagraph/ng_prefilter.cpp index 54aeb28a..c0caf1b9 100644 --- a/src/nfagraph/ng_prefilter.cpp +++ b/src/nfagraph/ng_prefilter.cpp @@ -80,6 +80,10 @@ static const size_t BOUNDED_REPEAT_COUNT = 4; /** Scoring penalty for boundary regions. */ static const size_t PENALTY_BOUNDARY = 32; +/** Regions with max bounds greater than this value will have their max bound + * replaced with inf. */ +static const size_t MAX_REPLACE_BOUND = 10000; + namespace { /** Information describing a region. */ @@ -158,7 +162,7 @@ void markBoundaryRegions(const NGHolder &h, } u32 id = region_map.at(v); - map::iterator ri = regions.find(id); + auto ri = regions.find(id); if (ri == regions.end()) { continue; // Not tracking this region as it's too small. } @@ -176,16 +180,14 @@ map findRegionInfo(const NGHolder &h, continue; } u32 id = region_map.at(v); - RegionInfo &ri = regions.insert( - make_pair(id, RegionInfo(id))).first->second; + RegionInfo &ri = regions.emplace(id, RegionInfo(id)).first->second; ri.vertices.push_back(v); ri.reach |= h[v].char_reach; } // There's no point tracking more information about regions that we won't // consider replacing, so we remove them from the region map. - for (map::iterator it = regions.begin(); - it != regions.end();) { + for (auto it = regions.begin(); it != regions.end();) { if (it->second.vertices.size() < MIN_REPLACE_VERTICES) { regions.erase(it++); } else { @@ -217,7 +219,10 @@ void copyInEdges(NGHolder &g, NFAVertex from, NFAVertex to, if (contains(rverts, u)) { continue; } - if (edge(u, to, g).second) { + + // Check with edge_by_target to cope with predecessors with large + // fan-out. + if (edge_by_target(u, to, g).second) { continue; } @@ -250,17 +255,27 @@ void replaceRegion(NGHolder &g, const RegionInfo &ri, assert(ri.vertices.size() >= MIN_REPLACE_VERTICES); assert(ri.minWidth.is_finite()); + depth minWidth = ri.minWidth; + depth maxWidth = ri.maxWidth; + + if (maxWidth > depth(MAX_REPLACE_BOUND)) { + DEBUG_PRINTF("using inf instead of large bound %s\n", + maxWidth.str().c_str()); + maxWidth = depth::infinity(); + } + size_t replacementSize; - if (ri.minWidth == ri.maxWidth || ri.maxWidth.is_infinite()) { - replacementSize = ri.minWidth; // {N} or {N,} + if (minWidth == maxWidth || maxWidth.is_infinite()) { + replacementSize = minWidth; // {N} or {N,} } else { - replacementSize = ri.maxWidth; // {N,M} case + replacementSize = maxWidth; // {N,M} case } DEBUG_PRINTF("orig size %zu, replace size %zu\n", ri.vertices.size(), replacementSize); - deque verts; + vector verts; + verts.reserve(replacementSize); for (size_t i = 0; i < replacementSize; i++) { NFAVertex v = add_vertex(g); g[v].char_reach = ri.reach; @@ -360,7 +375,8 @@ void prefilterReductions(NGHolder &h, const CompileContext &cc) { return; } - DEBUG_PRINTF("graph with %zu vertices\n", num_vertices(h)); + DEBUG_PRINTF("before: graph with %zu vertices, %zu edges\n", + num_vertices(h), num_edges(h)); h.renumberVertices(); h.renumberEdges(); @@ -369,6 +385,10 @@ void prefilterReductions(NGHolder &h, const CompileContext &cc) { h.renumberVertices(); h.renumberEdges(); + + DEBUG_PRINTF("after: graph with %zu vertices, %zu edges\n", + num_vertices(h), num_edges(h)); + } } // namespace ue2 diff --git a/unit/hyperscan/bad_patterns.txt b/unit/hyperscan/bad_patterns.txt index fb2a2357..9fc3a413 100644 --- a/unit/hyperscan/bad_patterns.txt +++ b/unit/hyperscan/bad_patterns.txt @@ -117,7 +117,6 @@ 117:/[\x{ff]/ #Value in \x{...} sequence is non-hex or missing } at index 1. 118:/foo/{min_offset=10,max_offset=9} #In hs_expr_ext, min_offset must be less than or equal to max_offset. 120:/foo/{min_length=10,max_offset=9} #In hs_expr_ext, min_length must be less than or equal to max_offset. -121:/.e(?:(((eEbd..(d[^Be]{1,7}|A)){8,22}aD.){7}|EecA?(?:\b)c|bB[Dd])){29,37}[adb](?:.|A|c|[BEA]|D)..((?:c|[Cba]))?([Ee]|D)B+(.|[dbB]|E|E).[EcCe]ce(?:C|D)dD[EA]Ac.[aE]d/smiHWP #Pattern too large. 122:/ÀÀ/8 #Expression is not valid UTF-8. 123:/hello \6 world/P #Invalid back reference to expression 6. 124:/hello \6 world|dog/P #Invalid back reference to expression 6.