/*
 * Copyright (c) 2015, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief Region Redundancy optimisation pass.
 *
 * Identifies and removes entire regions that are adjacent to a cyclic state
 * with a superset of their character reachability.
 */
#include "ng_region_redundancy.h"

#include "ng_holder.h"
#include "ng_region.h"
#include "ng_util.h"
#include "ue2common.h"
#include "util/container.h"
#include "util/graph_range.h"

#include <map>
#include <set>
#include <vector>

using namespace std;

namespace ue2 {

namespace {

/** Precalculated information about a region. */
struct RegionInfo {
    NFAVertex entry; //!< arbitrary entry vertex
    CharReach cr;    //!< union of the reach of all vertices in region
};

} // namespace

/**
 * \brief Check whether a region reports in a way the caller did not expect.
 *
 * \param g                 graph being examined
 * \param region            id of the region to inspect
 * \param expected_reports  report set that vertices leading to an accept are
 *                          required to carry
 * \param region_map        vertex to region id mapping for \p g
 *
 * \return true if a vertex of \p region is itself an accept vertex, or if a
 * vertex of \p region has an edge to an accept vertex while its report set
 * differs from \p expected_reports; false otherwise.
 */
static
bool regionHasUnexpectedAccept(const NGHolder &g, const u32 region,
                        const flat_set<ReportID> &expected_reports,
                        const ue2::unordered_map<NFAVertex, u32> &region_map) {
    /* TODO: only check vertices connected to accept/acceptEOD */
    for (auto v : vertices_range(g)) {
        if (region != region_map.at(v)) {
            continue;
        }

        if (is_any_accept(v, g)) {
            return true; /* encountering an actual special in the region is
                          * possible but definitely unexpected */
        }

        for (auto w : adjacent_vertices_range(v, g)) {
            if (is_any_accept(w, g) && g[v].reports != expected_reports) {
                return true;
            }
        }
    }
    return false;
}

/**
 * \brief Walk the regions following a cyclic vertex, collecting those that it
 * makes redundant.
 *
 * Starting from the region after the one containing \p cyc, regions are
 * visited in increasing id order. The walk stops at the first region id
 * missing from \p info, at the first region whose reach is not a subset of the
 * current reach, or at the first region that is neither an acceptable optional
 * region nor a singleton region.
 *
 * \param h            graph; self-loops on "straw" vertices are removed
 * \param cyc          cyclic vertex to start from
 * \param info         per-region precalculated information
 * \param region_map   vertex to region id mapping for \p h
 * \param deadRegions  output: ids of regions found to be redundant
 */
static
void processCyclicStateForward(NGHolder &h, NFAVertex cyc,
                        const map<u32, RegionInfo> &info,
                        const ue2::unordered_map<NFAVertex, u32> &region_map,
                        set<u32> &deadRegions) {
    u32 region = region_map.at(cyc);
    CharReach cr = h[cyc].char_reach;
    auto reports = h[cyc].reports;

    DEBUG_PRINTF("going forward from %u/%u\n", h[cyc].index, region);

    map<u32, RegionInfo>::const_iterator it;
    while ((it = info.find(++region)) != info.end()) {
        NFAVertex v = it->second.entry;
        const CharReach &region_cr = it->second.cr;
        assert(isRegionEntry(h, v, region_map) && !is_special(v, h));
        DEBUG_PRINTF("checking %u\n", h[v].index);

        if (!region_cr.isSubsetOf(cr)) {
            DEBUG_PRINTF("doesn't cover the reach of region %u\n", region);
            break;
        }

        if (isOptionalRegion(h, v, region_map)
            && !regionHasUnexpectedAccept(h, region, reports, region_map)) {
            DEBUG_PRINTF("cyclic state %u leads to optional region leader %u\n",
                         h[cyc].index, h[v].index);
            deadRegions.insert(region);
        } else if (isSingletonRegion(h, v, region_map)) {
            /* we can use this region as straw and suck in optional regions on
             * the other side. This allows us to transform /a{n,m}/ to /a{n}/ */
            // From here on, later regions are compared against the straw
            // vertex's reach and reports rather than those of cyc.
            cr = h[v].char_reach;
            reports = h[v].reports;
            DEBUG_PRINTF("%u is straw\n", region);
            assert(cr.isSubsetOf(h[cyc].char_reach));
            if (hasSelfLoop(v, h)) {
                DEBUG_PRINTF("%u is straw has a self-loop - kill\n", region);
                remove_edge(v, v, h);
            }
        } else {
            break;
        }
    }
}

/**
 * \brief Walk the regions preceding a cyclic vertex, collecting those that it
 * makes redundant.
 *
 * Mirror image of processCyclicStateForward(): regions are visited in
 * decreasing id order, starting from the region before the one containing
 * \p cyc. The stopping conditions are the same, with one addition: the walk
 * also stops after handling region 0 so that the unsigned region id never
 * wraps around.
 *
 * \param h            graph; self-loops on "straw" vertices are removed
 * \param cyc          cyclic vertex to start from
 * \param info         per-region precalculated information
 * \param region_map   vertex to region id mapping for \p h
 * \param deadRegions  output: ids of regions found to be redundant
 */
static
void processCyclicStateReverse(NGHolder &h, NFAVertex cyc,
                        const map<u32, RegionInfo> &info,
                        const ue2::unordered_map<NFAVertex, u32> &region_map,
                        set<u32> &deadRegions) {
    u32 region = region_map.at(cyc);
    CharReach cr = h[cyc].char_reach;
    auto reports = h[cyc].reports;

    DEBUG_PRINTF("going back from %u/%u\n", h[cyc].index, region);

    map<u32, RegionInfo>::const_iterator it;
    while ((it = info.find(--region)) != info.end()) {
        NFAVertex v = it->second.entry;
        const CharReach &region_cr = it->second.cr;
        assert(isRegionEntry(h, v, region_map) && !is_special(v, h));
        DEBUG_PRINTF("checking %u\n", h[v].index);

        if (!region_cr.isSubsetOf(cr)) {
            DEBUG_PRINTF("doesn't cover the reach of region %u\n", region);
            break;
        }

        if (isOptionalRegion(h, v, region_map)
            && !regionHasUnexpectedAccept(h, region, reports, region_map)) {
            DEBUG_PRINTF("cyclic state %u trails optional region leader %u\n",
                         h[cyc].index, h[v].index);
            deadRegions.insert(region);
        } else if (isSingletonRegion(h, v, region_map)) {
            /* we can use this region as a reverse straw and suck in optional
             * regions on the other side. This allows us to transform
             * /^a?a{n}.*b/ to /^a{n}.*b/ */
            // From here on, earlier regions are compared against the straw
            // vertex's reach and reports rather than those of cyc.
            cr = h[v].char_reach;
            reports = h[v].reports;
            DEBUG_PRINTF("%u is straw\n", region);
            assert(cr.isSubsetOf(h[cyc].char_reach));
            if (hasSelfLoop(v, h)) {
                DEBUG_PRINTF("%u is straw has a self-loop - kill\n", region);
                remove_edge(v, v, h);
            }
        } else {
            break;
        }

        if (!region) { // No wrapping
            break;
        }
    }
}

/**
 * \brief Build the per-region summary used by the redundancy pass.
 *
 * Special vertices and vertices in region 0 are ignored. For every other
 * vertex, its reach is unioned into its region's RegionInfo::cr; if the vertex
 * is a region entry it is recorded as RegionInfo::entry (when a region has
 * several entries, whichever is visited last is kept).
 *
 * \param g           graph to summarise
 * \param region_map  vertex to region id mapping for \p g
 * \return map from region id to its RegionInfo
 */
static
map<u32, RegionInfo> buildRegionInfoMap(const NGHolder &g,
                        const ue2::unordered_map<NFAVertex, u32> &region_map) {
    map<u32, RegionInfo> info;

    for (auto v : vertices_range(g)) {
        u32 region = region_map.at(v);
        if (is_special(v, g) || region == 0) {
            continue;
        }

        RegionInfo &ri = info[region];
        ri.cr |= g[v].char_reach;
        if (isRegionEntry(g, v, region_map)) {
            ri.entry = v;
        }
    }

    return info;
}

/**
 * \brief Check that nothing is reachable from start alone.
 *
 * \return true if every successor of \p h.start also has an edge from
 * \p h.startDs; false as soon as one successor of start lacks that edge.
 */
static
bool hasNoStartAnchoring(const NGHolder &h) {
    for (auto v : adjacent_vertices_range(h.start, h)) {
        if (!edge(h.startDs, v, h).second) {
            return false;
        }
    }
    return true;
}

/**
 * \brief Remove regions made redundant by an adjacent cyclic state.
 *
 * Regions are assigned, summarised, and then scanned outwards (forward and
 * reverse) from every singleton region whose entry vertex has a self-loop.
 * All non-special vertices belonging to regions marked dead by those scans are
 * removed from the graph.
 *
 * \param g    graph to optimise in place
 * \param som  start-of-match mode; when it is zero and the graph has no start
 *             anchoring, startDs is also used as a cyclic starting point, and
 *             when it is non-zero virtual starts are never used as one
 */
void removeRegionRedundancy(NGHolder &g, som_type som) {
    auto region_map = assignRegions(g);

    map<u32, RegionInfo> info = buildRegionInfoMap(g, region_map);

    set<u32> deadRegions;

    /* if we are not tracking som, we can treat sds as a cyclic region if there
     * is no anchoring */
    if (!som && hasNoStartAnchoring(g)) {
        processCyclicStateForward(g, g.startDs, info, region_map, deadRegions);
    }

    // Walk the region mapping, looking for regions that consist of a single
    // cyclic node.

    for (const auto &m : info) {
        // Must not have already been removed
        if (contains(deadRegions, m.first)) {
            continue;
        }

        NFAVertex v = m.second.entry;
        /* require a singleton cyclic region */
        if (!hasSelfLoop(v, g) || !isSingletonRegion(g, v, region_map)) {
            continue;
        }

        if (som && is_virtual_start(v, g)) {
            continue;
        }

        processCyclicStateForward(g, v, info, region_map, deadRegions);
        processCyclicStateReverse(g, v, info, region_map, deadRegions);
    }

    if (deadRegions.empty()) {
        return;
    }

    // Collect first, remove afterwards, so the graph is not modified while
    // its vertex range is being iterated.
    vector<NFAVertex> dead;

    for (auto v : vertices_range(g)) {
        if (is_special(v, g)) {
            continue;
        }
        u32 region = region_map.at(v);
        if (contains(deadRegions, region)) {
            dead.push_back(v);
        }
    }

    if (!dead.empty()) {
        DEBUG_PRINTF("removing %zu vertices from %zu dead regions\n",
                     dead.size(), deadRegions.size());
        remove_vertices(dead, g);
    }
}

} // namespace ue2