ComponentRepeat: wire R{0,N} as (R{1,N})?

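Change the way that we wire up the edges in a bounded repeat to avoid
large fan-out from predecessors: a repeat with a minimum of zero, R{0,N},
is now wired as an optional R{1,N}, i.e. as the regex (R{1,N})?.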
This commit is contained in:
Justin Viiret 2015-12-15 14:34:25 +11:00 committed by Matthew Barr
parent 98eff64edf
commit 997c0c9efd
2 changed files with 29 additions and 33 deletions

View File

@ -209,7 +209,7 @@ void ComponentRepeat::buildFollowSet(GlushkovBuildState &bs,
} }
} }
wireRepeats(bs, lastPos); wireRepeats(bs);
DEBUG_PRINTF("leave\n"); DEBUG_PRINTF("leave\n");
} }
@ -279,26 +279,24 @@ vector<PositionInfo> ComponentRepeat::last() const {
assert(!m_firsts.empty()); // notePositions should already have run assert(!m_firsts.empty()); // notePositions should already have run
assert(!m_lasts.empty()); assert(!m_lasts.empty());
// Optimisation: when we're not maintaining edge priorities, handling const auto &l = m_min ? m_lasts[m_min - 1] : m_lasts[0];
// optional repeats has been taken care of by our FIRSTS. Thus, only
// the last mandatory repeat and (if different) the last optional
// repeat contributes to lasts.
if (m_min) {
const vector<PositionInfo> &l = m_lasts[m_min - 1];
lasts.insert(lasts.end(), l.begin(), l.end()); lasts.insert(lasts.end(), l.begin(), l.end());
}
if (!m_min || m_min != m_lasts.size()) { if (!m_min || m_min != m_lasts.size()) {
lasts.insert(lasts.end(), m_lasts.back().begin(), m_lasts.back().end()); lasts.insert(lasts.end(), m_lasts.back().begin(), m_lasts.back().end());
} }
DEBUG_PRINTF("lasts = %s\n",
dumpPositions(lasts.begin(), lasts.end()).c_str());
return lasts; return lasts;
} }
void ComponentRepeat::wireRepeats(GlushkovBuildState &bs, void ComponentRepeat::wireRepeats(GlushkovBuildState &bs) {
const vector<PositionInfo> &lastPos) {
/* note: m_lasts[0] already valid */ /* note: m_lasts[0] already valid */
u32 copies = m_firsts.size(); u32 copies = m_firsts.size();
const bool isEmpty = sub_comp->empty(); const bool isEmpty = sub_comp->empty();
const vector<PositionInfo> &optLasts = m_min ? m_lasts[m_min - 1] : lastPos; const vector<PositionInfo> &optLasts =
m_min ? m_lasts[m_min - 1] : m_lasts[0];
if (!copies) { if (!copies) {
goto inf_check; goto inf_check;
@ -317,7 +315,7 @@ void ComponentRepeat::wireRepeats(GlushkovBuildState &bs,
DEBUG_PRINTF("wiring up %d optional repeats\n", copies - m_min); DEBUG_PRINTF("wiring up %d optional repeats\n", copies - m_min);
for (u32 rep = MAX(m_min, 1); rep < copies; rep++) { for (u32 rep = MAX(m_min, 1); rep < copies; rep++) {
vector<PositionInfo> lasts = m_lasts[rep - 1]; vector<PositionInfo> lasts = m_lasts[rep - 1];
if (m_min && rep != m_min) { if (rep != m_min) {
lasts.insert(lasts.end(), optLasts.begin(), optLasts.end()); lasts.insert(lasts.end(), optLasts.begin(), optLasts.end());
sort(lasts.begin(), lasts.end()); sort(lasts.begin(), lasts.end());
lasts.erase(unique(lasts.begin(), lasts.end()), lasts.end()); lasts.erase(unique(lasts.begin(), lasts.end()), lasts.end());
@ -340,8 +338,8 @@ void ComponentRepeat::precalc_firsts() {
/* For normal repeat, our optional repeats each have an epsilon at the end /* For normal repeat, our optional repeats each have an epsilon at the end
* of their firsts lists. * of their firsts lists.
*/ */
for (u32 i = m_min; i < m_firsts.size();i++) { for (u32 i = m_min; i < m_firsts.size(); i++) {
m_firsts[i].insert(m_firsts[i].end(), GlushkovBuildState::POS_EPSILON); m_firsts[i].push_back(GlushkovBuildState::POS_EPSILON);
} }
firsts_cache.clear(); firsts_cache.clear();
@ -352,11 +350,7 @@ void ComponentRepeat::precalc_firsts() {
assert(!m_firsts.empty()); // notePositions should already have run assert(!m_firsts.empty()); // notePositions should already have run
const vector<PositionInfo> &f = m_firsts.front(); const vector<PositionInfo> &f = m_firsts.front();
// If we're running without edge priorities, then we want to generate the if (sub_comp->empty()) {
// repeat in such a way that the firsts do all the work. This will minimise
// the number of exceptional states in a LimEx NFA implementation.
if (!m_min || sub_comp->empty()) {
// Emptiable: all our repeats contribute to firsts. // Emptiable: all our repeats contribute to firsts.
// Each repeat's firsts is spliced in at the location of the epsilon // Each repeat's firsts is spliced in at the location of the epsilon
// (if any) in the previous repeat's firsts. // (if any) in the previous repeat's firsts.

View File

@ -42,10 +42,11 @@
namespace ue2 { namespace ue2 {
/** \brief Encapsulates a repeat of a subexpression ('*', '+', '?', '{M,N}', /**
* \brief Encapsulates a repeat of a subexpression ('*', '+', '?', '{M,N}',
* etc). * etc).
* *
* Ascii Art Time: * ASCII Art Time:
* *
* Our standard representation of standard repeats. Other constructions (fan-in * Our standard representation of standard repeats. Other constructions (fan-in
* vs fan-out) would also be possible and equivalent for our purposes. * vs fan-out) would also be possible and equivalent for our purposes.
@ -59,13 +60,15 @@ namespace ue2 {
* *
* {0,m} * {0,m}
* *
* /-----------\
* | |
* | V
* S->O->O->O->T * S->O->O->O->T
* | ^ ^ ^ * | ^ ^ ^
* | | | | * | | | |
* \-----------/ * \--------/
* *
*/ */
class ComponentRepeat : public Component { class ComponentRepeat : public Component {
friend class ConstructLiteralVisitor; friend class ConstructLiteralVisitor;
friend class DumpVisitor; friend class DumpVisitor;
@ -120,8 +123,7 @@ protected:
/** Called by \ref buildFollowSet to connect up the various repeats. */ /** Called by \ref buildFollowSet to connect up the various repeats. */
void precalc_firsts(); void precalc_firsts();
void postSubNotePositionHook(); void postSubNotePositionHook();
void wireRepeats(GlushkovBuildState &bs, void wireRepeats(GlushkovBuildState &bs);
const std::vector<PositionInfo> &lastPos);
std::unique_ptr<Component> sub_comp; std::unique_ptr<Component> sub_comp;
u32 m_min; u32 m_min;