diff --git a/src/parser/ComponentRepeat.cpp b/src/parser/ComponentRepeat.cpp index 8cd88372..5ce703ac 100644 --- a/src/parser/ComponentRepeat.cpp +++ b/src/parser/ComponentRepeat.cpp @@ -209,7 +209,7 @@ void ComponentRepeat::buildFollowSet(GlushkovBuildState &bs, } } - wireRepeats(bs, lastPos); + wireRepeats(bs); DEBUG_PRINTF("leave\n"); } @@ -279,26 +279,24 @@ vector ComponentRepeat::last() const { assert(!m_firsts.empty()); // notePositions should already have run assert(!m_lasts.empty()); - // Optimisation: when we're not maintaining edge priorities, handling - // optional repeats has been taken care of by our FIRSTS. Thus, only - // the last mandatory repeat and (if different) the last optional - // repeat contributes to lasts. - if (m_min) { - const vector &l = m_lasts[m_min - 1]; - lasts.insert(lasts.end(), l.begin(), l.end()); - } + const auto &l = m_min ? m_lasts[m_min - 1] : m_lasts[0]; + lasts.insert(lasts.end(), l.begin(), l.end()); + if (!m_min || m_min != m_lasts.size()) { lasts.insert(lasts.end(), m_lasts.back().begin(), m_lasts.back().end()); } + + DEBUG_PRINTF("lasts = %s\n", + dumpPositions(lasts.begin(), lasts.end()).c_str()); return lasts; } -void ComponentRepeat::wireRepeats(GlushkovBuildState &bs, - const vector &lastPos) { +void ComponentRepeat::wireRepeats(GlushkovBuildState &bs) { /* note: m_lasts[0] already valid */ u32 copies = m_firsts.size(); const bool isEmpty = sub_comp->empty(); - const vector &optLasts = m_min ? m_lasts[m_min - 1] : lastPos; + const vector &optLasts = + m_min ? m_lasts[m_min - 1] : m_lasts[0]; if (!copies) { goto inf_check; @@ -317,7 +315,7 @@ void ComponentRepeat::wireRepeats(GlushkovBuildState &bs, DEBUG_PRINTF("wiring up %d optional repeats\n", copies - m_min); for (u32 rep = MAX(m_min, 1); rep < copies; rep++) { vector lasts = m_lasts[rep - 1]; - if (m_min && rep != m_min) { + if (rep != m_min) { lasts.insert(lasts.end(), optLasts.begin(), optLasts.end()); sort(lasts.begin(), lasts.end()); lasts.erase(unique(lasts.begin(), lasts.end()), lasts.end()); @@ -340,8 +338,8 @@ void ComponentRepeat::precalc_firsts() { /* For normal repeat, our optional repeats each have an epsilon at the end * of their firsts lists. */ - for (u32 i = m_min; i < m_firsts.size();i++) { - m_firsts[i].insert(m_firsts[i].end(), GlushkovBuildState::POS_EPSILON); + for (u32 i = m_min; i < m_firsts.size(); i++) { + m_firsts[i].push_back(GlushkovBuildState::POS_EPSILON); } firsts_cache.clear(); @@ -352,11 +350,7 @@ void ComponentRepeat::precalc_firsts() { assert(!m_firsts.empty()); // notePositions should already have run const vector &f = m_firsts.front(); - // If we're running without edge priorities, then we want to generate the - // repeat in such a way that the firsts do all the work. This will minimise - // the number of exceptional states in a LimEx NFA implementation. - - if (!m_min || sub_comp->empty()) { + if (sub_comp->empty()) { // Emptiable: all our repeats contribute to firsts. // Each repeat's firsts is spliced in at the location of the epsilon // (if any) in the previous repeat's firsts. diff --git a/src/parser/ComponentRepeat.h b/src/parser/ComponentRepeat.h index e8305ffd..b708e062 100644 --- a/src/parser/ComponentRepeat.h +++ b/src/parser/ComponentRepeat.h @@ -42,30 +42,33 @@ namespace ue2 { -/** \brief Encapsulates a repeat of a subexpression ('*', '+', '?', '{M,N}', +/** + * \brief Encapsulates a repeat of a subexpression ('*', '+', '?', '{M,N}', * etc). * - * Ascii Art Time: + * ASCII Art Time: * * Our standard representation of standard repeats. Other constructions (fan-in * vs fan-out) would also be possible and equivalent for our purposes. * * {n,m} * - * S->M->M->M->O->O->O->T - * | ^ ^ ^ - * | | | | - * \-----------/ + * S->M->M->M->O->O->O->T + * | ^ ^ ^ + * | | | | + * \-----------/ * * {0,m} * - * S->O->O->O->T - * | ^ ^ ^ - * | | | | - * \-----------/ + * /-----------\ + * | | + * | V + * S->O->O->O->T + * | ^ ^ ^ + * | | | | + * \--------/ * */ - class ComponentRepeat : public Component { friend class ConstructLiteralVisitor; friend class DumpVisitor; @@ -120,8 +123,7 @@ protected: /** Called by \ref buildFollowSet to connect up the various repeats. */ void precalc_firsts(); void postSubNotePositionHook(); - void wireRepeats(GlushkovBuildState &bs, - const std::vector &lastPos); + void wireRepeats(GlushkovBuildState &bs); std::unique_ptr sub_comp; u32 m_min;