ComponentRepeat: wire R{0,N} as (R{1,N})?

Change the way that we wire up the edges in a bounded repeat to avoid
large fan-out from predecessors.
This commit is contained in:
Justin Viiret 2015-12-15 14:34:25 +11:00 committed by Matthew Barr
parent 98eff64edf
commit 997c0c9efd
2 changed files with 29 additions and 33 deletions

View File

@ -209,7 +209,7 @@ void ComponentRepeat::buildFollowSet(GlushkovBuildState &bs,
}
}
wireRepeats(bs, lastPos);
wireRepeats(bs);
DEBUG_PRINTF("leave\n");
}
@ -279,26 +279,24 @@ vector<PositionInfo> ComponentRepeat::last() const {
assert(!m_firsts.empty()); // notePositions should already have run
assert(!m_lasts.empty());
// Optimisation: when we're not maintaining edge priorities, handling
// optional repeats has been taken care of by our FIRSTS. Thus, only
// the last mandatory repeat and (if different) the last optional
// repeat contributes to lasts.
if (m_min) {
const vector<PositionInfo> &l = m_lasts[m_min - 1];
lasts.insert(lasts.end(), l.begin(), l.end());
}
const auto &l = m_min ? m_lasts[m_min - 1] : m_lasts[0];
lasts.insert(lasts.end(), l.begin(), l.end());
if (!m_min || m_min != m_lasts.size()) {
lasts.insert(lasts.end(), m_lasts.back().begin(), m_lasts.back().end());
}
DEBUG_PRINTF("lasts = %s\n",
dumpPositions(lasts.begin(), lasts.end()).c_str());
return lasts;
}
void ComponentRepeat::wireRepeats(GlushkovBuildState &bs,
const vector<PositionInfo> &lastPos) {
void ComponentRepeat::wireRepeats(GlushkovBuildState &bs) {
/* note: m_lasts[0] already valid */
u32 copies = m_firsts.size();
const bool isEmpty = sub_comp->empty();
const vector<PositionInfo> &optLasts = m_min ? m_lasts[m_min - 1] : lastPos;
const vector<PositionInfo> &optLasts =
m_min ? m_lasts[m_min - 1] : m_lasts[0];
if (!copies) {
goto inf_check;
@ -317,7 +315,7 @@ void ComponentRepeat::wireRepeats(GlushkovBuildState &bs,
DEBUG_PRINTF("wiring up %d optional repeats\n", copies - m_min);
for (u32 rep = MAX(m_min, 1); rep < copies; rep++) {
vector<PositionInfo> lasts = m_lasts[rep - 1];
if (m_min && rep != m_min) {
if (rep != m_min) {
lasts.insert(lasts.end(), optLasts.begin(), optLasts.end());
sort(lasts.begin(), lasts.end());
lasts.erase(unique(lasts.begin(), lasts.end()), lasts.end());
@ -340,8 +338,8 @@ void ComponentRepeat::precalc_firsts() {
/* For a normal repeat, each of our optional repeats has an epsilon at the end
* of its firsts list.
*/
for (u32 i = m_min; i < m_firsts.size();i++) {
m_firsts[i].insert(m_firsts[i].end(), GlushkovBuildState::POS_EPSILON);
for (u32 i = m_min; i < m_firsts.size(); i++) {
m_firsts[i].push_back(GlushkovBuildState::POS_EPSILON);
}
firsts_cache.clear();
@ -352,11 +350,7 @@ void ComponentRepeat::precalc_firsts() {
assert(!m_firsts.empty()); // notePositions should already have run
const vector<PositionInfo> &f = m_firsts.front();
// If we're running without edge priorities, then we want to generate the
// repeat in such a way that the firsts do all the work. This will minimise
// the number of exceptional states in a LimEx NFA implementation.
if (!m_min || sub_comp->empty()) {
if (sub_comp->empty()) {
// Emptiable: all our repeats contribute to firsts.
// Each repeat's firsts is spliced in at the location of the epsilon
// (if any) in the previous repeat's firsts.

View File

@ -42,30 +42,33 @@
namespace ue2 {
/** \brief Encapsulates a repeat of a subexpression ('*', '+', '?', '{M,N}',
/**
* \brief Encapsulates a repeat of a subexpression ('*', '+', '?', '{M,N}',
* etc).
*
* Ascii Art Time:
* ASCII Art Time:
*
* Our standard representation of standard repeats. Other constructions (fan-in
* vs fan-out) would also be possible and equivalent for our purposes.
*
* {n,m}
*
* S->M->M->M->O->O->O->T
* | ^ ^ ^
* | | | |
* \-----------/
* S->M->M->M->O->O->O->T
* | ^ ^ ^
* | | | |
* \-----------/
*
* {0,m}
*
* S->O->O->O->T
* | ^ ^ ^
* | | | |
* \-----------/
* /-----------\
* | |
* | V
* S->O->O->O->T
* | ^ ^ ^
* | | | |
* \--------/
*
*/
class ComponentRepeat : public Component {
friend class ConstructLiteralVisitor;
friend class DumpVisitor;
@ -120,8 +123,7 @@ protected:
/** Called by \ref buildFollowSet to connect up the various repeats. */
void precalc_firsts();
void postSubNotePositionHook();
void wireRepeats(GlushkovBuildState &bs,
const std::vector<PositionInfo> &lastPos);
void wireRepeats(GlushkovBuildState &bs);
std::unique_ptr<Component> sub_comp;
u32 m_min;