rose: shift program construction functions to rose_build_program

This commit is contained in:
Alex Coyte 2017-04-26 13:45:31 +10:00 committed by Matthew Barr
parent 82838f5728
commit bb29aeb298
7 changed files with 2567 additions and 2408 deletions

View File

@ -971,6 +971,7 @@ SET (hs_SRCS
src/rose/rose_build_misc.cpp
src/rose/rose_build_program.cpp
src/rose/rose_build_program.h
src/rose/rose_build_resources.h
src/rose/rose_build_role_aliasing.cpp
src/rose/rose_build_scatter.cpp
src/rose/rose_build_scatter.h

File diff suppressed because it is too large Load Diff

View File

@ -622,6 +622,11 @@ u64a findMaxOffset(const std::set<ReportID> &reports, const ReportManager &rm);
void normaliseLiteralMask(const ue2_literal &s, std::vector<u8> &msk,
std::vector<u8> &cmp);
/** \brief Returns the smallest min_offset over all the vertices of literal
 * \a lit_id. The literal is expected to have at least one vertex. */
u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id);
/** \brief Returns the largest max_offset over all the vertices of literal
 * \a lit_id. The literal is expected to have at least one vertex. */
u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id);
/** \brief Returns true if the target of edge \a e is an EOD-accept vertex with
 * no graph attached, the edge bounds are both zero and, in streaming mode, the
 * source of the edge is in the EOD-anchored table. */
bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e);
#ifndef NDEBUG
bool canImplementGraphs(const RoseBuildImpl &tbi);
#endif

View File

@ -909,6 +909,59 @@ u32 roseQuality(const RoseEngine *t) {
return 1;
}
/** \brief Finds the lowest min_offset among all the vertices that the literal
 * \a lit_id is attached to.
 *
 * The literal must have at least one vertex (asserted in debug builds). */
u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) {
    const auto &verts = build.literal_info.at(lit_id).vertices;
    assert(!verts.empty());

    // Seed with the largest representable value so that any vertex lowers it.
    u32 lowest = UINT32_MAX;
    for (const auto &v : verts) {
        const u32 candidate = build.g[v].min_offset;
        if (candidate < lowest) {
            lowest = candidate;
        }
    }
    return lowest;
}
/** \brief Finds the highest max_offset among all the vertices that the literal
 * \a lit_id is attached to.
 *
 * The literal must have at least one vertex (asserted in debug builds). */
u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) {
    const auto &verts = build.literal_info.at(lit_id).vertices;
    assert(!verts.empty());

    // Seed with zero so that any vertex with a larger offset raises it.
    u32 highest = 0;
    for (const auto &v : verts) {
        const u32 candidate = build.g[v].max_offset;
        if (highest < candidate) {
            highest = candidate;
        }
    }
    return highest;
}
/** \brief Decides whether the EOD-accept vertex reached via edge \a e can be
 * reported eagerly.
 *
 * Returns true only if the edge's target is an EOD-accept vertex with no graph
 * attached, the edge bounds are both zero and, when building for streaming
 * mode, the edge's source is in the EOD-anchored table. */
bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e) {
    const auto &g = build.g;
    const auto tgt = target(e, g);
    const auto &tgt_props = g[tgt];

    // Only EOD-accept vertices qualify, and if there's a graph between us and
    // EOD, we shouldn't be eager.
    if (!tgt_props.eod_accept || tgt_props.left) {
        return false;
    }

    // Must be exactly at EOD, i.e. both edge bounds are zero.
    const auto &edge_props = g[e];
    if (!(edge_props.minBound == 0 && edge_props.maxBound == 0)) {
        return false;
    }

    // In streaming mode, we can only eagerly report EOD for literals in the
    // EOD-anchored table, as that's the only time we actually know where EOD
    // is. In block mode, we always have this information.
    const auto src = source(e, g);
    return !build.cc.streaming || build.isInETable(src);
}
#ifndef NDEBUG
/** \brief Returns true if all the graphs (NFA, DFA, Haig, etc) in this Rose
* graph are implementable. */

File diff suppressed because it is too large Load Diff

View File

@ -42,8 +42,10 @@
namespace ue2 {
struct LookEntry;
class RoseEngineBlob;
class RoseInstruction;
struct RoseResources;
/**
* \brief Container for a list of program instructions.
@ -145,11 +147,161 @@ public:
bool operator()(const RoseProgram &prog1, const RoseProgram &prog2) const;
};
/* Removes any CHECK_HANDLED instructions from the given program */
void stripCheckHandledInstruction(RoseProgram &prog);
/** \brief Data only used during construction of various programs (literal,
 * anchored, delay, etc). */
struct ProgramBuild : noncopyable {
/** \brief Sets the three const configuration members; the maps below start
 * out empty and squashable_groups starts at zero. */
explicit ProgramBuild(u32 fMinLitOffset, size_t longLitThresh,
bool catchup)
: floatingMinLiteralMatchOffset(fMinLitOffset),
longLitLengthThreshold(longLitThresh), needs_catchup(catchup) {
}
/** Returns true if the program may read the interpreter's work_done flag */
bool reads_work_done_flag(const RoseProgram &prog);
/** \brief Minimum offset of a match from the floating table. */
const u32 floatingMinLiteralMatchOffset;
/** \brief Long literal length threshold, used in streaming mode. */
const size_t longLitLengthThreshold;
/** \brief True if reports need CATCH_UP instructions to catch up suffixes,
 * outfixes etc. */
const bool needs_catchup;
/** \brief Mapping from vertex to key, for vertices with a
 * CHECK_NOT_HANDLED instruction. */
ue2::unordered_map<RoseVertex, u32> handledKeys;
/** \brief Mapping from Rose literal ID to anchored program index. */
std::map<u32, u32> anchored_programs;
/** \brief Mapping from Rose literal ID to delayed program index. */
std::map<u32, u32> delay_programs;
/** \brief Mapping from every vertex to the groups that must be on for that
 * vertex to be reached. */
ue2::unordered_map<RoseVertex, rose_group> vertex_group_map;
/** \brief Global bitmap of groups that can be squashed. */
rose_group squashable_groups = 0;
};
void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program);
void addSuffixesEodProgram(RoseProgram &program);
void addMatcherEodProgram(RoseProgram &program);
/** \brief Sentinel queue index with all bits set; used as the default value of
 * left_build_info::queue. */
static constexpr u32 INVALID_QUEUE = ~0U;
/** \brief Build-time description of how a leftfix is implemented: either as an
 * engine (first constructor) or as a lookaround (second constructor).
 *
 * Both constructors are defined elsewhere; the in-class initialisers below
 * give the defaults for any member a constructor does not set. */
struct left_build_info {
// Constructor for an engine implementation.
left_build_info(u32 q, u32 l, u32 t, rose_group sm,
const std::vector<u8> &stops, u32 max_ql, u8 cm_count,
const CharReach &cm_cr);
// Constructor for a lookaround implementation.
explicit left_build_info(const std::vector<std::vector<LookEntry>> &looks);
u32 queue = INVALID_QUEUE; /* uniquely idents the left_build_info */
u32 lag = 0;
u32 transient = 0;
// Defaults to all bits set.
rose_group squash_mask = ~rose_group{0};
std::vector<u8> stopAlphabet;
u32 max_queuelen = 0;
u8 countingMiracleCount = 0;
CharReach countingMiracleReach;
u32 countingMiracleOffset = 0; /* populated later when laying out bytecode */
// NOTE(review): presumably set to true by the lookaround constructor --
// confirm against its definition.
bool has_lookaround = false;
// alternative implementation to the NFA
std::vector<std::vector<LookEntry>> lookaround;
};
/** \brief Non-copyable container for the lookaround data accumulated during
 * the build: a cache, the lookaround table and the two table sizes. */
struct lookaround_info : noncopyable {
/** \brief LookEntry list cache, so that we can reuse the look index and
 * reach index for the same lookaround. */
ue2::unordered_map<std::vector<std::vector<LookEntry>>,
std::pair<size_t, size_t>> cache;
/** \brief Lookaround table for Rose roles. */
std::vector<std::vector<std::vector<LookEntry>>> table;
/** \brief Lookaround look table size. */
size_t lookTableSize = 0;
/** \brief Lookaround reach table size.
 *
 * Single-path and multi-path lookarounds use bitvectors of different ranges
 * (32 and 256), so the look table size and the reach table size have to be
 * tracked separately. */
size_t reachTableSize = 0;
};
/**
 * \brief Provides a brief summary of properties of an NFA that has already been
 * finalised and stored in the blob.
 *
 * NOTE(review): none of the members has an in-class initialiser, so they are
 * presumably all filled in by the constructor (defined elsewhere) from \a nfa
 * and \a trans -- confirm against its definition.
 */
struct engine_info {
engine_info(const NFA *nfa, bool trans);
enum NFAEngineType type;
bool accepts_eod;
u32 stream_size;
u32 scratch_size;
u32 scratch_align;
bool transient;
};
/**
* \brief Consumes list of program blocks corresponding to different literals,
* checks them for duplicates and then concatenates them into one program.
*
* Note: if a block will squash groups, a CLEAR_WORK_DONE instruction is
* inserted to prevent the work_done flag being contaminated by early blocks.
*/
RoseProgram assembleProgramBlocks(std::vector<RoseProgram> &&blocks);
/**
 * \brief Builds the program for literal \a lit_id from its edges
 * \a lit_edges.
 *
 * NOTE(review): \a roleStateIndices is taken by value, so the whole map is
 * copied on every call. If the definition never modifies it, a const
 * reference would avoid the copy; the declaration and the definition must be
 * changed together.
 */
RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
const std::map<RoseVertex, left_build_info> &leftfix_info,
const std::map<suffix_id, u32> &suffixes,
const std::map<u32, engine_info> &engine_info_by_queue,
lookaround_info &lookarounds,
unordered_map<RoseVertex, u32> roleStateIndices,
ProgramBuild &prog_build, u32 lit_id,
const std::vector<RoseEdge> &lit_edges,
bool is_anchored_replay_program);
RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build,
lookaround_info &lookarounds,
ProgramBuild &prog_build,
const std::vector<u32> &lit_ids);
RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build,
ProgramBuild &prog_build, const RoseEdge &e,
const bool multiple_preds);
RoseProgram makeReportProgram(const RoseBuildImpl &build,
bool needs_mpv_catchup, ReportID id);
RoseProgram makeBoundaryProgram(const RoseBuildImpl &build,
const std::set<ReportID> &reports);
/** \brief Small value type bundling a cancel flag with a queue index and an
 * event; two instances are equal when all three members match. */
struct TriggerInfo {
    TriggerInfo(bool c, u32 q, u32 e) : cancel(c), queue(q), event(e) {}

    bool cancel;
    u32 queue;
    u32 event;

    /** \brief Member-wise equality. */
    bool operator==(const TriggerInfo &b) const {
        if (cancel != b.cancel) {
            return false;
        }
        if (queue != b.queue) {
            return false;
        }
        return event == b.event;
    }
};
void addPredBlocks(std::map<u32, RoseProgram> &pred_blocks, u32 num_states,
RoseProgram &program);
void applyFinalSpecialisation(RoseProgram &program);
void recordLongLiterals(std::vector<ue2_case_string> &longLiterals,
const RoseProgram &program);
void recordResources(RoseResources &resources, const RoseProgram &program);
} // namespace ue2

View File

@ -0,0 +1,57 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_BUILD_RESOURCES_H
#define ROSE_BUILD_RESOURCES_H
namespace ue2 {
/**
 * \brief Structure tracking which resources are used by this Rose instance at
 * runtime.
 *
 * We use this to control how much initialisation we need to do at the
 * beginning of a stream/block at runtime.
 *
 * Every flag defaults to false, so a default-constructed instance records
 * that no resources are in use.
 */
struct RoseResources {
bool has_outfixes = false;
bool has_suffixes = false;
bool has_leftfixes = false;
bool has_literals = false;
bool has_states = false;
bool checks_groups = false;
bool has_lit_delay = false;
bool has_lit_check = false; // long literal support
bool has_anchored = false;
bool has_floating = false;
bool has_eod = false;
};
} // namespace ue2
#endif