mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
rose: shift program construction functions to rose_build_program
This commit is contained in:
parent
82838f5728
commit
bb29aeb298
@ -971,6 +971,7 @@ SET (hs_SRCS
|
|||||||
src/rose/rose_build_misc.cpp
|
src/rose/rose_build_misc.cpp
|
||||||
src/rose/rose_build_program.cpp
|
src/rose/rose_build_program.cpp
|
||||||
src/rose/rose_build_program.h
|
src/rose/rose_build_program.h
|
||||||
|
src/rose/rose_build_resources.h
|
||||||
src/rose/rose_build_role_aliasing.cpp
|
src/rose/rose_build_role_aliasing.cpp
|
||||||
src/rose/rose_build_scatter.cpp
|
src/rose/rose_build_scatter.cpp
|
||||||
src/rose/rose_build_scatter.h
|
src/rose/rose_build_scatter.h
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -622,6 +622,11 @@ u64a findMaxOffset(const std::set<ReportID> &reports, const ReportManager &rm);
|
|||||||
void normaliseLiteralMask(const ue2_literal &s, std::vector<u8> &msk,
|
void normaliseLiteralMask(const ue2_literal &s, std::vector<u8> &msk,
|
||||||
std::vector<u8> &cmp);
|
std::vector<u8> &cmp);
|
||||||
|
|
||||||
|
u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id);
|
||||||
|
u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id);
|
||||||
|
|
||||||
|
bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e);
|
||||||
|
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
bool canImplementGraphs(const RoseBuildImpl &tbi);
|
bool canImplementGraphs(const RoseBuildImpl &tbi);
|
||||||
#endif
|
#endif
|
||||||
|
@ -909,6 +909,59 @@ u32 roseQuality(const RoseEngine *t) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * \brief Smallest min_offset among the vertices recorded for literal
 * \p lit_id in \p build.
 *
 * The literal is expected to have at least one vertex (asserted). With
 * assertions compiled out and an empty vertex set, UINT32_MAX is returned.
 */
u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) {
    const auto &verts = build.literal_info.at(lit_id).vertices;
    assert(!verts.empty());

    u32 lowest = UINT32_MAX;
    for (const auto &vert : verts) {
        const u32 candidate = build.g[vert].min_offset;
        if (candidate < lowest) {
            lowest = candidate;
        }
    }

    return lowest;
}
|
||||||
|
|
||||||
|
/**
 * \brief Largest max_offset among the vertices recorded for literal
 * \p lit_id in \p build.
 *
 * The literal is expected to have at least one vertex (asserted). With
 * assertions compiled out and an empty vertex set, 0 is returned.
 */
u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) {
    const auto &verts = build.literal_info.at(lit_id).vertices;
    assert(!verts.empty());

    u32 highest = 0;
    for (const auto &vert : verts) {
        const u32 candidate = build.g[vert].max_offset;
        if (highest < candidate) {
            highest = candidate;
        }
    }

    return highest;
}
|
||||||
|
|
||||||
|
bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e) {
|
||||||
|
const auto &g = build.g;
|
||||||
|
const auto v = target(e, g);
|
||||||
|
|
||||||
|
if (!build.g[v].eod_accept) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If there's a graph between us and EOD, we shouldn't be eager.
|
||||||
|
if (build.g[v].left) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Must be exactly at EOD.
|
||||||
|
if (g[e].minBound != 0 || g[e].maxBound != 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// In streaming mode, we can only eagerly report EOD for literals in the
|
||||||
|
// EOD-anchored table, as that's the only time we actually know where EOD
|
||||||
|
// is. In block mode, we always have this information.
|
||||||
|
const auto u = source(e, g);
|
||||||
|
if (build.cc.streaming && !build.isInETable(u)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
/** \brief Returns true if all the graphs (NFA, DFA, Haig, etc) in this Rose
|
/** \brief Returns true if all the graphs (NFA, DFA, Haig, etc) in this Rose
|
||||||
* graph are implementable. */
|
* graph are implementable. */
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -42,8 +42,10 @@
|
|||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
|
struct LookEntry;
|
||||||
class RoseEngineBlob;
|
class RoseEngineBlob;
|
||||||
class RoseInstruction;
|
class RoseInstruction;
|
||||||
|
struct RoseResources;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Container for a list of program instructions.
|
* \brief Container for a list of program instructions.
|
||||||
@ -145,11 +147,161 @@ public:
|
|||||||
bool operator()(const RoseProgram &prog1, const RoseProgram &prog2) const;
|
bool operator()(const RoseProgram &prog1, const RoseProgram &prog2) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Removes any CHECK_HANDLED instructions from the given program */
|
/** \brief Data only used during construction of various programs (literal,
|
||||||
void stripCheckHandledInstruction(RoseProgram &prog);
|
* anchored, delay, etc). */
|
||||||
|
struct ProgramBuild : noncopyable {
|
||||||
|
explicit ProgramBuild(u32 fMinLitOffset, size_t longLitThresh,
|
||||||
|
bool catchup)
|
||||||
|
: floatingMinLiteralMatchOffset(fMinLitOffset),
|
||||||
|
longLitLengthThreshold(longLitThresh), needs_catchup(catchup) {
|
||||||
|
}
|
||||||
|
|
||||||
/** Returns true if the program may read the interpreter's work_done flag */
|
/** \brief Minimum offset of a match from the floating table. */
|
||||||
bool reads_work_done_flag(const RoseProgram &prog);
|
const u32 floatingMinLiteralMatchOffset;
|
||||||
|
|
||||||
|
/** \brief Long literal length threshold, used in streaming mode. */
|
||||||
|
const size_t longLitLengthThreshold;
|
||||||
|
|
||||||
|
/** \brief True if reports need CATCH_UP instructions to catch up suffixes,
|
||||||
|
* outfixes etc. */
|
||||||
|
const bool needs_catchup;
|
||||||
|
|
||||||
|
/** \brief Mapping from vertex to key, for vertices with a
|
||||||
|
* CHECK_NOT_HANDLED instruction. */
|
||||||
|
ue2::unordered_map<RoseVertex, u32> handledKeys;
|
||||||
|
|
||||||
|
/** \brief Mapping from Rose literal ID to anchored program index. */
|
||||||
|
std::map<u32, u32> anchored_programs;
|
||||||
|
|
||||||
|
/** \brief Mapping from Rose literal ID to delayed program index. */
|
||||||
|
std::map<u32, u32> delay_programs;
|
||||||
|
|
||||||
|
/** \brief Mapping from every vertex to the groups that must be on for that
|
||||||
|
* vertex to be reached. */
|
||||||
|
ue2::unordered_map<RoseVertex, rose_group> vertex_group_map;
|
||||||
|
|
||||||
|
/** \brief Global bitmap of groups that can be squashed. */
|
||||||
|
rose_group squashable_groups = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program);
|
||||||
|
void addSuffixesEodProgram(RoseProgram &program);
|
||||||
|
void addMatcherEodProgram(RoseProgram &program);
|
||||||
|
|
||||||
|
static constexpr u32 INVALID_QUEUE = ~0U;
|
||||||
|
|
||||||
|
struct left_build_info {
|
||||||
|
// Constructor for an engine implementation.
|
||||||
|
left_build_info(u32 q, u32 l, u32 t, rose_group sm,
|
||||||
|
const std::vector<u8> &stops, u32 max_ql, u8 cm_count,
|
||||||
|
const CharReach &cm_cr);
|
||||||
|
|
||||||
|
// Constructor for a lookaround implementation.
|
||||||
|
explicit left_build_info(const std::vector<std::vector<LookEntry>> &looks);
|
||||||
|
|
||||||
|
u32 queue = INVALID_QUEUE; /* uniquely idents the left_build_info */
|
||||||
|
u32 lag = 0;
|
||||||
|
u32 transient = 0;
|
||||||
|
rose_group squash_mask = ~rose_group{0};
|
||||||
|
std::vector<u8> stopAlphabet;
|
||||||
|
u32 max_queuelen = 0;
|
||||||
|
u8 countingMiracleCount = 0;
|
||||||
|
CharReach countingMiracleReach;
|
||||||
|
u32 countingMiracleOffset = 0; /* populated later when laying out bytecode */
|
||||||
|
bool has_lookaround = false;
|
||||||
|
|
||||||
|
// alternative implementation to the NFA
|
||||||
|
std::vector<std::vector<LookEntry>> lookaround;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct lookaround_info : noncopyable {
|
||||||
|
/** \brief LookEntry list cache, so that we can reuse the look index and
|
||||||
|
* reach index for the same lookaround. */
|
||||||
|
ue2::unordered_map<std::vector<std::vector<LookEntry>>,
|
||||||
|
std::pair<size_t, size_t>> cache;
|
||||||
|
|
||||||
|
/** \brief Lookaround table for Rose roles. */
|
||||||
|
std::vector<std::vector<std::vector<LookEntry>>> table;
|
||||||
|
|
||||||
|
/** \brief Lookaround look table size. */
|
||||||
|
size_t lookTableSize = 0;
|
||||||
|
|
||||||
|
/** \brief Lookaround reach table size.
|
||||||
|
* since single path lookaround and multi-path lookaround have different
|
||||||
|
* bitvectors range (32 and 256), we need to maintain both look table size
|
||||||
|
* and reach table size. */
|
||||||
|
size_t reachTableSize = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Provides a brief summary of properties of an NFA that has already been
|
||||||
|
* finalised and stored in the blob.
|
||||||
|
*/
|
||||||
|
struct engine_info {
|
||||||
|
engine_info(const NFA *nfa, bool trans);
|
||||||
|
|
||||||
|
enum NFAEngineType type;
|
||||||
|
bool accepts_eod;
|
||||||
|
u32 stream_size;
|
||||||
|
u32 scratch_size;
|
||||||
|
u32 scratch_align;
|
||||||
|
bool transient;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Consumes list of program blocks corresponding to different literals,
|
||||||
|
* checks them for duplicates and then concatenates them into one program.
|
||||||
|
*
|
||||||
|
* Note: if a block will squash groups, a CLEAR_WORK_DONE instruction is
|
||||||
|
* inserted to prevent the work_done flag being contaminated by early blocks.
|
||||||
|
*/
|
||||||
|
RoseProgram assembleProgramBlocks(std::vector<RoseProgram> &&blocks);
|
||||||
|
|
||||||
|
RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
|
||||||
|
const std::map<RoseVertex, left_build_info> &leftfix_info,
|
||||||
|
const std::map<suffix_id, u32> &suffixes,
|
||||||
|
const std::map<u32, engine_info> &engine_info_by_queue,
|
||||||
|
lookaround_info &lookarounds,
|
||||||
|
unordered_map<RoseVertex, u32> roleStateIndices,
|
||||||
|
ProgramBuild &prog_build, u32 lit_id,
|
||||||
|
const std::vector<RoseEdge> &lit_edges,
|
||||||
|
bool is_anchored_replay_program);
|
||||||
|
|
||||||
|
RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build,
|
||||||
|
lookaround_info &lookarounds,
|
||||||
|
ProgramBuild &prog_build,
|
||||||
|
const std::vector<u32> &lit_ids);
|
||||||
|
|
||||||
|
RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build,
|
||||||
|
ProgramBuild &prog_build, const RoseEdge &e,
|
||||||
|
const bool multiple_preds);
|
||||||
|
|
||||||
|
RoseProgram makeReportProgram(const RoseBuildImpl &build,
|
||||||
|
bool needs_mpv_catchup, ReportID id);
|
||||||
|
|
||||||
|
RoseProgram makeBoundaryProgram(const RoseBuildImpl &build,
|
||||||
|
const std::set<ReportID> &reports);
|
||||||
|
|
||||||
|
struct TriggerInfo {
|
||||||
|
TriggerInfo(bool c, u32 q, u32 e) : cancel(c), queue(q), event(e) {}
|
||||||
|
bool cancel;
|
||||||
|
u32 queue;
|
||||||
|
u32 event;
|
||||||
|
|
||||||
|
bool operator==(const TriggerInfo &b) const {
|
||||||
|
return cancel == b.cancel && queue == b.queue && event == b.event;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
void addPredBlocks(std::map<u32, RoseProgram> &pred_blocks, u32 num_states,
|
||||||
|
RoseProgram &program);
|
||||||
|
|
||||||
|
void applyFinalSpecialisation(RoseProgram &program);
|
||||||
|
|
||||||
|
void recordLongLiterals(std::vector<ue2_case_string> &longLiterals,
|
||||||
|
const RoseProgram &program);
|
||||||
|
|
||||||
|
void recordResources(RoseResources &resources, const RoseProgram &program);
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
|
57
src/rose/rose_build_resources.h
Normal file
57
src/rose/rose_build_resources.h
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2017, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef ROSE_BUILD_RESOURCES_H
|
||||||
|
#define ROSE_BUILD_RESOURCES_H
|
||||||
|
|
||||||
|
namespace ue2 {
|
||||||
|
|
||||||
|
/**
 * \brief Flags recording which resources this Rose instance uses at runtime.
 *
 * They are used to control how much initialisation has to be done at the
 * start of a stream/block at runtime. Every flag starts out false.
 */
struct RoseResources {
    bool has_outfixes{false};
    bool has_suffixes{false};
    bool has_leftfixes{false};
    bool has_literals{false};
    bool has_states{false};
    bool checks_groups{false};
    bool has_lit_delay{false};
    bool has_lit_check{false}; // long literal support
    bool has_anchored{false};
    bool has_floating{false};
    bool has_eod{false};
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
Loading…
x
Reference in New Issue
Block a user