From 15c8a7bd98639ad3a4820f196692cc9aa82f0686 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 1 May 2017 16:09:10 +1000 Subject: [PATCH] rose: rework storage of extra lookaround information - remove explicit lookaround table from bytecode - make the RoseInstr responsible for adding required info to blob --- CMakeLists.txt | 1 + src/rose/program_runtime.h | 16 ++-- src/rose/rose_build_bytecode.cpp | 74 +---------------- src/rose/rose_build_dump.cpp | 19 ++--- src/rose/rose_build_engine_blob.cpp | 117 +++++++++++++++++++++++++++ src/rose/rose_build_engine_blob.h | 18 +++++ src/rose/rose_build_instructions.cpp | 37 +++++++-- src/rose/rose_build_instructions.h | 63 ++++++--------- src/rose/rose_build_program.cpp | 116 +++++--------------------- src/rose/rose_build_program.h | 21 ----- src/rose/rose_internal.h | 4 - src/rose/rose_program.h | 8 +- 12 files changed, 231 insertions(+), 263 deletions(-) create mode 100644 src/rose/rose_build_engine_blob.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index bc42c659..4f5d661f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -948,6 +948,7 @@ SET (hs_SRCS src/rose/rose_build_convert.cpp src/rose/rose_build_convert.h src/rose/rose_build_dedupe.cpp + src/rose/rose_build_engine_blob.cpp src/rose/rose_build_engine_blob.h src/rose/rose_build_exclusive.cpp src/rose/rose_build_exclusive.h diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index dac8345e..c67a4acb 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -1031,8 +1031,7 @@ int roseCheckSingleLookaround(const struct RoseEngine *t, return 0; } - const u8 *reach_base = (const u8 *)t + t->lookaroundReachOffset; - const u8 *reach = reach_base + lookaroundReachIndex; + const u8 *reach = getByOffset(t, lookaroundReachIndex); u8 c; if (offset >= 0 && offset < (s64a)ci->len) { @@ -1069,14 +1068,11 @@ int roseCheckLookaround(const struct RoseEngine *t, DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, ci->buf_offset, ci->buf_offset + ci->len); - const u8 *base = (const u8 *)t; - const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset); - const s8 *look = look_base + lookaroundLookIndex; + const s8 *look = getByOffset(t, lookaroundLookIndex); const s8 *look_end = look + lookaroundCount; assert(look < look_end); - const u8 *reach_base = base + t->lookaroundReachOffset; - const u8 *reach = reach_base + lookaroundReachIndex; + const u8 *reach = getByOffset(t, lookaroundReachIndex); // The following code assumes that the lookaround structures are ordered by // increasing offset. @@ -1166,13 +1162,11 @@ int roseMultipathLookaround(const struct RoseEngine *t, DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, ci->buf_offset, ci->buf_offset + ci->len); - const s8 *look_base = getByOffset(t, t->lookaroundTableOffset); - const s8 *look = look_base + multipathLookaroundLookIndex; + const s8 *look = getByOffset(t, multipathLookaroundLookIndex); const s8 *look_end = look + multipathLookaroundCount; assert(look < look_end); - const u8 *reach_base = getByOffset(t, t->lookaroundReachOffset); - const u8 *reach = reach_base + multipathLookaroundReachIndex; + const u8 *reach = getByOffset(t, multipathLookaroundReachIndex); const s64a base_offset = (s64a)end - ci->buf_offset; DEBUG_PRINTF("base_offset=%lld\n", base_offset); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index dfe4ff63..02304ae2 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -147,8 +147,6 @@ struct build_context : noncopyable { ue2::unordered_map program_cache; - lookaround_info lookarounds; - /** \brief State indices, for those roles that have them. * Each vertex present has a unique state index in the range * [0, roleStateIndices.size()). */ @@ -2428,70 +2426,6 @@ bool hasEodAnchors(const RoseBuildImpl &build, const build_context &bc, return false; } -static -void writeLookaround(const vector &look_vec, s8 *&look, u8 *&reach) { - for (const auto &le : look_vec) { - *look = verify_s8(le.offset); - const CharReach &cr = le.reach; - - assert(cr.any()); // Should be at least one character! - fill_bitvector(cr, reach); - - ++look; - reach += REACH_BITVECTOR_LEN; - } -} - -static -void writeMultipathLookaround(const vector> &multi_look, - s8 *&look, u8 *&reach) { - for (const auto &m : multi_look) { - u8 u = 0; - assert(m.size() == MAX_LOOKAROUND_PATHS); - for (size_t i = 0; i < m.size(); i++) { - if (m[i].reach.none()) { - u |= (u8)1U << i; - } - } - std::fill_n(reach, MULTI_REACH_BITVECTOR_LEN, u); - - for (size_t i = 0; i < m.size(); i++) { - const CharReach &cr = m[i].reach; - if (cr.none()) { - continue; - } - *look = m[i].offset; - - for (size_t c = cr.find_first(); c != cr.npos; - c = cr.find_next(c)) { - reach[c] |= (u8)1U << i; - } - } - - ++look; - reach += MULTI_REACH_BITVECTOR_LEN; - } -} - -static -void writeLookaroundTables(const lookaround_info &lookarounds, - RoseEngineBlob &engine_blob, RoseEngine &proto) { - vector look_table(lookarounds.lookTableSize, 0); - vector reach_table(lookarounds.reachTableSize, 0); - s8 *look = look_table.data(); - u8 *reach = reach_table.data(); - for (const auto &la : lookarounds.table) { - if (la.size() == 1) { - writeLookaround(la.front(), look, reach); - } else { - writeMultipathLookaround(la, look, reach); - } - } - - proto.lookaroundTableOffset = engine_blob.add_range(look_table); - proto.lookaroundReachOffset = engine_blob.add_range(reach_table); -} - static void writeDkeyInfo(const ReportManager &rm, RoseEngineBlob &engine_blob, RoseEngine &proto) { @@ -2752,7 +2686,7 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc, } return makeLiteralProgram(build, bc.leftfix_info, bc.suffixes, - bc.engine_info_by_queue, bc.lookarounds, + bc.engine_info_by_queue, bc.roleStateIndices, prog_build, lit_id, *edges_ptr, is_anchored_replay_program); } @@ -2917,8 +2851,7 @@ void buildLiteralPrograms(const RoseBuildImpl &build, continue; } - auto rebuild_prog = makeDelayRebuildProgram(build, - bc.lookarounds, prog_build, + auto rebuild_prog = makeDelayRebuildProgram(build, prog_build, frag.lit_ids); frag.delay_program_offset = writeProgram(bc, move(rebuild_prog)); } @@ -3181,7 +3114,7 @@ void addEodEventProgram(const RoseBuildImpl &build, build_context &bc, }); auto block = makeLiteralProgram(build, bc.leftfix_info, bc.suffixes, - bc.engine_info_by_queue, bc.lookarounds, + bc.engine_info_by_queue, bc.roleStateIndices, prog_build, build.eod_event_literal_id, edge_list, false); @@ -3555,7 +3488,6 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { addSomRevNfas(bc, proto, ssm); - writeLookaroundTables(bc.lookarounds, bc.engine_blob, proto); writeDkeyInfo(rm, bc.engine_blob, proto); writeLeftInfo(bc.engine_blob, proto, leftInfoTable); diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 7fd19d43..b527db6c 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -625,12 +625,10 @@ void dumpLookaround(ofstream &os, const RoseEngine *t, assert(ri); const u8 *base = (const u8 *)t; - const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset); - const u8 *reach_base = base + t->lookaroundReachOffset; - const s8 *look = look_base + ri->look_index; + const s8 *look = (const s8 *)base + ri->look_index; const s8 *look_end = look + ri->count; - const u8 *reach = reach_base + ri->reach_index; + const u8 *reach = base + ri->reach_index; os << " contents:" << endl; @@ -648,12 +646,10 @@ void dumpMultipathLookaround(ofstream &os, const RoseEngine *t, assert(ri); const u8 *base = (const u8 *)t; - const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset); - const u8 *reach_base = base + t->lookaroundReachOffset; - const s8 *look_begin = look_base + ri->look_index; + const s8 *look_begin = (const s8 *)base + ri->look_index; const s8 *look_end = look_begin + ri->count; - const u8 *reach_begin = reach_base + ri->reach_index; + const u8 *reach_begin = base + ri->reach_index; os << " contents:" << endl; @@ -926,10 +922,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { os << " offset " << int{ri->offset} << endl; os << " reach_index " << ri->reach_index << endl; os << " fail_jump " << offset + ri->fail_jump << endl; - const u8 *base = (const u8 *)t; - const u8 *reach_base = base + t->lookaroundReachOffset; - const u8 *reach = reach_base + - ri->reach_index * REACH_BITVECTOR_LEN; + const u8 *reach = (const u8 *)t + ri->reach_index; os << " contents "; describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT); os << endl; @@ -2146,8 +2139,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, handledKeyFatbitSize); DUMP_U32(t, leftOffset); DUMP_U32(t, roseCount); - DUMP_U32(t, lookaroundTableOffset); - DUMP_U32(t, lookaroundReachOffset); DUMP_U32(t, eodProgramOffset); DUMP_U32(t, lastByteHistoryIterOffset); DUMP_U32(t, minWidth); diff --git a/src/rose/rose_build_engine_blob.cpp b/src/rose/rose_build_engine_blob.cpp new file mode 100644 index 00000000..d3957207 --- /dev/null +++ b/src/rose/rose_build_engine_blob.cpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "rose_build_engine_blob.h" + +#include "rose_build_lookaround.h" +#include "util/charreach_util.h" + +using namespace std; + +namespace ue2 { + +u32 lookaround_info::get_offset_of(const vector> &reaches, + RoseEngineBlob &blob) { + assert(reaches.size() != 1); + + // Check the cache. + auto it = multi_cache.find(reaches); + if (it != multi_cache.end()) { + DEBUG_PRINTF("reusing reach at idx %u\n", it->second); + return it->second; + } + + vector raw_reach(reaches.size() * MULTI_REACH_BITVECTOR_LEN); + size_t off = 0; + for (const auto &m : reaches) { + u8 u = 0; + assert(m.size() == MAX_LOOKAROUND_PATHS); + for (size_t i = 0; i < m.size(); i++) { + if (m[i].none()) { + u |= (u8)1U << i; + } + } + fill_n(raw_reach.data() + off, MULTI_REACH_BITVECTOR_LEN, u); + + for (size_t i = 0; i < m.size(); i++) { + const CharReach &cr = m[i]; + if (cr.none()) { + continue; + } + + for (size_t c = cr.find_first(); c != cr.npos; + c = cr.find_next(c)) { + raw_reach[c + off] |= (u8)1U << i; + } + } + + off += MULTI_REACH_BITVECTOR_LEN; + } + + u32 reach_idx = blob.add_range(raw_reach); + DEBUG_PRINTF("adding reach at idx %u\n", reach_idx); + multi_cache.emplace(reaches, reach_idx); + + return reach_idx; +} + +u32 lookaround_info::get_offset_of(const vector &reach, + RoseEngineBlob &blob) { + if (contains(rcache, reach)) { + u32 offset = rcache[reach]; + DEBUG_PRINTF("reusing reach at idx %u\n", offset); + return offset; + } + + vector raw_reach(reach.size() * REACH_BITVECTOR_LEN); + size_t off = 0; + for (const auto &cr : reach) { + assert(cr.any()); // Should be at least one character! + fill_bitvector(cr, raw_reach.data() + off); + off += REACH_BITVECTOR_LEN; + } + + u32 offset = blob.add_range(raw_reach); + rcache.emplace(reach, offset); + return offset; +} + +u32 lookaround_info::get_offset_of(const vector &look, + RoseEngineBlob &blob) { + if (contains(lcache, look)) { + u32 offset = lcache[look]; + DEBUG_PRINTF("reusing look at idx %u\n", offset); + return offset; + } + + u32 offset = blob.add_range(look); + lcache.emplace(look, offset); + return offset; +} + +} // namespace ue2 diff --git a/src/rose/rose_build_engine_blob.h b/src/rose/rose_build_engine_blob.h index 69e8201e..a22f2dff 100644 --- a/src/rose/rose_build_engine_blob.h +++ b/src/rose/rose_build_engine_blob.h @@ -34,6 +34,7 @@ #include "ue2common.h" #include "util/alloc.h" #include "util/bytecode_ptr.h" +#include "util/charreach.h" #include "util/container.h" #include "util/multibit_build.h" #include "util/noncopyable.h" @@ -45,6 +46,21 @@ namespace ue2 { +class RoseEngineBlob; + +struct lookaround_info : noncopyable { + u32 get_offset_of(const std::vector> &look, + RoseEngineBlob &blob); + u32 get_offset_of(const std::vector &reach, + RoseEngineBlob &blob); + u32 get_offset_of(const std::vector &look, RoseEngineBlob &blob); + +private: + unordered_map>, u32> multi_cache; + unordered_map, u32> lcache; + unordered_map, u32> rcache; +}; + class RoseEngineBlob : noncopyable { public: /** \brief Base offset of engine_blob in the Rose engine bytecode. */ @@ -133,6 +149,8 @@ public: copy_bytes((char *)engine + base_offset, blob); } + lookaround_info lookaround_cache; + private: void pad(size_t align) { assert(ISALIGNED_N(base_offset, align)); diff --git a/src/rose/rose_build_instructions.cpp b/src/rose/rose_build_instructions.cpp index f39fbe98..b00c36be 100644 --- a/src/rose/rose_build_instructions.cpp +++ b/src/rose/rose_build_instructions.cpp @@ -118,7 +118,7 @@ void RoseInstrCheckSingleLookaround::write(void *dest, RoseEngineBlob &blob, RoseInstrBase::write(dest, blob, offset_map); auto *inst = static_cast(dest); inst->offset = offset; - inst->reach_index = reach_index; + inst->reach_index = blob.lookaround_cache.get_offset_of({reach}, blob); inst->fail_jump = calc_jump(offset_map, this, target); } @@ -126,9 +126,15 @@ void RoseInstrCheckLookaround::write(void *dest, RoseEngineBlob &blob, const OffsetMap &offset_map) const { RoseInstrBase::write(dest, blob, offset_map); auto *inst = static_cast(dest); - inst->look_index = look_index; - inst->reach_index = reach_index; - inst->count = count; + vector look_offsets; + vector reaches; + for (const auto &le : look) { + look_offsets.push_back(le.offset); + reaches.push_back(le.reach); + } + inst->look_index = blob.lookaround_cache.get_offset_of(look_offsets, blob); + inst->reach_index = blob.lookaround_cache.get_offset_of(reaches, blob); + inst->count = verify_u32(look.size()); inst->fail_jump = calc_jump(offset_map, this, target); } @@ -532,9 +538,26 @@ void RoseInstrMultipathLookaround::write(void *dest, RoseEngineBlob &blob, const OffsetMap &offset_map) const { RoseInstrBase::write(dest, blob, offset_map); auto *inst = static_cast(dest); - inst->look_index = look_index; - inst->reach_index = reach_index; - inst->count = count; + auto &cache = blob.lookaround_cache; + vector look_offsets; + vector> reaches; + for (const auto &vle : multi_look) { + reaches.push_back({}); + bool done_offset = false; + + for (const auto &le : vle) { + reaches.back().push_back(le.reach); + + /* empty reaches don't have valid offsets */ + if (!done_offset && le.reach.any()) { + look_offsets.push_back(le.offset); + done_offset = true; + } + } + } + inst->look_index = cache.get_offset_of(look_offsets, blob); + inst->reach_index = cache.get_offset_of(reaches, blob); + inst->count = verify_u32(multi_look.size()); inst->last_start = last_start; copy(begin(start_mask), end(start_mask), inst->start_mask); inst->fail_jump = calc_jump(offset_map, this, target); diff --git a/src/rose/rose_build_instructions.h b/src/rose/rose_build_instructions.h index 06d146a5..025f6a67 100644 --- a/src/rose/rose_build_instructions.h +++ b/src/rose/rose_build_instructions.h @@ -37,6 +37,7 @@ #ifndef ROSE_BUILD_INSTRUCTIONS_H #define ROSE_BUILD_INSTRUCTIONS_H +#include "rose_build_lookaround.h" #include "rose_build_program.h" #include "util/verify_types.h" @@ -382,20 +383,19 @@ class RoseInstrCheckSingleLookaround RoseInstrCheckSingleLookaround> { public: s8 offset; - u32 reach_index; + CharReach reach; const RoseInstruction *target; - RoseInstrCheckSingleLookaround(s8 offset_in, u32 reach_index_in, + RoseInstrCheckSingleLookaround(s8 offset_in, CharReach reach_in, const RoseInstruction *target_in) - : offset(offset_in), reach_index(reach_index_in), target(target_in) {} + : offset(offset_in), reach(std::move(reach_in)), target(target_in) {} bool operator==(const RoseInstrCheckSingleLookaround &ri) const { - return offset == ri.offset && reach_index == ri.reach_index && - target == ri.target; + return offset == ri.offset && reach == ri.reach && target == ri.target; } size_t hash() const override { - return hash_all(static_cast(opcode), offset, reach_index); + return hash_all(static_cast(opcode), offset, reach); } void write(void *dest, RoseEngineBlob &blob, @@ -404,7 +404,7 @@ public: bool equiv_to(const RoseInstrCheckSingleLookaround &ri, const OffsetMap &offsets, const OffsetMap &other_offsets) const { - return offset == ri.offset && reach_index == ri.reach_index && + return offset == ri.offset && reach == ri.reach && offsets.at(target) == other_offsets.at(ri.target); } }; @@ -414,24 +414,19 @@ class RoseInstrCheckLookaround ROSE_STRUCT_CHECK_LOOKAROUND, RoseInstrCheckLookaround> { public: - u32 look_index; - u32 reach_index; - u32 count; + std::vector look; const RoseInstruction *target; - RoseInstrCheckLookaround(u32 look_index_in, u32 reach_index_in, - u32 count_in, const RoseInstruction *target_in) - : look_index(look_index_in), reach_index(reach_index_in), - count(count_in), target(target_in) {} + RoseInstrCheckLookaround(std::vector look_in, + const RoseInstruction *target_in) + : look(std::move(look_in)), target(target_in) {} bool operator==(const RoseInstrCheckLookaround &ri) const { - return look_index == ri.look_index && reach_index == ri.reach_index && - count == ri.count && target == ri.target; + return look == ri.look && target == ri.target; } size_t hash() const override { - return hash_all(static_cast(opcode), look_index, reach_index, - count); + return hash_all(static_cast(opcode), look); } void write(void *dest, RoseEngineBlob &blob, @@ -439,9 +434,8 @@ public: bool equiv_to(const RoseInstrCheckLookaround &ri, const OffsetMap &offsets, const OffsetMap &other_offsets) const { - return look_index == ri.look_index && reach_index == ri.reach_index && - count == ri.count && - offsets.at(target) == other_offsets.at(ri.target); + return look == ri.look + && offsets.at(target) == other_offsets.at(ri.target); } }; @@ -1837,30 +1831,26 @@ class RoseInstrMultipathLookaround ROSE_STRUCT_MULTIPATH_LOOKAROUND, RoseInstrMultipathLookaround> { public: - u32 look_index; - u32 reach_index; - u32 count; + std::vector> multi_look; s32 last_start; std::array start_mask; const RoseInstruction *target; - RoseInstrMultipathLookaround(u32 look_index_in, u32 reach_index_in, - u32 count_in, s32 last_start_in, + RoseInstrMultipathLookaround(std::vector> ml, + s32 last_start_in, std::array start_mask_in, const RoseInstruction *target_in) - : look_index(look_index_in), reach_index(reach_index_in), - count(count_in), last_start(last_start_in), + : multi_look(std::move(ml)), last_start(last_start_in), start_mask(std::move(start_mask_in)), target(target_in) {} bool operator==(const RoseInstrMultipathLookaround &ri) const { - return look_index == ri.look_index && reach_index == ri.reach_index && - count == ri.count && last_start == ri.last_start && - start_mask == ri.start_mask && target == ri.target; + return multi_look == ri.multi_look && last_start == ri.last_start + && start_mask == ri.start_mask && target == ri.target; } size_t hash() const override { - return hash_all(static_cast(opcode), look_index, reach_index, - count, last_start, start_mask); + return hash_all(static_cast(opcode), multi_look, last_start, + start_mask); } void write(void *dest, RoseEngineBlob &blob, @@ -1869,10 +1859,9 @@ public: bool equiv_to(const RoseInstrMultipathLookaround &ri, const OffsetMap &offsets, const OffsetMap &other_offsets) const { - return look_index == ri.look_index && reach_index == ri.reach_index && - count == ri.count && last_start == ri.last_start && - start_mask == ri.start_mask && - offsets.at(target) == other_offsets.at(ri.target); + return multi_look == ri.multi_look && last_start == ri.last_start + && start_mask == ri.start_mask + && offsets.at(target) == other_offsets.at(ri.target); } }; diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index 92eeff63..eb9db5a6 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -28,6 +28,7 @@ #include "rose_build_program.h" +#include "rose_build_engine_blob.h" #include "rose_build_instructions.h" #include "rose_build_lookaround.h" #include "rose_build_resources.h" @@ -39,7 +40,6 @@ #include "util/container.h" #include "util/compile_context.h" #include "util/compile_error.h" -#include "util/dump_charclass.h" #include "util/report_manager.h" #include "util/verify_types.h" @@ -851,40 +851,6 @@ void makeRoleGroups(const RoseGraph &g, ProgramBuild &prog_build, program.add_before_end(make_unique(groups)); } -static -void addLookaround(lookaround_info &lookarounds, - const vector> &look, - u32 &look_index, u32 &reach_index) { - // Check the cache. - auto it = lookarounds.cache.find(look); - if (it != lookarounds.cache.end()) { - look_index = verify_u32(it->second.first); - reach_index = verify_u32(it->second.second); - DEBUG_PRINTF("reusing look at idx %u\n", look_index); - DEBUG_PRINTF("reusing reach at idx %u\n", reach_index); - return; - } - - size_t look_idx = lookarounds.lookTableSize; - size_t reach_idx = lookarounds.reachTableSize; - - if (look.size() == 1) { - lookarounds.lookTableSize += look.front().size(); - lookarounds.reachTableSize += look.front().size() * REACH_BITVECTOR_LEN; - } else { - lookarounds.lookTableSize += look.size(); - lookarounds.reachTableSize += look.size() * MULTI_REACH_BITVECTOR_LEN; - } - - lookarounds.cache.emplace(look, make_pair(look_idx, reach_idx)); - lookarounds.table.emplace_back(look); - - DEBUG_PRINTF("adding look at idx %zu\n", look_idx); - DEBUG_PRINTF("adding reach at idx %zu\n", reach_idx); - look_index = verify_u32(look_idx); - reach_index = verify_u32(reach_idx); -} - static bool checkReachMask(const CharReach &cr, u8 &andmask, u8 &cmpmask) { size_t reach_size = cr.count(); @@ -1278,8 +1244,7 @@ bool makeRoleShufti(const vector &look, RoseProgram &program) { * available. */ static -void makeLookaroundInstruction(lookaround_info &lookarounds, - const vector &look, +void makeLookaroundInstruction(const vector &look, RoseProgram &program) { assert(!look.empty()); @@ -1289,12 +1254,8 @@ void makeLookaroundInstruction(lookaround_info &lookarounds, if (look.size() == 1) { s8 offset = look.begin()->offset; - u32 look_idx, reach_idx; - vector> lookaround; - lookaround.emplace_back(look); - addLookaround(lookarounds, lookaround, look_idx, reach_idx); - // We don't need look_idx here. - auto ri = make_unique(offset, reach_idx, + const CharReach &reach = look.begin()->reach; + auto ri = make_unique(offset, reach, program.end_instruction()); program.add_before_end(move(ri)); return; @@ -1312,21 +1273,13 @@ void makeLookaroundInstruction(lookaround_info &lookarounds, return; } - u32 look_idx, reach_idx; - vector> lookaround; - lookaround.emplace_back(look); - addLookaround(lookarounds, lookaround, look_idx, reach_idx); - u32 look_count = verify_u32(look.size()); - - auto ri = make_unique(look_idx, reach_idx, - look_count, + auto ri = make_unique(look, program.end_instruction()); program.add_before_end(move(ri)); } static -void makeCheckLitMaskInstruction(const RoseBuildImpl &build, - lookaround_info &lookarounds, u32 lit_id, +void makeCheckLitMaskInstruction(const RoseBuildImpl &build, u32 lit_id, RoseProgram &program) { const auto &info = build.literal_info.at(lit_id); if (!info.requires_benefits) { @@ -1348,7 +1301,7 @@ void makeCheckLitMaskInstruction(const RoseBuildImpl &build, } assert(!look.empty()); - makeLookaroundInstruction(lookarounds, look, program); + makeLookaroundInstruction(look, program); } static @@ -1417,7 +1370,6 @@ bool hasDelayedLiteral(const RoseBuildImpl &build, static RoseProgram makeLitInitialProgram(const RoseBuildImpl &build, - lookaround_info &lookarounds, ProgramBuild &prog_build, u32 lit_id, const vector &lit_edges, bool is_anchored_replay_program) { @@ -1431,7 +1383,7 @@ RoseProgram makeLitInitialProgram(const RoseBuildImpl &build, } // Check lit mask. - makeCheckLitMaskInstruction(build, lookarounds, lit_id, program); + makeCheckLitMaskInstruction(build, lit_id, program); // Check literal groups. This is an optimisation that we only perform for // delayed literals, as their groups may be switched off; ordinarily, we @@ -1458,20 +1410,6 @@ RoseProgram makeLitInitialProgram(const RoseBuildImpl &build, return program; } -#if defined(DEBUG) || defined(DUMP_SUPPORT) -static UNUSED -string dumpMultiLook(const vector &looks) { - ostringstream oss; - for (auto it = looks.begin(); it != looks.end(); ++it) { - if (it != looks.begin()) { - oss << ", "; - } - oss << "{" << int(it->offset) << ": " << describeClass(it->reach) << "}"; - } - return oss.str(); -} -#endif - static bool makeRoleMultipathShufti(const vector> &multi_look, RoseProgram &program) { @@ -1612,8 +1550,7 @@ bool makeRoleMultipathShufti(const vector> &multi_look, } static -void makeRoleMultipathLookaround(lookaround_info &lookarounds, - const vector> &multi_look, +void makeRoleMultipathLookaround(const vector> &multi_look, RoseProgram &program) { assert(!multi_look.empty()); assert(multi_look.size() <= MAX_LOOKAROUND_PATHS); @@ -1675,13 +1612,8 @@ void makeRoleMultipathLookaround(lookaround_info &lookarounds, ordered_look.emplace_back(multi_entry); } - u32 look_idx, reach_idx; - addLookaround(lookarounds, ordered_look, look_idx, reach_idx); - u32 look_count = verify_u32(ordered_look.size()); - - auto ri = make_unique(look_idx, reach_idx, - look_count, last_start, - start_mask, + auto ri = make_unique(move(ordered_look), + last_start, start_mask, program.end_instruction()); program.add_before_end(move(ri)); } @@ -1689,8 +1621,7 @@ void makeRoleMultipathLookaround(lookaround_info &lookarounds, static void makeRoleLookaround(const RoseBuildImpl &build, const map &leftfix_info, - lookaround_info &lookarounds, RoseVertex v, - RoseProgram &program) { + RoseVertex v, RoseProgram &program) { if (!build.cc.grey.roseLookaroundMasks) { return; } @@ -1714,14 +1645,14 @@ void makeRoleLookaround(const RoseBuildImpl &build, findLookaroundMasks(build, v, look_more); mergeLookaround(look, look_more); if (!look.empty()) { - makeLookaroundInstruction(lookarounds, look, program); + makeLookaroundInstruction(look, program); } return; } if (!makeRoleMultipathShufti(looks, program)) { assert(looks.size() <= 8); - makeRoleMultipathLookaround(lookarounds, looks, program); + makeRoleMultipathLookaround(looks, program); } } @@ -1902,7 +1833,6 @@ RoseProgram makeRoleProgram(const RoseBuildImpl &build, const map &leftfix_info, const map &suffixes, const map &engine_info_by_queue, - lookaround_info &lookarounds, const unordered_map &roleStateIndices, ProgramBuild &prog_build, const RoseEdge &e) { const RoseGraph &g = build.g; @@ -1929,7 +1859,7 @@ RoseProgram makeRoleProgram(const RoseBuildImpl &build, makeRoleCheckNotHandled(prog_build, v, program); } - makeRoleLookaround(build, leftfix_info, lookarounds, v, program); + makeRoleLookaround(build, leftfix_info, v, program); makeRoleCheckLeftfix(build, leftfix_info, v, program); // Next, we can add program instructions that have effects. This must be @@ -2029,7 +1959,6 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, const map &leftfix_info, const map &suffixes, const map &engine_info_by_queue, - lookaround_info &lookarounds, const unordered_map &roleStateIndices, ProgramBuild &prog_build, u32 lit_id, const vector &lit_edges, @@ -2040,8 +1969,8 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, // Construct initial program up front, as its early checks must be able // to jump to end and terminate processing for this literal. - auto lit_program = makeLitInitialProgram(build, lookarounds, prog_build, - lit_id, lit_edges, + auto lit_program = makeLitInitialProgram(build, prog_build, lit_id, + lit_edges, is_anchored_replay_program); RoseProgram role_programs; @@ -2060,8 +1989,8 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, assert(contains(roleStateIndices, u)); u32 pred_state = roleStateIndices.at(u); auto role_prog = makeRoleProgram(build, leftfix_info, suffixes, - engine_info_by_queue, lookarounds, - roleStateIndices, prog_build, e); + engine_info_by_queue, roleStateIndices, + prog_build, e); if (!role_prog.empty()) { pred_blocks[pred_state].add_block(move(role_prog)); } @@ -2080,8 +2009,8 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].index, g[target(e, g)].index); auto role_prog = makeRoleProgram(build, leftfix_info, suffixes, - engine_info_by_queue, lookarounds, - roleStateIndices, prog_build, e); + engine_info_by_queue, roleStateIndices, + prog_build, e); role_programs.add_block(move(role_prog)); } @@ -2104,7 +2033,6 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, } RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build, - lookaround_info &lookarounds, ProgramBuild &prog_build, const vector &lit_ids) { assert(!lit_ids.empty()); @@ -2126,7 +2054,7 @@ RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build, build.cc); } - makeCheckLitMaskInstruction(build, lookarounds, lit_id, prog); + makeCheckLitMaskInstruction(build, lit_id, prog); makePushDelayedInstructions(build.literals, prog_build, build.literal_info.at(lit_id).delayed_ids, prog); diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h index d8e542b8..8758ef64 100644 --- a/src/rose/rose_build_program.h +++ b/src/rose/rose_build_program.h @@ -214,25 +214,6 @@ struct left_build_info { std::vector> lookaround; }; -struct lookaround_info : noncopyable { - /** \brief LookEntry list cache, so that we can reuse the look index and - * reach index for the same lookaround. */ - ue2::unordered_map>, - std::pair> cache; - - /** \brief Lookaround table for Rose roles. */ - std::vector>> table; - - /** \brief Lookaround look table size. */ - size_t lookTableSize = 0; - - /** \brief Lookaround reach table size. - * since single path lookaround and multi-path lookaround have different - * bitvectors range (32 and 256), we need to maintain both look table size - * and reach table size. */ - size_t reachTableSize = 0; -}; - /** * \brief Provides a brief summary of properties of an NFA that has already been * finalised and stored in the blob. @@ -261,14 +242,12 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, const std::map &leftfix_info, const std::map &suffixes, const std::map &engine_info_by_queue, - lookaround_info &lookarounds, const unordered_map &roleStateIndices, ProgramBuild &prog_build, u32 lit_id, const std::vector &lit_edges, bool is_anchored_replay_program); RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build, - lookaround_info &lookarounds, ProgramBuild &prog_build, const std::vector &lit_ids); diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 777e7234..57395c9d 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -383,10 +383,6 @@ struct RoseEngine { u32 leftOffset; u32 roseCount; - u32 lookaroundTableOffset; //!< base of lookaround offset list (of s8 values) - u32 lookaroundReachOffset; /**< base of lookaround reach bitvectors (32 - * bytes for single-path lookaround and 256 bytes - * for multi-path lookaround) */ u32 eodProgramOffset; //!< EOD program, otherwise 0. diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index cdfe96ac..78b123d5 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -231,8 +231,8 @@ struct ROSE_STRUCT_CHECK_SINGLE_LOOKAROUND { struct ROSE_STRUCT_CHECK_LOOKAROUND { u8 code; //!< From enum RoseInstructionCode. - u32 look_index; //!< Index for lookaround offset list. - u32 reach_index; //!< Index for lookaround reach bitvectors. + u32 look_index; //!< Offset in bytecode of lookaround offset list. + u32 reach_index; //!< Offset in bytecode of lookaround reach bitvectors. u32 count; //!< The count of lookaround entries in one instruction. u32 fail_jump; //!< Jump forward this many bytes on failure. }; @@ -561,8 +561,8 @@ struct ROSE_STRUCT_CLEAR_WORK_DONE { struct ROSE_STRUCT_MULTIPATH_LOOKAROUND { u8 code; //!< From enum RoseInstructionCode. - u32 look_index; //!< Index for lookaround offset list. - u32 reach_index; //!< Index for lookaround reach bitvectors. + u32 look_index; //!< Offset in bytecode of lookaround offset list. + u32 reach_index; //!< Offset in bytecode of lookaround reach bitvectors. u32 count; //!< The lookaround byte numbers for each path. s32 last_start; //!< The latest start offset among 8 paths. u8 start_mask[MULTIPATH_MAX_LEN]; /*!< Used to initialize path if left-most