mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
rose: rework storage of extra lookaround information
- remove explicit lookaround table from bytecode
- make the RoseInstr responsible for adding required info to blob
This commit is contained in:
parent
1a04d1330e
commit
15c8a7bd98
@ -948,6 +948,7 @@ SET (hs_SRCS
|
||||
src/rose/rose_build_convert.cpp
|
||||
src/rose/rose_build_convert.h
|
||||
src/rose/rose_build_dedupe.cpp
|
||||
src/rose/rose_build_engine_blob.cpp
|
||||
src/rose/rose_build_engine_blob.h
|
||||
src/rose/rose_build_exclusive.cpp
|
||||
src/rose/rose_build_exclusive.h
|
||||
|
@ -1031,8 +1031,7 @@ int roseCheckSingleLookaround(const struct RoseEngine *t,
|
||||
return 0;
|
||||
}
|
||||
|
||||
const u8 *reach_base = (const u8 *)t + t->lookaroundReachOffset;
|
||||
const u8 *reach = reach_base + lookaroundReachIndex;
|
||||
const u8 *reach = getByOffset(t, lookaroundReachIndex);
|
||||
|
||||
u8 c;
|
||||
if (offset >= 0 && offset < (s64a)ci->len) {
|
||||
@ -1069,14 +1068,11 @@ int roseCheckLookaround(const struct RoseEngine *t,
|
||||
DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
|
||||
ci->buf_offset, ci->buf_offset + ci->len);
|
||||
|
||||
const u8 *base = (const u8 *)t;
|
||||
const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset);
|
||||
const s8 *look = look_base + lookaroundLookIndex;
|
||||
const s8 *look = getByOffset(t, lookaroundLookIndex);
|
||||
const s8 *look_end = look + lookaroundCount;
|
||||
assert(look < look_end);
|
||||
|
||||
const u8 *reach_base = base + t->lookaroundReachOffset;
|
||||
const u8 *reach = reach_base + lookaroundReachIndex;
|
||||
const u8 *reach = getByOffset(t, lookaroundReachIndex);
|
||||
|
||||
// The following code assumes that the lookaround structures are ordered by
|
||||
// increasing offset.
|
||||
@ -1166,13 +1162,11 @@ int roseMultipathLookaround(const struct RoseEngine *t,
|
||||
DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
|
||||
ci->buf_offset, ci->buf_offset + ci->len);
|
||||
|
||||
const s8 *look_base = getByOffset(t, t->lookaroundTableOffset);
|
||||
const s8 *look = look_base + multipathLookaroundLookIndex;
|
||||
const s8 *look = getByOffset(t, multipathLookaroundLookIndex);
|
||||
const s8 *look_end = look + multipathLookaroundCount;
|
||||
assert(look < look_end);
|
||||
|
||||
const u8 *reach_base = getByOffset(t, t->lookaroundReachOffset);
|
||||
const u8 *reach = reach_base + multipathLookaroundReachIndex;
|
||||
const u8 *reach = getByOffset(t, multipathLookaroundReachIndex);
|
||||
|
||||
const s64a base_offset = (s64a)end - ci->buf_offset;
|
||||
DEBUG_PRINTF("base_offset=%lld\n", base_offset);
|
||||
|
@ -147,8 +147,6 @@ struct build_context : noncopyable {
|
||||
ue2::unordered_map<RoseProgram, u32, RoseProgramHash,
|
||||
RoseProgramEquivalence> program_cache;
|
||||
|
||||
lookaround_info lookarounds;
|
||||
|
||||
/** \brief State indices, for those roles that have them.
|
||||
* Each vertex present has a unique state index in the range
|
||||
* [0, roleStateIndices.size()). */
|
||||
@ -2428,70 +2426,6 @@ bool hasEodAnchors(const RoseBuildImpl &build, const build_context &bc,
|
||||
return false;
|
||||
}
|
||||
|
||||
static
|
||||
void writeLookaround(const vector<LookEntry> &look_vec, s8 *&look, u8 *&reach) {
|
||||
for (const auto &le : look_vec) {
|
||||
*look = verify_s8(le.offset);
|
||||
const CharReach &cr = le.reach;
|
||||
|
||||
assert(cr.any()); // Should be at least one character!
|
||||
fill_bitvector(cr, reach);
|
||||
|
||||
++look;
|
||||
reach += REACH_BITVECTOR_LEN;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void writeMultipathLookaround(const vector<vector<LookEntry>> &multi_look,
|
||||
s8 *&look, u8 *&reach) {
|
||||
for (const auto &m : multi_look) {
|
||||
u8 u = 0;
|
||||
assert(m.size() == MAX_LOOKAROUND_PATHS);
|
||||
for (size_t i = 0; i < m.size(); i++) {
|
||||
if (m[i].reach.none()) {
|
||||
u |= (u8)1U << i;
|
||||
}
|
||||
}
|
||||
std::fill_n(reach, MULTI_REACH_BITVECTOR_LEN, u);
|
||||
|
||||
for (size_t i = 0; i < m.size(); i++) {
|
||||
const CharReach &cr = m[i].reach;
|
||||
if (cr.none()) {
|
||||
continue;
|
||||
}
|
||||
*look = m[i].offset;
|
||||
|
||||
for (size_t c = cr.find_first(); c != cr.npos;
|
||||
c = cr.find_next(c)) {
|
||||
reach[c] |= (u8)1U << i;
|
||||
}
|
||||
}
|
||||
|
||||
++look;
|
||||
reach += MULTI_REACH_BITVECTOR_LEN;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void writeLookaroundTables(const lookaround_info &lookarounds,
|
||||
RoseEngineBlob &engine_blob, RoseEngine &proto) {
|
||||
vector<s8> look_table(lookarounds.lookTableSize, 0);
|
||||
vector<u8> reach_table(lookarounds.reachTableSize, 0);
|
||||
s8 *look = look_table.data();
|
||||
u8 *reach = reach_table.data();
|
||||
for (const auto &la : lookarounds.table) {
|
||||
if (la.size() == 1) {
|
||||
writeLookaround(la.front(), look, reach);
|
||||
} else {
|
||||
writeMultipathLookaround(la, look, reach);
|
||||
}
|
||||
}
|
||||
|
||||
proto.lookaroundTableOffset = engine_blob.add_range(look_table);
|
||||
proto.lookaroundReachOffset = engine_blob.add_range(reach_table);
|
||||
}
|
||||
|
||||
static
|
||||
void writeDkeyInfo(const ReportManager &rm, RoseEngineBlob &engine_blob,
|
||||
RoseEngine &proto) {
|
||||
@ -2752,7 +2686,7 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc,
|
||||
}
|
||||
|
||||
return makeLiteralProgram(build, bc.leftfix_info, bc.suffixes,
|
||||
bc.engine_info_by_queue, bc.lookarounds,
|
||||
bc.engine_info_by_queue,
|
||||
bc.roleStateIndices, prog_build, lit_id,
|
||||
*edges_ptr, is_anchored_replay_program);
|
||||
}
|
||||
@ -2917,8 +2851,7 @@ void buildLiteralPrograms(const RoseBuildImpl &build,
|
||||
continue;
|
||||
}
|
||||
|
||||
auto rebuild_prog = makeDelayRebuildProgram(build,
|
||||
bc.lookarounds, prog_build,
|
||||
auto rebuild_prog = makeDelayRebuildProgram(build, prog_build,
|
||||
frag.lit_ids);
|
||||
frag.delay_program_offset = writeProgram(bc, move(rebuild_prog));
|
||||
}
|
||||
@ -3181,7 +3114,7 @@ void addEodEventProgram(const RoseBuildImpl &build, build_context &bc,
|
||||
});
|
||||
|
||||
auto block = makeLiteralProgram(build, bc.leftfix_info, bc.suffixes,
|
||||
bc.engine_info_by_queue, bc.lookarounds,
|
||||
bc.engine_info_by_queue,
|
||||
bc.roleStateIndices, prog_build,
|
||||
build.eod_event_literal_id, edge_list,
|
||||
false);
|
||||
@ -3555,7 +3488,6 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
||||
|
||||
addSomRevNfas(bc, proto, ssm);
|
||||
|
||||
writeLookaroundTables(bc.lookarounds, bc.engine_blob, proto);
|
||||
writeDkeyInfo(rm, bc.engine_blob, proto);
|
||||
writeLeftInfo(bc.engine_blob, proto, leftInfoTable);
|
||||
|
||||
|
@ -625,12 +625,10 @@ void dumpLookaround(ofstream &os, const RoseEngine *t,
|
||||
assert(ri);
|
||||
|
||||
const u8 *base = (const u8 *)t;
|
||||
const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset);
|
||||
const u8 *reach_base = base + t->lookaroundReachOffset;
|
||||
|
||||
const s8 *look = look_base + ri->look_index;
|
||||
const s8 *look = (const s8 *)base + ri->look_index;
|
||||
const s8 *look_end = look + ri->count;
|
||||
const u8 *reach = reach_base + ri->reach_index;
|
||||
const u8 *reach = base + ri->reach_index;
|
||||
|
||||
os << " contents:" << endl;
|
||||
|
||||
@ -648,12 +646,10 @@ void dumpMultipathLookaround(ofstream &os, const RoseEngine *t,
|
||||
assert(ri);
|
||||
|
||||
const u8 *base = (const u8 *)t;
|
||||
const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset);
|
||||
const u8 *reach_base = base + t->lookaroundReachOffset;
|
||||
|
||||
const s8 *look_begin = look_base + ri->look_index;
|
||||
const s8 *look_begin = (const s8 *)base + ri->look_index;
|
||||
const s8 *look_end = look_begin + ri->count;
|
||||
const u8 *reach_begin = reach_base + ri->reach_index;
|
||||
const u8 *reach_begin = base + ri->reach_index;
|
||||
|
||||
os << " contents:" << endl;
|
||||
|
||||
@ -926,10 +922,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
os << " offset " << int{ri->offset} << endl;
|
||||
os << " reach_index " << ri->reach_index << endl;
|
||||
os << " fail_jump " << offset + ri->fail_jump << endl;
|
||||
const u8 *base = (const u8 *)t;
|
||||
const u8 *reach_base = base + t->lookaroundReachOffset;
|
||||
const u8 *reach = reach_base +
|
||||
ri->reach_index * REACH_BITVECTOR_LEN;
|
||||
const u8 *reach = (const u8 *)t + ri->reach_index;
|
||||
os << " contents ";
|
||||
describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT);
|
||||
os << endl;
|
||||
@ -2146,8 +2139,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
|
||||
DUMP_U32(t, handledKeyFatbitSize);
|
||||
DUMP_U32(t, leftOffset);
|
||||
DUMP_U32(t, roseCount);
|
||||
DUMP_U32(t, lookaroundTableOffset);
|
||||
DUMP_U32(t, lookaroundReachOffset);
|
||||
DUMP_U32(t, eodProgramOffset);
|
||||
DUMP_U32(t, lastByteHistoryIterOffset);
|
||||
DUMP_U32(t, minWidth);
|
||||
|
117
src/rose/rose_build_engine_blob.cpp
Normal file
117
src/rose/rose_build_engine_blob.cpp
Normal file
@ -0,0 +1,117 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "rose_build_engine_blob.h"
|
||||
|
||||
#include "rose_build_lookaround.h"
|
||||
#include "util/charreach_util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/**
 * \brief Get the blob index of the multipath reach data for \p reaches,
 * adding it to \p blob if it has not been written before.
 *
 * Each element of \p reaches holds one CharReach per path and becomes a
 * MULTI_REACH_BITVECTOR_LEN-byte block: byte c has bit i set when path i is
 * empty or when path i's reach contains character c. The index returned is
 * whatever blob.add_range() returned for the serialised data; identical
 * inputs share one copy via multi_cache.
 */
u32 lookaround_info::get_offset_of(const vector<vector<CharReach>> &reaches,
                                   RoseEngineBlob &blob) {
    assert(reaches.size() != 1);

    // Already serialised? Then hand back the existing index.
    const auto cached = multi_cache.find(reaches);
    if (cached != multi_cache.end()) {
        DEBUG_PRINTF("reusing reach at idx %u\n", cached->second);
        return cached->second;
    }

    vector<u8> raw_reach(reaches.size() * MULTI_REACH_BITVECTOR_LEN);
    size_t block_start = 0;
    for (const auto &paths : reaches) {
        assert(paths.size() == MAX_LOOKAROUND_PATHS);

        // Empty paths have their bit set in every byte of the block.
        u8 empty_paths = 0;
        for (size_t path = 0; path < paths.size(); path++) {
            if (paths[path].none()) {
                empty_paths |= (u8)1U << path;
            }
        }
        fill_n(raw_reach.data() + block_start, MULTI_REACH_BITVECTOR_LEN,
               empty_paths);

        for (size_t path = 0; path < paths.size(); path++) {
            const CharReach &cr = paths[path];
            if (!cr.none()) {
                size_t c = cr.find_first();
                while (c != cr.npos) {
                    raw_reach[block_start + c] |= (u8)1U << path;
                    c = cr.find_next(c);
                }
            }
        }

        block_start += MULTI_REACH_BITVECTOR_LEN;
    }

    u32 reach_idx = blob.add_range(raw_reach);
    DEBUG_PRINTF("adding reach at idx %u\n", reach_idx);
    multi_cache.emplace(reaches, reach_idx);

    return reach_idx;
}
|
||||
|
||||
/**
 * \brief Get the blob index of the reach bitvectors for \p reach, adding
 * them to \p blob if they have not been written before.
 *
 * Every CharReach is serialised with fill_bitvector() into consecutive
 * REACH_BITVECTOR_LEN-byte slots. The index returned is whatever
 * blob.add_range() returned for that data; identical inputs share one copy
 * via rcache.
 */
u32 lookaround_info::get_offset_of(const vector<CharReach> &reach,
                                   RoseEngineBlob &blob) {
    // Single lookup: find() gives both the hit test and the cached value.
    auto it = rcache.find(reach);
    if (it != rcache.end()) {
        DEBUG_PRINTF("reusing reach at idx %u\n", it->second);
        return it->second;
    }

    vector<u8> raw_reach(reach.size() * REACH_BITVECTOR_LEN);
    size_t off = 0;
    for (const auto &cr : reach) {
        assert(cr.any()); // Should be at least one character!
        fill_bitvector(cr, raw_reach.data() + off);
        off += REACH_BITVECTOR_LEN;
    }

    u32 offset = blob.add_range(raw_reach);
    DEBUG_PRINTF("adding reach at idx %u\n", offset);
    rcache.emplace(reach, offset);
    return offset;
}
|
||||
|
||||
/**
 * \brief Get the blob index of the look offset array \p look, adding it to
 * \p blob if it has not been written before.
 *
 * The index returned is whatever blob.add_range() returned for the array;
 * identical inputs share one copy via lcache.
 */
u32 lookaround_info::get_offset_of(const vector<s8> &look,
                                   RoseEngineBlob &blob) {
    // Single lookup: find() gives both the hit test and the cached value.
    auto it = lcache.find(look);
    if (it != lcache.end()) {
        DEBUG_PRINTF("reusing look at idx %u\n", it->second);
        return it->second;
    }

    u32 offset = blob.add_range(look);
    DEBUG_PRINTF("adding look at idx %u\n", offset);
    lcache.emplace(look, offset);
    return offset;
}
|
||||
|
||||
} // namespace ue2
|
@ -34,6 +34,7 @@
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/bytecode_ptr.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/container.h"
|
||||
#include "util/multibit_build.h"
|
||||
#include "util/noncopyable.h"
|
||||
@ -45,6 +46,21 @@
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class RoseEngineBlob;
|
||||
|
||||
struct lookaround_info : noncopyable {
|
||||
u32 get_offset_of(const std::vector<std::vector<CharReach>> &look,
|
||||
RoseEngineBlob &blob);
|
||||
u32 get_offset_of(const std::vector<CharReach> &reach,
|
||||
RoseEngineBlob &blob);
|
||||
u32 get_offset_of(const std::vector<s8> &look, RoseEngineBlob &blob);
|
||||
|
||||
private:
|
||||
unordered_map<std::vector<std::vector<CharReach>>, u32> multi_cache;
|
||||
unordered_map<std::vector<s8>, u32> lcache;
|
||||
unordered_map<std::vector<CharReach>, u32> rcache;
|
||||
};
|
||||
|
||||
class RoseEngineBlob : noncopyable {
|
||||
public:
|
||||
/** \brief Base offset of engine_blob in the Rose engine bytecode. */
|
||||
@ -133,6 +149,8 @@ public:
|
||||
copy_bytes((char *)engine + base_offset, blob);
|
||||
}
|
||||
|
||||
lookaround_info lookaround_cache;
|
||||
|
||||
private:
|
||||
void pad(size_t align) {
|
||||
assert(ISALIGNED_N(base_offset, align));
|
||||
|
@ -118,7 +118,7 @@ void RoseInstrCheckSingleLookaround::write(void *dest, RoseEngineBlob &blob,
|
||||
RoseInstrBase::write(dest, blob, offset_map);
|
||||
auto *inst = static_cast<impl_type *>(dest);
|
||||
inst->offset = offset;
|
||||
inst->reach_index = reach_index;
|
||||
inst->reach_index = blob.lookaround_cache.get_offset_of({reach}, blob);
|
||||
inst->fail_jump = calc_jump(offset_map, this, target);
|
||||
}
|
||||
|
||||
@ -126,9 +126,15 @@ void RoseInstrCheckLookaround::write(void *dest, RoseEngineBlob &blob,
|
||||
const OffsetMap &offset_map) const {
|
||||
RoseInstrBase::write(dest, blob, offset_map);
|
||||
auto *inst = static_cast<impl_type *>(dest);
|
||||
inst->look_index = look_index;
|
||||
inst->reach_index = reach_index;
|
||||
inst->count = count;
|
||||
vector<s8> look_offsets;
|
||||
vector<CharReach> reaches;
|
||||
for (const auto &le : look) {
|
||||
look_offsets.push_back(le.offset);
|
||||
reaches.push_back(le.reach);
|
||||
}
|
||||
inst->look_index = blob.lookaround_cache.get_offset_of(look_offsets, blob);
|
||||
inst->reach_index = blob.lookaround_cache.get_offset_of(reaches, blob);
|
||||
inst->count = verify_u32(look.size());
|
||||
inst->fail_jump = calc_jump(offset_map, this, target);
|
||||
}
|
||||
|
||||
@ -532,9 +538,26 @@ void RoseInstrMultipathLookaround::write(void *dest, RoseEngineBlob &blob,
|
||||
const OffsetMap &offset_map) const {
|
||||
RoseInstrBase::write(dest, blob, offset_map);
|
||||
auto *inst = static_cast<impl_type *>(dest);
|
||||
inst->look_index = look_index;
|
||||
inst->reach_index = reach_index;
|
||||
inst->count = count;
|
||||
auto &cache = blob.lookaround_cache;
|
||||
vector<s8> look_offsets;
|
||||
vector<vector<CharReach>> reaches;
|
||||
for (const auto &vle : multi_look) {
|
||||
reaches.push_back({});
|
||||
bool done_offset = false;
|
||||
|
||||
for (const auto &le : vle) {
|
||||
reaches.back().push_back(le.reach);
|
||||
|
||||
/* empty reaches don't have valid offsets */
|
||||
if (!done_offset && le.reach.any()) {
|
||||
look_offsets.push_back(le.offset);
|
||||
done_offset = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
inst->look_index = cache.get_offset_of(look_offsets, blob);
|
||||
inst->reach_index = cache.get_offset_of(reaches, blob);
|
||||
inst->count = verify_u32(multi_look.size());
|
||||
inst->last_start = last_start;
|
||||
copy(begin(start_mask), end(start_mask), inst->start_mask);
|
||||
inst->fail_jump = calc_jump(offset_map, this, target);
|
||||
|
@ -37,6 +37,7 @@
|
||||
#ifndef ROSE_BUILD_INSTRUCTIONS_H
|
||||
#define ROSE_BUILD_INSTRUCTIONS_H
|
||||
|
||||
#include "rose_build_lookaround.h"
|
||||
#include "rose_build_program.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
@ -382,20 +383,19 @@ class RoseInstrCheckSingleLookaround
|
||||
RoseInstrCheckSingleLookaround> {
|
||||
public:
|
||||
s8 offset;
|
||||
u32 reach_index;
|
||||
CharReach reach;
|
||||
const RoseInstruction *target;
|
||||
|
||||
RoseInstrCheckSingleLookaround(s8 offset_in, u32 reach_index_in,
|
||||
RoseInstrCheckSingleLookaround(s8 offset_in, CharReach reach_in,
|
||||
const RoseInstruction *target_in)
|
||||
: offset(offset_in), reach_index(reach_index_in), target(target_in) {}
|
||||
: offset(offset_in), reach(std::move(reach_in)), target(target_in) {}
|
||||
|
||||
bool operator==(const RoseInstrCheckSingleLookaround &ri) const {
|
||||
return offset == ri.offset && reach_index == ri.reach_index &&
|
||||
target == ri.target;
|
||||
return offset == ri.offset && reach == ri.reach && target == ri.target;
|
||||
}
|
||||
|
||||
size_t hash() const override {
|
||||
return hash_all(static_cast<int>(opcode), offset, reach_index);
|
||||
return hash_all(static_cast<int>(opcode), offset, reach);
|
||||
}
|
||||
|
||||
void write(void *dest, RoseEngineBlob &blob,
|
||||
@ -404,7 +404,7 @@ public:
|
||||
bool equiv_to(const RoseInstrCheckSingleLookaround &ri,
|
||||
const OffsetMap &offsets,
|
||||
const OffsetMap &other_offsets) const {
|
||||
return offset == ri.offset && reach_index == ri.reach_index &&
|
||||
return offset == ri.offset && reach == ri.reach &&
|
||||
offsets.at(target) == other_offsets.at(ri.target);
|
||||
}
|
||||
};
|
||||
@ -414,24 +414,19 @@ class RoseInstrCheckLookaround
|
||||
ROSE_STRUCT_CHECK_LOOKAROUND,
|
||||
RoseInstrCheckLookaround> {
|
||||
public:
|
||||
u32 look_index;
|
||||
u32 reach_index;
|
||||
u32 count;
|
||||
std::vector<LookEntry> look;
|
||||
const RoseInstruction *target;
|
||||
|
||||
RoseInstrCheckLookaround(u32 look_index_in, u32 reach_index_in,
|
||||
u32 count_in, const RoseInstruction *target_in)
|
||||
: look_index(look_index_in), reach_index(reach_index_in),
|
||||
count(count_in), target(target_in) {}
|
||||
RoseInstrCheckLookaround(std::vector<LookEntry> look_in,
|
||||
const RoseInstruction *target_in)
|
||||
: look(std::move(look_in)), target(target_in) {}
|
||||
|
||||
bool operator==(const RoseInstrCheckLookaround &ri) const {
|
||||
return look_index == ri.look_index && reach_index == ri.reach_index &&
|
||||
count == ri.count && target == ri.target;
|
||||
return look == ri.look && target == ri.target;
|
||||
}
|
||||
|
||||
size_t hash() const override {
|
||||
return hash_all(static_cast<int>(opcode), look_index, reach_index,
|
||||
count);
|
||||
return hash_all(static_cast<int>(opcode), look);
|
||||
}
|
||||
|
||||
void write(void *dest, RoseEngineBlob &blob,
|
||||
@ -439,9 +434,8 @@ public:
|
||||
|
||||
bool equiv_to(const RoseInstrCheckLookaround &ri, const OffsetMap &offsets,
|
||||
const OffsetMap &other_offsets) const {
|
||||
return look_index == ri.look_index && reach_index == ri.reach_index &&
|
||||
count == ri.count &&
|
||||
offsets.at(target) == other_offsets.at(ri.target);
|
||||
return look == ri.look
|
||||
&& offsets.at(target) == other_offsets.at(ri.target);
|
||||
}
|
||||
};
|
||||
|
||||
@ -1837,30 +1831,26 @@ class RoseInstrMultipathLookaround
|
||||
ROSE_STRUCT_MULTIPATH_LOOKAROUND,
|
||||
RoseInstrMultipathLookaround> {
|
||||
public:
|
||||
u32 look_index;
|
||||
u32 reach_index;
|
||||
u32 count;
|
||||
std::vector<std::vector<LookEntry>> multi_look;
|
||||
s32 last_start;
|
||||
std::array<u8, 16> start_mask;
|
||||
const RoseInstruction *target;
|
||||
|
||||
RoseInstrMultipathLookaround(u32 look_index_in, u32 reach_index_in,
|
||||
u32 count_in, s32 last_start_in,
|
||||
RoseInstrMultipathLookaround(std::vector<std::vector<LookEntry>> ml,
|
||||
s32 last_start_in,
|
||||
std::array<u8, 16> start_mask_in,
|
||||
const RoseInstruction *target_in)
|
||||
: look_index(look_index_in), reach_index(reach_index_in),
|
||||
count(count_in), last_start(last_start_in),
|
||||
: multi_look(std::move(ml)), last_start(last_start_in),
|
||||
start_mask(std::move(start_mask_in)), target(target_in) {}
|
||||
|
||||
bool operator==(const RoseInstrMultipathLookaround &ri) const {
|
||||
return look_index == ri.look_index && reach_index == ri.reach_index &&
|
||||
count == ri.count && last_start == ri.last_start &&
|
||||
start_mask == ri.start_mask && target == ri.target;
|
||||
return multi_look == ri.multi_look && last_start == ri.last_start
|
||||
&& start_mask == ri.start_mask && target == ri.target;
|
||||
}
|
||||
|
||||
size_t hash() const override {
|
||||
return hash_all(static_cast<int>(opcode), look_index, reach_index,
|
||||
count, last_start, start_mask);
|
||||
return hash_all(static_cast<int>(opcode), multi_look, last_start,
|
||||
start_mask);
|
||||
}
|
||||
|
||||
void write(void *dest, RoseEngineBlob &blob,
|
||||
@ -1869,10 +1859,9 @@ public:
|
||||
bool equiv_to(const RoseInstrMultipathLookaround &ri,
|
||||
const OffsetMap &offsets,
|
||||
const OffsetMap &other_offsets) const {
|
||||
return look_index == ri.look_index && reach_index == ri.reach_index &&
|
||||
count == ri.count && last_start == ri.last_start &&
|
||||
start_mask == ri.start_mask &&
|
||||
offsets.at(target) == other_offsets.at(ri.target);
|
||||
return multi_look == ri.multi_look && last_start == ri.last_start
|
||||
&& start_mask == ri.start_mask
|
||||
&& offsets.at(target) == other_offsets.at(ri.target);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -28,6 +28,7 @@
|
||||
|
||||
#include "rose_build_program.h"
|
||||
|
||||
#include "rose_build_engine_blob.h"
|
||||
#include "rose_build_instructions.h"
|
||||
#include "rose_build_lookaround.h"
|
||||
#include "rose_build_resources.h"
|
||||
@ -39,7 +40,6 @@
|
||||
#include "util/container.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/compile_error.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/report_manager.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
@ -851,40 +851,6 @@ void makeRoleGroups(const RoseGraph &g, ProgramBuild &prog_build,
|
||||
program.add_before_end(make_unique<RoseInstrSetGroups>(groups));
|
||||
}
|
||||
|
||||
static
|
||||
void addLookaround(lookaround_info &lookarounds,
|
||||
const vector<vector<LookEntry>> &look,
|
||||
u32 &look_index, u32 &reach_index) {
|
||||
// Check the cache.
|
||||
auto it = lookarounds.cache.find(look);
|
||||
if (it != lookarounds.cache.end()) {
|
||||
look_index = verify_u32(it->second.first);
|
||||
reach_index = verify_u32(it->second.second);
|
||||
DEBUG_PRINTF("reusing look at idx %u\n", look_index);
|
||||
DEBUG_PRINTF("reusing reach at idx %u\n", reach_index);
|
||||
return;
|
||||
}
|
||||
|
||||
size_t look_idx = lookarounds.lookTableSize;
|
||||
size_t reach_idx = lookarounds.reachTableSize;
|
||||
|
||||
if (look.size() == 1) {
|
||||
lookarounds.lookTableSize += look.front().size();
|
||||
lookarounds.reachTableSize += look.front().size() * REACH_BITVECTOR_LEN;
|
||||
} else {
|
||||
lookarounds.lookTableSize += look.size();
|
||||
lookarounds.reachTableSize += look.size() * MULTI_REACH_BITVECTOR_LEN;
|
||||
}
|
||||
|
||||
lookarounds.cache.emplace(look, make_pair(look_idx, reach_idx));
|
||||
lookarounds.table.emplace_back(look);
|
||||
|
||||
DEBUG_PRINTF("adding look at idx %zu\n", look_idx);
|
||||
DEBUG_PRINTF("adding reach at idx %zu\n", reach_idx);
|
||||
look_index = verify_u32(look_idx);
|
||||
reach_index = verify_u32(reach_idx);
|
||||
}
|
||||
|
||||
static
|
||||
bool checkReachMask(const CharReach &cr, u8 &andmask, u8 &cmpmask) {
|
||||
size_t reach_size = cr.count();
|
||||
@ -1278,8 +1244,7 @@ bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program) {
|
||||
* available.
|
||||
*/
|
||||
static
|
||||
void makeLookaroundInstruction(lookaround_info &lookarounds,
|
||||
const vector<LookEntry> &look,
|
||||
void makeLookaroundInstruction(const vector<LookEntry> &look,
|
||||
RoseProgram &program) {
|
||||
assert(!look.empty());
|
||||
|
||||
@ -1289,12 +1254,8 @@ void makeLookaroundInstruction(lookaround_info &lookarounds,
|
||||
|
||||
if (look.size() == 1) {
|
||||
s8 offset = look.begin()->offset;
|
||||
u32 look_idx, reach_idx;
|
||||
vector<vector<LookEntry>> lookaround;
|
||||
lookaround.emplace_back(look);
|
||||
addLookaround(lookarounds, lookaround, look_idx, reach_idx);
|
||||
// We don't need look_idx here.
|
||||
auto ri = make_unique<RoseInstrCheckSingleLookaround>(offset, reach_idx,
|
||||
const CharReach &reach = look.begin()->reach;
|
||||
auto ri = make_unique<RoseInstrCheckSingleLookaround>(offset, reach,
|
||||
program.end_instruction());
|
||||
program.add_before_end(move(ri));
|
||||
return;
|
||||
@ -1312,21 +1273,13 @@ void makeLookaroundInstruction(lookaround_info &lookarounds,
|
||||
return;
|
||||
}
|
||||
|
||||
u32 look_idx, reach_idx;
|
||||
vector<vector<LookEntry>> lookaround;
|
||||
lookaround.emplace_back(look);
|
||||
addLookaround(lookarounds, lookaround, look_idx, reach_idx);
|
||||
u32 look_count = verify_u32(look.size());
|
||||
|
||||
auto ri = make_unique<RoseInstrCheckLookaround>(look_idx, reach_idx,
|
||||
look_count,
|
||||
auto ri = make_unique<RoseInstrCheckLookaround>(look,
|
||||
program.end_instruction());
|
||||
program.add_before_end(move(ri));
|
||||
}
|
||||
|
||||
static
|
||||
void makeCheckLitMaskInstruction(const RoseBuildImpl &build,
|
||||
lookaround_info &lookarounds, u32 lit_id,
|
||||
void makeCheckLitMaskInstruction(const RoseBuildImpl &build, u32 lit_id,
|
||||
RoseProgram &program) {
|
||||
const auto &info = build.literal_info.at(lit_id);
|
||||
if (!info.requires_benefits) {
|
||||
@ -1348,7 +1301,7 @@ void makeCheckLitMaskInstruction(const RoseBuildImpl &build,
|
||||
}
|
||||
|
||||
assert(!look.empty());
|
||||
makeLookaroundInstruction(lookarounds, look, program);
|
||||
makeLookaroundInstruction(look, program);
|
||||
}
|
||||
|
||||
static
|
||||
@ -1417,7 +1370,6 @@ bool hasDelayedLiteral(const RoseBuildImpl &build,
|
||||
|
||||
static
|
||||
RoseProgram makeLitInitialProgram(const RoseBuildImpl &build,
|
||||
lookaround_info &lookarounds,
|
||||
ProgramBuild &prog_build, u32 lit_id,
|
||||
const vector<RoseEdge> &lit_edges,
|
||||
bool is_anchored_replay_program) {
|
||||
@ -1431,7 +1383,7 @@ RoseProgram makeLitInitialProgram(const RoseBuildImpl &build,
|
||||
}
|
||||
|
||||
// Check lit mask.
|
||||
makeCheckLitMaskInstruction(build, lookarounds, lit_id, program);
|
||||
makeCheckLitMaskInstruction(build, lit_id, program);
|
||||
|
||||
// Check literal groups. This is an optimisation that we only perform for
|
||||
// delayed literals, as their groups may be switched off; ordinarily, we
|
||||
@ -1458,20 +1410,6 @@ RoseProgram makeLitInitialProgram(const RoseBuildImpl &build,
|
||||
return program;
|
||||
}
|
||||
|
||||
#if defined(DEBUG) || defined(DUMP_SUPPORT)
|
||||
/**
 * \brief Render a list of look entries as "{offset: class}, {offset: class}"
 * text.
 */
static UNUSED
string dumpMultiLook(const vector<LookEntry> &looks) {
    ostringstream out;
    const char *separator = "";
    for (const auto &entry : looks) {
        out << separator << "{" << int(entry.offset) << ": "
            << describeClass(entry.reach) << "}";
        // Every entry after the first is preceded by a comma.
        separator = ", ";
    }
    return out.str();
}
|
||||
#endif
|
||||
|
||||
static
|
||||
bool makeRoleMultipathShufti(const vector<vector<LookEntry>> &multi_look,
|
||||
RoseProgram &program) {
|
||||
@ -1612,8 +1550,7 @@ bool makeRoleMultipathShufti(const vector<vector<LookEntry>> &multi_look,
|
||||
}
|
||||
|
||||
static
|
||||
void makeRoleMultipathLookaround(lookaround_info &lookarounds,
|
||||
const vector<vector<LookEntry>> &multi_look,
|
||||
void makeRoleMultipathLookaround(const vector<vector<LookEntry>> &multi_look,
|
||||
RoseProgram &program) {
|
||||
assert(!multi_look.empty());
|
||||
assert(multi_look.size() <= MAX_LOOKAROUND_PATHS);
|
||||
@ -1675,13 +1612,8 @@ void makeRoleMultipathLookaround(lookaround_info &lookarounds,
|
||||
ordered_look.emplace_back(multi_entry);
|
||||
}
|
||||
|
||||
u32 look_idx, reach_idx;
|
||||
addLookaround(lookarounds, ordered_look, look_idx, reach_idx);
|
||||
u32 look_count = verify_u32(ordered_look.size());
|
||||
|
||||
auto ri = make_unique<RoseInstrMultipathLookaround>(look_idx, reach_idx,
|
||||
look_count, last_start,
|
||||
start_mask,
|
||||
auto ri = make_unique<RoseInstrMultipathLookaround>(move(ordered_look),
|
||||
last_start, start_mask,
|
||||
program.end_instruction());
|
||||
program.add_before_end(move(ri));
|
||||
}
|
||||
@ -1689,8 +1621,7 @@ void makeRoleMultipathLookaround(lookaround_info &lookarounds,
|
||||
static
|
||||
void makeRoleLookaround(const RoseBuildImpl &build,
|
||||
const map<RoseVertex, left_build_info> &leftfix_info,
|
||||
lookaround_info &lookarounds, RoseVertex v,
|
||||
RoseProgram &program) {
|
||||
RoseVertex v, RoseProgram &program) {
|
||||
if (!build.cc.grey.roseLookaroundMasks) {
|
||||
return;
|
||||
}
|
||||
@ -1714,14 +1645,14 @@ void makeRoleLookaround(const RoseBuildImpl &build,
|
||||
findLookaroundMasks(build, v, look_more);
|
||||
mergeLookaround(look, look_more);
|
||||
if (!look.empty()) {
|
||||
makeLookaroundInstruction(lookarounds, look, program);
|
||||
makeLookaroundInstruction(look, program);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (!makeRoleMultipathShufti(looks, program)) {
|
||||
assert(looks.size() <= 8);
|
||||
makeRoleMultipathLookaround(lookarounds, looks, program);
|
||||
makeRoleMultipathLookaround(looks, program);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1902,7 +1833,6 @@ RoseProgram makeRoleProgram(const RoseBuildImpl &build,
|
||||
const map<RoseVertex, left_build_info> &leftfix_info,
|
||||
const map<suffix_id, u32> &suffixes,
|
||||
const map<u32, engine_info> &engine_info_by_queue,
|
||||
lookaround_info &lookarounds,
|
||||
const unordered_map<RoseVertex, u32> &roleStateIndices,
|
||||
ProgramBuild &prog_build, const RoseEdge &e) {
|
||||
const RoseGraph &g = build.g;
|
||||
@ -1929,7 +1859,7 @@ RoseProgram makeRoleProgram(const RoseBuildImpl &build,
|
||||
makeRoleCheckNotHandled(prog_build, v, program);
|
||||
}
|
||||
|
||||
makeRoleLookaround(build, leftfix_info, lookarounds, v, program);
|
||||
makeRoleLookaround(build, leftfix_info, v, program);
|
||||
makeRoleCheckLeftfix(build, leftfix_info, v, program);
|
||||
|
||||
// Next, we can add program instructions that have effects. This must be
|
||||
@ -2029,7 +1959,6 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
|
||||
const map<RoseVertex, left_build_info> &leftfix_info,
|
||||
const map<suffix_id, u32> &suffixes,
|
||||
const map<u32, engine_info> &engine_info_by_queue,
|
||||
lookaround_info &lookarounds,
|
||||
const unordered_map<RoseVertex, u32> &roleStateIndices,
|
||||
ProgramBuild &prog_build, u32 lit_id,
|
||||
const vector<RoseEdge> &lit_edges,
|
||||
@ -2040,8 +1969,8 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
|
||||
|
||||
// Construct initial program up front, as its early checks must be able
|
||||
// to jump to end and terminate processing for this literal.
|
||||
auto lit_program = makeLitInitialProgram(build, lookarounds, prog_build,
|
||||
lit_id, lit_edges,
|
||||
auto lit_program = makeLitInitialProgram(build, prog_build, lit_id,
|
||||
lit_edges,
|
||||
is_anchored_replay_program);
|
||||
|
||||
RoseProgram role_programs;
|
||||
@ -2060,8 +1989,8 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
|
||||
assert(contains(roleStateIndices, u));
|
||||
u32 pred_state = roleStateIndices.at(u);
|
||||
auto role_prog = makeRoleProgram(build, leftfix_info, suffixes,
|
||||
engine_info_by_queue, lookarounds,
|
||||
roleStateIndices, prog_build, e);
|
||||
engine_info_by_queue, roleStateIndices,
|
||||
prog_build, e);
|
||||
if (!role_prog.empty()) {
|
||||
pred_blocks[pred_state].add_block(move(role_prog));
|
||||
}
|
||||
@ -2080,8 +2009,8 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
|
||||
DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].index,
|
||||
g[target(e, g)].index);
|
||||
auto role_prog = makeRoleProgram(build, leftfix_info, suffixes,
|
||||
engine_info_by_queue, lookarounds,
|
||||
roleStateIndices, prog_build, e);
|
||||
engine_info_by_queue, roleStateIndices,
|
||||
prog_build, e);
|
||||
role_programs.add_block(move(role_prog));
|
||||
}
|
||||
|
||||
@ -2104,7 +2033,6 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
|
||||
}
|
||||
|
||||
RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build,
|
||||
lookaround_info &lookarounds,
|
||||
ProgramBuild &prog_build,
|
||||
const vector<u32> &lit_ids) {
|
||||
assert(!lit_ids.empty());
|
||||
@ -2126,7 +2054,7 @@ RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build,
|
||||
build.cc);
|
||||
}
|
||||
|
||||
makeCheckLitMaskInstruction(build, lookarounds, lit_id, prog);
|
||||
makeCheckLitMaskInstruction(build, lit_id, prog);
|
||||
makePushDelayedInstructions(build.literals, prog_build,
|
||||
build.literal_info.at(lit_id).delayed_ids,
|
||||
prog);
|
||||
|
@ -214,25 +214,6 @@ struct left_build_info {
|
||||
std::vector<std::vector<LookEntry>> lookaround;
|
||||
};
|
||||
|
||||
struct lookaround_info : noncopyable {
|
||||
/** \brief LookEntry list cache, so that we can reuse the look index and
|
||||
* reach index for the same lookaround. */
|
||||
ue2::unordered_map<std::vector<std::vector<LookEntry>>,
|
||||
std::pair<size_t, size_t>> cache;
|
||||
|
||||
/** \brief Lookaround table for Rose roles. */
|
||||
std::vector<std::vector<std::vector<LookEntry>>> table;
|
||||
|
||||
/** \brief Lookaround look table size. */
|
||||
size_t lookTableSize = 0;
|
||||
|
||||
/** \brief Lookaround reach table size.
|
||||
* since single path lookaround and multi-path lookaround have different
|
||||
* bitvectors range (32 and 256), we need to maintain both look table size
|
||||
* and reach table size. */
|
||||
size_t reachTableSize = 0;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Provides a brief summary of properties of an NFA that has already been
|
||||
* finalised and stored in the blob.
|
||||
@ -261,14 +242,12 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
|
||||
const std::map<RoseVertex, left_build_info> &leftfix_info,
|
||||
const std::map<suffix_id, u32> &suffixes,
|
||||
const std::map<u32, engine_info> &engine_info_by_queue,
|
||||
lookaround_info &lookarounds,
|
||||
const unordered_map<RoseVertex, u32> &roleStateIndices,
|
||||
ProgramBuild &prog_build, u32 lit_id,
|
||||
const std::vector<RoseEdge> &lit_edges,
|
||||
bool is_anchored_replay_program);
|
||||
|
||||
RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build,
|
||||
lookaround_info &lookarounds,
|
||||
ProgramBuild &prog_build,
|
||||
const std::vector<u32> &lit_ids);
|
||||
|
||||
|
@ -383,10 +383,6 @@ struct RoseEngine {
|
||||
|
||||
u32 leftOffset;
|
||||
u32 roseCount;
|
||||
u32 lookaroundTableOffset; //!< base of lookaround offset list (of s8 values)
|
||||
u32 lookaroundReachOffset; /**< base of lookaround reach bitvectors (32
|
||||
* bytes for single-path lookaround and 256 bytes
|
||||
* for multi-path lookaround) */
|
||||
|
||||
u32 eodProgramOffset; //!< EOD program, otherwise 0.
|
||||
|
||||
|
@ -231,8 +231,8 @@ struct ROSE_STRUCT_CHECK_SINGLE_LOOKAROUND {
|
||||
|
||||
struct ROSE_STRUCT_CHECK_LOOKAROUND {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 look_index; //!< Index for lookaround offset list.
|
||||
u32 reach_index; //!< Index for lookaround reach bitvectors.
|
||||
u32 look_index; //!< Offset in bytecode of lookaround offset list.
|
||||
u32 reach_index; //!< Offset in bytecode of lookaround reach bitvectors.
|
||||
u32 count; //!< The count of lookaround entries in one instruction.
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
@ -561,8 +561,8 @@ struct ROSE_STRUCT_CLEAR_WORK_DONE {
|
||||
|
||||
struct ROSE_STRUCT_MULTIPATH_LOOKAROUND {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 look_index; //!< Index for lookaround offset list.
|
||||
u32 reach_index; //!< Index for lookaround reach bitvectors.
|
||||
u32 look_index; //!< Offset in bytecode of lookaround offset list.
|
||||
u32 reach_index; //!< Offset in bytecode of lookaround reach bitvectors.
|
||||
u32 count; //!< The lookaround byte numbers for each path.
|
||||
s32 last_start; //!< The latest start offset among 8 paths.
|
||||
u8 start_mask[MULTIPATH_MAX_LEN]; /*!< Used to initialize path if left-most
|
||||
|
Loading…
x
Reference in New Issue
Block a user