mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
rose: add CHECK_SINGLE_LOOKAROUND instruction
This specialisation is cheaper than the shufti-based variants, so we prefer it for single character class tests.
This commit is contained in:
parent
385f71b44e
commit
997787bd4b
@ -1019,6 +1019,46 @@ int roseCheckShufti32x16(const struct core_info *ci, const u8 *hi_mask,
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Check the single byte at (end + checkOffset) against one entry of the
 * lookaround reach table.
 *
 * \param t Engine; the reach table starts t->lookaroundReachOffset bytes from
 *          the start of the engine.
 * \param scratch Supplies the core_info describing the buffers to read from.
 * \param checkOffset Signed distance from \p end of the byte to examine.
 * \param lookaroundIndex Index of the reach entry to test against; entries are
 *                        REACH_BITVECTOR_LEN bytes apart.
 * \param end Offset that \p checkOffset is applied to.
 *
 * \return 0 if (end + checkOffset) would be negative or the byte is not in the
 * reach entry; 1 if the byte is in the reach entry, or if it lies outside both
 * ci->buf and ci->hbuf and therefore cannot be examined here.
 */
static rose_inline
int roseCheckSingleLookaround(const struct RoseEngine *t,
                              const struct hs_scratch *scratch,
                              s8 checkOffset, u32 lookaroundIndex, u64a end) {
    assert(lookaroundIndex != MO_INVALID_IDX);

    const struct core_info *ci = &scratch->core_info;
    DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
                 ci->buf_offset, ci->buf_offset + ci->len);

    /* Position of 'end', and then of the target byte, relative to ci->buf. */
    const s64a rel_end = end - ci->buf_offset;
    const s64a pos = rel_end + checkOffset;
    DEBUG_PRINTF("base_offset=%lld\n", rel_end);
    DEBUG_PRINTF("checkOffset=%d offset=%lld\n", checkOffset, pos);

    /* A negative offset larger than 'end' points before offset zero. */
    if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
        DEBUG_PRINTF("too early, fail\n");
        return 0;
    }

    u8 c;
    if (pos >= 0) {
        /* Non-negative positions index ci->buf; past its end we pass. */
        if (pos >= (s64a)ci->len) {
            return 1;
        }
        c = ci->buf[pos];
    } else {
        /* Negative positions index backwards from the end of ci->hbuf;
         * further back than hlen bytes we pass. */
        if (pos < -(s64a)ci->hlen) {
            return 1;
        }
        c = ci->hbuf[ci->hlen + pos];
    }

    /* Locate the reach bitvector for this lookaround entry. */
    const u8 *reach = (const u8 *)t + t->lookaroundReachOffset +
                      lookaroundIndex * REACH_BITVECTOR_LEN;

    if (reachHasBit(reach, c)) {
        DEBUG_PRINTF("OK :)\n");
        return 1;
    }

    DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
    return 0;
}
|
||||
|
||||
/**
|
||||
* \brief Scan around a literal, checking that "lookaround" reach masks
|
||||
* are satisfied.
|
||||
@ -1415,6 +1455,17 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_SINGLE_LOOKAROUND) {
|
||||
if (!roseCheckSingleLookaround(t, scratch, ri->offset,
|
||||
ri->reach_index, end)) {
|
||||
DEBUG_PRINTF("failed lookaround check\n");
|
||||
assert(ri->fail_jump); // must progress
|
||||
pc += ri->fail_jump;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_LOOKAROUND) {
|
||||
if (!roseCheckLookaround(t, scratch, ri->index, ri->count,
|
||||
end)) {
|
||||
|
@ -3076,6 +3076,15 @@ void makeLookaroundInstruction(build_context &bc, const vector<LookEntry> &look,
|
||||
return;
|
||||
}
|
||||
|
||||
if (look.size() == 1) {
|
||||
s8 offset = look.begin()->offset;
|
||||
u32 look_idx = addLookaround(bc, look);
|
||||
auto ri = make_unique<RoseInstrCheckSingleLookaround>(offset, look_idx,
|
||||
program.end_instruction());
|
||||
program.add_before_end(move(ri));
|
||||
return;
|
||||
}
|
||||
|
||||
if (makeRoleMask(look, program)) {
|
||||
return;
|
||||
}
|
||||
|
@ -112,6 +112,15 @@ void RoseInstrCheckNotHandled::write(void *dest, RoseEngineBlob &blob,
|
||||
inst->fail_jump = calc_jump(offset_map, this, target);
|
||||
}
|
||||
|
||||
void RoseInstrCheckSingleLookaround::write(void *dest, RoseEngineBlob &blob,
|
||||
const OffsetMap &offset_map) const {
|
||||
RoseInstrBase::write(dest, blob, offset_map);
|
||||
auto *inst = static_cast<impl_type *>(dest);
|
||||
inst->offset = offset;
|
||||
inst->reach_index = reach_index;
|
||||
inst->fail_jump = calc_jump(offset_map, this, target);
|
||||
}
|
||||
|
||||
void RoseInstrCheckLookaround::write(void *dest, RoseEngineBlob &blob,
|
||||
const OffsetMap &offset_map) const {
|
||||
RoseInstrBase::write(dest, blob, offset_map);
|
||||
|
@ -378,6 +378,39 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class RoseInstrCheckSingleLookaround
|
||||
: public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SINGLE_LOOKAROUND,
|
||||
ROSE_STRUCT_CHECK_SINGLE_LOOKAROUND,
|
||||
RoseInstrCheckSingleLookaround> {
|
||||
public:
|
||||
s8 offset;
|
||||
u32 reach_index;
|
||||
const RoseInstruction *target;
|
||||
|
||||
RoseInstrCheckSingleLookaround(s8 offset_in, u32 reach_index_in,
|
||||
const RoseInstruction *target_in)
|
||||
: offset(offset_in), reach_index(reach_index_in), target(target_in) {}
|
||||
|
||||
bool operator==(const RoseInstrCheckSingleLookaround &ri) const {
|
||||
return offset == ri.offset && reach_index == ri.reach_index &&
|
||||
target == ri.target;
|
||||
}
|
||||
|
||||
size_t hash() const override {
|
||||
return hash_all(static_cast<int>(opcode), offset, reach_index);
|
||||
}
|
||||
|
||||
void write(void *dest, RoseEngineBlob &blob,
|
||||
const OffsetMap &offset_map) const override;
|
||||
|
||||
bool equiv_to(const RoseInstrCheckSingleLookaround &ri,
|
||||
const OffsetMap &offsets,
|
||||
const OffsetMap &other_offsets) const {
|
||||
return offset == ri.offset && reach_index == ri.reach_index &&
|
||||
offsets.at(target) == other_offsets.at(ri.target);
|
||||
}
|
||||
};
|
||||
|
||||
class RoseInstrCheckLookaround
|
||||
: public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_LOOKAROUND,
|
||||
ROSE_STRUCT_CHECK_LOOKAROUND,
|
||||
|
@ -273,6 +273,20 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_SINGLE_LOOKAROUND) {
|
||||
os << " offset " << int{ri->offset} << endl;
|
||||
os << " reach_index " << ri->reach_index << endl;
|
||||
os << " fail_jump " << offset + ri->fail_jump << endl;
|
||||
const u8 *base = (const u8 *)t;
|
||||
const u8 *reach_base = base + t->lookaroundReachOffset;
|
||||
const u8 *reach = reach_base +
|
||||
ri->reach_index * REACH_BITVECTOR_LEN;
|
||||
os << " contents:" << endl;
|
||||
describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT);
|
||||
os << endl;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_LOOKAROUND) {
|
||||
os << " index " << ri->index << endl;
|
||||
os << " count " << ri->count << endl;
|
||||
|
@ -48,6 +48,7 @@ enum RoseInstructionCode {
|
||||
ROSE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD.
|
||||
ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance from offset 0.
|
||||
ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled".
|
||||
ROSE_INSTR_CHECK_SINGLE_LOOKAROUND, //!< Single lookaround check.
|
||||
ROSE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check.
|
||||
ROSE_INSTR_CHECK_MASK, //!< 8-bytes mask check.
|
||||
ROSE_INSTR_CHECK_MASK_32, //!< 32-bytes and/cmp/neg mask check.
|
||||
@ -154,6 +155,13 @@ struct ROSE_STRUCT_CHECK_NOT_HANDLED {
|
||||
u32 fail_jump; //!< Jump forward this many bytes if we have seen key before.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_CHECK_SINGLE_LOOKAROUND {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
s8 offset; //!< The offset of the byte to examine.
|
||||
u32 reach_index; //!< The index of the reach table entry to use.
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_CHECK_LOOKAROUND {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 index;
|
||||
|
Loading…
x
Reference in New Issue
Block a user