mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-10-09 07:42:21 +03:00
Add SVE2 support for dvermicelli
Change-Id: I056ef15e162ab6fb1f78964321ce893f4096367e
This commit is contained in:
committed by
Konstantinos Margaritis
parent
455789db9f
commit
185c45263b
@@ -37,51 +37,20 @@
|
||||
#define VERM_TYPE m128
|
||||
#define VERM_SET_FN set1_16x8
|
||||
|
||||
// returns NULL if not found
|
||||
static really_inline
|
||||
const u8 *lastMatchOffset(const u8 *buf_end, u32 z) {
|
||||
assert(z);
|
||||
return buf_end - 16 + 31 - clz32(z);
|
||||
}
|
||||
const u8 *dvermPreconditionMasked(m128 chars1, m128 chars2,
|
||||
m128 mask1, m128 mask2, const u8 *buf) {
|
||||
m128 data = loadu128(buf); // unaligned
|
||||
m128 v1 = eq128(chars1, and128(data, mask1));
|
||||
m128 v2 = eq128(chars2, and128(data, mask2));
|
||||
u32 z = movemask128(and128(v1, rshiftbyte_m128(v2, 1)));
|
||||
|
||||
static really_inline
|
||||
const u8 *dvermSearchAligned(m128 chars1, m128 chars2, u8 c1, u8 c2,
|
||||
const u8 *buf, const u8 *buf_end) {
|
||||
for (; buf + 16 < buf_end; buf += 16) {
|
||||
m128 data = load128(buf);
|
||||
u32 z = movemask128(and128(eq128(chars1, data),
|
||||
rshiftbyte_m128(eq128(chars2, data), 1)));
|
||||
if (buf[15] == c1 && buf[16] == c2) {
|
||||
z |= (1 << 15);
|
||||
}
|
||||
if (unlikely(z)) {
|
||||
u32 pos = ctz32(z);
|
||||
return buf + pos;
|
||||
}
|
||||
/* no fixup of the boundary required - the aligned run will pick it up */
|
||||
if (unlikely(z)) {
|
||||
u32 pos = ctz32(z);
|
||||
return buf + pos;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *dvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2,
|
||||
const u8 *buf, const u8 *buf_end) {
|
||||
assert((size_t)buf % 16 == 0);
|
||||
m128 casemask = set1_16x8(CASE_CLEAR);
|
||||
|
||||
for (; buf + 16 < buf_end; buf += 16) {
|
||||
m128 data = load128(buf);
|
||||
m128 v = and128(casemask, data);
|
||||
u32 z = movemask128(and128(eq128(chars1, v),
|
||||
rshiftbyte_m128(eq128(chars2, v), 1)));
|
||||
if ((buf[15] & CASE_CLEAR) == c1 && (buf[16] & CASE_CLEAR) == c2) {
|
||||
z |= (1 << 15);
|
||||
}
|
||||
if (unlikely(z)) {
|
||||
u32 pos = ctz32(z);
|
||||
return buf + pos;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -106,128 +75,5 @@ const u8 *dvermSearchAlignedMasked(m128 chars1, m128 chars2,
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// returns NULL if not found
|
||||
static really_inline
|
||||
const u8 *dvermPrecondition(m128 chars1, m128 chars2, const u8 *buf) {
|
||||
m128 data = loadu128(buf); // unaligned
|
||||
u32 z = movemask128(and128(eq128(chars1, data),
|
||||
rshiftbyte_m128(eq128(chars2, data), 1)));
|
||||
|
||||
/* no fixup of the boundary required - the aligned run will pick it up */
|
||||
if (unlikely(z)) {
|
||||
u32 pos = ctz32(z);
|
||||
return buf + pos;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// returns NULL if not found
|
||||
static really_inline
|
||||
const u8 *dvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) {
|
||||
/* due to laziness, nonalphas and nocase having interesting behaviour */
|
||||
m128 casemask = set1_16x8(CASE_CLEAR);
|
||||
m128 data = loadu128(buf); // unaligned
|
||||
m128 v = and128(casemask, data);
|
||||
u32 z = movemask128(and128(eq128(chars1, v),
|
||||
rshiftbyte_m128(eq128(chars2, v), 1)));
|
||||
|
||||
/* no fixup of the boundary required - the aligned run will pick it up */
|
||||
if (unlikely(z)) {
|
||||
u32 pos = ctz32(z);
|
||||
return buf + pos;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// returns NULL if not found
|
||||
static really_inline
|
||||
const u8 *dvermPreconditionMasked(m128 chars1, m128 chars2,
|
||||
m128 mask1, m128 mask2, const u8 *buf) {
|
||||
m128 data = loadu128(buf); // unaligned
|
||||
m128 v1 = eq128(chars1, and128(data, mask1));
|
||||
m128 v2 = eq128(chars2, and128(data, mask2));
|
||||
u32 z = movemask128(and128(v1, rshiftbyte_m128(v2, 1)));
|
||||
|
||||
/* no fixup of the boundary required - the aligned run will pick it up */
|
||||
if (unlikely(z)) {
|
||||
u32 pos = ctz32(z);
|
||||
return buf + pos;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *rdvermSearchAligned(m128 chars1, m128 chars2, u8 c1, u8 c2,
|
||||
const u8 *buf, const u8 *buf_end) {
|
||||
assert((size_t)buf_end % 16 == 0);
|
||||
|
||||
for (; buf + 16 < buf_end; buf_end -= 16) {
|
||||
m128 data = load128(buf_end - 16);
|
||||
u32 z = movemask128(and128(eq128(chars2, data),
|
||||
lshiftbyte_m128(eq128(chars1, data), 1)));
|
||||
if (buf_end[-17] == c1 && buf_end[-16] == c2) {
|
||||
z |= 1;
|
||||
}
|
||||
if (unlikely(z)) {
|
||||
return lastMatchOffset(buf_end, z);
|
||||
}
|
||||
}
|
||||
return buf_end;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *rdvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2,
|
||||
const u8 *buf, const u8 *buf_end) {
|
||||
assert((size_t)buf_end % 16 == 0);
|
||||
m128 casemask = set1_16x8(CASE_CLEAR);
|
||||
|
||||
for (; buf + 16 < buf_end; buf_end -= 16) {
|
||||
m128 data = load128(buf_end - 16);
|
||||
m128 v = and128(casemask, data);
|
||||
u32 z = movemask128(and128(eq128(chars2, v),
|
||||
lshiftbyte_m128(eq128(chars1, v), 1)));
|
||||
if ((buf_end[-17] & CASE_CLEAR) == c1
|
||||
&& (buf_end[-16] & CASE_CLEAR) == c2) {
|
||||
z |= 1;
|
||||
}
|
||||
if (unlikely(z)) {
|
||||
return lastMatchOffset(buf_end, z);
|
||||
}
|
||||
}
|
||||
return buf_end;
|
||||
}
|
||||
|
||||
// returns NULL if not found
|
||||
static really_inline
|
||||
const u8 *rdvermPrecondition(m128 chars1, m128 chars2, const u8 *buf) {
|
||||
m128 data = loadu128(buf);
|
||||
u32 z = movemask128(and128(eq128(chars2, data),
|
||||
lshiftbyte_m128(eq128(chars1, data), 1)));
|
||||
|
||||
/* no fixup of the boundary required - the aligned run will pick it up */
|
||||
if (unlikely(z)) {
|
||||
return lastMatchOffset(buf + 16, z);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// returns NULL if not found
|
||||
static really_inline
|
||||
const u8 *rdvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) {
|
||||
/* due to laziness, nonalphas and nocase having interesting behaviour */
|
||||
m128 casemask = set1_16x8(CASE_CLEAR);
|
||||
m128 data = loadu128(buf);
|
||||
m128 v = and128(casemask, data);
|
||||
u32 z = movemask128(and128(eq128(chars2, v),
|
||||
lshiftbyte_m128(eq128(chars1, v), 1)));
|
||||
/* no fixup of the boundary required - the aligned run will pick it up */
|
||||
if (unlikely(z)) {
|
||||
return lastMatchOffset(buf + 16, z);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
Reference in New Issue
Block a user