mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-29 19:24:25 +03:00
Fix names; use our own intrinsics instead of the explicit _mm* ones
This commit is contained in:
@@ -59,7 +59,7 @@ u32 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table,
|
||||
|
||||
if (len) {
|
||||
m128 ss_char = load128(sherman_state);
|
||||
m128 cur_char = set16x8(cprime);
|
||||
m128 cur_char = set1_16x8(cprime);
|
||||
|
||||
u32 z = movemask128(eq128(ss_char, cur_char));
|
||||
|
||||
|
@@ -72,7 +72,7 @@ u32 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table,
|
||||
|
||||
if (len) {
|
||||
m128 ss_char = load128(sherman_state);
|
||||
m128 cur_char = set16x8(cprime);
|
||||
m128 cur_char = set1_16x8(cprime);
|
||||
|
||||
u32 z = movemask128(eq128(ss_char, cur_char));
|
||||
|
||||
@@ -153,7 +153,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
|
||||
assert(s_in); /* should not already be dead */
|
||||
assert(soft_c_end <= hard_c_end);
|
||||
DEBUG_PRINTF("s_in = %u (adjusted %u)\n", s_in, s_in - 1);
|
||||
m128 s = set16x8(s_in - 1);
|
||||
m128 s = set1_16x8(s_in - 1);
|
||||
const u8 *c = *c_inout;
|
||||
const u8 *c_end = hard_c_end - SHENG_CHUNK + 1;
|
||||
if (!do_accel) {
|
||||
@@ -171,8 +171,8 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
|
||||
|
||||
#if defined(HAVE_BMI2) && defined(ARCH_64_BIT)
|
||||
u32 sheng_limit_x4 = sheng_limit * 0x01010101;
|
||||
m128 simd_stop_limit = set4x32(sheng_stop_limit_x4);
|
||||
m128 accel_delta = set16x8(sheng_limit - sheng_stop_limit);
|
||||
m128 simd_stop_limit = set1_4x32(sheng_stop_limit_x4);
|
||||
m128 accel_delta = set1_16x8(sheng_limit - sheng_stop_limit);
|
||||
DEBUG_PRINTF("end %hhu, accel %hu --> limit %hhu\n", sheng_limit,
|
||||
m->sheng_accel_limit, sheng_stop_limit);
|
||||
#endif
|
||||
|
@@ -52,7 +52,7 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
|
||||
}
|
||||
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
|
||||
|
||||
m128 cur_state = set16x8(*state);
|
||||
m128 cur_state = set1_16x8(*state);
|
||||
const m128 *masks = s->shuffle_masks;
|
||||
|
||||
while (likely(cur_buf != end)) {
|
||||
|
@@ -86,7 +86,7 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
m128 cur_state = set16x8(*state);
|
||||
m128 cur_state = set1_16x8(*state);
|
||||
const m128 *masks = s->shuffle_masks;
|
||||
|
||||
while (likely(end - cur_buf >= 4)) {
|
||||
|
@@ -159,7 +159,7 @@ const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
|
||||
}
|
||||
|
||||
const m128 zeroes = zeroes128();
|
||||
const m128 low4bits = _mm_set1_epi8(0xf);
|
||||
const m128 low4bits = set1_16x8(0xf);
|
||||
const u8 *rv;
|
||||
|
||||
size_t min = (size_t)buf % 16;
|
||||
@@ -246,7 +246,7 @@ const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
|
||||
}
|
||||
|
||||
const m128 zeroes = zeroes128();
|
||||
const m128 low4bits = _mm_set1_epi8(0xf);
|
||||
const m128 low4bits = set1_16x8(0xf);
|
||||
const u8 *rv;
|
||||
|
||||
assert(buf_end - buf >= 16);
|
||||
@@ -320,7 +320,7 @@ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi,
|
||||
m128 mask2_lo, m128 mask2_hi,
|
||||
const u8 *buf, const u8 *buf_end) {
|
||||
const m128 ones = ones128();
|
||||
const m128 low4bits = _mm_set1_epi8(0xf);
|
||||
const m128 low4bits = set1_16x8(0xf);
|
||||
const u8 *rv;
|
||||
|
||||
size_t min = (size_t)buf % 16;
|
||||
@@ -455,15 +455,15 @@ const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
|
||||
buf, buf_end);
|
||||
}
|
||||
|
||||
const m256 low4bits = set32x8(0xf);
|
||||
const m256 low4bits = set1_32x8(0xf);
|
||||
|
||||
if (buf_end - buf <= 32) {
|
||||
return shuftiFwdShort(mask_lo, mask_hi, buf, buf_end, low4bits);
|
||||
}
|
||||
|
||||
const m256 zeroes = zeroes256();
|
||||
const m256 wide_mask_lo = set2x128(mask_lo);
|
||||
const m256 wide_mask_hi = set2x128(mask_hi);
|
||||
const m256 wide_mask_lo = set1_2x128(mask_lo);
|
||||
const m256 wide_mask_hi = set1_2x128(mask_hi);
|
||||
const u8 *rv;
|
||||
|
||||
size_t min = (size_t)buf % 32;
|
||||
@@ -579,15 +579,15 @@ const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
|
||||
buf, buf_end);
|
||||
}
|
||||
|
||||
const m256 low4bits = set32x8(0xf);
|
||||
const m256 low4bits = set1_32x8(0xf);
|
||||
|
||||
if (buf_end - buf <= 32) {
|
||||
return shuftiRevShort(mask_lo, mask_hi, buf, buf_end, low4bits);
|
||||
}
|
||||
|
||||
const m256 zeroes = zeroes256();
|
||||
const m256 wide_mask_lo = set2x128(mask_lo);
|
||||
const m256 wide_mask_hi = set2x128(mask_hi);
|
||||
const m256 wide_mask_lo = set1_2x128(mask_lo);
|
||||
const m256 wide_mask_hi = set1_2x128(mask_hi);
|
||||
const u8 *rv;
|
||||
|
||||
assert(buf_end - buf >= 32);
|
||||
@@ -676,7 +676,7 @@ static really_inline
|
||||
const u8 *shuftiDoubleShort(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo,
|
||||
m128 mask2_hi, const u8 *buf, const u8 *buf_end) {
|
||||
DEBUG_PRINTF("buf %p len %zu\n", buf, buf_end - buf);
|
||||
const m256 low4bits = set32x8(0xf);
|
||||
const m256 low4bits = set1_32x8(0xf);
|
||||
// run shufti over two overlapping 16-byte unaligned reads
|
||||
const m256 mask1 = combine2x128(mask1_hi, mask1_lo);
|
||||
const m256 mask2 = combine2x128(mask2_hi, mask2_lo);
|
||||
@@ -708,11 +708,11 @@ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi,
|
||||
}
|
||||
|
||||
const m256 ones = ones256();
|
||||
const m256 low4bits = set32x8(0xf);
|
||||
const m256 wide_mask1_lo = set2x128(mask1_lo);
|
||||
const m256 wide_mask1_hi = set2x128(mask1_hi);
|
||||
const m256 wide_mask2_lo = set2x128(mask2_lo);
|
||||
const m256 wide_mask2_hi = set2x128(mask2_hi);
|
||||
const m256 low4bits = set1_32x8(0xf);
|
||||
const m256 wide_mask1_lo = set1_2x128(mask1_lo);
|
||||
const m256 wide_mask1_hi = set1_2x128(mask1_hi);
|
||||
const m256 wide_mask2_lo = set1_2x128(mask2_lo);
|
||||
const m256 wide_mask2_hi = set1_2x128(mask2_hi);
|
||||
const u8 *rv;
|
||||
|
||||
size_t min = (size_t)buf % 32;
|
||||
|
@@ -64,8 +64,8 @@ const u8 *firstMatch(const u8 *buf, u32 z) {
|
||||
static really_inline
|
||||
u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) {
|
||||
|
||||
m128 highconst = _mm_set1_epi8(0x80);
|
||||
m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201);
|
||||
m128 highconst = set1_16x8(0x80);
|
||||
m128 shuf_mask_hi = set1_2x64(0x8040201008040201);
|
||||
|
||||
// and now do the real work
|
||||
m128 shuf1 = pshufb_m128(shuf_mask_lo_highclear, v);
|
||||
@@ -260,8 +260,8 @@ const u8 *firstMatch(const u8 *buf, u32 z) {
|
||||
static really_inline
|
||||
u32 block(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v) {
|
||||
|
||||
m256 highconst = _mm256_set1_epi8(0x80);
|
||||
m256 shuf_mask_hi = _mm256_set1_epi64x(0x8040201008040201);
|
||||
m256 highconst = set1_32x8(0x80);
|
||||
m256 shuf_mask_hi = set1_4x64(0x8040201008040201);
|
||||
|
||||
// and now do the real work
|
||||
m256 shuf1 = pshufb_m256(shuf_mask_lo_highclear, v);
|
||||
@@ -315,8 +315,8 @@ const u8 *truffleExec(m128 shuf_mask_lo_highclear,
|
||||
m128 shuf_mask_lo_highset,
|
||||
const u8 *buf, const u8 *buf_end) {
|
||||
DEBUG_PRINTF("len %zu\n", buf_end - buf);
|
||||
const m256 wide_clear = set2x128(shuf_mask_lo_highclear);
|
||||
const m256 wide_set = set2x128(shuf_mask_lo_highset);
|
||||
const m256 wide_clear = set1_2x128(shuf_mask_lo_highclear);
|
||||
const m256 wide_set = set1_2x128(shuf_mask_lo_highset);
|
||||
|
||||
assert(buf && buf_end);
|
||||
assert(buf < buf_end);
|
||||
@@ -382,8 +382,8 @@ const u8 *truffleRevMini(m256 shuf_mask_lo_highclear,
|
||||
const u8 *rtruffleExec(m128 shuf_mask_lo_highclear,
|
||||
m128 shuf_mask_lo_highset,
|
||||
const u8 *buf, const u8 *buf_end) {
|
||||
const m256 wide_clear = set2x128(shuf_mask_lo_highclear);
|
||||
const m256 wide_set = set2x128(shuf_mask_lo_highset);
|
||||
const m256 wide_clear = set1_2x128(shuf_mask_lo_highclear);
|
||||
const m256 wide_set = set1_2x128(shuf_mask_lo_highset);
|
||||
assert(buf && buf_end);
|
||||
assert(buf < buf_end);
|
||||
const u8 *rv;
|
||||
|
@@ -36,7 +36,7 @@
|
||||
|
||||
#define VERM_BOUNDARY 16
|
||||
#define VERM_TYPE m128
|
||||
#define VERM_SET_FN set16x8
|
||||
#define VERM_SET_FN set1_16x8
|
||||
|
||||
static really_inline
|
||||
const u8 *vermSearchAligned(m128 chars, const u8 *buf, const u8 *buf_end,
|
||||
@@ -74,7 +74,7 @@ static really_inline
|
||||
const u8 *vermSearchAlignedNocase(m128 chars, const u8 *buf,
|
||||
const u8 *buf_end, char negate) {
|
||||
assert((size_t)buf % 16 == 0);
|
||||
m128 casemask = set16x8(CASE_CLEAR);
|
||||
m128 casemask = set1_16x8(CASE_CLEAR);
|
||||
|
||||
for (; buf + 31 < buf_end; buf += 32) {
|
||||
m128 data = load128(buf);
|
||||
@@ -122,7 +122,7 @@ const u8 *vermUnalign(m128 chars, const u8 *buf, char negate) {
|
||||
// returns NULL if not found
|
||||
static really_inline
|
||||
const u8 *vermUnalignNocase(m128 chars, const u8 *buf, char negate) {
|
||||
m128 casemask = set16x8(CASE_CLEAR);
|
||||
m128 casemask = set1_16x8(CASE_CLEAR);
|
||||
m128 data = loadu128(buf); // unaligned
|
||||
u32 z = movemask128(eq128(chars, and128(casemask, data)));
|
||||
if (negate) {
|
||||
@@ -157,7 +157,7 @@ static really_inline
|
||||
const u8 *dvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2,
|
||||
const u8 *buf, const u8 *buf_end) {
|
||||
assert((size_t)buf % 16 == 0);
|
||||
m128 casemask = set16x8(CASE_CLEAR);
|
||||
m128 casemask = set1_16x8(CASE_CLEAR);
|
||||
|
||||
for (; buf + 16 < buf_end; buf += 16) {
|
||||
m128 data = load128(buf);
|
||||
@@ -219,7 +219,7 @@ const u8 *dvermPrecondition(m128 chars1, m128 chars2, const u8 *buf) {
|
||||
static really_inline
|
||||
const u8 *dvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) {
|
||||
/* due to laziness, nonalphas and nocase having interesting behaviour */
|
||||
m128 casemask = set16x8(CASE_CLEAR);
|
||||
m128 casemask = set1_16x8(CASE_CLEAR);
|
||||
m128 data = loadu128(buf); // unaligned
|
||||
m128 v = and128(casemask, data);
|
||||
u32 z = movemask128(and128(eq128(chars1, v),
|
||||
@@ -277,7 +277,7 @@ static really_inline
|
||||
const u8 *rvermSearchAlignedNocase(m128 chars, const u8 *buf,
|
||||
const u8 *buf_end, char negate) {
|
||||
assert((size_t)buf_end % 16 == 0);
|
||||
m128 casemask = set16x8(CASE_CLEAR);
|
||||
m128 casemask = set1_16x8(CASE_CLEAR);
|
||||
|
||||
for (; buf + 15 < buf_end; buf_end -= 16) {
|
||||
m128 data = load128(buf_end - 16);
|
||||
@@ -309,7 +309,7 @@ const u8 *rvermUnalign(m128 chars, const u8 *buf, char negate) {
|
||||
// returns NULL if not found
|
||||
static really_inline
|
||||
const u8 *rvermUnalignNocase(m128 chars, const u8 *buf, char negate) {
|
||||
m128 casemask = set16x8(CASE_CLEAR);
|
||||
m128 casemask = set1_16x8(CASE_CLEAR);
|
||||
m128 data = loadu128(buf); // unaligned
|
||||
u32 z = movemask128(eq128(chars, and128(casemask, data)));
|
||||
if (negate) {
|
||||
@@ -344,7 +344,7 @@ static really_inline
|
||||
const u8 *rdvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2,
|
||||
const u8 *buf, const u8 *buf_end) {
|
||||
assert((size_t)buf_end % 16 == 0);
|
||||
m128 casemask = set16x8(CASE_CLEAR);
|
||||
m128 casemask = set1_16x8(CASE_CLEAR);
|
||||
|
||||
for (; buf + 16 < buf_end; buf_end -= 16) {
|
||||
m128 data = load128(buf_end - 16);
|
||||
@@ -381,7 +381,7 @@ const u8 *rdvermPrecondition(m128 chars1, m128 chars2, const u8 *buf) {
|
||||
static really_inline
|
||||
const u8 *rdvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) {
|
||||
/* due to laziness, nonalphas and nocase having interesting behaviour */
|
||||
m128 casemask = set16x8(CASE_CLEAR);
|
||||
m128 casemask = set1_16x8(CASE_CLEAR);
|
||||
m128 data = loadu128(buf);
|
||||
m128 v = and128(casemask, data);
|
||||
u32 z = movemask128(and128(eq128(chars2, v),
|
||||
@@ -398,7 +398,7 @@ const u8 *rdvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) {
|
||||
|
||||
#define VERM_BOUNDARY 64
|
||||
#define VERM_TYPE m512
|
||||
#define VERM_SET_FN set64x8
|
||||
#define VERM_SET_FN set1_64x8
|
||||
|
||||
static really_inline
|
||||
const u8 *vermMini(m512 chars, const u8 *buf, const u8 *buf_end, char negate) {
|
||||
|
Reference in New Issue
Block a user