Merge pull request #239 from ypicchi-arm/feature/add-sheng-unit-tests

Feature/add sheng unit tests
This commit is contained in:
Konstantinos Margaritis 2024-05-01 00:07:14 +03:00 committed by GitHub
commit 27bb2b9134
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 863 additions and 46 deletions

View File

@ -154,7 +154,7 @@ char fireReports(const struct sheng *sh, NfaCallback cb, void *ctxt,
return MO_CONTINUE_MATCHING; /* continue execution */
}
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
// Sheng32
static really_inline
const struct sheng32 *get_sheng32(const struct NFA *n) {
@ -351,7 +351,7 @@ char fireReports64(const struct sheng64 *sh, NfaCallback cb, void *ctxt,
}
return MO_CONTINUE_MATCHING; /* continue execution */
}
#endif // end of HAVE_AVX512VBMI
#endif // end of HAVE_AVX512VBMI || HAVE_SVE
/* include Sheng function definitions */
#include "sheng_defs.h"
@ -871,7 +871,7 @@ char nfaExecSheng_expandState(UNUSED const struct NFA *nfa, void *dest,
return 0;
}
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
// Sheng32
static really_inline
char runSheng32Cb(const struct sheng32 *sh, NfaCallback cb, void *ctxt,
@ -1874,4 +1874,4 @@ char nfaExecSheng64_expandState(UNUSED const struct NFA *nfa, void *dest,
*(u8 *)dest = *(const u8 *)src;
return 0;
}
#endif // end of HAVE_AVX512VBMI
#endif // end of HAVE_AVX512VBMI || HAVE_SVE

View File

@ -58,7 +58,7 @@ char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer,
size_t length, NfaCallback cb, void *context);
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
@ -106,8 +106,7 @@ char nfaExecSheng64_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer,
size_t length, NfaCallback cb, void *context);
#else // !HAVE_AVX512VBMI
#else // !HAVE_AVX512VBMI && !HAVE_SVE
#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
@ -138,6 +137,7 @@ char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer,
#define nfaExecSheng64_testEOD NFA_API_NO_IMPL
#define nfaExecSheng64_reportCurrent NFA_API_NO_IMPL
#define nfaExecSheng64_B NFA_API_NO_IMPL
#endif // end of HAVE_AVX512VBMI
#endif // end of HAVE_AVX512VBMI || defined(HAVE_SVE)
#endif /* SHENG_H_ */

View File

@ -52,7 +52,7 @@ u8 hasInterestingStates(const u8 a, const u8 b, const u8 c, const u8 d) {
return (a | b | c | d) & (SHENG_STATE_FLAG_MASK);
}
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
static really_inline
u8 isDeadState32(const u8 a) {
return a & SHENG32_STATE_DEAD;
@ -108,7 +108,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define SHENG_IMPL sheng_cod
#define DEAD_FUNC isDeadState
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_cod
#define DEAD_FUNC32 isDeadState32
#define ACCEPT_FUNC32 isAcceptState32
@ -121,7 +121,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef DEAD_FUNC32
#undef ACCEPT_FUNC32
@ -135,7 +135,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define SHENG_IMPL sheng_co
#define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_co
#define DEAD_FUNC32 dummyFunc
#define ACCEPT_FUNC32 isAcceptState32
@ -148,7 +148,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef DEAD_FUNC32
#undef ACCEPT_FUNC32
@ -162,7 +162,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define SHENG_IMPL sheng_samd
#define DEAD_FUNC isDeadState
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_samd
#define DEAD_FUNC32 isDeadState32
#define ACCEPT_FUNC32 isAcceptState32
@ -175,7 +175,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef DEAD_FUNC32
#undef ACCEPT_FUNC32
@ -189,7 +189,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define SHENG_IMPL sheng_sam
#define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_sam
#define DEAD_FUNC32 dummyFunc
#define ACCEPT_FUNC32 isAcceptState32
@ -202,7 +202,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef DEAD_FUNC32
#undef ACCEPT_FUNC32
@ -216,7 +216,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define SHENG_IMPL sheng_nmd
#define DEAD_FUNC isDeadState
#define ACCEPT_FUNC dummyFunc
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_nmd
#define DEAD_FUNC32 isDeadState32
#define ACCEPT_FUNC32 dummyFunc
@ -229,7 +229,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef DEAD_FUNC32
#undef ACCEPT_FUNC32
@ -243,7 +243,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define SHENG_IMPL sheng_nm
#define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_nm
#define DEAD_FUNC32 dummyFunc
#define ACCEPT_FUNC32 dummyFunc
@ -256,7 +256,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef DEAD_FUNC32
#undef ACCEPT_FUNC32
@ -277,7 +277,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_coda
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 isDeadState32
@ -296,7 +296,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
@ -316,7 +316,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_cod
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 isDeadState32
@ -339,7 +339,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
@ -363,7 +363,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_coa
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 dummyFunc
@ -382,7 +382,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
@ -402,7 +402,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_co
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 dummyFunc
@ -425,7 +425,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
@ -449,7 +449,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_samda
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 isDeadState32
@ -468,7 +468,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
@ -488,7 +488,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_samd
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 isDeadState32
@ -511,7 +511,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
@ -535,7 +535,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_sama
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 dummyFunc
@ -554,7 +554,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
@ -574,7 +574,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_sam
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 dummyFunc
@ -597,7 +597,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
@ -623,7 +623,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC isAccelState
#define ACCEPT_FUNC dummyFunc
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_nmda
#define INTERESTING_FUNC32 dummyFunc4
#define INNER_DEAD_FUNC32 dummyFunc
@ -642,7 +642,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
@ -662,7 +662,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_nmd
#define INTERESTING_FUNC32 dummyFunc4
#define INNER_DEAD_FUNC32 dummyFunc
@ -685,7 +685,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
@ -712,7 +712,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_nm
#define INTERESTING_FUNC32 dummyFunc4
#define INNER_DEAD_FUNC32 dummyFunc
@ -735,7 +735,7 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32

View File

@ -96,7 +96,7 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
return MO_CONTINUE_MATCHING;
}
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
static really_inline
char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
const struct sheng32 *s,
@ -114,14 +114,28 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
}
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
#if defined(HAVE_SVE)
const svbool_t lane_pred_32 = svwhilelt_b8(0, 32);
svuint8_t cur_state = svdup_u8(*state);
svuint8_t tbl_mask = svdup_u8((unsigned char)0x1F);
const m512 *masks = s->succ_masks;
#else
m512 cur_state = set1_64x8(*state);
const m512 *masks = s->succ_masks;
#endif
while (likely(cur_buf != end)) {
const u8 c = *cur_buf;
#if defined(HAVE_SVE)
svuint8_t succ_mask = svld1(lane_pred_32, (const u8*)(masks + c));
cur_state = svtbl(succ_mask, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 tmp = svlastb(lane_pred_32, cur_state);
#else
const m512 succ_mask = masks[c];
cur_state = vpermb512(cur_state, succ_mask);
const u8 tmp = movd512(cur_state);
#endif
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG32_STATE_MASK,
@ -153,7 +167,11 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
}
cur_buf++;
}
#if defined(HAVE_SVE)
*state = svlastb(lane_pred_32, cur_state);
#else
*state = movd512(cur_state);
#endif
*scan_end = cur_buf;
return MO_CONTINUE_MATCHING;
}
@ -175,14 +193,28 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
}
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
#if defined(HAVE_SVE)
const svbool_t lane_pred_64 = svwhilelt_b8(0, 64);
svuint8_t cur_state = svdup_u8(*state);
svuint8_t tbl_mask = svdup_u8((unsigned char)0x3F);
const m512 *masks = s->succ_masks;
#else
m512 cur_state = set1_64x8(*state);
const m512 *masks = s->succ_masks;
#endif
while (likely(cur_buf != end)) {
const u8 c = *cur_buf;
#if defined(HAVE_SVE)
svuint8_t succ_mask = svld1(lane_pred_64, (const u8*)(masks + c));
cur_state = svtbl(succ_mask, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 tmp = svlastb(lane_pred_64, cur_state);
#else
const m512 succ_mask = masks[c];
cur_state = vpermb512(cur_state, succ_mask);
const u8 tmp = movd512(cur_state);
#endif
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG64_STATE_MASK,
@ -214,7 +246,11 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
}
cur_buf++;
}
#if defined(HAVE_SVE)
*state = svlastb(lane_pred_64, cur_state);
#else
*state = movd512(cur_state);
#endif
*scan_end = cur_buf;
return MO_CONTINUE_MATCHING;
}

View File

@ -283,7 +283,7 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
return MO_CONTINUE_MATCHING;
}
#if defined(HAVE_AVX512VBMI)
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
static really_inline
char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
const struct sheng32 *s,
@ -320,8 +320,15 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
return MO_CONTINUE_MATCHING;
}
#if defined(HAVE_SVE)
const svbool_t lane_pred_32 = svwhilelt_b8(0, 32);
svuint8_t cur_state = svdup_u8(*state);
svuint8_t tbl_mask = svdup_u8((unsigned char)0x1F);
const m512 *masks = s->succ_masks;
#else
m512 cur_state = set1_64x8(*state);
const m512 *masks = s->succ_masks;
#endif
while (likely(end - cur_buf >= 4)) {
const u8 *b1 = cur_buf;
@ -333,6 +340,23 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
const u8 c3 = *b3;
const u8 c4 = *b4;
#if defined(HAVE_SVE)
svuint8_t succ_mask1 = svld1(lane_pred_32, (const u8*)(masks+c1));
cur_state = svtbl(succ_mask1, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a1 = svlastb(lane_pred_32, cur_state);
svuint8_t succ_mask2 = svld1(lane_pred_32, (const u8*)(masks+c2));
cur_state = svtbl(succ_mask2, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a2 = svlastb(lane_pred_32, cur_state);
svuint8_t succ_mask3 = svld1(lane_pred_32, (const u8*)(masks+c3));
cur_state = svtbl(succ_mask3, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a3 = svlastb(lane_pred_32, cur_state);
svuint8_t succ_mask4 = svld1(lane_pred_32, (const u8*)(masks+c4));
cur_state = svtbl(succ_mask4, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a4 = svlastb(lane_pred_32, cur_state);
#else
const m512 succ_mask1 = masks[c1];
cur_state = vpermb512(cur_state, succ_mask1);
const u8 a1 = movd512(cur_state);
@ -348,6 +372,7 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
const m512 succ_mask4 = masks[c4];
cur_state = vpermb512(cur_state, succ_mask4);
const u8 a4 = movd512(cur_state);
#endif
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG32_STATE_MASK,
@ -517,7 +542,11 @@ char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
};
cur_buf += 4;
}
#if defined(HAVE_SVE)
*state = svlastb(lane_pred_32, cur_state);
#else
*state = movd512(cur_state);
#endif
*scan_end = cur_buf;
return MO_CONTINUE_MATCHING;
}
@ -541,9 +570,15 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
*scan_end = end;
return MO_CONTINUE_MATCHING;
}
#if defined(HAVE_SVE)
const svbool_t lane_pred_64 = svwhilelt_b8(0, 64);
svuint8_t cur_state = svdup_u8(*state);
svuint8_t tbl_mask = svdup_u8((unsigned char)0x3F);
const m512 *masks = s->succ_masks;
#else
m512 cur_state = set1_64x8(*state);
const m512 *masks = s->succ_masks;
#endif
while (likely(end - cur_buf >= 4)) {
const u8 *b1 = cur_buf;
@ -555,6 +590,23 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
const u8 c3 = *b3;
const u8 c4 = *b4;
#if defined(HAVE_SVE)
svuint8_t succ_mask1 = svld1(lane_pred_64, (const u8*)(masks+c1));
cur_state = svtbl(succ_mask1, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a1 = svlastb(lane_pred_64, cur_state);
svuint8_t succ_mask2 = svld1(lane_pred_64, (const u8*)(masks+c2));
cur_state = svtbl(succ_mask2, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a2 = svlastb(lane_pred_64, cur_state);
svuint8_t succ_mask3 = svld1(lane_pred_64, (const u8*)(masks+c3));
cur_state = svtbl(succ_mask3, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a3 = svlastb(lane_pred_64, cur_state);
svuint8_t succ_mask4 = svld1(lane_pred_64, (const u8*)(masks+c4));
cur_state = svtbl(succ_mask4, svand_x(svptrue_b8(), tbl_mask, cur_state));
const u8 a4 = svlastb(lane_pred_64, cur_state);
#else
const m512 succ_mask1 = masks[c1];
cur_state = vpermb512(cur_state, succ_mask1);
const u8 a1 = movd512(cur_state);
@ -570,6 +622,7 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
const m512 succ_mask4 = masks[c4];
cur_state = vpermb512(cur_state, succ_mask4);
const u8 a4 = movd512(cur_state);
#endif
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG64_STATE_MASK,
@ -703,7 +756,11 @@ char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
}
cur_buf += 4;
}
#if defined(HAVE_SVE)
*state = svlastb(lane_pred_64, cur_state);
#else
*state = movd512(cur_state);
#endif
*scan_end = cur_buf;
return MO_CONTINUE_MATCHING;
}

View File

@ -730,10 +730,17 @@ bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc,
return nullptr;
}
#ifdef HAVE_SVE
if (svcntb()<32) {
DEBUG_PRINTF("Sheng32 failed, SVE width is too small!\n");
return nullptr;
}
#else
if (!cc.target_info.has_avx512vbmi()) {
DEBUG_PRINTF("Sheng32 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
return nullptr;
}
#endif
sheng_build_strat strat(raw, rm, only_accel_init);
dfa_info info(strat);
@ -762,10 +769,17 @@ bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc,
return nullptr;
}
#ifdef HAVE_SVE
if (svcntb()<64) {
DEBUG_PRINTF("Sheng64 failed, SVE width is too small!\n");
return nullptr;
}
#else
if (!cc.target_info.has_avx512vbmi()) {
DEBUG_PRINTF("Sheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
return nullptr;
}
#endif
sheng_build_strat strat(raw, rm, only_accel_init);
dfa_info info(strat);

View File

@ -102,6 +102,7 @@ set(unit_internal_SOURCES
internal/rvermicelli.cpp
internal/simd_utils.cpp
internal/supervector.cpp
internal/sheng.cpp
internal/shuffle.cpp
internal/shufti.cpp
internal/state_compress.cpp

709
unit/internal/sheng.cpp Normal file
View File

@ -0,0 +1,709 @@
/*
* Copyright (c) 2024, Arm ltd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "gtest/gtest.h"
#include "nfa/shengcompile.h"
#include "nfa/rdfa.h"
#include "util/bytecode_ptr.h"
#include "util/compile_context.h"
#include "util/report_manager.h"
extern "C" {
#include "hs_compile.h"
#include "nfa/nfa_api.h"
#include "nfa/nfa_api_queue.h"
#include "nfa/nfa_api_util.h"
#include "nfa/nfa_internal.h"
#include "nfa/rdfa.h"
#include "nfa/sheng.h"
#include "ue2common.h"
}
namespace {
struct callback_context {
unsigned int period;
unsigned int match_count;
unsigned int pattern_length;
};
/** NfaCallback stub: logs every match and never stops the scan. */
int dummy_callback(u64a start, u64a end, ReportID id, void *context) {
    (void)context;
    printf("callback %llu %llu %u\n", start, end, id);
    return 1; /* non-zero tells the engine to keep matching */
}
/**
 * NfaCallback verifying that matches arrive with a fixed period:
 * match number N must end exactly at N * period + pattern_length.
 */
int periodic_pattern_callback(u64a start, u64a end, ReportID id, void *raw_context) {
    (void) start;
    (void) id;
    struct callback_context *ctx = (struct callback_context *) raw_context;
    const unsigned int expected_start = ctx->period * ctx->match_count;
    EXPECT_EQ(expected_start, end - ctx->pattern_length);
    ctx->match_count++;
    return 1; /* non-zero: continue matching */
}
/**
 * @brief Build a linear chain of states: for each state in
 * [start_state, end_state), only the state's own symbol index advances to the
 * next state; symbols 0 and 1 restart a fresh match at state 2, and every
 * other symbol falls back to state 1.
 */
static void fill_straight_regex_sequence(struct ue2::raw_dfa *dfa, int start_state, int end_state, int state_count)
{
    for (int s = start_state; s < end_state; ++s) {
        auto &row = dfa->states[s].next;
        row.assign(state_count, 1); /* default: back to the "nothing" state */
        row[0] = 2;                 /* symbol 0 restarts the pattern */
        row[1] = 2;                 /* symbol 1 restarts the pattern */
        row[s] = s + 1;             /* the expected symbol advances the chain */
    }
}
/**
 * Build an 8-state raw DFA matching [a,b][c-e]{3}of, suitable for the
 * 16-state sheng engine, and attach report rID to the accept state.
 *
 * @param dfa empty raw_dfa to populate (states, alphabet remap, transitions)
 * @param rID report attached to the accept state (state 7)
 */
static void init_raw_dfa16(struct ue2::raw_dfa *dfa, const ReportID rID)
{
    dfa->start_anchored = 1;
    dfa->start_floating = 1;
    dfa->alpha_size = 8;

    /* create the 8 states; transitions are filled in below */
    int nb_state = 8;
    for(int i = 0; i < nb_state; i++) {
        struct ue2::dstate state(dfa->alpha_size);
        state.next = std::vector<ue2::dstate_id_t>(nb_state);
        state.daddy = 0;
        state.impl_id = i; /* id of the state */
        state.reports = ue2::flat_set<ReportID>();
        state.reports_eod = ue2::flat_set<ReportID>();
        dfa->states.push_back(state);
    }

    /* add a report to every accept state */
    dfa->states[7].reports.insert(rID);

    /**
     * [a,b][c-e]{3}of
     * (1) -a,b-> (2) -c,d,e-> (3) -c,d,e-> (4) -c,d,e-> (5) -o-> (6) -f-> ((7))
     * (0) = dead
     */
    /* default every byte to symbol 0.
     * NOTE(review): unmapped bytes therefore share symbol 0 with 'a' and
     * behave like 'a' during the scan — confirm this is intended. */
    for(int i = 0; i < ue2::ALPHABET_SIZE; i++) {
        dfa->alpha_remap[i] = 0;
    }
    dfa->alpha_remap['a'] = 0;
    dfa->alpha_remap['b'] = 1;
    dfa->alpha_remap['c'] = 2;
    dfa->alpha_remap['d'] = 3;
    dfa->alpha_remap['e'] = 4;
    dfa->alpha_remap['o'] = 5;
    dfa->alpha_remap['f'] = 6;
    /* index 256 (TOP) is validated against alpha_size-1 somewhere in the
     * compiler, so it must stay mapped to the last symbol slot */
    dfa->alpha_remap[256] = 7;

    /*                       a b c d e o f */
    dfa->states[0].next = {0,0,0,0,0,0,0};
    dfa->states[1].next = {2,2,1,1,1,1,1}; /* nothing */
    dfa->states[2].next = {2,2,3,3,3,1,1}; /* [a,b] */
    dfa->states[3].next = {2,2,4,4,4,1,1}; /* [a,b][c-e]{1} */
    dfa->states[4].next = {2,2,5,5,5,1,1}; /* [a,b][c-e]{2} */
    fill_straight_regex_sequence(dfa, 5, 7, 7); /* [a,b][c-e]{3}o */
    dfa->states[7].next = {2,2,1,1,1,1,1}; /* [a,b][c-e]{3}of */
}
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
/* We need more than 16 states to run sheng32, so make the graph longer */
/**
 * Build an 18-state raw DFA matching [a,b][c-e]{3}of0123456789 and attach
 * report rID to the accept state. We need more than 16 states to exercise
 * sheng32, so the graph is a longer chain than the 16-state variant.
 *
 * @param dfa empty raw_dfa to populate (states, alphabet remap, transitions)
 * @param rID report attached to the accept state (state 17)
 */
static void init_raw_dfa32(struct ue2::raw_dfa *dfa, const ReportID rID)
{
    dfa->start_anchored = 1;
    dfa->start_floating = 1;
    dfa->alpha_size = 18;

    /* create the 18 states; transitions are filled in below */
    int nb_state = 18;
    for(int i = 0; i < nb_state; i++) {
        struct ue2::dstate state(dfa->alpha_size);
        state.next = std::vector<ue2::dstate_id_t>(nb_state);
        state.daddy = 0;
        state.impl_id = i; /* id of the state */
        state.reports = ue2::flat_set<ReportID>();
        state.reports_eod = ue2::flat_set<ReportID>();
        dfa->states.push_back(state);
    }

    /* add a report to every accept state */
    dfa->states[17].reports.insert(rID);

    /**
     * [a,b][c-e]{3}of0123456789
     * (1) -a,b-> (2) -c,d,e-> (3) -c,d,e-> (4) -c,d,e-> (5) -o-> (6) -f-> (7) -<numbers>-> ((17))
     * (0) = dead
     */
    /* default every byte to symbol 0 (shared with 'a' — see the 16-state
     * variant's note) */
    for(int i = 0; i < ue2::ALPHABET_SIZE; i++) {
        dfa->alpha_remap[i] = 0;
    }
    dfa->alpha_remap['a'] = 0;
    dfa->alpha_remap['b'] = 1;
    dfa->alpha_remap['c'] = 2;
    dfa->alpha_remap['d'] = 3;
    dfa->alpha_remap['e'] = 4;
    dfa->alpha_remap['o'] = 5;
    dfa->alpha_remap['f'] = 6;
    // map digits '0'..'9' to symbols 7..16
    for (int i = 0; i < 10; i ++) {
        dfa->alpha_remap[i + '0'] = i + 7;
    }
    /* index 256 (TOP) is validated against alpha_size-1 somewhere in the
     * compiler, so it must stay mapped to the last symbol slot */
    dfa->alpha_remap[256] = 17;

    /*                       a b c d e o f 0 1 2 3 4 5 6 7 8 9 */
    dfa->states[0].next =  {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
    dfa->states[1].next =  {2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; /* nothing */
    dfa->states[2].next =  {2,2,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1}; /* [a,b] */
    dfa->states[3].next =  {2,2,4,4,4,1,1,1,1,1,1,1,1,1,1,1,1}; /* [a,b][c-e]{1} */
    dfa->states[4].next =  {2,2,5,5,5,1,1,1,1,1,1,1,1,1,1,1,1}; /* [a,b][c-e]{2} */
    fill_straight_regex_sequence(dfa, 5, 17, 17); /* [a,b][c-e]{3}of012345678 */
    dfa->states[17].next = {2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; /* [a,b][c-e]{3}of0123456789 */
}
#endif /* defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) */
typedef ue2::bytecode_ptr<NFA> (*sheng_compile_ptr)(ue2::raw_dfa&,
const ue2::CompileContext&,
const ue2::ReportManager&,
bool,
std::set<ue2::dstate_id_t>*);
typedef void (*init_raw_dfa_ptr)(struct ue2::raw_dfa*, const ReportID);
/**
 * Compile an NFA with the given sheng compile function over a freshly built
 * raw_dfa.
 *
 * @param out_nfa receives the compiled NFA; ownership passes to the caller
 * @param compile_function sheng compile entry point (shengCompile, sheng32Compile, ...)
 * @param init_dfa_function fills the raw_dfa with the test pattern
 */
static inline void init_nfa(struct NFA **out_nfa, sheng_compile_ptr compile_function, init_raw_dfa_ptr init_dfa_function) {
    ue2::Grey *g = new ue2::Grey();
    hs_platform_info plat_info = {0, 0, 0, 0};
    ue2::CompileContext *cc = new ue2::CompileContext(false, false, ue2::target_t(plat_info), *g);
    ue2::ReportManager *rm = new ue2::ReportManager(*g);
    ue2::Report *report = new ue2::Report(ue2::EXTERNAL_CALLBACK, 0);
    ReportID rID = rm->getInternalId(*report);
    rm->setProgramOffset(0, 0);

    struct ue2::raw_dfa *dfa = new ue2::raw_dfa(ue2::NFA_OUTFIX);
    init_dfa_function(dfa, rID);

    *out_nfa = (compile_function(*dfa, *cc, *rm, false, nullptr)).release();
    ASSERT_NE(nullptr, *out_nfa);

    /* the compiled NFA holds its own copy of everything it needs, so all the
     * compile-time temporaries can be released here */
    delete dfa; /* was leaked before: the raw_dfa is only needed during compile */
    delete report;
    delete rm;
    delete cc;
    delete g;
}
/* Compile the 8-state [a,b][c-e]{3}of pattern with the 16-state sheng engine. */
static void init_nfa16(struct NFA **out_nfa) {
    init_nfa(out_nfa, ue2::shengCompile, init_raw_dfa16);
}
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
/* Compile the 18-state digit-chain pattern with the 32-state sheng engine. */
static void init_nfa32(struct NFA **out_nfa) {
    init_nfa(out_nfa, ue2::sheng32Compile, init_raw_dfa32);
}
#endif /* defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) */
static char state_buffer;
/**
 * Allocate and initialize a message queue wired to a freshly compiled sheng
 * NFA. The queue is primed with a START event at 0 and an END event at the
 * buffer length. The caller owns *out_q (and the NFA it references).
 *
 * @param out_q receives the new queue
 * @param buffer input buffer the queue will scan
 * @param max_size maximum scannable length of the buffer
 * @param init_nfa_func builds the NFA (init_nfa16 / init_nfa32)
 */
static inline void init_sheng_queue(struct mq **out_q, uint8_t *buffer, size_t max_size, void (*init_nfa_func)(struct NFA **out_nfa)) {
    /* initialize to null so a bail-out inside init_nfa_func (gtest ASSERT)
     * cannot leave us reading an indeterminate pointer */
    struct NFA *nfa = nullptr;
    init_nfa_func(&nfa);
    assert(nfa);

    struct mq *q = new mq();
    memset(q, 0, sizeof(struct mq));
    q->nfa = nfa;
    q->state = &state_buffer;
    q->cb = dummy_callback;
    q->buffer = buffer;
    q->length = max_size; /* setting this as the max length scanable */

    /* note: the old `if (nfa != q->nfa)` sanity check was removed — q->nfa
     * was assigned from nfa just above, so the condition was always false */
    nfaQueueInitState(nfa, q);
    pushQueueAt(q, 0, MQE_START, 0);
    pushQueueAt(q, 1, MQE_END, q->length);

    *out_q = q;
}
/* Build a queue over `buffer` backed by a 16-state sheng NFA. */
static void init_sheng_queue16(struct mq **out_q, uint8_t *buffer ,size_t max_size) {
    init_sheng_queue(out_q, buffer, max_size, init_nfa16);
}
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
/* Build a queue over `buffer` backed by a 32-state sheng NFA. */
static void init_sheng_queue32(struct mq **out_q, uint8_t *buffer, size_t max_size) {
    init_sheng_queue(out_q, buffer, max_size, init_nfa32);
}
#endif /* defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) */
/**
 * Fill buf with junk bytes and copy `pattern` into it every `period` bytes,
 * starting at start_offset. The junk includes characters used by valid
 * states so the scan cannot be short-circuited by shufti acceleration.
 *
 * @param buf destination buffer (buffer_size bytes)
 * @param buffer_size total size of buf
 * @param start_offset offset of the first pattern copy
 * @param period distance between consecutive pattern copies (must be > 0)
 * @param pattern bytes to plant
 * @param pattern_length length of pattern in bytes
 */
static
void fill_pattern(u8* buf, size_t buffer_size, unsigned int start_offset, unsigned int period, const char *pattern, unsigned int pattern_length) {
    memset(buf, '_', buffer_size);
    /* `i + 8 < buffer_size` is the overflow-safe form of the former
     * `i < buffer_size - 8`, whose size_t subtraction underflowed for
     * buffer_size < 8 and caused out-of-bounds writes */
    for (size_t i = 0; i + 8 < buffer_size; i += 8) {
        /* filling with some junk, including some characters used for a valid
         * state, to prevent the use of shufti */
        memcpy(buf + i, "jgohcxbf", 8);
    }
    /* likewise, avoids underflow when pattern_length > buffer_size */
    for (size_t i = start_offset; i + pattern_length < buffer_size; i += period) {
        memcpy(buf + i, pattern, pattern_length);
    }
}
/* Generate the ground-truth NFA header we expect the compiler to produce.
 * The caller owns the returned object and must delete it. */
struct NFA *get_expected_nfa_header(u8 type, unsigned int length, unsigned int nposition) {
    struct NFA *hdr = new struct NFA();
    memset(hdr, 0, sizeof(*hdr));
    hdr->type = type;
    hdr->length = length;
    hdr->nPositions = nposition;
    hdr->scratchStateSize = 1;
    hdr->streamStateSize = 1;
    return hdr;
}
/* Recorded header values for a successful sheng (16-state) compile. */
struct NFA *get_expected_nfa16_header() {
    return get_expected_nfa_header(SHENG_NFA, 4736, 8);
}
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
/* Recorded header values for a successful sheng32 compile. */
struct NFA *get_expected_nfa32_header() {
    return get_expected_nfa_header(SHENG_NFA_32, 17216, 18);
}
#endif /* defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) */
/* Field-by-field comparison of two NFA headers. Uses EXPECT (not ASSERT) so
 * every mismatching field is reported, not just the first. */
void test_nfa_equal(const NFA& l, const NFA& r)
{
    EXPECT_EQ(l.flags, r.flags);
    EXPECT_EQ(l.length, r.length);
    EXPECT_EQ(l.type, r.type);
    EXPECT_EQ(l.rAccelType, r.rAccelType);
    EXPECT_EQ(l.rAccelOffset, r.rAccelOffset);
    EXPECT_EQ(l.maxBiAnchoredWidth, r.maxBiAnchoredWidth);
    EXPECT_EQ(l.rAccelData.dc, r.rAccelData.dc);
    EXPECT_EQ(l.queueIndex, r.queueIndex);
    EXPECT_EQ(l.nPositions, r.nPositions);
    EXPECT_EQ(l.scratchStateSize, r.scratchStateSize);
    EXPECT_EQ(l.streamStateSize, r.streamStateSize);
    EXPECT_EQ(l.maxWidth, r.maxWidth);
    EXPECT_EQ(l.minWidth, r.minWidth);
    EXPECT_EQ(l.maxOffset, r.maxOffset);
}
/* Start of actual tests */
/*
* Runs shengCompile and compares its outputs to previously recorded outputs.
*/
TEST(Sheng16, std_compile_header) {
    ue2::Grey *g = new ue2::Grey();
    hs_platform_info plat_info = {0, 0, 0, 0};
    ue2::CompileContext *cc = new ue2::CompileContext(false, false, ue2::target_t(plat_info), *g);
    ue2::ReportManager *rm = new ue2::ReportManager(*g);
    ue2::Report *report = new ue2::Report(ue2::EXTERNAL_CALLBACK, 0);
    ReportID rID = rm->getInternalId(*report);
    rm->setProgramOffset(0, 0);

    struct ue2::raw_dfa *dfa = new ue2::raw_dfa(ue2::NFA_OUTFIX);
    init_raw_dfa16(dfa, rID);

    struct NFA *nfa = (shengCompile(*dfa, *cc, *rm, false)).release();
    EXPECT_NE(nullptr, nfa);
    EXPECT_NE(0, nfa->length);
    EXPECT_EQ(SHENG_NFA, nfa->type);

    /* compare the produced header against the recorded ground truth */
    struct NFA *expected_nfa = get_expected_nfa16_header();
    test_nfa_equal(*expected_nfa, *nfa);
    delete expected_nfa;

    delete dfa; /* was leaked: the raw_dfa is only needed during compilation */
    delete report;
    delete rm;
    delete cc;
    delete g;
    /* NOTE(review): nfa itself still leaks — it comes from bytecode_ptr's
     * allocator, so a plain delete would be wrong; needs the matching
     * aligned free. Acceptable in a unit test but worth fixing. */
}
/*
* nfaExecSheng_B is the most basic of the sheng variants. It simply calls the core of the algorithm.
* We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
*/
TEST(Sheng16, std_run_B) {
    unsigned int pattern_length = 6;
    unsigned int period = 128;
    const size_t buf_size = 200;
    /* one match at offset 0 plus one per additional full period */
    unsigned int expected_matches = buf_size / period + 1;
    u8 buf[buf_size];
    struct callback_context context = {period, 0, pattern_length};
    struct NFA *nfa;
    init_nfa16(&nfa);
    ASSERT_NE(nullptr, nfa);
    fill_pattern(buf, buf_size, 0, period, "acecof", pattern_length);
    char ret_val = MO_ALIVE;
    unsigned int offset = 0;
    unsigned int loop_count = 0;
    /* Run one extra iteration past the expected matches to confirm the scan
     * neither finds a spurious match nor crashes on a match-free tail. */
    for (; loop_count < expected_matches + 1; loop_count++) {
        ASSERT_LT(offset, buf_size);
        ret_val = nfaExecSheng_B(nfa,
                                 offset,
                                 buf + offset,
                                 (s64a) buf_size - offset,
                                 periodic_pattern_callback,
                                 &context);
        /* resume scanning just past the end of the last reported match */
        offset = (context.match_count - 1) * context.period + context.pattern_length;
        if(unlikely(ret_val != MO_ALIVE)) {
            break;
        }
    }
    /*check normal return*/
    EXPECT_EQ(MO_ALIVE, ret_val);
    /*check that we don't find additional match nor crash when no match are found*/
    EXPECT_EQ(expected_matches + 1, loop_count);
    /*check that we have all the matches*/
    EXPECT_EQ(expected_matches, context.match_count);
}
/*
* nfaExecSheng_Q runs like the _B version (callback), but exercises the message queue logic.
* We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
*/
TEST(Sheng16, std_run_Q) {
    struct mq *q;
    unsigned int pattern_length = 6;
    unsigned int period = 128;
    const size_t buf_size = 200;
    /* one match at offset 0 plus one per additional full period
     * (was buf_size/128: use period to keep the constants in sync) */
    unsigned int expected_matches = buf_size / period + 1;
    u8 buf[buf_size];
    struct callback_context context = {period, 0, pattern_length};
    init_sheng_queue16(&q, buf, buf_size);
    ASSERT_NE(nullptr, q);
    fill_pattern(buf, buf_size, 0, period, "acecof", pattern_length);
    q->cur = 0;
    q->items[q->cur].location = 0;
    q->context = &context;
    q->cb = periodic_pattern_callback;
    nfaExecSheng_Q(q->nfa, q, (s64a) buf_size);
    /*check that we have all the matches*/
    EXPECT_EQ(expected_matches, context.match_count);
    delete q;
}
/*
* nfaExecSheng_Q2 uses the message queue, but stops at match instead of using a callback.
* We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
*/
TEST(Sheng16, std_run_Q2) {
    struct mq *q;
    unsigned int pattern_length = 6;
    unsigned int period = 128;
    const size_t buf_size = 200;
    /* one match at offset 0 plus one per additional full period
     * (was buf_size/128: use period to keep the constants in sync) */
    unsigned int expected_matches = buf_size / period + 1;
    u8 buf[buf_size];
    init_sheng_queue16(&q, buf, buf_size);
    ASSERT_NE(nullptr, q);
    fill_pattern(buf, buf_size, 0, period, "acecof", pattern_length);
    q->cur = 0;
    q->items[q->cur].location = 0;
    char ret_val;
    int location;
    unsigned int loop_count = 0;
    /* Q2 stops at each match; keep re-running while the reported locations
     * line up with the periodic pattern we planted in the buffer. */
    do {
        ret_val = nfaExecSheng_Q2(q->nfa, q, (s64a) buf_size);
        location = q->items[q->cur].location;
        loop_count++;
    } while(likely((ret_val == MO_MATCHES_PENDING) && (location < (int)buf_size) && ((location % period) == pattern_length)));
    /*check if it's a spurious match*/
    EXPECT_EQ(0, (ret_val == MO_MATCHES_PENDING) && ((location % period) != pattern_length));
    /*check that we have all the matches*/
    EXPECT_EQ(expected_matches, loop_count-1);
    delete q;
}
/*
* The message queue can also run on the "history" buffer. We test it the same way as the normal
* buffer, expecting the same behavior.
* We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
*/
TEST(Sheng16, history_run_Q2) {
    struct mq *q;
    unsigned int pattern_length = 6;
    unsigned int period = 128;
    const size_t buf_size = 200;
    /* one match at offset 0 plus one per additional full period
     * (was buf_size/128: use period to keep the constants in sync) */
    unsigned int expected_matches = buf_size / period + 1;
    u8 buf[buf_size];
    init_sheng_queue16(&q, buf, buf_size);
    ASSERT_NE(nullptr, q);
    fill_pattern(buf, buf_size, 0, period, "acecof", pattern_length);
    q->history = buf;
    q->hlength = buf_size;
    q->cur = 0;
    /* history locations are negative offsets from the current buffer start;
     * start at the very beginning of the history buffer (was literal -200) */
    q->items[q->cur].location = -(int)buf_size;
    char ret_val;
    int location;
    unsigned int loop_count = 0;
    do {
        ret_val = nfaExecSheng_Q2(q->nfa, q, 0);
        location = q->items[q->cur].location;
        loop_count++;
    } while(likely((ret_val == MO_MATCHES_PENDING) && (location > -(int)buf_size) && (location < 0) && (((buf_size + location) % period) == pattern_length)));
    /*check if it's a spurious match*/
    EXPECT_EQ(0, (ret_val == MO_MATCHES_PENDING) && (((buf_size + location) % period) != pattern_length));
    /*check that we have all the matches*/
    EXPECT_EQ(expected_matches, loop_count-1);
    delete q;
}
/**
 * These tests only cover the basic paths. More tests could cover:
 * - running from the history buffer into the current buffer in Q2
 * - running while expecting no match
 * - nfaExecSheng_QR
 * - running sheng when it should call an accelerator and confirming it calls them
 */
#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
/*
* Runs sheng32Compile and compares its outputs to previously recorded outputs.
*/
TEST(Sheng32, std_compile_header) {
#if defined(HAVE_SVE)
    /* sheng32 needs at least 256-bit vectors; skip on narrower SVE. */
    if(svcntb()<32) {
        return;
    }
#endif
    ue2::Grey *g = new ue2::Grey();
    hs_platform_info plat_info = {0, 0, 0, 0};
    ue2::CompileContext *cc = new ue2::CompileContext(false, false, ue2::target_t(plat_info), *g);
    ue2::ReportManager *rm = new ue2::ReportManager(*g);
    ue2::Report *report = new ue2::Report(ue2::EXTERNAL_CALLBACK, 0);
    ReportID rID = rm->getInternalId(*report);
    rm->setProgramOffset(0, 0);
    struct ue2::raw_dfa *dfa = new ue2::raw_dfa(ue2::NFA_OUTFIX);
    init_raw_dfa32(dfa, rID);
    struct NFA *nfa = (sheng32Compile(*dfa, *cc, *rm, false)).release();
    /* ASSERT (not EXPECT): the checks below dereference nfa, so a null
     * result must abort the test instead of crashing it. */
    ASSERT_NE(nullptr, nfa);
    EXPECT_NE(0, nfa->length);
    EXPECT_EQ(SHENG_NFA_32, nfa->type);
    struct NFA *expected_nfa = get_expected_nfa32_header();
    test_nfa_equal(*expected_nfa, *nfa);
    delete expected_nfa;
    delete dfa; /* was leaked: allocated above, never freed */
    /* NOTE(review): nfa itself is still leaked; it comes from a released
     * bytecode_ptr and presumably needs the library's aligned free — TODO. */
    delete report;
    delete rm;
    delete cc;
    delete g;
}
/*
* nfaExecSheng32_B is the most basic of the sheng variants. It simply calls the core of the algorithm.
* We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
*/
TEST(Sheng32, std_run_B) {
#if defined(HAVE_SVE)
    /* sheng32 needs at least 256-bit vectors; skip on narrower SVE. */
    if(svcntb()<32) {
        return;
    }
#endif
    unsigned int pattern_length = 16;
    unsigned int period = 128;
    const size_t buf_size = 200;
    /* one match at offset 0 plus one per additional full period */
    unsigned int expected_matches = buf_size / period + 1;
    u8 buf[buf_size];
    struct callback_context context = {period, 0, pattern_length};
    struct NFA *nfa;
    init_nfa32(&nfa);
    ASSERT_NE(nullptr, nfa);
    fill_pattern(buf, buf_size, 0, period, "acecof0123456789", pattern_length);
    char ret_val = MO_ALIVE;
    unsigned int offset = 0;
    unsigned int loop_count = 0;
    /* Run one extra iteration past the expected matches to confirm the scan
     * neither finds a spurious match nor crashes on a match-free tail. */
    for (; loop_count < expected_matches + 1; loop_count++) {
        ASSERT_LT(offset, buf_size);
        ret_val = nfaExecSheng32_B(nfa,
                                   offset,
                                   buf + offset,
                                   (s64a) buf_size - offset,
                                   periodic_pattern_callback,
                                   &context);
        /* resume scanning just past the end of the last reported match */
        offset = (context.match_count - 1) * context.period + context.pattern_length;
        if(unlikely(ret_val != MO_ALIVE)) {
            break;
        }
    }
    /*check normal return*/
    EXPECT_EQ(MO_ALIVE, ret_val);
    /*check that we don't find additional match nor crash when no match are found*/
    EXPECT_EQ(expected_matches + 1, loop_count);
    /*check that we have all the matches*/
    EXPECT_EQ(expected_matches, context.match_count);
}
/*
* nfaExecSheng32_Q runs like the _B version (callback), but exercises the message queue logic.
* We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
*/
TEST(Sheng32, std_run_Q) {
#if defined(HAVE_SVE)
    /* sheng32 needs at least 256-bit vectors; skip on narrower SVE. */
    if(svcntb()<32) {
        return;
    }
#endif
    struct mq *q;
    unsigned int pattern_length = 16;
    unsigned int period = 128;
    const size_t buf_size = 200;
    /* one match at offset 0 plus one per additional full period
     * (was buf_size/128: use period to keep the constants in sync) */
    unsigned int expected_matches = buf_size / period + 1;
    u8 buf[buf_size];
    struct callback_context context = {period, 0, pattern_length};
    init_sheng_queue32(&q, buf, buf_size);
    ASSERT_NE(nullptr, q);
    fill_pattern(buf, buf_size, 0, period, "acecof0123456789", pattern_length);
    q->cur = 0;
    q->items[q->cur].location = 0;
    q->context = &context;
    q->cb = periodic_pattern_callback;
    nfaExecSheng32_Q(q->nfa, q, (s64a) buf_size);
    /*check that we have all the matches*/
    EXPECT_EQ(expected_matches, context.match_count);
    delete q;
}
/*
* nfaExecSheng32_Q2 uses the message queue, but stops at match instead of using a callback.
* We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
*/
TEST(Sheng32, std_run_Q2) {
#if defined(HAVE_SVE)
    /* sheng32 needs at least 256-bit vectors; skip on narrower SVE. */
    if(svcntb()<32) {
        return;
    }
#endif
    struct mq *q;
    unsigned int pattern_length = 16;
    unsigned int period = 128;
    const size_t buf_size = 200;
    /* one match at offset 0 plus one per additional full period
     * (was buf_size/128: use period to keep the constants in sync) */
    unsigned int expected_matches = buf_size / period + 1;
    u8 buf[buf_size];
    init_sheng_queue32(&q, buf, buf_size);
    ASSERT_NE(nullptr, q);
    fill_pattern(buf, buf_size, 0, period, "acecof0123456789", pattern_length);
    q->cur = 0;
    q->items[q->cur].location = 0;
    char ret_val;
    int location;
    unsigned int loop_count = 0;
    /* Q2 stops at each match; keep re-running while the reported locations
     * line up with the periodic pattern we planted in the buffer. */
    do {
        ret_val = nfaExecSheng32_Q2(q->nfa, q, (s64a) buf_size);
        location = q->items[q->cur].location;
        loop_count++;
    } while(likely((ret_val == MO_MATCHES_PENDING) && (location < (int)buf_size) && ((location % period) == pattern_length)));
    /*check if it's a spurious match*/
    EXPECT_EQ(0, (ret_val == MO_MATCHES_PENDING) && ((location % period) != pattern_length));
    /*check that we have all the matches*/
    EXPECT_EQ(expected_matches, loop_count-1);
    delete q;
}
/*
 * The message queue can also run on the "history" buffer. We test it the same way as the normal
 * buffer, expecting the same behavior.
 * We test it with a buffer having a few matches at fixed intervals and check that it finds them all.
 */
TEST(Sheng32, history_run_Q2) {
#if defined(HAVE_SVE)
    /* sheng32 needs at least 256-bit vectors; skip on narrower SVE. */
    if(svcntb()<32) {
        return;
    }
#endif
    struct mq *q;
    unsigned int pattern_length = 16;
    unsigned int period = 128;
    const size_t buf_size = 200;
    /* one match at offset 0 plus one per additional full period
     * (was buf_size/128: use period to keep the constants in sync) */
    unsigned int expected_matches = buf_size / period + 1;
    u8 buf[buf_size];
    init_sheng_queue32(&q, buf, buf_size);
    ASSERT_NE(nullptr, q);
    fill_pattern(buf, buf_size, 0, period, "acecof0123456789", pattern_length);
    q->history = buf;
    q->hlength = buf_size;
    q->cur = 0;
    /* history locations are negative offsets from the current buffer start;
     * start at the very beginning of the history buffer (was literal -200) */
    q->items[q->cur].location = -(int)buf_size;
    char ret_val;
    int location;
    unsigned int loop_count = 0;
    do {
        ret_val = nfaExecSheng32_Q2(q->nfa, q, 0);
        location = q->items[q->cur].location;
        loop_count++;
    } while(likely((ret_val == MO_MATCHES_PENDING) && (location > -(int)buf_size) && (location < 0) && (((buf_size + location) % period) == pattern_length)));
    /*check if it's a spurious match*/
    EXPECT_EQ(0, (ret_val == MO_MATCHES_PENDING) && (((buf_size + location) % period) != pattern_length));
    /*check that we have all the matches*/
    EXPECT_EQ(expected_matches, loop_count-1);
    delete q;
}
#endif /* defined(HAVE_AVX512VBMI) || defined(HAVE_SVE) */
} /* namespace */