SHENG32: 32-state 1-byte shuffle based DFA.

This commit is contained in:
Chang, Harry
2018-11-01 16:33:58 +08:00
committed by Konstantinos Margaritis
parent 6f8bfa1854
commit 2cde84c96d
13 changed files with 1676 additions and 45 deletions

View File

@@ -37,21 +37,49 @@ u8 isDeadState(const u8 a) {
return a & SHENG_STATE_DEAD;
}
#if defined(HAVE_AVX512VBMI)
/* Mask state byte `a` with SHENG32_STATE_DEAD; a non-zero result means the
 * dead flag bits are present in the 32-state (SHENG32) encoding. */
static really_inline
u8 isDeadState32(const u8 a) {
    const u8 dead_bits = a & SHENG32_STATE_DEAD;
    return dead_bits;
}
#endif
/* Mask state byte `a` with SHENG_STATE_ACCEPT; a non-zero result means the
 * accept flag bits are present. */
static really_inline
u8 isAcceptState(const u8 a) {
    const u8 accept_bits = a & SHENG_STATE_ACCEPT;
    return accept_bits;
}
#if defined(HAVE_AVX512VBMI)
/* Mask state byte `a` with SHENG32_STATE_ACCEPT; a non-zero result means the
 * accept flag bits are present in the 32-state (SHENG32) encoding. */
static really_inline
u8 isAcceptState32(const u8 a) {
    const u8 accept_bits = a & SHENG32_STATE_ACCEPT;
    return accept_bits;
}
#endif
/* Mask state byte `a` with SHENG_STATE_ACCEL; a non-zero result means the
 * accel flag bits are present. */
static really_inline
u8 isAccelState(const u8 a) {
    const u8 accel_bits = a & SHENG_STATE_ACCEL;
    return accel_bits;
}
#if defined(HAVE_AVX512VBMI)
/* Mask state byte `a` with SHENG32_STATE_ACCEL; a non-zero result means the
 * accel flag bits are present in the 32-state (SHENG32) encoding. */
static really_inline
u8 isAccelState32(const u8 a) {
    const u8 accel_bits = a & SHENG32_STATE_ACCEL;
    return accel_bits;
}
#endif
/* OR the four state bytes together and keep only the bits selected by
 * SHENG_STATE_FLAG_MASK; non-zero when at least one of the four states
 * carries any of those flag bits. */
static really_inline
u8 hasInterestingStates(const u8 a, const u8 b, const u8 c, const u8 d) {
    const u8 merged = a | b | c | d;
    return merged & SHENG_STATE_FLAG_MASK;
}
#if defined(HAVE_AVX512VBMI)
/* OR the four state bytes together and keep only the bits selected by
 * SHENG32_STATE_FLAG_MASK; non-zero when at least one of the four states
 * carries any of those flag bits. */
static really_inline
u8 hasInterestingStates32(const u8 a, const u8 b, const u8 c, const u8 d) {
    const u8 merged = a | b | c | d;
    return merged & SHENG32_STATE_FLAG_MASK;
}
#endif
/* these functions should be optimized out, used by NO_MATCHES mode */
static really_inline
u8 dummyFunc4(UNUSED const u8 a, UNUSED const u8 b, UNUSED const u8 c,
@@ -71,66 +99,126 @@ u8 dummyFunc(UNUSED const u8 a) {
/* Parameters for the sheng_impl.h include below: SHENG_IMPL names the base
 * variant (sheng_cod), DEAD_FUNC / ACCEPT_FUNC pick the state-flag checks.
 * NOTE(review): how sheng_impl.h consumes these macros is not visible here. */
#define SHENG_IMPL sheng_cod
#define DEAD_FUNC isDeadState
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
/* 32-state (SHENG32) counterpart: same configuration, SHENG32 flag helpers. */
#define SHENG32_IMPL sheng32_cod
#define DEAD_FUNC32 isDeadState32
#define ACCEPT_FUNC32 isAcceptState32
#endif
#define STOP_AT_MATCH 0
#include "sheng_impl.h"
/* Clear every parameter so the next variant can define its own values. */
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#undef SHENG32_IMPL
#undef DEAD_FUNC32
#undef ACCEPT_FUNC32
#endif
#undef STOP_AT_MATCH
/* callback output, can't die: the dead check is stubbed with dummyFunc while
 * the accept check stays real; STOP_AT_MATCH is 0 */
#define SHENG_IMPL sheng_co
#define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
/* 32-state (SHENG32) counterpart: same configuration, SHENG32 accept helper. */
#define SHENG32_IMPL sheng32_co
#define DEAD_FUNC32 dummyFunc
#define ACCEPT_FUNC32 isAcceptState32
#endif
#define STOP_AT_MATCH 0
#include "sheng_impl.h"
/* Clear every parameter so the next variant can define its own values. */
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#undef SHENG32_IMPL
#undef DEAD_FUNC32
#undef ACCEPT_FUNC32
#endif
#undef STOP_AT_MATCH
/* stop at match, can die: real dead and accept checks, STOP_AT_MATCH is 1 */
#define SHENG_IMPL sheng_samd
#define DEAD_FUNC isDeadState
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
/* 32-state (SHENG32) counterpart: same configuration, SHENG32 flag helpers. */
#define SHENG32_IMPL sheng32_samd
#define DEAD_FUNC32 isDeadState32
#define ACCEPT_FUNC32 isAcceptState32
#endif
#define STOP_AT_MATCH 1
#include "sheng_impl.h"
/* Clear every parameter so the next variant can define its own values. */
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#undef SHENG32_IMPL
#undef DEAD_FUNC32
#undef ACCEPT_FUNC32
#endif
#undef STOP_AT_MATCH
/* stop at match, can't die: the dead check is stubbed with dummyFunc, the
 * accept check stays real; STOP_AT_MATCH is 1 */
#define SHENG_IMPL sheng_sam
#define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
/* 32-state (SHENG32) counterpart: same configuration, SHENG32 accept helper. */
#define SHENG32_IMPL sheng32_sam
#define DEAD_FUNC32 dummyFunc
#define ACCEPT_FUNC32 isAcceptState32
#endif
#define STOP_AT_MATCH 1
#include "sheng_impl.h"
/* Clear every parameter so the next variant can define its own values. */
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#undef SHENG32_IMPL
#undef DEAD_FUNC32
#undef ACCEPT_FUNC32
#endif
#undef STOP_AT_MATCH
/* no match, can die: the accept check is stubbed with dummyFunc, the dead
 * check stays real; STOP_AT_MATCH is 0 */
#define SHENG_IMPL sheng_nmd
#define DEAD_FUNC isDeadState
#define ACCEPT_FUNC dummyFunc
#if defined(HAVE_AVX512VBMI)
/* 32-state (SHENG32) counterpart: same configuration, SHENG32 dead helper. */
#define SHENG32_IMPL sheng32_nmd
#define DEAD_FUNC32 isDeadState32
#define ACCEPT_FUNC32 dummyFunc
#endif
#define STOP_AT_MATCH 0
#include "sheng_impl.h"
/* Clear every parameter so the next variant can define its own values. */
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#undef SHENG32_IMPL
#undef DEAD_FUNC32
#undef ACCEPT_FUNC32
#endif
#undef STOP_AT_MATCH
/* no match, can't die: both the dead and accept checks are stubbed with
 * dummyFunc; STOP_AT_MATCH is 0 */
#define SHENG_IMPL sheng_nm
#define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc
#if defined(HAVE_AVX512VBMI)
/* 32-state (SHENG32) counterpart: both checks stubbed here as well. */
#define SHENG32_IMPL sheng32_nm
#define DEAD_FUNC32 dummyFunc
#define ACCEPT_FUNC32 dummyFunc
#endif
#define STOP_AT_MATCH 0
#include "sheng_impl.h"
/* Clear every parameter before the next group of variants is set up. */
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
#undef SHENG32_IMPL
#undef DEAD_FUNC32
#undef ACCEPT_FUNC32
#endif
#undef STOP_AT_MATCH
/*
@@ -144,6 +232,15 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
/* 32-state (SHENG32) parameters for the same sheng_impl4.h include:
 * sheng32_4_coda uses real interesting/accept checks, real inner dead and
 * inner accel checks, and dummyFunc for both outer checks. */
#define SHENG32_IMPL sheng32_4_coda
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 isDeadState32
#define OUTER_DEAD_FUNC32 dummyFunc
#define INNER_ACCEL_FUNC32 isAccelState32
#define OUTER_ACCEL_FUNC32 dummyFunc
#define ACCEPT_FUNC32 isAcceptState32
#endif
#define STOP_AT_MATCH 0
/* NOTE(review): how sheng_impl4.h consumes these macros is not visible here. */
#include "sheng_impl4.h"
#undef SHENG_IMPL
@@ -153,6 +250,15 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
/* Also clear the 32-state parameters of the variant instantiated above. */
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
#undef OUTER_DEAD_FUNC32
#undef INNER_ACCEL_FUNC32
#undef OUTER_ACCEL_FUNC32
#undef ACCEPT_FUNC32
#endif
#undef STOP_AT_MATCH
/* callback output, can die, not accelerated */
@@ -163,6 +269,15 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
/* 32-state (SHENG32) parameters for the same sheng_impl4.h include:
 * sheng32_4_cod uses real interesting/accept checks and a real inner dead
 * check; the outer dead check and both accel checks are dummyFunc. */
#define SHENG32_IMPL sheng32_4_cod
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 isDeadState32
#define OUTER_DEAD_FUNC32 dummyFunc
#define INNER_ACCEL_FUNC32 dummyFunc
#define OUTER_ACCEL_FUNC32 dummyFunc
#define ACCEPT_FUNC32 isAcceptState32
#endif
#define STOP_AT_MATCH 0
/* NOTE(review): how sheng_impl4.h consumes these macros is not visible here. */
#include "sheng_impl4.h"
#undef SHENG_IMPL
@@ -172,6 +287,15 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
/* Also clear the 32-state parameters of the variant instantiated above. */
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
#undef OUTER_DEAD_FUNC32
#undef INNER_ACCEL_FUNC32
#undef OUTER_ACCEL_FUNC32
#undef ACCEPT_FUNC32
#endif
#undef STOP_AT_MATCH
/* callback output, can't die, accelerated */
@@ -182,6 +306,15 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
/* 32-state (SHENG32) parameters for the same sheng_impl4.h include:
 * sheng32_4_coa uses real interesting/accept checks and a real inner accel
 * check; both dead checks and the outer accel check are dummyFunc. */
#define SHENG32_IMPL sheng32_4_coa
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 dummyFunc
#define OUTER_DEAD_FUNC32 dummyFunc
#define INNER_ACCEL_FUNC32 isAccelState32
#define OUTER_ACCEL_FUNC32 dummyFunc
#define ACCEPT_FUNC32 isAcceptState32
#endif
#define STOP_AT_MATCH 0
/* NOTE(review): how sheng_impl4.h consumes these macros is not visible here. */
#include "sheng_impl4.h"
#undef SHENG_IMPL
@@ -191,6 +324,15 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
/* Also clear the 32-state parameters of the variant instantiated above. */
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
#undef OUTER_DEAD_FUNC32
#undef INNER_ACCEL_FUNC32
#undef OUTER_ACCEL_FUNC32
#undef ACCEPT_FUNC32
#endif
#undef STOP_AT_MATCH
/* callback output, can't die, not accelerated */
@@ -201,6 +343,15 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
/* 32-state (SHENG32) parameters for the same sheng_impl4.h include:
 * sheng32_4_co uses real interesting/accept checks; every dead and accel
 * check is dummyFunc. */
#define SHENG32_IMPL sheng32_4_co
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 dummyFunc
#define OUTER_DEAD_FUNC32 dummyFunc
#define INNER_ACCEL_FUNC32 dummyFunc
#define OUTER_ACCEL_FUNC32 dummyFunc
#define ACCEPT_FUNC32 isAcceptState32
#endif
#define STOP_AT_MATCH 0
/* NOTE(review): how sheng_impl4.h consumes these macros is not visible here. */
#include "sheng_impl4.h"
#undef SHENG_IMPL
@@ -210,6 +361,15 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
/* Also clear the 32-state parameters of the variant instantiated above. */
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
#undef OUTER_DEAD_FUNC32
#undef INNER_ACCEL_FUNC32
#undef OUTER_ACCEL_FUNC32
#undef ACCEPT_FUNC32
#endif
#undef STOP_AT_MATCH
/* stop at match, can die, accelerated */
@@ -220,6 +380,15 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
/* 32-state (SHENG32) parameters for the same sheng_impl4.h include:
 * sheng32_4_samda uses real interesting/accept checks, real inner dead and
 * inner accel checks, and dummyFunc for both outer checks. */
#define SHENG32_IMPL sheng32_4_samda
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 isDeadState32
#define OUTER_DEAD_FUNC32 dummyFunc
#define INNER_ACCEL_FUNC32 isAccelState32
#define OUTER_ACCEL_FUNC32 dummyFunc
#define ACCEPT_FUNC32 isAcceptState32
#endif
/* Unlike the callback-output variants, this one sets STOP_AT_MATCH to 1. */
#define STOP_AT_MATCH 1
/* NOTE(review): how sheng_impl4.h consumes these macros is not visible here. */
#include "sheng_impl4.h"
#undef SHENG_IMPL
@@ -229,6 +398,15 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
/* Also clear the 32-state parameters of the variant instantiated above. */
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
#undef OUTER_DEAD_FUNC32
#undef INNER_ACCEL_FUNC32
#undef OUTER_ACCEL_FUNC32
#undef ACCEPT_FUNC32
#endif
#undef STOP_AT_MATCH
/* stop at match, can die, not accelerated */
@@ -239,6 +417,15 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
/* 32-state (SHENG32) parameters for the same sheng_impl4.h include:
 * sheng32_4_samd uses real interesting/accept checks and a real inner dead
 * check; the outer dead check and both accel checks are dummyFunc. */
#define SHENG32_IMPL sheng32_4_samd
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 isDeadState32
#define OUTER_DEAD_FUNC32 dummyFunc
#define INNER_ACCEL_FUNC32 dummyFunc
#define OUTER_ACCEL_FUNC32 dummyFunc
#define ACCEPT_FUNC32 isAcceptState32
#endif
/* Stop-at-match variant: STOP_AT_MATCH is 1. */
#define STOP_AT_MATCH 1
/* NOTE(review): how sheng_impl4.h consumes these macros is not visible here. */
#include "sheng_impl4.h"
#undef SHENG_IMPL
@@ -248,6 +435,15 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
/* Also clear the 32-state parameters of the variant instantiated above. */
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
#undef OUTER_DEAD_FUNC32
#undef INNER_ACCEL_FUNC32
#undef OUTER_ACCEL_FUNC32
#undef ACCEPT_FUNC32
#endif
#undef STOP_AT_MATCH
/* stop at match, can't die, accelerated */
@@ -258,6 +454,15 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
/* 32-state (SHENG32) parameters for the same sheng_impl4.h include:
 * sheng32_4_sama uses real interesting/accept checks and a real inner accel
 * check; both dead checks and the outer accel check are dummyFunc. */
#define SHENG32_IMPL sheng32_4_sama
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 dummyFunc
#define OUTER_DEAD_FUNC32 dummyFunc
#define INNER_ACCEL_FUNC32 isAccelState32
#define OUTER_ACCEL_FUNC32 dummyFunc
#define ACCEPT_FUNC32 isAcceptState32
#endif
/* Stop-at-match variant: STOP_AT_MATCH is 1. */
#define STOP_AT_MATCH 1
/* NOTE(review): how sheng_impl4.h consumes these macros is not visible here. */
#include "sheng_impl4.h"
#undef SHENG_IMPL
@@ -267,6 +472,15 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
/* Also clear the 32-state parameters of the variant instantiated above. */
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
#undef OUTER_DEAD_FUNC32
#undef INNER_ACCEL_FUNC32
#undef OUTER_ACCEL_FUNC32
#undef ACCEPT_FUNC32
#endif
#undef STOP_AT_MATCH
/* stop at match, can't die, not accelerated */
@@ -277,6 +491,15 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#if defined(HAVE_AVX512VBMI)
/* 32-state (SHENG32) parameters for the same sheng_impl4.h include:
 * sheng32_4_sam uses real interesting/accept checks; every dead and accel
 * check is dummyFunc. */
#define SHENG32_IMPL sheng32_4_sam
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 dummyFunc
#define OUTER_DEAD_FUNC32 dummyFunc
#define INNER_ACCEL_FUNC32 dummyFunc
#define OUTER_ACCEL_FUNC32 dummyFunc
#define ACCEPT_FUNC32 isAcceptState32
#endif
/* Stop-at-match variant: STOP_AT_MATCH is 1. */
#define STOP_AT_MATCH 1
/* NOTE(review): how sheng_impl4.h consumes these macros is not visible here. */
#include "sheng_impl4.h"
#undef SHENG_IMPL
@@ -286,6 +509,15 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
/* Also clear the 32-state parameters of the variant instantiated above. */
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
#undef OUTER_DEAD_FUNC32
#undef INNER_ACCEL_FUNC32
#undef OUTER_ACCEL_FUNC32
#undef ACCEPT_FUNC32
#endif
#undef STOP_AT_MATCH
/* no-match variants use a dummy interesting func, and die/accel checks are outer */
@@ -298,6 +530,15 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC isAccelState
#define ACCEPT_FUNC dummyFunc
#if defined(HAVE_AVX512VBMI)
/* 32-state (SHENG32) parameters for the same sheng_impl4.h include:
 * sheng32_4_nmda stubs the interesting (dummyFunc4), accept and inner checks;
 * only the outer dead and outer accel checks are real. */
#define SHENG32_IMPL sheng32_4_nmda
#define INTERESTING_FUNC32 dummyFunc4
#define INNER_DEAD_FUNC32 dummyFunc
#define OUTER_DEAD_FUNC32 isDeadState32
#define INNER_ACCEL_FUNC32 dummyFunc
#define OUTER_ACCEL_FUNC32 isAccelState32
#define ACCEPT_FUNC32 dummyFunc
#endif
#define STOP_AT_MATCH 0
/* NOTE(review): how sheng_impl4.h consumes these macros is not visible here. */
#include "sheng_impl4.h"
#undef SHENG_IMPL
@@ -307,6 +548,15 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
/* Also clear the 32-state parameters of the variant instantiated above. */
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
#undef OUTER_DEAD_FUNC32
#undef INNER_ACCEL_FUNC32
#undef OUTER_ACCEL_FUNC32
#undef ACCEPT_FUNC32
#endif
#undef STOP_AT_MATCH
/* no match, can die, not accelerated */
@@ -317,6 +567,15 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc
#if defined(HAVE_AVX512VBMI)
/* 32-state (SHENG32) parameters for the same sheng_impl4.h include:
 * sheng32_4_nmd keeps only the outer dead check real; the interesting
 * (dummyFunc4), accept, inner dead and both accel checks are stubs. */
#define SHENG32_IMPL sheng32_4_nmd
#define INTERESTING_FUNC32 dummyFunc4
#define INNER_DEAD_FUNC32 dummyFunc
#define OUTER_DEAD_FUNC32 isDeadState32
#define INNER_ACCEL_FUNC32 dummyFunc
#define OUTER_ACCEL_FUNC32 dummyFunc
#define ACCEPT_FUNC32 dummyFunc
#endif
#define STOP_AT_MATCH 0
/* NOTE(review): how sheng_impl4.h consumes these macros is not visible here. */
#include "sheng_impl4.h"
#undef SHENG_IMPL
@@ -326,6 +585,15 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
/* Also clear the 32-state parameters of the variant instantiated above. */
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
#undef OUTER_DEAD_FUNC32
#undef INNER_ACCEL_FUNC32
#undef OUTER_ACCEL_FUNC32
#undef ACCEPT_FUNC32
#endif
#undef STOP_AT_MATCH
/* there is no performance benefit in accelerating a no-match case that can't
@@ -339,6 +607,15 @@ u8 dummyFunc(UNUSED const u8 a) {
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc
#if defined(HAVE_AVX512VBMI)
/* 32-state (SHENG32) parameters for the same sheng_impl4.h include:
 * sheng32_4_nm stubs every hook (dummyFunc4 for the interesting check,
 * dummyFunc for all dead, accel and accept checks). */
#define SHENG32_IMPL sheng32_4_nm
#define INTERESTING_FUNC32 dummyFunc4
#define INNER_DEAD_FUNC32 dummyFunc
#define OUTER_DEAD_FUNC32 dummyFunc
#define INNER_ACCEL_FUNC32 dummyFunc
#define OUTER_ACCEL_FUNC32 dummyFunc
#define ACCEPT_FUNC32 dummyFunc
#endif
#define STOP_AT_MATCH 0
/* NOTE(review): how sheng_impl4.h consumes these macros is not visible here. */
#include "sheng_impl4.h"
#undef SHENG_IMPL
@@ -348,6 +625,15 @@ u8 dummyFunc(UNUSED const u8 a) {
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#if defined(HAVE_AVX512VBMI)
/* Also clear the 32-state parameters of the variant instantiated above. */
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
#undef OUTER_DEAD_FUNC32
#undef INNER_ACCEL_FUNC32
#undef OUTER_ACCEL_FUNC32
#undef ACCEPT_FUNC32
#endif
#undef STOP_AT_MATCH
#endif // SHENG_DEFS_H