diff --git a/CMakeLists.txt b/CMakeLists.txt index 4f5d661f..93f3c152 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -557,25 +557,6 @@ set (hs_exec_SRCS src/nfa/mpv.h src/nfa/mpv.c src/nfa/mpv_internal.h - src/nfa/multiaccel_common.h - src/nfa/multiaccel_doubleshift.h - src/nfa/multiaccel_doubleshiftgrab.h - src/nfa/multiaccel_long.h - src/nfa/multiaccel_longgrab.h - src/nfa/multiaccel_shift.h - src/nfa/multiaccel_shiftgrab.h - src/nfa/multishufti.c - src/nfa/multishufti_avx2.h - src/nfa/multishufti_sse.h - src/nfa/multishufti.h - src/nfa/multitruffle.c - src/nfa/multitruffle_avx2.h - src/nfa/multitruffle_sse.h - src/nfa/multitruffle.h - src/nfa/multivermicelli.c - src/nfa/multivermicelli.h - src/nfa/multivermicelli_sse.h - src/nfa/multivermicelli_avx2.h src/nfa/nfa_api.h src/nfa/nfa_api_dispatch.c src/nfa/nfa_internal.h @@ -589,13 +570,11 @@ set (hs_exec_SRCS src/nfa/sheng_impl.h src/nfa/sheng_impl4.h src/nfa/sheng_internal.h - src/nfa/shufti_common.h src/nfa/shufti.c src/nfa/shufti.h src/nfa/tamarama.c src/nfa/tamarama.h src/nfa/tamarama_internal.h - src/nfa/truffle_common.h src/nfa/truffle.c src/nfa/truffle.h src/nfa/vermicelli.h @@ -736,8 +715,6 @@ SET (hs_SRCS src/nfa/mpv_internal.h src/nfa/mpvcompile.cpp src/nfa/mpvcompile.h - src/nfa/multiaccel_compilehelper.cpp - src/nfa/multiaccel_compilehelper.h src/nfa/nfa_api.h src/nfa/nfa_api_queue.h src/nfa/nfa_api_util.h diff --git a/src/nfa/accel.c b/src/nfa/accel.c index 99eab11d..2bc60945 100644 --- a/src/nfa/accel.c +++ b/src/nfa/accel.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,9 +30,6 @@ #include "shufti.h" #include "truffle.h" #include "vermicelli.h" -#include "multishufti.h" -#include "multitruffle.h" -#include "multivermicelli.h" #include "ue2common.h" const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) { @@ -132,220 +129,6 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) { rv = c_end; break; - /* multibyte matchers */ - case ACCEL_MLVERM: - DEBUG_PRINTF("accel mlverm %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = long_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len); - break; - case ACCEL_MLVERM_NOCASE: - DEBUG_PRINTF("accel mlverm nc %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = long_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len); - break; - case ACCEL_MLGVERM: - DEBUG_PRINTF("accel mlgverm %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = longgrab_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len); - break; - case ACCEL_MLGVERM_NOCASE: - DEBUG_PRINTF("accel mlgverm nc %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = longgrab_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len); - break; - case ACCEL_MSVERM: - DEBUG_PRINTF("accel msverm %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shift_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len); - break; - case ACCEL_MSVERM_NOCASE: - DEBUG_PRINTF("accel msverm nc %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shift_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len); - break; - case ACCEL_MSGVERM: - DEBUG_PRINTF("accel msgverm %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shiftgrab_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len); - break; - case ACCEL_MSGVERM_NOCASE: - DEBUG_PRINTF("accel msgverm nc %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shiftgrab_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len); - break; - case ACCEL_MDSVERM: - DEBUG_PRINTF("accel mdsverm %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshift_vermicelliExec(accel->mdverm.c, 0, c, c_end, - accel->mdverm.len1, accel->mdverm.len2); - break; - case ACCEL_MDSVERM_NOCASE: - DEBUG_PRINTF("accel mdsverm nc %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshift_vermicelliExec(accel->mdverm.c, 1, c, c_end, - accel->mdverm.len1, accel->mdverm.len2); - break; - case ACCEL_MDSGVERM: - DEBUG_PRINTF("accel mdsgverm %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 0, c, c_end, - accel->mdverm.len1, accel->mdverm.len2); - break; - case ACCEL_MDSGVERM_NOCASE: - DEBUG_PRINTF("accel mdsgverm nc %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 1, c, c_end, - accel->mdverm.len1, accel->mdverm.len2); - break; - case ACCEL_MLSHUFTI: - DEBUG_PRINTF("accel mlshufti %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = long_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end, - accel->mshufti.len); - break; - case ACCEL_MLGSHUFTI: - DEBUG_PRINTF("accel mlgshufti %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = longgrab_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end, - accel->mshufti.len); - break; - case ACCEL_MSSHUFTI: - DEBUG_PRINTF("accel msshufti %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shift_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end, - accel->mshufti.len); - break; - case ACCEL_MSGSHUFTI: - DEBUG_PRINTF("accel msgshufti %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shiftgrab_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end, - accel->mshufti.len); - break; - case ACCEL_MDSSHUFTI: - DEBUG_PRINTF("accel mdsshufti %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshift_shuftiExec(accel->mdshufti.lo, accel->mdshufti.hi, c, c_end, - accel->mdshufti.len1, accel->mdshufti.len2); - break; - case ACCEL_MDSGSHUFTI: - DEBUG_PRINTF("accel msgshufti %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshiftgrab_shuftiExec(accel->mdshufti.lo, accel->mdshufti.hi, c, c_end, - accel->mdshufti.len1, accel->mdshufti.len2); - break; - case ACCEL_MLTRUFFLE: - DEBUG_PRINTF("accel mltruffle %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = long_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2, - c, c_end, accel->mtruffle.len); - break; - case ACCEL_MLGTRUFFLE: - DEBUG_PRINTF("accel mlgtruffle %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = longgrab_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2, - c, c_end, accel->mtruffle.len); - break; - case ACCEL_MSTRUFFLE: - DEBUG_PRINTF("accel mstruffle %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shift_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2, - c, c_end, accel->mtruffle.len); - break; - case ACCEL_MSGTRUFFLE: - DEBUG_PRINTF("accel msgtruffle %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shiftgrab_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2, - c, c_end, accel->mtruffle.len); - break; - case ACCEL_MDSTRUFFLE: - DEBUG_PRINTF("accel mdstruffle %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshift_truffleExec(accel->mdtruffle.mask1, - accel->mdtruffle.mask2, c, c_end, - accel->mdtruffle.len1, - accel->mdtruffle.len2); - break; - case ACCEL_MDSGTRUFFLE: - DEBUG_PRINTF("accel mdsgtruffle %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshiftgrab_truffleExec(accel->mdtruffle.mask1, - accel->mdtruffle.mask2, c, c_end, - accel->mdtruffle.len1, - accel->mdtruffle.len2); - break; - default: assert(!"not here"); diff --git a/src/nfa/accel.h b/src/nfa/accel.h index a13563b6..3a03d059 100644 --- a/src/nfa/accel.h +++ b/src/nfa/accel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -61,36 +61,7 @@ enum AccelType { ACCEL_DSHUFTI, ACCEL_TRUFFLE, ACCEL_RED_TAPE, - /* multibyte vermicellis */ - ACCEL_MLVERM, - ACCEL_MLVERM_NOCASE, - ACCEL_MLGVERM, - ACCEL_MLGVERM_NOCASE, - ACCEL_MSVERM, - ACCEL_MSVERM_NOCASE, - ACCEL_MSGVERM, - ACCEL_MSGVERM_NOCASE, - ACCEL_MDSVERM, - ACCEL_MDSVERM_NOCASE, - ACCEL_MDSGVERM, - ACCEL_MDSGVERM_NOCASE, - /* multibyte shuftis */ - ACCEL_MLSHUFTI, - ACCEL_MLGSHUFTI, - ACCEL_MSSHUFTI, - ACCEL_MSGSHUFTI, - ACCEL_MDSSHUFTI, - ACCEL_MDSGSHUFTI, - /* multibyte truffles */ - ACCEL_MLTRUFFLE, - ACCEL_MLGTRUFFLE, - ACCEL_MSTRUFFLE, - ACCEL_MSGTRUFFLE, - ACCEL_MDSTRUFFLE, - ACCEL_MDSGTRUFFLE, - /* masked dverm */ ACCEL_DVERM_MASKED, - }; /** \brief Structure for accel framework. */ @@ -140,42 +111,12 @@ union AccelAux { m128 lo2; m128 hi2; } dshufti; - struct { - u8 accel_type; - u8 offset; - m128 lo; - m128 hi; - u8 len; - } mshufti; - struct { - u8 accel_type; - u8 offset; - m128 lo; - m128 hi; - u8 len1; - u8 len2; - } mdshufti; struct { u8 accel_type; u8 offset; m128 mask1; m128 mask2; } truffle; - struct { - u8 accel_type; - u8 offset; - m128 mask1; - m128 mask2; - u8 len; - } mtruffle; - struct { - u8 accel_type; - u8 offset; - m128 mask1; - m128 mask2; - u8 len1; - u8 len2; - } mdtruffle; }; /** diff --git a/src/nfa/accel_dump.cpp b/src/nfa/accel_dump.cpp index e99e71a5..0d19fa8c 100644 --- a/src/nfa/accel_dump.cpp +++ b/src/nfa/accel_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -93,54 +93,6 @@ const char *accelName(u8 accel_type) { return "truffle"; case ACCEL_RED_TAPE: return "red tape"; - case ACCEL_MLVERM: - return "multibyte long vermicelli"; - case ACCEL_MLVERM_NOCASE: - return "multibyte long vermicelli nocase"; - case ACCEL_MLGVERM: - return "multibyte long-grab vermicelli"; - case ACCEL_MLGVERM_NOCASE: - return "multibyte long-grab vermicelli nocase"; - case ACCEL_MSVERM: - return "multibyte shift vermicelli"; - case ACCEL_MSVERM_NOCASE: - return "multibyte shift vermicelli nocase"; - case ACCEL_MSGVERM: - return "multibyte shift-grab vermicelli"; - case ACCEL_MSGVERM_NOCASE: - return "multibyte shift-grab vermicelli nocase"; - case ACCEL_MDSVERM: - return "multibyte doubleshift vermicelli"; - case ACCEL_MDSVERM_NOCASE: - return "multibyte doubleshift vermicelli nocase"; - case ACCEL_MDSGVERM: - return "multibyte doubleshift-grab vermicelli"; - case ACCEL_MDSGVERM_NOCASE: - return "multibyte doubleshift-grab vermicelli nocase"; - case ACCEL_MLSHUFTI: - return "multibyte long shufti"; - case ACCEL_MLGSHUFTI: - return "multibyte long-grab shufti"; - case ACCEL_MSSHUFTI: - return "multibyte shift shufti"; - case ACCEL_MSGSHUFTI: - return "multibyte shift-grab shufti"; - case ACCEL_MDSSHUFTI: - return "multibyte doubleshift shufti"; - case ACCEL_MDSGSHUFTI: - return "multibyte doubleshift-grab shufti"; - case ACCEL_MLTRUFFLE: - return "multibyte long truffle"; - case ACCEL_MLGTRUFFLE: - return "multibyte long-grab truffle"; - case ACCEL_MSTRUFFLE: - return "multibyte shift truffle"; - case ACCEL_MSGTRUFFLE: - return "multibyte shift-grab truffle"; - case ACCEL_MDSTRUFFLE: - return "multibyte doubleshift truffle"; - case ACCEL_MDSGTRUFFLE: - return "multibyte doubleshift-grab truffle"; default: return "unknown!"; } @@ -283,59 +235,6 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) { (const u8 *)&accel.truffle.mask2); break; } - case ACCEL_MLVERM: - case ACCEL_MLVERM_NOCASE: - case ACCEL_MLGVERM: - case ACCEL_MLGVERM_NOCASE: - case ACCEL_MSVERM: - case ACCEL_MSVERM_NOCASE: - case ACCEL_MSGVERM: - case ACCEL_MSGVERM_NOCASE: - fprintf(f, " [\\x%02hhx] len:%u\n", accel.mverm.c, accel.mverm.len); - break; - case ACCEL_MDSVERM: - case ACCEL_MDSVERM_NOCASE: - case ACCEL_MDSGVERM: - case ACCEL_MDSGVERM_NOCASE: - fprintf(f, " [\\x%02hhx] len1:%u len2:%u\n", accel.mdverm.c, accel.mdverm.len1, - accel.mdverm.len2); - break; - case ACCEL_MLSHUFTI: - case ACCEL_MLGSHUFTI: - case ACCEL_MSSHUFTI: - case ACCEL_MSGSHUFTI: - fprintf(f, " len:%u\n", accel.mshufti.len); - dumpShuftiMasks(f, (const u8 *)&accel.mshufti.lo, - (const u8 *)&accel.mshufti.hi); - dumpShuftiCharReach(f, (const u8 *)&accel.mshufti.lo, - (const u8 *)&accel.mshufti.hi); - break; - case ACCEL_MDSSHUFTI: - case ACCEL_MDSGSHUFTI: - fprintf(f, " len1:%u len2:%u\n", accel.mdshufti.len1, accel.mdshufti.len2); - dumpShuftiMasks(f, (const u8 *)&accel.mdshufti.lo, - (const u8 *)&accel.mdshufti.hi); - dumpShuftiCharReach(f, (const u8 *)&accel.mdshufti.lo, - (const u8 *)&accel.mdshufti.hi); - break; - case ACCEL_MLTRUFFLE: - case ACCEL_MLGTRUFFLE: - case ACCEL_MSTRUFFLE: - case ACCEL_MSGTRUFFLE: - fprintf(f, " len:%u\n", accel.mtruffle.len); - dumpTruffleMasks(f, (const u8 *)&accel.mtruffle.mask1, - (const u8 *)&accel.mtruffle.mask2); - dumpTruffleCharReach(f, (const u8 *)&accel.mtruffle.mask1, - (const u8 *)&accel.mtruffle.mask2); - break; - case ACCEL_MDSTRUFFLE: - case ACCEL_MDSGTRUFFLE: - fprintf(f, " len1:%u len2:%u\n", accel.mdtruffle.len1, accel.mdtruffle.len2); - dumpTruffleMasks(f, (const u8 *)&accel.mdtruffle.mask1, - (const u8 *)&accel.mdtruffle.mask2); - dumpTruffleCharReach(f, (const u8 *)&accel.mdtruffle.mask1, - (const u8 *)&accel.mdtruffle.mask2); - break; default: fprintf(f, "\n"); break; diff --git a/src/nfa/accelcompile.cpp b/src/nfa/accelcompile.cpp index 32e569ba..a224410d 100644 --- a/src/nfa/accelcompile.cpp +++ b/src/nfa/accelcompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -225,274 +225,6 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) { aux->accel_type = ACCEL_NONE; } -static -void buildAccelMulti(const AccelInfo &info, AccelAux *aux) { - if (info.ma_type == MultibyteAccelInfo::MAT_NONE) { - DEBUG_PRINTF("no multimatch for us :("); - return; - } - - u32 offset = info.multiaccel_offset; - const CharReach &stops = info.multiaccel_stops; - - assert(aux->accel_type == ACCEL_NONE); - if (stops.all()) { - return; - } - - size_t outs = stops.count(); - DEBUG_PRINTF("%zu outs\n", outs); - assert(outs && outs < 256); - - switch (info.ma_type) { - case MultibyteAccelInfo::MAT_LONG: - if (outs == 1) { - aux->accel_type = ACCEL_MLVERM; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first(); - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); - return; - } - if (outs == 2 && stops.isCaselessChar()) { - aux->accel_type = ACCEL_MLVERM_NOCASE; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first() & CASE_CLEAR; - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", - aux->verm.c); - return; - } - break; - case MultibyteAccelInfo::MAT_LONGGRAB: - if (outs == 1) { - aux->accel_type = ACCEL_MLGVERM; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first(); - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); - return; - } - if (outs == 2 && stops.isCaselessChar()) { - aux->accel_type = ACCEL_MLGVERM_NOCASE; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first() & CASE_CLEAR; - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", - aux->verm.c); - return; - } - break; - case MultibyteAccelInfo::MAT_SHIFT: - if (outs == 1) { - aux->accel_type = ACCEL_MSVERM; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first(); - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); - return; - } - if (outs == 2 && stops.isCaselessChar()) { - aux->accel_type = ACCEL_MSVERM_NOCASE; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first() & CASE_CLEAR; - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", - aux->verm.c); - return; - } - break; - case MultibyteAccelInfo::MAT_SHIFTGRAB: - if (outs == 1) { - aux->accel_type = ACCEL_MSGVERM; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first(); - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); - return; - } - if (outs == 2 && stops.isCaselessChar()) { - aux->accel_type = ACCEL_MSGVERM_NOCASE; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first() & CASE_CLEAR; - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", - aux->verm.c); - return; - } - break; - case MultibyteAccelInfo::MAT_DSHIFT: - if (outs == 1) { - aux->accel_type = ACCEL_MDSVERM; - aux->mdverm.offset = offset; - aux->mdverm.c = stops.find_first(); - aux->mdverm.len1 = info.ma_len1; - aux->mdverm.len2 = info.ma_len2; - DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); - return; - } - if (outs == 2 && stops.isCaselessChar()) { - aux->accel_type = ACCEL_MDSVERM_NOCASE; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first() & CASE_CLEAR; - aux->mdverm.len1 = info.ma_len1; - aux->mdverm.len2 = info.ma_len2; - DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", - aux->verm.c); - return; - } - break; - case MultibyteAccelInfo::MAT_DSHIFTGRAB: - if (outs == 1) { - aux->accel_type = ACCEL_MDSGVERM; - aux->mdverm.offset = offset; - aux->mdverm.c = stops.find_first(); - aux->mdverm.len1 = info.ma_len1; - aux->mdverm.len2 = info.ma_len2; - DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); - return; - } - if (outs == 2 && stops.isCaselessChar()) { - aux->accel_type = ACCEL_MDSGVERM_NOCASE; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first() & CASE_CLEAR; - aux->mdverm.len1 = info.ma_len1; - aux->mdverm.len2 = info.ma_len2; - DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", - aux->verm.c); - return; - } - break; - default: - // shouldn't happen - assert(0); - return; - } - - DEBUG_PRINTF("attempting shufti for %zu chars\n", outs); - - switch (info.ma_type) { - case MultibyteAccelInfo::MAT_LONG: - if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo, - (u8 *)&aux->mshufti.hi) == -1) { - break; - } - aux->accel_type = ACCEL_MLSHUFTI; - aux->mshufti.offset = offset; - aux->mshufti.len = info.ma_len1; - return; - case MultibyteAccelInfo::MAT_LONGGRAB: - if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo, - (u8 *)&aux->mshufti.hi) == -1) { - break; - } - aux->accel_type = ACCEL_MLGSHUFTI; - aux->mshufti.offset = offset; - aux->mshufti.len = info.ma_len1; - return; - case MultibyteAccelInfo::MAT_SHIFT: - if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo, - (u8 *)&aux->mshufti.hi) == -1) { - break; - } - aux->accel_type = ACCEL_MSSHUFTI; - aux->mshufti.offset = offset; - aux->mshufti.len = info.ma_len1; - return; - case MultibyteAccelInfo::MAT_SHIFTGRAB: - if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo, - (u8 *)&aux->mshufti.hi) == -1) { - break; - } - aux->accel_type = ACCEL_MSGSHUFTI; - aux->mshufti.offset = offset; - aux->mshufti.len = info.ma_len1; - return; - case MultibyteAccelInfo::MAT_DSHIFT: - if (shuftiBuildMasks(stops, (u8 *)&aux->mdshufti.lo, - (u8 *)&aux->mdshufti.hi) == -1) { - break; - } - aux->accel_type = ACCEL_MDSSHUFTI; - aux->mdshufti.offset = offset; - aux->mdshufti.len1 = info.ma_len1; - aux->mdshufti.len2 = info.ma_len2; - return; - case MultibyteAccelInfo::MAT_DSHIFTGRAB: - if (shuftiBuildMasks(stops, (u8 *)&aux->mdshufti.lo, - (u8 *)&aux->mdshufti.hi) == -1) { - break; - } - aux->accel_type = ACCEL_MDSGSHUFTI; - aux->mdshufti.offset = offset; - aux->mdshufti.len1 = info.ma_len1; - aux->mdshufti.len2 = info.ma_len2; - return; - default: - // shouldn't happen - assert(0); - return; - } - DEBUG_PRINTF("shufti build failed, falling through\n"); - - if (outs <= ACCEL_MAX_STOP_CHAR) { - DEBUG_PRINTF("building Truffle for %zu chars\n", outs); - switch (info.ma_type) { - case MultibyteAccelInfo::MAT_LONG: - aux->accel_type = ACCEL_MLTRUFFLE; - aux->mtruffle.offset = offset; - aux->mtruffle.len = info.ma_len1; - truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, - (u8 *)&aux->mtruffle.mask2); - break; - case MultibyteAccelInfo::MAT_LONGGRAB: - aux->accel_type = ACCEL_MLGTRUFFLE; - aux->mtruffle.offset = offset; - aux->mtruffle.len = info.ma_len1; - truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, - (u8 *)&aux->mtruffle.mask2); - break; - case MultibyteAccelInfo::MAT_SHIFT: - aux->accel_type = ACCEL_MSTRUFFLE; - aux->mtruffle.offset = offset; - aux->mtruffle.len = info.ma_len1; - truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, - (u8 *)&aux->mtruffle.mask2); - break; - case MultibyteAccelInfo::MAT_SHIFTGRAB: - aux->accel_type = ACCEL_MSGTRUFFLE; - aux->mtruffle.offset = offset; - aux->mtruffle.len = info.ma_len1; - truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, - (u8 *)&aux->mtruffle.mask2); - break; - case MultibyteAccelInfo::MAT_DSHIFT: - aux->accel_type = ACCEL_MDSTRUFFLE; - aux->mdtruffle.offset = offset; - aux->mdtruffle.len1 = info.ma_len1; - aux->mdtruffle.len2 = info.ma_len2; - truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, - (u8 *)&aux->mdtruffle.mask2); - break; - case MultibyteAccelInfo::MAT_DSHIFTGRAB: - aux->accel_type = ACCEL_MDSGTRUFFLE; - aux->mdtruffle.offset = offset; - aux->mdtruffle.len1 = info.ma_len1; - aux->mdtruffle.len2 = info.ma_len2; - truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, - (u8 *)&aux->mdtruffle.mask2); - break; - default: - // shouldn't happen - assert(0); - return; - } - return; - } - - DEBUG_PRINTF("unable to accelerate multibyte case with %zu outs\n", outs); -} - bool buildAccelAux(const AccelInfo &info, AccelAux *aux) { assert(aux->accel_type == ACCEL_NONE); if (info.single_stops.none()) { @@ -500,9 +232,6 @@ bool buildAccelAux(const AccelInfo &info, AccelAux *aux) { aux->accel_type = ACCEL_RED_TAPE; aux->generic.offset = info.single_offset; } - if (aux->accel_type == ACCEL_NONE) { - buildAccelMulti(info, aux); - } if (aux->accel_type == ACCEL_NONE) { buildAccelDouble(info, aux); } diff --git a/src/nfa/accelcompile.h b/src/nfa/accelcompile.h index 9b30146c..9bd4ff18 100644 --- a/src/nfa/accelcompile.h +++ b/src/nfa/accelcompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,30 +37,9 @@ union AccelAux; namespace ue2 { -struct MultibyteAccelInfo { - /* multibyte accel schemes, ordered by strength */ - enum multiaccel_type { - MAT_SHIFT, - MAT_SHIFTGRAB, - MAT_DSHIFT, - MAT_DSHIFTGRAB, - MAT_LONG, - MAT_LONGGRAB, - MAT_MAX, - MAT_NONE = MAT_MAX - }; - CharReach cr; - u32 offset = 0; - u32 len1 = 0; - u32 len2 = 0; - multiaccel_type type = MAT_NONE; -}; - struct AccelInfo { AccelInfo() : single_offset(0U), double_offset(0U), - single_stops(CharReach::dot()), - multiaccel_offset(0), ma_len1(0), ma_len2(0), - ma_type(MultibyteAccelInfo::MAT_NONE) {} + single_stops(CharReach::dot()) {} u32 single_offset; /**< offset correction to apply to single schemes */ u32 double_offset; /**< offset correction to apply to double schemes */ CharReach double_stop1; /**< single-byte accel stop literals for double @@ -68,11 +47,6 @@ struct AccelInfo { flat_set> double_stop2; /**< double-byte accel stop * literals */ CharReach single_stops; /**< escapes for single byte acceleration */ - u32 multiaccel_offset; /**< offset correction to apply to multibyte schemes */ - CharReach multiaccel_stops; /**< escapes for multibyte acceleration */ - u32 ma_len1; /**< multiaccel len1 */ - u32 ma_len2; /**< multiaccel len2 */ - MultibyteAccelInfo::multiaccel_type ma_type; /**< multiaccel type */ }; bool buildAccelAux(const AccelInfo &info, AccelAux *aux); diff --git a/src/nfa/limex_accel.c b/src/nfa/limex_accel.c index a96dea43..c34216f3 100644 --- a/src/nfa/limex_accel.c +++ b/src/nfa/limex_accel.c @@ -39,9 +39,6 @@ #include "nfa_internal.h" #include "shufti.h" #include "truffle.h" -#include "multishufti.h" -#include "multitruffle.h" -#include "multivermicelli.h" #include "ue2common.h" #include "vermicelli.h" #include "util/arch.h" diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 3cdf5de1..7183d4b7 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -93,8 +93,6 @@ struct precalcAccel { CharReach double_cr; flat_set> double_lits; /* double-byte accel stop literals */ u32 double_offset; - - MultibyteAccelInfo ma_info; }; struct limex_accel_info { @@ -358,16 +356,12 @@ void buildReachMapping(const build_info &args, vector &reach, } struct AccelBuild { - AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0), ma_len1(0), - ma_len2(0), ma_type(MultibyteAccelInfo::MAT_NONE) {} + AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0) {} NFAVertex v; u32 state; u32 offset; // offset correction to apply CharReach stop1; // single-byte accel stop literals flat_set> stop2; // double-byte accel stop literals - u32 ma_len1; // multiaccel len1 - u32 ma_len2; // multiaccel len2 - MultibyteAccelInfo::multiaccel_type ma_type; // multiaccel type }; static @@ -382,12 +376,7 @@ void findStopLiterals(const build_info &bi, NFAVertex v, AccelBuild &build) { build.stop1 = CharReach::dot(); } else { const precalcAccel &precalc = bi.accel.precalc.at(ss); - unsigned ma_len = precalc.ma_info.len1 + precalc.ma_info.len2; - if (ma_len >= MULTIACCEL_MIN_LEN) { - build.ma_len1 = precalc.ma_info.len1; - build.stop1 = precalc.ma_info.cr; - build.offset = precalc.ma_info.offset; - } else if (precalc.double_lits.empty()) { + if (precalc.double_lits.empty()) { build.stop1 = precalc.single_cr; build.offset = precalc.single_offset; } else { @@ -606,7 +595,6 @@ void fillAccelInfo(build_info &bi) { limex_accel_info &accel = bi.accel; unordered_map &accel_map = accel.accel_map; const map &br_cyclic = bi.br_cyclic; - const CompileContext &cc = bi.cc; const unordered_map &state_ids = bi.state_ids; const u32 num_states = bi.num_states; @@ -663,27 +651,17 @@ void fillAccelInfo(build_info &bi) { DEBUG_PRINTF("accel %u ok with offset s%u, d%u\n", i, as.offset, as.double_offset); - // try multibyte acceleration first - MultibyteAccelInfo mai = nfaCheckMultiAccel(g, states, cc); - precalcAccel &pa = accel.precalc[state_set]; - useful |= state_set; - - // if we successfully built a multibyte accel scheme, use that - if (mai.type != MultibyteAccelInfo::MAT_NONE) { - pa.ma_info = mai; - - DEBUG_PRINTF("multibyte acceleration!\n"); - continue; - } - pa.single_offset = as.offset; pa.single_cr = as.cr; + if (as.double_byte.size() != 0) { pa.double_offset = as.double_offset; pa.double_lits = as.double_byte; pa.double_cr = as.double_cr; - }; + } + + useful |= state_set; } for (const auto &m : accel_map) { @@ -700,19 +678,8 @@ void fillAccelInfo(build_info &bi) { state_set.reset(); state_set.set(state_id); - bool is_multi = false; - auto p_it = accel.precalc.find(state_set); - if (p_it != accel.precalc.end()) { - const precalcAccel &pa = p_it->second; - offset = max(pa.double_offset, pa.single_offset); - is_multi = pa.ma_info.type != MultibyteAccelInfo::MAT_NONE; - assert(offset <= MAX_ACCEL_DEPTH); - } - accel.accelerable.insert(v); - if (!is_multi) { - findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]); - } + findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]); } } @@ -954,16 +921,8 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask, if (contains(accel.precalc, effective_states)) { const auto &precalc = accel.precalc.at(effective_states); - if (precalc.ma_info.type != MultibyteAccelInfo::MAT_NONE) { - ainfo.ma_len1 = precalc.ma_info.len1; - ainfo.ma_len2 = precalc.ma_info.len2; - ainfo.multiaccel_offset = precalc.ma_info.offset; - ainfo.multiaccel_stops = precalc.ma_info.cr; - ainfo.ma_type = precalc.ma_info.type; - } else { - ainfo.single_offset = precalc.single_offset; - ainfo.single_stops = precalc.single_cr; - } + ainfo.single_offset = precalc.single_offset; + ainfo.single_stops = precalc.single_cr; } } diff --git a/src/nfa/multiaccel_common.h b/src/nfa/multiaccel_common.h deleted file mode 100644 index 1a13c3b6..00000000 --- a/src/nfa/multiaccel_common.h +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MULTIACCEL_COMMON_H_ -#define MULTIACCEL_COMMON_H_ - -#include "config.h" -#include "ue2common.h" -#include "util/join.h" -#include "util/bitutils.h" - -/* - * When doing shifting, remember that the total number of shifts should be n-1 - */ -#define VARISHIFT(src, dst, len) \ - do { \ - (dst) &= (src) >> (len); \ - } while (0) -#define STATIC_SHIFT1(x) \ - do { \ - (x) &= (x) >> 1; \ - } while (0) -#define STATIC_SHIFT2(x) \ - do { \ - (x) &= (x) >> 2;\ - } while (0) -#define STATIC_SHIFT4(x) \ - do { \ - (x) &= (x) >> 4; \ - } while (0) -#define STATIC_SHIFT8(x) \ - do { \ - (x) &= (x) >> 8; \ - } while (0) -#define SHIFT1(x) \ - do {} while (0) -#define SHIFT2(x) \ - do { \ - STATIC_SHIFT1(x); \ - } while (0) -#define SHIFT3(x) \ - do { \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT1(x); \ - } while (0) -#define SHIFT4(x) \ - do { \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT5(x) \ - do { \ - SHIFT4(x); \ - STATIC_SHIFT1(x); \ - } while (0) -#define SHIFT6(x) \ - do { \ - SHIFT4(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT7(x) \ - do { \ - SHIFT4(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT8(x) \ - do { \ - SHIFT4(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT9(x) \ - do { \ - SHIFT8(x); \ - STATIC_SHIFT1(x); \ - } while (0) -#define SHIFT10(x) \ - do { \ - SHIFT8(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT11(x) \ - do { \ - SHIFT8(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT12(x); \ - do { \ - SHIFT8(x);\ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT13(x); \ - do { \ - SHIFT8(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT14(x) \ - do { \ - SHIFT8(x); \ - STATIC_SHIFT2(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT15(x) \ - do { \ - SHIFT8(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT16(x) \ - do { \ - SHIFT8(x); \ - STATIC_SHIFT8(x); \ - } while (0) -#define SHIFT17(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT1(x); \ - } while (0) -#define SHIFT18(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT19(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT20(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT21(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT22(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT2(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT23(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT24(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT8(x); \ - } while (0) -#define SHIFT25(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT1(x); \ - } while (0) -#define SHIFT26(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT27(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT28(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT29(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT30(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT2(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT31(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT32(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT8(x); \ - } while (0) - -/* - * this function is used by 32-bit multiaccel matchers. 32-bit matchers accept - * a 32-bit integer as a buffer, where low 16 bits is movemask result and - * high 16 bits are "don't care" values. this function is not expected to return - * a result higher than 16. - */ -static really_inline -const u8 *match32(const u8 *buf, const u32 z) { - if (unlikely(z != 0)) { - u32 pos = ctz32(z); - assert(pos < 16); - return buf + pos; - } - return NULL; -} - -/* - * this function is used by 64-bit multiaccel matchers. 64-bit matchers accept - * a 64-bit integer as a buffer, where low 32 bits is movemask result and - * high 32 bits are "don't care" values. this function is not expected to return - * a result higher than 32. - */ -static really_inline -const u8 *match64(const u8 *buf, const u64a z) { - if (unlikely(z != 0)) { - u32 pos = ctz64(z); - assert(pos < 32); - return buf + pos; - } - return NULL; -} - -#endif /* MULTIACCEL_COMMON_H_ */ diff --git a/src/nfa/multiaccel_compilehelper.cpp b/src/nfa/multiaccel_compilehelper.cpp deleted file mode 100644 index 4c1f8101..00000000 --- a/src/nfa/multiaccel_compilehelper.cpp +++ /dev/null @@ -1,439 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "multiaccel_compilehelper.h" - -using namespace std; -using namespace ue2; - -#ifdef DEBUG -static const char* state_to_str[] = { - "FIRST_RUN", - "SECOND_RUN", - "WAITING_FOR_GRAB", - "FIRST_TAIL", - "SECOND_TAIL", - "STOPPED", - "INVALID" -}; -static const char* type_to_str[] = { - "SHIFT", - "SHIFTGRAB", - "DOUBLESHIFT", - "DOUBLESHIFTGRAB", - "LONG", - "LONGGRAB", - "NONE" -}; - -static -void dumpMultiaccelState(const accel_data &d) { - DEBUG_PRINTF("type: %s state: %s len1: %u tlen1: %u len2: %u tlen2: %u\n", - type_to_str[(unsigned) d.type], - state_to_str[(unsigned) d.state], - d.len1, d.tlen1, d.len2, d.tlen2); -} -#endif - -/* stop all the matching. this may render most schemes invalid. */ -static -void stop(accel_data &d) { - switch (d.state) { - case STATE_STOPPED: - case STATE_INVALID: - break; - case STATE_FIRST_TAIL: - case STATE_SECOND_RUN: - /* - * Shift matchers are special case, because they have "tails". - * When shift matcher reaches a mid/endpoint, tail mode is - * activated, which looks for more matches to extend the match. - * - * For example, consider pattern /a{5}ba{3}/. Under normal circumstances, - * long-grab matcher will be picked for this pattern (matching a run of a's, - * followed by a not-a), because doubleshift matcher would be confused by - * consecutive a's and would parse the pattern as a.{0}a.{0}a (two shifts - * by 1) and throw out the rest of the pattern. - * - * With tails, we defer ending the run until we actually run out of - * matching characters, so the above pattern will now be parsed by - * doubleshift matcher as /a.{3}a.{3}a/ (two shifts by 4). - * - * So if we are stopping shift matchers, we should check if we aren't in - * the process of matching first tail or second run. If we are, we can't - * finish the second run as we are stopping, but we can try and split - * the first tail instead to obtain a valid second run. - */ - if ((d.type == MultibyteAccelInfo::MAT_DSHIFT || - d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) && d.tlen1 == 0) { - // can't split an empty void... - d.state = STATE_INVALID; - break; - } - d.len2 = 0; - d.state = STATE_STOPPED; - break; - case STATE_SECOND_TAIL: - d.state = STATE_STOPPED; - break; - case STATE_WAITING_FOR_GRAB: - case STATE_FIRST_RUN: - if (d.type == MultibyteAccelInfo::MAT_LONG) { - d.state = STATE_STOPPED; - } else { - d.state = STATE_INVALID; - } - break; - } -} - -static -void validate(accel_data &d, unsigned max_len) { - // try and fit in all our tails - if (d.len1 + d.tlen1 + d.len2 + d.tlen2 < max_len && d.len2 > 0) { - // case 1: everything fits in - d.len1 += d.tlen1; - d.len2 += d.tlen2; - d.tlen1 = 0; - d.tlen2 = 0; - } else if (d.len1 + d.tlen1 + d.len2 < max_len && d.len2 > 0) { - // case 2: everything but the second tail fits in - d.len1 += d.tlen1; - d.tlen1 = 0; - // try going for a partial tail - if (d.tlen2 != 0) { - int new_tlen2 = max_len - 1 - d.len1 - d.len2; - if (new_tlen2 > 0) { - d.len2 += new_tlen2; - } - d.tlen2 = 0; - } - } else if (d.len1 + d.tlen1 < max_len) { - // case 3: first run and its tail fits in - if (d.type == MultibyteAccelInfo::MAT_DSHIFT || - d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) { - // split the tail into a second run - d.len2 = d.tlen1; - } else { - d.len1 += d.tlen1; - d.len2 = 0; - } - d.tlen1 = 0; - d.tlen2 = 0; - } else if (d.len1 < max_len) { - // case 4: nothing but the first run fits in - // try going for a partial tail - if (d.tlen1 != 0) { - int new_tlen1 = max_len - 1 - d.len1; - if (new_tlen1 > 0) { - d.len1 += new_tlen1; - } - d.tlen1 = 0; - } - d.len2 = 0; - d.tlen2 = 0; - } - // if we removed our second run, doubleshift matchers are no longer valid - if ((d.type == MultibyteAccelInfo::MAT_DSHIFT || - d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) && d.len2 == 0) { - d.state = STATE_INVALID; - } else if ((d.type == MultibyteAccelInfo::MAT_LONG) && d.len1 >= max_len) { - // long matchers can just stop whenever they want to - d.len1 = max_len - 1; - } - - // now, general sanity checks - if ((d.len1 + d.tlen1 + d.len2 + d.tlen2) >= max_len) { - d.state = STATE_INVALID; - } - if ((d.len1 + d.tlen1 + d.len2 + d.tlen2) < MULTIACCEL_MIN_LEN) { - d.state = STATE_INVALID; - } -} - -static -void match(accel_data &d, const CharReach &ref_cr, const CharReach &cur_cr) { - switch (d.type) { - case MultibyteAccelInfo::MAT_LONG: - { - /* - * For long matcher, we want lots of consecutive same-or-subset - * char-reaches - */ - if ((ref_cr & cur_cr) == cur_cr) { - d.len1++; - } else { - d.state = STATE_STOPPED; - } - } - break; - - case MultibyteAccelInfo::MAT_LONGGRAB: - { - /* - * For long-grab matcher, we want lots of consecutive same-or-subset - * char-reaches with a negative match in the end. - */ - if ((ref_cr & cur_cr) == cur_cr) { - d.len1++; - } else if (!(ref_cr & cur_cr).any()) { - /* we grabbed, stop immediately */ - d.state = STATE_STOPPED; - } else { - /* our run-n-grab was interrupted; mark as invalid */ - d.state = STATE_INVALID; - } - } - break; - - case MultibyteAccelInfo::MAT_SHIFTGRAB: - { - /* - * For shift-grab matcher, we want two matches separated by anything; - * however the second vertex *must* be a negative (non-overlapping) match. - * - * Shiftgrab matcher is identical to shift except for presence of grab. - */ - if (d.state == STATE_WAITING_FOR_GRAB) { - if ((ref_cr & cur_cr).any()) { - d.state = STATE_INVALID; - } else { - d.state = STATE_FIRST_RUN; - d.len1++; - } - return; - } - } - /* no break, falling through */ - case MultibyteAccelInfo::MAT_SHIFT: - { - /* - * For shift-matcher, we want two matches separated by anything. - */ - if (ref_cr == cur_cr) { - // keep matching tail - switch (d.state) { - case STATE_FIRST_RUN: - d.state = STATE_FIRST_TAIL; - break; - case STATE_FIRST_TAIL: - d.tlen1++; - break; - default: - // shouldn't happen - assert(0); - } - } else { - switch (d.state) { - case STATE_FIRST_RUN: - // simply advance - d.len1++; - break; - case STATE_FIRST_TAIL: - // we found a non-matching char after tail, so stop - d.state = STATE_STOPPED; - break; - default: - // shouldn't happen - assert(0); - } - } - } - break; - - case MultibyteAccelInfo::MAT_DSHIFTGRAB: - { - /* - * For double shift-grab matcher, we want two matches separated by - * either negative matches or dots; however the second vertex *must* - * be a negative match. - * - * Doubleshiftgrab matcher is identical to doubleshift except for - * presence of grab. - */ - if (d.state == STATE_WAITING_FOR_GRAB) { - if ((ref_cr & cur_cr).any()) { - d.state = STATE_INVALID; - } else { - d.state = STATE_FIRST_RUN; - d.len1++; - } - return; - } - } - /* no break, falling through */ - case MultibyteAccelInfo::MAT_DSHIFT: - { - /* - * For double shift matcher, we want three matches, each separated - * by a lot of anything. - * - * Doubleshift matcher is complicated by presence of tails. - */ - if (ref_cr == cur_cr) { - // decide if we are activating second shift or matching tails - switch (d.state) { - case STATE_FIRST_RUN: - d.state = STATE_FIRST_TAIL; - d.len2 = 1; // we're now ready for our second run - break; - case STATE_FIRST_TAIL: - d.tlen1++; - break; - case STATE_SECOND_RUN: - d.state = STATE_SECOND_TAIL; - break; - case STATE_SECOND_TAIL: - d.tlen2++; - break; - default: - // shouldn't happen - assert(0); - } - } else { - switch (d.state) { - case STATE_FIRST_RUN: - d.len1++; - break; - case STATE_FIRST_TAIL: - // start second run - d.state = STATE_SECOND_RUN; - d.len2++; - break; - case STATE_SECOND_RUN: - d.len2++; - break; - case STATE_SECOND_TAIL: - // stop - d.state = STATE_STOPPED; - break; - default: - // shouldn't happen - assert(0); - } - } - } - break; - - default: - // shouldn't happen - assert(0); - break; - } -} - -MultiaccelCompileHelper::MultiaccelCompileHelper(const CharReach &ref_cr, - u32 off, unsigned max_length) - : cr(ref_cr), offset(off), max_len(max_length) { - int accel_num = (int) MultibyteAccelInfo::MAT_MAX; - accels.resize(accel_num); - - // mark everything as valid - for (int i = 0; i < accel_num; i++) { - accel_data &ad = accels[i]; - ad.len1 = 1; - ad.type = (MultibyteAccelInfo::multiaccel_type) i; - - /* for shift-grab matchers, we are waiting for the grab right at the start */ - if (ad.type == MultibyteAccelInfo::MAT_SHIFTGRAB - || ad.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) { - ad.state = STATE_WAITING_FOR_GRAB; - } else { - ad.state = STATE_FIRST_RUN; - } - } -} - -bool MultiaccelCompileHelper::canAdvance() { - for (const accel_data &ad : accels) { - if (ad.state != STATE_STOPPED && ad.state != STATE_INVALID) { - return true; - } - } - return false; -} - -void MultiaccelCompileHelper::advance(const CharReach &cur_cr) { - for (accel_data &ad : accels) { - if (ad.state == STATE_STOPPED || ad.state == STATE_INVALID) { - continue; - } - match(ad, cr, cur_cr); -#ifdef DEBUG - dumpMultiaccelState(ad); -#endif - } -} - -MultibyteAccelInfo MultiaccelCompileHelper::getBestScheme() { - int best_len = 0; - accel_data best; - - DEBUG_PRINTF("Stopping multiaccel compile\n"); - - for (accel_data &ad : accels) { - // stop our matching - stop(ad); - validate(ad, max_len); - -#ifdef DEBUG - dumpMultiaccelState(ad); -#endif - - // skip invalid schemes - if (ad.state == STATE_INVALID) { - continue; - } - DEBUG_PRINTF("Marking as viable\n"); - - // TODO: relative strengths of accel schemes? maybe e.g. a shorter - // long match would in some cases be preferable to a longer - // double shift match (for example, depending on length)? - int as_len = ad.len1 + ad.len2; - if (as_len >= best_len) { - DEBUG_PRINTF("Marking as best\n"); - best_len = as_len; - best = ad; - } - } - // if we found at least one accel scheme, return it - if (best.state != STATE_INVALID) { -#ifdef DEBUG - DEBUG_PRINTF("Picked best multiaccel state:\n"); - dumpMultiaccelState(best); -#endif - MultibyteAccelInfo info; - info.cr = cr; - info.offset = offset; - info.len1 = best.len1; - info.len2 = best.len2; - info.type = best.type; - return info; - } - return MultibyteAccelInfo(); -} diff --git a/src/nfa/multiaccel_compilehelper.h b/src/nfa/multiaccel_compilehelper.h deleted file mode 100644 index 27dbe634..00000000 --- a/src/nfa/multiaccel_compilehelper.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MULTIACCELCOMPILE_H_ -#define MULTIACCELCOMPILE_H_ - -#include "ue2common.h" - -#include "nfagraph/ng_limex_accel.h" - -#include - -namespace ue2 { - -/* accel scheme state machine */ -enum accel_scheme_state { - STATE_FIRST_RUN, - STATE_SECOND_RUN, - STATE_WAITING_FOR_GRAB, - STATE_FIRST_TAIL, - STATE_SECOND_TAIL, - STATE_STOPPED, - STATE_INVALID -}; - -struct accel_data { - MultibyteAccelInfo::multiaccel_type type = MultibyteAccelInfo::MAT_NONE; - accel_scheme_state state = STATE_INVALID; - unsigned len1 = 0; /* length of first run */ - unsigned len2 = 0; /* length of second run, if present */ - unsigned tlen1 = 0; /* first tail length */ - unsigned tlen2 = 0; /* second tail length */ -}; - -class MultiaccelCompileHelper { -private: - const CharReach &cr; - u32 offset; - std::vector accels; - unsigned max_len; -public: - MultiaccelCompileHelper(const CharReach &cr, u32 off, unsigned max_len); - bool canAdvance(); - MultibyteAccelInfo getBestScheme(); - void advance(const ue2::CharReach &cr); -}; - -}; // namespace - -#endif /* MULTIACCELCOMPILE_H_ */ diff --git a/src/nfa/multiaccel_doubleshift.h b/src/nfa/multiaccel_doubleshift.h deleted file mode 100644 index 7ed7534c..00000000 --- a/src/nfa/multiaccel_doubleshift.h +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MULTIACCEL_DOUBLESHIFT_H_ -#define MULTIACCEL_DOUBLESHIFT_H_ - -#include "multiaccel_common.h" - -#define DOUBLESHIFT_MATCH(len, match_t, match_sz) \ - static really_inline \ - const u8 * JOIN4(doubleshiftMatch_, match_sz, _, len)(const u8 *buf, match_t z, u32 len2) {\ - if (unlikely(z)) { \ - match_t tmp = z; \ - z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \ - tmp |= ((match_t) (1 << (len + len2)) - 1) << (match_sz / 2); \ - VARISHIFT(z, z, len); \ - VARISHIFT(tmp, tmp, len2); \ - VARISHIFT(tmp, z, len); \ - return JOIN(match, match_sz)(buf, z); \ - } \ - return NULL; \ - } - -#define DOUBLESHIFT_MATCH_32_DEF(n) \ - DOUBLESHIFT_MATCH(n, u32, 32) -#define DOUBLESHIFT_MATCH_64_DEF(n) \ - DOUBLESHIFT_MATCH(n, u64a, 64) -#define DOUBLESHIFT_MATCH_DEF(n) \ - DOUBLESHIFT_MATCH_32_DEF(n) \ - DOUBLESHIFT_MATCH_64_DEF(n) - -DOUBLESHIFT_MATCH_DEF(1) -DOUBLESHIFT_MATCH_DEF(2) -DOUBLESHIFT_MATCH_DEF(3) -DOUBLESHIFT_MATCH_DEF(4) -DOUBLESHIFT_MATCH_DEF(5) -DOUBLESHIFT_MATCH_DEF(6) -DOUBLESHIFT_MATCH_DEF(7) -DOUBLESHIFT_MATCH_DEF(8) -DOUBLESHIFT_MATCH_DEF(9) -DOUBLESHIFT_MATCH_DEF(10) -DOUBLESHIFT_MATCH_DEF(11) -DOUBLESHIFT_MATCH_DEF(12) -DOUBLESHIFT_MATCH_DEF(13) -DOUBLESHIFT_MATCH_DEF(14) -DOUBLESHIFT_MATCH_DEF(15) -DOUBLESHIFT_MATCH_64_DEF(16) -DOUBLESHIFT_MATCH_64_DEF(17) -DOUBLESHIFT_MATCH_64_DEF(18) -DOUBLESHIFT_MATCH_64_DEF(19) -DOUBLESHIFT_MATCH_64_DEF(20) -DOUBLESHIFT_MATCH_64_DEF(21) -DOUBLESHIFT_MATCH_64_DEF(22) -DOUBLESHIFT_MATCH_64_DEF(23) -DOUBLESHIFT_MATCH_64_DEF(24) -DOUBLESHIFT_MATCH_64_DEF(25) -DOUBLESHIFT_MATCH_64_DEF(26) -DOUBLESHIFT_MATCH_64_DEF(27) -DOUBLESHIFT_MATCH_64_DEF(28) -DOUBLESHIFT_MATCH_64_DEF(29) -DOUBLESHIFT_MATCH_64_DEF(30) -DOUBLESHIFT_MATCH_64_DEF(31) - -static -const UNUSED u8 * (*doubleshift_match_funcs_32[])(const u8 *buf, u32 z, u32 len2) = -{ -// skip the first - 0, - &doubleshiftMatch_32_1, - &doubleshiftMatch_32_2, - &doubleshiftMatch_32_3, - &doubleshiftMatch_32_4, - &doubleshiftMatch_32_5, - &doubleshiftMatch_32_6, - &doubleshiftMatch_32_7, - &doubleshiftMatch_32_8, - &doubleshiftMatch_32_9, - &doubleshiftMatch_32_10, - &doubleshiftMatch_32_11, - &doubleshiftMatch_32_12, - &doubleshiftMatch_32_13, - &doubleshiftMatch_32_14, - &doubleshiftMatch_32_15, -}; - -static -const UNUSED u8 * (*doubleshift_match_funcs_64[])(const u8 *buf, u64a z, u32 len2) = -{ -// skip the first - 0, - &doubleshiftMatch_64_1, - &doubleshiftMatch_64_2, - &doubleshiftMatch_64_3, - &doubleshiftMatch_64_4, - &doubleshiftMatch_64_5, - &doubleshiftMatch_64_6, - &doubleshiftMatch_64_7, - &doubleshiftMatch_64_8, - &doubleshiftMatch_64_9, - &doubleshiftMatch_64_10, - &doubleshiftMatch_64_11, - &doubleshiftMatch_64_12, - &doubleshiftMatch_64_13, - &doubleshiftMatch_64_14, - &doubleshiftMatch_64_15, - &doubleshiftMatch_64_16, - &doubleshiftMatch_64_17, - &doubleshiftMatch_64_18, - &doubleshiftMatch_64_19, - &doubleshiftMatch_64_20, - &doubleshiftMatch_64_21, - &doubleshiftMatch_64_22, - &doubleshiftMatch_64_23, - &doubleshiftMatch_64_24, - &doubleshiftMatch_64_25, - &doubleshiftMatch_64_26, - &doubleshiftMatch_64_27, - &doubleshiftMatch_64_28, - &doubleshiftMatch_64_29, - &doubleshiftMatch_64_30, - &doubleshiftMatch_64_31, -}; - -#endif /* MULTIACCEL_DOUBLESHIFT_H_ */ diff --git a/src/nfa/multiaccel_doubleshiftgrab.h b/src/nfa/multiaccel_doubleshiftgrab.h deleted file mode 100644 index 51955b4a..00000000 --- a/src/nfa/multiaccel_doubleshiftgrab.h +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MULTIACCEL_DOUBLESHIFTGRAB_H_ -#define MULTIACCEL_DOUBLESHIFTGRAB_H_ - -#include "multiaccel_common.h" - -#define DOUBLESHIFTGRAB_MATCH(len, match_t, match_sz) \ - static really_inline \ - const u8 * JOIN4(doubleshiftgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z, u32 len2) {\ - if (unlikely(z)) { \ - match_t neg = ~z; \ - match_t tmp = z; \ - z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \ - tmp |= ((match_t) (1 << (len + len2)) - 1) << (match_sz / 2); \ - neg |= ((match_t) (1 << len) - 1) << (match_sz / 2); \ - VARISHIFT(z, z, len); \ - VARISHIFT(tmp, tmp, len2); \ - VARISHIFT(neg, z, 1); \ - VARISHIFT(tmp, z, len); \ - return JOIN(match, match_sz)(buf, z); \ - } \ - return NULL; \ - } - -#define DOUBLESHIFTGRAB_MATCH_32_DEF(n) \ - DOUBLESHIFTGRAB_MATCH(n, u32, 32) -#define DOUBLESHIFTGRAB_MATCH_64_DEF(n) \ - DOUBLESHIFTGRAB_MATCH(n, u64a, 64) -#define DOUBLESHIFTGRAB_MATCH_DEF(n) \ - DOUBLESHIFTGRAB_MATCH_32_DEF(n) \ - DOUBLESHIFTGRAB_MATCH_64_DEF(n) - -DOUBLESHIFTGRAB_MATCH_DEF(1) -DOUBLESHIFTGRAB_MATCH_DEF(2) -DOUBLESHIFTGRAB_MATCH_DEF(3) -DOUBLESHIFTGRAB_MATCH_DEF(4) -DOUBLESHIFTGRAB_MATCH_DEF(5) -DOUBLESHIFTGRAB_MATCH_DEF(6) -DOUBLESHIFTGRAB_MATCH_DEF(7) -DOUBLESHIFTGRAB_MATCH_DEF(8) -DOUBLESHIFTGRAB_MATCH_DEF(9) -DOUBLESHIFTGRAB_MATCH_DEF(10) -DOUBLESHIFTGRAB_MATCH_DEF(11) -DOUBLESHIFTGRAB_MATCH_DEF(12) -DOUBLESHIFTGRAB_MATCH_DEF(13) -DOUBLESHIFTGRAB_MATCH_DEF(14) -DOUBLESHIFTGRAB_MATCH_DEF(15) -DOUBLESHIFTGRAB_MATCH_64_DEF(16) -DOUBLESHIFTGRAB_MATCH_64_DEF(17) -DOUBLESHIFTGRAB_MATCH_64_DEF(18) -DOUBLESHIFTGRAB_MATCH_64_DEF(19) -DOUBLESHIFTGRAB_MATCH_64_DEF(20) -DOUBLESHIFTGRAB_MATCH_64_DEF(21) -DOUBLESHIFTGRAB_MATCH_64_DEF(22) -DOUBLESHIFTGRAB_MATCH_64_DEF(23) -DOUBLESHIFTGRAB_MATCH_64_DEF(24) -DOUBLESHIFTGRAB_MATCH_64_DEF(25) -DOUBLESHIFTGRAB_MATCH_64_DEF(26) -DOUBLESHIFTGRAB_MATCH_64_DEF(27) -DOUBLESHIFTGRAB_MATCH_64_DEF(28) -DOUBLESHIFTGRAB_MATCH_64_DEF(29) -DOUBLESHIFTGRAB_MATCH_64_DEF(30) -DOUBLESHIFTGRAB_MATCH_64_DEF(31) - -static -const UNUSED u8 * (*doubleshiftgrab_match_funcs_32[])(const u8 *buf, u32 z, u32 len2) = -{ -// skip the first - 0, - &doubleshiftgrabMatch_32_1, - &doubleshiftgrabMatch_32_2, - &doubleshiftgrabMatch_32_3, - &doubleshiftgrabMatch_32_4, - &doubleshiftgrabMatch_32_5, - &doubleshiftgrabMatch_32_6, - &doubleshiftgrabMatch_32_7, - &doubleshiftgrabMatch_32_8, - &doubleshiftgrabMatch_32_9, - &doubleshiftgrabMatch_32_10, - &doubleshiftgrabMatch_32_11, - &doubleshiftgrabMatch_32_12, - &doubleshiftgrabMatch_32_13, - &doubleshiftgrabMatch_32_14, - &doubleshiftgrabMatch_32_15, -}; - -static -const UNUSED u8 * (*doubleshiftgrab_match_funcs_64[])(const u8 *buf, u64a z, u32 len2) = -{ -// skip the first - 0, - &doubleshiftgrabMatch_64_1, - &doubleshiftgrabMatch_64_2, - &doubleshiftgrabMatch_64_3, - &doubleshiftgrabMatch_64_4, - &doubleshiftgrabMatch_64_5, - &doubleshiftgrabMatch_64_6, - &doubleshiftgrabMatch_64_7, - &doubleshiftgrabMatch_64_8, - &doubleshiftgrabMatch_64_9, - &doubleshiftgrabMatch_64_10, - &doubleshiftgrabMatch_64_11, - &doubleshiftgrabMatch_64_12, - &doubleshiftgrabMatch_64_13, - &doubleshiftgrabMatch_64_14, - &doubleshiftgrabMatch_64_15, - &doubleshiftgrabMatch_64_16, - &doubleshiftgrabMatch_64_17, - &doubleshiftgrabMatch_64_18, - &doubleshiftgrabMatch_64_19, - &doubleshiftgrabMatch_64_20, - &doubleshiftgrabMatch_64_21, - &doubleshiftgrabMatch_64_22, - &doubleshiftgrabMatch_64_23, - &doubleshiftgrabMatch_64_24, - &doubleshiftgrabMatch_64_25, - &doubleshiftgrabMatch_64_26, - &doubleshiftgrabMatch_64_27, - &doubleshiftgrabMatch_64_28, - &doubleshiftgrabMatch_64_29, - &doubleshiftgrabMatch_64_30, - &doubleshiftgrabMatch_64_31, -}; - -#endif /* MULTIACCEL_DOUBLESHIFTGRAB_H_ */ diff --git a/src/nfa/multiaccel_long.h b/src/nfa/multiaccel_long.h deleted file mode 100644 index 515f0bc2..00000000 --- a/src/nfa/multiaccel_long.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MULTIACCEL_LONG_H_ -#define MULTIACCEL_LONG_H_ - -#include "multiaccel_common.h" - -#define LONG_MATCH(len, match_t, match_sz) \ - static really_inline \ - const u8 * JOIN4(longMatch_, match_sz, _, len)(const u8 *buf, match_t z) { \ - if (unlikely(z)) { \ - z |= ((match_t) (1 << (len - 1)) - 1) << (match_sz / 2); \ - JOIN(SHIFT, len)(z); \ - return JOIN(match, match_sz)(buf, z); \ - } \ - return NULL; \ - } - -#define LONG_MATCH_32_DEF(n) \ - LONG_MATCH(n, u32, 32) -#define LONG_MATCH_64_DEF(n) \ - LONG_MATCH(n, u64a, 64) -#define LONG_MATCH_DEF(n) \ - LONG_MATCH_32_DEF(n) \ - LONG_MATCH_64_DEF(n) - -LONG_MATCH_DEF(1) -LONG_MATCH_DEF(2) -LONG_MATCH_DEF(3) -LONG_MATCH_DEF(4) -LONG_MATCH_DEF(5) -LONG_MATCH_DEF(6) -LONG_MATCH_DEF(7) -LONG_MATCH_DEF(8) -LONG_MATCH_DEF(9) -LONG_MATCH_DEF(10) -LONG_MATCH_DEF(11) -LONG_MATCH_DEF(12) -LONG_MATCH_DEF(13) -LONG_MATCH_DEF(14) -LONG_MATCH_DEF(15) -LONG_MATCH_64_DEF(16) -LONG_MATCH_64_DEF(17) -LONG_MATCH_64_DEF(18) -LONG_MATCH_64_DEF(19) -LONG_MATCH_64_DEF(20) -LONG_MATCH_64_DEF(21) -LONG_MATCH_64_DEF(22) -LONG_MATCH_64_DEF(23) -LONG_MATCH_64_DEF(24) -LONG_MATCH_64_DEF(25) -LONG_MATCH_64_DEF(26) -LONG_MATCH_64_DEF(27) -LONG_MATCH_64_DEF(28) -LONG_MATCH_64_DEF(29) -LONG_MATCH_64_DEF(30) -LONG_MATCH_64_DEF(31) - -static -const UNUSED u8 *(*long_match_funcs_32[])(const u8 *buf, u32 z) = -{ - // skip the first three - 0, - &longMatch_32_1, - &longMatch_32_2, - &longMatch_32_3, - &longMatch_32_4, - &longMatch_32_5, - &longMatch_32_6, - &longMatch_32_7, - &longMatch_32_8, - &longMatch_32_9, - &longMatch_32_10, - &longMatch_32_11, - &longMatch_32_12, - &longMatch_32_13, - &longMatch_32_14, - &longMatch_32_15, - }; - -static -const UNUSED u8 *(*long_match_funcs_64[])(const u8 *buf, u64a z) = -{ -// skip the first three - 0, - &longMatch_64_1, - &longMatch_64_2, - &longMatch_64_3, - &longMatch_64_4, - &longMatch_64_5, - &longMatch_64_6, - &longMatch_64_7, - &longMatch_64_8, - &longMatch_64_9, - &longMatch_64_10, - &longMatch_64_11, - &longMatch_64_12, - &longMatch_64_13, - &longMatch_64_14, - &longMatch_64_15, - &longMatch_64_16, - &longMatch_64_17, - &longMatch_64_18, - &longMatch_64_19, - &longMatch_64_20, - &longMatch_64_21, - &longMatch_64_22, - &longMatch_64_23, - &longMatch_64_24, - &longMatch_64_25, - &longMatch_64_26, - &longMatch_64_27, - &longMatch_64_28, - &longMatch_64_29, - &longMatch_64_30, - &longMatch_64_31, -}; - -#endif /* MULTIACCEL_LONG_H_ */ diff --git a/src/nfa/multiaccel_longgrab.h b/src/nfa/multiaccel_longgrab.h deleted file mode 100644 index 09daaf82..00000000 --- a/src/nfa/multiaccel_longgrab.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MULTIACCEL_LONGGRAB_H_ -#define MULTIACCEL_LONGGRAB_H_ - -#include "multiaccel_common.h" - -#define LONGGRAB_MATCH(len, match_t, match_sz) \ - static really_inline \ - const u8 * JOIN4(longgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z) { \ - if (unlikely(z)) { \ - match_t tmp = ~z; \ - tmp |= ((match_t) (1 << len) - 1) << (match_sz / 2); \ - z |= ((match_t) (1 << (len - 1)) - 1) << (match_sz / 2); \ - JOIN(SHIFT, len)(z); \ - VARISHIFT(tmp, z, len); \ - return JOIN(match, match_sz)(buf, z); \ - } \ - return NULL; \ - } - -#define LONGGRAB_MATCH_32_DEF(n) \ - LONGGRAB_MATCH(n, u32, 32) -#define LONGGRAB_MATCH_64_DEF(n) \ - LONGGRAB_MATCH(n, u64a, 64) -#define LONGGRAB_MATCH_DEF(n) \ - LONGGRAB_MATCH_32_DEF(n) \ - LONGGRAB_MATCH_64_DEF(n) - -LONGGRAB_MATCH_DEF(1) -LONGGRAB_MATCH_DEF(2) -LONGGRAB_MATCH_DEF(3) -LONGGRAB_MATCH_DEF(4) -LONGGRAB_MATCH_DEF(5) -LONGGRAB_MATCH_DEF(6) -LONGGRAB_MATCH_DEF(7) -LONGGRAB_MATCH_DEF(8) -LONGGRAB_MATCH_DEF(9) -LONGGRAB_MATCH_DEF(10) -LONGGRAB_MATCH_DEF(11) -LONGGRAB_MATCH_DEF(12) -LONGGRAB_MATCH_DEF(13) -LONGGRAB_MATCH_DEF(14) -LONGGRAB_MATCH_DEF(15) -LONGGRAB_MATCH_64_DEF(16) -LONGGRAB_MATCH_64_DEF(17) -LONGGRAB_MATCH_64_DEF(18) -LONGGRAB_MATCH_64_DEF(19) -LONGGRAB_MATCH_64_DEF(20) -LONGGRAB_MATCH_64_DEF(21) -LONGGRAB_MATCH_64_DEF(22) -LONGGRAB_MATCH_64_DEF(23) -LONGGRAB_MATCH_64_DEF(24) -LONGGRAB_MATCH_64_DEF(25) -LONGGRAB_MATCH_64_DEF(26) -LONGGRAB_MATCH_64_DEF(27) -LONGGRAB_MATCH_64_DEF(28) -LONGGRAB_MATCH_64_DEF(29) -LONGGRAB_MATCH_64_DEF(30) -LONGGRAB_MATCH_64_DEF(31) - -static -const UNUSED u8 *(*longgrab_match_funcs_32[])(const u8 *buf, u32 z) = -{ -// skip the first three - 0, - &longgrabMatch_32_1, - &longgrabMatch_32_2, - &longgrabMatch_32_3, - &longgrabMatch_32_4, - &longgrabMatch_32_5, - &longgrabMatch_32_6, - &longgrabMatch_32_7, - &longgrabMatch_32_8, - &longgrabMatch_32_9, - &longgrabMatch_32_10, - &longgrabMatch_32_11, - &longgrabMatch_32_12, - &longgrabMatch_32_13, - &longgrabMatch_32_14, - &longgrabMatch_32_15, - }; - -static -const UNUSED u8 *(*longgrab_match_funcs_64[])(const u8 *buf, u64a z) = -{ -// skip the first three - 0, - &longgrabMatch_64_1, - &longgrabMatch_64_2, - &longgrabMatch_64_3, - &longgrabMatch_64_4, - &longgrabMatch_64_5, - &longgrabMatch_64_6, - &longgrabMatch_64_7, - &longgrabMatch_64_8, - &longgrabMatch_64_9, - &longgrabMatch_64_10, - &longgrabMatch_64_11, - &longgrabMatch_64_12, - &longgrabMatch_64_13, - &longgrabMatch_64_14, - &longgrabMatch_64_15, - &longgrabMatch_64_16, - &longgrabMatch_64_17, - &longgrabMatch_64_18, - &longgrabMatch_64_19, - &longgrabMatch_64_20, - &longgrabMatch_64_21, - &longgrabMatch_64_22, - &longgrabMatch_64_23, - &longgrabMatch_64_24, - &longgrabMatch_64_25, - &longgrabMatch_64_26, - &longgrabMatch_64_27, - &longgrabMatch_64_28, - &longgrabMatch_64_29, - &longgrabMatch_64_30, - &longgrabMatch_64_31, -}; - -#endif /* MULTIACCEL_LONGGRAB_H_ */ diff --git a/src/nfa/multiaccel_shift.h b/src/nfa/multiaccel_shift.h deleted file mode 100644 index fd362a8b..00000000 --- a/src/nfa/multiaccel_shift.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MULTIACCEL_SHIFT_H_ -#define MULTIACCEL_SHIFT_H_ - -#include "multiaccel_common.h" - -#define SHIFT_MATCH(len, match_t, match_sz) \ - static really_inline \ - const u8 * JOIN4(shiftMatch_, match_sz, _, len)(const u8 *buf, match_t z) {\ - if (unlikely(z)) { \ - z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \ - VARISHIFT(z, z, len); \ - return JOIN(match, match_sz)(buf, z); \ - } \ - return NULL; \ - } - -#define SHIFT_MATCH_32_DEF(n) \ - SHIFT_MATCH(n, u32, 32) -#define SHIFT_MATCH_64_DEF(n) \ - SHIFT_MATCH(n, u64a, 64) -#define SHIFT_MATCH_DEF(n) \ - SHIFT_MATCH_32_DEF(n) \ - SHIFT_MATCH_64_DEF(n) - -SHIFT_MATCH_DEF(1) -SHIFT_MATCH_DEF(2) -SHIFT_MATCH_DEF(3) -SHIFT_MATCH_DEF(4) -SHIFT_MATCH_DEF(5) -SHIFT_MATCH_DEF(6) -SHIFT_MATCH_DEF(7) -SHIFT_MATCH_DEF(8) -SHIFT_MATCH_DEF(9) -SHIFT_MATCH_DEF(10) -SHIFT_MATCH_DEF(11) -SHIFT_MATCH_DEF(12) -SHIFT_MATCH_DEF(13) -SHIFT_MATCH_DEF(14) -SHIFT_MATCH_DEF(15) -SHIFT_MATCH_64_DEF(16) -SHIFT_MATCH_64_DEF(17) -SHIFT_MATCH_64_DEF(18) -SHIFT_MATCH_64_DEF(19) -SHIFT_MATCH_64_DEF(20) -SHIFT_MATCH_64_DEF(21) -SHIFT_MATCH_64_DEF(22) -SHIFT_MATCH_64_DEF(23) -SHIFT_MATCH_64_DEF(24) -SHIFT_MATCH_64_DEF(25) -SHIFT_MATCH_64_DEF(26) -SHIFT_MATCH_64_DEF(27) -SHIFT_MATCH_64_DEF(28) -SHIFT_MATCH_64_DEF(29) -SHIFT_MATCH_64_DEF(30) -SHIFT_MATCH_64_DEF(31) - -static -const UNUSED u8 * (*shift_match_funcs_32[])(const u8 *buf, u32 z) = -{ -// skip the first - 0, - &shiftMatch_32_1, - &shiftMatch_32_2, - &shiftMatch_32_3, - &shiftMatch_32_4, - &shiftMatch_32_5, - &shiftMatch_32_6, - &shiftMatch_32_7, - &shiftMatch_32_8, - &shiftMatch_32_9, - &shiftMatch_32_10, - &shiftMatch_32_11, - &shiftMatch_32_12, - &shiftMatch_32_13, - &shiftMatch_32_14, - &shiftMatch_32_15, -}; - -static -const UNUSED u8 * (*shift_match_funcs_64[])(const u8 *buf, u64a z) = -{ -// skip the first - 0, - &shiftMatch_64_1, - &shiftMatch_64_2, - &shiftMatch_64_3, - &shiftMatch_64_4, - &shiftMatch_64_5, - &shiftMatch_64_6, - &shiftMatch_64_7, - &shiftMatch_64_8, - &shiftMatch_64_9, - &shiftMatch_64_10, - &shiftMatch_64_11, - &shiftMatch_64_12, - &shiftMatch_64_13, - &shiftMatch_64_14, - &shiftMatch_64_15, - &shiftMatch_64_16, - &shiftMatch_64_17, - &shiftMatch_64_18, - &shiftMatch_64_19, - &shiftMatch_64_20, - &shiftMatch_64_21, - &shiftMatch_64_22, - &shiftMatch_64_23, - &shiftMatch_64_24, - &shiftMatch_64_25, - &shiftMatch_64_26, - &shiftMatch_64_27, - &shiftMatch_64_28, - &shiftMatch_64_29, - &shiftMatch_64_30, - &shiftMatch_64_31, -}; - -#endif /* MULTIACCEL_SHIFT_H_ */ diff --git a/src/nfa/multiaccel_shiftgrab.h b/src/nfa/multiaccel_shiftgrab.h deleted file mode 100644 index 032ed086..00000000 --- a/src/nfa/multiaccel_shiftgrab.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MULTIACCEL_SHIFTGRAB_H_ -#define MULTIACCEL_SHIFTGRAB_H_ - -#include "multiaccel_common.h" - -#define SHIFTGRAB_MATCH(len, match_t, match_sz) \ - static really_inline \ - const u8 * JOIN4(shiftgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z) {\ - if (unlikely(z)) { \ - match_t tmp = ~z; \ - z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \ - tmp |= ((match_t) (1 << len) - 1) << (match_sz / 2); \ - VARISHIFT(z, z, len); \ - VARISHIFT(tmp, z, 1); \ - return JOIN(match, match_sz)(buf, z); \ - } \ - return NULL; \ - } - -#define SHIFTGRAB_MATCH_32_DEF(n) \ - SHIFTGRAB_MATCH(n, u32, 32) -#define SHIFTGRAB_MATCH_64_DEF(n) \ - SHIFTGRAB_MATCH(n, u64a, 64) -#define SHIFTGRAB_MATCH_DEF(n) \ - SHIFTGRAB_MATCH_32_DEF(n) \ - SHIFTGRAB_MATCH_64_DEF(n) - -SHIFTGRAB_MATCH_DEF(1) -SHIFTGRAB_MATCH_DEF(2) -SHIFTGRAB_MATCH_DEF(3) -SHIFTGRAB_MATCH_DEF(4) -SHIFTGRAB_MATCH_DEF(5) -SHIFTGRAB_MATCH_DEF(6) -SHIFTGRAB_MATCH_DEF(7) -SHIFTGRAB_MATCH_DEF(8) -SHIFTGRAB_MATCH_DEF(9) -SHIFTGRAB_MATCH_DEF(10) -SHIFTGRAB_MATCH_DEF(11) -SHIFTGRAB_MATCH_DEF(12) -SHIFTGRAB_MATCH_DEF(13) -SHIFTGRAB_MATCH_DEF(14) -SHIFTGRAB_MATCH_DEF(15) -SHIFTGRAB_MATCH_64_DEF(16) -SHIFTGRAB_MATCH_64_DEF(17) -SHIFTGRAB_MATCH_64_DEF(18) -SHIFTGRAB_MATCH_64_DEF(19) -SHIFTGRAB_MATCH_64_DEF(20) -SHIFTGRAB_MATCH_64_DEF(21) -SHIFTGRAB_MATCH_64_DEF(22) -SHIFTGRAB_MATCH_64_DEF(23) -SHIFTGRAB_MATCH_64_DEF(24) -SHIFTGRAB_MATCH_64_DEF(25) -SHIFTGRAB_MATCH_64_DEF(26) -SHIFTGRAB_MATCH_64_DEF(27) -SHIFTGRAB_MATCH_64_DEF(28) -SHIFTGRAB_MATCH_64_DEF(29) -SHIFTGRAB_MATCH_64_DEF(30) -SHIFTGRAB_MATCH_64_DEF(31) - -static -const UNUSED u8 * (*shiftgrab_match_funcs_32[])(const u8 *buf, u32 z) = -{ -// skip the first - 0, - &shiftgrabMatch_32_1, - &shiftgrabMatch_32_2, - &shiftgrabMatch_32_3, - &shiftgrabMatch_32_4, - &shiftgrabMatch_32_5, - &shiftgrabMatch_32_6, - &shiftgrabMatch_32_7, - &shiftgrabMatch_32_8, - &shiftgrabMatch_32_9, - &shiftgrabMatch_32_10, - &shiftgrabMatch_32_11, - &shiftgrabMatch_32_12, - &shiftgrabMatch_32_13, - &shiftgrabMatch_32_14, - &shiftgrabMatch_32_15, -}; - -static -const UNUSED u8 * (*shiftgrab_match_funcs_64[])(const u8 *buf, u64a z) = - { -// skip the first - 0, - &shiftgrabMatch_64_1, - &shiftgrabMatch_64_2, - &shiftgrabMatch_64_3, - &shiftgrabMatch_64_4, - &shiftgrabMatch_64_5, - &shiftgrabMatch_64_6, - &shiftgrabMatch_64_7, - &shiftgrabMatch_64_8, - &shiftgrabMatch_64_9, - &shiftgrabMatch_64_10, - &shiftgrabMatch_64_11, - &shiftgrabMatch_64_12, - &shiftgrabMatch_64_13, - &shiftgrabMatch_64_14, - &shiftgrabMatch_64_15, - &shiftgrabMatch_64_16, - &shiftgrabMatch_64_17, - &shiftgrabMatch_64_18, - &shiftgrabMatch_64_19, - &shiftgrabMatch_64_20, - &shiftgrabMatch_64_21, - &shiftgrabMatch_64_22, - &shiftgrabMatch_64_23, - &shiftgrabMatch_64_24, - &shiftgrabMatch_64_25, - &shiftgrabMatch_64_26, - &shiftgrabMatch_64_27, - &shiftgrabMatch_64_28, - &shiftgrabMatch_64_29, - &shiftgrabMatch_64_30, - &shiftgrabMatch_64_31, -}; - -#endif /* MULTIACCEL_SHIFTGRAB_H_ */ diff --git a/src/nfa/multishufti.c b/src/nfa/multishufti.c deleted file mode 100644 index 80a2bcd0..00000000 --- a/src/nfa/multishufti.c +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Shufti: character class acceleration. - * - * Utilises the SSSE3 pshufb shuffle instruction - */ - -#include "config.h" -#include "ue2common.h" -#include "util/arch.h" - -#include "multishufti.h" - -#include "multiaccel_common.h" - -#if !defined(HAVE_AVX2) - -#define MATCH_ALGO long_ -#include "multiaccel_long.h" -#include "multishufti_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO longgrab_ -#include "multiaccel_longgrab.h" -#include "multishufti_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shift_ -#include "multiaccel_shift.h" -#include "multishufti_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shiftgrab_ -#include "multiaccel_shiftgrab.h" -#include "multishufti_sse.h" -#undef MATCH_ALGO - -#define MULTIACCEL_DOUBLE - -#define MATCH_ALGO doubleshift_ -#include "multiaccel_doubleshift.h" -#include "multishufti_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO doubleshiftgrab_ -#include "multiaccel_doubleshiftgrab.h" -#include "multishufti_sse.h" -#undef MATCH_ALGO - -#undef MULTIACCEL_DOUBLE - -#else - -#define MATCH_ALGO long_ -#include "multiaccel_long.h" -#include "multishufti_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO longgrab_ -#include "multiaccel_longgrab.h" -#include "multishufti_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shift_ -#include "multiaccel_shift.h" -#include "multishufti_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shiftgrab_ -#include "multiaccel_shiftgrab.h" -#include "multishufti_avx2.h" -#undef MATCH_ALGO - -#define MULTIACCEL_DOUBLE - -#define MATCH_ALGO doubleshift_ -#include "multiaccel_doubleshift.h" -#include "multishufti_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO doubleshiftgrab_ -#include "multiaccel_doubleshiftgrab.h" -#include "multishufti_avx2.h" -#undef MATCH_ALGO - -#undef MULTIACCEL_DOUBLE - -#endif diff --git a/src/nfa/multishufti.h b/src/nfa/multishufti.h deleted file mode 100644 index af578483..00000000 --- a/src/nfa/multishufti.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Multishufti: multibyte version of Shufti - * - * Utilises the SSSE3 pshufb shuffle instruction - */ - -#ifndef MULTISHUFTI_H -#define MULTISHUFTI_H - -#include "ue2common.h" -#include "util/simd_types.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -const u8 *long_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end, const u8 run_len); - -const u8 *longgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end, const u8 run_len); - -const u8 *shift_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end, const u8 run_len); - -const u8 *shiftgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end, const u8 run_len); - -const u8 *doubleshift_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end, const u8 run_len, - const u8 run2_len); - -const u8 *doubleshiftgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end, const u8 run_len, - const u8 run2_len); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/nfa/multishufti_avx2.h b/src/nfa/multishufti_avx2.h deleted file mode 100644 index 042f5570..00000000 --- a/src/nfa/multishufti_avx2.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "shufti_common.h" - -#include "ue2common.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" - -static really_inline -const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 mask_lo, m256 mask_hi, m256 chars, - const u8 *buf, const m256 low4bits, - const m256 zeroes, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes); - return (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])(buf, ~z -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); -} - -const u8 *JOIN(MATCH_ALGO, shuftiExec)(m128 mask_lo, m128 mask_hi, - const u8 *buf, - const u8 *buf_end, u8 run_len -#ifdef MULTIACCEL_DOUBLE - , u8 run_len2 -#endif - ) { - assert(buf && buf_end); - assert(buf < buf_end); - - // Slow path for small cases. - if (buf_end - buf < 32) { - return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, - buf, buf_end); - } - - const m256 zeroes = zeroes256(); - const m256 low4bits = set32x8(0xf); - const m256 wide_mask_lo = set2x128(mask_lo); - const m256 wide_mask_hi = set2x128(mask_hi); - const u8 *rv; - - size_t min = (size_t)buf % 32; - assert(buf_end - buf >= 32); - - // Preconditioning: most of the time our buffer won't be aligned. - m256 chars = loadu256(buf); - rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, chars, buf, - low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - buf += (32 - min); - - // Unrolling was here, but it wasn't doing anything but taking up space. - // Reroll FTW. - const u8 *last_block = buf_end - 32; - while (buf < last_block) { - m256 lchars = load256(buf); - rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, lchars, buf, - low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - buf += 32; - } - - // Use an unaligned load to mop up the last 32 bytes and get an accurate - // picture to buf_end. - assert(buf <= buf_end && buf >= buf_end - 32); - chars = loadu256(buf_end - 32); - rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, chars, buf_end - 32, - low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - - return buf_end; -} diff --git a/src/nfa/multishufti_sse.h b/src/nfa/multishufti_sse.h deleted file mode 100644 index 0a9b543e..00000000 --- a/src/nfa/multishufti_sse.h +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "shufti_common.h" - -#include "ue2common.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" - -/* Normal SSSE3 shufti */ - -static really_inline -const u8 *JOIN(MATCH_ALGO, fwdBlock)(m128 mask_lo, m128 mask_hi, m128 chars, - const u8 *buf, const m128 low4bits, - const m128 zeroes, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - // negate first 16 bits - u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes) ^ 0xFFFF; - return (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])(buf, z -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); -} - -/* - * 16-byte pipeline, for smaller scans - */ -static -const u8 *JOIN(MATCH_ALGO, shuftiPipeline16)(m128 mask_lo, m128 mask_hi, - const u8 *buf, const u8 *buf_end, - const m128 low4bits, - const m128 zeroes, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8* ptr, *last_buf; - u32 last_res; - - // pipeline prologue: scan first 16 bytes - m128 data = load128(buf); - u32 z = block(mask_lo, mask_hi, data, low4bits, zeroes) ^ 0xFFFF; - last_buf = buf; - last_res = z; - buf += 16; - - // now, start the pipeline! - assert((size_t)buf % 16 == 0); - for (; buf + 15 < buf_end; buf += 16) { - // scan more data - data = load128(buf); - z = block(mask_lo, mask_hi, data, low4bits, zeroes) ^ 0xFFFF; - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_buf = buf; - last_res = z; - } - assert(buf <= buf_end && buf >= buf_end - 16); - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - return NULL; -} - -/* - * 32-byte pipeline, for bigger scans - */ -static -const u8 *JOIN(MATCH_ALGO, shuftiPipeline32)(m128 mask_lo, m128 mask_hi, - const u8 *buf, const u8 *buf_end, - const m128 low4bits, - const m128 zeroes, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8* ptr, *last_buf; - u32 res; - - // pipeline prologue: scan first 32 bytes - m128 data1 = load128(buf); - u32 z1 = block(mask_lo, mask_hi, data1, low4bits, zeroes) ^ 0xFFFF; - m128 data2 = load128(buf + 16); - u32 z2 = block(mask_lo, mask_hi, data2, low4bits, zeroes) ^ 0xFFFF; - - // store the results - u32 last_res = z1 | (z2 << 16); - last_buf = buf; - buf += 32; - - - // now, start the pipeline! - assert((size_t)buf % 16 == 0); - for (; buf + 31 < buf_end; buf += 32) { - // scan more data - data1 = load128(buf); - z1 = block(mask_lo, mask_hi, data1, low4bits, zeroes) ^ 0xFFFF; - data2 = load128(buf + 16); - z2 = block(mask_lo, mask_hi, data2, low4bits, zeroes) ^ 0xFFFF; - res = z1 | (z2 << 16); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_res = res; - last_buf = buf; - } - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - // if we still have some data left, scan it too - for (; buf + 15 < buf_end; buf += 16) { - m128 chars = load128(buf); - ptr = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars, buf, - low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - } - assert(buf <= buf_end && buf >= buf_end - 16); - - return NULL; -} - -const u8 *JOIN(MATCH_ALGO, shuftiExec)(m128 mask_lo, m128 mask_hi, - const u8 *buf, - const u8 *buf_end, u8 run_len -#ifdef MULTIACCEL_DOUBLE - , u8 run_len2 -#endif - ) { - assert(buf && buf_end); - assert(buf < buf_end); - - // Slow path for small cases. - if (buf_end - buf < 16) { - return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, - buf, buf_end); - } - - const m128 zeroes = zeroes128(); - const m128 low4bits = _mm_set1_epi8(0xf); - const u8 *rv; - - size_t min = (size_t)buf % 16; - assert(buf_end - buf >= 16); - - // Preconditioning: most of the time our buffer won't be aligned. - m128 chars = loadu128(buf); - rv = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars, buf, - low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - buf += (16 - min); - - // if we have enough data, run bigger pipeline; otherwise run smaller one - if (buf_end - buf >= 128) { - rv = JOIN(MATCH_ALGO, shuftiPipeline32)(mask_lo, mask_hi, - buf, buf_end, low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(rv)) { - return rv; - } - } else if (buf_end - buf >= 16){ - rv = JOIN(MATCH_ALGO, shuftiPipeline16)(mask_lo, mask_hi, - buf, buf_end, low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(rv)) { - return rv; - } - } - - // Use an unaligned load to mop up the last 16 bytes and get an accurate - // picture to buf_end. - chars = loadu128(buf_end - 16); - rv = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars, - buf_end - 16, low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - - return buf_end; -} diff --git a/src/nfa/multitruffle.c b/src/nfa/multitruffle.c deleted file mode 100644 index c333414c..00000000 --- a/src/nfa/multitruffle.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "ue2common.h" -#include "util/arch.h" - -#include "multitruffle.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" - -#include "multiaccel_common.h" - -#if !defined(HAVE_AVX2) - -#define MATCH_ALGO long_ -#include "multiaccel_long.h" -#include "multitruffle_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO longgrab_ -#include "multiaccel_longgrab.h" -#include "multitruffle_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shift_ -#include "multiaccel_shift.h" -#include "multitruffle_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shiftgrab_ -#include "multiaccel_shiftgrab.h" -#include "multitruffle_sse.h" -#undef MATCH_ALGO - -#define MULTIACCEL_DOUBLE - -#define MATCH_ALGO doubleshift_ -#include "multiaccel_doubleshift.h" -#include "multitruffle_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO doubleshiftgrab_ -#include "multiaccel_doubleshiftgrab.h" -#include "multitruffle_sse.h" -#undef MATCH_ALGO - -#undef MULTIACCEL_DOUBLE - -#else - -#define MATCH_ALGO long_ -#include "multiaccel_long.h" -#include "multitruffle_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO longgrab_ -#include "multiaccel_longgrab.h" -#include "multitruffle_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shift_ -#include "multiaccel_shift.h" -#include "multitruffle_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shiftgrab_ -#include "multiaccel_shiftgrab.h" -#include "multitruffle_avx2.h" -#undef MATCH_ALGO - -#define MULTIACCEL_DOUBLE - -#define MATCH_ALGO doubleshift_ -#include "multiaccel_doubleshift.h" -#include "multitruffle_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO doubleshiftgrab_ -#include "multiaccel_doubleshiftgrab.h" -#include "multitruffle_avx2.h" -#undef MATCH_ALGO - -#undef MULTIACCEL_DOUBLE - -#endif diff --git a/src/nfa/multitruffle.h b/src/nfa/multitruffle.h deleted file mode 100644 index 8703b5ca..00000000 --- a/src/nfa/multitruffle.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MULTITRUFFLE_H -#define MULTITRUFFLE_H - -/** \file - * \brief Multitruffle: multibyte version of Truffle. - * - * Utilises the SSSE3 pshufb shuffle instruction - */ - -#include "util/simd_types.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -const u8 *long_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end, const u8 run_len); - -const u8 *longgrab_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end, const u8 run_len); - -const u8 *shift_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end, const u8 run_len); - -const u8 *shiftgrab_truffleExec(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, const u8 *buf, - const u8 *buf_end, const u8 run_len); - -const u8 *doubleshift_truffleExec(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, const u8 *buf, - const u8 *buf_end, const u8 run_len, - const u8 run2_len); - -const u8 *doubleshiftgrab_truffleExec(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, const u8 *buf, - const u8 *buf_end, const u8 run_len, - const u8 run2_len); - -#ifdef __cplusplus -} -#endif - - -#endif /* MULTITRUFFLE_H */ diff --git a/src/nfa/multitruffle_avx2.h b/src/nfa/multitruffle_avx2.h deleted file mode 100644 index e52db5fc..00000000 --- a/src/nfa/multitruffle_avx2.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * Matches a byte in a charclass using three shuffles - */ - -#include "config.h" -#include "ue2common.h" -#include "multiaccel_common.h" - -/* - * include "block" function - */ -#include "truffle_common.h" - -/* - * single-byte truffle fwd match function, should only be defined when not - * compiling multiaccel - */ -static really_inline -const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, - m256 v, const u8 *buf, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - u64a z = (u64a) block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); - return (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])(buf, z ^ 0xFFFFFFFF -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); -} - -const u8 *JOIN(MATCH_ALGO, truffleExec)(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - DEBUG_PRINTF("run_len %zu\n", buf_end - buf); - const m256 wide_clear = set2x128(shuf_mask_lo_highclear); - const m256 wide_set = set2x128(shuf_mask_lo_highset); - - assert(buf && buf_end); - assert(buf < buf_end); - const u8 *rv; - - if (buf_end - buf < 32) { - return truffleMini(wide_clear, wide_set, buf, buf_end); - } - - size_t min = (size_t)buf % 32; - assert(buf_end - buf >= 32); - - // Preconditioning: most of the time our buffer won't be aligned. - m256 chars = loadu256(buf); - rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, chars, buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - buf += (32 - min); - - const u8 *last_block = buf_end - 32; - while (buf < last_block) { - m256 lchars = load256(buf); - rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, lchars, - buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - buf += 32; - } - - // Use an unaligned load to mop up the last 32 bytes and get an accurate - // picture to buf_end. - assert(buf <= buf_end && buf >= buf_end - 32); - chars = loadu256(buf_end - 32); - rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, chars, - buf_end - 32, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - - return buf_end; -} diff --git a/src/nfa/multitruffle_sse.h b/src/nfa/multitruffle_sse.h deleted file mode 100644 index b287e4fc..00000000 --- a/src/nfa/multitruffle_sse.h +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "ue2common.h" -#include "multiaccel_common.h" - -/* - * include "block" function - */ -#include "truffle_common.h" - -/* - * single-byte truffle fwd match function, should only be defined when not - * compiling multiaccel - */ - -static really_inline -const u8 *JOIN(MATCH_ALGO, fwdBlock)(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - m128 v, const u8 *buf, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v) ^ 0xFFFF; - return (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])(buf, z -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); -} - -/* - * 16-byte pipeline, for smaller scans - */ -static -const u8 *JOIN(MATCH_ALGO, trufflePipeline16)(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8* ptr, *last_buf; - u32 last_res; - - // pipeline prologue: scan first 16 bytes - m128 data = load128(buf); - u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data) ^ 0xFFFF; - last_buf = buf; - last_res = z; - buf += 16; - - // now, start the pipeline! - assert((size_t)buf % 16 == 0); - for (; buf + 15 < buf_end; buf += 16) { - // scan more data - data = load128(buf); - z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data) ^ 0xFFFF; - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_buf = buf; - last_res = z; - } - assert(buf <= buf_end && buf >= buf_end - 16); - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - return NULL; -} - -/* - * 32-byte pipeline, for bigger scans - */ -static -const u8 *JOIN(MATCH_ALGO, trufflePipeline32)(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8* ptr, *last_buf; - u32 res; - - // pipeline prologue: scan first 32 bytes - m128 data1 = load128(buf); - u32 z1 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data1) ^ 0xFFFF; - m128 data2 = load128(buf + 16); - u32 z2 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data2) ^ 0xFFFF; - - // store the results - u32 last_res = z1 | (z2 << 16); - last_buf = buf; - buf += 32; - - - // now, start the pipeline! - assert((size_t)buf % 16 == 0); - for (; buf + 31 < buf_end; buf += 32) { - // scan more data - data1 = load128(buf); - z1 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data1) ^ 0xFFFF; - data2 = load128(buf + 16); - z2 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data2) ^ 0xFFFF; - res = z1 | (z2 << 16); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_res = res; - last_buf = buf; - } - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - // if we still have some data left, scan it too - for (; buf + 15 < buf_end; buf += 16) { - m128 chars = load128(buf); - ptr = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset, - chars, buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - } - assert(buf <= buf_end && buf >= buf_end - 16); - - return NULL; -} - -const u8 *JOIN(MATCH_ALGO, truffleExec)(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - DEBUG_PRINTF("run_len %zu\n", buf_end - buf); - - assert(buf && buf_end); - assert(buf < buf_end); - const u8 *rv; - - if (buf_end - buf < 16) { - return truffleMini(shuf_mask_lo_highclear, shuf_mask_lo_highset, buf, buf_end); - } - - size_t min = (size_t)buf % 16; - assert(buf_end - buf >= 16); - - // Preconditioning: most of the time our buffer won't be aligned. - m128 chars = loadu128(buf); - rv = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - buf += (16 - min); - - // if we have enough data, run bigger pipeline; otherwise run smaller one - if (buf_end - buf >= 128) { - rv = JOIN(MATCH_ALGO, trufflePipeline32)(shuf_mask_lo_highclear, shuf_mask_lo_highset, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(rv)) { - return rv; - } - } else if (buf_end - buf >= 16){ - rv = JOIN(MATCH_ALGO, trufflePipeline16)(shuf_mask_lo_highclear, shuf_mask_lo_highset, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(rv)) { - return rv; - } - } - - // Use an unaligned load to mop up the last 16 bytes and get an accurate - // picture to buf_end. - chars = loadu128(buf_end - 16); - rv = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, - buf_end - 16, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - - return buf_end; -} diff --git a/src/nfa/multivermicelli.c b/src/nfa/multivermicelli.c deleted file mode 100644 index fe6cbdb5..00000000 --- a/src/nfa/multivermicelli.c +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "ue2common.h" -#include "util/arch.h" - -#include "multivermicelli.h" - -#include "multiaccel_common.h" - -#if !defined(HAVE_AVX2) - -#define MATCH_ALGO long_ -#include "multiaccel_long.h" -#include "multivermicelli_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO longgrab_ -#include "multiaccel_longgrab.h" -#include "multivermicelli_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shift_ -#include "multiaccel_shift.h" -#include "multivermicelli_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shiftgrab_ -#include "multiaccel_shiftgrab.h" -#include "multivermicelli_sse.h" -#undef MATCH_ALGO - -#define MULTIACCEL_DOUBLE - -#define MATCH_ALGO doubleshift_ -#include "multiaccel_doubleshift.h" -#include "multivermicelli_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO doubleshiftgrab_ -#include "multiaccel_doubleshiftgrab.h" -#include "multivermicelli_sse.h" -#undef MATCH_ALGO - -#undef MULTIACCEL_DOUBLE - -#else - -#define MATCH_ALGO long_ -#include "multiaccel_long.h" -#include "multivermicelli_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO longgrab_ -#include "multiaccel_longgrab.h" -#include "multivermicelli_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shift_ -#include "multiaccel_shift.h" -#include "multivermicelli_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shiftgrab_ -#include "multiaccel_shiftgrab.h" -#include "multivermicelli_avx2.h" -#undef MATCH_ALGO - -#define MULTIACCEL_DOUBLE - -#define MATCH_ALGO doubleshift_ -#include "multiaccel_doubleshift.h" -#include "multivermicelli_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO doubleshiftgrab_ -#include "multiaccel_doubleshiftgrab.h" -#include "multivermicelli_avx2.h" -#undef MATCH_ALGO - -#undef MULTIACCEL_DOUBLE - -#endif diff --git a/src/nfa/multivermicelli.h b/src/nfa/multivermicelli.h deleted file mode 100644 index 55f9b1f2..00000000 --- a/src/nfa/multivermicelli.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MULTIVERMICELLI_H_ -#define MULTIVERMICELLI_H_ - -#ifdef __cplusplus -extern "C" -{ -#endif - -const u8 *long_vermicelliExec(char c, char nocase, const u8 *buf, - const u8 *buf_end, const u8 run_len); - -const u8 *longgrab_vermicelliExec(char c, char nocase, const u8 *buf, - const u8 *buf_end, const u8 run_len); - -const u8 *shift_vermicelliExec(char c, char nocase, const u8 *buf, - const u8 *buf_end, const u8 run_len); - -const u8 *shiftgrab_vermicelliExec(char c, char nocase, const u8 *buf, - const u8 *buf_end, const u8 run_len); - -const u8 *doubleshift_vermicelliExec(char c, char nocase, const u8 *buf, - const u8 *buf_end, const u8 run_len, - const u8 run2_len); - -const u8 *doubleshiftgrab_vermicelliExec(char c, char nocase, const u8 *buf, - const u8 *buf_end, const u8 run_len, - const u8 run2_len); - -#ifdef __cplusplus -} -#endif - - -#endif /* MULTIVERMICELLI_H_ */ diff --git a/src/nfa/multivermicelli_avx2.h b/src/nfa/multivermicelli_avx2.h deleted file mode 100644 index 9081aa3f..00000000 --- a/src/nfa/multivermicelli_avx2.h +++ /dev/null @@ -1,283 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "util/bitutils.h" -#include "util/simd_utils.h" -#include "util/unaligned.h" - -#include "multiaccel_common.h" - -static really_inline -const u8 *JOIN(MATCH_ALGO, vermUnalignNocase)(m256 chars, - const u8 *buf, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - m256 casemask = set32x8(CASE_CLEAR); - const u8 *ptr; - m256 data = loadu256(buf); - u32 z = movemask256(eq256(chars, and256(casemask, data))); - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (buf, z -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - return NULL; -} - -static really_inline -const u8 *JOIN(MATCH_ALGO, vermUnalign)(m256 chars, - const u8 *buf, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8 *ptr; - - m256 data = loadu256(buf); - u32 z = movemask256(eq256(chars, data)); - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (buf, z -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - return NULL; -} - -/* - * 32-byte pipeline - */ -static really_inline -const u8 *JOIN(MATCH_ALGO, vermPipeline)(m256 chars, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8* ptr, *last_buf; - u32 last_res; - - // pipeline prologue: scan first 32 bytes - m256 data = load256(buf); - u32 z = movemask256(eq256(chars, data)); - last_res = z; - last_buf = buf; - buf += 32; - - // now, start the pipeline! - assert((size_t)buf % 32 == 0); - for (; buf + 31 < buf_end; buf += 32) { - // scan more data - data = load256(buf); - z = movemask256(eq256(chars, data)); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_buf = buf; - last_res = z; - } - assert(buf <= buf_end && buf >= buf_end - 32); - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - return NULL; -} - -/* - * 32-byte caseless pipeline - */ -static really_inline -const u8 *JOIN(MATCH_ALGO, vermPipelineNocase)(m256 chars, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - m256 casemask = set32x8(CASE_CLEAR); - const u8* ptr, *last_buf; - u32 last_res; - - // pipeline prologue: scan first 32 bytes - m256 data = load256(buf); - u32 z = movemask256(eq256(chars, and256(casemask, data))); - last_res = z; - last_buf = buf; - buf += 32; - - - // now, start the pipeline! - assert((size_t)buf % 32 == 0); - for (; buf + 31 < buf_end; buf += 32) { - // scan more data - data = load256(buf); - z = movemask256(eq256(chars, and256(casemask, data))); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_buf = buf; - last_res = z; - } - assert(buf <= buf_end && buf >= buf_end - 32); - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - return NULL; -} - -const u8 *JOIN(MATCH_ALGO, vermicelliExec)(char c, char nocase, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n", - nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); - assert(buf < buf_end); - - const u8 *ptr; - - // Handle small scans. - if (buf_end - buf < 32) { - for (; buf < buf_end; buf++) { - char cur = (char)*buf; - if (nocase) { - cur &= CASE_CLEAR; - } - if (cur == c) { - break; - } - } - return buf; - } - - m256 chars = set32x8(c); /* nocase already uppercase */ - - uintptr_t min = (uintptr_t)buf % 32; - - if (min) { - ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars, - buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ) : JOIN(MATCH_ALGO, vermUnalign)(chars, - buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - buf += 32 - min; - } - - if (buf_end - buf >= 32){ - ptr = nocase ? JOIN(MATCH_ALGO, vermPipelineNocase)(chars, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ) : JOIN(MATCH_ALGO, vermPipeline)(chars, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - } - - // final unaligned scan - ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars, - buf_end - 32, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ) : JOIN(MATCH_ALGO, vermUnalign)(chars, - buf_end - 32, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - - // run our pipeline - return ptr ? ptr : buf_end; -} diff --git a/src/nfa/multivermicelli_sse.h b/src/nfa/multivermicelli_sse.h deleted file mode 100644 index cdacd2c4..00000000 --- a/src/nfa/multivermicelli_sse.h +++ /dev/null @@ -1,452 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "util/bitutils.h" -#include "util/simd_utils.h" -#include "util/unaligned.h" - -#define VERM_BOUNDARY 16 -#define VERM_TYPE m128 -#define VERM_SET_FN set16x8 - -#include "multiaccel_common.h" - -static really_inline -const u8 *JOIN(MATCH_ALGO, vermUnalignNocase)(m128 chars, - const u8 *buf, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - m128 casemask = set16x8(CASE_CLEAR); - const u8 *ptr; - m128 data = loadu128(buf); - u32 z = movemask128(eq128(chars, and128(casemask, data))); - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (buf, z -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - return NULL; -} - -static really_inline -const u8 *JOIN(MATCH_ALGO, vermUnalign)(m128 chars, - const u8 *buf, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8 *ptr; - - m128 data = loadu128(buf); - u32 z = movemask128(eq128(chars, data)); - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (buf, z -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - return NULL; -} - -/* - * 16-byte pipeline, for smaller scans - */ -static -const u8 *JOIN(MATCH_ALGO, vermPipeline16)(m128 chars, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8* ptr, *last_buf; - u32 last_res; - - // pipeline prologue: scan first 16 bytes - m128 data = load128(buf); - u32 z = movemask128(eq128(chars, data)); - last_buf = buf; - last_res = z; - buf += 16; - - // now, start the pipeline! - assert((size_t)buf % 16 == 0); - for (; buf + 15 < buf_end; buf += 16) { - // scan more data - data = load128(buf); - z = movemask128(eq128(chars, data)); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_buf = buf; - last_res = z; - } - assert(buf <= buf_end && buf >= buf_end - 16); - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - return NULL; -} - -/* - * 16-byte pipeline, for smaller scans - */ -static -const u8 *JOIN(MATCH_ALGO, vermPipeline16Nocase)(m128 chars, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - m128 casemask = set16x8(CASE_CLEAR); - const u8* ptr, *last_buf; - u32 last_res; - - // pipeline prologue: scan first 16 bytes - m128 data = load128(buf); - u32 z = movemask128(eq128(chars, and128(casemask, data))); - last_buf = buf; - last_res = z; - buf += 16; - - // now, start the pipeline! - assert((size_t)buf % 16 == 0); - for (; buf + 15 < buf_end; buf += 16) { - // scan more data - data = load128(buf); - z = movemask128(eq128(chars, and128(casemask, data))); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_buf = buf; - last_res = z; - } - assert(buf <= buf_end && buf >= buf_end - 16); - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - return NULL; -} - -/* - * 32-byte pipeline, for bigger scans - */ -static -const u8 *JOIN(MATCH_ALGO, vermPipeline32)(m128 chars, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8* ptr, *last_buf; - u32 res; - - // pipeline prologue: scan first 32 bytes - m128 data1 = load128(buf); - u32 z1 = movemask128(eq128(chars, data1)); - m128 data2 = load128(buf + 16); - u32 z2 = movemask128(eq128(chars, data2)); - - // store the results - u32 last_res = z1 | (z2 << VERM_BOUNDARY); - last_buf = buf; - buf += 32; - - - // now, start the pipeline! - assert((size_t)buf % 16 == 0); - for (; buf + 31 < buf_end; buf += 32) { - // scan more data - data1 = load128(buf); - z1 = movemask128(eq128(chars, data1)); - data2 = load128(buf + 16); - z2 = movemask128(eq128(chars, data2)); - res = z1 | (z2 << 16); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_res = res; - last_buf = buf; - } - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - // if we still have some data left, scan it too - if (buf + 15 < buf_end) { - return JOIN(MATCH_ALGO, vermPipeline16)(chars, buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - } - assert(buf <= buf_end && buf >= buf_end - 16); - - return NULL; -} - -/* - * 32-byte caseless pipeline, for bigger scans - */ -static -const u8 *JOIN(MATCH_ALGO, vermPipeline32Nocase)(m128 chars, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - m128 casemask = set16x8(CASE_CLEAR); - const u8* ptr, *last_buf; - u32 last_res; - - // pipeline prologue: scan first 32 bytes - m128 data1 = load128(buf); - u32 z1 = movemask128(eq128(chars, and128(casemask, data1))); - m128 data2 = load128(buf + 16); - u32 z2 = movemask128(eq128(chars, and128(casemask, data2))); - u32 z = z1 | (z2 << VERM_BOUNDARY); - - last_res = z; - last_buf = buf; - buf += 32; - - // now, start the pipeline! - assert((size_t)buf % 16 == 0); - for (; buf + 31 < buf_end; buf += 32) { - // scan more data - data1 = load128(buf); - z1 = movemask128(eq128(chars, and128(casemask, data1))); - data2 = load128(buf + 16); - z2 = movemask128(eq128(chars, and128(casemask, data2))); - z = z1 | (z2 << 16); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_res = z; - last_buf = buf; - } - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - // if we still have some data left, scan it too - if (buf + 15 < buf_end) { - return JOIN(MATCH_ALGO, vermPipeline16Nocase)(chars, buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - } - assert(buf <= buf_end && buf >= buf_end - 16); - - return NULL; -} - -const u8 *JOIN(MATCH_ALGO, vermicelliExec)(char c, char nocase, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n", - nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); - assert(buf < buf_end); - - const u8 *ptr; - - // Handle small scans. - if (buf_end - buf < VERM_BOUNDARY) { - for (; buf < buf_end; buf++) { - char cur = (char)*buf; - if (nocase) { - cur &= CASE_CLEAR; - } - if (cur == c) { - break; - } - } - return buf; - } - - VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */ - - uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY; - - if (min) { - ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars, - buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ) : JOIN(MATCH_ALGO, vermUnalign)(chars, - buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - buf += VERM_BOUNDARY - min; - } - - // if we have enough data, run bigger pipeline; otherwise run smaller one - if (buf_end - buf >= 128) { - ptr = nocase ? JOIN(MATCH_ALGO, vermPipeline32Nocase)(chars, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ) : JOIN(MATCH_ALGO, vermPipeline32)(chars, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - } else if (buf_end - buf >= 16){ - ptr = nocase ? JOIN(MATCH_ALGO, vermPipeline16Nocase)(chars, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ) : JOIN(MATCH_ALGO, vermPipeline16)(chars, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - } - - // final unaligned scan - ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars, - buf_end - VERM_BOUNDARY, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ) : JOIN(MATCH_ALGO, vermUnalign)(chars, - buf_end - VERM_BOUNDARY, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - - // run our pipeline - return ptr ? ptr : buf_end; -} diff --git a/src/nfa/shufti.c b/src/nfa/shufti.c index f7b4403e..dda5060f 100644 --- a/src/nfa/shufti.c +++ b/src/nfa/shufti.c @@ -39,7 +39,52 @@ #include "util/simd_utils.h" #include "util/unaligned.h" -#include "shufti_common.h" +#ifdef DEBUG +#include + +#define DUMP_MSK(_t) \ +static UNUSED \ +void dumpMsk##_t(m##_t msk) { \ + u8 * mskAsU8 = (u8 *)&msk; \ + for (unsigned i = 0; i < sizeof(msk); i++) { \ + u8 c = mskAsU8[i]; \ + for (int j = 0; j < 8; j++) { \ + if ((c >> (7-j)) & 0x1) \ + printf("1"); \ + else \ + printf("0"); \ + } \ + printf(" "); \ + } \ +} \ +static UNUSED \ +void dumpMsk##_t##AsChars(m##_t msk) { \ + u8 * mskAsU8 = (u8 *)&msk; \ + for (unsigned i = 0; i < sizeof(msk); i++) { \ + u8 c = mskAsU8[i]; \ + if (isprint(c)) \ + printf("%c",c); \ + else \ + printf("."); \ + } \ +} + +#endif + +/** \brief Naive byte-by-byte implementation. */ +static really_inline +const u8 *shuftiFwdSlow(const u8 *lo, const u8 *hi, const u8 *buf, + const u8 *buf_end) { + assert(buf < buf_end); + + for (; buf < buf_end; ++buf) { + u8 c = *buf; + if (lo[c & 0xf] & hi[c >> 4]) { + break; + } + } + return buf; +} /** \brief Naive byte-by-byte implementation. */ static really_inline @@ -59,6 +104,30 @@ const u8 *shuftiRevSlow(const u8 *lo, const u8 *hi, const u8 *buf, #if !defined(HAVE_AVX2) /* Normal SSSE3 shufti */ +#ifdef DEBUG +DUMP_MSK(128) +#endif + +#define GET_LO_4(chars) and128(chars, low4bits) +#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4) + +static really_inline +u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits, + const m128 compare) { + m128 c_lo = pshufb(mask_lo, GET_LO_4(chars)); + m128 c_hi = pshufb(mask_hi, GET_HI_4(chars)); + m128 t = and128(c_lo, c_hi); + +#ifdef DEBUG + DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n"); +#endif + return movemask128(eq128(t, compare)); +} + static really_inline const u8 *firstMatch(const u8 *buf, u32 z) { if (unlikely(z != 0xffff)) { @@ -293,6 +362,31 @@ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, #else // AVX2 - 256 wide shuftis +#ifdef DEBUG +DUMP_MSK(256) +#endif + +#define GET_LO_4(chars) and256(chars, low4bits) +#define GET_HI_4(chars) rshift64_m256(andnot256(low4bits, chars), 4) + +static really_inline +u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits, + const m256 compare) { + m256 c_lo = vpshufb(mask_lo, GET_LO_4(chars)); + m256 c_hi = vpshufb(mask_hi, GET_HI_4(chars)); + m256 t = and256(c_lo, c_hi); + +#ifdef DEBUG + DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n"); +#endif + + return movemask256(eq256(t, compare)); +} + static really_inline const u8 *firstMatch(const u8 *buf, u32 z) { if (unlikely(z != 0xffffffff)) { diff --git a/src/nfa/shufti_common.h b/src/nfa/shufti_common.h deleted file mode 100644 index 7048a8b1..00000000 --- a/src/nfa/shufti_common.h +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef SHUFTI_COMMON_H_ -#define SHUFTI_COMMON_H_ - -#include "ue2common.h" - -#include "util/arch.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" -#include "util/unaligned.h" - -/* - * Common stuff for all versions of shufti (single, multi and multidouble) - */ - -/** \brief Naive byte-by-byte implementation. */ -static really_inline -const u8 *shuftiFwdSlow(const u8 *lo, const u8 *hi, const u8 *buf, - const u8 *buf_end) { - assert(buf < buf_end); - - for (; buf < buf_end; ++buf) { - u8 c = *buf; - if (lo[c & 0xf] & hi[c >> 4]) { - break; - } - } - return buf; -} - -#ifdef DEBUG -#include - -#define DUMP_MSK(_t) \ -static UNUSED \ -void dumpMsk##_t(m##_t msk) { \ - u8 * mskAsU8 = (u8 *)&msk; \ - for (unsigned i = 0; i < sizeof(msk); i++) { \ - u8 c = mskAsU8[i]; \ - for (int j = 0; j < 8; j++) { \ - if ((c >> (7-j)) & 0x1) \ - printf("1"); \ - else \ - printf("0"); \ - } \ - printf(" "); \ - } \ -} \ -static UNUSED \ -void dumpMsk##_t##AsChars(m##_t msk) { \ - u8 * mskAsU8 = (u8 *)&msk; \ - for (unsigned i = 0; i < sizeof(msk); i++) { \ - u8 c = mskAsU8[i]; \ - if (isprint(c)) \ - printf("%c",c); \ - else \ - printf("."); \ - } \ -} - -#endif - -#if !defined(HAVE_AVX2) - -#ifdef DEBUG -DUMP_MSK(128) -#endif - -#define GET_LO_4(chars) and128(chars, low4bits) -#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4) - -static really_inline -u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits, - const m128 compare) { - m128 c_lo = pshufb(mask_lo, GET_LO_4(chars)); - m128 c_hi = pshufb(mask_hi, GET_HI_4(chars)); - m128 t = and128(c_lo, c_hi); - -#ifdef DEBUG - DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n"); - DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n"); - DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n"); - DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n"); - DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n"); -#endif - return movemask128(eq128(t, compare)); -} - -#else - -#ifdef DEBUG -DUMP_MSK(256) -#endif - -#define GET_LO_4(chars) and256(chars, low4bits) -#define GET_HI_4(chars) rshift64_m256(andnot256(low4bits, chars), 4) - -static really_inline -u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits, - const m256 compare) { - m256 c_lo = vpshufb(mask_lo, GET_LO_4(chars)); - m256 c_hi = vpshufb(mask_hi, GET_HI_4(chars)); - m256 t = and256(c_lo, c_hi); - -#ifdef DEBUG - DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n"); - DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n"); - DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n"); - DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n"); - DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n"); -#endif - - return movemask256(eq256(t, compare)); -} - -#endif - - -#endif /* SHUFTI_COMMON_H_ */ diff --git a/src/nfa/truffle.c b/src/nfa/truffle.c index 6d82f8e1..331ae6d6 100644 --- a/src/nfa/truffle.c +++ b/src/nfa/truffle.c @@ -37,8 +37,6 @@ #include "util/bitutils.h" #include "util/simd_utils.h" -#include "truffle_common.h" - #if !defined(HAVE_AVX2) static really_inline @@ -52,6 +50,57 @@ const u8 *lastMatch(const u8 *buf, u32 z) { return NULL; // no match } +static really_inline +const u8 *firstMatch(const u8 *buf, u32 z) { + if (unlikely(z != 0xffff)) { + u32 pos = ctz32(~z & 0xffff); + assert(pos < 16); + return buf + pos; + } + + return NULL; // no match +} + +static really_inline +u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) { + + m128 highconst = _mm_set1_epi8(0x80); + m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201); + + // and now do the real work + m128 shuf1 = pshufb(shuf_mask_lo_highclear, v); + m128 t1 = xor128(v, highconst); + m128 shuf2 = pshufb(shuf_mask_lo_highset, t1); + m128 t2 = andnot128(highconst, rshift64_m128(v, 4)); + m128 shuf3 = pshufb(shuf_mask_hi, t2); + m128 tmp = and128(or128(shuf1, shuf2), shuf3); + m128 tmp2 = eq128(tmp, zeroes128()); + u32 z = movemask128(tmp2); + + return z; +} + +static +const u8 *truffleMini(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + uintptr_t len = buf_end - buf; + assert(len < 16); + + m128 chars = zeroes128(); + memcpy(&chars, buf, len); + + u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); + // can't be these bytes in z + u32 mask = (0xffff >> (16 - len)) ^ 0xffff; + const u8 *rv = firstMatch(buf, z | mask); + + if (rv) { + return rv; + } else { + return buf_end; + } +} + static really_inline const u8 *fwdBlock(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v, const u8 *buf) { @@ -125,7 +174,7 @@ const u8 *truffleRevMini(m128 shuf_mask_lo_highclear, m128 chars = zeroes128(); memcpy(&chars, buf, len); - u32 mask = (0xFFFF >> (16 - len)) ^ 0xFFFF; + u32 mask = (0xffff >> (16 - len)) ^ 0xffff; u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); const u8 *rv = lastMatch(buf, z | mask); @@ -184,6 +233,8 @@ const u8 *rtruffleExec(m128 shuf_mask_lo_highclear, #else +// AVX2 + static really_inline const u8 *lastMatch(const u8 *buf, u32 z) { if (unlikely(z != 0xffffffff)) { @@ -195,6 +246,57 @@ const u8 *lastMatch(const u8 *buf, u32 z) { return NULL; // no match } +static really_inline +const u8 *firstMatch(const u8 *buf, u32 z) { + if (unlikely(z != 0xffffffff)) { + u32 pos = ctz32(~z); + assert(pos < 32); + return buf + pos; + } + + return NULL; // no match +} + +static really_inline +u32 block(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v) { + + m256 highconst = _mm256_set1_epi8(0x80); + m256 shuf_mask_hi = _mm256_set1_epi64x(0x8040201008040201); + + // and now do the real work + m256 shuf1 = vpshufb(shuf_mask_lo_highclear, v); + m256 t1 = xor256(v, highconst); + m256 shuf2 = vpshufb(shuf_mask_lo_highset, t1); + m256 t2 = andnot256(highconst, rshift64_m256(v, 4)); + m256 shuf3 = vpshufb(shuf_mask_hi, t2); + m256 tmp = and256(or256(shuf1, shuf2), shuf3); + m256 tmp2 = eq256(tmp, zeroes256()); + u32 z = movemask256(tmp2); + + return z; +} + +static +const u8 *truffleMini(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + uintptr_t len = buf_end - buf; + assert(len < 32); + + m256 chars = zeroes256(); + memcpy(&chars, buf, len); + + u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); + // can't be these bytes in z + u32 mask = (0xffffffff >> (32 - len)) ^ 0xffffffff; + const u8 *rv = firstMatch(buf, z | mask); + + if (rv) { + return rv; + } else { + return buf_end; + } +} + static really_inline const u8 *fwdBlock(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v, const u8 *buf) { @@ -266,7 +368,7 @@ const u8 *truffleRevMini(m256 shuf_mask_lo_highclear, m256 chars = zeroes256(); memcpy(&chars, buf, len); - u32 mask = (0xFFFFFFFF >> (32 - len)) ^ 0xFFFFFFFF; + u32 mask = (0xffffffff >> (32 - len)) ^ 0xffffffff; u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); const u8 *rv = lastMatch(buf, z | mask); diff --git a/src/nfa/truffle_common.h b/src/nfa/truffle_common.h deleted file mode 100644 index dc9c726c..00000000 --- a/src/nfa/truffle_common.h +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef TRUFFLE_COMMON_H_ -#define TRUFFLE_COMMON_H_ - -#include "util/arch.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" - -/* - * Common stuff for all versions of truffle (single, multi and multidouble) - */ -#if !defined(HAVE_AVX2) - -static really_inline -const u8 *firstMatch(const u8 *buf, u32 z) { - if (unlikely(z != 0xffff)) { - u32 pos = ctz32(~z & 0xffff); - assert(pos < 16); - return buf + pos; - } - - return NULL; // no match -} - -static really_inline -u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) { - - m128 highconst = _mm_set1_epi8(0x80); - m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201); - - // and now do the real work - m128 shuf1 = pshufb(shuf_mask_lo_highclear, v); - m128 t1 = xor128(v, highconst); - m128 shuf2 = pshufb(shuf_mask_lo_highset, t1); - m128 t2 = andnot128(highconst, rshift64_m128(v, 4)); - m128 shuf3 = pshufb(shuf_mask_hi, t2); - m128 tmp = and128(or128(shuf1, shuf2), shuf3); - m128 tmp2 = eq128(tmp, zeroes128()); - u32 z = movemask128(tmp2); - - return z; -} - -static -const u8 *truffleMini(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end) { - uintptr_t len = buf_end - buf; - assert(len < 16); - - m128 chars = zeroes128(); - memcpy(&chars, buf, len); - - u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); - // can't be these bytes in z - u32 mask = (0xFFFF >> (16 - len)) ^ 0xFFFF; - const u8 *rv = firstMatch(buf, z| mask); - - if (rv) { - return rv; - } else { - return buf_end; - } -} - -#else - -static really_inline -const u8 *firstMatch(const u8 *buf, u32 z) { - if (unlikely(z != 0xffffffff)) { - u32 pos = ctz32(~z); - assert(pos < 32); - return buf + pos; - } - - return NULL; // no match -} - -static really_inline -u32 block(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v) { - - m256 highconst = _mm256_set1_epi8(0x80); - m256 shuf_mask_hi = _mm256_set1_epi64x(0x8040201008040201); - - // and now do the real work - m256 shuf1 = vpshufb(shuf_mask_lo_highclear, v); - m256 t1 = xor256(v, highconst); - m256 shuf2 = vpshufb(shuf_mask_lo_highset, t1); - m256 t2 = andnot256(highconst, rshift64_m256(v, 4)); - m256 shuf3 = vpshufb(shuf_mask_hi, t2); - m256 tmp = and256(or256(shuf1, shuf2), shuf3); - m256 tmp2 = eq256(tmp, zeroes256()); - u32 z = movemask256(tmp2); - - return z; -} - -static -const u8 *truffleMini(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end) { - uintptr_t len = buf_end - buf; - assert(len < 32); - - m256 chars = zeroes256(); - memcpy(&chars, buf, len); - - u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); - // can't be these bytes in z - u32 mask = (0xFFFFFFFF >> (32 - len)) ^ 0xFFFFFFFF; - const u8 *rv = firstMatch(buf, z | mask); - - if (rv) { - return rv; - } else { - return buf_end; - } -} - -#endif - -#endif /* TRUFFLE_COMMON_H_ */ diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp index 52f1e7d8..beeb4a69 100644 --- a/src/nfagraph/ng_limex_accel.cpp +++ b/src/nfagraph/ng_limex_accel.cpp @@ -37,7 +37,6 @@ #include "ue2common.h" #include "nfa/accel.h" -#include "nfa/multiaccel_compilehelper.h" #include "util/bitutils.h" // for CASE_CLEAR #include "util/charreach.h" @@ -677,134 +676,6 @@ NFAVertex get_sds_or_proxy(const NGHolder &g) { return g.startDs; } -static -NFAVertex find_next(const NFAVertex v, const NGHolder &g) { - NFAVertex res = NGHolder::null_vertex(); - for (NFAVertex u : adjacent_vertices_range(v, g)) { - if (u != v) { - res = u; - break; - } - } - return res; -} - -/** \brief Check if vertex \a v is a multi accelerable state (for a limex NFA). */ -MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g, - const vector &states, - const CompileContext &cc) { - // For a set of states to be accelerable, we basically have to have only - // one state to accelerate. - if (states.size() != 1) { - DEBUG_PRINTF("can't accelerate multiple states\n"); - return MultibyteAccelInfo(); - } - - // Get our base vertex - NFAVertex v = states[0]; - - // We need the base vertex to be a self-looping dotall leading to exactly - // one vertex. - if (!hasSelfLoop(v, g)) { - DEBUG_PRINTF("base vertex has self-loop\n"); - return MultibyteAccelInfo(); - } - - if (!g[v].char_reach.all()) { - DEBUG_PRINTF("can't accelerate anything but dot\n"); - return MultibyteAccelInfo(); - } - - if (proper_out_degree(v, g) != 1) { - DEBUG_PRINTF("can't accelerate states with multiple successors\n"); - return MultibyteAccelInfo(); - } - - // find our start vertex - NFAVertex cur = find_next(v, g); - if (cur == NGHolder::null_vertex()) { - DEBUG_PRINTF("invalid start vertex\n"); - return MultibyteAccelInfo(); - } - - bool has_offset = false; - u32 offset = 0; - CharReach cr = g[cur].char_reach; - - // if we start with a dot, we have an offset, so defer figuring out the - // real CharReach for this accel scheme - if (cr == CharReach::dot()) { - has_offset = true; - offset = 1; - } - - // figure out our offset - while (has_offset) { - // vertices have to have no self loops - if (hasSelfLoop(cur, g)) { - DEBUG_PRINTF("can't have self-loops\n"); - return MultibyteAccelInfo(); - } - - // we have to have exactly 1 successor to have this acceleration scheme - if (out_degree(cur, g) != 1) { - DEBUG_PRINTF("can't have multiple successors\n"); - return MultibyteAccelInfo(); - } - - cur = *adjacent_vertices(cur, g).first; - - // if we met a special vertex, bail out - if (is_special(cur, g)) { - DEBUG_PRINTF("can't have special vertices\n"); - return MultibyteAccelInfo(); - } - - // now, get the real char reach - if (g[cur].char_reach != CharReach::dot()) { - cr = g[cur].char_reach; - has_offset = false; - } else { - offset++; - } - } - - // now, fire up the compilation machinery - target_t ti = cc.target_info; - unsigned max_len = ti.has_avx2() ? MULTIACCEL_MAX_LEN_AVX2 : MULTIACCEL_MAX_LEN_SSE; - MultiaccelCompileHelper mac(cr, offset, max_len); - - while (mac.canAdvance()) { - // vertices have to have no self loops - if (hasSelfLoop(cur, g)) { - break; - } - - // we have to have exactly 1 successor to have this acceleration scheme - if (out_degree(cur, g) != 1) { - break; - } - - cur = *adjacent_vertices(cur, g).first; - - // if we met a special vertex, bail out - if (is_special(cur, g)) { - break; - } - - mac.advance(g[cur].char_reach); - } - MultibyteAccelInfo mai = mac.getBestScheme(); -#ifdef DEBUG - DEBUG_PRINTF("Multibyte acceleration scheme: type: %u offset: %u lengths: %u,%u\n", - mai.type, mai.offset, mai.len1, mai.len2); - for (size_t c = mai.cr.find_first(); c != CharReach::npos; c = mai.cr.find_next(c)) { - DEBUG_PRINTF("multibyte accel char: %zu\n", c); - } -#endif - return mai; -} - /** \brief Check if vertex \a v is an accelerable state (for a limex NFA). */ bool nfaCheckAccel(const NGHolder &g, NFAVertex v, const vector &refined_cr, diff --git a/src/nfagraph/ng_limex_accel.h b/src/nfagraph/ng_limex_accel.h index cb3d1210..f0c98db2 100644 --- a/src/nfagraph/ng_limex_accel.h +++ b/src/nfagraph/ng_limex_accel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -51,9 +51,6 @@ namespace ue2 { #define MAX_MERGED_ACCEL_STOPS 200 #define ACCEL_MAX_STOP_CHAR 24 #define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */ -#define MULTIACCEL_MIN_LEN 3 -#define MULTIACCEL_MAX_LEN_SSE 15 -#define MULTIACCEL_MAX_LEN_AVX2 31 // forward-declaration of CompileContext struct CompileContext; @@ -84,11 +81,6 @@ bool nfaCheckAccel(const NGHolder &g, NFAVertex v, const std::map &br_cyclic, AccelScheme *as, bool allow_wide); -/** \brief Check if vertex \a v is a multi accelerable state (for a limex NFA). - */ -MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g, - const std::vector &verts, - const CompileContext &cc); } // namespace ue2 diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index 8b494444..75ee3e65 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -52,8 +52,6 @@ set(unit_internal_SOURCES internal/limex_nfa.cpp internal/masked_move.cpp internal/multi_bit.cpp - internal/multiaccel_matcher.cpp - internal/multiaccel_shift.cpp internal/nfagraph_common.h internal/nfagraph_comp.cpp internal/nfagraph_equivalence.cpp diff --git a/unit/internal/multiaccel_matcher.cpp b/unit/internal/multiaccel_matcher.cpp deleted file mode 100644 index bdf56ff9..00000000 --- a/unit/internal/multiaccel_matcher.cpp +++ /dev/null @@ -1,301 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - - -extern "C" { -#include "nfa/accel.h" // wrapping in extern C to make sure run_accel works -} - -#include "config.h" -#include "src/ue2common.h" - -#include "gtest/gtest.h" -#include "nfagraph/ng_limex_accel.h" -#include "nfa/accelcompile.h" -#include "nfa/multivermicelli.h" -#include "nfa/multishufti.h" -#include "nfa/multitruffle.h" -#include "util/alloc.h" -#include "util/charreach.h" - -#include -#include -#include -#include -#include - -using namespace ue2; -using namespace std; -using namespace testing; - -// test parameters structure -struct MultiaccelTestParam { - string match_pattern; - u32 match_pattern_start_idx; - u32 match_idx; - bool test_all_offsets; - u8 match_len1; - u8 match_len2; - MultibyteAccelInfo::multiaccel_type type; -}; - -// buffer size is constant -static const u32 BUF_SIZE = 200; - -// strings, out of which CharReach will be generated -static const string VERM_CR = "a"; -static const string V_NC_CR = "aA"; -static const string SHUF_CR = "abcdefghijklmnopqrstuvwxyz"; -static const string TRUF_CR = "\x11\x22\x33\x44\x55\x66\x77\x88\x99"; - -// Parameterized test case for multiaccel patterns. -class MultiaccelTest : public TestWithParam { -protected: - virtual void SetUp() { - // set up is deferred until the actual test, since we can't compile - // any accel schemes unless we know CharReach - const MultiaccelTestParam &p = GetParam(); - - // reserve space in our buffer - buffer = (u8 *)aligned_zmalloc(BUF_SIZE); - - // store the index where we expect to see the match. note that it may - // be different from where the match pattern has started since we may - // have a flooded match (i.e. a match preceded by almost-match) or a - // no-match (in which case "match" index is at the end of the buffer). - match_idx = p.match_idx; - - // make note if we need to test all offsets - sometimes we don't, for - // example when testing partial or no-match. - test_all_offsets = p.test_all_offsets; - } - - char getChar(const CharReach &cr) { - assert(cr.count() > 0); - auto dist = uniform_int_distribution(0, cr.count() - 1); - size_t result = cr.find_nth(dist(prng)); - assert(result != CharReach::npos); - return (char)result; - } - - // char generator - char getChar(const CharReach &cr, bool match) { - return getChar(match ? cr : ~cr); - } - - // appends a string with matches/unmatches according to input match pattern - void getMatch(u8 *result, u32 start, const string &pattern, - const CharReach &cr) { - for (const auto &c : pattern) { - result[start++] = getChar(cr, c == '1'); - } - } - - // appends non-matching noise of certain lengths - void getNoise(u8 *result, u32 start, u32 len, const CharReach &cr) { - for (unsigned i = 0; i < len; i++) { - result[start + i] = getChar(cr, false); - } - } - - // deferred buffer generation, as we don't know CharReach before we run the test - void GenerateBuffer(const CharReach &cr) { - const MultiaccelTestParam &p = GetParam(); - - // step 1: fill prefix with non-matching noise - u32 start = 0; - getNoise(buffer, start, p.match_pattern_start_idx, cr); - - // step 2: add a match - start += p.match_pattern_start_idx; - getMatch(buffer, start, p.match_pattern, cr); - - // step 3: fill in the rest of the buffer with non-matching noise - start += p.match_pattern.size(); - getNoise(buffer, start, BUF_SIZE - p.match_pattern.size() - - p.match_pattern_start_idx, cr); - } - - // deferred accel scheme generation, as we don't know CharReach before we run the test - void CompileAccelScheme(const CharReach &cr, AccelAux *aux) { - const MultiaccelTestParam &p = GetParam(); - - AccelInfo ai; - ai.single_stops = cr; // dummy CharReach to prevent red tape accel - ai.ma_len1 = p.match_len1; - ai.ma_len2 = p.match_len2; - ai.multiaccel_stops = cr; - ai.ma_type = p.type; - - buildAccelAux(ai, aux); - - // now, verify we've successfully built our accel scheme, *and* that it's - // a multibyte scheme - ASSERT_TRUE(aux->accel_type >= ACCEL_MLVERM && - aux->accel_type <= ACCEL_MDSGTRUFFLE); - } - - virtual void TearDown() { - aligned_free(buffer); - } - - // We want our tests to be deterministic, so we use a PRNG in the test - // fixture. - mt19937 prng; - - u32 match_idx; - u8 *buffer; - bool test_all_offsets; -}; - -static -void runTest(const u8 *buffer, AccelAux *aux, unsigned match_idx, - bool test_all_offsets) { - const u8 *start = buffer; - const u8 *end = start + BUF_SIZE; - const u8 *match = start + match_idx; - - // comparing indexes into the buffer is easier to understand than pointers - if (test_all_offsets) { - // run_accel can only scan >15 byte buffers - u32 end_offset = min(match_idx, BUF_SIZE - 15); - - for (unsigned offset = 0; offset < end_offset; offset++) { - const u8 *ptr = run_accel(aux, (start + offset), end); - unsigned idx = ptr - start; - ASSERT_EQ(match_idx, idx); - } - } else { - const u8 *ptr = run_accel(aux, start, end); - unsigned idx = ptr - start; - ASSERT_EQ(match_idx, idx); - } -} - -TEST_P(MultiaccelTest, TestVermicelli) { - AccelAux aux = {0}; - CharReach cr(VERM_CR); - - GenerateBuffer(cr); - - CompileAccelScheme(cr, &aux); - - runTest(buffer, &aux, match_idx, test_all_offsets); -} - -TEST_P(MultiaccelTest, TestVermicelliNocase) { - AccelAux aux = {0}; - CharReach cr(V_NC_CR); - - GenerateBuffer(cr); - - CompileAccelScheme(cr, &aux); - - runTest(buffer, &aux, match_idx, test_all_offsets); -} - -TEST_P(MultiaccelTest, TestShufti) { - AccelAux aux = {0}; - CharReach cr(SHUF_CR); - - GenerateBuffer(cr); - - CompileAccelScheme(cr, &aux); - - runTest(buffer, &aux, match_idx, test_all_offsets); -} - -TEST_P(MultiaccelTest, TestTruffle) { - AccelAux aux = {0}; - CharReach cr(TRUF_CR); - - GenerateBuffer(cr); - - CompileAccelScheme(cr, &aux); - - runTest(buffer, &aux, match_idx, test_all_offsets); -} - -static const MultiaccelTestParam multiaccelTests[] = { - // long matcher - - // full, partial, flooded, nomatch - {"11111", 180, 180, true, 5, 0, MultibyteAccelInfo::MAT_LONG}, - {"111", 197, 197, true, 5, 0, MultibyteAccelInfo::MAT_LONG}, - {"1111011111", 177, 182, false, 5, 0, MultibyteAccelInfo::MAT_LONG}, - {"1111011110", 177, 200, false, 5, 0, MultibyteAccelInfo::MAT_LONG}, - - // long-grab matcher - - // full, partial, flooded, nomatch - {"111110", 180, 180, true, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB}, - {"111", 197, 197, true, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB}, - {"11111111110", 177, 182, false, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB}, - {"11110111101", 177, 200, false, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB}, - - // shift matcher - - // full, partial, flooded, nomatch - {"11001", 180, 180, true, 4, 0, MultibyteAccelInfo::MAT_SHIFT}, - {"110", 197, 197, true, 4, 0, MultibyteAccelInfo::MAT_SHIFT}, - {"1001011001", 177, 182, false, 4, 0, MultibyteAccelInfo::MAT_SHIFT}, - {"1101001011", 177, 200, false, 4, 0, MultibyteAccelInfo::MAT_SHIFT}, - - // shift-grab matcher - - // full, partial, flooded, nomatch - {"10111", 180, 180, true, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB}, - {"101", 197, 197, true, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB}, - {"1110010111", 177, 182, false, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB}, - {"1100101100", 177, 200, false, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB}, - - // doubleshift matcher - - // full, partial (one and two shifts), flooded, nomatch - {"110111", 180, 180, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT}, - {"110", 197, 197, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT}, - {"1101", 196, 196, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT}, - {"1100100101", 178, 182, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFT}, - {"1101001101", 177, 200, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFT}, - - // doubleshift-grab matcher - - // full, partial (one and two shifts), flooded, nomatch - {"100101", 180, 180, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB}, - {"100", 197, 197, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB}, - {"1011", 196, 196, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB}, - {"11111101101", 177, 182, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB}, - {"1111110111", 177, 200, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB}, -}; - -INSTANTIATE_TEST_CASE_P(Multiaccel, MultiaccelTest, ValuesIn(multiaccelTests)); - -// boring stuff for google test -void PrintTo(const MultiaccelTestParam &p, ::std::ostream *os) { - *os << "MultiaccelTestParam: " << p.match_pattern; -} diff --git a/unit/internal/multiaccel_shift.cpp b/unit/internal/multiaccel_shift.cpp deleted file mode 100644 index d6019870..00000000 --- a/unit/internal/multiaccel_shift.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "src/ue2common.h" - -#include "gtest/gtest.h" -#include "nfa/multiaccel_common.h" - -/* - * Unit tests for the shifters. - * - * This is a bit messy, as shifters are macros, so we're using macros to test - * other macros. - */ - -#define TEST_SHIFT(n) \ - do { \ - u64a val = ((u64a) 1 << n) - 1; \ - JOIN(SHIFT, n)(val); \ - ASSERT_EQ(val, 1); \ - } while (0) - -TEST(MultiaccelShift, StaticShift) { - TEST_SHIFT(1); - TEST_SHIFT(2); - TEST_SHIFT(3); - TEST_SHIFT(4); - TEST_SHIFT(5); - TEST_SHIFT(6); - TEST_SHIFT(7); - TEST_SHIFT(8); - TEST_SHIFT(10); - TEST_SHIFT(11); - TEST_SHIFT(12); - TEST_SHIFT(13); - TEST_SHIFT(14); - TEST_SHIFT(15); - TEST_SHIFT(16); - TEST_SHIFT(17); - TEST_SHIFT(18); - TEST_SHIFT(19); - TEST_SHIFT(20); - TEST_SHIFT(21); - TEST_SHIFT(22); - TEST_SHIFT(23); - TEST_SHIFT(24); - TEST_SHIFT(25); - TEST_SHIFT(26); - TEST_SHIFT(27); - TEST_SHIFT(28); - TEST_SHIFT(29); - TEST_SHIFT(30); - TEST_SHIFT(31); - TEST_SHIFT(32); -}