diff --git a/CMakeLists.txt b/CMakeLists.txt index 353bc561..db123c1b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -448,6 +448,10 @@ set (hs_exec_SRCS src/nfa/multishufti_avx2.h src/nfa/multishufti_sse.h src/nfa/multishufti.h + src/nfa/multitruffle.c + src/nfa/multitruffle_avx2.h + src/nfa/multitruffle_sse.h + src/nfa/multitruffle.h src/nfa/multivermicelli.c src/nfa/multivermicelli.h src/nfa/multivermicelli_sse.h @@ -462,6 +466,7 @@ set (hs_exec_SRCS src/nfa/shufti_common.h src/nfa/shufti.c src/nfa/shufti.h + src/nfa/truffle_common.h src/nfa/truffle.c src/nfa/truffle.h src/nfa/vermicelli.h diff --git a/src/nfa/accel.c b/src/nfa/accel.c index ee081154..a8fc4e36 100644 --- a/src/nfa/accel.c +++ b/src/nfa/accel.c @@ -31,6 +31,7 @@ #include "truffle.h" #include "vermicelli.h" #include "multishufti.h" +#include "multitruffle.h" #include "multivermicelli.h" #include "ue2common.h" @@ -274,6 +275,65 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) { rv = doubleshiftgrab_shuftiExec(accel->mdshufti.lo, accel->mdshufti.hi, c, c_end, accel->mdshufti.len1, accel->mdshufti.len2); break; + case ACCEL_MLTRUFFLE: + DEBUG_PRINTF("accel mltruffle %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = long_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2, + c, c_end, accel->mtruffle.len); + break; + case ACCEL_MLGTRUFFLE: + DEBUG_PRINTF("accel mlgtruffle %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = longgrab_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2, + c, c_end, accel->mtruffle.len); + break; + case ACCEL_MSTRUFFLE: + DEBUG_PRINTF("accel mstruffle %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = shift_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2, + c, c_end, accel->mtruffle.len); + break; + case ACCEL_MSGTRUFFLE: + DEBUG_PRINTF("accel msgtruffle %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = 
shiftgrab_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2, + c, c_end, accel->mtruffle.len); + break; + case ACCEL_MDSTRUFFLE: + DEBUG_PRINTF("accel mdstruffle %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = doubleshift_truffleExec(accel->mdtruffle.mask1, + accel->mdtruffle.mask2, c, c_end, + accel->mdtruffle.len1, + accel->mdtruffle.len2); + break; + case ACCEL_MDSGTRUFFLE: + DEBUG_PRINTF("accel mdsgtruffle %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = doubleshiftgrab_truffleExec(accel->mdtruffle.mask1, + accel->mdtruffle.mask2, c, c_end, + accel->mdtruffle.len1, + accel->mdtruffle.len2); + break; + default: assert(!"not here"); diff --git a/src/nfa/accel.h b/src/nfa/accel.h index 87acf6cf..af029566 100644 --- a/src/nfa/accel.h +++ b/src/nfa/accel.h @@ -81,6 +81,13 @@ enum AccelType { ACCEL_MSGSHUFTI, ACCEL_MDSSHUFTI, ACCEL_MDSGSHUFTI, + /* multibyte truffles */ + ACCEL_MLTRUFFLE, + ACCEL_MLGTRUFFLE, + ACCEL_MSTRUFFLE, + ACCEL_MSGTRUFFLE, + ACCEL_MDSTRUFFLE, + ACCEL_MDSGTRUFFLE }; /** \brief Structure for accel framework. 
*/ @@ -149,6 +156,21 @@ union AccelAux { m128 mask1; m128 mask2; } truffle; + struct { + u8 accel_type; + u8 offset; + m128 mask1; + m128 mask2; + u8 len; + } mtruffle; + struct { + u8 accel_type; + u8 offset; + m128 mask1; + m128 mask2; + u8 len1; + u8 len2; + } mdtruffle; }; /** diff --git a/src/nfa/accel_dump.cpp b/src/nfa/accel_dump.cpp index 5a28c6a0..2370718a 100644 --- a/src/nfa/accel_dump.cpp +++ b/src/nfa/accel_dump.cpp @@ -122,6 +122,18 @@ const char *accelName(u8 accel_type) { return "multibyte doubleshift shufti"; case ACCEL_MDSGSHUFTI: return "multibyte doubleshift-grab shufti"; + case ACCEL_MLTRUFFLE: + return "multibyte long truffle"; + case ACCEL_MLGTRUFFLE: + return "multibyte long-grab truffle"; + case ACCEL_MSTRUFFLE: + return "multibyte shift truffle"; + case ACCEL_MSGTRUFFLE: + return "multibyte shift-grab truffle"; + case ACCEL_MDSTRUFFLE: + return "multibyte doubleshift truffle"; + case ACCEL_MDSGTRUFFLE: + return "multibyte doubleshift-grab truffle"; default: return "unknown!"; } @@ -142,6 +154,22 @@ void dumpShuftiMasks(FILE *f, const m128 &lo, const m128 &hi) { dumpMask((const u8 *)&hi, 128).c_str()); } +static +void dumpTruffleCharReach(FILE *f, const m128 &hiset, const m128 &hiclear) { + CharReach cr = truffle2cr(hiset, hiclear); + fprintf(f, "count %zu class %s\n", cr.count(), + describeClass(cr).c_str()); +} + +static +void dumpTruffleMasks(FILE *f, const m128 &hiset, const m128 &hiclear) { + fprintf(f, "lo %s\n", + dumpMask((const u8 *)&hiset, 128).c_str()); + fprintf(f, "hi %s\n", + dumpMask((const u8 *)&hiclear, 128).c_str()); +} + + void dumpAccelInfo(FILE *f, const AccelAux &accel) { fprintf(f, " %s", accelName(accel.accel_type)); if (accel.generic.offset) { @@ -176,13 +204,8 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) { break; case ACCEL_TRUFFLE: { fprintf(f, "\n"); - fprintf(f, "lo %s\n", - dumpMask((const u8 *)&accel.truffle.mask1, 128).c_str()); - fprintf(f, "hi %s\n", - dumpMask((const u8 *)&accel.truffle.mask2, 
128).c_str()); - CharReach cr = truffle2cr(accel.truffle.mask1, accel.truffle.mask2); - fprintf(f, "count %zu class %s\n", cr.count(), - describeClass(cr).c_str()); + dumpTruffleMasks(f, accel.truffle.mask1, accel.truffle.mask2); + dumpTruffleCharReach(f, accel.truffle.mask1, accel.truffle.mask2); break; } case ACCEL_MLVERM: @@ -216,6 +239,20 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) { dumpShuftiMasks(f, accel.mdshufti.lo, accel.mdshufti.hi); dumpShuftiCharReach(f, accel.mdshufti.lo, accel.mdshufti.hi); break; + case ACCEL_MLTRUFFLE: + case ACCEL_MLGTRUFFLE: + case ACCEL_MSTRUFFLE: + case ACCEL_MSGTRUFFLE: + fprintf(f, " len:%u\n", accel.mtruffle.len); + dumpTruffleMasks(f, accel.mtruffle.mask1, accel.mtruffle.mask2); + dumpTruffleCharReach(f, accel.mtruffle.mask1, accel.mtruffle.mask2); + break; + case ACCEL_MDSTRUFFLE: + case ACCEL_MDSGTRUFFLE: + fprintf(f, " len1:%u len2:%u\n", accel.mdtruffle.len1, accel.mdtruffle.len2); + dumpTruffleMasks(f, accel.mdtruffle.mask1, accel.mdtruffle.mask2); + dumpTruffleCharReach(f, accel.mdtruffle.mask1, accel.mdtruffle.mask2); + break; default: fprintf(f, "\n"); break; diff --git a/src/nfa/limex_accel.c b/src/nfa/limex_accel.c index c12f917a..77ed5ac0 100644 --- a/src/nfa/limex_accel.c +++ b/src/nfa/limex_accel.c @@ -39,6 +39,7 @@ #include "shufti.h" #include "truffle.h" #include "multishufti.h" +#include "multitruffle.h" #include "multivermicelli.h" #include "ue2common.h" #include "vermicelli.h" @@ -182,6 +183,46 @@ const u8 *accelScan(const union AccelAux *aux, const u8 *ptr, const u8 *end) { offset = aux->truffle.offset; ptr = truffleExec(aux->truffle.mask1, aux->truffle.mask2, ptr, end); break; + case ACCEL_MLTRUFFLE: + DEBUG_PRINTF("long match truffle shuffle\n"); + offset = aux->mtruffle.offset; + ptr = long_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2, + ptr, end, aux->mtruffle.len); + break; + case ACCEL_MLGTRUFFLE: + DEBUG_PRINTF("long grab match truffle shuffle\n"); + offset = aux->mtruffle.offset; 
+ ptr = longgrab_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2, + ptr, end, aux->mtruffle.len); + break; + case ACCEL_MSTRUFFLE: + DEBUG_PRINTF("shift match truffle shuffle\n"); + offset = aux->mtruffle.offset; + ptr = shift_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2, + ptr, end, aux->mtruffle.len); + break; + case ACCEL_MSGTRUFFLE: + DEBUG_PRINTF("shift grab match truffle shuffle\n"); + offset = aux->mtruffle.offset; + ptr = shiftgrab_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2, + ptr, end, aux->mtruffle.len); + break; + case ACCEL_MDSTRUFFLE: + DEBUG_PRINTF("double shift match truffle shuffle\n"); + offset = aux->mdtruffle.offset; + ptr = doubleshift_truffleExec(aux->mdtruffle.mask1, + aux->mdtruffle.mask2, ptr, end, + aux->mdtruffle.len1, + aux->mdtruffle.len2); + break; + case ACCEL_MDSGTRUFFLE: + DEBUG_PRINTF("double shift grab match truffle shuffle\n"); + offset = aux->mdtruffle.offset; + ptr = doubleshiftgrab_truffleExec(aux->mdtruffle.mask1, + aux->mdtruffle.mask2, ptr, end, + aux->mdtruffle.len1, + aux->mdtruffle.len2); + break; case ACCEL_RED_TAPE: ptr = end; /* there is no escape */ offset = aux->generic.offset; diff --git a/src/nfa/multitruffle.c b/src/nfa/multitruffle.c new file mode 100644 index 00000000..3af6394a --- /dev/null +++ b/src/nfa/multitruffle.c @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "ue2common.h" + +#include "multitruffle.h" +#include "util/bitutils.h" +#include "util/simd_utils.h" +#include "util/simd_utils_ssse3.h" + +#include "multiaccel_common.h" + +#if !defined(__AVX2__) + +#define MATCH_ALGO long_ +#include "multiaccel_long.h" +#include "multitruffle_sse.h" +#undef MATCH_ALGO + +#define MATCH_ALGO longgrab_ +#include "multiaccel_longgrab.h" +#include "multitruffle_sse.h" +#undef MATCH_ALGO + +#define MATCH_ALGO shift_ +#include "multiaccel_shift.h" +#include "multitruffle_sse.h" +#undef MATCH_ALGO + +#define MATCH_ALGO shiftgrab_ +#include "multiaccel_shiftgrab.h" +#include "multitruffle_sse.h" +#undef MATCH_ALGO + +#define MULTIACCEL_DOUBLE + +#define MATCH_ALGO doubleshift_ +#include "multiaccel_doubleshift.h" +#include "multitruffle_sse.h" +#undef MATCH_ALGO + +#define MATCH_ALGO doubleshiftgrab_ +#include "multiaccel_doubleshiftgrab.h" +#include "multitruffle_sse.h" +#undef MATCH_ALGO + +#undef MULTIACCEL_DOUBLE + +#else + +#define MATCH_ALGO long_ +#include "multiaccel_long.h" +#include "multitruffle_avx2.h" +#undef MATCH_ALGO + +#define MATCH_ALGO longgrab_ +#include "multiaccel_longgrab.h" +#include "multitruffle_avx2.h" +#undef MATCH_ALGO + +#define MATCH_ALGO shift_ +#include "multiaccel_shift.h" +#include "multitruffle_avx2.h" +#undef MATCH_ALGO + +#define MATCH_ALGO shiftgrab_ +#include "multiaccel_shiftgrab.h" +#include "multitruffle_avx2.h" +#undef MATCH_ALGO + +#define MULTIACCEL_DOUBLE + +#define MATCH_ALGO doubleshift_ +#include "multiaccel_doubleshift.h" +#include "multitruffle_avx2.h" +#undef MATCH_ALGO + +#define MATCH_ALGO doubleshiftgrab_ +#include "multiaccel_doubleshiftgrab.h" +#include "multitruffle_avx2.h" +#undef MATCH_ALGO + +#undef MULTIACCEL_DOUBLE + +#endif diff --git a/src/nfa/multitruffle.h b/src/nfa/multitruffle.h new file mode 100644 index 00000000..8703b5ca --- /dev/null +++ b/src/nfa/multitruffle.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2015, Intel Corporation + * 
+ * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MULTITRUFFLE_H +#define MULTITRUFFLE_H + +/** \file + * \brief Multitruffle: multibyte version of Truffle. 
+ * + * Utilises the SSSE3 pshufb shuffle instruction + */ + +#include "util/simd_types.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +const u8 *long_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end, const u8 run_len); + +const u8 *longgrab_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end, const u8 run_len); + +const u8 *shift_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end, const u8 run_len); + +const u8 *shiftgrab_truffleExec(m128 shuf_mask_lo_highclear, + m128 shuf_mask_lo_highset, const u8 *buf, + const u8 *buf_end, const u8 run_len); + +const u8 *doubleshift_truffleExec(m128 shuf_mask_lo_highclear, + m128 shuf_mask_lo_highset, const u8 *buf, + const u8 *buf_end, const u8 run_len, + const u8 run2_len); + +const u8 *doubleshiftgrab_truffleExec(m128 shuf_mask_lo_highclear, + m128 shuf_mask_lo_highset, const u8 *buf, + const u8 *buf_end, const u8 run_len, + const u8 run2_len); + +#ifdef __cplusplus +} +#endif + + +#endif /* MULTITRUFFLE_H */ diff --git a/src/nfa/multitruffle_avx2.h b/src/nfa/multitruffle_avx2.h new file mode 100644 index 00000000..e52db5fc --- /dev/null +++ b/src/nfa/multitruffle_avx2.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * Matches a byte in a charclass using three shuffles + */ + +#include "config.h" +#include "ue2common.h" +#include "multiaccel_common.h" + +/* + * include "block" function + */ +#include "truffle_common.h" + +/* + * multibyte truffle fwd match function: runs block() on one vector and passes + * the inverted result mask to the MATCH_ALGO matcher selected by run_len + */ +static really_inline +const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, + m256 v, const u8 *buf, const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + u64a z = (u64a) block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); + return (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])(buf, z ^ 0xFFFFFFFF +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); +} + +const u8 *JOIN(MATCH_ALGO, truffleExec)(m128 shuf_mask_lo_highclear, + m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end, const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + DEBUG_PRINTF("run_len %zu\n", buf_end - buf); + const m256 wide_clear = set2x128(shuf_mask_lo_highclear); + const m256 wide_set = set2x128(shuf_mask_lo_highset); + + assert(buf && buf_end); + assert(buf < buf_end); + const u8 *rv; + + if (buf_end - buf < 32) { + return truffleMini(wide_clear, wide_set, buf, buf_end); + } + + size_t min = (size_t)buf % 32; + assert(buf_end - buf >= 32); + + // Preconditioning: most of the time our buffer won't be aligned. 
+ m256 chars = loadu256(buf); + rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, chars, buf, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (rv) { + return rv; + } + buf += (32 - min); + + const u8 *last_block = buf_end - 32; + while (buf < last_block) { + m256 lchars = load256(buf); + rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, lchars, + buf, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (rv) { + return rv; + } + buf += 32; + } + + // Use an unaligned load to mop up the last 32 bytes and get an accurate + // picture to buf_end. + assert(buf <= buf_end && buf >= buf_end - 32); + chars = loadu256(buf_end - 32); + rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, chars, + buf_end - 32, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (rv) { + return rv; + } + + return buf_end; +} diff --git a/src/nfa/multitruffle_sse.h b/src/nfa/multitruffle_sse.h new file mode 100644 index 00000000..b287e4fc --- /dev/null +++ b/src/nfa/multitruffle_sse.h @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "ue2common.h" +#include "multiaccel_common.h" + +/* + * include "block" function + */ +#include "truffle_common.h" + +/* + * multibyte truffle fwd match function: runs block() on one vector and passes + * the inverted result mask to the MATCH_ALGO matcher selected by run_len + */ + +static really_inline +const u8 *JOIN(MATCH_ALGO, fwdBlock)(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, + m128 v, const u8 *buf, const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v) ^ 0xFFFF; + return (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])(buf, z +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); +} + +/* + * 16-byte pipeline, for smaller scans + */ +static +const u8 *JOIN(MATCH_ALGO, trufflePipeline16)(m128 shuf_mask_lo_highclear, + m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end, + const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + const u8* ptr, *last_buf; + u32 last_res; + + // pipeline prologue: scan first 16 bytes + m128 data = load128(buf); + u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data) ^ 0xFFFF; + last_buf = buf; + last_res = z; + 
buf += 16; + + // now, start the pipeline! + assert((size_t)buf % 16 == 0); + for (; buf + 15 < buf_end; buf += 16) { + // scan more data + data = load128(buf); + z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data) ^ 0xFFFF; + + // do a comparison on previous result + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + last_buf = buf; + last_res = z; + } + assert(buf <= buf_end && buf >= buf_end - 16); + + // epilogue: compare final results + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + + return NULL; +} + +/* + * 32-byte pipeline, for bigger scans + */ +static +const u8 *JOIN(MATCH_ALGO, trufflePipeline32)(m128 shuf_mask_lo_highclear, + m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end, + const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + const u8* ptr, *last_buf; + u32 res; + + // pipeline prologue: scan first 32 bytes + m128 data1 = load128(buf); + u32 z1 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data1) ^ 0xFFFF; + m128 data2 = load128(buf + 16); + u32 z2 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data2) ^ 0xFFFF; + + // store the results + u32 last_res = z1 | (z2 << 16); + last_buf = buf; + buf += 32; + + + // now, start the pipeline! 
+ assert((size_t)buf % 16 == 0); + for (; buf + 31 < buf_end; buf += 32) { + // scan more data + data1 = load128(buf); + z1 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data1) ^ 0xFFFF; + data2 = load128(buf + 16); + z2 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data2) ^ 0xFFFF; + res = z1 | (z2 << 16); + + // do a comparison on previous result + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + last_res = res; + last_buf = buf; + } + + // epilogue: compare final results + ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) + (last_buf, last_res +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + + // if we still have some data left, scan it too + for (; buf + 15 < buf_end; buf += 16) { + m128 chars = load128(buf); + ptr = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset, + chars, buf, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(ptr)) { + return ptr; + } + } + assert(buf <= buf_end && buf >= buf_end - 16); + + return NULL; +} + +const u8 *JOIN(MATCH_ALGO, truffleExec)(m128 shuf_mask_lo_highclear, + m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end, const u8 run_len +#ifdef MULTIACCEL_DOUBLE + , const u8 run_len2 +#endif + ) { + DEBUG_PRINTF("run_len %zu\n", buf_end - buf); + + assert(buf && buf_end); + assert(buf < buf_end); + const u8 *rv; + + if (buf_end - buf < 16) { + return truffleMini(shuf_mask_lo_highclear, shuf_mask_lo_highset, buf, buf_end); + } + + size_t min = (size_t)buf % 16; + assert(buf_end - buf >= 16); + + // Preconditioning: most of the time our buffer won't be aligned. 
+ m128 chars = loadu128(buf); + rv = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, buf, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (rv) { + return rv; + } + buf += (16 - min); + + // if we have enough data, run bigger pipeline; otherwise run smaller one + if (buf_end - buf >= 128) { + rv = JOIN(MATCH_ALGO, trufflePipeline32)(shuf_mask_lo_highclear, shuf_mask_lo_highset, + buf, buf_end, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(rv)) { + return rv; + } + } else if (buf_end - buf >= 16){ + rv = JOIN(MATCH_ALGO, trufflePipeline16)(shuf_mask_lo_highclear, shuf_mask_lo_highset, + buf, buf_end, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (unlikely(rv)) { + return rv; + } + } + + // Use an unaligned load to mop up the last 16 bytes and get an accurate + // picture to buf_end. + chars = loadu128(buf_end - 16); + rv = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, + buf_end - 16, run_len +#ifdef MULTIACCEL_DOUBLE + , run_len2 +#endif + ); + if (rv) { + return rv; + } + + return buf_end; +}