Multibyte vermicelli runtime

This commit is contained in:
Anatoly Burakov 2015-12-09 11:46:19 +00:00 committed by Matthew Barr
parent 77ff826bbf
commit dd2ec6bdac
9 changed files with 1141 additions and 1 deletions

View File

@ -444,6 +444,10 @@ set (hs_exec_SRCS
src/nfa/multiaccel_longgrab.h
src/nfa/multiaccel_shift.h
src/nfa/multiaccel_shiftgrab.h
src/nfa/multivermicelli.c
src/nfa/multivermicelli.h
src/nfa/multivermicelli_sse.h
src/nfa/multivermicelli_avx2.h
src/nfa/nfa_api.h
src/nfa/nfa_api_dispatch.c
src/nfa/nfa_internal.h

View File

@ -30,6 +30,7 @@
#include "shufti.h"
#include "truffle.h"
#include "vermicelli.h"
#include "multivermicelli.h"
#include "ue2common.h"
const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
@ -117,6 +118,108 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
rv = c_end;
break;
/* multibyte matchers */
case ACCEL_MLVERM:
DEBUG_PRINTF("accel mlverm %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = long_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
break;
case ACCEL_MLVERM_NOCASE:
DEBUG_PRINTF("accel mlverm nc %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = long_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
break;
case ACCEL_MLGVERM:
DEBUG_PRINTF("accel mlgverm %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = longgrab_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
break;
case ACCEL_MLGVERM_NOCASE:
DEBUG_PRINTF("accel mlgverm nc %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = longgrab_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
break;
case ACCEL_MSVERM:
DEBUG_PRINTF("accel msverm %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = shift_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
break;
case ACCEL_MSVERM_NOCASE:
DEBUG_PRINTF("accel msverm nc %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = shift_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
break;
case ACCEL_MSGVERM:
DEBUG_PRINTF("accel msgverm %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = shiftgrab_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
break;
case ACCEL_MSGVERM_NOCASE:
DEBUG_PRINTF("accel msgverm nc %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = shiftgrab_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
break;
case ACCEL_MDSVERM:
DEBUG_PRINTF("accel mdsverm %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = doubleshift_vermicelliExec(accel->mdverm.c, 0, c, c_end,
accel->mdverm.len1, accel->mdverm.len2);
break;
case ACCEL_MDSVERM_NOCASE:
DEBUG_PRINTF("accel mdsverm nc %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = doubleshift_vermicelliExec(accel->mdverm.c, 1, c, c_end,
accel->mdverm.len1, accel->mdverm.len2);
break;
case ACCEL_MDSGVERM:
DEBUG_PRINTF("accel mdsgverm %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 0, c, c_end,
accel->mdverm.len1, accel->mdverm.len2);
break;
case ACCEL_MDSGVERM_NOCASE:
DEBUG_PRINTF("accel mdsgverm nc %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 1, c, c_end,
accel->mdverm.len1, accel->mdverm.len2);
break;
default:
assert(!"not here");
return c;

View File

@ -60,7 +60,20 @@ enum AccelType {
ACCEL_SHUFTI,
ACCEL_DSHUFTI,
ACCEL_TRUFFLE,
ACCEL_RED_TAPE
ACCEL_RED_TAPE,
/* multibyte vermicellis */
ACCEL_MLVERM,
ACCEL_MLVERM_NOCASE,
ACCEL_MLGVERM,
ACCEL_MLGVERM_NOCASE,
ACCEL_MSVERM,
ACCEL_MSVERM_NOCASE,
ACCEL_MSGVERM,
ACCEL_MSGVERM_NOCASE,
ACCEL_MDSVERM,
ACCEL_MDSVERM_NOCASE,
ACCEL_MDSGVERM,
ACCEL_MDSGVERM_NOCASE,
};
/** \brief Structure for accel framework. */
@ -81,6 +94,19 @@ union AccelAux {
u8 c1; // uppercase if nocase
u8 c2; // uppercase if nocase
} dverm;
struct {
u8 accel_type;
u8 offset;
u8 c; // uppercase if nocase
u8 len;
} mverm;
struct {
u8 accel_type;
u8 offset;
u8 c; // uppercase if nocase
u8 len1;
u8 len2;
} mdverm;
struct {
u8 accel_type;
u8 offset;

View File

@ -86,6 +86,30 @@ const char *accelName(u8 accel_type) {
return "truffle";
case ACCEL_RED_TAPE:
return "red tape";
case ACCEL_MLVERM:
return "multibyte long vermicelli";
case ACCEL_MLVERM_NOCASE:
return "multibyte long vermicelli nocase";
case ACCEL_MLGVERM:
return "multibyte long-grab vermicelli";
case ACCEL_MLGVERM_NOCASE:
return "multibyte long-grab vermicelli nocase";
case ACCEL_MSVERM:
return "multibyte shift vermicelli";
case ACCEL_MSVERM_NOCASE:
return "multibyte shift vermicelli nocase";
case ACCEL_MSGVERM:
return "multibyte shift-grab vermicelli";
case ACCEL_MSGVERM_NOCASE:
return "multibyte shift-grab vermicelli nocase";
case ACCEL_MDSVERM:
return "multibyte doubleshift vermicelli";
case ACCEL_MDSVERM_NOCASE:
return "multibyte doubleshift vermicelli nocase";
case ACCEL_MDSGVERM:
return "multibyte doubleshift-grab vermicelli";
case ACCEL_MDSGVERM_NOCASE:
return "multibyte doubleshift-grab vermicelli nocase";
default:
return "unknown!";
}
@ -143,6 +167,23 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) {
describeClass(cr).c_str());
break;
}
case ACCEL_MLVERM:
case ACCEL_MLVERM_NOCASE:
case ACCEL_MLGVERM:
case ACCEL_MLGVERM_NOCASE:
case ACCEL_MSVERM:
case ACCEL_MSVERM_NOCASE:
case ACCEL_MSGVERM:
case ACCEL_MSGVERM_NOCASE:
fprintf(f, " [\\x%02hhx] len:%u\n", accel.mverm.c, accel.mverm.len);
break;
case ACCEL_MDSVERM:
case ACCEL_MDSVERM_NOCASE:
case ACCEL_MDSGVERM:
case ACCEL_MDSGVERM_NOCASE:
fprintf(f, " [\\x%02hhx] len1:%u len2:%u\n", accel.mdverm.c, accel.mdverm.len1,
accel.mdverm.len2);
break;
default:
fprintf(f, "\n");
break;

View File

@ -38,6 +38,7 @@
#include "nfa_internal.h"
#include "shufti.h"
#include "truffle.h"
#include "multivermicelli.h"
#include "ue2common.h"
#include "vermicelli.h"
#include "util/bitutils.h"
@ -78,6 +79,66 @@ const u8 *accelScan(const union AccelAux *aux, const u8 *ptr, const u8 *end) {
ptr = vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2,
1, ptr, end);
break;
case ACCEL_MLVERM:
DEBUG_PRINTF("long vermicelli for 0x%02hhx\n", aux->mverm.c);
offset = aux->mverm.offset;
ptr = long_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len);
break;
case ACCEL_MLVERM_NOCASE:
DEBUG_PRINTF("long vermicelli-nocase for 0x%02hhx\n", aux->mverm.c);
offset = aux->mverm.offset;
ptr = long_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len);
break;
case ACCEL_MLGVERM:
DEBUG_PRINTF("long grab vermicelli for 0x%02hhx\n", aux->mverm.c);
offset = aux->mverm.offset;
ptr = longgrab_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len);
break;
case ACCEL_MLGVERM_NOCASE:
DEBUG_PRINTF("long grab vermicelli-nocase for 0x%02hhx\n", aux->mverm.c);
offset = aux->mverm.offset;
ptr = longgrab_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len);
break;
case ACCEL_MSVERM:
DEBUG_PRINTF("shift vermicelli for 0x%02hhx\n", aux->mverm.c);
offset = aux->mverm.offset;
ptr = shift_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len);
break;
case ACCEL_MSVERM_NOCASE:
DEBUG_PRINTF("shift vermicelli-nocase for 0x%02hhx\n", aux->mverm.c);
offset = aux->mverm.offset;
ptr = shift_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len);
break;
case ACCEL_MSGVERM:
DEBUG_PRINTF("shift grab vermicelli for 0x%02hhx\n", aux->mverm.c);
offset = aux->mverm.offset;
ptr = shiftgrab_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len);
break;
case ACCEL_MSGVERM_NOCASE:
DEBUG_PRINTF("shift grab vermicelli-nocase for 0x%02hhx\n", aux->mverm.c);
offset = aux->mverm.offset;
ptr = shiftgrab_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len);
break;
case ACCEL_MDSVERM:
    /*
     * The four double-run matchers below take their parameters from the
     * mdverm member of the union; every field, including offset, is read
     * from that same member so the cases stay consistent with each other.
     */
    DEBUG_PRINTF("double shift vermicelli for 0x%02hhx\n", aux->mdverm.c);
    offset = aux->mdverm.offset;
    ptr = doubleshift_vermicelliExec(aux->mdverm.c, 0, ptr, end,
                                     aux->mdverm.len1, aux->mdverm.len2);
    break;
case ACCEL_MDSVERM_NOCASE:
    DEBUG_PRINTF("double shift vermicelli-nocase for 0x%02hhx\n", aux->mdverm.c);
    offset = aux->mdverm.offset;
    ptr = doubleshift_vermicelliExec(aux->mdverm.c, 1, ptr, end,
                                     aux->mdverm.len1, aux->mdverm.len2);
    break;
case ACCEL_MDSGVERM:
    DEBUG_PRINTF("double shift grab vermicelli for 0x%02hhx\n", aux->mdverm.c);
    offset = aux->mdverm.offset;
    ptr = doubleshiftgrab_vermicelliExec(aux->mdverm.c, 0, ptr, end,
                                         aux->mdverm.len1, aux->mdverm.len2);
    break;
case ACCEL_MDSGVERM_NOCASE:
    DEBUG_PRINTF("double shift grab vermicelli-nocase for 0x%02hhx\n", aux->mdverm.c);
    offset = aux->mdverm.offset;
    ptr = doubleshiftgrab_vermicelliExec(aux->mdverm.c, 1, ptr, end,
                                         aux->mdverm.len1, aux->mdverm.len2);
    break;
case ACCEL_SHUFTI:
DEBUG_PRINTF("single shufti\n");
offset = aux->shufti.offset;

108
src/nfa/multivermicelli.c Normal file
View File

@ -0,0 +1,108 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "ue2common.h"
#include "multivermicelli.h"
#include "multiaccel_common.h"
/*
 * Every matcher is produced by including one implementation header once per
 * algorithm. Before each inclusion MATCH_ALGO is set to the prefix that the
 * implementation pastes onto its function names (giving, for example,
 * long_vermicelliExec), and the multiaccel_*.h header for that algorithm is
 * included first (presumably it provides the match function tables the
 * implementation indexes by run length - confirm against those headers).
 *
 * The implementation header is picked once: the SSE version unless __AVX2__
 * is defined, in which case the AVX2 version is used.
 */
#if !defined(__AVX2__)
#define MULTIVERM_IMPL_HEADER "multivermicelli_sse.h"
#else
#define MULTIVERM_IMPL_HEADER "multivermicelli_avx2.h"
#endif

/* single run length matchers */

#define MATCH_ALGO long_
#include "multiaccel_long.h"
#include MULTIVERM_IMPL_HEADER
#undef MATCH_ALGO

#define MATCH_ALGO longgrab_
#include "multiaccel_longgrab.h"
#include MULTIVERM_IMPL_HEADER
#undef MATCH_ALGO

#define MATCH_ALGO shift_
#include "multiaccel_shift.h"
#include MULTIVERM_IMPL_HEADER
#undef MATCH_ALGO

#define MATCH_ALGO shiftgrab_
#include "multiaccel_shiftgrab.h"
#include MULTIVERM_IMPL_HEADER
#undef MATCH_ALGO

/*
 * Matchers taking two run lengths: with MULTIACCEL_DOUBLE defined, the
 * implementation adds a run_len2 parameter to every function it emits.
 */
#define MULTIACCEL_DOUBLE

#define MATCH_ALGO doubleshift_
#include "multiaccel_doubleshift.h"
#include MULTIVERM_IMPL_HEADER
#undef MATCH_ALGO

#define MATCH_ALGO doubleshiftgrab_
#include "multiaccel_doubleshiftgrab.h"
#include MULTIVERM_IMPL_HEADER
#undef MATCH_ALGO

#undef MULTIACCEL_DOUBLE
#undef MULTIVERM_IMPL_HEADER

62
src/nfa/multivermicelli.h Normal file
View File

@ -0,0 +1,62 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MULTIVERMICELLI_H_
#define MULTIVERMICELLI_H_

/**
 * \brief Multibyte vermicelli matchers.
 *
 * All functions share the same contract:
 *
 *  - c:        byte to search for; when nocase is set it must already be
 *              upper-case, because input bytes are masked with CASE_CLEAR
 *              before being compared.
 *  - nocase:   non-zero selects the case-insensitive comparison.
 *  - buf:      first byte to scan; must be below buf_end.
 *  - buf_end:  one past the last byte to scan.
 *  - run_len:  selects the entry of the algorithm's match function table.
 *  - run2_len: (double variants only) passed through to the match function.
 *
 * The return value is the position reported by the algorithm's match
 * function, or buf_end when no block produced one. Buffers shorter than one
 * vector (16 bytes in the SSE build, 32 in the AVX2 build) are scanned byte
 * by byte for c alone, without regard to the run lengths.
 *
 * NOTE(review): these prototypes use u8, so a header defining it (ue2common.h
 * in the visible callers) must be included first.
 */

#ifdef __cplusplus
extern "C" {
#endif

/* single run length matchers */
const u8 *long_vermicelliExec(char c, char nocase, const u8 *buf,
                              const u8 *buf_end, u8 run_len);

const u8 *longgrab_vermicelliExec(char c, char nocase, const u8 *buf,
                                  const u8 *buf_end, u8 run_len);

const u8 *shift_vermicelliExec(char c, char nocase, const u8 *buf,
                               const u8 *buf_end, u8 run_len);

const u8 *shiftgrab_vermicelliExec(char c, char nocase, const u8 *buf,
                                   const u8 *buf_end, u8 run_len);

/* matchers taking two run lengths */
const u8 *doubleshift_vermicelliExec(char c, char nocase, const u8 *buf,
                                     const u8 *buf_end, u8 run_len,
                                     u8 run2_len);

const u8 *doubleshiftgrab_vermicelliExec(char c, char nocase, const u8 *buf,
                                         const u8 *buf_end, u8 run_len,
                                         u8 run2_len);

#ifdef __cplusplus
}
#endif

#endif /* MULTIVERMICELLI_H_ */

View File

@ -0,0 +1,283 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "util/bitutils.h"
#include "util/simd_utils.h"
#include "util/unaligned.h"
#include "multiaccel_common.h"
/*
 * Case-insensitive check of a single 32-byte block read from buf with an
 * unaligned load.
 *
 * Every input byte is masked with CASE_CLEAR and compared against chars; the
 * resulting comparison mask is handed to the MATCH_ALGO match function
 * selected by run_len (together with run_len2 when MULTIACCEL_DOUBLE is
 * defined). The match function's result is returned unchanged; callers treat
 * a non-NULL result as a match position.
 */
static really_inline
const u8 *JOIN(MATCH_ALGO, vermUnalignNocase)(m256 chars,
                                              const u8 *buf,
                                              const u8 run_len
#ifdef MULTIACCEL_DOUBLE
                                              , const u8 run_len2
#endif
                                              ) {
    const m256 caseless = and256(set32x8(CASE_CLEAR), loadu256(buf));
    const u32 hits = movemask256(eq256(chars, caseless));

    return (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
        (buf, hits
#ifdef MULTIACCEL_DOUBLE
         , run_len2
#endif
        );
}
/*
 * Case-sensitive check of a single 32-byte block read from buf with an
 * unaligned load.
 *
 * The block is compared byte-wise against chars and the comparison mask is
 * handed to the MATCH_ALGO match function selected by run_len (together with
 * run_len2 when MULTIACCEL_DOUBLE is defined). The match function's result
 * is returned unchanged; callers treat a non-NULL result as a match position.
 */
static really_inline
const u8 *JOIN(MATCH_ALGO, vermUnalign)(m256 chars,
                                        const u8 *buf,
                                        const u8 run_len
#ifdef MULTIACCEL_DOUBLE
                                        , const u8 run_len2
#endif
                                        ) {
    const u32 hits = movemask256(eq256(chars, loadu256(buf)));

    return (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
        (buf, hits
#ifdef MULTIACCEL_DOUBLE
         , run_len2
#endif
        );
}
/*
 * 32-byte case-sensitive pipeline.
 *
 * Walks [buf, buf_end) in 32-byte steps using load256; the caller passes a
 * 32-byte aligned buf with at least 32 bytes available. The comparison mask
 * of a block is always computed one step before it is examined by the
 * MATCH_ALGO match function, so each iteration loads the next block and then
 * checks the previous one. Bytes after the last whole block are not looked
 * at here.
 *
 * Returns the first non-NULL match function result, otherwise the result for
 * the last block (callers treat NULL as "nothing found").
 */
static really_inline
const u8 *JOIN(MATCH_ALGO, vermPipeline)(m256 chars,
                                         const u8 *buf,
                                         const u8 *buf_end,
                                         const u8 run_len
#ifdef MULTIACCEL_DOUBLE
                                         , const u8 run_len2
#endif
                                         ) {
    // prologue: compute the mask of the first block, examine it later
    const u8 *pending_buf = buf;
    u32 pending_mask = movemask256(eq256(chars, load256(buf)));
    buf += 32;

    assert((size_t)buf % 32 == 0);

    while (buf + 31 < buf_end) {
        // fetch and compare the next block first...
        const u32 next_mask = movemask256(eq256(chars, load256(buf)));

        // ...then examine the block that is still pending
        const u8 *hit = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
            (pending_buf, pending_mask
#ifdef MULTIACCEL_DOUBLE
             , run_len2
#endif
            );
        if (unlikely(hit)) {
            return hit;
        }

        pending_buf = buf;
        pending_mask = next_mask;
        buf += 32;
    }
    assert(buf <= buf_end && buf >= buf_end - 32);

    // epilogue: the last computed mask has not been examined yet
    return (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
        (pending_buf, pending_mask
#ifdef MULTIACCEL_DOUBLE
         , run_len2
#endif
        );
}
/*
 * 32-byte case-insensitive pipeline.
 *
 * Same structure as the case-sensitive pipeline, except that every loaded
 * block is masked with CASE_CLEAR before being compared against chars. The
 * caller passes a 32-byte aligned buf with at least 32 bytes available;
 * bytes after the last whole block are not looked at here.
 *
 * Returns the first non-NULL match function result, otherwise the result for
 * the last block (callers treat NULL as "nothing found").
 */
static really_inline
const u8 *JOIN(MATCH_ALGO, vermPipelineNocase)(m256 chars,
                                               const u8 *buf,
                                               const u8 *buf_end,
                                               const u8 run_len
#ifdef MULTIACCEL_DOUBLE
                                               , const u8 run_len2
#endif
                                               ) {
    const m256 case_clear = set32x8(CASE_CLEAR);

    // prologue: compute the mask of the first block, examine it later
    const u8 *pending_buf = buf;
    u32 pending_mask =
        movemask256(eq256(chars, and256(case_clear, load256(buf))));
    buf += 32;

    assert((size_t)buf % 32 == 0);

    while (buf + 31 < buf_end) {
        // fetch and compare the next block first...
        const u32 next_mask =
            movemask256(eq256(chars, and256(case_clear, load256(buf))));

        // ...then examine the block that is still pending
        const u8 *hit = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
            (pending_buf, pending_mask
#ifdef MULTIACCEL_DOUBLE
             , run_len2
#endif
            );
        if (unlikely(hit)) {
            return hit;
        }

        pending_buf = buf;
        pending_mask = next_mask;
        buf += 32;
    }
    assert(buf <= buf_end && buf >= buf_end - 32);

    // epilogue: the last computed mask has not been examined yet
    return (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
        (pending_buf, pending_mask
#ifdef MULTIACCEL_DOUBLE
         , run_len2
#endif
        );
}
/*
 * AVX2 entry point of the MATCH_ALGO multibyte vermicelli matcher.
 *
 * c        byte to look for (already upper-case when nocase is set)
 * nocase   non-zero for a case-insensitive scan
 * buf      first byte to scan, must be below buf_end
 * buf_end  one past the last byte to scan
 * run_len  index into the MATCH_ALGO match function table
 * run_len2 (MULTIACCEL_DOUBLE only) forwarded to the match function
 *
 * Buffers shorter than 32 bytes are searched byte by byte for c alone, with
 * no regard to the run lengths. Longer buffers are handled in three stages:
 * an unaligned block at buf if buf is not 32-byte aligned, the aligned
 * pipeline, and a final unaligned block covering the last 32 bytes.
 *
 * Returns the first position reported by any stage, or buf_end.
 */
const u8 *JOIN(MATCH_ALGO, vermicelliExec)(char c, char nocase,
                                           const u8 *buf,
                                           const u8 *buf_end,
                                           const u8 run_len
#ifdef MULTIACCEL_DOUBLE
                                           , const u8 run_len2
#endif
                                           ) {
    DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n",
                 nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
    assert(buf < buf_end);

    // Handle small scans.
    if (buf_end - buf < 32) {
        while (buf < buf_end) {
            char cur = (char)*buf;
            if (nocase) {
                cur &= CASE_CLEAR;
            }
            if (cur == c) {
                return buf;
            }
            buf++;
        }
        return buf;
    }

    const m256 chars = set32x8(c); /* nocase already uppercase */
    const u8 *hit;

    // stage 1: unaligned head, then step forward to the next aligned address
    const uintptr_t misalign = (uintptr_t)buf % 32;
    if (misalign) {
        if (nocase) {
            hit = JOIN(MATCH_ALGO, vermUnalignNocase)(chars, buf, run_len
#ifdef MULTIACCEL_DOUBLE
                                                      , run_len2
#endif
                                                      );
        } else {
            hit = JOIN(MATCH_ALGO, vermUnalign)(chars, buf, run_len
#ifdef MULTIACCEL_DOUBLE
                                                , run_len2
#endif
                                                );
        }
        if (unlikely(hit)) {
            return hit;
        }
        buf += 32 - misalign;
    }

    // stage 2: aligned pipeline, only if a whole block is left
    if (buf_end - buf >= 32) {
        if (nocase) {
            hit = JOIN(MATCH_ALGO, vermPipelineNocase)(chars, buf, buf_end,
                                                       run_len
#ifdef MULTIACCEL_DOUBLE
                                                       , run_len2
#endif
                                                       );
        } else {
            hit = JOIN(MATCH_ALGO, vermPipeline)(chars, buf, buf_end, run_len
#ifdef MULTIACCEL_DOUBLE
                                                 , run_len2
#endif
                                                 );
        }
        if (unlikely(hit)) {
            return hit;
        }
    }

    // stage 3: final unaligned scan over the last 32 bytes
    if (nocase) {
        hit = JOIN(MATCH_ALGO, vermUnalignNocase)(chars, buf_end - 32, run_len
#ifdef MULTIACCEL_DOUBLE
                                                  , run_len2
#endif
                                                  );
    } else {
        hit = JOIN(MATCH_ALGO, vermUnalign)(chars, buf_end - 32, run_len
#ifdef MULTIACCEL_DOUBLE
                                            , run_len2
#endif
                                            );
    }

    // nothing found anywhere: report the end of the buffer
    return hit ? hit : buf_end;
}

View File

@ -0,0 +1,452 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "util/bitutils.h"
#include "util/simd_utils.h"
#include "util/unaligned.h"
#define VERM_BOUNDARY 16
#define VERM_TYPE m128
#define VERM_SET_FN set16x8
#include "multiaccel_common.h"
/*
 * Case-insensitive check of a single 16-byte block read from buf with an
 * unaligned load.
 *
 * Every input byte is masked with CASE_CLEAR and compared against chars; the
 * resulting comparison mask is handed to the MATCH_ALGO match function
 * selected by run_len (together with run_len2 when MULTIACCEL_DOUBLE is
 * defined). The match function's result is returned unchanged; callers treat
 * a non-NULL result as a match position.
 */
static really_inline
const u8 *JOIN(MATCH_ALGO, vermUnalignNocase)(m128 chars,
                                              const u8 *buf,
                                              const u8 run_len
#ifdef MULTIACCEL_DOUBLE
                                              , const u8 run_len2
#endif
                                              ) {
    const m128 caseless = and128(set16x8(CASE_CLEAR), loadu128(buf));
    const u32 hits = movemask128(eq128(chars, caseless));

    return (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
        (buf, hits
#ifdef MULTIACCEL_DOUBLE
         , run_len2
#endif
        );
}
/*
 * Case-sensitive check of a single 16-byte block read from buf with an
 * unaligned load.
 *
 * The block is compared byte-wise against chars and the comparison mask is
 * handed to the MATCH_ALGO match function selected by run_len (together with
 * run_len2 when MULTIACCEL_DOUBLE is defined). The match function's result
 * is returned unchanged; callers treat a non-NULL result as a match position.
 */
static really_inline
const u8 *JOIN(MATCH_ALGO, vermUnalign)(m128 chars,
                                        const u8 *buf,
                                        const u8 run_len
#ifdef MULTIACCEL_DOUBLE
                                        , const u8 run_len2
#endif
                                        ) {
    const u32 hits = movemask128(eq128(chars, loadu128(buf)));

    return (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
        (buf, hits
#ifdef MULTIACCEL_DOUBLE
         , run_len2
#endif
        );
}
/*
 * 16-byte case-sensitive pipeline, for smaller scans.
 *
 * Walks [buf, buf_end) in 16-byte steps using load128; callers pass a
 * 16-byte aligned buf with at least 16 bytes available. The comparison mask
 * of a block is computed one step before it is examined by the MATCH_ALGO
 * match function, so each iteration loads the next block and then checks the
 * previous one. Bytes after the last whole block are not looked at here.
 *
 * Returns the first non-NULL match function result, otherwise the result for
 * the last block (callers treat NULL as "nothing found").
 */
static
const u8 *JOIN(MATCH_ALGO, vermPipeline16)(m128 chars,
                                           const u8 *buf,
                                           const u8 *buf_end,
                                           const u8 run_len
#ifdef MULTIACCEL_DOUBLE
                                           , const u8 run_len2
#endif
                                           ) {
    // prologue: compute the mask of the first block, examine it later
    const u8 *pending_buf = buf;
    u32 pending_mask = movemask128(eq128(chars, load128(buf)));
    buf += 16;

    assert((size_t)buf % 16 == 0);

    while (buf + 15 < buf_end) {
        // fetch and compare the next block first...
        const u32 next_mask = movemask128(eq128(chars, load128(buf)));

        // ...then examine the block that is still pending
        const u8 *hit = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
            (pending_buf, pending_mask
#ifdef MULTIACCEL_DOUBLE
             , run_len2
#endif
            );
        if (unlikely(hit)) {
            return hit;
        }

        pending_buf = buf;
        pending_mask = next_mask;
        buf += 16;
    }
    assert(buf <= buf_end && buf >= buf_end - 16);

    // epilogue: the last computed mask has not been examined yet
    return (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
        (pending_buf, pending_mask
#ifdef MULTIACCEL_DOUBLE
         , run_len2
#endif
        );
}
/*
 * 16-byte case-insensitive pipeline, for smaller scans.
 *
 * Same structure as the case-sensitive 16-byte pipeline, except that every
 * loaded block is masked with CASE_CLEAR before being compared against
 * chars. Callers pass a 16-byte aligned buf with at least 16 bytes
 * available; bytes after the last whole block are not looked at here.
 *
 * Returns the first non-NULL match function result, otherwise the result for
 * the last block (callers treat NULL as "nothing found").
 */
static
const u8 *JOIN(MATCH_ALGO, vermPipeline16Nocase)(m128 chars,
                                                 const u8 *buf,
                                                 const u8 *buf_end,
                                                 const u8 run_len
#ifdef MULTIACCEL_DOUBLE
                                                 , const u8 run_len2
#endif
                                                 ) {
    const m128 case_clear = set16x8(CASE_CLEAR);

    // prologue: compute the mask of the first block, examine it later
    const u8 *pending_buf = buf;
    u32 pending_mask =
        movemask128(eq128(chars, and128(case_clear, load128(buf))));
    buf += 16;

    assert((size_t)buf % 16 == 0);

    while (buf + 15 < buf_end) {
        // fetch and compare the next block first...
        const u32 next_mask =
            movemask128(eq128(chars, and128(case_clear, load128(buf))));

        // ...then examine the block that is still pending
        const u8 *hit = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
            (pending_buf, pending_mask
#ifdef MULTIACCEL_DOUBLE
             , run_len2
#endif
            );
        if (unlikely(hit)) {
            return hit;
        }

        pending_buf = buf;
        pending_mask = next_mask;
        buf += 16;
    }
    assert(buf <= buf_end && buf >= buf_end - 16);

    // epilogue: the last computed mask has not been examined yet
    return (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
        (pending_buf, pending_mask
#ifdef MULTIACCEL_DOUBLE
         , run_len2
#endif
        );
}
/*
 * 32-byte case-sensitive pipeline, for bigger scans.
 *
 * Each step loads two consecutive 16-byte blocks with load128 and merges the
 * two comparison masks into one value: the second mask is shifted up by
 * VERM_BOUNDARY bits and OR-ed with the first. As in the 16-byte pipeline,
 * the merged mask of a step is examined by the MATCH_ALGO match function one
 * step after it was computed.
 *
 * Once fewer than 32 bytes remain, a remainder of at least 16 bytes is
 * passed on to the 16-byte pipeline and its result is returned.
 *
 * Returns the first non-NULL match function result, or NULL.
 */
static
const u8 *JOIN(MATCH_ALGO, vermPipeline32)(m128 chars,
                                           const u8 *buf,
                                           const u8 *buf_end,
                                           const u8 run_len
#ifdef MULTIACCEL_DOUBLE
                                           , const u8 run_len2
#endif
                                           ) {
    const u8 *hit;

    // prologue: compute the merged mask of the first 32 bytes
    u32 lo = movemask128(eq128(chars, load128(buf)));
    u32 hi = movemask128(eq128(chars, load128(buf + 16)));
    const u8 *pending_buf = buf;
    u32 pending_mask = lo | (hi << VERM_BOUNDARY);
    buf += 32;

    assert((size_t)buf % 16 == 0);

    while (buf + 31 < buf_end) {
        // fetch and compare the next 32 bytes first...
        lo = movemask128(eq128(chars, load128(buf)));
        hi = movemask128(eq128(chars, load128(buf + 16)));
        const u32 next_mask = lo | (hi << VERM_BOUNDARY);

        // ...then examine the 32 bytes that are still pending
        hit = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
            (pending_buf, pending_mask
#ifdef MULTIACCEL_DOUBLE
             , run_len2
#endif
            );
        if (unlikely(hit)) {
            return hit;
        }

        pending_mask = next_mask;
        pending_buf = buf;
        buf += 32;
    }

    // epilogue: the last computed mask has not been examined yet
    hit = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
        (pending_buf, pending_mask
#ifdef MULTIACCEL_DOUBLE
         , run_len2
#endif
        );
    if (unlikely(hit)) {
        return hit;
    }

    // a whole 16-byte block may be left over: let the small pipeline take it
    if (buf + 15 < buf_end) {
        return JOIN(MATCH_ALGO, vermPipeline16)(chars, buf, buf_end, run_len
#ifdef MULTIACCEL_DOUBLE
                                                , run_len2
#endif
                                                );
    }
    assert(buf <= buf_end && buf >= buf_end - 16);

    return NULL;
}
/*
 * 32-byte caseless pipeline, for bigger scans.
 *
 * Same structure as the case-sensitive 32-byte pipeline, except that every
 * loaded block is masked with CASE_CLEAR before being compared against
 * chars. Each step merges the masks of two consecutive 16-byte blocks: the
 * second mask is shifted up by VERM_BOUNDARY bits and OR-ed with the first.
 *
 * Once fewer than 32 bytes remain, a remainder of at least 16 bytes is
 * passed on to the caseless 16-byte pipeline and its result is returned.
 *
 * Returns the first non-NULL match function result, or NULL.
 */
static
const u8 *JOIN(MATCH_ALGO, vermPipeline32Nocase)(m128 chars,
                                                 const u8 *buf,
                                                 const u8 *buf_end,
                                                 const u8 run_len
#ifdef MULTIACCEL_DOUBLE
                                                 , const u8 run_len2
#endif
                                                 ) {
    const m128 case_clear = set16x8(CASE_CLEAR);
    const u8 *hit;

    // prologue: compute the merged mask of the first 32 bytes
    u32 lo = movemask128(eq128(chars, and128(case_clear, load128(buf))));
    u32 hi = movemask128(eq128(chars, and128(case_clear, load128(buf + 16))));
    const u8 *pending_buf = buf;
    u32 pending_mask = lo | (hi << VERM_BOUNDARY);
    buf += 32;

    assert((size_t)buf % 16 == 0);

    while (buf + 31 < buf_end) {
        // fetch and compare the next 32 bytes first...
        lo = movemask128(eq128(chars, and128(case_clear, load128(buf))));
        hi = movemask128(eq128(chars,
                               and128(case_clear, load128(buf + 16))));
        const u32 next_mask = lo | (hi << VERM_BOUNDARY);

        // ...then examine the 32 bytes that are still pending
        hit = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
            (pending_buf, pending_mask
#ifdef MULTIACCEL_DOUBLE
             , run_len2
#endif
            );
        if (unlikely(hit)) {
            return hit;
        }

        pending_mask = next_mask;
        pending_buf = buf;
        buf += 32;
    }

    // epilogue: the last computed mask has not been examined yet
    hit = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
        (pending_buf, pending_mask
#ifdef MULTIACCEL_DOUBLE
         , run_len2
#endif
        );
    if (unlikely(hit)) {
        return hit;
    }

    // a whole 16-byte block may be left over: let the small pipeline take it
    if (buf + 15 < buf_end) {
        return JOIN(MATCH_ALGO, vermPipeline16Nocase)(chars, buf, buf_end,
                                                      run_len
#ifdef MULTIACCEL_DOUBLE
                                                      , run_len2
#endif
                                                      );
    }
    assert(buf <= buf_end && buf >= buf_end - 16);

    return NULL;
}
/*
 * SSE entry point of the MATCH_ALGO multibyte vermicelli matcher.
 *
 * c        byte to look for (already upper-case when nocase is set)
 * nocase   non-zero for a case-insensitive scan
 * buf      first byte to scan, must be below buf_end
 * buf_end  one past the last byte to scan
 * run_len  index into the MATCH_ALGO match function table
 * run_len2 (MULTIACCEL_DOUBLE only) forwarded to the match function
 *
 * Buffers shorter than VERM_BOUNDARY bytes are searched byte by byte for c
 * alone, with no regard to the run lengths. Longer buffers are handled in
 * three stages: an unaligned block at buf if buf is not aligned to
 * VERM_BOUNDARY, an aligned pipeline (the 32-byte one when at least 128
 * bytes remain, the 16-byte one when at least 16 remain), and a final
 * unaligned block covering the last VERM_BOUNDARY bytes.
 *
 * Returns the first position reported by any stage, or buf_end.
 */
const u8 *JOIN(MATCH_ALGO, vermicelliExec)(char c, char nocase,
                                           const u8 *buf,
                                           const u8 *buf_end,
                                           const u8 run_len
#ifdef MULTIACCEL_DOUBLE
                                           , const u8 run_len2
#endif
                                           ) {
    DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n",
                 nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
    assert(buf < buf_end);

    // Handle small scans.
    if (buf_end - buf < VERM_BOUNDARY) {
        while (buf < buf_end) {
            char cur = (char)*buf;
            if (nocase) {
                cur &= CASE_CLEAR;
            }
            if (cur == c) {
                return buf;
            }
            buf++;
        }
        return buf;
    }

    VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
    const u8 *hit;

    // stage 1: unaligned head, then step forward to the next aligned address
    const uintptr_t misalign = (uintptr_t)buf % VERM_BOUNDARY;
    if (misalign) {
        if (nocase) {
            hit = JOIN(MATCH_ALGO, vermUnalignNocase)(chars, buf, run_len
#ifdef MULTIACCEL_DOUBLE
                                                      , run_len2
#endif
                                                      );
        } else {
            hit = JOIN(MATCH_ALGO, vermUnalign)(chars, buf, run_len
#ifdef MULTIACCEL_DOUBLE
                                                , run_len2
#endif
                                                );
        }
        if (unlikely(hit)) {
            return hit;
        }
        buf += VERM_BOUNDARY - misalign;
    }

    // stage 2: if we have enough data, run bigger pipeline; otherwise run
    // smaller one
    if (buf_end - buf >= 128) {
        if (nocase) {
            hit = JOIN(MATCH_ALGO, vermPipeline32Nocase)(chars, buf, buf_end,
                                                         run_len
#ifdef MULTIACCEL_DOUBLE
                                                         , run_len2
#endif
                                                         );
        } else {
            hit = JOIN(MATCH_ALGO, vermPipeline32)(chars, buf, buf_end,
                                                   run_len
#ifdef MULTIACCEL_DOUBLE
                                                   , run_len2
#endif
                                                   );
        }
        if (unlikely(hit)) {
            return hit;
        }
    } else if (buf_end - buf >= 16) {
        if (nocase) {
            hit = JOIN(MATCH_ALGO, vermPipeline16Nocase)(chars, buf, buf_end,
                                                         run_len
#ifdef MULTIACCEL_DOUBLE
                                                         , run_len2
#endif
                                                         );
        } else {
            hit = JOIN(MATCH_ALGO, vermPipeline16)(chars, buf, buf_end,
                                                   run_len
#ifdef MULTIACCEL_DOUBLE
                                                   , run_len2
#endif
                                                   );
        }
        if (unlikely(hit)) {
            return hit;
        }
    }

    // stage 3: final unaligned scan over the last VERM_BOUNDARY bytes
    if (nocase) {
        hit = JOIN(MATCH_ALGO, vermUnalignNocase)(chars,
                                                  buf_end - VERM_BOUNDARY,
                                                  run_len
#ifdef MULTIACCEL_DOUBLE
                                                  , run_len2
#endif
                                                  );
    } else {
        hit = JOIN(MATCH_ALGO, vermUnalign)(chars, buf_end - VERM_BOUNDARY,
                                            run_len
#ifdef MULTIACCEL_DOUBLE
                                            , run_len2
#endif
                                            );
    }

    // nothing found anywhere: report the end of the buffer
    return hit ? hit : buf_end;
}