mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Multibyte vermicelli runtime
This commit is contained in:
parent
77ff826bbf
commit
dd2ec6bdac
@ -444,6 +444,10 @@ set (hs_exec_SRCS
|
||||
src/nfa/multiaccel_longgrab.h
|
||||
src/nfa/multiaccel_shift.h
|
||||
src/nfa/multiaccel_shiftgrab.h
|
||||
src/nfa/multivermicelli.c
|
||||
src/nfa/multivermicelli.h
|
||||
src/nfa/multivermicelli_sse.h
|
||||
src/nfa/multivermicelli_avx2.h
|
||||
src/nfa/nfa_api.h
|
||||
src/nfa/nfa_api_dispatch.c
|
||||
src/nfa/nfa_internal.h
|
||||
|
103
src/nfa/accel.c
103
src/nfa/accel.c
@ -30,6 +30,7 @@
|
||||
#include "shufti.h"
|
||||
#include "truffle.h"
|
||||
#include "vermicelli.h"
|
||||
#include "multivermicelli.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
|
||||
@ -117,6 +118,108 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
|
||||
rv = c_end;
|
||||
break;
|
||||
|
||||
/* multibyte matchers */
|
||||
case ACCEL_MLVERM:
|
||||
DEBUG_PRINTF("accel mlverm %p %p\n", c, c_end);
|
||||
if (c + 15 >= c_end) {
|
||||
return c;
|
||||
}
|
||||
|
||||
rv = long_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
|
||||
break;
|
||||
case ACCEL_MLVERM_NOCASE:
|
||||
DEBUG_PRINTF("accel mlverm nc %p %p\n", c, c_end);
|
||||
if (c + 15 >= c_end) {
|
||||
return c;
|
||||
}
|
||||
|
||||
rv = long_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
|
||||
break;
|
||||
case ACCEL_MLGVERM:
|
||||
DEBUG_PRINTF("accel mlgverm %p %p\n", c, c_end);
|
||||
if (c + 15 >= c_end) {
|
||||
return c;
|
||||
}
|
||||
|
||||
rv = longgrab_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
|
||||
break;
|
||||
case ACCEL_MLGVERM_NOCASE:
|
||||
DEBUG_PRINTF("accel mlgverm nc %p %p\n", c, c_end);
|
||||
if (c + 15 >= c_end) {
|
||||
return c;
|
||||
}
|
||||
|
||||
rv = longgrab_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
|
||||
break;
|
||||
case ACCEL_MSVERM:
|
||||
DEBUG_PRINTF("accel msverm %p %p\n", c, c_end);
|
||||
if (c + 15 >= c_end) {
|
||||
return c;
|
||||
}
|
||||
|
||||
rv = shift_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
|
||||
break;
|
||||
case ACCEL_MSVERM_NOCASE:
|
||||
DEBUG_PRINTF("accel msverm nc %p %p\n", c, c_end);
|
||||
if (c + 15 >= c_end) {
|
||||
return c;
|
||||
}
|
||||
|
||||
rv = shift_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
|
||||
break;
|
||||
case ACCEL_MSGVERM:
|
||||
DEBUG_PRINTF("accel msgverm %p %p\n", c, c_end);
|
||||
if (c + 15 >= c_end) {
|
||||
return c;
|
||||
}
|
||||
|
||||
rv = shiftgrab_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
|
||||
break;
|
||||
case ACCEL_MSGVERM_NOCASE:
|
||||
DEBUG_PRINTF("accel msgverm nc %p %p\n", c, c_end);
|
||||
if (c + 15 >= c_end) {
|
||||
return c;
|
||||
}
|
||||
|
||||
rv = shiftgrab_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
|
||||
break;
|
||||
case ACCEL_MDSVERM:
|
||||
DEBUG_PRINTF("accel mdsverm %p %p\n", c, c_end);
|
||||
if (c + 15 >= c_end) {
|
||||
return c;
|
||||
}
|
||||
|
||||
rv = doubleshift_vermicelliExec(accel->mdverm.c, 0, c, c_end,
|
||||
accel->mdverm.len1, accel->mdverm.len2);
|
||||
break;
|
||||
case ACCEL_MDSVERM_NOCASE:
|
||||
DEBUG_PRINTF("accel mdsverm nc %p %p\n", c, c_end);
|
||||
if (c + 15 >= c_end) {
|
||||
return c;
|
||||
}
|
||||
|
||||
rv = doubleshift_vermicelliExec(accel->mdverm.c, 1, c, c_end,
|
||||
accel->mdverm.len1, accel->mdverm.len2);
|
||||
break;
|
||||
case ACCEL_MDSGVERM:
|
||||
DEBUG_PRINTF("accel mdsgverm %p %p\n", c, c_end);
|
||||
if (c + 15 >= c_end) {
|
||||
return c;
|
||||
}
|
||||
|
||||
rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 0, c, c_end,
|
||||
accel->mdverm.len1, accel->mdverm.len2);
|
||||
break;
|
||||
case ACCEL_MDSGVERM_NOCASE:
|
||||
DEBUG_PRINTF("accel mdsgverm nc %p %p\n", c, c_end);
|
||||
if (c + 15 >= c_end) {
|
||||
return c;
|
||||
}
|
||||
|
||||
rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 1, c, c_end,
|
||||
accel->mdverm.len1, accel->mdverm.len2);
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(!"not here");
|
||||
return c;
|
||||
|
@ -60,7 +60,20 @@ enum AccelType {
|
||||
ACCEL_SHUFTI,
|
||||
ACCEL_DSHUFTI,
|
||||
ACCEL_TRUFFLE,
|
||||
ACCEL_RED_TAPE
|
||||
ACCEL_RED_TAPE,
|
||||
/* multibyte vermicellis */
|
||||
ACCEL_MLVERM,
|
||||
ACCEL_MLVERM_NOCASE,
|
||||
ACCEL_MLGVERM,
|
||||
ACCEL_MLGVERM_NOCASE,
|
||||
ACCEL_MSVERM,
|
||||
ACCEL_MSVERM_NOCASE,
|
||||
ACCEL_MSGVERM,
|
||||
ACCEL_MSGVERM_NOCASE,
|
||||
ACCEL_MDSVERM,
|
||||
ACCEL_MDSVERM_NOCASE,
|
||||
ACCEL_MDSGVERM,
|
||||
ACCEL_MDSGVERM_NOCASE,
|
||||
};
|
||||
|
||||
/** \brief Structure for accel framework. */
|
||||
@ -81,6 +94,19 @@ union AccelAux {
|
||||
u8 c1; // uppercase if nocase
|
||||
u8 c2; // uppercase if nocase
|
||||
} dverm;
|
||||
struct {
|
||||
u8 accel_type;
|
||||
u8 offset;
|
||||
u8 c; // uppercase if nocase
|
||||
u8 len;
|
||||
} mverm;
|
||||
struct {
|
||||
u8 accel_type;
|
||||
u8 offset;
|
||||
u8 c; // uppercase if nocase
|
||||
u8 len1;
|
||||
u8 len2;
|
||||
} mdverm;
|
||||
struct {
|
||||
u8 accel_type;
|
||||
u8 offset;
|
||||
|
@ -86,6 +86,30 @@ const char *accelName(u8 accel_type) {
|
||||
return "truffle";
|
||||
case ACCEL_RED_TAPE:
|
||||
return "red tape";
|
||||
case ACCEL_MLVERM:
|
||||
return "multibyte long vermicelli";
|
||||
case ACCEL_MLVERM_NOCASE:
|
||||
return "multibyte long vermicelli nocase";
|
||||
case ACCEL_MLGVERM:
|
||||
return "multibyte long-grab vermicelli";
|
||||
case ACCEL_MLGVERM_NOCASE:
|
||||
return "multibyte long-grab vermicelli nocase";
|
||||
case ACCEL_MSVERM:
|
||||
return "multibyte shift vermicelli";
|
||||
case ACCEL_MSVERM_NOCASE:
|
||||
return "multibyte shift vermicelli nocase";
|
||||
case ACCEL_MSGVERM:
|
||||
return "multibyte shift-grab vermicelli";
|
||||
case ACCEL_MSGVERM_NOCASE:
|
||||
return "multibyte shift-grab vermicelli nocase";
|
||||
case ACCEL_MDSVERM:
|
||||
return "multibyte doubleshift vermicelli";
|
||||
case ACCEL_MDSVERM_NOCASE:
|
||||
return "multibyte doubleshift vermicelli nocase";
|
||||
case ACCEL_MDSGVERM:
|
||||
return "multibyte doubleshift-grab vermicelli";
|
||||
case ACCEL_MDSGVERM_NOCASE:
|
||||
return "multibyte doubleshift-grab vermicelli nocase";
|
||||
default:
|
||||
return "unknown!";
|
||||
}
|
||||
@ -143,6 +167,23 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) {
|
||||
describeClass(cr).c_str());
|
||||
break;
|
||||
}
|
||||
case ACCEL_MLVERM:
|
||||
case ACCEL_MLVERM_NOCASE:
|
||||
case ACCEL_MLGVERM:
|
||||
case ACCEL_MLGVERM_NOCASE:
|
||||
case ACCEL_MSVERM:
|
||||
case ACCEL_MSVERM_NOCASE:
|
||||
case ACCEL_MSGVERM:
|
||||
case ACCEL_MSGVERM_NOCASE:
|
||||
fprintf(f, " [\\x%02hhx] len:%u\n", accel.mverm.c, accel.mverm.len);
|
||||
break;
|
||||
case ACCEL_MDSVERM:
|
||||
case ACCEL_MDSVERM_NOCASE:
|
||||
case ACCEL_MDSGVERM:
|
||||
case ACCEL_MDSGVERM_NOCASE:
|
||||
fprintf(f, " [\\x%02hhx] len1:%u len2:%u\n", accel.mdverm.c, accel.mdverm.len1,
|
||||
accel.mdverm.len2);
|
||||
break;
|
||||
default:
|
||||
fprintf(f, "\n");
|
||||
break;
|
||||
|
@ -38,6 +38,7 @@
|
||||
#include "nfa_internal.h"
|
||||
#include "shufti.h"
|
||||
#include "truffle.h"
|
||||
#include "multivermicelli.h"
|
||||
#include "ue2common.h"
|
||||
#include "vermicelli.h"
|
||||
#include "util/bitutils.h"
|
||||
@ -78,6 +79,66 @@ const u8 *accelScan(const union AccelAux *aux, const u8 *ptr, const u8 *end) {
|
||||
ptr = vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2,
|
||||
1, ptr, end);
|
||||
break;
|
||||
case ACCEL_MLVERM:
|
||||
DEBUG_PRINTF("long vermicelli for 0x%02hhx\n", aux->mverm.c);
|
||||
offset = aux->mverm.offset;
|
||||
ptr = long_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len);
|
||||
break;
|
||||
case ACCEL_MLVERM_NOCASE:
|
||||
DEBUG_PRINTF("long vermicelli-nocase for 0x%02hhx\n", aux->mverm.c);
|
||||
offset = aux->mverm.offset;
|
||||
ptr = long_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len);
|
||||
break;
|
||||
case ACCEL_MLGVERM:
|
||||
DEBUG_PRINTF("long grab vermicelli for 0x%02hhx\n", aux->mverm.c);
|
||||
offset = aux->mverm.offset;
|
||||
ptr = longgrab_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len);
|
||||
break;
|
||||
case ACCEL_MLGVERM_NOCASE:
|
||||
DEBUG_PRINTF("long grab vermicelli-nocase for 0x%02hhx\n", aux->mverm.c);
|
||||
offset = aux->mverm.offset;
|
||||
ptr = longgrab_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len);
|
||||
break;
|
||||
case ACCEL_MSVERM:
|
||||
DEBUG_PRINTF("shift vermicelli for 0x%02hhx\n", aux->mverm.c);
|
||||
offset = aux->mverm.offset;
|
||||
ptr = shift_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len);
|
||||
break;
|
||||
case ACCEL_MSVERM_NOCASE:
|
||||
DEBUG_PRINTF("shift vermicelli-nocase for 0x%02hhx\n", aux->mverm.c);
|
||||
offset = aux->mverm.offset;
|
||||
ptr = shift_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len);
|
||||
break;
|
||||
case ACCEL_MSGVERM:
|
||||
DEBUG_PRINTF("shift grab vermicelli for 0x%02hhx\n", aux->mverm.c);
|
||||
offset = aux->mverm.offset;
|
||||
ptr = shiftgrab_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len);
|
||||
break;
|
||||
case ACCEL_MSGVERM_NOCASE:
|
||||
DEBUG_PRINTF("shift grab vermicelli-nocase for 0x%02hhx\n", aux->mverm.c);
|
||||
offset = aux->mverm.offset;
|
||||
ptr = shiftgrab_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len);
|
||||
break;
|
||||
case ACCEL_MDSVERM:
|
||||
DEBUG_PRINTF("double shift vermicelli for 0x%02hhx\n", aux->mdverm.c);
|
||||
offset = aux->mdverm.offset;
|
||||
ptr = doubleshift_vermicelliExec(aux->mdverm.c, 0, ptr, end, aux->mdverm.len1, aux->mdverm.len2);
|
||||
break;
|
||||
case ACCEL_MDSVERM_NOCASE:
|
||||
DEBUG_PRINTF("double shift vermicelli-nocase for 0x%02hhx\n", aux->mdverm.c);
|
||||
offset = aux->mverm.offset;
|
||||
ptr = doubleshift_vermicelliExec(aux->mdverm.c, 1, ptr, end, aux->mdverm.len1, aux->mdverm.len2);
|
||||
break;
|
||||
case ACCEL_MDSGVERM:
|
||||
DEBUG_PRINTF("double shift grab vermicelli for 0x%02hhx\n", aux->mdverm.c);
|
||||
offset = aux->mverm.offset;
|
||||
ptr = doubleshiftgrab_vermicelliExec(aux->mdverm.c, 0, ptr, end, aux->mdverm.len1, aux->mdverm.len2);
|
||||
break;
|
||||
case ACCEL_MDSGVERM_NOCASE:
|
||||
DEBUG_PRINTF("double shift grab vermicelli-nocase for 0x%02hhx\n", aux->mdverm.c);
|
||||
offset = aux->mverm.offset;
|
||||
ptr = doubleshiftgrab_vermicelliExec(aux->mdverm.c, 1, ptr, end, aux->mdverm.len1, aux->mdverm.len2);
|
||||
break;
|
||||
case ACCEL_SHUFTI:
|
||||
DEBUG_PRINTF("single shufti\n");
|
||||
offset = aux->shufti.offset;
|
||||
|
108
src/nfa/multivermicelli.c
Normal file
108
src/nfa/multivermicelli.c
Normal file
@ -0,0 +1,108 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include "multivermicelli.h"
|
||||
|
||||
#include "multiaccel_common.h"
|
||||
|
||||
/*
 * Instantiate every multibyte vermicelli variant.
 *
 * The implementation header (multivermicelli_avx2.h or multivermicelli_sse.h)
 * acts as a template: each inclusion emits a set of functions whose names are
 * prefixed with the current MATCH_ALGO token, ending in the public
 * <MATCH_ALGO>vermicelliExec() entry point. The multiaccel_*.h header included
 * just before it presumably supplies the matcher tables that the
 * implementation indexes by run length.
 *
 * While MULTIACCEL_DOUBLE is defined, the generated functions take a second
 * run length argument (run_len2).
 */
#if defined(__AVX2__)

/* ---- AVX2 build: single run length variants ---- */

#define MATCH_ALGO long_
#include "multiaccel_long.h"
#include "multivermicelli_avx2.h"
#undef MATCH_ALGO

#define MATCH_ALGO longgrab_
#include "multiaccel_longgrab.h"
#include "multivermicelli_avx2.h"
#undef MATCH_ALGO

#define MATCH_ALGO shift_
#include "multiaccel_shift.h"
#include "multivermicelli_avx2.h"
#undef MATCH_ALGO

#define MATCH_ALGO shiftgrab_
#include "multiaccel_shiftgrab.h"
#include "multivermicelli_avx2.h"
#undef MATCH_ALGO

/* ---- AVX2 build: double run length variants ---- */

#define MULTIACCEL_DOUBLE

#define MATCH_ALGO doubleshift_
#include "multiaccel_doubleshift.h"
#include "multivermicelli_avx2.h"
#undef MATCH_ALGO

#define MATCH_ALGO doubleshiftgrab_
#include "multiaccel_doubleshiftgrab.h"
#include "multivermicelli_avx2.h"
#undef MATCH_ALGO

#undef MULTIACCEL_DOUBLE

#else /* !__AVX2__ */

/* ---- SSE build: single run length variants ---- */

#define MATCH_ALGO long_
#include "multiaccel_long.h"
#include "multivermicelli_sse.h"
#undef MATCH_ALGO

#define MATCH_ALGO longgrab_
#include "multiaccel_longgrab.h"
#include "multivermicelli_sse.h"
#undef MATCH_ALGO

#define MATCH_ALGO shift_
#include "multiaccel_shift.h"
#include "multivermicelli_sse.h"
#undef MATCH_ALGO

#define MATCH_ALGO shiftgrab_
#include "multiaccel_shiftgrab.h"
#include "multivermicelli_sse.h"
#undef MATCH_ALGO

/* ---- SSE build: double run length variants ---- */

#define MULTIACCEL_DOUBLE

#define MATCH_ALGO doubleshift_
#include "multiaccel_doubleshift.h"
#include "multivermicelli_sse.h"
#undef MATCH_ALGO

#define MATCH_ALGO doubleshiftgrab_
#include "multiaccel_doubleshiftgrab.h"
#include "multivermicelli_sse.h"
#undef MATCH_ALGO

#undef MULTIACCEL_DOUBLE

#endif /* __AVX2__ */
|
62
src/nfa/multivermicelli.h
Normal file
62
src/nfa/multivermicelli.h
Normal file
@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef MULTIVERMICELLI_H_
|
||||
#define MULTIVERMICELLI_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
const u8 *long_vermicelliExec(char c, char nocase, const u8 *buf,
|
||||
const u8 *buf_end, const u8 run_len);
|
||||
|
||||
const u8 *longgrab_vermicelliExec(char c, char nocase, const u8 *buf,
|
||||
const u8 *buf_end, const u8 run_len);
|
||||
|
||||
const u8 *shift_vermicelliExec(char c, char nocase, const u8 *buf,
|
||||
const u8 *buf_end, const u8 run_len);
|
||||
|
||||
const u8 *shiftgrab_vermicelliExec(char c, char nocase, const u8 *buf,
|
||||
const u8 *buf_end, const u8 run_len);
|
||||
|
||||
const u8 *doubleshift_vermicelliExec(char c, char nocase, const u8 *buf,
|
||||
const u8 *buf_end, const u8 run_len,
|
||||
const u8 run2_len);
|
||||
|
||||
const u8 *doubleshiftgrab_vermicelliExec(char c, char nocase, const u8 *buf,
|
||||
const u8 *buf_end, const u8 run_len,
|
||||
const u8 run2_len);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* MULTIVERMICELLI_H_ */
|
283
src/nfa/multivermicelli_avx2.h
Normal file
283
src/nfa/multivermicelli_avx2.h
Normal file
@ -0,0 +1,283 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "util/bitutils.h"
|
||||
#include "util/simd_utils.h"
|
||||
#include "util/unaligned.h"
|
||||
|
||||
#include "multiaccel_common.h"
|
||||
|
||||
static really_inline
|
||||
const u8 *JOIN(MATCH_ALGO, vermUnalignNocase)(m256 chars,
|
||||
const u8 *buf,
|
||||
const u8 run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, const u8 run_len2
|
||||
#endif
|
||||
) {
|
||||
m256 casemask = set32x8(CASE_CLEAR);
|
||||
const u8 *ptr;
|
||||
m256 data = loadu256(buf);
|
||||
u32 z = movemask256(eq256(chars, and256(casemask, data)));
|
||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
||||
(buf, z
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
if (unlikely(ptr)) {
|
||||
return ptr;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *JOIN(MATCH_ALGO, vermUnalign)(m256 chars,
|
||||
const u8 *buf,
|
||||
const u8 run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, const u8 run_len2
|
||||
#endif
|
||||
) {
|
||||
const u8 *ptr;
|
||||
|
||||
m256 data = loadu256(buf);
|
||||
u32 z = movemask256(eq256(chars, data));
|
||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
||||
(buf, z
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
if (unlikely(ptr)) {
|
||||
return ptr;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* 32-byte pipeline
|
||||
*/
|
||||
static really_inline
|
||||
const u8 *JOIN(MATCH_ALGO, vermPipeline)(m256 chars,
|
||||
const u8 *buf,
|
||||
const u8 *buf_end,
|
||||
const u8 run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, const u8 run_len2
|
||||
#endif
|
||||
) {
|
||||
const u8* ptr, *last_buf;
|
||||
u32 last_res;
|
||||
|
||||
// pipeline prologue: scan first 32 bytes
|
||||
m256 data = load256(buf);
|
||||
u32 z = movemask256(eq256(chars, data));
|
||||
last_res = z;
|
||||
last_buf = buf;
|
||||
buf += 32;
|
||||
|
||||
// now, start the pipeline!
|
||||
assert((size_t)buf % 32 == 0);
|
||||
for (; buf + 31 < buf_end; buf += 32) {
|
||||
// scan more data
|
||||
data = load256(buf);
|
||||
z = movemask256(eq256(chars, data));
|
||||
|
||||
// do a comparison on previous result
|
||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
||||
(last_buf, last_res
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
if (unlikely(ptr)) {
|
||||
return ptr;
|
||||
}
|
||||
last_buf = buf;
|
||||
last_res = z;
|
||||
}
|
||||
assert(buf <= buf_end && buf >= buf_end - 32);
|
||||
|
||||
// epilogue: compare final results
|
||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
||||
(last_buf, last_res
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
if (unlikely(ptr)) {
|
||||
return ptr;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* 32-byte caseless pipeline
|
||||
*/
|
||||
static really_inline
|
||||
const u8 *JOIN(MATCH_ALGO, vermPipelineNocase)(m256 chars,
|
||||
const u8 *buf,
|
||||
const u8 *buf_end,
|
||||
const u8 run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, const u8 run_len2
|
||||
#endif
|
||||
) {
|
||||
m256 casemask = set32x8(CASE_CLEAR);
|
||||
const u8* ptr, *last_buf;
|
||||
u32 last_res;
|
||||
|
||||
// pipeline prologue: scan first 32 bytes
|
||||
m256 data = load256(buf);
|
||||
u32 z = movemask256(eq256(chars, and256(casemask, data)));
|
||||
last_res = z;
|
||||
last_buf = buf;
|
||||
buf += 32;
|
||||
|
||||
|
||||
// now, start the pipeline!
|
||||
assert((size_t)buf % 32 == 0);
|
||||
for (; buf + 31 < buf_end; buf += 32) {
|
||||
// scan more data
|
||||
data = load256(buf);
|
||||
z = movemask256(eq256(chars, and256(casemask, data)));
|
||||
|
||||
// do a comparison on previous result
|
||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
||||
(last_buf, last_res
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
if (unlikely(ptr)) {
|
||||
return ptr;
|
||||
}
|
||||
last_buf = buf;
|
||||
last_res = z;
|
||||
}
|
||||
assert(buf <= buf_end && buf >= buf_end - 32);
|
||||
|
||||
// epilogue: compare final results
|
||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
||||
(last_buf, last_res
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
if (unlikely(ptr)) {
|
||||
return ptr;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const u8 *JOIN(MATCH_ALGO, vermicelliExec)(char c, char nocase,
|
||||
const u8 *buf,
|
||||
const u8 *buf_end,
|
||||
const u8 run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, const u8 run_len2
|
||||
#endif
|
||||
) {
|
||||
DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n",
|
||||
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
|
||||
assert(buf < buf_end);
|
||||
|
||||
const u8 *ptr;
|
||||
|
||||
// Handle small scans.
|
||||
if (buf_end - buf < 32) {
|
||||
for (; buf < buf_end; buf++) {
|
||||
char cur = (char)*buf;
|
||||
if (nocase) {
|
||||
cur &= CASE_CLEAR;
|
||||
}
|
||||
if (cur == c) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
|
||||
m256 chars = set32x8(c); /* nocase already uppercase */
|
||||
|
||||
uintptr_t min = (uintptr_t)buf % 32;
|
||||
|
||||
if (min) {
|
||||
ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars,
|
||||
buf, run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
) : JOIN(MATCH_ALGO, vermUnalign)(chars,
|
||||
buf, run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
if (unlikely(ptr)) {
|
||||
return ptr;
|
||||
}
|
||||
buf += 32 - min;
|
||||
}
|
||||
|
||||
if (buf_end - buf >= 32){
|
||||
ptr = nocase ? JOIN(MATCH_ALGO, vermPipelineNocase)(chars,
|
||||
buf, buf_end, run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
) : JOIN(MATCH_ALGO, vermPipeline)(chars,
|
||||
buf, buf_end, run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
if (unlikely(ptr)) {
|
||||
return ptr;
|
||||
}
|
||||
}
|
||||
|
||||
// final unaligned scan
|
||||
ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars,
|
||||
buf_end - 32, run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
) : JOIN(MATCH_ALGO, vermUnalign)(chars,
|
||||
buf_end - 32, run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
|
||||
// run our pipeline
|
||||
return ptr ? ptr : buf_end;
|
||||
}
|
452
src/nfa/multivermicelli_sse.h
Normal file
452
src/nfa/multivermicelli_sse.h
Normal file
@ -0,0 +1,452 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "util/bitutils.h"
|
||||
#include "util/simd_utils.h"
|
||||
#include "util/unaligned.h"
|
||||
|
||||
#define VERM_BOUNDARY 16
|
||||
#define VERM_TYPE m128
|
||||
#define VERM_SET_FN set16x8
|
||||
|
||||
#include "multiaccel_common.h"
|
||||
|
||||
static really_inline
|
||||
const u8 *JOIN(MATCH_ALGO, vermUnalignNocase)(m128 chars,
|
||||
const u8 *buf,
|
||||
const u8 run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, const u8 run_len2
|
||||
#endif
|
||||
) {
|
||||
m128 casemask = set16x8(CASE_CLEAR);
|
||||
const u8 *ptr;
|
||||
m128 data = loadu128(buf);
|
||||
u32 z = movemask128(eq128(chars, and128(casemask, data)));
|
||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
|
||||
(buf, z
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
if (unlikely(ptr)) {
|
||||
return ptr;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *JOIN(MATCH_ALGO, vermUnalign)(m128 chars,
|
||||
const u8 *buf,
|
||||
const u8 run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, const u8 run_len2
|
||||
#endif
|
||||
) {
|
||||
const u8 *ptr;
|
||||
|
||||
m128 data = loadu128(buf);
|
||||
u32 z = movemask128(eq128(chars, data));
|
||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
|
||||
(buf, z
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
if (unlikely(ptr)) {
|
||||
return ptr;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* 16-byte pipeline, for smaller scans
|
||||
*/
|
||||
static
|
||||
const u8 *JOIN(MATCH_ALGO, vermPipeline16)(m128 chars,
|
||||
const u8 *buf,
|
||||
const u8 *buf_end,
|
||||
const u8 run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, const u8 run_len2
|
||||
#endif
|
||||
) {
|
||||
const u8* ptr, *last_buf;
|
||||
u32 last_res;
|
||||
|
||||
// pipeline prologue: scan first 16 bytes
|
||||
m128 data = load128(buf);
|
||||
u32 z = movemask128(eq128(chars, data));
|
||||
last_buf = buf;
|
||||
last_res = z;
|
||||
buf += 16;
|
||||
|
||||
// now, start the pipeline!
|
||||
assert((size_t)buf % 16 == 0);
|
||||
for (; buf + 15 < buf_end; buf += 16) {
|
||||
// scan more data
|
||||
data = load128(buf);
|
||||
z = movemask128(eq128(chars, data));
|
||||
|
||||
// do a comparison on previous result
|
||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
|
||||
(last_buf, last_res
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
if (unlikely(ptr)) {
|
||||
return ptr;
|
||||
}
|
||||
last_buf = buf;
|
||||
last_res = z;
|
||||
}
|
||||
assert(buf <= buf_end && buf >= buf_end - 16);
|
||||
|
||||
// epilogue: compare final results
|
||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
|
||||
(last_buf, last_res
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
if (unlikely(ptr)) {
|
||||
return ptr;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* 16-byte pipeline, for smaller scans
|
||||
*/
|
||||
static
|
||||
const u8 *JOIN(MATCH_ALGO, vermPipeline16Nocase)(m128 chars,
|
||||
const u8 *buf,
|
||||
const u8 *buf_end,
|
||||
const u8 run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, const u8 run_len2
|
||||
#endif
|
||||
) {
|
||||
m128 casemask = set16x8(CASE_CLEAR);
|
||||
const u8* ptr, *last_buf;
|
||||
u32 last_res;
|
||||
|
||||
// pipeline prologue: scan first 16 bytes
|
||||
m128 data = load128(buf);
|
||||
u32 z = movemask128(eq128(chars, and128(casemask, data)));
|
||||
last_buf = buf;
|
||||
last_res = z;
|
||||
buf += 16;
|
||||
|
||||
// now, start the pipeline!
|
||||
assert((size_t)buf % 16 == 0);
|
||||
for (; buf + 15 < buf_end; buf += 16) {
|
||||
// scan more data
|
||||
data = load128(buf);
|
||||
z = movemask128(eq128(chars, and128(casemask, data)));
|
||||
|
||||
// do a comparison on previous result
|
||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
|
||||
(last_buf, last_res
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
if (unlikely(ptr)) {
|
||||
return ptr;
|
||||
}
|
||||
last_buf = buf;
|
||||
last_res = z;
|
||||
}
|
||||
assert(buf <= buf_end && buf >= buf_end - 16);
|
||||
|
||||
// epilogue: compare final results
|
||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
|
||||
(last_buf, last_res
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
if (unlikely(ptr)) {
|
||||
return ptr;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* 32-byte pipeline, for bigger scans
|
||||
*/
|
||||
static
|
||||
const u8 *JOIN(MATCH_ALGO, vermPipeline32)(m128 chars,
|
||||
const u8 *buf,
|
||||
const u8 *buf_end,
|
||||
const u8 run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, const u8 run_len2
|
||||
#endif
|
||||
) {
|
||||
const u8* ptr, *last_buf;
|
||||
u32 res;
|
||||
|
||||
// pipeline prologue: scan first 32 bytes
|
||||
m128 data1 = load128(buf);
|
||||
u32 z1 = movemask128(eq128(chars, data1));
|
||||
m128 data2 = load128(buf + 16);
|
||||
u32 z2 = movemask128(eq128(chars, data2));
|
||||
|
||||
// store the results
|
||||
u32 last_res = z1 | (z2 << VERM_BOUNDARY);
|
||||
last_buf = buf;
|
||||
buf += 32;
|
||||
|
||||
|
||||
// now, start the pipeline!
|
||||
assert((size_t)buf % 16 == 0);
|
||||
for (; buf + 31 < buf_end; buf += 32) {
|
||||
// scan more data
|
||||
data1 = load128(buf);
|
||||
z1 = movemask128(eq128(chars, data1));
|
||||
data2 = load128(buf + 16);
|
||||
z2 = movemask128(eq128(chars, data2));
|
||||
res = z1 | (z2 << 16);
|
||||
|
||||
// do a comparison on previous result
|
||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
||||
(last_buf, last_res
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
if (unlikely(ptr)) {
|
||||
return ptr;
|
||||
}
|
||||
last_res = res;
|
||||
last_buf = buf;
|
||||
}
|
||||
|
||||
// epilogue: compare final results
|
||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
||||
(last_buf, last_res
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
if (unlikely(ptr)) {
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// if we still have some data left, scan it too
|
||||
if (buf + 15 < buf_end) {
|
||||
return JOIN(MATCH_ALGO, vermPipeline16)(chars, buf, buf_end, run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
}
|
||||
assert(buf <= buf_end && buf >= buf_end - 16);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* 32-byte caseless pipeline, for bigger scans
|
||||
*/
|
||||
static
|
||||
const u8 *JOIN(MATCH_ALGO, vermPipeline32Nocase)(m128 chars,
|
||||
const u8 *buf,
|
||||
const u8 *buf_end,
|
||||
const u8 run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, const u8 run_len2
|
||||
#endif
|
||||
) {
|
||||
m128 casemask = set16x8(CASE_CLEAR);
|
||||
const u8* ptr, *last_buf;
|
||||
u32 last_res;
|
||||
|
||||
// pipeline prologue: scan first 32 bytes
|
||||
m128 data1 = load128(buf);
|
||||
u32 z1 = movemask128(eq128(chars, and128(casemask, data1)));
|
||||
m128 data2 = load128(buf + 16);
|
||||
u32 z2 = movemask128(eq128(chars, and128(casemask, data2)));
|
||||
u32 z = z1 | (z2 << VERM_BOUNDARY);
|
||||
|
||||
last_res = z;
|
||||
last_buf = buf;
|
||||
buf += 32;
|
||||
|
||||
// now, start the pipeline!
|
||||
assert((size_t)buf % 16 == 0);
|
||||
for (; buf + 31 < buf_end; buf += 32) {
|
||||
// scan more data
|
||||
data1 = load128(buf);
|
||||
z1 = movemask128(eq128(chars, and128(casemask, data1)));
|
||||
data2 = load128(buf + 16);
|
||||
z2 = movemask128(eq128(chars, and128(casemask, data2)));
|
||||
z = z1 | (z2 << 16);
|
||||
|
||||
// do a comparison on previous result
|
||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
||||
(last_buf, last_res
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
if (unlikely(ptr)) {
|
||||
return ptr;
|
||||
}
|
||||
last_res = z;
|
||||
last_buf = buf;
|
||||
}
|
||||
|
||||
// epilogue: compare final results
|
||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
||||
(last_buf, last_res
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
if (unlikely(ptr)) {
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// if we still have some data left, scan it too
|
||||
if (buf + 15 < buf_end) {
|
||||
return JOIN(MATCH_ALGO, vermPipeline16Nocase)(chars, buf, buf_end, run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
}
|
||||
assert(buf <= buf_end && buf >= buf_end - 16);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const u8 *JOIN(MATCH_ALGO, vermicelliExec)(char c, char nocase,
|
||||
const u8 *buf,
|
||||
const u8 *buf_end,
|
||||
const u8 run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, const u8 run_len2
|
||||
#endif
|
||||
) {
|
||||
DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n",
|
||||
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
|
||||
assert(buf < buf_end);
|
||||
|
||||
const u8 *ptr;
|
||||
|
||||
// Handle small scans.
|
||||
if (buf_end - buf < VERM_BOUNDARY) {
|
||||
for (; buf < buf_end; buf++) {
|
||||
char cur = (char)*buf;
|
||||
if (nocase) {
|
||||
cur &= CASE_CLEAR;
|
||||
}
|
||||
if (cur == c) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
|
||||
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
|
||||
|
||||
uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
|
||||
|
||||
if (min) {
|
||||
ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars,
|
||||
buf, run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
) : JOIN(MATCH_ALGO, vermUnalign)(chars,
|
||||
buf, run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
if (unlikely(ptr)) {
|
||||
return ptr;
|
||||
}
|
||||
buf += VERM_BOUNDARY - min;
|
||||
}
|
||||
|
||||
// if we have enough data, run bigger pipeline; otherwise run smaller one
|
||||
if (buf_end - buf >= 128) {
|
||||
ptr = nocase ? JOIN(MATCH_ALGO, vermPipeline32Nocase)(chars,
|
||||
buf, buf_end, run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
) : JOIN(MATCH_ALGO, vermPipeline32)(chars,
|
||||
buf, buf_end, run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
if (unlikely(ptr)) {
|
||||
return ptr;
|
||||
}
|
||||
} else if (buf_end - buf >= 16){
|
||||
ptr = nocase ? JOIN(MATCH_ALGO, vermPipeline16Nocase)(chars,
|
||||
buf, buf_end, run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
) : JOIN(MATCH_ALGO, vermPipeline16)(chars,
|
||||
buf, buf_end, run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
if (unlikely(ptr)) {
|
||||
return ptr;
|
||||
}
|
||||
}
|
||||
|
||||
// final unaligned scan
|
||||
ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars,
|
||||
buf_end - VERM_BOUNDARY, run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
) : JOIN(MATCH_ALGO, vermUnalign)(chars,
|
||||
buf_end - VERM_BOUNDARY, run_len
|
||||
#ifdef MULTIACCEL_DOUBLE
|
||||
, run_len2
|
||||
#endif
|
||||
);
|
||||
|
||||
// run our pipeline
|
||||
return ptr ? ptr : buf_end;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user