De-multiaccel

This commit is contained in:
Matthew Barr 2017-03-31 10:38:03 +11:00
parent 2b1a7da188
commit 423569ec82
38 changed files with 217 additions and 5293 deletions

View File

@ -557,25 +557,6 @@ set (hs_exec_SRCS
src/nfa/mpv.h src/nfa/mpv.h
src/nfa/mpv.c src/nfa/mpv.c
src/nfa/mpv_internal.h src/nfa/mpv_internal.h
src/nfa/multiaccel_common.h
src/nfa/multiaccel_doubleshift.h
src/nfa/multiaccel_doubleshiftgrab.h
src/nfa/multiaccel_long.h
src/nfa/multiaccel_longgrab.h
src/nfa/multiaccel_shift.h
src/nfa/multiaccel_shiftgrab.h
src/nfa/multishufti.c
src/nfa/multishufti_avx2.h
src/nfa/multishufti_sse.h
src/nfa/multishufti.h
src/nfa/multitruffle.c
src/nfa/multitruffle_avx2.h
src/nfa/multitruffle_sse.h
src/nfa/multitruffle.h
src/nfa/multivermicelli.c
src/nfa/multivermicelli.h
src/nfa/multivermicelli_sse.h
src/nfa/multivermicelli_avx2.h
src/nfa/nfa_api.h src/nfa/nfa_api.h
src/nfa/nfa_api_dispatch.c src/nfa/nfa_api_dispatch.c
src/nfa/nfa_internal.h src/nfa/nfa_internal.h
@ -589,13 +570,11 @@ set (hs_exec_SRCS
src/nfa/sheng_impl.h src/nfa/sheng_impl.h
src/nfa/sheng_impl4.h src/nfa/sheng_impl4.h
src/nfa/sheng_internal.h src/nfa/sheng_internal.h
src/nfa/shufti_common.h
src/nfa/shufti.c src/nfa/shufti.c
src/nfa/shufti.h src/nfa/shufti.h
src/nfa/tamarama.c src/nfa/tamarama.c
src/nfa/tamarama.h src/nfa/tamarama.h
src/nfa/tamarama_internal.h src/nfa/tamarama_internal.h
src/nfa/truffle_common.h
src/nfa/truffle.c src/nfa/truffle.c
src/nfa/truffle.h src/nfa/truffle.h
src/nfa/vermicelli.h src/nfa/vermicelli.h
@ -736,8 +715,6 @@ SET (hs_SRCS
src/nfa/mpv_internal.h src/nfa/mpv_internal.h
src/nfa/mpvcompile.cpp src/nfa/mpvcompile.cpp
src/nfa/mpvcompile.h src/nfa/mpvcompile.h
src/nfa/multiaccel_compilehelper.cpp
src/nfa/multiaccel_compilehelper.h
src/nfa/nfa_api.h src/nfa/nfa_api.h
src/nfa/nfa_api_queue.h src/nfa/nfa_api_queue.h
src/nfa/nfa_api_util.h src/nfa/nfa_api_util.h

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -30,9 +30,6 @@
#include "shufti.h" #include "shufti.h"
#include "truffle.h" #include "truffle.h"
#include "vermicelli.h" #include "vermicelli.h"
#include "multishufti.h"
#include "multitruffle.h"
#include "multivermicelli.h"
#include "ue2common.h" #include "ue2common.h"
const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) { const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
@ -132,220 +129,6 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
rv = c_end; rv = c_end;
break; break;
/* multibyte matchers */
case ACCEL_MLVERM:
DEBUG_PRINTF("accel mlverm %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = long_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
break;
case ACCEL_MLVERM_NOCASE:
DEBUG_PRINTF("accel mlverm nc %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = long_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
break;
case ACCEL_MLGVERM:
DEBUG_PRINTF("accel mlgverm %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = longgrab_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
break;
case ACCEL_MLGVERM_NOCASE:
DEBUG_PRINTF("accel mlgverm nc %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = longgrab_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
break;
case ACCEL_MSVERM:
DEBUG_PRINTF("accel msverm %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = shift_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
break;
case ACCEL_MSVERM_NOCASE:
DEBUG_PRINTF("accel msverm nc %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = shift_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
break;
case ACCEL_MSGVERM:
DEBUG_PRINTF("accel msgverm %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = shiftgrab_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
break;
case ACCEL_MSGVERM_NOCASE:
DEBUG_PRINTF("accel msgverm nc %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = shiftgrab_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
break;
case ACCEL_MDSVERM:
DEBUG_PRINTF("accel mdsverm %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = doubleshift_vermicelliExec(accel->mdverm.c, 0, c, c_end,
accel->mdverm.len1, accel->mdverm.len2);
break;
case ACCEL_MDSVERM_NOCASE:
DEBUG_PRINTF("accel mdsverm nc %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = doubleshift_vermicelliExec(accel->mdverm.c, 1, c, c_end,
accel->mdverm.len1, accel->mdverm.len2);
break;
case ACCEL_MDSGVERM:
DEBUG_PRINTF("accel mdsgverm %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 0, c, c_end,
accel->mdverm.len1, accel->mdverm.len2);
break;
case ACCEL_MDSGVERM_NOCASE:
DEBUG_PRINTF("accel mdsgverm nc %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 1, c, c_end,
accel->mdverm.len1, accel->mdverm.len2);
break;
case ACCEL_MLSHUFTI:
DEBUG_PRINTF("accel mlshufti %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = long_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end,
accel->mshufti.len);
break;
case ACCEL_MLGSHUFTI:
DEBUG_PRINTF("accel mlgshufti %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = longgrab_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end,
accel->mshufti.len);
break;
case ACCEL_MSSHUFTI:
DEBUG_PRINTF("accel msshufti %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = shift_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end,
accel->mshufti.len);
break;
case ACCEL_MSGSHUFTI:
DEBUG_PRINTF("accel msgshufti %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = shiftgrab_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end,
accel->mshufti.len);
break;
case ACCEL_MDSSHUFTI:
DEBUG_PRINTF("accel mdsshufti %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = doubleshift_shuftiExec(accel->mdshufti.lo, accel->mdshufti.hi, c, c_end,
accel->mdshufti.len1, accel->mdshufti.len2);
break;
case ACCEL_MDSGSHUFTI:
DEBUG_PRINTF("accel msgshufti %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = doubleshiftgrab_shuftiExec(accel->mdshufti.lo, accel->mdshufti.hi, c, c_end,
accel->mdshufti.len1, accel->mdshufti.len2);
break;
case ACCEL_MLTRUFFLE:
DEBUG_PRINTF("accel mltruffle %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = long_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
c, c_end, accel->mtruffle.len);
break;
case ACCEL_MLGTRUFFLE:
DEBUG_PRINTF("accel mlgtruffle %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = longgrab_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
c, c_end, accel->mtruffle.len);
break;
case ACCEL_MSTRUFFLE:
DEBUG_PRINTF("accel mstruffle %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = shift_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
c, c_end, accel->mtruffle.len);
break;
case ACCEL_MSGTRUFFLE:
DEBUG_PRINTF("accel msgtruffle %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = shiftgrab_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
c, c_end, accel->mtruffle.len);
break;
case ACCEL_MDSTRUFFLE:
DEBUG_PRINTF("accel mdstruffle %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = doubleshift_truffleExec(accel->mdtruffle.mask1,
accel->mdtruffle.mask2, c, c_end,
accel->mdtruffle.len1,
accel->mdtruffle.len2);
break;
case ACCEL_MDSGTRUFFLE:
DEBUG_PRINTF("accel mdsgtruffle %p %p\n", c, c_end);
if (c + 15 >= c_end) {
return c;
}
rv = doubleshiftgrab_truffleExec(accel->mdtruffle.mask1,
accel->mdtruffle.mask2, c, c_end,
accel->mdtruffle.len1,
accel->mdtruffle.len2);
break;
default: default:
assert(!"not here"); assert(!"not here");

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -61,36 +61,7 @@ enum AccelType {
ACCEL_DSHUFTI, ACCEL_DSHUFTI,
ACCEL_TRUFFLE, ACCEL_TRUFFLE,
ACCEL_RED_TAPE, ACCEL_RED_TAPE,
/* multibyte vermicellis */
ACCEL_MLVERM,
ACCEL_MLVERM_NOCASE,
ACCEL_MLGVERM,
ACCEL_MLGVERM_NOCASE,
ACCEL_MSVERM,
ACCEL_MSVERM_NOCASE,
ACCEL_MSGVERM,
ACCEL_MSGVERM_NOCASE,
ACCEL_MDSVERM,
ACCEL_MDSVERM_NOCASE,
ACCEL_MDSGVERM,
ACCEL_MDSGVERM_NOCASE,
/* multibyte shuftis */
ACCEL_MLSHUFTI,
ACCEL_MLGSHUFTI,
ACCEL_MSSHUFTI,
ACCEL_MSGSHUFTI,
ACCEL_MDSSHUFTI,
ACCEL_MDSGSHUFTI,
/* multibyte truffles */
ACCEL_MLTRUFFLE,
ACCEL_MLGTRUFFLE,
ACCEL_MSTRUFFLE,
ACCEL_MSGTRUFFLE,
ACCEL_MDSTRUFFLE,
ACCEL_MDSGTRUFFLE,
/* masked dverm */
ACCEL_DVERM_MASKED, ACCEL_DVERM_MASKED,
}; };
/** \brief Structure for accel framework. */ /** \brief Structure for accel framework. */
@ -140,42 +111,12 @@ union AccelAux {
m128 lo2; m128 lo2;
m128 hi2; m128 hi2;
} dshufti; } dshufti;
struct {
u8 accel_type;
u8 offset;
m128 lo;
m128 hi;
u8 len;
} mshufti;
struct {
u8 accel_type;
u8 offset;
m128 lo;
m128 hi;
u8 len1;
u8 len2;
} mdshufti;
struct { struct {
u8 accel_type; u8 accel_type;
u8 offset; u8 offset;
m128 mask1; m128 mask1;
m128 mask2; m128 mask2;
} truffle; } truffle;
struct {
u8 accel_type;
u8 offset;
m128 mask1;
m128 mask2;
u8 len;
} mtruffle;
struct {
u8 accel_type;
u8 offset;
m128 mask1;
m128 mask2;
u8 len1;
u8 len2;
} mdtruffle;
}; };
/** /**

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -93,54 +93,6 @@ const char *accelName(u8 accel_type) {
return "truffle"; return "truffle";
case ACCEL_RED_TAPE: case ACCEL_RED_TAPE:
return "red tape"; return "red tape";
case ACCEL_MLVERM:
return "multibyte long vermicelli";
case ACCEL_MLVERM_NOCASE:
return "multibyte long vermicelli nocase";
case ACCEL_MLGVERM:
return "multibyte long-grab vermicelli";
case ACCEL_MLGVERM_NOCASE:
return "multibyte long-grab vermicelli nocase";
case ACCEL_MSVERM:
return "multibyte shift vermicelli";
case ACCEL_MSVERM_NOCASE:
return "multibyte shift vermicelli nocase";
case ACCEL_MSGVERM:
return "multibyte shift-grab vermicelli";
case ACCEL_MSGVERM_NOCASE:
return "multibyte shift-grab vermicelli nocase";
case ACCEL_MDSVERM:
return "multibyte doubleshift vermicelli";
case ACCEL_MDSVERM_NOCASE:
return "multibyte doubleshift vermicelli nocase";
case ACCEL_MDSGVERM:
return "multibyte doubleshift-grab vermicelli";
case ACCEL_MDSGVERM_NOCASE:
return "multibyte doubleshift-grab vermicelli nocase";
case ACCEL_MLSHUFTI:
return "multibyte long shufti";
case ACCEL_MLGSHUFTI:
return "multibyte long-grab shufti";
case ACCEL_MSSHUFTI:
return "multibyte shift shufti";
case ACCEL_MSGSHUFTI:
return "multibyte shift-grab shufti";
case ACCEL_MDSSHUFTI:
return "multibyte doubleshift shufti";
case ACCEL_MDSGSHUFTI:
return "multibyte doubleshift-grab shufti";
case ACCEL_MLTRUFFLE:
return "multibyte long truffle";
case ACCEL_MLGTRUFFLE:
return "multibyte long-grab truffle";
case ACCEL_MSTRUFFLE:
return "multibyte shift truffle";
case ACCEL_MSGTRUFFLE:
return "multibyte shift-grab truffle";
case ACCEL_MDSTRUFFLE:
return "multibyte doubleshift truffle";
case ACCEL_MDSGTRUFFLE:
return "multibyte doubleshift-grab truffle";
default: default:
return "unknown!"; return "unknown!";
} }
@ -283,59 +235,6 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) {
(const u8 *)&accel.truffle.mask2); (const u8 *)&accel.truffle.mask2);
break; break;
} }
case ACCEL_MLVERM:
case ACCEL_MLVERM_NOCASE:
case ACCEL_MLGVERM:
case ACCEL_MLGVERM_NOCASE:
case ACCEL_MSVERM:
case ACCEL_MSVERM_NOCASE:
case ACCEL_MSGVERM:
case ACCEL_MSGVERM_NOCASE:
fprintf(f, " [\\x%02hhx] len:%u\n", accel.mverm.c, accel.mverm.len);
break;
case ACCEL_MDSVERM:
case ACCEL_MDSVERM_NOCASE:
case ACCEL_MDSGVERM:
case ACCEL_MDSGVERM_NOCASE:
fprintf(f, " [\\x%02hhx] len1:%u len2:%u\n", accel.mdverm.c, accel.mdverm.len1,
accel.mdverm.len2);
break;
case ACCEL_MLSHUFTI:
case ACCEL_MLGSHUFTI:
case ACCEL_MSSHUFTI:
case ACCEL_MSGSHUFTI:
fprintf(f, " len:%u\n", accel.mshufti.len);
dumpShuftiMasks(f, (const u8 *)&accel.mshufti.lo,
(const u8 *)&accel.mshufti.hi);
dumpShuftiCharReach(f, (const u8 *)&accel.mshufti.lo,
(const u8 *)&accel.mshufti.hi);
break;
case ACCEL_MDSSHUFTI:
case ACCEL_MDSGSHUFTI:
fprintf(f, " len1:%u len2:%u\n", accel.mdshufti.len1, accel.mdshufti.len2);
dumpShuftiMasks(f, (const u8 *)&accel.mdshufti.lo,
(const u8 *)&accel.mdshufti.hi);
dumpShuftiCharReach(f, (const u8 *)&accel.mdshufti.lo,
(const u8 *)&accel.mdshufti.hi);
break;
case ACCEL_MLTRUFFLE:
case ACCEL_MLGTRUFFLE:
case ACCEL_MSTRUFFLE:
case ACCEL_MSGTRUFFLE:
fprintf(f, " len:%u\n", accel.mtruffle.len);
dumpTruffleMasks(f, (const u8 *)&accel.mtruffle.mask1,
(const u8 *)&accel.mtruffle.mask2);
dumpTruffleCharReach(f, (const u8 *)&accel.mtruffle.mask1,
(const u8 *)&accel.mtruffle.mask2);
break;
case ACCEL_MDSTRUFFLE:
case ACCEL_MDSGTRUFFLE:
fprintf(f, " len1:%u len2:%u\n", accel.mdtruffle.len1, accel.mdtruffle.len2);
dumpTruffleMasks(f, (const u8 *)&accel.mdtruffle.mask1,
(const u8 *)&accel.mdtruffle.mask2);
dumpTruffleCharReach(f, (const u8 *)&accel.mdtruffle.mask1,
(const u8 *)&accel.mdtruffle.mask2);
break;
default: default:
fprintf(f, "\n"); fprintf(f, "\n");
break; break;

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -225,274 +225,6 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
aux->accel_type = ACCEL_NONE; aux->accel_type = ACCEL_NONE;
} }
static
void buildAccelMulti(const AccelInfo &info, AccelAux *aux) {
if (info.ma_type == MultibyteAccelInfo::MAT_NONE) {
DEBUG_PRINTF("no multimatch for us :(");
return;
}
u32 offset = info.multiaccel_offset;
const CharReach &stops = info.multiaccel_stops;
assert(aux->accel_type == ACCEL_NONE);
if (stops.all()) {
return;
}
size_t outs = stops.count();
DEBUG_PRINTF("%zu outs\n", outs);
assert(outs && outs < 256);
switch (info.ma_type) {
case MultibyteAccelInfo::MAT_LONG:
if (outs == 1) {
aux->accel_type = ACCEL_MLVERM;
aux->mverm.offset = offset;
aux->mverm.c = stops.find_first();
aux->mverm.len = info.ma_len1;
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
return;
}
if (outs == 2 && stops.isCaselessChar()) {
aux->accel_type = ACCEL_MLVERM_NOCASE;
aux->mverm.offset = offset;
aux->mverm.c = stops.find_first() & CASE_CLEAR;
aux->mverm.len = info.ma_len1;
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
aux->verm.c);
return;
}
break;
case MultibyteAccelInfo::MAT_LONGGRAB:
if (outs == 1) {
aux->accel_type = ACCEL_MLGVERM;
aux->mverm.offset = offset;
aux->mverm.c = stops.find_first();
aux->mverm.len = info.ma_len1;
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
return;
}
if (outs == 2 && stops.isCaselessChar()) {
aux->accel_type = ACCEL_MLGVERM_NOCASE;
aux->mverm.offset = offset;
aux->mverm.c = stops.find_first() & CASE_CLEAR;
aux->mverm.len = info.ma_len1;
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
aux->verm.c);
return;
}
break;
case MultibyteAccelInfo::MAT_SHIFT:
if (outs == 1) {
aux->accel_type = ACCEL_MSVERM;
aux->mverm.offset = offset;
aux->mverm.c = stops.find_first();
aux->mverm.len = info.ma_len1;
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
return;
}
if (outs == 2 && stops.isCaselessChar()) {
aux->accel_type = ACCEL_MSVERM_NOCASE;
aux->mverm.offset = offset;
aux->mverm.c = stops.find_first() & CASE_CLEAR;
aux->mverm.len = info.ma_len1;
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
aux->verm.c);
return;
}
break;
case MultibyteAccelInfo::MAT_SHIFTGRAB:
if (outs == 1) {
aux->accel_type = ACCEL_MSGVERM;
aux->mverm.offset = offset;
aux->mverm.c = stops.find_first();
aux->mverm.len = info.ma_len1;
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
return;
}
if (outs == 2 && stops.isCaselessChar()) {
aux->accel_type = ACCEL_MSGVERM_NOCASE;
aux->mverm.offset = offset;
aux->mverm.c = stops.find_first() & CASE_CLEAR;
aux->mverm.len = info.ma_len1;
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
aux->verm.c);
return;
}
break;
case MultibyteAccelInfo::MAT_DSHIFT:
if (outs == 1) {
aux->accel_type = ACCEL_MDSVERM;
aux->mdverm.offset = offset;
aux->mdverm.c = stops.find_first();
aux->mdverm.len1 = info.ma_len1;
aux->mdverm.len2 = info.ma_len2;
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
return;
}
if (outs == 2 && stops.isCaselessChar()) {
aux->accel_type = ACCEL_MDSVERM_NOCASE;
aux->mverm.offset = offset;
aux->mverm.c = stops.find_first() & CASE_CLEAR;
aux->mdverm.len1 = info.ma_len1;
aux->mdverm.len2 = info.ma_len2;
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
aux->verm.c);
return;
}
break;
case MultibyteAccelInfo::MAT_DSHIFTGRAB:
if (outs == 1) {
aux->accel_type = ACCEL_MDSGVERM;
aux->mdverm.offset = offset;
aux->mdverm.c = stops.find_first();
aux->mdverm.len1 = info.ma_len1;
aux->mdverm.len2 = info.ma_len2;
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
return;
}
if (outs == 2 && stops.isCaselessChar()) {
aux->accel_type = ACCEL_MDSGVERM_NOCASE;
aux->mverm.offset = offset;
aux->mverm.c = stops.find_first() & CASE_CLEAR;
aux->mdverm.len1 = info.ma_len1;
aux->mdverm.len2 = info.ma_len2;
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
aux->verm.c);
return;
}
break;
default:
// shouldn't happen
assert(0);
return;
}
DEBUG_PRINTF("attempting shufti for %zu chars\n", outs);
switch (info.ma_type) {
case MultibyteAccelInfo::MAT_LONG:
if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
(u8 *)&aux->mshufti.hi) == -1) {
break;
}
aux->accel_type = ACCEL_MLSHUFTI;
aux->mshufti.offset = offset;
aux->mshufti.len = info.ma_len1;
return;
case MultibyteAccelInfo::MAT_LONGGRAB:
if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
(u8 *)&aux->mshufti.hi) == -1) {
break;
}
aux->accel_type = ACCEL_MLGSHUFTI;
aux->mshufti.offset = offset;
aux->mshufti.len = info.ma_len1;
return;
case MultibyteAccelInfo::MAT_SHIFT:
if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
(u8 *)&aux->mshufti.hi) == -1) {
break;
}
aux->accel_type = ACCEL_MSSHUFTI;
aux->mshufti.offset = offset;
aux->mshufti.len = info.ma_len1;
return;
case MultibyteAccelInfo::MAT_SHIFTGRAB:
if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
(u8 *)&aux->mshufti.hi) == -1) {
break;
}
aux->accel_type = ACCEL_MSGSHUFTI;
aux->mshufti.offset = offset;
aux->mshufti.len = info.ma_len1;
return;
case MultibyteAccelInfo::MAT_DSHIFT:
if (shuftiBuildMasks(stops, (u8 *)&aux->mdshufti.lo,
(u8 *)&aux->mdshufti.hi) == -1) {
break;
}
aux->accel_type = ACCEL_MDSSHUFTI;
aux->mdshufti.offset = offset;
aux->mdshufti.len1 = info.ma_len1;
aux->mdshufti.len2 = info.ma_len2;
return;
case MultibyteAccelInfo::MAT_DSHIFTGRAB:
if (shuftiBuildMasks(stops, (u8 *)&aux->mdshufti.lo,
(u8 *)&aux->mdshufti.hi) == -1) {
break;
}
aux->accel_type = ACCEL_MDSGSHUFTI;
aux->mdshufti.offset = offset;
aux->mdshufti.len1 = info.ma_len1;
aux->mdshufti.len2 = info.ma_len2;
return;
default:
// shouldn't happen
assert(0);
return;
}
DEBUG_PRINTF("shufti build failed, falling through\n");
if (outs <= ACCEL_MAX_STOP_CHAR) {
DEBUG_PRINTF("building Truffle for %zu chars\n", outs);
switch (info.ma_type) {
case MultibyteAccelInfo::MAT_LONG:
aux->accel_type = ACCEL_MLTRUFFLE;
aux->mtruffle.offset = offset;
aux->mtruffle.len = info.ma_len1;
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
(u8 *)&aux->mtruffle.mask2);
break;
case MultibyteAccelInfo::MAT_LONGGRAB:
aux->accel_type = ACCEL_MLGTRUFFLE;
aux->mtruffle.offset = offset;
aux->mtruffle.len = info.ma_len1;
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
(u8 *)&aux->mtruffle.mask2);
break;
case MultibyteAccelInfo::MAT_SHIFT:
aux->accel_type = ACCEL_MSTRUFFLE;
aux->mtruffle.offset = offset;
aux->mtruffle.len = info.ma_len1;
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
(u8 *)&aux->mtruffle.mask2);
break;
case MultibyteAccelInfo::MAT_SHIFTGRAB:
aux->accel_type = ACCEL_MSGTRUFFLE;
aux->mtruffle.offset = offset;
aux->mtruffle.len = info.ma_len1;
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
(u8 *)&aux->mtruffle.mask2);
break;
case MultibyteAccelInfo::MAT_DSHIFT:
aux->accel_type = ACCEL_MDSTRUFFLE;
aux->mdtruffle.offset = offset;
aux->mdtruffle.len1 = info.ma_len1;
aux->mdtruffle.len2 = info.ma_len2;
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
(u8 *)&aux->mdtruffle.mask2);
break;
case MultibyteAccelInfo::MAT_DSHIFTGRAB:
aux->accel_type = ACCEL_MDSGTRUFFLE;
aux->mdtruffle.offset = offset;
aux->mdtruffle.len1 = info.ma_len1;
aux->mdtruffle.len2 = info.ma_len2;
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
(u8 *)&aux->mdtruffle.mask2);
break;
default:
// shouldn't happen
assert(0);
return;
}
return;
}
DEBUG_PRINTF("unable to accelerate multibyte case with %zu outs\n", outs);
}
bool buildAccelAux(const AccelInfo &info, AccelAux *aux) { bool buildAccelAux(const AccelInfo &info, AccelAux *aux) {
assert(aux->accel_type == ACCEL_NONE); assert(aux->accel_type == ACCEL_NONE);
if (info.single_stops.none()) { if (info.single_stops.none()) {
@ -500,9 +232,6 @@ bool buildAccelAux(const AccelInfo &info, AccelAux *aux) {
aux->accel_type = ACCEL_RED_TAPE; aux->accel_type = ACCEL_RED_TAPE;
aux->generic.offset = info.single_offset; aux->generic.offset = info.single_offset;
} }
if (aux->accel_type == ACCEL_NONE) {
buildAccelMulti(info, aux);
}
if (aux->accel_type == ACCEL_NONE) { if (aux->accel_type == ACCEL_NONE) {
buildAccelDouble(info, aux); buildAccelDouble(info, aux);
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -37,30 +37,9 @@ union AccelAux;
namespace ue2 { namespace ue2 {
struct MultibyteAccelInfo {
/* multibyte accel schemes, ordered by strength */
enum multiaccel_type {
MAT_SHIFT,
MAT_SHIFTGRAB,
MAT_DSHIFT,
MAT_DSHIFTGRAB,
MAT_LONG,
MAT_LONGGRAB,
MAT_MAX,
MAT_NONE = MAT_MAX
};
CharReach cr;
u32 offset = 0;
u32 len1 = 0;
u32 len2 = 0;
multiaccel_type type = MAT_NONE;
};
struct AccelInfo { struct AccelInfo {
AccelInfo() : single_offset(0U), double_offset(0U), AccelInfo() : single_offset(0U), double_offset(0U),
single_stops(CharReach::dot()), single_stops(CharReach::dot()) {}
multiaccel_offset(0), ma_len1(0), ma_len2(0),
ma_type(MultibyteAccelInfo::MAT_NONE) {}
u32 single_offset; /**< offset correction to apply to single schemes */ u32 single_offset; /**< offset correction to apply to single schemes */
u32 double_offset; /**< offset correction to apply to double schemes */ u32 double_offset; /**< offset correction to apply to double schemes */
CharReach double_stop1; /**< single-byte accel stop literals for double CharReach double_stop1; /**< single-byte accel stop literals for double
@ -68,11 +47,6 @@ struct AccelInfo {
flat_set<std::pair<u8, u8>> double_stop2; /**< double-byte accel stop flat_set<std::pair<u8, u8>> double_stop2; /**< double-byte accel stop
* literals */ * literals */
CharReach single_stops; /**< escapes for single byte acceleration */ CharReach single_stops; /**< escapes for single byte acceleration */
u32 multiaccel_offset; /**< offset correction to apply to multibyte schemes */
CharReach multiaccel_stops; /**< escapes for multibyte acceleration */
u32 ma_len1; /**< multiaccel len1 */
u32 ma_len2; /**< multiaccel len2 */
MultibyteAccelInfo::multiaccel_type ma_type; /**< multiaccel type */
}; };
bool buildAccelAux(const AccelInfo &info, AccelAux *aux); bool buildAccelAux(const AccelInfo &info, AccelAux *aux);

View File

@ -39,9 +39,6 @@
#include "nfa_internal.h" #include "nfa_internal.h"
#include "shufti.h" #include "shufti.h"
#include "truffle.h" #include "truffle.h"
#include "multishufti.h"
#include "multitruffle.h"
#include "multivermicelli.h"
#include "ue2common.h" #include "ue2common.h"
#include "vermicelli.h" #include "vermicelli.h"
#include "util/arch.h" #include "util/arch.h"

View File

@ -93,8 +93,6 @@ struct precalcAccel {
CharReach double_cr; CharReach double_cr;
flat_set<pair<u8, u8>> double_lits; /* double-byte accel stop literals */ flat_set<pair<u8, u8>> double_lits; /* double-byte accel stop literals */
u32 double_offset; u32 double_offset;
MultibyteAccelInfo ma_info;
}; };
struct limex_accel_info { struct limex_accel_info {
@ -358,16 +356,12 @@ void buildReachMapping(const build_info &args, vector<NFAStateSet> &reach,
} }
struct AccelBuild { struct AccelBuild {
AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0), ma_len1(0), AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0) {}
ma_len2(0), ma_type(MultibyteAccelInfo::MAT_NONE) {}
NFAVertex v; NFAVertex v;
u32 state; u32 state;
u32 offset; // offset correction to apply u32 offset; // offset correction to apply
CharReach stop1; // single-byte accel stop literals CharReach stop1; // single-byte accel stop literals
flat_set<pair<u8, u8>> stop2; // double-byte accel stop literals flat_set<pair<u8, u8>> stop2; // double-byte accel stop literals
u32 ma_len1; // multiaccel len1
u32 ma_len2; // multiaccel len2
MultibyteAccelInfo::multiaccel_type ma_type; // multiaccel type
}; };
static static
@ -382,12 +376,7 @@ void findStopLiterals(const build_info &bi, NFAVertex v, AccelBuild &build) {
build.stop1 = CharReach::dot(); build.stop1 = CharReach::dot();
} else { } else {
const precalcAccel &precalc = bi.accel.precalc.at(ss); const precalcAccel &precalc = bi.accel.precalc.at(ss);
unsigned ma_len = precalc.ma_info.len1 + precalc.ma_info.len2; if (precalc.double_lits.empty()) {
if (ma_len >= MULTIACCEL_MIN_LEN) {
build.ma_len1 = precalc.ma_info.len1;
build.stop1 = precalc.ma_info.cr;
build.offset = precalc.ma_info.offset;
} else if (precalc.double_lits.empty()) {
build.stop1 = precalc.single_cr; build.stop1 = precalc.single_cr;
build.offset = precalc.single_offset; build.offset = precalc.single_offset;
} else { } else {
@ -606,7 +595,6 @@ void fillAccelInfo(build_info &bi) {
limex_accel_info &accel = bi.accel; limex_accel_info &accel = bi.accel;
unordered_map<NFAVertex, AccelScheme> &accel_map = accel.accel_map; unordered_map<NFAVertex, AccelScheme> &accel_map = accel.accel_map;
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic = bi.br_cyclic; const map<NFAVertex, BoundedRepeatSummary> &br_cyclic = bi.br_cyclic;
const CompileContext &cc = bi.cc;
const unordered_map<NFAVertex, u32> &state_ids = bi.state_ids; const unordered_map<NFAVertex, u32> &state_ids = bi.state_ids;
const u32 num_states = bi.num_states; const u32 num_states = bi.num_states;
@ -663,27 +651,17 @@ void fillAccelInfo(build_info &bi) {
DEBUG_PRINTF("accel %u ok with offset s%u, d%u\n", i, as.offset, DEBUG_PRINTF("accel %u ok with offset s%u, d%u\n", i, as.offset,
as.double_offset); as.double_offset);
// try multibyte acceleration first
MultibyteAccelInfo mai = nfaCheckMultiAccel(g, states, cc);
precalcAccel &pa = accel.precalc[state_set]; precalcAccel &pa = accel.precalc[state_set];
useful |= state_set;
// if we successfully built a multibyte accel scheme, use that
if (mai.type != MultibyteAccelInfo::MAT_NONE) {
pa.ma_info = mai;
DEBUG_PRINTF("multibyte acceleration!\n");
continue;
}
pa.single_offset = as.offset; pa.single_offset = as.offset;
pa.single_cr = as.cr; pa.single_cr = as.cr;
if (as.double_byte.size() != 0) { if (as.double_byte.size() != 0) {
pa.double_offset = as.double_offset; pa.double_offset = as.double_offset;
pa.double_lits = as.double_byte; pa.double_lits = as.double_byte;
pa.double_cr = as.double_cr; pa.double_cr = as.double_cr;
}; }
useful |= state_set;
} }
for (const auto &m : accel_map) { for (const auto &m : accel_map) {
@ -700,19 +678,8 @@ void fillAccelInfo(build_info &bi) {
state_set.reset(); state_set.reset();
state_set.set(state_id); state_set.set(state_id);
bool is_multi = false;
auto p_it = accel.precalc.find(state_set);
if (p_it != accel.precalc.end()) {
const precalcAccel &pa = p_it->second;
offset = max(pa.double_offset, pa.single_offset);
is_multi = pa.ma_info.type != MultibyteAccelInfo::MAT_NONE;
assert(offset <= MAX_ACCEL_DEPTH);
}
accel.accelerable.insert(v); accel.accelerable.insert(v);
if (!is_multi) { findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]);
findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]);
}
} }
} }
@ -954,16 +921,8 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask,
if (contains(accel.precalc, effective_states)) { if (contains(accel.precalc, effective_states)) {
const auto &precalc = accel.precalc.at(effective_states); const auto &precalc = accel.precalc.at(effective_states);
if (precalc.ma_info.type != MultibyteAccelInfo::MAT_NONE) { ainfo.single_offset = precalc.single_offset;
ainfo.ma_len1 = precalc.ma_info.len1; ainfo.single_stops = precalc.single_cr;
ainfo.ma_len2 = precalc.ma_info.len2;
ainfo.multiaccel_offset = precalc.ma_info.offset;
ainfo.multiaccel_stops = precalc.ma_info.cr;
ainfo.ma_type = precalc.ma_info.type;
} else {
ainfo.single_offset = precalc.single_offset;
ainfo.single_stops = precalc.single_cr;
}
} }
} }

View File

@ -1,265 +0,0 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MULTIACCEL_COMMON_H_
#define MULTIACCEL_COMMON_H_
#include "config.h"
#include "ue2common.h"
#include "util/join.h"
#include "util/bitutils.h"
/*
* When doing shifting, remember that the total number of shifts should be n-1
*/
#define VARISHIFT(src, dst, len) \
do { \
(dst) &= (src) >> (len); \
} while (0)
#define STATIC_SHIFT1(x) \
do { \
(x) &= (x) >> 1; \
} while (0)
#define STATIC_SHIFT2(x) \
do { \
(x) &= (x) >> 2;\
} while (0)
#define STATIC_SHIFT4(x) \
do { \
(x) &= (x) >> 4; \
} while (0)
#define STATIC_SHIFT8(x) \
do { \
(x) &= (x) >> 8; \
} while (0)
#define SHIFT1(x) \
do {} while (0)
#define SHIFT2(x) \
do { \
STATIC_SHIFT1(x); \
} while (0)
#define SHIFT3(x) \
do { \
STATIC_SHIFT1(x); \
STATIC_SHIFT1(x); \
} while (0)
#define SHIFT4(x) \
do { \
STATIC_SHIFT1(x); \
STATIC_SHIFT2(x); \
} while (0)
#define SHIFT5(x) \
do { \
SHIFT4(x); \
STATIC_SHIFT1(x); \
} while (0)
#define SHIFT6(x) \
do { \
SHIFT4(x); \
STATIC_SHIFT2(x); \
} while (0)
#define SHIFT7(x) \
do { \
SHIFT4(x); \
STATIC_SHIFT1(x); \
STATIC_SHIFT2(x); \
} while (0)
#define SHIFT8(x) \
do { \
SHIFT4(x); \
STATIC_SHIFT4(x); \
} while (0)
#define SHIFT9(x) \
do { \
SHIFT8(x); \
STATIC_SHIFT1(x); \
} while (0)
#define SHIFT10(x) \
do { \
SHIFT8(x); \
STATIC_SHIFT2(x); \
} while (0)
#define SHIFT11(x) \
do { \
SHIFT8(x); \
STATIC_SHIFT1(x); \
STATIC_SHIFT2(x); \
} while (0)
#define SHIFT12(x); \
do { \
SHIFT8(x);\
STATIC_SHIFT4(x); \
} while (0)
#define SHIFT13(x); \
do { \
SHIFT8(x); \
STATIC_SHIFT1(x); \
STATIC_SHIFT4(x); \
} while (0)
#define SHIFT14(x) \
do { \
SHIFT8(x); \
STATIC_SHIFT2(x); \
STATIC_SHIFT4(x); \
} while (0)
#define SHIFT15(x) \
do { \
SHIFT8(x); \
STATIC_SHIFT1(x); \
STATIC_SHIFT2(x); \
STATIC_SHIFT4(x); \
} while (0)
#define SHIFT16(x) \
do { \
SHIFT8(x); \
STATIC_SHIFT8(x); \
} while (0)
#define SHIFT17(x) \
do { \
SHIFT16(x); \
STATIC_SHIFT1(x); \
} while (0)
#define SHIFT18(x) \
do { \
SHIFT16(x); \
STATIC_SHIFT2(x); \
} while (0)
#define SHIFT19(x) \
do { \
SHIFT16(x); \
STATIC_SHIFT1(x); \
STATIC_SHIFT2(x); \
} while (0)
#define SHIFT20(x) \
do { \
SHIFT16(x); \
STATIC_SHIFT4(x); \
} while (0)
#define SHIFT21(x) \
do { \
SHIFT16(x); \
STATIC_SHIFT1(x); \
STATIC_SHIFT4(x); \
} while (0)
#define SHIFT22(x) \
do { \
SHIFT16(x); \
STATIC_SHIFT2(x); \
STATIC_SHIFT4(x); \
} while (0)
#define SHIFT23(x) \
do { \
SHIFT16(x); \
STATIC_SHIFT1(x); \
STATIC_SHIFT2(x); \
STATIC_SHIFT4(x); \
} while (0)
#define SHIFT24(x) \
do { \
SHIFT16(x); \
STATIC_SHIFT8(x); \
} while (0)
#define SHIFT25(x) \
do { \
SHIFT24(x); \
STATIC_SHIFT1(x); \
} while (0)
#define SHIFT26(x) \
do { \
SHIFT24(x); \
STATIC_SHIFT2(x); \
} while (0)
#define SHIFT27(x) \
do { \
SHIFT24(x); \
STATIC_SHIFT1(x); \
STATIC_SHIFT2(x); \
} while (0)
#define SHIFT28(x) \
do { \
SHIFT24(x); \
STATIC_SHIFT4(x); \
} while (0)
#define SHIFT29(x) \
do { \
SHIFT24(x); \
STATIC_SHIFT1(x); \
STATIC_SHIFT4(x); \
} while (0)
#define SHIFT30(x) \
do { \
SHIFT24(x); \
STATIC_SHIFT2(x); \
STATIC_SHIFT4(x); \
} while (0)
#define SHIFT31(x) \
do { \
SHIFT24(x); \
STATIC_SHIFT1(x); \
STATIC_SHIFT2(x); \
STATIC_SHIFT4(x); \
} while (0)
#define SHIFT32(x) \
do { \
SHIFT24(x); \
STATIC_SHIFT8(x); \
} while (0)
/*
* this function is used by 32-bit multiaccel matchers. 32-bit matchers accept
* a 32-bit integer as a buffer, where low 16 bits is movemask result and
* high 16 bits are "don't care" values. this function is not expected to return
* a result higher than 16.
*/
static really_inline
const u8 *match32(const u8 *buf, const u32 z) {
if (unlikely(z != 0)) {
u32 pos = ctz32(z);
assert(pos < 16);
return buf + pos;
}
return NULL;
}
/*
* this function is used by 64-bit multiaccel matchers. 64-bit matchers accept
* a 64-bit integer as a buffer, where low 32 bits is movemask result and
* high 32 bits are "don't care" values. this function is not expected to return
* a result higher than 32.
*/
static really_inline
const u8 *match64(const u8 *buf, const u64a z) {
if (unlikely(z != 0)) {
u32 pos = ctz64(z);
assert(pos < 32);
return buf + pos;
}
return NULL;
}
#endif /* MULTIACCEL_COMMON_H_ */

View File

@ -1,439 +0,0 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "multiaccel_compilehelper.h"
using namespace std;
using namespace ue2;
#ifdef DEBUG
static const char* state_to_str[] = {
"FIRST_RUN",
"SECOND_RUN",
"WAITING_FOR_GRAB",
"FIRST_TAIL",
"SECOND_TAIL",
"STOPPED",
"INVALID"
};
static const char* type_to_str[] = {
"SHIFT",
"SHIFTGRAB",
"DOUBLESHIFT",
"DOUBLESHIFTGRAB",
"LONG",
"LONGGRAB",
"NONE"
};
static
void dumpMultiaccelState(const accel_data &d) {
DEBUG_PRINTF("type: %s state: %s len1: %u tlen1: %u len2: %u tlen2: %u\n",
type_to_str[(unsigned) d.type],
state_to_str[(unsigned) d.state],
d.len1, d.tlen1, d.len2, d.tlen2);
}
#endif
/* stop all the matching. this may render most schemes invalid. */
static
void stop(accel_data &d) {
switch (d.state) {
case STATE_STOPPED:
case STATE_INVALID:
break;
case STATE_FIRST_TAIL:
case STATE_SECOND_RUN:
/*
* Shift matchers are special case, because they have "tails".
* When shift matcher reaches a mid/endpoint, tail mode is
* activated, which looks for more matches to extend the match.
*
* For example, consider pattern /a{5}ba{3}/. Under normal circumstances,
* long-grab matcher will be picked for this pattern (matching a run of a's,
* followed by a not-a), because doubleshift matcher would be confused by
* consecutive a's and would parse the pattern as a.{0}a.{0}a (two shifts
* by 1) and throw out the rest of the pattern.
*
* With tails, we defer ending the run until we actually run out of
* matching characters, so the above pattern will now be parsed by
* doubleshift matcher as /a.{3}a.{3}a/ (two shifts by 4).
*
* So if we are stopping shift matchers, we should check if we aren't in
* the process of matching first tail or second run. If we are, we can't
* finish the second run as we are stopping, but we can try and split
* the first tail instead to obtain a valid second run.
*/
if ((d.type == MultibyteAccelInfo::MAT_DSHIFT ||
d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) && d.tlen1 == 0) {
// can't split an empty void...
d.state = STATE_INVALID;
break;
}
d.len2 = 0;
d.state = STATE_STOPPED;
break;
case STATE_SECOND_TAIL:
d.state = STATE_STOPPED;
break;
case STATE_WAITING_FOR_GRAB:
case STATE_FIRST_RUN:
if (d.type == MultibyteAccelInfo::MAT_LONG) {
d.state = STATE_STOPPED;
} else {
d.state = STATE_INVALID;
}
break;
}
}
static
void validate(accel_data &d, unsigned max_len) {
// try and fit in all our tails
if (d.len1 + d.tlen1 + d.len2 + d.tlen2 < max_len && d.len2 > 0) {
// case 1: everything fits in
d.len1 += d.tlen1;
d.len2 += d.tlen2;
d.tlen1 = 0;
d.tlen2 = 0;
} else if (d.len1 + d.tlen1 + d.len2 < max_len && d.len2 > 0) {
// case 2: everything but the second tail fits in
d.len1 += d.tlen1;
d.tlen1 = 0;
// try going for a partial tail
if (d.tlen2 != 0) {
int new_tlen2 = max_len - 1 - d.len1 - d.len2;
if (new_tlen2 > 0) {
d.len2 += new_tlen2;
}
d.tlen2 = 0;
}
} else if (d.len1 + d.tlen1 < max_len) {
// case 3: first run and its tail fits in
if (d.type == MultibyteAccelInfo::MAT_DSHIFT ||
d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) {
// split the tail into a second run
d.len2 = d.tlen1;
} else {
d.len1 += d.tlen1;
d.len2 = 0;
}
d.tlen1 = 0;
d.tlen2 = 0;
} else if (d.len1 < max_len) {
// case 4: nothing but the first run fits in
// try going for a partial tail
if (d.tlen1 != 0) {
int new_tlen1 = max_len - 1 - d.len1;
if (new_tlen1 > 0) {
d.len1 += new_tlen1;
}
d.tlen1 = 0;
}
d.len2 = 0;
d.tlen2 = 0;
}
// if we removed our second run, doubleshift matchers are no longer valid
if ((d.type == MultibyteAccelInfo::MAT_DSHIFT ||
d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) && d.len2 == 0) {
d.state = STATE_INVALID;
} else if ((d.type == MultibyteAccelInfo::MAT_LONG) && d.len1 >= max_len) {
// long matchers can just stop whenever they want to
d.len1 = max_len - 1;
}
// now, general sanity checks
if ((d.len1 + d.tlen1 + d.len2 + d.tlen2) >= max_len) {
d.state = STATE_INVALID;
}
if ((d.len1 + d.tlen1 + d.len2 + d.tlen2) < MULTIACCEL_MIN_LEN) {
d.state = STATE_INVALID;
}
}
static
void match(accel_data &d, const CharReach &ref_cr, const CharReach &cur_cr) {
switch (d.type) {
case MultibyteAccelInfo::MAT_LONG:
{
/*
* For long matcher, we want lots of consecutive same-or-subset
* char-reaches
*/
if ((ref_cr & cur_cr) == cur_cr) {
d.len1++;
} else {
d.state = STATE_STOPPED;
}
}
break;
case MultibyteAccelInfo::MAT_LONGGRAB:
{
/*
* For long-grab matcher, we want lots of consecutive same-or-subset
* char-reaches with a negative match in the end.
*/
if ((ref_cr & cur_cr) == cur_cr) {
d.len1++;
} else if (!(ref_cr & cur_cr).any()) {
/* we grabbed, stop immediately */
d.state = STATE_STOPPED;
} else {
/* our run-n-grab was interrupted; mark as invalid */
d.state = STATE_INVALID;
}
}
break;
case MultibyteAccelInfo::MAT_SHIFTGRAB:
{
/*
* For shift-grab matcher, we want two matches separated by anything;
* however the second vertex *must* be a negative (non-overlapping) match.
*
* Shiftgrab matcher is identical to shift except for presence of grab.
*/
if (d.state == STATE_WAITING_FOR_GRAB) {
if ((ref_cr & cur_cr).any()) {
d.state = STATE_INVALID;
} else {
d.state = STATE_FIRST_RUN;
d.len1++;
}
return;
}
}
/* no break, falling through */
case MultibyteAccelInfo::MAT_SHIFT:
{
/*
* For shift-matcher, we want two matches separated by anything.
*/
if (ref_cr == cur_cr) {
// keep matching tail
switch (d.state) {
case STATE_FIRST_RUN:
d.state = STATE_FIRST_TAIL;
break;
case STATE_FIRST_TAIL:
d.tlen1++;
break;
default:
// shouldn't happen
assert(0);
}
} else {
switch (d.state) {
case STATE_FIRST_RUN:
// simply advance
d.len1++;
break;
case STATE_FIRST_TAIL:
// we found a non-matching char after tail, so stop
d.state = STATE_STOPPED;
break;
default:
// shouldn't happen
assert(0);
}
}
}
break;
case MultibyteAccelInfo::MAT_DSHIFTGRAB:
{
/*
* For double shift-grab matcher, we want two matches separated by
* either negative matches or dots; however the second vertex *must*
* be a negative match.
*
* Doubleshiftgrab matcher is identical to doubleshift except for
* presence of grab.
*/
if (d.state == STATE_WAITING_FOR_GRAB) {
if ((ref_cr & cur_cr).any()) {
d.state = STATE_INVALID;
} else {
d.state = STATE_FIRST_RUN;
d.len1++;
}
return;
}
}
/* no break, falling through */
case MultibyteAccelInfo::MAT_DSHIFT:
{
/*
* For double shift matcher, we want three matches, each separated
* by a lot of anything.
*
* Doubleshift matcher is complicated by presence of tails.
*/
if (ref_cr == cur_cr) {
// decide if we are activating second shift or matching tails
switch (d.state) {
case STATE_FIRST_RUN:
d.state = STATE_FIRST_TAIL;
d.len2 = 1; // we're now ready for our second run
break;
case STATE_FIRST_TAIL:
d.tlen1++;
break;
case STATE_SECOND_RUN:
d.state = STATE_SECOND_TAIL;
break;
case STATE_SECOND_TAIL:
d.tlen2++;
break;
default:
// shouldn't happen
assert(0);
}
} else {
switch (d.state) {
case STATE_FIRST_RUN:
d.len1++;
break;
case STATE_FIRST_TAIL:
// start second run
d.state = STATE_SECOND_RUN;
d.len2++;
break;
case STATE_SECOND_RUN:
d.len2++;
break;
case STATE_SECOND_TAIL:
// stop
d.state = STATE_STOPPED;
break;
default:
// shouldn't happen
assert(0);
}
}
}
break;
default:
// shouldn't happen
assert(0);
break;
}
}
MultiaccelCompileHelper::MultiaccelCompileHelper(const CharReach &ref_cr,
u32 off, unsigned max_length)
: cr(ref_cr), offset(off), max_len(max_length) {
int accel_num = (int) MultibyteAccelInfo::MAT_MAX;
accels.resize(accel_num);
// mark everything as valid
for (int i = 0; i < accel_num; i++) {
accel_data &ad = accels[i];
ad.len1 = 1;
ad.type = (MultibyteAccelInfo::multiaccel_type) i;
/* for shift-grab matchers, we are waiting for the grab right at the start */
if (ad.type == MultibyteAccelInfo::MAT_SHIFTGRAB
|| ad.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) {
ad.state = STATE_WAITING_FOR_GRAB;
} else {
ad.state = STATE_FIRST_RUN;
}
}
}
bool MultiaccelCompileHelper::canAdvance() {
for (const accel_data &ad : accels) {
if (ad.state != STATE_STOPPED && ad.state != STATE_INVALID) {
return true;
}
}
return false;
}
void MultiaccelCompileHelper::advance(const CharReach &cur_cr) {
for (accel_data &ad : accels) {
if (ad.state == STATE_STOPPED || ad.state == STATE_INVALID) {
continue;
}
match(ad, cr, cur_cr);
#ifdef DEBUG
dumpMultiaccelState(ad);
#endif
}
}
MultibyteAccelInfo MultiaccelCompileHelper::getBestScheme() {
int best_len = 0;
accel_data best;
DEBUG_PRINTF("Stopping multiaccel compile\n");
for (accel_data &ad : accels) {
// stop our matching
stop(ad);
validate(ad, max_len);
#ifdef DEBUG
dumpMultiaccelState(ad);
#endif
// skip invalid schemes
if (ad.state == STATE_INVALID) {
continue;
}
DEBUG_PRINTF("Marking as viable\n");
// TODO: relative strengths of accel schemes? maybe e.g. a shorter
// long match would in some cases be preferable to a longer
// double shift match (for example, depending on length)?
int as_len = ad.len1 + ad.len2;
if (as_len >= best_len) {
DEBUG_PRINTF("Marking as best\n");
best_len = as_len;
best = ad;
}
}
// if we found at least one accel scheme, return it
if (best.state != STATE_INVALID) {
#ifdef DEBUG
DEBUG_PRINTF("Picked best multiaccel state:\n");
dumpMultiaccelState(best);
#endif
MultibyteAccelInfo info;
info.cr = cr;
info.offset = offset;
info.len1 = best.len1;
info.len2 = best.len2;
info.type = best.type;
return info;
}
return MultibyteAccelInfo();
}

View File

@ -1,75 +0,0 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MULTIACCELCOMPILE_H_
#define MULTIACCELCOMPILE_H_
#include "ue2common.h"
#include "nfagraph/ng_limex_accel.h"
#include <vector>
namespace ue2 {
/* accel scheme state machine */
enum accel_scheme_state {
STATE_FIRST_RUN,
STATE_SECOND_RUN,
STATE_WAITING_FOR_GRAB,
STATE_FIRST_TAIL,
STATE_SECOND_TAIL,
STATE_STOPPED,
STATE_INVALID
};
struct accel_data {
MultibyteAccelInfo::multiaccel_type type = MultibyteAccelInfo::MAT_NONE;
accel_scheme_state state = STATE_INVALID;
unsigned len1 = 0; /* length of first run */
unsigned len2 = 0; /* length of second run, if present */
unsigned tlen1 = 0; /* first tail length */
unsigned tlen2 = 0; /* second tail length */
};
class MultiaccelCompileHelper {
private:
const CharReach &cr;
u32 offset;
std::vector<accel_data> accels;
unsigned max_len;
public:
MultiaccelCompileHelper(const CharReach &cr, u32 off, unsigned max_len);
bool canAdvance();
MultibyteAccelInfo getBestScheme();
void advance(const ue2::CharReach &cr);
};
}; // namespace
#endif /* MULTIACCELCOMPILE_H_ */

View File

@ -1,149 +0,0 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MULTIACCEL_DOUBLESHIFT_H_
#define MULTIACCEL_DOUBLESHIFT_H_
#include "multiaccel_common.h"
#define DOUBLESHIFT_MATCH(len, match_t, match_sz) \
static really_inline \
const u8 * JOIN4(doubleshiftMatch_, match_sz, _, len)(const u8 *buf, match_t z, u32 len2) {\
if (unlikely(z)) { \
match_t tmp = z; \
z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \
tmp |= ((match_t) (1 << (len + len2)) - 1) << (match_sz / 2); \
VARISHIFT(z, z, len); \
VARISHIFT(tmp, tmp, len2); \
VARISHIFT(tmp, z, len); \
return JOIN(match, match_sz)(buf, z); \
} \
return NULL; \
}
#define DOUBLESHIFT_MATCH_32_DEF(n) \
DOUBLESHIFT_MATCH(n, u32, 32)
#define DOUBLESHIFT_MATCH_64_DEF(n) \
DOUBLESHIFT_MATCH(n, u64a, 64)
#define DOUBLESHIFT_MATCH_DEF(n) \
DOUBLESHIFT_MATCH_32_DEF(n) \
DOUBLESHIFT_MATCH_64_DEF(n)
DOUBLESHIFT_MATCH_DEF(1)
DOUBLESHIFT_MATCH_DEF(2)
DOUBLESHIFT_MATCH_DEF(3)
DOUBLESHIFT_MATCH_DEF(4)
DOUBLESHIFT_MATCH_DEF(5)
DOUBLESHIFT_MATCH_DEF(6)
DOUBLESHIFT_MATCH_DEF(7)
DOUBLESHIFT_MATCH_DEF(8)
DOUBLESHIFT_MATCH_DEF(9)
DOUBLESHIFT_MATCH_DEF(10)
DOUBLESHIFT_MATCH_DEF(11)
DOUBLESHIFT_MATCH_DEF(12)
DOUBLESHIFT_MATCH_DEF(13)
DOUBLESHIFT_MATCH_DEF(14)
DOUBLESHIFT_MATCH_DEF(15)
DOUBLESHIFT_MATCH_64_DEF(16)
DOUBLESHIFT_MATCH_64_DEF(17)
DOUBLESHIFT_MATCH_64_DEF(18)
DOUBLESHIFT_MATCH_64_DEF(19)
DOUBLESHIFT_MATCH_64_DEF(20)
DOUBLESHIFT_MATCH_64_DEF(21)
DOUBLESHIFT_MATCH_64_DEF(22)
DOUBLESHIFT_MATCH_64_DEF(23)
DOUBLESHIFT_MATCH_64_DEF(24)
DOUBLESHIFT_MATCH_64_DEF(25)
DOUBLESHIFT_MATCH_64_DEF(26)
DOUBLESHIFT_MATCH_64_DEF(27)
DOUBLESHIFT_MATCH_64_DEF(28)
DOUBLESHIFT_MATCH_64_DEF(29)
DOUBLESHIFT_MATCH_64_DEF(30)
DOUBLESHIFT_MATCH_64_DEF(31)
static
const UNUSED u8 * (*doubleshift_match_funcs_32[])(const u8 *buf, u32 z, u32 len2) =
{
// skip the first
0,
&doubleshiftMatch_32_1,
&doubleshiftMatch_32_2,
&doubleshiftMatch_32_3,
&doubleshiftMatch_32_4,
&doubleshiftMatch_32_5,
&doubleshiftMatch_32_6,
&doubleshiftMatch_32_7,
&doubleshiftMatch_32_8,
&doubleshiftMatch_32_9,
&doubleshiftMatch_32_10,
&doubleshiftMatch_32_11,
&doubleshiftMatch_32_12,
&doubleshiftMatch_32_13,
&doubleshiftMatch_32_14,
&doubleshiftMatch_32_15,
};
static
const UNUSED u8 * (*doubleshift_match_funcs_64[])(const u8 *buf, u64a z, u32 len2) =
{
// skip the first
0,
&doubleshiftMatch_64_1,
&doubleshiftMatch_64_2,
&doubleshiftMatch_64_3,
&doubleshiftMatch_64_4,
&doubleshiftMatch_64_5,
&doubleshiftMatch_64_6,
&doubleshiftMatch_64_7,
&doubleshiftMatch_64_8,
&doubleshiftMatch_64_9,
&doubleshiftMatch_64_10,
&doubleshiftMatch_64_11,
&doubleshiftMatch_64_12,
&doubleshiftMatch_64_13,
&doubleshiftMatch_64_14,
&doubleshiftMatch_64_15,
&doubleshiftMatch_64_16,
&doubleshiftMatch_64_17,
&doubleshiftMatch_64_18,
&doubleshiftMatch_64_19,
&doubleshiftMatch_64_20,
&doubleshiftMatch_64_21,
&doubleshiftMatch_64_22,
&doubleshiftMatch_64_23,
&doubleshiftMatch_64_24,
&doubleshiftMatch_64_25,
&doubleshiftMatch_64_26,
&doubleshiftMatch_64_27,
&doubleshiftMatch_64_28,
&doubleshiftMatch_64_29,
&doubleshiftMatch_64_30,
&doubleshiftMatch_64_31,
};
#endif /* MULTIACCEL_DOUBLESHIFT_H_ */

View File

@ -1,152 +0,0 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MULTIACCEL_DOUBLESHIFTGRAB_H_
#define MULTIACCEL_DOUBLESHIFTGRAB_H_
#include "multiaccel_common.h"
#define DOUBLESHIFTGRAB_MATCH(len, match_t, match_sz) \
static really_inline \
const u8 * JOIN4(doubleshiftgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z, u32 len2) {\
if (unlikely(z)) { \
match_t neg = ~z; \
match_t tmp = z; \
z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \
tmp |= ((match_t) (1 << (len + len2)) - 1) << (match_sz / 2); \
neg |= ((match_t) (1 << len) - 1) << (match_sz / 2); \
VARISHIFT(z, z, len); \
VARISHIFT(tmp, tmp, len2); \
VARISHIFT(neg, z, 1); \
VARISHIFT(tmp, z, len); \
return JOIN(match, match_sz)(buf, z); \
} \
return NULL; \
}
#define DOUBLESHIFTGRAB_MATCH_32_DEF(n) \
DOUBLESHIFTGRAB_MATCH(n, u32, 32)
#define DOUBLESHIFTGRAB_MATCH_64_DEF(n) \
DOUBLESHIFTGRAB_MATCH(n, u64a, 64)
#define DOUBLESHIFTGRAB_MATCH_DEF(n) \
DOUBLESHIFTGRAB_MATCH_32_DEF(n) \
DOUBLESHIFTGRAB_MATCH_64_DEF(n)
DOUBLESHIFTGRAB_MATCH_DEF(1)
DOUBLESHIFTGRAB_MATCH_DEF(2)
DOUBLESHIFTGRAB_MATCH_DEF(3)
DOUBLESHIFTGRAB_MATCH_DEF(4)
DOUBLESHIFTGRAB_MATCH_DEF(5)
DOUBLESHIFTGRAB_MATCH_DEF(6)
DOUBLESHIFTGRAB_MATCH_DEF(7)
DOUBLESHIFTGRAB_MATCH_DEF(8)
DOUBLESHIFTGRAB_MATCH_DEF(9)
DOUBLESHIFTGRAB_MATCH_DEF(10)
DOUBLESHIFTGRAB_MATCH_DEF(11)
DOUBLESHIFTGRAB_MATCH_DEF(12)
DOUBLESHIFTGRAB_MATCH_DEF(13)
DOUBLESHIFTGRAB_MATCH_DEF(14)
DOUBLESHIFTGRAB_MATCH_DEF(15)
DOUBLESHIFTGRAB_MATCH_64_DEF(16)
DOUBLESHIFTGRAB_MATCH_64_DEF(17)
DOUBLESHIFTGRAB_MATCH_64_DEF(18)
DOUBLESHIFTGRAB_MATCH_64_DEF(19)
DOUBLESHIFTGRAB_MATCH_64_DEF(20)
DOUBLESHIFTGRAB_MATCH_64_DEF(21)
DOUBLESHIFTGRAB_MATCH_64_DEF(22)
DOUBLESHIFTGRAB_MATCH_64_DEF(23)
DOUBLESHIFTGRAB_MATCH_64_DEF(24)
DOUBLESHIFTGRAB_MATCH_64_DEF(25)
DOUBLESHIFTGRAB_MATCH_64_DEF(26)
DOUBLESHIFTGRAB_MATCH_64_DEF(27)
DOUBLESHIFTGRAB_MATCH_64_DEF(28)
DOUBLESHIFTGRAB_MATCH_64_DEF(29)
DOUBLESHIFTGRAB_MATCH_64_DEF(30)
DOUBLESHIFTGRAB_MATCH_64_DEF(31)
static
const UNUSED u8 * (*doubleshiftgrab_match_funcs_32[])(const u8 *buf, u32 z, u32 len2) =
{
// skip the first
0,
&doubleshiftgrabMatch_32_1,
&doubleshiftgrabMatch_32_2,
&doubleshiftgrabMatch_32_3,
&doubleshiftgrabMatch_32_4,
&doubleshiftgrabMatch_32_5,
&doubleshiftgrabMatch_32_6,
&doubleshiftgrabMatch_32_7,
&doubleshiftgrabMatch_32_8,
&doubleshiftgrabMatch_32_9,
&doubleshiftgrabMatch_32_10,
&doubleshiftgrabMatch_32_11,
&doubleshiftgrabMatch_32_12,
&doubleshiftgrabMatch_32_13,
&doubleshiftgrabMatch_32_14,
&doubleshiftgrabMatch_32_15,
};
static
const UNUSED u8 * (*doubleshiftgrab_match_funcs_64[])(const u8 *buf, u64a z, u32 len2) =
{
// skip the first
0,
&doubleshiftgrabMatch_64_1,
&doubleshiftgrabMatch_64_2,
&doubleshiftgrabMatch_64_3,
&doubleshiftgrabMatch_64_4,
&doubleshiftgrabMatch_64_5,
&doubleshiftgrabMatch_64_6,
&doubleshiftgrabMatch_64_7,
&doubleshiftgrabMatch_64_8,
&doubleshiftgrabMatch_64_9,
&doubleshiftgrabMatch_64_10,
&doubleshiftgrabMatch_64_11,
&doubleshiftgrabMatch_64_12,
&doubleshiftgrabMatch_64_13,
&doubleshiftgrabMatch_64_14,
&doubleshiftgrabMatch_64_15,
&doubleshiftgrabMatch_64_16,
&doubleshiftgrabMatch_64_17,
&doubleshiftgrabMatch_64_18,
&doubleshiftgrabMatch_64_19,
&doubleshiftgrabMatch_64_20,
&doubleshiftgrabMatch_64_21,
&doubleshiftgrabMatch_64_22,
&doubleshiftgrabMatch_64_23,
&doubleshiftgrabMatch_64_24,
&doubleshiftgrabMatch_64_25,
&doubleshiftgrabMatch_64_26,
&doubleshiftgrabMatch_64_27,
&doubleshiftgrabMatch_64_28,
&doubleshiftgrabMatch_64_29,
&doubleshiftgrabMatch_64_30,
&doubleshiftgrabMatch_64_31,
};
#endif /* MULTIACCEL_DOUBLESHIFTGRAB_H_ */

View File

@ -1,145 +0,0 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MULTIACCEL_LONG_H_
#define MULTIACCEL_LONG_H_
#include "multiaccel_common.h"
#define LONG_MATCH(len, match_t, match_sz) \
static really_inline \
const u8 * JOIN4(longMatch_, match_sz, _, len)(const u8 *buf, match_t z) { \
if (unlikely(z)) { \
z |= ((match_t) (1 << (len - 1)) - 1) << (match_sz / 2); \
JOIN(SHIFT, len)(z); \
return JOIN(match, match_sz)(buf, z); \
} \
return NULL; \
}
#define LONG_MATCH_32_DEF(n) \
LONG_MATCH(n, u32, 32)
#define LONG_MATCH_64_DEF(n) \
LONG_MATCH(n, u64a, 64)
#define LONG_MATCH_DEF(n) \
LONG_MATCH_32_DEF(n) \
LONG_MATCH_64_DEF(n)
LONG_MATCH_DEF(1)
LONG_MATCH_DEF(2)
LONG_MATCH_DEF(3)
LONG_MATCH_DEF(4)
LONG_MATCH_DEF(5)
LONG_MATCH_DEF(6)
LONG_MATCH_DEF(7)
LONG_MATCH_DEF(8)
LONG_MATCH_DEF(9)
LONG_MATCH_DEF(10)
LONG_MATCH_DEF(11)
LONG_MATCH_DEF(12)
LONG_MATCH_DEF(13)
LONG_MATCH_DEF(14)
LONG_MATCH_DEF(15)
LONG_MATCH_64_DEF(16)
LONG_MATCH_64_DEF(17)
LONG_MATCH_64_DEF(18)
LONG_MATCH_64_DEF(19)
LONG_MATCH_64_DEF(20)
LONG_MATCH_64_DEF(21)
LONG_MATCH_64_DEF(22)
LONG_MATCH_64_DEF(23)
LONG_MATCH_64_DEF(24)
LONG_MATCH_64_DEF(25)
LONG_MATCH_64_DEF(26)
LONG_MATCH_64_DEF(27)
LONG_MATCH_64_DEF(28)
LONG_MATCH_64_DEF(29)
LONG_MATCH_64_DEF(30)
LONG_MATCH_64_DEF(31)
static
const UNUSED u8 *(*long_match_funcs_32[])(const u8 *buf, u32 z) =
{
// skip the first three
0,
&longMatch_32_1,
&longMatch_32_2,
&longMatch_32_3,
&longMatch_32_4,
&longMatch_32_5,
&longMatch_32_6,
&longMatch_32_7,
&longMatch_32_8,
&longMatch_32_9,
&longMatch_32_10,
&longMatch_32_11,
&longMatch_32_12,
&longMatch_32_13,
&longMatch_32_14,
&longMatch_32_15,
};
static
const UNUSED u8 *(*long_match_funcs_64[])(const u8 *buf, u64a z) =
{
// skip the first three
0,
&longMatch_64_1,
&longMatch_64_2,
&longMatch_64_3,
&longMatch_64_4,
&longMatch_64_5,
&longMatch_64_6,
&longMatch_64_7,
&longMatch_64_8,
&longMatch_64_9,
&longMatch_64_10,
&longMatch_64_11,
&longMatch_64_12,
&longMatch_64_13,
&longMatch_64_14,
&longMatch_64_15,
&longMatch_64_16,
&longMatch_64_17,
&longMatch_64_18,
&longMatch_64_19,
&longMatch_64_20,
&longMatch_64_21,
&longMatch_64_22,
&longMatch_64_23,
&longMatch_64_24,
&longMatch_64_25,
&longMatch_64_26,
&longMatch_64_27,
&longMatch_64_28,
&longMatch_64_29,
&longMatch_64_30,
&longMatch_64_31,
};
#endif /* MULTIACCEL_LONG_H_ */

View File

@ -1,148 +0,0 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MULTIACCEL_LONGGRAB_H_
#define MULTIACCEL_LONGGRAB_H_
#include "multiaccel_common.h"
#define LONGGRAB_MATCH(len, match_t, match_sz) \
static really_inline \
const u8 * JOIN4(longgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z) { \
if (unlikely(z)) { \
match_t tmp = ~z; \
tmp |= ((match_t) (1 << len) - 1) << (match_sz / 2); \
z |= ((match_t) (1 << (len - 1)) - 1) << (match_sz / 2); \
JOIN(SHIFT, len)(z); \
VARISHIFT(tmp, z, len); \
return JOIN(match, match_sz)(buf, z); \
} \
return NULL; \
}
#define LONGGRAB_MATCH_32_DEF(n) \
LONGGRAB_MATCH(n, u32, 32)
#define LONGGRAB_MATCH_64_DEF(n) \
LONGGRAB_MATCH(n, u64a, 64)
#define LONGGRAB_MATCH_DEF(n) \
LONGGRAB_MATCH_32_DEF(n) \
LONGGRAB_MATCH_64_DEF(n)
LONGGRAB_MATCH_DEF(1)
LONGGRAB_MATCH_DEF(2)
LONGGRAB_MATCH_DEF(3)
LONGGRAB_MATCH_DEF(4)
LONGGRAB_MATCH_DEF(5)
LONGGRAB_MATCH_DEF(6)
LONGGRAB_MATCH_DEF(7)
LONGGRAB_MATCH_DEF(8)
LONGGRAB_MATCH_DEF(9)
LONGGRAB_MATCH_DEF(10)
LONGGRAB_MATCH_DEF(11)
LONGGRAB_MATCH_DEF(12)
LONGGRAB_MATCH_DEF(13)
LONGGRAB_MATCH_DEF(14)
LONGGRAB_MATCH_DEF(15)
LONGGRAB_MATCH_64_DEF(16)
LONGGRAB_MATCH_64_DEF(17)
LONGGRAB_MATCH_64_DEF(18)
LONGGRAB_MATCH_64_DEF(19)
LONGGRAB_MATCH_64_DEF(20)
LONGGRAB_MATCH_64_DEF(21)
LONGGRAB_MATCH_64_DEF(22)
LONGGRAB_MATCH_64_DEF(23)
LONGGRAB_MATCH_64_DEF(24)
LONGGRAB_MATCH_64_DEF(25)
LONGGRAB_MATCH_64_DEF(26)
LONGGRAB_MATCH_64_DEF(27)
LONGGRAB_MATCH_64_DEF(28)
LONGGRAB_MATCH_64_DEF(29)
LONGGRAB_MATCH_64_DEF(30)
LONGGRAB_MATCH_64_DEF(31)
static
const UNUSED u8 *(*longgrab_match_funcs_32[])(const u8 *buf, u32 z) =
{
// skip the first three
0,
&longgrabMatch_32_1,
&longgrabMatch_32_2,
&longgrabMatch_32_3,
&longgrabMatch_32_4,
&longgrabMatch_32_5,
&longgrabMatch_32_6,
&longgrabMatch_32_7,
&longgrabMatch_32_8,
&longgrabMatch_32_9,
&longgrabMatch_32_10,
&longgrabMatch_32_11,
&longgrabMatch_32_12,
&longgrabMatch_32_13,
&longgrabMatch_32_14,
&longgrabMatch_32_15,
};
static
const UNUSED u8 *(*longgrab_match_funcs_64[])(const u8 *buf, u64a z) =
{
// skip the first three
0,
&longgrabMatch_64_1,
&longgrabMatch_64_2,
&longgrabMatch_64_3,
&longgrabMatch_64_4,
&longgrabMatch_64_5,
&longgrabMatch_64_6,
&longgrabMatch_64_7,
&longgrabMatch_64_8,
&longgrabMatch_64_9,
&longgrabMatch_64_10,
&longgrabMatch_64_11,
&longgrabMatch_64_12,
&longgrabMatch_64_13,
&longgrabMatch_64_14,
&longgrabMatch_64_15,
&longgrabMatch_64_16,
&longgrabMatch_64_17,
&longgrabMatch_64_18,
&longgrabMatch_64_19,
&longgrabMatch_64_20,
&longgrabMatch_64_21,
&longgrabMatch_64_22,
&longgrabMatch_64_23,
&longgrabMatch_64_24,
&longgrabMatch_64_25,
&longgrabMatch_64_26,
&longgrabMatch_64_27,
&longgrabMatch_64_28,
&longgrabMatch_64_29,
&longgrabMatch_64_30,
&longgrabMatch_64_31,
};
#endif /* MULTIACCEL_LONGGRAB_H_ */

View File

@ -1,145 +0,0 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MULTIACCEL_SHIFT_H_
#define MULTIACCEL_SHIFT_H_
#include "multiaccel_common.h"
#define SHIFT_MATCH(len, match_t, match_sz) \
static really_inline \
const u8 * JOIN4(shiftMatch_, match_sz, _, len)(const u8 *buf, match_t z) {\
if (unlikely(z)) { \
z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \
VARISHIFT(z, z, len); \
return JOIN(match, match_sz)(buf, z); \
} \
return NULL; \
}
#define SHIFT_MATCH_32_DEF(n) \
SHIFT_MATCH(n, u32, 32)
#define SHIFT_MATCH_64_DEF(n) \
SHIFT_MATCH(n, u64a, 64)
#define SHIFT_MATCH_DEF(n) \
SHIFT_MATCH_32_DEF(n) \
SHIFT_MATCH_64_DEF(n)
SHIFT_MATCH_DEF(1)
SHIFT_MATCH_DEF(2)
SHIFT_MATCH_DEF(3)
SHIFT_MATCH_DEF(4)
SHIFT_MATCH_DEF(5)
SHIFT_MATCH_DEF(6)
SHIFT_MATCH_DEF(7)
SHIFT_MATCH_DEF(8)
SHIFT_MATCH_DEF(9)
SHIFT_MATCH_DEF(10)
SHIFT_MATCH_DEF(11)
SHIFT_MATCH_DEF(12)
SHIFT_MATCH_DEF(13)
SHIFT_MATCH_DEF(14)
SHIFT_MATCH_DEF(15)
SHIFT_MATCH_64_DEF(16)
SHIFT_MATCH_64_DEF(17)
SHIFT_MATCH_64_DEF(18)
SHIFT_MATCH_64_DEF(19)
SHIFT_MATCH_64_DEF(20)
SHIFT_MATCH_64_DEF(21)
SHIFT_MATCH_64_DEF(22)
SHIFT_MATCH_64_DEF(23)
SHIFT_MATCH_64_DEF(24)
SHIFT_MATCH_64_DEF(25)
SHIFT_MATCH_64_DEF(26)
SHIFT_MATCH_64_DEF(27)
SHIFT_MATCH_64_DEF(28)
SHIFT_MATCH_64_DEF(29)
SHIFT_MATCH_64_DEF(30)
SHIFT_MATCH_64_DEF(31)
static
const UNUSED u8 * (*shift_match_funcs_32[])(const u8 *buf, u32 z) =
{
// skip the first
0,
&shiftMatch_32_1,
&shiftMatch_32_2,
&shiftMatch_32_3,
&shiftMatch_32_4,
&shiftMatch_32_5,
&shiftMatch_32_6,
&shiftMatch_32_7,
&shiftMatch_32_8,
&shiftMatch_32_9,
&shiftMatch_32_10,
&shiftMatch_32_11,
&shiftMatch_32_12,
&shiftMatch_32_13,
&shiftMatch_32_14,
&shiftMatch_32_15,
};
static
const UNUSED u8 * (*shift_match_funcs_64[])(const u8 *buf, u64a z) =
{
// skip the first
0,
&shiftMatch_64_1,
&shiftMatch_64_2,
&shiftMatch_64_3,
&shiftMatch_64_4,
&shiftMatch_64_5,
&shiftMatch_64_6,
&shiftMatch_64_7,
&shiftMatch_64_8,
&shiftMatch_64_9,
&shiftMatch_64_10,
&shiftMatch_64_11,
&shiftMatch_64_12,
&shiftMatch_64_13,
&shiftMatch_64_14,
&shiftMatch_64_15,
&shiftMatch_64_16,
&shiftMatch_64_17,
&shiftMatch_64_18,
&shiftMatch_64_19,
&shiftMatch_64_20,
&shiftMatch_64_21,
&shiftMatch_64_22,
&shiftMatch_64_23,
&shiftMatch_64_24,
&shiftMatch_64_25,
&shiftMatch_64_26,
&shiftMatch_64_27,
&shiftMatch_64_28,
&shiftMatch_64_29,
&shiftMatch_64_30,
&shiftMatch_64_31,
};
#endif /* MULTIACCEL_SHIFT_H_ */

View File

@ -1,148 +0,0 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MULTIACCEL_SHIFTGRAB_H_
#define MULTIACCEL_SHIFTGRAB_H_
#include "multiaccel_common.h"
#define SHIFTGRAB_MATCH(len, match_t, match_sz) \
static really_inline \
const u8 * JOIN4(shiftgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z) {\
if (unlikely(z)) { \
match_t tmp = ~z; \
z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \
tmp |= ((match_t) (1 << len) - 1) << (match_sz / 2); \
VARISHIFT(z, z, len); \
VARISHIFT(tmp, z, 1); \
return JOIN(match, match_sz)(buf, z); \
} \
return NULL; \
}
#define SHIFTGRAB_MATCH_32_DEF(n) \
SHIFTGRAB_MATCH(n, u32, 32)
#define SHIFTGRAB_MATCH_64_DEF(n) \
SHIFTGRAB_MATCH(n, u64a, 64)
#define SHIFTGRAB_MATCH_DEF(n) \
SHIFTGRAB_MATCH_32_DEF(n) \
SHIFTGRAB_MATCH_64_DEF(n)
SHIFTGRAB_MATCH_DEF(1)
SHIFTGRAB_MATCH_DEF(2)
SHIFTGRAB_MATCH_DEF(3)
SHIFTGRAB_MATCH_DEF(4)
SHIFTGRAB_MATCH_DEF(5)
SHIFTGRAB_MATCH_DEF(6)
SHIFTGRAB_MATCH_DEF(7)
SHIFTGRAB_MATCH_DEF(8)
SHIFTGRAB_MATCH_DEF(9)
SHIFTGRAB_MATCH_DEF(10)
SHIFTGRAB_MATCH_DEF(11)
SHIFTGRAB_MATCH_DEF(12)
SHIFTGRAB_MATCH_DEF(13)
SHIFTGRAB_MATCH_DEF(14)
SHIFTGRAB_MATCH_DEF(15)
SHIFTGRAB_MATCH_64_DEF(16)
SHIFTGRAB_MATCH_64_DEF(17)
SHIFTGRAB_MATCH_64_DEF(18)
SHIFTGRAB_MATCH_64_DEF(19)
SHIFTGRAB_MATCH_64_DEF(20)
SHIFTGRAB_MATCH_64_DEF(21)
SHIFTGRAB_MATCH_64_DEF(22)
SHIFTGRAB_MATCH_64_DEF(23)
SHIFTGRAB_MATCH_64_DEF(24)
SHIFTGRAB_MATCH_64_DEF(25)
SHIFTGRAB_MATCH_64_DEF(26)
SHIFTGRAB_MATCH_64_DEF(27)
SHIFTGRAB_MATCH_64_DEF(28)
SHIFTGRAB_MATCH_64_DEF(29)
SHIFTGRAB_MATCH_64_DEF(30)
SHIFTGRAB_MATCH_64_DEF(31)
static
const UNUSED u8 * (*shiftgrab_match_funcs_32[])(const u8 *buf, u32 z) =
{
// skip the first
0,
&shiftgrabMatch_32_1,
&shiftgrabMatch_32_2,
&shiftgrabMatch_32_3,
&shiftgrabMatch_32_4,
&shiftgrabMatch_32_5,
&shiftgrabMatch_32_6,
&shiftgrabMatch_32_7,
&shiftgrabMatch_32_8,
&shiftgrabMatch_32_9,
&shiftgrabMatch_32_10,
&shiftgrabMatch_32_11,
&shiftgrabMatch_32_12,
&shiftgrabMatch_32_13,
&shiftgrabMatch_32_14,
&shiftgrabMatch_32_15,
};
static
const UNUSED u8 * (*shiftgrab_match_funcs_64[])(const u8 *buf, u64a z) =
{
// skip the first
0,
&shiftgrabMatch_64_1,
&shiftgrabMatch_64_2,
&shiftgrabMatch_64_3,
&shiftgrabMatch_64_4,
&shiftgrabMatch_64_5,
&shiftgrabMatch_64_6,
&shiftgrabMatch_64_7,
&shiftgrabMatch_64_8,
&shiftgrabMatch_64_9,
&shiftgrabMatch_64_10,
&shiftgrabMatch_64_11,
&shiftgrabMatch_64_12,
&shiftgrabMatch_64_13,
&shiftgrabMatch_64_14,
&shiftgrabMatch_64_15,
&shiftgrabMatch_64_16,
&shiftgrabMatch_64_17,
&shiftgrabMatch_64_18,
&shiftgrabMatch_64_19,
&shiftgrabMatch_64_20,
&shiftgrabMatch_64_21,
&shiftgrabMatch_64_22,
&shiftgrabMatch_64_23,
&shiftgrabMatch_64_24,
&shiftgrabMatch_64_25,
&shiftgrabMatch_64_26,
&shiftgrabMatch_64_27,
&shiftgrabMatch_64_28,
&shiftgrabMatch_64_29,
&shiftgrabMatch_64_30,
&shiftgrabMatch_64_31,
};
#endif /* MULTIACCEL_SHIFTGRAB_H_ */

View File

@ -1,115 +0,0 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Shufti: character class acceleration.
*
* Utilises the SSSE3 pshufb shuffle instruction
*/
#include "config.h"
#include "ue2common.h"
#include "util/arch.h"
#include "multishufti.h"
#include "multiaccel_common.h"
#if !defined(HAVE_AVX2)
#define MATCH_ALGO long_
#include "multiaccel_long.h"
#include "multishufti_sse.h"
#undef MATCH_ALGO
#define MATCH_ALGO longgrab_
#include "multiaccel_longgrab.h"
#include "multishufti_sse.h"
#undef MATCH_ALGO
#define MATCH_ALGO shift_
#include "multiaccel_shift.h"
#include "multishufti_sse.h"
#undef MATCH_ALGO
#define MATCH_ALGO shiftgrab_
#include "multiaccel_shiftgrab.h"
#include "multishufti_sse.h"
#undef MATCH_ALGO
#define MULTIACCEL_DOUBLE
#define MATCH_ALGO doubleshift_
#include "multiaccel_doubleshift.h"
#include "multishufti_sse.h"
#undef MATCH_ALGO
#define MATCH_ALGO doubleshiftgrab_
#include "multiaccel_doubleshiftgrab.h"
#include "multishufti_sse.h"
#undef MATCH_ALGO
#undef MULTIACCEL_DOUBLE
#else
#define MATCH_ALGO long_
#include "multiaccel_long.h"
#include "multishufti_avx2.h"
#undef MATCH_ALGO
#define MATCH_ALGO longgrab_
#include "multiaccel_longgrab.h"
#include "multishufti_avx2.h"
#undef MATCH_ALGO
#define MATCH_ALGO shift_
#include "multiaccel_shift.h"
#include "multishufti_avx2.h"
#undef MATCH_ALGO
#define MATCH_ALGO shiftgrab_
#include "multiaccel_shiftgrab.h"
#include "multishufti_avx2.h"
#undef MATCH_ALGO
#define MULTIACCEL_DOUBLE
#define MATCH_ALGO doubleshift_
#include "multiaccel_doubleshift.h"
#include "multishufti_avx2.h"
#undef MATCH_ALGO
#define MATCH_ALGO doubleshiftgrab_
#include "multiaccel_doubleshiftgrab.h"
#include "multishufti_avx2.h"
#undef MATCH_ALGO
#undef MULTIACCEL_DOUBLE
#endif

View File

@ -1,70 +0,0 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Multishufti: multibyte version of Shufti
*
* Utilises the SSSE3 pshufb shuffle instruction
*/
#ifndef MULTISHUFTI_H
#define MULTISHUFTI_H
#include "ue2common.h"
#include "util/simd_types.h"
#ifdef __cplusplus
extern "C"
{
#endif
const u8 *long_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
const u8 *buf_end, const u8 run_len);
const u8 *longgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
const u8 *buf_end, const u8 run_len);
const u8 *shift_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
const u8 *buf_end, const u8 run_len);
const u8 *shiftgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
const u8 *buf_end, const u8 run_len);
const u8 *doubleshift_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
const u8 *buf_end, const u8 run_len,
const u8 run2_len);
const u8 *doubleshiftgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
const u8 *buf_end, const u8 run_len,
const u8 run2_len);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,121 +0,0 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "shufti_common.h"
#include "ue2common.h"
#include "util/bitutils.h"
#include "util/simd_utils.h"
static really_inline
const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 mask_lo, m256 mask_hi, m256 chars,
const u8 *buf, const m256 low4bits,
const m256 zeroes, const u8 run_len
#ifdef MULTIACCEL_DOUBLE
, const u8 run_len2
#endif
) {
u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes);
return (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])(buf, ~z
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
}
const u8 *JOIN(MATCH_ALGO, shuftiExec)(m128 mask_lo, m128 mask_hi,
const u8 *buf,
const u8 *buf_end, u8 run_len
#ifdef MULTIACCEL_DOUBLE
, u8 run_len2
#endif
) {
assert(buf && buf_end);
assert(buf < buf_end);
// Slow path for small cases.
if (buf_end - buf < 32) {
return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi,
buf, buf_end);
}
const m256 zeroes = zeroes256();
const m256 low4bits = set32x8(0xf);
const m256 wide_mask_lo = set2x128(mask_lo);
const m256 wide_mask_hi = set2x128(mask_hi);
const u8 *rv;
size_t min = (size_t)buf % 32;
assert(buf_end - buf >= 32);
// Preconditioning: most of the time our buffer won't be aligned.
m256 chars = loadu256(buf);
rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, chars, buf,
low4bits, zeroes, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (rv) {
return rv;
}
buf += (32 - min);
// Unrolling was here, but it wasn't doing anything but taking up space.
// Reroll FTW.
const u8 *last_block = buf_end - 32;
while (buf < last_block) {
m256 lchars = load256(buf);
rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, lchars, buf,
low4bits, zeroes, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (rv) {
return rv;
}
buf += 32;
}
// Use an unaligned load to mop up the last 32 bytes and get an accurate
// picture to buf_end.
assert(buf <= buf_end && buf >= buf_end - 32);
chars = loadu256(buf_end - 32);
rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, chars, buf_end - 32,
low4bits, zeroes, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (rv) {
return rv;
}
return buf_end;
}

View File

@ -1,265 +0,0 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "shufti_common.h"
#include "ue2common.h"
#include "util/bitutils.h"
#include "util/simd_utils.h"
/* Normal SSSE3 shufti */
static really_inline
const u8 *JOIN(MATCH_ALGO, fwdBlock)(m128 mask_lo, m128 mask_hi, m128 chars,
const u8 *buf, const m128 low4bits,
const m128 zeroes, const u8 run_len
#ifdef MULTIACCEL_DOUBLE
, const u8 run_len2
#endif
) {
// negate first 16 bits
u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes) ^ 0xFFFF;
return (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])(buf, z
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
}
/*
* 16-byte pipeline, for smaller scans
*/
static
const u8 *JOIN(MATCH_ALGO, shuftiPipeline16)(m128 mask_lo, m128 mask_hi,
const u8 *buf, const u8 *buf_end,
const m128 low4bits,
const m128 zeroes, const u8 run_len
#ifdef MULTIACCEL_DOUBLE
, const u8 run_len2
#endif
) {
const u8* ptr, *last_buf;
u32 last_res;
// pipeline prologue: scan first 16 bytes
m128 data = load128(buf);
u32 z = block(mask_lo, mask_hi, data, low4bits, zeroes) ^ 0xFFFF;
last_buf = buf;
last_res = z;
buf += 16;
// now, start the pipeline!
assert((size_t)buf % 16 == 0);
for (; buf + 15 < buf_end; buf += 16) {
// scan more data
data = load128(buf);
z = block(mask_lo, mask_hi, data, low4bits, zeroes) ^ 0xFFFF;
// do a comparison on previous result
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
(last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
last_buf = buf;
last_res = z;
}
assert(buf <= buf_end && buf >= buf_end - 16);
// epilogue: compare final results
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
(last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
return NULL;
}
/*
* 32-byte pipeline, for bigger scans
*/
static
const u8 *JOIN(MATCH_ALGO, shuftiPipeline32)(m128 mask_lo, m128 mask_hi,
const u8 *buf, const u8 *buf_end,
const m128 low4bits,
const m128 zeroes, const u8 run_len
#ifdef MULTIACCEL_DOUBLE
, const u8 run_len2
#endif
) {
const u8* ptr, *last_buf;
u32 res;
// pipeline prologue: scan first 32 bytes
m128 data1 = load128(buf);
u32 z1 = block(mask_lo, mask_hi, data1, low4bits, zeroes) ^ 0xFFFF;
m128 data2 = load128(buf + 16);
u32 z2 = block(mask_lo, mask_hi, data2, low4bits, zeroes) ^ 0xFFFF;
// store the results
u32 last_res = z1 | (z2 << 16);
last_buf = buf;
buf += 32;
// now, start the pipeline!
assert((size_t)buf % 16 == 0);
for (; buf + 31 < buf_end; buf += 32) {
// scan more data
data1 = load128(buf);
z1 = block(mask_lo, mask_hi, data1, low4bits, zeroes) ^ 0xFFFF;
data2 = load128(buf + 16);
z2 = block(mask_lo, mask_hi, data2, low4bits, zeroes) ^ 0xFFFF;
res = z1 | (z2 << 16);
// do a comparison on previous result
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
(last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
last_res = res;
last_buf = buf;
}
// epilogue: compare final results
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
(last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
// if we still have some data left, scan it too
for (; buf + 15 < buf_end; buf += 16) {
m128 chars = load128(buf);
ptr = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars, buf,
low4bits, zeroes, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
}
assert(buf <= buf_end && buf >= buf_end - 16);
return NULL;
}
const u8 *JOIN(MATCH_ALGO, shuftiExec)(m128 mask_lo, m128 mask_hi,
const u8 *buf,
const u8 *buf_end, u8 run_len
#ifdef MULTIACCEL_DOUBLE
, u8 run_len2
#endif
) {
assert(buf && buf_end);
assert(buf < buf_end);
// Slow path for small cases.
if (buf_end - buf < 16) {
return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi,
buf, buf_end);
}
const m128 zeroes = zeroes128();
const m128 low4bits = _mm_set1_epi8(0xf);
const u8 *rv;
size_t min = (size_t)buf % 16;
assert(buf_end - buf >= 16);
// Preconditioning: most of the time our buffer won't be aligned.
m128 chars = loadu128(buf);
rv = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars, buf,
low4bits, zeroes, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (rv) {
return rv;
}
buf += (16 - min);
// if we have enough data, run bigger pipeline; otherwise run smaller one
if (buf_end - buf >= 128) {
rv = JOIN(MATCH_ALGO, shuftiPipeline32)(mask_lo, mask_hi,
buf, buf_end, low4bits, zeroes, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(rv)) {
return rv;
}
} else if (buf_end - buf >= 16){
rv = JOIN(MATCH_ALGO, shuftiPipeline16)(mask_lo, mask_hi,
buf, buf_end, low4bits, zeroes, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(rv)) {
return rv;
}
}
// Use an unaligned load to mop up the last 16 bytes and get an accurate
// picture to buf_end.
chars = loadu128(buf_end - 16);
rv = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars,
buf_end - 16, low4bits, zeroes, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (rv) {
return rv;
}
return buf_end;
}

View File

@ -1,111 +0,0 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "ue2common.h"
#include "util/arch.h"
#include "multitruffle.h"
#include "util/bitutils.h"
#include "util/simd_utils.h"
#include "multiaccel_common.h"
#if !defined(HAVE_AVX2)
#define MATCH_ALGO long_
#include "multiaccel_long.h"
#include "multitruffle_sse.h"
#undef MATCH_ALGO
#define MATCH_ALGO longgrab_
#include "multiaccel_longgrab.h"
#include "multitruffle_sse.h"
#undef MATCH_ALGO
#define MATCH_ALGO shift_
#include "multiaccel_shift.h"
#include "multitruffle_sse.h"
#undef MATCH_ALGO
#define MATCH_ALGO shiftgrab_
#include "multiaccel_shiftgrab.h"
#include "multitruffle_sse.h"
#undef MATCH_ALGO
#define MULTIACCEL_DOUBLE
#define MATCH_ALGO doubleshift_
#include "multiaccel_doubleshift.h"
#include "multitruffle_sse.h"
#undef MATCH_ALGO
#define MATCH_ALGO doubleshiftgrab_
#include "multiaccel_doubleshiftgrab.h"
#include "multitruffle_sse.h"
#undef MATCH_ALGO
#undef MULTIACCEL_DOUBLE
#else
#define MATCH_ALGO long_
#include "multiaccel_long.h"
#include "multitruffle_avx2.h"
#undef MATCH_ALGO
#define MATCH_ALGO longgrab_
#include "multiaccel_longgrab.h"
#include "multitruffle_avx2.h"
#undef MATCH_ALGO
#define MATCH_ALGO shift_
#include "multiaccel_shift.h"
#include "multitruffle_avx2.h"
#undef MATCH_ALGO
#define MATCH_ALGO shiftgrab_
#include "multiaccel_shiftgrab.h"
#include "multitruffle_avx2.h"
#undef MATCH_ALGO
#define MULTIACCEL_DOUBLE
#define MATCH_ALGO doubleshift_
#include "multiaccel_doubleshift.h"
#include "multitruffle_avx2.h"
#undef MATCH_ALGO
#define MATCH_ALGO doubleshiftgrab_
#include "multiaccel_doubleshiftgrab.h"
#include "multitruffle_avx2.h"
#undef MATCH_ALGO
#undef MULTIACCEL_DOUBLE
#endif

View File

@ -1,73 +0,0 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MULTITRUFFLE_H
#define MULTITRUFFLE_H
/** \file
* \brief Multitruffle: multibyte version of Truffle.
*
* Utilises the SSSE3 pshufb shuffle instruction
*/
#include "util/simd_types.h"
#ifdef __cplusplus
extern "C"
{
#endif
const u8 *long_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
const u8 *buf, const u8 *buf_end, const u8 run_len);
const u8 *longgrab_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
const u8 *buf, const u8 *buf_end, const u8 run_len);
const u8 *shift_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
const u8 *buf, const u8 *buf_end, const u8 run_len);
const u8 *shiftgrab_truffleExec(m128 shuf_mask_lo_highclear,
m128 shuf_mask_lo_highset, const u8 *buf,
const u8 *buf_end, const u8 run_len);
const u8 *doubleshift_truffleExec(m128 shuf_mask_lo_highclear,
m128 shuf_mask_lo_highset, const u8 *buf,
const u8 *buf_end, const u8 run_len,
const u8 run2_len);
const u8 *doubleshiftgrab_truffleExec(m128 shuf_mask_lo_highclear,
m128 shuf_mask_lo_highset, const u8 *buf,
const u8 *buf_end, const u8 run_len,
const u8 run2_len);
#ifdef __cplusplus
}
#endif
#endif /* MULTITRUFFLE_H */

View File

@ -1,125 +0,0 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Matches a byte in a charclass using three shuffles
*/
#include "config.h"
#include "ue2common.h"
#include "multiaccel_common.h"
/*
* include "block" function
*/
#include "truffle_common.h"
/*
* single-byte truffle fwd match function, should only be defined when not
* compiling multiaccel
*/
static really_inline
const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset,
m256 v, const u8 *buf, const u8 run_len
#ifdef MULTIACCEL_DOUBLE
, const u8 run_len2
#endif
) {
u64a z = (u64a) block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v);
return (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])(buf, z ^ 0xFFFFFFFF
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
}
const u8 *JOIN(MATCH_ALGO, truffleExec)(m128 shuf_mask_lo_highclear,
m128 shuf_mask_lo_highset,
const u8 *buf, const u8 *buf_end, const u8 run_len
#ifdef MULTIACCEL_DOUBLE
, const u8 run_len2
#endif
) {
DEBUG_PRINTF("run_len %zu\n", buf_end - buf);
const m256 wide_clear = set2x128(shuf_mask_lo_highclear);
const m256 wide_set = set2x128(shuf_mask_lo_highset);
assert(buf && buf_end);
assert(buf < buf_end);
const u8 *rv;
if (buf_end - buf < 32) {
return truffleMini(wide_clear, wide_set, buf, buf_end);
}
size_t min = (size_t)buf % 32;
assert(buf_end - buf >= 32);
// Preconditioning: most of the time our buffer won't be aligned.
m256 chars = loadu256(buf);
rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, chars, buf, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (rv) {
return rv;
}
buf += (32 - min);
const u8 *last_block = buf_end - 32;
while (buf < last_block) {
m256 lchars = load256(buf);
rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, lchars,
buf, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (rv) {
return rv;
}
buf += 32;
}
// Use an unaligned load to mop up the last 32 bytes and get an accurate
// picture to buf_end.
assert(buf <= buf_end && buf >= buf_end - 32);
chars = loadu256(buf_end - 32);
rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, chars,
buf_end - 32, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (rv) {
return rv;
}
return buf_end;
}

View File

@ -1,265 +0,0 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "ue2common.h"
#include "multiaccel_common.h"
/*
* include "block" function
*/
#include "truffle_common.h"
/*
* single-byte truffle fwd match function, should only be defined when not
* compiling multiaccel
*/
static really_inline
const u8 *JOIN(MATCH_ALGO, fwdBlock)(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
m128 v, const u8 *buf, const u8 run_len
#ifdef MULTIACCEL_DOUBLE
, const u8 run_len2
#endif
) {
u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v) ^ 0xFFFF;
return (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])(buf, z
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
}
/*
* 16-byte pipeline, for smaller scans
*/
static
const u8 *JOIN(MATCH_ALGO, trufflePipeline16)(m128 shuf_mask_lo_highclear,
m128 shuf_mask_lo_highset,
const u8 *buf, const u8 *buf_end,
const u8 run_len
#ifdef MULTIACCEL_DOUBLE
, const u8 run_len2
#endif
) {
const u8* ptr, *last_buf;
u32 last_res;
// pipeline prologue: scan first 16 bytes
m128 data = load128(buf);
u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data) ^ 0xFFFF;
last_buf = buf;
last_res = z;
buf += 16;
// now, start the pipeline!
assert((size_t)buf % 16 == 0);
for (; buf + 15 < buf_end; buf += 16) {
// scan more data
data = load128(buf);
z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data) ^ 0xFFFF;
// do a comparison on previous result
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
(last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
last_buf = buf;
last_res = z;
}
assert(buf <= buf_end && buf >= buf_end - 16);
// epilogue: compare final results
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
(last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
return NULL;
}
/*
* 32-byte pipeline, for bigger scans
*/
static
const u8 *JOIN(MATCH_ALGO, trufflePipeline32)(m128 shuf_mask_lo_highclear,
m128 shuf_mask_lo_highset,
const u8 *buf, const u8 *buf_end,
const u8 run_len
#ifdef MULTIACCEL_DOUBLE
, const u8 run_len2
#endif
) {
const u8* ptr, *last_buf;
u32 res;
// pipeline prologue: scan first 32 bytes
m128 data1 = load128(buf);
u32 z1 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data1) ^ 0xFFFF;
m128 data2 = load128(buf + 16);
u32 z2 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data2) ^ 0xFFFF;
// store the results
u32 last_res = z1 | (z2 << 16);
last_buf = buf;
buf += 32;
// now, start the pipeline!
assert((size_t)buf % 16 == 0);
for (; buf + 31 < buf_end; buf += 32) {
// scan more data
data1 = load128(buf);
z1 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data1) ^ 0xFFFF;
data2 = load128(buf + 16);
z2 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data2) ^ 0xFFFF;
res = z1 | (z2 << 16);
// do a comparison on previous result
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
(last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
last_res = res;
last_buf = buf;
}
// epilogue: compare final results
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
(last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
// if we still have some data left, scan it too
for (; buf + 15 < buf_end; buf += 16) {
m128 chars = load128(buf);
ptr = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset,
chars, buf, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
}
assert(buf <= buf_end && buf >= buf_end - 16);
return NULL;
}
const u8 *JOIN(MATCH_ALGO, truffleExec)(m128 shuf_mask_lo_highclear,
m128 shuf_mask_lo_highset,
const u8 *buf, const u8 *buf_end, const u8 run_len
#ifdef MULTIACCEL_DOUBLE
, const u8 run_len2
#endif
) {
DEBUG_PRINTF("run_len %zu\n", buf_end - buf);
assert(buf && buf_end);
assert(buf < buf_end);
const u8 *rv;
if (buf_end - buf < 16) {
return truffleMini(shuf_mask_lo_highclear, shuf_mask_lo_highset, buf, buf_end);
}
size_t min = (size_t)buf % 16;
assert(buf_end - buf >= 16);
// Preconditioning: most of the time our buffer won't be aligned.
m128 chars = loadu128(buf);
rv = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, buf, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (rv) {
return rv;
}
buf += (16 - min);
// if we have enough data, run bigger pipeline; otherwise run smaller one
if (buf_end - buf >= 128) {
rv = JOIN(MATCH_ALGO, trufflePipeline32)(shuf_mask_lo_highclear, shuf_mask_lo_highset,
buf, buf_end, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(rv)) {
return rv;
}
} else if (buf_end - buf >= 16){
rv = JOIN(MATCH_ALGO, trufflePipeline16)(shuf_mask_lo_highclear, shuf_mask_lo_highset,
buf, buf_end, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(rv)) {
return rv;
}
}
// Use an unaligned load to mop up the last 16 bytes and get an accurate
// picture to buf_end.
chars = loadu128(buf_end - 16);
rv = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars,
buf_end - 16, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (rv) {
return rv;
}
return buf_end;
}

View File

@ -1,109 +0,0 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "ue2common.h"
#include "util/arch.h"
#include "multivermicelli.h"
#include "multiaccel_common.h"
#if !defined(HAVE_AVX2)
#define MATCH_ALGO long_
#include "multiaccel_long.h"
#include "multivermicelli_sse.h"
#undef MATCH_ALGO
#define MATCH_ALGO longgrab_
#include "multiaccel_longgrab.h"
#include "multivermicelli_sse.h"
#undef MATCH_ALGO
#define MATCH_ALGO shift_
#include "multiaccel_shift.h"
#include "multivermicelli_sse.h"
#undef MATCH_ALGO
#define MATCH_ALGO shiftgrab_
#include "multiaccel_shiftgrab.h"
#include "multivermicelli_sse.h"
#undef MATCH_ALGO
#define MULTIACCEL_DOUBLE
#define MATCH_ALGO doubleshift_
#include "multiaccel_doubleshift.h"
#include "multivermicelli_sse.h"
#undef MATCH_ALGO
#define MATCH_ALGO doubleshiftgrab_
#include "multiaccel_doubleshiftgrab.h"
#include "multivermicelli_sse.h"
#undef MATCH_ALGO
#undef MULTIACCEL_DOUBLE
#else
#define MATCH_ALGO long_
#include "multiaccel_long.h"
#include "multivermicelli_avx2.h"
#undef MATCH_ALGO
#define MATCH_ALGO longgrab_
#include "multiaccel_longgrab.h"
#include "multivermicelli_avx2.h"
#undef MATCH_ALGO
#define MATCH_ALGO shift_
#include "multiaccel_shift.h"
#include "multivermicelli_avx2.h"
#undef MATCH_ALGO
#define MATCH_ALGO shiftgrab_
#include "multiaccel_shiftgrab.h"
#include "multivermicelli_avx2.h"
#undef MATCH_ALGO
#define MULTIACCEL_DOUBLE
#define MATCH_ALGO doubleshift_
#include "multiaccel_doubleshift.h"
#include "multivermicelli_avx2.h"
#undef MATCH_ALGO
#define MATCH_ALGO doubleshiftgrab_
#include "multiaccel_doubleshiftgrab.h"
#include "multivermicelli_avx2.h"
#undef MATCH_ALGO
#undef MULTIACCEL_DOUBLE
#endif

View File

@ -1,62 +0,0 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MULTIVERMICELLI_H_
#define MULTIVERMICELLI_H_
#ifdef __cplusplus
extern "C"
{
#endif
const u8 *long_vermicelliExec(char c, char nocase, const u8 *buf,
const u8 *buf_end, const u8 run_len);
const u8 *longgrab_vermicelliExec(char c, char nocase, const u8 *buf,
const u8 *buf_end, const u8 run_len);
const u8 *shift_vermicelliExec(char c, char nocase, const u8 *buf,
const u8 *buf_end, const u8 run_len);
const u8 *shiftgrab_vermicelliExec(char c, char nocase, const u8 *buf,
const u8 *buf_end, const u8 run_len);
const u8 *doubleshift_vermicelliExec(char c, char nocase, const u8 *buf,
const u8 *buf_end, const u8 run_len,
const u8 run2_len);
const u8 *doubleshiftgrab_vermicelliExec(char c, char nocase, const u8 *buf,
const u8 *buf_end, const u8 run_len,
const u8 run2_len);
#ifdef __cplusplus
}
#endif
#endif /* MULTIVERMICELLI_H_ */

View File

@ -1,283 +0,0 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "util/bitutils.h"
#include "util/simd_utils.h"
#include "util/unaligned.h"
#include "multiaccel_common.h"
static really_inline
const u8 *JOIN(MATCH_ALGO, vermUnalignNocase)(m256 chars,
const u8 *buf,
const u8 run_len
#ifdef MULTIACCEL_DOUBLE
, const u8 run_len2
#endif
) {
m256 casemask = set32x8(CASE_CLEAR);
const u8 *ptr;
m256 data = loadu256(buf);
u32 z = movemask256(eq256(chars, and256(casemask, data)));
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
(buf, z
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
return NULL;
}
static really_inline
const u8 *JOIN(MATCH_ALGO, vermUnalign)(m256 chars,
const u8 *buf,
const u8 run_len
#ifdef MULTIACCEL_DOUBLE
, const u8 run_len2
#endif
) {
const u8 *ptr;
m256 data = loadu256(buf);
u32 z = movemask256(eq256(chars, data));
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
(buf, z
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
return NULL;
}
/*
* 32-byte pipeline
*/
static really_inline
const u8 *JOIN(MATCH_ALGO, vermPipeline)(m256 chars,
const u8 *buf,
const u8 *buf_end,
const u8 run_len
#ifdef MULTIACCEL_DOUBLE
, const u8 run_len2
#endif
) {
const u8* ptr, *last_buf;
u32 last_res;
// pipeline prologue: scan first 32 bytes
m256 data = load256(buf);
u32 z = movemask256(eq256(chars, data));
last_res = z;
last_buf = buf;
buf += 32;
// now, start the pipeline!
assert((size_t)buf % 32 == 0);
for (; buf + 31 < buf_end; buf += 32) {
// scan more data
data = load256(buf);
z = movemask256(eq256(chars, data));
// do a comparison on previous result
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
(last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
last_buf = buf;
last_res = z;
}
assert(buf <= buf_end && buf >= buf_end - 32);
// epilogue: compare final results
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
(last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
return NULL;
}
/*
* 32-byte caseless pipeline
*/
static really_inline
const u8 *JOIN(MATCH_ALGO, vermPipelineNocase)(m256 chars,
const u8 *buf,
const u8 *buf_end,
const u8 run_len
#ifdef MULTIACCEL_DOUBLE
, const u8 run_len2
#endif
) {
m256 casemask = set32x8(CASE_CLEAR);
const u8* ptr, *last_buf;
u32 last_res;
// pipeline prologue: scan first 32 bytes
m256 data = load256(buf);
u32 z = movemask256(eq256(chars, and256(casemask, data)));
last_res = z;
last_buf = buf;
buf += 32;
// now, start the pipeline!
assert((size_t)buf % 32 == 0);
for (; buf + 31 < buf_end; buf += 32) {
// scan more data
data = load256(buf);
z = movemask256(eq256(chars, and256(casemask, data)));
// do a comparison on previous result
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
(last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
last_buf = buf;
last_res = z;
}
assert(buf <= buf_end && buf >= buf_end - 32);
// epilogue: compare final results
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
(last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
return NULL;
}
const u8 *JOIN(MATCH_ALGO, vermicelliExec)(char c, char nocase,
const u8 *buf,
const u8 *buf_end,
const u8 run_len
#ifdef MULTIACCEL_DOUBLE
, const u8 run_len2
#endif
) {
DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n",
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
assert(buf < buf_end);
const u8 *ptr;
// Handle small scans.
if (buf_end - buf < 32) {
for (; buf < buf_end; buf++) {
char cur = (char)*buf;
if (nocase) {
cur &= CASE_CLEAR;
}
if (cur == c) {
break;
}
}
return buf;
}
m256 chars = set32x8(c); /* nocase already uppercase */
uintptr_t min = (uintptr_t)buf % 32;
if (min) {
ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars,
buf, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
) : JOIN(MATCH_ALGO, vermUnalign)(chars,
buf, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
buf += 32 - min;
}
if (buf_end - buf >= 32){
ptr = nocase ? JOIN(MATCH_ALGO, vermPipelineNocase)(chars,
buf, buf_end, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
) : JOIN(MATCH_ALGO, vermPipeline)(chars,
buf, buf_end, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
}
// final unaligned scan
ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars,
buf_end - 32, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
) : JOIN(MATCH_ALGO, vermUnalign)(chars,
buf_end - 32, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
// run our pipeline
return ptr ? ptr : buf_end;
}

View File

@ -1,452 +0,0 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "util/bitutils.h"
#include "util/simd_utils.h"
#include "util/unaligned.h"
#define VERM_BOUNDARY 16
#define VERM_TYPE m128
#define VERM_SET_FN set16x8
#include "multiaccel_common.h"
static really_inline
const u8 *JOIN(MATCH_ALGO, vermUnalignNocase)(m128 chars,
const u8 *buf,
const u8 run_len
#ifdef MULTIACCEL_DOUBLE
, const u8 run_len2
#endif
) {
m128 casemask = set16x8(CASE_CLEAR);
const u8 *ptr;
m128 data = loadu128(buf);
u32 z = movemask128(eq128(chars, and128(casemask, data)));
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
(buf, z
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
return NULL;
}
static really_inline
const u8 *JOIN(MATCH_ALGO, vermUnalign)(m128 chars,
const u8 *buf,
const u8 run_len
#ifdef MULTIACCEL_DOUBLE
, const u8 run_len2
#endif
) {
const u8 *ptr;
m128 data = loadu128(buf);
u32 z = movemask128(eq128(chars, data));
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
(buf, z
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
return NULL;
}
/*
* 16-byte pipeline, for smaller scans
*/
static
const u8 *JOIN(MATCH_ALGO, vermPipeline16)(m128 chars,
const u8 *buf,
const u8 *buf_end,
const u8 run_len
#ifdef MULTIACCEL_DOUBLE
, const u8 run_len2
#endif
) {
const u8* ptr, *last_buf;
u32 last_res;
// pipeline prologue: scan first 16 bytes
m128 data = load128(buf);
u32 z = movemask128(eq128(chars, data));
last_buf = buf;
last_res = z;
buf += 16;
// now, start the pipeline!
assert((size_t)buf % 16 == 0);
for (; buf + 15 < buf_end; buf += 16) {
// scan more data
data = load128(buf);
z = movemask128(eq128(chars, data));
// do a comparison on previous result
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
(last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
last_buf = buf;
last_res = z;
}
assert(buf <= buf_end && buf >= buf_end - 16);
// epilogue: compare final results
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
(last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
return NULL;
}
/*
* 16-byte pipeline, for smaller scans
*/
static
const u8 *JOIN(MATCH_ALGO, vermPipeline16Nocase)(m128 chars,
const u8 *buf,
const u8 *buf_end,
const u8 run_len
#ifdef MULTIACCEL_DOUBLE
, const u8 run_len2
#endif
) {
m128 casemask = set16x8(CASE_CLEAR);
const u8* ptr, *last_buf;
u32 last_res;
// pipeline prologue: scan first 16 bytes
m128 data = load128(buf);
u32 z = movemask128(eq128(chars, and128(casemask, data)));
last_buf = buf;
last_res = z;
buf += 16;
// now, start the pipeline!
assert((size_t)buf % 16 == 0);
for (; buf + 15 < buf_end; buf += 16) {
// scan more data
data = load128(buf);
z = movemask128(eq128(chars, and128(casemask, data)));
// do a comparison on previous result
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
(last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
last_buf = buf;
last_res = z;
}
assert(buf <= buf_end && buf >= buf_end - 16);
// epilogue: compare final results
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
(last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
return NULL;
}
/*
* 32-byte pipeline, for bigger scans
*/
static
const u8 *JOIN(MATCH_ALGO, vermPipeline32)(m128 chars,
const u8 *buf,
const u8 *buf_end,
const u8 run_len
#ifdef MULTIACCEL_DOUBLE
, const u8 run_len2
#endif
) {
const u8* ptr, *last_buf;
u32 res;
// pipeline prologue: scan first 32 bytes
m128 data1 = load128(buf);
u32 z1 = movemask128(eq128(chars, data1));
m128 data2 = load128(buf + 16);
u32 z2 = movemask128(eq128(chars, data2));
// store the results
u32 last_res = z1 | (z2 << VERM_BOUNDARY);
last_buf = buf;
buf += 32;
// now, start the pipeline!
assert((size_t)buf % 16 == 0);
for (; buf + 31 < buf_end; buf += 32) {
// scan more data
data1 = load128(buf);
z1 = movemask128(eq128(chars, data1));
data2 = load128(buf + 16);
z2 = movemask128(eq128(chars, data2));
res = z1 | (z2 << 16);
// do a comparison on previous result
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
(last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
last_res = res;
last_buf = buf;
}
// epilogue: compare final results
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
(last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
// if we still have some data left, scan it too
if (buf + 15 < buf_end) {
return JOIN(MATCH_ALGO, vermPipeline16)(chars, buf, buf_end, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
}
assert(buf <= buf_end && buf >= buf_end - 16);
return NULL;
}
/*
* 32-byte caseless pipeline, for bigger scans
*/
static
const u8 *JOIN(MATCH_ALGO, vermPipeline32Nocase)(m128 chars,
const u8 *buf,
const u8 *buf_end,
const u8 run_len
#ifdef MULTIACCEL_DOUBLE
, const u8 run_len2
#endif
) {
m128 casemask = set16x8(CASE_CLEAR);
const u8* ptr, *last_buf;
u32 last_res;
// pipeline prologue: scan first 32 bytes
m128 data1 = load128(buf);
u32 z1 = movemask128(eq128(chars, and128(casemask, data1)));
m128 data2 = load128(buf + 16);
u32 z2 = movemask128(eq128(chars, and128(casemask, data2)));
u32 z = z1 | (z2 << VERM_BOUNDARY);
last_res = z;
last_buf = buf;
buf += 32;
// now, start the pipeline!
assert((size_t)buf % 16 == 0);
for (; buf + 31 < buf_end; buf += 32) {
// scan more data
data1 = load128(buf);
z1 = movemask128(eq128(chars, and128(casemask, data1)));
data2 = load128(buf + 16);
z2 = movemask128(eq128(chars, and128(casemask, data2)));
z = z1 | (z2 << 16);
// do a comparison on previous result
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
(last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
last_res = z;
last_buf = buf;
}
// epilogue: compare final results
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
(last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
// if we still have some data left, scan it too
if (buf + 15 < buf_end) {
return JOIN(MATCH_ALGO, vermPipeline16Nocase)(chars, buf, buf_end, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
}
assert(buf <= buf_end && buf >= buf_end - 16);
return NULL;
}
const u8 *JOIN(MATCH_ALGO, vermicelliExec)(char c, char nocase,
const u8 *buf,
const u8 *buf_end,
const u8 run_len
#ifdef MULTIACCEL_DOUBLE
, const u8 run_len2
#endif
) {
DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n",
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
assert(buf < buf_end);
const u8 *ptr;
// Handle small scans.
if (buf_end - buf < VERM_BOUNDARY) {
for (; buf < buf_end; buf++) {
char cur = (char)*buf;
if (nocase) {
cur &= CASE_CLEAR;
}
if (cur == c) {
break;
}
}
return buf;
}
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
if (min) {
ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars,
buf, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
) : JOIN(MATCH_ALGO, vermUnalign)(chars,
buf, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
buf += VERM_BOUNDARY - min;
}
// if we have enough data, run bigger pipeline; otherwise run smaller one
if (buf_end - buf >= 128) {
ptr = nocase ? JOIN(MATCH_ALGO, vermPipeline32Nocase)(chars,
buf, buf_end, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
) : JOIN(MATCH_ALGO, vermPipeline32)(chars,
buf, buf_end, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
} else if (buf_end - buf >= 16){
ptr = nocase ? JOIN(MATCH_ALGO, vermPipeline16Nocase)(chars,
buf, buf_end, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
) : JOIN(MATCH_ALGO, vermPipeline16)(chars,
buf, buf_end, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
if (unlikely(ptr)) {
return ptr;
}
}
// final unaligned scan
ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars,
buf_end - VERM_BOUNDARY, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
) : JOIN(MATCH_ALGO, vermUnalign)(chars,
buf_end - VERM_BOUNDARY, run_len
#ifdef MULTIACCEL_DOUBLE
, run_len2
#endif
);
// run our pipeline
return ptr ? ptr : buf_end;
}

View File

@ -39,7 +39,52 @@
#include "util/simd_utils.h" #include "util/simd_utils.h"
#include "util/unaligned.h" #include "util/unaligned.h"
#include "shufti_common.h" #ifdef DEBUG
#include <ctype.h>
#define DUMP_MSK(_t) \
static UNUSED \
void dumpMsk##_t(m##_t msk) { \
u8 * mskAsU8 = (u8 *)&msk; \
for (unsigned i = 0; i < sizeof(msk); i++) { \
u8 c = mskAsU8[i]; \
for (int j = 0; j < 8; j++) { \
if ((c >> (7-j)) & 0x1) \
printf("1"); \
else \
printf("0"); \
} \
printf(" "); \
} \
} \
static UNUSED \
void dumpMsk##_t##AsChars(m##_t msk) { \
u8 * mskAsU8 = (u8 *)&msk; \
for (unsigned i = 0; i < sizeof(msk); i++) { \
u8 c = mskAsU8[i]; \
if (isprint(c)) \
printf("%c",c); \
else \
printf("."); \
} \
}
#endif
/** \brief Naive byte-by-byte implementation. */
static really_inline
const u8 *shuftiFwdSlow(const u8 *lo, const u8 *hi, const u8 *buf,
const u8 *buf_end) {
assert(buf < buf_end);
for (; buf < buf_end; ++buf) {
u8 c = *buf;
if (lo[c & 0xf] & hi[c >> 4]) {
break;
}
}
return buf;
}
/** \brief Naive byte-by-byte implementation. */ /** \brief Naive byte-by-byte implementation. */
static really_inline static really_inline
@ -59,6 +104,30 @@ const u8 *shuftiRevSlow(const u8 *lo, const u8 *hi, const u8 *buf,
#if !defined(HAVE_AVX2) #if !defined(HAVE_AVX2)
/* Normal SSSE3 shufti */ /* Normal SSSE3 shufti */
#ifdef DEBUG
DUMP_MSK(128)
#endif
#define GET_LO_4(chars) and128(chars, low4bits)
#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4)
static really_inline
u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits,
const m128 compare) {
m128 c_lo = pshufb(mask_lo, GET_LO_4(chars));
m128 c_hi = pshufb(mask_hi, GET_HI_4(chars));
m128 t = and128(c_lo, c_hi);
#ifdef DEBUG
DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n");
DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n");
DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n");
DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n");
DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n");
#endif
return movemask128(eq128(t, compare));
}
static really_inline static really_inline
const u8 *firstMatch(const u8 *buf, u32 z) { const u8 *firstMatch(const u8 *buf, u32 z) {
if (unlikely(z != 0xffff)) { if (unlikely(z != 0xffff)) {
@ -293,6 +362,31 @@ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi,
#else // AVX2 - 256 wide shuftis #else // AVX2 - 256 wide shuftis
#ifdef DEBUG
DUMP_MSK(256)
#endif
#define GET_LO_4(chars) and256(chars, low4bits)
#define GET_HI_4(chars) rshift64_m256(andnot256(low4bits, chars), 4)
static really_inline
u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits,
const m256 compare) {
m256 c_lo = vpshufb(mask_lo, GET_LO_4(chars));
m256 c_hi = vpshufb(mask_hi, GET_HI_4(chars));
m256 t = and256(c_lo, c_hi);
#ifdef DEBUG
DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n");
DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n");
DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n");
DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n");
DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n");
#endif
return movemask256(eq256(t, compare));
}
static really_inline static really_inline
const u8 *firstMatch(const u8 *buf, u32 z) { const u8 *firstMatch(const u8 *buf, u32 z) {
if (unlikely(z != 0xffffffff)) { if (unlikely(z != 0xffffffff)) {

View File

@ -1,146 +0,0 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SHUFTI_COMMON_H_
#define SHUFTI_COMMON_H_
#include "ue2common.h"
#include "util/arch.h"
#include "util/bitutils.h"
#include "util/simd_utils.h"
#include "util/unaligned.h"
/*
* Common stuff for all versions of shufti (single, multi and multidouble)
*/
/** \brief Naive byte-by-byte implementation. */
static really_inline
const u8 *shuftiFwdSlow(const u8 *lo, const u8 *hi, const u8 *buf,
const u8 *buf_end) {
assert(buf < buf_end);
for (; buf < buf_end; ++buf) {
u8 c = *buf;
if (lo[c & 0xf] & hi[c >> 4]) {
break;
}
}
return buf;
}
#ifdef DEBUG
#include <ctype.h>
#define DUMP_MSK(_t) \
static UNUSED \
void dumpMsk##_t(m##_t msk) { \
u8 * mskAsU8 = (u8 *)&msk; \
for (unsigned i = 0; i < sizeof(msk); i++) { \
u8 c = mskAsU8[i]; \
for (int j = 0; j < 8; j++) { \
if ((c >> (7-j)) & 0x1) \
printf("1"); \
else \
printf("0"); \
} \
printf(" "); \
} \
} \
static UNUSED \
void dumpMsk##_t##AsChars(m##_t msk) { \
u8 * mskAsU8 = (u8 *)&msk; \
for (unsigned i = 0; i < sizeof(msk); i++) { \
u8 c = mskAsU8[i]; \
if (isprint(c)) \
printf("%c",c); \
else \
printf("."); \
} \
}
#endif
#if !defined(HAVE_AVX2)
#ifdef DEBUG
DUMP_MSK(128)
#endif
#define GET_LO_4(chars) and128(chars, low4bits)
#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4)
static really_inline
u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits,
const m128 compare) {
m128 c_lo = pshufb(mask_lo, GET_LO_4(chars));
m128 c_hi = pshufb(mask_hi, GET_HI_4(chars));
m128 t = and128(c_lo, c_hi);
#ifdef DEBUG
DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n");
DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n");
DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n");
DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n");
DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n");
#endif
return movemask128(eq128(t, compare));
}
#else
#ifdef DEBUG
DUMP_MSK(256)
#endif
#define GET_LO_4(chars) and256(chars, low4bits)
#define GET_HI_4(chars) rshift64_m256(andnot256(low4bits, chars), 4)
static really_inline
u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits,
const m256 compare) {
m256 c_lo = vpshufb(mask_lo, GET_LO_4(chars));
m256 c_hi = vpshufb(mask_hi, GET_HI_4(chars));
m256 t = and256(c_lo, c_hi);
#ifdef DEBUG
DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n");
DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n");
DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n");
DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n");
DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n");
#endif
return movemask256(eq256(t, compare));
}
#endif
#endif /* SHUFTI_COMMON_H_ */

View File

@ -37,8 +37,6 @@
#include "util/bitutils.h" #include "util/bitutils.h"
#include "util/simd_utils.h" #include "util/simd_utils.h"
#include "truffle_common.h"
#if !defined(HAVE_AVX2) #if !defined(HAVE_AVX2)
static really_inline static really_inline
@ -52,6 +50,57 @@ const u8 *lastMatch(const u8 *buf, u32 z) {
return NULL; // no match return NULL; // no match
} }
static really_inline
const u8 *firstMatch(const u8 *buf, u32 z) {
if (unlikely(z != 0xffff)) {
u32 pos = ctz32(~z & 0xffff);
assert(pos < 16);
return buf + pos;
}
return NULL; // no match
}
static really_inline
u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) {
m128 highconst = _mm_set1_epi8(0x80);
m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201);
// and now do the real work
m128 shuf1 = pshufb(shuf_mask_lo_highclear, v);
m128 t1 = xor128(v, highconst);
m128 shuf2 = pshufb(shuf_mask_lo_highset, t1);
m128 t2 = andnot128(highconst, rshift64_m128(v, 4));
m128 shuf3 = pshufb(shuf_mask_hi, t2);
m128 tmp = and128(or128(shuf1, shuf2), shuf3);
m128 tmp2 = eq128(tmp, zeroes128());
u32 z = movemask128(tmp2);
return z;
}
static
const u8 *truffleMini(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
const u8 *buf, const u8 *buf_end) {
uintptr_t len = buf_end - buf;
assert(len < 16);
m128 chars = zeroes128();
memcpy(&chars, buf, len);
u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
// can't be these bytes in z
u32 mask = (0xffff >> (16 - len)) ^ 0xffff;
const u8 *rv = firstMatch(buf, z | mask);
if (rv) {
return rv;
} else {
return buf_end;
}
}
static really_inline static really_inline
const u8 *fwdBlock(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, const u8 *fwdBlock(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
m128 v, const u8 *buf) { m128 v, const u8 *buf) {
@ -125,7 +174,7 @@ const u8 *truffleRevMini(m128 shuf_mask_lo_highclear,
m128 chars = zeroes128(); m128 chars = zeroes128();
memcpy(&chars, buf, len); memcpy(&chars, buf, len);
u32 mask = (0xFFFF >> (16 - len)) ^ 0xFFFF; u32 mask = (0xffff >> (16 - len)) ^ 0xffff;
u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
const u8 *rv = lastMatch(buf, z | mask); const u8 *rv = lastMatch(buf, z | mask);
@ -184,6 +233,8 @@ const u8 *rtruffleExec(m128 shuf_mask_lo_highclear,
#else #else
// AVX2
static really_inline static really_inline
const u8 *lastMatch(const u8 *buf, u32 z) { const u8 *lastMatch(const u8 *buf, u32 z) {
if (unlikely(z != 0xffffffff)) { if (unlikely(z != 0xffffffff)) {
@ -195,6 +246,57 @@ const u8 *lastMatch(const u8 *buf, u32 z) {
return NULL; // no match return NULL; // no match
} }
static really_inline
const u8 *firstMatch(const u8 *buf, u32 z) {
if (unlikely(z != 0xffffffff)) {
u32 pos = ctz32(~z);
assert(pos < 32);
return buf + pos;
}
return NULL; // no match
}
static really_inline
u32 block(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v) {
m256 highconst = _mm256_set1_epi8(0x80);
m256 shuf_mask_hi = _mm256_set1_epi64x(0x8040201008040201);
// and now do the real work
m256 shuf1 = vpshufb(shuf_mask_lo_highclear, v);
m256 t1 = xor256(v, highconst);
m256 shuf2 = vpshufb(shuf_mask_lo_highset, t1);
m256 t2 = andnot256(highconst, rshift64_m256(v, 4));
m256 shuf3 = vpshufb(shuf_mask_hi, t2);
m256 tmp = and256(or256(shuf1, shuf2), shuf3);
m256 tmp2 = eq256(tmp, zeroes256());
u32 z = movemask256(tmp2);
return z;
}
static
const u8 *truffleMini(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset,
const u8 *buf, const u8 *buf_end) {
uintptr_t len = buf_end - buf;
assert(len < 32);
m256 chars = zeroes256();
memcpy(&chars, buf, len);
u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
// can't be these bytes in z
u32 mask = (0xffffffff >> (32 - len)) ^ 0xffffffff;
const u8 *rv = firstMatch(buf, z | mask);
if (rv) {
return rv;
} else {
return buf_end;
}
}
static really_inline static really_inline
const u8 *fwdBlock(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, const u8 *fwdBlock(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset,
m256 v, const u8 *buf) { m256 v, const u8 *buf) {
@ -266,7 +368,7 @@ const u8 *truffleRevMini(m256 shuf_mask_lo_highclear,
m256 chars = zeroes256(); m256 chars = zeroes256();
memcpy(&chars, buf, len); memcpy(&chars, buf, len);
u32 mask = (0xFFFFFFFF >> (32 - len)) ^ 0xFFFFFFFF; u32 mask = (0xffffffff >> (32 - len)) ^ 0xffffffff;
u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
const u8 *rv = lastMatch(buf, z | mask); const u8 *rv = lastMatch(buf, z | mask);

View File

@ -1,147 +0,0 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TRUFFLE_COMMON_H_
#define TRUFFLE_COMMON_H_
#include "util/arch.h"
#include "util/bitutils.h"
#include "util/simd_utils.h"
/*
* Common stuff for all versions of truffle (single, multi and multidouble)
*/
#if !defined(HAVE_AVX2)
static really_inline
const u8 *firstMatch(const u8 *buf, u32 z) {
if (unlikely(z != 0xffff)) {
u32 pos = ctz32(~z & 0xffff);
assert(pos < 16);
return buf + pos;
}
return NULL; // no match
}
static really_inline
u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) {
m128 highconst = _mm_set1_epi8(0x80);
m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201);
// and now do the real work
m128 shuf1 = pshufb(shuf_mask_lo_highclear, v);
m128 t1 = xor128(v, highconst);
m128 shuf2 = pshufb(shuf_mask_lo_highset, t1);
m128 t2 = andnot128(highconst, rshift64_m128(v, 4));
m128 shuf3 = pshufb(shuf_mask_hi, t2);
m128 tmp = and128(or128(shuf1, shuf2), shuf3);
m128 tmp2 = eq128(tmp, zeroes128());
u32 z = movemask128(tmp2);
return z;
}
static
const u8 *truffleMini(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
const u8 *buf, const u8 *buf_end) {
uintptr_t len = buf_end - buf;
assert(len < 16);
m128 chars = zeroes128();
memcpy(&chars, buf, len);
u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
// can't be these bytes in z
u32 mask = (0xFFFF >> (16 - len)) ^ 0xFFFF;
const u8 *rv = firstMatch(buf, z| mask);
if (rv) {
return rv;
} else {
return buf_end;
}
}
#else
static really_inline
const u8 *firstMatch(const u8 *buf, u32 z) {
if (unlikely(z != 0xffffffff)) {
u32 pos = ctz32(~z);
assert(pos < 32);
return buf + pos;
}
return NULL; // no match
}
static really_inline
u32 block(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v) {
m256 highconst = _mm256_set1_epi8(0x80);
m256 shuf_mask_hi = _mm256_set1_epi64x(0x8040201008040201);
// and now do the real work
m256 shuf1 = vpshufb(shuf_mask_lo_highclear, v);
m256 t1 = xor256(v, highconst);
m256 shuf2 = vpshufb(shuf_mask_lo_highset, t1);
m256 t2 = andnot256(highconst, rshift64_m256(v, 4));
m256 shuf3 = vpshufb(shuf_mask_hi, t2);
m256 tmp = and256(or256(shuf1, shuf2), shuf3);
m256 tmp2 = eq256(tmp, zeroes256());
u32 z = movemask256(tmp2);
return z;
}
static
const u8 *truffleMini(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset,
const u8 *buf, const u8 *buf_end) {
uintptr_t len = buf_end - buf;
assert(len < 32);
m256 chars = zeroes256();
memcpy(&chars, buf, len);
u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
// can't be these bytes in z
u32 mask = (0xFFFFFFFF >> (32 - len)) ^ 0xFFFFFFFF;
const u8 *rv = firstMatch(buf, z | mask);
if (rv) {
return rv;
} else {
return buf_end;
}
}
#endif
#endif /* TRUFFLE_COMMON_H_ */

View File

@ -37,7 +37,6 @@
#include "ue2common.h" #include "ue2common.h"
#include "nfa/accel.h" #include "nfa/accel.h"
#include "nfa/multiaccel_compilehelper.h"
#include "util/bitutils.h" // for CASE_CLEAR #include "util/bitutils.h" // for CASE_CLEAR
#include "util/charreach.h" #include "util/charreach.h"
@ -677,134 +676,6 @@ NFAVertex get_sds_or_proxy(const NGHolder &g) {
return g.startDs; return g.startDs;
} }
static
NFAVertex find_next(const NFAVertex v, const NGHolder &g) {
NFAVertex res = NGHolder::null_vertex();
for (NFAVertex u : adjacent_vertices_range(v, g)) {
if (u != v) {
res = u;
break;
}
}
return res;
}
/** \brief Check if vertex \a v is a multi accelerable state (for a limex NFA). */
MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g,
const vector<NFAVertex> &states,
const CompileContext &cc) {
// For a set of states to be accelerable, we basically have to have only
// one state to accelerate.
if (states.size() != 1) {
DEBUG_PRINTF("can't accelerate multiple states\n");
return MultibyteAccelInfo();
}
// Get our base vertex
NFAVertex v = states[0];
// We need the base vertex to be a self-looping dotall leading to exactly
// one vertex.
if (!hasSelfLoop(v, g)) {
DEBUG_PRINTF("base vertex has self-loop\n");
return MultibyteAccelInfo();
}
if (!g[v].char_reach.all()) {
DEBUG_PRINTF("can't accelerate anything but dot\n");
return MultibyteAccelInfo();
}
if (proper_out_degree(v, g) != 1) {
DEBUG_PRINTF("can't accelerate states with multiple successors\n");
return MultibyteAccelInfo();
}
// find our start vertex
NFAVertex cur = find_next(v, g);
if (cur == NGHolder::null_vertex()) {
DEBUG_PRINTF("invalid start vertex\n");
return MultibyteAccelInfo();
}
bool has_offset = false;
u32 offset = 0;
CharReach cr = g[cur].char_reach;
// if we start with a dot, we have an offset, so defer figuring out the
// real CharReach for this accel scheme
if (cr == CharReach::dot()) {
has_offset = true;
offset = 1;
}
// figure out our offset
while (has_offset) {
// vertices have to have no self loops
if (hasSelfLoop(cur, g)) {
DEBUG_PRINTF("can't have self-loops\n");
return MultibyteAccelInfo();
}
// we have to have exactly 1 successor to have this acceleration scheme
if (out_degree(cur, g) != 1) {
DEBUG_PRINTF("can't have multiple successors\n");
return MultibyteAccelInfo();
}
cur = *adjacent_vertices(cur, g).first;
// if we met a special vertex, bail out
if (is_special(cur, g)) {
DEBUG_PRINTF("can't have special vertices\n");
return MultibyteAccelInfo();
}
// now, get the real char reach
if (g[cur].char_reach != CharReach::dot()) {
cr = g[cur].char_reach;
has_offset = false;
} else {
offset++;
}
}
// now, fire up the compilation machinery
target_t ti = cc.target_info;
unsigned max_len = ti.has_avx2() ? MULTIACCEL_MAX_LEN_AVX2 : MULTIACCEL_MAX_LEN_SSE;
MultiaccelCompileHelper mac(cr, offset, max_len);
while (mac.canAdvance()) {
// vertices have to have no self loops
if (hasSelfLoop(cur, g)) {
break;
}
// we have to have exactly 1 successor to have this acceleration scheme
if (out_degree(cur, g) != 1) {
break;
}
cur = *adjacent_vertices(cur, g).first;
// if we met a special vertex, bail out
if (is_special(cur, g)) {
break;
}
mac.advance(g[cur].char_reach);
}
MultibyteAccelInfo mai = mac.getBestScheme();
#ifdef DEBUG
DEBUG_PRINTF("Multibyte acceleration scheme: type: %u offset: %u lengths: %u,%u\n",
mai.type, mai.offset, mai.len1, mai.len2);
for (size_t c = mai.cr.find_first(); c != CharReach::npos; c = mai.cr.find_next(c)) {
DEBUG_PRINTF("multibyte accel char: %zu\n", c);
}
#endif
return mai;
}
/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). */ /** \brief Check if vertex \a v is an accelerable state (for a limex NFA). */
bool nfaCheckAccel(const NGHolder &g, NFAVertex v, bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
const vector<CharReach> &refined_cr, const vector<CharReach> &refined_cr,

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -51,9 +51,6 @@ namespace ue2 {
#define MAX_MERGED_ACCEL_STOPS 200 #define MAX_MERGED_ACCEL_STOPS 200
#define ACCEL_MAX_STOP_CHAR 24 #define ACCEL_MAX_STOP_CHAR 24
#define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */ #define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */
#define MULTIACCEL_MIN_LEN 3
#define MULTIACCEL_MAX_LEN_SSE 15
#define MULTIACCEL_MAX_LEN_AVX2 31
// forward-declaration of CompileContext // forward-declaration of CompileContext
struct CompileContext; struct CompileContext;
@ -84,11 +81,6 @@ bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic, const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
AccelScheme *as, bool allow_wide); AccelScheme *as, bool allow_wide);
/** \brief Check if vertex \a v is a multi accelerable state (for a limex NFA).
*/
MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g,
const std::vector<NFAVertex> &verts,
const CompileContext &cc);
} // namespace ue2 } // namespace ue2

View File

@ -52,8 +52,6 @@ set(unit_internal_SOURCES
internal/limex_nfa.cpp internal/limex_nfa.cpp
internal/masked_move.cpp internal/masked_move.cpp
internal/multi_bit.cpp internal/multi_bit.cpp
internal/multiaccel_matcher.cpp
internal/multiaccel_shift.cpp
internal/nfagraph_common.h internal/nfagraph_common.h
internal/nfagraph_comp.cpp internal/nfagraph_comp.cpp
internal/nfagraph_equivalence.cpp internal/nfagraph_equivalence.cpp

View File

@ -1,301 +0,0 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
extern "C" {
#include "nfa/accel.h" // wrapping in extern C to make sure run_accel works
}
#include "config.h"
#include "src/ue2common.h"
#include "gtest/gtest.h"
#include "nfagraph/ng_limex_accel.h"
#include "nfa/accelcompile.h"
#include "nfa/multivermicelli.h"
#include "nfa/multishufti.h"
#include "nfa/multitruffle.h"
#include "util/alloc.h"
#include "util/charreach.h"
#include <algorithm>
#include <iostream>
#include <random>
#include <string>
#include <vector>
using namespace ue2;
using namespace std;
using namespace testing;
// test parameters structure
struct MultiaccelTestParam {
string match_pattern;
u32 match_pattern_start_idx;
u32 match_idx;
bool test_all_offsets;
u8 match_len1;
u8 match_len2;
MultibyteAccelInfo::multiaccel_type type;
};
// buffer size is constant
static const u32 BUF_SIZE = 200;
// strings, out of which CharReach will be generated
static const string VERM_CR = "a";
static const string V_NC_CR = "aA";
static const string SHUF_CR = "abcdefghijklmnopqrstuvwxyz";
static const string TRUF_CR = "\x11\x22\x33\x44\x55\x66\x77\x88\x99";
// Parameterized test case for multiaccel patterns.
class MultiaccelTest : public TestWithParam<MultiaccelTestParam> {
protected:
virtual void SetUp() {
// set up is deferred until the actual test, since we can't compile
// any accel schemes unless we know CharReach
const MultiaccelTestParam &p = GetParam();
// reserve space in our buffer
buffer = (u8 *)aligned_zmalloc(BUF_SIZE);
// store the index where we expect to see the match. note that it may
// be different from where the match pattern has started since we may
// have a flooded match (i.e. a match preceded by almost-match) or a
// no-match (in which case "match" index is at the end of the buffer).
match_idx = p.match_idx;
// make note if we need to test all offsets - sometimes we don't, for
// example when testing partial or no-match.
test_all_offsets = p.test_all_offsets;
}
char getChar(const CharReach &cr) {
assert(cr.count() > 0);
auto dist = uniform_int_distribution<size_t>(0, cr.count() - 1);
size_t result = cr.find_nth(dist(prng));
assert(result != CharReach::npos);
return (char)result;
}
// char generator
char getChar(const CharReach &cr, bool match) {
return getChar(match ? cr : ~cr);
}
// appends a string with matches/unmatches according to input match pattern
void getMatch(u8 *result, u32 start, const string &pattern,
const CharReach &cr) {
for (const auto &c : pattern) {
result[start++] = getChar(cr, c == '1');
}
}
// appends non-matching noise of certain lengths
void getNoise(u8 *result, u32 start, u32 len, const CharReach &cr) {
for (unsigned i = 0; i < len; i++) {
result[start + i] = getChar(cr, false);
}
}
// deferred buffer generation, as we don't know CharReach before we run the test
void GenerateBuffer(const CharReach &cr) {
const MultiaccelTestParam &p = GetParam();
// step 1: fill prefix with non-matching noise
u32 start = 0;
getNoise(buffer, start, p.match_pattern_start_idx, cr);
// step 2: add a match
start += p.match_pattern_start_idx;
getMatch(buffer, start, p.match_pattern, cr);
// step 3: fill in the rest of the buffer with non-matching noise
start += p.match_pattern.size();
getNoise(buffer, start, BUF_SIZE - p.match_pattern.size() -
p.match_pattern_start_idx, cr);
}
// deferred accel scheme generation, as we don't know CharReach before we run the test
void CompileAccelScheme(const CharReach &cr, AccelAux *aux) {
const MultiaccelTestParam &p = GetParam();
AccelInfo ai;
ai.single_stops = cr; // dummy CharReach to prevent red tape accel
ai.ma_len1 = p.match_len1;
ai.ma_len2 = p.match_len2;
ai.multiaccel_stops = cr;
ai.ma_type = p.type;
buildAccelAux(ai, aux);
// now, verify we've successfully built our accel scheme, *and* that it's
// a multibyte scheme
ASSERT_TRUE(aux->accel_type >= ACCEL_MLVERM &&
aux->accel_type <= ACCEL_MDSGTRUFFLE);
}
virtual void TearDown() {
aligned_free(buffer);
}
// We want our tests to be deterministic, so we use a PRNG in the test
// fixture.
mt19937 prng;
u32 match_idx;
u8 *buffer;
bool test_all_offsets;
};
static
void runTest(const u8 *buffer, AccelAux *aux, unsigned match_idx,
bool test_all_offsets) {
const u8 *start = buffer;
const u8 *end = start + BUF_SIZE;
const u8 *match = start + match_idx;
// comparing indexes into the buffer is easier to understand than pointers
if (test_all_offsets) {
// run_accel can only scan >15 byte buffers
u32 end_offset = min(match_idx, BUF_SIZE - 15);
for (unsigned offset = 0; offset < end_offset; offset++) {
const u8 *ptr = run_accel(aux, (start + offset), end);
unsigned idx = ptr - start;
ASSERT_EQ(match_idx, idx);
}
} else {
const u8 *ptr = run_accel(aux, start, end);
unsigned idx = ptr - start;
ASSERT_EQ(match_idx, idx);
}
}
TEST_P(MultiaccelTest, TestVermicelli) {
AccelAux aux = {0};
CharReach cr(VERM_CR);
GenerateBuffer(cr);
CompileAccelScheme(cr, &aux);
runTest(buffer, &aux, match_idx, test_all_offsets);
}
TEST_P(MultiaccelTest, TestVermicelliNocase) {
AccelAux aux = {0};
CharReach cr(V_NC_CR);
GenerateBuffer(cr);
CompileAccelScheme(cr, &aux);
runTest(buffer, &aux, match_idx, test_all_offsets);
}
TEST_P(MultiaccelTest, TestShufti) {
AccelAux aux = {0};
CharReach cr(SHUF_CR);
GenerateBuffer(cr);
CompileAccelScheme(cr, &aux);
runTest(buffer, &aux, match_idx, test_all_offsets);
}
TEST_P(MultiaccelTest, TestTruffle) {
AccelAux aux = {0};
CharReach cr(TRUF_CR);
GenerateBuffer(cr);
CompileAccelScheme(cr, &aux);
runTest(buffer, &aux, match_idx, test_all_offsets);
}
static const MultiaccelTestParam multiaccelTests[] = {
// long matcher
// full, partial, flooded, nomatch
{"11111", 180, 180, true, 5, 0, MultibyteAccelInfo::MAT_LONG},
{"111", 197, 197, true, 5, 0, MultibyteAccelInfo::MAT_LONG},
{"1111011111", 177, 182, false, 5, 0, MultibyteAccelInfo::MAT_LONG},
{"1111011110", 177, 200, false, 5, 0, MultibyteAccelInfo::MAT_LONG},
// long-grab matcher
// full, partial, flooded, nomatch
{"111110", 180, 180, true, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB},
{"111", 197, 197, true, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB},
{"11111111110", 177, 182, false, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB},
{"11110111101", 177, 200, false, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB},
// shift matcher
// full, partial, flooded, nomatch
{"11001", 180, 180, true, 4, 0, MultibyteAccelInfo::MAT_SHIFT},
{"110", 197, 197, true, 4, 0, MultibyteAccelInfo::MAT_SHIFT},
{"1001011001", 177, 182, false, 4, 0, MultibyteAccelInfo::MAT_SHIFT},
{"1101001011", 177, 200, false, 4, 0, MultibyteAccelInfo::MAT_SHIFT},
// shift-grab matcher
// full, partial, flooded, nomatch
{"10111", 180, 180, true, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB},
{"101", 197, 197, true, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB},
{"1110010111", 177, 182, false, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB},
{"1100101100", 177, 200, false, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB},
// doubleshift matcher
// full, partial (one and two shifts), flooded, nomatch
{"110111", 180, 180, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
{"110", 197, 197, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
{"1101", 196, 196, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
{"1100100101", 178, 182, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
{"1101001101", 177, 200, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
// doubleshift-grab matcher
// full, partial (one and two shifts), flooded, nomatch
{"100101", 180, 180, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
{"100", 197, 197, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
{"1011", 196, 196, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
{"11111101101", 177, 182, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
{"1111110111", 177, 200, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
};
INSTANTIATE_TEST_CASE_P(Multiaccel, MultiaccelTest, ValuesIn(multiaccelTests));
// boring stuff for google test
void PrintTo(const MultiaccelTestParam &p, ::std::ostream *os) {
*os << "MultiaccelTestParam: " << p.match_pattern;
}

View File

@ -1,81 +0,0 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "src/ue2common.h"
#include "gtest/gtest.h"
#include "nfa/multiaccel_common.h"
/*
* Unit tests for the shifters.
*
* This is a bit messy, as shifters are macros, so we're using macros to test
* other macros.
*/
#define TEST_SHIFT(n) \
do { \
u64a val = ((u64a) 1 << n) - 1; \
JOIN(SHIFT, n)(val); \
ASSERT_EQ(val, 1); \
} while (0)
TEST(MultiaccelShift, StaticShift) {
TEST_SHIFT(1);
TEST_SHIFT(2);
TEST_SHIFT(3);
TEST_SHIFT(4);
TEST_SHIFT(5);
TEST_SHIFT(6);
TEST_SHIFT(7);
TEST_SHIFT(8);
TEST_SHIFT(10);
TEST_SHIFT(11);
TEST_SHIFT(12);
TEST_SHIFT(13);
TEST_SHIFT(14);
TEST_SHIFT(15);
TEST_SHIFT(16);
TEST_SHIFT(17);
TEST_SHIFT(18);
TEST_SHIFT(19);
TEST_SHIFT(20);
TEST_SHIFT(21);
TEST_SHIFT(22);
TEST_SHIFT(23);
TEST_SHIFT(24);
TEST_SHIFT(25);
TEST_SHIFT(26);
TEST_SHIFT(27);
TEST_SHIFT(28);
TEST_SHIFT(29);
TEST_SHIFT(30);
TEST_SHIFT(31);
TEST_SHIFT(32);
}