mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
De-multiaccel
This commit is contained in:
parent
2b1a7da188
commit
423569ec82
@ -557,25 +557,6 @@ set (hs_exec_SRCS
|
|||||||
src/nfa/mpv.h
|
src/nfa/mpv.h
|
||||||
src/nfa/mpv.c
|
src/nfa/mpv.c
|
||||||
src/nfa/mpv_internal.h
|
src/nfa/mpv_internal.h
|
||||||
src/nfa/multiaccel_common.h
|
|
||||||
src/nfa/multiaccel_doubleshift.h
|
|
||||||
src/nfa/multiaccel_doubleshiftgrab.h
|
|
||||||
src/nfa/multiaccel_long.h
|
|
||||||
src/nfa/multiaccel_longgrab.h
|
|
||||||
src/nfa/multiaccel_shift.h
|
|
||||||
src/nfa/multiaccel_shiftgrab.h
|
|
||||||
src/nfa/multishufti.c
|
|
||||||
src/nfa/multishufti_avx2.h
|
|
||||||
src/nfa/multishufti_sse.h
|
|
||||||
src/nfa/multishufti.h
|
|
||||||
src/nfa/multitruffle.c
|
|
||||||
src/nfa/multitruffle_avx2.h
|
|
||||||
src/nfa/multitruffle_sse.h
|
|
||||||
src/nfa/multitruffle.h
|
|
||||||
src/nfa/multivermicelli.c
|
|
||||||
src/nfa/multivermicelli.h
|
|
||||||
src/nfa/multivermicelli_sse.h
|
|
||||||
src/nfa/multivermicelli_avx2.h
|
|
||||||
src/nfa/nfa_api.h
|
src/nfa/nfa_api.h
|
||||||
src/nfa/nfa_api_dispatch.c
|
src/nfa/nfa_api_dispatch.c
|
||||||
src/nfa/nfa_internal.h
|
src/nfa/nfa_internal.h
|
||||||
@ -589,13 +570,11 @@ set (hs_exec_SRCS
|
|||||||
src/nfa/sheng_impl.h
|
src/nfa/sheng_impl.h
|
||||||
src/nfa/sheng_impl4.h
|
src/nfa/sheng_impl4.h
|
||||||
src/nfa/sheng_internal.h
|
src/nfa/sheng_internal.h
|
||||||
src/nfa/shufti_common.h
|
|
||||||
src/nfa/shufti.c
|
src/nfa/shufti.c
|
||||||
src/nfa/shufti.h
|
src/nfa/shufti.h
|
||||||
src/nfa/tamarama.c
|
src/nfa/tamarama.c
|
||||||
src/nfa/tamarama.h
|
src/nfa/tamarama.h
|
||||||
src/nfa/tamarama_internal.h
|
src/nfa/tamarama_internal.h
|
||||||
src/nfa/truffle_common.h
|
|
||||||
src/nfa/truffle.c
|
src/nfa/truffle.c
|
||||||
src/nfa/truffle.h
|
src/nfa/truffle.h
|
||||||
src/nfa/vermicelli.h
|
src/nfa/vermicelli.h
|
||||||
@ -736,8 +715,6 @@ SET (hs_SRCS
|
|||||||
src/nfa/mpv_internal.h
|
src/nfa/mpv_internal.h
|
||||||
src/nfa/mpvcompile.cpp
|
src/nfa/mpvcompile.cpp
|
||||||
src/nfa/mpvcompile.h
|
src/nfa/mpvcompile.h
|
||||||
src/nfa/multiaccel_compilehelper.cpp
|
|
||||||
src/nfa/multiaccel_compilehelper.h
|
|
||||||
src/nfa/nfa_api.h
|
src/nfa/nfa_api.h
|
||||||
src/nfa/nfa_api_queue.h
|
src/nfa/nfa_api_queue.h
|
||||||
src/nfa/nfa_api_util.h
|
src/nfa/nfa_api_util.h
|
||||||
|
219
src/nfa/accel.c
219
src/nfa/accel.c
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -30,9 +30,6 @@
|
|||||||
#include "shufti.h"
|
#include "shufti.h"
|
||||||
#include "truffle.h"
|
#include "truffle.h"
|
||||||
#include "vermicelli.h"
|
#include "vermicelli.h"
|
||||||
#include "multishufti.h"
|
|
||||||
#include "multitruffle.h"
|
|
||||||
#include "multivermicelli.h"
|
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
|
|
||||||
const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
|
const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
|
||||||
@ -132,220 +129,6 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
|
|||||||
rv = c_end;
|
rv = c_end;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* multibyte matchers */
|
|
||||||
case ACCEL_MLVERM:
|
|
||||||
DEBUG_PRINTF("accel mlverm %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = long_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MLVERM_NOCASE:
|
|
||||||
DEBUG_PRINTF("accel mlverm nc %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = long_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MLGVERM:
|
|
||||||
DEBUG_PRINTF("accel mlgverm %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = longgrab_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MLGVERM_NOCASE:
|
|
||||||
DEBUG_PRINTF("accel mlgverm nc %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = longgrab_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MSVERM:
|
|
||||||
DEBUG_PRINTF("accel msverm %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = shift_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MSVERM_NOCASE:
|
|
||||||
DEBUG_PRINTF("accel msverm nc %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = shift_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MSGVERM:
|
|
||||||
DEBUG_PRINTF("accel msgverm %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = shiftgrab_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MSGVERM_NOCASE:
|
|
||||||
DEBUG_PRINTF("accel msgverm nc %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = shiftgrab_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MDSVERM:
|
|
||||||
DEBUG_PRINTF("accel mdsverm %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = doubleshift_vermicelliExec(accel->mdverm.c, 0, c, c_end,
|
|
||||||
accel->mdverm.len1, accel->mdverm.len2);
|
|
||||||
break;
|
|
||||||
case ACCEL_MDSVERM_NOCASE:
|
|
||||||
DEBUG_PRINTF("accel mdsverm nc %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = doubleshift_vermicelliExec(accel->mdverm.c, 1, c, c_end,
|
|
||||||
accel->mdverm.len1, accel->mdverm.len2);
|
|
||||||
break;
|
|
||||||
case ACCEL_MDSGVERM:
|
|
||||||
DEBUG_PRINTF("accel mdsgverm %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 0, c, c_end,
|
|
||||||
accel->mdverm.len1, accel->mdverm.len2);
|
|
||||||
break;
|
|
||||||
case ACCEL_MDSGVERM_NOCASE:
|
|
||||||
DEBUG_PRINTF("accel mdsgverm nc %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 1, c, c_end,
|
|
||||||
accel->mdverm.len1, accel->mdverm.len2);
|
|
||||||
break;
|
|
||||||
case ACCEL_MLSHUFTI:
|
|
||||||
DEBUG_PRINTF("accel mlshufti %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = long_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end,
|
|
||||||
accel->mshufti.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MLGSHUFTI:
|
|
||||||
DEBUG_PRINTF("accel mlgshufti %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = longgrab_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end,
|
|
||||||
accel->mshufti.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MSSHUFTI:
|
|
||||||
DEBUG_PRINTF("accel msshufti %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = shift_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end,
|
|
||||||
accel->mshufti.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MSGSHUFTI:
|
|
||||||
DEBUG_PRINTF("accel msgshufti %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = shiftgrab_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end,
|
|
||||||
accel->mshufti.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MDSSHUFTI:
|
|
||||||
DEBUG_PRINTF("accel mdsshufti %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = doubleshift_shuftiExec(accel->mdshufti.lo, accel->mdshufti.hi, c, c_end,
|
|
||||||
accel->mdshufti.len1, accel->mdshufti.len2);
|
|
||||||
break;
|
|
||||||
case ACCEL_MDSGSHUFTI:
|
|
||||||
DEBUG_PRINTF("accel msgshufti %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = doubleshiftgrab_shuftiExec(accel->mdshufti.lo, accel->mdshufti.hi, c, c_end,
|
|
||||||
accel->mdshufti.len1, accel->mdshufti.len2);
|
|
||||||
break;
|
|
||||||
case ACCEL_MLTRUFFLE:
|
|
||||||
DEBUG_PRINTF("accel mltruffle %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = long_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
|
|
||||||
c, c_end, accel->mtruffle.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MLGTRUFFLE:
|
|
||||||
DEBUG_PRINTF("accel mlgtruffle %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = longgrab_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
|
|
||||||
c, c_end, accel->mtruffle.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MSTRUFFLE:
|
|
||||||
DEBUG_PRINTF("accel mstruffle %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = shift_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
|
|
||||||
c, c_end, accel->mtruffle.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MSGTRUFFLE:
|
|
||||||
DEBUG_PRINTF("accel msgtruffle %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = shiftgrab_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
|
|
||||||
c, c_end, accel->mtruffle.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MDSTRUFFLE:
|
|
||||||
DEBUG_PRINTF("accel mdstruffle %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = doubleshift_truffleExec(accel->mdtruffle.mask1,
|
|
||||||
accel->mdtruffle.mask2, c, c_end,
|
|
||||||
accel->mdtruffle.len1,
|
|
||||||
accel->mdtruffle.len2);
|
|
||||||
break;
|
|
||||||
case ACCEL_MDSGTRUFFLE:
|
|
||||||
DEBUG_PRINTF("accel mdsgtruffle %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = doubleshiftgrab_truffleExec(accel->mdtruffle.mask1,
|
|
||||||
accel->mdtruffle.mask2, c, c_end,
|
|
||||||
accel->mdtruffle.len1,
|
|
||||||
accel->mdtruffle.len2);
|
|
||||||
break;
|
|
||||||
|
|
||||||
|
|
||||||
default:
|
default:
|
||||||
assert(!"not here");
|
assert(!"not here");
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -61,36 +61,7 @@ enum AccelType {
|
|||||||
ACCEL_DSHUFTI,
|
ACCEL_DSHUFTI,
|
||||||
ACCEL_TRUFFLE,
|
ACCEL_TRUFFLE,
|
||||||
ACCEL_RED_TAPE,
|
ACCEL_RED_TAPE,
|
||||||
/* multibyte vermicellis */
|
|
||||||
ACCEL_MLVERM,
|
|
||||||
ACCEL_MLVERM_NOCASE,
|
|
||||||
ACCEL_MLGVERM,
|
|
||||||
ACCEL_MLGVERM_NOCASE,
|
|
||||||
ACCEL_MSVERM,
|
|
||||||
ACCEL_MSVERM_NOCASE,
|
|
||||||
ACCEL_MSGVERM,
|
|
||||||
ACCEL_MSGVERM_NOCASE,
|
|
||||||
ACCEL_MDSVERM,
|
|
||||||
ACCEL_MDSVERM_NOCASE,
|
|
||||||
ACCEL_MDSGVERM,
|
|
||||||
ACCEL_MDSGVERM_NOCASE,
|
|
||||||
/* multibyte shuftis */
|
|
||||||
ACCEL_MLSHUFTI,
|
|
||||||
ACCEL_MLGSHUFTI,
|
|
||||||
ACCEL_MSSHUFTI,
|
|
||||||
ACCEL_MSGSHUFTI,
|
|
||||||
ACCEL_MDSSHUFTI,
|
|
||||||
ACCEL_MDSGSHUFTI,
|
|
||||||
/* multibyte truffles */
|
|
||||||
ACCEL_MLTRUFFLE,
|
|
||||||
ACCEL_MLGTRUFFLE,
|
|
||||||
ACCEL_MSTRUFFLE,
|
|
||||||
ACCEL_MSGTRUFFLE,
|
|
||||||
ACCEL_MDSTRUFFLE,
|
|
||||||
ACCEL_MDSGTRUFFLE,
|
|
||||||
/* masked dverm */
|
|
||||||
ACCEL_DVERM_MASKED,
|
ACCEL_DVERM_MASKED,
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/** \brief Structure for accel framework. */
|
/** \brief Structure for accel framework. */
|
||||||
@ -140,42 +111,12 @@ union AccelAux {
|
|||||||
m128 lo2;
|
m128 lo2;
|
||||||
m128 hi2;
|
m128 hi2;
|
||||||
} dshufti;
|
} dshufti;
|
||||||
struct {
|
|
||||||
u8 accel_type;
|
|
||||||
u8 offset;
|
|
||||||
m128 lo;
|
|
||||||
m128 hi;
|
|
||||||
u8 len;
|
|
||||||
} mshufti;
|
|
||||||
struct {
|
|
||||||
u8 accel_type;
|
|
||||||
u8 offset;
|
|
||||||
m128 lo;
|
|
||||||
m128 hi;
|
|
||||||
u8 len1;
|
|
||||||
u8 len2;
|
|
||||||
} mdshufti;
|
|
||||||
struct {
|
struct {
|
||||||
u8 accel_type;
|
u8 accel_type;
|
||||||
u8 offset;
|
u8 offset;
|
||||||
m128 mask1;
|
m128 mask1;
|
||||||
m128 mask2;
|
m128 mask2;
|
||||||
} truffle;
|
} truffle;
|
||||||
struct {
|
|
||||||
u8 accel_type;
|
|
||||||
u8 offset;
|
|
||||||
m128 mask1;
|
|
||||||
m128 mask2;
|
|
||||||
u8 len;
|
|
||||||
} mtruffle;
|
|
||||||
struct {
|
|
||||||
u8 accel_type;
|
|
||||||
u8 offset;
|
|
||||||
m128 mask1;
|
|
||||||
m128 mask2;
|
|
||||||
u8 len1;
|
|
||||||
u8 len2;
|
|
||||||
} mdtruffle;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -93,54 +93,6 @@ const char *accelName(u8 accel_type) {
|
|||||||
return "truffle";
|
return "truffle";
|
||||||
case ACCEL_RED_TAPE:
|
case ACCEL_RED_TAPE:
|
||||||
return "red tape";
|
return "red tape";
|
||||||
case ACCEL_MLVERM:
|
|
||||||
return "multibyte long vermicelli";
|
|
||||||
case ACCEL_MLVERM_NOCASE:
|
|
||||||
return "multibyte long vermicelli nocase";
|
|
||||||
case ACCEL_MLGVERM:
|
|
||||||
return "multibyte long-grab vermicelli";
|
|
||||||
case ACCEL_MLGVERM_NOCASE:
|
|
||||||
return "multibyte long-grab vermicelli nocase";
|
|
||||||
case ACCEL_MSVERM:
|
|
||||||
return "multibyte shift vermicelli";
|
|
||||||
case ACCEL_MSVERM_NOCASE:
|
|
||||||
return "multibyte shift vermicelli nocase";
|
|
||||||
case ACCEL_MSGVERM:
|
|
||||||
return "multibyte shift-grab vermicelli";
|
|
||||||
case ACCEL_MSGVERM_NOCASE:
|
|
||||||
return "multibyte shift-grab vermicelli nocase";
|
|
||||||
case ACCEL_MDSVERM:
|
|
||||||
return "multibyte doubleshift vermicelli";
|
|
||||||
case ACCEL_MDSVERM_NOCASE:
|
|
||||||
return "multibyte doubleshift vermicelli nocase";
|
|
||||||
case ACCEL_MDSGVERM:
|
|
||||||
return "multibyte doubleshift-grab vermicelli";
|
|
||||||
case ACCEL_MDSGVERM_NOCASE:
|
|
||||||
return "multibyte doubleshift-grab vermicelli nocase";
|
|
||||||
case ACCEL_MLSHUFTI:
|
|
||||||
return "multibyte long shufti";
|
|
||||||
case ACCEL_MLGSHUFTI:
|
|
||||||
return "multibyte long-grab shufti";
|
|
||||||
case ACCEL_MSSHUFTI:
|
|
||||||
return "multibyte shift shufti";
|
|
||||||
case ACCEL_MSGSHUFTI:
|
|
||||||
return "multibyte shift-grab shufti";
|
|
||||||
case ACCEL_MDSSHUFTI:
|
|
||||||
return "multibyte doubleshift shufti";
|
|
||||||
case ACCEL_MDSGSHUFTI:
|
|
||||||
return "multibyte doubleshift-grab shufti";
|
|
||||||
case ACCEL_MLTRUFFLE:
|
|
||||||
return "multibyte long truffle";
|
|
||||||
case ACCEL_MLGTRUFFLE:
|
|
||||||
return "multibyte long-grab truffle";
|
|
||||||
case ACCEL_MSTRUFFLE:
|
|
||||||
return "multibyte shift truffle";
|
|
||||||
case ACCEL_MSGTRUFFLE:
|
|
||||||
return "multibyte shift-grab truffle";
|
|
||||||
case ACCEL_MDSTRUFFLE:
|
|
||||||
return "multibyte doubleshift truffle";
|
|
||||||
case ACCEL_MDSGTRUFFLE:
|
|
||||||
return "multibyte doubleshift-grab truffle";
|
|
||||||
default:
|
default:
|
||||||
return "unknown!";
|
return "unknown!";
|
||||||
}
|
}
|
||||||
@ -283,59 +235,6 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) {
|
|||||||
(const u8 *)&accel.truffle.mask2);
|
(const u8 *)&accel.truffle.mask2);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case ACCEL_MLVERM:
|
|
||||||
case ACCEL_MLVERM_NOCASE:
|
|
||||||
case ACCEL_MLGVERM:
|
|
||||||
case ACCEL_MLGVERM_NOCASE:
|
|
||||||
case ACCEL_MSVERM:
|
|
||||||
case ACCEL_MSVERM_NOCASE:
|
|
||||||
case ACCEL_MSGVERM:
|
|
||||||
case ACCEL_MSGVERM_NOCASE:
|
|
||||||
fprintf(f, " [\\x%02hhx] len:%u\n", accel.mverm.c, accel.mverm.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MDSVERM:
|
|
||||||
case ACCEL_MDSVERM_NOCASE:
|
|
||||||
case ACCEL_MDSGVERM:
|
|
||||||
case ACCEL_MDSGVERM_NOCASE:
|
|
||||||
fprintf(f, " [\\x%02hhx] len1:%u len2:%u\n", accel.mdverm.c, accel.mdverm.len1,
|
|
||||||
accel.mdverm.len2);
|
|
||||||
break;
|
|
||||||
case ACCEL_MLSHUFTI:
|
|
||||||
case ACCEL_MLGSHUFTI:
|
|
||||||
case ACCEL_MSSHUFTI:
|
|
||||||
case ACCEL_MSGSHUFTI:
|
|
||||||
fprintf(f, " len:%u\n", accel.mshufti.len);
|
|
||||||
dumpShuftiMasks(f, (const u8 *)&accel.mshufti.lo,
|
|
||||||
(const u8 *)&accel.mshufti.hi);
|
|
||||||
dumpShuftiCharReach(f, (const u8 *)&accel.mshufti.lo,
|
|
||||||
(const u8 *)&accel.mshufti.hi);
|
|
||||||
break;
|
|
||||||
case ACCEL_MDSSHUFTI:
|
|
||||||
case ACCEL_MDSGSHUFTI:
|
|
||||||
fprintf(f, " len1:%u len2:%u\n", accel.mdshufti.len1, accel.mdshufti.len2);
|
|
||||||
dumpShuftiMasks(f, (const u8 *)&accel.mdshufti.lo,
|
|
||||||
(const u8 *)&accel.mdshufti.hi);
|
|
||||||
dumpShuftiCharReach(f, (const u8 *)&accel.mdshufti.lo,
|
|
||||||
(const u8 *)&accel.mdshufti.hi);
|
|
||||||
break;
|
|
||||||
case ACCEL_MLTRUFFLE:
|
|
||||||
case ACCEL_MLGTRUFFLE:
|
|
||||||
case ACCEL_MSTRUFFLE:
|
|
||||||
case ACCEL_MSGTRUFFLE:
|
|
||||||
fprintf(f, " len:%u\n", accel.mtruffle.len);
|
|
||||||
dumpTruffleMasks(f, (const u8 *)&accel.mtruffle.mask1,
|
|
||||||
(const u8 *)&accel.mtruffle.mask2);
|
|
||||||
dumpTruffleCharReach(f, (const u8 *)&accel.mtruffle.mask1,
|
|
||||||
(const u8 *)&accel.mtruffle.mask2);
|
|
||||||
break;
|
|
||||||
case ACCEL_MDSTRUFFLE:
|
|
||||||
case ACCEL_MDSGTRUFFLE:
|
|
||||||
fprintf(f, " len1:%u len2:%u\n", accel.mdtruffle.len1, accel.mdtruffle.len2);
|
|
||||||
dumpTruffleMasks(f, (const u8 *)&accel.mdtruffle.mask1,
|
|
||||||
(const u8 *)&accel.mdtruffle.mask2);
|
|
||||||
dumpTruffleCharReach(f, (const u8 *)&accel.mdtruffle.mask1,
|
|
||||||
(const u8 *)&accel.mdtruffle.mask2);
|
|
||||||
break;
|
|
||||||
default:
|
default:
|
||||||
fprintf(f, "\n");
|
fprintf(f, "\n");
|
||||||
break;
|
break;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -225,274 +225,6 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
|
|||||||
aux->accel_type = ACCEL_NONE;
|
aux->accel_type = ACCEL_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
|
||||||
void buildAccelMulti(const AccelInfo &info, AccelAux *aux) {
|
|
||||||
if (info.ma_type == MultibyteAccelInfo::MAT_NONE) {
|
|
||||||
DEBUG_PRINTF("no multimatch for us :(");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 offset = info.multiaccel_offset;
|
|
||||||
const CharReach &stops = info.multiaccel_stops;
|
|
||||||
|
|
||||||
assert(aux->accel_type == ACCEL_NONE);
|
|
||||||
if (stops.all()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t outs = stops.count();
|
|
||||||
DEBUG_PRINTF("%zu outs\n", outs);
|
|
||||||
assert(outs && outs < 256);
|
|
||||||
|
|
||||||
switch (info.ma_type) {
|
|
||||||
case MultibyteAccelInfo::MAT_LONG:
|
|
||||||
if (outs == 1) {
|
|
||||||
aux->accel_type = ACCEL_MLVERM;
|
|
||||||
aux->mverm.offset = offset;
|
|
||||||
aux->mverm.c = stops.find_first();
|
|
||||||
aux->mverm.len = info.ma_len1;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (outs == 2 && stops.isCaselessChar()) {
|
|
||||||
aux->accel_type = ACCEL_MLVERM_NOCASE;
|
|
||||||
aux->mverm.offset = offset;
|
|
||||||
aux->mverm.c = stops.find_first() & CASE_CLEAR;
|
|
||||||
aux->mverm.len = info.ma_len1;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
|
|
||||||
aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case MultibyteAccelInfo::MAT_LONGGRAB:
|
|
||||||
if (outs == 1) {
|
|
||||||
aux->accel_type = ACCEL_MLGVERM;
|
|
||||||
aux->mverm.offset = offset;
|
|
||||||
aux->mverm.c = stops.find_first();
|
|
||||||
aux->mverm.len = info.ma_len1;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (outs == 2 && stops.isCaselessChar()) {
|
|
||||||
aux->accel_type = ACCEL_MLGVERM_NOCASE;
|
|
||||||
aux->mverm.offset = offset;
|
|
||||||
aux->mverm.c = stops.find_first() & CASE_CLEAR;
|
|
||||||
aux->mverm.len = info.ma_len1;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
|
|
||||||
aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case MultibyteAccelInfo::MAT_SHIFT:
|
|
||||||
if (outs == 1) {
|
|
||||||
aux->accel_type = ACCEL_MSVERM;
|
|
||||||
aux->mverm.offset = offset;
|
|
||||||
aux->mverm.c = stops.find_first();
|
|
||||||
aux->mverm.len = info.ma_len1;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (outs == 2 && stops.isCaselessChar()) {
|
|
||||||
aux->accel_type = ACCEL_MSVERM_NOCASE;
|
|
||||||
aux->mverm.offset = offset;
|
|
||||||
aux->mverm.c = stops.find_first() & CASE_CLEAR;
|
|
||||||
aux->mverm.len = info.ma_len1;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
|
|
||||||
aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case MultibyteAccelInfo::MAT_SHIFTGRAB:
|
|
||||||
if (outs == 1) {
|
|
||||||
aux->accel_type = ACCEL_MSGVERM;
|
|
||||||
aux->mverm.offset = offset;
|
|
||||||
aux->mverm.c = stops.find_first();
|
|
||||||
aux->mverm.len = info.ma_len1;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (outs == 2 && stops.isCaselessChar()) {
|
|
||||||
aux->accel_type = ACCEL_MSGVERM_NOCASE;
|
|
||||||
aux->mverm.offset = offset;
|
|
||||||
aux->mverm.c = stops.find_first() & CASE_CLEAR;
|
|
||||||
aux->mverm.len = info.ma_len1;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
|
|
||||||
aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case MultibyteAccelInfo::MAT_DSHIFT:
|
|
||||||
if (outs == 1) {
|
|
||||||
aux->accel_type = ACCEL_MDSVERM;
|
|
||||||
aux->mdverm.offset = offset;
|
|
||||||
aux->mdverm.c = stops.find_first();
|
|
||||||
aux->mdverm.len1 = info.ma_len1;
|
|
||||||
aux->mdverm.len2 = info.ma_len2;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (outs == 2 && stops.isCaselessChar()) {
|
|
||||||
aux->accel_type = ACCEL_MDSVERM_NOCASE;
|
|
||||||
aux->mverm.offset = offset;
|
|
||||||
aux->mverm.c = stops.find_first() & CASE_CLEAR;
|
|
||||||
aux->mdverm.len1 = info.ma_len1;
|
|
||||||
aux->mdverm.len2 = info.ma_len2;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
|
|
||||||
aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case MultibyteAccelInfo::MAT_DSHIFTGRAB:
|
|
||||||
if (outs == 1) {
|
|
||||||
aux->accel_type = ACCEL_MDSGVERM;
|
|
||||||
aux->mdverm.offset = offset;
|
|
||||||
aux->mdverm.c = stops.find_first();
|
|
||||||
aux->mdverm.len1 = info.ma_len1;
|
|
||||||
aux->mdverm.len2 = info.ma_len2;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (outs == 2 && stops.isCaselessChar()) {
|
|
||||||
aux->accel_type = ACCEL_MDSGVERM_NOCASE;
|
|
||||||
aux->mverm.offset = offset;
|
|
||||||
aux->mverm.c = stops.find_first() & CASE_CLEAR;
|
|
||||||
aux->mdverm.len1 = info.ma_len1;
|
|
||||||
aux->mdverm.len2 = info.ma_len2;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
|
|
||||||
aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
// shouldn't happen
|
|
||||||
assert(0);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEBUG_PRINTF("attempting shufti for %zu chars\n", outs);
|
|
||||||
|
|
||||||
switch (info.ma_type) {
|
|
||||||
case MultibyteAccelInfo::MAT_LONG:
|
|
||||||
if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
|
|
||||||
(u8 *)&aux->mshufti.hi) == -1) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
aux->accel_type = ACCEL_MLSHUFTI;
|
|
||||||
aux->mshufti.offset = offset;
|
|
||||||
aux->mshufti.len = info.ma_len1;
|
|
||||||
return;
|
|
||||||
case MultibyteAccelInfo::MAT_LONGGRAB:
|
|
||||||
if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
|
|
||||||
(u8 *)&aux->mshufti.hi) == -1) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
aux->accel_type = ACCEL_MLGSHUFTI;
|
|
||||||
aux->mshufti.offset = offset;
|
|
||||||
aux->mshufti.len = info.ma_len1;
|
|
||||||
return;
|
|
||||||
case MultibyteAccelInfo::MAT_SHIFT:
|
|
||||||
if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
|
|
||||||
(u8 *)&aux->mshufti.hi) == -1) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
aux->accel_type = ACCEL_MSSHUFTI;
|
|
||||||
aux->mshufti.offset = offset;
|
|
||||||
aux->mshufti.len = info.ma_len1;
|
|
||||||
return;
|
|
||||||
case MultibyteAccelInfo::MAT_SHIFTGRAB:
|
|
||||||
if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
|
|
||||||
(u8 *)&aux->mshufti.hi) == -1) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
aux->accel_type = ACCEL_MSGSHUFTI;
|
|
||||||
aux->mshufti.offset = offset;
|
|
||||||
aux->mshufti.len = info.ma_len1;
|
|
||||||
return;
|
|
||||||
case MultibyteAccelInfo::MAT_DSHIFT:
|
|
||||||
if (shuftiBuildMasks(stops, (u8 *)&aux->mdshufti.lo,
|
|
||||||
(u8 *)&aux->mdshufti.hi) == -1) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
aux->accel_type = ACCEL_MDSSHUFTI;
|
|
||||||
aux->mdshufti.offset = offset;
|
|
||||||
aux->mdshufti.len1 = info.ma_len1;
|
|
||||||
aux->mdshufti.len2 = info.ma_len2;
|
|
||||||
return;
|
|
||||||
case MultibyteAccelInfo::MAT_DSHIFTGRAB:
|
|
||||||
if (shuftiBuildMasks(stops, (u8 *)&aux->mdshufti.lo,
|
|
||||||
(u8 *)&aux->mdshufti.hi) == -1) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
aux->accel_type = ACCEL_MDSGSHUFTI;
|
|
||||||
aux->mdshufti.offset = offset;
|
|
||||||
aux->mdshufti.len1 = info.ma_len1;
|
|
||||||
aux->mdshufti.len2 = info.ma_len2;
|
|
||||||
return;
|
|
||||||
default:
|
|
||||||
// shouldn't happen
|
|
||||||
assert(0);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
DEBUG_PRINTF("shufti build failed, falling through\n");
|
|
||||||
|
|
||||||
if (outs <= ACCEL_MAX_STOP_CHAR) {
|
|
||||||
DEBUG_PRINTF("building Truffle for %zu chars\n", outs);
|
|
||||||
switch (info.ma_type) {
|
|
||||||
case MultibyteAccelInfo::MAT_LONG:
|
|
||||||
aux->accel_type = ACCEL_MLTRUFFLE;
|
|
||||||
aux->mtruffle.offset = offset;
|
|
||||||
aux->mtruffle.len = info.ma_len1;
|
|
||||||
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
|
|
||||||
(u8 *)&aux->mtruffle.mask2);
|
|
||||||
break;
|
|
||||||
case MultibyteAccelInfo::MAT_LONGGRAB:
|
|
||||||
aux->accel_type = ACCEL_MLGTRUFFLE;
|
|
||||||
aux->mtruffle.offset = offset;
|
|
||||||
aux->mtruffle.len = info.ma_len1;
|
|
||||||
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
|
|
||||||
(u8 *)&aux->mtruffle.mask2);
|
|
||||||
break;
|
|
||||||
case MultibyteAccelInfo::MAT_SHIFT:
|
|
||||||
aux->accel_type = ACCEL_MSTRUFFLE;
|
|
||||||
aux->mtruffle.offset = offset;
|
|
||||||
aux->mtruffle.len = info.ma_len1;
|
|
||||||
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
|
|
||||||
(u8 *)&aux->mtruffle.mask2);
|
|
||||||
break;
|
|
||||||
case MultibyteAccelInfo::MAT_SHIFTGRAB:
|
|
||||||
aux->accel_type = ACCEL_MSGTRUFFLE;
|
|
||||||
aux->mtruffle.offset = offset;
|
|
||||||
aux->mtruffle.len = info.ma_len1;
|
|
||||||
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
|
|
||||||
(u8 *)&aux->mtruffle.mask2);
|
|
||||||
break;
|
|
||||||
case MultibyteAccelInfo::MAT_DSHIFT:
|
|
||||||
aux->accel_type = ACCEL_MDSTRUFFLE;
|
|
||||||
aux->mdtruffle.offset = offset;
|
|
||||||
aux->mdtruffle.len1 = info.ma_len1;
|
|
||||||
aux->mdtruffle.len2 = info.ma_len2;
|
|
||||||
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
|
|
||||||
(u8 *)&aux->mdtruffle.mask2);
|
|
||||||
break;
|
|
||||||
case MultibyteAccelInfo::MAT_DSHIFTGRAB:
|
|
||||||
aux->accel_type = ACCEL_MDSGTRUFFLE;
|
|
||||||
aux->mdtruffle.offset = offset;
|
|
||||||
aux->mdtruffle.len1 = info.ma_len1;
|
|
||||||
aux->mdtruffle.len2 = info.ma_len2;
|
|
||||||
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
|
|
||||||
(u8 *)&aux->mdtruffle.mask2);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
// shouldn't happen
|
|
||||||
assert(0);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEBUG_PRINTF("unable to accelerate multibyte case with %zu outs\n", outs);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool buildAccelAux(const AccelInfo &info, AccelAux *aux) {
|
bool buildAccelAux(const AccelInfo &info, AccelAux *aux) {
|
||||||
assert(aux->accel_type == ACCEL_NONE);
|
assert(aux->accel_type == ACCEL_NONE);
|
||||||
if (info.single_stops.none()) {
|
if (info.single_stops.none()) {
|
||||||
@ -500,9 +232,6 @@ bool buildAccelAux(const AccelInfo &info, AccelAux *aux) {
|
|||||||
aux->accel_type = ACCEL_RED_TAPE;
|
aux->accel_type = ACCEL_RED_TAPE;
|
||||||
aux->generic.offset = info.single_offset;
|
aux->generic.offset = info.single_offset;
|
||||||
}
|
}
|
||||||
if (aux->accel_type == ACCEL_NONE) {
|
|
||||||
buildAccelMulti(info, aux);
|
|
||||||
}
|
|
||||||
if (aux->accel_type == ACCEL_NONE) {
|
if (aux->accel_type == ACCEL_NONE) {
|
||||||
buildAccelDouble(info, aux);
|
buildAccelDouble(info, aux);
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -37,30 +37,9 @@ union AccelAux;
|
|||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
struct MultibyteAccelInfo {
|
|
||||||
/* multibyte accel schemes, ordered by strength */
|
|
||||||
enum multiaccel_type {
|
|
||||||
MAT_SHIFT,
|
|
||||||
MAT_SHIFTGRAB,
|
|
||||||
MAT_DSHIFT,
|
|
||||||
MAT_DSHIFTGRAB,
|
|
||||||
MAT_LONG,
|
|
||||||
MAT_LONGGRAB,
|
|
||||||
MAT_MAX,
|
|
||||||
MAT_NONE = MAT_MAX
|
|
||||||
};
|
|
||||||
CharReach cr;
|
|
||||||
u32 offset = 0;
|
|
||||||
u32 len1 = 0;
|
|
||||||
u32 len2 = 0;
|
|
||||||
multiaccel_type type = MAT_NONE;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct AccelInfo {
|
struct AccelInfo {
|
||||||
AccelInfo() : single_offset(0U), double_offset(0U),
|
AccelInfo() : single_offset(0U), double_offset(0U),
|
||||||
single_stops(CharReach::dot()),
|
single_stops(CharReach::dot()) {}
|
||||||
multiaccel_offset(0), ma_len1(0), ma_len2(0),
|
|
||||||
ma_type(MultibyteAccelInfo::MAT_NONE) {}
|
|
||||||
u32 single_offset; /**< offset correction to apply to single schemes */
|
u32 single_offset; /**< offset correction to apply to single schemes */
|
||||||
u32 double_offset; /**< offset correction to apply to double schemes */
|
u32 double_offset; /**< offset correction to apply to double schemes */
|
||||||
CharReach double_stop1; /**< single-byte accel stop literals for double
|
CharReach double_stop1; /**< single-byte accel stop literals for double
|
||||||
@ -68,11 +47,6 @@ struct AccelInfo {
|
|||||||
flat_set<std::pair<u8, u8>> double_stop2; /**< double-byte accel stop
|
flat_set<std::pair<u8, u8>> double_stop2; /**< double-byte accel stop
|
||||||
* literals */
|
* literals */
|
||||||
CharReach single_stops; /**< escapes for single byte acceleration */
|
CharReach single_stops; /**< escapes for single byte acceleration */
|
||||||
u32 multiaccel_offset; /**< offset correction to apply to multibyte schemes */
|
|
||||||
CharReach multiaccel_stops; /**< escapes for multibyte acceleration */
|
|
||||||
u32 ma_len1; /**< multiaccel len1 */
|
|
||||||
u32 ma_len2; /**< multiaccel len2 */
|
|
||||||
MultibyteAccelInfo::multiaccel_type ma_type; /**< multiaccel type */
|
|
||||||
};
|
};
|
||||||
|
|
||||||
bool buildAccelAux(const AccelInfo &info, AccelAux *aux);
|
bool buildAccelAux(const AccelInfo &info, AccelAux *aux);
|
||||||
|
@ -39,9 +39,6 @@
|
|||||||
#include "nfa_internal.h"
|
#include "nfa_internal.h"
|
||||||
#include "shufti.h"
|
#include "shufti.h"
|
||||||
#include "truffle.h"
|
#include "truffle.h"
|
||||||
#include "multishufti.h"
|
|
||||||
#include "multitruffle.h"
|
|
||||||
#include "multivermicelli.h"
|
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "vermicelli.h"
|
#include "vermicelli.h"
|
||||||
#include "util/arch.h"
|
#include "util/arch.h"
|
||||||
|
@ -93,8 +93,6 @@ struct precalcAccel {
|
|||||||
CharReach double_cr;
|
CharReach double_cr;
|
||||||
flat_set<pair<u8, u8>> double_lits; /* double-byte accel stop literals */
|
flat_set<pair<u8, u8>> double_lits; /* double-byte accel stop literals */
|
||||||
u32 double_offset;
|
u32 double_offset;
|
||||||
|
|
||||||
MultibyteAccelInfo ma_info;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct limex_accel_info {
|
struct limex_accel_info {
|
||||||
@ -358,16 +356,12 @@ void buildReachMapping(const build_info &args, vector<NFAStateSet> &reach,
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct AccelBuild {
|
struct AccelBuild {
|
||||||
AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0), ma_len1(0),
|
AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0) {}
|
||||||
ma_len2(0), ma_type(MultibyteAccelInfo::MAT_NONE) {}
|
|
||||||
NFAVertex v;
|
NFAVertex v;
|
||||||
u32 state;
|
u32 state;
|
||||||
u32 offset; // offset correction to apply
|
u32 offset; // offset correction to apply
|
||||||
CharReach stop1; // single-byte accel stop literals
|
CharReach stop1; // single-byte accel stop literals
|
||||||
flat_set<pair<u8, u8>> stop2; // double-byte accel stop literals
|
flat_set<pair<u8, u8>> stop2; // double-byte accel stop literals
|
||||||
u32 ma_len1; // multiaccel len1
|
|
||||||
u32 ma_len2; // multiaccel len2
|
|
||||||
MultibyteAccelInfo::multiaccel_type ma_type; // multiaccel type
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -382,12 +376,7 @@ void findStopLiterals(const build_info &bi, NFAVertex v, AccelBuild &build) {
|
|||||||
build.stop1 = CharReach::dot();
|
build.stop1 = CharReach::dot();
|
||||||
} else {
|
} else {
|
||||||
const precalcAccel &precalc = bi.accel.precalc.at(ss);
|
const precalcAccel &precalc = bi.accel.precalc.at(ss);
|
||||||
unsigned ma_len = precalc.ma_info.len1 + precalc.ma_info.len2;
|
if (precalc.double_lits.empty()) {
|
||||||
if (ma_len >= MULTIACCEL_MIN_LEN) {
|
|
||||||
build.ma_len1 = precalc.ma_info.len1;
|
|
||||||
build.stop1 = precalc.ma_info.cr;
|
|
||||||
build.offset = precalc.ma_info.offset;
|
|
||||||
} else if (precalc.double_lits.empty()) {
|
|
||||||
build.stop1 = precalc.single_cr;
|
build.stop1 = precalc.single_cr;
|
||||||
build.offset = precalc.single_offset;
|
build.offset = precalc.single_offset;
|
||||||
} else {
|
} else {
|
||||||
@ -606,7 +595,6 @@ void fillAccelInfo(build_info &bi) {
|
|||||||
limex_accel_info &accel = bi.accel;
|
limex_accel_info &accel = bi.accel;
|
||||||
unordered_map<NFAVertex, AccelScheme> &accel_map = accel.accel_map;
|
unordered_map<NFAVertex, AccelScheme> &accel_map = accel.accel_map;
|
||||||
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic = bi.br_cyclic;
|
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic = bi.br_cyclic;
|
||||||
const CompileContext &cc = bi.cc;
|
|
||||||
const unordered_map<NFAVertex, u32> &state_ids = bi.state_ids;
|
const unordered_map<NFAVertex, u32> &state_ids = bi.state_ids;
|
||||||
const u32 num_states = bi.num_states;
|
const u32 num_states = bi.num_states;
|
||||||
|
|
||||||
@ -663,27 +651,17 @@ void fillAccelInfo(build_info &bi) {
|
|||||||
DEBUG_PRINTF("accel %u ok with offset s%u, d%u\n", i, as.offset,
|
DEBUG_PRINTF("accel %u ok with offset s%u, d%u\n", i, as.offset,
|
||||||
as.double_offset);
|
as.double_offset);
|
||||||
|
|
||||||
// try multibyte acceleration first
|
|
||||||
MultibyteAccelInfo mai = nfaCheckMultiAccel(g, states, cc);
|
|
||||||
|
|
||||||
precalcAccel &pa = accel.precalc[state_set];
|
precalcAccel &pa = accel.precalc[state_set];
|
||||||
useful |= state_set;
|
|
||||||
|
|
||||||
// if we successfully built a multibyte accel scheme, use that
|
|
||||||
if (mai.type != MultibyteAccelInfo::MAT_NONE) {
|
|
||||||
pa.ma_info = mai;
|
|
||||||
|
|
||||||
DEBUG_PRINTF("multibyte acceleration!\n");
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
pa.single_offset = as.offset;
|
pa.single_offset = as.offset;
|
||||||
pa.single_cr = as.cr;
|
pa.single_cr = as.cr;
|
||||||
|
|
||||||
if (as.double_byte.size() != 0) {
|
if (as.double_byte.size() != 0) {
|
||||||
pa.double_offset = as.double_offset;
|
pa.double_offset = as.double_offset;
|
||||||
pa.double_lits = as.double_byte;
|
pa.double_lits = as.double_byte;
|
||||||
pa.double_cr = as.double_cr;
|
pa.double_cr = as.double_cr;
|
||||||
};
|
}
|
||||||
|
|
||||||
|
useful |= state_set;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const auto &m : accel_map) {
|
for (const auto &m : accel_map) {
|
||||||
@ -700,19 +678,8 @@ void fillAccelInfo(build_info &bi) {
|
|||||||
state_set.reset();
|
state_set.reset();
|
||||||
state_set.set(state_id);
|
state_set.set(state_id);
|
||||||
|
|
||||||
bool is_multi = false;
|
|
||||||
auto p_it = accel.precalc.find(state_set);
|
|
||||||
if (p_it != accel.precalc.end()) {
|
|
||||||
const precalcAccel &pa = p_it->second;
|
|
||||||
offset = max(pa.double_offset, pa.single_offset);
|
|
||||||
is_multi = pa.ma_info.type != MultibyteAccelInfo::MAT_NONE;
|
|
||||||
assert(offset <= MAX_ACCEL_DEPTH);
|
|
||||||
}
|
|
||||||
|
|
||||||
accel.accelerable.insert(v);
|
accel.accelerable.insert(v);
|
||||||
if (!is_multi) {
|
findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]);
|
||||||
findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -954,16 +921,8 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask,
|
|||||||
|
|
||||||
if (contains(accel.precalc, effective_states)) {
|
if (contains(accel.precalc, effective_states)) {
|
||||||
const auto &precalc = accel.precalc.at(effective_states);
|
const auto &precalc = accel.precalc.at(effective_states);
|
||||||
if (precalc.ma_info.type != MultibyteAccelInfo::MAT_NONE) {
|
ainfo.single_offset = precalc.single_offset;
|
||||||
ainfo.ma_len1 = precalc.ma_info.len1;
|
ainfo.single_stops = precalc.single_cr;
|
||||||
ainfo.ma_len2 = precalc.ma_info.len2;
|
|
||||||
ainfo.multiaccel_offset = precalc.ma_info.offset;
|
|
||||||
ainfo.multiaccel_stops = precalc.ma_info.cr;
|
|
||||||
ainfo.ma_type = precalc.ma_info.type;
|
|
||||||
} else {
|
|
||||||
ainfo.single_offset = precalc.single_offset;
|
|
||||||
ainfo.single_stops = precalc.single_cr;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,265 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MULTIACCEL_COMMON_H_
|
|
||||||
#define MULTIACCEL_COMMON_H_
|
|
||||||
|
|
||||||
#include "config.h"
|
|
||||||
#include "ue2common.h"
|
|
||||||
#include "util/join.h"
|
|
||||||
#include "util/bitutils.h"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* When doing shifting, remember that the total number of shifts should be n-1
|
|
||||||
*/
|
|
||||||
#define VARISHIFT(src, dst, len) \
|
|
||||||
do { \
|
|
||||||
(dst) &= (src) >> (len); \
|
|
||||||
} while (0)
|
|
||||||
#define STATIC_SHIFT1(x) \
|
|
||||||
do { \
|
|
||||||
(x) &= (x) >> 1; \
|
|
||||||
} while (0)
|
|
||||||
#define STATIC_SHIFT2(x) \
|
|
||||||
do { \
|
|
||||||
(x) &= (x) >> 2;\
|
|
||||||
} while (0)
|
|
||||||
#define STATIC_SHIFT4(x) \
|
|
||||||
do { \
|
|
||||||
(x) &= (x) >> 4; \
|
|
||||||
} while (0)
|
|
||||||
#define STATIC_SHIFT8(x) \
|
|
||||||
do { \
|
|
||||||
(x) &= (x) >> 8; \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT1(x) \
|
|
||||||
do {} while (0)
|
|
||||||
#define SHIFT2(x) \
|
|
||||||
do { \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT3(x) \
|
|
||||||
do { \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT4(x) \
|
|
||||||
do { \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT5(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT4(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT6(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT4(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT7(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT4(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT8(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT4(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT9(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT8(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT10(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT8(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT11(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT8(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT12(x); \
|
|
||||||
do { \
|
|
||||||
SHIFT8(x);\
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT13(x); \
|
|
||||||
do { \
|
|
||||||
SHIFT8(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT14(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT8(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT15(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT8(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT16(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT8(x); \
|
|
||||||
STATIC_SHIFT8(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT17(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT16(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT18(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT16(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT19(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT16(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT20(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT16(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT21(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT16(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT22(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT16(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT23(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT16(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT24(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT16(x); \
|
|
||||||
STATIC_SHIFT8(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT25(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT24(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT26(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT24(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT27(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT24(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT28(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT24(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT29(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT24(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT30(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT24(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT31(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT24(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT32(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT24(x); \
|
|
||||||
STATIC_SHIFT8(x); \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* this function is used by 32-bit multiaccel matchers. 32-bit matchers accept
|
|
||||||
* a 32-bit integer as a buffer, where low 16 bits is movemask result and
|
|
||||||
* high 16 bits are "don't care" values. this function is not expected to return
|
|
||||||
* a result higher than 16.
|
|
||||||
*/
|
|
||||||
static really_inline
|
|
||||||
const u8 *match32(const u8 *buf, const u32 z) {
|
|
||||||
if (unlikely(z != 0)) {
|
|
||||||
u32 pos = ctz32(z);
|
|
||||||
assert(pos < 16);
|
|
||||||
return buf + pos;
|
|
||||||
}
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* this function is used by 64-bit multiaccel matchers. 64-bit matchers accept
|
|
||||||
* a 64-bit integer as a buffer, where low 32 bits is movemask result and
|
|
||||||
* high 32 bits are "don't care" values. this function is not expected to return
|
|
||||||
* a result higher than 32.
|
|
||||||
*/
|
|
||||||
static really_inline
|
|
||||||
const u8 *match64(const u8 *buf, const u64a z) {
|
|
||||||
if (unlikely(z != 0)) {
|
|
||||||
u32 pos = ctz64(z);
|
|
||||||
assert(pos < 32);
|
|
||||||
return buf + pos;
|
|
||||||
}
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* MULTIACCEL_COMMON_H_ */
|
|
@ -1,439 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "multiaccel_compilehelper.h"
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
using namespace ue2;
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
|
||||||
static const char* state_to_str[] = {
|
|
||||||
"FIRST_RUN",
|
|
||||||
"SECOND_RUN",
|
|
||||||
"WAITING_FOR_GRAB",
|
|
||||||
"FIRST_TAIL",
|
|
||||||
"SECOND_TAIL",
|
|
||||||
"STOPPED",
|
|
||||||
"INVALID"
|
|
||||||
};
|
|
||||||
static const char* type_to_str[] = {
|
|
||||||
"SHIFT",
|
|
||||||
"SHIFTGRAB",
|
|
||||||
"DOUBLESHIFT",
|
|
||||||
"DOUBLESHIFTGRAB",
|
|
||||||
"LONG",
|
|
||||||
"LONGGRAB",
|
|
||||||
"NONE"
|
|
||||||
};
|
|
||||||
|
|
||||||
static
|
|
||||||
void dumpMultiaccelState(const accel_data &d) {
|
|
||||||
DEBUG_PRINTF("type: %s state: %s len1: %u tlen1: %u len2: %u tlen2: %u\n",
|
|
||||||
type_to_str[(unsigned) d.type],
|
|
||||||
state_to_str[(unsigned) d.state],
|
|
||||||
d.len1, d.tlen1, d.len2, d.tlen2);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* stop all the matching. this may render most schemes invalid. */
|
|
||||||
static
|
|
||||||
void stop(accel_data &d) {
|
|
||||||
switch (d.state) {
|
|
||||||
case STATE_STOPPED:
|
|
||||||
case STATE_INVALID:
|
|
||||||
break;
|
|
||||||
case STATE_FIRST_TAIL:
|
|
||||||
case STATE_SECOND_RUN:
|
|
||||||
/*
|
|
||||||
* Shift matchers are special case, because they have "tails".
|
|
||||||
* When shift matcher reaches a mid/endpoint, tail mode is
|
|
||||||
* activated, which looks for more matches to extend the match.
|
|
||||||
*
|
|
||||||
* For example, consider pattern /a{5}ba{3}/. Under normal circumstances,
|
|
||||||
* long-grab matcher will be picked for this pattern (matching a run of a's,
|
|
||||||
* followed by a not-a), because doubleshift matcher would be confused by
|
|
||||||
* consecutive a's and would parse the pattern as a.{0}a.{0}a (two shifts
|
|
||||||
* by 1) and throw out the rest of the pattern.
|
|
||||||
*
|
|
||||||
* With tails, we defer ending the run until we actually run out of
|
|
||||||
* matching characters, so the above pattern will now be parsed by
|
|
||||||
* doubleshift matcher as /a.{3}a.{3}a/ (two shifts by 4).
|
|
||||||
*
|
|
||||||
* So if we are stopping shift matchers, we should check if we aren't in
|
|
||||||
* the process of matching first tail or second run. If we are, we can't
|
|
||||||
* finish the second run as we are stopping, but we can try and split
|
|
||||||
* the first tail instead to obtain a valid second run.
|
|
||||||
*/
|
|
||||||
if ((d.type == MultibyteAccelInfo::MAT_DSHIFT ||
|
|
||||||
d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) && d.tlen1 == 0) {
|
|
||||||
// can't split an empty void...
|
|
||||||
d.state = STATE_INVALID;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
d.len2 = 0;
|
|
||||||
d.state = STATE_STOPPED;
|
|
||||||
break;
|
|
||||||
case STATE_SECOND_TAIL:
|
|
||||||
d.state = STATE_STOPPED;
|
|
||||||
break;
|
|
||||||
case STATE_WAITING_FOR_GRAB:
|
|
||||||
case STATE_FIRST_RUN:
|
|
||||||
if (d.type == MultibyteAccelInfo::MAT_LONG) {
|
|
||||||
d.state = STATE_STOPPED;
|
|
||||||
} else {
|
|
||||||
d.state = STATE_INVALID;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
void validate(accel_data &d, unsigned max_len) {
|
|
||||||
// try and fit in all our tails
|
|
||||||
if (d.len1 + d.tlen1 + d.len2 + d.tlen2 < max_len && d.len2 > 0) {
|
|
||||||
// case 1: everything fits in
|
|
||||||
d.len1 += d.tlen1;
|
|
||||||
d.len2 += d.tlen2;
|
|
||||||
d.tlen1 = 0;
|
|
||||||
d.tlen2 = 0;
|
|
||||||
} else if (d.len1 + d.tlen1 + d.len2 < max_len && d.len2 > 0) {
|
|
||||||
// case 2: everything but the second tail fits in
|
|
||||||
d.len1 += d.tlen1;
|
|
||||||
d.tlen1 = 0;
|
|
||||||
// try going for a partial tail
|
|
||||||
if (d.tlen2 != 0) {
|
|
||||||
int new_tlen2 = max_len - 1 - d.len1 - d.len2;
|
|
||||||
if (new_tlen2 > 0) {
|
|
||||||
d.len2 += new_tlen2;
|
|
||||||
}
|
|
||||||
d.tlen2 = 0;
|
|
||||||
}
|
|
||||||
} else if (d.len1 + d.tlen1 < max_len) {
|
|
||||||
// case 3: first run and its tail fits in
|
|
||||||
if (d.type == MultibyteAccelInfo::MAT_DSHIFT ||
|
|
||||||
d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) {
|
|
||||||
// split the tail into a second run
|
|
||||||
d.len2 = d.tlen1;
|
|
||||||
} else {
|
|
||||||
d.len1 += d.tlen1;
|
|
||||||
d.len2 = 0;
|
|
||||||
}
|
|
||||||
d.tlen1 = 0;
|
|
||||||
d.tlen2 = 0;
|
|
||||||
} else if (d.len1 < max_len) {
|
|
||||||
// case 4: nothing but the first run fits in
|
|
||||||
// try going for a partial tail
|
|
||||||
if (d.tlen1 != 0) {
|
|
||||||
int new_tlen1 = max_len - 1 - d.len1;
|
|
||||||
if (new_tlen1 > 0) {
|
|
||||||
d.len1 += new_tlen1;
|
|
||||||
}
|
|
||||||
d.tlen1 = 0;
|
|
||||||
}
|
|
||||||
d.len2 = 0;
|
|
||||||
d.tlen2 = 0;
|
|
||||||
}
|
|
||||||
// if we removed our second run, doubleshift matchers are no longer valid
|
|
||||||
if ((d.type == MultibyteAccelInfo::MAT_DSHIFT ||
|
|
||||||
d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) && d.len2 == 0) {
|
|
||||||
d.state = STATE_INVALID;
|
|
||||||
} else if ((d.type == MultibyteAccelInfo::MAT_LONG) && d.len1 >= max_len) {
|
|
||||||
// long matchers can just stop whenever they want to
|
|
||||||
d.len1 = max_len - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// now, general sanity checks
|
|
||||||
if ((d.len1 + d.tlen1 + d.len2 + d.tlen2) >= max_len) {
|
|
||||||
d.state = STATE_INVALID;
|
|
||||||
}
|
|
||||||
if ((d.len1 + d.tlen1 + d.len2 + d.tlen2) < MULTIACCEL_MIN_LEN) {
|
|
||||||
d.state = STATE_INVALID;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
void match(accel_data &d, const CharReach &ref_cr, const CharReach &cur_cr) {
|
|
||||||
switch (d.type) {
|
|
||||||
case MultibyteAccelInfo::MAT_LONG:
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* For long matcher, we want lots of consecutive same-or-subset
|
|
||||||
* char-reaches
|
|
||||||
*/
|
|
||||||
if ((ref_cr & cur_cr) == cur_cr) {
|
|
||||||
d.len1++;
|
|
||||||
} else {
|
|
||||||
d.state = STATE_STOPPED;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case MultibyteAccelInfo::MAT_LONGGRAB:
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* For long-grab matcher, we want lots of consecutive same-or-subset
|
|
||||||
* char-reaches with a negative match in the end.
|
|
||||||
*/
|
|
||||||
if ((ref_cr & cur_cr) == cur_cr) {
|
|
||||||
d.len1++;
|
|
||||||
} else if (!(ref_cr & cur_cr).any()) {
|
|
||||||
/* we grabbed, stop immediately */
|
|
||||||
d.state = STATE_STOPPED;
|
|
||||||
} else {
|
|
||||||
/* our run-n-grab was interrupted; mark as invalid */
|
|
||||||
d.state = STATE_INVALID;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case MultibyteAccelInfo::MAT_SHIFTGRAB:
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* For shift-grab matcher, we want two matches separated by anything;
|
|
||||||
* however the second vertex *must* be a negative (non-overlapping) match.
|
|
||||||
*
|
|
||||||
* Shiftgrab matcher is identical to shift except for presence of grab.
|
|
||||||
*/
|
|
||||||
if (d.state == STATE_WAITING_FOR_GRAB) {
|
|
||||||
if ((ref_cr & cur_cr).any()) {
|
|
||||||
d.state = STATE_INVALID;
|
|
||||||
} else {
|
|
||||||
d.state = STATE_FIRST_RUN;
|
|
||||||
d.len1++;
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* no break, falling through */
|
|
||||||
case MultibyteAccelInfo::MAT_SHIFT:
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* For shift-matcher, we want two matches separated by anything.
|
|
||||||
*/
|
|
||||||
if (ref_cr == cur_cr) {
|
|
||||||
// keep matching tail
|
|
||||||
switch (d.state) {
|
|
||||||
case STATE_FIRST_RUN:
|
|
||||||
d.state = STATE_FIRST_TAIL;
|
|
||||||
break;
|
|
||||||
case STATE_FIRST_TAIL:
|
|
||||||
d.tlen1++;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
// shouldn't happen
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
switch (d.state) {
|
|
||||||
case STATE_FIRST_RUN:
|
|
||||||
// simply advance
|
|
||||||
d.len1++;
|
|
||||||
break;
|
|
||||||
case STATE_FIRST_TAIL:
|
|
||||||
// we found a non-matching char after tail, so stop
|
|
||||||
d.state = STATE_STOPPED;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
// shouldn't happen
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case MultibyteAccelInfo::MAT_DSHIFTGRAB:
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* For double shift-grab matcher, we want two matches separated by
|
|
||||||
* either negative matches or dots; however the second vertex *must*
|
|
||||||
* be a negative match.
|
|
||||||
*
|
|
||||||
* Doubleshiftgrab matcher is identical to doubleshift except for
|
|
||||||
* presence of grab.
|
|
||||||
*/
|
|
||||||
if (d.state == STATE_WAITING_FOR_GRAB) {
|
|
||||||
if ((ref_cr & cur_cr).any()) {
|
|
||||||
d.state = STATE_INVALID;
|
|
||||||
} else {
|
|
||||||
d.state = STATE_FIRST_RUN;
|
|
||||||
d.len1++;
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* no break, falling through */
|
|
||||||
case MultibyteAccelInfo::MAT_DSHIFT:
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* For double shift matcher, we want three matches, each separated
|
|
||||||
* by a lot of anything.
|
|
||||||
*
|
|
||||||
* Doubleshift matcher is complicated by presence of tails.
|
|
||||||
*/
|
|
||||||
if (ref_cr == cur_cr) {
|
|
||||||
// decide if we are activating second shift or matching tails
|
|
||||||
switch (d.state) {
|
|
||||||
case STATE_FIRST_RUN:
|
|
||||||
d.state = STATE_FIRST_TAIL;
|
|
||||||
d.len2 = 1; // we're now ready for our second run
|
|
||||||
break;
|
|
||||||
case STATE_FIRST_TAIL:
|
|
||||||
d.tlen1++;
|
|
||||||
break;
|
|
||||||
case STATE_SECOND_RUN:
|
|
||||||
d.state = STATE_SECOND_TAIL;
|
|
||||||
break;
|
|
||||||
case STATE_SECOND_TAIL:
|
|
||||||
d.tlen2++;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
// shouldn't happen
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
switch (d.state) {
|
|
||||||
case STATE_FIRST_RUN:
|
|
||||||
d.len1++;
|
|
||||||
break;
|
|
||||||
case STATE_FIRST_TAIL:
|
|
||||||
// start second run
|
|
||||||
d.state = STATE_SECOND_RUN;
|
|
||||||
d.len2++;
|
|
||||||
break;
|
|
||||||
case STATE_SECOND_RUN:
|
|
||||||
d.len2++;
|
|
||||||
break;
|
|
||||||
case STATE_SECOND_TAIL:
|
|
||||||
// stop
|
|
||||||
d.state = STATE_STOPPED;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
// shouldn't happen
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
// shouldn't happen
|
|
||||||
assert(0);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
MultiaccelCompileHelper::MultiaccelCompileHelper(const CharReach &ref_cr,
|
|
||||||
u32 off, unsigned max_length)
|
|
||||||
: cr(ref_cr), offset(off), max_len(max_length) {
|
|
||||||
int accel_num = (int) MultibyteAccelInfo::MAT_MAX;
|
|
||||||
accels.resize(accel_num);
|
|
||||||
|
|
||||||
// mark everything as valid
|
|
||||||
for (int i = 0; i < accel_num; i++) {
|
|
||||||
accel_data &ad = accels[i];
|
|
||||||
ad.len1 = 1;
|
|
||||||
ad.type = (MultibyteAccelInfo::multiaccel_type) i;
|
|
||||||
|
|
||||||
/* for shift-grab matchers, we are waiting for the grab right at the start */
|
|
||||||
if (ad.type == MultibyteAccelInfo::MAT_SHIFTGRAB
|
|
||||||
|| ad.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) {
|
|
||||||
ad.state = STATE_WAITING_FOR_GRAB;
|
|
||||||
} else {
|
|
||||||
ad.state = STATE_FIRST_RUN;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool MultiaccelCompileHelper::canAdvance() {
|
|
||||||
for (const accel_data &ad : accels) {
|
|
||||||
if (ad.state != STATE_STOPPED && ad.state != STATE_INVALID) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MultiaccelCompileHelper::advance(const CharReach &cur_cr) {
|
|
||||||
for (accel_data &ad : accels) {
|
|
||||||
if (ad.state == STATE_STOPPED || ad.state == STATE_INVALID) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
match(ad, cr, cur_cr);
|
|
||||||
#ifdef DEBUG
|
|
||||||
dumpMultiaccelState(ad);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
MultibyteAccelInfo MultiaccelCompileHelper::getBestScheme() {
|
|
||||||
int best_len = 0;
|
|
||||||
accel_data best;
|
|
||||||
|
|
||||||
DEBUG_PRINTF("Stopping multiaccel compile\n");
|
|
||||||
|
|
||||||
for (accel_data &ad : accels) {
|
|
||||||
// stop our matching
|
|
||||||
stop(ad);
|
|
||||||
validate(ad, max_len);
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
|
||||||
dumpMultiaccelState(ad);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// skip invalid schemes
|
|
||||||
if (ad.state == STATE_INVALID) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
DEBUG_PRINTF("Marking as viable\n");
|
|
||||||
|
|
||||||
// TODO: relative strengths of accel schemes? maybe e.g. a shorter
|
|
||||||
// long match would in some cases be preferable to a longer
|
|
||||||
// double shift match (for example, depending on length)?
|
|
||||||
int as_len = ad.len1 + ad.len2;
|
|
||||||
if (as_len >= best_len) {
|
|
||||||
DEBUG_PRINTF("Marking as best\n");
|
|
||||||
best_len = as_len;
|
|
||||||
best = ad;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// if we found at least one accel scheme, return it
|
|
||||||
if (best.state != STATE_INVALID) {
|
|
||||||
#ifdef DEBUG
|
|
||||||
DEBUG_PRINTF("Picked best multiaccel state:\n");
|
|
||||||
dumpMultiaccelState(best);
|
|
||||||
#endif
|
|
||||||
MultibyteAccelInfo info;
|
|
||||||
info.cr = cr;
|
|
||||||
info.offset = offset;
|
|
||||||
info.len1 = best.len1;
|
|
||||||
info.len2 = best.len2;
|
|
||||||
info.type = best.type;
|
|
||||||
return info;
|
|
||||||
}
|
|
||||||
return MultibyteAccelInfo();
|
|
||||||
}
|
|
@ -1,75 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MULTIACCELCOMPILE_H_
|
|
||||||
#define MULTIACCELCOMPILE_H_
|
|
||||||
|
|
||||||
#include "ue2common.h"
|
|
||||||
|
|
||||||
#include "nfagraph/ng_limex_accel.h"
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
namespace ue2 {
|
|
||||||
|
|
||||||
/* accel scheme state machine */
|
|
||||||
enum accel_scheme_state {
|
|
||||||
STATE_FIRST_RUN,
|
|
||||||
STATE_SECOND_RUN,
|
|
||||||
STATE_WAITING_FOR_GRAB,
|
|
||||||
STATE_FIRST_TAIL,
|
|
||||||
STATE_SECOND_TAIL,
|
|
||||||
STATE_STOPPED,
|
|
||||||
STATE_INVALID
|
|
||||||
};
|
|
||||||
|
|
||||||
struct accel_data {
|
|
||||||
MultibyteAccelInfo::multiaccel_type type = MultibyteAccelInfo::MAT_NONE;
|
|
||||||
accel_scheme_state state = STATE_INVALID;
|
|
||||||
unsigned len1 = 0; /* length of first run */
|
|
||||||
unsigned len2 = 0; /* length of second run, if present */
|
|
||||||
unsigned tlen1 = 0; /* first tail length */
|
|
||||||
unsigned tlen2 = 0; /* second tail length */
|
|
||||||
};
|
|
||||||
|
|
||||||
class MultiaccelCompileHelper {
|
|
||||||
private:
|
|
||||||
const CharReach &cr;
|
|
||||||
u32 offset;
|
|
||||||
std::vector<accel_data> accels;
|
|
||||||
unsigned max_len;
|
|
||||||
public:
|
|
||||||
MultiaccelCompileHelper(const CharReach &cr, u32 off, unsigned max_len);
|
|
||||||
bool canAdvance();
|
|
||||||
MultibyteAccelInfo getBestScheme();
|
|
||||||
void advance(const ue2::CharReach &cr);
|
|
||||||
};
|
|
||||||
|
|
||||||
}; // namespace
|
|
||||||
|
|
||||||
#endif /* MULTIACCELCOMPILE_H_ */
|
|
@ -1,149 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MULTIACCEL_DOUBLESHIFT_H_
|
|
||||||
#define MULTIACCEL_DOUBLESHIFT_H_
|
|
||||||
|
|
||||||
#include "multiaccel_common.h"
|
|
||||||
|
|
||||||
#define DOUBLESHIFT_MATCH(len, match_t, match_sz) \
|
|
||||||
static really_inline \
|
|
||||||
const u8 * JOIN4(doubleshiftMatch_, match_sz, _, len)(const u8 *buf, match_t z, u32 len2) {\
|
|
||||||
if (unlikely(z)) { \
|
|
||||||
match_t tmp = z; \
|
|
||||||
z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \
|
|
||||||
tmp |= ((match_t) (1 << (len + len2)) - 1) << (match_sz / 2); \
|
|
||||||
VARISHIFT(z, z, len); \
|
|
||||||
VARISHIFT(tmp, tmp, len2); \
|
|
||||||
VARISHIFT(tmp, z, len); \
|
|
||||||
return JOIN(match, match_sz)(buf, z); \
|
|
||||||
} \
|
|
||||||
return NULL; \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define DOUBLESHIFT_MATCH_32_DEF(n) \
|
|
||||||
DOUBLESHIFT_MATCH(n, u32, 32)
|
|
||||||
#define DOUBLESHIFT_MATCH_64_DEF(n) \
|
|
||||||
DOUBLESHIFT_MATCH(n, u64a, 64)
|
|
||||||
#define DOUBLESHIFT_MATCH_DEF(n) \
|
|
||||||
DOUBLESHIFT_MATCH_32_DEF(n) \
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(n)
|
|
||||||
|
|
||||||
DOUBLESHIFT_MATCH_DEF(1)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(2)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(3)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(4)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(5)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(6)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(7)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(8)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(9)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(10)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(11)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(12)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(13)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(14)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(15)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(16)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(17)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(18)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(19)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(20)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(21)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(22)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(23)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(24)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(25)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(26)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(27)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(28)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(29)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(30)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(31)
|
|
||||||
|
|
||||||
static
|
|
||||||
const UNUSED u8 * (*doubleshift_match_funcs_32[])(const u8 *buf, u32 z, u32 len2) =
|
|
||||||
{
|
|
||||||
// skip the first
|
|
||||||
0,
|
|
||||||
&doubleshiftMatch_32_1,
|
|
||||||
&doubleshiftMatch_32_2,
|
|
||||||
&doubleshiftMatch_32_3,
|
|
||||||
&doubleshiftMatch_32_4,
|
|
||||||
&doubleshiftMatch_32_5,
|
|
||||||
&doubleshiftMatch_32_6,
|
|
||||||
&doubleshiftMatch_32_7,
|
|
||||||
&doubleshiftMatch_32_8,
|
|
||||||
&doubleshiftMatch_32_9,
|
|
||||||
&doubleshiftMatch_32_10,
|
|
||||||
&doubleshiftMatch_32_11,
|
|
||||||
&doubleshiftMatch_32_12,
|
|
||||||
&doubleshiftMatch_32_13,
|
|
||||||
&doubleshiftMatch_32_14,
|
|
||||||
&doubleshiftMatch_32_15,
|
|
||||||
};
|
|
||||||
|
|
||||||
static
|
|
||||||
const UNUSED u8 * (*doubleshift_match_funcs_64[])(const u8 *buf, u64a z, u32 len2) =
|
|
||||||
{
|
|
||||||
// skip the first
|
|
||||||
0,
|
|
||||||
&doubleshiftMatch_64_1,
|
|
||||||
&doubleshiftMatch_64_2,
|
|
||||||
&doubleshiftMatch_64_3,
|
|
||||||
&doubleshiftMatch_64_4,
|
|
||||||
&doubleshiftMatch_64_5,
|
|
||||||
&doubleshiftMatch_64_6,
|
|
||||||
&doubleshiftMatch_64_7,
|
|
||||||
&doubleshiftMatch_64_8,
|
|
||||||
&doubleshiftMatch_64_9,
|
|
||||||
&doubleshiftMatch_64_10,
|
|
||||||
&doubleshiftMatch_64_11,
|
|
||||||
&doubleshiftMatch_64_12,
|
|
||||||
&doubleshiftMatch_64_13,
|
|
||||||
&doubleshiftMatch_64_14,
|
|
||||||
&doubleshiftMatch_64_15,
|
|
||||||
&doubleshiftMatch_64_16,
|
|
||||||
&doubleshiftMatch_64_17,
|
|
||||||
&doubleshiftMatch_64_18,
|
|
||||||
&doubleshiftMatch_64_19,
|
|
||||||
&doubleshiftMatch_64_20,
|
|
||||||
&doubleshiftMatch_64_21,
|
|
||||||
&doubleshiftMatch_64_22,
|
|
||||||
&doubleshiftMatch_64_23,
|
|
||||||
&doubleshiftMatch_64_24,
|
|
||||||
&doubleshiftMatch_64_25,
|
|
||||||
&doubleshiftMatch_64_26,
|
|
||||||
&doubleshiftMatch_64_27,
|
|
||||||
&doubleshiftMatch_64_28,
|
|
||||||
&doubleshiftMatch_64_29,
|
|
||||||
&doubleshiftMatch_64_30,
|
|
||||||
&doubleshiftMatch_64_31,
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* MULTIACCEL_DOUBLESHIFT_H_ */
|
|
@ -1,152 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MULTIACCEL_DOUBLESHIFTGRAB_H_
|
|
||||||
#define MULTIACCEL_DOUBLESHIFTGRAB_H_
|
|
||||||
|
|
||||||
#include "multiaccel_common.h"
|
|
||||||
|
|
||||||
#define DOUBLESHIFTGRAB_MATCH(len, match_t, match_sz) \
|
|
||||||
static really_inline \
|
|
||||||
const u8 * JOIN4(doubleshiftgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z, u32 len2) {\
|
|
||||||
if (unlikely(z)) { \
|
|
||||||
match_t neg = ~z; \
|
|
||||||
match_t tmp = z; \
|
|
||||||
z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \
|
|
||||||
tmp |= ((match_t) (1 << (len + len2)) - 1) << (match_sz / 2); \
|
|
||||||
neg |= ((match_t) (1 << len) - 1) << (match_sz / 2); \
|
|
||||||
VARISHIFT(z, z, len); \
|
|
||||||
VARISHIFT(tmp, tmp, len2); \
|
|
||||||
VARISHIFT(neg, z, 1); \
|
|
||||||
VARISHIFT(tmp, z, len); \
|
|
||||||
return JOIN(match, match_sz)(buf, z); \
|
|
||||||
} \
|
|
||||||
return NULL; \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define DOUBLESHIFTGRAB_MATCH_32_DEF(n) \
|
|
||||||
DOUBLESHIFTGRAB_MATCH(n, u32, 32)
|
|
||||||
#define DOUBLESHIFTGRAB_MATCH_64_DEF(n) \
|
|
||||||
DOUBLESHIFTGRAB_MATCH(n, u64a, 64)
|
|
||||||
#define DOUBLESHIFTGRAB_MATCH_DEF(n) \
|
|
||||||
DOUBLESHIFTGRAB_MATCH_32_DEF(n) \
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(n)
|
|
||||||
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(1)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(2)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(3)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(4)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(5)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(6)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(7)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(8)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(9)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(10)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(11)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(12)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(13)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(14)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(15)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(16)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(17)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(18)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(19)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(20)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(21)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(22)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(23)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(24)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(25)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(26)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(27)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(28)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(29)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(30)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(31)
|
|
||||||
|
|
||||||
static
|
|
||||||
const UNUSED u8 * (*doubleshiftgrab_match_funcs_32[])(const u8 *buf, u32 z, u32 len2) =
|
|
||||||
{
|
|
||||||
// skip the first
|
|
||||||
0,
|
|
||||||
&doubleshiftgrabMatch_32_1,
|
|
||||||
&doubleshiftgrabMatch_32_2,
|
|
||||||
&doubleshiftgrabMatch_32_3,
|
|
||||||
&doubleshiftgrabMatch_32_4,
|
|
||||||
&doubleshiftgrabMatch_32_5,
|
|
||||||
&doubleshiftgrabMatch_32_6,
|
|
||||||
&doubleshiftgrabMatch_32_7,
|
|
||||||
&doubleshiftgrabMatch_32_8,
|
|
||||||
&doubleshiftgrabMatch_32_9,
|
|
||||||
&doubleshiftgrabMatch_32_10,
|
|
||||||
&doubleshiftgrabMatch_32_11,
|
|
||||||
&doubleshiftgrabMatch_32_12,
|
|
||||||
&doubleshiftgrabMatch_32_13,
|
|
||||||
&doubleshiftgrabMatch_32_14,
|
|
||||||
&doubleshiftgrabMatch_32_15,
|
|
||||||
};
|
|
||||||
|
|
||||||
static
|
|
||||||
const UNUSED u8 * (*doubleshiftgrab_match_funcs_64[])(const u8 *buf, u64a z, u32 len2) =
|
|
||||||
{
|
|
||||||
// skip the first
|
|
||||||
0,
|
|
||||||
&doubleshiftgrabMatch_64_1,
|
|
||||||
&doubleshiftgrabMatch_64_2,
|
|
||||||
&doubleshiftgrabMatch_64_3,
|
|
||||||
&doubleshiftgrabMatch_64_4,
|
|
||||||
&doubleshiftgrabMatch_64_5,
|
|
||||||
&doubleshiftgrabMatch_64_6,
|
|
||||||
&doubleshiftgrabMatch_64_7,
|
|
||||||
&doubleshiftgrabMatch_64_8,
|
|
||||||
&doubleshiftgrabMatch_64_9,
|
|
||||||
&doubleshiftgrabMatch_64_10,
|
|
||||||
&doubleshiftgrabMatch_64_11,
|
|
||||||
&doubleshiftgrabMatch_64_12,
|
|
||||||
&doubleshiftgrabMatch_64_13,
|
|
||||||
&doubleshiftgrabMatch_64_14,
|
|
||||||
&doubleshiftgrabMatch_64_15,
|
|
||||||
&doubleshiftgrabMatch_64_16,
|
|
||||||
&doubleshiftgrabMatch_64_17,
|
|
||||||
&doubleshiftgrabMatch_64_18,
|
|
||||||
&doubleshiftgrabMatch_64_19,
|
|
||||||
&doubleshiftgrabMatch_64_20,
|
|
||||||
&doubleshiftgrabMatch_64_21,
|
|
||||||
&doubleshiftgrabMatch_64_22,
|
|
||||||
&doubleshiftgrabMatch_64_23,
|
|
||||||
&doubleshiftgrabMatch_64_24,
|
|
||||||
&doubleshiftgrabMatch_64_25,
|
|
||||||
&doubleshiftgrabMatch_64_26,
|
|
||||||
&doubleshiftgrabMatch_64_27,
|
|
||||||
&doubleshiftgrabMatch_64_28,
|
|
||||||
&doubleshiftgrabMatch_64_29,
|
|
||||||
&doubleshiftgrabMatch_64_30,
|
|
||||||
&doubleshiftgrabMatch_64_31,
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* MULTIACCEL_DOUBLESHIFTGRAB_H_ */
|
|
@ -1,145 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MULTIACCEL_LONG_H_
|
|
||||||
#define MULTIACCEL_LONG_H_
|
|
||||||
|
|
||||||
#include "multiaccel_common.h"
|
|
||||||
|
|
||||||
#define LONG_MATCH(len, match_t, match_sz) \
|
|
||||||
static really_inline \
|
|
||||||
const u8 * JOIN4(longMatch_, match_sz, _, len)(const u8 *buf, match_t z) { \
|
|
||||||
if (unlikely(z)) { \
|
|
||||||
z |= ((match_t) (1 << (len - 1)) - 1) << (match_sz / 2); \
|
|
||||||
JOIN(SHIFT, len)(z); \
|
|
||||||
return JOIN(match, match_sz)(buf, z); \
|
|
||||||
} \
|
|
||||||
return NULL; \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define LONG_MATCH_32_DEF(n) \
|
|
||||||
LONG_MATCH(n, u32, 32)
|
|
||||||
#define LONG_MATCH_64_DEF(n) \
|
|
||||||
LONG_MATCH(n, u64a, 64)
|
|
||||||
#define LONG_MATCH_DEF(n) \
|
|
||||||
LONG_MATCH_32_DEF(n) \
|
|
||||||
LONG_MATCH_64_DEF(n)
|
|
||||||
|
|
||||||
LONG_MATCH_DEF(1)
|
|
||||||
LONG_MATCH_DEF(2)
|
|
||||||
LONG_MATCH_DEF(3)
|
|
||||||
LONG_MATCH_DEF(4)
|
|
||||||
LONG_MATCH_DEF(5)
|
|
||||||
LONG_MATCH_DEF(6)
|
|
||||||
LONG_MATCH_DEF(7)
|
|
||||||
LONG_MATCH_DEF(8)
|
|
||||||
LONG_MATCH_DEF(9)
|
|
||||||
LONG_MATCH_DEF(10)
|
|
||||||
LONG_MATCH_DEF(11)
|
|
||||||
LONG_MATCH_DEF(12)
|
|
||||||
LONG_MATCH_DEF(13)
|
|
||||||
LONG_MATCH_DEF(14)
|
|
||||||
LONG_MATCH_DEF(15)
|
|
||||||
LONG_MATCH_64_DEF(16)
|
|
||||||
LONG_MATCH_64_DEF(17)
|
|
||||||
LONG_MATCH_64_DEF(18)
|
|
||||||
LONG_MATCH_64_DEF(19)
|
|
||||||
LONG_MATCH_64_DEF(20)
|
|
||||||
LONG_MATCH_64_DEF(21)
|
|
||||||
LONG_MATCH_64_DEF(22)
|
|
||||||
LONG_MATCH_64_DEF(23)
|
|
||||||
LONG_MATCH_64_DEF(24)
|
|
||||||
LONG_MATCH_64_DEF(25)
|
|
||||||
LONG_MATCH_64_DEF(26)
|
|
||||||
LONG_MATCH_64_DEF(27)
|
|
||||||
LONG_MATCH_64_DEF(28)
|
|
||||||
LONG_MATCH_64_DEF(29)
|
|
||||||
LONG_MATCH_64_DEF(30)
|
|
||||||
LONG_MATCH_64_DEF(31)
|
|
||||||
|
|
||||||
static
|
|
||||||
const UNUSED u8 *(*long_match_funcs_32[])(const u8 *buf, u32 z) =
|
|
||||||
{
|
|
||||||
// skip the first three
|
|
||||||
0,
|
|
||||||
&longMatch_32_1,
|
|
||||||
&longMatch_32_2,
|
|
||||||
&longMatch_32_3,
|
|
||||||
&longMatch_32_4,
|
|
||||||
&longMatch_32_5,
|
|
||||||
&longMatch_32_6,
|
|
||||||
&longMatch_32_7,
|
|
||||||
&longMatch_32_8,
|
|
||||||
&longMatch_32_9,
|
|
||||||
&longMatch_32_10,
|
|
||||||
&longMatch_32_11,
|
|
||||||
&longMatch_32_12,
|
|
||||||
&longMatch_32_13,
|
|
||||||
&longMatch_32_14,
|
|
||||||
&longMatch_32_15,
|
|
||||||
};
|
|
||||||
|
|
||||||
static
|
|
||||||
const UNUSED u8 *(*long_match_funcs_64[])(const u8 *buf, u64a z) =
|
|
||||||
{
|
|
||||||
// skip the first three
|
|
||||||
0,
|
|
||||||
&longMatch_64_1,
|
|
||||||
&longMatch_64_2,
|
|
||||||
&longMatch_64_3,
|
|
||||||
&longMatch_64_4,
|
|
||||||
&longMatch_64_5,
|
|
||||||
&longMatch_64_6,
|
|
||||||
&longMatch_64_7,
|
|
||||||
&longMatch_64_8,
|
|
||||||
&longMatch_64_9,
|
|
||||||
&longMatch_64_10,
|
|
||||||
&longMatch_64_11,
|
|
||||||
&longMatch_64_12,
|
|
||||||
&longMatch_64_13,
|
|
||||||
&longMatch_64_14,
|
|
||||||
&longMatch_64_15,
|
|
||||||
&longMatch_64_16,
|
|
||||||
&longMatch_64_17,
|
|
||||||
&longMatch_64_18,
|
|
||||||
&longMatch_64_19,
|
|
||||||
&longMatch_64_20,
|
|
||||||
&longMatch_64_21,
|
|
||||||
&longMatch_64_22,
|
|
||||||
&longMatch_64_23,
|
|
||||||
&longMatch_64_24,
|
|
||||||
&longMatch_64_25,
|
|
||||||
&longMatch_64_26,
|
|
||||||
&longMatch_64_27,
|
|
||||||
&longMatch_64_28,
|
|
||||||
&longMatch_64_29,
|
|
||||||
&longMatch_64_30,
|
|
||||||
&longMatch_64_31,
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* MULTIACCEL_LONG_H_ */
|
|
@ -1,148 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MULTIACCEL_LONGGRAB_H_
|
|
||||||
#define MULTIACCEL_LONGGRAB_H_
|
|
||||||
|
|
||||||
#include "multiaccel_common.h"
|
|
||||||
|
|
||||||
#define LONGGRAB_MATCH(len, match_t, match_sz) \
|
|
||||||
static really_inline \
|
|
||||||
const u8 * JOIN4(longgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z) { \
|
|
||||||
if (unlikely(z)) { \
|
|
||||||
match_t tmp = ~z; \
|
|
||||||
tmp |= ((match_t) (1 << len) - 1) << (match_sz / 2); \
|
|
||||||
z |= ((match_t) (1 << (len - 1)) - 1) << (match_sz / 2); \
|
|
||||||
JOIN(SHIFT, len)(z); \
|
|
||||||
VARISHIFT(tmp, z, len); \
|
|
||||||
return JOIN(match, match_sz)(buf, z); \
|
|
||||||
} \
|
|
||||||
return NULL; \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define LONGGRAB_MATCH_32_DEF(n) \
|
|
||||||
LONGGRAB_MATCH(n, u32, 32)
|
|
||||||
#define LONGGRAB_MATCH_64_DEF(n) \
|
|
||||||
LONGGRAB_MATCH(n, u64a, 64)
|
|
||||||
#define LONGGRAB_MATCH_DEF(n) \
|
|
||||||
LONGGRAB_MATCH_32_DEF(n) \
|
|
||||||
LONGGRAB_MATCH_64_DEF(n)
|
|
||||||
|
|
||||||
LONGGRAB_MATCH_DEF(1)
|
|
||||||
LONGGRAB_MATCH_DEF(2)
|
|
||||||
LONGGRAB_MATCH_DEF(3)
|
|
||||||
LONGGRAB_MATCH_DEF(4)
|
|
||||||
LONGGRAB_MATCH_DEF(5)
|
|
||||||
LONGGRAB_MATCH_DEF(6)
|
|
||||||
LONGGRAB_MATCH_DEF(7)
|
|
||||||
LONGGRAB_MATCH_DEF(8)
|
|
||||||
LONGGRAB_MATCH_DEF(9)
|
|
||||||
LONGGRAB_MATCH_DEF(10)
|
|
||||||
LONGGRAB_MATCH_DEF(11)
|
|
||||||
LONGGRAB_MATCH_DEF(12)
|
|
||||||
LONGGRAB_MATCH_DEF(13)
|
|
||||||
LONGGRAB_MATCH_DEF(14)
|
|
||||||
LONGGRAB_MATCH_DEF(15)
|
|
||||||
LONGGRAB_MATCH_64_DEF(16)
|
|
||||||
LONGGRAB_MATCH_64_DEF(17)
|
|
||||||
LONGGRAB_MATCH_64_DEF(18)
|
|
||||||
LONGGRAB_MATCH_64_DEF(19)
|
|
||||||
LONGGRAB_MATCH_64_DEF(20)
|
|
||||||
LONGGRAB_MATCH_64_DEF(21)
|
|
||||||
LONGGRAB_MATCH_64_DEF(22)
|
|
||||||
LONGGRAB_MATCH_64_DEF(23)
|
|
||||||
LONGGRAB_MATCH_64_DEF(24)
|
|
||||||
LONGGRAB_MATCH_64_DEF(25)
|
|
||||||
LONGGRAB_MATCH_64_DEF(26)
|
|
||||||
LONGGRAB_MATCH_64_DEF(27)
|
|
||||||
LONGGRAB_MATCH_64_DEF(28)
|
|
||||||
LONGGRAB_MATCH_64_DEF(29)
|
|
||||||
LONGGRAB_MATCH_64_DEF(30)
|
|
||||||
LONGGRAB_MATCH_64_DEF(31)
|
|
||||||
|
|
||||||
static
|
|
||||||
const UNUSED u8 *(*longgrab_match_funcs_32[])(const u8 *buf, u32 z) =
|
|
||||||
{
|
|
||||||
// skip the first three
|
|
||||||
0,
|
|
||||||
&longgrabMatch_32_1,
|
|
||||||
&longgrabMatch_32_2,
|
|
||||||
&longgrabMatch_32_3,
|
|
||||||
&longgrabMatch_32_4,
|
|
||||||
&longgrabMatch_32_5,
|
|
||||||
&longgrabMatch_32_6,
|
|
||||||
&longgrabMatch_32_7,
|
|
||||||
&longgrabMatch_32_8,
|
|
||||||
&longgrabMatch_32_9,
|
|
||||||
&longgrabMatch_32_10,
|
|
||||||
&longgrabMatch_32_11,
|
|
||||||
&longgrabMatch_32_12,
|
|
||||||
&longgrabMatch_32_13,
|
|
||||||
&longgrabMatch_32_14,
|
|
||||||
&longgrabMatch_32_15,
|
|
||||||
};
|
|
||||||
|
|
||||||
static
|
|
||||||
const UNUSED u8 *(*longgrab_match_funcs_64[])(const u8 *buf, u64a z) =
|
|
||||||
{
|
|
||||||
// skip the first three
|
|
||||||
0,
|
|
||||||
&longgrabMatch_64_1,
|
|
||||||
&longgrabMatch_64_2,
|
|
||||||
&longgrabMatch_64_3,
|
|
||||||
&longgrabMatch_64_4,
|
|
||||||
&longgrabMatch_64_5,
|
|
||||||
&longgrabMatch_64_6,
|
|
||||||
&longgrabMatch_64_7,
|
|
||||||
&longgrabMatch_64_8,
|
|
||||||
&longgrabMatch_64_9,
|
|
||||||
&longgrabMatch_64_10,
|
|
||||||
&longgrabMatch_64_11,
|
|
||||||
&longgrabMatch_64_12,
|
|
||||||
&longgrabMatch_64_13,
|
|
||||||
&longgrabMatch_64_14,
|
|
||||||
&longgrabMatch_64_15,
|
|
||||||
&longgrabMatch_64_16,
|
|
||||||
&longgrabMatch_64_17,
|
|
||||||
&longgrabMatch_64_18,
|
|
||||||
&longgrabMatch_64_19,
|
|
||||||
&longgrabMatch_64_20,
|
|
||||||
&longgrabMatch_64_21,
|
|
||||||
&longgrabMatch_64_22,
|
|
||||||
&longgrabMatch_64_23,
|
|
||||||
&longgrabMatch_64_24,
|
|
||||||
&longgrabMatch_64_25,
|
|
||||||
&longgrabMatch_64_26,
|
|
||||||
&longgrabMatch_64_27,
|
|
||||||
&longgrabMatch_64_28,
|
|
||||||
&longgrabMatch_64_29,
|
|
||||||
&longgrabMatch_64_30,
|
|
||||||
&longgrabMatch_64_31,
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* MULTIACCEL_LONGGRAB_H_ */
|
|
@ -1,145 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MULTIACCEL_SHIFT_H_
|
|
||||||
#define MULTIACCEL_SHIFT_H_
|
|
||||||
|
|
||||||
#include "multiaccel_common.h"
|
|
||||||
|
|
||||||
#define SHIFT_MATCH(len, match_t, match_sz) \
|
|
||||||
static really_inline \
|
|
||||||
const u8 * JOIN4(shiftMatch_, match_sz, _, len)(const u8 *buf, match_t z) {\
|
|
||||||
if (unlikely(z)) { \
|
|
||||||
z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \
|
|
||||||
VARISHIFT(z, z, len); \
|
|
||||||
return JOIN(match, match_sz)(buf, z); \
|
|
||||||
} \
|
|
||||||
return NULL; \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define SHIFT_MATCH_32_DEF(n) \
|
|
||||||
SHIFT_MATCH(n, u32, 32)
|
|
||||||
#define SHIFT_MATCH_64_DEF(n) \
|
|
||||||
SHIFT_MATCH(n, u64a, 64)
|
|
||||||
#define SHIFT_MATCH_DEF(n) \
|
|
||||||
SHIFT_MATCH_32_DEF(n) \
|
|
||||||
SHIFT_MATCH_64_DEF(n)
|
|
||||||
|
|
||||||
SHIFT_MATCH_DEF(1)
|
|
||||||
SHIFT_MATCH_DEF(2)
|
|
||||||
SHIFT_MATCH_DEF(3)
|
|
||||||
SHIFT_MATCH_DEF(4)
|
|
||||||
SHIFT_MATCH_DEF(5)
|
|
||||||
SHIFT_MATCH_DEF(6)
|
|
||||||
SHIFT_MATCH_DEF(7)
|
|
||||||
SHIFT_MATCH_DEF(8)
|
|
||||||
SHIFT_MATCH_DEF(9)
|
|
||||||
SHIFT_MATCH_DEF(10)
|
|
||||||
SHIFT_MATCH_DEF(11)
|
|
||||||
SHIFT_MATCH_DEF(12)
|
|
||||||
SHIFT_MATCH_DEF(13)
|
|
||||||
SHIFT_MATCH_DEF(14)
|
|
||||||
SHIFT_MATCH_DEF(15)
|
|
||||||
SHIFT_MATCH_64_DEF(16)
|
|
||||||
SHIFT_MATCH_64_DEF(17)
|
|
||||||
SHIFT_MATCH_64_DEF(18)
|
|
||||||
SHIFT_MATCH_64_DEF(19)
|
|
||||||
SHIFT_MATCH_64_DEF(20)
|
|
||||||
SHIFT_MATCH_64_DEF(21)
|
|
||||||
SHIFT_MATCH_64_DEF(22)
|
|
||||||
SHIFT_MATCH_64_DEF(23)
|
|
||||||
SHIFT_MATCH_64_DEF(24)
|
|
||||||
SHIFT_MATCH_64_DEF(25)
|
|
||||||
SHIFT_MATCH_64_DEF(26)
|
|
||||||
SHIFT_MATCH_64_DEF(27)
|
|
||||||
SHIFT_MATCH_64_DEF(28)
|
|
||||||
SHIFT_MATCH_64_DEF(29)
|
|
||||||
SHIFT_MATCH_64_DEF(30)
|
|
||||||
SHIFT_MATCH_64_DEF(31)
|
|
||||||
|
|
||||||
static
|
|
||||||
const UNUSED u8 * (*shift_match_funcs_32[])(const u8 *buf, u32 z) =
|
|
||||||
{
|
|
||||||
// skip the first
|
|
||||||
0,
|
|
||||||
&shiftMatch_32_1,
|
|
||||||
&shiftMatch_32_2,
|
|
||||||
&shiftMatch_32_3,
|
|
||||||
&shiftMatch_32_4,
|
|
||||||
&shiftMatch_32_5,
|
|
||||||
&shiftMatch_32_6,
|
|
||||||
&shiftMatch_32_7,
|
|
||||||
&shiftMatch_32_8,
|
|
||||||
&shiftMatch_32_9,
|
|
||||||
&shiftMatch_32_10,
|
|
||||||
&shiftMatch_32_11,
|
|
||||||
&shiftMatch_32_12,
|
|
||||||
&shiftMatch_32_13,
|
|
||||||
&shiftMatch_32_14,
|
|
||||||
&shiftMatch_32_15,
|
|
||||||
};
|
|
||||||
|
|
||||||
static
|
|
||||||
const UNUSED u8 * (*shift_match_funcs_64[])(const u8 *buf, u64a z) =
|
|
||||||
{
|
|
||||||
// skip the first
|
|
||||||
0,
|
|
||||||
&shiftMatch_64_1,
|
|
||||||
&shiftMatch_64_2,
|
|
||||||
&shiftMatch_64_3,
|
|
||||||
&shiftMatch_64_4,
|
|
||||||
&shiftMatch_64_5,
|
|
||||||
&shiftMatch_64_6,
|
|
||||||
&shiftMatch_64_7,
|
|
||||||
&shiftMatch_64_8,
|
|
||||||
&shiftMatch_64_9,
|
|
||||||
&shiftMatch_64_10,
|
|
||||||
&shiftMatch_64_11,
|
|
||||||
&shiftMatch_64_12,
|
|
||||||
&shiftMatch_64_13,
|
|
||||||
&shiftMatch_64_14,
|
|
||||||
&shiftMatch_64_15,
|
|
||||||
&shiftMatch_64_16,
|
|
||||||
&shiftMatch_64_17,
|
|
||||||
&shiftMatch_64_18,
|
|
||||||
&shiftMatch_64_19,
|
|
||||||
&shiftMatch_64_20,
|
|
||||||
&shiftMatch_64_21,
|
|
||||||
&shiftMatch_64_22,
|
|
||||||
&shiftMatch_64_23,
|
|
||||||
&shiftMatch_64_24,
|
|
||||||
&shiftMatch_64_25,
|
|
||||||
&shiftMatch_64_26,
|
|
||||||
&shiftMatch_64_27,
|
|
||||||
&shiftMatch_64_28,
|
|
||||||
&shiftMatch_64_29,
|
|
||||||
&shiftMatch_64_30,
|
|
||||||
&shiftMatch_64_31,
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* MULTIACCEL_SHIFT_H_ */
|
|
@ -1,148 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MULTIACCEL_SHIFTGRAB_H_
|
|
||||||
#define MULTIACCEL_SHIFTGRAB_H_
|
|
||||||
|
|
||||||
#include "multiaccel_common.h"
|
|
||||||
|
|
||||||
#define SHIFTGRAB_MATCH(len, match_t, match_sz) \
|
|
||||||
static really_inline \
|
|
||||||
const u8 * JOIN4(shiftgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z) {\
|
|
||||||
if (unlikely(z)) { \
|
|
||||||
match_t tmp = ~z; \
|
|
||||||
z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \
|
|
||||||
tmp |= ((match_t) (1 << len) - 1) << (match_sz / 2); \
|
|
||||||
VARISHIFT(z, z, len); \
|
|
||||||
VARISHIFT(tmp, z, 1); \
|
|
||||||
return JOIN(match, match_sz)(buf, z); \
|
|
||||||
} \
|
|
||||||
return NULL; \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define SHIFTGRAB_MATCH_32_DEF(n) \
|
|
||||||
SHIFTGRAB_MATCH(n, u32, 32)
|
|
||||||
#define SHIFTGRAB_MATCH_64_DEF(n) \
|
|
||||||
SHIFTGRAB_MATCH(n, u64a, 64)
|
|
||||||
#define SHIFTGRAB_MATCH_DEF(n) \
|
|
||||||
SHIFTGRAB_MATCH_32_DEF(n) \
|
|
||||||
SHIFTGRAB_MATCH_64_DEF(n)
|
|
||||||
|
|
||||||
SHIFTGRAB_MATCH_DEF(1)
|
|
||||||
SHIFTGRAB_MATCH_DEF(2)
|
|
||||||
SHIFTGRAB_MATCH_DEF(3)
|
|
||||||
SHIFTGRAB_MATCH_DEF(4)
|
|
||||||
SHIFTGRAB_MATCH_DEF(5)
|
|
||||||
SHIFTGRAB_MATCH_DEF(6)
|
|
||||||
SHIFTGRAB_MATCH_DEF(7)
|
|
||||||
SHIFTGRAB_MATCH_DEF(8)
|
|
||||||
SHIFTGRAB_MATCH_DEF(9)
|
|
||||||
SHIFTGRAB_MATCH_DEF(10)
|
|
||||||
SHIFTGRAB_MATCH_DEF(11)
|
|
||||||
SHIFTGRAB_MATCH_DEF(12)
|
|
||||||
SHIFTGRAB_MATCH_DEF(13)
|
|
||||||
SHIFTGRAB_MATCH_DEF(14)
|
|
||||||
SHIFTGRAB_MATCH_DEF(15)
|
|
||||||
SHIFTGRAB_MATCH_64_DEF(16)
|
|
||||||
SHIFTGRAB_MATCH_64_DEF(17)
|
|
||||||
SHIFTGRAB_MATCH_64_DEF(18)
|
|
||||||
SHIFTGRAB_MATCH_64_DEF(19)
|
|
||||||
SHIFTGRAB_MATCH_64_DEF(20)
|
|
||||||
SHIFTGRAB_MATCH_64_DEF(21)
|
|
||||||
SHIFTGRAB_MATCH_64_DEF(22)
|
|
||||||
SHIFTGRAB_MATCH_64_DEF(23)
|
|
||||||
SHIFTGRAB_MATCH_64_DEF(24)
|
|
||||||
SHIFTGRAB_MATCH_64_DEF(25)
|
|
||||||
SHIFTGRAB_MATCH_64_DEF(26)
|
|
||||||
SHIFTGRAB_MATCH_64_DEF(27)
|
|
||||||
SHIFTGRAB_MATCH_64_DEF(28)
|
|
||||||
SHIFTGRAB_MATCH_64_DEF(29)
|
|
||||||
SHIFTGRAB_MATCH_64_DEF(30)
|
|
||||||
SHIFTGRAB_MATCH_64_DEF(31)
|
|
||||||
|
|
||||||
static
|
|
||||||
const UNUSED u8 * (*shiftgrab_match_funcs_32[])(const u8 *buf, u32 z) =
|
|
||||||
{
|
|
||||||
// skip the first
|
|
||||||
0,
|
|
||||||
&shiftgrabMatch_32_1,
|
|
||||||
&shiftgrabMatch_32_2,
|
|
||||||
&shiftgrabMatch_32_3,
|
|
||||||
&shiftgrabMatch_32_4,
|
|
||||||
&shiftgrabMatch_32_5,
|
|
||||||
&shiftgrabMatch_32_6,
|
|
||||||
&shiftgrabMatch_32_7,
|
|
||||||
&shiftgrabMatch_32_8,
|
|
||||||
&shiftgrabMatch_32_9,
|
|
||||||
&shiftgrabMatch_32_10,
|
|
||||||
&shiftgrabMatch_32_11,
|
|
||||||
&shiftgrabMatch_32_12,
|
|
||||||
&shiftgrabMatch_32_13,
|
|
||||||
&shiftgrabMatch_32_14,
|
|
||||||
&shiftgrabMatch_32_15,
|
|
||||||
};
|
|
||||||
|
|
||||||
static
|
|
||||||
const UNUSED u8 * (*shiftgrab_match_funcs_64[])(const u8 *buf, u64a z) =
|
|
||||||
{
|
|
||||||
// skip the first
|
|
||||||
0,
|
|
||||||
&shiftgrabMatch_64_1,
|
|
||||||
&shiftgrabMatch_64_2,
|
|
||||||
&shiftgrabMatch_64_3,
|
|
||||||
&shiftgrabMatch_64_4,
|
|
||||||
&shiftgrabMatch_64_5,
|
|
||||||
&shiftgrabMatch_64_6,
|
|
||||||
&shiftgrabMatch_64_7,
|
|
||||||
&shiftgrabMatch_64_8,
|
|
||||||
&shiftgrabMatch_64_9,
|
|
||||||
&shiftgrabMatch_64_10,
|
|
||||||
&shiftgrabMatch_64_11,
|
|
||||||
&shiftgrabMatch_64_12,
|
|
||||||
&shiftgrabMatch_64_13,
|
|
||||||
&shiftgrabMatch_64_14,
|
|
||||||
&shiftgrabMatch_64_15,
|
|
||||||
&shiftgrabMatch_64_16,
|
|
||||||
&shiftgrabMatch_64_17,
|
|
||||||
&shiftgrabMatch_64_18,
|
|
||||||
&shiftgrabMatch_64_19,
|
|
||||||
&shiftgrabMatch_64_20,
|
|
||||||
&shiftgrabMatch_64_21,
|
|
||||||
&shiftgrabMatch_64_22,
|
|
||||||
&shiftgrabMatch_64_23,
|
|
||||||
&shiftgrabMatch_64_24,
|
|
||||||
&shiftgrabMatch_64_25,
|
|
||||||
&shiftgrabMatch_64_26,
|
|
||||||
&shiftgrabMatch_64_27,
|
|
||||||
&shiftgrabMatch_64_28,
|
|
||||||
&shiftgrabMatch_64_29,
|
|
||||||
&shiftgrabMatch_64_30,
|
|
||||||
&shiftgrabMatch_64_31,
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* MULTIACCEL_SHIFTGRAB_H_ */
|
|
@ -1,115 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/** \file
|
|
||||||
* \brief Shufti: character class acceleration.
|
|
||||||
*
|
|
||||||
* Utilises the SSSE3 pshufb shuffle instruction
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "config.h"
|
|
||||||
#include "ue2common.h"
|
|
||||||
#include "util/arch.h"
|
|
||||||
|
|
||||||
#include "multishufti.h"
|
|
||||||
|
|
||||||
#include "multiaccel_common.h"
|
|
||||||
|
|
||||||
#if !defined(HAVE_AVX2)
|
|
||||||
|
|
||||||
#define MATCH_ALGO long_
|
|
||||||
#include "multiaccel_long.h"
|
|
||||||
#include "multishufti_sse.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO longgrab_
|
|
||||||
#include "multiaccel_longgrab.h"
|
|
||||||
#include "multishufti_sse.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO shift_
|
|
||||||
#include "multiaccel_shift.h"
|
|
||||||
#include "multishufti_sse.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO shiftgrab_
|
|
||||||
#include "multiaccel_shiftgrab.h"
|
|
||||||
#include "multishufti_sse.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MULTIACCEL_DOUBLE
|
|
||||||
|
|
||||||
#define MATCH_ALGO doubleshift_
|
|
||||||
#include "multiaccel_doubleshift.h"
|
|
||||||
#include "multishufti_sse.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO doubleshiftgrab_
|
|
||||||
#include "multiaccel_doubleshiftgrab.h"
|
|
||||||
#include "multishufti_sse.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#undef MULTIACCEL_DOUBLE
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
#define MATCH_ALGO long_
|
|
||||||
#include "multiaccel_long.h"
|
|
||||||
#include "multishufti_avx2.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO longgrab_
|
|
||||||
#include "multiaccel_longgrab.h"
|
|
||||||
#include "multishufti_avx2.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO shift_
|
|
||||||
#include "multiaccel_shift.h"
|
|
||||||
#include "multishufti_avx2.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO shiftgrab_
|
|
||||||
#include "multiaccel_shiftgrab.h"
|
|
||||||
#include "multishufti_avx2.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MULTIACCEL_DOUBLE
|
|
||||||
|
|
||||||
#define MATCH_ALGO doubleshift_
|
|
||||||
#include "multiaccel_doubleshift.h"
|
|
||||||
#include "multishufti_avx2.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO doubleshiftgrab_
|
|
||||||
#include "multiaccel_doubleshiftgrab.h"
|
|
||||||
#include "multishufti_avx2.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#undef MULTIACCEL_DOUBLE
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,70 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/** \file
|
|
||||||
* \brief Multishufti: multibyte version of Shufti
|
|
||||||
*
|
|
||||||
* Utilises the SSSE3 pshufb shuffle instruction
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MULTISHUFTI_H
|
|
||||||
#define MULTISHUFTI_H
|
|
||||||
|
|
||||||
#include "ue2common.h"
|
|
||||||
#include "util/simd_types.h"
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C"
|
|
||||||
{
|
|
||||||
#endif
|
|
||||||
|
|
||||||
const u8 *long_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
|
|
||||||
const u8 *buf_end, const u8 run_len);
|
|
||||||
|
|
||||||
const u8 *longgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
|
|
||||||
const u8 *buf_end, const u8 run_len);
|
|
||||||
|
|
||||||
const u8 *shift_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
|
|
||||||
const u8 *buf_end, const u8 run_len);
|
|
||||||
|
|
||||||
const u8 *shiftgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
|
|
||||||
const u8 *buf_end, const u8 run_len);
|
|
||||||
|
|
||||||
const u8 *doubleshift_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
|
|
||||||
const u8 *buf_end, const u8 run_len,
|
|
||||||
const u8 run2_len);
|
|
||||||
|
|
||||||
const u8 *doubleshiftgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
|
|
||||||
const u8 *buf_end, const u8 run_len,
|
|
||||||
const u8 run2_len);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,121 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "shufti_common.h"
|
|
||||||
|
|
||||||
#include "ue2common.h"
|
|
||||||
#include "util/bitutils.h"
|
|
||||||
#include "util/simd_utils.h"
|
|
||||||
|
|
||||||
static really_inline
|
|
||||||
const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 mask_lo, m256 mask_hi, m256 chars,
|
|
||||||
const u8 *buf, const m256 low4bits,
|
|
||||||
const m256 zeroes, const u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, const u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes);
|
|
||||||
return (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])(buf, ~z
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
const u8 *JOIN(MATCH_ALGO, shuftiExec)(m128 mask_lo, m128 mask_hi,
|
|
||||||
const u8 *buf,
|
|
||||||
const u8 *buf_end, u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
assert(buf && buf_end);
|
|
||||||
assert(buf < buf_end);
|
|
||||||
|
|
||||||
// Slow path for small cases.
|
|
||||||
if (buf_end - buf < 32) {
|
|
||||||
return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi,
|
|
||||||
buf, buf_end);
|
|
||||||
}
|
|
||||||
|
|
||||||
const m256 zeroes = zeroes256();
|
|
||||||
const m256 low4bits = set32x8(0xf);
|
|
||||||
const m256 wide_mask_lo = set2x128(mask_lo);
|
|
||||||
const m256 wide_mask_hi = set2x128(mask_hi);
|
|
||||||
const u8 *rv;
|
|
||||||
|
|
||||||
size_t min = (size_t)buf % 32;
|
|
||||||
assert(buf_end - buf >= 32);
|
|
||||||
|
|
||||||
// Preconditioning: most of the time our buffer won't be aligned.
|
|
||||||
m256 chars = loadu256(buf);
|
|
||||||
rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, chars, buf,
|
|
||||||
low4bits, zeroes, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (rv) {
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
buf += (32 - min);
|
|
||||||
|
|
||||||
// Unrolling was here, but it wasn't doing anything but taking up space.
|
|
||||||
// Reroll FTW.
|
|
||||||
const u8 *last_block = buf_end - 32;
|
|
||||||
while (buf < last_block) {
|
|
||||||
m256 lchars = load256(buf);
|
|
||||||
rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, lchars, buf,
|
|
||||||
low4bits, zeroes, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (rv) {
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
buf += 32;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use an unaligned load to mop up the last 32 bytes and get an accurate
|
|
||||||
// picture to buf_end.
|
|
||||||
assert(buf <= buf_end && buf >= buf_end - 32);
|
|
||||||
chars = loadu256(buf_end - 32);
|
|
||||||
rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, chars, buf_end - 32,
|
|
||||||
low4bits, zeroes, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (rv) {
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
|
|
||||||
return buf_end;
|
|
||||||
}
|
|
@ -1,265 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "shufti_common.h"
|
|
||||||
|
|
||||||
#include "ue2common.h"
|
|
||||||
#include "util/bitutils.h"
|
|
||||||
#include "util/simd_utils.h"
|
|
||||||
|
|
||||||
/* Normal SSSE3 shufti */
|
|
||||||
|
|
||||||
static really_inline
|
|
||||||
const u8 *JOIN(MATCH_ALGO, fwdBlock)(m128 mask_lo, m128 mask_hi, m128 chars,
|
|
||||||
const u8 *buf, const m128 low4bits,
|
|
||||||
const m128 zeroes, const u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, const u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
// negate first 16 bits
|
|
||||||
u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes) ^ 0xFFFF;
|
|
||||||
return (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])(buf, z
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* 16-byte pipeline, for smaller scans
|
|
||||||
*/
|
|
||||||
static
|
|
||||||
const u8 *JOIN(MATCH_ALGO, shuftiPipeline16)(m128 mask_lo, m128 mask_hi,
|
|
||||||
const u8 *buf, const u8 *buf_end,
|
|
||||||
const m128 low4bits,
|
|
||||||
const m128 zeroes, const u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, const u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
const u8* ptr, *last_buf;
|
|
||||||
u32 last_res;
|
|
||||||
|
|
||||||
// pipeline prologue: scan first 16 bytes
|
|
||||||
m128 data = load128(buf);
|
|
||||||
u32 z = block(mask_lo, mask_hi, data, low4bits, zeroes) ^ 0xFFFF;
|
|
||||||
last_buf = buf;
|
|
||||||
last_res = z;
|
|
||||||
buf += 16;
|
|
||||||
|
|
||||||
// now, start the pipeline!
|
|
||||||
assert((size_t)buf % 16 == 0);
|
|
||||||
for (; buf + 15 < buf_end; buf += 16) {
|
|
||||||
// scan more data
|
|
||||||
data = load128(buf);
|
|
||||||
z = block(mask_lo, mask_hi, data, low4bits, zeroes) ^ 0xFFFF;
|
|
||||||
|
|
||||||
// do a comparison on previous result
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
|
|
||||||
(last_buf, last_res
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
last_buf = buf;
|
|
||||||
last_res = z;
|
|
||||||
}
|
|
||||||
assert(buf <= buf_end && buf >= buf_end - 16);
|
|
||||||
|
|
||||||
// epilogue: compare final results
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
|
|
||||||
(last_buf, last_res
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* 32-byte pipeline, for bigger scans
|
|
||||||
*/
|
|
||||||
static
|
|
||||||
const u8 *JOIN(MATCH_ALGO, shuftiPipeline32)(m128 mask_lo, m128 mask_hi,
|
|
||||||
const u8 *buf, const u8 *buf_end,
|
|
||||||
const m128 low4bits,
|
|
||||||
const m128 zeroes, const u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, const u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
const u8* ptr, *last_buf;
|
|
||||||
u32 res;
|
|
||||||
|
|
||||||
// pipeline prologue: scan first 32 bytes
|
|
||||||
m128 data1 = load128(buf);
|
|
||||||
u32 z1 = block(mask_lo, mask_hi, data1, low4bits, zeroes) ^ 0xFFFF;
|
|
||||||
m128 data2 = load128(buf + 16);
|
|
||||||
u32 z2 = block(mask_lo, mask_hi, data2, low4bits, zeroes) ^ 0xFFFF;
|
|
||||||
|
|
||||||
// store the results
|
|
||||||
u32 last_res = z1 | (z2 << 16);
|
|
||||||
last_buf = buf;
|
|
||||||
buf += 32;
|
|
||||||
|
|
||||||
|
|
||||||
// now, start the pipeline!
|
|
||||||
assert((size_t)buf % 16 == 0);
|
|
||||||
for (; buf + 31 < buf_end; buf += 32) {
|
|
||||||
// scan more data
|
|
||||||
data1 = load128(buf);
|
|
||||||
z1 = block(mask_lo, mask_hi, data1, low4bits, zeroes) ^ 0xFFFF;
|
|
||||||
data2 = load128(buf + 16);
|
|
||||||
z2 = block(mask_lo, mask_hi, data2, low4bits, zeroes) ^ 0xFFFF;
|
|
||||||
res = z1 | (z2 << 16);
|
|
||||||
|
|
||||||
// do a comparison on previous result
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
|
||||||
(last_buf, last_res
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
last_res = res;
|
|
||||||
last_buf = buf;
|
|
||||||
}
|
|
||||||
|
|
||||||
// epilogue: compare final results
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
|
||||||
(last_buf, last_res
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
// if we still have some data left, scan it too
|
|
||||||
for (; buf + 15 < buf_end; buf += 16) {
|
|
||||||
m128 chars = load128(buf);
|
|
||||||
ptr = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars, buf,
|
|
||||||
low4bits, zeroes, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
assert(buf <= buf_end && buf >= buf_end - 16);
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
const u8 *JOIN(MATCH_ALGO, shuftiExec)(m128 mask_lo, m128 mask_hi,
|
|
||||||
const u8 *buf,
|
|
||||||
const u8 *buf_end, u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
assert(buf && buf_end);
|
|
||||||
assert(buf < buf_end);
|
|
||||||
|
|
||||||
// Slow path for small cases.
|
|
||||||
if (buf_end - buf < 16) {
|
|
||||||
return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi,
|
|
||||||
buf, buf_end);
|
|
||||||
}
|
|
||||||
|
|
||||||
const m128 zeroes = zeroes128();
|
|
||||||
const m128 low4bits = _mm_set1_epi8(0xf);
|
|
||||||
const u8 *rv;
|
|
||||||
|
|
||||||
size_t min = (size_t)buf % 16;
|
|
||||||
assert(buf_end - buf >= 16);
|
|
||||||
|
|
||||||
// Preconditioning: most of the time our buffer won't be aligned.
|
|
||||||
m128 chars = loadu128(buf);
|
|
||||||
rv = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars, buf,
|
|
||||||
low4bits, zeroes, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (rv) {
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
buf += (16 - min);
|
|
||||||
|
|
||||||
// if we have enough data, run bigger pipeline; otherwise run smaller one
|
|
||||||
if (buf_end - buf >= 128) {
|
|
||||||
rv = JOIN(MATCH_ALGO, shuftiPipeline32)(mask_lo, mask_hi,
|
|
||||||
buf, buf_end, low4bits, zeroes, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(rv)) {
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
} else if (buf_end - buf >= 16){
|
|
||||||
rv = JOIN(MATCH_ALGO, shuftiPipeline16)(mask_lo, mask_hi,
|
|
||||||
buf, buf_end, low4bits, zeroes, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(rv)) {
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use an unaligned load to mop up the last 16 bytes and get an accurate
|
|
||||||
// picture to buf_end.
|
|
||||||
chars = loadu128(buf_end - 16);
|
|
||||||
rv = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars,
|
|
||||||
buf_end - 16, low4bits, zeroes, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (rv) {
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
|
|
||||||
return buf_end;
|
|
||||||
}
|
|
@ -1,111 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "config.h"
|
|
||||||
#include "ue2common.h"
|
|
||||||
#include "util/arch.h"
|
|
||||||
|
|
||||||
#include "multitruffle.h"
|
|
||||||
#include "util/bitutils.h"
|
|
||||||
#include "util/simd_utils.h"
|
|
||||||
|
|
||||||
#include "multiaccel_common.h"
|
|
||||||
|
|
||||||
#if !defined(HAVE_AVX2)
|
|
||||||
|
|
||||||
#define MATCH_ALGO long_
|
|
||||||
#include "multiaccel_long.h"
|
|
||||||
#include "multitruffle_sse.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO longgrab_
|
|
||||||
#include "multiaccel_longgrab.h"
|
|
||||||
#include "multitruffle_sse.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO shift_
|
|
||||||
#include "multiaccel_shift.h"
|
|
||||||
#include "multitruffle_sse.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO shiftgrab_
|
|
||||||
#include "multiaccel_shiftgrab.h"
|
|
||||||
#include "multitruffle_sse.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MULTIACCEL_DOUBLE
|
|
||||||
|
|
||||||
#define MATCH_ALGO doubleshift_
|
|
||||||
#include "multiaccel_doubleshift.h"
|
|
||||||
#include "multitruffle_sse.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO doubleshiftgrab_
|
|
||||||
#include "multiaccel_doubleshiftgrab.h"
|
|
||||||
#include "multitruffle_sse.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#undef MULTIACCEL_DOUBLE
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
#define MATCH_ALGO long_
|
|
||||||
#include "multiaccel_long.h"
|
|
||||||
#include "multitruffle_avx2.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO longgrab_
|
|
||||||
#include "multiaccel_longgrab.h"
|
|
||||||
#include "multitruffle_avx2.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO shift_
|
|
||||||
#include "multiaccel_shift.h"
|
|
||||||
#include "multitruffle_avx2.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO shiftgrab_
|
|
||||||
#include "multiaccel_shiftgrab.h"
|
|
||||||
#include "multitruffle_avx2.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MULTIACCEL_DOUBLE
|
|
||||||
|
|
||||||
#define MATCH_ALGO doubleshift_
|
|
||||||
#include "multiaccel_doubleshift.h"
|
|
||||||
#include "multitruffle_avx2.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO doubleshiftgrab_
|
|
||||||
#include "multiaccel_doubleshiftgrab.h"
|
|
||||||
#include "multitruffle_avx2.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#undef MULTIACCEL_DOUBLE
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,73 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MULTITRUFFLE_H
|
|
||||||
#define MULTITRUFFLE_H
|
|
||||||
|
|
||||||
/** \file
|
|
||||||
* \brief Multitruffle: multibyte version of Truffle.
|
|
||||||
*
|
|
||||||
* Utilises the SSSE3 pshufb shuffle instruction
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "util/simd_types.h"
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C"
|
|
||||||
{
|
|
||||||
#endif
|
|
||||||
|
|
||||||
const u8 *long_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
|
|
||||||
const u8 *buf, const u8 *buf_end, const u8 run_len);
|
|
||||||
|
|
||||||
const u8 *longgrab_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
|
|
||||||
const u8 *buf, const u8 *buf_end, const u8 run_len);
|
|
||||||
|
|
||||||
const u8 *shift_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
|
|
||||||
const u8 *buf, const u8 *buf_end, const u8 run_len);
|
|
||||||
|
|
||||||
const u8 *shiftgrab_truffleExec(m128 shuf_mask_lo_highclear,
|
|
||||||
m128 shuf_mask_lo_highset, const u8 *buf,
|
|
||||||
const u8 *buf_end, const u8 run_len);
|
|
||||||
|
|
||||||
const u8 *doubleshift_truffleExec(m128 shuf_mask_lo_highclear,
|
|
||||||
m128 shuf_mask_lo_highset, const u8 *buf,
|
|
||||||
const u8 *buf_end, const u8 run_len,
|
|
||||||
const u8 run2_len);
|
|
||||||
|
|
||||||
const u8 *doubleshiftgrab_truffleExec(m128 shuf_mask_lo_highclear,
|
|
||||||
m128 shuf_mask_lo_highset, const u8 *buf,
|
|
||||||
const u8 *buf_end, const u8 run_len,
|
|
||||||
const u8 run2_len);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* MULTITRUFFLE_H */
|
|
@ -1,125 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Matches a byte in a charclass using three shuffles
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "config.h"
|
|
||||||
#include "ue2common.h"
|
|
||||||
#include "multiaccel_common.h"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* include "block" function
|
|
||||||
*/
|
|
||||||
#include "truffle_common.h"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* single-byte truffle fwd match function, should only be defined when not
|
|
||||||
* compiling multiaccel
|
|
||||||
*/
|
|
||||||
static really_inline
|
|
||||||
const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset,
|
|
||||||
m256 v, const u8 *buf, const u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, const u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
u64a z = (u64a) block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v);
|
|
||||||
return (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])(buf, z ^ 0xFFFFFFFF
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
const u8 *JOIN(MATCH_ALGO, truffleExec)(m128 shuf_mask_lo_highclear,
|
|
||||||
m128 shuf_mask_lo_highset,
|
|
||||||
const u8 *buf, const u8 *buf_end, const u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, const u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
DEBUG_PRINTF("run_len %zu\n", buf_end - buf);
|
|
||||||
const m256 wide_clear = set2x128(shuf_mask_lo_highclear);
|
|
||||||
const m256 wide_set = set2x128(shuf_mask_lo_highset);
|
|
||||||
|
|
||||||
assert(buf && buf_end);
|
|
||||||
assert(buf < buf_end);
|
|
||||||
const u8 *rv;
|
|
||||||
|
|
||||||
if (buf_end - buf < 32) {
|
|
||||||
return truffleMini(wide_clear, wide_set, buf, buf_end);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t min = (size_t)buf % 32;
|
|
||||||
assert(buf_end - buf >= 32);
|
|
||||||
|
|
||||||
// Preconditioning: most of the time our buffer won't be aligned.
|
|
||||||
m256 chars = loadu256(buf);
|
|
||||||
rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, chars, buf, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (rv) {
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
buf += (32 - min);
|
|
||||||
|
|
||||||
const u8 *last_block = buf_end - 32;
|
|
||||||
while (buf < last_block) {
|
|
||||||
m256 lchars = load256(buf);
|
|
||||||
rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, lchars,
|
|
||||||
buf, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (rv) {
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
buf += 32;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use an unaligned load to mop up the last 32 bytes and get an accurate
|
|
||||||
// picture to buf_end.
|
|
||||||
assert(buf <= buf_end && buf >= buf_end - 32);
|
|
||||||
chars = loadu256(buf_end - 32);
|
|
||||||
rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, chars,
|
|
||||||
buf_end - 32, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (rv) {
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
|
|
||||||
return buf_end;
|
|
||||||
}
|
|
@ -1,265 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "config.h"
|
|
||||||
#include "ue2common.h"
|
|
||||||
#include "multiaccel_common.h"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* include "block" function
|
|
||||||
*/
|
|
||||||
#include "truffle_common.h"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* single-byte truffle fwd match function, should only be defined when not
|
|
||||||
* compiling multiaccel
|
|
||||||
*/
|
|
||||||
|
|
||||||
static really_inline
|
|
||||||
const u8 *JOIN(MATCH_ALGO, fwdBlock)(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
|
|
||||||
m128 v, const u8 *buf, const u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, const u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v) ^ 0xFFFF;
|
|
||||||
return (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])(buf, z
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* 16-byte pipeline, for smaller scans
|
|
||||||
*/
|
|
||||||
static
|
|
||||||
const u8 *JOIN(MATCH_ALGO, trufflePipeline16)(m128 shuf_mask_lo_highclear,
|
|
||||||
m128 shuf_mask_lo_highset,
|
|
||||||
const u8 *buf, const u8 *buf_end,
|
|
||||||
const u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, const u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
const u8* ptr, *last_buf;
|
|
||||||
u32 last_res;
|
|
||||||
|
|
||||||
// pipeline prologue: scan first 16 bytes
|
|
||||||
m128 data = load128(buf);
|
|
||||||
u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data) ^ 0xFFFF;
|
|
||||||
last_buf = buf;
|
|
||||||
last_res = z;
|
|
||||||
buf += 16;
|
|
||||||
|
|
||||||
// now, start the pipeline!
|
|
||||||
assert((size_t)buf % 16 == 0);
|
|
||||||
for (; buf + 15 < buf_end; buf += 16) {
|
|
||||||
// scan more data
|
|
||||||
data = load128(buf);
|
|
||||||
z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data) ^ 0xFFFF;
|
|
||||||
|
|
||||||
// do a comparison on previous result
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
|
|
||||||
(last_buf, last_res
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
last_buf = buf;
|
|
||||||
last_res = z;
|
|
||||||
}
|
|
||||||
assert(buf <= buf_end && buf >= buf_end - 16);
|
|
||||||
|
|
||||||
// epilogue: compare final results
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
|
|
||||||
(last_buf, last_res
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* 32-byte pipeline, for bigger scans
|
|
||||||
*/
|
|
||||||
static
|
|
||||||
const u8 *JOIN(MATCH_ALGO, trufflePipeline32)(m128 shuf_mask_lo_highclear,
|
|
||||||
m128 shuf_mask_lo_highset,
|
|
||||||
const u8 *buf, const u8 *buf_end,
|
|
||||||
const u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, const u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
const u8* ptr, *last_buf;
|
|
||||||
u32 res;
|
|
||||||
|
|
||||||
// pipeline prologue: scan first 32 bytes
|
|
||||||
m128 data1 = load128(buf);
|
|
||||||
u32 z1 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data1) ^ 0xFFFF;
|
|
||||||
m128 data2 = load128(buf + 16);
|
|
||||||
u32 z2 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data2) ^ 0xFFFF;
|
|
||||||
|
|
||||||
// store the results
|
|
||||||
u32 last_res = z1 | (z2 << 16);
|
|
||||||
last_buf = buf;
|
|
||||||
buf += 32;
|
|
||||||
|
|
||||||
|
|
||||||
// now, start the pipeline!
|
|
||||||
assert((size_t)buf % 16 == 0);
|
|
||||||
for (; buf + 31 < buf_end; buf += 32) {
|
|
||||||
// scan more data
|
|
||||||
data1 = load128(buf);
|
|
||||||
z1 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data1) ^ 0xFFFF;
|
|
||||||
data2 = load128(buf + 16);
|
|
||||||
z2 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data2) ^ 0xFFFF;
|
|
||||||
res = z1 | (z2 << 16);
|
|
||||||
|
|
||||||
// do a comparison on previous result
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
|
||||||
(last_buf, last_res
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
last_res = res;
|
|
||||||
last_buf = buf;
|
|
||||||
}
|
|
||||||
|
|
||||||
// epilogue: compare final results
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
|
||||||
(last_buf, last_res
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
// if we still have some data left, scan it too
|
|
||||||
for (; buf + 15 < buf_end; buf += 16) {
|
|
||||||
m128 chars = load128(buf);
|
|
||||||
ptr = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset,
|
|
||||||
chars, buf, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
assert(buf <= buf_end && buf >= buf_end - 16);
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
const u8 *JOIN(MATCH_ALGO, truffleExec)(m128 shuf_mask_lo_highclear,
|
|
||||||
m128 shuf_mask_lo_highset,
|
|
||||||
const u8 *buf, const u8 *buf_end, const u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, const u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
DEBUG_PRINTF("run_len %zu\n", buf_end - buf);
|
|
||||||
|
|
||||||
assert(buf && buf_end);
|
|
||||||
assert(buf < buf_end);
|
|
||||||
const u8 *rv;
|
|
||||||
|
|
||||||
if (buf_end - buf < 16) {
|
|
||||||
return truffleMini(shuf_mask_lo_highclear, shuf_mask_lo_highset, buf, buf_end);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t min = (size_t)buf % 16;
|
|
||||||
assert(buf_end - buf >= 16);
|
|
||||||
|
|
||||||
// Preconditioning: most of the time our buffer won't be aligned.
|
|
||||||
m128 chars = loadu128(buf);
|
|
||||||
rv = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, buf, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (rv) {
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
buf += (16 - min);
|
|
||||||
|
|
||||||
// if we have enough data, run bigger pipeline; otherwise run smaller one
|
|
||||||
if (buf_end - buf >= 128) {
|
|
||||||
rv = JOIN(MATCH_ALGO, trufflePipeline32)(shuf_mask_lo_highclear, shuf_mask_lo_highset,
|
|
||||||
buf, buf_end, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(rv)) {
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
} else if (buf_end - buf >= 16){
|
|
||||||
rv = JOIN(MATCH_ALGO, trufflePipeline16)(shuf_mask_lo_highclear, shuf_mask_lo_highset,
|
|
||||||
buf, buf_end, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(rv)) {
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use an unaligned load to mop up the last 16 bytes and get an accurate
|
|
||||||
// picture to buf_end.
|
|
||||||
chars = loadu128(buf_end - 16);
|
|
||||||
rv = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars,
|
|
||||||
buf_end - 16, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (rv) {
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
|
|
||||||
return buf_end;
|
|
||||||
}
|
|
@ -1,109 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "config.h"
|
|
||||||
#include "ue2common.h"
|
|
||||||
#include "util/arch.h"
|
|
||||||
|
|
||||||
#include "multivermicelli.h"
|
|
||||||
|
|
||||||
#include "multiaccel_common.h"
|
|
||||||
|
|
||||||
#if !defined(HAVE_AVX2)
|
|
||||||
|
|
||||||
#define MATCH_ALGO long_
|
|
||||||
#include "multiaccel_long.h"
|
|
||||||
#include "multivermicelli_sse.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO longgrab_
|
|
||||||
#include "multiaccel_longgrab.h"
|
|
||||||
#include "multivermicelli_sse.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO shift_
|
|
||||||
#include "multiaccel_shift.h"
|
|
||||||
#include "multivermicelli_sse.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO shiftgrab_
|
|
||||||
#include "multiaccel_shiftgrab.h"
|
|
||||||
#include "multivermicelli_sse.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MULTIACCEL_DOUBLE
|
|
||||||
|
|
||||||
#define MATCH_ALGO doubleshift_
|
|
||||||
#include "multiaccel_doubleshift.h"
|
|
||||||
#include "multivermicelli_sse.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO doubleshiftgrab_
|
|
||||||
#include "multiaccel_doubleshiftgrab.h"
|
|
||||||
#include "multivermicelli_sse.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#undef MULTIACCEL_DOUBLE
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
#define MATCH_ALGO long_
|
|
||||||
#include "multiaccel_long.h"
|
|
||||||
#include "multivermicelli_avx2.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO longgrab_
|
|
||||||
#include "multiaccel_longgrab.h"
|
|
||||||
#include "multivermicelli_avx2.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO shift_
|
|
||||||
#include "multiaccel_shift.h"
|
|
||||||
#include "multivermicelli_avx2.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO shiftgrab_
|
|
||||||
#include "multiaccel_shiftgrab.h"
|
|
||||||
#include "multivermicelli_avx2.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MULTIACCEL_DOUBLE
|
|
||||||
|
|
||||||
#define MATCH_ALGO doubleshift_
|
|
||||||
#include "multiaccel_doubleshift.h"
|
|
||||||
#include "multivermicelli_avx2.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#define MATCH_ALGO doubleshiftgrab_
|
|
||||||
#include "multiaccel_doubleshiftgrab.h"
|
|
||||||
#include "multivermicelli_avx2.h"
|
|
||||||
#undef MATCH_ALGO
|
|
||||||
|
|
||||||
#undef MULTIACCEL_DOUBLE
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,62 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MULTIVERMICELLI_H_
|
|
||||||
#define MULTIVERMICELLI_H_
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C"
|
|
||||||
{
|
|
||||||
#endif
|
|
||||||
|
|
||||||
const u8 *long_vermicelliExec(char c, char nocase, const u8 *buf,
|
|
||||||
const u8 *buf_end, const u8 run_len);
|
|
||||||
|
|
||||||
const u8 *longgrab_vermicelliExec(char c, char nocase, const u8 *buf,
|
|
||||||
const u8 *buf_end, const u8 run_len);
|
|
||||||
|
|
||||||
const u8 *shift_vermicelliExec(char c, char nocase, const u8 *buf,
|
|
||||||
const u8 *buf_end, const u8 run_len);
|
|
||||||
|
|
||||||
const u8 *shiftgrab_vermicelliExec(char c, char nocase, const u8 *buf,
|
|
||||||
const u8 *buf_end, const u8 run_len);
|
|
||||||
|
|
||||||
const u8 *doubleshift_vermicelliExec(char c, char nocase, const u8 *buf,
|
|
||||||
const u8 *buf_end, const u8 run_len,
|
|
||||||
const u8 run2_len);
|
|
||||||
|
|
||||||
const u8 *doubleshiftgrab_vermicelliExec(char c, char nocase, const u8 *buf,
|
|
||||||
const u8 *buf_end, const u8 run_len,
|
|
||||||
const u8 run2_len);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* MULTIVERMICELLI_H_ */
|
|
@ -1,283 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "util/bitutils.h"
|
|
||||||
#include "util/simd_utils.h"
|
|
||||||
#include "util/unaligned.h"
|
|
||||||
|
|
||||||
#include "multiaccel_common.h"
|
|
||||||
|
|
||||||
static really_inline
|
|
||||||
const u8 *JOIN(MATCH_ALGO, vermUnalignNocase)(m256 chars,
|
|
||||||
const u8 *buf,
|
|
||||||
const u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, const u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
m256 casemask = set32x8(CASE_CLEAR);
|
|
||||||
const u8 *ptr;
|
|
||||||
m256 data = loadu256(buf);
|
|
||||||
u32 z = movemask256(eq256(chars, and256(casemask, data)));
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
|
||||||
(buf, z
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
static really_inline
|
|
||||||
const u8 *JOIN(MATCH_ALGO, vermUnalign)(m256 chars,
|
|
||||||
const u8 *buf,
|
|
||||||
const u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, const u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
const u8 *ptr;
|
|
||||||
|
|
||||||
m256 data = loadu256(buf);
|
|
||||||
u32 z = movemask256(eq256(chars, data));
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
|
||||||
(buf, z
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* 32-byte pipeline
|
|
||||||
*/
|
|
||||||
static really_inline
|
|
||||||
const u8 *JOIN(MATCH_ALGO, vermPipeline)(m256 chars,
|
|
||||||
const u8 *buf,
|
|
||||||
const u8 *buf_end,
|
|
||||||
const u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, const u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
const u8* ptr, *last_buf;
|
|
||||||
u32 last_res;
|
|
||||||
|
|
||||||
// pipeline prologue: scan first 32 bytes
|
|
||||||
m256 data = load256(buf);
|
|
||||||
u32 z = movemask256(eq256(chars, data));
|
|
||||||
last_res = z;
|
|
||||||
last_buf = buf;
|
|
||||||
buf += 32;
|
|
||||||
|
|
||||||
// now, start the pipeline!
|
|
||||||
assert((size_t)buf % 32 == 0);
|
|
||||||
for (; buf + 31 < buf_end; buf += 32) {
|
|
||||||
// scan more data
|
|
||||||
data = load256(buf);
|
|
||||||
z = movemask256(eq256(chars, data));
|
|
||||||
|
|
||||||
// do a comparison on previous result
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
|
||||||
(last_buf, last_res
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
last_buf = buf;
|
|
||||||
last_res = z;
|
|
||||||
}
|
|
||||||
assert(buf <= buf_end && buf >= buf_end - 32);
|
|
||||||
|
|
||||||
// epilogue: compare final results
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
|
||||||
(last_buf, last_res
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* 32-byte caseless pipeline
|
|
||||||
*/
|
|
||||||
static really_inline
|
|
||||||
const u8 *JOIN(MATCH_ALGO, vermPipelineNocase)(m256 chars,
|
|
||||||
const u8 *buf,
|
|
||||||
const u8 *buf_end,
|
|
||||||
const u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, const u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
m256 casemask = set32x8(CASE_CLEAR);
|
|
||||||
const u8* ptr, *last_buf;
|
|
||||||
u32 last_res;
|
|
||||||
|
|
||||||
// pipeline prologue: scan first 32 bytes
|
|
||||||
m256 data = load256(buf);
|
|
||||||
u32 z = movemask256(eq256(chars, and256(casemask, data)));
|
|
||||||
last_res = z;
|
|
||||||
last_buf = buf;
|
|
||||||
buf += 32;
|
|
||||||
|
|
||||||
|
|
||||||
// now, start the pipeline!
|
|
||||||
assert((size_t)buf % 32 == 0);
|
|
||||||
for (; buf + 31 < buf_end; buf += 32) {
|
|
||||||
// scan more data
|
|
||||||
data = load256(buf);
|
|
||||||
z = movemask256(eq256(chars, and256(casemask, data)));
|
|
||||||
|
|
||||||
// do a comparison on previous result
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
|
||||||
(last_buf, last_res
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
last_buf = buf;
|
|
||||||
last_res = z;
|
|
||||||
}
|
|
||||||
assert(buf <= buf_end && buf >= buf_end - 32);
|
|
||||||
|
|
||||||
// epilogue: compare final results
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
|
||||||
(last_buf, last_res
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
const u8 *JOIN(MATCH_ALGO, vermicelliExec)(char c, char nocase,
|
|
||||||
const u8 *buf,
|
|
||||||
const u8 *buf_end,
|
|
||||||
const u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, const u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n",
|
|
||||||
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
|
|
||||||
assert(buf < buf_end);
|
|
||||||
|
|
||||||
const u8 *ptr;
|
|
||||||
|
|
||||||
// Handle small scans.
|
|
||||||
if (buf_end - buf < 32) {
|
|
||||||
for (; buf < buf_end; buf++) {
|
|
||||||
char cur = (char)*buf;
|
|
||||||
if (nocase) {
|
|
||||||
cur &= CASE_CLEAR;
|
|
||||||
}
|
|
||||||
if (cur == c) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return buf;
|
|
||||||
}
|
|
||||||
|
|
||||||
m256 chars = set32x8(c); /* nocase already uppercase */
|
|
||||||
|
|
||||||
uintptr_t min = (uintptr_t)buf % 32;
|
|
||||||
|
|
||||||
if (min) {
|
|
||||||
ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars,
|
|
||||||
buf, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
) : JOIN(MATCH_ALGO, vermUnalign)(chars,
|
|
||||||
buf, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
buf += 32 - min;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (buf_end - buf >= 32){
|
|
||||||
ptr = nocase ? JOIN(MATCH_ALGO, vermPipelineNocase)(chars,
|
|
||||||
buf, buf_end, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
) : JOIN(MATCH_ALGO, vermPipeline)(chars,
|
|
||||||
buf, buf_end, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// final unaligned scan
|
|
||||||
ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars,
|
|
||||||
buf_end - 32, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
) : JOIN(MATCH_ALGO, vermUnalign)(chars,
|
|
||||||
buf_end - 32, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
|
|
||||||
// run our pipeline
|
|
||||||
return ptr ? ptr : buf_end;
|
|
||||||
}
|
|
@ -1,452 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "util/bitutils.h"
|
|
||||||
#include "util/simd_utils.h"
|
|
||||||
#include "util/unaligned.h"
|
|
||||||
|
|
||||||
#define VERM_BOUNDARY 16
|
|
||||||
#define VERM_TYPE m128
|
|
||||||
#define VERM_SET_FN set16x8
|
|
||||||
|
|
||||||
#include "multiaccel_common.h"
|
|
||||||
|
|
||||||
static really_inline
|
|
||||||
const u8 *JOIN(MATCH_ALGO, vermUnalignNocase)(m128 chars,
|
|
||||||
const u8 *buf,
|
|
||||||
const u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, const u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
m128 casemask = set16x8(CASE_CLEAR);
|
|
||||||
const u8 *ptr;
|
|
||||||
m128 data = loadu128(buf);
|
|
||||||
u32 z = movemask128(eq128(chars, and128(casemask, data)));
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
|
|
||||||
(buf, z
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
static really_inline
|
|
||||||
const u8 *JOIN(MATCH_ALGO, vermUnalign)(m128 chars,
|
|
||||||
const u8 *buf,
|
|
||||||
const u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, const u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
const u8 *ptr;
|
|
||||||
|
|
||||||
m128 data = loadu128(buf);
|
|
||||||
u32 z = movemask128(eq128(chars, data));
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
|
|
||||||
(buf, z
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* 16-byte pipeline, for smaller scans
|
|
||||||
*/
|
|
||||||
static
|
|
||||||
const u8 *JOIN(MATCH_ALGO, vermPipeline16)(m128 chars,
|
|
||||||
const u8 *buf,
|
|
||||||
const u8 *buf_end,
|
|
||||||
const u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, const u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
const u8* ptr, *last_buf;
|
|
||||||
u32 last_res;
|
|
||||||
|
|
||||||
// pipeline prologue: scan first 16 bytes
|
|
||||||
m128 data = load128(buf);
|
|
||||||
u32 z = movemask128(eq128(chars, data));
|
|
||||||
last_buf = buf;
|
|
||||||
last_res = z;
|
|
||||||
buf += 16;
|
|
||||||
|
|
||||||
// now, start the pipeline!
|
|
||||||
assert((size_t)buf % 16 == 0);
|
|
||||||
for (; buf + 15 < buf_end; buf += 16) {
|
|
||||||
// scan more data
|
|
||||||
data = load128(buf);
|
|
||||||
z = movemask128(eq128(chars, data));
|
|
||||||
|
|
||||||
// do a comparison on previous result
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
|
|
||||||
(last_buf, last_res
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
last_buf = buf;
|
|
||||||
last_res = z;
|
|
||||||
}
|
|
||||||
assert(buf <= buf_end && buf >= buf_end - 16);
|
|
||||||
|
|
||||||
// epilogue: compare final results
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
|
|
||||||
(last_buf, last_res
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* 16-byte pipeline, for smaller scans
|
|
||||||
*/
|
|
||||||
static
|
|
||||||
const u8 *JOIN(MATCH_ALGO, vermPipeline16Nocase)(m128 chars,
|
|
||||||
const u8 *buf,
|
|
||||||
const u8 *buf_end,
|
|
||||||
const u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, const u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
m128 casemask = set16x8(CASE_CLEAR);
|
|
||||||
const u8* ptr, *last_buf;
|
|
||||||
u32 last_res;
|
|
||||||
|
|
||||||
// pipeline prologue: scan first 16 bytes
|
|
||||||
m128 data = load128(buf);
|
|
||||||
u32 z = movemask128(eq128(chars, and128(casemask, data)));
|
|
||||||
last_buf = buf;
|
|
||||||
last_res = z;
|
|
||||||
buf += 16;
|
|
||||||
|
|
||||||
// now, start the pipeline!
|
|
||||||
assert((size_t)buf % 16 == 0);
|
|
||||||
for (; buf + 15 < buf_end; buf += 16) {
|
|
||||||
// scan more data
|
|
||||||
data = load128(buf);
|
|
||||||
z = movemask128(eq128(chars, and128(casemask, data)));
|
|
||||||
|
|
||||||
// do a comparison on previous result
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
|
|
||||||
(last_buf, last_res
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
last_buf = buf;
|
|
||||||
last_res = z;
|
|
||||||
}
|
|
||||||
assert(buf <= buf_end && buf >= buf_end - 16);
|
|
||||||
|
|
||||||
// epilogue: compare final results
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
|
|
||||||
(last_buf, last_res
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* 32-byte pipeline, for bigger scans
|
|
||||||
*/
|
|
||||||
static
|
|
||||||
const u8 *JOIN(MATCH_ALGO, vermPipeline32)(m128 chars,
|
|
||||||
const u8 *buf,
|
|
||||||
const u8 *buf_end,
|
|
||||||
const u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, const u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
const u8* ptr, *last_buf;
|
|
||||||
u32 res;
|
|
||||||
|
|
||||||
// pipeline prologue: scan first 32 bytes
|
|
||||||
m128 data1 = load128(buf);
|
|
||||||
u32 z1 = movemask128(eq128(chars, data1));
|
|
||||||
m128 data2 = load128(buf + 16);
|
|
||||||
u32 z2 = movemask128(eq128(chars, data2));
|
|
||||||
|
|
||||||
// store the results
|
|
||||||
u32 last_res = z1 | (z2 << VERM_BOUNDARY);
|
|
||||||
last_buf = buf;
|
|
||||||
buf += 32;
|
|
||||||
|
|
||||||
|
|
||||||
// now, start the pipeline!
|
|
||||||
assert((size_t)buf % 16 == 0);
|
|
||||||
for (; buf + 31 < buf_end; buf += 32) {
|
|
||||||
// scan more data
|
|
||||||
data1 = load128(buf);
|
|
||||||
z1 = movemask128(eq128(chars, data1));
|
|
||||||
data2 = load128(buf + 16);
|
|
||||||
z2 = movemask128(eq128(chars, data2));
|
|
||||||
res = z1 | (z2 << 16);
|
|
||||||
|
|
||||||
// do a comparison on previous result
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
|
||||||
(last_buf, last_res
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
last_res = res;
|
|
||||||
last_buf = buf;
|
|
||||||
}
|
|
||||||
|
|
||||||
// epilogue: compare final results
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
|
||||||
(last_buf, last_res
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
// if we still have some data left, scan it too
|
|
||||||
if (buf + 15 < buf_end) {
|
|
||||||
return JOIN(MATCH_ALGO, vermPipeline16)(chars, buf, buf_end, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
}
|
|
||||||
assert(buf <= buf_end && buf >= buf_end - 16);
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* 32-byte caseless pipeline, for bigger scans
|
|
||||||
*/
|
|
||||||
static
|
|
||||||
const u8 *JOIN(MATCH_ALGO, vermPipeline32Nocase)(m128 chars,
|
|
||||||
const u8 *buf,
|
|
||||||
const u8 *buf_end,
|
|
||||||
const u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, const u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
m128 casemask = set16x8(CASE_CLEAR);
|
|
||||||
const u8* ptr, *last_buf;
|
|
||||||
u32 last_res;
|
|
||||||
|
|
||||||
// pipeline prologue: scan first 32 bytes
|
|
||||||
m128 data1 = load128(buf);
|
|
||||||
u32 z1 = movemask128(eq128(chars, and128(casemask, data1)));
|
|
||||||
m128 data2 = load128(buf + 16);
|
|
||||||
u32 z2 = movemask128(eq128(chars, and128(casemask, data2)));
|
|
||||||
u32 z = z1 | (z2 << VERM_BOUNDARY);
|
|
||||||
|
|
||||||
last_res = z;
|
|
||||||
last_buf = buf;
|
|
||||||
buf += 32;
|
|
||||||
|
|
||||||
// now, start the pipeline!
|
|
||||||
assert((size_t)buf % 16 == 0);
|
|
||||||
for (; buf + 31 < buf_end; buf += 32) {
|
|
||||||
// scan more data
|
|
||||||
data1 = load128(buf);
|
|
||||||
z1 = movemask128(eq128(chars, and128(casemask, data1)));
|
|
||||||
data2 = load128(buf + 16);
|
|
||||||
z2 = movemask128(eq128(chars, and128(casemask, data2)));
|
|
||||||
z = z1 | (z2 << 16);
|
|
||||||
|
|
||||||
// do a comparison on previous result
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
|
||||||
(last_buf, last_res
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
last_res = z;
|
|
||||||
last_buf = buf;
|
|
||||||
}
|
|
||||||
|
|
||||||
// epilogue: compare final results
|
|
||||||
ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
|
|
||||||
(last_buf, last_res
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
// if we still have some data left, scan it too
|
|
||||||
if (buf + 15 < buf_end) {
|
|
||||||
return JOIN(MATCH_ALGO, vermPipeline16Nocase)(chars, buf, buf_end, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
}
|
|
||||||
assert(buf <= buf_end && buf >= buf_end - 16);
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
const u8 *JOIN(MATCH_ALGO, vermicelliExec)(char c, char nocase,
|
|
||||||
const u8 *buf,
|
|
||||||
const u8 *buf_end,
|
|
||||||
const u8 run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, const u8 run_len2
|
|
||||||
#endif
|
|
||||||
) {
|
|
||||||
DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n",
|
|
||||||
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
|
|
||||||
assert(buf < buf_end);
|
|
||||||
|
|
||||||
const u8 *ptr;
|
|
||||||
|
|
||||||
// Handle small scans.
|
|
||||||
if (buf_end - buf < VERM_BOUNDARY) {
|
|
||||||
for (; buf < buf_end; buf++) {
|
|
||||||
char cur = (char)*buf;
|
|
||||||
if (nocase) {
|
|
||||||
cur &= CASE_CLEAR;
|
|
||||||
}
|
|
||||||
if (cur == c) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return buf;
|
|
||||||
}
|
|
||||||
|
|
||||||
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
|
|
||||||
|
|
||||||
uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
|
|
||||||
|
|
||||||
if (min) {
|
|
||||||
ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars,
|
|
||||||
buf, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
) : JOIN(MATCH_ALGO, vermUnalign)(chars,
|
|
||||||
buf, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
buf += VERM_BOUNDARY - min;
|
|
||||||
}
|
|
||||||
|
|
||||||
// if we have enough data, run bigger pipeline; otherwise run smaller one
|
|
||||||
if (buf_end - buf >= 128) {
|
|
||||||
ptr = nocase ? JOIN(MATCH_ALGO, vermPipeline32Nocase)(chars,
|
|
||||||
buf, buf_end, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
) : JOIN(MATCH_ALGO, vermPipeline32)(chars,
|
|
||||||
buf, buf_end, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
} else if (buf_end - buf >= 16){
|
|
||||||
ptr = nocase ? JOIN(MATCH_ALGO, vermPipeline16Nocase)(chars,
|
|
||||||
buf, buf_end, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
) : JOIN(MATCH_ALGO, vermPipeline16)(chars,
|
|
||||||
buf, buf_end, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
if (unlikely(ptr)) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// final unaligned scan
|
|
||||||
ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars,
|
|
||||||
buf_end - VERM_BOUNDARY, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
) : JOIN(MATCH_ALGO, vermUnalign)(chars,
|
|
||||||
buf_end - VERM_BOUNDARY, run_len
|
|
||||||
#ifdef MULTIACCEL_DOUBLE
|
|
||||||
, run_len2
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
|
|
||||||
// run our pipeline
|
|
||||||
return ptr ? ptr : buf_end;
|
|
||||||
}
|
|
@ -39,7 +39,52 @@
|
|||||||
#include "util/simd_utils.h"
|
#include "util/simd_utils.h"
|
||||||
#include "util/unaligned.h"
|
#include "util/unaligned.h"
|
||||||
|
|
||||||
#include "shufti_common.h"
|
#ifdef DEBUG
|
||||||
|
#include <ctype.h>
|
||||||
|
|
||||||
|
#define DUMP_MSK(_t) \
|
||||||
|
static UNUSED \
|
||||||
|
void dumpMsk##_t(m##_t msk) { \
|
||||||
|
u8 * mskAsU8 = (u8 *)&msk; \
|
||||||
|
for (unsigned i = 0; i < sizeof(msk); i++) { \
|
||||||
|
u8 c = mskAsU8[i]; \
|
||||||
|
for (int j = 0; j < 8; j++) { \
|
||||||
|
if ((c >> (7-j)) & 0x1) \
|
||||||
|
printf("1"); \
|
||||||
|
else \
|
||||||
|
printf("0"); \
|
||||||
|
} \
|
||||||
|
printf(" "); \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
static UNUSED \
|
||||||
|
void dumpMsk##_t##AsChars(m##_t msk) { \
|
||||||
|
u8 * mskAsU8 = (u8 *)&msk; \
|
||||||
|
for (unsigned i = 0; i < sizeof(msk); i++) { \
|
||||||
|
u8 c = mskAsU8[i]; \
|
||||||
|
if (isprint(c)) \
|
||||||
|
printf("%c",c); \
|
||||||
|
else \
|
||||||
|
printf("."); \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/** \brief Naive byte-by-byte implementation. */
|
||||||
|
static really_inline
|
||||||
|
const u8 *shuftiFwdSlow(const u8 *lo, const u8 *hi, const u8 *buf,
|
||||||
|
const u8 *buf_end) {
|
||||||
|
assert(buf < buf_end);
|
||||||
|
|
||||||
|
for (; buf < buf_end; ++buf) {
|
||||||
|
u8 c = *buf;
|
||||||
|
if (lo[c & 0xf] & hi[c >> 4]) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
/** \brief Naive byte-by-byte implementation. */
|
/** \brief Naive byte-by-byte implementation. */
|
||||||
static really_inline
|
static really_inline
|
||||||
@ -59,6 +104,30 @@ const u8 *shuftiRevSlow(const u8 *lo, const u8 *hi, const u8 *buf,
|
|||||||
#if !defined(HAVE_AVX2)
|
#if !defined(HAVE_AVX2)
|
||||||
/* Normal SSSE3 shufti */
|
/* Normal SSSE3 shufti */
|
||||||
|
|
||||||
|
#ifdef DEBUG
|
||||||
|
DUMP_MSK(128)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define GET_LO_4(chars) and128(chars, low4bits)
|
||||||
|
#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4)
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits,
|
||||||
|
const m128 compare) {
|
||||||
|
m128 c_lo = pshufb(mask_lo, GET_LO_4(chars));
|
||||||
|
m128 c_hi = pshufb(mask_hi, GET_HI_4(chars));
|
||||||
|
m128 t = and128(c_lo, c_hi);
|
||||||
|
|
||||||
|
#ifdef DEBUG
|
||||||
|
DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n");
|
||||||
|
DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n");
|
||||||
|
DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n");
|
||||||
|
DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n");
|
||||||
|
DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n");
|
||||||
|
#endif
|
||||||
|
return movemask128(eq128(t, compare));
|
||||||
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
const u8 *firstMatch(const u8 *buf, u32 z) {
|
const u8 *firstMatch(const u8 *buf, u32 z) {
|
||||||
if (unlikely(z != 0xffff)) {
|
if (unlikely(z != 0xffff)) {
|
||||||
@ -293,6 +362,31 @@ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi,
|
|||||||
|
|
||||||
#else // AVX2 - 256 wide shuftis
|
#else // AVX2 - 256 wide shuftis
|
||||||
|
|
||||||
|
#ifdef DEBUG
|
||||||
|
DUMP_MSK(256)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define GET_LO_4(chars) and256(chars, low4bits)
|
||||||
|
#define GET_HI_4(chars) rshift64_m256(andnot256(low4bits, chars), 4)
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits,
|
||||||
|
const m256 compare) {
|
||||||
|
m256 c_lo = vpshufb(mask_lo, GET_LO_4(chars));
|
||||||
|
m256 c_hi = vpshufb(mask_hi, GET_HI_4(chars));
|
||||||
|
m256 t = and256(c_lo, c_hi);
|
||||||
|
|
||||||
|
#ifdef DEBUG
|
||||||
|
DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n");
|
||||||
|
DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n");
|
||||||
|
DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n");
|
||||||
|
DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n");
|
||||||
|
DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return movemask256(eq256(t, compare));
|
||||||
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
const u8 *firstMatch(const u8 *buf, u32 z) {
|
const u8 *firstMatch(const u8 *buf, u32 z) {
|
||||||
if (unlikely(z != 0xffffffff)) {
|
if (unlikely(z != 0xffffffff)) {
|
||||||
|
@ -1,146 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef SHUFTI_COMMON_H_
|
|
||||||
#define SHUFTI_COMMON_H_
|
|
||||||
|
|
||||||
#include "ue2common.h"
|
|
||||||
|
|
||||||
#include "util/arch.h"
|
|
||||||
#include "util/bitutils.h"
|
|
||||||
#include "util/simd_utils.h"
|
|
||||||
#include "util/unaligned.h"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Common stuff for all versions of shufti (single, multi and multidouble)
|
|
||||||
*/
|
|
||||||
|
|
||||||
/** \brief Naive byte-by-byte implementation. */
|
|
||||||
static really_inline
|
|
||||||
const u8 *shuftiFwdSlow(const u8 *lo, const u8 *hi, const u8 *buf,
|
|
||||||
const u8 *buf_end) {
|
|
||||||
assert(buf < buf_end);
|
|
||||||
|
|
||||||
for (; buf < buf_end; ++buf) {
|
|
||||||
u8 c = *buf;
|
|
||||||
if (lo[c & 0xf] & hi[c >> 4]) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return buf;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
|
||||||
#include <ctype.h>
|
|
||||||
|
|
||||||
#define DUMP_MSK(_t) \
|
|
||||||
static UNUSED \
|
|
||||||
void dumpMsk##_t(m##_t msk) { \
|
|
||||||
u8 * mskAsU8 = (u8 *)&msk; \
|
|
||||||
for (unsigned i = 0; i < sizeof(msk); i++) { \
|
|
||||||
u8 c = mskAsU8[i]; \
|
|
||||||
for (int j = 0; j < 8; j++) { \
|
|
||||||
if ((c >> (7-j)) & 0x1) \
|
|
||||||
printf("1"); \
|
|
||||||
else \
|
|
||||||
printf("0"); \
|
|
||||||
} \
|
|
||||||
printf(" "); \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
static UNUSED \
|
|
||||||
void dumpMsk##_t##AsChars(m##_t msk) { \
|
|
||||||
u8 * mskAsU8 = (u8 *)&msk; \
|
|
||||||
for (unsigned i = 0; i < sizeof(msk); i++) { \
|
|
||||||
u8 c = mskAsU8[i]; \
|
|
||||||
if (isprint(c)) \
|
|
||||||
printf("%c",c); \
|
|
||||||
else \
|
|
||||||
printf("."); \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(HAVE_AVX2)
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
|
||||||
DUMP_MSK(128)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define GET_LO_4(chars) and128(chars, low4bits)
|
|
||||||
#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4)
|
|
||||||
|
|
||||||
static really_inline
|
|
||||||
u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits,
|
|
||||||
const m128 compare) {
|
|
||||||
m128 c_lo = pshufb(mask_lo, GET_LO_4(chars));
|
|
||||||
m128 c_hi = pshufb(mask_hi, GET_HI_4(chars));
|
|
||||||
m128 t = and128(c_lo, c_hi);
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
|
||||||
DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n");
|
|
||||||
DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n");
|
|
||||||
DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n");
|
|
||||||
DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n");
|
|
||||||
DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n");
|
|
||||||
#endif
|
|
||||||
return movemask128(eq128(t, compare));
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
|
||||||
DUMP_MSK(256)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define GET_LO_4(chars) and256(chars, low4bits)
|
|
||||||
#define GET_HI_4(chars) rshift64_m256(andnot256(low4bits, chars), 4)
|
|
||||||
|
|
||||||
static really_inline
|
|
||||||
u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits,
|
|
||||||
const m256 compare) {
|
|
||||||
m256 c_lo = vpshufb(mask_lo, GET_LO_4(chars));
|
|
||||||
m256 c_hi = vpshufb(mask_hi, GET_HI_4(chars));
|
|
||||||
m256 t = and256(c_lo, c_hi);
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
|
||||||
DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n");
|
|
||||||
DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n");
|
|
||||||
DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n");
|
|
||||||
DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n");
|
|
||||||
DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n");
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return movemask256(eq256(t, compare));
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* SHUFTI_COMMON_H_ */
|
|
@ -37,8 +37,6 @@
|
|||||||
#include "util/bitutils.h"
|
#include "util/bitutils.h"
|
||||||
#include "util/simd_utils.h"
|
#include "util/simd_utils.h"
|
||||||
|
|
||||||
#include "truffle_common.h"
|
|
||||||
|
|
||||||
#if !defined(HAVE_AVX2)
|
#if !defined(HAVE_AVX2)
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
@ -52,6 +50,57 @@ const u8 *lastMatch(const u8 *buf, u32 z) {
|
|||||||
return NULL; // no match
|
return NULL; // no match
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *firstMatch(const u8 *buf, u32 z) {
|
||||||
|
if (unlikely(z != 0xffff)) {
|
||||||
|
u32 pos = ctz32(~z & 0xffff);
|
||||||
|
assert(pos < 16);
|
||||||
|
return buf + pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL; // no match
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) {
|
||||||
|
|
||||||
|
m128 highconst = _mm_set1_epi8(0x80);
|
||||||
|
m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201);
|
||||||
|
|
||||||
|
// and now do the real work
|
||||||
|
m128 shuf1 = pshufb(shuf_mask_lo_highclear, v);
|
||||||
|
m128 t1 = xor128(v, highconst);
|
||||||
|
m128 shuf2 = pshufb(shuf_mask_lo_highset, t1);
|
||||||
|
m128 t2 = andnot128(highconst, rshift64_m128(v, 4));
|
||||||
|
m128 shuf3 = pshufb(shuf_mask_hi, t2);
|
||||||
|
m128 tmp = and128(or128(shuf1, shuf2), shuf3);
|
||||||
|
m128 tmp2 = eq128(tmp, zeroes128());
|
||||||
|
u32 z = movemask128(tmp2);
|
||||||
|
|
||||||
|
return z;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
const u8 *truffleMini(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
|
||||||
|
const u8 *buf, const u8 *buf_end) {
|
||||||
|
uintptr_t len = buf_end - buf;
|
||||||
|
assert(len < 16);
|
||||||
|
|
||||||
|
m128 chars = zeroes128();
|
||||||
|
memcpy(&chars, buf, len);
|
||||||
|
|
||||||
|
u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
|
||||||
|
// can't be these bytes in z
|
||||||
|
u32 mask = (0xffff >> (16 - len)) ^ 0xffff;
|
||||||
|
const u8 *rv = firstMatch(buf, z | mask);
|
||||||
|
|
||||||
|
if (rv) {
|
||||||
|
return rv;
|
||||||
|
} else {
|
||||||
|
return buf_end;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
const u8 *fwdBlock(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
|
const u8 *fwdBlock(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
|
||||||
m128 v, const u8 *buf) {
|
m128 v, const u8 *buf) {
|
||||||
@ -125,7 +174,7 @@ const u8 *truffleRevMini(m128 shuf_mask_lo_highclear,
|
|||||||
m128 chars = zeroes128();
|
m128 chars = zeroes128();
|
||||||
memcpy(&chars, buf, len);
|
memcpy(&chars, buf, len);
|
||||||
|
|
||||||
u32 mask = (0xFFFF >> (16 - len)) ^ 0xFFFF;
|
u32 mask = (0xffff >> (16 - len)) ^ 0xffff;
|
||||||
u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
|
u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
|
||||||
const u8 *rv = lastMatch(buf, z | mask);
|
const u8 *rv = lastMatch(buf, z | mask);
|
||||||
|
|
||||||
@ -184,6 +233,8 @@ const u8 *rtruffleExec(m128 shuf_mask_lo_highclear,
|
|||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
// AVX2
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
const u8 *lastMatch(const u8 *buf, u32 z) {
|
const u8 *lastMatch(const u8 *buf, u32 z) {
|
||||||
if (unlikely(z != 0xffffffff)) {
|
if (unlikely(z != 0xffffffff)) {
|
||||||
@ -195,6 +246,57 @@ const u8 *lastMatch(const u8 *buf, u32 z) {
|
|||||||
return NULL; // no match
|
return NULL; // no match
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *firstMatch(const u8 *buf, u32 z) {
|
||||||
|
if (unlikely(z != 0xffffffff)) {
|
||||||
|
u32 pos = ctz32(~z);
|
||||||
|
assert(pos < 32);
|
||||||
|
return buf + pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL; // no match
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
u32 block(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v) {
|
||||||
|
|
||||||
|
m256 highconst = _mm256_set1_epi8(0x80);
|
||||||
|
m256 shuf_mask_hi = _mm256_set1_epi64x(0x8040201008040201);
|
||||||
|
|
||||||
|
// and now do the real work
|
||||||
|
m256 shuf1 = vpshufb(shuf_mask_lo_highclear, v);
|
||||||
|
m256 t1 = xor256(v, highconst);
|
||||||
|
m256 shuf2 = vpshufb(shuf_mask_lo_highset, t1);
|
||||||
|
m256 t2 = andnot256(highconst, rshift64_m256(v, 4));
|
||||||
|
m256 shuf3 = vpshufb(shuf_mask_hi, t2);
|
||||||
|
m256 tmp = and256(or256(shuf1, shuf2), shuf3);
|
||||||
|
m256 tmp2 = eq256(tmp, zeroes256());
|
||||||
|
u32 z = movemask256(tmp2);
|
||||||
|
|
||||||
|
return z;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
const u8 *truffleMini(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset,
|
||||||
|
const u8 *buf, const u8 *buf_end) {
|
||||||
|
uintptr_t len = buf_end - buf;
|
||||||
|
assert(len < 32);
|
||||||
|
|
||||||
|
m256 chars = zeroes256();
|
||||||
|
memcpy(&chars, buf, len);
|
||||||
|
|
||||||
|
u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
|
||||||
|
// can't be these bytes in z
|
||||||
|
u32 mask = (0xffffffff >> (32 - len)) ^ 0xffffffff;
|
||||||
|
const u8 *rv = firstMatch(buf, z | mask);
|
||||||
|
|
||||||
|
if (rv) {
|
||||||
|
return rv;
|
||||||
|
} else {
|
||||||
|
return buf_end;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
const u8 *fwdBlock(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset,
|
const u8 *fwdBlock(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset,
|
||||||
m256 v, const u8 *buf) {
|
m256 v, const u8 *buf) {
|
||||||
@ -266,7 +368,7 @@ const u8 *truffleRevMini(m256 shuf_mask_lo_highclear,
|
|||||||
m256 chars = zeroes256();
|
m256 chars = zeroes256();
|
||||||
memcpy(&chars, buf, len);
|
memcpy(&chars, buf, len);
|
||||||
|
|
||||||
u32 mask = (0xFFFFFFFF >> (32 - len)) ^ 0xFFFFFFFF;
|
u32 mask = (0xffffffff >> (32 - len)) ^ 0xffffffff;
|
||||||
u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
|
u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
|
||||||
const u8 *rv = lastMatch(buf, z | mask);
|
const u8 *rv = lastMatch(buf, z | mask);
|
||||||
|
|
||||||
|
@ -1,147 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef TRUFFLE_COMMON_H_
|
|
||||||
#define TRUFFLE_COMMON_H_
|
|
||||||
|
|
||||||
#include "util/arch.h"
|
|
||||||
#include "util/bitutils.h"
|
|
||||||
#include "util/simd_utils.h"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Common stuff for all versions of truffle (single, multi and multidouble)
|
|
||||||
*/
|
|
||||||
#if !defined(HAVE_AVX2)
|
|
||||||
|
|
||||||
static really_inline
|
|
||||||
const u8 *firstMatch(const u8 *buf, u32 z) {
|
|
||||||
if (unlikely(z != 0xffff)) {
|
|
||||||
u32 pos = ctz32(~z & 0xffff);
|
|
||||||
assert(pos < 16);
|
|
||||||
return buf + pos;
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL; // no match
|
|
||||||
}
|
|
||||||
|
|
||||||
static really_inline
|
|
||||||
u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) {
|
|
||||||
|
|
||||||
m128 highconst = _mm_set1_epi8(0x80);
|
|
||||||
m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201);
|
|
||||||
|
|
||||||
// and now do the real work
|
|
||||||
m128 shuf1 = pshufb(shuf_mask_lo_highclear, v);
|
|
||||||
m128 t1 = xor128(v, highconst);
|
|
||||||
m128 shuf2 = pshufb(shuf_mask_lo_highset, t1);
|
|
||||||
m128 t2 = andnot128(highconst, rshift64_m128(v, 4));
|
|
||||||
m128 shuf3 = pshufb(shuf_mask_hi, t2);
|
|
||||||
m128 tmp = and128(or128(shuf1, shuf2), shuf3);
|
|
||||||
m128 tmp2 = eq128(tmp, zeroes128());
|
|
||||||
u32 z = movemask128(tmp2);
|
|
||||||
|
|
||||||
return z;
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
const u8 *truffleMini(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
|
|
||||||
const u8 *buf, const u8 *buf_end) {
|
|
||||||
uintptr_t len = buf_end - buf;
|
|
||||||
assert(len < 16);
|
|
||||||
|
|
||||||
m128 chars = zeroes128();
|
|
||||||
memcpy(&chars, buf, len);
|
|
||||||
|
|
||||||
u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
|
|
||||||
// can't be these bytes in z
|
|
||||||
u32 mask = (0xFFFF >> (16 - len)) ^ 0xFFFF;
|
|
||||||
const u8 *rv = firstMatch(buf, z| mask);
|
|
||||||
|
|
||||||
if (rv) {
|
|
||||||
return rv;
|
|
||||||
} else {
|
|
||||||
return buf_end;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
static really_inline
|
|
||||||
const u8 *firstMatch(const u8 *buf, u32 z) {
|
|
||||||
if (unlikely(z != 0xffffffff)) {
|
|
||||||
u32 pos = ctz32(~z);
|
|
||||||
assert(pos < 32);
|
|
||||||
return buf + pos;
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL; // no match
|
|
||||||
}
|
|
||||||
|
|
||||||
static really_inline
|
|
||||||
u32 block(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v) {
|
|
||||||
|
|
||||||
m256 highconst = _mm256_set1_epi8(0x80);
|
|
||||||
m256 shuf_mask_hi = _mm256_set1_epi64x(0x8040201008040201);
|
|
||||||
|
|
||||||
// and now do the real work
|
|
||||||
m256 shuf1 = vpshufb(shuf_mask_lo_highclear, v);
|
|
||||||
m256 t1 = xor256(v, highconst);
|
|
||||||
m256 shuf2 = vpshufb(shuf_mask_lo_highset, t1);
|
|
||||||
m256 t2 = andnot256(highconst, rshift64_m256(v, 4));
|
|
||||||
m256 shuf3 = vpshufb(shuf_mask_hi, t2);
|
|
||||||
m256 tmp = and256(or256(shuf1, shuf2), shuf3);
|
|
||||||
m256 tmp2 = eq256(tmp, zeroes256());
|
|
||||||
u32 z = movemask256(tmp2);
|
|
||||||
|
|
||||||
return z;
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
const u8 *truffleMini(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset,
|
|
||||||
const u8 *buf, const u8 *buf_end) {
|
|
||||||
uintptr_t len = buf_end - buf;
|
|
||||||
assert(len < 32);
|
|
||||||
|
|
||||||
m256 chars = zeroes256();
|
|
||||||
memcpy(&chars, buf, len);
|
|
||||||
|
|
||||||
u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
|
|
||||||
// can't be these bytes in z
|
|
||||||
u32 mask = (0xFFFFFFFF >> (32 - len)) ^ 0xFFFFFFFF;
|
|
||||||
const u8 *rv = firstMatch(buf, z | mask);
|
|
||||||
|
|
||||||
if (rv) {
|
|
||||||
return rv;
|
|
||||||
} else {
|
|
||||||
return buf_end;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* TRUFFLE_COMMON_H_ */
|
|
@ -37,7 +37,6 @@
|
|||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
|
|
||||||
#include "nfa/accel.h"
|
#include "nfa/accel.h"
|
||||||
#include "nfa/multiaccel_compilehelper.h"
|
|
||||||
|
|
||||||
#include "util/bitutils.h" // for CASE_CLEAR
|
#include "util/bitutils.h" // for CASE_CLEAR
|
||||||
#include "util/charreach.h"
|
#include "util/charreach.h"
|
||||||
@ -677,134 +676,6 @@ NFAVertex get_sds_or_proxy(const NGHolder &g) {
|
|||||||
return g.startDs;
|
return g.startDs;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
|
||||||
NFAVertex find_next(const NFAVertex v, const NGHolder &g) {
|
|
||||||
NFAVertex res = NGHolder::null_vertex();
|
|
||||||
for (NFAVertex u : adjacent_vertices_range(v, g)) {
|
|
||||||
if (u != v) {
|
|
||||||
res = u;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Check if vertex \a v is a multi accelerable state (for a limex NFA). */
|
|
||||||
MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g,
|
|
||||||
const vector<NFAVertex> &states,
|
|
||||||
const CompileContext &cc) {
|
|
||||||
// For a set of states to be accelerable, we basically have to have only
|
|
||||||
// one state to accelerate.
|
|
||||||
if (states.size() != 1) {
|
|
||||||
DEBUG_PRINTF("can't accelerate multiple states\n");
|
|
||||||
return MultibyteAccelInfo();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get our base vertex
|
|
||||||
NFAVertex v = states[0];
|
|
||||||
|
|
||||||
// We need the base vertex to be a self-looping dotall leading to exactly
|
|
||||||
// one vertex.
|
|
||||||
if (!hasSelfLoop(v, g)) {
|
|
||||||
DEBUG_PRINTF("base vertex has self-loop\n");
|
|
||||||
return MultibyteAccelInfo();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!g[v].char_reach.all()) {
|
|
||||||
DEBUG_PRINTF("can't accelerate anything but dot\n");
|
|
||||||
return MultibyteAccelInfo();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (proper_out_degree(v, g) != 1) {
|
|
||||||
DEBUG_PRINTF("can't accelerate states with multiple successors\n");
|
|
||||||
return MultibyteAccelInfo();
|
|
||||||
}
|
|
||||||
|
|
||||||
// find our start vertex
|
|
||||||
NFAVertex cur = find_next(v, g);
|
|
||||||
if (cur == NGHolder::null_vertex()) {
|
|
||||||
DEBUG_PRINTF("invalid start vertex\n");
|
|
||||||
return MultibyteAccelInfo();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool has_offset = false;
|
|
||||||
u32 offset = 0;
|
|
||||||
CharReach cr = g[cur].char_reach;
|
|
||||||
|
|
||||||
// if we start with a dot, we have an offset, so defer figuring out the
|
|
||||||
// real CharReach for this accel scheme
|
|
||||||
if (cr == CharReach::dot()) {
|
|
||||||
has_offset = true;
|
|
||||||
offset = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// figure out our offset
|
|
||||||
while (has_offset) {
|
|
||||||
// vertices have to have no self loops
|
|
||||||
if (hasSelfLoop(cur, g)) {
|
|
||||||
DEBUG_PRINTF("can't have self-loops\n");
|
|
||||||
return MultibyteAccelInfo();
|
|
||||||
}
|
|
||||||
|
|
||||||
// we have to have exactly 1 successor to have this acceleration scheme
|
|
||||||
if (out_degree(cur, g) != 1) {
|
|
||||||
DEBUG_PRINTF("can't have multiple successors\n");
|
|
||||||
return MultibyteAccelInfo();
|
|
||||||
}
|
|
||||||
|
|
||||||
cur = *adjacent_vertices(cur, g).first;
|
|
||||||
|
|
||||||
// if we met a special vertex, bail out
|
|
||||||
if (is_special(cur, g)) {
|
|
||||||
DEBUG_PRINTF("can't have special vertices\n");
|
|
||||||
return MultibyteAccelInfo();
|
|
||||||
}
|
|
||||||
|
|
||||||
// now, get the real char reach
|
|
||||||
if (g[cur].char_reach != CharReach::dot()) {
|
|
||||||
cr = g[cur].char_reach;
|
|
||||||
has_offset = false;
|
|
||||||
} else {
|
|
||||||
offset++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// now, fire up the compilation machinery
|
|
||||||
target_t ti = cc.target_info;
|
|
||||||
unsigned max_len = ti.has_avx2() ? MULTIACCEL_MAX_LEN_AVX2 : MULTIACCEL_MAX_LEN_SSE;
|
|
||||||
MultiaccelCompileHelper mac(cr, offset, max_len);
|
|
||||||
|
|
||||||
while (mac.canAdvance()) {
|
|
||||||
// vertices have to have no self loops
|
|
||||||
if (hasSelfLoop(cur, g)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// we have to have exactly 1 successor to have this acceleration scheme
|
|
||||||
if (out_degree(cur, g) != 1) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
cur = *adjacent_vertices(cur, g).first;
|
|
||||||
|
|
||||||
// if we met a special vertex, bail out
|
|
||||||
if (is_special(cur, g)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
mac.advance(g[cur].char_reach);
|
|
||||||
}
|
|
||||||
MultibyteAccelInfo mai = mac.getBestScheme();
|
|
||||||
#ifdef DEBUG
|
|
||||||
DEBUG_PRINTF("Multibyte acceleration scheme: type: %u offset: %u lengths: %u,%u\n",
|
|
||||||
mai.type, mai.offset, mai.len1, mai.len2);
|
|
||||||
for (size_t c = mai.cr.find_first(); c != CharReach::npos; c = mai.cr.find_next(c)) {
|
|
||||||
DEBUG_PRINTF("multibyte accel char: %zu\n", c);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return mai;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). */
|
/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). */
|
||||||
bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
|
bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
|
||||||
const vector<CharReach> &refined_cr,
|
const vector<CharReach> &refined_cr,
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -51,9 +51,6 @@ namespace ue2 {
|
|||||||
#define MAX_MERGED_ACCEL_STOPS 200
|
#define MAX_MERGED_ACCEL_STOPS 200
|
||||||
#define ACCEL_MAX_STOP_CHAR 24
|
#define ACCEL_MAX_STOP_CHAR 24
|
||||||
#define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */
|
#define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */
|
||||||
#define MULTIACCEL_MIN_LEN 3
|
|
||||||
#define MULTIACCEL_MAX_LEN_SSE 15
|
|
||||||
#define MULTIACCEL_MAX_LEN_AVX2 31
|
|
||||||
|
|
||||||
// forward-declaration of CompileContext
|
// forward-declaration of CompileContext
|
||||||
struct CompileContext;
|
struct CompileContext;
|
||||||
@ -84,11 +81,6 @@ bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
|
|||||||
const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
|
const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
|
||||||
AccelScheme *as, bool allow_wide);
|
AccelScheme *as, bool allow_wide);
|
||||||
|
|
||||||
/** \brief Check if vertex \a v is a multi accelerable state (for a limex NFA).
|
|
||||||
*/
|
|
||||||
MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g,
|
|
||||||
const std::vector<NFAVertex> &verts,
|
|
||||||
const CompileContext &cc);
|
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
|
@ -52,8 +52,6 @@ set(unit_internal_SOURCES
|
|||||||
internal/limex_nfa.cpp
|
internal/limex_nfa.cpp
|
||||||
internal/masked_move.cpp
|
internal/masked_move.cpp
|
||||||
internal/multi_bit.cpp
|
internal/multi_bit.cpp
|
||||||
internal/multiaccel_matcher.cpp
|
|
||||||
internal/multiaccel_shift.cpp
|
|
||||||
internal/nfagraph_common.h
|
internal/nfagraph_common.h
|
||||||
internal/nfagraph_comp.cpp
|
internal/nfagraph_comp.cpp
|
||||||
internal/nfagraph_equivalence.cpp
|
internal/nfagraph_equivalence.cpp
|
||||||
|
@ -1,301 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
extern "C" {
|
|
||||||
#include "nfa/accel.h" // wrapping in extern C to make sure run_accel works
|
|
||||||
}
|
|
||||||
|
|
||||||
#include "config.h"
|
|
||||||
#include "src/ue2common.h"
|
|
||||||
|
|
||||||
#include "gtest/gtest.h"
|
|
||||||
#include "nfagraph/ng_limex_accel.h"
|
|
||||||
#include "nfa/accelcompile.h"
|
|
||||||
#include "nfa/multivermicelli.h"
|
|
||||||
#include "nfa/multishufti.h"
|
|
||||||
#include "nfa/multitruffle.h"
|
|
||||||
#include "util/alloc.h"
|
|
||||||
#include "util/charreach.h"
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <iostream>
|
|
||||||
#include <random>
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
using namespace ue2;
|
|
||||||
using namespace std;
|
|
||||||
using namespace testing;
|
|
||||||
|
|
||||||
// test parameters structure
|
|
||||||
struct MultiaccelTestParam {
|
|
||||||
string match_pattern;
|
|
||||||
u32 match_pattern_start_idx;
|
|
||||||
u32 match_idx;
|
|
||||||
bool test_all_offsets;
|
|
||||||
u8 match_len1;
|
|
||||||
u8 match_len2;
|
|
||||||
MultibyteAccelInfo::multiaccel_type type;
|
|
||||||
};
|
|
||||||
|
|
||||||
// buffer size is constant
|
|
||||||
static const u32 BUF_SIZE = 200;
|
|
||||||
|
|
||||||
// strings, out of which CharReach will be generated
|
|
||||||
static const string VERM_CR = "a";
|
|
||||||
static const string V_NC_CR = "aA";
|
|
||||||
static const string SHUF_CR = "abcdefghijklmnopqrstuvwxyz";
|
|
||||||
static const string TRUF_CR = "\x11\x22\x33\x44\x55\x66\x77\x88\x99";
|
|
||||||
|
|
||||||
// Parameterized test case for multiaccel patterns.
|
|
||||||
class MultiaccelTest : public TestWithParam<MultiaccelTestParam> {
|
|
||||||
protected:
|
|
||||||
virtual void SetUp() {
|
|
||||||
// set up is deferred until the actual test, since we can't compile
|
|
||||||
// any accel schemes unless we know CharReach
|
|
||||||
const MultiaccelTestParam &p = GetParam();
|
|
||||||
|
|
||||||
// reserve space in our buffer
|
|
||||||
buffer = (u8 *)aligned_zmalloc(BUF_SIZE);
|
|
||||||
|
|
||||||
// store the index where we expect to see the match. note that it may
|
|
||||||
// be different from where the match pattern has started since we may
|
|
||||||
// have a flooded match (i.e. a match preceded by almost-match) or a
|
|
||||||
// no-match (in which case "match" index is at the end of the buffer).
|
|
||||||
match_idx = p.match_idx;
|
|
||||||
|
|
||||||
// make note if we need to test all offsets - sometimes we don't, for
|
|
||||||
// example when testing partial or no-match.
|
|
||||||
test_all_offsets = p.test_all_offsets;
|
|
||||||
}
|
|
||||||
|
|
||||||
char getChar(const CharReach &cr) {
|
|
||||||
assert(cr.count() > 0);
|
|
||||||
auto dist = uniform_int_distribution<size_t>(0, cr.count() - 1);
|
|
||||||
size_t result = cr.find_nth(dist(prng));
|
|
||||||
assert(result != CharReach::npos);
|
|
||||||
return (char)result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// char generator
|
|
||||||
char getChar(const CharReach &cr, bool match) {
|
|
||||||
return getChar(match ? cr : ~cr);
|
|
||||||
}
|
|
||||||
|
|
||||||
// appends a string with matches/unmatches according to input match pattern
|
|
||||||
void getMatch(u8 *result, u32 start, const string &pattern,
|
|
||||||
const CharReach &cr) {
|
|
||||||
for (const auto &c : pattern) {
|
|
||||||
result[start++] = getChar(cr, c == '1');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// appends non-matching noise of certain lengths
|
|
||||||
void getNoise(u8 *result, u32 start, u32 len, const CharReach &cr) {
|
|
||||||
for (unsigned i = 0; i < len; i++) {
|
|
||||||
result[start + i] = getChar(cr, false);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// deferred buffer generation, as we don't know CharReach before we run the test
|
|
||||||
void GenerateBuffer(const CharReach &cr) {
|
|
||||||
const MultiaccelTestParam &p = GetParam();
|
|
||||||
|
|
||||||
// step 1: fill prefix with non-matching noise
|
|
||||||
u32 start = 0;
|
|
||||||
getNoise(buffer, start, p.match_pattern_start_idx, cr);
|
|
||||||
|
|
||||||
// step 2: add a match
|
|
||||||
start += p.match_pattern_start_idx;
|
|
||||||
getMatch(buffer, start, p.match_pattern, cr);
|
|
||||||
|
|
||||||
// step 3: fill in the rest of the buffer with non-matching noise
|
|
||||||
start += p.match_pattern.size();
|
|
||||||
getNoise(buffer, start, BUF_SIZE - p.match_pattern.size() -
|
|
||||||
p.match_pattern_start_idx, cr);
|
|
||||||
}
|
|
||||||
|
|
||||||
// deferred accel scheme generation, as we don't know CharReach before we run the test
|
|
||||||
void CompileAccelScheme(const CharReach &cr, AccelAux *aux) {
|
|
||||||
const MultiaccelTestParam &p = GetParam();
|
|
||||||
|
|
||||||
AccelInfo ai;
|
|
||||||
ai.single_stops = cr; // dummy CharReach to prevent red tape accel
|
|
||||||
ai.ma_len1 = p.match_len1;
|
|
||||||
ai.ma_len2 = p.match_len2;
|
|
||||||
ai.multiaccel_stops = cr;
|
|
||||||
ai.ma_type = p.type;
|
|
||||||
|
|
||||||
buildAccelAux(ai, aux);
|
|
||||||
|
|
||||||
// now, verify we've successfully built our accel scheme, *and* that it's
|
|
||||||
// a multibyte scheme
|
|
||||||
ASSERT_TRUE(aux->accel_type >= ACCEL_MLVERM &&
|
|
||||||
aux->accel_type <= ACCEL_MDSGTRUFFLE);
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual void TearDown() {
|
|
||||||
aligned_free(buffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
// We want our tests to be deterministic, so we use a PRNG in the test
|
|
||||||
// fixture.
|
|
||||||
mt19937 prng;
|
|
||||||
|
|
||||||
u32 match_idx;
|
|
||||||
u8 *buffer;
|
|
||||||
bool test_all_offsets;
|
|
||||||
};
|
|
||||||
|
|
||||||
static
|
|
||||||
void runTest(const u8 *buffer, AccelAux *aux, unsigned match_idx,
|
|
||||||
bool test_all_offsets) {
|
|
||||||
const u8 *start = buffer;
|
|
||||||
const u8 *end = start + BUF_SIZE;
|
|
||||||
const u8 *match = start + match_idx;
|
|
||||||
|
|
||||||
// comparing indexes into the buffer is easier to understand than pointers
|
|
||||||
if (test_all_offsets) {
|
|
||||||
// run_accel can only scan >15 byte buffers
|
|
||||||
u32 end_offset = min(match_idx, BUF_SIZE - 15);
|
|
||||||
|
|
||||||
for (unsigned offset = 0; offset < end_offset; offset++) {
|
|
||||||
const u8 *ptr = run_accel(aux, (start + offset), end);
|
|
||||||
unsigned idx = ptr - start;
|
|
||||||
ASSERT_EQ(match_idx, idx);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
const u8 *ptr = run_accel(aux, start, end);
|
|
||||||
unsigned idx = ptr - start;
|
|
||||||
ASSERT_EQ(match_idx, idx);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_P(MultiaccelTest, TestVermicelli) {
|
|
||||||
AccelAux aux = {0};
|
|
||||||
CharReach cr(VERM_CR);
|
|
||||||
|
|
||||||
GenerateBuffer(cr);
|
|
||||||
|
|
||||||
CompileAccelScheme(cr, &aux);
|
|
||||||
|
|
||||||
runTest(buffer, &aux, match_idx, test_all_offsets);
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_P(MultiaccelTest, TestVermicelliNocase) {
|
|
||||||
AccelAux aux = {0};
|
|
||||||
CharReach cr(V_NC_CR);
|
|
||||||
|
|
||||||
GenerateBuffer(cr);
|
|
||||||
|
|
||||||
CompileAccelScheme(cr, &aux);
|
|
||||||
|
|
||||||
runTest(buffer, &aux, match_idx, test_all_offsets);
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_P(MultiaccelTest, TestShufti) {
|
|
||||||
AccelAux aux = {0};
|
|
||||||
CharReach cr(SHUF_CR);
|
|
||||||
|
|
||||||
GenerateBuffer(cr);
|
|
||||||
|
|
||||||
CompileAccelScheme(cr, &aux);
|
|
||||||
|
|
||||||
runTest(buffer, &aux, match_idx, test_all_offsets);
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_P(MultiaccelTest, TestTruffle) {
|
|
||||||
AccelAux aux = {0};
|
|
||||||
CharReach cr(TRUF_CR);
|
|
||||||
|
|
||||||
GenerateBuffer(cr);
|
|
||||||
|
|
||||||
CompileAccelScheme(cr, &aux);
|
|
||||||
|
|
||||||
runTest(buffer, &aux, match_idx, test_all_offsets);
|
|
||||||
}
|
|
||||||
|
|
||||||
static const MultiaccelTestParam multiaccelTests[] = {
|
|
||||||
// long matcher
|
|
||||||
|
|
||||||
// full, partial, flooded, nomatch
|
|
||||||
{"11111", 180, 180, true, 5, 0, MultibyteAccelInfo::MAT_LONG},
|
|
||||||
{"111", 197, 197, true, 5, 0, MultibyteAccelInfo::MAT_LONG},
|
|
||||||
{"1111011111", 177, 182, false, 5, 0, MultibyteAccelInfo::MAT_LONG},
|
|
||||||
{"1111011110", 177, 200, false, 5, 0, MultibyteAccelInfo::MAT_LONG},
|
|
||||||
|
|
||||||
// long-grab matcher
|
|
||||||
|
|
||||||
// full, partial, flooded, nomatch
|
|
||||||
{"111110", 180, 180, true, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB},
|
|
||||||
{"111", 197, 197, true, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB},
|
|
||||||
{"11111111110", 177, 182, false, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB},
|
|
||||||
{"11110111101", 177, 200, false, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB},
|
|
||||||
|
|
||||||
// shift matcher
|
|
||||||
|
|
||||||
// full, partial, flooded, nomatch
|
|
||||||
{"11001", 180, 180, true, 4, 0, MultibyteAccelInfo::MAT_SHIFT},
|
|
||||||
{"110", 197, 197, true, 4, 0, MultibyteAccelInfo::MAT_SHIFT},
|
|
||||||
{"1001011001", 177, 182, false, 4, 0, MultibyteAccelInfo::MAT_SHIFT},
|
|
||||||
{"1101001011", 177, 200, false, 4, 0, MultibyteAccelInfo::MAT_SHIFT},
|
|
||||||
|
|
||||||
// shift-grab matcher
|
|
||||||
|
|
||||||
// full, partial, flooded, nomatch
|
|
||||||
{"10111", 180, 180, true, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB},
|
|
||||||
{"101", 197, 197, true, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB},
|
|
||||||
{"1110010111", 177, 182, false, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB},
|
|
||||||
{"1100101100", 177, 200, false, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB},
|
|
||||||
|
|
||||||
// doubleshift matcher
|
|
||||||
|
|
||||||
// full, partial (one and two shifts), flooded, nomatch
|
|
||||||
{"110111", 180, 180, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
|
|
||||||
{"110", 197, 197, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
|
|
||||||
{"1101", 196, 196, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
|
|
||||||
{"1100100101", 178, 182, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
|
|
||||||
{"1101001101", 177, 200, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
|
|
||||||
|
|
||||||
// doubleshift-grab matcher
|
|
||||||
|
|
||||||
// full, partial (one and two shifts), flooded, nomatch
|
|
||||||
{"100101", 180, 180, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
|
|
||||||
{"100", 197, 197, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
|
|
||||||
{"1011", 196, 196, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
|
|
||||||
{"11111101101", 177, 182, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
|
|
||||||
{"1111110111", 177, 200, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
|
|
||||||
};
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Multiaccel, MultiaccelTest, ValuesIn(multiaccelTests));
|
|
||||||
|
|
||||||
// boring stuff for google test
|
|
||||||
void PrintTo(const MultiaccelTestParam &p, ::std::ostream *os) {
|
|
||||||
*os << "MultiaccelTestParam: " << p.match_pattern;
|
|
||||||
}
|
|
@ -1,81 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "config.h"
|
|
||||||
#include "src/ue2common.h"
|
|
||||||
|
|
||||||
#include "gtest/gtest.h"
|
|
||||||
#include "nfa/multiaccel_common.h"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Unit tests for the shifters.
|
|
||||||
*
|
|
||||||
* This is a bit messy, as shifters are macros, so we're using macros to test
|
|
||||||
* other macros.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define TEST_SHIFT(n) \
|
|
||||||
do { \
|
|
||||||
u64a val = ((u64a) 1 << n) - 1; \
|
|
||||||
JOIN(SHIFT, n)(val); \
|
|
||||||
ASSERT_EQ(val, 1); \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
TEST(MultiaccelShift, StaticShift) {
|
|
||||||
TEST_SHIFT(1);
|
|
||||||
TEST_SHIFT(2);
|
|
||||||
TEST_SHIFT(3);
|
|
||||||
TEST_SHIFT(4);
|
|
||||||
TEST_SHIFT(5);
|
|
||||||
TEST_SHIFT(6);
|
|
||||||
TEST_SHIFT(7);
|
|
||||||
TEST_SHIFT(8);
|
|
||||||
TEST_SHIFT(10);
|
|
||||||
TEST_SHIFT(11);
|
|
||||||
TEST_SHIFT(12);
|
|
||||||
TEST_SHIFT(13);
|
|
||||||
TEST_SHIFT(14);
|
|
||||||
TEST_SHIFT(15);
|
|
||||||
TEST_SHIFT(16);
|
|
||||||
TEST_SHIFT(17);
|
|
||||||
TEST_SHIFT(18);
|
|
||||||
TEST_SHIFT(19);
|
|
||||||
TEST_SHIFT(20);
|
|
||||||
TEST_SHIFT(21);
|
|
||||||
TEST_SHIFT(22);
|
|
||||||
TEST_SHIFT(23);
|
|
||||||
TEST_SHIFT(24);
|
|
||||||
TEST_SHIFT(25);
|
|
||||||
TEST_SHIFT(26);
|
|
||||||
TEST_SHIFT(27);
|
|
||||||
TEST_SHIFT(28);
|
|
||||||
TEST_SHIFT(29);
|
|
||||||
TEST_SHIFT(30);
|
|
||||||
TEST_SHIFT(31);
|
|
||||||
TEST_SHIFT(32);
|
|
||||||
}
|
|
Loading…
x
Reference in New Issue
Block a user