unify some accel code/structures between limex and mcclellan

This commit is contained in:
Alex Coyte
2016-04-06 16:43:32 +10:00
committed by Matthew Barr
parent 850636dbd6
commit ff721ed8e4
9 changed files with 156 additions and 327 deletions

View File

@@ -49,209 +49,6 @@
#include "util/simd_utils_ssse3.h"
#include "util/shuffle_ssse3.h"
/*
 * Run the acceleration scheme described by aux over the bytes [ptr, end).
 *
 * The scheme is selected by aux->accel_type; each case forwards the
 * scheme-specific parameters stored in the matching union member to the
 * corresponding scanning routine and records that member's offset.
 *
 * After the scan, the pointer produced by the routine is moved back by the
 * recorded offset, clamped so that the result is never before the original
 * ptr. Unknown accel types perform no scan and return ptr unchanged;
 * ACCEL_RED_TAPE skips straight to end (minus its offset).
 *
 * Preconditions (asserted): aux is SIMD aligned, end > ptr and the range is
 * at least 16 bytes long.
 */
static
const u8 *accelScan(const union AccelAux *aux, const u8 *ptr, const u8 *end) {
    assert(ISALIGNED(aux)); // must be SIMD aligned for shufti
    assert(end > ptr);
    assert(end - ptr >= 16); // must be at least 16 bytes to scan

    const u8 *start = ptr;
    u8 offset;

    switch (aux->accel_type) {
    /* single-character vermicelli */
    case ACCEL_VERM:
        DEBUG_PRINTF("single vermicelli for 0x%02hhx\n", aux->verm.c);
        offset = aux->verm.offset;
        ptr = vermicelliExec(aux->verm.c, 0, ptr, end);
        break;
    case ACCEL_VERM_NOCASE:
        DEBUG_PRINTF("single vermicelli-nocase for 0x%02hhx\n", aux->verm.c);
        offset = aux->verm.offset;
        ptr = vermicelliExec(aux->verm.c, 1, ptr, end);
        break;

    /* two-character vermicelli */
    case ACCEL_DVERM:
        DEBUG_PRINTF("double vermicelli for 0x%02hhx%02hhx\n",
                     aux->dverm.c1, aux->dverm.c2);
        offset = aux->dverm.offset;
        ptr = vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 0, ptr, end);
        break;
    case ACCEL_DVERM_NOCASE:
        DEBUG_PRINTF("double vermicelli-nocase for 0x%02hhx%02hhx\n",
                     aux->dverm.c1, aux->dverm.c2);
        offset = aux->dverm.offset;
        ptr = vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2,
                                   1, ptr, end);
        break;
    case ACCEL_DVERM_MASKED:
        DEBUG_PRINTF("double vermicelli masked for "
                     "0x%02hhx%02hhx/0x%02hhx%02hhx\n",
                     aux->dverm.c1, aux->dverm.c2,
                     aux->dverm.m1, aux->dverm.m2);
        offset = aux->dverm.offset;
        ptr = vermicelliDoubleMaskedExec(aux->dverm.c1, aux->dverm.c2,
                                         aux->dverm.m1, aux->dverm.m2,
                                         ptr, end);
        break;

    /* multi-byte vermicelli variants with a single length */
    case ACCEL_MLVERM:
        DEBUG_PRINTF("long vermicelli for 0x%02hhx\n", aux->mverm.c);
        offset = aux->mverm.offset;
        ptr = long_vermicelliExec(aux->mverm.c, 0, ptr, end,
                                  aux->mverm.len);
        break;
    case ACCEL_MLVERM_NOCASE:
        DEBUG_PRINTF("long vermicelli-nocase for 0x%02hhx\n", aux->mverm.c);
        offset = aux->mverm.offset;
        ptr = long_vermicelliExec(aux->mverm.c, 1, ptr, end,
                                  aux->mverm.len);
        break;
    case ACCEL_MLGVERM:
        DEBUG_PRINTF("long grab vermicelli for 0x%02hhx\n", aux->mverm.c);
        offset = aux->mverm.offset;
        ptr = longgrab_vermicelliExec(aux->mverm.c, 0, ptr, end,
                                      aux->mverm.len);
        break;
    case ACCEL_MLGVERM_NOCASE:
        DEBUG_PRINTF("long grab vermicelli-nocase for 0x%02hhx\n", aux->mverm.c);
        offset = aux->mverm.offset;
        ptr = longgrab_vermicelliExec(aux->mverm.c, 1, ptr, end,
                                      aux->mverm.len);
        break;
    case ACCEL_MSVERM:
        DEBUG_PRINTF("shift vermicelli for 0x%02hhx\n", aux->mverm.c);
        offset = aux->mverm.offset;
        ptr = shift_vermicelliExec(aux->mverm.c, 0, ptr, end,
                                   aux->mverm.len);
        break;
    case ACCEL_MSVERM_NOCASE:
        DEBUG_PRINTF("shift vermicelli-nocase for 0x%02hhx\n", aux->mverm.c);
        offset = aux->mverm.offset;
        ptr = shift_vermicelliExec(aux->mverm.c, 1, ptr, end,
                                   aux->mverm.len);
        break;
    case ACCEL_MSGVERM:
        DEBUG_PRINTF("shift grab vermicelli for 0x%02hhx\n", aux->mverm.c);
        offset = aux->mverm.offset;
        ptr = shiftgrab_vermicelliExec(aux->mverm.c, 0, ptr, end,
                                       aux->mverm.len);
        break;
    case ACCEL_MSGVERM_NOCASE:
        DEBUG_PRINTF("shift grab vermicelli-nocase for 0x%02hhx\n", aux->mverm.c);
        offset = aux->mverm.offset;
        ptr = shiftgrab_vermicelliExec(aux->mverm.c, 1, ptr, end,
                                       aux->mverm.len);
        break;

    /*
     * Multi-byte vermicelli variants with two lengths. All parameters,
     * including the offset, are taken from the mdverm member so that each
     * case reads one union member consistently.
     */
    case ACCEL_MDSVERM:
        DEBUG_PRINTF("double shift vermicelli for 0x%02hhx\n", aux->mdverm.c);
        offset = aux->mdverm.offset;
        ptr = doubleshift_vermicelliExec(aux->mdverm.c, 0, ptr, end,
                                         aux->mdverm.len1, aux->mdverm.len2);
        break;
    case ACCEL_MDSVERM_NOCASE:
        DEBUG_PRINTF("double shift vermicelli-nocase for 0x%02hhx\n", aux->mdverm.c);
        offset = aux->mdverm.offset;
        ptr = doubleshift_vermicelliExec(aux->mdverm.c, 1, ptr, end,
                                         aux->mdverm.len1, aux->mdverm.len2);
        break;
    case ACCEL_MDSGVERM:
        DEBUG_PRINTF("double shift grab vermicelli for 0x%02hhx\n", aux->mdverm.c);
        offset = aux->mdverm.offset;
        ptr = doubleshiftgrab_vermicelliExec(aux->mdverm.c, 0, ptr, end,
                                             aux->mdverm.len1,
                                             aux->mdverm.len2);
        break;
    case ACCEL_MDSGVERM_NOCASE:
        DEBUG_PRINTF("double shift grab vermicelli-nocase for 0x%02hhx\n", aux->mdverm.c);
        offset = aux->mdverm.offset;
        ptr = doubleshiftgrab_vermicelliExec(aux->mdverm.c, 1, ptr, end,
                                             aux->mdverm.len1,
                                             aux->mdverm.len2);
        break;

    /* shufti */
    case ACCEL_SHUFTI:
        DEBUG_PRINTF("single shufti\n");
        offset = aux->shufti.offset;
        ptr = shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end);
        break;
    case ACCEL_DSHUFTI:
        DEBUG_PRINTF("double shufti\n");
        offset = aux->dshufti.offset;
        ptr = shuftiDoubleExec(aux->dshufti.lo1, aux->dshufti.hi1,
                               aux->dshufti.lo2, aux->dshufti.hi2, ptr, end);
        break;
    case ACCEL_MLSHUFTI:
        offset = aux->mshufti.offset;
        ptr = long_shuftiExec(aux->mshufti.lo, aux->mshufti.hi, ptr, end,
                              aux->mshufti.len);
        break;
    case ACCEL_MLGSHUFTI:
        offset = aux->mshufti.offset;
        ptr = longgrab_shuftiExec(aux->mshufti.lo, aux->mshufti.hi, ptr, end,
                                  aux->mshufti.len);
        break;
    case ACCEL_MSSHUFTI:
        offset = aux->mshufti.offset;
        ptr = shift_shuftiExec(aux->mshufti.lo, aux->mshufti.hi, ptr, end,
                               aux->mshufti.len);
        break;
    case ACCEL_MSGSHUFTI:
        offset = aux->mshufti.offset;
        ptr = shiftgrab_shuftiExec(aux->mshufti.lo, aux->mshufti.hi, ptr, end,
                                   aux->mshufti.len);
        break;
    case ACCEL_MDSSHUFTI:
        offset = aux->mdshufti.offset;
        ptr = doubleshift_shuftiExec(aux->mdshufti.lo, aux->mdshufti.hi,
                                     ptr, end,
                                     aux->mdshufti.len1, aux->mdshufti.len2);
        break;
    case ACCEL_MDSGSHUFTI:
        offset = aux->mdshufti.offset;
        ptr = doubleshiftgrab_shuftiExec(aux->mdshufti.lo, aux->mdshufti.hi,
                                         ptr, end,
                                         aux->mdshufti.len1,
                                         aux->mdshufti.len2);
        break;

    /* truffle */
    case ACCEL_TRUFFLE:
        DEBUG_PRINTF("truffle shuffle\n");
        offset = aux->truffle.offset;
        ptr = truffleExec(aux->truffle.mask1, aux->truffle.mask2, ptr, end);
        break;
    case ACCEL_MLTRUFFLE:
        DEBUG_PRINTF("long match truffle shuffle\n");
        offset = aux->mtruffle.offset;
        ptr = long_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2,
                               ptr, end, aux->mtruffle.len);
        break;
    case ACCEL_MLGTRUFFLE:
        DEBUG_PRINTF("long grab match truffle shuffle\n");
        offset = aux->mtruffle.offset;
        ptr = longgrab_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2,
                                   ptr, end, aux->mtruffle.len);
        break;
    case ACCEL_MSTRUFFLE:
        DEBUG_PRINTF("shift match truffle shuffle\n");
        offset = aux->mtruffle.offset;
        ptr = shift_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2,
                                ptr, end, aux->mtruffle.len);
        break;
    case ACCEL_MSGTRUFFLE:
        DEBUG_PRINTF("shift grab match truffle shuffle\n");
        offset = aux->mtruffle.offset;
        ptr = shiftgrab_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2,
                                    ptr, end, aux->mtruffle.len);
        break;
    case ACCEL_MDSTRUFFLE:
        DEBUG_PRINTF("double shift match truffle shuffle\n");
        offset = aux->mdtruffle.offset;
        ptr = doubleshift_truffleExec(aux->mdtruffle.mask1,
                                      aux->mdtruffle.mask2, ptr, end,
                                      aux->mdtruffle.len1,
                                      aux->mdtruffle.len2);
        break;
    case ACCEL_MDSGTRUFFLE:
        DEBUG_PRINTF("double shift grab match truffle shuffle\n");
        offset = aux->mdtruffle.offset;
        ptr = doubleshiftgrab_truffleExec(aux->mdtruffle.mask1,
                                          aux->mdtruffle.mask2, ptr, end,
                                          aux->mdtruffle.len1,
                                          aux->mdtruffle.len2);
        break;

    case ACCEL_RED_TAPE:
        ptr = end; /* there is no escape */
        offset = aux->generic.offset;
        break;
    default:
        /* no acceleration, fall through and return current ptr */
        offset = 0;
        break;
    }

    /*
     * Back up by the scheme's offset without going before the scan start.
     * The distance is checked before subtracting so that a pointer below
     * start is never formed.
     */
    if (offset) {
        if (ptr - start < offset) {
            return start;
        }
        ptr -= offset;
    }

    return ptr;
}
static really_inline
size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux,
const u8 *input, u32 idx, size_t i, size_t end) {
@@ -272,7 +69,7 @@ size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux,
}
aux = aux + aux_idx;
const u8 *ptr = accelScan(aux, &input[i], &input[end]);
const u8 *ptr = run_accel(aux, &input[i], &input[end]);
assert(ptr >= &input[i]);
size_t j = (size_t)(ptr - input);
DEBUG_PRINTF("accel skipped %zu of %zu chars\n", (j - i), (end - i));