Implement new Vermicelli16 acceleration functions using SVE2.

The scheme utilises the MATCH and NMATCH instructions to
scan for 16 characters at the same rate as vermicelli
scans for one.

Change-Id: Ie2cef904c56651e6108593c668e9b65bc001a886
This commit is contained in:
George Wort
2021-06-28 16:29:43 +01:00
committed by Konstantinos Margaritis
parent c7086cb7f1
commit df926ef62f
25 changed files with 1153 additions and 8 deletions

View File

@@ -1,5 +1,6 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
* Copyright (c) 2021, Arm Limited
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -62,6 +63,11 @@ const u8 *run_hwlm_accel(const union AccelAux *aux, const u8 *ptr,
DEBUG_PRINTF("double vermicelli-nocase for 0x%02hhx%02hhx\n",
aux->dverm.c1, aux->dverm.c2);
return vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 1, ptr, end);
#ifdef HAVE_SVE2
case ACCEL_VERM16:
DEBUG_PRINTF("single vermicelli16\n");
return vermicelli16Exec(aux->verm16.mask, ptr, end);
#endif // HAVE_SVE2
case ACCEL_SHUFTI:
DEBUG_PRINTF("single shufti\n");
return shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end);