Implement new Vermicelli16 acceleration functions using SVE2.

The scheme utilises the MATCH and NMATCH instructions to
scan for 16 characters at the same rate as vermicelli
scans for one.

Change-Id: Ie2cef904c56651e6108593c668e9b65bc001a886
This commit is contained in:
George Wort
2021-06-28 16:29:43 +01:00
committed by Konstantinos Margaritis
parent b6a7ee7e84
commit b54710d208
25 changed files with 1153 additions and 8 deletions

View File

@@ -1,5 +1,6 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2021, Arm Limited
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -552,6 +553,42 @@ char castleScanNVerm(const struct Castle *c, const u8 *buf, const size_t begin,
return 1;
}
#ifdef HAVE_SVE2
/**
 * Forward escape scan over buf[begin, end) using vermicelli16Exec with the
 * mask stored in c->u.verm16.mask.
 *
 * A return of buf + end from the scanner is treated as "nothing found".
 *
 * \param c     Castle whose verm16 mask drives the scan.
 * \param buf   Base of the buffer; begin/end are offsets from it.
 * \param begin Offset at which scanning starts.
 * \param end   Offset one past the last byte to scan.
 * \param loc   Output: offset (relative to buf) of the hit; written only
 *              when 1 is returned.
 * \return 1 if an escape was located, 0 otherwise.
 */
static really_inline
char castleScanVerm16(const struct Castle *c, const u8 *buf, const size_t begin,
                      const size_t end, size_t *loc) {
    const u8 *const limit = buf + end;
    const u8 *ptr = vermicelli16Exec(c->u.verm16.mask, buf + begin, limit);

    if (ptr != limit) {
        assert(loc);
        assert(ptr >= buf && ptr < buf + end);
        *loc = ptr - buf;
        DEBUG_PRINTF("escape found at offset %zu\n", *loc);
        return 1;
    }

    /* Scanner ran off the end of the window without a hit. */
    DEBUG_PRINTF("no escape found\n");
    return 0;
}
/**
 * Forward escape scan over buf[begin, end) using nvermicelli16Exec (by its
 * name, the negated counterpart of vermicelli16Exec -- confirm against its
 * definition) with the mask stored in c->u.verm16.mask.
 *
 * A return of buf + end from the scanner is treated as "nothing found".
 *
 * \param c     Castle whose verm16 mask drives the scan.
 * \param buf   Base of the buffer; begin/end are offsets from it.
 * \param begin Offset at which scanning starts.
 * \param end   Offset one past the last byte to scan.
 * \param loc   Output: offset (relative to buf) of the hit; written only
 *              when 1 is returned.
 * \return 1 if an escape was located, 0 otherwise.
 */
static really_inline
char castleScanNVerm16(const struct Castle *c, const u8 *buf, const size_t begin,
                       const size_t end, size_t *loc) {
    const u8 *const limit = buf + end;
    const u8 *ptr = nvermicelli16Exec(c->u.verm16.mask, buf + begin, limit);

    if (ptr != limit) {
        assert(loc);
        assert(ptr >= buf && ptr < buf + end);
        *loc = ptr - buf;
        DEBUG_PRINTF("escape found at offset %zu\n", *loc);
        return 1;
    }

    /* Scanner ran off the end of the window without a hit. */
    DEBUG_PRINTF("no escape found\n");
    return 0;
}
#endif // HAVE_SVE2
static really_inline
char castleScanShufti(const struct Castle *c, const u8 *buf, const size_t begin,
const size_t end, size_t *loc) {
@@ -604,6 +641,12 @@ char castleScan(const struct Castle *c, const u8 *buf, const size_t begin,
return castleScanVerm(c, buf, begin, end, loc);
case CASTLE_NVERM:
return castleScanNVerm(c, buf, begin, end, loc);
#ifdef HAVE_SVE2
case CASTLE_VERM16:
return castleScanVerm16(c, buf, begin, end, loc);
case CASTLE_NVERM16:
return castleScanNVerm16(c, buf, begin, end, loc);
#endif // HAVE_SVE2
case CASTLE_SHUFTI:
return castleScanShufti(c, buf, begin, end, loc);
case CASTLE_TRUFFLE:
@@ -647,6 +690,42 @@ char castleRevScanNVerm(const struct Castle *c, const u8 *buf,
return 1;
}
#ifdef HAVE_SVE2
/**
 * Reverse escape scan over buf[begin, end) using rvermicelli16Exec with the
 * mask stored in c->u.verm16.mask.
 *
 * A return of buf + begin - 1 (one byte before the window) from the scanner
 * is treated as "nothing found".
 *
 * \param c     Castle whose verm16 mask drives the scan.
 * \param buf   Base of the buffer; begin/end are offsets from it.
 * \param begin Offset of the first byte in the scan window.
 * \param end   Offset one past the last byte in the scan window.
 * \param loc   Output: offset (relative to buf) of the hit; written only
 *              when 1 is returned.
 * \return 1 if an escape was located, 0 otherwise.
 */
static really_inline
char castleRevScanVerm16(const struct Castle *c, const u8 *buf,
                         const size_t begin, const size_t end, size_t *loc) {
    const u8 *ptr = rvermicelli16Exec(c->u.verm16.mask, buf + begin, buf + end);
    /* NOTE(review): with begin == 0 this sentinel points before buf. */
    const u8 *const miss = buf + begin - 1;

    if (ptr != miss) {
        assert(loc);
        assert(ptr >= buf && ptr < buf + end);
        *loc = ptr - buf;
        DEBUG_PRINTF("escape found at offset %zu\n", *loc);
        return 1;
    }

    /* Scanner walked past the start of the window without a hit. */
    DEBUG_PRINTF("no escape found\n");
    return 0;
}
/**
 * Reverse escape scan over buf[begin, end) using rnvermicelli16Exec (by its
 * name, the negated counterpart of rvermicelli16Exec -- confirm against its
 * definition) with the mask stored in c->u.verm16.mask.
 *
 * A return of buf + begin - 1 (one byte before the window) from the scanner
 * is treated as "nothing found".
 *
 * \param c     Castle whose verm16 mask drives the scan.
 * \param buf   Base of the buffer; begin/end are offsets from it.
 * \param begin Offset of the first byte in the scan window.
 * \param end   Offset one past the last byte in the scan window.
 * \param loc   Output: offset (relative to buf) of the hit; written only
 *              when 1 is returned.
 * \return 1 if an escape was located, 0 otherwise.
 */
static really_inline
char castleRevScanNVerm16(const struct Castle *c, const u8 *buf,
                          const size_t begin, const size_t end, size_t *loc) {
    const u8 *ptr = rnvermicelli16Exec(c->u.verm16.mask, buf + begin, buf + end);
    /* NOTE(review): with begin == 0 this sentinel points before buf. */
    const u8 *const miss = buf + begin - 1;

    if (ptr != miss) {
        assert(loc);
        assert(ptr >= buf && ptr < buf + end);
        *loc = ptr - buf;
        DEBUG_PRINTF("escape found at offset %zu\n", *loc);
        return 1;
    }

    /* Scanner walked past the start of the window without a hit. */
    DEBUG_PRINTF("no escape found\n");
    return 0;
}
#endif // HAVE_SVE2
static really_inline
char castleRevScanShufti(const struct Castle *c, const u8 *buf,
const size_t begin, const size_t end, size_t *loc) {
@@ -699,6 +778,12 @@ char castleRevScan(const struct Castle *c, const u8 *buf, const size_t begin,
return castleRevScanVerm(c, buf, begin, end, loc);
case CASTLE_NVERM:
return castleRevScanNVerm(c, buf, begin, end, loc);
#ifdef HAVE_SVE2
case CASTLE_VERM16:
return castleRevScanVerm16(c, buf, begin, end, loc);
case CASTLE_NVERM16:
return castleRevScanNVerm16(c, buf, begin, end, loc);
#endif // HAVE_SVE2
case CASTLE_SHUFTI:
return castleRevScanShufti(c, buf, begin, end, loc);
case CASTLE_TRUFFLE: