Implement new Vermicelli16 acceleration functions using SVE2.

The scheme uses the SVE2 MATCH and NMATCH instructions to
scan for a set of up to 16 characters at the same rate as
Vermicelli scans for a single character.

Change-Id: Ie2cef904c56651e6108593c668e9b65bc001a886
This commit is contained in:
George Wort
2021-06-28 16:29:43 +01:00
committed by Konstantinos Margaritis
parent 185c45263b
commit db0d8f79e6
25 changed files with 1153 additions and 8 deletions

View File

@@ -1,5 +1,6 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
* Copyright (c) 2021, Arm Limited
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -39,6 +40,7 @@
#include "repeatcompile.h"
#include "shufticompile.h"
#include "trufflecompile.h"
#include "vermicellicompile.h"
#include "nfagraph/ng_dump.h"
#include "nfagraph/ng_equivalence.h"
#include "nfagraph/ng_repeat.h"
@@ -101,6 +103,19 @@ void writeCastleScanEngine(const CharReach &cr, Castle *c) {
return;
}
#ifdef HAVE_SVE2
if (cr.count() <= 16) {
c->type = CASTLE_NVERM16;
vermicelli16Build(cr, (u8 *)&c->u.verm16.mask);
return;
}
if (negated.count() <= 16) {
c->type = CASTLE_VERM16;
vermicelli16Build(negated, (u8 *)&c->u.verm16.mask);
return;
}
#endif // HAVE_SVE2
if (shuftiBuildMasks(negated, (u8 *)&c->u.shuf.mask_lo,
(u8 *)&c->u.shuf.mask_hi) != -1) {
c->type = CASTLE_SHUFTI;