mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-29 11:16:29 +03:00
Convert compile-time code to not require SIMD
This commit is contained in:
@@ -461,7 +461,8 @@ void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
|
||||
}
|
||||
|
||||
const CharReach &cr = reach[min_offset];
|
||||
if (shuftiBuildMasks(cr, &aux->shufti.lo, &aux->shufti.hi) != -1) {
|
||||
if (-1 !=
|
||||
shuftiBuildMasks(cr, (u8 *)&aux->shufti.lo, (u8 *)&aux->shufti.hi)) {
|
||||
DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n",
|
||||
describeClass(cr).c_str(), cr.count(), min_offset);
|
||||
aux->shufti.accel_type = ACCEL_SHUFTI;
|
||||
@@ -469,7 +470,7 @@ void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
|
||||
return;
|
||||
}
|
||||
|
||||
truffleBuildMasks(cr, &aux->truffle.mask1, &aux->truffle.mask2);
|
||||
truffleBuildMasks(cr, (u8 *)&aux->truffle.mask1, (u8 *)&aux->truffle.mask2);
|
||||
DEBUG_PRINTF("built truffle for %s (%zu chars, offset %u)\n",
|
||||
describeClass(cr).c_str(), cr.count(), min_offset);
|
||||
aux->truffle.accel_type = ACCEL_TRUFFLE;
|
||||
|
@@ -473,9 +473,10 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
||||
}
|
||||
|
||||
if (double_byte_ok(info) &&
|
||||
shuftiBuildDoubleMasks(info.double_cr, info.double_byte,
|
||||
&accel->dshufti.lo1, &accel->dshufti.hi1,
|
||||
&accel->dshufti.lo2, &accel->dshufti.hi2)) {
|
||||
shuftiBuildDoubleMasks(
|
||||
info.double_cr, info.double_byte, (u8 *)&accel->dshufti.lo1,
|
||||
(u8 *)&accel->dshufti.hi1, (u8 *)&accel->dshufti.lo2,
|
||||
(u8 *)&accel->dshufti.hi2)) {
|
||||
accel->accel_type = ACCEL_DSHUFTI;
|
||||
accel->dshufti.offset = verify_u8(info.double_offset);
|
||||
DEBUG_PRINTF("state %hu is double shufti\n", this_idx);
|
||||
@@ -511,14 +512,16 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
||||
}
|
||||
|
||||
accel->accel_type = ACCEL_SHUFTI;
|
||||
if (-1 != shuftiBuildMasks(info.cr, &accel->shufti.lo, &accel->shufti.hi)) {
|
||||
if (-1 != shuftiBuildMasks(info.cr, (u8 *)&accel->shufti.lo,
|
||||
(u8 *)&accel->shufti.hi)) {
|
||||
DEBUG_PRINTF("state %hu is shufti\n", this_idx);
|
||||
return;
|
||||
}
|
||||
|
||||
assert(!info.cr.none());
|
||||
accel->accel_type = ACCEL_TRUFFLE;
|
||||
truffleBuildMasks(info.cr, &accel->truffle.mask1, &accel->truffle.mask2);
|
||||
truffleBuildMasks(info.cr, (u8 *)&accel->truffle.mask1,
|
||||
(u8 *)&accel->truffle.mask2);
|
||||
DEBUG_PRINTF("state %hu is truffle\n", this_idx);
|
||||
}
|
||||
|
||||
|
@@ -41,7 +41,7 @@
|
||||
#include "util/charreach.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/dump_mask.h"
|
||||
#include "util/simd_utils.h"
|
||||
#include "util/simd_types.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <vector>
|
||||
@@ -147,16 +147,20 @@ const char *accelName(u8 accel_type) {
|
||||
}
|
||||
|
||||
static
|
||||
void dumpShuftiCharReach(FILE *f, const m128 &lo, const m128 &hi) {
|
||||
void dumpShuftiCharReach(FILE *f, const u8 *lo, const u8 *hi) {
|
||||
CharReach cr = shufti2cr(lo, hi);
|
||||
fprintf(f, "count %zu class %s\n", cr.count(),
|
||||
describeClass(cr).c_str());
|
||||
}
|
||||
|
||||
static
|
||||
vector<CharReach> shufti2cr_array(const m128 lo_in, const m128 hi_in) {
|
||||
const u8 *lo = (const u8 *)&lo_in;
|
||||
const u8 *hi = (const u8 *)&hi_in;
|
||||
vector<CharReach> dshufti2cr_array(const u8 *lo_in, const u8 *hi_in) {
|
||||
u8 lo[16];
|
||||
u8 hi[16];
|
||||
for (u32 i = 0; i < 16; i++) {
|
||||
lo[i] = ~lo_in[i];
|
||||
hi[i] = ~hi_in[i];
|
||||
}
|
||||
vector<CharReach> crs(8);
|
||||
for (u32 i = 0; i < 256; i++) {
|
||||
u32 combined = lo[(u8)i & 0xf] & hi[(u8)i >> 4];
|
||||
@@ -169,10 +173,10 @@ vector<CharReach> shufti2cr_array(const m128 lo_in, const m128 hi_in) {
|
||||
}
|
||||
|
||||
static
|
||||
void dumpDShuftiCharReach(FILE *f, const m128 &lo1, const m128 &hi1,
|
||||
const m128 &lo2, const m128 &hi2) {
|
||||
vector<CharReach> cr1 = shufti2cr_array(not128(lo1), not128(hi1));
|
||||
vector<CharReach> cr2 = shufti2cr_array(not128(lo2), not128(hi2));
|
||||
void dumpDShuftiCharReach(FILE *f, const u8 *lo1, const u8 *hi1,
|
||||
const u8 *lo2, const u8 *hi2) {
|
||||
vector<CharReach> cr1 = dshufti2cr_array(lo1, hi1);
|
||||
vector<CharReach> cr2 = dshufti2cr_array(lo2, hi2);
|
||||
map<CharReach, set<u32> > cr1_group;
|
||||
assert(cr1.size() == 8 && cr2.size() == 8);
|
||||
for (u32 i = 0; i < 8; i++) {
|
||||
@@ -208,26 +212,22 @@ void dumpDShuftiCharReach(FILE *f, const m128 &lo1, const m128 &hi1,
|
||||
}
|
||||
|
||||
static
|
||||
void dumpShuftiMasks(FILE *f, const m128 &lo, const m128 &hi) {
|
||||
fprintf(f, "lo %s\n",
|
||||
dumpMask((const u8 *)&lo, 128).c_str());
|
||||
fprintf(f, "hi %s\n",
|
||||
dumpMask((const u8 *)&hi, 128).c_str());
|
||||
void dumpShuftiMasks(FILE *f, const u8 *lo, const u8 *hi) {
|
||||
fprintf(f, "lo %s\n", dumpMask(lo, 128).c_str());
|
||||
fprintf(f, "hi %s\n", dumpMask(hi, 128).c_str());
|
||||
}
|
||||
|
||||
static
|
||||
void dumpTruffleCharReach(FILE *f, const m128 &hiset, const m128 &hiclear) {
|
||||
void dumpTruffleCharReach(FILE *f, const u8 *hiset, const u8 *hiclear) {
|
||||
CharReach cr = truffle2cr(hiset, hiclear);
|
||||
fprintf(f, "count %zu class %s\n", cr.count(),
|
||||
describeClass(cr).c_str());
|
||||
}
|
||||
|
||||
static
|
||||
void dumpTruffleMasks(FILE *f, const m128 &hiset, const m128 &hiclear) {
|
||||
fprintf(f, "lo %s\n",
|
||||
dumpMask((const u8 *)&hiset, 128).c_str());
|
||||
fprintf(f, "hi %s\n",
|
||||
dumpMask((const u8 *)&hiclear, 128).c_str());
|
||||
void dumpTruffleMasks(FILE *f, const u8 *hiset, const u8 *hiclear) {
|
||||
fprintf(f, "lo %s\n", dumpMask(hiset, 128).c_str());
|
||||
fprintf(f, "hi %s\n", dumpMask(hiclear, 128).c_str());
|
||||
}
|
||||
|
||||
|
||||
@@ -256,23 +256,31 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) {
|
||||
break;
|
||||
case ACCEL_SHUFTI: {
|
||||
fprintf(f, "\n");
|
||||
dumpShuftiMasks(f, accel.shufti.lo, accel.shufti.hi);
|
||||
dumpShuftiCharReach(f, accel.shufti.lo, accel.shufti.hi);
|
||||
dumpShuftiMasks(f, (const u8 *)&accel.shufti.lo,
|
||||
(const u8 *)&accel.shufti.hi);
|
||||
dumpShuftiCharReach(f, (const u8 *)&accel.shufti.lo,
|
||||
(const u8 *)&accel.shufti.hi);
|
||||
break;
|
||||
}
|
||||
case ACCEL_DSHUFTI:
|
||||
fprintf(f, "\n");
|
||||
fprintf(f, "mask 1\n");
|
||||
dumpShuftiMasks(f, accel.dshufti.lo1, accel.dshufti.hi1);
|
||||
dumpShuftiMasks(f, (const u8 *)&accel.dshufti.lo1,
|
||||
(const u8 *)&accel.dshufti.hi1);
|
||||
fprintf(f, "mask 2\n");
|
||||
dumpShuftiMasks(f, accel.dshufti.lo2, accel.dshufti.hi2);
|
||||
dumpDShuftiCharReach(f, accel.dshufti.lo1, accel.dshufti.hi1,
|
||||
accel.dshufti.lo2, accel.dshufti.hi2);
|
||||
dumpShuftiMasks(f, (const u8 *)&accel.dshufti.lo2,
|
||||
(const u8 *)&accel.dshufti.hi2);
|
||||
dumpDShuftiCharReach(f, (const u8 *)&accel.dshufti.lo1,
|
||||
(const u8 *)&accel.dshufti.hi1,
|
||||
(const u8 *)&accel.dshufti.lo2,
|
||||
(const u8 *)&accel.dshufti.hi2);
|
||||
break;
|
||||
case ACCEL_TRUFFLE: {
|
||||
fprintf(f, "\n");
|
||||
dumpTruffleMasks(f, accel.truffle.mask1, accel.truffle.mask2);
|
||||
dumpTruffleCharReach(f, accel.truffle.mask1, accel.truffle.mask2);
|
||||
dumpTruffleMasks(f, (const u8 *)&accel.truffle.mask1,
|
||||
(const u8 *)&accel.truffle.mask2);
|
||||
dumpTruffleCharReach(f, (const u8 *)&accel.truffle.mask1,
|
||||
(const u8 *)&accel.truffle.mask2);
|
||||
break;
|
||||
}
|
||||
case ACCEL_MLVERM:
|
||||
@@ -297,28 +305,36 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) {
|
||||
case ACCEL_MSSHUFTI:
|
||||
case ACCEL_MSGSHUFTI:
|
||||
fprintf(f, " len:%u\n", accel.mshufti.len);
|
||||
dumpShuftiMasks(f, accel.mshufti.lo, accel.mshufti.hi);
|
||||
dumpShuftiCharReach(f, accel.mshufti.lo, accel.mshufti.hi);
|
||||
dumpShuftiMasks(f, (const u8 *)&accel.mshufti.lo,
|
||||
(const u8 *)&accel.mshufti.hi);
|
||||
dumpShuftiCharReach(f, (const u8 *)&accel.mshufti.lo,
|
||||
(const u8 *)&accel.mshufti.hi);
|
||||
break;
|
||||
case ACCEL_MDSSHUFTI:
|
||||
case ACCEL_MDSGSHUFTI:
|
||||
fprintf(f, " len1:%u len2:%u\n", accel.mdshufti.len1, accel.mdshufti.len2);
|
||||
dumpShuftiMasks(f, accel.mdshufti.lo, accel.mdshufti.hi);
|
||||
dumpShuftiCharReach(f, accel.mdshufti.lo, accel.mdshufti.hi);
|
||||
dumpShuftiMasks(f, (const u8 *)&accel.mdshufti.lo,
|
||||
(const u8 *)&accel.mdshufti.hi);
|
||||
dumpShuftiCharReach(f, (const u8 *)&accel.mdshufti.lo,
|
||||
(const u8 *)&accel.mdshufti.hi);
|
||||
break;
|
||||
case ACCEL_MLTRUFFLE:
|
||||
case ACCEL_MLGTRUFFLE:
|
||||
case ACCEL_MSTRUFFLE:
|
||||
case ACCEL_MSGTRUFFLE:
|
||||
fprintf(f, " len:%u\n", accel.mtruffle.len);
|
||||
dumpTruffleMasks(f, accel.mtruffle.mask1, accel.mtruffle.mask2);
|
||||
dumpTruffleCharReach(f, accel.mtruffle.mask1, accel.mtruffle.mask2);
|
||||
dumpTruffleMasks(f, (const u8 *)&accel.mtruffle.mask1,
|
||||
(const u8 *)&accel.mtruffle.mask2);
|
||||
dumpTruffleCharReach(f, (const u8 *)&accel.mtruffle.mask1,
|
||||
(const u8 *)&accel.mtruffle.mask2);
|
||||
break;
|
||||
case ACCEL_MDSTRUFFLE:
|
||||
case ACCEL_MDSGTRUFFLE:
|
||||
fprintf(f, " len1:%u len2:%u\n", accel.mdtruffle.len1, accel.mdtruffle.len2);
|
||||
dumpTruffleMasks(f, accel.mdtruffle.mask1, accel.mdtruffle.mask2);
|
||||
dumpTruffleCharReach(f, accel.mdtruffle.mask1, accel.mdtruffle.mask2);
|
||||
dumpTruffleMasks(f, (const u8 *)&accel.mdtruffle.mask1,
|
||||
(const u8 *)&accel.mdtruffle.mask2);
|
||||
dumpTruffleCharReach(f, (const u8 *)&accel.mdtruffle.mask1,
|
||||
(const u8 *)&accel.mdtruffle.mask2);
|
||||
break;
|
||||
default:
|
||||
fprintf(f, "\n");
|
||||
|
@@ -72,8 +72,8 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) {
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("attempting shufti for %zu chars\n", outs);
|
||||
if (-1 != shuftiBuildMasks(info.single_stops, &aux->shufti.lo,
|
||||
&aux->shufti.hi)) {
|
||||
if (-1 != shuftiBuildMasks(info.single_stops, (u8 *)&aux->shufti.lo,
|
||||
(u8 *)&aux->shufti.hi)) {
|
||||
aux->accel_type = ACCEL_SHUFTI;
|
||||
aux->shufti.offset = offset;
|
||||
DEBUG_PRINTF("shufti built OK\n");
|
||||
@@ -86,8 +86,8 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) {
|
||||
DEBUG_PRINTF("building Truffle for %zu chars\n", outs);
|
||||
aux->accel_type = ACCEL_TRUFFLE;
|
||||
aux->truffle.offset = offset;
|
||||
truffleBuildMasks(info.single_stops, &aux->truffle.mask1,
|
||||
&aux->truffle.mask2);
|
||||
truffleBuildMasks(info.single_stops, (u8 *)&aux->truffle.mask1,
|
||||
(u8 *)&aux->truffle.mask2);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -212,9 +212,10 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
|
||||
" two-byte literals\n", outs1, outs2);
|
||||
aux->accel_type = ACCEL_DSHUFTI;
|
||||
aux->dshufti.offset = offset;
|
||||
if (shuftiBuildDoubleMasks(info.double_stop1, info.double_stop2,
|
||||
&aux->dshufti.lo1, &aux->dshufti.hi1,
|
||||
&aux->dshufti.lo2, &aux->dshufti.hi2)) {
|
||||
if (shuftiBuildDoubleMasks(
|
||||
info.double_stop1, info.double_stop2, (u8 *)&aux->dshufti.lo1,
|
||||
(u8 *)&aux->dshufti.hi1, (u8 *)&aux->dshufti.lo2,
|
||||
(u8 *)&aux->dshufti.hi2)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -372,8 +373,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) {
|
||||
|
||||
switch (info.ma_type) {
|
||||
case MultibyteAccelInfo::MAT_LONG:
|
||||
if (shuftiBuildMasks(stops, &aux->mshufti.lo,
|
||||
&aux->mshufti.hi) == -1) {
|
||||
if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
|
||||
(u8 *)&aux->mshufti.hi) == -1) {
|
||||
break;
|
||||
}
|
||||
aux->accel_type = ACCEL_MLSHUFTI;
|
||||
@@ -381,8 +382,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) {
|
||||
aux->mshufti.len = info.ma_len1;
|
||||
return;
|
||||
case MultibyteAccelInfo::MAT_LONGGRAB:
|
||||
if (shuftiBuildMasks(stops, &aux->mshufti.lo,
|
||||
&aux->mshufti.hi) == -1) {
|
||||
if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
|
||||
(u8 *)&aux->mshufti.hi) == -1) {
|
||||
break;
|
||||
}
|
||||
aux->accel_type = ACCEL_MLGSHUFTI;
|
||||
@@ -390,8 +391,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) {
|
||||
aux->mshufti.len = info.ma_len1;
|
||||
return;
|
||||
case MultibyteAccelInfo::MAT_SHIFT:
|
||||
if (shuftiBuildMasks(stops, &aux->mshufti.lo,
|
||||
&aux->mshufti.hi) == -1) {
|
||||
if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
|
||||
(u8 *)&aux->mshufti.hi) == -1) {
|
||||
break;
|
||||
}
|
||||
aux->accel_type = ACCEL_MSSHUFTI;
|
||||
@@ -399,8 +400,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) {
|
||||
aux->mshufti.len = info.ma_len1;
|
||||
return;
|
||||
case MultibyteAccelInfo::MAT_SHIFTGRAB:
|
||||
if (shuftiBuildMasks(stops, &aux->mshufti.lo,
|
||||
&aux->mshufti.hi) == -1) {
|
||||
if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
|
||||
(u8 *)&aux->mshufti.hi) == -1) {
|
||||
break;
|
||||
}
|
||||
aux->accel_type = ACCEL_MSGSHUFTI;
|
||||
@@ -408,8 +409,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) {
|
||||
aux->mshufti.len = info.ma_len1;
|
||||
return;
|
||||
case MultibyteAccelInfo::MAT_DSHIFT:
|
||||
if (shuftiBuildMasks(stops, &aux->mdshufti.lo,
|
||||
&aux->mdshufti.hi) == -1) {
|
||||
if (shuftiBuildMasks(stops, (u8 *)&aux->mdshufti.lo,
|
||||
(u8 *)&aux->mdshufti.hi) == -1) {
|
||||
break;
|
||||
}
|
||||
aux->accel_type = ACCEL_MDSSHUFTI;
|
||||
@@ -418,8 +419,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) {
|
||||
aux->mdshufti.len2 = info.ma_len2;
|
||||
return;
|
||||
case MultibyteAccelInfo::MAT_DSHIFTGRAB:
|
||||
if (shuftiBuildMasks(stops, &aux->mdshufti.lo,
|
||||
&aux->mdshufti.hi) == -1) {
|
||||
if (shuftiBuildMasks(stops, (u8 *)&aux->mdshufti.lo,
|
||||
(u8 *)&aux->mdshufti.hi) == -1) {
|
||||
break;
|
||||
}
|
||||
aux->accel_type = ACCEL_MDSGSHUFTI;
|
||||
@@ -441,45 +442,45 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) {
|
||||
aux->accel_type = ACCEL_MLTRUFFLE;
|
||||
aux->mtruffle.offset = offset;
|
||||
aux->mtruffle.len = info.ma_len1;
|
||||
truffleBuildMasks(stops, &aux->mtruffle.mask1,
|
||||
&aux->mtruffle.mask2);
|
||||
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
|
||||
(u8 *)&aux->mtruffle.mask2);
|
||||
break;
|
||||
case MultibyteAccelInfo::MAT_LONGGRAB:
|
||||
aux->accel_type = ACCEL_MLGTRUFFLE;
|
||||
aux->mtruffle.offset = offset;
|
||||
aux->mtruffle.len = info.ma_len1;
|
||||
truffleBuildMasks(stops, &aux->mtruffle.mask1,
|
||||
&aux->mtruffle.mask2);
|
||||
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
|
||||
(u8 *)&aux->mtruffle.mask2);
|
||||
break;
|
||||
case MultibyteAccelInfo::MAT_SHIFT:
|
||||
aux->accel_type = ACCEL_MSTRUFFLE;
|
||||
aux->mtruffle.offset = offset;
|
||||
aux->mtruffle.len = info.ma_len1;
|
||||
truffleBuildMasks(stops, &aux->mtruffle.mask1,
|
||||
&aux->mtruffle.mask2);
|
||||
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
|
||||
(u8 *)&aux->mtruffle.mask2);
|
||||
break;
|
||||
case MultibyteAccelInfo::MAT_SHIFTGRAB:
|
||||
aux->accel_type = ACCEL_MSGTRUFFLE;
|
||||
aux->mtruffle.offset = offset;
|
||||
aux->mtruffle.len = info.ma_len1;
|
||||
truffleBuildMasks(stops, &aux->mtruffle.mask1,
|
||||
&aux->mtruffle.mask2);
|
||||
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
|
||||
(u8 *)&aux->mtruffle.mask2);
|
||||
break;
|
||||
case MultibyteAccelInfo::MAT_DSHIFT:
|
||||
aux->accel_type = ACCEL_MDSTRUFFLE;
|
||||
aux->mdtruffle.offset = offset;
|
||||
aux->mdtruffle.len1 = info.ma_len1;
|
||||
aux->mdtruffle.len2 = info.ma_len2;
|
||||
truffleBuildMasks(stops, &aux->mtruffle.mask1,
|
||||
&aux->mdtruffle.mask2);
|
||||
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
|
||||
(u8 *)&aux->mdtruffle.mask2);
|
||||
break;
|
||||
case MultibyteAccelInfo::MAT_DSHIFTGRAB:
|
||||
aux->accel_type = ACCEL_MDSGTRUFFLE;
|
||||
aux->mdtruffle.offset = offset;
|
||||
aux->mdtruffle.len1 = info.ma_len1;
|
||||
aux->mdtruffle.len2 = info.ma_len2;
|
||||
truffleBuildMasks(stops, &aux->mtruffle.mask1,
|
||||
&aux->mdtruffle.mask2);
|
||||
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
|
||||
(u8 *)&aux->mdtruffle.mask2);
|
||||
break;
|
||||
default:
|
||||
// shouldn't happen
|
||||
|
@@ -88,13 +88,15 @@ void nfaExecCastle_dump(const struct NFA *nfa, const string &base) {
|
||||
fprintf(f, "negated verm, scanning for 0x%02x\n", c->u.verm.c);
|
||||
break;
|
||||
case CASTLE_SHUFTI: {
|
||||
const CharReach cr = shufti2cr(c->u.shuf.mask_lo, c->u.shuf.mask_hi);
|
||||
const CharReach cr = shufti2cr((const u8 *)&c->u.shuf.mask_lo,
|
||||
(const u8 *)&c->u.shuf.mask_hi);
|
||||
fprintf(f, "shufti, scanning for %s (%zu chars)\n",
|
||||
describeClass(cr).c_str(), cr.count());
|
||||
break;
|
||||
}
|
||||
case CASTLE_TRUFFLE: {
|
||||
const CharReach cr = truffle2cr(c->u.truffle.mask1, c->u.truffle.mask2);
|
||||
const CharReach cr = truffle2cr((const u8 *)&c->u.truffle.mask1,
|
||||
(const u8 *)&c->u.truffle.mask2);
|
||||
fprintf(f, "truffle, scanning for %s (%zu chars)\n",
|
||||
describeClass(cr).c_str(), cr.count());
|
||||
break;
|
||||
|
@@ -100,13 +100,15 @@ void writeCastleScanEngine(const CharReach &cr, Castle *c) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (shuftiBuildMasks(negated, &c->u.shuf.mask_lo, &c->u.shuf.mask_hi) != -1) {
|
||||
if (shuftiBuildMasks(negated, (u8 *)&c->u.shuf.mask_lo,
|
||||
(u8 *)&c->u.shuf.mask_hi) != -1) {
|
||||
c->type = CASTLE_SHUFTI;
|
||||
return;
|
||||
}
|
||||
|
||||
c->type = CASTLE_TRUFFLE;
|
||||
truffleBuildMasks(negated, &c->u.truffle.mask1, &c->u.truffle.mask2);
|
||||
truffleBuildMasks(negated, (u8 *)(u8 *)&c->u.truffle.mask1,
|
||||
(u8 *)&c->u.truffle.mask2);
|
||||
}
|
||||
|
||||
static
|
||||
|
@@ -116,7 +116,8 @@ void nfaExecLbrShuf_dump(const NFA *nfa, const string &base) {
|
||||
const lbr_shuf *ls = (const lbr_shuf *)getImplNfa(nfa);
|
||||
lbrDumpCommon(&ls->common, f);
|
||||
|
||||
CharReach cr = shufti2cr(ls->mask_lo, ls->mask_hi);
|
||||
CharReach cr = shufti2cr((const u8 *)&ls->mask_lo,
|
||||
(const u8 *)&ls->mask_hi);
|
||||
fprintf(f, "SHUF model, scanning for: %s (%zu chars)\n",
|
||||
describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count());
|
||||
fprintf(f, "\n");
|
||||
@@ -133,7 +134,8 @@ void nfaExecLbrTruf_dump(const NFA *nfa, const string &base) {
|
||||
const lbr_truf *lt = (const lbr_truf *)getImplNfa(nfa);
|
||||
lbrDumpCommon(&lt->common, f);
|
||||
|
||||
CharReach cr = truffle2cr(lt->mask1, lt->mask2);
|
||||
CharReach cr = truffle2cr((const u8 *)&lt->mask1,
|
||||
(const u8 *)&lt->mask2);
|
||||
fprintf(f, "TRUFFLE model, scanning for: %s (%zu chars)\n",
|
||||
describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count());
|
||||
fprintf(f, "\n");
|
||||
|
@@ -193,7 +193,7 @@ void createShuffleMasks(mcsheng *m, const dfa_info &info,
|
||||
}
|
||||
for (u32 i = 0; i < N_CHARS; i++) {
|
||||
assert(info.alpha_remap[i] != info.alpha_remap[TOP]);
|
||||
m->sheng_masks[i] = loadu128(masks[info.alpha_remap[i]].data());
|
||||
memcpy((u8*)&m->sheng_masks[i], (u8*)masks[info.alpha_remap[i]].data(), sizeof(m128));
|
||||
}
|
||||
m->sheng_end = sheng_end;
|
||||
m->sheng_accel_limit = sheng_end - 1;
|
||||
|
@@ -31,7 +31,7 @@
|
||||
|
||||
#include "nfa_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/simd_utils.h"
|
||||
#include "util/simd_types.h"
|
||||
|
||||
#define ACCEPT_FLAG 0x8000
|
||||
#define ACCEL_FLAG 0x4000
|
||||
|
@@ -175,12 +175,13 @@ void writeKiloPuff(const map<ClusterKey, vector<raw_puff>>::const_iterator &it,
|
||||
size_t set = reach.find_first();
|
||||
assert(set != CharReach::npos);
|
||||
kp->u.verm.c = (char)set;
|
||||
} else if (shuftiBuildMasks(~reach, &kp->u.shuf.mask_lo,
|
||||
&kp->u.shuf.mask_hi) != -1) {
|
||||
} else if (shuftiBuildMasks(~reach, (u8 *)&kp->u.shuf.mask_lo,
|
||||
(u8 *)&kp->u.shuf.mask_hi) != -1) {
|
||||
kp->type = MPV_SHUFTI;
|
||||
} else {
|
||||
kp->type = MPV_TRUFFLE;
|
||||
truffleBuildMasks(~reach, &kp->u.truffle.mask1, &kp->u.truffle.mask2);
|
||||
truffleBuildMasks(~reach, (u8 *)&kp->u.truffle.mask1,
|
||||
(u8 *)&kp->u.truffle.mask2);
|
||||
}
|
||||
|
||||
kp->count = verify_u32(puffs.size());
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -36,7 +36,7 @@
|
||||
#define MULTISHUFTI_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "util/simd_utils.h"
|
||||
#include "util/simd_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
|
@@ -30,7 +30,7 @@
|
||||
#define SHENG_INTERNAL_H_
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "util/simd_utils.h"
|
||||
#include "util/simd_types.h"
|
||||
|
||||
#define SHENG_STATE_ACCEPT 0x10
|
||||
#define SHENG_STATE_DEAD 0x20
|
||||
|
@@ -48,7 +48,7 @@
|
||||
#include "util/compile_context.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/verify_types.h"
|
||||
#include "util/simd_utils.h"
|
||||
#include "util/simd_types.h"
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
@@ -442,8 +442,7 @@ void createShuffleMasks(sheng *s, dfa_info &info,
|
||||
#ifdef DEBUG
|
||||
dumpShuffleMask(chr, buf, sizeof(buf));
|
||||
#endif
|
||||
m128 mask = loadu128(buf);
|
||||
s->shuffle_masks[chr] = mask;
|
||||
memcpy(&s->shuffle_masks[chr], buf, sizeof(m128));
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -39,7 +39,7 @@
|
||||
#include "util/charreach.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/dump_util.h"
|
||||
#include "util/simd_utils.h"
|
||||
#include "util/simd_types.h"
|
||||
|
||||
|
||||
#ifndef DUMP_SUPPORT
|
||||
@@ -101,7 +101,7 @@ void dumpMasks(FILE *f, const sheng *s) {
|
||||
for (u32 chr = 0; chr < 256; chr++) {
|
||||
u8 buf[16];
|
||||
m128 shuffle_mask = s->shuffle_masks[chr];
|
||||
store128(buf, shuffle_mask);
|
||||
memcpy(buf, &shuffle_mask, sizeof(m128));
|
||||
|
||||
fprintf(f, "%3u: ", chr);
|
||||
for (u32 pos = 0; pos < 16; pos++) {
|
||||
@@ -237,7 +237,7 @@ void shengGetTransitions(const NFA *n, u16 state, u16 *t) {
|
||||
u8 buf[16];
|
||||
m128 shuffle_mask = s->shuffle_masks[i];
|
||||
|
||||
store128(buf, shuffle_mask);
|
||||
memcpy(buf, &shuffle_mask, sizeof(m128));
|
||||
|
||||
t[i] = buf[state] & SHENG_STATE_MASK;
|
||||
}
|
||||
|
@@ -51,7 +51,7 @@ namespace ue2 {
|
||||
*
|
||||
* Note: always able to construct masks for 8 or fewer characters.
|
||||
*/
|
||||
int shuftiBuildMasks(const CharReach &c, m128 *lo, m128 *hi) {
|
||||
int shuftiBuildMasks(const CharReach &c, u8 *lo, u8 *hi) {
|
||||
/* Things could be packed much more optimally, but this should be able to
|
||||
* handle any set of characters entirely in the lower half. */
|
||||
|
||||
@@ -134,7 +134,7 @@ void set_buckets_from_mask(u16 nibble_mask, u32 bucket,
|
||||
|
||||
bool shuftiBuildDoubleMasks(const CharReach &onechar,
|
||||
const flat_set<pair<u8, u8>> &twochar,
|
||||
m128 *lo1, m128 *hi1, m128 *lo2, m128 *hi2) {
|
||||
u8 *lo1, u8 *hi1, u8 *lo2, u8 *hi2) {
|
||||
DEBUG_PRINTF("unibytes %zu dibytes %zu\n", onechar.size(),
|
||||
twochar.size());
|
||||
array<u8, 16> lo1_a;
|
||||
@@ -210,9 +210,7 @@ bool shuftiBuildDoubleMasks(const CharReach &onechar,
|
||||
|
||||
#ifdef DUMP_SUPPORT
|
||||
|
||||
CharReach shufti2cr(const m128 lo_in, const m128 hi_in) {
|
||||
const u8 *lo = (const u8 *)&lo_in;
|
||||
const u8 *hi = (const u8 *)&hi_in;
|
||||
CharReach shufti2cr(const u8 *lo, const u8 *hi) {
|
||||
CharReach cr;
|
||||
for (u32 i = 0; i < 256; i++) {
|
||||
if (lo[(u8)i & 0xf] & hi[(u8)i >> 4]) {
|
||||
|
@@ -48,7 +48,7 @@ namespace ue2 {
|
||||
*
|
||||
* Note: always able to construct masks for 8 or fewer characters.
|
||||
*/
|
||||
int shuftiBuildMasks(const CharReach &chars, m128 *lo, m128 *hi);
|
||||
int shuftiBuildMasks(const CharReach &chars, u8 *lo, u8 *hi);
|
||||
|
||||
/** \brief Double-byte variant
|
||||
*
|
||||
@@ -56,7 +56,7 @@ int shuftiBuildMasks(const CharReach &chars, m128 *lo, m128 *hi);
|
||||
*/
|
||||
bool shuftiBuildDoubleMasks(const CharReach &onechar,
|
||||
const flat_set<std::pair<u8, u8>> &twochar,
|
||||
m128 *lo1, m128 *hi1, m128 *lo2, m128 *hi2);
|
||||
u8 *lo1, u8 *hi1, u8 *lo2, u8 *hi2);
|
||||
|
||||
#ifdef DUMP_SUPPORT
|
||||
|
||||
@@ -64,7 +64,7 @@ bool shuftiBuildDoubleMasks(const CharReach &onechar,
|
||||
* \brief Dump code: returns a CharReach with the reach that would match this
|
||||
* shufti.
|
||||
*/
|
||||
CharReach shufti2cr(const m128 lo, const m128 hi);
|
||||
CharReach shufti2cr(const u8 *lo, const u8 *hi);
|
||||
|
||||
#endif // DUMP_SUPPORT
|
||||
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -36,7 +36,7 @@
|
||||
#include "ue2common.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/simd_types.h"
|
||||
#include "util/simd_utils.h"
|
||||
|
||||
#include "util/dump_mask.h"
|
||||
|
||||
using namespace std;
|
||||
@@ -53,17 +53,15 @@ namespace ue2 {
|
||||
* bits 456 is the bit that is set at that offset.
|
||||
*/
|
||||
|
||||
void truffleBuildMasks(const CharReach &cr, m128 *shuf_mask_lo_highclear,
|
||||
m128 *shuf_mask_lo_highset) {
|
||||
*shuf_mask_lo_highset = zeroes128();
|
||||
*shuf_mask_lo_highclear = zeroes128();
|
||||
u8 *lo_highset = (u8 *)shuf_mask_lo_highset;
|
||||
u8 *lo_highclear = (u8 *)shuf_mask_lo_highclear;
|
||||
void truffleBuildMasks(const CharReach &cr, u8 *shuf_mask_lo_highclear,
|
||||
u8 *shuf_mask_lo_highset) {
|
||||
memset(shuf_mask_lo_highset, 0, sizeof(m128));
|
||||
memset(shuf_mask_lo_highclear, 0, sizeof(m128));
|
||||
|
||||
for (size_t v = cr.find_first(); v != CharReach::npos;
|
||||
v = cr.find_next(v)) {
|
||||
DEBUG_PRINTF("adding 0x%02x to %s\n", (u8)v, (v & 0x80) ? "highset" : "highclear");
|
||||
u8 *change_mask = (v & 0x80) ? lo_highset : lo_highclear;
|
||||
u8 *change_mask = (v & 0x80) ? shuf_mask_lo_highset : shuf_mask_lo_highclear;
|
||||
u8 low_nibble = v & 0xf;
|
||||
u8 bits_456 = (v & 0x70) >> 4;
|
||||
change_mask[low_nibble] |= 1 << bits_456;
|
||||
@@ -73,18 +71,16 @@ void truffleBuildMasks(const CharReach &cr, m128 *shuf_mask_lo_highclear,
|
||||
/*
|
||||
* Reconstruct the charclass that the truffle masks represent
|
||||
*/
|
||||
CharReach truffle2cr(const m128 highclear, const m128 highset) {
|
||||
const u8 *lo = (const u8 *)&highclear;
|
||||
const u8 *hi = (const u8 *)&highset;
|
||||
CharReach truffle2cr(const u8 *highclear, const u8 *highset) {
|
||||
CharReach cr;
|
||||
for (u8 i = 0; i < 16; i++) {
|
||||
u32 bits_456 = lo[i];
|
||||
u32 bits_456 = highclear[i];
|
||||
while (bits_456) {
|
||||
u32 pos = findAndClearLSB_32(&bits_456);
|
||||
assert(pos < 8);
|
||||
cr.set(pos << 4 | i);
|
||||
}
|
||||
bits_456 = hi[i];
|
||||
bits_456 = highset[i];
|
||||
while (bits_456) {
|
||||
u32 pos = findAndClearLSB_32(&bits_456);
|
||||
assert(pos < 8);
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -34,8 +34,8 @@
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
void truffleBuildMasks(const CharReach &cr, m128 *mask1, m128 *mask2);
|
||||
CharReach truffle2cr(const m128 lo_in, const m128 hi_in);
|
||||
void truffleBuildMasks(const CharReach &cr, u8 *mask1, u8 *mask2);
|
||||
CharReach truffle2cr(const u8 *lo_in, const u8 *hi_in);
|
||||
|
||||
}
|
||||
|
||||
|
@@ -224,7 +224,7 @@ aligned_unique_ptr<NFA> buildLbrShuf(const CharReach &cr,
|
||||
fillNfa<lbr_shuf>(nfa.get(), &ls->common, report, repeatMin, repeatMax,
|
||||
minPeriod, rtype);
|
||||
|
||||
if (shuftiBuildMasks(~cr, &ls->mask_lo, &ls->mask_hi) == -1) {
|
||||
if (shuftiBuildMasks(~cr, (u8 *)&ls->mask_lo, (u8 *)&ls->mask_hi) == -1) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@@ -245,7 +245,7 @@ aligned_unique_ptr<NFA> buildLbrTruf(const CharReach &cr,
|
||||
fillNfa<lbr_truf>(nfa.get(), &lc->common, report, repeatMin, repeatMax,
|
||||
minPeriod, rtype);
|
||||
|
||||
truffleBuildMasks(~cr, &lc->mask1, &lc->mask2);
|
||||
truffleBuildMasks(~cr, (u8 *)&lc->mask1, (u8 *)&lc->mask2);
|
||||
|
||||
DEBUG_PRINTF("built truffle lbr\n");
|
||||
return nfa;
|
||||
|
@@ -2010,7 +2010,7 @@ void buildCountingMiracles(build_context &bc) {
|
||||
rcm.c = cr.find_first();
|
||||
} else {
|
||||
rcm.shufti = 1;
|
||||
int rv = shuftiBuildMasks(cr, &rcm.lo, &rcm.hi);
|
||||
int rv = shuftiBuildMasks(cr, (u8 *)&rcm.lo, (u8 *)&rcm.hi);
|
||||
if (rv == -1) {
|
||||
DEBUG_PRINTF("failed to build shufti\n");
|
||||
lbi.countingMiracleCount = 0; /* remove counting miracle */
|
||||
|
@@ -62,6 +62,10 @@
|
||||
#endif
|
||||
|
||||
typedef __m128i m128;
|
||||
#else
|
||||
typedef struct ALIGN_DIRECTIVE {u64a hi; u64a lo;} m128;
|
||||
#endif
|
||||
|
||||
#if defined(__AVX2__)
|
||||
typedef __m256i m256;
|
||||
#else
|
||||
|
Reference in New Issue
Block a user