AVX512 Reinforced FAT teddy.

This commit is contained in:
Chang, Harry
2017-07-13 14:38:06 +08:00
committed by Matthew Barr
parent 4528485a56
commit 8da2d13baa
4 changed files with 366 additions and 37 deletions

View File

@@ -325,44 +325,56 @@ bool pack(const vector<hwlmLiteral> &lits,
// Size in bytes of one per-character entry in a reinforcement table; the
// helpers in this file address an entry as base + index * REINFORCED_MSK_LEN.
#define REINFORCED_MSK_LEN 8
// Set the first N_CHARS 64-bit entries of every reinforcement sub-table to
// 0x00ffffffffffffff.
//
// rmsk      - start of the reinforcement table storage.
// rmsklen   - total length of that storage in bytes; sub-table b begins at
//             byte offset b * (rmsklen / maskWidth).
// maskWidth - number of sub-tables to initialise.
//
// NOTE(review): this listing looks like a rendered diff with its +/- markers
// stripped, so the earlier one-argument form (reinforcedMsk) and the newer
// three-argument form both appear below -- confirm against the real file.
static
void initReinforcedTable(u8 *reinforcedMsk) {
u64a *mask = (u64a *)reinforcedMsk;
fill_n(mask, N_CHARS, 0x00ffffffffffffffULL);
void initReinforcedTable(u8 *rmsk, const size_t rmsklen,
const u32 maskWidth) {
for (u32 b = 0; b < maskWidth; b++) {
// Treat sub-table b as an array of 64-bit words, one word per entry.
u64a *mask = (u64a *)(rmsk + b * (rmsklen / maskWidth));
fill_n(mask, N_CHARS, 0x00ffffffffffffffULL);
}
}
// Zero the REINFORCED_MSK_LEN bytes of the entry at index NO_REINFORCEMENT in
// every reinforcement sub-table.
//
// rmsk      - start of the reinforcement table storage.
// rmsklen   - total length of that storage in bytes; sub-table b begins at
//             byte offset b * (rmsklen / maskWidth).
// maskWidth - number of sub-tables to update.
//
// NOTE(review): this listing looks like a rendered diff with its +/- markers
// stripped, so the earlier one-argument form and the newer three-argument
// form both appear below -- confirm against the real file.
static
void fillReinforcedMskZero(u8 *reinforcedMsk) {
u8 *mc = reinforcedMsk + NO_REINFORCEMENT * REINFORCED_MSK_LEN;
fill_n(mc, REINFORCED_MSK_LEN, 0x00);
void fillReinforcedMskZero(u8 *rmsk, const size_t rmsklen,
const u32 maskWidth) {
for (u32 b = 0; b < maskWidth; b++) {
// Locate the NO_REINFORCEMENT entry inside sub-table b.
u8 *mc = rmsk + b * (rmsklen / maskWidth) +
NO_REINFORCEMENT * REINFORCED_MSK_LEN;
fill_n(mc, REINFORCED_MSK_LEN, 0x00);
}
}
// Clear the bits selected by bmsk in byte (j - 1) of a character's entry.
//
// rmsk - start of the reinforcement table storage.
// boff - byte offset added to rmsk before indexing by character.
// c    - character whose entry is updated; when c == ALL_CHAR_SET the same
//        update is applied to all N_CHARS entries.
// j    - 1-based byte position within the entry; must be > 0 (asserted).
// bmsk - bit mask of the bits to clear.
//
// NOTE(review): this listing looks like a rendered diff with its +/- markers
// stripped, so lines using the earlier reinforcedMsk parameter and lines using
// the newer rmsk + boff addressing both appear below -- confirm against the
// real file.
static
void fillReinforcedMsk(u8 *reinforcedMsk, u16 c, u32 j, u8 bmsk) {
void fillReinforcedMsk(u8 *rmsk, u32 boff, u16 c, u32 j, u8 bmsk) {
assert(j > 0);
if (c == ALL_CHAR_SET) {
// Wildcard: apply the update to every character entry.
for (size_t i = 0; i < N_CHARS; i++) {
u8 *mc = reinforcedMsk + i * REINFORCED_MSK_LEN;
u8 *mc = rmsk + boff + i * REINFORCED_MSK_LEN;
mc[j - 1] &= ~bmsk;
}
} else {
// Single character: update only that character's entry.
u8 *mc = reinforcedMsk + c * REINFORCED_MSK_LEN;
u8 *mc = rmsk + boff + c * REINFORCED_MSK_LEN;
mc[j - 1] &= ~bmsk;
}
}
// Debug helper, compiled only when TEDDY_DEBUG is defined: print the
// reinforcement table to stdout.
//
// For each of the maskWidth sub-tables it prints a heading naming buckets
// b * 8 .. b * 8 + 7, then one row per entry index 0..N_CHARS inclusive. Each
// row shows the index in hex followed by the entry's REINFORCED_MSK_LEN bytes,
// every byte rendered as eight '0'/'1' characters starting from bit 0.
//
// NOTE(review): this listing looks like a rendered diff with its +/- markers
// stripped, so the earlier single-table loop and the newer per-sub-table loop
// are interleaved below -- confirm against the real file.
#ifdef TEDDY_DEBUG
static
void dumpReinforcedMaskTable(const u8 *msks) {
for (u32 i = 0; i <= N_CHARS; i++) {
printf("0x%02x: ", i);
for (u32 j = 0; j < REINFORCED_MSK_LEN; j++) {
u8 val = msks[i * REINFORCED_MSK_LEN + j];
for (u32 k = 0; k < 8; k++) {
printf("%s", ((val >> k) & 0x1) ? "1" : "0");
void dumpReinforcedMaskTable(const u8 *rmsk, const size_t rmsklen,
const u32 maskWidth) {
for (u32 b = 0; b < maskWidth; b++) {
printf("reinforcement table for bucket %u..%u:\n", b * 8, b * 8 + 7);
for (u32 i = 0; i <= N_CHARS; i++) {
printf("0x%02x: ", i);
for (u32 j = 0; j < REINFORCED_MSK_LEN; j++) {
// Byte j of entry i within sub-table b.
u8 val = rmsk[b * (rmsklen / maskWidth) +
i * REINFORCED_MSK_LEN + j];
for (u32 k = 0; k < 8; k++) {
printf("%s", ((val >> k) & 0x1) ? "1" : "0");
}
printf(" ");
}
printf(" ");
printf("\n");
}
printf("\n");
}
}
#endif
@@ -443,12 +455,13 @@ static
// Build the reinforcement table for a set of bucketed literals.
//
// The table is first initialised with initReinforcedTable(). Then, for every
// literal in every bucket and every position j in 1..REINFORCED_MSK_LEN-1,
// the bucket's bit is cleared in byte (j - 1) of the relevant character
// entries via fillReinforcedMsk(). Finally the NO_REINFORCEMENT entries are
// zeroed with fillReinforcedMskZero().
//
// bucketToLits - map from bucket index to the ids of the literals it holds.
// lits         - the literals (their use is in lines not visible here).
// rmsk         - start of the reinforcement table storage.
// rmsklen      - total length of that storage in bytes.
// maskWidth    - number of sub-tables; rmsklen / maskWidth is the byte length
//                of one sub-table.
//
// NOTE(review): this listing looks like a rendered diff with its +/- markers
// stripped; old (reinforcedMsk) and new (rmsk, boff) lines are interleaved and
// a hunk boundary hides where `l` and `sz` are defined -- presumably the
// current literal and its length; confirm against the real file.
void fillReinforcedTable(const map<BucketIndex,
vector<LiteralIndex>> &bucketToLits,
const vector<hwlmLiteral> &lits,
u8 *reinforcedMsk) {
initReinforcedTable(reinforcedMsk);
u8 *rmsk, const size_t rmsklen, const u32 maskWidth) {
initReinforcedTable(rmsk, rmsklen, maskWidth);
for (const auto &b2l : bucketToLits) {
const u32 &bucket_id = b2l.first;
const vector<LiteralIndex> &ids = b2l.second;
// Eight buckets share a sub-table: bucket_id / 8 selects the sub-table and
// bucket_id % 8 selects the bit within each byte.
const u32 boff = (bucket_id / 8) * (rmsklen / maskWidth);
const u8 bmsk = 1U << (bucket_id % 8);
for (const LiteralIndex &lit_id : ids) {
@@ -459,23 +472,23 @@ void fillReinforcedTable(const map<BucketIndex,
// fill in reinforced masks
for (u32 j = 1; j < REINFORCED_MSK_LEN; j++) {
if (sz - 1 < j) {
// No character at this distance: apply the update to all characters.
fillReinforcedMsk(reinforcedMsk, ALL_CHAR_SET, j, bmsk);
fillReinforcedMsk(rmsk, boff, ALL_CHAR_SET, j, bmsk);
} else {
u8 c = l.s[sz - 1 - j];
if (l.nocase && ourisalpha(c)) {
// Caseless alphabetic character: update both the (c & 0xdf) and
// (c | 0x20) variants.
u8 c_up = c & 0xdf;
fillReinforcedMsk(reinforcedMsk, c_up, j, bmsk);
fillReinforcedMsk(rmsk, boff, c_up, j, bmsk);
u8 c_lo = c | 0x20;
fillReinforcedMsk(reinforcedMsk, c_lo, j, bmsk);
fillReinforcedMsk(rmsk, boff, c_lo, j, bmsk);
} else {
fillReinforcedMsk(reinforcedMsk, c, j, bmsk);
fillReinforcedMsk(rmsk, boff, c, j, bmsk);
}
}
}
}
}
fillReinforcedMskZero(reinforcedMsk);
fillReinforcedMskZero(rmsk, rmsklen, maskWidth);
}
bytecode_ptr<FDR> TeddyCompiler::build() {
@@ -483,7 +496,7 @@ bytecode_ptr<FDR> TeddyCompiler::build() {
size_t headerSize = sizeof(Teddy);
size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;
size_t reinforcedMaskLen = (N_CHARS + 1) * REINFORCED_MSK_LEN;
size_t reinforcedMaskLen = (N_CHARS + 1) * REINFORCED_MSK_LEN * maskWidth;
auto floodTable = setupFDRFloodControl(lits, eng, grey);
auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small);
@@ -525,7 +538,8 @@ bytecode_ptr<FDR> TeddyCompiler::build() {
// Write reinforcement masks.
u8 *reinforcedMsk = baseMsk + ROUNDUP_CL(maskLen);
fillReinforcedTable(bucketToLits, lits, reinforcedMsk);
fillReinforcedTable(bucketToLits, lits, reinforcedMsk,
reinforcedMaskLen, maskWidth);
#ifdef TEDDY_DEBUG
for (u32 i = 0; i < eng.numMasks * 2; i++) {
@@ -541,7 +555,7 @@ bytecode_ptr<FDR> TeddyCompiler::build() {
printf("\n===============================================\n"
"reinforced mask table for low boundary (original)\n\n");
dumpReinforcedMaskTable(reinforcedMsk);
dumpReinforcedMaskTable(reinforcedMsk, reinforcedMaskLen, maskWidth);
#endif
return fdr;