fdr: remove confirm split and pull-back

This commit is contained in:
Wang, Xiang W 2016-12-14 21:38:03 -05:00 committed by Matthew Barr
parent 7c2627f2c2
commit df7bc22ae0
13 changed files with 68 additions and 158 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -38,29 +38,19 @@ class EngineDescription {
u32 id;
target_t code_target; // the target that we built this code for
u32 numBuckets;
u32 confirmPullBackDistance;
u32 confirmTopLevelSplit;
public:
EngineDescription(u32 id_in, const target_t &code_target_in,
u32 numBuckets_in, u32 confirmPullBackDistance_in,
u32 confirmTopLevelSplit_in)
: id(id_in), code_target(code_target_in), numBuckets(numBuckets_in),
confirmPullBackDistance(confirmPullBackDistance_in),
confirmTopLevelSplit(confirmTopLevelSplit_in) {}
u32 numBuckets_in)
: id(id_in), code_target(code_target_in), numBuckets(numBuckets_in) {}
virtual ~EngineDescription();
u32 getID() const { return id; }
u32 getNumBuckets() const { return numBuckets; }
u32 getConfirmPullBackDistance() const { return confirmPullBackDistance; }
u32 getConfirmTopLevelSplit() const { return confirmTopLevelSplit; }
void setConfirmTopLevelSplit(u32 split) { confirmTopLevelSplit = split; }
bool isValidOnTarget(const target_t &target_in) const;
virtual u32 getDefaultFloodSuffixLength() const = 0;
virtual bool typicallyHoldsOneCharLits() const { return true; }
};
/** Returns a target given a CPU feature set value. */

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -315,7 +315,6 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control,
const u32 *confBase, const struct FDR_Runtime_Args *a,
const u8 *ptr, u32 *last_match_id, struct zone *z) {
const u8 bucket = 8;
const u8 pullback = 1;
if (likely(!*conf)) {
return;
@ -332,8 +331,7 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control,
u32 bit = findAndClearLSB_64(conf);
u32 byte = bit / bucket + offset;
u32 bitRem = bit % bucket;
u32 confSplit = *(ptr + byte);
u32 idx = confSplit * bucket + bitRem;
u32 idx = bitRem;
u32 cf = confBase[idx];
if (!cf) {
continue;
@ -353,8 +351,8 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control,
id, a->ctxt);
continue;
}
u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a));
confWithBit(fdrc, a, ptr_main - a->buf + byte, pullback, control,
u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a) + 1);
confWithBit(fdrc, a, ptr_main - a->buf + byte, control,
last_match_id, confVal);
} while (unlikely(!!*conf));
}

View File

@ -147,7 +147,7 @@ FDRCompiler::setupFDR(pair<aligned_unique_ptr<u8>, size_t> &link) {
size_t tabSize = eng.getTabSizeBytes();
auto floodControlTmp = setupFDRFloodControl(lits, eng);
auto confirmTmp = setupFullMultiConfs(lits, eng, bucketToLits, make_small);
auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small);
assert(ISALIGNED_16(tabSize));
assert(ISALIGNED_16(confirmTmp.second));

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -56,7 +56,7 @@ class EngineDescription;
class FDREngineDescription;
struct hwlmStreamingControl;
std::pair<aligned_unique_ptr<u8>, size_t> setupFullMultiConfs(
std::pair<aligned_unique_ptr<u8>, size_t> setupFullConfs(
const std::vector<hwlmLiteral> &lits, const EngineDescription &eng,
std::map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits,
bool make_small);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -45,9 +45,7 @@ using namespace std;
namespace ue2 {
using ConfSplitType = u8;
using BucketSplitPair = pair<BucketIndex, ConfSplitType>;
using BC2CONF = map<BucketSplitPair,
using BC2CONF = map<BucketIndex,
pair<aligned_unique_ptr<FDRConfirm>, size_t>>;
// return the number of bytes beyond a length threshold in all strings in lits
@ -151,8 +149,8 @@ void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
//#define FDR_CONFIRM_DUMP 1
static pair<aligned_unique_ptr<FDRConfirm>, size_t>
getFDRConfirm(const vector<hwlmLiteral> &lits, bool applyOneCharOpt,
bool make_small, bool make_confirm) {
getFDRConfirm(const vector<hwlmLiteral> &lits, bool make_small,
bool make_confirm) {
vector<LitInfo> tmpLitInfo(lits.size());
CONF_TYPE andmsk;
fillLitInfo(lits, tmpLitInfo, andmsk);
@ -177,8 +175,7 @@ getFDRConfirm(const vector<hwlmLiteral> &lits, bool applyOneCharOpt,
u32 soleLitCmp = 0;
u32 soleLitMsk = 0;
if ((applyOneCharOpt && lits.size() == 1 && lits[0].s.size() == 0 &&
lits[0].msk.empty()) || make_confirm == false) {
if (!make_confirm) {
flags = FDRC_FLAG_NO_CONFIRM;
if (lits[0].noruns) {
flags |= NoRepeat; // messy - need to clean this up later as flags is sorta kinda obsoleted
@ -345,15 +342,11 @@ getFDRConfirm(const vector<hwlmLiteral> &lits, bool applyOneCharOpt,
return {move(fdrc), actual_size};
}
static
u32 setupMultiConfirms(const vector<hwlmLiteral> &lits,
const EngineDescription &eng, BC2CONF &bc2Conf,
map<BucketIndex, vector<LiteralIndex> > &bucketToLits,
pair<aligned_unique_ptr<u8>, size_t>
setupFullConfs(const vector<hwlmLiteral> &lits,
const EngineDescription &eng,
map<BucketIndex, vector<LiteralIndex>> &bucketToLits,
bool make_small) {
u32 pullBack = eng.getConfirmPullBackDistance();
u32 splitMask = eng.getConfirmTopLevelSplit() - 1;
bool splitHasCase = splitMask & 0x20;
bool makeConfirm = true;
unique_ptr<TeddyEngineDescription> teddyDescr =
getTeddyDescription(eng.getID());
@ -361,81 +354,24 @@ u32 setupMultiConfirms(const vector<hwlmLiteral> &lits,
makeConfirm = teddyDescr->needConfirm(lits);
}
BC2CONF bc2Conf;
u32 totalConfirmSize = 0;
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
if (!bucketToLits[b].empty()) {
vector<vector<hwlmLiteral>> vl(eng.getConfirmTopLevelSplit());
vector<hwlmLiteral> vl;
for (const LiteralIndex &lit_idx : bucketToLits[b]) {
hwlmLiteral lit = lits[lit_idx]; // copy
// c is last char of this literal
u8 c = *(lit.s.rbegin());
bool suppressSplit = false;
if (pullBack) {
// make a shorter string to work over if we're pulling back
// getFDRConfirm doesn't know about that stuff
assert(lit.s.size() >= pullBack);
lit.s.resize(lit.s.size() - pullBack);
u8 c_sub, c_sub_msk;
if (lit.msk.empty()) {
c_sub = 0;
c_sub_msk = 0;
} else {
c_sub = *(lit.cmp.rbegin());
c_sub_msk = *(lit.msk.rbegin());
size_t len = lit.msk.size() -
min(lit.msk.size(), (size_t)pullBack);
lit.msk.resize(len);
lit.cmp.resize(len);
vl.push_back(lits[lit_idx]);
}
// if c_sub_msk is 0xff and lit.nocase
// resteer 'c' to an exact value and set suppressSplit
if ((c_sub_msk == 0xff) && (lit.nocase)) {
suppressSplit = true;
c = c_sub;
}
}
if (!suppressSplit && splitHasCase && lit.nocase &&
ourisalpha(c)) {
vl[(u8)(mytoupper(c) & splitMask)].push_back(lit);
vl[(u8)(mytolower(c) & splitMask)].push_back(lit);
} else {
vl[c & splitMask].push_back(lit);
}
}
for (u32 c = 0; c < eng.getConfirmTopLevelSplit(); c++) {
if (vl[c].empty()) {
continue;
}
DEBUG_PRINTF("b %d c %02x sz %zu\n", b, c, vl[c].size());
auto key = make_pair(b, c);
auto fc = getFDRConfirm(vl[c], eng.typicallyHoldsOneCharLits(),
make_small, makeConfirm);
DEBUG_PRINTF("b %d sz %zu\n", b, vl.size());
auto fc = getFDRConfirm(vl, make_small, makeConfirm);
totalConfirmSize += fc.second;
assert(bc2Conf.find(key) == end(bc2Conf));
bc2Conf.emplace(key, move(fc));
bc2Conf.emplace(b, move(fc));
}
}
}
return totalConfirmSize;
}
pair<aligned_unique_ptr<u8>, size_t>
setupFullMultiConfs(const vector<hwlmLiteral> &lits,
const EngineDescription &eng,
map<BucketIndex, vector<LiteralIndex>> &bucketToLits,
bool make_small) {
BC2CONF bc2Conf;
u32 totalConfirmSize = setupMultiConfirms(lits, eng, bc2Conf, bucketToLits,
make_small);
u32 primarySwitch = eng.getConfirmTopLevelSplit();
u32 nBuckets = eng.getNumBuckets();
u32 totalConfSwitchSize = primarySwitch * nBuckets * sizeof(u32);
u32 totalConfSwitchSize = nBuckets * sizeof(u32);
u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize);
auto buf = aligned_zmalloc_unique<u8>(totalSize);
@ -445,14 +381,12 @@ setupFullMultiConfs(const vector<hwlmLiteral> &lits,
u8 *ptr = buf.get() + totalConfSwitchSize;
for (const auto &m : bc2Conf) {
const BucketIndex &b = m.first.first;
const u8 &c = m.first.second;
const BucketIndex &idx = m.first;
const pair<aligned_unique_ptr<FDRConfirm>, size_t> &p = m.second;
// confirm offset is relative to the base of this structure, now
u32 confirm_offset = verify_u32(ptr - buf.get());
memcpy(ptr, p.first.get(), p.second);
ptr += p.second;
u32 idx = c * nBuckets + b;
confBase[idx] = confirm_offset;
}
return {move(buf), totalSize};

View File

@ -40,8 +40,8 @@
// the whole confirmation procedure
static really_inline
void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a,
size_t i, u32 pullBackAmount, hwlmcb_rv_t *control,
u32 *last_match, u64a conf_key) {
size_t i, hwlmcb_rv_t *control, u32 *last_match,
u64a conf_key) {
assert(i < a->len);
assert(ISALIGNED(fdrc));
@ -68,7 +68,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
goto out;
}
const u8 *loc = buf + i - li->size + 1 - pullBackAmount;
const u8 *loc = buf + i - li->size + 1;
if (loc < buf) {
u32 full_overhang = buf - loc;
@ -87,7 +87,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
}
if (unlikely(li->flags & ComplexConfirm)) {
const u8 *loc2 = buf + i - li->extended_size + 1 - pullBackAmount;
const u8 *loc2 = buf + i - li->extended_size + 1;
if (loc2 < buf) {
u32 full_overhang = buf - loc2;
size_t len_history = a->len_history;
@ -116,7 +116,7 @@ void confWithBit1(const struct FDRConfirm *fdrc,
assert(ISALIGNED(fdrc));
if (unlikely(fdrc->mult)) {
confWithBit(fdrc, a, i, 0, control, last_match, conf_key);
confWithBit(fdrc, a, i, control, last_match, conf_key);
return;
} else {
u32 id = fdrc->nBitsOrSoleID;
@ -144,7 +144,7 @@ void confWithBitMany(const struct FDRConfirm *fdrc,
}
if (unlikely(fdrc->mult)) {
confWithBit(fdrc, a, i, 0, control, last_match, conf_key);
confWithBit(fdrc, a, i, control, last_match, conf_key);
return;
} else {
const u32 id = fdrc->nBitsOrSoleID;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -44,8 +44,7 @@ namespace ue2 {
// Constructs an FDR engine description from a static FDREngineDef.
//
// The base EngineDescription receives def.id, the target_t returned by
// targetByArchFeatures(def.cpu_features) and def.numBuckets. schemeWidth is
// copied from the definition; stride and bits are initialised to 0 here.
//
// NOTE(review): this listing appears to carry both the old and the new
// base-initialiser arguments (the lines naming confirmPullBackDistance /
// confirmTopLevelSplit versus the lone `def.numBuckets),` line) - confirm
// which form is live before relying on it.
FDREngineDescription::FDREngineDescription(const FDREngineDef &def)
: EngineDescription(def.id, targetByArchFeatures(def.cpu_features),
def.numBuckets, def.confirmPullBackDistance,
def.confirmTopLevelSplit),
def.numBuckets),
schemeWidth(def.schemeWidth), stride(0), bits(0) {}
u32 FDREngineDescription::getDefaultFloodSuffixLength() const {
@ -55,7 +54,7 @@ u32 FDREngineDescription::getDefaultFloodSuffixLength() const {
}
// Replaces the contents of *out with the FDR engine description(s) known to
// this file: the vector is cleared and then a single FDREngineDescription is
// constructed in place from the static definition `def`.
//
// NOTE(review): two initialisers for `def` are shown; the six-value form
// presumably belongs to the FDREngineDef layout that still had the
// confirmPullBackDistance / confirmTopLevelSplit fields, the four-value form
// to the layout without them - confirm. The values presumably map to
// id, schemeWidth, numBuckets, cpu_features in that order; verify against the
// struct declaration.
void getFdrDescriptions(vector<FDREngineDescription> *out) {
static const FDREngineDef def = {0, 128, 8, 0, 1, 256};
static const FDREngineDef def = {0, 128, 8, 0};
// Drop whatever the caller passed in so the result holds only our entry.
out->clear();
out->emplace_back(def);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -43,8 +43,6 @@ struct FDREngineDef {
u32 schemeWidth;
u32 numBuckets;
u64a cpu_features;
u32 confirmPullBackDistance;
u32 confirmTopLevelSplit;
};
class FDREngineDescription : public EngineDescription {
@ -64,7 +62,6 @@ public:
explicit FDREngineDescription(const FDREngineDef &def);
u32 getDefaultFloodSuffixLength() const override;
bool typicallyHoldsOneCharLits() const override { return stride == 1; }
};
std::unique_ptr<FDREngineDescription>

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, Intel Corporation
* Copyright (c) 2016-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -345,10 +345,7 @@ void do_confWithBit_fast_teddy(u16 bits, const u32 *confBase,
const struct FDR_Runtime_Args *a, const u8 *ptr,
hwlmcb_rv_t *control, u32 *last_match) {
u32 byte = bits / 8;
u32 bitRem = bits % 8;
u32 confSplit = *(ptr+byte) & 0x1f;
u32 idx = confSplit * 8 + bitRem;
u32 cf = confBase[idx];
u32 cf = confBase[bits % 8];
if (!cf) {
return;
}
@ -358,7 +355,7 @@ void do_confWithBit_fast_teddy(u16 bits, const u32 *confBase,
return;
}
u64a confVal = getConfVal(a, ptr, byte, reason);
confWithBit(fdrc, a, ptr - a->buf + byte, 0, control, last_match, confVal);
confWithBit(fdrc, a, ptr - a->buf + byte, control, last_match, confVal);
}
static really_inline

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -309,7 +309,7 @@ TeddyCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) {
size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;
auto floodControlTmp = setupFDRFloodControl(lits, eng);
auto confirmTmp = setupFullMultiConfs(lits, eng, bucketToLits, make_small);
auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small);
size_t size = ROUNDUP_N(sizeof(Teddy) +
maskLen +

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -44,8 +44,7 @@ namespace ue2 {
// Constructs a Teddy engine description from a static TeddyEngineDef.
//
// The base EngineDescription receives def.id, the target_t returned by
// targetByArchFeatures(def.cpu_features) and def.numBuckets. numMasks and
// packed are copied straight from the definition.
//
// NOTE(review): this listing appears to carry both the old and the new
// base-initialiser arguments (the lines naming confirmPullBackDistance /
// confirmTopLevelSplit versus the lone `def.numBuckets),` line) - confirm
// which form is live before relying on it.
TeddyEngineDescription::TeddyEngineDescription(const TeddyEngineDef &def)
: EngineDescription(def.id, targetByArchFeatures(def.cpu_features),
def.numBuckets, def.confirmPullBackDistance,
def.confirmTopLevelSplit),
def.numBuckets),
numMasks(def.numMasks), packed(def.packed) {}
u32 TeddyEngineDescription::getDefaultFloodSuffixLength() const {
@ -66,24 +65,24 @@ bool TeddyEngineDescription::needConfirm(const vector<hwlmLiteral> &lits) const
void getTeddyDescriptions(vector<TeddyEngineDescription> *out) {
static const TeddyEngineDef defns[] = {
{ 1, 0 | HS_CPU_FEATURES_AVX2, 1, 8, false, 0, 1 },
{ 2, 0 | HS_CPU_FEATURES_AVX2, 1, 8, true, 0, 32 },
{ 3, 0 | HS_CPU_FEATURES_AVX2, 1, 16, false, 0, 1 },
{ 4, 0 | HS_CPU_FEATURES_AVX2, 1, 16, true, 0, 32 },
{ 5, 0 | HS_CPU_FEATURES_AVX2, 2, 16, false, 0, 1 },
{ 6, 0 | HS_CPU_FEATURES_AVX2, 2, 16, true, 0, 32 },
{ 7, 0 | HS_CPU_FEATURES_AVX2, 3, 16, false, 0, 1 },
{ 8, 0 | HS_CPU_FEATURES_AVX2, 3, 16, true, 0, 32 },
{ 9, 0 | HS_CPU_FEATURES_AVX2, 4, 16, false, 0, 1 },
{ 10, 0 | HS_CPU_FEATURES_AVX2, 4, 16, true, 0, 32 },
{ 11, 0, 1, 8, false, 0, 1 },
{ 12, 0, 1, 8, true, 0, 32 },
{ 13, 0, 2, 8, false, 0, 1 },
{ 14, 0, 2, 8, true, 0, 32 },
{ 15, 0, 3, 8, false, 0, 1 },
{ 16, 0, 3, 8, true, 0, 32 },
{ 17, 0, 4, 8, false, 0, 1 },
{ 18, 0, 4, 8, true, 0, 32 },
{ 1, 0 | HS_CPU_FEATURES_AVX2, 1, 8, false },
{ 2, 0 | HS_CPU_FEATURES_AVX2, 1, 8, true },
{ 3, 0 | HS_CPU_FEATURES_AVX2, 1, 16, false },
{ 4, 0 | HS_CPU_FEATURES_AVX2, 1, 16, true },
{ 5, 0 | HS_CPU_FEATURES_AVX2, 2, 16, false },
{ 6, 0 | HS_CPU_FEATURES_AVX2, 2, 16, true },
{ 7, 0 | HS_CPU_FEATURES_AVX2, 3, 16, false },
{ 8, 0 | HS_CPU_FEATURES_AVX2, 3, 16, true },
{ 9, 0 | HS_CPU_FEATURES_AVX2, 4, 16, false },
{ 10, 0 | HS_CPU_FEATURES_AVX2, 4, 16, true },
{ 11, 0, 1, 8, false },
{ 12, 0, 1, 8, true },
{ 13, 0, 2, 8, false },
{ 14, 0, 2, 8, true },
{ 15, 0, 3, 8, false },
{ 16, 0, 3, 8, true },
{ 17, 0, 4, 8, false },
{ 18, 0, 4, 8, true },
};
out->clear();
for (const auto &def : defns) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -45,8 +45,6 @@ struct TeddyEngineDef {
u32 numMasks;
u32 numBuckets;
bool packed;
u32 confirmPullBackDistance;
u32 confirmTopLevelSplit;
};
class TeddyEngineDescription : public EngineDescription {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, Intel Corporation
* Copyright (c) 2016-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -180,9 +180,7 @@ void do_confWithBit_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset,
do {
u32 bit = TEDDY_FIND_AND_CLEAR_LSB(conf);
u32 byte = bit / bucket + offset;
u32 bitRem = bit % bucket;
u32 confSplit = *(ptr+byte) & 0x1f;
u32 idx = confSplit * bucket + bitRem;
u32 idx = bit % bucket;
u32 cf = confBase[idx];
if (!cf) {
continue;
@ -193,7 +191,7 @@ void do_confWithBit_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset,
continue;
}
u64a confVal = getConfVal(a, ptr, byte, reason);
confWithBit(fdrc, a, ptr - a->buf + byte, 0, control,
confWithBit(fdrc, a, ptr - a->buf + byte, control,
last_match, confVal);
} while (unlikely(*conf));
}