diff --git a/src/fdr/engine_description.h b/src/fdr/engine_description.h index 09b16179..b545e647 100644 --- a/src/fdr/engine_description.h +++ b/src/fdr/engine_description.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,29 +38,19 @@ class EngineDescription { u32 id; target_t code_target; // the target that we built this code for u32 numBuckets; - u32 confirmPullBackDistance; - u32 confirmTopLevelSplit; public: EngineDescription(u32 id_in, const target_t &code_target_in, - u32 numBuckets_in, u32 confirmPullBackDistance_in, - u32 confirmTopLevelSplit_in) - : id(id_in), code_target(code_target_in), numBuckets(numBuckets_in), - confirmPullBackDistance(confirmPullBackDistance_in), - confirmTopLevelSplit(confirmTopLevelSplit_in) {} + u32 numBuckets_in) + : id(id_in), code_target(code_target_in), numBuckets(numBuckets_in) {} virtual ~EngineDescription(); u32 getID() const { return id; } u32 getNumBuckets() const { return numBuckets; } - u32 getConfirmPullBackDistance() const { return confirmPullBackDistance; } - u32 getConfirmTopLevelSplit() const { return confirmTopLevelSplit; } - void setConfirmTopLevelSplit(u32 split) { confirmTopLevelSplit = split; } bool isValidOnTarget(const target_t &target_in) const; virtual u32 getDefaultFloodSuffixLength() const = 0; - - virtual bool typicallyHoldsOneCharLits() const { return true; } }; /** Returns a target given a CPU feature set value. */ diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index 23416c70..5ac8388c 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -315,7 +315,6 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control, const u32 *confBase, const struct FDR_Runtime_Args *a, const u8 *ptr, u32 *last_match_id, struct zone *z) { const u8 bucket = 8; - const u8 pullback = 1; if (likely(!*conf)) { return; @@ -332,8 +331,7 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control, u32 bit = findAndClearLSB_64(conf); u32 byte = bit / bucket + offset; u32 bitRem = bit % bucket; - u32 confSplit = *(ptr + byte); - u32 idx = confSplit * bucket + bitRem; + u32 idx = bitRem; u32 cf = confBase[idx]; if (!cf) { continue; @@ -353,8 +351,8 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control, id, a->ctxt); continue; } - u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a)); - confWithBit(fdrc, a, ptr_main - a->buf + byte, pullback, control, + u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a) + 1); + confWithBit(fdrc, a, ptr_main - a->buf + byte, control, last_match_id, confVal); } while (unlikely(!!*conf)); } diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 85342f9a..cd3b57de 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -147,7 +147,7 @@ FDRCompiler::setupFDR(pair, size_t> &link) { size_t tabSize = eng.getTabSizeBytes(); auto floodControlTmp = setupFDRFloodControl(lits, eng); - auto confirmTmp = setupFullMultiConfs(lits, eng, bucketToLits, make_small); + auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small); assert(ISALIGNED_16(tabSize)); assert(ISALIGNED_16(confirmTmp.second)); diff --git a/src/fdr/fdr_compile_internal.h b/src/fdr/fdr_compile_internal.h index 48e2ed6f..0fd59902 100644 --- a/src/fdr/fdr_compile_internal.h +++ b/src/fdr/fdr_compile_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -56,7 +56,7 @@ class EngineDescription; class FDREngineDescription; struct hwlmStreamingControl; -std::pair, size_t> setupFullMultiConfs( +std::pair, size_t> setupFullConfs( const std::vector &lits, const EngineDescription &eng, std::map> &bucketToLits, bool make_small); diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index 30f682d1..e5969261 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -45,9 +45,7 @@ using namespace std; namespace ue2 { -using ConfSplitType = u8; -using BucketSplitPair = pair; -using BC2CONF = map, size_t>>; // return the number of bytes beyond a length threshold in all strings in lits @@ -151,8 +149,8 @@ void fillLitInfo(const vector &lits, vector &tmpLitInfo, //#define FDR_CONFIRM_DUMP 1 static pair, size_t> -getFDRConfirm(const vector &lits, bool applyOneCharOpt, - bool make_small, bool make_confirm) { +getFDRConfirm(const vector &lits, bool make_small, + bool make_confirm) { vector tmpLitInfo(lits.size()); CONF_TYPE andmsk; fillLitInfo(lits, tmpLitInfo, andmsk); @@ -177,8 +175,7 @@ getFDRConfirm(const vector &lits, bool applyOneCharOpt, u32 soleLitCmp = 0; u32 soleLitMsk = 0; - if ((applyOneCharOpt && lits.size() == 1 && lits[0].s.size() == 0 && - lits[0].msk.empty()) || make_confirm == false) { + if (!make_confirm) { flags = FDRC_FLAG_NO_CONFIRM; if (lits[0].noruns) { flags |= NoRepeat; // messy - need to clean this up later as flags is sorta kinda obsoleted @@ -345,15 +342,11 @@ getFDRConfirm(const vector &lits, bool applyOneCharOpt, return {move(fdrc), actual_size}; } -static -u32 setupMultiConfirms(const vector &lits, - const EngineDescription &eng, BC2CONF &bc2Conf, - map > &bucketToLits, - bool make_small) { - u32 pullBack = eng.getConfirmPullBackDistance(); - u32 splitMask = eng.getConfirmTopLevelSplit() - 1; - bool splitHasCase = splitMask & 0x20; - +pair, size_t> +setupFullConfs(const vector &lits, + const EngineDescription &eng, + map> &bucketToLits, + bool make_small) { bool makeConfirm = true; unique_ptr teddyDescr = getTeddyDescription(eng.getID()); @@ -361,81 +354,24 @@ u32 setupMultiConfirms(const vector &lits, makeConfirm = teddyDescr->needConfirm(lits); } + BC2CONF bc2Conf; u32 totalConfirmSize = 0; for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) { if (!bucketToLits[b].empty()) { - vector> vl(eng.getConfirmTopLevelSplit()); + vector vl; for (const LiteralIndex &lit_idx : bucketToLits[b]) { - hwlmLiteral lit = lits[lit_idx]; // copy - // c is last char of this literal - u8 c = *(lit.s.rbegin()); - - bool suppressSplit = false; - if (pullBack) { - // make a shorter string to work over if we're pulling back - // getFDRConfirm doesn't know about that stuff - assert(lit.s.size() >= pullBack); - lit.s.resize(lit.s.size() - pullBack); - - u8 c_sub, c_sub_msk; - if (lit.msk.empty()) { - c_sub = 0; - c_sub_msk = 0; - } else { - c_sub = *(lit.cmp.rbegin()); - c_sub_msk = *(lit.msk.rbegin()); - size_t len = lit.msk.size() - - min(lit.msk.size(), (size_t)pullBack); - lit.msk.resize(len); - lit.cmp.resize(len); - } - - // if c_sub_msk is 0xff and lit.nocase - // resteer 'c' to an exact value and set suppressSplit - if ((c_sub_msk == 0xff) && (lit.nocase)) { - suppressSplit = true; - c = c_sub; - } - } - - if (!suppressSplit && splitHasCase && lit.nocase && - ourisalpha(c)) { - vl[(u8)(mytoupper(c) & splitMask)].push_back(lit); - vl[(u8)(mytolower(c) & splitMask)].push_back(lit); - } else { - vl[c & splitMask].push_back(lit); - } + vl.push_back(lits[lit_idx]); } - for (u32 c = 0; c < eng.getConfirmTopLevelSplit(); c++) { - if (vl[c].empty()) { - continue; - } - DEBUG_PRINTF("b %d c %02x sz %zu\n", b, c, vl[c].size()); - auto key = make_pair(b, c); - auto fc = getFDRConfirm(vl[c], eng.typicallyHoldsOneCharLits(), - make_small, makeConfirm); - totalConfirmSize += fc.second; - assert(bc2Conf.find(key) == end(bc2Conf)); - bc2Conf.emplace(key, move(fc)); - } + DEBUG_PRINTF("b %d sz %zu\n", b, vl.size()); + auto fc = getFDRConfirm(vl, make_small, makeConfirm); + totalConfirmSize += fc.second; + bc2Conf.emplace(b, move(fc)); } } - return totalConfirmSize; -} -pair, size_t> -setupFullMultiConfs(const vector &lits, - const EngineDescription &eng, - map> &bucketToLits, - bool make_small) { - BC2CONF bc2Conf; - u32 totalConfirmSize = setupMultiConfirms(lits, eng, bc2Conf, bucketToLits, - make_small); - - u32 primarySwitch = eng.getConfirmTopLevelSplit(); u32 nBuckets = eng.getNumBuckets(); - u32 totalConfSwitchSize = primarySwitch * nBuckets * sizeof(u32); + u32 totalConfSwitchSize = nBuckets * sizeof(u32); u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize); auto buf = aligned_zmalloc_unique(totalSize); @@ -445,14 +381,12 @@ setupFullMultiConfs(const vector &lits, u8 *ptr = buf.get() + totalConfSwitchSize; for (const auto &m : bc2Conf) { - const BucketIndex &b = m.first.first; - const u8 &c = m.first.second; + const BucketIndex &idx = m.first; const pair, size_t> &p = m.second; // confirm offset is relative to the base of this structure, now u32 confirm_offset = verify_u32(ptr - buf.get()); memcpy(ptr, p.first.get(), p.second); ptr += p.second; - u32 idx = c * nBuckets + b; confBase[idx] = confirm_offset; } return {move(buf), totalSize}; diff --git a/src/fdr/fdr_confirm_runtime.h b/src/fdr/fdr_confirm_runtime.h index 55985846..a0603c92 100644 --- a/src/fdr/fdr_confirm_runtime.h +++ b/src/fdr/fdr_confirm_runtime.h @@ -40,8 +40,8 @@ // the whole confirmation procedure static really_inline void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a, - size_t i, u32 pullBackAmount, hwlmcb_rv_t *control, - u32 *last_match, u64a conf_key) { + size_t i, hwlmcb_rv_t *control, u32 *last_match, + u64a conf_key) { assert(i < a->len); assert(ISALIGNED(fdrc)); @@ -68,7 +68,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a goto out; } - const u8 *loc = buf + i - li->size + 1 - pullBackAmount; + const u8 *loc = buf + i - li->size + 1; if (loc < buf) { u32 full_overhang = buf - loc; @@ -87,7 +87,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a } if (unlikely(li->flags & ComplexConfirm)) { - const u8 *loc2 = buf + i - li->extended_size + 1 - pullBackAmount; + const u8 *loc2 = buf + i - li->extended_size + 1; if (loc2 < buf) { u32 full_overhang = buf - loc2; size_t len_history = a->len_history; @@ -116,7 +116,7 @@ void confWithBit1(const struct FDRConfirm *fdrc, assert(ISALIGNED(fdrc)); if (unlikely(fdrc->mult)) { - confWithBit(fdrc, a, i, 0, control, last_match, conf_key); + confWithBit(fdrc, a, i, control, last_match, conf_key); return; } else { u32 id = fdrc->nBitsOrSoleID; @@ -144,7 +144,7 @@ void confWithBitMany(const struct FDRConfirm *fdrc, } if (unlikely(fdrc->mult)) { - confWithBit(fdrc, a, i, 0, control, last_match, conf_key); + confWithBit(fdrc, a, i, control, last_match, conf_key); return; } else { const u32 id = fdrc->nBitsOrSoleID; diff --git a/src/fdr/fdr_engine_description.cpp b/src/fdr/fdr_engine_description.cpp index 5e923b08..e44bfbb5 100644 --- a/src/fdr/fdr_engine_description.cpp +++ b/src/fdr/fdr_engine_description.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -44,8 +44,7 @@ namespace ue2 { FDREngineDescription::FDREngineDescription(const FDREngineDef &def) : EngineDescription(def.id, targetByArchFeatures(def.cpu_features), - def.numBuckets, def.confirmPullBackDistance, - def.confirmTopLevelSplit), + def.numBuckets), schemeWidth(def.schemeWidth), stride(0), bits(0) {} u32 FDREngineDescription::getDefaultFloodSuffixLength() const { @@ -55,7 +54,7 @@ u32 FDREngineDescription::getDefaultFloodSuffixLength() const { } void getFdrDescriptions(vector *out) { - static const FDREngineDef def = {0, 128, 8, 0, 1, 256}; + static const FDREngineDef def = {0, 128, 8, 0}; out->clear(); out->emplace_back(def); } diff --git a/src/fdr/fdr_engine_description.h b/src/fdr/fdr_engine_description.h index d4e70d4b..09c5ce86 100644 --- a/src/fdr/fdr_engine_description.h +++ b/src/fdr/fdr_engine_description.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -43,8 +43,6 @@ struct FDREngineDef { u32 schemeWidth; u32 numBuckets; u64a cpu_features; - u32 confirmPullBackDistance; - u32 confirmTopLevelSplit; }; class FDREngineDescription : public EngineDescription { @@ -64,7 +62,6 @@ public: explicit FDREngineDescription(const FDREngineDef &def); u32 getDefaultFloodSuffixLength() const override; - bool typicallyHoldsOneCharLits() const override { return stride == 1; } }; std::unique_ptr diff --git a/src/fdr/teddy_avx2.c b/src/fdr/teddy_avx2.c index e4a836d4..129b99c7 100644 --- a/src/fdr/teddy_avx2.c +++ b/src/fdr/teddy_avx2.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -345,10 +345,7 @@ void do_confWithBit_fast_teddy(u16 bits, const u32 *confBase, const struct FDR_Runtime_Args *a, const u8 *ptr, hwlmcb_rv_t *control, u32 *last_match) { u32 byte = bits / 8; - u32 bitRem = bits % 8; - u32 confSplit = *(ptr+byte) & 0x1f; - u32 idx = confSplit * 8 + bitRem; - u32 cf = confBase[idx]; + u32 cf = confBase[bits % 8]; if (!cf) { return; } @@ -358,7 +355,7 @@ void do_confWithBit_fast_teddy(u16 bits, const u32 *confBase, return; } u64a confVal = getConfVal(a, ptr, byte, reason); - confWithBit(fdrc, a, ptr - a->buf + byte, 0, control, last_match, confVal); + confWithBit(fdrc, a, ptr - a->buf + byte, control, last_match, confVal); } static really_inline diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index 15b9665b..ac3a0203 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -309,7 +309,7 @@ TeddyCompiler::build(pair, size_t> &link) { size_t maskLen = eng.numMasks * 16 * 2 * maskWidth; auto floodControlTmp = setupFDRFloodControl(lits, eng); - auto confirmTmp = setupFullMultiConfs(lits, eng, bucketToLits, make_small); + auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small); size_t size = ROUNDUP_N(sizeof(Teddy) + maskLen + diff --git a/src/fdr/teddy_engine_description.cpp b/src/fdr/teddy_engine_description.cpp index d95f4937..9e876b0b 100644 --- a/src/fdr/teddy_engine_description.cpp +++ b/src/fdr/teddy_engine_description.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -44,8 +44,7 @@ namespace ue2 { TeddyEngineDescription::TeddyEngineDescription(const TeddyEngineDef &def) : EngineDescription(def.id, targetByArchFeatures(def.cpu_features), - def.numBuckets, def.confirmPullBackDistance, - def.confirmTopLevelSplit), + def.numBuckets), numMasks(def.numMasks), packed(def.packed) {} u32 TeddyEngineDescription::getDefaultFloodSuffixLength() const { @@ -66,24 +65,24 @@ bool TeddyEngineDescription::needConfirm(const vector &lits) const void getTeddyDescriptions(vector *out) { static const TeddyEngineDef defns[] = { - { 1, 0 | HS_CPU_FEATURES_AVX2, 1, 8, false, 0, 1 }, - { 2, 0 | HS_CPU_FEATURES_AVX2, 1, 8, true, 0, 32 }, - { 3, 0 | HS_CPU_FEATURES_AVX2, 1, 16, false, 0, 1 }, - { 4, 0 | HS_CPU_FEATURES_AVX2, 1, 16, true, 0, 32 }, - { 5, 0 | HS_CPU_FEATURES_AVX2, 2, 16, false, 0, 1 }, - { 6, 0 | HS_CPU_FEATURES_AVX2, 2, 16, true, 0, 32 }, - { 7, 0 | HS_CPU_FEATURES_AVX2, 3, 16, false, 0, 1 }, - { 8, 0 | HS_CPU_FEATURES_AVX2, 3, 16, true, 0, 32 }, - { 9, 0 | HS_CPU_FEATURES_AVX2, 4, 16, false, 0, 1 }, - { 10, 0 | HS_CPU_FEATURES_AVX2, 4, 16, true, 0, 32 }, - { 11, 0, 1, 8, false, 0, 1 }, - { 12, 0, 1, 8, true, 0, 32 }, - { 13, 0, 2, 8, false, 0, 1 }, - { 14, 0, 2, 8, true, 0, 32 }, - { 15, 0, 3, 8, false, 0, 1 }, - { 16, 0, 3, 8, true, 0, 32 }, - { 17, 0, 4, 8, false, 0, 1 }, - { 18, 0, 4, 8, true, 0, 32 }, + { 1, 0 | HS_CPU_FEATURES_AVX2, 1, 8, false }, + { 2, 0 | HS_CPU_FEATURES_AVX2, 1, 8, true }, + { 3, 0 | HS_CPU_FEATURES_AVX2, 1, 16, false }, + { 4, 0 | HS_CPU_FEATURES_AVX2, 1, 16, true }, + { 5, 0 | HS_CPU_FEATURES_AVX2, 2, 16, false }, + { 6, 0 | HS_CPU_FEATURES_AVX2, 2, 16, true }, + { 7, 0 | HS_CPU_FEATURES_AVX2, 3, 16, false }, + { 8, 0 | HS_CPU_FEATURES_AVX2, 3, 16, true }, + { 9, 0 | HS_CPU_FEATURES_AVX2, 4, 16, false }, + { 10, 0 | HS_CPU_FEATURES_AVX2, 4, 16, true }, + { 11, 0, 1, 8, false }, + { 12, 0, 1, 8, true }, + { 13, 0, 2, 8, false }, + { 14, 0, 2, 8, true }, + { 15, 0, 3, 8, false }, + { 16, 0, 3, 8, true }, + { 17, 0, 4, 8, false }, + { 18, 0, 4, 8, true }, }; out->clear(); for (const auto &def : defns) { diff --git a/src/fdr/teddy_engine_description.h b/src/fdr/teddy_engine_description.h index 88d20139..3979a5d3 100644 --- a/src/fdr/teddy_engine_description.h +++ b/src/fdr/teddy_engine_description.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -45,8 +45,6 @@ struct TeddyEngineDef { u32 numMasks; u32 numBuckets; bool packed; - u32 confirmPullBackDistance; - u32 confirmTopLevelSplit; }; class TeddyEngineDescription : public EngineDescription { diff --git a/src/fdr/teddy_runtime_common.h b/src/fdr/teddy_runtime_common.h index dc65c70a..c5f0885f 100644 --- a/src/fdr/teddy_runtime_common.h +++ b/src/fdr/teddy_runtime_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -180,9 +180,7 @@ void do_confWithBit_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset, do { u32 bit = TEDDY_FIND_AND_CLEAR_LSB(conf); u32 byte = bit / bucket + offset; - u32 bitRem = bit % bucket; - u32 confSplit = *(ptr+byte) & 0x1f; - u32 idx = confSplit * bucket + bitRem; + u32 idx = bit % bucket; u32 cf = confBase[idx]; if (!cf) { continue; @@ -193,7 +191,7 @@ void do_confWithBit_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset, continue; } u64a confVal = getConfVal(a, ptr, byte, reason); - confWithBit(fdrc, a, ptr - a->buf + byte, 0, control, + confWithBit(fdrc, a, ptr - a->buf + byte, control, last_match, confVal); } while (unlikely(*conf)); }